dataeval 0.86.9__py3-none-any.whl → 0.88.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +1 -1
- dataeval/_log.py +1 -1
- dataeval/_version.py +2 -2
- dataeval/config.py +4 -19
- dataeval/data/_embeddings.py +78 -35
- dataeval/data/_images.py +41 -8
- dataeval/data/_metadata.py +348 -66
- dataeval/data/_selection.py +22 -7
- dataeval/data/_split.py +3 -2
- dataeval/data/selections/_classbalance.py +4 -3
- dataeval/data/selections/_classfilter.py +9 -8
- dataeval/data/selections/_indices.py +4 -3
- dataeval/data/selections/_prioritize.py +249 -29
- dataeval/data/selections/_reverse.py +1 -1
- dataeval/data/selections/_shuffle.py +5 -4
- dataeval/detectors/drift/_base.py +2 -1
- dataeval/detectors/drift/_mmd.py +2 -1
- dataeval/detectors/drift/_nml/_base.py +1 -1
- dataeval/detectors/drift/_nml/_chunk.py +2 -1
- dataeval/detectors/drift/_nml/_result.py +3 -2
- dataeval/detectors/drift/_nml/_thresholds.py +6 -5
- dataeval/detectors/drift/_uncertainty.py +2 -1
- dataeval/detectors/linters/duplicates.py +2 -1
- dataeval/detectors/linters/outliers.py +4 -3
- dataeval/detectors/ood/__init__.py +2 -1
- dataeval/detectors/ood/ae.py +1 -1
- dataeval/detectors/ood/base.py +39 -1
- dataeval/detectors/ood/knn.py +95 -0
- dataeval/detectors/ood/mixin.py +2 -1
- dataeval/metadata/_utils.py +1 -1
- dataeval/metrics/bias/_balance.py +29 -22
- dataeval/metrics/bias/_diversity.py +4 -4
- dataeval/metrics/bias/_parity.py +2 -2
- dataeval/metrics/stats/_base.py +3 -29
- dataeval/metrics/stats/_boxratiostats.py +2 -1
- dataeval/metrics/stats/_dimensionstats.py +2 -1
- dataeval/metrics/stats/_hashstats.py +21 -3
- dataeval/metrics/stats/_pixelstats.py +2 -1
- dataeval/metrics/stats/_visualstats.py +2 -1
- dataeval/outputs/_base.py +2 -3
- dataeval/outputs/_bias.py +2 -1
- dataeval/outputs/_estimators.py +1 -1
- dataeval/outputs/_linters.py +3 -3
- dataeval/outputs/_stats.py +3 -3
- dataeval/outputs/_utils.py +1 -1
- dataeval/outputs/_workflows.py +49 -31
- dataeval/typing.py +23 -9
- dataeval/utils/__init__.py +2 -2
- dataeval/utils/_array.py +3 -2
- dataeval/utils/_bin.py +9 -7
- dataeval/utils/_method.py +2 -3
- dataeval/utils/_multiprocessing.py +34 -0
- dataeval/utils/_plot.py +2 -1
- dataeval/utils/data/__init__.py +6 -5
- dataeval/utils/data/{metadata.py → _merge.py} +3 -2
- dataeval/utils/data/_validate.py +170 -0
- dataeval/utils/data/collate.py +2 -1
- dataeval/utils/torch/_internal.py +2 -1
- dataeval/utils/torch/trainer.py +1 -1
- dataeval/workflows/sufficiency.py +13 -9
- {dataeval-0.86.9.dist-info → dataeval-0.88.0.dist-info}/METADATA +8 -21
- dataeval-0.88.0.dist-info/RECORD +105 -0
- dataeval/utils/data/_dataset.py +0 -246
- dataeval/utils/datasets/__init__.py +0 -21
- dataeval/utils/datasets/_antiuav.py +0 -189
- dataeval/utils/datasets/_base.py +0 -266
- dataeval/utils/datasets/_cifar10.py +0 -201
- dataeval/utils/datasets/_fileio.py +0 -142
- dataeval/utils/datasets/_milco.py +0 -197
- dataeval/utils/datasets/_mixin.py +0 -54
- dataeval/utils/datasets/_mnist.py +0 -202
- dataeval/utils/datasets/_seadrone.py +0 -512
- dataeval/utils/datasets/_ships.py +0 -144
- dataeval/utils/datasets/_types.py +0 -48
- dataeval/utils/datasets/_voc.py +0 -583
- dataeval-0.86.9.dist-info/RECORD +0 -115
- {dataeval-0.86.9.dist-info → dataeval-0.88.0.dist-info}/WHEEL +0 -0
- /dataeval-0.86.9.dist-info/licenses/LICENSE.txt → /dataeval-0.88.0.dist-info/licenses/LICENSE +0 -0
dataeval/utils/torch/trainer.py
CHANGED
@@ -16,7 +16,7 @@ from dataeval.config import DeviceLike, get_device
|
|
16
16
|
|
17
17
|
def get_images_from_batch(batch: Any) -> Any:
|
18
18
|
"""Extracts images from a batch of collated data by DataLoader"""
|
19
|
-
return batch[0] if isinstance(batch, (list, tuple)) else batch
|
19
|
+
return batch[0] if isinstance(batch, list | tuple) else batch
|
20
20
|
|
21
21
|
|
22
22
|
class AETrainer:
|
@@ -2,7 +2,8 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
__all__ = []
|
4
4
|
|
5
|
-
from
|
5
|
+
from collections.abc import Callable, Iterable, Mapping, Sequence, Sized
|
6
|
+
from typing import Any, Generic, TypeVar
|
6
7
|
|
7
8
|
import numpy as np
|
8
9
|
import torch
|
@@ -207,7 +208,9 @@ class Sufficiency(Generic[T]):
|
|
207
208
|
... substeps=5,
|
208
209
|
... )
|
209
210
|
>>> suff.evaluate()
|
210
|
-
SufficiencyOutput(steps=array([ 1, 3, 10, 31, 100], dtype=uint32), measures={'test': array([1., 1., 1., 1., 1.])}, n_iter=1000)
|
211
|
+
SufficiencyOutput(steps=array([ 1, 3, 10, 31, 100], dtype=uint32), measures={'test': array([[1., 1., 1., 1., 1.],
|
212
|
+
[1., 1., 1., 1., 1.],
|
213
|
+
[1., 1., 1., 1., 1.]])}, averaged_measures={'test': array([1., 1., 1., 1., 1.])}, n_iter=1000)
|
211
214
|
|
212
215
|
Evaluate at a single value
|
213
216
|
|
@@ -219,7 +222,7 @@ class Sufficiency(Generic[T]):
|
|
219
222
|
... eval_fn=eval_fn,
|
220
223
|
... )
|
221
224
|
>>> suff.evaluate(eval_at=50)
|
222
|
-
SufficiencyOutput(steps=array([50]), measures={'test': array([1.])}, n_iter=1000)
|
225
|
+
SufficiencyOutput(steps=array([50]), measures={'test': array([[1.]])}, averaged_measures={'test': array([1.])}, n_iter=1000)
|
223
226
|
|
224
227
|
Evaluating at linear steps from 0-100 inclusive
|
225
228
|
|
@@ -231,7 +234,7 @@ class Sufficiency(Generic[T]):
|
|
231
234
|
... eval_fn=eval_fn,
|
232
235
|
... )
|
233
236
|
>>> suff.evaluate(eval_at=np.arange(0, 101, 20))
|
234
|
-
SufficiencyOutput(steps=array([ 0, 20, 40, 60, 80, 100]), measures={'test': array([1., 1., 1., 1., 1., 1.])}, n_iter=1000)
|
237
|
+
SufficiencyOutput(steps=array([ 0, 20, 40, 60, 80, 100]), measures={'test': array([[1., 1., 1., 1., 1., 1.]])}, averaged_measures={'test': array([1., 1., 1., 1., 1., 1.])}, n_iter=1000)
|
235
238
|
|
236
239
|
""" # noqa: E501
|
237
240
|
if eval_at is not None:
|
@@ -249,7 +252,7 @@ class Sufficiency(Generic[T]):
|
|
249
252
|
measures = {}
|
250
253
|
|
251
254
|
# Run each model over all indices
|
252
|
-
for
|
255
|
+
for run in range(self.runs):
|
253
256
|
# Create a randomized set of indices to use
|
254
257
|
indices = np.random.randint(0, self._length, size=self._length)
|
255
258
|
# Reset the network weights to "create" an untrained model
|
@@ -272,9 +275,10 @@ class Sufficiency(Generic[T]):
|
|
272
275
|
# Sum result into current substep iteration to be averaged later
|
273
276
|
value = np.array(value).ravel()
|
274
277
|
if name not in measures:
|
275
|
-
measures[name] = np.zeros(
|
276
|
-
|
278
|
+
measures[name] = np.zeros(
|
279
|
+
(self.runs, substeps) if len(value) == 1 else (self.runs, substeps, len(value))
|
280
|
+
)
|
277
281
|
|
282
|
+
measures[name][run, iteration] = value
|
278
283
|
# The mean for each measure must be calculated before being returned
|
279
|
-
|
280
|
-
return SufficiencyOutput(ranges, measures)
|
284
|
+
return SufficiencyOutput(ranges, measures=measures)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: dataeval
|
3
|
-
Version: 0.86.9
|
3
|
+
Version: 0.88.0
|
4
4
|
Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
|
5
5
|
Project-URL: Homepage, https://dataeval.ai/
|
6
6
|
Project-URL: Repository, https://github.com/aria-ml/dataeval/
|
@@ -8,33 +8,29 @@ Project-URL: Documentation, https://dataeval.readthedocs.io/
|
|
8
8
|
Author-email: Andrew Weng <andrew.weng@ariacoustics.com>, Bill Peria <bill.peria@ariacoustics.com>, Jon Botts <jonathan.botts@ariacoustics.com>, Jonathan Christian <jonathan.christian@ariacoustics.com>, Justin McMillan <justin.mcmillan@ariacoustics.com>, Ryan Wood <ryan.wood@ariacoustics.com>, Scott Swan <scott.swan@ariacoustics.com>, Shaun Jullens <shaun.jullens@ariacoustics.com>
|
9
9
|
Maintainer-email: ARiA <dataeval@ariacoustics.com>
|
10
10
|
License-Expression: MIT
|
11
|
-
License-File: LICENSE.txt
|
11
|
+
License-File: LICENSE
|
12
12
|
Classifier: Development Status :: 4 - Beta
|
13
13
|
Classifier: Intended Audience :: Science/Research
|
14
14
|
Classifier: License :: OSI Approved :: MIT License
|
15
15
|
Classifier: Operating System :: OS Independent
|
16
16
|
Classifier: Programming Language :: Python :: 3 :: Only
|
17
|
-
Classifier: Programming Language :: Python :: 3.9
|
18
17
|
Classifier: Programming Language :: Python :: 3.10
|
19
18
|
Classifier: Programming Language :: Python :: 3.11
|
20
19
|
Classifier: Programming Language :: Python :: 3.12
|
21
20
|
Classifier: Topic :: Scientific/Engineering
|
22
|
-
Requires-Python: <3.13,>=3.9
|
23
|
-
Requires-Dist: defusedxml>=0.7.1
|
21
|
+
Requires-Python: <3.13,>=3.10
|
24
22
|
Requires-Dist: fast-hdbscan==0.2.0
|
25
23
|
Requires-Dist: lightgbm>=4
|
26
24
|
Requires-Dist: numba>=0.59.1
|
27
25
|
Requires-Dist: numpy>=1.24.2
|
28
26
|
Requires-Dist: pandas>=2.0
|
29
|
-
Requires-Dist: pillow>=10.3.0
|
30
27
|
Requires-Dist: polars>=1.0.0
|
31
|
-
Requires-Dist: requests>=2.32.3
|
32
28
|
Requires-Dist: scikit-learn>=1.5.0
|
33
29
|
Requires-Dist: scipy>=1.10
|
34
30
|
Requires-Dist: torch>=2.2.0
|
35
31
|
Requires-Dist: torchvision>=0.17.0
|
36
32
|
Requires-Dist: tqdm>=4.66
|
37
|
-
Requires-Dist: typing-extensions>=4.12
|
33
|
+
Requires-Dist: typing-extensions>=4.12
|
38
34
|
Requires-Dist: xxhash>=3.3
|
39
35
|
Provides-Extra: all
|
40
36
|
Requires-Dist: matplotlib>=3.7.1; extra == 'all'
|
@@ -91,7 +87,7 @@ using MAITE-compliant datasets and models.
|
|
91
87
|
|
92
88
|
## Getting Started
|
93
89
|
|
94
|
-
**Python versions:** 3.9 - 3.12
|
90
|
+
**Python versions:** 3.10 - 3.12
|
95
91
|
|
96
92
|
**Supported packages**: _NumPy_, _Pandas_, _Sci-kit learn_, _MAITE_, _NRTK_
|
97
93
|
|
@@ -123,14 +119,8 @@ micromamba create -f environment\environment.yaml -c pytorch
|
|
123
119
|
|
124
120
|
### **Installing from GitHub**
|
125
121
|
|
126
|
-
To install DataEval from source locally on Ubuntu,
|
127
|
-
|
128
|
-
|
129
|
-
```bash
|
130
|
-
sudo apt-get install git-lfs
|
131
|
-
```
|
132
|
-
|
133
|
-
Pull the source down and change to the DataEval project directory.
|
122
|
+
To install DataEval from source locally on Ubuntu, pull the source down and
|
123
|
+
change to the DataEval project directory.
|
134
124
|
|
135
125
|
```bash
|
136
126
|
git clone https://github.com/aria-ml/dataeval.git
|
@@ -167,10 +157,7 @@ source .venv/bin/activate
|
|
167
157
|
|
168
158
|
## Contact Us
|
169
159
|
|
170
|
-
If you have any questions, feel free to reach out to
|
171
|
-
|
172
|
-
- **POC**: Scott Swan @scott.swan
|
173
|
-
- **DPOC**: Andrew Weng @aweng
|
160
|
+
If you have any questions, feel free to reach out to [us](mailto:dataeval@ariacoustics.com)!
|
174
161
|
|
175
162
|
## Acknowledgement
|
176
163
|
|
@@ -0,0 +1,105 @@
|
|
1
|
+
dataeval/__init__.py,sha256=aFzX3SLx8wgc763RY772P41ZLqeHcUHRKW9XAN0KfHQ,1793
|
2
|
+
dataeval/_log.py,sha256=Q2d6oqYKXyn1wkgMdNX9iswod4Jq0jPADShrCFVgJI0,374
|
3
|
+
dataeval/_version.py,sha256=p36W3DcVLrkAWnGoljUjU-PF8_IvHjfGbC98bXZ2g_c,513
|
4
|
+
dataeval/config.py,sha256=lL73s_xa9pBxHHCnBKi59D_tl4vS7ig1rfWbIYkM_ac,3839
|
5
|
+
dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
+
dataeval/typing.py,sha256=cKpK8rY7iVf-KL9kuye6qi_6LS6hKbMxHpurdWlYY44,7445
|
7
|
+
dataeval/data/__init__.py,sha256=wzQ6uUFLNB3VJR0a2QnRBYwEmwXT93q0WpHu7FmFW1E,486
|
8
|
+
dataeval/data/_embeddings.py,sha256=BHoiSdt46TblVSglg9Cyrm8iAJJq1Z2jR4wesL0WOf4,14731
|
9
|
+
dataeval/data/_images.py,sha256=9q0O5Zurf-5727rMC7DB_i3TtXcL67D9a5GGx5qlov8,3875
|
10
|
+
dataeval/data/_metadata.py,sha256=-dhmyX6vvv97S9YHr6roNcbzXequkxMw48PwNDdzZ9I,24163
|
11
|
+
dataeval/data/_selection.py,sha256=4qI-GwSdEGiRCyr3kqxr6uOiyRRKsPBRzYHmpgdWLY0,5301
|
12
|
+
dataeval/data/_split.py,sha256=aCkXFvkCw8VkWICdCmY9tHiEvkQI5j9jUa7QLjm-gZE,16759
|
13
|
+
dataeval/data/selections/__init__.py,sha256=2m8ZB53wXzqLcqmc6p5atO6graB6ZyiRSNJFxf11X_g,613
|
14
|
+
dataeval/data/selections/_classbalance.py,sha256=AqExg-QnYBcfBNzS1Ygsz3Cgb2cqcgGXE0-cseD8_vA,1580
|
15
|
+
dataeval/data/selections/_classfilter.py,sha256=sn7Lbhb5KJyiqDH7ZdugX5WnRlglwCckWzK6K5uXbwg,4447
|
16
|
+
dataeval/data/selections/_indices.py,sha256=PcM4qQwFVUXO9xY2brO6OUDFMeVgiSlTRKFDhTA_W-0,655
|
17
|
+
dataeval/data/selections/_limit.py,sha256=JG4GmEiNKt3sk4PbOUbBnGGzNlyz72H-kQrt8COMm4Y,512
|
18
|
+
dataeval/data/selections/_prioritize.py,sha256=ss_GZ5MB2ohdNuB55C69TYNwV3PUSmk715gDJI6qfYA,20140
|
19
|
+
dataeval/data/selections/_reverse.py,sha256=FqYlpPg-0Vz75kbEhGFrJlzIGELSmDZxPlBMY18a57I,365
|
20
|
+
dataeval/data/selections/_shuffle.py,sha256=uW_Zss773ob2swqwTdL6G-CzMElCq8TO2TScvABQR1U,1268
|
21
|
+
dataeval/detectors/__init__.py,sha256=3Sg-XWlwr75zEEH3hZKA4nWMtGvaRlnfzTWvZG_Ak6U,189
|
22
|
+
dataeval/detectors/drift/__init__.py,sha256=Jqv98oOVeC2tvHlNGxQ8RJ6De2q4SyS5lTpaYlb4ocM,756
|
23
|
+
dataeval/detectors/drift/_base.py,sha256=__mlqkiPW0GcVSVE4u9t6M2mp3rAU5leSk_XPQn_Mp8,7619
|
24
|
+
dataeval/detectors/drift/_cvm.py,sha256=cS33zWJmFY1fft1XcANcP2jSD5ou7TxvIU2AldhTynM,3004
|
25
|
+
dataeval/detectors/drift/_ks.py,sha256=uMc5-NA-lSV1IODrY8uJe87ll3uRJT_oXLJFXy95M1w,3186
|
26
|
+
dataeval/detectors/drift/_mmd.py,sha256=EkfbeK5L6xGGQrcA1v_0YlpIOingF73jn2H6s3tRKbo,11550
|
27
|
+
dataeval/detectors/drift/_mvdc.py,sha256=WMN6aDOWCh1q1MtdRXFIZlFcfnVi4XgBHsS0A6L5UuY,2942
|
28
|
+
dataeval/detectors/drift/_uncertainty.py,sha256=-4aiwNosJ1_4kY-d2n4YbZV_jvnf5xdTMDELXSoW6OM,5874
|
29
|
+
dataeval/detectors/drift/updates.py,sha256=L1PnrPlIE1x6ujCc5mCwjcAZwadVTn-Zjb6MnTDvzJQ,2251
|
30
|
+
dataeval/detectors/drift/_nml/__init__.py,sha256=MNyKyZlfTjr5uQql2uBBfRkUdsuduie_WJdn09GYmqg,137
|
31
|
+
dataeval/detectors/drift/_nml/_base.py,sha256=wMqegfa92Tldqix1RL6dLMdiKgX0GqHmTiFxO38ja_c,2672
|
32
|
+
dataeval/detectors/drift/_nml/_chunk.py,sha256=5WhpcIHJ3EVBGZjDJLri54fWikYAT_7sC0DxQkSs0tI,13591
|
33
|
+
dataeval/detectors/drift/_nml/_domainclassifier.py,sha256=n7Ttq5Ej7sAY9Jn2iagaGj4IIWiG8gmA3wwFizlBqes,7292
|
34
|
+
dataeval/detectors/drift/_nml/_result.py,sha256=mH_tYrYVaIXhsU9gcSFqEdaI38BArlpKuW0-8SPS8aY,3295
|
35
|
+
dataeval/detectors/drift/_nml/_thresholds.py,sha256=jAbRdAPP4O4hJqTLpvfVAbWNdw3zL6UrTl2KNWphQPc,12083
|
36
|
+
dataeval/detectors/linters/__init__.py,sha256=xn2zPwUcmsuf-Jd9uw6AVI11C9z1b1Y9fYtuFnXenZ0,404
|
37
|
+
dataeval/detectors/linters/duplicates.py,sha256=k5cQz_1i9vchugSfC267mugWzgy6sVDa36BdQAy_PXs,4990
|
38
|
+
dataeval/detectors/linters/outliers.py,sha256=a980lDV9g_tZYHV9k6wSh2d11nNYEqTy56IduC-H5GA,10159
|
39
|
+
dataeval/detectors/ood/__init__.py,sha256=qDoDdQetJY1xZB43dNzcOIO_8NiEuEU0z1QNU4QkEXs,341
|
40
|
+
dataeval/detectors/ood/ae.py,sha256=jo6aHcKT1N13ew0tV6FZa3vQI5DQivZo5-uAm4uLaAs,2950
|
41
|
+
dataeval/detectors/ood/base.py,sha256=eWzODq2i1Tah7Mqm1guASTf9p2tF4Tr6mZoDT3pDvsk,4401
|
42
|
+
dataeval/detectors/ood/knn.py,sha256=Fu77geQFHPYNOn81VIXUJ3yC3t5Ylv0ZgvwMeA2JX6I,3782
|
43
|
+
dataeval/detectors/ood/mixin.py,sha256=cNmRrR9cv9phwAGSuQMC7EhmrFtf68C63wdTggy1UaU,5458
|
44
|
+
dataeval/metadata/__init__.py,sha256=XDDmJbOZBNM6pL0r6Nbu6oMRoyAh22IDkPYGndNlkZU,316
|
45
|
+
dataeval/metadata/_distance.py,sha256=MbXM9idsooNWnGLaTKg8j4ZqavUeJUjuW7EPW3-UQyg,4234
|
46
|
+
dataeval/metadata/_ood.py,sha256=lNPHouj_9WfM_uTtsaiRaPn46RcVy3YebD1c32vDj-c,8981
|
47
|
+
dataeval/metadata/_utils.py,sha256=4fX-1eA3fK4uwNh_DfOGiXxl4PHZ1AghOejJ03rV3RI,1219
|
48
|
+
dataeval/metrics/__init__.py,sha256=8VC8q3HuJN3o_WN51Ae2_wXznl3RMXIvA5GYVcy7vr8,225
|
49
|
+
dataeval/metrics/bias/__init__.py,sha256=329S1_3WnWqeU4-qVcbe0fMy4lDrj9uKslWHIQf93yg,839
|
50
|
+
dataeval/metrics/bias/_balance.py,sha256=aDAII2lXeAz9dZk0TdgZHtyab1tObQlya8jdNNdn8eI,5718
|
51
|
+
dataeval/metrics/bias/_completeness.py,sha256=2cvOXe7fhtxZGH_4QBuiCafIeamxFBarMiUBuEP7QGI,4596
|
52
|
+
dataeval/metrics/bias/_coverage.py,sha256=v2x2hbOf2za9jFcSVSJUAoJ2BJfzzlCzt0mFIGtBL0A,3639
|
53
|
+
dataeval/metrics/bias/_diversity.py,sha256=Z7UQzKp9bsmB-hC3_sY6HIJUJRkLHb5cVEoU79cNDzc,5800
|
54
|
+
dataeval/metrics/bias/_parity.py,sha256=ZIKc5OK6wQ4moleBJzGDfOPvyNzj03-KoHAGBZnO4pk,11433
|
55
|
+
dataeval/metrics/estimators/__init__.py,sha256=Pnds8uIyAovt2fKqZjiHCIP_kVoBWlVllekYuK5UmmU,568
|
56
|
+
dataeval/metrics/estimators/_ber.py,sha256=7noeRyOJJYqrJ_jt90nRHtR2t2u5MIvTCmWt0_rd4EU,5370
|
57
|
+
dataeval/metrics/estimators/_clusterer.py,sha256=1HrpihGTJ63IkNSOy4Ibw633Gllkm1RxKmoKT5MOgt0,1434
|
58
|
+
dataeval/metrics/estimators/_divergence.py,sha256=t-Z_7Bq4V4FunxKlq7G4ThtgLany8n4iEU0n0afr7F8,3991
|
59
|
+
dataeval/metrics/estimators/_uap.py,sha256=BULEBbJ9BQ1IcTeZf0x7iI60QHAWCccBOM97FIu9VXA,1928
|
60
|
+
dataeval/metrics/stats/__init__.py,sha256=6tA_9nbbM5ObJ6cds8Y1VBtTQiTOxrpGQSFLu_lWGGA,1098
|
61
|
+
dataeval/metrics/stats/_base.py,sha256=vE8dvrNqjAKGyCzqlgQa-3ArP6PJ-P8Y4rdNPUZ0ml8,11703
|
62
|
+
dataeval/metrics/stats/_boxratiostats.py,sha256=CFn-BqnPmAXagaLlhJGusdGLQewWxRweb9Xxv_JAOaw,6477
|
63
|
+
dataeval/metrics/stats/_dimensionstats.py,sha256=GlzshH7nZurVWANmZmpuXy_v5ZfMrdAfO_FbtHTL38Q,2903
|
64
|
+
dataeval/metrics/stats/_hashstats.py,sha256=wsy8F8-UMUbtjeCnaqAR9Yxv_jp4kFerHH2L0UMIAgY,5415
|
65
|
+
dataeval/metrics/stats/_imagestats.py,sha256=gUPNgN5Zwzdr7WnSwbve1NXNsyxd5dy3cSnlR_7guCg,3007
|
66
|
+
dataeval/metrics/stats/_labelstats.py,sha256=_dXt3p8_-SHEtHvJWbL0rnQvO2g30zxX42mG2LGJepU,3195
|
67
|
+
dataeval/metrics/stats/_pixelstats.py,sha256=XEFByxMUbNaCvEsnVhH5ewJ8UH253ySdpFe5u1jr38w,3339
|
68
|
+
dataeval/metrics/stats/_visualstats.py,sha256=SbXvNWxfKrw-2wCu5FXMsnpsMUVaQzdJkj6RB4qEsBM,3740
|
69
|
+
dataeval/outputs/__init__.py,sha256=geHB5M3QOiFFaQGV4ZwDTTKpqZPvPePbqG7lzaPhaXQ,1741
|
70
|
+
dataeval/outputs/_base.py,sha256=lVC7xmBgv3JYY2wVLaGBMPlkRE_KV9UloaeQn0nQydA,5875
|
71
|
+
dataeval/outputs/_bias.py,sha256=gj2AgSKOdq6bj59RMiHpha4Skld6ZMB8cW5KesOZ6T4,10483
|
72
|
+
dataeval/outputs/_drift.py,sha256=hXILED_soY8ppIQZgftQvmumtwDrTnABbYl-flIGEU4,4588
|
73
|
+
dataeval/outputs/_estimators.py,sha256=SUjur5jI6OU9C7GpsAuA_qqO1PRnS-8eZN-otsaV5q0,3120
|
74
|
+
dataeval/outputs/_linters.py,sha256=N4nP5HMoeN2zLndWzhoIT5QB1Ujxbs8Gx5pWPKhl3yc,6683
|
75
|
+
dataeval/outputs/_metadata.py,sha256=ffZgpX8KWURPHXpOWjbvJ2KRqWQkS2nWuIjKUzoHhMI,1710
|
76
|
+
dataeval/outputs/_ood.py,sha256=suLKVXULGtXH0rq9eXHI1d3d2jhGmItJtz4QiQd47A4,1718
|
77
|
+
dataeval/outputs/_stats.py,sha256=PsDV0uw41aTy-X9tjz-PqOj78TTnH4JQVpOrU3OThAE,17423
|
78
|
+
dataeval/outputs/_utils.py,sha256=KJ1P8tcMFIkGi2A6VfqbZwLcT1cD0c2YssTbWbHALjE,938
|
79
|
+
dataeval/outputs/_workflows.py,sha256=sw13FNx1vANX7DBsKeOLfP2bkp5r6SexBorfb9dxYxU,12160
|
80
|
+
dataeval/utils/__init__.py,sha256=sjelzMPaTImF6isiRcp8UGDE3tppEpWS5GoR8WKPZ1k,242
|
81
|
+
dataeval/utils/_array.py,sha256=P4_gyH3kkksUJm9Vqx-oPtLWxFmqMacUJzhj0vmrUd8,6361
|
82
|
+
dataeval/utils/_bin.py,sha256=QjlRCB5mOauETdxSbvRxRG17riO6gScsMd_lNnnvqxs,7391
|
83
|
+
dataeval/utils/_clusterer.py,sha256=rUvEdyMwp95lffmt6xKMEwsjRXNoBS0n5mAS_HNOnck,5656
|
84
|
+
dataeval/utils/_fast_mst.py,sha256=pv42flr1Uf5RBa9qDG0YLDXWH7Mr7a9zpauO1HqZXaY,8061
|
85
|
+
dataeval/utils/_image.py,sha256=4uxTIOYZZlRJOfNmdA3ek3no3FrLWCK5un48kStMDt8,3578
|
86
|
+
dataeval/utils/_method.py,sha256=53Q3xfQvpyGa-z9_rn6GhjfCcUR5Q9nuWQtCNav4Ftc,391
|
87
|
+
dataeval/utils/_mst.py,sha256=bLmJmu_1Dtj3hC5gQp3oAiJ_7TKtEjahTqusVRRU4eI,2168
|
88
|
+
dataeval/utils/_multiprocessing.py,sha256=n6qCVybheWry42NCdxzcsgkJ9xLGkz8m12SWgviTJQM,1060
|
89
|
+
dataeval/utils/_plot.py,sha256=kwYZbSdHCV02wRrZDTxR3xd0XfQ-6TlLmfvubwiEQcw,7252
|
90
|
+
dataeval/utils/data/__init__.py,sha256=_XoNxADxBdR96Ca-0RgkDlQa6C2FHld1nwts-Xdif3g,294
|
91
|
+
dataeval/utils/data/_merge.py,sha256=9JKtlpBarMX_9jlhnQg1AmBwTe9I2w6xQkFGss3IkkU,14729
|
92
|
+
dataeval/utils/data/_validate.py,sha256=YH5Q6uzcTRdf_AMKMRyYW37RUlXm-S8ddhw6cegdNkc,6950
|
93
|
+
dataeval/utils/data/collate.py,sha256=AWoQ2k9FXyTeq6ExTsGa6sBML_lZm9p38-DN9hnpm8E,3963
|
94
|
+
dataeval/utils/torch/__init__.py,sha256=dn5mjCrFp0b1aL_UEURhONU0Ag0cmXoTOBSGagpkTiA,325
|
95
|
+
dataeval/utils/torch/_blocks.py,sha256=HVhBTMMD5NA4qheMUgyol1KWiKZDIuc8k5j4RcMKmhk,1466
|
96
|
+
dataeval/utils/torch/_gmm.py,sha256=XM68GNEP97EjaB1U49-ZXRb81d0CEFnPS910alrcB3g,3740
|
97
|
+
dataeval/utils/torch/_internal.py,sha256=LiuqZGIzKewp_29_Lskj0mnNqdMffMheMdgGeXLDI5g,4173
|
98
|
+
dataeval/utils/torch/models.py,sha256=1idpXyjrYcCBSsbxxRUOto8xr4MJNjDEqQHiIXVU5Zc,9700
|
99
|
+
dataeval/utils/torch/trainer.py,sha256=kBdgxd9TL1Pvz-dyZbS__POAKeFrDiQ4vKFh8ltJApc,5543
|
100
|
+
dataeval/workflows/__init__.py,sha256=ou8y0KO-d6W5lgmcyLjKlf-J_ckP3vilW7wHkgiDlZ4,255
|
101
|
+
dataeval/workflows/sufficiency.py,sha256=4DTDaYyEuAfO0LTFpQGXXXayV5aCIbziSL2Rddd1vQ0,10360
|
102
|
+
dataeval-0.88.0.dist-info/METADATA,sha256=Y5NRZgrhfpyGQKHUnqnO6rAItVR3oWUqIp646_0xluQ,5601
|
103
|
+
dataeval-0.88.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
104
|
+
dataeval-0.88.0.dist-info/licenses/LICENSE,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
|
105
|
+
dataeval-0.88.0.dist-info/RECORD,,
|
dataeval/utils/data/_dataset.py
DELETED
@@ -1,246 +0,0 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
__all__ = []
|
4
|
-
|
5
|
-
from typing import Any, Generic, Iterable, Literal, Sequence, SupportsFloat, SupportsInt, TypeVar, cast
|
6
|
-
|
7
|
-
from dataeval.typing import (
|
8
|
-
Array,
|
9
|
-
ArrayLike,
|
10
|
-
DatasetMetadata,
|
11
|
-
ImageClassificationDataset,
|
12
|
-
ObjectDetectionDataset,
|
13
|
-
)
|
14
|
-
from dataeval.utils._array import as_numpy
|
15
|
-
|
16
|
-
|
17
|
-
def _validate_data(
|
18
|
-
datum_type: Literal["ic", "od"],
|
19
|
-
images: Array | Sequence[Array],
|
20
|
-
labels: Array | Sequence[int] | Sequence[Array] | Sequence[Sequence[int]],
|
21
|
-
bboxes: Array | Sequence[Array] | Sequence[Sequence[Array]] | Sequence[Sequence[Sequence[float]]] | None,
|
22
|
-
metadata: Sequence[dict[str, Any]] | dict[str, Sequence[Any]] | None,
|
23
|
-
) -> None:
|
24
|
-
# Validate inputs
|
25
|
-
dataset_len = len(images)
|
26
|
-
|
27
|
-
if not isinstance(images, (Sequence, Array)) or len(images[0].shape) != 3:
|
28
|
-
raise ValueError("Images must be a sequence or array of 3 dimensional arrays (H, W, C).")
|
29
|
-
if len(labels) != dataset_len:
|
30
|
-
raise ValueError(f"Number of labels ({len(labels)}) does not match number of images ({dataset_len}).")
|
31
|
-
if bboxes is not None and len(bboxes) != dataset_len:
|
32
|
-
raise ValueError(f"Number of bboxes ({len(bboxes)}) does not match number of images ({dataset_len}).")
|
33
|
-
if metadata is not None and (
|
34
|
-
len(metadata) != dataset_len
|
35
|
-
if isinstance(metadata, Sequence)
|
36
|
-
else any(
|
37
|
-
not isinstance(metadatum, Sequence) or len(metadatum) != dataset_len for metadatum in metadata.values()
|
38
|
-
)
|
39
|
-
):
|
40
|
-
raise ValueError(f"Number of metadata ({len(metadata)}) does not match number of images ({dataset_len}).")
|
41
|
-
|
42
|
-
if datum_type == "ic":
|
43
|
-
if not isinstance(labels, (Sequence, Array)) or not isinstance(labels[0], (int, SupportsInt)):
|
44
|
-
raise TypeError("Labels must be a sequence of integers for image classification.")
|
45
|
-
elif datum_type == "od":
|
46
|
-
if (
|
47
|
-
not isinstance(labels, (Sequence, Array))
|
48
|
-
or not isinstance(labels[0], (Sequence, Array))
|
49
|
-
or not isinstance(cast(Sequence[Any], labels[0])[0], (int, SupportsInt))
|
50
|
-
):
|
51
|
-
raise TypeError("Labels must be a sequence of sequences of integers for object detection.")
|
52
|
-
if (
|
53
|
-
bboxes is None
|
54
|
-
or not isinstance(bboxes, (Sequence, Array))
|
55
|
-
or not isinstance(bboxes[0], (Sequence, Array))
|
56
|
-
or not isinstance(bboxes[0][0], (Sequence, Array))
|
57
|
-
or not isinstance(bboxes[0][0][0], (float, SupportsFloat))
|
58
|
-
or not len(bboxes[0][0]) == 4
|
59
|
-
):
|
60
|
-
raise TypeError("Boxes must be a sequence of sequences of (x0, y0, x1, y1) for object detection.")
|
61
|
-
else:
|
62
|
-
raise ValueError(f"Unknown datum type '{datum_type}'. Must be 'ic' or 'od'.")
|
63
|
-
|
64
|
-
|
65
|
-
def _listify_metadata(
|
66
|
-
metadata: Sequence[dict[str, Any]] | dict[str, Sequence[Any]] | None,
|
67
|
-
) -> Sequence[dict[str, Any]] | None:
|
68
|
-
if isinstance(metadata, dict):
|
69
|
-
return [{k: v[i] for k, v in metadata.items()} for i in range(len(next(iter(metadata.values()))))]
|
70
|
-
return metadata
|
71
|
-
|
72
|
-
|
73
|
-
def _find_max(arr: ArrayLike) -> Any:
|
74
|
-
if not isinstance(arr, (bytes, str)) and isinstance(arr, (Iterable, Sequence, Array)):
|
75
|
-
nested = [x for x in [_find_max(x) for x in arr] if x is not None]
|
76
|
-
return max(nested) if len(nested) > 0 else None
|
77
|
-
return arr
|
78
|
-
|
79
|
-
|
80
|
-
_TLabels = TypeVar("_TLabels", Sequence[int], Sequence[Sequence[int]])
|
81
|
-
|
82
|
-
|
83
|
-
class BaseAnnotatedDataset(Generic[_TLabels]):
|
84
|
-
def __init__(
|
85
|
-
self,
|
86
|
-
datum_type: Literal["ic", "od"],
|
87
|
-
images: Array | Sequence[Array],
|
88
|
-
labels: _TLabels,
|
89
|
-
metadata: Sequence[dict[str, Any]] | None,
|
90
|
-
classes: Sequence[str] | None,
|
91
|
-
name: str | None = None,
|
92
|
-
) -> None:
|
93
|
-
self._classes = classes if classes is not None else [str(i) for i in range(_find_max(labels) + 1)]
|
94
|
-
self._index2label = dict(enumerate(self._classes))
|
95
|
-
self._images = images
|
96
|
-
self._labels = labels
|
97
|
-
self._metadata = metadata
|
98
|
-
self._id = name or f"{len(self._images)}_image_{len(self._index2label)}_class_{datum_type}_dataset"
|
99
|
-
|
100
|
-
@property
|
101
|
-
def metadata(self) -> DatasetMetadata:
|
102
|
-
return DatasetMetadata(id=self._id, index2label=self._index2label)
|
103
|
-
|
104
|
-
def __len__(self) -> int:
|
105
|
-
return len(self._images)
|
106
|
-
|
107
|
-
|
108
|
-
class CustomImageClassificationDataset(BaseAnnotatedDataset[Sequence[int]], ImageClassificationDataset):
|
109
|
-
def __init__(
|
110
|
-
self,
|
111
|
-
images: Array | Sequence[Array],
|
112
|
-
labels: Array | Sequence[int],
|
113
|
-
metadata: Sequence[dict[str, Any]] | None,
|
114
|
-
classes: Sequence[str] | None,
|
115
|
-
name: str | None = None,
|
116
|
-
) -> None:
|
117
|
-
super().__init__(
|
118
|
-
"ic", images, as_numpy(labels).tolist() if isinstance(labels, Array) else labels, metadata, classes
|
119
|
-
)
|
120
|
-
if name is not None:
|
121
|
-
self.__name__ = name
|
122
|
-
self.__class__.__name__ = name
|
123
|
-
self.__class__.__qualname__ = name
|
124
|
-
|
125
|
-
def __getitem__(self, idx: int, /) -> tuple[Array, Array, dict[str, Any]]:
|
126
|
-
one_hot = [0.0] * len(self._index2label)
|
127
|
-
one_hot[self._labels[idx]] = 1.0
|
128
|
-
return (
|
129
|
-
self._images[idx],
|
130
|
-
as_numpy(one_hot),
|
131
|
-
self._metadata[idx] if self._metadata is not None else {},
|
132
|
-
)
|
133
|
-
|
134
|
-
|
135
|
-
class CustomObjectDetectionDataset(BaseAnnotatedDataset[Sequence[Sequence[int]]], ObjectDetectionDataset):
|
136
|
-
class ObjectDetectionTarget:
|
137
|
-
def __init__(self, labels: Sequence[int], bboxes: Sequence[Sequence[float]]) -> None:
|
138
|
-
self._labels = labels
|
139
|
-
self._bboxes = bboxes
|
140
|
-
self._scores = [1.0] * len(labels)
|
141
|
-
|
142
|
-
@property
|
143
|
-
def labels(self) -> Sequence[int]:
|
144
|
-
return self._labels
|
145
|
-
|
146
|
-
@property
|
147
|
-
def boxes(self) -> Sequence[Sequence[float]]:
|
148
|
-
return self._bboxes
|
149
|
-
|
150
|
-
@property
|
151
|
-
def scores(self) -> Sequence[float]:
|
152
|
-
return self._scores
|
153
|
-
|
154
|
-
def __init__(
|
155
|
-
self,
|
156
|
-
images: Array | Sequence[Array],
|
157
|
-
labels: Array | Sequence[Array] | Sequence[Sequence[int]],
|
158
|
-
bboxes: Array | Sequence[Array] | Sequence[Sequence[Array]] | Sequence[Sequence[Sequence[float]]],
|
159
|
-
metadata: Sequence[dict[str, Any]] | None,
|
160
|
-
classes: Sequence[str] | None,
|
161
|
-
name: str | None = None,
|
162
|
-
) -> None:
|
163
|
-
super().__init__(
|
164
|
-
"od",
|
165
|
-
images,
|
166
|
-
[as_numpy(label).tolist() if isinstance(label, Array) else label for label in labels],
|
167
|
-
metadata,
|
168
|
-
classes,
|
169
|
-
)
|
170
|
-
if name is not None:
|
171
|
-
self.__name__ = name
|
172
|
-
self.__class__.__name__ = name
|
173
|
-
self.__class__.__qualname__ = name
|
174
|
-
self._bboxes = [[as_numpy(box).tolist() if isinstance(box, Array) else box for box in bbox] for bbox in bboxes]
|
175
|
-
|
176
|
-
@property
|
177
|
-
def metadata(self) -> DatasetMetadata:
|
178
|
-
return DatasetMetadata(id=self._id, index2label=self._index2label)
|
179
|
-
|
180
|
-
def __getitem__(self, idx: int, /) -> tuple[Array, ObjectDetectionTarget, dict[str, Any]]:
|
181
|
-
return (
|
182
|
-
self._images[idx],
|
183
|
-
self.ObjectDetectionTarget(self._labels[idx], self._bboxes[idx]),
|
184
|
-
self._metadata[idx] if self._metadata is not None else {},
|
185
|
-
)
|
186
|
-
|
187
|
-
|
188
|
-
def to_image_classification_dataset(
|
189
|
-
images: Array | Sequence[Array],
|
190
|
-
labels: Array | Sequence[int],
|
191
|
-
metadata: Sequence[dict[str, Any]] | dict[str, Sequence[Any]] | None,
|
192
|
-
classes: Sequence[str] | None,
|
193
|
-
name: str | None = None,
|
194
|
-
) -> ImageClassificationDataset:
|
195
|
-
"""
|
196
|
-
Helper function to create custom ImageClassificationDataset classes.
|
197
|
-
|
198
|
-
Parameters
|
199
|
-
----------
|
200
|
-
images : Array | Sequence[Array]
|
201
|
-
The images to use in the dataset.
|
202
|
-
labels : Array | Sequence[int]
|
203
|
-
The labels to use in the dataset.
|
204
|
-
metadata : Sequence[dict[str, Any]] | dict[str, Sequence[Any]] | None
|
205
|
-
The metadata to use in the dataset.
|
206
|
-
classes : Sequence[str] | None
|
207
|
-
The classes to use in the dataset.
|
208
|
-
|
209
|
-
Returns
|
210
|
-
-------
|
211
|
-
ImageClassificationDataset
|
212
|
-
"""
|
213
|
-
_validate_data("ic", images, labels, None, metadata)
|
214
|
-
return CustomImageClassificationDataset(images, labels, _listify_metadata(metadata), classes, name)
|
215
|
-
|
216
|
-
|
217
|
-
def to_object_detection_dataset(
|
218
|
-
images: Array | Sequence[Array],
|
219
|
-
labels: Array | Sequence[Array] | Sequence[Sequence[int]],
|
220
|
-
bboxes: Array | Sequence[Array] | Sequence[Sequence[Array]] | Sequence[Sequence[Sequence[float]]],
|
221
|
-
metadata: Sequence[dict[str, Any]] | dict[str, Sequence[Any]] | None,
|
222
|
-
classes: Sequence[str] | None,
|
223
|
-
name: str | None = None,
|
224
|
-
) -> ObjectDetectionDataset:
|
225
|
-
"""
|
226
|
-
Helper function to create custom ObjectDetectionDataset classes.
|
227
|
-
|
228
|
-
Parameters
|
229
|
-
----------
|
230
|
-
images : Array | Sequence[Array]
|
231
|
-
The images to use in the dataset.
|
232
|
-
labels : Array | Sequence[Array] | Sequence[Sequence[int]]
|
233
|
-
The labels to use in the dataset.
|
234
|
-
bboxes : Array | Sequence[Array] | Sequence[Sequence[Array]] | Sequence[Sequence[Sequence[float]]]
|
235
|
-
The bounding boxes (x0, y0, x1, y1) to use in the dataset.
|
236
|
-
metadata : Sequence[dict[str, Any]] | dict[str, Sequence[Any]] | None
|
237
|
-
The metadata to use in the dataset.
|
238
|
-
classes : Sequence[str] | None
|
239
|
-
The classes to use in the dataset.
|
240
|
-
|
241
|
-
Returns
|
242
|
-
-------
|
243
|
-
ObjectDetectionDataset
|
244
|
-
"""
|
245
|
-
_validate_data("od", images, labels, bboxes, metadata)
|
246
|
-
return CustomObjectDetectionDataset(images, labels, bboxes, _listify_metadata(metadata), classes, name)
|
@@ -1,21 +0,0 @@
|
|
1
|
-
"""Provides access to common Computer Vision datasets."""
|
2
|
-
|
3
|
-
from dataeval.utils.datasets._antiuav import AntiUAVDetection
|
4
|
-
from dataeval.utils.datasets._cifar10 import CIFAR10
|
5
|
-
from dataeval.utils.datasets._milco import MILCO
|
6
|
-
from dataeval.utils.datasets._mnist import MNIST
|
7
|
-
from dataeval.utils.datasets._seadrone import SeaDrone
|
8
|
-
from dataeval.utils.datasets._ships import Ships
|
9
|
-
from dataeval.utils.datasets._voc import VOCDetection, VOCDetectionTorch, VOCSegmentation
|
10
|
-
|
11
|
-
__all__ = [
|
12
|
-
"MNIST",
|
13
|
-
"Ships",
|
14
|
-
"CIFAR10",
|
15
|
-
"AntiUAVDetection",
|
16
|
-
"MILCO",
|
17
|
-
"SeaDrone",
|
18
|
-
"VOCDetection",
|
19
|
-
"VOCDetectionTorch",
|
20
|
-
"VOCSegmentation",
|
21
|
-
]
|