dataeval 0.86.9__py3-none-any.whl → 0.88.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. dataeval/__init__.py +1 -1
  2. dataeval/_log.py +1 -1
  3. dataeval/_version.py +2 -2
  4. dataeval/config.py +4 -19
  5. dataeval/data/_embeddings.py +78 -35
  6. dataeval/data/_images.py +41 -8
  7. dataeval/data/_metadata.py +348 -66
  8. dataeval/data/_selection.py +22 -7
  9. dataeval/data/_split.py +3 -2
  10. dataeval/data/selections/_classbalance.py +4 -3
  11. dataeval/data/selections/_classfilter.py +9 -8
  12. dataeval/data/selections/_indices.py +4 -3
  13. dataeval/data/selections/_prioritize.py +249 -29
  14. dataeval/data/selections/_reverse.py +1 -1
  15. dataeval/data/selections/_shuffle.py +5 -4
  16. dataeval/detectors/drift/_base.py +2 -1
  17. dataeval/detectors/drift/_mmd.py +2 -1
  18. dataeval/detectors/drift/_nml/_base.py +1 -1
  19. dataeval/detectors/drift/_nml/_chunk.py +2 -1
  20. dataeval/detectors/drift/_nml/_result.py +3 -2
  21. dataeval/detectors/drift/_nml/_thresholds.py +6 -5
  22. dataeval/detectors/drift/_uncertainty.py +2 -1
  23. dataeval/detectors/linters/duplicates.py +2 -1
  24. dataeval/detectors/linters/outliers.py +4 -3
  25. dataeval/detectors/ood/__init__.py +2 -1
  26. dataeval/detectors/ood/ae.py +1 -1
  27. dataeval/detectors/ood/base.py +39 -1
  28. dataeval/detectors/ood/knn.py +95 -0
  29. dataeval/detectors/ood/mixin.py +2 -1
  30. dataeval/metadata/_utils.py +1 -1
  31. dataeval/metrics/bias/_balance.py +29 -22
  32. dataeval/metrics/bias/_diversity.py +4 -4
  33. dataeval/metrics/bias/_parity.py +2 -2
  34. dataeval/metrics/stats/_base.py +3 -29
  35. dataeval/metrics/stats/_boxratiostats.py +2 -1
  36. dataeval/metrics/stats/_dimensionstats.py +2 -1
  37. dataeval/metrics/stats/_hashstats.py +21 -3
  38. dataeval/metrics/stats/_pixelstats.py +2 -1
  39. dataeval/metrics/stats/_visualstats.py +2 -1
  40. dataeval/outputs/_base.py +2 -3
  41. dataeval/outputs/_bias.py +2 -1
  42. dataeval/outputs/_estimators.py +1 -1
  43. dataeval/outputs/_linters.py +3 -3
  44. dataeval/outputs/_stats.py +3 -3
  45. dataeval/outputs/_utils.py +1 -1
  46. dataeval/outputs/_workflows.py +49 -31
  47. dataeval/typing.py +23 -9
  48. dataeval/utils/__init__.py +2 -2
  49. dataeval/utils/_array.py +3 -2
  50. dataeval/utils/_bin.py +9 -7
  51. dataeval/utils/_method.py +2 -3
  52. dataeval/utils/_multiprocessing.py +34 -0
  53. dataeval/utils/_plot.py +2 -1
  54. dataeval/utils/data/__init__.py +6 -5
  55. dataeval/utils/data/{metadata.py → _merge.py} +3 -2
  56. dataeval/utils/data/_validate.py +170 -0
  57. dataeval/utils/data/collate.py +2 -1
  58. dataeval/utils/torch/_internal.py +2 -1
  59. dataeval/utils/torch/trainer.py +1 -1
  60. dataeval/workflows/sufficiency.py +13 -9
  61. {dataeval-0.86.9.dist-info → dataeval-0.88.0.dist-info}/METADATA +8 -21
  62. dataeval-0.88.0.dist-info/RECORD +105 -0
  63. dataeval/utils/data/_dataset.py +0 -246
  64. dataeval/utils/datasets/__init__.py +0 -21
  65. dataeval/utils/datasets/_antiuav.py +0 -189
  66. dataeval/utils/datasets/_base.py +0 -266
  67. dataeval/utils/datasets/_cifar10.py +0 -201
  68. dataeval/utils/datasets/_fileio.py +0 -142
  69. dataeval/utils/datasets/_milco.py +0 -197
  70. dataeval/utils/datasets/_mixin.py +0 -54
  71. dataeval/utils/datasets/_mnist.py +0 -202
  72. dataeval/utils/datasets/_seadrone.py +0 -512
  73. dataeval/utils/datasets/_ships.py +0 -144
  74. dataeval/utils/datasets/_types.py +0 -48
  75. dataeval/utils/datasets/_voc.py +0 -583
  76. dataeval-0.86.9.dist-info/RECORD +0 -115
  77. {dataeval-0.86.9.dist-info → dataeval-0.88.0.dist-info}/WHEEL +0 -0
  78. /dataeval-0.86.9.dist-info/licenses/LICENSE.txt → /dataeval-0.88.0.dist-info/licenses/LICENSE +0 -0
@@ -16,7 +16,7 @@ from dataeval.config import DeviceLike, get_device
16
16
 
17
17
  def get_images_from_batch(batch: Any) -> Any:
18
18
  """Extracts images from a batch of collated data by DataLoader"""
19
- return batch[0] if isinstance(batch, (list, tuple)) else batch
19
+ return batch[0] if isinstance(batch, list | tuple) else batch
20
20
 
21
21
 
22
22
  class AETrainer:
@@ -2,7 +2,8 @@ from __future__ import annotations
2
2
 
3
3
  __all__ = []
4
4
 
5
- from typing import Any, Callable, Generic, Iterable, Mapping, Sequence, Sized, TypeVar
5
+ from collections.abc import Callable, Iterable, Mapping, Sequence, Sized
6
+ from typing import Any, Generic, TypeVar
6
7
 
7
8
  import numpy as np
8
9
  import torch
@@ -207,7 +208,9 @@ class Sufficiency(Generic[T]):
207
208
  ... substeps=5,
208
209
  ... )
209
210
  >>> suff.evaluate()
210
- SufficiencyOutput(steps=array([ 1, 3, 10, 31, 100], dtype=uint32), measures={'test': array([1., 1., 1., 1., 1.])}, n_iter=1000)
211
+ SufficiencyOutput(steps=array([ 1, 3, 10, 31, 100], dtype=uint32), measures={'test': array([[1., 1., 1., 1., 1.],
212
+ [1., 1., 1., 1., 1.],
213
+ [1., 1., 1., 1., 1.]])}, averaged_measures={'test': array([1., 1., 1., 1., 1.])}, n_iter=1000)
211
214
 
212
215
  Evaluate at a single value
213
216
 
@@ -219,7 +222,7 @@ class Sufficiency(Generic[T]):
219
222
  ... eval_fn=eval_fn,
220
223
  ... )
221
224
  >>> suff.evaluate(eval_at=50)
222
- SufficiencyOutput(steps=array([50]), measures={'test': array([1.])}, n_iter=1000)
225
+ SufficiencyOutput(steps=array([50]), measures={'test': array([[1.]])}, averaged_measures={'test': array([1.])}, n_iter=1000)
223
226
 
224
227
  Evaluating at linear steps from 0-100 inclusive
225
228
 
@@ -231,7 +234,7 @@ class Sufficiency(Generic[T]):
231
234
  ... eval_fn=eval_fn,
232
235
  ... )
233
236
  >>> suff.evaluate(eval_at=np.arange(0, 101, 20))
234
- SufficiencyOutput(steps=array([ 0, 20, 40, 60, 80, 100]), measures={'test': array([1., 1., 1., 1., 1., 1.])}, n_iter=1000)
237
+ SufficiencyOutput(steps=array([ 0, 20, 40, 60, 80, 100]), measures={'test': array([[1., 1., 1., 1., 1., 1.]])}, averaged_measures={'test': array([1., 1., 1., 1., 1., 1.])}, n_iter=1000)
235
238
 
236
239
  """ # noqa: E501
237
240
  if eval_at is not None:
@@ -249,7 +252,7 @@ class Sufficiency(Generic[T]):
249
252
  measures = {}
250
253
 
251
254
  # Run each model over all indices
252
- for _ in range(self.runs):
255
+ for run in range(self.runs):
253
256
  # Create a randomized set of indices to use
254
257
  indices = np.random.randint(0, self._length, size=self._length)
255
258
  # Reset the network weights to "create" an untrained model
@@ -272,9 +275,10 @@ class Sufficiency(Generic[T]):
272
275
  # Sum result into current substep iteration to be averaged later
273
276
  value = np.array(value).ravel()
274
277
  if name not in measures:
275
- measures[name] = np.zeros(substeps if len(value) == 1 else (substeps, len(value)))
276
- measures[name][iteration] += value
278
+ measures[name] = np.zeros(
279
+ (self.runs, substeps) if len(value) == 1 else (self.runs, substeps, len(value))
280
+ )
277
281
 
282
+ measures[name][run, iteration] = value
278
283
  # The mean for each measure must be calculated before being returned
279
- measures = {k: (v / self.runs).T for k, v in measures.items()}
280
- return SufficiencyOutput(ranges, measures)
284
+ return SufficiencyOutput(ranges, measures=measures)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataeval
3
- Version: 0.86.9
3
+ Version: 0.88.0
4
4
  Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
5
5
  Project-URL: Homepage, https://dataeval.ai/
6
6
  Project-URL: Repository, https://github.com/aria-ml/dataeval/
@@ -8,33 +8,29 @@ Project-URL: Documentation, https://dataeval.readthedocs.io/
8
8
  Author-email: Andrew Weng <andrew.weng@ariacoustics.com>, Bill Peria <bill.peria@ariacoustics.com>, Jon Botts <jonathan.botts@ariacoustics.com>, Jonathan Christian <jonathan.christian@ariacoustics.com>, Justin McMillan <justin.mcmillan@ariacoustics.com>, Ryan Wood <ryan.wood@ariacoustics.com>, Scott Swan <scott.swan@ariacoustics.com>, Shaun Jullens <shaun.jullens@ariacoustics.com>
9
9
  Maintainer-email: ARiA <dataeval@ariacoustics.com>
10
10
  License-Expression: MIT
11
- License-File: LICENSE.txt
11
+ License-File: LICENSE
12
12
  Classifier: Development Status :: 4 - Beta
13
13
  Classifier: Intended Audience :: Science/Research
14
14
  Classifier: License :: OSI Approved :: MIT License
15
15
  Classifier: Operating System :: OS Independent
16
16
  Classifier: Programming Language :: Python :: 3 :: Only
17
- Classifier: Programming Language :: Python :: 3.9
18
17
  Classifier: Programming Language :: Python :: 3.10
19
18
  Classifier: Programming Language :: Python :: 3.11
20
19
  Classifier: Programming Language :: Python :: 3.12
21
20
  Classifier: Topic :: Scientific/Engineering
22
- Requires-Python: <3.13,>=3.9
23
- Requires-Dist: defusedxml>=0.7.1
21
+ Requires-Python: <3.13,>=3.10
24
22
  Requires-Dist: fast-hdbscan==0.2.0
25
23
  Requires-Dist: lightgbm>=4
26
24
  Requires-Dist: numba>=0.59.1
27
25
  Requires-Dist: numpy>=1.24.2
28
26
  Requires-Dist: pandas>=2.0
29
- Requires-Dist: pillow>=10.3.0
30
27
  Requires-Dist: polars>=1.0.0
31
- Requires-Dist: requests>=2.32.3
32
28
  Requires-Dist: scikit-learn>=1.5.0
33
29
  Requires-Dist: scipy>=1.10
34
30
  Requires-Dist: torch>=2.2.0
35
31
  Requires-Dist: torchvision>=0.17.0
36
32
  Requires-Dist: tqdm>=4.66
37
- Requires-Dist: typing-extensions>=4.12; python_version ~= '3.9'
33
+ Requires-Dist: typing-extensions>=4.12
38
34
  Requires-Dist: xxhash>=3.3
39
35
  Provides-Extra: all
40
36
  Requires-Dist: matplotlib>=3.7.1; extra == 'all'
@@ -91,7 +87,7 @@ using MAITE-compliant datasets and models.
91
87
 
92
88
  ## Getting Started
93
89
 
94
- **Python versions:** 3.9 - 3.12
90
+ **Python versions:** 3.10 - 3.12
95
91
 
96
92
  **Supported packages**: _NumPy_, _Pandas_, _Sci-kit learn_, _MAITE_, _NRTK_
97
93
 
@@ -123,14 +119,8 @@ micromamba create -f environment\environment.yaml -c pytorch
123
119
 
124
120
  ### **Installing from GitHub**
125
121
 
126
- To install DataEval from source locally on Ubuntu, you will need `git-lfs` to
127
- download larger, binary source files.
128
-
129
- ```bash
130
- sudo apt-get install git-lfs
131
- ```
132
-
133
- Pull the source down and change to the DataEval project directory.
122
+ To install DataEval from source locally on Ubuntu, pull the source down and
123
+ change to the DataEval project directory.
134
124
 
135
125
  ```bash
136
126
  git clone https://github.com/aria-ml/dataeval.git
@@ -167,10 +157,7 @@ source .venv/bin/activate
167
157
 
168
158
  ## Contact Us
169
159
 
170
- If you have any questions, feel free to reach out to the people below:
171
-
172
- - **POC**: Scott Swan @scott.swan
173
- - **DPOC**: Andrew Weng @aweng
160
+ If you have any questions, feel free to reach out to [us](mailto:dataeval@ariacoustics.com)!
174
161
 
175
162
  ## Acknowledgement
176
163
 
@@ -0,0 +1,105 @@
1
+ dataeval/__init__.py,sha256=aFzX3SLx8wgc763RY772P41ZLqeHcUHRKW9XAN0KfHQ,1793
2
+ dataeval/_log.py,sha256=Q2d6oqYKXyn1wkgMdNX9iswod4Jq0jPADShrCFVgJI0,374
3
+ dataeval/_version.py,sha256=p36W3DcVLrkAWnGoljUjU-PF8_IvHjfGbC98bXZ2g_c,513
4
+ dataeval/config.py,sha256=lL73s_xa9pBxHHCnBKi59D_tl4vS7ig1rfWbIYkM_ac,3839
5
+ dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ dataeval/typing.py,sha256=cKpK8rY7iVf-KL9kuye6qi_6LS6hKbMxHpurdWlYY44,7445
7
+ dataeval/data/__init__.py,sha256=wzQ6uUFLNB3VJR0a2QnRBYwEmwXT93q0WpHu7FmFW1E,486
8
+ dataeval/data/_embeddings.py,sha256=BHoiSdt46TblVSglg9Cyrm8iAJJq1Z2jR4wesL0WOf4,14731
9
+ dataeval/data/_images.py,sha256=9q0O5Zurf-5727rMC7DB_i3TtXcL67D9a5GGx5qlov8,3875
10
+ dataeval/data/_metadata.py,sha256=-dhmyX6vvv97S9YHr6roNcbzXequkxMw48PwNDdzZ9I,24163
11
+ dataeval/data/_selection.py,sha256=4qI-GwSdEGiRCyr3kqxr6uOiyRRKsPBRzYHmpgdWLY0,5301
12
+ dataeval/data/_split.py,sha256=aCkXFvkCw8VkWICdCmY9tHiEvkQI5j9jUa7QLjm-gZE,16759
13
+ dataeval/data/selections/__init__.py,sha256=2m8ZB53wXzqLcqmc6p5atO6graB6ZyiRSNJFxf11X_g,613
14
+ dataeval/data/selections/_classbalance.py,sha256=AqExg-QnYBcfBNzS1Ygsz3Cgb2cqcgGXE0-cseD8_vA,1580
15
+ dataeval/data/selections/_classfilter.py,sha256=sn7Lbhb5KJyiqDH7ZdugX5WnRlglwCckWzK6K5uXbwg,4447
16
+ dataeval/data/selections/_indices.py,sha256=PcM4qQwFVUXO9xY2brO6OUDFMeVgiSlTRKFDhTA_W-0,655
17
+ dataeval/data/selections/_limit.py,sha256=JG4GmEiNKt3sk4PbOUbBnGGzNlyz72H-kQrt8COMm4Y,512
18
+ dataeval/data/selections/_prioritize.py,sha256=ss_GZ5MB2ohdNuB55C69TYNwV3PUSmk715gDJI6qfYA,20140
19
+ dataeval/data/selections/_reverse.py,sha256=FqYlpPg-0Vz75kbEhGFrJlzIGELSmDZxPlBMY18a57I,365
20
+ dataeval/data/selections/_shuffle.py,sha256=uW_Zss773ob2swqwTdL6G-CzMElCq8TO2TScvABQR1U,1268
21
+ dataeval/detectors/__init__.py,sha256=3Sg-XWlwr75zEEH3hZKA4nWMtGvaRlnfzTWvZG_Ak6U,189
22
+ dataeval/detectors/drift/__init__.py,sha256=Jqv98oOVeC2tvHlNGxQ8RJ6De2q4SyS5lTpaYlb4ocM,756
23
+ dataeval/detectors/drift/_base.py,sha256=__mlqkiPW0GcVSVE4u9t6M2mp3rAU5leSk_XPQn_Mp8,7619
24
+ dataeval/detectors/drift/_cvm.py,sha256=cS33zWJmFY1fft1XcANcP2jSD5ou7TxvIU2AldhTynM,3004
25
+ dataeval/detectors/drift/_ks.py,sha256=uMc5-NA-lSV1IODrY8uJe87ll3uRJT_oXLJFXy95M1w,3186
26
+ dataeval/detectors/drift/_mmd.py,sha256=EkfbeK5L6xGGQrcA1v_0YlpIOingF73jn2H6s3tRKbo,11550
27
+ dataeval/detectors/drift/_mvdc.py,sha256=WMN6aDOWCh1q1MtdRXFIZlFcfnVi4XgBHsS0A6L5UuY,2942
28
+ dataeval/detectors/drift/_uncertainty.py,sha256=-4aiwNosJ1_4kY-d2n4YbZV_jvnf5xdTMDELXSoW6OM,5874
29
+ dataeval/detectors/drift/updates.py,sha256=L1PnrPlIE1x6ujCc5mCwjcAZwadVTn-Zjb6MnTDvzJQ,2251
30
+ dataeval/detectors/drift/_nml/__init__.py,sha256=MNyKyZlfTjr5uQql2uBBfRkUdsuduie_WJdn09GYmqg,137
31
+ dataeval/detectors/drift/_nml/_base.py,sha256=wMqegfa92Tldqix1RL6dLMdiKgX0GqHmTiFxO38ja_c,2672
32
+ dataeval/detectors/drift/_nml/_chunk.py,sha256=5WhpcIHJ3EVBGZjDJLri54fWikYAT_7sC0DxQkSs0tI,13591
33
+ dataeval/detectors/drift/_nml/_domainclassifier.py,sha256=n7Ttq5Ej7sAY9Jn2iagaGj4IIWiG8gmA3wwFizlBqes,7292
34
+ dataeval/detectors/drift/_nml/_result.py,sha256=mH_tYrYVaIXhsU9gcSFqEdaI38BArlpKuW0-8SPS8aY,3295
35
+ dataeval/detectors/drift/_nml/_thresholds.py,sha256=jAbRdAPP4O4hJqTLpvfVAbWNdw3zL6UrTl2KNWphQPc,12083
36
+ dataeval/detectors/linters/__init__.py,sha256=xn2zPwUcmsuf-Jd9uw6AVI11C9z1b1Y9fYtuFnXenZ0,404
37
+ dataeval/detectors/linters/duplicates.py,sha256=k5cQz_1i9vchugSfC267mugWzgy6sVDa36BdQAy_PXs,4990
38
+ dataeval/detectors/linters/outliers.py,sha256=a980lDV9g_tZYHV9k6wSh2d11nNYEqTy56IduC-H5GA,10159
39
+ dataeval/detectors/ood/__init__.py,sha256=qDoDdQetJY1xZB43dNzcOIO_8NiEuEU0z1QNU4QkEXs,341
40
+ dataeval/detectors/ood/ae.py,sha256=jo6aHcKT1N13ew0tV6FZa3vQI5DQivZo5-uAm4uLaAs,2950
41
+ dataeval/detectors/ood/base.py,sha256=eWzODq2i1Tah7Mqm1guASTf9p2tF4Tr6mZoDT3pDvsk,4401
42
+ dataeval/detectors/ood/knn.py,sha256=Fu77geQFHPYNOn81VIXUJ3yC3t5Ylv0ZgvwMeA2JX6I,3782
43
+ dataeval/detectors/ood/mixin.py,sha256=cNmRrR9cv9phwAGSuQMC7EhmrFtf68C63wdTggy1UaU,5458
44
+ dataeval/metadata/__init__.py,sha256=XDDmJbOZBNM6pL0r6Nbu6oMRoyAh22IDkPYGndNlkZU,316
45
+ dataeval/metadata/_distance.py,sha256=MbXM9idsooNWnGLaTKg8j4ZqavUeJUjuW7EPW3-UQyg,4234
46
+ dataeval/metadata/_ood.py,sha256=lNPHouj_9WfM_uTtsaiRaPn46RcVy3YebD1c32vDj-c,8981
47
+ dataeval/metadata/_utils.py,sha256=4fX-1eA3fK4uwNh_DfOGiXxl4PHZ1AghOejJ03rV3RI,1219
48
+ dataeval/metrics/__init__.py,sha256=8VC8q3HuJN3o_WN51Ae2_wXznl3RMXIvA5GYVcy7vr8,225
49
+ dataeval/metrics/bias/__init__.py,sha256=329S1_3WnWqeU4-qVcbe0fMy4lDrj9uKslWHIQf93yg,839
50
+ dataeval/metrics/bias/_balance.py,sha256=aDAII2lXeAz9dZk0TdgZHtyab1tObQlya8jdNNdn8eI,5718
51
+ dataeval/metrics/bias/_completeness.py,sha256=2cvOXe7fhtxZGH_4QBuiCafIeamxFBarMiUBuEP7QGI,4596
52
+ dataeval/metrics/bias/_coverage.py,sha256=v2x2hbOf2za9jFcSVSJUAoJ2BJfzzlCzt0mFIGtBL0A,3639
53
+ dataeval/metrics/bias/_diversity.py,sha256=Z7UQzKp9bsmB-hC3_sY6HIJUJRkLHb5cVEoU79cNDzc,5800
54
+ dataeval/metrics/bias/_parity.py,sha256=ZIKc5OK6wQ4moleBJzGDfOPvyNzj03-KoHAGBZnO4pk,11433
55
+ dataeval/metrics/estimators/__init__.py,sha256=Pnds8uIyAovt2fKqZjiHCIP_kVoBWlVllekYuK5UmmU,568
56
+ dataeval/metrics/estimators/_ber.py,sha256=7noeRyOJJYqrJ_jt90nRHtR2t2u5MIvTCmWt0_rd4EU,5370
57
+ dataeval/metrics/estimators/_clusterer.py,sha256=1HrpihGTJ63IkNSOy4Ibw633Gllkm1RxKmoKT5MOgt0,1434
58
+ dataeval/metrics/estimators/_divergence.py,sha256=t-Z_7Bq4V4FunxKlq7G4ThtgLany8n4iEU0n0afr7F8,3991
59
+ dataeval/metrics/estimators/_uap.py,sha256=BULEBbJ9BQ1IcTeZf0x7iI60QHAWCccBOM97FIu9VXA,1928
60
+ dataeval/metrics/stats/__init__.py,sha256=6tA_9nbbM5ObJ6cds8Y1VBtTQiTOxrpGQSFLu_lWGGA,1098
61
+ dataeval/metrics/stats/_base.py,sha256=vE8dvrNqjAKGyCzqlgQa-3ArP6PJ-P8Y4rdNPUZ0ml8,11703
62
+ dataeval/metrics/stats/_boxratiostats.py,sha256=CFn-BqnPmAXagaLlhJGusdGLQewWxRweb9Xxv_JAOaw,6477
63
+ dataeval/metrics/stats/_dimensionstats.py,sha256=GlzshH7nZurVWANmZmpuXy_v5ZfMrdAfO_FbtHTL38Q,2903
64
+ dataeval/metrics/stats/_hashstats.py,sha256=wsy8F8-UMUbtjeCnaqAR9Yxv_jp4kFerHH2L0UMIAgY,5415
65
+ dataeval/metrics/stats/_imagestats.py,sha256=gUPNgN5Zwzdr7WnSwbve1NXNsyxd5dy3cSnlR_7guCg,3007
66
+ dataeval/metrics/stats/_labelstats.py,sha256=_dXt3p8_-SHEtHvJWbL0rnQvO2g30zxX42mG2LGJepU,3195
67
+ dataeval/metrics/stats/_pixelstats.py,sha256=XEFByxMUbNaCvEsnVhH5ewJ8UH253ySdpFe5u1jr38w,3339
68
+ dataeval/metrics/stats/_visualstats.py,sha256=SbXvNWxfKrw-2wCu5FXMsnpsMUVaQzdJkj6RB4qEsBM,3740
69
+ dataeval/outputs/__init__.py,sha256=geHB5M3QOiFFaQGV4ZwDTTKpqZPvPePbqG7lzaPhaXQ,1741
70
+ dataeval/outputs/_base.py,sha256=lVC7xmBgv3JYY2wVLaGBMPlkRE_KV9UloaeQn0nQydA,5875
71
+ dataeval/outputs/_bias.py,sha256=gj2AgSKOdq6bj59RMiHpha4Skld6ZMB8cW5KesOZ6T4,10483
72
+ dataeval/outputs/_drift.py,sha256=hXILED_soY8ppIQZgftQvmumtwDrTnABbYl-flIGEU4,4588
73
+ dataeval/outputs/_estimators.py,sha256=SUjur5jI6OU9C7GpsAuA_qqO1PRnS-8eZN-otsaV5q0,3120
74
+ dataeval/outputs/_linters.py,sha256=N4nP5HMoeN2zLndWzhoIT5QB1Ujxbs8Gx5pWPKhl3yc,6683
75
+ dataeval/outputs/_metadata.py,sha256=ffZgpX8KWURPHXpOWjbvJ2KRqWQkS2nWuIjKUzoHhMI,1710
76
+ dataeval/outputs/_ood.py,sha256=suLKVXULGtXH0rq9eXHI1d3d2jhGmItJtz4QiQd47A4,1718
77
+ dataeval/outputs/_stats.py,sha256=PsDV0uw41aTy-X9tjz-PqOj78TTnH4JQVpOrU3OThAE,17423
78
+ dataeval/outputs/_utils.py,sha256=KJ1P8tcMFIkGi2A6VfqbZwLcT1cD0c2YssTbWbHALjE,938
79
+ dataeval/outputs/_workflows.py,sha256=sw13FNx1vANX7DBsKeOLfP2bkp5r6SexBorfb9dxYxU,12160
80
+ dataeval/utils/__init__.py,sha256=sjelzMPaTImF6isiRcp8UGDE3tppEpWS5GoR8WKPZ1k,242
81
+ dataeval/utils/_array.py,sha256=P4_gyH3kkksUJm9Vqx-oPtLWxFmqMacUJzhj0vmrUd8,6361
82
+ dataeval/utils/_bin.py,sha256=QjlRCB5mOauETdxSbvRxRG17riO6gScsMd_lNnnvqxs,7391
83
+ dataeval/utils/_clusterer.py,sha256=rUvEdyMwp95lffmt6xKMEwsjRXNoBS0n5mAS_HNOnck,5656
84
+ dataeval/utils/_fast_mst.py,sha256=pv42flr1Uf5RBa9qDG0YLDXWH7Mr7a9zpauO1HqZXaY,8061
85
+ dataeval/utils/_image.py,sha256=4uxTIOYZZlRJOfNmdA3ek3no3FrLWCK5un48kStMDt8,3578
86
+ dataeval/utils/_method.py,sha256=53Q3xfQvpyGa-z9_rn6GhjfCcUR5Q9nuWQtCNav4Ftc,391
87
+ dataeval/utils/_mst.py,sha256=bLmJmu_1Dtj3hC5gQp3oAiJ_7TKtEjahTqusVRRU4eI,2168
88
+ dataeval/utils/_multiprocessing.py,sha256=n6qCVybheWry42NCdxzcsgkJ9xLGkz8m12SWgviTJQM,1060
89
+ dataeval/utils/_plot.py,sha256=kwYZbSdHCV02wRrZDTxR3xd0XfQ-6TlLmfvubwiEQcw,7252
90
+ dataeval/utils/data/__init__.py,sha256=_XoNxADxBdR96Ca-0RgkDlQa6C2FHld1nwts-Xdif3g,294
91
+ dataeval/utils/data/_merge.py,sha256=9JKtlpBarMX_9jlhnQg1AmBwTe9I2w6xQkFGss3IkkU,14729
92
+ dataeval/utils/data/_validate.py,sha256=YH5Q6uzcTRdf_AMKMRyYW37RUlXm-S8ddhw6cegdNkc,6950
93
+ dataeval/utils/data/collate.py,sha256=AWoQ2k9FXyTeq6ExTsGa6sBML_lZm9p38-DN9hnpm8E,3963
94
+ dataeval/utils/torch/__init__.py,sha256=dn5mjCrFp0b1aL_UEURhONU0Ag0cmXoTOBSGagpkTiA,325
95
+ dataeval/utils/torch/_blocks.py,sha256=HVhBTMMD5NA4qheMUgyol1KWiKZDIuc8k5j4RcMKmhk,1466
96
+ dataeval/utils/torch/_gmm.py,sha256=XM68GNEP97EjaB1U49-ZXRb81d0CEFnPS910alrcB3g,3740
97
+ dataeval/utils/torch/_internal.py,sha256=LiuqZGIzKewp_29_Lskj0mnNqdMffMheMdgGeXLDI5g,4173
98
+ dataeval/utils/torch/models.py,sha256=1idpXyjrYcCBSsbxxRUOto8xr4MJNjDEqQHiIXVU5Zc,9700
99
+ dataeval/utils/torch/trainer.py,sha256=kBdgxd9TL1Pvz-dyZbS__POAKeFrDiQ4vKFh8ltJApc,5543
100
+ dataeval/workflows/__init__.py,sha256=ou8y0KO-d6W5lgmcyLjKlf-J_ckP3vilW7wHkgiDlZ4,255
101
+ dataeval/workflows/sufficiency.py,sha256=4DTDaYyEuAfO0LTFpQGXXXayV5aCIbziSL2Rddd1vQ0,10360
102
+ dataeval-0.88.0.dist-info/METADATA,sha256=Y5NRZgrhfpyGQKHUnqnO6rAItVR3oWUqIp646_0xluQ,5601
103
+ dataeval-0.88.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
104
+ dataeval-0.88.0.dist-info/licenses/LICENSE,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
105
+ dataeval-0.88.0.dist-info/RECORD,,
@@ -1,246 +0,0 @@
1
- from __future__ import annotations
2
-
3
- __all__ = []
4
-
5
- from typing import Any, Generic, Iterable, Literal, Sequence, SupportsFloat, SupportsInt, TypeVar, cast
6
-
7
- from dataeval.typing import (
8
- Array,
9
- ArrayLike,
10
- DatasetMetadata,
11
- ImageClassificationDataset,
12
- ObjectDetectionDataset,
13
- )
14
- from dataeval.utils._array import as_numpy
15
-
16
-
17
- def _validate_data(
18
- datum_type: Literal["ic", "od"],
19
- images: Array | Sequence[Array],
20
- labels: Array | Sequence[int] | Sequence[Array] | Sequence[Sequence[int]],
21
- bboxes: Array | Sequence[Array] | Sequence[Sequence[Array]] | Sequence[Sequence[Sequence[float]]] | None,
22
- metadata: Sequence[dict[str, Any]] | dict[str, Sequence[Any]] | None,
23
- ) -> None:
24
- # Validate inputs
25
- dataset_len = len(images)
26
-
27
- if not isinstance(images, (Sequence, Array)) or len(images[0].shape) != 3:
28
- raise ValueError("Images must be a sequence or array of 3 dimensional arrays (H, W, C).")
29
- if len(labels) != dataset_len:
30
- raise ValueError(f"Number of labels ({len(labels)}) does not match number of images ({dataset_len}).")
31
- if bboxes is not None and len(bboxes) != dataset_len:
32
- raise ValueError(f"Number of bboxes ({len(bboxes)}) does not match number of images ({dataset_len}).")
33
- if metadata is not None and (
34
- len(metadata) != dataset_len
35
- if isinstance(metadata, Sequence)
36
- else any(
37
- not isinstance(metadatum, Sequence) or len(metadatum) != dataset_len for metadatum in metadata.values()
38
- )
39
- ):
40
- raise ValueError(f"Number of metadata ({len(metadata)}) does not match number of images ({dataset_len}).")
41
-
42
- if datum_type == "ic":
43
- if not isinstance(labels, (Sequence, Array)) or not isinstance(labels[0], (int, SupportsInt)):
44
- raise TypeError("Labels must be a sequence of integers for image classification.")
45
- elif datum_type == "od":
46
- if (
47
- not isinstance(labels, (Sequence, Array))
48
- or not isinstance(labels[0], (Sequence, Array))
49
- or not isinstance(cast(Sequence[Any], labels[0])[0], (int, SupportsInt))
50
- ):
51
- raise TypeError("Labels must be a sequence of sequences of integers for object detection.")
52
- if (
53
- bboxes is None
54
- or not isinstance(bboxes, (Sequence, Array))
55
- or not isinstance(bboxes[0], (Sequence, Array))
56
- or not isinstance(bboxes[0][0], (Sequence, Array))
57
- or not isinstance(bboxes[0][0][0], (float, SupportsFloat))
58
- or not len(bboxes[0][0]) == 4
59
- ):
60
- raise TypeError("Boxes must be a sequence of sequences of (x0, y0, x1, y1) for object detection.")
61
- else:
62
- raise ValueError(f"Unknown datum type '{datum_type}'. Must be 'ic' or 'od'.")
63
-
64
-
65
- def _listify_metadata(
66
- metadata: Sequence[dict[str, Any]] | dict[str, Sequence[Any]] | None,
67
- ) -> Sequence[dict[str, Any]] | None:
68
- if isinstance(metadata, dict):
69
- return [{k: v[i] for k, v in metadata.items()} for i in range(len(next(iter(metadata.values()))))]
70
- return metadata
71
-
72
-
73
- def _find_max(arr: ArrayLike) -> Any:
74
- if not isinstance(arr, (bytes, str)) and isinstance(arr, (Iterable, Sequence, Array)):
75
- nested = [x for x in [_find_max(x) for x in arr] if x is not None]
76
- return max(nested) if len(nested) > 0 else None
77
- return arr
78
-
79
-
80
- _TLabels = TypeVar("_TLabels", Sequence[int], Sequence[Sequence[int]])
81
-
82
-
83
- class BaseAnnotatedDataset(Generic[_TLabels]):
84
- def __init__(
85
- self,
86
- datum_type: Literal["ic", "od"],
87
- images: Array | Sequence[Array],
88
- labels: _TLabels,
89
- metadata: Sequence[dict[str, Any]] | None,
90
- classes: Sequence[str] | None,
91
- name: str | None = None,
92
- ) -> None:
93
- self._classes = classes if classes is not None else [str(i) for i in range(_find_max(labels) + 1)]
94
- self._index2label = dict(enumerate(self._classes))
95
- self._images = images
96
- self._labels = labels
97
- self._metadata = metadata
98
- self._id = name or f"{len(self._images)}_image_{len(self._index2label)}_class_{datum_type}_dataset"
99
-
100
- @property
101
- def metadata(self) -> DatasetMetadata:
102
- return DatasetMetadata(id=self._id, index2label=self._index2label)
103
-
104
- def __len__(self) -> int:
105
- return len(self._images)
106
-
107
-
108
- class CustomImageClassificationDataset(BaseAnnotatedDataset[Sequence[int]], ImageClassificationDataset):
109
- def __init__(
110
- self,
111
- images: Array | Sequence[Array],
112
- labels: Array | Sequence[int],
113
- metadata: Sequence[dict[str, Any]] | None,
114
- classes: Sequence[str] | None,
115
- name: str | None = None,
116
- ) -> None:
117
- super().__init__(
118
- "ic", images, as_numpy(labels).tolist() if isinstance(labels, Array) else labels, metadata, classes
119
- )
120
- if name is not None:
121
- self.__name__ = name
122
- self.__class__.__name__ = name
123
- self.__class__.__qualname__ = name
124
-
125
- def __getitem__(self, idx: int, /) -> tuple[Array, Array, dict[str, Any]]:
126
- one_hot = [0.0] * len(self._index2label)
127
- one_hot[self._labels[idx]] = 1.0
128
- return (
129
- self._images[idx],
130
- as_numpy(one_hot),
131
- self._metadata[idx] if self._metadata is not None else {},
132
- )
133
-
134
-
135
- class CustomObjectDetectionDataset(BaseAnnotatedDataset[Sequence[Sequence[int]]], ObjectDetectionDataset):
136
- class ObjectDetectionTarget:
137
- def __init__(self, labels: Sequence[int], bboxes: Sequence[Sequence[float]]) -> None:
138
- self._labels = labels
139
- self._bboxes = bboxes
140
- self._scores = [1.0] * len(labels)
141
-
142
- @property
143
- def labels(self) -> Sequence[int]:
144
- return self._labels
145
-
146
- @property
147
- def boxes(self) -> Sequence[Sequence[float]]:
148
- return self._bboxes
149
-
150
- @property
151
- def scores(self) -> Sequence[float]:
152
- return self._scores
153
-
154
- def __init__(
155
- self,
156
- images: Array | Sequence[Array],
157
- labels: Array | Sequence[Array] | Sequence[Sequence[int]],
158
- bboxes: Array | Sequence[Array] | Sequence[Sequence[Array]] | Sequence[Sequence[Sequence[float]]],
159
- metadata: Sequence[dict[str, Any]] | None,
160
- classes: Sequence[str] | None,
161
- name: str | None = None,
162
- ) -> None:
163
- super().__init__(
164
- "od",
165
- images,
166
- [as_numpy(label).tolist() if isinstance(label, Array) else label for label in labels],
167
- metadata,
168
- classes,
169
- )
170
- if name is not None:
171
- self.__name__ = name
172
- self.__class__.__name__ = name
173
- self.__class__.__qualname__ = name
174
- self._bboxes = [[as_numpy(box).tolist() if isinstance(box, Array) else box for box in bbox] for bbox in bboxes]
175
-
176
- @property
177
- def metadata(self) -> DatasetMetadata:
178
- return DatasetMetadata(id=self._id, index2label=self._index2label)
179
-
180
- def __getitem__(self, idx: int, /) -> tuple[Array, ObjectDetectionTarget, dict[str, Any]]:
181
- return (
182
- self._images[idx],
183
- self.ObjectDetectionTarget(self._labels[idx], self._bboxes[idx]),
184
- self._metadata[idx] if self._metadata is not None else {},
185
- )
186
-
187
-
188
- def to_image_classification_dataset(
189
- images: Array | Sequence[Array],
190
- labels: Array | Sequence[int],
191
- metadata: Sequence[dict[str, Any]] | dict[str, Sequence[Any]] | None,
192
- classes: Sequence[str] | None,
193
- name: str | None = None,
194
- ) -> ImageClassificationDataset:
195
- """
196
- Helper function to create custom ImageClassificationDataset classes.
197
-
198
- Parameters
199
- ----------
200
- images : Array | Sequence[Array]
201
- The images to use in the dataset.
202
- labels : Array | Sequence[int]
203
- The labels to use in the dataset.
204
- metadata : Sequence[dict[str, Any]] | dict[str, Sequence[Any]] | None
205
- The metadata to use in the dataset.
206
- classes : Sequence[str] | None
207
- The classes to use in the dataset.
208
-
209
- Returns
210
- -------
211
- ImageClassificationDataset
212
- """
213
- _validate_data("ic", images, labels, None, metadata)
214
- return CustomImageClassificationDataset(images, labels, _listify_metadata(metadata), classes, name)
215
-
216
-
217
- def to_object_detection_dataset(
218
- images: Array | Sequence[Array],
219
- labels: Array | Sequence[Array] | Sequence[Sequence[int]],
220
- bboxes: Array | Sequence[Array] | Sequence[Sequence[Array]] | Sequence[Sequence[Sequence[float]]],
221
- metadata: Sequence[dict[str, Any]] | dict[str, Sequence[Any]] | None,
222
- classes: Sequence[str] | None,
223
- name: str | None = None,
224
- ) -> ObjectDetectionDataset:
225
- """
226
- Helper function to create custom ObjectDetectionDataset classes.
227
-
228
- Parameters
229
- ----------
230
- images : Array | Sequence[Array]
231
- The images to use in the dataset.
232
- labels : Array | Sequence[Array] | Sequence[Sequence[int]]
233
- The labels to use in the dataset.
234
- bboxes : Array | Sequence[Array] | Sequence[Sequence[Array]] | Sequence[Sequence[Sequence[float]]]
235
- The bounding boxes (x0,y0,x1,y1) to use in the dataset.
236
- metadata : Sequence[dict[str, Any]] | dict[str, Sequence[Any]] | None
237
- The metadata to use in the dataset.
238
- classes : Sequence[str] | None
239
- The classes to use in the dataset.
240
-
241
- Returns
242
- -------
243
- ObjectDetectionDataset
244
- """
245
- _validate_data("od", images, labels, bboxes, metadata)
246
- return CustomObjectDetectionDataset(images, labels, bboxes, _listify_metadata(metadata), classes, name)
@@ -1,21 +0,0 @@
1
- """Provides access to common Computer Vision datasets."""
2
-
3
- from dataeval.utils.datasets._antiuav import AntiUAVDetection
4
- from dataeval.utils.datasets._cifar10 import CIFAR10
5
- from dataeval.utils.datasets._milco import MILCO
6
- from dataeval.utils.datasets._mnist import MNIST
7
- from dataeval.utils.datasets._seadrone import SeaDrone
8
- from dataeval.utils.datasets._ships import Ships
9
- from dataeval.utils.datasets._voc import VOCDetection, VOCDetectionTorch, VOCSegmentation
10
-
11
- __all__ = [
12
- "MNIST",
13
- "Ships",
14
- "CIFAR10",
15
- "AntiUAVDetection",
16
- "MILCO",
17
- "SeaDrone",
18
- "VOCDetection",
19
- "VOCDetectionTorch",
20
- "VOCSegmentation",
21
- ]