dataeval 0.87.0__py3-none-any.whl → 0.88.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in that registry.
Files changed (56)
  1. dataeval/_log.py +1 -1
  2. dataeval/_version.py +2 -2
  3. dataeval/data/_embeddings.py +78 -35
  4. dataeval/data/_images.py +41 -8
  5. dataeval/data/_metadata.py +294 -41
  6. dataeval/data/_selection.py +22 -7
  7. dataeval/data/_split.py +2 -1
  8. dataeval/data/selections/_classfilter.py +4 -3
  9. dataeval/data/selections/_indices.py +2 -1
  10. dataeval/data/selections/_shuffle.py +3 -2
  11. dataeval/detectors/drift/_base.py +2 -1
  12. dataeval/detectors/drift/_mmd.py +2 -1
  13. dataeval/detectors/drift/_nml/_base.py +1 -1
  14. dataeval/detectors/drift/_nml/_chunk.py +2 -1
  15. dataeval/detectors/drift/_nml/_result.py +3 -2
  16. dataeval/detectors/drift/_nml/_thresholds.py +6 -5
  17. dataeval/detectors/drift/_uncertainty.py +2 -1
  18. dataeval/detectors/linters/duplicates.py +2 -1
  19. dataeval/detectors/linters/outliers.py +4 -3
  20. dataeval/detectors/ood/ae.py +1 -1
  21. dataeval/detectors/ood/base.py +2 -1
  22. dataeval/detectors/ood/mixin.py +2 -1
  23. dataeval/metadata/_utils.py +1 -1
  24. dataeval/metrics/bias/_balance.py +1 -1
  25. dataeval/metrics/stats/_base.py +3 -29
  26. dataeval/metrics/stats/_boxratiostats.py +2 -1
  27. dataeval/metrics/stats/_dimensionstats.py +2 -1
  28. dataeval/metrics/stats/_hashstats.py +2 -1
  29. dataeval/metrics/stats/_pixelstats.py +2 -1
  30. dataeval/metrics/stats/_visualstats.py +2 -1
  31. dataeval/outputs/_base.py +2 -3
  32. dataeval/outputs/_bias.py +2 -1
  33. dataeval/outputs/_estimators.py +1 -1
  34. dataeval/outputs/_linters.py +3 -3
  35. dataeval/outputs/_stats.py +3 -3
  36. dataeval/outputs/_utils.py +1 -1
  37. dataeval/outputs/_workflows.py +29 -24
  38. dataeval/typing.py +11 -9
  39. dataeval/utils/_array.py +3 -2
  40. dataeval/utils/_bin.py +2 -1
  41. dataeval/utils/_method.py +2 -3
  42. dataeval/utils/_multiprocessing.py +34 -0
  43. dataeval/utils/_plot.py +2 -1
  44. dataeval/utils/data/__init__.py +4 -5
  45. dataeval/utils/data/{metadata.py → _merge.py} +3 -2
  46. dataeval/utils/data/_validate.py +2 -1
  47. dataeval/utils/data/collate.py +2 -1
  48. dataeval/utils/torch/_internal.py +2 -1
  49. dataeval/utils/torch/trainer.py +1 -1
  50. dataeval/workflows/sufficiency.py +13 -9
  51. {dataeval-0.87.0.dist-info → dataeval-0.88.0.dist-info}/METADATA +4 -5
  52. dataeval-0.88.0.dist-info/RECORD +105 -0
  53. dataeval/utils/data/_dataset.py +0 -253
  54. dataeval-0.87.0.dist-info/RECORD +0 -105
  55. {dataeval-0.87.0.dist-info → dataeval-0.88.0.dist-info}/WHEEL +0 -0
  56. {dataeval-0.87.0.dist-info → dataeval-0.88.0.dist-info}/licenses/LICENSE +0 -0
dataeval/utils/_plot.py CHANGED
@@ -4,7 +4,8 @@ __all__ = []
 
 import contextlib
 import math
-from typing import Any, Mapping, Sequence
+from collections.abc import Mapping, Sequence
+from typing import Any
 
 import numpy as np
 
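This import shuffle recurs throughout the release: the `typing` aliases for `Mapping`, `Sequence`, `Callable`, and friends have been deprecated in favor of `collections.abc` since Python 3.9, and dropping 3.9 support (see the METADATA diff below) also permits PEP 604 union syntax in `isinstance` checks, as seen in `_merge.py` and `trainer.py` further down. A minimal sketch of the two Python 3.10+ idioms adopted here:

```python
# Sketch of the two idioms this release standardizes on (requires Python 3.10+).
from collections.abc import Mapping, Sequence  # was: from typing import Mapping, Sequence


def describe(value: Sequence[int] | Mapping[str, int]) -> str:
    # isinstance with a union type (PEP 604) replaces the tuple form
    # isinstance(value, (list, tuple)).
    if isinstance(value, list | tuple):
        return f"sequence of {len(value)} items"
    return "mapping"


print(describe([1, 2, 3]))  # sequence of 3 items
print(describe({"a": 1}))   # mapping
```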
dataeval/utils/data/__init__.py CHANGED
@@ -1,13 +1,12 @@
 """Provides access to common Computer Vision datasets."""
 
-from dataeval.utils.data import collate, metadata
-from dataeval.utils.data._dataset import to_image_classification_dataset, to_object_detection_dataset
+from dataeval.utils.data import collate
+from dataeval.utils.data._merge import flatten, merge
 from dataeval.utils.data._validate import validate_dataset
 
 __all__ = [
     "collate",
-    "metadata",
-    "to_image_classification_dataset",
-    "to_object_detection_dataset",
+    "flatten",
+    "merge",
     "validate_dataset",
 ]
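The public surface of `dataeval.utils.data` changes here: `flatten` and `merge`, previously reached through the `metadata` submodule, are now re-exported directly, while `to_image_classification_dataset` and `to_object_detection_dataset` disappear along with `_dataset.py` (deleted at the end of this diff). A migration sketch for caller code (the 0.87.0 lines are shown as comments; elided arguments are left as-is):

```python
# 0.87.0:
#   from dataeval.utils.data import metadata
#   flat = metadata.flatten(...)
# 0.88.0:
from dataeval.utils.data import flatten, merge  # same functions, new home
```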
dataeval/utils/data/{metadata.py → _merge.py} RENAMED
@@ -7,8 +7,9 @@ from __future__ import annotations
 __all__ = ["merge", "flatten"]
 
 import warnings
+from collections.abc import Iterable, Mapping, Sequence
 from enum import Enum
-from typing import Any, Iterable, Literal, Mapping, Sequence, overload
+from typing import Any, Literal, overload
 
 import numpy as np
 from numpy.typing import NDArray
@@ -132,7 +133,7 @@ def _flatten_dict_inner(
         if isinstance(v, dict):
             fd, size = _flatten_dict_inner(v, dropped, new_keys, size=size, nested=nested)
             items.update(fd)
-        elif isinstance(v, (list, tuple)):
+        elif isinstance(v, list | tuple):
             if nested:
                 dropped.setdefault(parent_keys + (k,), set()).add(DropReason.NESTED_LIST)
             elif size is not None and size != len(v):
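The second hunk above is the heart of `flatten`: nested dictionaries recurse, while lists and tuples are either flagged as nested (`DropReason.NESTED_LIST`) or checked for a consistent length. A simplified, self-contained sketch of that behavior (illustrative only, not the real `_merge.py` API, which also tracks sizes and drop reasons):

```python
# Simplified illustration of dict flattening with list handling, mirroring
# the structure of _flatten_dict_inner above (not the package's real API).
def flatten_simple(d: dict, parent: tuple = ()) -> dict:
    items: dict = {}
    for k, v in d.items():
        key = parent + (k,)
        if isinstance(v, dict):
            items.update(flatten_simple(v, key))  # recurse into nested dicts
        elif isinstance(v, list | tuple):
            items[key] = list(v)  # real code validates sizes / drops nested lists
        else:
            items[key] = v
    return items


print(flatten_simple({"a": {"b": 1}, "c": [1, 2]}))
# {('a', 'b'): 1, ('c',): [1, 2]}
```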
dataeval/utils/data/_validate.py CHANGED
@@ -2,7 +2,8 @@ from __future__ import annotations
 
 __all__ = []
 
-from typing import Any, Literal, Sequence, Sized
+from collections.abc import Sequence, Sized
+from typing import Any, Literal
 
 from dataeval.config import EPSILON
 from dataeval.typing import Array, ObjectDetectionTarget
dataeval/utils/data/collate.py CHANGED
@@ -6,7 +6,8 @@ from __future__ import annotations
 
 __all__ = ["list_collate_fn", "numpy_collate_fn", "torch_collate_fn"]
 
-from typing import Any, Iterable, Sequence, TypeVar
+from collections.abc import Iterable, Sequence
+from typing import Any, TypeVar
 
 import numpy as np
 import torch
dataeval/utils/torch/_internal.py CHANGED
@@ -2,7 +2,8 @@ from __future__ import annotations
 
 __all__ = []
 
-from typing import Any, Callable
+from collections.abc import Callable
+from typing import Any
 
 import numpy as np
 import torch
dataeval/utils/torch/trainer.py CHANGED
@@ -16,7 +16,7 @@ from dataeval.config import DeviceLike, get_device
 
 def get_images_from_batch(batch: Any) -> Any:
     """Extracts images from a batch of collated data by DataLoader"""
-    return batch[0] if isinstance(batch, (list, tuple)) else batch
+    return batch[0] if isinstance(batch, list | tuple) else batch
 
 
 class AETrainer:
dataeval/workflows/sufficiency.py CHANGED
@@ -2,7 +2,8 @@ from __future__ import annotations
 
 __all__ = []
 
-from typing import Any, Callable, Generic, Iterable, Mapping, Sequence, Sized, TypeVar
+from collections.abc import Callable, Iterable, Mapping, Sequence, Sized
+from typing import Any, Generic, TypeVar
 
 import numpy as np
 import torch
@@ -207,7 +208,9 @@ class Sufficiency(Generic[T]):
         ...     substeps=5,
         ... )
         >>> suff.evaluate()
-        SufficiencyOutput(steps=array([  1,   3,  10,  31, 100], dtype=uint32), measures={'test': array([1., 1., 1., 1., 1.])}, n_iter=1000)
+        SufficiencyOutput(steps=array([  1,   3,  10,  31, 100], dtype=uint32), measures={'test': array([[1., 1., 1., 1., 1.],
+               [1., 1., 1., 1., 1.],
+               [1., 1., 1., 1., 1.]])}, averaged_measures={'test': array([1., 1., 1., 1., 1.])}, n_iter=1000)
 
         Evaluate at a single value
 
@@ -219,7 +222,7 @@ class Sufficiency(Generic[T]):
         ...     eval_fn=eval_fn,
         ... )
         >>> suff.evaluate(eval_at=50)
-        SufficiencyOutput(steps=array([50]), measures={'test': array([1.])}, n_iter=1000)
+        SufficiencyOutput(steps=array([50]), measures={'test': array([[1.]])}, averaged_measures={'test': array([1.])}, n_iter=1000)
 
         Evaluating at linear steps from 0-100 inclusive
 
@@ -231,7 +234,7 @@ class Sufficiency(Generic[T]):
         ...     eval_fn=eval_fn,
         ... )
         >>> suff.evaluate(eval_at=np.arange(0, 101, 20))
-        SufficiencyOutput(steps=array([  0,  20,  40,  60,  80, 100]), measures={'test': array([1., 1., 1., 1., 1., 1.])}, n_iter=1000)
+        SufficiencyOutput(steps=array([  0,  20,  40,  60,  80, 100]), measures={'test': array([[1., 1., 1., 1., 1., 1.]])}, averaged_measures={'test': array([1., 1., 1., 1., 1., 1.])}, n_iter=1000)
 
         """  # noqa: E501
         if eval_at is not None:
@@ -249,7 +252,7 @@ class Sufficiency(Generic[T]):
         measures = {}
 
         # Run each model over all indices
-        for _ in range(self.runs):
+        for run in range(self.runs):
             # Create a randomized set of indices to use
             indices = np.random.randint(0, self._length, size=self._length)
             # Reset the network weights to "create" an untrained model
@@ -272,9 +275,10 @@ class Sufficiency(Generic[T]):
                 # Sum result into current substep iteration to be averaged later
                 value = np.array(value).ravel()
                 if name not in measures:
-                    measures[name] = np.zeros(substeps if len(value) == 1 else (substeps, len(value)))
-                measures[name][iteration] += value
+                    measures[name] = np.zeros(
+                        (self.runs, substeps) if len(value) == 1 else (self.runs, substeps, len(value))
+                    )
 
+                measures[name][run, iteration] = value
         # The mean for each measure must be calculated before being returned
-        measures = {k: (v / self.runs).T for k, v in measures.items()}
-        return SufficiencyOutput(ranges, measures)
+        return SufficiencyOutput(ranges, measures=measures)
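The net effect of these hunks: `evaluate()` no longer sums and averages across runs in place. It records every run in a `(runs, substeps)` array (or `(runs, substeps, len(value))` for multi-valued measures) and hands the raw measures to `SufficiencyOutput`, which, as the updated doctests show, now carries the per-run data alongside an `averaged_measures` field. A rough sketch of the shape change under those assumptions:

```python
import numpy as np

runs, substeps = 3, 5
measures = {"test": np.zeros((runs, substeps))}  # 0.88.0: one row per run

for run in range(runs):
    for iteration in range(substeps):
        value = 1.0                               # stand-in for one eval_fn result
        measures["test"][run, iteration] = value  # direct assignment, no += then divide

# 0.87.0 baked this average in; 0.88.0 keeps the raw runs and derives the average.
averaged = {k: v.mean(axis=0) for k, v in measures.items()}
print(measures["test"].shape, averaged["test"])   # (3, 5) [1. 1. 1. 1. 1.]
```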
{dataeval-0.87.0.dist-info → dataeval-0.88.0.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dataeval
-Version: 0.87.0
+Version: 0.88.0
 Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
 Project-URL: Homepage, https://dataeval.ai/
 Project-URL: Repository, https://github.com/aria-ml/dataeval/
@@ -14,12 +14,11 @@ Classifier: Intended Audience :: Science/Research
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3 :: Only
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Scientific/Engineering
-Requires-Python: <3.13,>=3.9
+Requires-Python: <3.13,>=3.10
 Requires-Dist: fast-hdbscan==0.2.0
 Requires-Dist: lightgbm>=4
 Requires-Dist: numba>=0.59.1
@@ -31,7 +30,7 @@ Requires-Dist: scipy>=1.10
 Requires-Dist: torch>=2.2.0
 Requires-Dist: torchvision>=0.17.0
 Requires-Dist: tqdm>=4.66
-Requires-Dist: typing-extensions>=4.12; python_version ~= '3.9'
+Requires-Dist: typing-extensions>=4.12
 Requires-Dist: xxhash>=3.3
 Provides-Extra: all
 Requires-Dist: matplotlib>=3.7.1; extra == 'all'
@@ -88,7 +87,7 @@ using MAITE-compliant datasets and models.
 
 ## Getting Started
 
-**Python versions:** 3.9 - 3.12
+**Python versions:** 3.10 - 3.12
 
 **Supported packages**: _NumPy_, _Pandas_, _Sci-kit learn_, _MAITE_, _NRTK_
 
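This release drops Python 3.9: the classifier goes away, `Requires-Python` moves to `>=3.10`, and the `python_version ~= '3.9'` marker on typing-extensions is dropped, making that dependency unconditional. To check an interpreter against the new constraint programmatically, the third-party `packaging` library (an assumption here; it is not a dataeval dependency) evaluates these specifiers directly:

```python
from packaging.specifiers import SpecifierSet

requires_python = SpecifierSet("<3.13,>=3.10")  # from the 0.88.0 METADATA
print("3.9" in requires_python)   # False, no longer supported
print("3.12" in requires_python)  # True
```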
dataeval-0.88.0.dist-info/RECORD ADDED
@@ -0,0 +1,105 @@
+dataeval/__init__.py,sha256=aFzX3SLx8wgc763RY772P41ZLqeHcUHRKW9XAN0KfHQ,1793
+dataeval/_log.py,sha256=Q2d6oqYKXyn1wkgMdNX9iswod4Jq0jPADShrCFVgJI0,374
+dataeval/_version.py,sha256=p36W3DcVLrkAWnGoljUjU-PF8_IvHjfGbC98bXZ2g_c,513
+dataeval/config.py,sha256=lL73s_xa9pBxHHCnBKi59D_tl4vS7ig1rfWbIYkM_ac,3839
+dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+dataeval/typing.py,sha256=cKpK8rY7iVf-KL9kuye6qi_6LS6hKbMxHpurdWlYY44,7445
+dataeval/data/__init__.py,sha256=wzQ6uUFLNB3VJR0a2QnRBYwEmwXT93q0WpHu7FmFW1E,486
+dataeval/data/_embeddings.py,sha256=BHoiSdt46TblVSglg9Cyrm8iAJJq1Z2jR4wesL0WOf4,14731
+dataeval/data/_images.py,sha256=9q0O5Zurf-5727rMC7DB_i3TtXcL67D9a5GGx5qlov8,3875
+dataeval/data/_metadata.py,sha256=-dhmyX6vvv97S9YHr6roNcbzXequkxMw48PwNDdzZ9I,24163
+dataeval/data/_selection.py,sha256=4qI-GwSdEGiRCyr3kqxr6uOiyRRKsPBRzYHmpgdWLY0,5301
+dataeval/data/_split.py,sha256=aCkXFvkCw8VkWICdCmY9tHiEvkQI5j9jUa7QLjm-gZE,16759
+dataeval/data/selections/__init__.py,sha256=2m8ZB53wXzqLcqmc6p5atO6graB6ZyiRSNJFxf11X_g,613
+dataeval/data/selections/_classbalance.py,sha256=AqExg-QnYBcfBNzS1Ygsz3Cgb2cqcgGXE0-cseD8_vA,1580
+dataeval/data/selections/_classfilter.py,sha256=sn7Lbhb5KJyiqDH7ZdugX5WnRlglwCckWzK6K5uXbwg,4447
+dataeval/data/selections/_indices.py,sha256=PcM4qQwFVUXO9xY2brO6OUDFMeVgiSlTRKFDhTA_W-0,655
+dataeval/data/selections/_limit.py,sha256=JG4GmEiNKt3sk4PbOUbBnGGzNlyz72H-kQrt8COMm4Y,512
+dataeval/data/selections/_prioritize.py,sha256=ss_GZ5MB2ohdNuB55C69TYNwV3PUSmk715gDJI6qfYA,20140
+dataeval/data/selections/_reverse.py,sha256=FqYlpPg-0Vz75kbEhGFrJlzIGELSmDZxPlBMY18a57I,365
+dataeval/data/selections/_shuffle.py,sha256=uW_Zss773ob2swqwTdL6G-CzMElCq8TO2TScvABQR1U,1268
+dataeval/detectors/__init__.py,sha256=3Sg-XWlwr75zEEH3hZKA4nWMtGvaRlnfzTWvZG_Ak6U,189
+dataeval/detectors/drift/__init__.py,sha256=Jqv98oOVeC2tvHlNGxQ8RJ6De2q4SyS5lTpaYlb4ocM,756
+dataeval/detectors/drift/_base.py,sha256=__mlqkiPW0GcVSVE4u9t6M2mp3rAU5leSk_XPQn_Mp8,7619
+dataeval/detectors/drift/_cvm.py,sha256=cS33zWJmFY1fft1XcANcP2jSD5ou7TxvIU2AldhTynM,3004
+dataeval/detectors/drift/_ks.py,sha256=uMc5-NA-lSV1IODrY8uJe87ll3uRJT_oXLJFXy95M1w,3186
+dataeval/detectors/drift/_mmd.py,sha256=EkfbeK5L6xGGQrcA1v_0YlpIOingF73jn2H6s3tRKbo,11550
+dataeval/detectors/drift/_mvdc.py,sha256=WMN6aDOWCh1q1MtdRXFIZlFcfnVi4XgBHsS0A6L5UuY,2942
+dataeval/detectors/drift/_uncertainty.py,sha256=-4aiwNosJ1_4kY-d2n4YbZV_jvnf5xdTMDELXSoW6OM,5874
+dataeval/detectors/drift/updates.py,sha256=L1PnrPlIE1x6ujCc5mCwjcAZwadVTn-Zjb6MnTDvzJQ,2251
+dataeval/detectors/drift/_nml/__init__.py,sha256=MNyKyZlfTjr5uQql2uBBfRkUdsuduie_WJdn09GYmqg,137
+dataeval/detectors/drift/_nml/_base.py,sha256=wMqegfa92Tldqix1RL6dLMdiKgX0GqHmTiFxO38ja_c,2672
+dataeval/detectors/drift/_nml/_chunk.py,sha256=5WhpcIHJ3EVBGZjDJLri54fWikYAT_7sC0DxQkSs0tI,13591
+dataeval/detectors/drift/_nml/_domainclassifier.py,sha256=n7Ttq5Ej7sAY9Jn2iagaGj4IIWiG8gmA3wwFizlBqes,7292
+dataeval/detectors/drift/_nml/_result.py,sha256=mH_tYrYVaIXhsU9gcSFqEdaI38BArlpKuW0-8SPS8aY,3295
+dataeval/detectors/drift/_nml/_thresholds.py,sha256=jAbRdAPP4O4hJqTLpvfVAbWNdw3zL6UrTl2KNWphQPc,12083
+dataeval/detectors/linters/__init__.py,sha256=xn2zPwUcmsuf-Jd9uw6AVI11C9z1b1Y9fYtuFnXenZ0,404
+dataeval/detectors/linters/duplicates.py,sha256=k5cQz_1i9vchugSfC267mugWzgy6sVDa36BdQAy_PXs,4990
+dataeval/detectors/linters/outliers.py,sha256=a980lDV9g_tZYHV9k6wSh2d11nNYEqTy56IduC-H5GA,10159
+dataeval/detectors/ood/__init__.py,sha256=qDoDdQetJY1xZB43dNzcOIO_8NiEuEU0z1QNU4QkEXs,341
+dataeval/detectors/ood/ae.py,sha256=jo6aHcKT1N13ew0tV6FZa3vQI5DQivZo5-uAm4uLaAs,2950
+dataeval/detectors/ood/base.py,sha256=eWzODq2i1Tah7Mqm1guASTf9p2tF4Tr6mZoDT3pDvsk,4401
+dataeval/detectors/ood/knn.py,sha256=Fu77geQFHPYNOn81VIXUJ3yC3t5Ylv0ZgvwMeA2JX6I,3782
+dataeval/detectors/ood/mixin.py,sha256=cNmRrR9cv9phwAGSuQMC7EhmrFtf68C63wdTggy1UaU,5458
+dataeval/metadata/__init__.py,sha256=XDDmJbOZBNM6pL0r6Nbu6oMRoyAh22IDkPYGndNlkZU,316
+dataeval/metadata/_distance.py,sha256=MbXM9idsooNWnGLaTKg8j4ZqavUeJUjuW7EPW3-UQyg,4234
+dataeval/metadata/_ood.py,sha256=lNPHouj_9WfM_uTtsaiRaPn46RcVy3YebD1c32vDj-c,8981
+dataeval/metadata/_utils.py,sha256=4fX-1eA3fK4uwNh_DfOGiXxl4PHZ1AghOejJ03rV3RI,1219
+dataeval/metrics/__init__.py,sha256=8VC8q3HuJN3o_WN51Ae2_wXznl3RMXIvA5GYVcy7vr8,225
+dataeval/metrics/bias/__init__.py,sha256=329S1_3WnWqeU4-qVcbe0fMy4lDrj9uKslWHIQf93yg,839
+dataeval/metrics/bias/_balance.py,sha256=aDAII2lXeAz9dZk0TdgZHtyab1tObQlya8jdNNdn8eI,5718
+dataeval/metrics/bias/_completeness.py,sha256=2cvOXe7fhtxZGH_4QBuiCafIeamxFBarMiUBuEP7QGI,4596
+dataeval/metrics/bias/_coverage.py,sha256=v2x2hbOf2za9jFcSVSJUAoJ2BJfzzlCzt0mFIGtBL0A,3639
+dataeval/metrics/bias/_diversity.py,sha256=Z7UQzKp9bsmB-hC3_sY6HIJUJRkLHb5cVEoU79cNDzc,5800
+dataeval/metrics/bias/_parity.py,sha256=ZIKc5OK6wQ4moleBJzGDfOPvyNzj03-KoHAGBZnO4pk,11433
+dataeval/metrics/estimators/__init__.py,sha256=Pnds8uIyAovt2fKqZjiHCIP_kVoBWlVllekYuK5UmmU,568
+dataeval/metrics/estimators/_ber.py,sha256=7noeRyOJJYqrJ_jt90nRHtR2t2u5MIvTCmWt0_rd4EU,5370
+dataeval/metrics/estimators/_clusterer.py,sha256=1HrpihGTJ63IkNSOy4Ibw633Gllkm1RxKmoKT5MOgt0,1434
+dataeval/metrics/estimators/_divergence.py,sha256=t-Z_7Bq4V4FunxKlq7G4ThtgLany8n4iEU0n0afr7F8,3991
+dataeval/metrics/estimators/_uap.py,sha256=BULEBbJ9BQ1IcTeZf0x7iI60QHAWCccBOM97FIu9VXA,1928
+dataeval/metrics/stats/__init__.py,sha256=6tA_9nbbM5ObJ6cds8Y1VBtTQiTOxrpGQSFLu_lWGGA,1098
+dataeval/metrics/stats/_base.py,sha256=vE8dvrNqjAKGyCzqlgQa-3ArP6PJ-P8Y4rdNPUZ0ml8,11703
+dataeval/metrics/stats/_boxratiostats.py,sha256=CFn-BqnPmAXagaLlhJGusdGLQewWxRweb9Xxv_JAOaw,6477
+dataeval/metrics/stats/_dimensionstats.py,sha256=GlzshH7nZurVWANmZmpuXy_v5ZfMrdAfO_FbtHTL38Q,2903
+dataeval/metrics/stats/_hashstats.py,sha256=wsy8F8-UMUbtjeCnaqAR9Yxv_jp4kFerHH2L0UMIAgY,5415
+dataeval/metrics/stats/_imagestats.py,sha256=gUPNgN5Zwzdr7WnSwbve1NXNsyxd5dy3cSnlR_7guCg,3007
+dataeval/metrics/stats/_labelstats.py,sha256=_dXt3p8_-SHEtHvJWbL0rnQvO2g30zxX42mG2LGJepU,3195
+dataeval/metrics/stats/_pixelstats.py,sha256=XEFByxMUbNaCvEsnVhH5ewJ8UH253ySdpFe5u1jr38w,3339
+dataeval/metrics/stats/_visualstats.py,sha256=SbXvNWxfKrw-2wCu5FXMsnpsMUVaQzdJkj6RB4qEsBM,3740
+dataeval/outputs/__init__.py,sha256=geHB5M3QOiFFaQGV4ZwDTTKpqZPvPePbqG7lzaPhaXQ,1741
+dataeval/outputs/_base.py,sha256=lVC7xmBgv3JYY2wVLaGBMPlkRE_KV9UloaeQn0nQydA,5875
+dataeval/outputs/_bias.py,sha256=gj2AgSKOdq6bj59RMiHpha4Skld6ZMB8cW5KesOZ6T4,10483
+dataeval/outputs/_drift.py,sha256=hXILED_soY8ppIQZgftQvmumtwDrTnABbYl-flIGEU4,4588
+dataeval/outputs/_estimators.py,sha256=SUjur5jI6OU9C7GpsAuA_qqO1PRnS-8eZN-otsaV5q0,3120
+dataeval/outputs/_linters.py,sha256=N4nP5HMoeN2zLndWzhoIT5QB1Ujxbs8Gx5pWPKhl3yc,6683
+dataeval/outputs/_metadata.py,sha256=ffZgpX8KWURPHXpOWjbvJ2KRqWQkS2nWuIjKUzoHhMI,1710
+dataeval/outputs/_ood.py,sha256=suLKVXULGtXH0rq9eXHI1d3d2jhGmItJtz4QiQd47A4,1718
+dataeval/outputs/_stats.py,sha256=PsDV0uw41aTy-X9tjz-PqOj78TTnH4JQVpOrU3OThAE,17423
+dataeval/outputs/_utils.py,sha256=KJ1P8tcMFIkGi2A6VfqbZwLcT1cD0c2YssTbWbHALjE,938
+dataeval/outputs/_workflows.py,sha256=sw13FNx1vANX7DBsKeOLfP2bkp5r6SexBorfb9dxYxU,12160
+dataeval/utils/__init__.py,sha256=sjelzMPaTImF6isiRcp8UGDE3tppEpWS5GoR8WKPZ1k,242
+dataeval/utils/_array.py,sha256=P4_gyH3kkksUJm9Vqx-oPtLWxFmqMacUJzhj0vmrUd8,6361
+dataeval/utils/_bin.py,sha256=QjlRCB5mOauETdxSbvRxRG17riO6gScsMd_lNnnvqxs,7391
+dataeval/utils/_clusterer.py,sha256=rUvEdyMwp95lffmt6xKMEwsjRXNoBS0n5mAS_HNOnck,5656
+dataeval/utils/_fast_mst.py,sha256=pv42flr1Uf5RBa9qDG0YLDXWH7Mr7a9zpauO1HqZXaY,8061
+dataeval/utils/_image.py,sha256=4uxTIOYZZlRJOfNmdA3ek3no3FrLWCK5un48kStMDt8,3578
+dataeval/utils/_method.py,sha256=53Q3xfQvpyGa-z9_rn6GhjfCcUR5Q9nuWQtCNav4Ftc,391
+dataeval/utils/_mst.py,sha256=bLmJmu_1Dtj3hC5gQp3oAiJ_7TKtEjahTqusVRRU4eI,2168
+dataeval/utils/_multiprocessing.py,sha256=n6qCVybheWry42NCdxzcsgkJ9xLGkz8m12SWgviTJQM,1060
+dataeval/utils/_plot.py,sha256=kwYZbSdHCV02wRrZDTxR3xd0XfQ-6TlLmfvubwiEQcw,7252
+dataeval/utils/data/__init__.py,sha256=_XoNxADxBdR96Ca-0RgkDlQa6C2FHld1nwts-Xdif3g,294
+dataeval/utils/data/_merge.py,sha256=9JKtlpBarMX_9jlhnQg1AmBwTe9I2w6xQkFGss3IkkU,14729
+dataeval/utils/data/_validate.py,sha256=YH5Q6uzcTRdf_AMKMRyYW37RUlXm-S8ddhw6cegdNkc,6950
+dataeval/utils/data/collate.py,sha256=AWoQ2k9FXyTeq6ExTsGa6sBML_lZm9p38-DN9hnpm8E,3963
+dataeval/utils/torch/__init__.py,sha256=dn5mjCrFp0b1aL_UEURhONU0Ag0cmXoTOBSGagpkTiA,325
+dataeval/utils/torch/_blocks.py,sha256=HVhBTMMD5NA4qheMUgyol1KWiKZDIuc8k5j4RcMKmhk,1466
+dataeval/utils/torch/_gmm.py,sha256=XM68GNEP97EjaB1U49-ZXRb81d0CEFnPS910alrcB3g,3740
+dataeval/utils/torch/_internal.py,sha256=LiuqZGIzKewp_29_Lskj0mnNqdMffMheMdgGeXLDI5g,4173
+dataeval/utils/torch/models.py,sha256=1idpXyjrYcCBSsbxxRUOto8xr4MJNjDEqQHiIXVU5Zc,9700
+dataeval/utils/torch/trainer.py,sha256=kBdgxd9TL1Pvz-dyZbS__POAKeFrDiQ4vKFh8ltJApc,5543
+dataeval/workflows/__init__.py,sha256=ou8y0KO-d6W5lgmcyLjKlf-J_ckP3vilW7wHkgiDlZ4,255
+dataeval/workflows/sufficiency.py,sha256=4DTDaYyEuAfO0LTFpQGXXXayV5aCIbziSL2Rddd1vQ0,10360
+dataeval-0.88.0.dist-info/METADATA,sha256=Y5NRZgrhfpyGQKHUnqnO6rAItVR3oWUqIp646_0xluQ,5601
+dataeval-0.88.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+dataeval-0.88.0.dist-info/licenses/LICENSE,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
+dataeval-0.88.0.dist-info/RECORD,,
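For reference, each RECORD row is `path,sha256=<digest>,<size-in-bytes>`, where the digest is the unpadded URL-safe base64 encoding of the file's SHA-256 hash, per the wheel spec. A small sketch for recomputing one of the digests above, assuming paths relative to site-packages:

```python
import base64
import hashlib


def record_hash(path: str) -> str:
    # Unpadded URL-safe base64 of the SHA-256 digest, as used in RECORD files.
    with open(path, "rb") as f:
        digest = hashlib.sha256(f.read()).digest()
    return base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")


# e.g. record_hash("dataeval/_log.py") should print
# "Q2d6oqYKXyn1wkgMdNX9iswod4Jq0jPADShrCFVgJI0" for the 0.88.0 wheel
```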
dataeval/utils/data/_dataset.py DELETED
@@ -1,253 +0,0 @@
-from __future__ import annotations
-
-__all__ = []
-
-from typing import Any, Generic, Iterable, Literal, Sequence, SupportsFloat, SupportsInt, TypeVar, cast
-
-from dataeval.typing import (
-    Array,
-    ArrayLike,
-    DatasetMetadata,
-    ImageClassificationDataset,
-    ObjectDetectionDataset,
-)
-from dataeval.utils._array import as_numpy
-
-
-def _ensure_id(index: int, metadata: dict[str, Any]) -> dict[str, Any]:
-    return {"id": index, **metadata} if "id" not in metadata else metadata
-
-
-def _validate_data(
-    datum_type: Literal["ic", "od"],
-    images: Array | Sequence[Array],
-    labels: Array | Sequence[int] | Sequence[Array] | Sequence[Sequence[int]],
-    bboxes: Array | Sequence[Array] | Sequence[Sequence[Array]] | Sequence[Sequence[Sequence[float]]] | None,
-    metadata: Sequence[dict[str, Any]] | dict[str, Sequence[Any]] | None,
-) -> None:
-    # Validate inputs
-    dataset_len = len(images)
-
-    if not isinstance(images, (Sequence, Array)) or len(images[0].shape) != 3:
-        raise ValueError("Images must be a sequence or array of 3 dimensional arrays (H, W, C).")
-    if len(labels) != dataset_len:
-        raise ValueError(f"Number of labels ({len(labels)}) does not match number of images ({dataset_len}).")
-    if bboxes is not None and len(bboxes) != dataset_len:
-        raise ValueError(f"Number of bboxes ({len(bboxes)}) does not match number of images ({dataset_len}).")
-    if metadata is not None and (
-        len(metadata) != dataset_len
-        if isinstance(metadata, Sequence)
-        else any(
-            not isinstance(metadatum, Sequence) or len(metadatum) != dataset_len for metadatum in metadata.values()
-        )
-    ):
-        raise ValueError(f"Number of metadata ({len(metadata)}) does not match number of images ({dataset_len}).")
-
-    if datum_type == "ic":
-        if not isinstance(labels, (Sequence, Array)) or not isinstance(labels[0], (int, SupportsInt)):
-            raise TypeError("Labels must be a sequence of integers for image classification.")
-    elif datum_type == "od":
-        if (
-            not isinstance(labels, (Sequence, Array))
-            or not isinstance(labels[0], (Sequence, Array))
-            or not isinstance(cast(Sequence[Any], labels[0])[0], (int, SupportsInt))
-        ):
-            raise TypeError("Labels must be a sequence of sequences of integers for object detection.")
-        if (
-            bboxes is None
-            or not isinstance(bboxes, (Sequence, Array))
-            or not isinstance(bboxes[0], (Sequence, Array))
-            or not isinstance(bboxes[0][0], (Sequence, Array))
-            or not isinstance(bboxes[0][0][0], (float, SupportsFloat))
-            or not len(bboxes[0][0]) == 4
-        ):
-            raise TypeError("Boxes must be a sequence of sequences of (x0, y0, x1, y1) for object detection.")
-    else:
-        raise ValueError(f"Unknown datum type '{datum_type}'. Must be 'ic' or 'od'.")
-
-
-def _listify_metadata(
-    metadata: Sequence[dict[str, Any]] | dict[str, Sequence[Any]] | None,
-) -> Sequence[dict[str, Any]] | None:
-    if isinstance(metadata, dict):
-        return [{k: v[i] for k, v in metadata.items()} for i in range(len(next(iter(metadata.values()))))]
-    return metadata
-
-
-def _find_max(arr: ArrayLike) -> Any:
-    if not isinstance(arr, (bytes, str)) and isinstance(arr, (Iterable, Sequence, Array)):
-        nested = [x for x in [_find_max(x) for x in arr] if x is not None]
-        return max(nested) if len(nested) > 0 else None
-    return arr
-
-
-_TLabels = TypeVar("_TLabels", Sequence[int], Sequence[Sequence[int]])
-
-
-class BaseAnnotatedDataset(Generic[_TLabels]):
-    def __init__(
-        self,
-        datum_type: Literal["ic", "od"],
-        images: Array | Sequence[Array],
-        labels: _TLabels,
-        metadata: Sequence[dict[str, Any]] | None,
-        classes: Sequence[str] | None,
-        name: str | None = None,
-    ) -> None:
-        self._classes = classes if classes is not None else [str(i) for i in range(_find_max(labels) + 1)]
-        self._index2label = dict(enumerate(self._classes))
-        self._images = images
-        self._labels = labels
-        self._metadata = metadata
-        self._id = name or f"{len(self._images)}_image_{len(self._index2label)}_class_{datum_type}_dataset"
-
-    @property
-    def metadata(self) -> DatasetMetadata:
-        return DatasetMetadata(id=self._id, index2label=self._index2label)
-
-    def __len__(self) -> int:
-        return len(self._images)
-
-
-class CustomImageClassificationDataset(BaseAnnotatedDataset[Sequence[int]], ImageClassificationDataset):
-    def __init__(
-        self,
-        images: Array | Sequence[Array],
-        labels: Array | Sequence[int],
-        metadata: Sequence[dict[str, Any]] | None,
-        classes: Sequence[str] | None,
-        name: str | None = None,
-    ) -> None:
-        super().__init__(
-            "ic", images, as_numpy(labels).tolist() if isinstance(labels, Array) else labels, metadata, classes
-        )
-        if name is not None:
-            self.__name__ = name
-            self.__class__.__name__ = name
-            self.__class__.__qualname__ = name
-
-    def __getitem__(self, idx: int, /) -> tuple[Array, Array, dict[str, Any]]:
-        one_hot = [0.0] * len(self._index2label)
-        one_hot[self._labels[idx]] = 1.0
-        return (
-            self._images[idx],
-            as_numpy(one_hot),
-            _ensure_id(idx, self._metadata[idx] if self._metadata is not None else {}),
-        )
-
-
-class CustomObjectDetectionDataset(BaseAnnotatedDataset[Sequence[Sequence[int]]], ObjectDetectionDataset):
-    class ObjectDetectionTarget:
-        def __init__(self, labels: Sequence[int], bboxes: Sequence[Sequence[float]], class_count: int) -> None:
-            self._labels = labels
-            self._bboxes = bboxes
-            one_hot = [[0.0] * class_count] * len(labels)
-            for i, label in enumerate(labels):
-                one_hot[i][label] = 1.0
-            self._scores = one_hot
-
-        @property
-        def labels(self) -> Sequence[int]:
-            return self._labels
-
-        @property
-        def boxes(self) -> Sequence[Sequence[float]]:
-            return self._bboxes
-
-        @property
-        def scores(self) -> Sequence[Sequence[float]]:
-            return self._scores
-
-    def __init__(
-        self,
-        images: Array | Sequence[Array],
-        labels: Array | Sequence[Array] | Sequence[Sequence[int]],
-        bboxes: Array | Sequence[Array] | Sequence[Sequence[Array]] | Sequence[Sequence[Sequence[float]]],
-        metadata: Sequence[dict[str, Any]] | None,
-        classes: Sequence[str] | None,
-        name: str | None = None,
-    ) -> None:
-        super().__init__(
-            "od",
-            images,
-            [as_numpy(label).tolist() if isinstance(label, Array) else label for label in labels],
-            metadata,
-            classes,
-        )
-        if name is not None:
-            self.__name__ = name
-            self.__class__.__name__ = name
-            self.__class__.__qualname__ = name
-        self._bboxes = [[as_numpy(box).tolist() if isinstance(box, Array) else box for box in bbox] for bbox in bboxes]
-
-    @property
-    def metadata(self) -> DatasetMetadata:
-        return DatasetMetadata(id=self._id, index2label=self._index2label)
-
-    def __getitem__(self, idx: int, /) -> tuple[Array, ObjectDetectionTarget, dict[str, Any]]:
-        return (
-            self._images[idx],
-            self.ObjectDetectionTarget(self._labels[idx], self._bboxes[idx], len(self._classes)),
-            _ensure_id(idx, self._metadata[idx] if self._metadata is not None else {}),
-        )
-
-
-def to_image_classification_dataset(
-    images: Array | Sequence[Array],
-    labels: Array | Sequence[int],
-    metadata: Sequence[dict[str, Any]] | dict[str, Sequence[Any]] | None,
-    classes: Sequence[str] | None,
-    name: str | None = None,
-) -> ImageClassificationDataset:
-    """
-    Helper function to create custom ImageClassificationDataset classes.
-
-    Parameters
-    ----------
-    images : Array | Sequence[Array]
-        The images to use in the dataset.
-    labels : Array | Sequence[int]
-        The labels to use in the dataset.
-    metadata : Sequence[dict[str, Any]] | dict[str, Sequence[Any]] | None
-        The metadata to use in the dataset.
-    classes : Sequence[str] | None
-        The classes to use in the dataset.
-
-    Returns
-    -------
-    ImageClassificationDataset
-    """
-    _validate_data("ic", images, labels, None, metadata)
-    return CustomImageClassificationDataset(images, labels, _listify_metadata(metadata), classes, name)
-
-
-def to_object_detection_dataset(
-    images: Array | Sequence[Array],
-    labels: Array | Sequence[Array] | Sequence[Sequence[int]],
-    bboxes: Array | Sequence[Array] | Sequence[Sequence[Array]] | Sequence[Sequence[Sequence[float]]],
-    metadata: Sequence[dict[str, Any]] | dict[str, Sequence[Any]] | None,
-    classes: Sequence[str] | None,
-    name: str | None = None,
-) -> ObjectDetectionDataset:
-    """
-    Helper function to create custom ObjectDetectionDataset classes.
-
-    Parameters
-    ----------
-    images : Array | Sequence[Array]
-        The images to use in the dataset.
-    labels : Array | Sequence[Array] | Sequence[Sequence[int]]
-        The labels to use in the dataset.
-    bboxes : Array | Sequence[Array] | Sequence[Sequence[Array]] | Sequence[Sequence[Sequence[float]]]
-        The bounding boxes (x0,y0,x1,y1) to use in the dataset.
-    metadata : Sequence[dict[str, Any]] | dict[str, Sequence[Any]] | None
-        The metadata to use in the dataset.
-    classes : Sequence[str] | None
-        The classes to use in the dataset.
-
-    Returns
-    -------
-    ObjectDetectionDataset
-    """
-    _validate_data("od", images, labels, bboxes, metadata)
-    return CustomObjectDetectionDataset(images, labels, bboxes, _listify_metadata(metadata), classes, name)
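Since `to_image_classification_dataset` and `to_object_detection_dataset` are removed with this file, callers that built ad-hoc datasets this way now need to supply their own dataset object. A minimal replacement sketch for the image-classification case, modeled on the deleted `CustomImageClassificationDataset` (hypothetical user code, not part of dataeval):

```python
from typing import Any

import numpy as np


class SimpleClassificationDataset:
    """Minimal stand-in for the removed to_image_classification_dataset helper."""

    def __init__(self, images, labels, classes) -> None:
        self._images = images
        self._labels = labels
        self._classes = classes
        # Dataset-level metadata mirroring the shape the old helper produced.
        self.metadata = {"id": "my_dataset", "index2label": dict(enumerate(classes))}

    def __len__(self) -> int:
        return len(self._images)

    def __getitem__(self, idx: int) -> tuple[Any, np.ndarray, dict[str, Any]]:
        # One-hot target per image, as the deleted __getitem__ returned.
        one_hot = np.zeros(len(self._classes))
        one_hot[self._labels[idx]] = 1.0
        return self._images[idx], one_hot, {"id": idx}
```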