dataeval 0.84.1__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (51)
  1. dataeval/__init__.py +1 -1
  2. dataeval/data/__init__.py +19 -0
  3. dataeval/{utils/data → data}/_embeddings.py +137 -17
  4. dataeval/{utils/data → data}/_metadata.py +3 -3
  5. dataeval/{utils/data → data}/_selection.py +22 -9
  6. dataeval/{utils/data → data}/_split.py +1 -1
  7. dataeval/data/selections/__init__.py +19 -0
  8. dataeval/{utils/data → data}/selections/_classbalance.py +1 -2
  9. dataeval/data/selections/_classfilter.py +109 -0
  10. dataeval/{utils/data → data}/selections/_indices.py +1 -1
  11. dataeval/{utils/data → data}/selections/_limit.py +1 -1
  12. dataeval/{utils/data → data}/selections/_prioritize.py +2 -2
  13. dataeval/{utils/data → data}/selections/_reverse.py +1 -1
  14. dataeval/{utils/data → data}/selections/_shuffle.py +1 -1
  15. dataeval/detectors/drift/_base.py +1 -1
  16. dataeval/detectors/drift/_cvm.py +2 -2
  17. dataeval/detectors/drift/_ks.py +2 -2
  18. dataeval/detectors/drift/_mmd.py +2 -2
  19. dataeval/detectors/linters/duplicates.py +1 -1
  20. dataeval/detectors/linters/outliers.py +1 -1
  21. dataeval/metadata/_distance.py +1 -1
  22. dataeval/metadata/_ood.py +4 -4
  23. dataeval/metrics/bias/_balance.py +1 -1
  24. dataeval/metrics/bias/_diversity.py +1 -1
  25. dataeval/metrics/bias/_parity.py +1 -1
  26. dataeval/metrics/stats/_labelstats.py +2 -2
  27. dataeval/outputs/_bias.py +1 -1
  28. dataeval/typing.py +31 -0
  29. dataeval/utils/__init__.py +2 -2
  30. dataeval/utils/data/__init__.py +5 -20
  31. dataeval/utils/data/collate.py +2 -0
  32. dataeval/utils/datasets/__init__.py +17 -0
  33. dataeval/utils/{data/datasets → datasets}/_base.py +3 -3
  34. dataeval/utils/{data/datasets → datasets}/_cifar10.py +2 -2
  35. dataeval/utils/{data/datasets → datasets}/_milco.py +2 -2
  36. dataeval/utils/{data/datasets → datasets}/_mnist.py +2 -2
  37. dataeval/utils/{data/datasets → datasets}/_ships.py +2 -2
  38. dataeval/utils/{data/datasets → datasets}/_voc.py +3 -3
  39. {dataeval-0.84.1.dist-info → dataeval-1.0.0.dist-info}/METADATA +1 -1
  40. {dataeval-0.84.1.dist-info → dataeval-1.0.0.dist-info}/RECORD +48 -47
  41. dataeval/utils/data/datasets/__init__.py +0 -17
  42. dataeval/utils/data/selections/__init__.py +0 -19
  43. dataeval/utils/data/selections/_classfilter.py +0 -44
  44. /dataeval/{utils/data → data}/_images.py +0 -0
  45. /dataeval/{utils/data → data}/_targets.py +0 -0
  46. /dataeval/utils/{metadata.py → data/metadata.py} +0 -0
  47. /dataeval/utils/{data/datasets → datasets}/_fileio.py +0 -0
  48. /dataeval/utils/{data/datasets → datasets}/_mixin.py +0 -0
  49. /dataeval/utils/{data/datasets → datasets}/_types.py +0 -0
  50. {dataeval-0.84.1.dist-info → dataeval-1.0.0.dist-info}/LICENSE.txt +0 -0
  51. {dataeval-0.84.1.dist-info → dataeval-1.0.0.dist-info}/WHEEL +0 -0
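The bulk of this release is a package reorganization: the contents of dataeval.utils.data move up to a new top-level dataeval.data package, and dataeval.utils.data.datasets moves to dataeval.utils.datasets. A minimal migration sketch follows; the Metadata import is confirmed by the labelstats docstring change in this diff, while the other names are assumed to be re-exported from dataeval.data the same way the old dataeval.utils.data __init__ exported them.

# dataeval 0.84.1
from dataeval.utils.data import Embeddings, Images, Metadata, Select, Targets, split_dataset

# dataeval 1.0.0 (assumed re-exports; Metadata is confirmed by this diff)
from dataeval.data import Embeddings, Images, Metadata, Select, Targets, split_dataset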
dataeval/detectors/linters/duplicates.py CHANGED
@@ -4,13 +4,13 @@ __all__ = []
 
 from typing import Any, Sequence, overload
 
+from dataeval.data._images import Images
 from dataeval.metrics.stats import hashstats
 from dataeval.metrics.stats._base import combine_stats, get_dataset_step_from_idx
 from dataeval.outputs import DuplicatesOutput, HashStatsOutput
 from dataeval.outputs._base import set_metadata
 from dataeval.outputs._linters import DatasetDuplicateGroupMap, DuplicateGroup
 from dataeval.typing import ArrayLike, Dataset
-from dataeval.utils.data._images import Images
 
 
 class Duplicates:
dataeval/detectors/linters/outliers.py CHANGED
@@ -7,6 +7,7 @@ from typing import Any, Literal, Sequence, overload
 import numpy as np
 from numpy.typing import NDArray
 
+from dataeval.data._images import Images
 from dataeval.metrics.stats._base import combine_stats, get_dataset_step_from_idx
 from dataeval.metrics.stats._imagestats import imagestats
 from dataeval.outputs import DimensionStatsOutput, ImageStatsOutput, OutliersOutput, PixelStatsOutput, VisualStatsOutput
@@ -14,7 +15,6 @@ from dataeval.outputs._base import set_metadata
 from dataeval.outputs._linters import IndexIssueMap, OutlierStatsOutput
 from dataeval.outputs._stats import BOX_COUNT, SOURCE_INDEX
 from dataeval.typing import ArrayLike, Dataset
-from dataeval.utils.data._images import Images
 
 
 def _get_outlier_mask(
dataeval/metadata/_distance.py CHANGED
@@ -9,11 +9,11 @@ import numpy as np
 from scipy.stats import iqr, ks_2samp
 from scipy.stats import wasserstein_distance as emd
 
+from dataeval.data import Metadata
 from dataeval.metadata._utils import _compare_keys, _validate_factors_and_data
 from dataeval.outputs import MetadataDistanceOutput, MetadataDistanceValues
 from dataeval.outputs._base import set_metadata
 from dataeval.typing import ArrayLike
-from dataeval.utils.data import Metadata
 
 
 class KSType(NamedTuple):
dataeval/metadata/_ood.py CHANGED
@@ -9,10 +9,10 @@ from numpy.typing import NDArray
 from sklearn.feature_selection import mutual_info_classif
 
 from dataeval.config import get_seed
+from dataeval.data import Metadata
 from dataeval.metadata._utils import _compare_keys, _validate_factors_and_data
 from dataeval.outputs import MostDeviatedFactorsOutput, OODOutput, OODPredictorOutput
 from dataeval.outputs._base import set_metadata
-from dataeval.utils.data import Metadata
 
 
 def _combine_discrete_continuous(metadata: Metadata) -> tuple[list[str], NDArray[np.float64]]:
@@ -201,7 +201,7 @@ def find_most_deviated_factors(
     MostDeviatedFactorsOutput([])
     """
 
-    ood_mask: NDArray[np.bool] = ood.is_ood
+    ood_mask: NDArray[np.bool_] = ood.is_ood
 
     # No metadata correlated with out of distribution data
     if not any(ood_mask):
@@ -303,7 +303,7 @@ def find_ood_predictors(
     OODPredictorOutput({})
     """
 
-    ood_mask: NDArray[np.bool] = ood.is_ood
+    ood_mask: NDArray[np.bool_] = ood.is_ood
 
     discrete_features_count = len(metadata.discrete_factor_names)
     factors, data = _combine_discrete_continuous(metadata)  # (F, ), (S, F) => F = Fd + Fc
@@ -320,7 +320,7 @@ def find_ood_predictors(
     # Calculate mean, std of each factor over all samples
     scaled_data = (data - np.mean(data, axis=0)) / np.std(data, axis=0, ddof=1)  # (S, F)
 
-    discrete_features = np.zeros_like(factors, dtype=np.bool)
+    discrete_features = np.zeros_like(factors, dtype=np.bool_)
     discrete_features[:discrete_features_count] = True
 
     mutual_info_values = (
dataeval/metrics/bias/_balance.py CHANGED
@@ -9,10 +9,10 @@ import scipy as sp
 from sklearn.feature_selection import mutual_info_classif, mutual_info_regression
 
 from dataeval.config import EPSILON, get_seed
+from dataeval.data import Metadata
 from dataeval.outputs import BalanceOutput
 from dataeval.outputs._base import set_metadata
 from dataeval.utils._bin import get_counts
-from dataeval.utils.data import Metadata
 
 
 def _validate_num_neighbors(num_neighbors: int) -> int:
dataeval/metrics/bias/_diversity.py CHANGED
@@ -8,11 +8,11 @@ import numpy as np
 import scipy as sp
 from numpy.typing import NDArray
 
+from dataeval.data import Metadata
 from dataeval.outputs import DiversityOutput
 from dataeval.outputs._base import set_metadata
 from dataeval.utils._bin import get_counts
 from dataeval.utils._method import get_method
-from dataeval.utils.data import Metadata
 
 
 def diversity_shannon(
dataeval/metrics/bias/_parity.py CHANGED
@@ -10,11 +10,11 @@ from numpy.typing import NDArray
 from scipy.stats import chisquare
 from scipy.stats.contingency import chi2_contingency, crosstab
 
+from dataeval.data import Metadata
 from dataeval.outputs import LabelParityOutput, ParityOutput
 from dataeval.outputs._base import set_metadata
 from dataeval.typing import ArrayLike
 from dataeval.utils._array import as_numpy
-from dataeval.utils.data import Metadata
 
 
 def normalize_expected_dist(expected_dist: NDArray[Any], observed_dist: NDArray[Any]) -> NDArray[Any]:
dataeval/metrics/stats/_labelstats.py CHANGED
@@ -5,10 +5,10 @@ __all__ = []
 from collections import Counter, defaultdict
 from typing import Any, Mapping, TypeVar
 
+from dataeval.data._metadata import Metadata
 from dataeval.outputs import LabelStatsOutput
 from dataeval.outputs._base import set_metadata
 from dataeval.typing import AnnotatedDataset
-from dataeval.utils.data._metadata import Metadata
 
 TValue = TypeVar("TValue")
 
@@ -38,7 +38,7 @@ def labelstats(dataset: Metadata | AnnotatedDataset[Any]) -> LabelStatsOutput:
     --------
     Calculate basic :term:`statistics<Statistics>` on labels for a dataset.
 
-    >>> from dataeval.utils.data import Metadata
+    >>> from dataeval.data import Metadata
     >>> stats = labelstats(Metadata(dataset))
     >>> print(stats.to_table())
     Class Count: 5
dataeval/outputs/_bias.py CHANGED
@@ -13,11 +13,11 @@ with contextlib.suppress(ImportError):
     import pandas as pd
     from matplotlib.figure import Figure
 
+from dataeval.data._images import Images
 from dataeval.outputs._base import Output
 from dataeval.typing import ArrayLike, Dataset
 from dataeval.utils._array import as_numpy, channels_first_to_last
 from dataeval.utils._plot import heatmap
-from dataeval.utils.data._images import Images
 
 TData = TypeVar("TData", np.float64, NDArray[np.float64])
 
dataeval/typing.py CHANGED
@@ -98,6 +98,22 @@ class DatasetMetadata(TypedDict, total=False):
     index2label: NotRequired[ReadOnly[dict[int, str]]]
 
 
+class ModelMetadata(TypedDict, total=False):
+    """
+    Model metadata required for all `AnnotatedModel` classes.
+
+    Attributes
+    ----------
+    id : Required[str]
+        A unique identifier for the model
+    index2label : NotRequired[dict[int, str]]
+        A lookup table converting label value to class name
+    """
+
+    id: Required[ReadOnly[str]]
+    index2label: NotRequired[ReadOnly[dict[int, str]]]
+
+
 @runtime_checkable
 class Dataset(Generic[_T_co], Protocol):
     """
@@ -238,6 +254,21 @@ SegmentationDataset: TypeAlias = AnnotatedDataset[SegmentationDatum]
 Type alias for an :class:`AnnotatedDataset` of :class:`SegmentationDatum` elements.
 """
 
+# ========== MODEL ==========
+
+
+@runtime_checkable
+class AnnotatedModel(Protocol):
+    """
+    Protocol for an annotated model.
+    """
+
+    @property
+    def metadata(self) -> ModelMetadata: ...
+
+
+# ========== TRANSFORM ==========
+
 
 @runtime_checkable
 class Transform(Generic[_T], Protocol):
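The new ModelMetadata TypedDict and AnnotatedModel protocol added above are runtime-checkable: an object that exposes a metadata property returning a dict with a required id (and optional index2label) satisfies the protocol. A minimal conforming sketch; WrappedModel and its label table are illustrative, not part of this diff:

from dataeval.typing import AnnotatedModel, ModelMetadata


class WrappedModel:
    """Hypothetical wrapper exposing the metadata required by AnnotatedModel."""

    def __init__(self, model_id: str) -> None:
        # "id" is required, "index2label" is optional per the new TypedDict
        self._metadata = ModelMetadata(id=model_id, index2label={0: "cat", 1: "dog"})

    @property
    def metadata(self) -> ModelMetadata:
        return self._metadata


print(isinstance(WrappedModel("resnet18-catsdogs"), AnnotatedModel))  # True: the protocol is @runtime_checkable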
dataeval/utils/__init__.py CHANGED
@@ -4,6 +4,6 @@ in setting up data and architectures that are guaranteed to work with applicable
 DataEval metrics.
 """
 
-__all__ = ["data", "metadata", "torch"]
+__all__ = ["data", "datasets", "torch"]
 
-from . import data, metadata, torch
+from . import data, datasets, torch
dataeval/utils/data/__init__.py CHANGED
@@ -1,26 +1,11 @@
-"""Provides utility functions for interacting with Computer Vision datasets."""
+"""Provides access to common Computer Vision datasets."""
+
+from dataeval.utils.data import collate, metadata
+from dataeval.utils.data._dataset import to_image_classification_dataset, to_object_detection_dataset
 
 __all__ = [
     "collate",
-    "datasets",
-    "Embeddings",
-    "Images",
-    "Metadata",
-    "Select",
-    "SplitDatasetOutput",
-    "Targets",
-    "split_dataset",
+    "metadata",
     "to_image_classification_dataset",
     "to_object_detection_dataset",
 ]
-
-from dataeval.outputs._utils import SplitDatasetOutput
-from dataeval.utils.data._dataset import to_image_classification_dataset, to_object_detection_dataset
-from dataeval.utils.data._embeddings import Embeddings
-from dataeval.utils.data._images import Images
-from dataeval.utils.data._metadata import Metadata
-from dataeval.utils.data._selection import Select
-from dataeval.utils.data._split import split_dataset
-from dataeval.utils.data._targets import Targets
-
-from . import collate, datasets
dataeval/utils/data/collate.py CHANGED
@@ -4,6 +4,8 @@ Collate functions used with a PyTorch DataLoader to load data from MAITE complia
 
 from __future__ import annotations
 
+__all__ = ["list_collate_fn", "numpy_collate_fn", "torch_collate_fn"]
+
 from typing import Any, Iterable, Sequence, TypeVar
 
 import numpy as np
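The collate helpers now advertise an explicit __all__. A sketch of the intended wiring with a PyTorch DataLoader, per the module docstring; the dataset constructor arguments and the exact structure of the collated batch are assumptions, not taken from this diff:

from torch.utils.data import DataLoader

from dataeval.utils.data.collate import numpy_collate_fn
from dataeval.utils.datasets import MNIST

dataset = MNIST(root="./data", download=True)  # hypothetical arguments
loader = DataLoader(dataset, batch_size=16, collate_fn=numpy_collate_fn)
batch = next(iter(loader))  # batch layout depends on numpy_collate_fn, whose body is not shown here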
dataeval/utils/datasets/__init__.py ADDED
@@ -0,0 +1,17 @@
+"""Provides access to common Computer Vision datasets."""
+
+from dataeval.utils.datasets._cifar10 import CIFAR10
+from dataeval.utils.datasets._milco import MILCO
+from dataeval.utils.datasets._mnist import MNIST
+from dataeval.utils.datasets._ships import Ships
+from dataeval.utils.datasets._voc import VOCDetection, VOCDetectionTorch, VOCSegmentation
+
+__all__ = [
+    "MNIST",
+    "Ships",
+    "CIFAR10",
+    "MILCO",
+    "VOCDetection",
+    "VOCDetectionTorch",
+    "VOCSegmentation",
+]
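For the bundled datasets the exported names are unchanged between the removed and added __init__ modules; only the import path moves:

# dataeval 0.84.1
from dataeval.utils.data.datasets import MNIST, Ships, VOCDetection

# dataeval 1.0.0
from dataeval.utils.datasets import MNIST, Ships, VOCDetection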
dataeval/utils/datasets/_base.py CHANGED
@@ -6,9 +6,9 @@ from abc import abstractmethod
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Generic, Iterator, Literal, NamedTuple, Sequence, TypeVar
 
-from dataeval.utils.data.datasets._fileio import _ensure_exists
-from dataeval.utils.data.datasets._mixin import BaseDatasetMixin
-from dataeval.utils.data.datasets._types import (
+from dataeval.utils.datasets._fileio import _ensure_exists
+from dataeval.utils.datasets._mixin import BaseDatasetMixin
+from dataeval.utils.datasets._types import (
     AnnotatedDataset,
     DatasetMetadata,
     ImageClassificationDataset,
dataeval/utils/datasets/_cifar10.py CHANGED
@@ -9,8 +9,8 @@ import numpy as np
 from numpy.typing import NDArray
 from PIL import Image
 
-from dataeval.utils.data.datasets._base import BaseICDataset, DataLocation
-from dataeval.utils.data.datasets._mixin import BaseDatasetNumpyMixin
+from dataeval.utils.datasets._base import BaseICDataset, DataLocation
+from dataeval.utils.datasets._mixin import BaseDatasetNumpyMixin
 
 if TYPE_CHECKING:
     from dataeval.typing import Transform
dataeval/utils/datasets/_milco.py CHANGED
@@ -7,8 +7,8 @@ from typing import TYPE_CHECKING, Any, Literal, Sequence
 
 from numpy.typing import NDArray
 
-from dataeval.utils.data.datasets._base import BaseODDataset, DataLocation
-from dataeval.utils.data.datasets._mixin import BaseDatasetNumpyMixin
+from dataeval.utils.datasets._base import BaseODDataset, DataLocation
+from dataeval.utils.datasets._mixin import BaseDatasetNumpyMixin
 
 if TYPE_CHECKING:
     from dataeval.typing import Transform
dataeval/utils/datasets/_mnist.py CHANGED
@@ -8,8 +8,8 @@ from typing import TYPE_CHECKING, Any, Literal, Sequence, TypeVar
 import numpy as np
 from numpy.typing import NDArray
 
-from dataeval.utils.data.datasets._base import BaseICDataset, DataLocation
-from dataeval.utils.data.datasets._mixin import BaseDatasetNumpyMixin
+from dataeval.utils.datasets._base import BaseICDataset, DataLocation
+from dataeval.utils.datasets._mixin import BaseDatasetNumpyMixin
 
 if TYPE_CHECKING:
     from dataeval.typing import Transform
dataeval/utils/datasets/_ships.py CHANGED
@@ -8,8 +8,8 @@ from typing import TYPE_CHECKING, Any, Sequence
 import numpy as np
 from numpy.typing import NDArray
 
-from dataeval.utils.data.datasets._base import BaseICDataset, DataLocation
-from dataeval.utils.data.datasets._mixin import BaseDatasetNumpyMixin
+from dataeval.utils.datasets._base import BaseICDataset, DataLocation
+from dataeval.utils.datasets._mixin import BaseDatasetNumpyMixin
 
 if TYPE_CHECKING:
     from dataeval.typing import Transform
dataeval/utils/datasets/_voc.py CHANGED
@@ -9,7 +9,7 @@ import torch
 from defusedxml.ElementTree import parse
 from numpy.typing import NDArray
 
-from dataeval.utils.data.datasets._base import (
+from dataeval.utils.datasets._base import (
     BaseDataset,
     BaseODDataset,
     BaseSegDataset,
@@ -17,8 +17,8 @@ from dataeval.utils.data.datasets._base import (
     _TArray,
     _TTarget,
 )
-from dataeval.utils.data.datasets._mixin import BaseDatasetNumpyMixin, BaseDatasetTorchMixin
-from dataeval.utils.data.datasets._types import ObjectDetectionTarget, SegmentationTarget
+from dataeval.utils.datasets._mixin import BaseDatasetNumpyMixin, BaseDatasetTorchMixin
+from dataeval.utils.datasets._types import ObjectDetectionTarget, SegmentationTarget
 
 if TYPE_CHECKING:
     from dataeval.typing import Transform
dataeval-1.0.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dataeval
-Version: 0.84.1
+Version: 1.0.0
 Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
 Home-page: https://dataeval.ai/
 License: MIT
dataeval-1.0.0.dist-info/RECORD CHANGED
@@ -1,33 +1,48 @@
-dataeval/__init__.py,sha256=QzrctVrymZuLN8tnHcF1wp0RTXYM3WSWMozX3NOzIos,1636
+dataeval/__init__.py,sha256=xd1GfD7QmzBG-WN7K6BMJSzV9_UZlX5OiKICdQ5xGfU,1635
 dataeval/_log.py,sha256=Mn5bRWO0cgtAYd5VGYSFiPgu57ta3zoktrtHAZ1m3dU,357
 dataeval/config.py,sha256=lD1YDH8HosFeRU5rQEYRBcmXMZy-csWaMlJTRZGd9iU,3582
+dataeval/data/__init__.py,sha256=qNnRRiVP_sLthkkHpUrMgI_r8dQK-cC-xoGrrjQeRKc,544
+dataeval/data/_embeddings.py,sha256=6Medqj_JCQt1iwZwWGSs1OeX-bHB8bg5BJqADY1N2s8,12883
+dataeval/data/_images.py,sha256=WF9XJRka8ohUdyI2IKBMAy3JoJhOm1iC-8tbYl8woRM,2642
+dataeval/data/_metadata.py,sha256=hNgsCEN8EyfDDX7zLKcQnsaDl-9xvvs5tUzqMjVLvI4,14457
+dataeval/data/_selection.py,sha256=V61_pTFj0hSzmltA6CV5t51Znqw2dIQZ71Iu46bLm44,4486
+dataeval/data/_split.py,sha256=6Jtm_i__CcPtNE3eSeBdPxc7gn7Cp-GM7g9wJWFlVus,16761
+dataeval/data/_targets.py,sha256=ws5d9wRiDkIuOV7GSAKNxzgSm6AWTgb0BFroQK5nAmM,3057
+dataeval/data/selections/__init__.py,sha256=2m8ZB53wXzqLcqmc6p5atO6graB6ZyiRSNJFxf11X_g,613
+dataeval/data/selections/_classbalance.py,sha256=7v8ApoL3X8eCZ6fGDNTehE_bZ1loaP3TlhsJLaICVWg,1458
+dataeval/data/selections/_classfilter.py,sha256=rEeq959p_SLl_etS7pcM8ZxK4yzEYlYZAQ3FlcLV0R8,4330
+dataeval/data/selections/_indices.py,sha256=RFsR9z10aM3N0gJSfKrukFpi-LkiQGXoOwXhmOQ5cpg,630
+dataeval/data/selections/_limit.py,sha256=JG4GmEiNKt3sk4PbOUbBnGGzNlyz72H-kQrt8COMm4Y,512
+dataeval/data/selections/_prioritize.py,sha256=yw51ZQk6FPvyC38M4_pS_Se2Dq0LDFcdDhfbsELzTZc,11306
+dataeval/data/selections/_reverse.py,sha256=b67kNC43A5KpQOic5gifjo9HpJ7FMh4LFCrfovPiJ-M,368
+dataeval/data/selections/_shuffle.py,sha256=gVz_2T4rlucq8Ytqz5jvmmZdTrZDaIv43jJbq97tLjQ,1173
 dataeval/detectors/__init__.py,sha256=3Sg-XWlwr75zEEH3hZKA4nWMtGvaRlnfzTWvZG_Ak6U,189
 dataeval/detectors/drift/__init__.py,sha256=gD8aY5PotS-S2ot7iB_z_zzSOjIbQLw5znFBNj0jtHE,646
-dataeval/detectors/drift/_base.py,sha256=PdWyEuYqExFdyxvyOh7Q8yXnjNm0D3KfpDUn0bUixtY,7580
-dataeval/detectors/drift/_cvm.py,sha256=CSEyNN9u1MzUI6QmCSlexTUSlHzK1kYh36Nv2L72WbY,3016
-dataeval/detectors/drift/_ks.py,sha256=ifFb_0JcyykJyF9DAVkQqWCXc-3aA0AC8c8to_oOPKo,3198
-dataeval/detectors/drift/_mmd.py,sha256=DOHBNyNNxosR67yM9HTxbvqp1IZ8_KSvTVlX-JtKtjM,11601
+dataeval/detectors/drift/_base.py,sha256=amGqzUAe8fU5qwM5lq1p8PCuhjGh9MHkdW1zeBF1LEE,7574
+dataeval/detectors/drift/_cvm.py,sha256=cS33zWJmFY1fft1XcANcP2jSD5ou7TxvIU2AldhTynM,3004
+dataeval/detectors/drift/_ks.py,sha256=uMc5-NA-lSV1IODrY8uJe87ll3uRJT_oXLJFXy95M1w,3186
+dataeval/detectors/drift/_mmd.py,sha256=wHUy_vUafCikrZ_WX8qQXpxFwzw07-5zVutloR6hl1k,11589
 dataeval/detectors/drift/_uncertainty.py,sha256=BHlykJ-r7TGLJxdPfoazXnoAJ1qVDzbk5HjAMdsnHz8,5847
 dataeval/detectors/drift/updates.py,sha256=L1PnrPlIE1x6ujCc5mCwjcAZwadVTn-Zjb6MnTDvzJQ,2251
 dataeval/detectors/linters/__init__.py,sha256=xn2zPwUcmsuf-Jd9uw6AVI11C9z1b1Y9fYtuFnXenZ0,404
-dataeval/detectors/linters/duplicates.py,sha256=x36-0EAlO_AuOttvElJOZCa0R3VzrlII0NxjwhdkrpE,4969
-dataeval/detectors/linters/outliers.py,sha256=Z0Sbtluu2im1IRGsjhXF2AhrShKDrEkF8BWzAZyPwlA,9054
+dataeval/detectors/linters/duplicates.py,sha256=X5WSEvI_BHkLoXjkaHK6wTnSkx4IjpO_exMRjSlhc70,4963
+dataeval/detectors/linters/outliers.py,sha256=D8A-Fov5iUrlU9xMX5Ht33FqUY8Lk5ulC6BlHbUoLwU,9048
 dataeval/detectors/ood/__init__.py,sha256=juCYBDs7CQEAtMhnEpPqF6uTrOIH9kTBSuQ_GRw6a8o,283
 dataeval/detectors/ood/ae.py,sha256=fTrUfFxv6xUqzKpwMC8rW3JrizA16M_bgzqLuBKMrS0,2944
 dataeval/detectors/ood/base.py,sha256=9b-Ljznf0lB1SXF4F_Aj3eJ4Y3ijGEDPMjucUsWOGJM,3051
 dataeval/detectors/ood/mixin.py,sha256=0_o-1HPvgf3-Lf1MSOIfjj5UB8LTLEBGYtJJfyCCzwc,5431
 dataeval/detectors/ood/vae.py,sha256=Fcq0-WbLhzYCgYOAJPBklHm7yuXmFJuEpBkhgwM5kiA,2291
 dataeval/metadata/__init__.py,sha256=XDDmJbOZBNM6pL0r6Nbu6oMRoyAh22IDkPYGndNlkZU,316
-dataeval/metadata/_distance.py,sha256=xsXMMg1pJkHcEZ-KIlqv9YOGYVID3ELjt3-fr1QVnOs,4082
-dataeval/metadata/_ood.py,sha256=HbS5MusWl62hjixUAd-xaaT0KXkYY1M-MlnUaAI_-8M,12751
+dataeval/metadata/_distance.py,sha256=T1Umju_QwBiLmn1iUbxZagzBS2VnHaDIdp6j-NpaZuk,4076
+dataeval/metadata/_ood.py,sha256=lnKtKModArnUrAhH_XswEtUAhUkh1U_oNsLt1UmNP44,12748
 dataeval/metadata/_utils.py,sha256=r8qBJT83RblobD5W5zyTVi6vYi51Dwkqswizdbzss-M,1169
 dataeval/metrics/__init__.py,sha256=8VC8q3HuJN3o_WN51Ae2_wXznl3RMXIvA5GYVcy7vr8,225
 dataeval/metrics/bias/__init__.py,sha256=329S1_3WnWqeU4-qVcbe0fMy4lDrj9uKslWHIQf93yg,839
-dataeval/metrics/bias/_balance.py,sha256=UnUgbPk2ybFfS5qxv8e_uim7RxamWj0UQP71x3omGs0,6158
+dataeval/metrics/bias/_balance.py,sha256=l1hTVkVwD85bP20MTthA-I5BkvbytylQkJu3Q6iTuPA,6152
 dataeval/metrics/bias/_completeness.py,sha256=BysXU2Jpw33n5dl3acJFEqF3mFGiJLsfG4n5Q2fkTaY,4608
 dataeval/metrics/bias/_coverage.py,sha256=PeUoOiaghUEdn6Ov8z2-am7-fnBVIPcFbJK7Ty5JObA,3647
-dataeval/metrics/bias/_diversity.py,sha256=U_l4oYjH39rON2Io0BdCIwJxxob0cKTW8bZNufG0CWs,5820
-dataeval/metrics/bias/_parity.py,sha256=8JRZv4wLpxN9zTvMDlcpKgz-2nO-9eVjqccODcf2nbw,11535
+dataeval/metrics/bias/_diversity.py,sha256=B_qWVDMZfh818U0qVm8yidquB0H0XvW8N75OWVWXy2g,5814
+dataeval/metrics/bias/_parity.py,sha256=ea1D-eJh6cJxQ11XD6VbDXBKecE0jJJwptGD7LQJmBw,11529
 dataeval/metrics/estimators/__init__.py,sha256=Pnds8uIyAovt2fKqZjiHCIP_kVoBWlVllekYuK5UmmU,568
 dataeval/metrics/estimators/_ber.py,sha256=C30E5LiGGTAfo31zWFYDptDg0R7CTJGJ-a60YgzSkYY,5382
 dataeval/metrics/estimators/_clusterer.py,sha256=1HrpihGTJ63IkNSOy4Ibw633Gllkm1RxKmoKT5MOgt0,1434
@@ -39,12 +54,12 @@ dataeval/metrics/stats/_boxratiostats.py,sha256=8Kd2FTZ5PLNYZfdAjU_R385gb0Z16JY0
 dataeval/metrics/stats/_dimensionstats.py,sha256=73mFP-Myxne0peFliwvTntc0kk4cpq0krzMvSLDSIMM,2702
 dataeval/metrics/stats/_hashstats.py,sha256=gp9X_pnTT3mPH9YNrWLdn2LQPK_epJ3dQRoyOCwmKlg,4758
 dataeval/metrics/stats/_imagestats.py,sha256=gUPNgN5Zwzdr7WnSwbve1NXNsyxd5dy3cSnlR_7guCg,3007
-dataeval/metrics/stats/_labelstats.py,sha256=WbvXZ831a5BDfm58HF8Z8i5JUV1tgw7tcfzUh8pOXSo,2825
+dataeval/metrics/stats/_labelstats.py,sha256=lz8I6eSd8tFkmQqy5cOG8hn9yxs0mP-Ic9ratFHiuoU,2813
 dataeval/metrics/stats/_pixelstats.py,sha256=SfergRbjNJE4h0xqe-0c8RnKtZmEkZ9MwExdipLSGvg,3247
 dataeval/metrics/stats/_visualstats.py,sha256=cq4AbF2B50Ihbzb86FphcnKQ1TSwNnP3PsnbpiPQZWw,3698
 dataeval/outputs/__init__.py,sha256=ciK-RdXgtn_s7MSCUW1UXvrXltMbltqbpfe9_V7xGrI,1701
 dataeval/outputs/_base.py,sha256=aZFbgybnZSQ3ws7QYRLTbDFqUfBFRVtIwX2LZfeGFUA,5703
-dataeval/outputs/_bias.py,sha256=GwbjLdppUODOeudYb_7ki2ejDmAYthlRKGijVwgVePE,12407
+dataeval/outputs/_bias.py,sha256=7L-d3DUWY6Vud7iX_VoQT0HG0KaV1U35gvmRApqzyB0,12401
 dataeval/outputs/_drift.py,sha256=gOiu2C-ERTWiRqlP0auMYxPBGdm9HecWPqWfg7I4tZg,2015
 dataeval/outputs/_estimators.py,sha256=a2oAIxxEDZ9WLGfMWH8KD-BVUS_SnULRPR-iI9hFPoQ,3047
 dataeval/outputs/_linters.py,sha256=YOdjrfm8ypdRrqYOaPM9nc6wVJI3-ita3Haj7LHDNaw,6416
@@ -54,8 +69,8 @@ dataeval/outputs/_stats.py,sha256=c73Yc3Kkrl-MN6BGKe1V0Yr6Ix2Yp_DZZfFSp8fZMZ0,13
 dataeval/outputs/_utils.py,sha256=HHlGC7sk416m_3Bgn075Qdblz_aPup_UOafJpB0RuXY,893
 dataeval/outputs/_workflows.py,sha256=MkRD6ubI4NCBXb9v3kjXy64cUGs3G-JKkBdOpRD9XVE,10750
 dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dataeval/typing.py,sha256=zn6smomSdcO7EeZpeeSP5-8sknTdgUuU7TKe-3UFVrg,6550
-dataeval/utils/__init__.py,sha256=T8F8zJh4ZAeu0wDzfpld92I2zJg9mWBmkGCHrDPU7gk,264
+dataeval/typing.py,sha256=GDMuef-oFFukNtsiKFmsExHdNvYR_j-tQcsCwZ9reow,7198
+dataeval/utils/__init__.py,sha256=hRvyUK7b3d6JBEV5u47rFcOHEcmDYqAvZQw_T5pDAWw,264
 dataeval/utils/_array.py,sha256=KqAdXEMjcXYvdWdYEEoEbigwQJ4S9VYxQS3sRFeY5XY,5929
 dataeval/utils/_bin.py,sha256=nylthmsC3vzLHLhlUMACvZs--h7xvAh9Pt75InaQJW8,7322
 dataeval/utils/_clusterer.py,sha256=fw5x-2QN0TIbiodDKHZxRgxKHINedpPcOklzce0Rbjg,5436
@@ -64,34 +79,20 @@ dataeval/utils/_image.py,sha256=capzF_X5H0jy0PmTP3Hf52GFgLqrnfU6gS4tiwck9jo,1939
 dataeval/utils/_method.py,sha256=9B9JQbgqWJBRhQJb7glajUtWaQzUTIUuvrZ9_bisxsM,394
 dataeval/utils/_mst.py,sha256=f0vXytTUjlOS6AyL7c6PkXmaHuuGUK-vMLpq-5xMgxk,2183
 dataeval/utils/_plot.py,sha256=mTRQNbJsA42QMiOwZbJaH8sNYgP996QFDEGVVE9HSgY,7076
-dataeval/utils/data/__init__.py,sha256=vldQ2ZXl8gnI3s4vAGqUUVi6dc_R58F3JMSpbCOyFRI,820
+dataeval/utils/data/__init__.py,sha256=xGzrjrOxOP2DP1tU84AWMKPnSxFvSjM81CTlDg4rNM8,331
 dataeval/utils/data/_dataset.py,sha256=MHY582yRm4FxQkkLWUhKZBb7ZyvWypM6ldUG89vd3uE,7936
-dataeval/utils/data/_embeddings.py,sha256=iDtfLJY1uHoTP4UdQoOt-3wopc6kSOXH_4CVNnmXXA4,8356
-dataeval/utils/data/_images.py,sha256=WF9XJRka8ohUdyI2IKBMAy3JoJhOm1iC-8tbYl8woRM,2642
-dataeval/utils/data/_metadata.py,sha256=62z_qHjoGjiMdpuT36QpNhbWy2UClHWUcjHHlIWT470,14464
-dataeval/utils/data/_selection.py,sha256=2c6DjyeDIJapbI7xL36eBxFnJHIP8Yxt3oU3rBGMqLk,3948
-dataeval/utils/data/_split.py,sha256=q-2RwllJgazwuyxB_GoBqK_nLkqIjyTVr2SQKj_7lhw,16767
-dataeval/utils/data/_targets.py,sha256=ws5d9wRiDkIuOV7GSAKNxzgSm6AWTgb0BFroQK5nAmM,3057
-dataeval/utils/data/collate.py,sha256=Z5nmBnWV_IoJzMp_tj8RCKjMJA9sSCY_zZITqISGixc,3865
-dataeval/utils/data/datasets/__init__.py,sha256=jBrswiERrvBx4pJQJZIq_B5UE-Wy8a2_SBfM2crG8R8,511
-dataeval/utils/data/datasets/_base.py,sha256=827nSVhZ-tqeHw1HQ7Qj060CSDd90fEWZomN6FaWnQA,8872
-dataeval/utils/data/datasets/_cifar10.py,sha256=R7QgcCHowAkqhEXOvUhybXTmMlA4BJXkTuAeV9uDgfU,5449
-dataeval/utils/data/datasets/_fileio.py,sha256=SixIk5nIlIwJdX9zjNXS10vHA3hL8aaYbqHsDg1xSpY,6447
-dataeval/utils/data/datasets/_milco.py,sha256=bVVDl5W8TdTPU2RiwoPXrfFDM1rKyb-LslwTThBXEr0,7583
-dataeval/utils/data/datasets/_mixin.py,sha256=FJgZP_cpJkgAHA3j3ai_j3Wt7aFSEjIMVmt9NpvVXzg,1757
-dataeval/utils/data/datasets/_mnist.py,sha256=kNDJw0oyqa6QgU1y9lg-3AzStavK1BB8iHnDOdv9nyE,8112
-dataeval/utils/data/datasets/_ships.py,sha256=rsyIoRAIk40liFgaEb2dg0lYB7__bAGd9zh9ouzjFKg,4880
-dataeval/utils/data/datasets/_types.py,sha256=iSKyHXRlGuomXs0FHK6md8lXLQrQQ4fxgVOwr4o81bo,1089
-dataeval/utils/data/datasets/_voc.py,sha256=QUtpbh2EpiBoicsmOo-YIfwRwPXyHj-zB2hFn7tlz0Y,15580
-dataeval/utils/data/selections/__init__.py,sha256=iUbMZRDuBXwY3SNAtZTdCVu7SI4zbCyaL6ItXnnq1yI,655
-dataeval/utils/data/selections/_classbalance.py,sha256=hHq9frdwzFLCUmfeJq977Sot_SXhuGANlSsetokhRDc,1465
-dataeval/utils/data/selections/_classfilter.py,sha256=xdR5uX7W5Yivf-mE_CikbRi2fGrZLFrPYun3TeQHTA0,1267
-dataeval/utils/data/selections/_indices.py,sha256=QdLgXN7GABCvGPYe28PV1RAc_RSP_nZOyCvEpKRBdWg,636
-dataeval/utils/data/selections/_limit.py,sha256=ECvHRsp7OF4LZw2tE4sGqqJ085kjC-hd2c7QDMfvXr8,518
-dataeval/utils/data/selections/_prioritize.py,sha256=uRQjeQiAc-vvwHMH4CQtXTGJCfjj_h5dgGlhQYFMz1c,11318
-dataeval/utils/data/selections/_reverse.py,sha256=6SWpELC9Wgx-kPqzhDrPNn4NKU6FqDJveLrxV4D2Ypk,374
-dataeval/utils/data/selections/_shuffle.py,sha256=_jwms0qcwrknf2Fx84cCXyNOJyhE_V8rcnDOTDn1S2A,1179
-dataeval/utils/metadata.py,sha256=1XeGYj_e97-nJ_IrWEHPhWICmouYU5qbXWbp7uhZrIE,14171
+dataeval/utils/data/collate.py,sha256=5egEEKhNNCGeNLChO1p6dZ4Wg6x51VEaMNHz7hEZUxI,3936
+dataeval/utils/data/metadata.py,sha256=1XeGYj_e97-nJ_IrWEHPhWICmouYU5qbXWbp7uhZrIE,14171
+dataeval/utils/datasets/__init__.py,sha256=Jfe7XI_9U5S4wuI_2QCoeuWNOxz4j0nAQvxc5wG5mWY,486
+dataeval/utils/datasets/_base.py,sha256=TpmgPzF3EShCLAF5S4Zf9lFN78q17bTZF6AUE1qKdlk,8857
+dataeval/utils/datasets/_cifar10.py,sha256=oSX5JEzbBM4zGC9kC7-hVTOglms3rYaUuYiA00_DUJ4,5439
+dataeval/utils/datasets/_fileio.py,sha256=SixIk5nIlIwJdX9zjNXS10vHA3hL8aaYbqHsDg1xSpY,6447
+dataeval/utils/datasets/_milco.py,sha256=BF2XvyzuOop1mg5pFZcRfYmZcezlbpZWHyd_TtEHFF4,7573
+dataeval/utils/datasets/_mixin.py,sha256=FJgZP_cpJkgAHA3j3ai_j3Wt7aFSEjIMVmt9NpvVXzg,1757
+dataeval/utils/datasets/_mnist.py,sha256=4WOkQTORYMs6KEeyyJgChTnH03797y4ezgaZtYqplh4,8102
+dataeval/utils/datasets/_ships.py,sha256=RMdX2KlnXJYOTzBb6euA5TAqxs-S8b56pAGiyQhNMuo,4870
+dataeval/utils/datasets/_types.py,sha256=iSKyHXRlGuomXs0FHK6md8lXLQrQQ4fxgVOwr4o81bo,1089
+dataeval/utils/datasets/_voc.py,sha256=kif6ms_romK6VElP4pf2SK4cJ5dEHDOkxSaSaeP3c5k,15565
 dataeval/utils/torch/__init__.py,sha256=dn5mjCrFp0b1aL_UEURhONU0Ag0cmXoTOBSGagpkTiA,325
 dataeval/utils/torch/_blocks.py,sha256=HVhBTMMD5NA4qheMUgyol1KWiKZDIuc8k5j4RcMKmhk,1466
 dataeval/utils/torch/_gmm.py,sha256=XM68GNEP97EjaB1U49-ZXRb81d0CEFnPS910alrcB3g,3740
@@ -100,7 +101,7 @@ dataeval/utils/torch/models.py,sha256=hmroEs6C6jQ5tAoZa71RFeIvXLxfXrTJSFH_jG2LGQ
 dataeval/utils/torch/trainer.py,sha256=iUotX4OdirH8-ZtjdpU8gbJavkYW9YY9qpA2mAlFy1Y,5520
 dataeval/workflows/__init__.py,sha256=ou8y0KO-d6W5lgmcyLjKlf-J_ckP3vilW7wHkgiDlZ4,255
 dataeval/workflows/sufficiency.py,sha256=mjKmfRrAjShLUFIARv5o8yT5fnFvDsS5Qu6ujIPUgQg,8497
-dataeval-0.84.1.dist-info/LICENSE.txt,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
-dataeval-0.84.1.dist-info/METADATA,sha256=F7L5PSWHV3z0_4pwA-JSgucW2A4bEv_dtvIMzCTGLZ8,5308
-dataeval-0.84.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-dataeval-0.84.1.dist-info/RECORD,,
+dataeval-1.0.0.dist-info/LICENSE.txt,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
+dataeval-1.0.0.dist-info/METADATA,sha256=ma_TquWQQl0QETiK4-wH1jfAe2my33Cl37GswNe0ZM8,5307
+dataeval-1.0.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+dataeval-1.0.0.dist-info/RECORD,,
dataeval/utils/data/datasets/__init__.py DELETED
@@ -1,17 +0,0 @@
-"""Provides access to common Computer Vision datasets."""
-
-from dataeval.utils.data.datasets._cifar10 import CIFAR10
-from dataeval.utils.data.datasets._milco import MILCO
-from dataeval.utils.data.datasets._mnist import MNIST
-from dataeval.utils.data.datasets._ships import Ships
-from dataeval.utils.data.datasets._voc import VOCDetection, VOCDetectionTorch, VOCSegmentation
-
-__all__ = [
-    "MNIST",
-    "Ships",
-    "CIFAR10",
-    "MILCO",
-    "VOCDetection",
-    "VOCDetectionTorch",
-    "VOCSegmentation",
-]
dataeval/utils/data/selections/__init__.py DELETED
@@ -1,19 +0,0 @@
-"""Provides selection classes for selecting subsets of Computer Vision datasets."""
-
-__all__ = [
-    "ClassBalance",
-    "ClassFilter",
-    "Indices",
-    "Limit",
-    "Prioritize",
-    "Reverse",
-    "Shuffle",
-]
-
-from dataeval.utils.data.selections._classbalance import ClassBalance
-from dataeval.utils.data.selections._classfilter import ClassFilter
-from dataeval.utils.data.selections._indices import Indices
-from dataeval.utils.data.selections._limit import Limit
-from dataeval.utils.data.selections._prioritize import Prioritize
-from dataeval.utils.data.selections._reverse import Reverse
-from dataeval.utils.data.selections._shuffle import Shuffle
dataeval/utils/data/selections/_classfilter.py DELETED
@@ -1,44 +0,0 @@
-from __future__ import annotations
-
-__all__ = []
-
-from typing import Sequence
-
-import numpy as np
-
-from dataeval.typing import Array, ImageClassificationDatum
-from dataeval.utils._array import as_numpy
-from dataeval.utils.data._selection import Select, Selection, SelectionStage
-
-
-class ClassFilter(Selection[ImageClassificationDatum]):
-    """
-    Filter the dataset by class.
-
-    Parameters
-    ----------
-    classes : Sequence[int]
-        The classes to filter by.
-    """
-
-    stage = SelectionStage.FILTER
-
-    def __init__(self, classes: Sequence[int]) -> None:
-        self.classes = classes
-
-    def __call__(self, dataset: Select[ImageClassificationDatum]) -> None:
-        if not self.classes:
-            return
-
-        selection = []
-        for idx in dataset._selection:
-            target = dataset._dataset[idx][1]
-            if isinstance(target, Array):
-                label = int(np.argmax(as_numpy(target)))
-            else:
-                # ObjectDetectionTarget and SegmentationTarget not supported yet
-                raise TypeError("ClassFilter only supports classification targets as an array of confidence scores.")
-            if label in self.classes:
-                selection.append(idx)
-
-        dataset._selection = selection
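The 0.84.1 ClassFilter removed above handles only image-classification targets (it raises TypeError for anything else); its replacement at dataeval/data/selections/_classfilter.py is substantially larger (+109 lines), though its body is not shown in this diff. A migration sketch, assuming the new dataeval.data.selections package re-exports the same selection classes the removed __init__ did and that the constructor still takes the class values to keep:

# dataeval 0.84.1
from dataeval.utils.data.selections import ClassFilter

# dataeval 1.0.0 (assumed re-export)
from dataeval.data.selections import ClassFilter

keep_classes = ClassFilter(classes=[3, 5])  # constructor as in the removed 0.84.1 source above; 1.0.0 may accept more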
The remaining entries in the listing (the relocated modules and the dist-info LICENSE.txt and WHEEL) are renamed or carried over without content changes.