dataeval 0.84.1__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +1 -1
- dataeval/data/__init__.py +19 -0
- dataeval/{utils/data → data}/_embeddings.py +137 -17
- dataeval/{utils/data → data}/_metadata.py +3 -3
- dataeval/{utils/data → data}/_selection.py +22 -9
- dataeval/{utils/data → data}/_split.py +1 -1
- dataeval/data/selections/__init__.py +19 -0
- dataeval/{utils/data → data}/selections/_classbalance.py +1 -2
- dataeval/data/selections/_classfilter.py +109 -0
- dataeval/{utils/data → data}/selections/_indices.py +1 -1
- dataeval/{utils/data → data}/selections/_limit.py +1 -1
- dataeval/{utils/data → data}/selections/_prioritize.py +2 -2
- dataeval/{utils/data → data}/selections/_reverse.py +1 -1
- dataeval/{utils/data → data}/selections/_shuffle.py +1 -1
- dataeval/detectors/drift/_base.py +1 -1
- dataeval/detectors/drift/_cvm.py +2 -2
- dataeval/detectors/drift/_ks.py +2 -2
- dataeval/detectors/drift/_mmd.py +2 -2
- dataeval/detectors/linters/duplicates.py +1 -1
- dataeval/detectors/linters/outliers.py +1 -1
- dataeval/metadata/_distance.py +1 -1
- dataeval/metadata/_ood.py +4 -4
- dataeval/metrics/bias/_balance.py +1 -1
- dataeval/metrics/bias/_diversity.py +1 -1
- dataeval/metrics/bias/_parity.py +1 -1
- dataeval/metrics/stats/_labelstats.py +2 -2
- dataeval/outputs/_bias.py +1 -1
- dataeval/typing.py +31 -0
- dataeval/utils/__init__.py +2 -2
- dataeval/utils/data/__init__.py +5 -20
- dataeval/utils/data/collate.py +2 -0
- dataeval/utils/datasets/__init__.py +17 -0
- dataeval/utils/{data/datasets → datasets}/_base.py +3 -3
- dataeval/utils/{data/datasets → datasets}/_cifar10.py +2 -2
- dataeval/utils/{data/datasets → datasets}/_milco.py +2 -2
- dataeval/utils/{data/datasets → datasets}/_mnist.py +2 -2
- dataeval/utils/{data/datasets → datasets}/_ships.py +2 -2
- dataeval/utils/{data/datasets → datasets}/_voc.py +3 -3
- {dataeval-0.84.1.dist-info → dataeval-1.0.0.dist-info}/METADATA +1 -1
- {dataeval-0.84.1.dist-info → dataeval-1.0.0.dist-info}/RECORD +48 -47
- dataeval/utils/data/datasets/__init__.py +0 -17
- dataeval/utils/data/selections/__init__.py +0 -19
- dataeval/utils/data/selections/_classfilter.py +0 -44
- /dataeval/{utils/data → data}/_images.py +0 -0
- /dataeval/{utils/data → data}/_targets.py +0 -0
- /dataeval/utils/{metadata.py → data/metadata.py} +0 -0
- /dataeval/utils/{data/datasets → datasets}/_fileio.py +0 -0
- /dataeval/utils/{data/datasets → datasets}/_mixin.py +0 -0
- /dataeval/utils/{data/datasets → datasets}/_types.py +0 -0
- {dataeval-0.84.1.dist-info → dataeval-1.0.0.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.84.1.dist-info → dataeval-1.0.0.dist-info}/WHEEL +0 -0
@@ -4,13 +4,13 @@ __all__ = []
|
|
4
4
|
|
5
5
|
from typing import Any, Sequence, overload
|
6
6
|
|
7
|
+
from dataeval.data._images import Images
|
7
8
|
from dataeval.metrics.stats import hashstats
|
8
9
|
from dataeval.metrics.stats._base import combine_stats, get_dataset_step_from_idx
|
9
10
|
from dataeval.outputs import DuplicatesOutput, HashStatsOutput
|
10
11
|
from dataeval.outputs._base import set_metadata
|
11
12
|
from dataeval.outputs._linters import DatasetDuplicateGroupMap, DuplicateGroup
|
12
13
|
from dataeval.typing import ArrayLike, Dataset
|
13
|
-
from dataeval.utils.data._images import Images
|
14
14
|
|
15
15
|
|
16
16
|
class Duplicates:
|
@@ -7,6 +7,7 @@ from typing import Any, Literal, Sequence, overload
|
|
7
7
|
import numpy as np
|
8
8
|
from numpy.typing import NDArray
|
9
9
|
|
10
|
+
from dataeval.data._images import Images
|
10
11
|
from dataeval.metrics.stats._base import combine_stats, get_dataset_step_from_idx
|
11
12
|
from dataeval.metrics.stats._imagestats import imagestats
|
12
13
|
from dataeval.outputs import DimensionStatsOutput, ImageStatsOutput, OutliersOutput, PixelStatsOutput, VisualStatsOutput
|
@@ -14,7 +15,6 @@ from dataeval.outputs._base import set_metadata
|
|
14
15
|
from dataeval.outputs._linters import IndexIssueMap, OutlierStatsOutput
|
15
16
|
from dataeval.outputs._stats import BOX_COUNT, SOURCE_INDEX
|
16
17
|
from dataeval.typing import ArrayLike, Dataset
|
17
|
-
from dataeval.utils.data._images import Images
|
18
18
|
|
19
19
|
|
20
20
|
def _get_outlier_mask(
|
dataeval/metadata/_distance.py
CHANGED
@@ -9,11 +9,11 @@ import numpy as np
|
|
9
9
|
from scipy.stats import iqr, ks_2samp
|
10
10
|
from scipy.stats import wasserstein_distance as emd
|
11
11
|
|
12
|
+
from dataeval.data import Metadata
|
12
13
|
from dataeval.metadata._utils import _compare_keys, _validate_factors_and_data
|
13
14
|
from dataeval.outputs import MetadataDistanceOutput, MetadataDistanceValues
|
14
15
|
from dataeval.outputs._base import set_metadata
|
15
16
|
from dataeval.typing import ArrayLike
|
16
|
-
from dataeval.utils.data import Metadata
|
17
17
|
|
18
18
|
|
19
19
|
class KSType(NamedTuple):
|
dataeval/metadata/_ood.py
CHANGED
@@ -9,10 +9,10 @@ from numpy.typing import NDArray
|
|
9
9
|
from sklearn.feature_selection import mutual_info_classif
|
10
10
|
|
11
11
|
from dataeval.config import get_seed
|
12
|
+
from dataeval.data import Metadata
|
12
13
|
from dataeval.metadata._utils import _compare_keys, _validate_factors_and_data
|
13
14
|
from dataeval.outputs import MostDeviatedFactorsOutput, OODOutput, OODPredictorOutput
|
14
15
|
from dataeval.outputs._base import set_metadata
|
15
|
-
from dataeval.utils.data import Metadata
|
16
16
|
|
17
17
|
|
18
18
|
def _combine_discrete_continuous(metadata: Metadata) -> tuple[list[str], NDArray[np.float64]]:
|
@@ -201,7 +201,7 @@ def find_most_deviated_factors(
|
|
201
201
|
MostDeviatedFactorsOutput([])
|
202
202
|
"""
|
203
203
|
|
204
|
-
ood_mask: NDArray[np.
|
204
|
+
ood_mask: NDArray[np.bool_] = ood.is_ood
|
205
205
|
|
206
206
|
# No metadata correlated with out of distribution data
|
207
207
|
if not any(ood_mask):
|
@@ -303,7 +303,7 @@ def find_ood_predictors(
|
|
303
303
|
OODPredictorOutput({})
|
304
304
|
"""
|
305
305
|
|
306
|
-
ood_mask: NDArray[np.
|
306
|
+
ood_mask: NDArray[np.bool_] = ood.is_ood
|
307
307
|
|
308
308
|
discrete_features_count = len(metadata.discrete_factor_names)
|
309
309
|
factors, data = _combine_discrete_continuous(metadata) # (F, ), (S, F) => F = Fd + Fc
|
@@ -320,7 +320,7 @@ def find_ood_predictors(
|
|
320
320
|
# Calculate mean, std of each factor over all samples
|
321
321
|
scaled_data = (data - np.mean(data, axis=0)) / np.std(data, axis=0, ddof=1) # (S, F)
|
322
322
|
|
323
|
-
discrete_features = np.zeros_like(factors, dtype=np.
|
323
|
+
discrete_features = np.zeros_like(factors, dtype=np.bool_)
|
324
324
|
discrete_features[:discrete_features_count] = True
|
325
325
|
|
326
326
|
mutual_info_values = (
|
@@ -9,10 +9,10 @@ import scipy as sp
|
|
9
9
|
from sklearn.feature_selection import mutual_info_classif, mutual_info_regression
|
10
10
|
|
11
11
|
from dataeval.config import EPSILON, get_seed
|
12
|
+
from dataeval.data import Metadata
|
12
13
|
from dataeval.outputs import BalanceOutput
|
13
14
|
from dataeval.outputs._base import set_metadata
|
14
15
|
from dataeval.utils._bin import get_counts
|
15
|
-
from dataeval.utils.data import Metadata
|
16
16
|
|
17
17
|
|
18
18
|
def _validate_num_neighbors(num_neighbors: int) -> int:
|
@@ -8,11 +8,11 @@ import numpy as np
|
|
8
8
|
import scipy as sp
|
9
9
|
from numpy.typing import NDArray
|
10
10
|
|
11
|
+
from dataeval.data import Metadata
|
11
12
|
from dataeval.outputs import DiversityOutput
|
12
13
|
from dataeval.outputs._base import set_metadata
|
13
14
|
from dataeval.utils._bin import get_counts
|
14
15
|
from dataeval.utils._method import get_method
|
15
|
-
from dataeval.utils.data import Metadata
|
16
16
|
|
17
17
|
|
18
18
|
def diversity_shannon(
|
dataeval/metrics/bias/_parity.py
CHANGED
@@ -10,11 +10,11 @@ from numpy.typing import NDArray
|
|
10
10
|
from scipy.stats import chisquare
|
11
11
|
from scipy.stats.contingency import chi2_contingency, crosstab
|
12
12
|
|
13
|
+
from dataeval.data import Metadata
|
13
14
|
from dataeval.outputs import LabelParityOutput, ParityOutput
|
14
15
|
from dataeval.outputs._base import set_metadata
|
15
16
|
from dataeval.typing import ArrayLike
|
16
17
|
from dataeval.utils._array import as_numpy
|
17
|
-
from dataeval.utils.data import Metadata
|
18
18
|
|
19
19
|
|
20
20
|
def normalize_expected_dist(expected_dist: NDArray[Any], observed_dist: NDArray[Any]) -> NDArray[Any]:
|
@@ -5,10 +5,10 @@ __all__ = []
|
|
5
5
|
from collections import Counter, defaultdict
|
6
6
|
from typing import Any, Mapping, TypeVar
|
7
7
|
|
8
|
+
from dataeval.data._metadata import Metadata
|
8
9
|
from dataeval.outputs import LabelStatsOutput
|
9
10
|
from dataeval.outputs._base import set_metadata
|
10
11
|
from dataeval.typing import AnnotatedDataset
|
11
|
-
from dataeval.utils.data._metadata import Metadata
|
12
12
|
|
13
13
|
TValue = TypeVar("TValue")
|
14
14
|
|
@@ -38,7 +38,7 @@ def labelstats(dataset: Metadata | AnnotatedDataset[Any]) -> LabelStatsOutput:
|
|
38
38
|
--------
|
39
39
|
Calculate basic :term:`statistics<Statistics>` on labels for a dataset.
|
40
40
|
|
41
|
-
>>> from dataeval.
|
41
|
+
>>> from dataeval.data import Metadata
|
42
42
|
>>> stats = labelstats(Metadata(dataset))
|
43
43
|
>>> print(stats.to_table())
|
44
44
|
Class Count: 5
|
dataeval/outputs/_bias.py
CHANGED
@@ -13,11 +13,11 @@ with contextlib.suppress(ImportError):
|
|
13
13
|
import pandas as pd
|
14
14
|
from matplotlib.figure import Figure
|
15
15
|
|
16
|
+
from dataeval.data._images import Images
|
16
17
|
from dataeval.outputs._base import Output
|
17
18
|
from dataeval.typing import ArrayLike, Dataset
|
18
19
|
from dataeval.utils._array import as_numpy, channels_first_to_last
|
19
20
|
from dataeval.utils._plot import heatmap
|
20
|
-
from dataeval.utils.data._images import Images
|
21
21
|
|
22
22
|
TData = TypeVar("TData", np.float64, NDArray[np.float64])
|
23
23
|
|
dataeval/typing.py
CHANGED
@@ -98,6 +98,22 @@ class DatasetMetadata(TypedDict, total=False):
|
|
98
98
|
index2label: NotRequired[ReadOnly[dict[int, str]]]
|
99
99
|
|
100
100
|
|
101
|
+
class ModelMetadata(TypedDict, total=False):
|
102
|
+
"""
|
103
|
+
Model metadata required for all `AnnotatedModel` classes.
|
104
|
+
|
105
|
+
Attributes
|
106
|
+
----------
|
107
|
+
id : Required[str]
|
108
|
+
A unique identifier for the model
|
109
|
+
index2label : NotRequired[dict[int, str]]
|
110
|
+
A lookup table converting label value to class name
|
111
|
+
"""
|
112
|
+
|
113
|
+
id: Required[ReadOnly[str]]
|
114
|
+
index2label: NotRequired[ReadOnly[dict[int, str]]]
|
115
|
+
|
116
|
+
|
101
117
|
@runtime_checkable
|
102
118
|
class Dataset(Generic[_T_co], Protocol):
|
103
119
|
"""
|
@@ -238,6 +254,21 @@ SegmentationDataset: TypeAlias = AnnotatedDataset[SegmentationDatum]
|
|
238
254
|
Type alias for an :class:`AnnotatedDataset` of :class:`SegmentationDatum` elements.
|
239
255
|
"""
|
240
256
|
|
257
|
+
# ========== MODEL ==========
|
258
|
+
|
259
|
+
|
260
|
+
@runtime_checkable
|
261
|
+
class AnnotatedModel(Protocol):
|
262
|
+
"""
|
263
|
+
Protocol for an annotated model.
|
264
|
+
"""
|
265
|
+
|
266
|
+
@property
|
267
|
+
def metadata(self) -> ModelMetadata: ...
|
268
|
+
|
269
|
+
|
270
|
+
# ========== TRANSFORM ==========
|
271
|
+
|
241
272
|
|
242
273
|
@runtime_checkable
|
243
274
|
class Transform(Generic[_T], Protocol):
|
dataeval/utils/__init__.py
CHANGED
@@ -4,6 +4,6 @@ in setting up data and architectures that are guaranteed to work with applicable
|
|
4
4
|
DataEval metrics.
|
5
5
|
"""
|
6
6
|
|
7
|
-
__all__ = ["data", "
|
7
|
+
__all__ = ["data", "datasets", "torch"]
|
8
8
|
|
9
|
-
from . import data,
|
9
|
+
from . import data, datasets, torch
|
dataeval/utils/data/__init__.py
CHANGED
@@ -1,26 +1,11 @@
|
|
1
|
-
"""Provides
|
1
|
+
"""Provides access to common Computer Vision datasets."""
|
2
|
+
|
3
|
+
from dataeval.utils.data import collate, metadata
|
4
|
+
from dataeval.utils.data._dataset import to_image_classification_dataset, to_object_detection_dataset
|
2
5
|
|
3
6
|
__all__ = [
|
4
7
|
"collate",
|
5
|
-
"
|
6
|
-
"Embeddings",
|
7
|
-
"Images",
|
8
|
-
"Metadata",
|
9
|
-
"Select",
|
10
|
-
"SplitDatasetOutput",
|
11
|
-
"Targets",
|
12
|
-
"split_dataset",
|
8
|
+
"metadata",
|
13
9
|
"to_image_classification_dataset",
|
14
10
|
"to_object_detection_dataset",
|
15
11
|
]
|
16
|
-
|
17
|
-
from dataeval.outputs._utils import SplitDatasetOutput
|
18
|
-
from dataeval.utils.data._dataset import to_image_classification_dataset, to_object_detection_dataset
|
19
|
-
from dataeval.utils.data._embeddings import Embeddings
|
20
|
-
from dataeval.utils.data._images import Images
|
21
|
-
from dataeval.utils.data._metadata import Metadata
|
22
|
-
from dataeval.utils.data._selection import Select
|
23
|
-
from dataeval.utils.data._split import split_dataset
|
24
|
-
from dataeval.utils.data._targets import Targets
|
25
|
-
|
26
|
-
from . import collate, datasets
|
dataeval/utils/data/collate.py
CHANGED
@@ -4,6 +4,8 @@ Collate functions used with a PyTorch DataLoader to load data from MAITE complia
|
|
4
4
|
|
5
5
|
from __future__ import annotations
|
6
6
|
|
7
|
+
__all__ = ["list_collate_fn", "numpy_collate_fn", "torch_collate_fn"]
|
8
|
+
|
7
9
|
from typing import Any, Iterable, Sequence, TypeVar
|
8
10
|
|
9
11
|
import numpy as np
|
@@ -0,0 +1,17 @@
|
|
1
|
+
"""Provides access to common Computer Vision datasets."""
|
2
|
+
|
3
|
+
from dataeval.utils.datasets._cifar10 import CIFAR10
|
4
|
+
from dataeval.utils.datasets._milco import MILCO
|
5
|
+
from dataeval.utils.datasets._mnist import MNIST
|
6
|
+
from dataeval.utils.datasets._ships import Ships
|
7
|
+
from dataeval.utils.datasets._voc import VOCDetection, VOCDetectionTorch, VOCSegmentation
|
8
|
+
|
9
|
+
__all__ = [
|
10
|
+
"MNIST",
|
11
|
+
"Ships",
|
12
|
+
"CIFAR10",
|
13
|
+
"MILCO",
|
14
|
+
"VOCDetection",
|
15
|
+
"VOCDetectionTorch",
|
16
|
+
"VOCSegmentation",
|
17
|
+
]
|
@@ -6,9 +6,9 @@ from abc import abstractmethod
|
|
6
6
|
from pathlib import Path
|
7
7
|
from typing import TYPE_CHECKING, Any, Generic, Iterator, Literal, NamedTuple, Sequence, TypeVar
|
8
8
|
|
9
|
-
from dataeval.utils.
|
10
|
-
from dataeval.utils.
|
11
|
-
from dataeval.utils.
|
9
|
+
from dataeval.utils.datasets._fileio import _ensure_exists
|
10
|
+
from dataeval.utils.datasets._mixin import BaseDatasetMixin
|
11
|
+
from dataeval.utils.datasets._types import (
|
12
12
|
AnnotatedDataset,
|
13
13
|
DatasetMetadata,
|
14
14
|
ImageClassificationDataset,
|
@@ -9,8 +9,8 @@ import numpy as np
|
|
9
9
|
from numpy.typing import NDArray
|
10
10
|
from PIL import Image
|
11
11
|
|
12
|
-
from dataeval.utils.
|
13
|
-
from dataeval.utils.
|
12
|
+
from dataeval.utils.datasets._base import BaseICDataset, DataLocation
|
13
|
+
from dataeval.utils.datasets._mixin import BaseDatasetNumpyMixin
|
14
14
|
|
15
15
|
if TYPE_CHECKING:
|
16
16
|
from dataeval.typing import Transform
|
@@ -7,8 +7,8 @@ from typing import TYPE_CHECKING, Any, Literal, Sequence
|
|
7
7
|
|
8
8
|
from numpy.typing import NDArray
|
9
9
|
|
10
|
-
from dataeval.utils.
|
11
|
-
from dataeval.utils.
|
10
|
+
from dataeval.utils.datasets._base import BaseODDataset, DataLocation
|
11
|
+
from dataeval.utils.datasets._mixin import BaseDatasetNumpyMixin
|
12
12
|
|
13
13
|
if TYPE_CHECKING:
|
14
14
|
from dataeval.typing import Transform
|
@@ -8,8 +8,8 @@ from typing import TYPE_CHECKING, Any, Literal, Sequence, TypeVar
|
|
8
8
|
import numpy as np
|
9
9
|
from numpy.typing import NDArray
|
10
10
|
|
11
|
-
from dataeval.utils.
|
12
|
-
from dataeval.utils.
|
11
|
+
from dataeval.utils.datasets._base import BaseICDataset, DataLocation
|
12
|
+
from dataeval.utils.datasets._mixin import BaseDatasetNumpyMixin
|
13
13
|
|
14
14
|
if TYPE_CHECKING:
|
15
15
|
from dataeval.typing import Transform
|
@@ -8,8 +8,8 @@ from typing import TYPE_CHECKING, Any, Sequence
|
|
8
8
|
import numpy as np
|
9
9
|
from numpy.typing import NDArray
|
10
10
|
|
11
|
-
from dataeval.utils.
|
12
|
-
from dataeval.utils.
|
11
|
+
from dataeval.utils.datasets._base import BaseICDataset, DataLocation
|
12
|
+
from dataeval.utils.datasets._mixin import BaseDatasetNumpyMixin
|
13
13
|
|
14
14
|
if TYPE_CHECKING:
|
15
15
|
from dataeval.typing import Transform
|
@@ -9,7 +9,7 @@ import torch
|
|
9
9
|
from defusedxml.ElementTree import parse
|
10
10
|
from numpy.typing import NDArray
|
11
11
|
|
12
|
-
from dataeval.utils.
|
12
|
+
from dataeval.utils.datasets._base import (
|
13
13
|
BaseDataset,
|
14
14
|
BaseODDataset,
|
15
15
|
BaseSegDataset,
|
@@ -17,8 +17,8 @@ from dataeval.utils.data.datasets._base import (
|
|
17
17
|
_TArray,
|
18
18
|
_TTarget,
|
19
19
|
)
|
20
|
-
from dataeval.utils.
|
21
|
-
from dataeval.utils.
|
20
|
+
from dataeval.utils.datasets._mixin import BaseDatasetNumpyMixin, BaseDatasetTorchMixin
|
21
|
+
from dataeval.utils.datasets._types import ObjectDetectionTarget, SegmentationTarget
|
22
22
|
|
23
23
|
if TYPE_CHECKING:
|
24
24
|
from dataeval.typing import Transform
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: dataeval
|
3
|
-
Version: 0.
|
3
|
+
Version: 1.0.0
|
4
4
|
Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
|
5
5
|
Home-page: https://dataeval.ai/
|
6
6
|
License: MIT
|
@@ -1,33 +1,48 @@
|
|
1
|
-
dataeval/__init__.py,sha256=
|
1
|
+
dataeval/__init__.py,sha256=xd1GfD7QmzBG-WN7K6BMJSzV9_UZlX5OiKICdQ5xGfU,1635
|
2
2
|
dataeval/_log.py,sha256=Mn5bRWO0cgtAYd5VGYSFiPgu57ta3zoktrtHAZ1m3dU,357
|
3
3
|
dataeval/config.py,sha256=lD1YDH8HosFeRU5rQEYRBcmXMZy-csWaMlJTRZGd9iU,3582
|
4
|
+
dataeval/data/__init__.py,sha256=qNnRRiVP_sLthkkHpUrMgI_r8dQK-cC-xoGrrjQeRKc,544
|
5
|
+
dataeval/data/_embeddings.py,sha256=6Medqj_JCQt1iwZwWGSs1OeX-bHB8bg5BJqADY1N2s8,12883
|
6
|
+
dataeval/data/_images.py,sha256=WF9XJRka8ohUdyI2IKBMAy3JoJhOm1iC-8tbYl8woRM,2642
|
7
|
+
dataeval/data/_metadata.py,sha256=hNgsCEN8EyfDDX7zLKcQnsaDl-9xvvs5tUzqMjVLvI4,14457
|
8
|
+
dataeval/data/_selection.py,sha256=V61_pTFj0hSzmltA6CV5t51Znqw2dIQZ71Iu46bLm44,4486
|
9
|
+
dataeval/data/_split.py,sha256=6Jtm_i__CcPtNE3eSeBdPxc7gn7Cp-GM7g9wJWFlVus,16761
|
10
|
+
dataeval/data/_targets.py,sha256=ws5d9wRiDkIuOV7GSAKNxzgSm6AWTgb0BFroQK5nAmM,3057
|
11
|
+
dataeval/data/selections/__init__.py,sha256=2m8ZB53wXzqLcqmc6p5atO6graB6ZyiRSNJFxf11X_g,613
|
12
|
+
dataeval/data/selections/_classbalance.py,sha256=7v8ApoL3X8eCZ6fGDNTehE_bZ1loaP3TlhsJLaICVWg,1458
|
13
|
+
dataeval/data/selections/_classfilter.py,sha256=rEeq959p_SLl_etS7pcM8ZxK4yzEYlYZAQ3FlcLV0R8,4330
|
14
|
+
dataeval/data/selections/_indices.py,sha256=RFsR9z10aM3N0gJSfKrukFpi-LkiQGXoOwXhmOQ5cpg,630
|
15
|
+
dataeval/data/selections/_limit.py,sha256=JG4GmEiNKt3sk4PbOUbBnGGzNlyz72H-kQrt8COMm4Y,512
|
16
|
+
dataeval/data/selections/_prioritize.py,sha256=yw51ZQk6FPvyC38M4_pS_Se2Dq0LDFcdDhfbsELzTZc,11306
|
17
|
+
dataeval/data/selections/_reverse.py,sha256=b67kNC43A5KpQOic5gifjo9HpJ7FMh4LFCrfovPiJ-M,368
|
18
|
+
dataeval/data/selections/_shuffle.py,sha256=gVz_2T4rlucq8Ytqz5jvmmZdTrZDaIv43jJbq97tLjQ,1173
|
4
19
|
dataeval/detectors/__init__.py,sha256=3Sg-XWlwr75zEEH3hZKA4nWMtGvaRlnfzTWvZG_Ak6U,189
|
5
20
|
dataeval/detectors/drift/__init__.py,sha256=gD8aY5PotS-S2ot7iB_z_zzSOjIbQLw5znFBNj0jtHE,646
|
6
|
-
dataeval/detectors/drift/_base.py,sha256=
|
7
|
-
dataeval/detectors/drift/_cvm.py,sha256=
|
8
|
-
dataeval/detectors/drift/_ks.py,sha256=
|
9
|
-
dataeval/detectors/drift/_mmd.py,sha256=
|
21
|
+
dataeval/detectors/drift/_base.py,sha256=amGqzUAe8fU5qwM5lq1p8PCuhjGh9MHkdW1zeBF1LEE,7574
|
22
|
+
dataeval/detectors/drift/_cvm.py,sha256=cS33zWJmFY1fft1XcANcP2jSD5ou7TxvIU2AldhTynM,3004
|
23
|
+
dataeval/detectors/drift/_ks.py,sha256=uMc5-NA-lSV1IODrY8uJe87ll3uRJT_oXLJFXy95M1w,3186
|
24
|
+
dataeval/detectors/drift/_mmd.py,sha256=wHUy_vUafCikrZ_WX8qQXpxFwzw07-5zVutloR6hl1k,11589
|
10
25
|
dataeval/detectors/drift/_uncertainty.py,sha256=BHlykJ-r7TGLJxdPfoazXnoAJ1qVDzbk5HjAMdsnHz8,5847
|
11
26
|
dataeval/detectors/drift/updates.py,sha256=L1PnrPlIE1x6ujCc5mCwjcAZwadVTn-Zjb6MnTDvzJQ,2251
|
12
27
|
dataeval/detectors/linters/__init__.py,sha256=xn2zPwUcmsuf-Jd9uw6AVI11C9z1b1Y9fYtuFnXenZ0,404
|
13
|
-
dataeval/detectors/linters/duplicates.py,sha256=
|
14
|
-
dataeval/detectors/linters/outliers.py,sha256=
|
28
|
+
dataeval/detectors/linters/duplicates.py,sha256=X5WSEvI_BHkLoXjkaHK6wTnSkx4IjpO_exMRjSlhc70,4963
|
29
|
+
dataeval/detectors/linters/outliers.py,sha256=D8A-Fov5iUrlU9xMX5Ht33FqUY8Lk5ulC6BlHbUoLwU,9048
|
15
30
|
dataeval/detectors/ood/__init__.py,sha256=juCYBDs7CQEAtMhnEpPqF6uTrOIH9kTBSuQ_GRw6a8o,283
|
16
31
|
dataeval/detectors/ood/ae.py,sha256=fTrUfFxv6xUqzKpwMC8rW3JrizA16M_bgzqLuBKMrS0,2944
|
17
32
|
dataeval/detectors/ood/base.py,sha256=9b-Ljznf0lB1SXF4F_Aj3eJ4Y3ijGEDPMjucUsWOGJM,3051
|
18
33
|
dataeval/detectors/ood/mixin.py,sha256=0_o-1HPvgf3-Lf1MSOIfjj5UB8LTLEBGYtJJfyCCzwc,5431
|
19
34
|
dataeval/detectors/ood/vae.py,sha256=Fcq0-WbLhzYCgYOAJPBklHm7yuXmFJuEpBkhgwM5kiA,2291
|
20
35
|
dataeval/metadata/__init__.py,sha256=XDDmJbOZBNM6pL0r6Nbu6oMRoyAh22IDkPYGndNlkZU,316
|
21
|
-
dataeval/metadata/_distance.py,sha256=
|
22
|
-
dataeval/metadata/_ood.py,sha256=
|
36
|
+
dataeval/metadata/_distance.py,sha256=T1Umju_QwBiLmn1iUbxZagzBS2VnHaDIdp6j-NpaZuk,4076
|
37
|
+
dataeval/metadata/_ood.py,sha256=lnKtKModArnUrAhH_XswEtUAhUkh1U_oNsLt1UmNP44,12748
|
23
38
|
dataeval/metadata/_utils.py,sha256=r8qBJT83RblobD5W5zyTVi6vYi51Dwkqswizdbzss-M,1169
|
24
39
|
dataeval/metrics/__init__.py,sha256=8VC8q3HuJN3o_WN51Ae2_wXznl3RMXIvA5GYVcy7vr8,225
|
25
40
|
dataeval/metrics/bias/__init__.py,sha256=329S1_3WnWqeU4-qVcbe0fMy4lDrj9uKslWHIQf93yg,839
|
26
|
-
dataeval/metrics/bias/_balance.py,sha256=
|
41
|
+
dataeval/metrics/bias/_balance.py,sha256=l1hTVkVwD85bP20MTthA-I5BkvbytylQkJu3Q6iTuPA,6152
|
27
42
|
dataeval/metrics/bias/_completeness.py,sha256=BysXU2Jpw33n5dl3acJFEqF3mFGiJLsfG4n5Q2fkTaY,4608
|
28
43
|
dataeval/metrics/bias/_coverage.py,sha256=PeUoOiaghUEdn6Ov8z2-am7-fnBVIPcFbJK7Ty5JObA,3647
|
29
|
-
dataeval/metrics/bias/_diversity.py,sha256=
|
30
|
-
dataeval/metrics/bias/_parity.py,sha256=
|
44
|
+
dataeval/metrics/bias/_diversity.py,sha256=B_qWVDMZfh818U0qVm8yidquB0H0XvW8N75OWVWXy2g,5814
|
45
|
+
dataeval/metrics/bias/_parity.py,sha256=ea1D-eJh6cJxQ11XD6VbDXBKecE0jJJwptGD7LQJmBw,11529
|
31
46
|
dataeval/metrics/estimators/__init__.py,sha256=Pnds8uIyAovt2fKqZjiHCIP_kVoBWlVllekYuK5UmmU,568
|
32
47
|
dataeval/metrics/estimators/_ber.py,sha256=C30E5LiGGTAfo31zWFYDptDg0R7CTJGJ-a60YgzSkYY,5382
|
33
48
|
dataeval/metrics/estimators/_clusterer.py,sha256=1HrpihGTJ63IkNSOy4Ibw633Gllkm1RxKmoKT5MOgt0,1434
|
@@ -39,12 +54,12 @@ dataeval/metrics/stats/_boxratiostats.py,sha256=8Kd2FTZ5PLNYZfdAjU_R385gb0Z16JY0
|
|
39
54
|
dataeval/metrics/stats/_dimensionstats.py,sha256=73mFP-Myxne0peFliwvTntc0kk4cpq0krzMvSLDSIMM,2702
|
40
55
|
dataeval/metrics/stats/_hashstats.py,sha256=gp9X_pnTT3mPH9YNrWLdn2LQPK_epJ3dQRoyOCwmKlg,4758
|
41
56
|
dataeval/metrics/stats/_imagestats.py,sha256=gUPNgN5Zwzdr7WnSwbve1NXNsyxd5dy3cSnlR_7guCg,3007
|
42
|
-
dataeval/metrics/stats/_labelstats.py,sha256=
|
57
|
+
dataeval/metrics/stats/_labelstats.py,sha256=lz8I6eSd8tFkmQqy5cOG8hn9yxs0mP-Ic9ratFHiuoU,2813
|
43
58
|
dataeval/metrics/stats/_pixelstats.py,sha256=SfergRbjNJE4h0xqe-0c8RnKtZmEkZ9MwExdipLSGvg,3247
|
44
59
|
dataeval/metrics/stats/_visualstats.py,sha256=cq4AbF2B50Ihbzb86FphcnKQ1TSwNnP3PsnbpiPQZWw,3698
|
45
60
|
dataeval/outputs/__init__.py,sha256=ciK-RdXgtn_s7MSCUW1UXvrXltMbltqbpfe9_V7xGrI,1701
|
46
61
|
dataeval/outputs/_base.py,sha256=aZFbgybnZSQ3ws7QYRLTbDFqUfBFRVtIwX2LZfeGFUA,5703
|
47
|
-
dataeval/outputs/_bias.py,sha256=
|
62
|
+
dataeval/outputs/_bias.py,sha256=7L-d3DUWY6Vud7iX_VoQT0HG0KaV1U35gvmRApqzyB0,12401
|
48
63
|
dataeval/outputs/_drift.py,sha256=gOiu2C-ERTWiRqlP0auMYxPBGdm9HecWPqWfg7I4tZg,2015
|
49
64
|
dataeval/outputs/_estimators.py,sha256=a2oAIxxEDZ9WLGfMWH8KD-BVUS_SnULRPR-iI9hFPoQ,3047
|
50
65
|
dataeval/outputs/_linters.py,sha256=YOdjrfm8ypdRrqYOaPM9nc6wVJI3-ita3Haj7LHDNaw,6416
|
@@ -54,8 +69,8 @@ dataeval/outputs/_stats.py,sha256=c73Yc3Kkrl-MN6BGKe1V0Yr6Ix2Yp_DZZfFSp8fZMZ0,13
|
|
54
69
|
dataeval/outputs/_utils.py,sha256=HHlGC7sk416m_3Bgn075Qdblz_aPup_UOafJpB0RuXY,893
|
55
70
|
dataeval/outputs/_workflows.py,sha256=MkRD6ubI4NCBXb9v3kjXy64cUGs3G-JKkBdOpRD9XVE,10750
|
56
71
|
dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
57
|
-
dataeval/typing.py,sha256=
|
58
|
-
dataeval/utils/__init__.py,sha256=
|
72
|
+
dataeval/typing.py,sha256=GDMuef-oFFukNtsiKFmsExHdNvYR_j-tQcsCwZ9reow,7198
|
73
|
+
dataeval/utils/__init__.py,sha256=hRvyUK7b3d6JBEV5u47rFcOHEcmDYqAvZQw_T5pDAWw,264
|
59
74
|
dataeval/utils/_array.py,sha256=KqAdXEMjcXYvdWdYEEoEbigwQJ4S9VYxQS3sRFeY5XY,5929
|
60
75
|
dataeval/utils/_bin.py,sha256=nylthmsC3vzLHLhlUMACvZs--h7xvAh9Pt75InaQJW8,7322
|
61
76
|
dataeval/utils/_clusterer.py,sha256=fw5x-2QN0TIbiodDKHZxRgxKHINedpPcOklzce0Rbjg,5436
|
@@ -64,34 +79,20 @@ dataeval/utils/_image.py,sha256=capzF_X5H0jy0PmTP3Hf52GFgLqrnfU6gS4tiwck9jo,1939
|
|
64
79
|
dataeval/utils/_method.py,sha256=9B9JQbgqWJBRhQJb7glajUtWaQzUTIUuvrZ9_bisxsM,394
|
65
80
|
dataeval/utils/_mst.py,sha256=f0vXytTUjlOS6AyL7c6PkXmaHuuGUK-vMLpq-5xMgxk,2183
|
66
81
|
dataeval/utils/_plot.py,sha256=mTRQNbJsA42QMiOwZbJaH8sNYgP996QFDEGVVE9HSgY,7076
|
67
|
-
dataeval/utils/data/__init__.py,sha256=
|
82
|
+
dataeval/utils/data/__init__.py,sha256=xGzrjrOxOP2DP1tU84AWMKPnSxFvSjM81CTlDg4rNM8,331
|
68
83
|
dataeval/utils/data/_dataset.py,sha256=MHY582yRm4FxQkkLWUhKZBb7ZyvWypM6ldUG89vd3uE,7936
|
69
|
-
dataeval/utils/data/
|
70
|
-
dataeval/utils/data/
|
71
|
-
dataeval/utils/
|
72
|
-
dataeval/utils/
|
73
|
-
dataeval/utils/
|
74
|
-
dataeval/utils/
|
75
|
-
dataeval/utils/
|
76
|
-
dataeval/utils/
|
77
|
-
dataeval/utils/
|
78
|
-
dataeval/utils/
|
79
|
-
dataeval/utils/
|
80
|
-
dataeval/utils/
|
81
|
-
dataeval/utils/data/datasets/_mixin.py,sha256=FJgZP_cpJkgAHA3j3ai_j3Wt7aFSEjIMVmt9NpvVXzg,1757
|
82
|
-
dataeval/utils/data/datasets/_mnist.py,sha256=kNDJw0oyqa6QgU1y9lg-3AzStavK1BB8iHnDOdv9nyE,8112
|
83
|
-
dataeval/utils/data/datasets/_ships.py,sha256=rsyIoRAIk40liFgaEb2dg0lYB7__bAGd9zh9ouzjFKg,4880
|
84
|
-
dataeval/utils/data/datasets/_types.py,sha256=iSKyHXRlGuomXs0FHK6md8lXLQrQQ4fxgVOwr4o81bo,1089
|
85
|
-
dataeval/utils/data/datasets/_voc.py,sha256=QUtpbh2EpiBoicsmOo-YIfwRwPXyHj-zB2hFn7tlz0Y,15580
|
86
|
-
dataeval/utils/data/selections/__init__.py,sha256=iUbMZRDuBXwY3SNAtZTdCVu7SI4zbCyaL6ItXnnq1yI,655
|
87
|
-
dataeval/utils/data/selections/_classbalance.py,sha256=hHq9frdwzFLCUmfeJq977Sot_SXhuGANlSsetokhRDc,1465
|
88
|
-
dataeval/utils/data/selections/_classfilter.py,sha256=xdR5uX7W5Yivf-mE_CikbRi2fGrZLFrPYun3TeQHTA0,1267
|
89
|
-
dataeval/utils/data/selections/_indices.py,sha256=QdLgXN7GABCvGPYe28PV1RAc_RSP_nZOyCvEpKRBdWg,636
|
90
|
-
dataeval/utils/data/selections/_limit.py,sha256=ECvHRsp7OF4LZw2tE4sGqqJ085kjC-hd2c7QDMfvXr8,518
|
91
|
-
dataeval/utils/data/selections/_prioritize.py,sha256=uRQjeQiAc-vvwHMH4CQtXTGJCfjj_h5dgGlhQYFMz1c,11318
|
92
|
-
dataeval/utils/data/selections/_reverse.py,sha256=6SWpELC9Wgx-kPqzhDrPNn4NKU6FqDJveLrxV4D2Ypk,374
|
93
|
-
dataeval/utils/data/selections/_shuffle.py,sha256=_jwms0qcwrknf2Fx84cCXyNOJyhE_V8rcnDOTDn1S2A,1179
|
94
|
-
dataeval/utils/metadata.py,sha256=1XeGYj_e97-nJ_IrWEHPhWICmouYU5qbXWbp7uhZrIE,14171
|
84
|
+
dataeval/utils/data/collate.py,sha256=5egEEKhNNCGeNLChO1p6dZ4Wg6x51VEaMNHz7hEZUxI,3936
|
85
|
+
dataeval/utils/data/metadata.py,sha256=1XeGYj_e97-nJ_IrWEHPhWICmouYU5qbXWbp7uhZrIE,14171
|
86
|
+
dataeval/utils/datasets/__init__.py,sha256=Jfe7XI_9U5S4wuI_2QCoeuWNOxz4j0nAQvxc5wG5mWY,486
|
87
|
+
dataeval/utils/datasets/_base.py,sha256=TpmgPzF3EShCLAF5S4Zf9lFN78q17bTZF6AUE1qKdlk,8857
|
88
|
+
dataeval/utils/datasets/_cifar10.py,sha256=oSX5JEzbBM4zGC9kC7-hVTOglms3rYaUuYiA00_DUJ4,5439
|
89
|
+
dataeval/utils/datasets/_fileio.py,sha256=SixIk5nIlIwJdX9zjNXS10vHA3hL8aaYbqHsDg1xSpY,6447
|
90
|
+
dataeval/utils/datasets/_milco.py,sha256=BF2XvyzuOop1mg5pFZcRfYmZcezlbpZWHyd_TtEHFF4,7573
|
91
|
+
dataeval/utils/datasets/_mixin.py,sha256=FJgZP_cpJkgAHA3j3ai_j3Wt7aFSEjIMVmt9NpvVXzg,1757
|
92
|
+
dataeval/utils/datasets/_mnist.py,sha256=4WOkQTORYMs6KEeyyJgChTnH03797y4ezgaZtYqplh4,8102
|
93
|
+
dataeval/utils/datasets/_ships.py,sha256=RMdX2KlnXJYOTzBb6euA5TAqxs-S8b56pAGiyQhNMuo,4870
|
94
|
+
dataeval/utils/datasets/_types.py,sha256=iSKyHXRlGuomXs0FHK6md8lXLQrQQ4fxgVOwr4o81bo,1089
|
95
|
+
dataeval/utils/datasets/_voc.py,sha256=kif6ms_romK6VElP4pf2SK4cJ5dEHDOkxSaSaeP3c5k,15565
|
95
96
|
dataeval/utils/torch/__init__.py,sha256=dn5mjCrFp0b1aL_UEURhONU0Ag0cmXoTOBSGagpkTiA,325
|
96
97
|
dataeval/utils/torch/_blocks.py,sha256=HVhBTMMD5NA4qheMUgyol1KWiKZDIuc8k5j4RcMKmhk,1466
|
97
98
|
dataeval/utils/torch/_gmm.py,sha256=XM68GNEP97EjaB1U49-ZXRb81d0CEFnPS910alrcB3g,3740
|
@@ -100,7 +101,7 @@ dataeval/utils/torch/models.py,sha256=hmroEs6C6jQ5tAoZa71RFeIvXLxfXrTJSFH_jG2LGQ
|
|
100
101
|
dataeval/utils/torch/trainer.py,sha256=iUotX4OdirH8-ZtjdpU8gbJavkYW9YY9qpA2mAlFy1Y,5520
|
101
102
|
dataeval/workflows/__init__.py,sha256=ou8y0KO-d6W5lgmcyLjKlf-J_ckP3vilW7wHkgiDlZ4,255
|
102
103
|
dataeval/workflows/sufficiency.py,sha256=mjKmfRrAjShLUFIARv5o8yT5fnFvDsS5Qu6ujIPUgQg,8497
|
103
|
-
dataeval-0.
|
104
|
-
dataeval-0.
|
105
|
-
dataeval-0.
|
106
|
-
dataeval-0.
|
104
|
+
dataeval-1.0.0.dist-info/LICENSE.txt,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
|
105
|
+
dataeval-1.0.0.dist-info/METADATA,sha256=ma_TquWQQl0QETiK4-wH1jfAe2my33Cl37GswNe0ZM8,5307
|
106
|
+
dataeval-1.0.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
107
|
+
dataeval-1.0.0.dist-info/RECORD,,
|
@@ -1,17 +0,0 @@
|
|
1
|
-
"""Provides access to common Computer Vision datasets."""
|
2
|
-
|
3
|
-
from dataeval.utils.data.datasets._cifar10 import CIFAR10
|
4
|
-
from dataeval.utils.data.datasets._milco import MILCO
|
5
|
-
from dataeval.utils.data.datasets._mnist import MNIST
|
6
|
-
from dataeval.utils.data.datasets._ships import Ships
|
7
|
-
from dataeval.utils.data.datasets._voc import VOCDetection, VOCDetectionTorch, VOCSegmentation
|
8
|
-
|
9
|
-
__all__ = [
|
10
|
-
"MNIST",
|
11
|
-
"Ships",
|
12
|
-
"CIFAR10",
|
13
|
-
"MILCO",
|
14
|
-
"VOCDetection",
|
15
|
-
"VOCDetectionTorch",
|
16
|
-
"VOCSegmentation",
|
17
|
-
]
|
@@ -1,19 +0,0 @@
|
|
1
|
-
"""Provides selection classes for selecting subsets of Computer Vision datasets."""
|
2
|
-
|
3
|
-
__all__ = [
|
4
|
-
"ClassBalance",
|
5
|
-
"ClassFilter",
|
6
|
-
"Indices",
|
7
|
-
"Limit",
|
8
|
-
"Prioritize",
|
9
|
-
"Reverse",
|
10
|
-
"Shuffle",
|
11
|
-
]
|
12
|
-
|
13
|
-
from dataeval.utils.data.selections._classbalance import ClassBalance
|
14
|
-
from dataeval.utils.data.selections._classfilter import ClassFilter
|
15
|
-
from dataeval.utils.data.selections._indices import Indices
|
16
|
-
from dataeval.utils.data.selections._limit import Limit
|
17
|
-
from dataeval.utils.data.selections._prioritize import Prioritize
|
18
|
-
from dataeval.utils.data.selections._reverse import Reverse
|
19
|
-
from dataeval.utils.data.selections._shuffle import Shuffle
|
@@ -1,44 +0,0 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
__all__ = []
|
4
|
-
|
5
|
-
from typing import Sequence
|
6
|
-
|
7
|
-
import numpy as np
|
8
|
-
|
9
|
-
from dataeval.typing import Array, ImageClassificationDatum
|
10
|
-
from dataeval.utils._array import as_numpy
|
11
|
-
from dataeval.utils.data._selection import Select, Selection, SelectionStage
|
12
|
-
|
13
|
-
|
14
|
-
class ClassFilter(Selection[ImageClassificationDatum]):
|
15
|
-
"""
|
16
|
-
Filter the dataset by class.
|
17
|
-
|
18
|
-
Parameters
|
19
|
-
----------
|
20
|
-
classes : Sequence[int]
|
21
|
-
The classes to filter by.
|
22
|
-
"""
|
23
|
-
|
24
|
-
stage = SelectionStage.FILTER
|
25
|
-
|
26
|
-
def __init__(self, classes: Sequence[int]) -> None:
|
27
|
-
self.classes = classes
|
28
|
-
|
29
|
-
def __call__(self, dataset: Select[ImageClassificationDatum]) -> None:
|
30
|
-
if not self.classes:
|
31
|
-
return
|
32
|
-
|
33
|
-
selection = []
|
34
|
-
for idx in dataset._selection:
|
35
|
-
target = dataset._dataset[idx][1]
|
36
|
-
if isinstance(target, Array):
|
37
|
-
label = int(np.argmax(as_numpy(target)))
|
38
|
-
else:
|
39
|
-
# ObjectDetectionTarget and SegmentationTarget not supported yet
|
40
|
-
raise TypeError("ClassFilter only supports classification targets as an array of confidence scores.")
|
41
|
-
if label in self.classes:
|
42
|
-
selection.append(idx)
|
43
|
-
|
44
|
-
dataset._selection = selection
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|