dataeval 0.86.7__py3-none-any.whl → 0.86.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dataeval/__init__.py CHANGED
@@ -7,12 +7,19 @@ shifts that impact performance of deployed models.
7
7
 
8
8
  from __future__ import annotations
9
9
 
10
- __all__ = ["config", "detectors", "log", "metrics", "typing", "utils", "workflows"]
11
- __version__ = "0.86.7"
10
+ try:
11
+ from ._version import __version__
12
+ except ImportError:
13
+ __version__ = "unknown"
14
+
15
+ # Strongly type for pyright
16
+ __version__ = str(__version__)
17
+
18
+ __all__ = ["__version__", "config", "detectors", "log", "metrics", "typing", "utils", "workflows"]
12
19
 
13
20
  import logging
14
21
 
15
- from dataeval import config, detectors, metrics, typing, utils, workflows
22
+ from . import config, detectors, metrics, typing, utils, workflows
16
23
 
17
24
  logging.getLogger(__name__).addHandler(logging.NullHandler())
18
25
 
dataeval/_version.py ADDED
@@ -0,0 +1,21 @@
1
+ # file generated by setuptools-scm
2
+ # don't change, don't track in version control
3
+
4
+ __all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
5
+
6
+ TYPE_CHECKING = False
7
+ if TYPE_CHECKING:
8
+ from typing import Tuple
9
+ from typing import Union
10
+
11
+ VERSION_TUPLE = Tuple[Union[int, str], ...]
12
+ else:
13
+ VERSION_TUPLE = object
14
+
15
+ version: str
16
+ __version__: str
17
+ __version_tuple__: VERSION_TUPLE
18
+ version_tuple: VERSION_TUPLE
19
+
20
+ __version__ = version = '0.86.8'
21
+ __version_tuple__ = version_tuple = (0, 86, 8)
dataeval/config.py CHANGED
@@ -77,7 +77,13 @@ def get_device(override: DeviceLike | None = None) -> torch.device:
77
77
  """
78
78
  if override is None:
79
79
  global _device
80
- return torch.get_default_device() if _device is None else _device
80
+ return (
81
+ torch.get_default_device()
82
+ if hasattr(torch, "get_default_device")
83
+ else torch.device("cpu")
84
+ if _device is None
85
+ else _device
86
+ )
81
87
  return _todevice(override)
82
88
 
83
89
 
@@ -1,16 +1,9 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import TYPE_CHECKING
4
-
5
3
  import numpy as np
6
4
  import pandas as pd
7
5
  from numpy.typing import ArrayLike
8
6
 
9
- if TYPE_CHECKING:
10
- from typing import Self
11
- else:
12
- from typing_extensions import Self
13
-
14
7
  from dataeval.detectors.drift._nml._chunk import CountBasedChunker, SizeBasedChunker
15
8
  from dataeval.detectors.drift._nml._domainclassifier import DomainClassifierCalculator
16
9
  from dataeval.detectors.drift._nml._thresholds import ConstantThreshold
@@ -52,7 +45,7 @@ class DriftMVDC:
52
45
  threshold=ConstantThreshold(lower=self.threshold[0], upper=self.threshold[1]),
53
46
  )
54
47
 
55
- def fit(self, x_ref: ArrayLike) -> Self:
48
+ def fit(self, x_ref: ArrayLike) -> DriftMVDC:
56
49
  """
57
50
  Fit the domain classifier on the training dataframe
58
51
 
@@ -63,7 +56,7 @@ class DriftMVDC:
63
56
 
64
57
  Returns
65
58
  -------
66
- Self
59
+ DriftMVDC
67
60
 
68
61
  """
69
62
  # for 1D input, assume that is 1 sample: dim[1,n_features]
@@ -46,10 +46,10 @@ class Chunk(ABC):
46
46
  return self.data.shape[0]
47
47
 
48
48
  @abstractmethod
49
- def __add__(self, other: Self) -> Self: ...
49
+ def __add__(self, other: Any) -> Any: ...
50
50
 
51
51
  @abstractmethod
52
- def __lt__(self, other: Self) -> bool: ...
52
+ def __lt__(self, other: Any) -> bool: ...
53
53
 
54
54
  @abstractmethod
55
55
  def dict(self) -> dict[str, Any]: ...
@@ -65,7 +65,7 @@ class OOD_AE(OODBase):
65
65
  self,
66
66
  x_ref: ArrayLike,
67
67
  threshold_perc: float,
68
- loss_fn: Callable[..., torch.nn.Module] | None = None,
68
+ loss_fn: Callable[..., torch.Tensor] | None = None,
69
69
  optimizer: torch.optim.Optimizer | None = None,
70
70
  epochs: int = 20,
71
71
  batch_size: int = 64,
@@ -22,7 +22,7 @@ from dataeval.utils.torch._gmm import GaussianMixtureModelParams, gmm_params
22
22
  from dataeval.utils.torch._internal import trainer
23
23
 
24
24
 
25
- class OODBase(OODBaseMixin[torch.nn.Module], OODFitMixin[Callable[..., torch.nn.Module], torch.optim.Optimizer]):
25
+ class OODBase(OODBaseMixin[torch.nn.Module], OODFitMixin[Callable[..., torch.Tensor], torch.optim.Optimizer]):
26
26
  def __init__(self, model: torch.nn.Module, device: DeviceLike | None = None) -> None:
27
27
  self.device: torch.device = get_device(device)
28
28
  super().__init__(model)
@@ -31,7 +31,7 @@ class OODBase(OODBaseMixin[torch.nn.Module], OODFitMixin[Callable[..., torch.nn.
31
31
  self,
32
32
  x_ref: ArrayLike,
33
33
  threshold_perc: float,
34
- loss_fn: Callable[..., torch.nn.Module] | None,
34
+ loss_fn: Callable[..., torch.Tensor] | None,
35
35
  optimizer: torch.optim.Optimizer | None,
36
36
  epochs: int,
37
37
  batch_size: int,
@@ -82,7 +82,7 @@ class OODBaseGMM(OODBase, OODGMMMixin[GaussianMixtureModelParams]):
82
82
  self,
83
83
  x_ref: ArrayLike,
84
84
  threshold_perc: float,
85
- loss_fn: Callable[..., torch.nn.Module] | None,
85
+ loss_fn: Callable[..., torch.Tensor] | None,
86
86
  optimizer: torch.optim.Optimizer | None,
87
87
  epochs: int,
88
88
  batch_size: int,
@@ -9,11 +9,11 @@ import numpy as np
9
9
 
10
10
  from dataeval.config import EPSILON
11
11
  from dataeval.outputs import CompletenessOutput
12
- from dataeval.typing import ArrayLike
12
+ from dataeval.typing import Array
13
13
  from dataeval.utils._array import ensure_embeddings
14
14
 
15
15
 
16
- def completeness(embeddings: ArrayLike, quantiles: int) -> CompletenessOutput:
16
+ def completeness(embeddings: Array, quantiles: int) -> CompletenessOutput:
17
17
  """
18
18
  Calculate the fraction of boxes in a grid defined by quantiles that
19
19
  contain at least one data point.
@@ -21,7 +21,7 @@ def completeness(embeddings: ArrayLike, quantiles: int) -> CompletenessOutput:
21
21
 
22
22
  Parameters
23
23
  ----------
24
- embeddings : ArrayLike
24
+ embeddings : Array
25
25
  Embedded dataset (or other low-dimensional data) (nxp)
26
26
  quantiles : int
27
27
  number of quantile values to use for partitioning each dimension
@@ -10,13 +10,13 @@ from scipy.spatial.distance import pdist, squareform
10
10
 
11
11
  from dataeval.outputs import CoverageOutput
12
12
  from dataeval.outputs._base import set_metadata
13
- from dataeval.typing import ArrayLike
13
+ from dataeval.typing import Array
14
14
  from dataeval.utils._array import ensure_embeddings, flatten
15
15
 
16
16
 
17
17
  @set_metadata
18
18
  def coverage(
19
- embeddings: ArrayLike,
19
+ embeddings: Array,
20
20
  radius_type: Literal["adaptive", "naive"] = "adaptive",
21
21
  num_observations: int = 20,
22
22
  percent: float = 0.01,
@@ -271,7 +271,7 @@ def parity(metadata: Metadata) -> ParityOutput:
271
271
  # because scipy.stats.chi2_contingency fails when there are rows containing only zeros.
272
272
  contingency_matrix = contingency_matrix[np.any(contingency_matrix, axis=1)]
273
273
 
274
- chi_scores[i], p_values[i] = chi2_contingency(contingency_matrix)[:2]
274
+ chi_scores[i], p_values[i] = chi2_contingency(contingency_matrix)[:2] # type: ignore
275
275
 
276
276
  if insufficient_data:
277
277
  warnings.warn(
@@ -22,7 +22,7 @@ from scipy.stats import mode
22
22
  from dataeval.config import EPSILON
23
23
  from dataeval.outputs import BEROutput
24
24
  from dataeval.outputs._base import set_metadata
25
- from dataeval.typing import ArrayLike
25
+ from dataeval.typing import Array
26
26
  from dataeval.utils._array import as_numpy, ensure_embeddings
27
27
  from dataeval.utils._method import get_method
28
28
  from dataeval.utils._mst import compute_neighbors, minimum_spanning_tree
@@ -105,7 +105,7 @@ _BER_FN_MAP = {"KNN": ber_knn, "MST": ber_mst}
105
105
 
106
106
 
107
107
  @set_metadata
108
- def ber(embeddings: ArrayLike, labels: ArrayLike, k: int = 1, method: Literal["KNN", "MST"] = "KNN") -> BEROutput:
108
+ def ber(embeddings: Array, labels: Array, k: int = 1, method: Literal["KNN", "MST"] = "KNN") -> BEROutput:
109
109
  """
110
110
  An estimator for Multi-class :term:`Bayes error rate<Bayes Error Rate (BER)>` \
111
111
  using FR or KNN test statistic basis.
@@ -14,7 +14,7 @@ from numpy.typing import NDArray
14
14
 
15
15
  from dataeval.outputs import DivergenceOutput
16
16
  from dataeval.outputs._base import set_metadata
17
- from dataeval.typing import ArrayLike
17
+ from dataeval.typing import Array
18
18
  from dataeval.utils._array import ensure_embeddings
19
19
  from dataeval.utils._method import get_method
20
20
  from dataeval.utils._mst import compute_neighbors, minimum_spanning_tree
@@ -65,7 +65,7 @@ _DIVERGENCE_FN_MAP = {"FNN": divergence_fnn, "MST": divergence_mst}
65
65
 
66
66
 
67
67
  @set_metadata
68
- def divergence(emb_a: ArrayLike, emb_b: ArrayLike, method: Literal["FNN", "MST"] = "FNN") -> DivergenceOutput:
68
+ def divergence(emb_a: Array, emb_b: Array, method: Literal["FNN", "MST"] = "FNN") -> DivergenceOutput:
69
69
  """
70
70
  Calculates the :term:`divergence` and any errors between the datasets.
71
71
 
@@ -47,11 +47,11 @@ class ClustererOutput(Output):
47
47
  The strength of the data point belonging to the assigned cluster
48
48
  """
49
49
 
50
- clusters: NDArray[np.int_]
51
- mst: NDArray[np.double]
52
- linkage_tree: NDArray[np.double]
53
- condensed_tree: NDArray[np.double]
54
- membership_strengths: NDArray[np.double]
50
+ clusters: NDArray[np.intp]
51
+ mst: NDArray[np.float32]
52
+ linkage_tree: NDArray[np.float32]
53
+ condensed_tree: NDArray[np.float32]
54
+ membership_strengths: NDArray[np.float32]
55
55
 
56
56
  def find_outliers(self) -> NDArray[np.int_]:
57
57
  """
@@ -77,7 +77,7 @@ class ClustererOutput(Output):
77
77
  # Delay load numba compiled functions
78
78
  from dataeval.utils._clusterer import compare_links_to_cluster_std, sorted_union_find
79
79
 
80
- exact_indices, near_indices = compare_links_to_cluster_std(self.mst, self.clusters)
80
+ exact_indices, near_indices = compare_links_to_cluster_std(self.mst, self.clusters) # type: ignore
81
81
  exact_dupes = sorted_union_find(exact_indices)
82
82
  near_dupes = sorted_union_find(near_indices)
83
83
 
dataeval/utils/_array.py CHANGED
@@ -19,7 +19,7 @@ _logger = logging.getLogger(__name__)
19
19
 
20
20
  _MODULE_CACHE = {}
21
21
 
22
- T = TypeVar("T", ArrayLike, np.ndarray, torch.Tensor)
22
+ T = TypeVar("T", Array, np.ndarray, torch.Tensor)
23
23
  _np_dtype = TypeVar("_np_dtype", bound=np.generic)
24
24
 
25
25
 
@@ -73,6 +73,19 @@ def to_numpy_iter(iterable: Iterable[ArrayLike]) -> Iterator[NDArray[Any]]:
73
73
  yield to_numpy(array)
74
74
 
75
75
 
76
+ @overload
77
+ def rescale_array(array: NDArray[_np_dtype]) -> NDArray[_np_dtype]: ...
78
+ @overload
79
+ def rescale_array(array: torch.Tensor) -> torch.Tensor: ...
80
+ def rescale_array(array: Array | NDArray[_np_dtype] | torch.Tensor) -> Array | NDArray[_np_dtype] | torch.Tensor:
81
+ """Rescale an array to the range [0, 1]"""
82
+ if isinstance(array, (np.ndarray, torch.Tensor)):
83
+ arr_min = array.min()
84
+ arr_max = array.max()
85
+ return (array - arr_min) / (arr_max - arr_min)
86
+ raise TypeError(f"Unsupported type: {type(array)}")
87
+
88
+
76
89
  @overload
77
90
  def ensure_embeddings(
78
91
  embeddings: T,
@@ -137,14 +150,12 @@ def ensure_embeddings(
137
150
  if arr.ndim != 2:
138
151
  raise ValueError(f"Expected a 2D array, but got a {arr.ndim}D array.")
139
152
 
140
- if unit_interval:
141
- arr_min, arr_max = arr.min(), arr.max()
142
- if arr_min < 0 or arr_max > 1:
143
- if unit_interval == "force":
144
- warnings.warn("Embeddings are not unit interval [0, 1]. Forcing to unit interval.")
145
- arr = (arr - arr_min) / (arr_max - arr_min)
146
- else:
147
- raise ValueError("Embeddings must be unit interval [0, 1].")
153
+ if unit_interval and (arr.min() < 0 or arr.max() > 1):
154
+ if unit_interval == "force":
155
+ warnings.warn("Embeddings are not unit interval [0, 1]. Forcing to unit interval.")
156
+ arr = rescale_array(arr)
157
+ else:
158
+ raise ValueError("Embeddings must be unit interval [0, 1].")
148
159
 
149
160
  if dtype is None:
150
161
  return embeddings
@@ -69,12 +69,12 @@ def compare_links_to_cluster_std(
69
69
  @dataclass
70
70
  class ClusterData:
71
71
  clusters: NDArray[np.intp]
72
- mst: NDArray[np.double]
73
- linkage_tree: NDArray[np.double]
72
+ mst: NDArray[np.float32]
73
+ linkage_tree: NDArray[np.float32]
74
74
  condensed_tree: CondensedTree
75
- membership_strengths: NDArray[np.double]
75
+ membership_strengths: NDArray[np.float32]
76
76
  k_neighbors: NDArray[np.int32]
77
- k_distances: NDArray[np.double]
77
+ k_distances: NDArray[np.float32]
78
78
 
79
79
 
80
80
  def cluster(data: ArrayLike) -> ClusterData:
@@ -95,9 +95,9 @@ def cluster(data: ArrayLike) -> ClusterData:
95
95
 
96
96
  max_neighbors = min(25, num_samples - 1)
97
97
  kneighbors, kdistances = calculate_neighbor_distances(x, max_neighbors)
98
- unsorted_mst: NDArray[np.double] = minimum_spanning_tree(x, kneighbors, kdistances)
99
- mst: NDArray[np.double] = unsorted_mst[np.argsort(unsorted_mst.T[2])]
100
- linkage_tree: NDArray[np.double] = mst_to_linkage_tree(mst)
98
+ unsorted_mst: NDArray[np.float32] = minimum_spanning_tree(x, kneighbors, kdistances)
99
+ mst: NDArray[np.float32] = unsorted_mst[np.argsort(unsorted_mst.T[2])]
100
+ linkage_tree: NDArray[np.float32] = mst_to_linkage_tree(mst).astype(np.float32)
101
101
  condensed_tree: CondensedTree = condense_tree(linkage_tree, min_cluster_size, None)
102
102
 
103
103
  cluster_tree = cluster_tree_from_condensed_tree(condensed_tree)
@@ -65,7 +65,7 @@ def trainer(
65
65
  model: torch.nn.Module,
66
66
  x_train: NDArray[Any],
67
67
  y_train: NDArray[Any] | None,
68
- loss_fn: Callable[..., torch.Tensor | torch.nn.Module] | None,
68
+ loss_fn: Callable[..., torch.Tensor] | None,
69
69
  optimizer: torch.optim.Optimizer | None,
70
70
  preprocess_fn: Callable[[torch.Tensor], torch.Tensor] | None,
71
71
  epochs: int,
@@ -117,7 +117,7 @@ def trainer(
117
117
  model = model.to(device)
118
118
 
119
119
  # iterate over epochs
120
- loss = torch.nan
120
+ loss = torch.scalar_tensor(torch.nan)
121
121
  disable_tqdm = not verbose
122
122
  for epoch in (pbar := tqdm(range(epochs), disable=disable_tqdm)):
123
123
  epoch_loss = loss
@@ -133,7 +133,7 @@ def trainer(
133
133
  y_hat = model(x)
134
134
  y = x if y is None else y
135
135
 
136
- loss = loss_fn(y, *y_hat) if isinstance(y_hat, tuple) else loss_fn(y, y_hat) # type: ignore
136
+ loss = loss_fn(y, *y_hat) if isinstance(y_hat, tuple) else loss_fn(y, y_hat)
137
137
 
138
138
  optimizer.zero_grad()
139
139
  loss.backward()
@@ -172,7 +172,7 @@ class AETrainer:
172
172
  for batch in dl:
173
173
  imgs = get_images_from_batch(batch)
174
174
  imgs = imgs.to(self.device)
175
- embeddings = encode_func(imgs).to("cpu")
175
+ embeddings = encode_func(imgs).to("cpu") # type: ignore
176
176
  encodings = torch.vstack((encodings, embeddings)) if len(encodings) else embeddings
177
177
 
178
178
  return encodings
@@ -1,45 +1,52 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: dataeval
3
- Version: 0.86.7
3
+ Version: 0.86.8
4
4
  Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
5
- Home-page: https://dataeval.ai/
6
- License: MIT
7
- Author: Andrew Weng
8
- Author-email: andrew.weng@ariacoustics.com
9
- Maintainer: ARiA
10
- Maintainer-email: dataeval@ariacoustics.com
11
- Requires-Python: >=3.9,<3.13
5
+ Project-URL: Homepage, https://dataeval.ai/
6
+ Project-URL: Repository, https://github.com/aria-ml/dataeval/
7
+ Project-URL: Documentation, https://dataeval.readthedocs.io/
8
+ Author-email: Andrew Weng <andrew.weng@ariacoustics.com>, Bill Peria <bill.peria@ariacoustics.com>, Jon Botts <jonathan.botts@ariacoustics.com>, Jonathan Christian <jonathan.christian@ariacoustics.com>, Justin McMillan <justin.mcmillan@ariacoustics.com>, Ryan Wood <ryan.wood@ariacoustics.com>, Scott Swan <scott.swan@ariacoustics.com>, Shaun Jullens <shaun.jullens@ariacoustics.com>
9
+ Maintainer-email: ARiA <dataeval@ariacoustics.com>
10
+ License-Expression: MIT
11
+ License-File: LICENSE.txt
12
12
  Classifier: Development Status :: 4 - Beta
13
13
  Classifier: Intended Audience :: Science/Research
14
14
  Classifier: License :: OSI Approved :: MIT License
15
15
  Classifier: Operating System :: OS Independent
16
- Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3 :: Only
17
17
  Classifier: Programming Language :: Python :: 3.9
18
18
  Classifier: Programming Language :: Python :: 3.10
19
19
  Classifier: Programming Language :: Python :: 3.11
20
20
  Classifier: Programming Language :: Python :: 3.12
21
- Classifier: Programming Language :: Python :: 3 :: Only
22
21
  Classifier: Topic :: Scientific/Engineering
22
+ Requires-Python: <3.13,>=3.9
23
+ Requires-Dist: defusedxml>=0.7.1
24
+ Requires-Dist: fast-hdbscan==0.2.0
25
+ Requires-Dist: lightgbm>=4
26
+ Requires-Dist: numba>=0.59.1
27
+ Requires-Dist: numpy>=1.24.2
28
+ Requires-Dist: pandas>=2.0
29
+ Requires-Dist: pillow>=10.3.0
30
+ Requires-Dist: polars>=1.0.0
31
+ Requires-Dist: requests>=2.32.3
32
+ Requires-Dist: scikit-learn>=1.5.0
33
+ Requires-Dist: scipy>=1.10
34
+ Requires-Dist: torch>=2.2.0
35
+ Requires-Dist: torchvision>=0.17.0
36
+ Requires-Dist: tqdm>=4.66
37
+ Requires-Dist: typing-extensions>=4.12; python_version ~= '3.9'
38
+ Requires-Dist: xxhash>=3.3
23
39
  Provides-Extra: all
24
- Requires-Dist: defusedxml (>=0.7.1)
25
- Requires-Dist: fast_hdbscan (==0.2.0)
26
- Requires-Dist: lightgbm (>=4)
27
- Requires-Dist: matplotlib (>=3.7.1) ; extra == "all"
28
- Requires-Dist: numba (>=0.59.1)
29
- Requires-Dist: numpy (>=1.24.2)
30
- Requires-Dist: pandas (>=2.0)
31
- Requires-Dist: pillow (>=10.3.0)
32
- Requires-Dist: polars (>=1.0.0)
33
- Requires-Dist: requests
34
- Requires-Dist: scikit-learn (>=1.5.0)
35
- Requires-Dist: scipy (>=1.10)
36
- Requires-Dist: torch (>=2.2.0)
37
- Requires-Dist: torchvision (>=0.17.0)
38
- Requires-Dist: tqdm
39
- Requires-Dist: typing-extensions (>=4.12) ; python_version >= "3.9" and python_version < "4.0"
40
- Requires-Dist: xxhash (>=3.3)
41
- Project-URL: Documentation, https://dataeval.readthedocs.io/
42
- Project-URL: Repository, https://github.com/aria-ml/dataeval/
40
+ Requires-Dist: matplotlib>=3.7.1; extra == 'all'
41
+ Provides-Extra: cpu
42
+ Requires-Dist: torch>=2.2.0; extra == 'cpu'
43
+ Requires-Dist: torchvision>=0.17.0; extra == 'cpu'
44
+ Provides-Extra: cu118
45
+ Requires-Dist: torch>=2.2.0; extra == 'cu118'
46
+ Requires-Dist: torchvision>=0.17.0; extra == 'cu118'
47
+ Provides-Extra: cu124
48
+ Requires-Dist: torch>=2.2.0; extra == 'cu124'
49
+ Requires-Dist: torchvision>=0.17.0; extra == 'cu124'
43
50
  Description-Content-Type: text/markdown
44
51
 
45
52
  # DataEval
@@ -72,26 +79,28 @@ estimation, bias detection, and dataset linting.
72
79
  <!-- end needs -->
73
80
 
74
81
  <!-- start JATIC interop -->
82
+
75
83
  DataEval is easy to install, supports a wide range of Python versions, and is
76
84
  compatible with many of the most popular packages in the scientific and T&E
77
85
  communities.
78
86
 
79
87
  DataEval also has native interoperability between JATIC's suite of tools when
80
88
  using MAITE-compliant datasets and models.
89
+
81
90
  <!-- end JATIC interop -->
82
91
 
83
92
  ## Getting Started
84
93
 
85
94
  **Python versions:** 3.9 - 3.12
86
95
 
87
- **Supported packages**: *NumPy*, *Pandas*, *Sci-kit learn*, *MAITE*, *NRTK*
96
+ **Supported packages**: _NumPy_, _Pandas_, _Sci-kit learn_, _MAITE_, _NRTK_
88
97
 
89
98
  Choose your preferred method of installation below or follow our
90
99
  [installation guide](https://dataeval.readthedocs.io/en/v0.74.2/installation.html).
91
100
 
92
- * [Installing with pip](#installing-with-pip)
93
- * [Installing with conda/mamba](#installing-with-conda)
94
- * [Installing from GitHub](#installing-from-github)
101
+ - [Installing with pip](#installing-with-pip)
102
+ - [Installing with conda/mamba](#installing-with-conda)
103
+ - [Installing from GitHub](#installing-from-github)
95
104
 
96
105
  ### **Installing with pip**
97
106
 
@@ -105,7 +114,7 @@ pip install dataeval[all]
105
114
  ### **Installing with conda**
106
115
 
107
116
  DataEval can be installed in a Conda/Mamba environment using the provided
108
- `environment.yaml` file. As some dependencies are installed from the `pytorch`
117
+ `environment.yaml` file. As some dependencies are installed from the `pytorch`
109
118
  channel, the channel is specified in the below example.
110
119
 
111
120
  ```bash
@@ -115,12 +124,10 @@ micromamba create -f environment\environment.yaml -c pytorch
115
124
  ### **Installing from GitHub**
116
125
 
117
126
  To install DataEval from source locally on Ubuntu, you will need `git-lfs` to
118
- download larger, binary source files and `poetry` for project dependency
119
- management.
127
+ download larger, binary source files.
120
128
 
121
129
  ```bash
122
130
  sudo apt-get install git-lfs
123
- pip install poetry
124
131
  ```
125
132
 
126
133
  Pull the source down and change to the DataEval project directory.
@@ -130,26 +137,40 @@ git clone https://github.com/aria-ml/dataeval.git
130
137
  cd dataeval
131
138
  ```
132
139
 
133
- Install DataEval with optional dependencies for development.
140
+ #### **Using Poetry**
141
+
142
+ Install DataEval with all extras.
134
143
 
135
144
  ```bash
136
- poetry install --all-extras --with dev
145
+ poetry install --extras=all
137
146
  ```
138
147
 
139
- Now that DataEval is installed, you can run commands in the poetry virtual
140
- environment by prefixing shell commands with `poetry run`, or activate the
141
- virtual environment directly in the shell.
148
+ Enable Poetry's virtual environment.
142
149
 
143
150
  ```bash
144
- poetry shell
151
+ poetry env activate
152
+ ```
153
+
154
+ #### **Using uv**
155
+
156
+ Install DataEval with all extras and dependencies for development.
157
+
158
+ ```bash
159
+ uv sync --extra=all
160
+ ```
161
+
162
+ Enable uv's virtual environment.
163
+
164
+ ```bash
165
+ source .venv/bin/activate
145
166
  ```
146
167
 
147
168
  ## Contact Us
148
169
 
149
170
  If you have any questions, feel free to reach out to the people below:
150
171
 
151
- * **POC**: Scott Swan @scott.swan
152
- * **DPOC**: Andrew Weng @aweng
172
+ - **POC**: Scott Swan @scott.swan
173
+ - **DPOC**: Andrew Weng @aweng
153
174
 
154
175
  ## Acknowledgement
155
176
 
@@ -164,4 +185,3 @@ interpreted as necessarily representing the official policies or endorsements,
164
185
  either expressed or implied, of the U.S. Government.
165
186
 
166
187
  <!-- end acknowledgement -->
167
-
@@ -1,6 +1,9 @@
1
- dataeval/__init__.py,sha256=P6WvVjHlE2nH57bXBR4A9ez6R32OQGm9bshYrxRKwFw,1636
1
+ dataeval/__init__.py,sha256=dEDltdHOnbk4-XAbQwJLOZtCbRLZsDMnptWRwbF2r54,1773
2
2
  dataeval/_log.py,sha256=C7AGkIRzymvYJ0LQXtnShiy3i5Xrp8T58JzIHHguk_Q,365
3
- dataeval/config.py,sha256=bHa8np4FCtLLv8_xlfdDC4lb1InJ_kT0vXDO5P42rvk,4082
3
+ dataeval/_version.py,sha256=IPUOExUy8nF4kYGtCPV5bg6_IYDRLVOKnFJcNllcO1M,513
4
+ dataeval/config.py,sha256=g3Np0Q3J5Rzij6Gsz7tJh7eOxgwNPf6NsFYmAR8Atfs,4219
5
+ dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ dataeval/typing.py,sha256=W8rqFFkAqE5a5ar3MmB-O5gcMJqvoDKXC8Y0ggBqAKo,7216
4
7
  dataeval/data/__init__.py,sha256=wzQ6uUFLNB3VJR0a2QnRBYwEmwXT93q0WpHu7FmFW1E,486
5
8
  dataeval/data/_embeddings.py,sha256=PFjpdV9bfusCB4taTIYSzx1hP8nJb_KCkZTN8kMw-Hs,12885
6
9
  dataeval/data/_images.py,sha256=Rc_59CuU4zfN7Xm7an1XUx8ZghQg6a56VJWMZD9edRw,2654
@@ -21,21 +24,21 @@ dataeval/detectors/drift/_base.py,sha256=6aNF1LzG3w1sNUrmSBbsvuN5IkQnoRikRacqobY
21
24
  dataeval/detectors/drift/_cvm.py,sha256=cS33zWJmFY1fft1XcANcP2jSD5ou7TxvIU2AldhTynM,3004
22
25
  dataeval/detectors/drift/_ks.py,sha256=uMc5-NA-lSV1IODrY8uJe87ll3uRJT_oXLJFXy95M1w,3186
23
26
  dataeval/detectors/drift/_mmd.py,sha256=uw8axM6dWxTBrCaXwkbldIDcdhe4hmim9yrsbuOwA-0,11523
24
- dataeval/detectors/drift/_mvdc.py,sha256=ABxGut6KzxF_oM-Hs87WARCR0692dhPVdZNoGGwJaa4,3058
27
+ dataeval/detectors/drift/_mvdc.py,sha256=WMN6aDOWCh1q1MtdRXFIZlFcfnVi4XgBHsS0A6L5UuY,2942
28
+ dataeval/detectors/drift/_uncertainty.py,sha256=BHlykJ-r7TGLJxdPfoazXnoAJ1qVDzbk5HjAMdsnHz8,5847
29
+ dataeval/detectors/drift/updates.py,sha256=L1PnrPlIE1x6ujCc5mCwjcAZwadVTn-Zjb6MnTDvzJQ,2251
25
30
  dataeval/detectors/drift/_nml/__init__.py,sha256=MNyKyZlfTjr5uQql2uBBfRkUdsuduie_WJdn09GYmqg,137
26
31
  dataeval/detectors/drift/_nml/_base.py,sha256=o34LcCsD9p1A6u8UdQn-dxIVwC2CMr6uCpC0vq16JX0,2663
27
- dataeval/detectors/drift/_nml/_chunk.py,sha256=t12eouanRNiu5DJXOaYDZXUvFMqfcp1BETLOufdV79M,13567
32
+ dataeval/detectors/drift/_nml/_chunk.py,sha256=xF3U-CAobzoKX-20yjWjGVD14IOcAV6rPaIrqCMwGdQ,13564
28
33
  dataeval/detectors/drift/_nml/_domainclassifier.py,sha256=n7Ttq5Ej7sAY9Jn2iagaGj4IIWiG8gmA3wwFizlBqes,7292
29
34
  dataeval/detectors/drift/_nml/_result.py,sha256=TMK17bnlgSdL0MCRHtQZJO8YoWWe4C2kh_akESrlP1g,3269
30
35
  dataeval/detectors/drift/_nml/_thresholds.py,sha256=WGdkLei9w_EvvsRHQzWdDyFVoZHIwM78k_aB3eoh31Q,12060
31
- dataeval/detectors/drift/_uncertainty.py,sha256=BHlykJ-r7TGLJxdPfoazXnoAJ1qVDzbk5HjAMdsnHz8,5847
32
- dataeval/detectors/drift/updates.py,sha256=L1PnrPlIE1x6ujCc5mCwjcAZwadVTn-Zjb6MnTDvzJQ,2251
33
36
  dataeval/detectors/linters/__init__.py,sha256=xn2zPwUcmsuf-Jd9uw6AVI11C9z1b1Y9fYtuFnXenZ0,404
34
37
  dataeval/detectors/linters/duplicates.py,sha256=X5WSEvI_BHkLoXjkaHK6wTnSkx4IjpO_exMRjSlhc70,4963
35
38
  dataeval/detectors/linters/outliers.py,sha256=GaM9n8yPgBPzVOL_bxJCj0eCwobEEP4JHKHD9liRdlw,10130
36
39
  dataeval/detectors/ood/__init__.py,sha256=juCYBDs7CQEAtMhnEpPqF6uTrOIH9kTBSuQ_GRw6a8o,283
37
- dataeval/detectors/ood/ae.py,sha256=fTrUfFxv6xUqzKpwMC8rW3JrizA16M_bgzqLuBKMrS0,2944
38
- dataeval/detectors/ood/base.py,sha256=9b-Ljznf0lB1SXF4F_Aj3eJ4Y3ijGEDPMjucUsWOGJM,3051
40
+ dataeval/detectors/ood/ae.py,sha256=cJ7nq4iwTvW8uihHCUhGfTlKsAlthJ2tOhgSsB27cOY,2941
41
+ dataeval/detectors/ood/base.py,sha256=hx-TPJnUTZ7KcBkm8SbN1RGhtJyQN0XLajDyNqiZrJo,3042
39
42
  dataeval/detectors/ood/mixin.py,sha256=0_o-1HPvgf3-Lf1MSOIfjj5UB8LTLEBGYtJJfyCCzwc,5431
40
43
  dataeval/metadata/__init__.py,sha256=XDDmJbOZBNM6pL0r6Nbu6oMRoyAh22IDkPYGndNlkZU,316
41
44
  dataeval/metadata/_distance.py,sha256=MbXM9idsooNWnGLaTKg8j4ZqavUeJUjuW7EPW3-UQyg,4234
@@ -44,14 +47,14 @@ dataeval/metadata/_utils.py,sha256=BcGoYVfA4AkAWpInY5txOc3QBpsGf6cnnUAsHOQTJAE,1
44
47
  dataeval/metrics/__init__.py,sha256=8VC8q3HuJN3o_WN51Ae2_wXznl3RMXIvA5GYVcy7vr8,225
45
48
  dataeval/metrics/bias/__init__.py,sha256=329S1_3WnWqeU4-qVcbe0fMy4lDrj9uKslWHIQf93yg,839
46
49
  dataeval/metrics/bias/_balance.py,sha256=fREtoMLUZPOf_ivqNKwij6oPiKMTk02ECO5rWURf3KY,5541
47
- dataeval/metrics/bias/_completeness.py,sha256=BysXU2Jpw33n5dl3acJFEqF3mFGiJLsfG4n5Q2fkTaY,4608
48
- dataeval/metrics/bias/_coverage.py,sha256=PeUoOiaghUEdn6Ov8z2-am7-fnBVIPcFbJK7Ty5JObA,3647
50
+ dataeval/metrics/bias/_completeness.py,sha256=2cvOXe7fhtxZGH_4QBuiCafIeamxFBarMiUBuEP7QGI,4596
51
+ dataeval/metrics/bias/_coverage.py,sha256=v2x2hbOf2za9jFcSVSJUAoJ2BJfzzlCzt0mFIGtBL0A,3639
49
52
  dataeval/metrics/bias/_diversity.py,sha256=25udDKmel9IjeVT5nM4dOa1apda66QdRxBc922yuUvI,5830
50
- dataeval/metrics/bias/_parity.py,sha256=Kmzr9-NXxGzGtj6A-qUa88FTGaRyJU2xQj7tsplXJH4,11427
53
+ dataeval/metrics/bias/_parity.py,sha256=MKpqL4aoqEHkRl0vtGvVq9V3KBOtDFTtAo5I2GfIG4A,11443
51
54
  dataeval/metrics/estimators/__init__.py,sha256=Pnds8uIyAovt2fKqZjiHCIP_kVoBWlVllekYuK5UmmU,568
52
- dataeval/metrics/estimators/_ber.py,sha256=C30E5LiGGTAfo31zWFYDptDg0R7CTJGJ-a60YgzSkYY,5382
55
+ dataeval/metrics/estimators/_ber.py,sha256=7noeRyOJJYqrJ_jt90nRHtR2t2u5MIvTCmWt0_rd4EU,5370
53
56
  dataeval/metrics/estimators/_clusterer.py,sha256=1HrpihGTJ63IkNSOy4Ibw633Gllkm1RxKmoKT5MOgt0,1434
54
- dataeval/metrics/estimators/_divergence.py,sha256=-np4nWNtRrHnvo4xdWuTzkyJJmobyjDnVDBOMjtBS1Y,4003
57
+ dataeval/metrics/estimators/_divergence.py,sha256=t-Z_7Bq4V4FunxKlq7G4ThtgLany8n4iEU0n0afr7F8,3991
55
58
  dataeval/metrics/estimators/_uap.py,sha256=BULEBbJ9BQ1IcTeZf0x7iI60QHAWCccBOM97FIu9VXA,1928
56
59
  dataeval/metrics/stats/__init__.py,sha256=6tA_9nbbM5ObJ6cds8Y1VBtTQiTOxrpGQSFLu_lWGGA,1098
57
60
  dataeval/metrics/stats/_base.py,sha256=R-hxoEPLreZcxYxBfyjbKfdoGMMTPiqJ5g2zSO-1UYM,12541
@@ -66,19 +69,17 @@ dataeval/outputs/__init__.py,sha256=geHB5M3QOiFFaQGV4ZwDTTKpqZPvPePbqG7lzaPhaXQ,
66
69
  dataeval/outputs/_base.py,sha256=-Wa0gFcBVLbfWPMZyCql7x4vGsnkLP4pecsQIeUZ2_Y,5904
67
70
  dataeval/outputs/_bias.py,sha256=1OZpKncYTryjPLRHb4d6NlhE27uPT57gCob_5jtjKDI,10456
68
71
  dataeval/outputs/_drift.py,sha256=hXILED_soY8ppIQZgftQvmumtwDrTnABbYl-flIGEU4,4588
69
- dataeval/outputs/_estimators.py,sha256=mh-R08CgYtmq9ffANDMYR-V4vrZnSjOjEyOMiMDZ2Ic,3091
72
+ dataeval/outputs/_estimators.py,sha256=IQgSbOPHYzzxn1X64XF2XxQhDlWy6jwy6RNyoyvsipE,3111
70
73
  dataeval/outputs/_linters.py,sha256=k8lkd8EZ23q0m-HOD-FgqMcLQFy1UH7vws2ucLPyn08,6697
71
74
  dataeval/outputs/_metadata.py,sha256=ffZgpX8KWURPHXpOWjbvJ2KRqWQkS2nWuIjKUzoHhMI,1710
72
75
  dataeval/outputs/_ood.py,sha256=suLKVXULGtXH0rq9eXHI1d3d2jhGmItJtz4QiQd47A4,1718
73
76
  dataeval/outputs/_stats.py,sha256=_ItGjs9YaMHqjivkR1YBcSErD5ICfa_-iV9nq0l8bTM,17451
74
77
  dataeval/outputs/_utils.py,sha256=NfhYaGT2PZlhIs8ICKUsPWHZXjhWYDkEJqBDdqMeaOM,929
75
78
  dataeval/outputs/_workflows.py,sha256=K786mOgegxVi81diUA-qpbwGEkwa8YA7Fk4ttgjJeaY,10831
76
- dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
77
- dataeval/typing.py,sha256=W8rqFFkAqE5a5ar3MmB-O5gcMJqvoDKXC8Y0ggBqAKo,7216
78
79
  dataeval/utils/__init__.py,sha256=hRvyUK7b3d6JBEV5u47rFcOHEcmDYqAvZQw_T5pDAWw,264
79
- dataeval/utils/_array.py,sha256=ftX8S6HKAIUOuc1xd30VC3Pz5yUzRglDpCLisWY_tHs,5888
80
+ dataeval/utils/_array.py,sha256=bIDbnv15_hNzFn2Uc4WV1qRyFzubQj2nNYsFUDIdwT0,6335
80
81
  dataeval/utils/_bin.py,sha256=w3eJ2Szw5eapqQ0cGv731rhNgLFGW0cCz2pXo9I6CuY,7296
81
- dataeval/utils/_clusterer.py,sha256=XmyW2j_JLMYLds8QYgV0nAfdqxWfNR0ZI-6rnZsyHwU,5630
82
+ dataeval/utils/_clusterer.py,sha256=rUvEdyMwp95lffmt6xKMEwsjRXNoBS0n5mAS_HNOnck,5656
82
83
  dataeval/utils/_fast_mst.py,sha256=pv42flr1Uf5RBa9qDG0YLDXWH7Mr7a9zpauO1HqZXaY,8061
83
84
  dataeval/utils/_image.py,sha256=4uxTIOYZZlRJOfNmdA3ek3no3FrLWCK5un48kStMDt8,3578
84
85
  dataeval/utils/_method.py,sha256=9B9JQbgqWJBRhQJb7glajUtWaQzUTIUuvrZ9_bisxsM,394
@@ -102,12 +103,12 @@ dataeval/utils/datasets/_voc.py,sha256=pafY112O80isYkrdy7Quie9SBm_TmYhREuyl8Sxts
102
103
  dataeval/utils/torch/__init__.py,sha256=dn5mjCrFp0b1aL_UEURhONU0Ag0cmXoTOBSGagpkTiA,325
103
104
  dataeval/utils/torch/_blocks.py,sha256=HVhBTMMD5NA4qheMUgyol1KWiKZDIuc8k5j4RcMKmhk,1466
104
105
  dataeval/utils/torch/_gmm.py,sha256=XM68GNEP97EjaB1U49-ZXRb81d0CEFnPS910alrcB3g,3740
105
- dataeval/utils/torch/_internal.py,sha256=HuyBB7NWFI9sUrRbOCZFxOfZjRGPdqr5iF7_DT2S0wo,4159
106
+ dataeval/utils/torch/_internal.py,sha256=9rzlMeM8i3p-ctulh9WDQATMXtlp-Jk2pBX7NGC8l2I,4146
106
107
  dataeval/utils/torch/models.py,sha256=1idpXyjrYcCBSsbxxRUOto8xr4MJNjDEqQHiIXVU5Zc,9700
107
- dataeval/utils/torch/trainer.py,sha256=Oc2lK13uPGhmLYbmAqlPWyKxgG4YJFlnSXCqFHUZbdA,5528
108
+ dataeval/utils/torch/trainer.py,sha256=DRyPScGdE4o5Xo3BmD9p2PGOApzi1E-QfsBRNZ5IXW8,5544
108
109
  dataeval/workflows/__init__.py,sha256=ou8y0KO-d6W5lgmcyLjKlf-J_ckP3vilW7wHkgiDlZ4,255
109
110
  dataeval/workflows/sufficiency.py,sha256=j-R8dg4XE6a66p_oTXG2GNzgg3vGk85CTblxhFXaxog,8513
110
- dataeval-0.86.7.dist-info/LICENSE.txt,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
111
- dataeval-0.86.7.dist-info/METADATA,sha256=7FTgPB4Yj2zF7z2B6IIRe9WFc9VCBqrcFEIf5ByVHdw,5353
112
- dataeval-0.86.7.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
113
- dataeval-0.86.7.dist-info/RECORD,,
111
+ dataeval-0.86.8.dist-info/METADATA,sha256=rCf58-uzgjsTNZkY3LOBMSi5fhQ2cdAtnrrDI_eYR_I,5925
112
+ dataeval-0.86.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
113
+ dataeval-0.86.8.dist-info/licenses/LICENSE.txt,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
114
+ dataeval-0.86.8.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.9.1
2
+ Generator: hatchling 1.27.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any