dataeval 0.74.2__tar.gz → 0.76.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. {dataeval-0.74.2 → dataeval-0.76.0}/LICENSE.txt +2 -2
  2. dataeval-0.76.0/PKG-INFO +137 -0
  3. dataeval-0.76.0/README.md +98 -0
  4. {dataeval-0.74.2 → dataeval-0.76.0}/pyproject.toml +16 -15
  5. dataeval-0.76.0/src/dataeval/__init__.py +40 -0
  6. dataeval-0.76.0/src/dataeval/detectors/drift/__init__.py +22 -0
  7. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/detectors/drift/base.py +3 -3
  8. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/detectors/drift/cvm.py +1 -1
  9. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/detectors/drift/ks.py +3 -2
  10. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/detectors/drift/mmd.py +9 -7
  11. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/detectors/drift/torch.py +12 -12
  12. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/detectors/drift/uncertainty.py +5 -4
  13. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/detectors/drift/updates.py +1 -1
  14. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/detectors/linters/clusterer.py +5 -9
  15. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/detectors/linters/duplicates.py +10 -14
  16. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/detectors/linters/outliers.py +100 -5
  17. dataeval-0.76.0/src/dataeval/detectors/ood/__init__.py +8 -0
  18. dataeval-0.74.2/src/dataeval/detectors/ood/ae_torch.py → dataeval-0.76.0/src/dataeval/detectors/ood/ae.py +6 -4
  19. dataeval-0.74.2/src/dataeval/detectors/ood/base_torch.py → dataeval-0.76.0/src/dataeval/detectors/ood/base.py +7 -22
  20. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/detectors/ood/metadata_ks_compare.py +34 -42
  21. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/detectors/ood/metadata_least_likely.py +3 -3
  22. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/detectors/ood/metadata_ood_mi.py +6 -5
  23. dataeval-0.74.2/src/dataeval/detectors/ood/base.py → dataeval-0.76.0/src/dataeval/detectors/ood/mixin.py +11 -72
  24. dataeval-0.76.0/src/dataeval/detectors/ood/output.py +63 -0
  25. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/interop.py +7 -6
  26. dataeval-0.74.2/src/dataeval/logging.py → dataeval-0.76.0/src/dataeval/log.py +2 -0
  27. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/metrics/__init__.py +3 -3
  28. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/metrics/bias/__init__.py +10 -13
  29. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/metrics/bias/balance.py +13 -11
  30. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/metrics/bias/coverage.py +53 -5
  31. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/metrics/bias/diversity.py +56 -24
  32. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/metrics/bias/parity.py +20 -17
  33. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/metrics/estimators/ber.py +7 -4
  34. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/metrics/estimators/divergence.py +4 -4
  35. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/metrics/estimators/uap.py +4 -4
  36. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/metrics/stats/__init__.py +19 -19
  37. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/metrics/stats/base.py +28 -12
  38. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/metrics/stats/boxratiostats.py +13 -14
  39. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/metrics/stats/datasetstats.py +49 -20
  40. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/metrics/stats/dimensionstats.py +8 -8
  41. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/metrics/stats/hashstats.py +14 -10
  42. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/metrics/stats/labelstats.py +94 -11
  43. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/metrics/stats/pixelstats.py +11 -14
  44. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/metrics/stats/visualstats.py +10 -13
  45. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/output.py +23 -14
  46. dataeval-0.76.0/src/dataeval/utils/__init__.py +9 -0
  47. dataeval-0.76.0/src/dataeval/utils/dataset/__init__.py +7 -0
  48. {dataeval-0.74.2/src/dataeval/utils/torch → dataeval-0.76.0/src/dataeval/utils/dataset}/datasets.py +2 -0
  49. dataeval-0.76.0/src/dataeval/utils/dataset/read.py +63 -0
  50. dataeval-0.74.2/src/dataeval/utils/split_dataset.py → dataeval-0.76.0/src/dataeval/utils/dataset/split.py +38 -30
  51. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/utils/image.py +2 -2
  52. dataeval-0.76.0/src/dataeval/utils/metadata.py +581 -0
  53. dataeval-0.74.2/src/dataeval/metrics/bias/metadata_utils.py → dataeval-0.76.0/src/dataeval/utils/plot.py +91 -71
  54. dataeval-0.76.0/src/dataeval/utils/torch/__init__.py +10 -0
  55. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/utils/torch/gmm.py +29 -6
  56. dataeval-0.74.2/src/dataeval/utils/torch/utils.py → dataeval-0.76.0/src/dataeval/utils/torch/internal.py +82 -58
  57. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/utils/torch/models.py +10 -8
  58. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/utils/torch/trainer.py +6 -85
  59. dataeval-0.76.0/src/dataeval/workflows/__init__.py +7 -0
  60. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/workflows/sufficiency.py +18 -8
  61. dataeval-0.74.2/PKG-INFO +0 -120
  62. dataeval-0.74.2/README.md +0 -81
  63. dataeval-0.74.2/src/dataeval/__init__.py +0 -36
  64. dataeval-0.74.2/src/dataeval/detectors/drift/__init__.py +0 -20
  65. dataeval-0.74.2/src/dataeval/detectors/ood/__init__.py +0 -15
  66. dataeval-0.74.2/src/dataeval/metrics/bias/metadata_preprocessing.py +0 -285
  67. dataeval-0.74.2/src/dataeval/utils/__init__.py +0 -18
  68. dataeval-0.74.2/src/dataeval/utils/gmm.py +0 -26
  69. dataeval-0.74.2/src/dataeval/utils/metadata.py +0 -278
  70. dataeval-0.74.2/src/dataeval/utils/torch/__init__.py +0 -25
  71. dataeval-0.74.2/src/dataeval/workflows/__init__.py +0 -10
  72. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/detectors/__init__.py +2 -2
  73. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/detectors/linters/__init__.py +4 -4
  74. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/detectors/linters/merged_stats.py +0 -0
  75. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/metrics/estimators/__init__.py +2 -2
  76. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/py.typed +0 -0
  77. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/utils/shared.py +0 -0
  78. {dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/utils/torch/blocks.py +0 -0
{dataeval-0.74.2 → dataeval-0.76.0}/LICENSE.txt

@@ -1,6 +1,6 @@
  MIT License

- Copyright (c) 2024 ARiA
+ Copyright (c) 2025 ARiA

  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
+ SOFTWARE.
dataeval-0.76.0/PKG-INFO

@@ -0,0 +1,137 @@
+ Metadata-Version: 2.1
+ Name: dataeval
+ Version: 0.76.0
+ Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
+ Home-page: https://dataeval.ai/
+ License: MIT
+ Author: Andrew Weng
+ Author-email: andrew.weng@ariacoustics.com
+ Maintainer: ARiA
+ Maintainer-email: dataeval@ariacoustics.com
+ Requires-Python: >=3.9,<3.13
+ Classifier: Development Status :: 4 - Beta
+ Classifier: Intended Audience :: Science/Research
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3 :: Only
+ Classifier: Topic :: Scientific/Engineering
+ Provides-Extra: all
+ Requires-Dist: matplotlib ; extra == "all"
+ Requires-Dist: numpy (>=1.24.2)
+ Requires-Dist: pillow (>=10.3.0)
+ Requires-Dist: requests
+ Requires-Dist: scikit-learn (>=1.5.0)
+ Requires-Dist: scipy (>=1.10)
+ Requires-Dist: torch (>=2.2.0)
+ Requires-Dist: torchvision (>=0.17.0)
+ Requires-Dist: tqdm
+ Requires-Dist: typing-extensions (>=4.12) ; python_version >= "3.9" and python_version < "4.0"
+ Requires-Dist: xxhash (>=3.3)
+ Project-URL: Documentation, https://dataeval.readthedocs.io/
+ Project-URL: Repository, https://github.com/aria-ml/dataeval/
+ Description-Content-Type: text/markdown
+
+ # DataEval
+
+ To view our extensive collection of tutorials, how-to's, explanation guides, and reference material, please visit our documentation on **[Read the Docs](https://dataeval.readthedocs.io/)**
+
+ ## About DataEval
+
+ <!-- start tagline -->
+
+ DataEval curates datasets to train and test performant, robust, unbiased and reliable AI models and monitors for data shifts that impact performance of deployed models.
+
+ <!-- end tagline -->
+
+ ### Our mission
+
+ <!-- start needs -->
+
+ DataEval is an effective, powerful, and reliable set of tools for any T&E engineer. Throughout all stages of the machine learning lifecycle, DataEval supports model development, data analysis, and monitoring with state-of-the-art algorithms to help you solve difficult problems. With a focus on computer vision tasks, DataEval provides simple, but effective metrics for performance estimation, bias detection, and dataset linting.
+
+ <!-- end needs -->
+
+ <!-- start JATIC interop -->
+ DataEval is easy to install, supports a wide range of Python versions, and is compatible with many of the most popular packages in the scientific and T&E communities.
+ DataEval also has native interopability between JATIC's suite of tools when using MAITE-compliant datasets and models.
+ <!-- end JATIC interop -->
+
+ ## Getting Started
+
+ **Python versions:** 3.9 - 3.12
+
+ **Supported packages**: *NumPy*, *Pandas*, *Sci-kit learn*, *MAITE*, *NRTK*, *Gradient*
+
+ Choose your preferred method of installation below or follow our [installation guide](https://dataeval.readthedocs.io/en/v0.74.2/installation.html).
+
+ * [Installing with pip](#installing-with-pip)
+ * [Installing with conda/mamba](#installing-with-conda)
+ * [Installing from GitHub](#installing-from-github)
+
+ ### **Installing with pip**
+
+ You can install DataEval directly from pypi.org using the following command. The optional dependencies of DataEval are `all`.
+
+ ```bash
+ pip install dataeval[all]
+ ```
+
+ ### **Installing with conda**
+
+ DataEval can be installed in a Conda/Mamba environment using the provided `environment.yaml` file. As some dependencies
+ are installed from the `pytorch` channel, the channel is specified in the below example.
+
+ ```bash
+ micromamba create -f environment\environment.yaml -c pytorch
+ ```
+
+ ### **Installing from GitHub**
+
+ To install DataEval from source locally on Ubuntu, you will need `git-lfs` to download larger, binary source files and `poetry` for project dependency management.
+
+ ```bash
+ sudo apt-get install git-lfs
+ pip install poetry
+ ```
+
+ Pull the source down and change to the DataEval project directory.
+
+ ```bash
+ git clone https://github.com/aria-ml/dataeval.git
+ cd dataeval
+ ```
+
+ Install DataEval with optional dependencies for development.
+
+ ```bash
+ poetry install --all-extras --with dev
+ ```
+
+ Now that DataEval is installed, you can run commands in the poetry virtual environment by prefixing shell commands with `poetry run`, or activate the virtual environment directly in the shell.
+
+ ```bash
+ poetry shell
+ ```
+
+ ## Contact Us
+
+ If you have any questions, feel free to reach out to the people below:
+
+ * **POC**: Scott Swan @scott.swan
+ * **DPOC**: Andrew Weng @aweng
+
+ ## Acknowledgement
+
+ <!-- start acknowledgement -->
+
+ ### CDAO Funding Acknowledgement
+
+ This material is based upon work supported by the Chief Digital and Artificial Intelligence Office under Contract No. W519TC-23-9-2033. The views and conclusions contained herein are those of the author(s) and should not be interpreted as necessarily representing the official policies or endorsements, either expressed or implied, of the U.S. Government.
+
+ <!-- end acknowledgement -->
+
dataeval-0.76.0/README.md

@@ -0,0 +1,98 @@
+ # DataEval
+
+ To view our extensive collection of tutorials, how-to's, explanation guides, and reference material, please visit our documentation on **[Read the Docs](https://dataeval.readthedocs.io/)**
+
+ ## About DataEval
+
+ <!-- start tagline -->
+
+ DataEval curates datasets to train and test performant, robust, unbiased and reliable AI models and monitors for data shifts that impact performance of deployed models.
+
+ <!-- end tagline -->
+
+ ### Our mission
+
+ <!-- start needs -->
+
+ DataEval is an effective, powerful, and reliable set of tools for any T&E engineer. Throughout all stages of the machine learning lifecycle, DataEval supports model development, data analysis, and monitoring with state-of-the-art algorithms to help you solve difficult problems. With a focus on computer vision tasks, DataEval provides simple, but effective metrics for performance estimation, bias detection, and dataset linting.
+
+ <!-- end needs -->
+
+ <!-- start JATIC interop -->
+ DataEval is easy to install, supports a wide range of Python versions, and is compatible with many of the most popular packages in the scientific and T&E communities.
+ DataEval also has native interopability between JATIC's suite of tools when using MAITE-compliant datasets and models.
+ <!-- end JATIC interop -->
+
+ ## Getting Started
+
+ **Python versions:** 3.9 - 3.12
+
+ **Supported packages**: *NumPy*, *Pandas*, *Sci-kit learn*, *MAITE*, *NRTK*, *Gradient*
+
+ Choose your preferred method of installation below or follow our [installation guide](https://dataeval.readthedocs.io/en/v0.74.2/installation.html).
+
+ * [Installing with pip](#installing-with-pip)
+ * [Installing with conda/mamba](#installing-with-conda)
+ * [Installing from GitHub](#installing-from-github)
+
+ ### **Installing with pip**
+
+ You can install DataEval directly from pypi.org using the following command. The optional dependencies of DataEval are `all`.
+
+ ```bash
+ pip install dataeval[all]
+ ```
+
+ ### **Installing with conda**
+
+ DataEval can be installed in a Conda/Mamba environment using the provided `environment.yaml` file. As some dependencies
+ are installed from the `pytorch` channel, the channel is specified in the below example.
+
+ ```bash
+ micromamba create -f environment\environment.yaml -c pytorch
+ ```
+
+ ### **Installing from GitHub**
+
+ To install DataEval from source locally on Ubuntu, you will need `git-lfs` to download larger, binary source files and `poetry` for project dependency management.
+
+ ```bash
+ sudo apt-get install git-lfs
+ pip install poetry
+ ```
+
+ Pull the source down and change to the DataEval project directory.
+
+ ```bash
+ git clone https://github.com/aria-ml/dataeval.git
+ cd dataeval
+ ```
+
+ Install DataEval with optional dependencies for development.
+
+ ```bash
+ poetry install --all-extras --with dev
+ ```
+
+ Now that DataEval is installed, you can run commands in the poetry virtual environment by prefixing shell commands with `poetry run`, or activate the virtual environment directly in the shell.
+
+ ```bash
+ poetry shell
+ ```
+
+ ## Contact Us
+
+ If you have any questions, feel free to reach out to the people below:
+
+ * **POC**: Scott Swan @scott.swan
+ * **DPOC**: Andrew Weng @aweng
+
+ ## Acknowledgement
+
+ <!-- start acknowledgement -->
+
+ ### CDAO Funding Acknowledgement
+
+ This material is based upon work supported by the Chief Digital and Artificial Intelligence Office under Contract No. W519TC-23-9-2033. The views and conclusions contained herein are those of the author(s) and should not be interpreted as necessarily representing the official policies or endorsements, either expressed or implied, of the U.S. Government.
+
+ <!-- end acknowledgement -->
{dataeval-0.74.2 → dataeval-0.76.0}/pyproject.toml

@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "dataeval"
- version = "0.74.2" # dynamic
+ version = "0.76.0" # dynamic
  description = "DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks"
  license = "MIT"
  readme = "README.md"
@@ -42,22 +42,22 @@ packages = [
  [tool.poetry.dependencies]
  # required
  python = ">=3.9,<3.13"
- numpy = {version = ">=1.24.3"}
+ numpy = {version = ">=1.24.2"}
  pillow = {version = ">=10.3.0"}
+ requests = {version = "*"}
  scipy = {version = ">=1.10"}
  scikit-learn = {version = ">=1.5.0"}
+ torch = {version = ">=2.2.0", source = "pytorch"}
+ torchvision = {version = ">=0.17.0", source = "pytorch"}
  tqdm = {version = "*"}
- typing-extensions = {version = ">=4.12", python = ">=3.9,<3.10"} # ParamSpec
+ typing-extensions = {version = ">=4.12", python = "^3.9"} # ParamSpec
  xxhash = {version = ">=3.3"}

  # optional
  matplotlib = {version = "*", optional = true}
- torch = {version = ">=2.2.0", source = "pytorch", optional = true}
- torchvision = {version = ">=0.17.0", source = "pytorch", optional = true}

  [tool.poetry.extras]
- torch = ["torch", "torchvision"]
- all = ["matplotlib", "torch", "torchvision"]
+ all = ["matplotlib"]

  [tool.poetry.group.dev]
  optional = true
@@ -65,9 +65,10 @@ optional = true
  [tool.poetry.group.dev.dependencies]
  nox = {version = "*", extras = ["uv"]}
  uv = {version = "*"}
- poetry = {version = "*"}
+ poetry = {version = "<2"}
  poetry-lock-groups-plugin = {version = "*"}
  poetry2conda = {version = "*"}
+ numpy = {version = ">=2.0.2"}
  # lint
  ruff = {version = "*"}
  codespell = {version = "*", extras = ["toml"]}
@@ -76,26 +77,27 @@ pytest = {version = "*"}
  pytest-cov = {version = "*"}
  pytest-xdist = {version = "*"}
  coverage = {version = "*", extras = ["toml"]}
- torchmetrics = {version = ">=1.0.0", source = "pytorch"}
  # type
  pyright = {version = "*", extras = ["nodejs"]}
  # prototype
  maite = {version = "*"}
  pandas = {version = "*"}
  seaborn = {version = "*"}
- numpy = {version = ">=2.0.2"}
  # docs
  certifi = {version = ">=2024.07.04"}
  enum_tools = {version = ">=0.12.0", extras = ["sphinx"]}
  ipykernel = {version = ">=6.26.0"}
  ipywidgets = {version = ">=8.1.1"}
+ jinja2 = {version = ">=3.1.5"}
  jupyter-client = {version = ">=8.6.0"}
  jupyter-cache = {version = "*"}
  myst-nb = {version = ">=1.0.0"}
- pydata-sphinx-theme = {version = ">=0.15.4"}
+ sphinx-immaterial = {version = "*"}
+ sphinx-autoapi = {version = "*"}
  sphinx-design = {version = "*"}
  sphinx-tabs = {version = "*"}
  Sphinx = {version = ">=7.2.6"}
+ torchmetrics = {version = ">=1.0.0", source = "pytorch"}
  markupsafe = {version = "<3.0.2", optional = true}

  [[tool.poetry.source]]
@@ -136,8 +138,7 @@ parallel = true
  [tool.coverage.report]
  exclude_also = [
      "raise NotImplementedError",
-     "if _IS_TORCH_AVAILABLE",
-     "if _IS_TORCHVISION_AVAILABLE",
+     ": \\.\\.\\."
  ]
  include = ["*/src/dataeval/*"]
  omit = [
@@ -155,7 +156,7 @@ exclude = [
      ".jupyter_cache",
      "*env*",
      "output",
-     "_build",
+     "build",
      ".nox",
      ".tox",
      "prototype",
@@ -185,7 +186,7 @@ docstring-code-format = true
  docstring-code-line-length = "dynamic"

  [tool.codespell]
- skip = './*env*,./prototype,./output,./docs/_build,./docs/.jupyter_cache,CHANGELOG.md,poetry.lock,*.html'
+ skip = './*env*,./prototype,./output,./docs/build,./docs/source/.jupyter_cache,CHANGELOG.md,poetry.lock,*.html'
  ignore-words-list = ["Hart"]

  [build-system]
dataeval-0.76.0/src/dataeval/__init__.py

@@ -0,0 +1,40 @@
+ """
+ DataEval provides a simple interface to characterize image data and its impact on model performance
+ across classification and object-detection tasks. It also provides capabilities to select and curate
+ datasets to test and train performant, robust, unbiased and reliable AI models and monitor for data
+ shifts that impact performance of deployed models.
+ """
+
+ from __future__ import annotations
+
+ __all__ = ["detectors", "log", "metrics", "utils", "workflows"]
+ __version__ = "0.76.0"
+
+ import logging
+
+ from dataeval import detectors, metrics, utils, workflows
+
+ logging.getLogger(__name__).addHandler(logging.NullHandler())
+
+
+ def log(level: int = logging.DEBUG, handler: logging.Handler | None = None) -> None:
+     """
+     Helper for quickly adding a StreamHandler to the logger. Useful for debugging.
+
+     Parameters
+     ----------
+     level : int, default logging.DEBUG(10)
+         Set the logging level for the logger.
+     handler : logging.Handler, optional
+         Sets the logging handler for the logger if provided, otherwise logger will be
+         provided with a StreamHandler.
+     """
+     import logging
+
+     logger = logging.getLogger(__name__)
+     if handler is None:
+         handler = logging.StreamHandler() if handler is None else handler
+     handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
+     logger.addHandler(handler)
+     logger.setLevel(level)
+     logger.debug(f"Added logging handler {handler} to logger: {__name__}")
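
For reference, a minimal usage sketch of the `log()` helper added above; the level shown is illustrative (by default it attaches a `logging.StreamHandler` at `DEBUG`):

```python
import logging

import dataeval

print(dataeval.__version__)       # "0.76.0" per the __init__ above

# Route dataeval's internal log messages to a stream handler at INFO level;
# pass a custom handler (e.g. logging.FileHandler) to send them elsewhere.
dataeval.log(level=logging.INFO)
```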
@@ -0,0 +1,22 @@
1
+ """
2
+ :term:`Drift` detectors identify if the statistical properties of the data has changed.
3
+ """
4
+
5
+ __all__ = [
6
+ "DriftCVM",
7
+ "DriftKS",
8
+ "DriftMMD",
9
+ "DriftMMDOutput",
10
+ "DriftOutput",
11
+ "DriftUncertainty",
12
+ "preprocess_drift",
13
+ "updates",
14
+ ]
15
+
16
+ from dataeval.detectors.drift import updates
17
+ from dataeval.detectors.drift.base import DriftOutput
18
+ from dataeval.detectors.drift.cvm import DriftCVM
19
+ from dataeval.detectors.drift.ks import DriftKS
20
+ from dataeval.detectors.drift.mmd import DriftMMD, DriftMMDOutput
21
+ from dataeval.detectors.drift.torch import preprocess_drift
22
+ from dataeval.detectors.drift.uncertainty import DriftUncertainty
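
Since the per-module `__all__` lists are emptied in the hunks that follow, this package-level module appears to be the intended public import surface. A short sketch of the import style it supports (detector constructor arguments are not shown in this diff and are omitted here):

```python
# Import drift detectors from the package namespace rather than the private modules.
from dataeval.detectors.drift import DriftCVM, DriftKS, DriftMMD, DriftUncertainty, updates
```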
{dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/detectors/drift/base.py

@@ -8,7 +8,7 @@ Licensed under Apache Software License (Apache 2.0)

  from __future__ import annotations

- __all__ = ["DriftOutput"]
+ __all__ = []

  from abc import ABC, abstractmethod
  from dataclasses import dataclass
@@ -45,7 +45,7 @@ class UpdateStrategy(ABC):
  @dataclass(frozen=True)
  class DriftBaseOutput(Output):
      """
-     Base output class for Drift detector classes
+     Base output class for Drift Detector classes

      Attributes
      ----------
@@ -64,7 +64,7 @@ class DriftBaseOutput(Output):
  @dataclass(frozen=True)
  class DriftOutput(DriftBaseOutput):
      """
-     Output class for :class:`DriftCVM`, :class:`DriftKS`, and :class:`DriftUncertainty` drift detectors
+     Output class for :class:`DriftCVM`, :class:`DriftKS`, and :class:`DriftUncertainty` drift detectors.

      Attributes
      ----------

{dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/detectors/drift/cvm.py

@@ -8,7 +8,7 @@ Licensed under Apache Software License (Apache 2.0)

  from __future__ import annotations

- __all__ = ["DriftCVM"]
+ __all__ = []

  from typing import Callable, Literal


{dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/detectors/drift/ks.py

@@ -8,7 +8,7 @@ Licensed under Apache Software License (Apache 2.0)

  from __future__ import annotations

- __all__ = ["DriftKS"]
+ __all__ = []

  from typing import Callable, Literal

@@ -22,7 +22,8 @@ from dataeval.interop import to_numpy

  class DriftKS(BaseDriftUnivariate):
      """
-     :term:`Drift` detector employing the Kolmogorov-Smirnov (KS) distribution test.
+     :term:`Drift` detector employing the :term:`Kolmogorov-Smirnov (KS) \
+     distribution<Kolmogorov-Smirnov (K-S) test>` test.

      The KS test detects changes in the maximum distance between two data
      distributions with Bonferroni or :term:`False Discovery Rate (FDR)` correction

{dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/detectors/drift/mmd.py

@@ -8,7 +8,7 @@ Licensed under Apache Software License (Apache 2.0)

  from __future__ import annotations

- __all__ = ["DriftMMD", "DriftMMDOutput"]
+ __all__ = []

  from dataclasses import dataclass
  from typing import Callable
@@ -17,15 +17,16 @@ import torch
  from numpy.typing import ArrayLike

  from dataeval.detectors.drift.base import BaseDrift, DriftBaseOutput, UpdateStrategy, preprocess_x, update_x_ref
- from dataeval.detectors.drift.torch import _GaussianRBF, _mmd2_from_kernel_matrix, get_device
+ from dataeval.detectors.drift.torch import GaussianRBF, mmd2_from_kernel_matrix
  from dataeval.interop import as_numpy
  from dataeval.output import set_metadata
+ from dataeval.utils.torch.internal import get_device


  @dataclass(frozen=True)
  class DriftMMDOutput(DriftBaseOutput):
      """
-     Output class for :class:`DriftMMD` :term:`drift<Drift>` detector
+     Output class for :class:`DriftMMD` :term:`drift<Drift>` detector.

      Attributes
      ----------
@@ -50,7 +51,8 @@ class DriftMMDOutput(DriftBaseOutput):

  class DriftMMD(BaseDrift):
      """
-     :term:`Maximum Mean Discrepancy (MMD) Drift Detection` algorithm using a permutation test.
+     :term:`Maximum Mean Discrepancy (MMD) Drift Detection` algorithm \
+     using a permutation test.

      Parameters
      ----------
@@ -109,7 +111,7 @@

          # initialize kernel
          sigma_tensor = torch.from_numpy(as_numpy(sigma)).to(self.device) if sigma is not None else None
-         self._kernel = _GaussianRBF(sigma_tensor).to(self.device)
+         self._kernel = GaussianRBF(sigma_tensor).to(self.device)

          # compute kernel matrix for the reference data
          if self._infer_sigma or isinstance(sigma_tensor, torch.Tensor):
@@ -150,9 +152,9 @@
          n = x.shape[0]
          kernel_mat = self._kernel_matrix(x_ref, torch.from_numpy(x).to(self.device))
          kernel_mat = kernel_mat - torch.diag(kernel_mat.diag())  # zero diagonal
-         mmd2 = _mmd2_from_kernel_matrix(kernel_mat, n, permute=False, zero_diag=False)
+         mmd2 = mmd2_from_kernel_matrix(kernel_mat, n, permute=False, zero_diag=False)
          mmd2_permuted = torch.Tensor(
-             [_mmd2_from_kernel_matrix(kernel_mat, n, permute=True, zero_diag=False) for _ in range(self.n_permutations)]
+             [mmd2_from_kernel_matrix(kernel_mat, n, permute=True, zero_diag=False) for _ in range(self.n_permutations)]
          )
          mmd2, mmd2_permuted = mmd2.detach().cpu(), mmd2_permuted.detach().cpu()
          p_val = (mmd2 <= mmd2_permuted).float().mean()
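
The `score` hunk above shows the permutation test behind `DriftMMD`. A standalone sketch of the same idea using the now-public helpers; the feature shapes, the sigma value, and the direct kernel-matrix construction over the concatenated data are assumptions for illustration only:

```python
import torch

from dataeval.detectors.drift.torch import GaussianRBF, mmd2_from_kernel_matrix

x_ref = torch.randn(64, 8)  # hypothetical reference features
x = torch.randn(32, 8)      # hypothetical test features

# Kernel matrix over the concatenated reference and test samples.
kernel = GaussianRBF(torch.tensor([1.0]))
data = torch.cat([x_ref, x], dim=0)
kernel_mat = kernel(data, data)
kernel_mat = kernel_mat - torch.diag(kernel_mat.diag())  # zero the diagonal

# Observed MMD^2 versus a permutation null; the p-value is the fraction of
# permutations whose MMD^2 is at least as large as the observed statistic.
mmd2 = mmd2_from_kernel_matrix(kernel_mat, x.shape[0], permute=False, zero_diag=False)
mmd2_null = torch.stack(
    [mmd2_from_kernel_matrix(kernel_mat, x.shape[0], permute=True, zero_diag=False) for _ in range(100)]
)
p_val = (mmd2 <= mmd2_null).float().mean()
```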
{dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/detectors/drift/torch.py

@@ -17,10 +17,10 @@ import torch
  import torch.nn as nn
  from numpy.typing import NDArray

- from dataeval.utils.torch.utils import get_device, predict_batch
+ from dataeval.utils.torch.internal import get_device, predict_batch


- def _mmd2_from_kernel_matrix(
+ def mmd2_from_kernel_matrix(
      kernel_mat: torch.Tensor, m: int, permute: bool = False, zero_diag: bool = True
  ) -> torch.Tensor:
      """
@@ -127,7 +127,7 @@ def _squared_pairwise_distance(

  def sigma_median(x: torch.Tensor, y: torch.Tensor, dist: torch.Tensor) -> torch.Tensor:
      """
-     Bandwidth estimation using the median heuristic :cite:t:`Gretton2012`.
+     Bandwidth estimation using the median heuristic `Gretton2012`

      Parameters
      ----------
@@ -151,7 +151,7 @@ def sigma_median(x: torch.Tensor, y: torch.Tensor, dist: torch.Tensor) -> torch.
      return sigma


- class _GaussianRBF(nn.Module):
+ class GaussianRBF(nn.Module):
      """
      Gaussian RBF kernel: k(x,y) = exp(-(1/(2*sigma^2)||x-y||^2).

@@ -179,18 +179,18 @@ class _GaussianRBF(nn.Module):
      ) -> None:
          super().__init__()
          init_sigma_fn = sigma_median if init_sigma_fn is None else init_sigma_fn
-         self.config = {
+         self.config: dict[str, Any] = {
              "sigma": sigma,
              "trainable": trainable,
              "init_sigma_fn": init_sigma_fn,
          }
          if sigma is None:
-             self.log_sigma = nn.Parameter(torch.empty(1), requires_grad=trainable)
-             self.init_required = True
+             self.log_sigma: nn.Parameter = nn.Parameter(torch.empty(1), requires_grad=trainable)
+             self.init_required: bool = True
          else:
              sigma = sigma.reshape(-1) # [Ns,]
-             self.log_sigma = nn.Parameter(sigma.log(), requires_grad=trainable)
-             self.init_required = False
+             self.log_sigma: nn.Parameter = nn.Parameter(sigma.log(), requires_grad=trainable)
+             self.init_required: bool = False
          self.init_sigma_fn = init_sigma_fn
          self.trainable = trainable

@@ -200,8 +200,8 @@ class _GaussianRBF(nn.Module):

      def forward(
          self,
-         x: np.ndarray | torch.Tensor,
-         y: np.ndarray | torch.Tensor,
+         x: np.ndarray[Any, Any] | torch.Tensor,
+         y: np.ndarray[Any, Any] | torch.Tensor,
          infer_sigma: bool = False,
      ) -> torch.Tensor:
          x, y = torch.as_tensor(x), torch.as_tensor(y)
@@ -213,7 +213,7 @@ class _GaussianRBF(nn.Module):
              sigma = self.init_sigma_fn(x, y, dist)
              with torch.no_grad():
                  self.log_sigma.copy_(sigma.log().clone())
-             self.init_required = False
+             self.init_required: bool = False

          gamma = 1.0 / (2.0 * self.sigma**2) # [Ns,]
          # TODO: do matrix multiplication after all?
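
As a quick check that the renamed `GaussianRBF` matches its documented formula k(x, y) = exp(-||x - y||^2 / (2 * sigma^2)), a small hand-computed example (the input values are made up):

```python
import torch

from dataeval.detectors.drift.torch import GaussianRBF

x = torch.tensor([[0.0, 0.0]])
y = torch.tensor([[3.0, 4.0]])  # squared distance ||x - y||^2 = 25

kernel_value = GaussianRBF(torch.tensor([2.0]))(x, y)     # kernel matrix, shape (1, 1)
expected = torch.exp(torch.tensor(-25.0 / (2 * 2.0**2)))  # exp(-3.125) ~= 0.0439
print(kernel_value.item(), expected.item())
```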
{dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/detectors/drift/uncertainty.py

@@ -8,7 +8,7 @@ Licensed under Apache Software License (Apache 2.0)

  from __future__ import annotations

- __all__ = ["DriftUncertainty"]
+ __all__ = []

  from functools import partial
  from typing import Callable, Literal
@@ -20,7 +20,8 @@ from scipy.stats import entropy

  from dataeval.detectors.drift.base import DriftOutput, UpdateStrategy
  from dataeval.detectors.drift.ks import DriftKS
- from dataeval.detectors.drift.torch import get_device, preprocess_drift
+ from dataeval.detectors.drift.torch import preprocess_drift
+ from dataeval.utils.torch.internal import get_device


  def classifier_uncertainty(
@@ -65,8 +66,8 @@

  class DriftUncertainty:
      """
-     Test for a change in the number of instances falling into regions on which the
-     model is uncertain.
+     Test for a change in the number of instances falling into regions on which \
+     the model is uncertain.

      Performs a K-S test on prediction entropies.


{dataeval-0.74.2 → dataeval-0.76.0}/src/dataeval/detectors/drift/updates.py

@@ -1,5 +1,5 @@
  """
- Update strategies inform how the :term:`drift<Drift>` detector classes update the reference data when monitoring
+ Update strategies inform how the :term:`drift<Drift>` detector classes update the reference data when monitoring.
  for drift.
  """