dataeval 0.61.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. dataeval-0.61.0/LICENSE.txt +21 -0
  2. dataeval-0.61.0/PKG-INFO +114 -0
  3. dataeval-0.61.0/README.md +72 -0
  4. dataeval-0.61.0/pyproject.toml +177 -0
  5. dataeval-0.61.0/src/dataeval/__init__.py +18 -0
  6. dataeval-0.61.0/src/dataeval/_internal/detectors/__init__.py +0 -0
  7. dataeval-0.61.0/src/dataeval/_internal/detectors/clusterer.py +469 -0
  8. dataeval-0.61.0/src/dataeval/_internal/detectors/drift/__init__.py +0 -0
  9. dataeval-0.61.0/src/dataeval/_internal/detectors/drift/base.py +265 -0
  10. dataeval-0.61.0/src/dataeval/_internal/detectors/drift/cvm.py +97 -0
  11. dataeval-0.61.0/src/dataeval/_internal/detectors/drift/ks.py +100 -0
  12. dataeval-0.61.0/src/dataeval/_internal/detectors/drift/mmd.py +166 -0
  13. dataeval-0.61.0/src/dataeval/_internal/detectors/drift/torch.py +310 -0
  14. dataeval-0.61.0/src/dataeval/_internal/detectors/drift/uncertainty.py +149 -0
  15. dataeval-0.61.0/src/dataeval/_internal/detectors/duplicates.py +49 -0
  16. dataeval-0.61.0/src/dataeval/_internal/detectors/linter.py +78 -0
  17. dataeval-0.61.0/src/dataeval/_internal/detectors/ood/__init__.py +0 -0
  18. dataeval-0.61.0/src/dataeval/_internal/detectors/ood/ae.py +77 -0
  19. dataeval-0.61.0/src/dataeval/_internal/detectors/ood/aegmm.py +69 -0
  20. dataeval-0.61.0/src/dataeval/_internal/detectors/ood/base.py +199 -0
  21. dataeval-0.61.0/src/dataeval/_internal/detectors/ood/llr.py +284 -0
  22. dataeval-0.61.0/src/dataeval/_internal/detectors/ood/vae.py +86 -0
  23. dataeval-0.61.0/src/dataeval/_internal/detectors/ood/vaegmm.py +79 -0
  24. dataeval-0.61.0/src/dataeval/_internal/flags.py +47 -0
  25. dataeval-0.61.0/src/dataeval/_internal/metrics/__init__.py +0 -0
  26. dataeval-0.61.0/src/dataeval/_internal/metrics/base.py +92 -0
  27. dataeval-0.61.0/src/dataeval/_internal/metrics/ber.py +124 -0
  28. dataeval-0.61.0/src/dataeval/_internal/metrics/coverage.py +80 -0
  29. dataeval-0.61.0/src/dataeval/_internal/metrics/divergence.py +94 -0
  30. dataeval-0.61.0/src/dataeval/_internal/metrics/hash.py +79 -0
  31. dataeval-0.61.0/src/dataeval/_internal/metrics/parity.py +180 -0
  32. dataeval-0.61.0/src/dataeval/_internal/metrics/stats.py +332 -0
  33. dataeval-0.61.0/src/dataeval/_internal/metrics/uap.py +45 -0
  34. dataeval-0.61.0/src/dataeval/_internal/metrics/utils.py +158 -0
  35. dataeval-0.61.0/src/dataeval/_internal/models/__init__.py +0 -0
  36. dataeval-0.61.0/src/dataeval/_internal/models/pytorch/__init__.py +0 -0
  37. dataeval-0.61.0/src/dataeval/_internal/models/pytorch/autoencoder.py +202 -0
  38. dataeval-0.61.0/src/dataeval/_internal/models/pytorch/blocks.py +46 -0
  39. dataeval-0.61.0/src/dataeval/_internal/models/pytorch/utils.py +67 -0
  40. dataeval-0.61.0/src/dataeval/_internal/models/tensorflow/__init__.py +0 -0
  41. dataeval-0.61.0/src/dataeval/_internal/models/tensorflow/autoencoder.py +317 -0
  42. dataeval-0.61.0/src/dataeval/_internal/models/tensorflow/gmm.py +115 -0
  43. dataeval-0.61.0/src/dataeval/_internal/models/tensorflow/losses.py +107 -0
  44. dataeval-0.61.0/src/dataeval/_internal/models/tensorflow/pixelcnn.py +1106 -0
  45. dataeval-0.61.0/src/dataeval/_internal/models/tensorflow/trainer.py +102 -0
  46. dataeval-0.61.0/src/dataeval/_internal/models/tensorflow/utils.py +254 -0
  47. dataeval-0.61.0/src/dataeval/_internal/workflows/sufficiency.py +555 -0
  48. dataeval-0.61.0/src/dataeval/detectors/__init__.py +29 -0
  49. dataeval-0.61.0/src/dataeval/flags/__init__.py +3 -0
  50. dataeval-0.61.0/src/dataeval/metrics/__init__.py +7 -0
  51. dataeval-0.61.0/src/dataeval/models/__init__.py +15 -0
  52. dataeval-0.61.0/src/dataeval/models/tensorflow/__init__.py +6 -0
  53. dataeval-0.61.0/src/dataeval/models/torch/__init__.py +8 -0
  54. dataeval-0.61.0/src/dataeval/py.typed +0 -0
  55. dataeval-0.61.0/src/dataeval/workflows/__init__.py +8 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 ARiA
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,114 @@
1
+ Metadata-Version: 2.1
2
+ Name: dataeval
3
+ Version: 0.61.0
4
+ Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
5
+ Home-page: https://dataeval.ai/
6
+ License: MIT
7
+ Author: Andrew Weng
8
+ Author-email: andrew.weng@ariacoustics.com
9
+ Maintainer: ARiA
10
+ Maintainer-email: dataeval@ariacoustics.com
11
+ Requires-Python: >=3.9,<3.12
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3 :: Only
21
+ Classifier: Topic :: Scientific/Engineering
22
+ Provides-Extra: all
23
+ Provides-Extra: tensorflow
24
+ Provides-Extra: torch
25
+ Requires-Dist: hdbscan (>=0.8.36)
26
+ Requires-Dist: maite
27
+ Requires-Dist: matplotlib ; extra == "torch" or extra == "all"
28
+ Requires-Dist: numpy (>1.24.3)
29
+ Requires-Dist: nvidia-cudnn-cu11 (>=8.6.0.163) ; extra == "tensorflow" or extra == "torch" or extra == "all"
30
+ Requires-Dist: pillow (>=10.3.0)
31
+ Requires-Dist: scikit-learn (>=1.5.0)
32
+ Requires-Dist: scipy (>=1.10)
33
+ Requires-Dist: tensorflow (>=2.14.1,<2.16) ; extra == "tensorflow" or extra == "all"
34
+ Requires-Dist: tensorflow-io-gcs-filesystem (>=0.35.0,<0.37) ; extra == "tensorflow" or extra == "all"
35
+ Requires-Dist: tensorflow_probability (>=0.22.1,<0.24) ; extra == "tensorflow" or extra == "all"
36
+ Requires-Dist: torch (>=2.0.1,!=2.2.0) ; extra == "torch" or extra == "all"
37
+ Requires-Dist: xxhash (>=3.3)
38
+ Project-URL: Documentation, https://dataeval.readthedocs.io/
39
+ Project-URL: Repository, https://github.com/aria-ml/dataeval/
40
+ Description-Content-Type: text/markdown
41
+
42
+ # DataEval
43
+
44
+ ## About DataEval
45
+
46
+ DataEval focuses on characterizing image data and its impact on model performance across classification and object-detection tasks.
47
+
48
+ <!-- start about -->
49
+
50
+ **Model-agnostic metrics that bound real-world performance**
51
+ - relevance/completeness/coverage
52
+ - metafeatures (data complexity)
53
+
54
+ **Model-specific metrics that guide model selection and training**
55
+ - dataset sufficiency
56
+ - data/model complexity mismatch
57
+
58
+ **Metrics for post-deployment monitoring of data with bounds on model performance to guide retraining**
59
+ - dataset-shift metrics
60
+ - model performance bounds under covariate shift
61
+ - guidance on sampling to assess model error and model retraining
62
+
63
+ <!-- end about -->
64
+
65
+ ## Getting Started
66
+
67
+ ### Requirements
68
+ - Python 3.9-3.11
69
+
70
+ ### Installing DataEval
71
+
72
+ You can install DataEval directly from pypi.org using the following command. The optional dependencies of DataEval are `torch`, `tensorflow` and `all`. Using `torch` enables Sufficiency metrics, and `tensorflow` enables OOD Detection.
73
+
74
+ ```
75
+ pip install dataeval[all]
76
+ ```
77
+
78
+ ### Installing DataEval from GitHub
79
+
80
+ To install DataEval from source locally on Ubuntu, you will need `git-lfs` to download larger, binary source files and `poetry` for project dependency management.
81
+
82
+ ```
83
+ sudo apt-get install git-lfs
84
+ pip install poetry
85
+ ```
86
+
87
+ Pull the source down and change to the DataEval project directory.
88
+ ```
89
+ git clone https://github.com/aria-ml/dataeval.git
90
+ cd dataeval
91
+ ```
92
+
93
+
94
+
95
+ Install DataEval with optional dependencies for development.
96
+ ```
97
+ poetry install --all-extras --with dev
98
+ ```
99
+
100
+ Now that DataEval is installed, you can run commands in the poetry virtual environment by prefixing shell commands with `poetry run`, or activate the virtual environment directly in the shell.
101
+ ```
102
+ poetry shell
103
+ ```
104
+
105
+ ### Documentation and Tutorials
106
+ For more ideas on getting started using DataEval in your workflow, additional information and tutorials are in our Sphinx documentation hosted on [Read the Docs](https://dataeval.readthedocs.io/).
107
+
108
+ ## Attribution
109
+ This project uses code from the [Alibi-Detect](https://github.com/SeldonIO/alibi-detect) Python library developed by SeldonIO. Additional documentation from the developers is also available [here](https://docs.seldon.io/projects/alibi-detect/en/stable/).
110
+
111
+ ## POCs
112
+ - **POC**: Scott Swan @scott.swan
113
+ - **DPOC**: Andrew Weng @aweng
114
+
@@ -0,0 +1,72 @@
1
+ # DataEval
2
+
3
+ ## About DataEval
4
+
5
+ DataEval focuses on characterizing image data and its impact on model performance across classification and object-detection tasks.
6
+
7
+ <!-- start about -->
8
+
9
+ **Model-agnostic metrics that bound real-world performance**
10
+ - relevance/completeness/coverage
11
+ - metafeatures (data complexity)
12
+
13
+ **Model-specific metrics that guide model selection and training**
14
+ - dataset sufficiency
15
+ - data/model complexity mismatch
16
+
17
+ **Metrics for post-deployment monitoring of data with bounds on model performance to guide retraining**
18
+ - dataset-shift metrics
19
+ - model performance bounds under covariate shift
20
+ - guidance on sampling to assess model error and model retraining
21
+
22
+ <!-- end about -->
23
+
24
+ ## Getting Started
25
+
26
+ ### Requirements
27
+ - Python 3.9-3.11
28
+
29
+ ### Installing DataEval
30
+
31
+ You can install DataEval directly from pypi.org using the following command. The optional dependencies of DataEval are `torch`, `tensorflow` and `all`. Using `torch` enables Sufficiency metrics, and `tensorflow` enables OOD Detection.
32
+
33
+ ```
34
+ pip install dataeval[all]
35
+ ```
36
+
37
+ ### Installing DataEval from GitHub
38
+
39
+ To install DataEval from source locally on Ubuntu, you will need `git-lfs` to download larger, binary source files and `poetry` for project dependency management.
40
+
41
+ ```
42
+ sudo apt-get install git-lfs
43
+ pip install poetry
44
+ ```
45
+
46
+ Pull the source down and change to the DataEval project directory.
47
+ ```
48
+ git clone https://github.com/aria-ml/dataeval.git
49
+ cd dataeval
50
+ ```
51
+
52
+
53
+
54
+ Install DataEval with optional dependencies for development.
55
+ ```
56
+ poetry install --all-extras --with dev
57
+ ```
58
+
59
+ Now that DataEval is installed, you can run commands in the poetry virtual environment by prefixing shell commands with `poetry run`, or activate the virtual environment directly in the shell.
60
+ ```
61
+ poetry shell
62
+ ```
63
+
64
+ ### Documentation and Tutorials
65
+ For more ideas on getting started using DataEval in your workflow, additional information and tutorials are in our Sphinx documentation hosted on [Read the Docs](https://dataeval.readthedocs.io/).
66
+
67
+ ## Attribution
68
+ This project uses code from the [Alibi-Detect](https://github.com/SeldonIO/alibi-detect) Python library developed by SeldonIO. Additional documentation from the developers is also available [here](https://docs.seldon.io/projects/alibi-detect/en/stable/).
69
+
70
+ ## POCs
71
+ - **POC**: Scott Swan @scott.swan
72
+ - **DPOC**: Andrew Weng @aweng
@@ -0,0 +1,177 @@
1
+ [tool.poetry]
2
+ name = "dataeval"
3
+ version = "0.61.0" # dynamic
4
+ description = "DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks"
5
+ license = "MIT"
6
+ readme = "README.md"
7
+ homepage = "https://dataeval.ai/"
8
+ repository = "https://github.com/aria-ml/dataeval/"
9
+ documentation = "https://dataeval.readthedocs.io/"
10
+
11
+ authors = [
12
+ "Andrew Weng <andrew.weng@ariacoustics.com>",
13
+ "James Gleeson <james.gleeson@ariacoustics.com>",
14
+ "Scott Swan <scott.swan@ariacoustics.com>",
15
+ "Shaun Jullens <shaun.jullens@ariacoustics.com>",
16
+ "Thayer Fisher <thayer.fisher@ariacoustics.com>",
17
+ ]
18
+ maintainers = [
19
+ "ARiA <dataeval@ariacoustics.com>"
20
+ ]
21
+
22
+ classifiers = [
23
+ "Development Status :: 4 - Beta",
24
+ "Operating System :: OS Independent",
25
+ "Intended Audience :: Science/Research",
26
+ "License :: OSI Approved :: MIT License",
27
+ "Programming Language :: Python :: 3 :: Only",
28
+ "Programming Language :: Python :: 3.9",
29
+ "Programming Language :: Python :: 3.10",
30
+ "Programming Language :: Python :: 3.11",
31
+ "Topic :: Scientific/Engineering",
32
+ ]
33
+
34
+ packages = [
35
+ {include = "dataeval", from = "src"}
36
+ ]
37
+
38
+ [tool.poetry.dependencies]
39
+ # required
40
+ python = ">=3.9,<3.12"
41
+ hdbscan = {version = ">=0.8.36"}
42
+ numpy = {version = ">1.24.3"}
43
+ pillow = {version = ">=10.3.0"}
44
+ scipy = {version = ">=1.10"}
45
+ scikit-learn = {version = ">=1.5.0"}
46
+ xxhash = {version = ">=3.3"}
47
+ maite = {version = "*"}
48
+
49
+ # optional
50
+ matplotlib = {version = "*", optional = true}
51
+ nvidia-cudnn-cu11 = {version = ">=8.6.0.163", optional = true}
52
+ tensorflow = {version = ">=2.14.1, <2.16", optional = true}
53
+ tensorflow-io-gcs-filesystem = {version = ">=0.35.0, <0.37", optional = true}
54
+ tensorflow_probability = {version = ">=0.22.1, <0.24", optional = true}
55
+ torch = {version = ">=2.0.1, !=2.2.0", source = "pytorch", optional = true}
56
+
57
+ [tool.poetry.extras]
58
+ tensorflow = ["tensorflow", "tensorflow-io-gcs-filesystem", "tensorflow_probability", "nvidia-cudnn-cu11"]
59
+ torch = ["torch", "matplotlib", "nvidia-cudnn-cu11"]
60
+ all = ["matplotlib", "nvidia-cudnn-cu11", "tensorflow", "tensorflow-io-gcs-filesystem", "tensorflow_probability", "torch"]
61
+
62
+ [tool.poetry.group.dev]
63
+ optional = true
64
+
65
+ [tool.poetry.group.dev.dependencies]
66
+ tox = {version = "*"}
67
+ tox-uv = {version = "*"}
68
+ uv = {version = "*"}
69
+ poetry = {version = "*"}
70
+ # lint
71
+ ruff = {version = "*"}
72
+ codespell = {version = "*", extras = ["toml"]}
73
+ # unit
74
+ pytest = {version = "*"}
75
+ pytest-cov = {version = "*"}
76
+ pytest-xdist = {version = "*"}
77
+ coverage = {version = "*", extras = ["toml"]}
78
+ torchmetrics = {version = ">=1.0.0", source = "pytorch"}
79
+ # type
80
+ pyright = {version = "*, !=1.1.340"}
81
+ # docs
82
+ certifi = {version = ">=2024.07.04", python = "~3.11"}
83
+ enum_tools = {version = "0.12.0", extras = ["sphinx"], python = "~3.11"}
84
+ ipykernel = {version = "6.26.0", python = "~3.11"}
85
+ ipywidgets = {version = "8.1.1", python = "~3.11"}
86
+ jupyter-client = {version = "8.6.0", python = "~3.11"}
87
+ jupyter-cache = {version = "*", python = "~3.11"}
88
+ myst-nb = {version = "1.0.0", python = "~3.11"}
89
+ protobuf = {version = "4.25.3", python = "~3.11"}
90
+ sphinx-rtd-size = {version = "0.2.0", python = "~3.11"}
91
+ sphinx-rtd-theme = {version = "1.3.0", python = "~3.11"}
92
+ sphinx-design = {version = "*", python = "~3.11"}
93
+ sphinx-tabs = {version = "*", python = "~3.11"}
94
+ Sphinx = {version = "7.2.6", python = "~3.11"}
95
+ tensorflow-datasets = {version = "4.9.3", python = "~3.11"}
96
+ torchvision = {version = ">=0.16.0", source = "pytorch", python = "~3.11"}
97
+
98
+ [[tool.poetry.source]]
99
+ name = "pytorch"
100
+ url = "https://download.pytorch.org/whl/cu118"
101
+ priority = "explicit"
102
+
103
+ [tool.poetry-dynamic-versioning]
104
+ enable = false
105
+ vcs = "git"
106
+ style = "semver"
107
+ pattern = "v(?P<base>\\d+\\.\\d+\\.\\d+)$"
108
+
109
+ [tool.poetry-dynamic-versioning.substitution]
110
+ files = ["src/dataeval/__init__.py"]
111
+
112
+ [tool.pyright]
113
+ reportMissingImports = false
114
+
115
+ [tool.pytest.ini_options]
116
+ norecursedirs = ["prototype"]
117
+ addopts = ["--pythonwarnings=ignore::DeprecationWarning", "--verbose", "--durations=20", "--durations-min=1.0"]
118
+
119
+ [tool.coverage.run]
120
+ source = ["dataeval"]
121
+ branch = true
122
+
123
+ [tool.coverage.report]
124
+ exclude_also = [
125
+ "raise NotImplementedError"
126
+ ]
127
+ omit = [
128
+ "*/_internal/models/tensorflow/pixelcnn.py",
129
+ "*/_prototype/*",
130
+ "/tmp/*",
131
+ "tests/*"
132
+ ]
133
+ fail_under = 90
134
+
135
+ # Ruff rules - https://docs.astral.sh/ruff/rules/
136
+ [tool.ruff]
137
+ exclude = [
138
+ ".devcontainer",
139
+ ".github",
140
+ ".vscode",
141
+ ".jupyter_cache",
142
+ "*env*",
143
+ "output",
144
+ "_build",
145
+ ".tox",
146
+ "prototype",
147
+ ]
148
+ line-length = 120
149
+ indent-width = 4
150
+ target-version = "py38"
151
+ extend-include = ["*.ipynb"]
152
+
153
+ [tool.ruff.lint]
154
+ select = ["A", "E", "F", "C4", "I", "UP", "NPY", "SIM", "RUF100"]
155
+ ignore = ["NPY002"]
156
+ fixable = ["ALL"]
157
+ unfixable = []
158
+ dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
159
+ per-file-ignores = { "*.ipynb" = ["E402"] }
160
+
161
+ [tool.ruff.lint.isort]
162
+ known-first-party = ["dataeval"]
163
+
164
+ [tool.ruff.format]
165
+ quote-style = "double"
166
+ indent-style = "space"
167
+ skip-magic-trailing-comma = false
168
+ line-ending = "auto"
169
+ docstring-code-format = true
170
+ docstring-code-line-length = "dynamic"
171
+
172
+ [tool.codespell]
173
+ skip = './*env*,./prototype,./docs/.jupyter_cache,./.tox,CHANGELOG.md,poetry.lock,./output,*.html'
174
+
175
+ [build-system]
176
+ requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning"]
177
+ build-backend = "poetry_dynamic_versioning.backend"
@@ -0,0 +1,18 @@
1
+ from importlib.util import find_spec
2
+
3
+ from . import detectors, flags, metrics
4
+
5
+ __version__ = "0.61.0"
6
+
7
+ __all__ = ["detectors", "flags", "metrics"]
8
+
9
+ if find_spec("torch") is not None: # pragma: no cover
10
+ from . import models, workflows
11
+
12
+ __all__ += ["models", "workflows"]
13
+ elif find_spec("tensorflow") is not None: # pragma: no cover
14
+ from . import models
15
+
16
+ __all__ += ["models"]
17
+
18
+ del find_spec