dataeval 0.86.7__tar.gz → 0.86.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval-0.86.8/.gitignore +28 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/PKG-INFO +67 -47
- {dataeval-0.86.7 → dataeval-0.86.8}/README.md +30 -16
- dataeval-0.86.8/pyproject.toml +261 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/__init__.py +10 -3
- dataeval-0.86.8/src/dataeval/_version.py +21 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/config.py +7 -1
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/drift/_mvdc.py +2 -9
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/drift/_nml/_chunk.py +2 -2
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/ood/ae.py +1 -1
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/ood/base.py +3 -3
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/bias/_completeness.py +3 -3
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/bias/_coverage.py +2 -2
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/bias/_parity.py +1 -1
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/estimators/_ber.py +2 -2
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/estimators/_divergence.py +2 -2
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/outputs/_estimators.py +6 -6
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/_array.py +20 -9
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/_clusterer.py +7 -7
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/torch/_internal.py +3 -3
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/torch/trainer.py +1 -1
- dataeval-0.86.7/pyproject.toml +0 -207
- {dataeval-0.86.7 → dataeval-0.86.8}/LICENSE.txt +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/_log.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/data/__init__.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/data/_embeddings.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/data/_images.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/data/_metadata.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/data/_selection.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/data/_split.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/data/selections/__init__.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/data/selections/_classbalance.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/data/selections/_classfilter.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/data/selections/_indices.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/data/selections/_limit.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/data/selections/_prioritize.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/data/selections/_reverse.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/data/selections/_shuffle.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/__init__.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/drift/__init__.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/drift/_base.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/drift/_cvm.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/drift/_ks.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/drift/_mmd.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/drift/_nml/__init__.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/drift/_nml/_base.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/drift/_nml/_domainclassifier.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/drift/_nml/_result.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/drift/_nml/_thresholds.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/drift/_uncertainty.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/drift/updates.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/linters/__init__.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/linters/duplicates.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/linters/outliers.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/ood/__init__.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/ood/mixin.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metadata/__init__.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metadata/_distance.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metadata/_ood.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metadata/_utils.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/__init__.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/bias/__init__.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/bias/_balance.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/bias/_diversity.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/estimators/__init__.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/estimators/_clusterer.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/estimators/_uap.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/stats/__init__.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/stats/_base.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/stats/_boxratiostats.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/stats/_dimensionstats.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/stats/_hashstats.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/stats/_imagestats.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/stats/_labelstats.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/stats/_pixelstats.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/stats/_visualstats.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/outputs/__init__.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/outputs/_base.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/outputs/_bias.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/outputs/_drift.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/outputs/_linters.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/outputs/_metadata.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/outputs/_ood.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/outputs/_stats.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/outputs/_utils.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/outputs/_workflows.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/py.typed +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/typing.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/__init__.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/_bin.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/_fast_mst.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/_image.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/_method.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/_mst.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/_plot.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/data/__init__.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/data/_dataset.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/data/collate.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/data/metadata.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/datasets/__init__.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/datasets/_antiuav.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/datasets/_base.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/datasets/_cifar10.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/datasets/_fileio.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/datasets/_milco.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/datasets/_mixin.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/datasets/_mnist.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/datasets/_ships.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/datasets/_types.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/datasets/_voc.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/torch/__init__.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/torch/_blocks.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/torch/_gmm.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/torch/models.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/workflows/__init__.py +0 -0
- {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/workflows/sufficiency.py +0 -0
@@ -0,0 +1,28 @@
|
|
1
|
+
*venv*/
|
2
|
+
__pycache__/
|
3
|
+
dist/
|
4
|
+
|
5
|
+
data/
|
6
|
+
!src/dataeval/data
|
7
|
+
!src/dataeval/utils/data
|
8
|
+
!tests/data
|
9
|
+
!tests/utils/data
|
10
|
+
|
11
|
+
docs/build/
|
12
|
+
docs/source/reference/autoapi
|
13
|
+
docs/source/tutorials/notebooks/checkpoints/
|
14
|
+
|
15
|
+
output/
|
16
|
+
.coverage*
|
17
|
+
|
18
|
+
.tox/
|
19
|
+
.nox/
|
20
|
+
.python-version
|
21
|
+
|
22
|
+
# Used to store user customizable settings
|
23
|
+
.settings
|
24
|
+
# debug profiles
|
25
|
+
.vscode/launch.json
|
26
|
+
|
27
|
+
# Autogenerated version file
|
28
|
+
src/dataeval/_version.py
|
@@ -1,45 +1,52 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: dataeval
|
3
|
-
Version: 0.86.7
|
3
|
+
Version: 0.86.8
|
4
4
|
Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
Author-email: andrew.weng@ariacoustics.com
|
9
|
-
Maintainer: ARiA
|
10
|
-
|
11
|
-
|
5
|
+
Project-URL: Homepage, https://dataeval.ai/
|
6
|
+
Project-URL: Repository, https://github.com/aria-ml/dataeval/
|
7
|
+
Project-URL: Documentation, https://dataeval.readthedocs.io/
|
8
|
+
Author-email: Andrew Weng <andrew.weng@ariacoustics.com>, Bill Peria <bill.peria@ariacoustics.com>, Jon Botts <jonathan.botts@ariacoustics.com>, Jonathan Christian <jonathan.christian@ariacoustics.com>, Justin McMillan <justin.mcmillan@ariacoustics.com>, Ryan Wood <ryan.wood@ariacoustics.com>, Scott Swan <scott.swan@ariacoustics.com>, Shaun Jullens <shaun.jullens@ariacoustics.com>
|
9
|
+
Maintainer-email: ARiA <dataeval@ariacoustics.com>
|
10
|
+
License-Expression: MIT
|
11
|
+
License-File: LICENSE.txt
|
12
12
|
Classifier: Development Status :: 4 - Beta
|
13
13
|
Classifier: Intended Audience :: Science/Research
|
14
14
|
Classifier: License :: OSI Approved :: MIT License
|
15
15
|
Classifier: Operating System :: OS Independent
|
16
|
-
Classifier: Programming Language :: Python :: 3
|
16
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
17
17
|
Classifier: Programming Language :: Python :: 3.9
|
18
18
|
Classifier: Programming Language :: Python :: 3.10
|
19
19
|
Classifier: Programming Language :: Python :: 3.11
|
20
20
|
Classifier: Programming Language :: Python :: 3.12
|
21
|
-
Classifier: Programming Language :: Python :: 3 :: Only
|
22
21
|
Classifier: Topic :: Scientific/Engineering
|
22
|
+
Requires-Python: <3.13,>=3.9
|
23
|
+
Requires-Dist: defusedxml>=0.7.1
|
24
|
+
Requires-Dist: fast-hdbscan==0.2.0
|
25
|
+
Requires-Dist: lightgbm>=4
|
26
|
+
Requires-Dist: numba>=0.59.1
|
27
|
+
Requires-Dist: numpy>=1.24.2
|
28
|
+
Requires-Dist: pandas>=2.0
|
29
|
+
Requires-Dist: pillow>=10.3.0
|
30
|
+
Requires-Dist: polars>=1.0.0
|
31
|
+
Requires-Dist: requests>=2.32.3
|
32
|
+
Requires-Dist: scikit-learn>=1.5.0
|
33
|
+
Requires-Dist: scipy>=1.10
|
34
|
+
Requires-Dist: torch>=2.2.0
|
35
|
+
Requires-Dist: torchvision>=0.17.0
|
36
|
+
Requires-Dist: tqdm>=4.66
|
37
|
+
Requires-Dist: typing-extensions>=4.12; python_version ~= '3.9'
|
38
|
+
Requires-Dist: xxhash>=3.3
|
23
39
|
Provides-Extra: all
|
24
|
-
Requires-Dist:
|
25
|
-
|
26
|
-
Requires-Dist:
|
27
|
-
Requires-Dist:
|
28
|
-
|
29
|
-
Requires-Dist:
|
30
|
-
Requires-Dist:
|
31
|
-
|
32
|
-
Requires-Dist:
|
33
|
-
Requires-Dist:
|
34
|
-
Requires-Dist: scikit-learn (>=1.5.0)
|
35
|
-
Requires-Dist: scipy (>=1.10)
|
36
|
-
Requires-Dist: torch (>=2.2.0)
|
37
|
-
Requires-Dist: torchvision (>=0.17.0)
|
38
|
-
Requires-Dist: tqdm
|
39
|
-
Requires-Dist: typing-extensions (>=4.12) ; python_version >= "3.9" and python_version < "4.0"
|
40
|
-
Requires-Dist: xxhash (>=3.3)
|
41
|
-
Project-URL: Documentation, https://dataeval.readthedocs.io/
|
42
|
-
Project-URL: Repository, https://github.com/aria-ml/dataeval/
|
40
|
+
Requires-Dist: matplotlib>=3.7.1; extra == 'all'
|
41
|
+
Provides-Extra: cpu
|
42
|
+
Requires-Dist: torch>=2.2.0; extra == 'cpu'
|
43
|
+
Requires-Dist: torchvision>=0.17.0; extra == 'cpu'
|
44
|
+
Provides-Extra: cu118
|
45
|
+
Requires-Dist: torch>=2.2.0; extra == 'cu118'
|
46
|
+
Requires-Dist: torchvision>=0.17.0; extra == 'cu118'
|
47
|
+
Provides-Extra: cu124
|
48
|
+
Requires-Dist: torch>=2.2.0; extra == 'cu124'
|
49
|
+
Requires-Dist: torchvision>=0.17.0; extra == 'cu124'
|
43
50
|
Description-Content-Type: text/markdown
|
44
51
|
|
45
52
|
# DataEval
|
@@ -72,26 +79,28 @@ estimation, bias detection, and dataset linting.
|
|
72
79
|
<!-- end needs -->
|
73
80
|
|
74
81
|
<!-- start JATIC interop -->
|
82
|
+
|
75
83
|
DataEval is easy to install, supports a wide range of Python versions, and is
|
76
84
|
compatible with many of the most popular packages in the scientific and T&E
|
77
85
|
communities.
|
78
86
|
|
79
87
|
DataEval also has native interoperability between JATIC's suite of tools when
|
80
88
|
using MAITE-compliant datasets and models.
|
89
|
+
|
81
90
|
<!-- end JATIC interop -->
|
82
91
|
|
83
92
|
## Getting Started
|
84
93
|
|
85
94
|
**Python versions:** 3.9 - 3.12
|
86
95
|
|
87
|
-
**Supported packages**:
|
96
|
+
**Supported packages**: _NumPy_, _Pandas_, _scikit-learn_, _MAITE_, _NRTK_
|
88
97
|
|
89
98
|
Choose your preferred method of installation below or follow our
|
90
99
|
[installation guide](https://dataeval.readthedocs.io/en/v0.74.2/installation.html).
|
91
100
|
|
92
|
-
|
93
|
-
|
94
|
-
|
101
|
+
- [Installing with pip](#installing-with-pip)
|
102
|
+
- [Installing with conda/mamba](#installing-with-conda)
|
103
|
+
- [Installing from GitHub](#installing-from-github)
|
95
104
|
|
96
105
|
### **Installing with pip**
|
97
106
|
|
@@ -105,7 +114,7 @@ pip install dataeval[all]
|
|
105
114
|
### **Installing with conda**
|
106
115
|
|
107
116
|
DataEval can be installed in a Conda/Mamba environment using the provided
|
108
|
-
`environment.yaml` file.
|
117
|
+
`environment.yaml` file. As some dependencies are installed from the `pytorch`
|
109
118
|
channel, the channel is specified in the example below.
|
110
119
|
|
111
120
|
```bash
|
@@ -115,12 +124,10 @@ micromamba create -f environment\environment.yaml -c pytorch
|
|
115
124
|
### **Installing from GitHub**
|
116
125
|
|
117
126
|
To install DataEval from source locally on Ubuntu, you will need `git-lfs` to
|
118
|
-
download larger, binary source files
|
119
|
-
management.
|
127
|
+
download larger, binary source files.
|
120
128
|
|
121
129
|
```bash
|
122
130
|
sudo apt-get install git-lfs
|
123
|
-
pip install poetry
|
124
131
|
```
|
125
132
|
|
126
133
|
Pull the source down and change to the DataEval project directory.
|
@@ -130,26 +137,40 @@ git clone https://github.com/aria-ml/dataeval.git
|
|
130
137
|
cd dataeval
|
131
138
|
```
|
132
139
|
|
133
|
-
|
140
|
+
#### **Using Poetry**
|
141
|
+
|
142
|
+
Install DataEval with all extras.
|
134
143
|
|
135
144
|
```bash
|
136
|
-
poetry install --all
|
145
|
+
poetry install --extras=all
|
137
146
|
```
|
138
147
|
|
139
|
-
|
140
|
-
environment by prefixing shell commands with `poetry run`, or activate the
|
141
|
-
virtual environment directly in the shell.
|
148
|
+
Enable Poetry's virtual environment.
|
142
149
|
|
143
150
|
```bash
|
144
|
-
poetry
|
151
|
+
poetry env activate
|
152
|
+
```
|
153
|
+
|
154
|
+
#### **Using uv**
|
155
|
+
|
156
|
+
Install DataEval with all extras and dependencies for development.
|
157
|
+
|
158
|
+
```bash
|
159
|
+
uv sync --extra=all
|
160
|
+
```
|
161
|
+
|
162
|
+
Enable uv's virtual environment.
|
163
|
+
|
164
|
+
```bash
|
165
|
+
source .venv/bin/activate
|
145
166
|
```
|
146
167
|
|
147
168
|
## Contact Us
|
148
169
|
|
149
170
|
If you have any questions, feel free to reach out to the people below:
|
150
171
|
|
151
|
-
|
152
|
-
|
172
|
+
- **POC**: Scott Swan @scott.swan
|
173
|
+
- **DPOC**: Andrew Weng @aweng
|
153
174
|
|
154
175
|
## Acknowledgement
|
155
176
|
|
@@ -164,4 +185,3 @@ interpreted as necessarily representing the official policies or endorsements,
|
|
164
185
|
either expressed or implied, of the U.S. Government.
|
165
186
|
|
166
187
|
<!-- end acknowledgement -->
|
167
|
-
|
@@ -28,26 +28,28 @@ estimation, bias detection, and dataset linting.
|
|
28
28
|
<!-- end needs -->
|
29
29
|
|
30
30
|
<!-- start JATIC interop -->
|
31
|
+
|
31
32
|
DataEval is easy to install, supports a wide range of Python versions, and is
|
32
33
|
compatible with many of the most popular packages in the scientific and T&E
|
33
34
|
communities.
|
34
35
|
|
35
36
|
DataEval also has native interoperability between JATIC's suite of tools when
|
36
37
|
using MAITE-compliant datasets and models.
|
38
|
+
|
37
39
|
<!-- end JATIC interop -->
|
38
40
|
|
39
41
|
## Getting Started
|
40
42
|
|
41
43
|
**Python versions:** 3.9 - 3.12
|
42
44
|
|
43
|
-
**Supported packages**:
|
45
|
+
**Supported packages**: _NumPy_, _Pandas_, _scikit-learn_, _MAITE_, _NRTK_
|
44
46
|
|
45
47
|
Choose your preferred method of installation below or follow our
|
46
48
|
[installation guide](https://dataeval.readthedocs.io/en/v0.74.2/installation.html).
|
47
49
|
|
48
|
-
|
49
|
-
|
50
|
-
|
50
|
+
- [Installing with pip](#installing-with-pip)
|
51
|
+
- [Installing with conda/mamba](#installing-with-conda)
|
52
|
+
- [Installing from GitHub](#installing-from-github)
|
51
53
|
|
52
54
|
### **Installing with pip**
|
53
55
|
|
@@ -61,7 +63,7 @@ pip install dataeval[all]
|
|
61
63
|
### **Installing with conda**
|
62
64
|
|
63
65
|
DataEval can be installed in a Conda/Mamba environment using the provided
|
64
|
-
`environment.yaml` file.
|
66
|
+
`environment.yaml` file. As some dependencies are installed from the `pytorch`
|
65
67
|
channel, the channel is specified in the example below.
|
66
68
|
|
67
69
|
```bash
|
@@ -71,12 +73,10 @@ micromamba create -f environment\environment.yaml -c pytorch
|
|
71
73
|
### **Installing from GitHub**
|
72
74
|
|
73
75
|
To install DataEval from source locally on Ubuntu, you will need `git-lfs` to
|
74
|
-
download larger, binary source files
|
75
|
-
management.
|
76
|
+
download larger, binary source files.
|
76
77
|
|
77
78
|
```bash
|
78
79
|
sudo apt-get install git-lfs
|
79
|
-
pip install poetry
|
80
80
|
```
|
81
81
|
|
82
82
|
Pull the source down and change to the DataEval project directory.
|
@@ -86,26 +86,40 @@ git clone https://github.com/aria-ml/dataeval.git
|
|
86
86
|
cd dataeval
|
87
87
|
```
|
88
88
|
|
89
|
-
|
89
|
+
#### **Using Poetry**
|
90
|
+
|
91
|
+
Install DataEval with all extras.
|
92
|
+
|
93
|
+
```bash
|
94
|
+
poetry install --extras=all
|
95
|
+
```
|
96
|
+
|
97
|
+
Enable Poetry's virtual environment.
|
98
|
+
|
99
|
+
```bash
|
100
|
+
poetry env activate
|
101
|
+
```
|
102
|
+
|
103
|
+
#### **Using uv**
|
104
|
+
|
105
|
+
Install DataEval with all extras and dependencies for development.
|
90
106
|
|
91
107
|
```bash
|
92
|
-
|
108
|
+
uv sync --extra=all
|
93
109
|
```
|
94
110
|
|
95
|
-
|
96
|
-
environment by prefixing shell commands with `poetry run`, or activate the
|
97
|
-
virtual environment directly in the shell.
|
111
|
+
Enable uv's virtual environment.
|
98
112
|
|
99
113
|
```bash
|
100
|
-
|
114
|
+
source .venv/bin/activate
|
101
115
|
```
|
102
116
|
|
103
117
|
## Contact Us
|
104
118
|
|
105
119
|
If you have any questions, feel free to reach out to the people below:
|
106
120
|
|
107
|
-
|
108
|
-
|
121
|
+
- **POC**: Scott Swan @scott.swan
|
122
|
+
- **DPOC**: Andrew Weng @aweng
|
109
123
|
|
110
124
|
## Acknowledgement
|
111
125
|
|
@@ -0,0 +1,261 @@
|
|
1
|
+
[project]
|
2
|
+
name = "dataeval"
|
3
|
+
dynamic = ["version"]
|
4
|
+
description = "DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks"
|
5
|
+
authors = [
|
6
|
+
{ name = "Andrew Weng", email = "andrew.weng@ariacoustics.com" },
|
7
|
+
{ name = "Bill Peria", email = "bill.peria@ariacoustics.com" },
|
8
|
+
{ name = "Jon Botts", email = "jonathan.botts@ariacoustics.com" },
|
9
|
+
{ name = "Jonathan Christian", email = "jonathan.christian@ariacoustics.com" },
|
10
|
+
{ name = "Justin McMillan", email = "justin.mcmillan@ariacoustics.com" },
|
11
|
+
{ name = "Ryan Wood", email = "ryan.wood@ariacoustics.com" },
|
12
|
+
{ name = "Scott Swan", email = "scott.swan@ariacoustics.com" },
|
13
|
+
{ name = "Shaun Jullens", email = "shaun.jullens@ariacoustics.com" },
|
14
|
+
]
|
15
|
+
requires-python = ">=3.9,<3.13"
|
16
|
+
readme = "README.md"
|
17
|
+
license = "MIT"
|
18
|
+
maintainers = [
|
19
|
+
{ name = "ARiA", email = "dataeval@ariacoustics.com" },
|
20
|
+
]
|
21
|
+
classifiers = [
|
22
|
+
"Development Status :: 4 - Beta",
|
23
|
+
"Operating System :: OS Independent",
|
24
|
+
"Intended Audience :: Science/Research",
|
25
|
+
"License :: OSI Approved :: MIT License",
|
26
|
+
"Programming Language :: Python :: 3 :: Only",
|
27
|
+
"Programming Language :: Python :: 3.9",
|
28
|
+
"Programming Language :: Python :: 3.10",
|
29
|
+
"Programming Language :: Python :: 3.11",
|
30
|
+
"Programming Language :: Python :: 3.12",
|
31
|
+
"Topic :: Scientific/Engineering",
|
32
|
+
]
|
33
|
+
dependencies = [
|
34
|
+
"defusedxml>=0.7.1",
|
35
|
+
"fast_hdbscan==0.2.0",
|
36
|
+
"lightgbm>=4",
|
37
|
+
"numba>=0.59.1",
|
38
|
+
"numpy>=1.24.2",
|
39
|
+
"pandas>=2.0",
|
40
|
+
"pillow>=10.3.0",
|
41
|
+
"polars>=1.0.0",
|
42
|
+
"requests>=2.32.3",
|
43
|
+
"scipy>=1.10",
|
44
|
+
"scikit-learn>=1.5.0",
|
45
|
+
"torch>=2.2.0",
|
46
|
+
"torchvision>=0.17.0",
|
47
|
+
"tqdm>=4.66",
|
48
|
+
"typing-extensions>=4.12 ; python_version ~= '3.9'",
|
49
|
+
"xxhash>=3.3",
|
50
|
+
]
|
51
|
+
|
52
|
+
[project.optional-dependencies]
|
53
|
+
cpu = [
|
54
|
+
"torch>=2.2.0",
|
55
|
+
"torchvision>=0.17.0",
|
56
|
+
]
|
57
|
+
cu118 = [
|
58
|
+
"torch>=2.2.0",
|
59
|
+
"torchvision>=0.17.0",
|
60
|
+
]
|
61
|
+
cu124 = [
|
62
|
+
"torch>=2.2.0",
|
63
|
+
"torchvision>=0.17.0",
|
64
|
+
]
|
65
|
+
all = ["matplotlib>=3.7.1"]
|
66
|
+
|
67
|
+
[project.urls]
|
68
|
+
Homepage = "https://dataeval.ai/"
|
69
|
+
Repository = "https://github.com/aria-ml/dataeval/"
|
70
|
+
Documentation = "https://dataeval.readthedocs.io/"
|
71
|
+
|
72
|
+
[dependency-groups]
|
73
|
+
base = [
|
74
|
+
"nox[uv]>=2025.5.1",
|
75
|
+
"uv>=0.7.8",
|
76
|
+
"pip>=25",
|
77
|
+
]
|
78
|
+
lint = [
|
79
|
+
"ruff>=0.11",
|
80
|
+
"codespell[toml]>=2.3",
|
81
|
+
]
|
82
|
+
test = [
|
83
|
+
"pytest>=8.3",
|
84
|
+
"pytest-cov>=6.1",
|
85
|
+
"pytest-xdist>=3.6.1",
|
86
|
+
"coverage[toml]>=7.6",
|
87
|
+
]
|
88
|
+
type = [
|
89
|
+
"pyright[nodejs]>=1.1.400",
|
90
|
+
]
|
91
|
+
docs = [
|
92
|
+
"numpy>=2.0.2",
|
93
|
+
"certifi>=2024.07.04",
|
94
|
+
"ipykernel>=6.26.0",
|
95
|
+
"ipywidgets>=8.1.1",
|
96
|
+
"jinja2>=3.1.6",
|
97
|
+
"jupyter-client>=8.6.0",
|
98
|
+
"jupyter-cache>=1.0",
|
99
|
+
"myst-nb>=1.0",
|
100
|
+
"sphinx-autoapi>=3.6.0",
|
101
|
+
"sphinx-design>=0.6.1",
|
102
|
+
"sphinx-immaterial>=0.12.5",
|
103
|
+
"sphinx-new-tab-link>=0.8.0",
|
104
|
+
"sphinx-tabs>=3.4.7",
|
105
|
+
"Sphinx>=7.2.6",
|
106
|
+
"torchmetrics>=1.0.0",
|
107
|
+
"markupsafe>=3,<3.0.2",
|
108
|
+
]
|
109
|
+
dev = [
|
110
|
+
{ include-group = "base" },
|
111
|
+
{ include-group = "lint" },
|
112
|
+
{ include-group = "test" },
|
113
|
+
{ include-group = "type" },
|
114
|
+
{ include-group = "docs" },
|
115
|
+
]
|
116
|
+
|
117
|
+
[tool.uv]
|
118
|
+
conflicts = [
|
119
|
+
[
|
120
|
+
{ extra = "cpu" },
|
121
|
+
{ extra = "cu118" },
|
122
|
+
{ extra = "cu124" },
|
123
|
+
],
|
124
|
+
]
|
125
|
+
|
126
|
+
[[tool.uv.index]]
|
127
|
+
name = "pytorch-cpu"
|
128
|
+
url = "https://download.pytorch.org/whl/cpu"
|
129
|
+
explicit = true
|
130
|
+
|
131
|
+
[[tool.uv.index]]
|
132
|
+
name = "pytorch-cu118"
|
133
|
+
url = "https://download.pytorch.org/whl/cu118"
|
134
|
+
explicit = true
|
135
|
+
|
136
|
+
[[tool.uv.index]]
|
137
|
+
name = "pytorch-cu124"
|
138
|
+
url = "https://download.pytorch.org/whl/cu124"
|
139
|
+
explicit = true
|
140
|
+
|
141
|
+
[tool.uv.sources]
|
142
|
+
torch = [
|
143
|
+
{ index = "pytorch-cpu", extra = "cpu" },
|
144
|
+
{ index = "pytorch-cu118", extra = "cu118" },
|
145
|
+
{ index = "pytorch-cu124", extra = "cu124" },
|
146
|
+
]
|
147
|
+
torchvision = [
|
148
|
+
{ index = "pytorch-cpu", extra = "cpu" },
|
149
|
+
{ index = "pytorch-cu118", extra = "cu118" },
|
150
|
+
{ index = "pytorch-cu124", extra = "cu124" },
|
151
|
+
]
|
152
|
+
|
153
|
+
[tool.hatch.build.targets.sdist]
|
154
|
+
include = ["src/dataeval"]
|
155
|
+
|
156
|
+
[tool.hatch.build.targets.wheel]
|
157
|
+
include = ["src/dataeval"]
|
158
|
+
|
159
|
+
[tool.hatch.build.targets.wheel.sources]
|
160
|
+
"src/dataeval" = "dataeval"
|
161
|
+
|
162
|
+
[tool.hatch.version]
|
163
|
+
source = "vcs"
|
164
|
+
|
165
|
+
[tool.hatch.build.hooks.vcs]
|
166
|
+
version-file = "src/dataeval/_version.py"
|
167
|
+
|
168
|
+
[tool.poetry]
|
169
|
+
version = "0.0.0" # unused
|
170
|
+
|
171
|
+
[tool.pyproject2conda.dependencies]
|
172
|
+
torch = { pip = true }
|
173
|
+
torchvision = { pip = true }
|
174
|
+
xxhash = { skip = true, packages = "python-xxhash>=3.3" }
|
175
|
+
|
176
|
+
[tool.pyright]
|
177
|
+
reportMissingImports = false
|
178
|
+
|
179
|
+
[tool.pytest.ini_options]
|
180
|
+
testpaths = ["tests"]
|
181
|
+
addopts = [
|
182
|
+
"--pythonwarnings=ignore::DeprecationWarning",
|
183
|
+
"--verbose",
|
184
|
+
"--durations=20",
|
185
|
+
"--durations-min=1.0",
|
186
|
+
]
|
187
|
+
markers = [
|
188
|
+
"required: marks tests for required features",
|
189
|
+
"optional: marks tests for optional features",
|
190
|
+
"requires_all: marks tests that require matplotlib",
|
191
|
+
"cuda: marks tests that require cuda",
|
192
|
+
"year: marks tests that need a specified dataset year",
|
193
|
+
]
|
194
|
+
|
195
|
+
[tool.coverage.run]
|
196
|
+
source = ["src/dataeval"]
|
197
|
+
branch = true
|
198
|
+
concurrency = ["multiprocessing"]
|
199
|
+
parallel = true
|
200
|
+
omit = ["src/dataeval/_version.py"]
|
201
|
+
|
202
|
+
[tool.coverage.report]
|
203
|
+
exclude_also = [
|
204
|
+
"raise NotImplementedError",
|
205
|
+
": \\.\\.\\.",
|
206
|
+
"if TYPE_CHECKING:"
|
207
|
+
]
|
208
|
+
include = ["*/src/dataeval/*"]
|
209
|
+
omit = [
|
210
|
+
"*/torch/_blocks.py",
|
211
|
+
"*/_clusterer.py",
|
212
|
+
"*/_fast_mst.py",
|
213
|
+
]
|
214
|
+
fail_under = 90
|
215
|
+
|
216
|
+
[tool.ruff]
|
217
|
+
exclude = [
|
218
|
+
".devcontainer",
|
219
|
+
".github",
|
220
|
+
".vscode",
|
221
|
+
".jupyter_cache",
|
222
|
+
"*env*",
|
223
|
+
"output",
|
224
|
+
"build",
|
225
|
+
".nox",
|
226
|
+
".tox",
|
227
|
+
]
|
228
|
+
line-length = 120
|
229
|
+
indent-width = 4
|
230
|
+
target-version = "py38"
|
231
|
+
extend-include = ["*.ipynb"]
|
232
|
+
|
233
|
+
[tool.ruff.lint]
|
234
|
+
select = ["A", "ANN", "C4", "C90", "E", "F", "I", "NPY", "S", "SIM", "RET", "RUF100", "UP"]
|
235
|
+
ignore = ["ANN401", "NPY002"]
|
236
|
+
fixable = ["ALL"]
|
237
|
+
unfixable = []
|
238
|
+
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
|
239
|
+
per-file-ignores = { "*.ipynb" = ["E402"], "!src/*" = ["ANN", "S", "RET"]}
|
240
|
+
|
241
|
+
[tool.ruff.lint.isort]
|
242
|
+
known-first-party = ["dataeval"]
|
243
|
+
|
244
|
+
[tool.ruff.lint.flake8-builtins]
|
245
|
+
builtins-strict-checking = false
|
246
|
+
|
247
|
+
[tool.ruff.format]
|
248
|
+
quote-style = "double"
|
249
|
+
indent-style = "space"
|
250
|
+
skip-magic-trailing-comma = false
|
251
|
+
line-ending = "auto"
|
252
|
+
docstring-code-format = true
|
253
|
+
docstring-code-line-length = "dynamic"
|
254
|
+
|
255
|
+
[tool.codespell]
|
256
|
+
skip = './*env*,./output,./docs/build,./docs/source/.jupyter_cache,CHANGELOG.md,uv.lock,requirements.txt,*.html,./docs/source/*/data'
|
257
|
+
ignore-words-list = ["Hart"]
|
258
|
+
|
259
|
+
[build-system]
|
260
|
+
requires = ["hatchling", "hatch-vcs"]
|
261
|
+
build-backend = "hatchling.build"
|
@@ -7,12 +7,19 @@ shifts that impact performance of deployed models.
|
|
7
7
|
|
8
8
|
from __future__ import annotations
|
9
9
|
|
10
|
-
|
11
|
-
|
10
|
+
try:
|
11
|
+
from ._version import __version__
|
12
|
+
except ImportError:
|
13
|
+
__version__ = "unknown"
|
14
|
+
|
15
|
+
# Strongly type for pyright
|
16
|
+
__version__ = str(__version__)
|
17
|
+
|
18
|
+
__all__ = ["__version__", "config", "detectors", "log", "metrics", "typing", "utils", "workflows"]
|
12
19
|
|
13
20
|
import logging
|
14
21
|
|
15
|
-
from
|
22
|
+
from . import config, detectors, metrics, typing, utils, workflows
|
16
23
|
|
17
24
|
logging.getLogger(__name__).addHandler(logging.NullHandler())
|
18
25
|
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# file generated by setuptools-scm
|
2
|
+
# don't change, don't track in version control
|
3
|
+
|
4
|
+
__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
|
5
|
+
|
6
|
+
TYPE_CHECKING = False
|
7
|
+
if TYPE_CHECKING:
|
8
|
+
from typing import Tuple
|
9
|
+
from typing import Union
|
10
|
+
|
11
|
+
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
12
|
+
else:
|
13
|
+
VERSION_TUPLE = object
|
14
|
+
|
15
|
+
version: str
|
16
|
+
__version__: str
|
17
|
+
__version_tuple__: VERSION_TUPLE
|
18
|
+
version_tuple: VERSION_TUPLE
|
19
|
+
|
20
|
+
__version__ = version = '0.86.8'
|
21
|
+
__version_tuple__ = version_tuple = (0, 86, 8)
|
@@ -77,7 +77,13 @@ def get_device(override: DeviceLike | None = None) -> torch.device:
|
|
77
77
|
"""
|
78
78
|
if override is None:
|
79
79
|
global _device
|
80
|
-
return
|
80
|
+
return (
|
81
|
+
torch.get_default_device()
|
82
|
+
if hasattr(torch, "get_default_device")
|
83
|
+
else torch.device("cpu")
|
84
|
+
if _device is None
|
85
|
+
else _device
|
86
|
+
)
|
81
87
|
return _todevice(override)
|
82
88
|
|
83
89
|
|
@@ -1,16 +1,9 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
from typing import TYPE_CHECKING
|
4
|
-
|
5
3
|
import numpy as np
|
6
4
|
import pandas as pd
|
7
5
|
from numpy.typing import ArrayLike
|
8
6
|
|
9
|
-
if TYPE_CHECKING:
|
10
|
-
from typing import Self
|
11
|
-
else:
|
12
|
-
from typing_extensions import Self
|
13
|
-
|
14
7
|
from dataeval.detectors.drift._nml._chunk import CountBasedChunker, SizeBasedChunker
|
15
8
|
from dataeval.detectors.drift._nml._domainclassifier import DomainClassifierCalculator
|
16
9
|
from dataeval.detectors.drift._nml._thresholds import ConstantThreshold
|
@@ -52,7 +45,7 @@ class DriftMVDC:
|
|
52
45
|
threshold=ConstantThreshold(lower=self.threshold[0], upper=self.threshold[1]),
|
53
46
|
)
|
54
47
|
|
55
|
-
def fit(self, x_ref: ArrayLike) -> Self:
|
48
|
+
def fit(self, x_ref: ArrayLike) -> DriftMVDC:
|
56
49
|
"""
|
57
50
|
Fit the domain classifier on the training dataframe
|
58
51
|
|
@@ -63,7 +56,7 @@ class DriftMVDC:
|
|
63
56
|
|
64
57
|
Returns
|
65
58
|
-------
|
66
|
-
|
59
|
+
DriftMVDC
|
67
60
|
|
68
61
|
"""
|
69
62
|
# for 1D input, assume that is 1 sample: dim[1,n_features]
|