dataeval 0.86.7__tar.gz → 0.86.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. dataeval-0.86.8/.gitignore +28 -0
  2. {dataeval-0.86.7 → dataeval-0.86.8}/PKG-INFO +67 -47
  3. {dataeval-0.86.7 → dataeval-0.86.8}/README.md +30 -16
  4. dataeval-0.86.8/pyproject.toml +261 -0
  5. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/__init__.py +10 -3
  6. dataeval-0.86.8/src/dataeval/_version.py +21 -0
  7. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/config.py +7 -1
  8. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/drift/_mvdc.py +2 -9
  9. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/drift/_nml/_chunk.py +2 -2
  10. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/ood/ae.py +1 -1
  11. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/ood/base.py +3 -3
  12. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/bias/_completeness.py +3 -3
  13. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/bias/_coverage.py +2 -2
  14. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/bias/_parity.py +1 -1
  15. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/estimators/_ber.py +2 -2
  16. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/estimators/_divergence.py +2 -2
  17. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/outputs/_estimators.py +6 -6
  18. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/_array.py +20 -9
  19. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/_clusterer.py +7 -7
  20. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/torch/_internal.py +3 -3
  21. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/torch/trainer.py +1 -1
  22. dataeval-0.86.7/pyproject.toml +0 -207
  23. {dataeval-0.86.7 → dataeval-0.86.8}/LICENSE.txt +0 -0
  24. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/_log.py +0 -0
  25. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/data/__init__.py +0 -0
  26. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/data/_embeddings.py +0 -0
  27. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/data/_images.py +0 -0
  28. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/data/_metadata.py +0 -0
  29. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/data/_selection.py +0 -0
  30. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/data/_split.py +0 -0
  31. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/data/selections/__init__.py +0 -0
  32. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/data/selections/_classbalance.py +0 -0
  33. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/data/selections/_classfilter.py +0 -0
  34. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/data/selections/_indices.py +0 -0
  35. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/data/selections/_limit.py +0 -0
  36. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/data/selections/_prioritize.py +0 -0
  37. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/data/selections/_reverse.py +0 -0
  38. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/data/selections/_shuffle.py +0 -0
  39. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/__init__.py +0 -0
  40. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/drift/__init__.py +0 -0
  41. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/drift/_base.py +0 -0
  42. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/drift/_cvm.py +0 -0
  43. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/drift/_ks.py +0 -0
  44. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/drift/_mmd.py +0 -0
  45. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/drift/_nml/__init__.py +0 -0
  46. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/drift/_nml/_base.py +0 -0
  47. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/drift/_nml/_domainclassifier.py +0 -0
  48. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/drift/_nml/_result.py +0 -0
  49. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/drift/_nml/_thresholds.py +0 -0
  50. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/drift/_uncertainty.py +0 -0
  51. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/drift/updates.py +0 -0
  52. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/linters/__init__.py +0 -0
  53. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/linters/duplicates.py +0 -0
  54. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/linters/outliers.py +0 -0
  55. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/ood/__init__.py +0 -0
  56. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/detectors/ood/mixin.py +0 -0
  57. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metadata/__init__.py +0 -0
  58. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metadata/_distance.py +0 -0
  59. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metadata/_ood.py +0 -0
  60. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metadata/_utils.py +0 -0
  61. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/__init__.py +0 -0
  62. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/bias/__init__.py +0 -0
  63. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/bias/_balance.py +0 -0
  64. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/bias/_diversity.py +0 -0
  65. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/estimators/__init__.py +0 -0
  66. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/estimators/_clusterer.py +0 -0
  67. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/estimators/_uap.py +0 -0
  68. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/stats/__init__.py +0 -0
  69. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/stats/_base.py +0 -0
  70. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/stats/_boxratiostats.py +0 -0
  71. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/stats/_dimensionstats.py +0 -0
  72. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/stats/_hashstats.py +0 -0
  73. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/stats/_imagestats.py +0 -0
  74. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/stats/_labelstats.py +0 -0
  75. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/stats/_pixelstats.py +0 -0
  76. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/metrics/stats/_visualstats.py +0 -0
  77. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/outputs/__init__.py +0 -0
  78. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/outputs/_base.py +0 -0
  79. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/outputs/_bias.py +0 -0
  80. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/outputs/_drift.py +0 -0
  81. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/outputs/_linters.py +0 -0
  82. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/outputs/_metadata.py +0 -0
  83. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/outputs/_ood.py +0 -0
  84. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/outputs/_stats.py +0 -0
  85. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/outputs/_utils.py +0 -0
  86. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/outputs/_workflows.py +0 -0
  87. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/py.typed +0 -0
  88. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/typing.py +0 -0
  89. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/__init__.py +0 -0
  90. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/_bin.py +0 -0
  91. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/_fast_mst.py +0 -0
  92. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/_image.py +0 -0
  93. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/_method.py +0 -0
  94. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/_mst.py +0 -0
  95. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/_plot.py +0 -0
  96. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/data/__init__.py +0 -0
  97. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/data/_dataset.py +0 -0
  98. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/data/collate.py +0 -0
  99. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/data/metadata.py +0 -0
  100. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/datasets/__init__.py +0 -0
  101. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/datasets/_antiuav.py +0 -0
  102. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/datasets/_base.py +0 -0
  103. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/datasets/_cifar10.py +0 -0
  104. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/datasets/_fileio.py +0 -0
  105. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/datasets/_milco.py +0 -0
  106. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/datasets/_mixin.py +0 -0
  107. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/datasets/_mnist.py +0 -0
  108. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/datasets/_ships.py +0 -0
  109. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/datasets/_types.py +0 -0
  110. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/datasets/_voc.py +0 -0
  111. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/torch/__init__.py +0 -0
  112. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/torch/_blocks.py +0 -0
  113. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/torch/_gmm.py +0 -0
  114. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/utils/torch/models.py +0 -0
  115. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/workflows/__init__.py +0 -0
  116. {dataeval-0.86.7 → dataeval-0.86.8}/src/dataeval/workflows/sufficiency.py +0 -0
@@ -0,0 +1,28 @@
1
+ *venv*/
2
+ __pycache__/
3
+ dist/
4
+
5
+ data/
6
+ !src/dataeval/data
7
+ !src/dataeval/utils/data
8
+ !tests/data
9
+ !tests/utils/data
10
+
11
+ docs/build/
12
+ docs/source/reference/autoapi
13
+ docs/source/tutorials/notebooks/checkpoints/
14
+
15
+ output/
16
+ .coverage*
17
+
18
+ .tox/
19
+ .nox/
20
+ .python-version
21
+
22
+ # Used to store user customizable settings
23
+ .settings
24
+ # debug profiles
25
+ .vscode/launch.json
26
+
27
+ # Autogenerated version file
28
+ src/dataeval/_version.py
@@ -1,45 +1,52 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: dataeval
3
- Version: 0.86.7
3
+ Version: 0.86.8
4
4
  Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
5
- Home-page: https://dataeval.ai/
6
- License: MIT
7
- Author: Andrew Weng
8
- Author-email: andrew.weng@ariacoustics.com
9
- Maintainer: ARiA
10
- Maintainer-email: dataeval@ariacoustics.com
11
- Requires-Python: >=3.9,<3.13
5
+ Project-URL: Homepage, https://dataeval.ai/
6
+ Project-URL: Repository, https://github.com/aria-ml/dataeval/
7
+ Project-URL: Documentation, https://dataeval.readthedocs.io/
8
+ Author-email: Andrew Weng <andrew.weng@ariacoustics.com>, Bill Peria <bill.peria@ariacoustics.com>, Jon Botts <jonathan.botts@ariacoustics.com>, Jonathan Christian <jonathan.christian@ariacoustics.com>, Justin McMillan <justin.mcmillan@ariacoustics.com>, Ryan Wood <ryan.wood@ariacoustics.com>, Scott Swan <scott.swan@ariacoustics.com>, Shaun Jullens <shaun.jullens@ariacoustics.com>
9
+ Maintainer-email: ARiA <dataeval@ariacoustics.com>
10
+ License-Expression: MIT
11
+ License-File: LICENSE.txt
12
12
  Classifier: Development Status :: 4 - Beta
13
13
  Classifier: Intended Audience :: Science/Research
14
14
  Classifier: License :: OSI Approved :: MIT License
15
15
  Classifier: Operating System :: OS Independent
16
- Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3 :: Only
17
17
  Classifier: Programming Language :: Python :: 3.9
18
18
  Classifier: Programming Language :: Python :: 3.10
19
19
  Classifier: Programming Language :: Python :: 3.11
20
20
  Classifier: Programming Language :: Python :: 3.12
21
- Classifier: Programming Language :: Python :: 3 :: Only
22
21
  Classifier: Topic :: Scientific/Engineering
22
+ Requires-Python: <3.13,>=3.9
23
+ Requires-Dist: defusedxml>=0.7.1
24
+ Requires-Dist: fast-hdbscan==0.2.0
25
+ Requires-Dist: lightgbm>=4
26
+ Requires-Dist: numba>=0.59.1
27
+ Requires-Dist: numpy>=1.24.2
28
+ Requires-Dist: pandas>=2.0
29
+ Requires-Dist: pillow>=10.3.0
30
+ Requires-Dist: polars>=1.0.0
31
+ Requires-Dist: requests>=2.32.3
32
+ Requires-Dist: scikit-learn>=1.5.0
33
+ Requires-Dist: scipy>=1.10
34
+ Requires-Dist: torch>=2.2.0
35
+ Requires-Dist: torchvision>=0.17.0
36
+ Requires-Dist: tqdm>=4.66
37
+ Requires-Dist: typing-extensions>=4.12; python_version ~= '3.9'
38
+ Requires-Dist: xxhash>=3.3
23
39
  Provides-Extra: all
24
- Requires-Dist: defusedxml (>=0.7.1)
25
- Requires-Dist: fast_hdbscan (==0.2.0)
26
- Requires-Dist: lightgbm (>=4)
27
- Requires-Dist: matplotlib (>=3.7.1) ; extra == "all"
28
- Requires-Dist: numba (>=0.59.1)
29
- Requires-Dist: numpy (>=1.24.2)
30
- Requires-Dist: pandas (>=2.0)
31
- Requires-Dist: pillow (>=10.3.0)
32
- Requires-Dist: polars (>=1.0.0)
33
- Requires-Dist: requests
34
- Requires-Dist: scikit-learn (>=1.5.0)
35
- Requires-Dist: scipy (>=1.10)
36
- Requires-Dist: torch (>=2.2.0)
37
- Requires-Dist: torchvision (>=0.17.0)
38
- Requires-Dist: tqdm
39
- Requires-Dist: typing-extensions (>=4.12) ; python_version >= "3.9" and python_version < "4.0"
40
- Requires-Dist: xxhash (>=3.3)
41
- Project-URL: Documentation, https://dataeval.readthedocs.io/
42
- Project-URL: Repository, https://github.com/aria-ml/dataeval/
40
+ Requires-Dist: matplotlib>=3.7.1; extra == 'all'
41
+ Provides-Extra: cpu
42
+ Requires-Dist: torch>=2.2.0; extra == 'cpu'
43
+ Requires-Dist: torchvision>=0.17.0; extra == 'cpu'
44
+ Provides-Extra: cu118
45
+ Requires-Dist: torch>=2.2.0; extra == 'cu118'
46
+ Requires-Dist: torchvision>=0.17.0; extra == 'cu118'
47
+ Provides-Extra: cu124
48
+ Requires-Dist: torch>=2.2.0; extra == 'cu124'
49
+ Requires-Dist: torchvision>=0.17.0; extra == 'cu124'
43
50
  Description-Content-Type: text/markdown
44
51
 
45
52
  # DataEval
@@ -72,26 +79,28 @@ estimation, bias detection, and dataset linting.
72
79
  <!-- end needs -->
73
80
 
74
81
  <!-- start JATIC interop -->
82
+
75
83
  DataEval is easy to install, supports a wide range of Python versions, and is
76
84
  compatible with many of the most popular packages in the scientific and T&E
77
85
  communities.
78
86
 
79
87
  DataEval also has native interoperability between JATIC's suite of tools when
80
88
  using MAITE-compliant datasets and models.
89
+
81
90
  <!-- end JATIC interop -->
82
91
 
83
92
  ## Getting Started
84
93
 
85
94
  **Python versions:** 3.9 - 3.12
86
95
 
87
- **Supported packages**: *NumPy*, *Pandas*, *Sci-kit learn*, *MAITE*, *NRTK*
96
+ **Supported packages**: _NumPy_, _Pandas_, _Sci-kit learn_, _MAITE_, _NRTK_
88
97
 
89
98
  Choose your preferred method of installation below or follow our
90
99
  [installation guide](https://dataeval.readthedocs.io/en/v0.74.2/installation.html).
91
100
 
92
- * [Installing with pip](#installing-with-pip)
93
- * [Installing with conda/mamba](#installing-with-conda)
94
- * [Installing from GitHub](#installing-from-github)
101
+ - [Installing with pip](#installing-with-pip)
102
+ - [Installing with conda/mamba](#installing-with-conda)
103
+ - [Installing from GitHub](#installing-from-github)
95
104
 
96
105
  ### **Installing with pip**
97
106
 
@@ -105,7 +114,7 @@ pip install dataeval[all]
105
114
  ### **Installing with conda**
106
115
 
107
116
  DataEval can be installed in a Conda/Mamba environment using the provided
108
- `environment.yaml` file. As some dependencies are installed from the `pytorch`
117
+ `environment.yaml` file. As some dependencies are installed from the `pytorch`
109
118
  channel, the channel is specified in the below example.
110
119
 
111
120
  ```bash
@@ -115,12 +124,10 @@ micromamba create -f environment\environment.yaml -c pytorch
115
124
  ### **Installing from GitHub**
116
125
 
117
126
  To install DataEval from source locally on Ubuntu, you will need `git-lfs` to
118
- download larger, binary source files and `poetry` for project dependency
119
- management.
127
+ download larger, binary source files.
120
128
 
121
129
  ```bash
122
130
  sudo apt-get install git-lfs
123
- pip install poetry
124
131
  ```
125
132
 
126
133
  Pull the source down and change to the DataEval project directory.
@@ -130,26 +137,40 @@ git clone https://github.com/aria-ml/dataeval.git
130
137
  cd dataeval
131
138
  ```
132
139
 
133
- Install DataEval with optional dependencies for development.
140
+ #### **Using Poetry**
141
+
142
+ Install DataEval with all extras.
134
143
 
135
144
  ```bash
136
- poetry install --all-extras --with dev
145
+ poetry install --extras=all
137
146
  ```
138
147
 
139
- Now that DataEval is installed, you can run commands in the poetry virtual
140
- environment by prefixing shell commands with `poetry run`, or activate the
141
- virtual environment directly in the shell.
148
+ Enable Poetry's virtual environment.
142
149
 
143
150
  ```bash
144
- poetry shell
151
+ poetry env activate
152
+ ```
153
+
154
+ #### **Using uv**
155
+
156
+ Install DataEval with all extras and dependencies for development.
157
+
158
+ ```bash
159
+ uv sync --extra=all
160
+ ```
161
+
162
+ Enable uv's virtual environment.
163
+
164
+ ```bash
165
+ source .venv/bin/activate
145
166
  ```
146
167
 
147
168
  ## Contact Us
148
169
 
149
170
  If you have any questions, feel free to reach out to the people below:
150
171
 
151
- * **POC**: Scott Swan @scott.swan
152
- * **DPOC**: Andrew Weng @aweng
172
+ - **POC**: Scott Swan @scott.swan
173
+ - **DPOC**: Andrew Weng @aweng
153
174
 
154
175
  ## Acknowledgement
155
176
 
@@ -164,4 +185,3 @@ interpreted as necessarily representing the official policies or endorsements,
164
185
  either expressed or implied, of the U.S. Government.
165
186
 
166
187
  <!-- end acknowledgement -->
167
-
@@ -28,26 +28,28 @@ estimation, bias detection, and dataset linting.
28
28
  <!-- end needs -->
29
29
 
30
30
  <!-- start JATIC interop -->
31
+
31
32
  DataEval is easy to install, supports a wide range of Python versions, and is
32
33
  compatible with many of the most popular packages in the scientific and T&E
33
34
  communities.
34
35
 
35
36
  DataEval also has native interoperability between JATIC's suite of tools when
36
37
  using MAITE-compliant datasets and models.
38
+
37
39
  <!-- end JATIC interop -->
38
40
 
39
41
  ## Getting Started
40
42
 
41
43
  **Python versions:** 3.9 - 3.12
42
44
 
43
- **Supported packages**: *NumPy*, *Pandas*, *Sci-kit learn*, *MAITE*, *NRTK*
45
+ **Supported packages**: _NumPy_, _Pandas_, _Sci-kit learn_, _MAITE_, _NRTK_
44
46
 
45
47
  Choose your preferred method of installation below or follow our
46
48
  [installation guide](https://dataeval.readthedocs.io/en/v0.74.2/installation.html).
47
49
 
48
- * [Installing with pip](#installing-with-pip)
49
- * [Installing with conda/mamba](#installing-with-conda)
50
- * [Installing from GitHub](#installing-from-github)
50
+ - [Installing with pip](#installing-with-pip)
51
+ - [Installing with conda/mamba](#installing-with-conda)
52
+ - [Installing from GitHub](#installing-from-github)
51
53
 
52
54
  ### **Installing with pip**
53
55
 
@@ -61,7 +63,7 @@ pip install dataeval[all]
61
63
  ### **Installing with conda**
62
64
 
63
65
  DataEval can be installed in a Conda/Mamba environment using the provided
64
- `environment.yaml` file. As some dependencies are installed from the `pytorch`
66
+ `environment.yaml` file. As some dependencies are installed from the `pytorch`
65
67
  channel, the channel is specified in the below example.
66
68
 
67
69
  ```bash
@@ -71,12 +73,10 @@ micromamba create -f environment\environment.yaml -c pytorch
71
73
  ### **Installing from GitHub**
72
74
 
73
75
  To install DataEval from source locally on Ubuntu, you will need `git-lfs` to
74
- download larger, binary source files and `poetry` for project dependency
75
- management.
76
+ download larger, binary source files.
76
77
 
77
78
  ```bash
78
79
  sudo apt-get install git-lfs
79
- pip install poetry
80
80
  ```
81
81
 
82
82
  Pull the source down and change to the DataEval project directory.
@@ -86,26 +86,40 @@ git clone https://github.com/aria-ml/dataeval.git
86
86
  cd dataeval
87
87
  ```
88
88
 
89
- Install DataEval with optional dependencies for development.
89
+ #### **Using Poetry**
90
+
91
+ Install DataEval with all extras.
92
+
93
+ ```bash
94
+ poetry install --extras=all
95
+ ```
96
+
97
+ Enable Poetry's virtual environment.
98
+
99
+ ```bash
100
+ poetry env activate
101
+ ```
102
+
103
+ #### **Using uv**
104
+
105
+ Install DataEval with all extras and dependencies for development.
90
106
 
91
107
  ```bash
92
- poetry install --all-extras --with dev
108
+ uv sync --extra=all
93
109
  ```
94
110
 
95
- Now that DataEval is installed, you can run commands in the poetry virtual
96
- environment by prefixing shell commands with `poetry run`, or activate the
97
- virtual environment directly in the shell.
111
+ Enable uv's virtual environment.
98
112
 
99
113
  ```bash
100
- poetry shell
114
+ source .venv/bin/activate
101
115
  ```
102
116
 
103
117
  ## Contact Us
104
118
 
105
119
  If you have any questions, feel free to reach out to the people below:
106
120
 
107
- * **POC**: Scott Swan @scott.swan
108
- * **DPOC**: Andrew Weng @aweng
121
+ - **POC**: Scott Swan @scott.swan
122
+ - **DPOC**: Andrew Weng @aweng
109
123
 
110
124
  ## Acknowledgement
111
125
 
@@ -0,0 +1,261 @@
1
+ [project]
2
+ name = "dataeval"
3
+ dynamic = ["version"]
4
+ description = "DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks"
5
+ authors = [
6
+ { name = "Andrew Weng", email = "andrew.weng@ariacoustics.com" },
7
+ { name = "Bill Peria", email = "bill.peria@ariacoustics.com" },
8
+ { name = "Jon Botts", email = "jonathan.botts@ariacoustics.com" },
9
+ { name = "Jonathan Christian", email = "jonathan.christian@ariacoustics.com" },
10
+ { name = "Justin McMillan", email = "justin.mcmillan@ariacoustics.com" },
11
+ { name = "Ryan Wood", email = "ryan.wood@ariacoustics.com" },
12
+ { name = "Scott Swan", email = "scott.swan@ariacoustics.com" },
13
+ { name = "Shaun Jullens", email = "shaun.jullens@ariacoustics.com" },
14
+ ]
15
+ requires-python = ">=3.9,<3.13"
16
+ readme = "README.md"
17
+ license = "MIT"
18
+ maintainers = [
19
+ { name = "ARiA", email = "dataeval@ariacoustics.com" },
20
+ ]
21
+ classifiers = [
22
+ "Development Status :: 4 - Beta",
23
+ "Operating System :: OS Independent",
24
+ "Intended Audience :: Science/Research",
25
+ "License :: OSI Approved :: MIT License",
26
+ "Programming Language :: Python :: 3 :: Only",
27
+ "Programming Language :: Python :: 3.9",
28
+ "Programming Language :: Python :: 3.10",
29
+ "Programming Language :: Python :: 3.11",
30
+ "Programming Language :: Python :: 3.12",
31
+ "Topic :: Scientific/Engineering",
32
+ ]
33
+ dependencies = [
34
+ "defusedxml>=0.7.1",
35
+ "fast_hdbscan==0.2.0",
36
+ "lightgbm>=4",
37
+ "numba>=0.59.1",
38
+ "numpy>=1.24.2",
39
+ "pandas>=2.0",
40
+ "pillow>=10.3.0",
41
+ "polars>=1.0.0",
42
+ "requests>=2.32.3",
43
+ "scipy>=1.10",
44
+ "scikit-learn>=1.5.0",
45
+ "torch>=2.2.0",
46
+ "torchvision>=0.17.0",
47
+ "tqdm>=4.66",
48
+ "typing-extensions>=4.12 ; python_version ~= '3.9'",
49
+ "xxhash>=3.3",
50
+ ]
51
+
52
+ [project.optional-dependencies]
53
+ cpu = [
54
+ "torch>=2.2.0",
55
+ "torchvision>=0.17.0",
56
+ ]
57
+ cu118 = [
58
+ "torch>=2.2.0",
59
+ "torchvision>=0.17.0",
60
+ ]
61
+ cu124 = [
62
+ "torch>=2.2.0",
63
+ "torchvision>=0.17.0",
64
+ ]
65
+ all = ["matplotlib>=3.7.1"]
66
+
67
+ [project.urls]
68
+ Homepage = "https://dataeval.ai/"
69
+ Repository = "https://github.com/aria-ml/dataeval/"
70
+ Documentation = "https://dataeval.readthedocs.io/"
71
+
72
+ [dependency-groups]
73
+ base = [
74
+ "nox[uv]>=2025.5.1",
75
+ "uv>=0.7.8",
76
+ "pip>=25",
77
+ ]
78
+ lint = [
79
+ "ruff>=0.11",
80
+ "codespell[toml]>=2.3",
81
+ ]
82
+ test = [
83
+ "pytest>=8.3",
84
+ "pytest-cov>=6.1",
85
+ "pytest-xdist>=3.6.1",
86
+ "coverage[toml]>=7.6",
87
+ ]
88
+ type = [
89
+ "pyright[nodejs]>=1.1.400",
90
+ ]
91
+ docs = [
92
+ "numpy>=2.0.2",
93
+ "certifi>=2024.07.04",
94
+ "ipykernel>=6.26.0",
95
+ "ipywidgets>=8.1.1",
96
+ "jinja2>=3.1.6",
97
+ "jupyter-client>=8.6.0",
98
+ "jupyter-cache>=1.0",
99
+ "myst-nb>=1.0",
100
+ "sphinx-autoapi>=3.6.0",
101
+ "sphinx-design>=0.6.1",
102
+ "sphinx-immaterial>=0.12.5",
103
+ "sphinx-new-tab-link>=0.8.0",
104
+ "sphinx-tabs>=3.4.7",
105
+ "Sphinx>=7.2.6",
106
+ "torchmetrics>=1.0.0",
107
+ "markupsafe>=3,<3.0.2",
108
+ ]
109
+ dev = [
110
+ { include-group = "base" },
111
+ { include-group = "lint" },
112
+ { include-group = "test" },
113
+ { include-group = "type" },
114
+ { include-group = "docs" },
115
+ ]
116
+
117
+ [tool.uv]
118
+ conflicts = [
119
+ [
120
+ { extra = "cpu" },
121
+ { extra = "cu118" },
122
+ { extra = "cu124" },
123
+ ],
124
+ ]
125
+
126
+ [[tool.uv.index]]
127
+ name = "pytorch-cpu"
128
+ url = "https://download.pytorch.org/whl/cpu"
129
+ explicit = true
130
+
131
+ [[tool.uv.index]]
132
+ name = "pytorch-cu118"
133
+ url = "https://download.pytorch.org/whl/cu118"
134
+ explicit = true
135
+
136
+ [[tool.uv.index]]
137
+ name = "pytorch-cu124"
138
+ url = "https://download.pytorch.org/whl/cu124"
139
+ explicit = true
140
+
141
+ [tool.uv.sources]
142
+ torch = [
143
+ { index = "pytorch-cpu", extra = "cpu" },
144
+ { index = "pytorch-cu118", extra = "cu118" },
145
+ { index = "pytorch-cu124", extra = "cu124" },
146
+ ]
147
+ torchvision = [
148
+ { index = "pytorch-cpu", extra = "cpu" },
149
+ { index = "pytorch-cu118", extra = "cu118" },
150
+ { index = "pytorch-cu124", extra = "cu124" },
151
+ ]
152
+
153
+ [tool.hatch.build.targets.sdist]
154
+ include = ["src/dataeval"]
155
+
156
+ [tool.hatch.build.targets.wheel]
157
+ include = ["src/dataeval"]
158
+
159
+ [tool.hatch.build.targets.wheel.sources]
160
+ "src/dataeval" = "dataeval"
161
+
162
+ [tool.hatch.version]
163
+ source = "vcs"
164
+
165
+ [tool.hatch.build.hooks.vcs]
166
+ version-file = "src/dataeval/_version.py"
167
+
168
+ [tool.poetry]
169
+ version = "0.0.0" # unused
170
+
171
+ [tool.pyproject2conda.dependencies]
172
+ torch = { pip = true }
173
+ torchvision = { pip = true }
174
+ xxhash = { skip = true, packages = "python-xxhash>=3.3" }
175
+
176
+ [tool.pyright]
177
+ reportMissingImports = false
178
+
179
+ [tool.pytest.ini_options]
180
+ testpaths = ["tests"]
181
+ addopts = [
182
+ "--pythonwarnings=ignore::DeprecationWarning",
183
+ "--verbose",
184
+ "--durations=20",
185
+ "--durations-min=1.0",
186
+ ]
187
+ markers = [
188
+ "required: marks tests for required features",
189
+ "optional: marks tests for optional features",
190
+ "requires_all: marks tests that require matplotlib",
191
+ "cuda: marks tests that require cuda",
192
+ "year: marks tests that need a specified dataset year",
193
+ ]
194
+
195
+ [tool.coverage.run]
196
+ source = ["src/dataeval"]
197
+ branch = true
198
+ concurrency = ["multiprocessing"]
199
+ parallel = true
200
+ omit = ["src/dataeval/_version.py"]
201
+
202
+ [tool.coverage.report]
203
+ exclude_also = [
204
+ "raise NotImplementedError",
205
+ ": \\.\\.\\.",
206
+ "if TYPE_CHECKING:"
207
+ ]
208
+ include = ["*/src/dataeval/*"]
209
+ omit = [
210
+ "*/torch/_blocks.py",
211
+ "*/_clusterer.py",
212
+ "*/_fast_mst.py",
213
+ ]
214
+ fail_under = 90
215
+
216
+ [tool.ruff]
217
+ exclude = [
218
+ ".devcontainer",
219
+ ".github",
220
+ ".vscode",
221
+ ".jupyter_cache",
222
+ "*env*",
223
+ "output",
224
+ "build",
225
+ ".nox",
226
+ ".tox",
227
+ ]
228
+ line-length = 120
229
+ indent-width = 4
230
+ target-version = "py38"
231
+ extend-include = ["*.ipynb"]
232
+
233
+ [tool.ruff.lint]
234
+ select = ["A", "ANN", "C4", "C90", "E", "F", "I", "NPY", "S", "SIM", "RET", "RUF100", "UP"]
235
+ ignore = ["ANN401", "NPY002"]
236
+ fixable = ["ALL"]
237
+ unfixable = []
238
+ dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
239
+ per-file-ignores = { "*.ipynb" = ["E402"], "!src/*" = ["ANN", "S", "RET"]}
240
+
241
+ [tool.ruff.lint.isort]
242
+ known-first-party = ["dataeval"]
243
+
244
+ [tool.ruff.lint.flake8-builtins]
245
+ builtins-strict-checking = false
246
+
247
+ [tool.ruff.format]
248
+ quote-style = "double"
249
+ indent-style = "space"
250
+ skip-magic-trailing-comma = false
251
+ line-ending = "auto"
252
+ docstring-code-format = true
253
+ docstring-code-line-length = "dynamic"
254
+
255
+ [tool.codespell]
256
+ skip = './*env*,./output,./docs/build,./docs/source/.jupyter_cache,CHANGELOG.md,uv.lock,requirements.txt,*.html,./docs/source/*/data'
257
+ ignore-words-list = ["Hart"]
258
+
259
+ [build-system]
260
+ requires = ["hatchling", "hatch-vcs"]
261
+ build-backend = "hatchling.build"
@@ -7,12 +7,19 @@ shifts that impact performance of deployed models.
7
7
 
8
8
  from __future__ import annotations
9
9
 
10
- __all__ = ["config", "detectors", "log", "metrics", "typing", "utils", "workflows"]
11
- __version__ = "0.86.7"
10
+ try:
11
+ from ._version import __version__
12
+ except ImportError:
13
+ __version__ = "unknown"
14
+
15
+ # Strongly type for pyright
16
+ __version__ = str(__version__)
17
+
18
+ __all__ = ["__version__", "config", "detectors", "log", "metrics", "typing", "utils", "workflows"]
12
19
 
13
20
  import logging
14
21
 
15
- from dataeval import config, detectors, metrics, typing, utils, workflows
22
+ from . import config, detectors, metrics, typing, utils, workflows
16
23
 
17
24
  logging.getLogger(__name__).addHandler(logging.NullHandler())
18
25
 
@@ -0,0 +1,21 @@
1
+ # file generated by setuptools-scm
2
+ # don't change, don't track in version control
3
+
4
+ __all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
5
+
6
+ TYPE_CHECKING = False
7
+ if TYPE_CHECKING:
8
+ from typing import Tuple
9
+ from typing import Union
10
+
11
+ VERSION_TUPLE = Tuple[Union[int, str], ...]
12
+ else:
13
+ VERSION_TUPLE = object
14
+
15
+ version: str
16
+ __version__: str
17
+ __version_tuple__: VERSION_TUPLE
18
+ version_tuple: VERSION_TUPLE
19
+
20
+ __version__ = version = '0.86.8'
21
+ __version_tuple__ = version_tuple = (0, 86, 8)
@@ -77,7 +77,13 @@ def get_device(override: DeviceLike | None = None) -> torch.device:
77
77
  """
78
78
  if override is None:
79
79
  global _device
80
- return torch.get_default_device() if _device is None else _device
80
+ return (
81
+ torch.get_default_device()
82
+ if hasattr(torch, "get_default_device")
83
+ else torch.device("cpu")
84
+ if _device is None
85
+ else _device
86
+ )
81
87
  return _todevice(override)
82
88
 
83
89
 
@@ -1,16 +1,9 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import TYPE_CHECKING
4
-
5
3
  import numpy as np
6
4
  import pandas as pd
7
5
  from numpy.typing import ArrayLike
8
6
 
9
- if TYPE_CHECKING:
10
- from typing import Self
11
- else:
12
- from typing_extensions import Self
13
-
14
7
  from dataeval.detectors.drift._nml._chunk import CountBasedChunker, SizeBasedChunker
15
8
  from dataeval.detectors.drift._nml._domainclassifier import DomainClassifierCalculator
16
9
  from dataeval.detectors.drift._nml._thresholds import ConstantThreshold
@@ -52,7 +45,7 @@ class DriftMVDC:
52
45
  threshold=ConstantThreshold(lower=self.threshold[0], upper=self.threshold[1]),
53
46
  )
54
47
 
55
- def fit(self, x_ref: ArrayLike) -> Self:
48
+ def fit(self, x_ref: ArrayLike) -> DriftMVDC:
56
49
  """
57
50
  Fit the domain classifier on the training dataframe
58
51
 
@@ -63,7 +56,7 @@ class DriftMVDC:
63
56
 
64
57
  Returns
65
58
  -------
66
- Self
59
+ DriftMVDC
67
60
 
68
61
  """
69
62
  # for 1D input, assume that is 1 sample: dim[1,n_features]