dataeval 0.85.0__py3-none-any.whl → 0.86.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +1 -1
- dataeval/data/_metadata.py +17 -5
- dataeval/data/_selection.py +1 -1
- dataeval/data/selections/_classfilter.py +4 -3
- dataeval/detectors/drift/__init__.py +4 -1
- dataeval/detectors/drift/_mvdc.py +92 -0
- dataeval/detectors/drift/_nml/__init__.py +6 -0
- dataeval/detectors/drift/_nml/_base.py +68 -0
- dataeval/detectors/drift/_nml/_chunk.py +404 -0
- dataeval/detectors/drift/_nml/_domainclassifier.py +192 -0
- dataeval/detectors/drift/_nml/_result.py +98 -0
- dataeval/detectors/drift/_nml/_thresholds.py +280 -0
- dataeval/outputs/__init__.py +2 -1
- dataeval/outputs/_bias.py +1 -3
- dataeval/outputs/_drift.py +68 -0
- dataeval/outputs/_linters.py +1 -6
- dataeval/outputs/_stats.py +1 -6
- {dataeval-0.85.0.dist-info → dataeval-0.86.0.dist-info}/METADATA +3 -2
- {dataeval-0.85.0.dist-info → dataeval-0.86.0.dist-info}/RECORD +21 -14
- {dataeval-0.85.0.dist-info → dataeval-0.86.0.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.85.0.dist-info → dataeval-0.86.0.dist-info}/WHEEL +0 -0
dataeval/outputs/_drift.py
CHANGED
@@ -2,11 +2,17 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
__all__ = []
|
4
4
|
|
5
|
+
import contextlib
|
5
6
|
from dataclasses import dataclass
|
6
7
|
|
7
8
|
import numpy as np
|
9
|
+
import pandas as pd
|
8
10
|
from numpy.typing import NDArray
|
9
11
|
|
12
|
+
with contextlib.suppress(ImportError):
|
13
|
+
from matplotlib.figure import Figure
|
14
|
+
|
15
|
+
from dataeval.detectors.drift._nml._result import Metric, PerMetricResult
|
10
16
|
from dataeval.outputs._base import Output
|
11
17
|
|
12
18
|
|
@@ -81,3 +87,65 @@ class DriftOutput(DriftBaseOutput):
|
|
81
87
|
feature_threshold: float
|
82
88
|
p_vals: NDArray[np.float32]
|
83
89
|
distances: NDArray[np.float32]
|
90
|
+
|
91
|
+
|
92
|
+
class DriftMVDCOutput(PerMetricResult):
|
93
|
+
"""Class wrapping the results of the classifier for drift detection and providing plotting functionality."""
|
94
|
+
|
95
|
+
def __init__(self, results_data: pd.DataFrame) -> None:
|
96
|
+
"""Initialize a DomainClassifierCalculator results object.
|
97
|
+
|
98
|
+
Parameters
|
99
|
+
----------
|
100
|
+
results_data : pd.DataFrame
|
101
|
+
Results data returned by a DomainClassifierCalculator.
|
102
|
+
"""
|
103
|
+
metric = Metric(display_name="Domain Classifier", column_name="domain_classifier_auroc")
|
104
|
+
super().__init__(results_data, [metric])
|
105
|
+
|
106
|
+
def plot(self, showme: bool = True) -> Figure:
|
107
|
+
"""
|
108
|
+
Render the roc_auc metric over the train/test data in relation to the threshold.
|
109
|
+
|
110
|
+
Parameters
|
111
|
+
----------
|
112
|
+
showme : bool, default True
|
113
|
+
Option to display the figure.
|
114
|
+
|
115
|
+
Returns
|
116
|
+
-------
|
117
|
+
matplotlib.figure.Figure
|
118
|
+
|
119
|
+
"""
|
120
|
+
import matplotlib.pyplot as plt
|
121
|
+
|
122
|
+
fig, ax = plt.subplots(dpi=300)
|
123
|
+
resdf = self.to_df()
|
124
|
+
xticks = np.arange(resdf.shape[0])
|
125
|
+
trndf = resdf[resdf["chunk"]["period"] == "reference"]
|
126
|
+
tstdf = resdf[resdf["chunk"]["period"] == "analysis"]
|
127
|
+
# Get local indices for drift markers
|
128
|
+
driftx = np.where(resdf["domain_classifier_auroc"]["alert"].values) # type: ignore | dataframe
|
129
|
+
if np.size(driftx) > 2:
|
130
|
+
ax.plot(resdf.index, resdf["domain_classifier_auroc"]["upper_threshold"], "r--", label="thr_up")
|
131
|
+
ax.plot(resdf.index, resdf["domain_classifier_auroc"]["lower_threshold"], "r--", label="thr_low")
|
132
|
+
ax.plot(trndf.index, trndf["domain_classifier_auroc"]["value"], "b", label="train")
|
133
|
+
ax.plot(tstdf.index, tstdf["domain_classifier_auroc"]["value"], "g", label="test")
|
134
|
+
ax.plot(
|
135
|
+
resdf.index.values[driftx], # type: ignore | dataframe
|
136
|
+
resdf["domain_classifier_auroc"]["value"].values[driftx], # type: ignore | dataframe
|
137
|
+
"dm",
|
138
|
+
markersize=3,
|
139
|
+
label="drift",
|
140
|
+
)
|
141
|
+
ax.set_xticks(xticks)
|
142
|
+
ax.tick_params(axis="x", labelsize=6)
|
143
|
+
ax.tick_params(axis="y", labelsize=6)
|
144
|
+
ax.legend(loc="lower left", fontsize=6)
|
145
|
+
ax.set_title("Domain Classifier, Drift Detection", fontsize=8)
|
146
|
+
ax.set_ylabel("ROC AUC", fontsize=7)
|
147
|
+
ax.set_xlabel("Chunk Index", fontsize=7)
|
148
|
+
ax.set_ylim((0.0, 1.1))
|
149
|
+
if showme:
|
150
|
+
plt.show()
|
151
|
+
return fig
|
dataeval/outputs/_linters.py
CHANGED
@@ -2,15 +2,12 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
__all__ = []
|
4
4
|
|
5
|
-
import contextlib
|
6
5
|
from dataclasses import dataclass
|
7
6
|
from typing import Generic, TypeVar, Union
|
8
7
|
|
8
|
+
import pandas as pd
|
9
9
|
from typing_extensions import TypeAlias
|
10
10
|
|
11
|
-
with contextlib.suppress(ImportError):
|
12
|
-
import pandas as pd
|
13
|
-
|
14
11
|
from dataeval.outputs._base import Output
|
15
12
|
from dataeval.outputs._stats import DimensionStatsOutput, LabelStatsOutput, PixelStatsOutput, VisualStatsOutput
|
16
13
|
|
@@ -168,8 +165,6 @@ class OutliersOutput(Output, Generic[TIndexIssueMap]):
|
|
168
165
|
-----
|
169
166
|
This method requires `pandas <https://pandas.pydata.org/>`_ to be installed.
|
170
167
|
"""
|
171
|
-
import pandas as pd
|
172
|
-
|
173
168
|
if isinstance(self.issues, dict):
|
174
169
|
_, classwise = _reorganize_by_class_and_metric(self.issues, labelstats)
|
175
170
|
data = _create_pandas_dataframe(classwise)
|
dataeval/outputs/_stats.py
CHANGED
@@ -2,17 +2,14 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
__all__ = []
|
4
4
|
|
5
|
-
import contextlib
|
6
5
|
from dataclasses import dataclass
|
7
6
|
from typing import Any, Iterable, NamedTuple, Optional, Union
|
8
7
|
|
9
8
|
import numpy as np
|
9
|
+
import pandas as pd
|
10
10
|
from numpy.typing import NDArray
|
11
11
|
from typing_extensions import TypeAlias
|
12
12
|
|
13
|
-
with contextlib.suppress(ImportError):
|
14
|
-
import pandas as pd
|
15
|
-
|
16
13
|
from dataeval.outputs._base import Output
|
17
14
|
from dataeval.utils._plot import channel_histogram_plot, histogram_plot
|
18
15
|
|
@@ -281,8 +278,6 @@ class LabelStatsOutput(Output):
|
|
281
278
|
-------
|
282
279
|
pd.DataFrame
|
283
280
|
"""
|
284
|
-
import pandas as pd
|
285
|
-
|
286
281
|
total_count = []
|
287
282
|
image_count = []
|
288
283
|
for cls in range(len(self.class_names)):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: dataeval
|
3
|
-
Version: 0.85.0
|
3
|
+
Version: 0.86.0
|
4
4
|
Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
|
5
5
|
Home-page: https://dataeval.ai/
|
6
6
|
License: MIT
|
@@ -23,10 +23,11 @@ Classifier: Topic :: Scientific/Engineering
|
|
23
23
|
Provides-Extra: all
|
24
24
|
Requires-Dist: defusedxml (>=0.7.1)
|
25
25
|
Requires-Dist: fast_hdbscan (==0.2.0)
|
26
|
+
Requires-Dist: lightgbm (>=4)
|
26
27
|
Requires-Dist: matplotlib (>=3.7.1) ; extra == "all"
|
27
28
|
Requires-Dist: numba (>=0.59.1)
|
28
29
|
Requires-Dist: numpy (>=1.24.2)
|
29
|
-
Requires-Dist: pandas (>=2.0) ; extra == "all"
|
30
|
+
Requires-Dist: pandas (>=2.0)
|
30
31
|
Requires-Dist: pillow (>=10.3.0)
|
31
32
|
Requires-Dist: requests
|
32
33
|
Requires-Dist: scikit-learn (>=1.5.0)
|
@@ -1,27 +1,34 @@
|
|
1
|
-
dataeval/__init__.py,sha256=
|
1
|
+
dataeval/__init__.py,sha256=GdieNQ3woQUTyIFklJx7AgEeiBCz9gXzo-UVt6YFHPo,1636
|
2
2
|
dataeval/_log.py,sha256=Mn5bRWO0cgtAYd5VGYSFiPgu57ta3zoktrtHAZ1m3dU,357
|
3
3
|
dataeval/config.py,sha256=lD1YDH8HosFeRU5rQEYRBcmXMZy-csWaMlJTRZGd9iU,3582
|
4
4
|
dataeval/data/__init__.py,sha256=qNnRRiVP_sLthkkHpUrMgI_r8dQK-cC-xoGrrjQeRKc,544
|
5
5
|
dataeval/data/_embeddings.py,sha256=6Medqj_JCQt1iwZwWGSs1OeX-bHB8bg5BJqADY1N2s8,12883
|
6
6
|
dataeval/data/_images.py,sha256=WF9XJRka8ohUdyI2IKBMAy3JoJhOm1iC-8tbYl8woRM,2642
|
7
|
-
dataeval/data/_metadata.py,sha256=
|
8
|
-
dataeval/data/_selection.py,sha256=
|
7
|
+
dataeval/data/_metadata.py,sha256=mK-WbrFkMo3v8f66uHT4B6-Fsc1odh0CcMTuz2aXSZc,14968
|
8
|
+
dataeval/data/_selection.py,sha256=rYCM4KTqLSOYOzyjKCQKH2KQgJhxNnB2g3pY4JbOEYc,4503
|
9
9
|
dataeval/data/_split.py,sha256=6Jtm_i__CcPtNE3eSeBdPxc7gn7Cp-GM7g9wJWFlVus,16761
|
10
10
|
dataeval/data/_targets.py,sha256=ws5d9wRiDkIuOV7GSAKNxzgSm6AWTgb0BFroQK5nAmM,3057
|
11
11
|
dataeval/data/selections/__init__.py,sha256=2m8ZB53wXzqLcqmc6p5atO6graB6ZyiRSNJFxf11X_g,613
|
12
12
|
dataeval/data/selections/_classbalance.py,sha256=7v8ApoL3X8eCZ6fGDNTehE_bZ1loaP3TlhsJLaICVWg,1458
|
13
|
-
dataeval/data/selections/_classfilter.py,sha256=
|
13
|
+
dataeval/data/selections/_classfilter.py,sha256=VSNl_BSPRHQOBU6GYQwPZhl7j2jYESVJSSdyqWiG_vA,4394
|
14
14
|
dataeval/data/selections/_indices.py,sha256=RFsR9z10aM3N0gJSfKrukFpi-LkiQGXoOwXhmOQ5cpg,630
|
15
15
|
dataeval/data/selections/_limit.py,sha256=JG4GmEiNKt3sk4PbOUbBnGGzNlyz72H-kQrt8COMm4Y,512
|
16
16
|
dataeval/data/selections/_prioritize.py,sha256=yw51ZQk6FPvyC38M4_pS_Se2Dq0LDFcdDhfbsELzTZc,11306
|
17
17
|
dataeval/data/selections/_reverse.py,sha256=b67kNC43A5KpQOic5gifjo9HpJ7FMh4LFCrfovPiJ-M,368
|
18
18
|
dataeval/data/selections/_shuffle.py,sha256=gVz_2T4rlucq8Ytqz5jvmmZdTrZDaIv43jJbq97tLjQ,1173
|
19
19
|
dataeval/detectors/__init__.py,sha256=3Sg-XWlwr75zEEH3hZKA4nWMtGvaRlnfzTWvZG_Ak6U,189
|
20
|
-
dataeval/detectors/drift/__init__.py,sha256=
|
20
|
+
dataeval/detectors/drift/__init__.py,sha256=Jqv98oOVeC2tvHlNGxQ8RJ6De2q4SyS5lTpaYlb4ocM,756
|
21
21
|
dataeval/detectors/drift/_base.py,sha256=amGqzUAe8fU5qwM5lq1p8PCuhjGh9MHkdW1zeBF1LEE,7574
|
22
22
|
dataeval/detectors/drift/_cvm.py,sha256=cS33zWJmFY1fft1XcANcP2jSD5ou7TxvIU2AldhTynM,3004
|
23
23
|
dataeval/detectors/drift/_ks.py,sha256=uMc5-NA-lSV1IODrY8uJe87ll3uRJT_oXLJFXy95M1w,3186
|
24
24
|
dataeval/detectors/drift/_mmd.py,sha256=wHUy_vUafCikrZ_WX8qQXpxFwzw07-5zVutloR6hl1k,11589
|
25
|
+
dataeval/detectors/drift/_mvdc.py,sha256=ABxGut6KzxF_oM-Hs87WARCR0692dhPVdZNoGGwJaa4,3058
|
26
|
+
dataeval/detectors/drift/_nml/__init__.py,sha256=MNyKyZlfTjr5uQql2uBBfRkUdsuduie_WJdn09GYmqg,137
|
27
|
+
dataeval/detectors/drift/_nml/_base.py,sha256=g8RmOnsBVN8vV1S9B9JaQQLudcbyKERwy4OuDjGIxb8,2632
|
28
|
+
dataeval/detectors/drift/_nml/_chunk.py,sha256=QxohvSycm_cjldmK-ll-APfIsopPgeATHV-9aejyIKE,13826
|
29
|
+
dataeval/detectors/drift/_nml/_domainclassifier.py,sha256=ccb1tgJ_K7gMYtg1Wdy2gPIpYIhconHQVu3xW5v0hjs,7743
|
30
|
+
dataeval/detectors/drift/_nml/_result.py,sha256=mnWnP1CwzrDChJygcsuFhkKR5g3yAQS520oo-l9PcZU,3273
|
31
|
+
dataeval/detectors/drift/_nml/_thresholds.py,sha256=jnhfd0qR99TKF0PyUVcbtE7cj9lic0QxwrWq_fwoAHM,12687
|
25
32
|
dataeval/detectors/drift/_uncertainty.py,sha256=BHlykJ-r7TGLJxdPfoazXnoAJ1qVDzbk5HjAMdsnHz8,5847
|
26
33
|
dataeval/detectors/drift/updates.py,sha256=L1PnrPlIE1x6ujCc5mCwjcAZwadVTn-Zjb6MnTDvzJQ,2251
|
27
34
|
dataeval/detectors/linters/__init__.py,sha256=xn2zPwUcmsuf-Jd9uw6AVI11C9z1b1Y9fYtuFnXenZ0,404
|
@@ -57,15 +64,15 @@ dataeval/metrics/stats/_imagestats.py,sha256=gUPNgN5Zwzdr7WnSwbve1NXNsyxd5dy3cSn
|
|
57
64
|
dataeval/metrics/stats/_labelstats.py,sha256=lz8I6eSd8tFkmQqy5cOG8hn9yxs0mP-Ic9ratFHiuoU,2813
|
58
65
|
dataeval/metrics/stats/_pixelstats.py,sha256=SfergRbjNJE4h0xqe-0c8RnKtZmEkZ9MwExdipLSGvg,3247
|
59
66
|
dataeval/metrics/stats/_visualstats.py,sha256=cq4AbF2B50Ihbzb86FphcnKQ1TSwNnP3PsnbpiPQZWw,3698
|
60
|
-
dataeval/outputs/__init__.py,sha256=
|
67
|
+
dataeval/outputs/__init__.py,sha256=geHB5M3QOiFFaQGV4ZwDTTKpqZPvPePbqG7lzaPhaXQ,1741
|
61
68
|
dataeval/outputs/_base.py,sha256=aZFbgybnZSQ3ws7QYRLTbDFqUfBFRVtIwX2LZfeGFUA,5703
|
62
|
-
dataeval/outputs/_bias.py,sha256=
|
63
|
-
dataeval/outputs/_drift.py,sha256=
|
69
|
+
dataeval/outputs/_bias.py,sha256=_4qgboPstvEFBjTPZOVAOOaXb_BMARLiHY_ElA5wD8E,12368
|
70
|
+
dataeval/outputs/_drift.py,sha256=kS6gGfaf0XOivf1D8go2fzF5yxl0EHlWFlkwv-4LMNI,4770
|
64
71
|
dataeval/outputs/_estimators.py,sha256=a2oAIxxEDZ9WLGfMWH8KD-BVUS_SnULRPR-iI9hFPoQ,3047
|
65
|
-
dataeval/outputs/_linters.py,sha256=
|
72
|
+
dataeval/outputs/_linters.py,sha256=PqLa2wIAkwC-NCb5dhDN29PtTiCUk2TLDFpsMO7Awrc,6325
|
66
73
|
dataeval/outputs/_metadata.py,sha256=ffZgpX8KWURPHXpOWjbvJ2KRqWQkS2nWuIjKUzoHhMI,1710
|
67
74
|
dataeval/outputs/_ood.py,sha256=suLKVXULGtXH0rq9eXHI1d3d2jhGmItJtz4QiQd47A4,1718
|
68
|
-
dataeval/outputs/_stats.py,sha256=
|
75
|
+
dataeval/outputs/_stats.py,sha256=ACUzwsalDl-bV8llaBArZQ1tLj07RFvzmv-IXViAvSA,13089
|
69
76
|
dataeval/outputs/_utils.py,sha256=HHlGC7sk416m_3Bgn075Qdblz_aPup_UOafJpB0RuXY,893
|
70
77
|
dataeval/outputs/_workflows.py,sha256=MkRD6ubI4NCBXb9v3kjXy64cUGs3G-JKkBdOpRD9XVE,10750
|
71
78
|
dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -101,7 +108,7 @@ dataeval/utils/torch/models.py,sha256=hmroEs6C6jQ5tAoZa71RFeIvXLxfXrTJSFH_jG2LGQ
|
|
101
108
|
dataeval/utils/torch/trainer.py,sha256=iUotX4OdirH8-ZtjdpU8gbJavkYW9YY9qpA2mAlFy1Y,5520
|
102
109
|
dataeval/workflows/__init__.py,sha256=ou8y0KO-d6W5lgmcyLjKlf-J_ckP3vilW7wHkgiDlZ4,255
|
103
110
|
dataeval/workflows/sufficiency.py,sha256=mjKmfRrAjShLUFIARv5o8yT5fnFvDsS5Qu6ujIPUgQg,8497
|
104
|
-
dataeval-0.85.0.dist-info/LICENSE.txt,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
|
105
|
-
dataeval-0.85.0.dist-info/METADATA,sha256=
|
106
|
-
dataeval-0.85.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
107
|
-
dataeval-0.85.0.dist-info/RECORD,,
|
111
|
+
dataeval-0.86.0.dist-info/LICENSE.txt,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
|
112
|
+
dataeval-0.86.0.dist-info/METADATA,sha256=viF0VCgv5_1SzwfTVCTNdbw1q5k1D3hgJhB7PoZ1tCM,5321
|
113
|
+
dataeval-0.86.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
114
|
+
dataeval-0.86.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|