dataeval 0.85.0__py3-none-any.whl → 0.86.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,11 +2,17 @@ from __future__ import annotations
2
2
 
3
3
  __all__ = []
4
4
 
5
+ import contextlib
5
6
  from dataclasses import dataclass
6
7
 
7
8
  import numpy as np
9
+ import pandas as pd
8
10
  from numpy.typing import NDArray
9
11
 
12
+ with contextlib.suppress(ImportError):
13
+ from matplotlib.figure import Figure
14
+
15
+ from dataeval.detectors.drift._nml._result import Metric, PerMetricResult
10
16
  from dataeval.outputs._base import Output
11
17
 
12
18
 
@@ -81,3 +87,65 @@ class DriftOutput(DriftBaseOutput):
81
87
  feature_threshold: float
82
88
  p_vals: NDArray[np.float32]
83
89
  distances: NDArray[np.float32]
90
+
91
+
92
+ class DriftMVDCOutput(PerMetricResult):
93
+ """Class wrapping the results of the classifier for drift detection and providing plotting functionality."""
94
+
95
+ def __init__(self, results_data: pd.DataFrame) -> None:
96
+ """Initialize a DomainClassifierCalculator results object.
97
+
98
+ Parameters
99
+ ----------
100
+ results_data : pd.DataFrame
101
+ Results data returned by a DomainClassifierCalculator.
102
+ """
103
+ metric = Metric(display_name="Domain Classifier", column_name="domain_classifier_auroc")
104
+ super().__init__(results_data, [metric])
105
+
106
+ def plot(self, showme: bool = True) -> Figure:
107
+ """
108
+ Render the roc_auc metric over the train/test data in relation to the threshold.
109
+
110
+ Parameters
111
+ ----------
112
+ showme : bool, default True
113
+ Option to display the figure.
114
+
115
+ Returns
116
+ -------
117
+ matplotlib.figure.Figure
118
+
119
+ """
120
+ import matplotlib.pyplot as plt
121
+
122
+ fig, ax = plt.subplots(dpi=300)
123
+ resdf = self.to_df()
124
+ xticks = np.arange(resdf.shape[0])
125
+ trndf = resdf[resdf["chunk"]["period"] == "reference"]
126
+ tstdf = resdf[resdf["chunk"]["period"] == "analysis"]
127
+ # Get local indices for drift markers
128
+ driftx = np.where(resdf["domain_classifier_auroc"]["alert"].values) # type: ignore | dataframe
129
+ if np.size(driftx) > 2:
130
+ ax.plot(resdf.index, resdf["domain_classifier_auroc"]["upper_threshold"], "r--", label="thr_up")
131
+ ax.plot(resdf.index, resdf["domain_classifier_auroc"]["lower_threshold"], "r--", label="thr_low")
132
+ ax.plot(trndf.index, trndf["domain_classifier_auroc"]["value"], "b", label="train")
133
+ ax.plot(tstdf.index, tstdf["domain_classifier_auroc"]["value"], "g", label="test")
134
+ ax.plot(
135
+ resdf.index.values[driftx], # type: ignore | dataframe
136
+ resdf["domain_classifier_auroc"]["value"].values[driftx], # type: ignore | dataframe
137
+ "dm",
138
+ markersize=3,
139
+ label="drift",
140
+ )
141
+ ax.set_xticks(xticks)
142
+ ax.tick_params(axis="x", labelsize=6)
143
+ ax.tick_params(axis="y", labelsize=6)
144
+ ax.legend(loc="lower left", fontsize=6)
145
+ ax.set_title("Domain Classifier, Drift Detection", fontsize=8)
146
+ ax.set_ylabel("ROC AUC", fontsize=7)
147
+ ax.set_xlabel("Chunk Index", fontsize=7)
148
+ ax.set_ylim((0.0, 1.1))
149
+ if showme:
150
+ plt.show()
151
+ return fig
@@ -2,15 +2,12 @@ from __future__ import annotations
2
2
 
3
3
  __all__ = []
4
4
 
5
- import contextlib
6
5
  from dataclasses import dataclass
7
6
  from typing import Generic, TypeVar, Union
8
7
 
8
+ import pandas as pd
9
9
  from typing_extensions import TypeAlias
10
10
 
11
- with contextlib.suppress(ImportError):
12
- import pandas as pd
13
-
14
11
  from dataeval.outputs._base import Output
15
12
  from dataeval.outputs._stats import DimensionStatsOutput, LabelStatsOutput, PixelStatsOutput, VisualStatsOutput
16
13
 
@@ -168,8 +165,6 @@ class OutliersOutput(Output, Generic[TIndexIssueMap]):
168
165
  -----
169
166
  This method requires `pandas <https://pandas.pydata.org/>`_ to be installed.
170
167
  """
171
- import pandas as pd
172
-
173
168
  if isinstance(self.issues, dict):
174
169
  _, classwise = _reorganize_by_class_and_metric(self.issues, labelstats)
175
170
  data = _create_pandas_dataframe(classwise)
@@ -2,17 +2,14 @@ from __future__ import annotations
2
2
 
3
3
  __all__ = []
4
4
 
5
- import contextlib
6
5
  from dataclasses import dataclass
7
6
  from typing import Any, Iterable, NamedTuple, Optional, Union
8
7
 
9
8
  import numpy as np
9
+ import pandas as pd
10
10
  from numpy.typing import NDArray
11
11
  from typing_extensions import TypeAlias
12
12
 
13
- with contextlib.suppress(ImportError):
14
- import pandas as pd
15
-
16
13
  from dataeval.outputs._base import Output
17
14
  from dataeval.utils._plot import channel_histogram_plot, histogram_plot
18
15
 
@@ -281,8 +278,6 @@ class LabelStatsOutput(Output):
281
278
  -------
282
279
  pd.DataFrame
283
280
  """
284
- import pandas as pd
285
-
286
281
  total_count = []
287
282
  image_count = []
288
283
  for cls in range(len(self.class_names)):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dataeval
3
- Version: 0.85.0
3
+ Version: 0.86.0
4
4
  Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
5
5
  Home-page: https://dataeval.ai/
6
6
  License: MIT
@@ -23,10 +23,11 @@ Classifier: Topic :: Scientific/Engineering
23
23
  Provides-Extra: all
24
24
  Requires-Dist: defusedxml (>=0.7.1)
25
25
  Requires-Dist: fast_hdbscan (==0.2.0)
26
+ Requires-Dist: lightgbm (>=4)
26
27
  Requires-Dist: matplotlib (>=3.7.1) ; extra == "all"
27
28
  Requires-Dist: numba (>=0.59.1)
28
29
  Requires-Dist: numpy (>=1.24.2)
29
- Requires-Dist: pandas (>=2.0) ; extra == "all"
30
+ Requires-Dist: pandas (>=2.0)
30
31
  Requires-Dist: pillow (>=10.3.0)
31
32
  Requires-Dist: requests
32
33
  Requires-Dist: scikit-learn (>=1.5.0)
@@ -1,27 +1,34 @@
1
- dataeval/__init__.py,sha256=6uGTi3XO_V1liRkoVKdH5Ue8LPXkinLwmAsJzZO40Rk,1636
1
+ dataeval/__init__.py,sha256=GdieNQ3woQUTyIFklJx7AgEeiBCz9gXzo-UVt6YFHPo,1636
2
2
  dataeval/_log.py,sha256=Mn5bRWO0cgtAYd5VGYSFiPgu57ta3zoktrtHAZ1m3dU,357
3
3
  dataeval/config.py,sha256=lD1YDH8HosFeRU5rQEYRBcmXMZy-csWaMlJTRZGd9iU,3582
4
4
  dataeval/data/__init__.py,sha256=qNnRRiVP_sLthkkHpUrMgI_r8dQK-cC-xoGrrjQeRKc,544
5
5
  dataeval/data/_embeddings.py,sha256=6Medqj_JCQt1iwZwWGSs1OeX-bHB8bg5BJqADY1N2s8,12883
6
6
  dataeval/data/_images.py,sha256=WF9XJRka8ohUdyI2IKBMAy3JoJhOm1iC-8tbYl8woRM,2642
7
- dataeval/data/_metadata.py,sha256=hNgsCEN8EyfDDX7zLKcQnsaDl-9xvvs5tUzqMjVLvI4,14457
8
- dataeval/data/_selection.py,sha256=V61_pTFj0hSzmltA6CV5t51Znqw2dIQZ71Iu46bLm44,4486
7
+ dataeval/data/_metadata.py,sha256=mK-WbrFkMo3v8f66uHT4B6-Fsc1odh0CcMTuz2aXSZc,14968
8
+ dataeval/data/_selection.py,sha256=rYCM4KTqLSOYOzyjKCQKH2KQgJhxNnB2g3pY4JbOEYc,4503
9
9
  dataeval/data/_split.py,sha256=6Jtm_i__CcPtNE3eSeBdPxc7gn7Cp-GM7g9wJWFlVus,16761
10
10
  dataeval/data/_targets.py,sha256=ws5d9wRiDkIuOV7GSAKNxzgSm6AWTgb0BFroQK5nAmM,3057
11
11
  dataeval/data/selections/__init__.py,sha256=2m8ZB53wXzqLcqmc6p5atO6graB6ZyiRSNJFxf11X_g,613
12
12
  dataeval/data/selections/_classbalance.py,sha256=7v8ApoL3X8eCZ6fGDNTehE_bZ1loaP3TlhsJLaICVWg,1458
13
- dataeval/data/selections/_classfilter.py,sha256=rEeq959p_SLl_etS7pcM8ZxK4yzEYlYZAQ3FlcLV0R8,4330
13
+ dataeval/data/selections/_classfilter.py,sha256=VSNl_BSPRHQOBU6GYQwPZhl7j2jYESVJSSdyqWiG_vA,4394
14
14
  dataeval/data/selections/_indices.py,sha256=RFsR9z10aM3N0gJSfKrukFpi-LkiQGXoOwXhmOQ5cpg,630
15
15
  dataeval/data/selections/_limit.py,sha256=JG4GmEiNKt3sk4PbOUbBnGGzNlyz72H-kQrt8COMm4Y,512
16
16
  dataeval/data/selections/_prioritize.py,sha256=yw51ZQk6FPvyC38M4_pS_Se2Dq0LDFcdDhfbsELzTZc,11306
17
17
  dataeval/data/selections/_reverse.py,sha256=b67kNC43A5KpQOic5gifjo9HpJ7FMh4LFCrfovPiJ-M,368
18
18
  dataeval/data/selections/_shuffle.py,sha256=gVz_2T4rlucq8Ytqz5jvmmZdTrZDaIv43jJbq97tLjQ,1173
19
19
  dataeval/detectors/__init__.py,sha256=3Sg-XWlwr75zEEH3hZKA4nWMtGvaRlnfzTWvZG_Ak6U,189
20
- dataeval/detectors/drift/__init__.py,sha256=gD8aY5PotS-S2ot7iB_z_zzSOjIbQLw5znFBNj0jtHE,646
20
+ dataeval/detectors/drift/__init__.py,sha256=Jqv98oOVeC2tvHlNGxQ8RJ6De2q4SyS5lTpaYlb4ocM,756
21
21
  dataeval/detectors/drift/_base.py,sha256=amGqzUAe8fU5qwM5lq1p8PCuhjGh9MHkdW1zeBF1LEE,7574
22
22
  dataeval/detectors/drift/_cvm.py,sha256=cS33zWJmFY1fft1XcANcP2jSD5ou7TxvIU2AldhTynM,3004
23
23
  dataeval/detectors/drift/_ks.py,sha256=uMc5-NA-lSV1IODrY8uJe87ll3uRJT_oXLJFXy95M1w,3186
24
24
  dataeval/detectors/drift/_mmd.py,sha256=wHUy_vUafCikrZ_WX8qQXpxFwzw07-5zVutloR6hl1k,11589
25
+ dataeval/detectors/drift/_mvdc.py,sha256=ABxGut6KzxF_oM-Hs87WARCR0692dhPVdZNoGGwJaa4,3058
26
+ dataeval/detectors/drift/_nml/__init__.py,sha256=MNyKyZlfTjr5uQql2uBBfRkUdsuduie_WJdn09GYmqg,137
27
+ dataeval/detectors/drift/_nml/_base.py,sha256=g8RmOnsBVN8vV1S9B9JaQQLudcbyKERwy4OuDjGIxb8,2632
28
+ dataeval/detectors/drift/_nml/_chunk.py,sha256=QxohvSycm_cjldmK-ll-APfIsopPgeATHV-9aejyIKE,13826
29
+ dataeval/detectors/drift/_nml/_domainclassifier.py,sha256=ccb1tgJ_K7gMYtg1Wdy2gPIpYIhconHQVu3xW5v0hjs,7743
30
+ dataeval/detectors/drift/_nml/_result.py,sha256=mnWnP1CwzrDChJygcsuFhkKR5g3yAQS520oo-l9PcZU,3273
31
+ dataeval/detectors/drift/_nml/_thresholds.py,sha256=jnhfd0qR99TKF0PyUVcbtE7cj9lic0QxwrWq_fwoAHM,12687
25
32
  dataeval/detectors/drift/_uncertainty.py,sha256=BHlykJ-r7TGLJxdPfoazXnoAJ1qVDzbk5HjAMdsnHz8,5847
26
33
  dataeval/detectors/drift/updates.py,sha256=L1PnrPlIE1x6ujCc5mCwjcAZwadVTn-Zjb6MnTDvzJQ,2251
27
34
  dataeval/detectors/linters/__init__.py,sha256=xn2zPwUcmsuf-Jd9uw6AVI11C9z1b1Y9fYtuFnXenZ0,404
@@ -57,15 +64,15 @@ dataeval/metrics/stats/_imagestats.py,sha256=gUPNgN5Zwzdr7WnSwbve1NXNsyxd5dy3cSn
57
64
  dataeval/metrics/stats/_labelstats.py,sha256=lz8I6eSd8tFkmQqy5cOG8hn9yxs0mP-Ic9ratFHiuoU,2813
58
65
  dataeval/metrics/stats/_pixelstats.py,sha256=SfergRbjNJE4h0xqe-0c8RnKtZmEkZ9MwExdipLSGvg,3247
59
66
  dataeval/metrics/stats/_visualstats.py,sha256=cq4AbF2B50Ihbzb86FphcnKQ1TSwNnP3PsnbpiPQZWw,3698
60
- dataeval/outputs/__init__.py,sha256=ciK-RdXgtn_s7MSCUW1UXvrXltMbltqbpfe9_V7xGrI,1701
67
+ dataeval/outputs/__init__.py,sha256=geHB5M3QOiFFaQGV4ZwDTTKpqZPvPePbqG7lzaPhaXQ,1741
61
68
  dataeval/outputs/_base.py,sha256=aZFbgybnZSQ3ws7QYRLTbDFqUfBFRVtIwX2LZfeGFUA,5703
62
- dataeval/outputs/_bias.py,sha256=7L-d3DUWY6Vud7iX_VoQT0HG0KaV1U35gvmRApqzyB0,12401
63
- dataeval/outputs/_drift.py,sha256=gOiu2C-ERTWiRqlP0auMYxPBGdm9HecWPqWfg7I4tZg,2015
69
+ dataeval/outputs/_bias.py,sha256=_4qgboPstvEFBjTPZOVAOOaXb_BMARLiHY_ElA5wD8E,12368
70
+ dataeval/outputs/_drift.py,sha256=kS6gGfaf0XOivf1D8go2fzF5yxl0EHlWFlkwv-4LMNI,4770
64
71
  dataeval/outputs/_estimators.py,sha256=a2oAIxxEDZ9WLGfMWH8KD-BVUS_SnULRPR-iI9hFPoQ,3047
65
- dataeval/outputs/_linters.py,sha256=YOdjrfm8ypdRrqYOaPM9nc6wVJI3-ita3Haj7LHDNaw,6416
72
+ dataeval/outputs/_linters.py,sha256=PqLa2wIAkwC-NCb5dhDN29PtTiCUk2TLDFpsMO7Awrc,6325
66
73
  dataeval/outputs/_metadata.py,sha256=ffZgpX8KWURPHXpOWjbvJ2KRqWQkS2nWuIjKUzoHhMI,1710
67
74
  dataeval/outputs/_ood.py,sha256=suLKVXULGtXH0rq9eXHI1d3d2jhGmItJtz4QiQd47A4,1718
68
- dataeval/outputs/_stats.py,sha256=c73Yc3Kkrl-MN6BGKe1V0Yr6Ix2Yp_DZZfFSp8fZMZ0,13180
75
+ dataeval/outputs/_stats.py,sha256=ACUzwsalDl-bV8llaBArZQ1tLj07RFvzmv-IXViAvSA,13089
69
76
  dataeval/outputs/_utils.py,sha256=HHlGC7sk416m_3Bgn075Qdblz_aPup_UOafJpB0RuXY,893
70
77
  dataeval/outputs/_workflows.py,sha256=MkRD6ubI4NCBXb9v3kjXy64cUGs3G-JKkBdOpRD9XVE,10750
71
78
  dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -101,7 +108,7 @@ dataeval/utils/torch/models.py,sha256=hmroEs6C6jQ5tAoZa71RFeIvXLxfXrTJSFH_jG2LGQ
101
108
  dataeval/utils/torch/trainer.py,sha256=iUotX4OdirH8-ZtjdpU8gbJavkYW9YY9qpA2mAlFy1Y,5520
102
109
  dataeval/workflows/__init__.py,sha256=ou8y0KO-d6W5lgmcyLjKlf-J_ckP3vilW7wHkgiDlZ4,255
103
110
  dataeval/workflows/sufficiency.py,sha256=mjKmfRrAjShLUFIARv5o8yT5fnFvDsS5Qu6ujIPUgQg,8497
104
- dataeval-0.85.0.dist-info/LICENSE.txt,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
105
- dataeval-0.85.0.dist-info/METADATA,sha256=CFxQYk5W58oMLj9y41anNfkN8kgKvWtERBQBdm4XKEY,5308
106
- dataeval-0.85.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
107
- dataeval-0.85.0.dist-info/RECORD,,
111
+ dataeval-0.86.0.dist-info/LICENSE.txt,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
112
+ dataeval-0.86.0.dist-info/METADATA,sha256=viF0VCgv5_1SzwfTVCTNdbw1q5k1D3hgJhB7PoZ1tCM,5321
113
+ dataeval-0.86.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
114
+ dataeval-0.86.0.dist-info/RECORD,,