dataeval 0.63.0__py3-none-any.whl → 0.65.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +4 -4
- dataeval/_internal/detectors/clusterer.py +47 -34
- dataeval/_internal/detectors/drift/base.py +53 -35
- dataeval/_internal/detectors/drift/cvm.py +5 -4
- dataeval/_internal/detectors/drift/ks.py +7 -6
- dataeval/_internal/detectors/drift/mmd.py +39 -19
- dataeval/_internal/detectors/drift/torch.py +6 -5
- dataeval/_internal/detectors/drift/uncertainty.py +7 -8
- dataeval/_internal/detectors/duplicates.py +57 -30
- dataeval/_internal/detectors/linter.py +40 -24
- dataeval/_internal/detectors/ood/ae.py +2 -1
- dataeval/_internal/detectors/ood/aegmm.py +2 -1
- dataeval/_internal/detectors/ood/base.py +37 -15
- dataeval/_internal/detectors/ood/llr.py +9 -8
- dataeval/_internal/detectors/ood/vae.py +2 -1
- dataeval/_internal/detectors/ood/vaegmm.py +2 -1
- dataeval/_internal/flags.py +42 -21
- dataeval/_internal/interop.py +3 -12
- dataeval/_internal/metrics/balance.py +188 -0
- dataeval/_internal/metrics/ber.py +123 -48
- dataeval/_internal/metrics/coverage.py +90 -74
- dataeval/_internal/metrics/divergence.py +101 -67
- dataeval/_internal/metrics/diversity.py +211 -0
- dataeval/_internal/metrics/parity.py +287 -155
- dataeval/_internal/metrics/stats.py +198 -317
- dataeval/_internal/metrics/uap.py +40 -29
- dataeval/_internal/metrics/utils.py +430 -0
- dataeval/_internal/models/tensorflow/losses.py +3 -3
- dataeval/_internal/models/tensorflow/trainer.py +3 -2
- dataeval/_internal/models/tensorflow/utils.py +4 -3
- dataeval/_internal/output.py +82 -0
- dataeval/_internal/utils.py +64 -0
- dataeval/_internal/workflows/sufficiency.py +96 -107
- dataeval/flags/__init__.py +2 -2
- dataeval/metrics/__init__.py +26 -7
- dataeval/utils/__init__.py +9 -0
- {dataeval-0.63.0.dist-info → dataeval-0.65.0.dist-info}/METADATA +1 -1
- dataeval-0.65.0.dist-info/RECORD +60 -0
- dataeval/_internal/functional/__init__.py +0 -0
- dataeval/_internal/functional/ber.py +0 -63
- dataeval/_internal/functional/coverage.py +0 -75
- dataeval/_internal/functional/divergence.py +0 -16
- dataeval/_internal/functional/hash.py +0 -79
- dataeval/_internal/functional/metadata.py +0 -136
- dataeval/_internal/functional/metadataparity.py +0 -190
- dataeval/_internal/functional/uap.py +0 -6
- dataeval/_internal/functional/utils.py +0 -158
- dataeval/_internal/maite/__init__.py +0 -0
- dataeval/_internal/maite/utils.py +0 -30
- dataeval/_internal/metrics/base.py +0 -92
- dataeval/_internal/metrics/metadata.py +0 -610
- dataeval/_internal/metrics/metadataparity.py +0 -67
- dataeval-0.63.0.dist-info/RECORD +0 -68
- {dataeval-0.63.0.dist-info → dataeval-0.65.0.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.63.0.dist-info → dataeval-0.65.0.dist-info}/WHEEL +0 -0
@@ -1,67 +0,0 @@
|
|
1
|
-
from typing import Optional
|
2
|
-
|
3
|
-
import numpy as np
|
4
|
-
|
5
|
-
from dataeval._internal.functional.metadataparity import compute_parity, format_discretize_factors
|
6
|
-
|
7
|
-
|
8
|
-
class MetadataParity:
    """
    Holds discretized metadata factors and class labels, and evaluates parity between them.

    The factors and labels are produced once, at construction time, by
    ``format_discretize_factors``; ``evaluate`` hands them to ``compute_parity``.
    """

    def __init__(
        self,
        data_factors: dict[str, np.ndarray],
        continuous_factor_names: Optional[np.ndarray] = None,
        continuous_factor_bincounts: Optional[np.ndarray] = None,
    ):
        """
        Prepare the metadata factors and labels used by ``evaluate``.

        Parameters
        ----------
        data_factors : dict[str, np.ndarray]
            The dataset factors: per-image attributes, including the class label
            and the metadata. Each key of data_factors is a factor name and each
            value holds that factor's per-image values.
        continuous_factor_names : np.ndarray, default None
            The factors in data_factors that have continuous values. Every factor
            is treated as discrete unless it is named here. Each element must
            occur as a key in data_factors. None means no continuous factors.
        continuous_factor_bincounts : np.ndarray, default None
            The number of bins to discretize each factor in
            continuous_factor_names into. None means 10 bins for every
            continuous factor.
        """
        # Normalize the names first: the default bin counts are sized from them.
        if continuous_factor_names is None:
            names = np.array([], dtype=str)
        else:
            names = np.array(continuous_factor_names)

        if continuous_factor_bincounts is None:
            # One default bin count (10) per continuous factor.
            bincounts = np.full(len(names), 10, dtype=int)
        else:
            bincounts = np.array(continuous_factor_bincounts)

        discretized = format_discretize_factors(data_factors, names, bincounts)
        self.metadata_factors, self.labels = discretized

    def evaluate(self) -> dict[str, np.ndarray]:
        """
        Evaluate the statistical independence of metadata factors from class labels.

        Delegates to ``compute_parity``, which performs a chi-square test that
        yields a score and a p-value for each metadata factor against the class
        labels. A high score with a low p-value suggests that a metadata factor
        is strongly correlated with a class label.

        Returns
        -------
        dict[str, np.ndarray]
            chi_squares : np.ndarray
                Array of length (num_factors) whose (i)th element is the
                chi-square score for the relationship between factor i and
                the class labels in the dataset.
            p_values : np.ndarray
                Array of length (num_factors) whose (i)th element is the
                p-value of the chi-square test for the relationship between
                factor i and the class labels in the dataset.
        """
        scores, significance = compute_parity(self.metadata_factors, self.labels)
        return {"chi_squares": scores, "p_values": significance}
|
dataeval-0.63.0.dist-info/RECORD
DELETED
@@ -1,68 +0,0 @@
|
|
1
|
-
dataeval/__init__.py,sha256=kUzF3A_ow1IG-GGqko1M2MRd6wNqNTDSD32Nj-b7tXI,408
|
2
|
-
dataeval/_internal/detectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
|
-
dataeval/_internal/detectors/clusterer.py,sha256=AAIhZQ1Tm5yOh2vLDJ9wlYmz0EuqXiy1tnrkJj119u0,20304
|
4
|
-
dataeval/_internal/detectors/drift/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
-
dataeval/_internal/detectors/drift/base.py,sha256=MTAuNBx8MQLkGJPt_F2M9hCWfJgMpy1N_fVnUniPul0,9103
|
6
|
-
dataeval/_internal/detectors/drift/cvm.py,sha256=GU2RzqCeu5d-rR-tgpzEo_lqo7Gu7Fzkfxajc5W42ok,4001
|
7
|
-
dataeval/_internal/detectors/drift/ks.py,sha256=MIYFjTSNs3H0pUk6HthYoGDBR6hB7YmS-ie8HOG2K2o,4004
|
8
|
-
dataeval/_internal/detectors/drift/mmd.py,sha256=Q954j5znZNxP4JsEfceY1VAPrMyEsG0jFt5JRvMGbLg,7054
|
9
|
-
dataeval/_internal/detectors/drift/torch.py,sha256=NsQYfDVRcCGmU8k6oBG_aVzmML1zre-xUKBVK1W680o,10872
|
10
|
-
dataeval/_internal/detectors/drift/uncertainty.py,sha256=MnhEpTIkQ1zA3SiaKuNFsMi1Jge2a8uZRm4p2LX46iA,5338
|
11
|
-
dataeval/_internal/detectors/duplicates.py,sha256=IqsX22N7wD8o0bYPc7N1S451w6lzjWLpJZs_RkxOgEU,2168
|
12
|
-
dataeval/_internal/detectors/linter.py,sha256=CJlUviSUl9iVZix1tnZa6ldeYYB-vylA5tgobxq2apk,5292
|
13
|
-
dataeval/_internal/detectors/ood/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
14
|
-
dataeval/_internal/detectors/ood/ae.py,sha256=ASsN2XztlFdpupVgBazh_gInnToSAT8QjvJtA6EiVuY,2657
|
15
|
-
dataeval/_internal/detectors/ood/aegmm.py,sha256=6ExAvYD0pGADmH9igk3r6t8cLPF4MgmrpGPrBybCSBA,2392
|
16
|
-
dataeval/_internal/detectors/ood/base.py,sha256=IQQ0ZprNKpz09J7LKKSgoVvcKYIqaHw2p_tMb0OMwpI,6949
|
17
|
-
dataeval/_internal/detectors/ood/llr.py,sha256=wW8d0pxBXvERCG7m8iBxzSC7hzN19O08IlI3FMvoLBw,10132
|
18
|
-
dataeval/_internal/detectors/ood/vae.py,sha256=mWSbNY3-utuzeVSUkaqD_alLsgcfSMqdy2Aj1G7NUM4,2963
|
19
|
-
dataeval/_internal/detectors/ood/vaegmm.py,sha256=FTHhuAF62hl3PQw2fQUZlPLgsi2ul0xxoUrRwr_Jd-o,2837
|
20
|
-
dataeval/_internal/flags.py,sha256=dRApeFkdSXFbYHSmvzgUP78zH8jUGtfzKFfLQtX0Q18,883
|
21
|
-
dataeval/_internal/functional/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
|
-
dataeval/_internal/functional/ber.py,sha256=hJnFswWXnMjoYY62A7x8F3nyctMw3f3UlC8DdUX2Clg,2150
|
23
|
-
dataeval/_internal/functional/coverage.py,sha256=Z56oE9LLosM1228DL0tfs12-MPrg_2ef4KzYhoTPPYg,2520
|
24
|
-
dataeval/_internal/functional/divergence.py,sha256=fSZm2vIqChfj9sbgSuw9P8ZB8ih0nOPryRvnVQHsjzI,517
|
25
|
-
dataeval/_internal/functional/hash.py,sha256=fhcWclSNmp4sJbFSWSqWB2nVxhEFNNje1ifVAX4qqls,2785
|
26
|
-
dataeval/_internal/functional/metadata.py,sha256=6QCxnDzXAF0x6Xx-eUK_c4yOX-_RyTaYCqzDPzDYpLM,4228
|
27
|
-
dataeval/_internal/functional/metadataparity.py,sha256=V6fCtSDx_tf49BnaGgatHNH8lgZoYtPggIuRa13AiHo,7771
|
28
|
-
dataeval/_internal/functional/uap.py,sha256=PIeRAJcA1GBMJK_gBKg0U_bih73h-FKW2ruxJtGiiMc,200
|
29
|
-
dataeval/_internal/functional/utils.py,sha256=u1kkGtS0irnx9dZTo9MahA-_4_uIorPDttQkBe8iU7U,4120
|
30
|
-
dataeval/_internal/interop.py,sha256=tmcp666A_uhndsDLv2PFcZiPH2udz4Y7UVdjqs3vkqc,1280
|
31
|
-
dataeval/_internal/maite/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
32
|
-
dataeval/_internal/maite/utils.py,sha256=XJ6eCTZA_So_bJ2BoEZvMAY8GyC67p8Q044LGSAZepg,864
|
33
|
-
dataeval/_internal/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
34
|
-
dataeval/_internal/metrics/base.py,sha256=CO2deM6T-q1RAbu-C8k58pULt1KJEJM-kYjLMaHYzms,2641
|
35
|
-
dataeval/_internal/metrics/ber.py,sha256=eQaTMjytVASkZz8bp8CqzpKBPzIYDp2qXmz1MBYNoj4,2261
|
36
|
-
dataeval/_internal/metrics/coverage.py,sha256=Bhsvz0ZG8GsgsuYfUQ7i4xB5ebMBJYLk2QqXVfkjgJM,3421
|
37
|
-
dataeval/_internal/metrics/divergence.py,sha256=QOWDfjlE84S2UvpX7x3pxVAPKdeqrdQ2cXzZkiaOyF8,3247
|
38
|
-
dataeval/_internal/metrics/metadata.py,sha256=U2E2bG8BzRoHS6bx3a5KYEqE96ik9Sz1AW3MMMqQmLU,21472
|
39
|
-
dataeval/_internal/metrics/metadataparity.py,sha256=zLo8WPcPM6OVWf7dK4LT8d3o9FlUHji1SJrDinztrv4,2972
|
40
|
-
dataeval/_internal/metrics/parity.py,sha256=KAp_dznww1GXHwvIqKb3ZJVTQU2w8loyk8JrHBciEm4,6070
|
41
|
-
dataeval/_internal/metrics/stats.py,sha256=6zvGdoYEpVpMMzduxzMkMfsaaKEyC4UVn4SWPcAO5Cg,12581
|
42
|
-
dataeval/_internal/metrics/uap.py,sha256=pgfJY8kM5EFYYNbMzDcodOOzvPlzxrpsSXoL7dCrWn8,1113
|
43
|
-
dataeval/_internal/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
44
|
-
dataeval/_internal/models/pytorch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
45
|
-
dataeval/_internal/models/pytorch/autoencoder.py,sha256=iK3Z9claesU_pJkRaiFJIZ9zKZg-Qj8ugzVYTTokDbE,6123
|
46
|
-
dataeval/_internal/models/pytorch/blocks.py,sha256=pm2xwsDZjZJYXrhhiz8husvh2vHmrkFMSYEn-EDUD5Q,1354
|
47
|
-
dataeval/_internal/models/pytorch/utils.py,sha256=Qgwym1PxGuwxbXCKUT-8r6Iyrxqm7x94oj45Vf5_CjE,1675
|
48
|
-
dataeval/_internal/models/tensorflow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
49
|
-
dataeval/_internal/models/tensorflow/autoencoder.py,sha256=rErnOfDFTd7e4brSGQ2Lr1x1kNjSEHdbOREOtUfIhIM,9975
|
50
|
-
dataeval/_internal/models/tensorflow/gmm.py,sha256=wnqQKm3fURuvBROUd2fitCqzKViDo-g0-Djr3TBHZ3U,3640
|
51
|
-
dataeval/_internal/models/tensorflow/losses.py,sha256=3y6tHm7PTQ7hmasJDwTXjdARjCUWycoXqSyXJ1uT2mM,3766
|
52
|
-
dataeval/_internal/models/tensorflow/pixelcnn.py,sha256=B5cwB2IGPw-7b8klt82j_60g_IvqSiDELxvbiBYJtAo,48068
|
53
|
-
dataeval/_internal/models/tensorflow/trainer.py,sha256=2KHtMRniVselCaDXeb8QEfX-wMRsPfT1xiG2gUQgelg,4090
|
54
|
-
dataeval/_internal/models/tensorflow/utils.py,sha256=uK_fQ1JXUSVi0kgnhd9eRArlr36OzXUEdL4inJZCs-8,8579
|
55
|
-
dataeval/_internal/workflows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
56
|
-
dataeval/_internal/workflows/sufficiency.py,sha256=QZQdhERVr3EmPA2sAFFudSDL4KLM0eAvYgV38jhjzaI,18374
|
57
|
-
dataeval/detectors/__init__.py,sha256=I2e7YWb55RRlKQll85Z6KdN5wdBa53smn-_fcZIsCwA,1507
|
58
|
-
dataeval/flags/__init__.py,sha256=1-HmwmtfPkHWwqXUjDwWko396qAKBeaSvqVsQZLrzD0,170
|
59
|
-
dataeval/metrics/__init__.py,sha256=GmuGCzF7Sk6TtAH9amB494gNtDjgVsSemPWJChrz2eo,427
|
60
|
-
dataeval/models/__init__.py,sha256=onevPb5wznCggowBnVT0OUa8uBJXZCbrkFuek1UFvOs,293
|
61
|
-
dataeval/models/tensorflow/__init__.py,sha256=A1XRxVGHefuvh_WpaKE1x95pRD1FecuFp66iuNPA_5U,424
|
62
|
-
dataeval/models/torch/__init__.py,sha256=su7P9DF9LChlVCNHWG6d7s_yeIfWQbhCYWIkzJe0Qig,190
|
63
|
-
dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
64
|
-
dataeval/workflows/__init__.py,sha256=ObgS1cVYFRzFZWbNzGs2OcU02IVkJkAMHNnlnSNTMCE,208
|
65
|
-
dataeval-0.63.0.dist-info/LICENSE.txt,sha256=Kpzcfobf1HlqafF-EX6dQLw9TlJiaJzfgvLQFukyXYw,1060
|
66
|
-
dataeval-0.63.0.dist-info/METADATA,sha256=6YmwpTrzkKyvPZ0IY3nwlTzeNsl-EaGJe8m_x2vwiaE,4217
|
67
|
-
dataeval-0.63.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
68
|
-
dataeval-0.63.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|