dataeval 0.63.0__py3-none-any.whl → 0.65.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. dataeval/__init__.py +4 -4
  2. dataeval/_internal/detectors/clusterer.py +47 -34
  3. dataeval/_internal/detectors/drift/base.py +53 -35
  4. dataeval/_internal/detectors/drift/cvm.py +5 -4
  5. dataeval/_internal/detectors/drift/ks.py +7 -6
  6. dataeval/_internal/detectors/drift/mmd.py +39 -19
  7. dataeval/_internal/detectors/drift/torch.py +6 -5
  8. dataeval/_internal/detectors/drift/uncertainty.py +7 -8
  9. dataeval/_internal/detectors/duplicates.py +57 -30
  10. dataeval/_internal/detectors/linter.py +40 -24
  11. dataeval/_internal/detectors/ood/ae.py +2 -1
  12. dataeval/_internal/detectors/ood/aegmm.py +2 -1
  13. dataeval/_internal/detectors/ood/base.py +37 -15
  14. dataeval/_internal/detectors/ood/llr.py +9 -8
  15. dataeval/_internal/detectors/ood/vae.py +2 -1
  16. dataeval/_internal/detectors/ood/vaegmm.py +2 -1
  17. dataeval/_internal/flags.py +42 -21
  18. dataeval/_internal/interop.py +3 -12
  19. dataeval/_internal/metrics/balance.py +188 -0
  20. dataeval/_internal/metrics/ber.py +123 -48
  21. dataeval/_internal/metrics/coverage.py +90 -74
  22. dataeval/_internal/metrics/divergence.py +101 -67
  23. dataeval/_internal/metrics/diversity.py +211 -0
  24. dataeval/_internal/metrics/parity.py +287 -155
  25. dataeval/_internal/metrics/stats.py +198 -317
  26. dataeval/_internal/metrics/uap.py +40 -29
  27. dataeval/_internal/metrics/utils.py +430 -0
  28. dataeval/_internal/models/tensorflow/losses.py +3 -3
  29. dataeval/_internal/models/tensorflow/trainer.py +3 -2
  30. dataeval/_internal/models/tensorflow/utils.py +4 -3
  31. dataeval/_internal/output.py +82 -0
  32. dataeval/_internal/utils.py +64 -0
  33. dataeval/_internal/workflows/sufficiency.py +96 -107
  34. dataeval/flags/__init__.py +2 -2
  35. dataeval/metrics/__init__.py +26 -7
  36. dataeval/utils/__init__.py +9 -0
  37. {dataeval-0.63.0.dist-info → dataeval-0.65.0.dist-info}/METADATA +1 -1
  38. dataeval-0.65.0.dist-info/RECORD +60 -0
  39. dataeval/_internal/functional/__init__.py +0 -0
  40. dataeval/_internal/functional/ber.py +0 -63
  41. dataeval/_internal/functional/coverage.py +0 -75
  42. dataeval/_internal/functional/divergence.py +0 -16
  43. dataeval/_internal/functional/hash.py +0 -79
  44. dataeval/_internal/functional/metadata.py +0 -136
  45. dataeval/_internal/functional/metadataparity.py +0 -190
  46. dataeval/_internal/functional/uap.py +0 -6
  47. dataeval/_internal/functional/utils.py +0 -158
  48. dataeval/_internal/maite/__init__.py +0 -0
  49. dataeval/_internal/maite/utils.py +0 -30
  50. dataeval/_internal/metrics/base.py +0 -92
  51. dataeval/_internal/metrics/metadata.py +0 -610
  52. dataeval/_internal/metrics/metadataparity.py +0 -67
  53. dataeval-0.63.0.dist-info/RECORD +0 -68
  54. {dataeval-0.63.0.dist-info → dataeval-0.65.0.dist-info}/LICENSE.txt +0 -0
  55. {dataeval-0.63.0.dist-info → dataeval-0.65.0.dist-info}/WHEEL +0 -0
@@ -1,67 +0,0 @@
1
- from typing import Optional
2
-
3
- import numpy as np
4
-
5
- from dataeval._internal.functional.metadataparity import compute_parity, format_discretize_factors
6
-
7
-
8
- class MetadataParity:
9
- def __init__(
10
- self,
11
- data_factors: dict[str, np.ndarray],
12
- continuous_factor_names: Optional[np.ndarray] = None,
13
- continuous_factor_bincounts: Optional[np.ndarray] = None,
14
- ):
15
- """
16
- Sets up the internal list of metadata factors.
17
-
18
- Parameters
19
- ----------
20
- data_factors: Dict[str, np.ndarray]
21
- The dataset factors, which are per-image attributes including class label and metadata.
22
- Each key of dataset_factors is a factor, whose value is the per-image factor values.
23
- continuous_factor_names : np.ndarray, default None
24
- The factors in data_factors that have continuous values.
25
- All factors are treated as having discrete values unless they
26
- are specified in this array. Each element of this array must occur as a key in data_factors.
27
- continuous_factor_bincounts : np.ndarray, default None
28
- Array of the bin counts to discretize values into for each factor in continuous_factor_names.
29
- """
30
-
31
- continuous_factor_names = (
32
- np.array([], dtype=str) if continuous_factor_names is None else np.array(continuous_factor_names)
33
- )
34
- continuous_factor_bincounts = (
35
- 10 * np.ones(len(continuous_factor_names), dtype=int)
36
- if continuous_factor_bincounts is None
37
- else np.array(continuous_factor_bincounts)
38
- )
39
-
40
- self.metadata_factors, self.labels = format_discretize_factors(
41
- data_factors, continuous_factor_names, continuous_factor_bincounts
42
- )
43
-
44
- def evaluate(self) -> dict[str, np.ndarray]:
45
- """
46
- Evaluates the statistical independence of metadata factors from class labels.
47
- This performs a chi-square test, which provides a score and a p-value for
48
- statistical independence between each pair of a metadata factor and a class label.
49
- A high score with a low p-value suggests that a metadata factor is strongly
50
- correlated with a class label.
51
-
52
- Returns
53
- -------
54
- Dict[str, np.ndarray]
55
- chi_square: np.ndarray
56
- Array of length (num_factors) whose (i)th element corresponds to
57
- the chi-square score for the relationship between factor i
58
- and the class labels in the dataset.
59
- p_values: np.ndarray
60
- Array of length (num_factors) whose (i)th element corresponds to
61
- the p-value for the chi-square test for the relationship between
62
- factor i and the class labels in the dataset.
63
- """
64
- chi_square, p_values = compute_parity(self.metadata_factors, self.labels)
65
-
66
- formatted_output = {"chi_squares": chi_square, "p_values": p_values}
67
- return formatted_output
@@ -1,68 +0,0 @@
1
- dataeval/__init__.py,sha256=kUzF3A_ow1IG-GGqko1M2MRd6wNqNTDSD32Nj-b7tXI,408
2
- dataeval/_internal/detectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- dataeval/_internal/detectors/clusterer.py,sha256=AAIhZQ1Tm5yOh2vLDJ9wlYmz0EuqXiy1tnrkJj119u0,20304
4
- dataeval/_internal/detectors/drift/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- dataeval/_internal/detectors/drift/base.py,sha256=MTAuNBx8MQLkGJPt_F2M9hCWfJgMpy1N_fVnUniPul0,9103
6
- dataeval/_internal/detectors/drift/cvm.py,sha256=GU2RzqCeu5d-rR-tgpzEo_lqo7Gu7Fzkfxajc5W42ok,4001
7
- dataeval/_internal/detectors/drift/ks.py,sha256=MIYFjTSNs3H0pUk6HthYoGDBR6hB7YmS-ie8HOG2K2o,4004
8
- dataeval/_internal/detectors/drift/mmd.py,sha256=Q954j5znZNxP4JsEfceY1VAPrMyEsG0jFt5JRvMGbLg,7054
9
- dataeval/_internal/detectors/drift/torch.py,sha256=NsQYfDVRcCGmU8k6oBG_aVzmML1zre-xUKBVK1W680o,10872
10
- dataeval/_internal/detectors/drift/uncertainty.py,sha256=MnhEpTIkQ1zA3SiaKuNFsMi1Jge2a8uZRm4p2LX46iA,5338
11
- dataeval/_internal/detectors/duplicates.py,sha256=IqsX22N7wD8o0bYPc7N1S451w6lzjWLpJZs_RkxOgEU,2168
12
- dataeval/_internal/detectors/linter.py,sha256=CJlUviSUl9iVZix1tnZa6ldeYYB-vylA5tgobxq2apk,5292
13
- dataeval/_internal/detectors/ood/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- dataeval/_internal/detectors/ood/ae.py,sha256=ASsN2XztlFdpupVgBazh_gInnToSAT8QjvJtA6EiVuY,2657
15
- dataeval/_internal/detectors/ood/aegmm.py,sha256=6ExAvYD0pGADmH9igk3r6t8cLPF4MgmrpGPrBybCSBA,2392
16
- dataeval/_internal/detectors/ood/base.py,sha256=IQQ0ZprNKpz09J7LKKSgoVvcKYIqaHw2p_tMb0OMwpI,6949
17
- dataeval/_internal/detectors/ood/llr.py,sha256=wW8d0pxBXvERCG7m8iBxzSC7hzN19O08IlI3FMvoLBw,10132
18
- dataeval/_internal/detectors/ood/vae.py,sha256=mWSbNY3-utuzeVSUkaqD_alLsgcfSMqdy2Aj1G7NUM4,2963
19
- dataeval/_internal/detectors/ood/vaegmm.py,sha256=FTHhuAF62hl3PQw2fQUZlPLgsi2ul0xxoUrRwr_Jd-o,2837
20
- dataeval/_internal/flags.py,sha256=dRApeFkdSXFbYHSmvzgUP78zH8jUGtfzKFfLQtX0Q18,883
21
- dataeval/_internal/functional/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
- dataeval/_internal/functional/ber.py,sha256=hJnFswWXnMjoYY62A7x8F3nyctMw3f3UlC8DdUX2Clg,2150
23
- dataeval/_internal/functional/coverage.py,sha256=Z56oE9LLosM1228DL0tfs12-MPrg_2ef4KzYhoTPPYg,2520
24
- dataeval/_internal/functional/divergence.py,sha256=fSZm2vIqChfj9sbgSuw9P8ZB8ih0nOPryRvnVQHsjzI,517
25
- dataeval/_internal/functional/hash.py,sha256=fhcWclSNmp4sJbFSWSqWB2nVxhEFNNje1ifVAX4qqls,2785
26
- dataeval/_internal/functional/metadata.py,sha256=6QCxnDzXAF0x6Xx-eUK_c4yOX-_RyTaYCqzDPzDYpLM,4228
27
- dataeval/_internal/functional/metadataparity.py,sha256=V6fCtSDx_tf49BnaGgatHNH8lgZoYtPggIuRa13AiHo,7771
28
- dataeval/_internal/functional/uap.py,sha256=PIeRAJcA1GBMJK_gBKg0U_bih73h-FKW2ruxJtGiiMc,200
29
- dataeval/_internal/functional/utils.py,sha256=u1kkGtS0irnx9dZTo9MahA-_4_uIorPDttQkBe8iU7U,4120
30
- dataeval/_internal/interop.py,sha256=tmcp666A_uhndsDLv2PFcZiPH2udz4Y7UVdjqs3vkqc,1280
31
- dataeval/_internal/maite/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
32
- dataeval/_internal/maite/utils.py,sha256=XJ6eCTZA_So_bJ2BoEZvMAY8GyC67p8Q044LGSAZepg,864
33
- dataeval/_internal/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
- dataeval/_internal/metrics/base.py,sha256=CO2deM6T-q1RAbu-C8k58pULt1KJEJM-kYjLMaHYzms,2641
35
- dataeval/_internal/metrics/ber.py,sha256=eQaTMjytVASkZz8bp8CqzpKBPzIYDp2qXmz1MBYNoj4,2261
36
- dataeval/_internal/metrics/coverage.py,sha256=Bhsvz0ZG8GsgsuYfUQ7i4xB5ebMBJYLk2QqXVfkjgJM,3421
37
- dataeval/_internal/metrics/divergence.py,sha256=QOWDfjlE84S2UvpX7x3pxVAPKdeqrdQ2cXzZkiaOyF8,3247
38
- dataeval/_internal/metrics/metadata.py,sha256=U2E2bG8BzRoHS6bx3a5KYEqE96ik9Sz1AW3MMMqQmLU,21472
39
- dataeval/_internal/metrics/metadataparity.py,sha256=zLo8WPcPM6OVWf7dK4LT8d3o9FlUHji1SJrDinztrv4,2972
40
- dataeval/_internal/metrics/parity.py,sha256=KAp_dznww1GXHwvIqKb3ZJVTQU2w8loyk8JrHBciEm4,6070
41
- dataeval/_internal/metrics/stats.py,sha256=6zvGdoYEpVpMMzduxzMkMfsaaKEyC4UVn4SWPcAO5Cg,12581
42
- dataeval/_internal/metrics/uap.py,sha256=pgfJY8kM5EFYYNbMzDcodOOzvPlzxrpsSXoL7dCrWn8,1113
43
- dataeval/_internal/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
- dataeval/_internal/models/pytorch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
45
- dataeval/_internal/models/pytorch/autoencoder.py,sha256=iK3Z9claesU_pJkRaiFJIZ9zKZg-Qj8ugzVYTTokDbE,6123
46
- dataeval/_internal/models/pytorch/blocks.py,sha256=pm2xwsDZjZJYXrhhiz8husvh2vHmrkFMSYEn-EDUD5Q,1354
47
- dataeval/_internal/models/pytorch/utils.py,sha256=Qgwym1PxGuwxbXCKUT-8r6Iyrxqm7x94oj45Vf5_CjE,1675
48
- dataeval/_internal/models/tensorflow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
49
- dataeval/_internal/models/tensorflow/autoencoder.py,sha256=rErnOfDFTd7e4brSGQ2Lr1x1kNjSEHdbOREOtUfIhIM,9975
50
- dataeval/_internal/models/tensorflow/gmm.py,sha256=wnqQKm3fURuvBROUd2fitCqzKViDo-g0-Djr3TBHZ3U,3640
51
- dataeval/_internal/models/tensorflow/losses.py,sha256=3y6tHm7PTQ7hmasJDwTXjdARjCUWycoXqSyXJ1uT2mM,3766
52
- dataeval/_internal/models/tensorflow/pixelcnn.py,sha256=B5cwB2IGPw-7b8klt82j_60g_IvqSiDELxvbiBYJtAo,48068
53
- dataeval/_internal/models/tensorflow/trainer.py,sha256=2KHtMRniVselCaDXeb8QEfX-wMRsPfT1xiG2gUQgelg,4090
54
- dataeval/_internal/models/tensorflow/utils.py,sha256=uK_fQ1JXUSVi0kgnhd9eRArlr36OzXUEdL4inJZCs-8,8579
55
- dataeval/_internal/workflows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
- dataeval/_internal/workflows/sufficiency.py,sha256=QZQdhERVr3EmPA2sAFFudSDL4KLM0eAvYgV38jhjzaI,18374
57
- dataeval/detectors/__init__.py,sha256=I2e7YWb55RRlKQll85Z6KdN5wdBa53smn-_fcZIsCwA,1507
58
- dataeval/flags/__init__.py,sha256=1-HmwmtfPkHWwqXUjDwWko396qAKBeaSvqVsQZLrzD0,170
59
- dataeval/metrics/__init__.py,sha256=GmuGCzF7Sk6TtAH9amB494gNtDjgVsSemPWJChrz2eo,427
60
- dataeval/models/__init__.py,sha256=onevPb5wznCggowBnVT0OUa8uBJXZCbrkFuek1UFvOs,293
61
- dataeval/models/tensorflow/__init__.py,sha256=A1XRxVGHefuvh_WpaKE1x95pRD1FecuFp66iuNPA_5U,424
62
- dataeval/models/torch/__init__.py,sha256=su7P9DF9LChlVCNHWG6d7s_yeIfWQbhCYWIkzJe0Qig,190
63
- dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
64
- dataeval/workflows/__init__.py,sha256=ObgS1cVYFRzFZWbNzGs2OcU02IVkJkAMHNnlnSNTMCE,208
65
- dataeval-0.63.0.dist-info/LICENSE.txt,sha256=Kpzcfobf1HlqafF-EX6dQLw9TlJiaJzfgvLQFukyXYw,1060
66
- dataeval-0.63.0.dist-info/METADATA,sha256=6YmwpTrzkKyvPZ0IY3nwlTzeNsl-EaGJe8m_x2vwiaE,4217
67
- dataeval-0.63.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
68
- dataeval-0.63.0.dist-info/RECORD,,