dataeval 0.66.0__py3-none-any.whl → 0.67.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +1 -1
- dataeval/_internal/metrics/diversity.py +28 -18
- {dataeval-0.66.0.dist-info → dataeval-0.67.0.dist-info}/METADATA +1 -1
- {dataeval-0.66.0.dist-info → dataeval-0.67.0.dist-info}/RECORD +6 -6
- {dataeval-0.66.0.dist-info → dataeval-0.67.0.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.66.0.dist-info → dataeval-0.67.0.dist-info}/WHEEL +0 -0
dataeval/__init__.py
CHANGED
(hunk not shown)
dataeval/_internal/metrics/diversity.py
CHANGED
@@ -39,6 +39,13 @@ def diversity_shannon(
|
|
39
39
|
|
40
40
|
Parameters
|
41
41
|
----------
|
42
|
+
data: NDArray
|
43
|
+
Array containing numerical values for metadata factors
|
44
|
+
names: list[str]
|
45
|
+
Names of metadata factors -- keys of the metadata dictionary
|
46
|
+
is_categorical: list[bool]
|
47
|
+
List of flags to identify whether variables are categorical (True) or
|
48
|
+
continuous (False)
|
42
49
|
subset_mask: NDArray[np.bool_] | None
|
43
50
|
Boolean mask of samples to bin (e.g. when computing per class). True -> include in histogram counts
|
44
51
|
|
@@ -76,14 +83,20 @@ def diversity_simpson(
|
|
76
83
|
Compute diversity for discrete/categorical variables and, through standard
|
77
84
|
histogram binning, for continuous variables.
|
78
85
|
|
79
|
-
We define diversity as
|
80
|
-
index.
|
86
|
+
We define diversity as the inverse Simpson diversity index linearly rescaled to the unit interval.
|
81
87
|
|
82
88
|
diversity = 1 implies that samples are evenly distributed across a particular factor
|
83
|
-
diversity =
|
89
|
+
diversity = 0 implies that all samples belong to one category/bin
|
84
90
|
|
85
91
|
Parameters
|
86
92
|
----------
|
93
|
+
data: NDArray
|
94
|
+
Array containing numerical values for metadata factors
|
95
|
+
names: list[str]
|
96
|
+
Names of metadata factors -- keys of the metadata dictionary
|
97
|
+
is_categorical: list[bool]
|
98
|
+
List of flags to identify whether variables are categorical (True) or
|
99
|
+
continuous (False)
|
87
100
|
subset_mask: NDArray[np.bool_] | None
|
88
101
|
Boolean mask of samples to bin (e.g. when computing per class). True -> include in histogram counts
|
89
102
|
|
@@ -91,10 +104,7 @@ def diversity_simpson(
|
|
91
104
|
-----
|
92
105
|
For continuous variables, histogram bins are chosen automatically. See
|
93
106
|
numpy.histogram for details.
|
94
|
-
|
95
|
-
in the limit.
|
96
|
-
If there is only one category, the diversity index takes a value of 1 =
|
97
|
-
1/N = 1/1. Entropy will take a value of 0.
|
107
|
+
If there is only one category, the diversity index takes a value of 0.
|
98
108
|
|
99
109
|
Returns
|
100
110
|
-------
|
@@ -116,8 +126,8 @@ def diversity_simpson(
|
|
116
126
|
# relative frequencies
|
117
127
|
p_i = cnts / cnts.sum()
|
118
128
|
# inverse Simpson index normalized by (number of bins)
|
119
|
-
|
120
|
-
|
129
|
+
s_0 = 1 / np.sum(p_i**2) / num_bins[col]
|
130
|
+
ev_index[col] = (s_0 * num_bins[col] - 1) / (num_bins[col] - 1)
|
121
131
|
return ev_index
|
122
132
|
|
123
133
|
|
@@ -141,9 +151,8 @@ def diversity(
|
|
141
151
|
List of class labels for each image
|
142
152
|
metadata: List[Dict]
|
143
153
|
List of metadata factors for each image
|
144
|
-
|
145
|
-
|
146
|
-
Permissible values include "simpson" and "shannon"
|
154
|
+
method: Literal["shannon", "simpson"], default "simpson"
|
155
|
+
Indicates which diversity index should be computed
|
147
156
|
|
148
157
|
Notes
|
149
158
|
-----
|
@@ -159,7 +168,7 @@ def diversity(
|
|
159
168
|
Compute Simpson diversity index of metadata and class labels
|
160
169
|
|
161
170
|
>>> diversity(class_labels, metadata, method="simpson").diversity_index
|
162
|
-
array([0.
|
171
|
+
array([0.18103448, 0.18103448, 0.88636364])
|
163
172
|
|
164
173
|
Compute Shannon diversity index of metadata and class labels
|
165
174
|
|
@@ -189,7 +198,7 @@ def diversity_classwise(
|
|
189
198
|
index.
|
190
199
|
|
191
200
|
diversity = 1 implies that samples are evenly distributed across a particular factor
|
192
|
-
diversity =
|
201
|
+
diversity = 0 implies that all samples belong to one category/bin
|
193
202
|
|
194
203
|
Parameters
|
195
204
|
----------
|
@@ -197,12 +206,13 @@ def diversity_classwise(
|
|
197
206
|
List of class labels for each image
|
198
207
|
metadata: List[Dict]
|
199
208
|
List of metadata factors for each image
|
209
|
+
method: Literal["shannon", "simpson"], default "simpson"
|
210
|
+
Indicates which diversity index should be computed
|
200
211
|
|
201
212
|
Notes
|
202
213
|
-----
|
203
214
|
- For continuous variables, histogram bins are chosen automatically. See numpy.histogram for details.
|
204
|
-
-
|
205
|
-
- If there is only one category, the diversity index takes a value of 1 = 1/N = 1/1. Entropy will take a value of 0.
|
215
|
+
- If there is only one category, the diversity index takes a value of 0.
|
206
216
|
|
207
217
|
Returns
|
208
218
|
-------
|
@@ -214,8 +224,8 @@ def diversity_classwise(
|
|
214
224
|
Compute classwise Simpson diversity index of metadata and class labels
|
215
225
|
|
216
226
|
>>> diversity_classwise(class_labels, metadata, method="simpson").diversity_index
|
217
|
-
array([[0.
|
218
|
-
[0.
|
227
|
+
array([[0.17241379, 0.39473684],
|
228
|
+
[0.2 , 0.2 ]])
|
219
229
|
|
220
230
|
Compute classwise Shannon diversity index of metadata and class labels
|
221
231
|
|
{dataeval-0.66.0.dist-info → dataeval-0.67.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: dataeval
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.67.0
|
4
4
|
Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
|
5
5
|
Home-page: https://dataeval.ai/
|
6
6
|
License: MIT
|
{dataeval-0.66.0.dist-info → dataeval-0.67.0.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
dataeval/__init__.py,sha256=
|
1
|
+
dataeval/__init__.py,sha256=RFLBI0XRq8nKsIoRacgjErr1RdQmoGI5gc8F9uAm-U8,590
|
2
2
|
dataeval/_internal/detectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
3
|
dataeval/_internal/detectors/clusterer.py,sha256=hJwELUeAdZZ3OVLIfwalw2P7Zz13q2ZqrV6gx90s44E,20695
|
4
4
|
dataeval/_internal/detectors/drift/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -24,7 +24,7 @@ dataeval/_internal/metrics/balance.py,sha256=g-YYFpq0qy2xq4iHjBKZDMjOn5R9Rit6sSb
|
|
24
24
|
dataeval/_internal/metrics/ber.py,sha256=Onsi47AbT9rMvng-Pbu8LIrYRfLpI13En1FxkFoMKQs,4668
|
25
25
|
dataeval/_internal/metrics/coverage.py,sha256=9ZvcNjItE9rEyA2UHPE1K9zpTbbib4xqk8WpPpDN8ok,4037
|
26
26
|
dataeval/_internal/metrics/divergence.py,sha256=nmMUfr9FGnH798eb6xzEiMj4C42rQVthh5HeexiY6EE,4119
|
27
|
-
dataeval/_internal/metrics/diversity.py,sha256=
|
27
|
+
dataeval/_internal/metrics/diversity.py,sha256=lkXnraNGCzkzamsBm2kG5eXxdRXHmkXxPWCKnGnPKOI,8620
|
28
28
|
dataeval/_internal/metrics/parity.py,sha256=suv1Pf7gPj0_NxsS0_M6ewfUndsFJyEhbt5NPp6ktMI,15457
|
29
29
|
dataeval/_internal/metrics/stats.py,sha256=Xbm7lLB0OZtsoxClMIrfULSqT8VymQiQmohJFtN7oz8,16332
|
30
30
|
dataeval/_internal/metrics/uap.py,sha256=w-wvXXnX16kUq-weaZD2SrJi22LJ8EjOFbOhPxeGejI,2043
|
@@ -66,7 +66,7 @@ dataeval/torch/models/__init__.py,sha256=YnDnePYpRIKHyYn3F5qR1OObMSb-g0FGvI8X-uT
|
|
66
66
|
dataeval/torch/trainer/__init__.py,sha256=Te-qElt8h-Zv8NN0r-VJOEdCPHTQ2yO3rd2MhRiZGZs,93
|
67
67
|
dataeval/utils/__init__.py,sha256=ExQ1xj62MjcM9uIu1-g1P2fW0EPJpcIofnvxjQ908c4,172
|
68
68
|
dataeval/workflows/__init__.py,sha256=gkU2B6yUiefexcYrBwqfZKNl8BvX8abUjfeNvVBXF4E,186
|
69
|
-
dataeval-0.
|
70
|
-
dataeval-0.
|
71
|
-
dataeval-0.
|
72
|
-
dataeval-0.
|
69
|
+
dataeval-0.67.0.dist-info/LICENSE.txt,sha256=Kpzcfobf1HlqafF-EX6dQLw9TlJiaJzfgvLQFukyXYw,1060
|
70
|
+
dataeval-0.67.0.dist-info/METADATA,sha256=KDbXi3-uY4kdbK4AuH4C9DXPLnkhfCdNAmqkHpDG1OI,4217
|
71
|
+
dataeval-0.67.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
72
|
+
dataeval-0.67.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|