dataeval 0.66.0__py3-none-any.whl → 0.67.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dataeval/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- __version__ = "0.66.0"
1
+ __version__ = "0.67.0"
2
2
 
3
3
  from importlib.util import find_spec
4
4
 
@@ -39,6 +39,13 @@ def diversity_shannon(
39
39
 
40
40
  Parameters
41
41
  ----------
42
+ data: NDArray
43
+ Array containing numerical values for metadata factors
44
+ names: list[str]
45
+ Names of metadata factors -- keys of the metadata dictionary
46
+ is_categorical: list[bool]
47
+ List of flags to identify whether variables are categorical (True) or
48
+ continuous (False)
42
49
  subset_mask: NDArray[np.bool_] | None
43
50
  Boolean mask of samples to bin (e.g. when computing per class). True -> include in histogram counts
44
51
 
@@ -76,14 +83,20 @@ def diversity_simpson(
76
83
  Compute diversity for discrete/categorical variables and, through standard
77
84
  histogram binning, for continuous variables.
78
85
 
79
- We define diversity as a normalized form of the inverse Simpson diversity
80
- index.
86
+ We define diversity as the inverse Simpson diversity index linearly rescaled to the unit interval.
81
87
 
82
88
  diversity = 1 implies that samples are evenly distributed across a particular factor
83
- diversity = 1/num_categories implies that all samples belong to one category/bin
89
+ diversity = 0 implies that all samples belong to one category/bin
84
90
 
85
91
  Parameters
86
92
  ----------
93
+ data: NDArray
94
+ Array containing numerical values for metadata factors
95
+ names: list[str]
96
+ Names of metadata factors -- keys of the metadata dictionary
97
+ is_categorical: list[bool]
98
+ List of flags to identify whether variables are categorical (True) or
99
+ continuous (False)
87
100
  subset_mask: NDArray[np.bool_] | None
88
101
  Boolean mask of samples to bin (e.g. when computing per class). True -> include in histogram counts
89
102
 
@@ -91,10 +104,7 @@ def diversity_simpson(
91
104
  -----
92
105
  For continuous variables, histogram bins are chosen automatically. See
93
106
  numpy.histogram for details.
94
- The expression is undefined for q=1, but it approaches the Shannon entropy
95
- in the limit.
96
- If there is only one category, the diversity index takes a value of 1 =
97
- 1/N = 1/1. Entropy will take a value of 0.
107
+ If there is only one category, the diversity index takes a value of 0 (provided num_bins > 1; the rescaling divides by num_bins - 1 and is undefined for a single bin).
98
108
 
99
109
  Returns
100
110
  -------
@@ -116,8 +126,8 @@ def diversity_simpson(
116
126
  # relative frequencies
117
127
  p_i = cnts / cnts.sum()
118
128
  # inverse Simpson index normalized by the number of bins
119
- ev_index[col] = 1 / np.sum(p_i**2) / num_bins[col]
120
-
129
+ s_0 = 1 / np.sum(p_i**2) / num_bins[col]
130
+ ev_index[col] = (s_0 * num_bins[col] - 1) / (num_bins[col] - 1)
121
131
  return ev_index
122
132
 
123
133
 
@@ -141,9 +151,8 @@ def diversity(
141
151
  List of class labels for each image
142
152
  metadata: List[Dict]
143
153
  List of metadata factors for each image
144
- metric: Literal["shannon", "simpson"], default "simpson"
145
- string variable indicating which diversity index should be used.
146
- Permissible values include "simpson" and "shannon"
154
+ method: Literal["shannon", "simpson"], default "simpson"
155
+ Indicates which diversity index should be computed
147
156
 
148
157
  Notes
149
158
  -----
@@ -159,7 +168,7 @@ def diversity(
159
168
  Compute Simpson diversity index of metadata and class labels
160
169
 
161
170
  >>> diversity(class_labels, metadata, method="simpson").diversity_index
162
- array([0.34482759, 0.34482759, 0.90909091])
171
+ array([0.18103448, 0.18103448, 0.88636364])
163
172
 
164
173
  Compute Shannon diversity index of metadata and class labels
165
174
 
@@ -189,7 +198,7 @@ def diversity_classwise(
189
198
  index.
190
199
 
191
200
  diversity = 1 implies that samples are evenly distributed across a particular factor
192
- diversity = 1/num_categories implies that all samples belong to one category/bin
201
+ diversity = 0 implies that all samples belong to one category/bin
193
202
 
194
203
  Parameters
195
204
  ----------
@@ -197,12 +206,13 @@ def diversity_classwise(
197
206
  List of class labels for each image
198
207
  metadata: List[Dict]
199
208
  List of metadata factors for each image
209
+ method: Literal["shannon", "simpson"], default "simpson"
210
+ Indicates which diversity index should be computed
200
211
 
201
212
  Notes
202
213
  -----
203
214
  - For continuous variables, histogram bins are chosen automatically. See numpy.histogram for details.
204
- - The expression is undefined for q=1, but it approaches the Shannon entropy in the limit.
205
- - If there is only one category, the diversity index takes a value of 1 = 1/N = 1/1. Entropy will take a value of 0.
215
+ - If there is only one category, the diversity index takes a value of 0.
206
216
 
207
217
  Returns
208
218
  -------
@@ -214,8 +224,8 @@ def diversity_classwise(
214
224
  Compute classwise Simpson diversity index of metadata and class labels
215
225
 
216
226
  >>> diversity_classwise(class_labels, metadata, method="simpson").diversity_index
217
- array([[0.33793103, 0.51578947],
218
- [0.36 , 0.36 ]])
227
+ array([[0.17241379, 0.39473684],
228
+ [0.2 , 0.2 ]])
219
229
 
220
230
  Compute classwise Shannon diversity index of metadata and class labels
221
231
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dataeval
3
- Version: 0.66.0
3
+ Version: 0.67.0
4
4
  Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
5
5
  Home-page: https://dataeval.ai/
6
6
  License: MIT
@@ -1,4 +1,4 @@
1
- dataeval/__init__.py,sha256=dshMbJco8lxfbbIg0DO5fSDsvgu4DKPGE5PzA7pwvPQ,590
1
+ dataeval/__init__.py,sha256=RFLBI0XRq8nKsIoRacgjErr1RdQmoGI5gc8F9uAm-U8,590
2
2
  dataeval/_internal/detectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  dataeval/_internal/detectors/clusterer.py,sha256=hJwELUeAdZZ3OVLIfwalw2P7Zz13q2ZqrV6gx90s44E,20695
4
4
  dataeval/_internal/detectors/drift/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -24,7 +24,7 @@ dataeval/_internal/metrics/balance.py,sha256=g-YYFpq0qy2xq4iHjBKZDMjOn5R9Rit6sSb
24
24
  dataeval/_internal/metrics/ber.py,sha256=Onsi47AbT9rMvng-Pbu8LIrYRfLpI13En1FxkFoMKQs,4668
25
25
  dataeval/_internal/metrics/coverage.py,sha256=9ZvcNjItE9rEyA2UHPE1K9zpTbbib4xqk8WpPpDN8ok,4037
26
26
  dataeval/_internal/metrics/divergence.py,sha256=nmMUfr9FGnH798eb6xzEiMj4C42rQVthh5HeexiY6EE,4119
27
- dataeval/_internal/metrics/diversity.py,sha256=2xEkLnaRhPOvsd2DCTDT-dVvPPEZOH4PKm0vufrgBq4,8207
27
+ dataeval/_internal/metrics/diversity.py,sha256=lkXnraNGCzkzamsBm2kG5eXxdRXHmkXxPWCKnGnPKOI,8620
28
28
  dataeval/_internal/metrics/parity.py,sha256=suv1Pf7gPj0_NxsS0_M6ewfUndsFJyEhbt5NPp6ktMI,15457
29
29
  dataeval/_internal/metrics/stats.py,sha256=Xbm7lLB0OZtsoxClMIrfULSqT8VymQiQmohJFtN7oz8,16332
30
30
  dataeval/_internal/metrics/uap.py,sha256=w-wvXXnX16kUq-weaZD2SrJi22LJ8EjOFbOhPxeGejI,2043
@@ -66,7 +66,7 @@ dataeval/torch/models/__init__.py,sha256=YnDnePYpRIKHyYn3F5qR1OObMSb-g0FGvI8X-uT
66
66
  dataeval/torch/trainer/__init__.py,sha256=Te-qElt8h-Zv8NN0r-VJOEdCPHTQ2yO3rd2MhRiZGZs,93
67
67
  dataeval/utils/__init__.py,sha256=ExQ1xj62MjcM9uIu1-g1P2fW0EPJpcIofnvxjQ908c4,172
68
68
  dataeval/workflows/__init__.py,sha256=gkU2B6yUiefexcYrBwqfZKNl8BvX8abUjfeNvVBXF4E,186
69
- dataeval-0.66.0.dist-info/LICENSE.txt,sha256=Kpzcfobf1HlqafF-EX6dQLw9TlJiaJzfgvLQFukyXYw,1060
70
- dataeval-0.66.0.dist-info/METADATA,sha256=P04dHyQOp4_6lg0IkoUEXTGJAPPpgRwf5ZAwdYpuatc,4217
71
- dataeval-0.66.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
72
- dataeval-0.66.0.dist-info/RECORD,,
69
+ dataeval-0.67.0.dist-info/LICENSE.txt,sha256=Kpzcfobf1HlqafF-EX6dQLw9TlJiaJzfgvLQFukyXYw,1060
70
+ dataeval-0.67.0.dist-info/METADATA,sha256=KDbXi3-uY4kdbK4AuH4C9DXPLnkhfCdNAmqkHpDG1OI,4217
71
+ dataeval-0.67.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
72
+ dataeval-0.67.0.dist-info/RECORD,,