dataeval 1.0.4__tar.gz → 1.0.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dataeval-1.0.4 → dataeval-1.0.6}/PKG-INFO +1 -1
- {dataeval-1.0.4 → dataeval-1.0.6}/pyproject.toml +36 -3
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/_embeddings.py +2 -2
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/_experimental.py +5 -5
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/_metadata.py +8 -8
- dataeval-1.0.6/src/dataeval/_version.py +24 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/bias/_balance.py +20 -17
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/bias/_diversity.py +5 -3
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/bias/_parity.py +0 -2
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/config.py +3 -3
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_ber.py +10 -2
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_bin.py +2 -2
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_clusterer.py +5 -5
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_compute_ratios.py +3 -3
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_compute_stats.py +3 -3
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_coverage.py +31 -13
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_fast_hdbscan/_cluster_trees.py +6 -6
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_fast_hdbscan/_mst.py +1 -1
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_feature_distance.py +4 -3
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_label_parity.py +3 -3
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_label_stats.py +1 -1
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_metadata_insights.py +26 -22
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_mst.py +1 -1
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_mutual_info.py +31 -35
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_parity.py +1 -1
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_uap.py +10 -12
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/extractors/_onnx.py +2 -2
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/extractors/_torch.py +1 -1
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/performance/_output.py +3 -3
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/performance/_sufficiency.py +1 -1
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/quality/_duplicates.py +49 -36
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/quality/_outliers.py +37 -37
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/quality/_shared.py +3 -2
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/scope/_prioritize.py +8 -5
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/selection/_classbalance.py +1 -1
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/selection/_classfilter.py +1 -1
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/shift/_drift/_base.py +3 -2
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/shift/_drift/_univariate.py +1 -1
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/shift/_ood/_domain_classifier.py +25 -9
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/shift/_shared/_reconstruction.py +39 -35
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/types.py +4 -4
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/utils/_internal.py +6 -6
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/utils/data.py +4 -4
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/utils/onnx.py +1 -1
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/utils/preprocessing.py +3 -3
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/utils/thresholds.py +3 -3
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/utils/training.py +1 -1
- dataeval-1.0.4/src/dataeval/_version.py +0 -34
- {dataeval-1.0.4 → dataeval-1.0.6}/.gitignore +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/LICENSE +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/README.md +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/__init__.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/_helpers.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/_log.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/_warm_cache.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/bias/__init__.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/__init__.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_calculators/__init__.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_calculators/_base.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_calculators/_cache.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_calculators/_dimensionstats.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_calculators/_hashstats.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_calculators/_pixelstats.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_calculators/_register.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_calculators/_registry.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_calculators/_visualstats.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_completeness.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_divergence.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_diversity.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_fast_hdbscan/_disjoint_set.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_hash.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_label_errors.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_nullmodel.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_rank.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/exceptions.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/extractors/__init__.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/extractors/_bovw.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/extractors/_flatten.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/extractors/_uncertainty.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/flags.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/performance/__init__.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/performance/_aggregator.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/performance/schedules.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/protocols.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/py.typed +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/quality/__init__.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/scope/__init__.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/selection/__init__.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/selection/_indices.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/selection/_limit.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/selection/_reverse.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/selection/_select.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/selection/_shuffle.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/shift/__init__.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/shift/_drift/__init__.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/shift/_drift/_chunk.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/shift/_drift/_domain_classifier.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/shift/_drift/_kneighbors.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/shift/_drift/_mmd.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/shift/_drift/_reconstruction.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/shift/_ood/__init__.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/shift/_ood/_base.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/shift/_ood/_kneighbors.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/shift/_ood/_reconstruction.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/shift/_shared/__init__.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/shift/_shared/_domain_classifier.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/shift/_shared/_kneighbors.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/shift/update_strategies.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/utils/__init__.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/utils/losses.py +0 -0
- {dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/utils/models.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dataeval
|
|
3
|
-
Version: 1.0.4
|
|
3
|
+
Version: 1.0.6
|
|
4
4
|
Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
|
|
5
5
|
Project-URL: Homepage, https://dataeval.ai/
|
|
6
6
|
Project-URL: Repository, https://github.com/aria-ml/dataeval/
|
|
@@ -114,10 +114,24 @@ docs = [
|
|
|
114
114
|
"sphinx-tabs>=3.4.7",
|
|
115
115
|
"Sphinx>=7.2.6,<9.0.0", # sphinx-immaterial <= 0.13.9 is not compatible with sphinx >=9.0
|
|
116
116
|
"torchmetrics>=1.0.0",
|
|
117
|
-
"torchvision>=0.17.0",
|
|
118
117
|
"markupsafe>=3,<3.0.2",
|
|
119
118
|
"jupytext>=1.19.1",
|
|
120
119
|
]
|
|
120
|
+
security = [ # keep in sync with [tool.uv.constraint-dependencies]
|
|
121
|
+
"cryptography>=46.0.5", # CVE-2026-26007: Missing Subgroup Validation for SECT Curves
|
|
122
|
+
"filelock>=3.20.3", # GHSA-w853-jp5j-5j7f, GHSA-qmgc-5h2g-mvrw
|
|
123
|
+
"onnx>=1.21.0", # CVE-2026-28500: Untrusted Model Repository Warnings Suppressed by silent=True
|
|
124
|
+
# CVE-2026-34445: Malicious ONNX models can crash servers by exploiting unprotected object settings
|
|
125
|
+
# CVE-2026-27489: Vulnerable to Path Traversal via Symlink
|
|
126
|
+
# GHSA-q56x-g2fj-4rj6: TOCTOU arbitrary file read/write in save_external_dat
|
|
127
|
+
"pillow>=12.1.1", # CVE-2026-25990: OOB write via PSD image
|
|
128
|
+
"poetry>=2.3.3", # CVE-2026-34591: Poetry Has Wheel Path Traversal Which Can Lead to Arbitrary File Write
|
|
129
|
+
"protobuf>=6.33.5", # GHSA-7gcm-g887-7qv7
|
|
130
|
+
"setuptools>=82.0.0", # CVE-2026-23949: (jaraco_context) path traversal in tarball()
|
|
131
|
+
# CVE-2026-24049: (wheel) privilege escalation via unpack
|
|
132
|
+
"tornado>=6.5.5", # CVE-2026-31958: Tornado is vulnerable to DoS due to too many multipart parts
|
|
133
|
+
# CVE-2026-35536: Tornado has cookie attribute injection via .RequestHandler.set_cookie
|
|
134
|
+
]
|
|
121
135
|
dev = [
|
|
122
136
|
{ include-group = "base" },
|
|
123
137
|
{ include-group = "lint" },
|
|
@@ -141,9 +155,18 @@ conflicts = [
|
|
|
141
155
|
]
|
|
142
156
|
constraint-dependencies = [
|
|
143
157
|
"cryptography>=46.0.5", # CVE-2026-26007: Missing Subgroup Validation for SECT Curves
|
|
158
|
+
"filelock>=3.20.3", # GHSA-w853-jp5j-5j7f, GHSA-qmgc-5h2g-mvrw
|
|
159
|
+
"onnx>=1.21.0", # CVE-2026-28500: Untrusted Model Repository Warnings Suppressed by silent=True
|
|
160
|
+
# CVE-2026-34445: Malicious ONNX models can crash servers by exploiting unprotected object settings
|
|
161
|
+
# CVE-2026-27489: Vulnerable to Path Traversal via Symlink
|
|
162
|
+
# GHSA-q56x-g2fj-4rj6: TOCTOU arbitrary file read/write in save_external_dat
|
|
144
163
|
"pillow>=12.1.1", # CVE-2026-25990: OOB write via PSD image
|
|
164
|
+
"poetry>=2.3.3", # CVE-2026-34591: Poetry Has Wheel Path Traversal Which Can Lead to Arbitrary File Write
|
|
165
|
+
"protobuf>=6.33.5", # GHSA-7gcm-g887-7qv7
|
|
145
166
|
"setuptools>=82.0.0", # CVE-2026-23949: (jaraco_context) path traversal in tarball()
|
|
146
167
|
# CVE-2026-24049: (wheel) privilege escalation via unpack
|
|
168
|
+
"tornado>=6.5.5", # CVE-2026-31958: Tornado is vulnerable to DoS due to too many multipart parts
|
|
169
|
+
# CVE-2026-35536: Tornado has cookie attribute injection via .RequestHandler.set_cookie
|
|
147
170
|
]
|
|
148
171
|
|
|
149
172
|
[[tool.uv.index]]
|
|
@@ -201,6 +224,9 @@ version-file = "src/dataeval/_version.py"
|
|
|
201
224
|
[tool.poetry]
|
|
202
225
|
version = "0.0.0" # unused
|
|
203
226
|
|
|
227
|
+
[tool.poetry.dependencies]
|
|
228
|
+
python = ">=3.10,<3.15"
|
|
229
|
+
|
|
204
230
|
[tool.pyproject2conda.dependencies]
|
|
205
231
|
numpy = { skip = true, packages = "numpy>=1.24.2" }
|
|
206
232
|
scikit-learn = { skip = true, packages = "scikit-learn>=1.5.0" }
|
|
@@ -262,20 +288,23 @@ exclude = [
|
|
|
262
288
|
".jupyter_cache",
|
|
263
289
|
"*env*",
|
|
264
290
|
"output",
|
|
291
|
+
"_build",
|
|
265
292
|
"build",
|
|
266
293
|
".nox",
|
|
267
294
|
".tox",
|
|
295
|
+
"prototype",
|
|
268
296
|
"src/dataeval/_version.py",
|
|
269
297
|
]
|
|
270
298
|
line-length = 120
|
|
271
299
|
indent-width = 4
|
|
272
300
|
target-version = "py310"
|
|
301
|
+
extend-include = ["*.ipynb"]
|
|
273
302
|
|
|
274
303
|
[tool.ruff.lint]
|
|
275
304
|
select = ["F", "E", "W", "C90", "I", "N", "D", "UP", "YTT", "ANN", "S", "BLE", "B", "A",
|
|
276
305
|
"COM", "C4", "T10", "ISC", "ICN", "PYI", "PT", "Q", "RSE", "RET", "SLF", "SIM",
|
|
277
|
-
"TID252", "ARG", "FIX", "PD", "FLY", "NPY", "RUF100", "PERF"]
|
|
278
|
-
ignore = ["ANN401", "COM812", "NPY002", "SLF001"]
|
|
306
|
+
"TID252", "ARG", "FIX", "PD", "FLY", "NPY", "RUF027", "RUF100", "PERF"]
|
|
307
|
+
ignore = ["ANN101", "ANN102", "ANN401", "C408", "C416", "COM812", "NPY002", "SLF001"]
|
|
279
308
|
fixable = ["ALL"]
|
|
280
309
|
unfixable = []
|
|
281
310
|
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
|
|
@@ -287,10 +316,14 @@ builtins-strict-checking = false
|
|
|
287
316
|
[tool.ruff.lint.isort]
|
|
288
317
|
known-first-party = ["dataeval"]
|
|
289
318
|
|
|
319
|
+
[tool.ruff.lint.mccabe]
|
|
320
|
+
max-complexity = 5
|
|
321
|
+
|
|
290
322
|
[tool.ruff.lint.pydocstyle]
|
|
291
323
|
convention = "numpy"
|
|
292
324
|
|
|
293
325
|
[tool.ruff.format]
|
|
326
|
+
preview = true
|
|
294
327
|
quote-style = "double"
|
|
295
328
|
indent-style = "space"
|
|
296
329
|
skip-magic-trailing-comma = false
|
|
@@ -509,7 +509,7 @@ class Embeddings(Array, FeatureExtractor):
|
|
|
509
509
|
images.append(image)
|
|
510
510
|
return images
|
|
511
511
|
|
|
512
|
-
def _batch(self, indices: Sequence[int]) -> Iterator[NDArray[Any]]:
|
|
512
|
+
def _batch(self, indices: Sequence[int]) -> Iterator[NDArray[Any]]: # noqa: C901
|
|
513
513
|
"""Process indices in batches using the extractor."""
|
|
514
514
|
if self._dataset is None:
|
|
515
515
|
raise NotFittedError("No dataset bound. Call bind() first.")
|
|
@@ -559,7 +559,7 @@ class Embeddings(Array, FeatureExtractor):
|
|
|
559
559
|
batch_indices = list(indices[batch_start : batch_start + self._batch_size])
|
|
560
560
|
yield self._embeddings[batch_indices]
|
|
561
561
|
|
|
562
|
-
def __getitem__(self, key: int | Iterable[int] | slice, /) -> NDArray[Any]:
|
|
562
|
+
def __getitem__(self, key: int | Iterable[int] | slice, /) -> NDArray[Any]: # noqa: C901
|
|
563
563
|
"""
|
|
564
564
|
Access embeddings by index, indices or slice.
|
|
565
565
|
|
|
@@ -13,7 +13,7 @@ from dataeval.exceptions import DeprecatedWarning, ExperimentalWarning
|
|
|
13
13
|
F = TypeVar("F", bound=Callable[..., Any])
|
|
14
14
|
|
|
15
15
|
|
|
16
|
-
def _make_warning_message(
|
|
16
|
+
def _make_warning_message( # noqa: C901
|
|
17
17
|
name: str,
|
|
18
18
|
kind: str,
|
|
19
19
|
*,
|
|
@@ -51,7 +51,7 @@ def _prepend_doc_note(doc: str | None, note: str) -> str:
|
|
|
51
51
|
def experimental(_target: F) -> F: ...
|
|
52
52
|
@overload
|
|
53
53
|
def experimental(*, alternative: str | None = None, details: str | None = None) -> Callable[[F], F]: ...
|
|
54
|
-
def experimental(
|
|
54
|
+
def experimental( # noqa: C901
|
|
55
55
|
_target: F | None = None,
|
|
56
56
|
*,
|
|
57
57
|
alternative: str | None = None,
|
|
@@ -72,7 +72,7 @@ def experimental(
|
|
|
72
72
|
def my_func(): ...
|
|
73
73
|
"""
|
|
74
74
|
|
|
75
|
-
def decorator(target: F) -> F:
|
|
75
|
+
def decorator(target: F) -> F: # noqa: C901
|
|
76
76
|
name = getattr(target, "__qualname__", getattr(target, "__name__", str(target)))
|
|
77
77
|
msg = _make_warning_message(name, "experimental", alternative=alternative, details=details)
|
|
78
78
|
warned = False
|
|
@@ -118,7 +118,7 @@ def deprecated(
|
|
|
118
118
|
alternative: str | None = None,
|
|
119
119
|
details: str | None = None,
|
|
120
120
|
) -> Callable[[F], F]: ...
|
|
121
|
-
def deprecated(
|
|
121
|
+
def deprecated( # noqa: C901
|
|
122
122
|
_target: F | None = None,
|
|
123
123
|
*,
|
|
124
124
|
since: str | None = None,
|
|
@@ -141,7 +141,7 @@ def deprecated(
|
|
|
141
141
|
def old_func(): ...
|
|
142
142
|
"""
|
|
143
143
|
|
|
144
|
-
def decorator(target: F) -> F:
|
|
144
|
+
def decorator(target: F) -> F: # noqa: C901
|
|
145
145
|
name = getattr(target, "__qualname__", getattr(target, "__name__", str(target)))
|
|
146
146
|
msg = _make_warning_message(
|
|
147
147
|
name,
|
|
@@ -282,7 +282,7 @@ class Metadata(Array, FeatureExtractor):
|
|
|
282
282
|
raise NotFittedError("No dataset bound. Call bind() first.")
|
|
283
283
|
yield from self.factor_data
|
|
284
284
|
|
|
285
|
-
def __getitem__(self, index: int | str | slice) -> Array:
|
|
285
|
+
def __getitem__(self, index: int | str | slice) -> Array: # noqa: C901
|
|
286
286
|
"""Get binned metadata for specific indices or factors.
|
|
287
287
|
|
|
288
288
|
Parameters
|
|
@@ -650,7 +650,7 @@ class Metadata(Array, FeatureExtractor):
|
|
|
650
650
|
-------
|
|
651
651
|
Sequence[str]
|
|
652
652
|
List of factor names that passed filtering and preprocessing steps.
|
|
653
|
-
Order matches columns in factor_data
|
|
653
|
+
Order matches columns in factor_data.
|
|
654
654
|
|
|
655
655
|
Notes
|
|
656
656
|
-----
|
|
@@ -934,7 +934,7 @@ class Metadata(Array, FeatureExtractor):
|
|
|
934
934
|
factor = factor[0] if isinstance(factor, tuple) else factor
|
|
935
935
|
return factor in self.include if self.include else factor not in self.exclude
|
|
936
936
|
|
|
937
|
-
def _reset_bins(self, cols: Iterable[str] | None = None) -> None:
|
|
937
|
+
def _reset_bins(self, cols: Iterable[str] | None = None) -> None: # noqa: C901
|
|
938
938
|
if self._is_binned:
|
|
939
939
|
columns = self._dataframe.columns
|
|
940
940
|
for col in cols or columns:
|
|
@@ -1006,7 +1006,7 @@ class Metadata(Array, FeatureExtractor):
|
|
|
1006
1006
|
)
|
|
1007
1007
|
return target_rows
|
|
1008
1008
|
|
|
1009
|
-
def _get_target_factor_values(
|
|
1009
|
+
def _get_target_factor_values( # noqa: C901
|
|
1010
1010
|
self,
|
|
1011
1011
|
factor_name: str,
|
|
1012
1012
|
factor_values: Any,
|
|
@@ -1152,7 +1152,7 @@ class Metadata(Array, FeatureExtractor):
|
|
|
1152
1152
|
self._structure()
|
|
1153
1153
|
return bool(self._has_targets)
|
|
1154
1154
|
|
|
1155
|
-
def _process_targets(
|
|
1155
|
+
def _process_targets( # noqa: C901
|
|
1156
1156
|
self,
|
|
1157
1157
|
raw: list,
|
|
1158
1158
|
labels: list,
|
|
@@ -1284,7 +1284,7 @@ class Metadata(Array, FeatureExtractor):
|
|
|
1284
1284
|
existing = self._factors if hasattr(self, "_factors") else {}
|
|
1285
1285
|
self._factors = {k: existing.get(k) for k in usable_factors}
|
|
1286
1286
|
|
|
1287
|
-
def _structure(
|
|
1287
|
+
def _structure( # noqa: C901
|
|
1288
1288
|
self,
|
|
1289
1289
|
*,
|
|
1290
1290
|
progress_callback: ProgressCallback | None = None,
|
|
@@ -1478,7 +1478,7 @@ class Metadata(Array, FeatureExtractor):
|
|
|
1478
1478
|
df = self._add_column_with_padding(df, col_dg, ordinal.astype(np.int64), is_od)
|
|
1479
1479
|
return df, FactorInfo("discrete", is_digitized=True)
|
|
1480
1480
|
|
|
1481
|
-
def _bin(
|
|
1481
|
+
def _bin( # noqa: C901
|
|
1482
1482
|
self,
|
|
1483
1483
|
*,
|
|
1484
1484
|
progress_callback: ProgressCallback | None = None,
|
|
@@ -1523,7 +1523,7 @@ class Metadata(Array, FeatureExtractor):
|
|
|
1523
1523
|
self._factors.update(factor_info)
|
|
1524
1524
|
self._is_binned = True
|
|
1525
1525
|
|
|
1526
|
-
def add_factors(
|
|
1526
|
+
def add_factors( # noqa: C901
|
|
1527
1527
|
self,
|
|
1528
1528
|
factors: Mapping[str, Array1D[Any]],
|
|
1529
1529
|
level: Literal["image", "target", "auto"] = "auto",
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# file generated by vcs-versioning
|
|
2
|
+
# don't change, don't track in version control
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
__all__ = [
|
|
6
|
+
"__version__",
|
|
7
|
+
"__version_tuple__",
|
|
8
|
+
"version",
|
|
9
|
+
"version_tuple",
|
|
10
|
+
"__commit_id__",
|
|
11
|
+
"commit_id",
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
version: str
|
|
15
|
+
__version__: str
|
|
16
|
+
__version_tuple__: tuple[int | str, ...]
|
|
17
|
+
version_tuple: tuple[int | str, ...]
|
|
18
|
+
commit_id: str | None
|
|
19
|
+
__commit_id__: str | None
|
|
20
|
+
|
|
21
|
+
__version__ = version = '1.0.6'
|
|
22
|
+
__version_tuple__ = version_tuple = (1, 0, 6)
|
|
23
|
+
|
|
24
|
+
__commit_id__ = commit_id = None
|
|
@@ -22,28 +22,30 @@ class BalanceOutput(DictOutput):
|
|
|
22
22
|
"""
|
|
23
23
|
Output class for the :class:`.Balance` :term:`bias<Bias>` evaluator.
|
|
24
24
|
|
|
25
|
-
Contains three polars DataFrames with mutual information scores and threshold flags.
|
|
25
|
+
Contains three polars DataFrames with normalized mutual information scores and threshold flags.
|
|
26
26
|
|
|
27
27
|
Attributes
|
|
28
28
|
----------
|
|
29
29
|
balance : pl.DataFrame
|
|
30
|
-
DataFrame with global class-to-factor mutual information:
|
|
30
|
+
DataFrame with global class-to-factor normalized mutual information:
|
|
31
31
|
|
|
32
|
-
- factor_name: str - Name of the metadata factor
|
|
33
|
-
|
|
32
|
+
- factor_name: str - Name of the metadata factor. Includes "class_label"
|
|
33
|
+
which represents the self-information (always 1.0).
|
|
34
|
+
- mi_value: float - Normalized mutual information value between this
|
|
35
|
+
factor and class labels
|
|
34
36
|
factors : pl.DataFrame
|
|
35
|
-
DataFrame with inter-factor mutual information correlations:
|
|
37
|
+
DataFrame with inter-factor normalized mutual information correlations:
|
|
36
38
|
|
|
37
39
|
- factor1: str - Name of the first factor
|
|
38
40
|
- factor2: str - Name of the second factor
|
|
39
|
-
- mi_value: float -
|
|
41
|
+
- mi_value: float - Normalized mutual information value
|
|
40
42
|
- is_correlated: bool - True if mi_value > factor_correlation_threshold
|
|
41
43
|
classwise : pl.DataFrame
|
|
42
|
-
DataFrame with per-class-to-factor mutual information:
|
|
44
|
+
DataFrame with per-class-to-factor normalized mutual information:
|
|
43
45
|
|
|
44
46
|
- class_name: str - Name of the class
|
|
45
47
|
- factor_name: str - Name of the metadata factor
|
|
46
|
-
- mi_value: float -
|
|
48
|
+
- mi_value: float - Normalized mutual information value
|
|
47
49
|
- is_imbalanced: bool - True if mi_value > class_imbalance_threshold
|
|
48
50
|
"""
|
|
49
51
|
|
|
@@ -58,21 +60,21 @@ class BalanceOutput(DictOutput):
|
|
|
58
60
|
|
|
59
61
|
class Balance(Evaluator):
|
|
60
62
|
"""
|
|
61
|
-
Computes mutual information (
|
|
63
|
+
Computes normalized mutual information (NMI) between factors (class label, metadata, label/image properties).
|
|
62
64
|
|
|
63
65
|
Identifies imbalanced classes and highly correlated metadata factors based on
|
|
64
|
-
|
|
66
|
+
NMI thresholds.
|
|
65
67
|
|
|
66
68
|
Parameters
|
|
67
69
|
----------
|
|
68
70
|
num_neighbors : int, default 5
|
|
69
71
|
Number of points to consider as neighbors
|
|
70
72
|
class_imbalance_threshold : float, default 0.3
|
|
71
|
-
Threshold for identifying imbalanced classes. Classes with
|
|
73
|
+
Threshold for identifying imbalanced classes. Classes with NMI above this
|
|
72
74
|
threshold with any metadata factor are considered imbalanced.
|
|
73
75
|
factor_correlation_threshold : float, default 0.5
|
|
74
76
|
Threshold for identifying highly correlated metadata factors. Factor pairs
|
|
75
|
-
with
|
|
77
|
+
with NMI above this threshold are considered highly correlated.
|
|
76
78
|
|
|
77
79
|
Attributes
|
|
78
80
|
----------
|
|
@@ -89,7 +91,8 @@ class Balance(Evaluator):
|
|
|
89
91
|
-----
|
|
90
92
|
We use `mutual_info_classif` from sklearn since class label is categorical.
|
|
91
93
|
`mutual_info_classif` outputs are consistent up to O(1e-4) and depend on a random
|
|
92
|
-
seed. MI is computed differently for categorical and continuous variables
|
|
94
|
+
seed. MI is computed differently for categorical and continuous variables, and
|
|
95
|
+
in all cases normalized or transformed to [0, 1] prior to being returned.
|
|
93
96
|
|
|
94
97
|
Examples
|
|
95
98
|
--------
|
|
@@ -147,9 +150,9 @@ class Balance(Evaluator):
|
|
|
147
150
|
super().__init__(locals())
|
|
148
151
|
|
|
149
152
|
@set_metadata(state=["num_neighbors", "class_imbalance_threshold", "factor_correlation_threshold"])
|
|
150
|
-
def evaluate(self, data: AnnotatedDataset[Any] | MetadataLike) -> BalanceOutput:
|
|
153
|
+
def evaluate(self, data: AnnotatedDataset[Any] | MetadataLike) -> BalanceOutput: # noqa: C901
|
|
151
154
|
"""
|
|
152
|
-
Compute mutual information between factors and identify imbalanced classes.
|
|
155
|
+
Compute normalized mutual information between factors and identify imbalanced classes.
|
|
153
156
|
|
|
154
157
|
Parameters
|
|
155
158
|
----------
|
|
@@ -160,7 +163,7 @@ class Balance(Evaluator):
|
|
|
160
163
|
Returns
|
|
161
164
|
-------
|
|
162
165
|
BalanceOutput
|
|
163
|
-
Three DataFrames containing
|
|
166
|
+
Three DataFrames containing NMI scores and threshold flags:
|
|
164
167
|
|
|
165
168
|
- balance: Global class-to-factor mutual information
|
|
166
169
|
- factors: Inter-factor mutual information
|
|
@@ -168,7 +171,7 @@ class Balance(Evaluator):
|
|
|
168
171
|
|
|
169
172
|
Example
|
|
170
173
|
-------
|
|
171
|
-
Return balance (
|
|
174
|
+
Return balance (NMI) of factors with class_labels
|
|
172
175
|
|
|
173
176
|
>>> from dataeval import Metadata
|
|
174
177
|
>>> metadata = Metadata(dataset)
|
|
@@ -56,7 +56,7 @@ class Diversity(Evaluator):
|
|
|
56
56
|
Through standard histogram binning, for continuous variables.
|
|
57
57
|
|
|
58
58
|
The method specified defines diversity as the inverse Simpson diversity index linearly rescaled to
|
|
59
|
-
the unit interval, or the normalized form of the Shannon entropy.
|
|
59
|
+
the unit interval [0, 1], or the normalized form of the Shannon entropy.
|
|
60
60
|
|
|
61
61
|
diversity = 1 implies that samples are evenly distributed across a particular factor
|
|
62
62
|
diversity = 0 implies that all samples belong to one category/bin
|
|
@@ -66,7 +66,9 @@ class Diversity(Evaluator):
|
|
|
66
66
|
Parameters
|
|
67
67
|
----------
|
|
68
68
|
method : "simpson" or "shannon", default "simpson"
|
|
69
|
-
The methodology used for defining diversity
|
|
69
|
+
The methodology used for defining diversity. When "simpson" is used,
|
|
70
|
+
the index is linearly rescaled so that 1.0 represents maximum diversity
|
|
71
|
+
(even distribution) and 0.0 represents minimum diversity (all samples in one bin).
|
|
70
72
|
threshold : float, default 0.5
|
|
71
73
|
Threshold for identifying low diversity. Factors with diversity values
|
|
72
74
|
at or below this threshold are flagged as having low diversity.
|
|
@@ -135,7 +137,7 @@ class Diversity(Evaluator):
|
|
|
135
137
|
super().__init__(locals())
|
|
136
138
|
|
|
137
139
|
@set_metadata(state=["method", "threshold"])
|
|
138
|
-
def evaluate(self, data: AnnotatedDataset[Any] | MetadataLike) -> DiversityOutput:
|
|
140
|
+
def evaluate(self, data: AnnotatedDataset[Any] | MetadataLike) -> DiversityOutput: # noqa: C901
|
|
139
141
|
"""
|
|
140
142
|
Compute diversity and classwise diversity for the dataset.
|
|
141
143
|
|
|
@@ -118,8 +118,6 @@ class Parity(Evaluator):
|
|
|
118
118
|
|
|
119
119
|
>>> config = Parity.Config(score_threshold=0.4, p_value_threshold=0.01)
|
|
120
120
|
>>> parity = Parity(config=config)
|
|
121
|
-
|
|
122
|
-
output = parity(metadata.binned_data, metadata.class_labels.tolist())
|
|
123
121
|
"""
|
|
124
122
|
|
|
125
123
|
class Config(EvaluatorConfig):
|
|
@@ -23,7 +23,7 @@ from pydantic import BaseModel, ConfigDict, field_validator
|
|
|
23
23
|
|
|
24
24
|
from dataeval.protocols import DeviceLike
|
|
25
25
|
|
|
26
|
-
|
|
26
|
+
# GLOBAL CONFIG ###
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
class GlobalConfig(BaseModel):
|
|
@@ -77,7 +77,7 @@ class GlobalConfig(BaseModel):
|
|
|
77
77
|
_config = GlobalConfig()
|
|
78
78
|
|
|
79
79
|
|
|
80
|
-
|
|
80
|
+
# CONTEXT MANAGER ###
|
|
81
81
|
|
|
82
82
|
|
|
83
83
|
class _ConfigContextManager:
|
|
@@ -96,7 +96,7 @@ class _ConfigContextManager:
|
|
|
96
96
|
setattr(_config, self._attr_name, self._old)
|
|
97
97
|
|
|
98
98
|
|
|
99
|
-
|
|
99
|
+
# FUNCS ###
|
|
100
100
|
|
|
101
101
|
|
|
102
102
|
def _todevice(device: DeviceLike) -> torch.device:
|
|
@@ -78,6 +78,8 @@ def ber_mst(embeddings: ArrayND[float], class_labels: Array1D[int]) -> BERResult
|
|
|
78
78
|
"""
|
|
79
79
|
Estimate Multi-class :term:`Bayes error rate<Bayes Error Rate (BER)>` using a minimum spanning tree.
|
|
80
80
|
|
|
81
|
+
BER bounds the irreducible classification error given the current feature
|
|
82
|
+
representation — the error attributable to class overlap in embedding space.
|
|
81
83
|
Uses FR with a minimum spanning tree (MST) test statistic basis.
|
|
82
84
|
|
|
83
85
|
Parameters
|
|
@@ -137,7 +139,13 @@ def ber_knn(embeddings: ArrayND[float], class_labels: Array1D[int], k: int) -> B
|
|
|
137
139
|
"""
|
|
138
140
|
Estimate Multi-class :term:`Bayes error rate<Bayes Error Rate (BER)>` using KNN.
|
|
139
141
|
|
|
140
|
-
|
|
142
|
+
BER bounds the irreducible classification error given the current feature
|
|
143
|
+
representation — the error attributable to class overlap in embedding space.
|
|
144
|
+
Uses KNN test statistic basis. The estimator's behavior depends on the value of k:
|
|
145
|
+
- k=1: Uses 1-NN for the lower bound and 2-NN for the upper bound.
|
|
146
|
+
- k=2: Uses 2-NN for the lower bound and 3-NN for the upper bound.
|
|
147
|
+
- 2<k<=5: Uses k-NN for the lower bound and (k+1)-NN for the upper bound.
|
|
148
|
+
- k>5: Only available for binary classification; uses k-NN for both bounds with specialized asymptotic weights.
|
|
141
149
|
|
|
142
150
|
Parameters
|
|
143
151
|
----------
|
|
@@ -146,7 +154,7 @@ def ber_knn(embeddings: ArrayND[float], class_labels: Array1D[int], k: int) -> B
|
|
|
146
154
|
class_labels : Array1D[int]
|
|
147
155
|
Array of class labels for each image. Can be a 1D list, or array-like object.
|
|
148
156
|
k : int
|
|
149
|
-
Number of nearest neighbors for KNN estimator
|
|
157
|
+
Number of nearest neighbors for KNN estimator. Should be between 1 and the number of samples.
|
|
150
158
|
|
|
151
159
|
Returns
|
|
152
160
|
-------
|
|
@@ -92,7 +92,7 @@ def bin_data(data: NDArray[Any], bin_method: str) -> NDArray[np.intp]:
|
|
|
92
92
|
return np.digitize(data, bin_edges)
|
|
93
93
|
|
|
94
94
|
|
|
95
|
-
def is_continuous(data: NDArray[np.number[Any]], image_indices: NDArray[np.number[Any]] | None = None) -> bool:
|
|
95
|
+
def is_continuous(data: NDArray[np.number[Any]], image_indices: NDArray[np.number[Any]] | None = None) -> bool: # noqa: C901
|
|
96
96
|
"""
|
|
97
97
|
Determine whether the data is continuous or discrete using the Wasserstein distance.
|
|
98
98
|
|
|
@@ -144,7 +144,7 @@ def is_continuous(data: NDArray[np.number[Any]], image_indices: NDArray[np.numbe
|
|
|
144
144
|
return bool(shift < DISCRETE_MIN_WD) # if NNN is close enough to uniform, consider the sample continuous.
|
|
145
145
|
|
|
146
146
|
|
|
147
|
-
def _bin_by_clusters(data: NDArray[np.number[Any]]) -> NDArray[np.float64]:
|
|
147
|
+
def _bin_by_clusters(data: NDArray[np.number[Any]]) -> NDArray[np.float64]: # noqa: C901
|
|
148
148
|
"""
|
|
149
149
|
Bin continuous data by using the Clusterer to identify clusters.
|
|
150
150
|
|
|
@@ -99,7 +99,7 @@ class _Clusters:
|
|
|
99
99
|
prob: NDArray[np.float64] = exp / np.sum(exp)
|
|
100
100
|
return prob
|
|
101
101
|
|
|
102
|
-
def _sort_by_weights(self, embeddings: NDArray[np.float64]) -> NDArray[np.intp]:
|
|
102
|
+
def _sort_by_weights(self, embeddings: NDArray[np.float64]) -> NDArray[np.intp]: # noqa: C901
|
|
103
103
|
"""Sort samples using complexity-based weighted sampling."""
|
|
104
104
|
labels = self._get_labels(embeddings)
|
|
105
105
|
pr = self._complexity(embeddings)
|
|
@@ -241,7 +241,7 @@ class _HDBSCANSorter:
|
|
|
241
241
|
n_samples_per_cluster = np.bincount(labels)
|
|
242
242
|
_logger.debug(
|
|
243
243
|
"HDBSCAN clustering complete: %d clusters, samples per cluster: min=%d, max=%d, mean=%.1f",
|
|
244
|
-
clst.unique_clusters,
|
|
244
|
+
len(clst.unique_clusters),
|
|
245
245
|
np.min(n_samples_per_cluster),
|
|
246
246
|
np.max(n_samples_per_cluster),
|
|
247
247
|
np.mean(n_samples_per_cluster),
|
|
@@ -356,7 +356,7 @@ class _HDBSCAN:
|
|
|
356
356
|
self.cluster_selection_epsilon = 0.0
|
|
357
357
|
self.cluster_selection_method = "eom"
|
|
358
358
|
|
|
359
|
-
def fit(self, embeddings: NDArray[np.floating]) -> "_HDBSCAN":
|
|
359
|
+
def fit(self, embeddings: NDArray[np.floating]) -> "_HDBSCAN": # noqa: C901
|
|
360
360
|
"""
|
|
361
361
|
Find clusters based on hierarchical density-based clustering.
|
|
362
362
|
|
|
@@ -541,7 +541,7 @@ class ClusterStats(TypedDict):
|
|
|
541
541
|
nearest_cluster_idx: NDArray[np.int64]
|
|
542
542
|
|
|
543
543
|
|
|
544
|
-
def compute_cluster_stats(
|
|
544
|
+
def compute_cluster_stats( # noqa: C901
|
|
545
545
|
embeddings: NDArray[np.floating],
|
|
546
546
|
cluster_labels: _Clusters | NDArray[np.int64],
|
|
547
547
|
) -> ClusterStats:
|
|
@@ -642,7 +642,7 @@ def compute_cluster_stats(
|
|
|
642
642
|
)
|
|
643
643
|
|
|
644
644
|
|
|
645
|
-
def cluster(
|
|
645
|
+
def cluster( # noqa: C901
|
|
646
646
|
embeddings: ArrayND[float],
|
|
647
647
|
algorithm: Literal["kmeans", "hdbscan"] = "hdbscan",
|
|
648
648
|
n_clusters: int | None = None,
|
|
@@ -101,7 +101,7 @@ def _build_image_lookup(source_indices: Sequence[SourceIndex]) -> dict[tuple[int
|
|
|
101
101
|
return lookup
|
|
102
102
|
|
|
103
103
|
|
|
104
|
-
def _calculate_ratio_for_stat(
|
|
104
|
+
def _calculate_ratio_for_stat( # noqa: C901
|
|
105
105
|
stat_name: str,
|
|
106
106
|
box_value: Any,
|
|
107
107
|
img_value: Any,
|
|
@@ -160,7 +160,7 @@ def _calculate_ratio_for_stat(
|
|
|
160
160
|
return box_value
|
|
161
161
|
|
|
162
162
|
|
|
163
|
-
def _validate_separate_inputs(
|
|
163
|
+
def _validate_separate_inputs( # noqa: C901
|
|
164
164
|
stats_output: StatsResult,
|
|
165
165
|
box_stats_output: StatsResult,
|
|
166
166
|
) -> tuple[Sequence[SourceIndex], Sequence[SourceIndex]]:
|
|
@@ -241,7 +241,7 @@ def _validate_unified_input(source_indices: Sequence[SourceIndex]) -> None:
|
|
|
241
241
|
)
|
|
242
242
|
|
|
243
243
|
|
|
244
|
-
def compute_ratios(
|
|
244
|
+
def compute_ratios( # noqa: C901
|
|
245
245
|
stats_output: StatsResult,
|
|
246
246
|
*,
|
|
247
247
|
target_stats_output: StatsResult | None = None,
|
|
@@ -158,7 +158,7 @@ def _determine_channel_indices(calculator_output: list[dict[str, list[Any]]], nu
|
|
|
158
158
|
return sorted(channel_indices_needed, key=lambda x: -1 if x is None else x)
|
|
159
159
|
|
|
160
160
|
|
|
161
|
-
def _reconcile_stats(
|
|
161
|
+
def _reconcile_stats( # noqa: C901
|
|
162
162
|
calculator_output: list[dict[str, list[Any]]],
|
|
163
163
|
sorted_channels: list[int | None],
|
|
164
164
|
empty_values_map: dict[str, Any],
|
|
@@ -344,7 +344,7 @@ def _aggregate_batch(
|
|
|
344
344
|
_UNSET = object()
|
|
345
345
|
|
|
346
346
|
|
|
347
|
-
def compute_stats(
|
|
347
|
+
def compute_stats( # noqa: C901
|
|
348
348
|
data: Iterable[ArrayLike] | Dataset[ArrayLike] | Dataset[tuple[ArrayLike, Any, Any]],
|
|
349
349
|
*,
|
|
350
350
|
boxes: Iterable[Iterable[BoxLike] | None] | None = None,
|
|
@@ -544,7 +544,7 @@ def compute_stats(
|
|
|
544
544
|
)
|
|
545
545
|
|
|
546
546
|
|
|
547
|
-
def combine_stats_results(
|
|
547
|
+
def combine_stats_results( # noqa: C901
|
|
548
548
|
results: StatsResult | Sequence[StatsResult],
|
|
549
549
|
) -> tuple[StatsMap, list[SourceIndex], list[int]]:
|
|
550
550
|
"""Combine one or more StatsResults into unified stats, source_index, and dataset_steps.
|