eval-toolkit 1.0.4__tar.gz → 1.0.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/CHANGELOG.md +47 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/PKG-INFO +1 -1
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/_version.py +1 -1
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/golden/public_api/snapshot.json +1 -1
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/.gitignore +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/LICENSE +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/README.md +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/STYLE.md +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/docs/archive/README.md +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/docs/research/README.md +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/docs/research/datasets/README.md +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/docs/research/papers/data-integrity/README.md +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/docs/research/papers/eval-ecosystem/README.md +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/docs/research/papers/inference/README.md +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/docs/research/papers/prompt-injection/README.md +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/docs/source/adr/README.md +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/docs/source/methodology/README.md +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/pyproject.toml +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/__init__.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/__main__.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/_deprecated.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/_parallel.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/_rng.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/_sweep.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/adversarial.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/analysis.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/artifacts.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/audit_citation_alignment.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/audit_sister_doc_concept_drift.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/audit_value_bindings.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/bootstrap.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/calibration.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/claims.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/config.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/docs.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/embeddings.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/evidence.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/harness.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/leakage.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/loaders.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/losses.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/manifest.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/metric_specs.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/metrics.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/operating_points.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/paths.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/plotting.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/preprocessing.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/probes.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/protocols.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/provenance.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/py.typed +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/schemas/manifest.v1.json +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/schemas/manifest.v2.json +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/schemas/manifest.v3.json +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/schemas/ood_manifest.v1.json +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/schemas/results.v1.json +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/schemas/results_full.v1.json +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/scorecards.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/seeds.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/splits.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/stacking.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/text_dedup.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/thresholds.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/baseline/test_plotting_visual/plot_bootstrap_distribution.png +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/baseline/test_plotting_visual/plot_confusion_matrix_grid.png +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/baseline/test_plotting_visual/plot_lift_ci.png +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/baseline/test_plotting_visual/plot_metric_bars.png +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/baseline/test_plotting_visual/plot_pareto_frontier.png +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/baseline/test_plotting_visual/plot_pr_curve.png +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/baseline/test_plotting_visual/plot_reliability_diagram.png +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/baseline/test_plotting_visual/plot_roc_curve.png +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/baseline/test_plotting_visual/plot_score_histograms.png +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/baseline/test_plotting_visual/plot_slice_metric_heatmap.png +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/benchmarks/__init__.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/benchmarks/test_kernel_benchmarks.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/conftest.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/golden/bootstrap_ci/cases.json +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/golden/data/dedup_holdout.jsonl +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/golden/data/dedup_holdout_expected.json +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/golden/data/dedup_holdout_provenance.md +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/golden/docs/expected.md +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/golden/docs/input.md +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/golden/docs/metrics.json +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/golden/test_dedup_holdout_calibration.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/strategies.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_adversarial.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_analysis.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_artifacts.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_audit_citation_alignment.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_audit_sister_doc_concept_drift.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_audit_value_bindings.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_block_bootstrap_on_folds.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_bootstrap_calibration_mc.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_bootstrap_edge_cases.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_bootstrap_golden.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_bootstrap_njobs.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_bootstrap_props.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_bootstrap_research_grounded.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_bootstrap_unit.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_calibration_binary_adapters.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_calibration_bootstrap_chain.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_calibration_determinism.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_calibration_optimization_failures.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_calibration_props.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_calibration_research_grounded.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_calibration_unit.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_claims.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_claims_coverage.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_claims_props.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_cli.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_config.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_coverage_bootstrap.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_coverage_calibration.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_coverage_harness.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_coverage_metrics.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_coverage_plotting.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_croissant_e2e.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_dedup_split_leakage_chain.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_deprecated_scalars_shim.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_deprecations.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_docs_golden.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_docs_props.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_embeddings.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_evidence_validators.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_harness_edge_cases.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_harness_fault_injection.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_harness_folded.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_harness_internals.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_harness_metric_options.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_harness_parallelism.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_harness_smoke.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_import_boundaries.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_is_metric_defined_for_slice.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_lazy_extras_messages.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_leakage.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_leakage_error_paths.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_leakage_props.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_loaders.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_loaders_coverage.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_loaders_props.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_logging.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_losses.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_manifest.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_manifest_contamination_round_trip.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_manifest_props.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_manifest_validation.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_metrics_props.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_metrics_stratified_subsets.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_metrics_unit.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_misc_coverage.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_numeric_edge_cases.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_ood_loader.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_operating_points.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_operating_points_props.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_parallel.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_paths.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_pipeline_e2e.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_plotting_edge.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_plotting_smoke.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_plotting_visual.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_preprocessing.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_probes.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_protocol_conformance.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_provenance.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_public_api.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_recall_at_fpr.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_reference_equivalence.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_reproducibility_integration.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_rng.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_schemas.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_scorecard.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_seeds.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_splits.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_splits_leakage_integration.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_splits_props.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_stacking.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_sweep.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_text_dedup.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_text_dedup_coverage.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_text_dedup_props.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_text_dedup_strategies.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_thresholds.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_thresholds_constant_score.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_thresholds_coverage.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_thresholds_props.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_thresholds_research_grounded.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_tokenization_leakage_check.py +0 -0
- {eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/test_v09_contracts.py +0 -0
|
@@ -5,6 +5,53 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [1.0.5] — 2026-05-26 — publish workflow hardening (infrastructure-only)
|
|
9
|
+
|
|
10
|
+
Tier-3 / infrastructure-only release. **No library code or public API
|
|
11
|
+
changes.** Hardens the release pipeline against the failure mode
|
|
12
|
+
observed at v1.0.4, where a documented GitHub Actions CRITICAL
|
|
13
|
+
incident (codeload action download failure across the platform) left
|
|
14
|
+
the v1.0.4 wheel un-published on PyPI despite a successful tag and
|
|
15
|
+
GitHub release. The wheel for v1.0.5 is functionally identical to
|
|
16
|
+
v1.0.4; this release exists primarily as a dress rehearsal for the
|
|
17
|
+
new verification step.
|
|
18
|
+
|
|
19
|
+
### Added — `.github/workflows/publish.yml`
|
|
20
|
+
|
|
21
|
+
- **`workflow_dispatch:` trigger** — recovery path for failed
|
|
22
|
+
tag-triggered runs. Manually re-trigger via
|
|
23
|
+
`gh workflow run publish.yml --ref vX.Y.Z` or the Actions UI
|
|
24
|
+
"Run workflow" dropdown. Always uses the workflow file from main
|
|
25
|
+
HEAD, so workflow patches take effect immediately for recovery.
|
|
26
|
+
- **Post-publish `Verify PyPI receipt` step** — polls
|
|
27
|
+
`pypi.org/pypi/eval-toolkit/<version>/json` for HTTP 200 over a
|
|
28
|
+
6-minute window (12 × 30s backoff); fails loudly if the wheel
|
|
29
|
+
never lands. Catches silent half-releases where
|
|
30
|
+
`pypa/gh-action-pypi-publish` returns success but PyPI never
|
|
31
|
+
receives the wheel.
|
|
32
|
+
|
|
33
|
+
### Added — `docs/source/RELEASING.md`
|
|
34
|
+
|
|
35
|
+
- **"Tag-triggered publish failed; need to re-publish to PyPI"**
|
|
36
|
+
recovery recipe under Known gotchas. Documents both the
|
|
37
|
+
`gh run rerun` path (when the original run can be retried) and
|
|
38
|
+
the `workflow_dispatch` path (when the workflow has been patched
|
|
39
|
+
on main since the original tag). References the v1.0.4 incident
|
|
40
|
+
as the canonical example.
|
|
41
|
+
|
|
42
|
+
### Notes
|
|
43
|
+
|
|
44
|
+
- `setup-uv@v8.1.0` pin is intentionally unchanged. The v1.0.4
|
|
45
|
+
failure was a documented GitHub Actions/codeload incident, not
|
|
46
|
+
an action-specific issue; replacing setup-uv with a curl-install
|
|
47
|
+
would lose the cache layer + Python integration + version-from-
|
|
48
|
+
pyproject detection it provides, and would not have prevented the
|
|
49
|
+
observed failure (actions/checkout downloaded successfully in the
|
|
50
|
+
same failing run; codeload was the SPOF, not setup-uv).
|
|
51
|
+
- The other 5 workflows (ci/codeql/docs/nightly-benchmarks/
|
|
52
|
+
nightly-mc) are not patched because they self-heal on the next
|
|
53
|
+
push; the SPOF only matters for one-shot tag-triggered runs.
|
|
54
|
+
|
|
8
55
|
## [1.0.4] — 2026-05-26 — `audit_sister_doc_concept_drift` module (closes #72)
|
|
9
56
|
|
|
10
57
|
Tier-2 ADDITIVE — third (and final) member of the audit-validator
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-toolkit
|
|
3
|
-
Version: 1.0.4
|
|
3
|
+
Version: 1.0.5
|
|
4
4
|
Summary: Reusable evaluation contracts for binary classification: metrics, bootstrap CIs, calibration, artifacts, and evidence gates.
|
|
5
5
|
Project-URL: Homepage, https://github.com/brandon-behring/eval-toolkit
|
|
6
6
|
Project-URL: Documentation, https://brandon-behring.github.io/eval-toolkit/
|
|
@@ -1420,7 +1420,7 @@
|
|
|
1420
1420
|
"doc_first_line": "str(object='') -> str",
|
|
1421
1421
|
"kind": "value",
|
|
1422
1422
|
"type": "str",
|
|
1423
|
-
"value": "'1.0.4'"
|
|
1423
|
+
"value": "'1.0.5'"
|
|
1424
1424
|
},
|
|
1425
1425
|
"apply_operating_points": {
|
|
1426
1426
|
"doc_first_line": "Apply fitted thresholds to a mixed-class or single-class target slice.",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/src/eval_toolkit/audit_sister_doc_concept_drift.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/baseline/test_plotting_visual/plot_lift_ci.png
RENAMED
|
File without changes
|
{eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/baseline/test_plotting_visual/plot_metric_bars.png
RENAMED
|
File without changes
|
|
File without changes
|
{eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/baseline/test_plotting_visual/plot_pr_curve.png
RENAMED
|
File without changes
|
|
File without changes
|
{eval_toolkit-1.0.4 → eval_toolkit-1.0.5}/tests/baseline/test_plotting_visual/plot_roc_curve.png
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|