eval-toolkit 1.4.0__tar.gz → 1.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195)
  1. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/CHANGELOG.md +13 -0
  2. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/PKG-INFO +4 -1
  3. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/pyproject.toml +8 -0
  4. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/_version.py +1 -1
  5. eval_toolkit-1.5.0/src/eval_toolkit/eda/__init__.py +80 -0
  6. eval_toolkit-1.5.0/src/eval_toolkit/eda/data_audit.py +785 -0
  7. eval_toolkit-1.5.0/src/eval_toolkit/eda/obfuscation.py +622 -0
  8. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/loaders.py +46 -8
  9. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/golden/public_api/snapshot.json +1 -1
  10. eval_toolkit-1.5.0/tests/test_eda.py +330 -0
  11. eval_toolkit-1.5.0/tests/test_eda_obfuscation.py +448 -0
  12. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_loaders.py +107 -0
  13. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/.gitignore +0 -0
  14. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/LICENSE +0 -0
  15. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/README.md +0 -0
  16. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/STYLE.md +0 -0
  17. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/docs/archive/README.md +0 -0
  18. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/docs/research/README.md +0 -0
  19. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/docs/research/datasets/README.md +0 -0
  20. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/docs/research/papers/data-integrity/README.md +0 -0
  21. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/docs/research/papers/eval-ecosystem/README.md +0 -0
  22. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/docs/research/papers/inference/README.md +0 -0
  23. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/docs/research/papers/prompt-injection/README.md +0 -0
  24. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/docs/source/adr/README.md +0 -0
  25. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/docs/source/methodology/README.md +0 -0
  26. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/__init__.py +0 -0
  27. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/__main__.py +0 -0
  28. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/_deprecated.py +0 -0
  29. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/_narrative.py +0 -0
  30. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/_parallel.py +0 -0
  31. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/_rng.py +0 -0
  32. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/_sweep.py +0 -0
  33. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/adversarial.py +0 -0
  34. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/analysis.py +0 -0
  35. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/artifacts.py +0 -0
  36. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/audit_citation_alignment.py +0 -0
  37. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/audit_sister_doc_concept_drift.py +0 -0
  38. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/audit_value_bindings.py +0 -0
  39. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/bootstrap.py +0 -0
  40. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/calibration.py +0 -0
  41. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/claims.py +0 -0
  42. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/config.py +0 -0
  43. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/docs.py +0 -0
  44. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/embeddings.py +0 -0
  45. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/evidence.py +0 -0
  46. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/harness.py +0 -0
  47. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/leakage.py +0 -0
  48. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/losses.py +0 -0
  49. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/manifest.py +0 -0
  50. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/metric_specs.py +0 -0
  51. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/metrics.py +0 -0
  52. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/operating_points.py +0 -0
  53. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/paths.py +0 -0
  54. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/plotting.py +0 -0
  55. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/preprocessing.py +0 -0
  56. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/probes.py +0 -0
  57. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/protocols.py +0 -0
  58. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/provenance.py +0 -0
  59. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/py.typed +0 -0
  60. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/schemas/manifest.v1.json +0 -0
  61. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/schemas/manifest.v2.json +0 -0
  62. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/schemas/manifest.v3.json +0 -0
  63. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/schemas/ood_manifest.v1.json +0 -0
  64. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/schemas/results.v1.json +0 -0
  65. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/schemas/results_full.v1.json +0 -0
  66. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/scorecards.py +0 -0
  67. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/seeds.py +0 -0
  68. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/splits.py +0 -0
  69. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/stacking.py +0 -0
  70. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/text_dedup.py +0 -0
  71. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/src/eval_toolkit/thresholds.py +0 -0
  72. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/baseline/test_plotting_visual/plot_bootstrap_distribution.png +0 -0
  73. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/baseline/test_plotting_visual/plot_confusion_matrix_grid.png +0 -0
  74. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/baseline/test_plotting_visual/plot_lift_ci.png +0 -0
  75. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/baseline/test_plotting_visual/plot_metric_bars.png +0 -0
  76. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/baseline/test_plotting_visual/plot_pareto_frontier.png +0 -0
  77. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/baseline/test_plotting_visual/plot_pr_curve.png +0 -0
  78. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/baseline/test_plotting_visual/plot_reliability_diagram.png +0 -0
  79. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/baseline/test_plotting_visual/plot_roc_curve.png +0 -0
  80. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/baseline/test_plotting_visual/plot_score_histograms.png +0 -0
  81. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/baseline/test_plotting_visual/plot_slice_metric_heatmap.png +0 -0
  82. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/benchmarks/__init__.py +0 -0
  83. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/benchmarks/test_kernel_benchmarks.py +0 -0
  84. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/conftest.py +0 -0
  85. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/golden/bootstrap_ci/cases.json +0 -0
  86. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/golden/data/dedup_holdout.jsonl +0 -0
  87. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/golden/data/dedup_holdout_expected.json +0 -0
  88. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/golden/data/dedup_holdout_provenance.md +0 -0
  89. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/golden/docs/expected.md +0 -0
  90. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/golden/docs/input.md +0 -0
  91. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/golden/docs/metrics.json +0 -0
  92. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/golden/test_dedup_holdout_calibration.py +0 -0
  93. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/strategies.py +0 -0
  94. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_adversarial.py +0 -0
  95. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_analysis.py +0 -0
  96. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_artifacts.py +0 -0
  97. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_audit_citation_alignment.py +0 -0
  98. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_audit_sister_doc_concept_drift.py +0 -0
  99. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_audit_value_bindings.py +0 -0
  100. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_block_bootstrap_on_folds.py +0 -0
  101. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_bootstrap_calibration_mc.py +0 -0
  102. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_bootstrap_edge_cases.py +0 -0
  103. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_bootstrap_golden.py +0 -0
  104. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_bootstrap_njobs.py +0 -0
  105. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_bootstrap_props.py +0 -0
  106. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_bootstrap_research_grounded.py +0 -0
  107. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_bootstrap_unit.py +0 -0
  108. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_calibration_binary_adapters.py +0 -0
  109. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_calibration_bootstrap_chain.py +0 -0
  110. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_calibration_determinism.py +0 -0
  111. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_calibration_optimization_failures.py +0 -0
  112. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_calibration_props.py +0 -0
  113. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_calibration_research_grounded.py +0 -0
  114. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_calibration_unit.py +0 -0
  115. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_claims.py +0 -0
  116. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_claims_coverage.py +0 -0
  117. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_claims_props.py +0 -0
  118. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_cli.py +0 -0
  119. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_config.py +0 -0
  120. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_coverage_bootstrap.py +0 -0
  121. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_coverage_calibration.py +0 -0
  122. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_coverage_harness.py +0 -0
  123. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_coverage_metrics.py +0 -0
  124. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_coverage_plotting.py +0 -0
  125. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_croissant_e2e.py +0 -0
  126. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_dedup_split_leakage_chain.py +0 -0
  127. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_deprecated_scalars_shim.py +0 -0
  128. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_deprecations.py +0 -0
  129. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_docs_golden.py +0 -0
  130. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_docs_props.py +0 -0
  131. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_embeddings.py +0 -0
  132. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_evidence_validators.py +0 -0
  133. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_harness_edge_cases.py +0 -0
  134. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_harness_fault_injection.py +0 -0
  135. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_harness_folded.py +0 -0
  136. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_harness_internals.py +0 -0
  137. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_harness_metric_options.py +0 -0
  138. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_harness_parallelism.py +0 -0
  139. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_harness_smoke.py +0 -0
  140. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_import_boundaries.py +0 -0
  141. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_is_metric_defined_for_slice.py +0 -0
  142. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_lazy_extras_messages.py +0 -0
  143. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_leakage.py +0 -0
  144. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_leakage_error_paths.py +0 -0
  145. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_leakage_props.py +0 -0
  146. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_loaders_coverage.py +0 -0
  147. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_loaders_props.py +0 -0
  148. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_logging.py +0 -0
  149. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_losses.py +0 -0
  150. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_manifest.py +0 -0
  151. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_manifest_contamination_round_trip.py +0 -0
  152. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_manifest_props.py +0 -0
  153. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_manifest_validation.py +0 -0
  154. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_metrics_props.py +0 -0
  155. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_metrics_stratified_subsets.py +0 -0
  156. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_metrics_unit.py +0 -0
  157. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_misc_coverage.py +0 -0
  158. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_numeric_edge_cases.py +0 -0
  159. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_ood_loader.py +0 -0
  160. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_operating_points.py +0 -0
  161. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_operating_points_props.py +0 -0
  162. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_parallel.py +0 -0
  163. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_paths.py +0 -0
  164. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_pipeline_e2e.py +0 -0
  165. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_plotting_edge.py +0 -0
  166. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_plotting_smoke.py +0 -0
  167. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_plotting_visual.py +0 -0
  168. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_preprocessing.py +0 -0
  169. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_probes.py +0 -0
  170. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_protocol_conformance.py +0 -0
  171. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_provenance.py +0 -0
  172. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_public_api.py +0 -0
  173. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_recall_at_fpr.py +0 -0
  174. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_reference_equivalence.py +0 -0
  175. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_reproducibility_integration.py +0 -0
  176. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_rng.py +0 -0
  177. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_schemas.py +0 -0
  178. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_scorecard.py +0 -0
  179. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_seeds.py +0 -0
  180. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_splits.py +0 -0
  181. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_splits_leakage_integration.py +0 -0
  182. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_splits_props.py +0 -0
  183. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_stacking.py +0 -0
  184. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_sweep.py +0 -0
  185. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_text_dedup.py +0 -0
  186. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_text_dedup_coverage.py +0 -0
  187. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_text_dedup_props.py +0 -0
  188. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_text_dedup_strategies.py +0 -0
  189. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_thresholds.py +0 -0
  190. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_thresholds_constant_score.py +0 -0
  191. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_thresholds_coverage.py +0 -0
  192. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_thresholds_props.py +0 -0
  193. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_thresholds_research_grounded.py +0 -0
  194. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_tokenization_leakage_check.py +0 -0
  195. {eval_toolkit-1.4.0 → eval_toolkit-1.5.0}/tests/test_v09_contracts.py +0 -0
@@ -5,6 +5,19 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [1.5.0] — 2026-05-29 — Tier-2 `eda` layer (#83) + schema-aware `HFDatasetsLoader` (#85)
9
+
10
+ Tier-2 / `loaders` ADDITIVE per [ADR 0003](docs/source/adr/0003-stability-contract-and-gate3-methodology.md) — backward-compatible.
11
+
12
+ - **`eda` Job-1 integrity gate (#83):** `audit_dataset` / `DataAudit` / `SplitSummary` + the
13
+ `class_balance` / `no_cross_split_leakage` / `context_window_fit` gates + the §B2 obfuscation
14
+ prevalence module.
15
+ - **schema-aware `HFDatasetsLoader` (#85):** load real-world dataset schemas without column
16
+ guessing — `feature_cols` + `feature_join` (join multiple columns into one feature; NaN-safe),
17
+ `label_map` (remap raw labels → int; fail-fast `ValueError` lists unmapped values), `revision`
18
+ (pin the HF dataset SHA). All new params default to the prior behavior; a missing feature/label
19
+ column raises `KeyError` listing the observed columns.
20
+
8
21
  ## [1.4.0] — 2026-05-26 — `audit_citation_alignment` Layer 2 + Layer 3 (closes #82); shared `_narrative` helpers (ADR 0007)
9
22
 
10
23
  Tier-1 ADDITIVE per [ADR 0003](docs/source/adr/0003-stability-contract-and-gate3-methodology.md).
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-toolkit
3
- Version: 1.4.0
3
+ Version: 1.5.0
4
4
  Summary: Reusable evaluation contracts for binary classification: metrics, bootstrap CIs, calibration, artifacts, and evidence gates.
5
5
  Project-URL: Homepage, https://github.com/brandon-behring/eval-toolkit
6
6
  Project-URL: Documentation, https://brandon-behring.github.io/eval-toolkit/
@@ -60,6 +60,9 @@ Requires-Dist: sphinx-autodoc-typehints>=2.0; extra == 'docs'
60
60
  Requires-Dist: sphinx-copybutton>=0.5; extra == 'docs'
61
61
  Requires-Dist: sphinx-design>=0.6; extra == 'docs'
62
62
  Requires-Dist: sphinx>=7.3; extra == 'docs'
63
+ Provides-Extra: eda
64
+ Requires-Dist: matplotlib>=3.8; extra == 'eda'
65
+ Requires-Dist: pandas>=2.0; extra == 'eda'
63
66
  Provides-Extra: embeddings
64
67
  Requires-Dist: sentence-transformers>=3.0; extra == 'embeddings'
65
68
  Provides-Extra: losses
@@ -74,6 +74,14 @@ probes = ["torch>=2.0", "transformers>=4.40"]
74
74
  # (granular extras — losses callers should not have to install the larger
75
75
  # transformers stack). Shares the torch version pin with [probes].
76
76
  losses = ["torch>=2.0"]
77
+ # v1.5.0 (feat/eda-data-audit): eval_toolkit.eda Job-1 integrity-gate layer.
78
+ # Tier-2 surface (ADR 0003) — torch-free by design. pandas powers the
79
+ # DataFrameLoader reuse path; matplotlib is reserved for the EDA layer's
80
+ # future profiling plots. Intentionally NO sentence-transformers / torch:
81
+ # the near-dup / cross-split checks use the lexical TfidfCosineStrategy and
82
+ # token-length quantiles take a caller-supplied tokenizer (no transformers
83
+ # import in this module). NOT folded into [all] / [dev] — opt-in only.
84
+ eda = ["pandas>=2.0", "matplotlib>=3.8"]
77
85
  # NO-OP extra kept for backward compatibility (R3 at v0.49.0).
78
86
  #
79
87
  # jsonschema>=4.21 moved to base deps at v0.16.0; this extra has been a
@@ -2,4 +2,4 @@
2
2
 
3
3
  __all__ = ["__version__"]
4
4
 
5
- __version__ = "1.4.0"
5
+ __version__ = "1.5.0"
@@ -0,0 +1,80 @@
1
+ """``eval_toolkit.eda`` — EDA-first dataset integrity gating (Tier-2 surface).
2
+
3
+ This subpackage is the **Job-1 integrity gate** of an EDA-first research
4
+ program: thin, composable, torch-free per-split profiling + dataset-soundness
5
+ gates, built by reusing the v1.4.0 :mod:`eval_toolkit.leakage`,
6
+ :mod:`~eval_toolkit.text_dedup`, :mod:`~eval_toolkit.claims`, and
7
+ :mod:`~eval_toolkit.artifacts` primitives.
8
+
9
+ Stability tier
10
+ --------------
11
+ Public access is ``eval_toolkit.eda.*`` — **Tier-2** per ADR 0003. This layer
12
+ is intentionally evolvable and is **not** part of the v2.0-frozen top-level
13
+ :mod:`eval_toolkit` surface; nothing here is added to the package-root
14
+ ``_EXPORTS`` / ``__all__``. Import explicitly::
15
+
16
+ from eval_toolkit.eda import audit_dataset, DataAudit, SplitSummary
17
+
18
+ Scope (deliberately narrow)
19
+ ---------------------------
20
+ Integrity gating only: row counts, class balance, text-length quantiles,
21
+ dedup / cross-split leakage, obfuscation prevalence. **No** embeddings,
22
+ semantic similarity, contamination scoring, or UMAP — those distribution-shift
23
+ concerns are deferred to a future ``distribution_shift`` module.
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ from eval_toolkit.eda.data_audit import (
29
+ DEFAULT_MAX_NEG_POS_RATIO,
30
+ DEFAULT_MIN_NEG_POS_RATIO,
31
+ DEFAULT_PCT_OVER_CONTEXT_THRESHOLD,
32
+ EDA_AUDIT_SCHEMA_VERSION,
33
+ DataAudit,
34
+ SplitSummary,
35
+ Tokenizer,
36
+ audit_dataset,
37
+ class_balance,
38
+ length_quantiles,
39
+ summarize_split,
40
+ )
41
+ from eval_toolkit.eda.obfuscation import (
42
+ BASE64_ENTROPY_THRESHOLD,
43
+ HEX_ENTROPY_THRESHOLD,
44
+ ObfuscationProfile,
45
+ analyze_obfuscation,
46
+ count_invisible_chars,
47
+ has_high_entropy_alnum_run,
48
+ has_rot13_marker,
49
+ is_leeted_token,
50
+ leetspeak_counts,
51
+ nfkc_changed,
52
+ nfkc_char_delta,
53
+ shannon_entropy,
54
+ )
55
+
56
+ __all__ = [
57
+ "BASE64_ENTROPY_THRESHOLD",
58
+ "DEFAULT_MAX_NEG_POS_RATIO",
59
+ "DEFAULT_MIN_NEG_POS_RATIO",
60
+ "DEFAULT_PCT_OVER_CONTEXT_THRESHOLD",
61
+ "EDA_AUDIT_SCHEMA_VERSION",
62
+ "HEX_ENTROPY_THRESHOLD",
63
+ "DataAudit",
64
+ "ObfuscationProfile",
65
+ "SplitSummary",
66
+ "Tokenizer",
67
+ "analyze_obfuscation",
68
+ "audit_dataset",
69
+ "class_balance",
70
+ "count_invisible_chars",
71
+ "has_high_entropy_alnum_run",
72
+ "has_rot13_marker",
73
+ "is_leeted_token",
74
+ "leetspeak_counts",
75
+ "length_quantiles",
76
+ "nfkc_char_delta",
77
+ "nfkc_changed",
78
+ "shannon_entropy",
79
+ "summarize_split",
80
+ ]