honestml 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. honestml-1.0.0/.gitignore +45 -0
  2. honestml-1.0.0/CHANGELOG.md +79 -0
  3. honestml-1.0.0/LICENSE +21 -0
  4. honestml-1.0.0/PKG-INFO +190 -0
  5. honestml-1.0.0/README.md +105 -0
  6. honestml-1.0.0/pyproject.toml +145 -0
  7. honestml-1.0.0/src/honestml/__init__.py +124 -0
  8. honestml-1.0.0/src/honestml/adapters/__init__.py +157 -0
  9. honestml-1.0.0/src/honestml/adapters/boosting.py +474 -0
  10. honestml-1.0.0/src/honestml/adapters/calibration.py +93 -0
  11. honestml-1.0.0/src/honestml/adapters/candidate_cache.py +97 -0
  12. honestml-1.0.0/src/honestml/adapters/dtype_tokens.py +54 -0
  13. honestml-1.0.0/src/honestml/adapters/ensembling.py +211 -0
  14. honestml-1.0.0/src/honestml/adapters/estimators.py +217 -0
  15. honestml-1.0.0/src/honestml/adapters/feature_rankers.py +321 -0
  16. honestml-1.0.0/src/honestml/adapters/feature_selectors.py +72 -0
  17. honestml-1.0.0/src/honestml/adapters/loader.py +60 -0
  18. honestml-1.0.0/src/honestml/adapters/metrics.py +291 -0
  19. honestml-1.0.0/src/honestml/adapters/onnx_export.py +153 -0
  20. honestml-1.0.0/src/honestml/adapters/polars_dataset.py +172 -0
  21. honestml-1.0.0/src/honestml/adapters/reader.py +791 -0
  22. honestml-1.0.0/src/honestml/adapters/run_budget.py +129 -0
  23. honestml-1.0.0/src/honestml/adapters/serializers.py +228 -0
  24. honestml-1.0.0/src/honestml/adapters/significance.py +183 -0
  25. honestml-1.0.0/src/honestml/adapters/splitters.py +712 -0
  26. honestml-1.0.0/src/honestml/adapters/tracking.py +211 -0
  27. honestml-1.0.0/src/honestml/adapters/tuning.py +79 -0
  28. honestml-1.0.0/src/honestml/application/__init__.py +125 -0
  29. honestml-1.0.0/src/honestml/application/calibration.py +143 -0
  30. honestml-1.0.0/src/honestml/application/ensemble.py +249 -0
  31. honestml-1.0.0/src/honestml/application/feature_compare.py +1144 -0
  32. honestml-1.0.0/src/honestml/application/feature_encoding.py +109 -0
  33. honestml-1.0.0/src/honestml/application/feature_selection.py +197 -0
  34. honestml-1.0.0/src/honestml/application/oof_scorer.py +219 -0
  35. honestml-1.0.0/src/honestml/application/projection.py +87 -0
  36. honestml-1.0.0/src/honestml/application/run_report.py +379 -0
  37. honestml-1.0.0/src/honestml/application/slice.py +1059 -0
  38. honestml-1.0.0/src/honestml/application/tuning.py +174 -0
  39. honestml-1.0.0/src/honestml/composition/__init__.py +60 -0
  40. honestml-1.0.0/src/honestml/composition/artifact.py +524 -0
  41. honestml-1.0.0/src/honestml/composition/build.py +947 -0
  42. honestml-1.0.0/src/honestml/composition/facade.py +1136 -0
  43. honestml-1.0.0/src/honestml/composition/onnx_bundle.py +211 -0
  44. honestml-1.0.0/src/honestml/composition/presets.py +117 -0
  45. honestml-1.0.0/src/honestml/composition/registry.py +244 -0
  46. honestml-1.0.0/src/honestml/composition/run_report.py +407 -0
  47. honestml-1.0.0/src/honestml/core/__init__.py +171 -0
  48. honestml-1.0.0/src/honestml/core/config.py +373 -0
  49. honestml-1.0.0/src/honestml/core/context.py +52 -0
  50. honestml-1.0.0/src/honestml/core/dataset.py +93 -0
  51. honestml-1.0.0/src/honestml/core/exceptions.py +149 -0
  52. honestml-1.0.0/src/honestml/core/logging.py +37 -0
  53. honestml-1.0.0/src/honestml/core/ports/__init__.py +83 -0
  54. honestml-1.0.0/src/honestml/core/ports/budget.py +38 -0
  55. honestml-1.0.0/src/honestml/core/ports/cache.py +33 -0
  56. honestml-1.0.0/src/honestml/core/ports/calibration.py +34 -0
  57. honestml-1.0.0/src/honestml/core/ports/ensembler.py +79 -0
  58. honestml-1.0.0/src/honestml/core/ports/estimator.py +106 -0
  59. honestml-1.0.0/src/honestml/core/ports/feature_ranker.py +55 -0
  60. honestml-1.0.0/src/honestml/core/ports/feature_subset_selector.py +53 -0
  61. honestml-1.0.0/src/honestml/core/ports/metric.py +46 -0
  62. honestml-1.0.0/src/honestml/core/ports/model_serializer.py +42 -0
  63. honestml-1.0.0/src/honestml/core/ports/model_spec.py +52 -0
  64. honestml-1.0.0/src/honestml/core/ports/significance.py +61 -0
  65. honestml-1.0.0/src/honestml/core/ports/splitter.py +112 -0
  66. honestml-1.0.0/src/honestml/core/ports/tracker.py +29 -0
  67. honestml-1.0.0/src/honestml/core/ports/tuner.py +131 -0
  68. honestml-1.0.0/src/honestml/core/schema.py +357 -0
  69. honestml-1.0.0/src/honestml/core/selection_policy.py +209 -0
  70. honestml-1.0.0/src/honestml/core/task.py +83 -0
  71. honestml-1.0.0/src/honestml/py.typed +0 -0
@@ -0,0 +1,45 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ build/
6
+ dist/
7
+ .eggs/
8
+
9
+ # Tooling caches
10
+ .ruff_cache/
11
+ .mypy_cache/
12
+ .pytest_cache/
13
+ .hypothesis/
14
+ .import_linter_cache/
15
+
16
+ # Docs build
17
+ site/
18
+ # docs/automl-productionization/
19
+
20
+ # Environments
21
+ .venv/
22
+ venv/
23
+ .env
24
+
25
+ # IDE / OS
26
+ .idea/
27
+ .vscode/
28
+ .DS_Store
29
+ .CLAUDE/
30
+
31
+ # local working files, not for publication
32
+ _legacy/
33
+ catboost_info/
34
+ benchmarks/results.json
35
+ benchmarks/native_cat_gate_results.json
36
+
37
+ # showcase notebook inputs/outputs (large Kaggle datasets, generated submissions)
38
+ notebooks/data/
39
+ notebooks/results/
40
+ notebooks/.ipynb_checkpoints/
41
+
42
+ # internal design/architecture packages (kept local, not published)
43
+ docs/architecture/
44
+ docs/audit/
45
+ docs/implementation/
@@ -0,0 +1,79 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project are documented here. The format follows
4
+ [Keep a Changelog](https://keepachangelog.com/) and the project adheres to
5
+ [Semantic Versioning](https://semver.org/) (see `docs/versioning-policy.md`).
6
+
7
+ ## [Unreleased]
8
+
9
+ ## [1.0.0] - 2026-06-24
10
+
11
+ First public release: a tabular AutoML library for binary/multiclass classification and
12
+ regression, built around honest model selection — the score you see is the score you can
13
+ expect in production.
14
+
15
+ The honesty-benchmark baseline (`benchmarks/baseline.json`) is bootstrapped for this release;
16
+ subsequent releases are gated against it so the benchmark cannot regress (see `docs/releasing.md`).
17
+
18
+ ### Added
19
+ - **Task and data input contract:** sklearn-compatible `AutoML` facade
20
+ (`fit`/`predict`/`predict_proba`/`score`, works in `Pipeline`) over pandas/polars/numpy
21
+ inputs, with typed schema inference, stable train↔inference categorical encoding,
22
+ boundary validation, and row metadata via `fit(..., sample_weight=, groups=, time=,
23
+ label_time=)`.
24
+ - **CV schemes:** stratified / kfold / group / holdout / timeseries (purge+embargo, value-based
25
+ time ordering) / timeseries_period (calendar or Δt periods with wall-clock purge/embargo,
26
+ optional per-period weighting and a rolling max-train window) (`CVConfig(scheme=...)`; `"auto"`
27
+ picks the task default); every fold passes an anti-leakage validation.
28
+ - **Honest selection:** out-of-fold scoring on a shared CV split with a seeded
29
+ paired-bootstrap **equivalence band** (`significance="bootstrap"`, default) — the
30
+ simplest model statistically indistinguishable from the best wins, and ties are
31
+ disclosed, never hidden; opt-in probability calibration (`CVConfig(calibrate=...)`)
32
+ gated by cross-fitted improvement.
33
+ - **Outer holdout + finalize:** `CVConfig(outer_holdout=...)` carves a scheme-aware,
34
+ untouched holdout scored exactly once for an unbiased final estimate; `finalize=True`
35
+ then refits the shipped model on all data after scoring.
36
+ - **Presets:** `AutoML(preset="fast" | "balanced" | "best")` — declarative partial configs
37
+ that fill only unset parameters; an explicit argument always wins, and honesty settings
38
+ are not presettable.
39
+ - **Budget + resume/cache:** `budget=<seconds>` or `BudgetConfig(...)` (time / trial /
40
+ memory limits) with graceful degradation to the best model so far; `cache="dir/"`
41
+ reuses per-candidate results keyed by a deterministic run fingerprint and resumes
42
+ interrupted runs.
43
+ - **Feature engineering + selection:** opt-in `FEConfig` (leakage-controlled out-of-fold
44
+ target encoding, frequency encoding, datetime deltas, categorical intersections) and
45
+ `FeatureSelectionConfig` (importance / random-probe / null-importance / sequential /
46
+ SHAP strategies with honest multi-strategy arbitration on a holdout or nested CV). The
47
+ `sequential` wrapper chooses its feature count honestly: it explores the full backward
48
+ trajectory and the **fewest features statistically indistinguishable from the best**
49
+ (significance band + Occam tie-break, default `significance="bootstrap"`) win, instead of
50
+ the raw out-of-fold argmax; `significance="off"` reproduces the plain argmax. The band is
51
+ scored on the selection folds and is strictly more conservative than argmax (residual
52
+ optimism documented; independent-OOF scoring is a future improvement).
53
+ - **Hyperparameter optimization:** `hpo=HPOConfig(...)` — seeded Optuna search over
54
+ per-model spaces on an inner CV of the dev data; tuned candidates then compete in the
55
+ regular honest selection, sharing the run's time budget.
56
+ - **Ensembling:** `ensemble=EnsembleConfig(...)` — greedy (Caruana) or weighted blend over
57
+ the out-of-fold predictions, shipped only if significantly better than the best single
58
+ model; the gate decision is always reported.
59
+ - **Run report + rendering:** versioned, tracker-independent JSON `run_report_` with full
60
+ provenance (resolved config, run fingerprint, leaderboard, equivalence band, timings,
61
+ budget/FS/HPO/ensemble outcomes); `save_run_report` plus `render_report` to markdown or
62
+ self-contained HTML (charts via the `report` extra).
63
+ - **Experiment tracking:** opt-in `tracker="mlflow"` / `TrackerConfig(...)` logs the run
64
+ report after fit — fail-soft and free of global MLflow state; custom backends plug in
65
+ via the `ExperimentTracker` port.
66
+ - **Artifacts + serving:** `save_artifact` / `load_artifact` — a self-contained, versioned
67
+ artifact directory with a sha256 integrity manifest; native boosting bodies
68
+ (`model_format="native"`, no pickle); the slim `honestml[inference]` extra serves
69
+ `load_artifact(...).predict(...)` without importing the training stack.
70
+ - **ONNX export:** `export_onnx(model, directory, sample=...)` — a parity-gated,
71
+ export-only bundle (linear and boosting models) for external runtimes.
72
+ - **Models and plugins:** lightweight built-in zoo (baseline, linear) plus
73
+ catboost/lightgbm/xgboost via the `boosting` extra; third-party estimators via
74
+ `honestml.models` entry points (see `docs/plugin-contract.md`), discovered lazily with
75
+ fail-fast name-conflict detection.
76
+ - **Logging, exceptions, typing:** silent-by-default `honestml` logger (`NullHandler`);
77
+ one exception taxonomy rooted at `AutoMLError`; fully typed (`py.typed`);
78
+ `import honestml` stays lightweight — optional extras load lazily and a missing one
79
+ fails fast with the exact `pip install honestml[...]` hint.
honestml-1.0.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 honestml contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,190 @@
1
+ Metadata-Version: 2.4
2
+ Name: honestml
3
+ Version: 1.0.0
4
+ Summary: General tabular AutoML library (classification + regression), production-grade.
5
+ Project-URL: Homepage, https://github.com/sukhov-is/HonestML
6
+ Project-URL: Documentation, https://sukhov-is.github.io/HonestML/
7
+ Project-URL: Repository, https://github.com/sukhov-is/HonestML
8
+ Author: honestml contributors
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: automl,boosting,feature-selection,honestml,machine-learning,tabular
12
+ Classifier: Development Status :: 5 - Production/Stable
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
+ Classifier: Typing :: Typed
19
+ Requires-Python: >=3.10
20
+ Requires-Dist: numpy>=1.24
21
+ Requires-Dist: pandas>=2.0
22
+ Requires-Dist: polars>=1.0
23
+ Requires-Dist: pydantic>=2
24
+ Requires-Dist: scikit-learn>=1.3
25
+ Provides-Extra: all
26
+ Requires-Dist: catboost>=1.2; extra == 'all'
27
+ Requires-Dist: lightgbm>=4.0; extra == 'all'
28
+ Requires-Dist: matplotlib>=3.7; extra == 'all'
29
+ Requires-Dist: mlflow>=2.9; extra == 'all'
30
+ Requires-Dist: onnx>=1.21; extra == 'all'
31
+ Requires-Dist: onnxmltools>=1.16; extra == 'all'
32
+ Requires-Dist: onnxruntime>=1.23; extra == 'all'
33
+ Requires-Dist: optuna>=3.4; extra == 'all'
34
+ Requires-Dist: psutil>=5.9; extra == 'all'
35
+ Requires-Dist: pyarrow>=14.0; extra == 'all'
36
+ Requires-Dist: shap>=0.44; extra == 'all'
37
+ Requires-Dist: skl2onnx>=1.20; extra == 'all'
38
+ Requires-Dist: xgboost>=2.0; extra == 'all'
39
+ Provides-Extra: boosting
40
+ Requires-Dist: catboost>=1.2; extra == 'boosting'
41
+ Requires-Dist: lightgbm>=4.0; extra == 'boosting'
42
+ Requires-Dist: xgboost>=2.0; extra == 'boosting'
43
+ Provides-Extra: catboost
44
+ Requires-Dist: catboost>=1.2; extra == 'catboost'
45
+ Provides-Extra: dev
46
+ Requires-Dist: hypothesis>=6.90; extra == 'dev'
47
+ Requires-Dist: import-linter>=2.0; extra == 'dev'
48
+ Requires-Dist: mkdocs-material>=9.5; extra == 'dev'
49
+ Requires-Dist: mkdocstrings[python]>=0.27; extra == 'dev'
50
+ Requires-Dist: mypy>=1.8; extra == 'dev'
51
+ Requires-Dist: optuna>=3.4; extra == 'dev'
52
+ Requires-Dist: pre-commit>=3.5; extra == 'dev'
53
+ Requires-Dist: pytest>=7.4; extra == 'dev'
54
+ Requires-Dist: pyyaml>=6; extra == 'dev'
55
+ Requires-Dist: ruff==0.15.16; extra == 'dev'
56
+ Provides-Extra: inference
57
+ Requires-Dist: joblib>=1.3; extra == 'inference'
58
+ Requires-Dist: numpy>=1.24; extra == 'inference'
59
+ Requires-Dist: pandas>=2.0; extra == 'inference'
60
+ Requires-Dist: polars>=1.0; extra == 'inference'
61
+ Requires-Dist: pydantic>=2; extra == 'inference'
62
+ Requires-Dist: scikit-learn>=1.3; extra == 'inference'
63
+ Provides-Extra: lightgbm
64
+ Requires-Dist: lightgbm>=4.0; extra == 'lightgbm'
65
+ Provides-Extra: memory
66
+ Requires-Dist: psutil>=5.9; extra == 'memory'
67
+ Provides-Extra: mlflow
68
+ Requires-Dist: mlflow>=2.9; extra == 'mlflow'
69
+ Provides-Extra: onnx
70
+ Requires-Dist: onnx>=1.21; extra == 'onnx'
71
+ Requires-Dist: onnxmltools>=1.16; extra == 'onnx'
72
+ Requires-Dist: onnxruntime>=1.23; extra == 'onnx'
73
+ Requires-Dist: skl2onnx>=1.20; extra == 'onnx'
74
+ Provides-Extra: optuna
75
+ Requires-Dist: optuna>=3.4; extra == 'optuna'
76
+ Provides-Extra: pyarrow
77
+ Requires-Dist: pyarrow>=14.0; extra == 'pyarrow'
78
+ Provides-Extra: report
79
+ Requires-Dist: matplotlib>=3.7; extra == 'report'
80
+ Provides-Extra: shap
81
+ Requires-Dist: shap>=0.44; extra == 'shap'
82
+ Provides-Extra: xgboost
83
+ Requires-Dist: xgboost>=2.0; extra == 'xgboost'
84
+ Description-Content-Type: text/markdown
85
+
86
+ # honestml
87
+
88
+ [![PyPI](https://img.shields.io/pypi/v/honestml.svg)](https://pypi.org/project/honestml/)
89
+ [![CI](https://github.com/sukhov-is/HonestML/actions/workflows/ci.yml/badge.svg)](https://github.com/sukhov-is/HonestML/actions/workflows/ci.yml)
90
+ [![Python](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://pypi.org/project/honestml/)
91
+ [![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
92
+
93
+ **Tabular AutoML where the leaderboard doesn't lie.** Most AutoML frameworks ship the
94
+ model with the best validation score — but that number is optimistic, because you
95
+ selected for it. honestml is built so that the score you see is the score you can
96
+ expect in production.
97
+
98
+ It covers binary / multiclass classification and regression behind a clean, extensible
99
+ core. The honesty is in *how it selects*: out-of-fold scoring on a shared CV split; a
100
+ bootstrap **equivalence band** that, among the statistically indistinguishable best
101
+ candidates, ships the simplest one; leakage-controlled feature engineering and
102
+ selection; an optional untouched outer holdout scored exactly once; and reproducible,
103
+ fingerprinted runs.
104
+
105
+ ```python
106
+ from honestml import AutoML
107
+
108
+ model = AutoML(task="binary").fit(X, y)
109
+ proba = model.predict_proba(X_new)
110
+ print(model.best_model_id_, model.leaderboard_)
111
+ ```
112
+
113
+ The library is silent by default (a `NullHandler` on the `honestml` logger); enable
114
+ progress with `logging.getLogger("honestml").setLevel(logging.INFO)` plus
115
+ `logging.basicConfig()`.
116
+
117
+ ## Install
118
+
119
+ ```bash
120
+ pip install honestml # lightweight core (baseline/linear models)
121
+ pip install "honestml[boosting]" # core + catboost, lightgbm, xgboost
122
+ pip install "honestml[all]" # boosting + optuna (HPO), mlflow (tracking), onnx, shap, report and the rest
123
+ pip install "honestml[inference]" # slim serving runtime (load_artifact + predict only)
124
+ ```
125
+
126
+ Requires Python >= 3.10. Heavy dependencies are optional extras and imported
127
+ lazily — `import honestml` stays light, and a missing extra fails fast with the
128
+ exact `pip install honestml[...]` hint.
129
+
130
+ ## What you get
131
+
132
+ | Capability | How |
133
+ |---|---|
134
+ | Honest model selection | OOF scoring on a shared CV split; a seeded bootstrap **equivalence band** (`significance="bootstrap"`, the default) collects candidates statistically indistinguishable from the best, and the simplest member of the band wins — ties are disclosed, not hidden |
135
+ | CV schemes | stratified / kfold / group / holdout / **timeseries** (purge+embargo, value-based time order) / **timeseries_period** (calendar or Δt period folds, wall-clock gaps, optional per-period weighting, rolling train window) — `fit(..., time=, label_time=, groups=)` |
136
+ | Outer holdout + finalize | `cv=CVConfig(outer_holdout=0.2)`: selection sees only DEV, the holdout is scored once; the shipped model is refit on all data after scoring (`finalize=True`) |
137
+ | Presets | `AutoML(preset="fast" / "balanced" / "best")` — declarative, data-driven partial configs; an explicit argument always wins; honesty parameters are not presettable |
138
+ | Budget + resume | `budget=600` (seconds) or `BudgetConfig(...)` with graceful degradation; `cache="runs/"` resumes by run fingerprint |
139
+ | Feature engineering / selection | OOF-honest target encoding (binary-only), frequency encoding, datetime deltas, intersections; importance / null-importance / random-probe / sequential / SHAP selection with honest arbitration |
140
+ | HPO + ensembling | `hpo=HPOConfig(...)` (Optuna, per-model search before the honest selection); `ensemble=EnsembleConfig()` — a Caruana/weighted blend ships **only if significantly better** |
141
+ | Run report | `model.run_report_` (versioned JSON, tracker-independent); `save_run_report` and `render_report` produce markdown or self-contained HTML (charts via the `report` extra) |
142
+ | Experiment tracking | `tracker="mlflow"` or `TrackerConfig(...)` — post-fit, fail-soft, no global mlflow state; custom backends via the `ExperimentTracker` port |
143
+ | Artifacts + serving | `save_artifact` / `load_artifact` — versioned, integrity-checked artifact directory (see Standalone inference below) |
144
+ | ONNX export | `honestml.export_onnx(model, dir, sample=...)` — parity-gated, export-only bundle for external runtimes |
145
+ | Plugins | third-party models via `honestml.models` entry points (`docs/plugin-contract.md`) |
146
+
147
+ ## Standalone inference
148
+
149
+ ```python
150
+ from honestml import load_artifact
151
+
152
+ model = load_artifact("artifact_dir/") # integrity-checked against the sha256 manifest
153
+ predictions = model.predict(X_new)
154
+ ```
155
+
156
+ The artifact directory is self-contained — manifest, preprocessing schema and the
157
+ model body — and loads under the slim `honestml[inference]` install: no training
158
+ stack is imported. Boosting models can be saved with structural native bodies
159
+ (`model_format="native"`). **Trust model:** the default body is joblib/pickle —
160
+ load only artifacts you trust; native bodies contain no pickle (a non-boosting
161
+ estimator and the optional calibrator still ship as joblib).
162
+
163
+ ## Reproducibility
164
+
165
+ Every run computes a **fingerprint** over the resolved config, data signature,
166
+ estimator set and library versions; the run report carries it together with the
167
+ full provenance (leaderboard, band, budget outcome, FS/HPO/ensemble decisions,
168
+ timings). Same inputs → same selection.
169
+
170
+ ## Documentation
171
+
172
+ Documentation lives in `docs/` — quickstart, API reference, correctness guide and
173
+ the plugin contract; build it locally with `mkdocs serve`. Source and issue
174
+ tracker: <https://github.com/sukhov-is/HonestML>.
175
+
176
+ ## Development
177
+
178
+ ```bash
179
+ uv sync --extra dev --extra boosting --extra shap --extra pyarrow --extra mlflow
180
+ uv run pytest # full suite (onnx export tests also need `--extra onnx`, Python >=3.11)
181
+ uv run ruff check src tests; uv run mypy src/honestml; uv run lint-imports
182
+ ```
183
+
184
+ The layered architecture (core ← adapters ← application ← composition) is enforced
185
+ by import-linter. See `docs/releasing.md` for the release pipeline and
186
+ `benchmarks/` for the honesty benchmark suite.
187
+
188
+ ## License
189
+
190
+ MIT (see `LICENSE`).
@@ -0,0 +1,105 @@
1
+ # honestml
2
+
3
+ [![PyPI](https://img.shields.io/pypi/v/honestml.svg)](https://pypi.org/project/honestml/)
4
+ [![CI](https://github.com/sukhov-is/HonestML/actions/workflows/ci.yml/badge.svg)](https://github.com/sukhov-is/HonestML/actions/workflows/ci.yml)
5
+ [![Python](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://pypi.org/project/honestml/)
6
+ [![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
7
+
8
+ **Tabular AutoML where the leaderboard doesn't lie.** Most AutoML frameworks ship the
9
+ model with the best validation score — but that number is optimistic, because you
10
+ selected for it. honestml is built so that the score you see is the score you can
11
+ expect in production.
12
+
13
+ It covers binary / multiclass classification and regression behind a clean, extensible
14
+ core. The honesty is in *how it selects*: out-of-fold scoring on a shared CV split; a
15
+ bootstrap **equivalence band** that, among the statistically indistinguishable best
16
+ candidates, ships the simplest one; leakage-controlled feature engineering and
17
+ selection; an optional untouched outer holdout scored exactly once; and reproducible,
18
+ fingerprinted runs.
19
+
20
+ ```python
21
+ from honestml import AutoML
22
+
23
+ model = AutoML(task="binary").fit(X, y)
24
+ proba = model.predict_proba(X_new)
25
+ print(model.best_model_id_, model.leaderboard_)
26
+ ```
27
+
28
+ The library is silent by default (a `NullHandler` on the `honestml` logger); enable
29
+ progress with `logging.getLogger("honestml").setLevel(logging.INFO)` plus
30
+ `logging.basicConfig()`.
31
+
32
+ ## Install
33
+
34
+ ```bash
35
+ pip install honestml # lightweight core (baseline/linear models)
36
+ pip install "honestml[boosting]" # core + catboost, lightgbm, xgboost
37
+ pip install "honestml[all]" # boosting + optuna (HPO), mlflow (tracking), onnx, shap, report and the rest
38
+ pip install "honestml[inference]" # slim serving runtime (load_artifact + predict only)
39
+ ```
40
+
41
+ Requires Python >= 3.10. Heavy dependencies are optional extras and imported
42
+ lazily — `import honestml` stays light, and a missing extra fails fast with the
43
+ exact `pip install honestml[...]` hint.
44
+
45
+ ## What you get
46
+
47
+ | Capability | How |
48
+ |---|---|
49
+ | Honest model selection | OOF scoring on a shared CV split; a seeded bootstrap **equivalence band** (`significance="bootstrap"`, the default) collects candidates statistically indistinguishable from the best, and the simplest member of the band wins — ties are disclosed, not hidden |
50
+ | CV schemes | stratified / kfold / group / holdout / **timeseries** (purge+embargo, value-based time order) / **timeseries_period** (calendar or Δt period folds, wall-clock gaps, optional per-period weighting, rolling train window) — `fit(..., time=, label_time=, groups=)` |
51
+ | Outer holdout + finalize | `cv=CVConfig(outer_holdout=0.2)`: selection sees only DEV, the holdout is scored once; the shipped model is refit on all data after scoring (`finalize=True`) |
52
+ | Presets | `AutoML(preset="fast" / "balanced" / "best")` — declarative, data-driven partial configs; an explicit argument always wins; honesty parameters are not presettable |
53
+ | Budget + resume | `budget=600` (seconds) or `BudgetConfig(...)` with graceful degradation; `cache="runs/"` resumes by run fingerprint |
54
+ | Feature engineering / selection | OOF-honest target encoding (binary-only), frequency encoding, datetime deltas, intersections; importance / null-importance / random-probe / sequential / SHAP selection with honest arbitration |
55
+ | HPO + ensembling | `hpo=HPOConfig(...)` (Optuna, per-model search before the honest selection); `ensemble=EnsembleConfig()` — a Caruana/weighted blend ships **only if significantly better** |
56
+ | Run report | `model.run_report_` (versioned JSON, tracker-independent); `save_run_report` and `render_report` produce markdown or self-contained HTML (charts via the `report` extra) |
57
+ | Experiment tracking | `tracker="mlflow"` or `TrackerConfig(...)` — post-fit, fail-soft, no global mlflow state; custom backends via the `ExperimentTracker` port |
58
+ | Artifacts + serving | `save_artifact` / `load_artifact` — versioned, integrity-checked artifact directory (see Standalone inference below) |
59
+ | ONNX export | `honestml.export_onnx(model, dir, sample=...)` — parity-gated, export-only bundle for external runtimes |
60
+ | Plugins | third-party models via `honestml.models` entry points (`docs/plugin-contract.md`) |
61
+
62
+ ## Standalone inference
63
+
64
+ ```python
65
+ from honestml import load_artifact
66
+
67
+ model = load_artifact("artifact_dir/") # integrity-checked against the sha256 manifest
68
+ predictions = model.predict(X_new)
69
+ ```
70
+
71
+ The artifact directory is self-contained — manifest, preprocessing schema and the
72
+ model body — and loads under the slim `honestml[inference]` install: no training
73
+ stack is imported. Boosting models can be saved with structural native bodies
74
+ (`model_format="native"`). **Trust model:** the default body is joblib/pickle —
75
+ load only artifacts you trust; native bodies contain no pickle (a non-boosting
76
+ estimator and the optional calibrator still ship as joblib).
77
+
78
+ ## Reproducibility
79
+
80
+ Every run computes a **fingerprint** over the resolved config, data signature,
81
+ estimator set and library versions; the run report carries it together with the
82
+ full provenance (leaderboard, band, budget outcome, FS/HPO/ensemble decisions,
83
+ timings). Same inputs → same selection.
84
+
85
+ ## Documentation
86
+
87
+ Documentation lives in `docs/` — quickstart, API reference, correctness guide and
88
+ the plugin contract; build it locally with `mkdocs serve`. Source and issue
89
+ tracker: <https://github.com/sukhov-is/HonestML>.
90
+
91
+ ## Development
92
+
93
+ ```bash
94
+ uv sync --extra dev --extra boosting --extra shap --extra pyarrow --extra mlflow
95
+ uv run pytest # full suite (onnx export tests also need `--extra onnx`, Python >=3.11)
96
+ uv run ruff check src tests; uv run mypy src/honestml; uv run lint-imports
97
+ ```
98
+
99
+ The layered architecture (core ← adapters ← application ← composition) is enforced
100
+ by import-linter. See `docs/releasing.md` for the release pipeline and
101
+ `benchmarks/` for the honesty benchmark suite.
102
+
103
+ ## License
104
+
105
+ MIT (see `LICENSE`).
@@ -0,0 +1,145 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "honestml"
7
+ version = "1.0.0"
8
+ description = "General tabular AutoML library (classification + regression), production-grade."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ # PEP 639 (ADR-0077 §1): SPDX expression + the license file shipped in the dist
12
+ license = "MIT"
13
+ license-files = ["LICENSE"]
14
+ authors = [{ name = "honestml contributors" }]
15
+ keywords = ["honestml", "automl", "machine-learning", "tabular", "boosting", "feature-selection"]
16
+ classifiers = [
17
+ "Development Status :: 5 - Production/Stable",
18
+ "Intended Audience :: Science/Research",
19
+ "Programming Language :: Python :: 3.10",
20
+ "Programming Language :: Python :: 3.11",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
23
+ "Typing :: Typed",
24
+ ]
25
+
26
+ # Lightweight core: only what is needed to import `honestml` and the domain ports.
27
+ dependencies = [
28
+ "numpy>=1.24",
29
+ "pandas>=2.0",
30
+ "polars>=1.0",
31
+ "scikit-learn>=1.3",
32
+ "pydantic>=2",
33
+ ]
34
+
35
+ [project.optional-dependencies]
36
+ catboost = ["catboost>=1.2"]
37
+ lightgbm = ["lightgbm>=4.0"]
38
+ xgboost = ["xgboost>=2.0"]
39
+ boosting = ["catboost>=1.2", "lightgbm>=4.0", "xgboost>=2.0"]
40
+ # Slim serving runtime: load a ModelArtifact and predict without the training stack (ADR-0066 §3).
41
+ # pandas is REQUIRED — the Reader inference path imports it. Slimness relative to [all] comes from NOT
42
+ # installing the training-only extras (optuna/shap/mlflow/onnx) and from the lazy import cone never
43
+ # loading them; the set mirrors core plus an explicit joblib (a boosting artifact additionally needs its own extra).
44
+ inference = ["numpy>=1.24", "pandas>=2.0", "polars>=1.0", "scikit-learn>=1.3", "pydantic>=2", "joblib>=1.3"]
45
+ shap = ["shap>=0.44"]
46
+ optuna = ["optuna>=3.4"]
47
+ mlflow = ["mlflow>=2.9"]
48
+ # floors validated against the locked boosting zoo (onnx 1.21 / onnxruntime 1.23 /
49
+ # skl2onnx 1.20 / onnxmltools 1.16); onnxruntime powers the export-time
50
+ # parity gate (ADR-0071 §3/§6) and the external consumer.
51
+ onnx = ["onnx>=1.21", "onnxruntime>=1.23", "onnxmltools>=1.16", "skl2onnx>=1.20"]
52
+ report = ["matplotlib>=3.7"]
53
+ # psutil powers the cross-platform RSS probe for memory_limit_mb (ADR-0039); imported lazily in
54
+ # RunBudget only when a memory limit is set, never at top-level import.
55
+ memory = ["psutil>=5.9"]
56
+ # pyarrow accelerates native pandas materialization in string-heavy adapters
57
+ # (ADR-0005); the numpy+codes path does not require it.
58
+ pyarrow = ["pyarrow>=14.0"]
59
+ all = [
60
+ "catboost>=1.2", "lightgbm>=4.0", "xgboost>=2.0", "shap>=0.44",
61
+ "optuna>=3.4", "mlflow>=2.9", "onnx>=1.21", "onnxruntime>=1.23",
62
+ "onnxmltools>=1.16", "skl2onnx>=1.20", "matplotlib>=3.7", "pyarrow>=14.0", "psutil>=5.9",
63
+ ]
64
+ dev = [
65
+ "ruff==0.15.16",
66
+ "mypy>=1.8",
67
+ "import-linter>=2.0",
68
+ "pytest>=7.4",
69
+ "hypothesis>=6.90",
70
+ "pre-commit>=3.5",
71
+ "mkdocs-material>=9.5",
72
+ # API reference is generated from docstrings (ADR-0077 §4) — prose would drift
73
+ "mkdocstrings[python]>=0.27",
74
+ # the release-engineering yaml-parse tests — explicit, not transitive
75
+ "pyyaml>=6",
76
+ # optuna powers the HPO adapter (ADR-0061); in dev so the suite exercises the HPO path
77
+ "optuna>=3.4",
78
+ ]
79
+
80
+ [project.urls]
81
+ Homepage = "https://github.com/sukhov-is/HonestML"
82
+ Documentation = "https://sukhov-is.github.io/HonestML/"
83
+ Repository = "https://github.com/sukhov-is/HonestML"
84
+
85
+ [tool.hatch.build.targets.wheel]
86
+ packages = ["src/honestml"]
87
+
88
+ # Ship only the package + release metadata in the sdist; keep tests/docs/notebooks/benchmarks out
89
+ # of the published tarball (the wheel already ships only `src/honestml`).
90
+ [tool.hatch.build.targets.sdist]
91
+ # leading-slash anchors to the repo root (gitignore semantics) so a bare README.md/CHANGELOG.md
92
+ # inside docs/tests/notebooks is not pulled in recursively.
93
+ include = ["src/honestml", "/README.md", "/CHANGELOG.md", "/LICENSE", "/pyproject.toml"]
94
+
95
+ # ---------------------------------------------------------------------------
96
+ # Ruff (lint + format)
97
+ # ---------------------------------------------------------------------------
98
+ [tool.ruff]
99
+ line-length = 100
100
+ target-version = "py310"
101
+ src = ["src", "tests"]
102
+
103
+ [tool.ruff.lint]
104
+ select = ["E", "F", "W", "I", "UP", "T20"]
105
+ # T20 (flake8-print): no `print` in library code (ADR-0003).
106
+ # E501 is left to the formatter (do not fail on legacy long lines during migration).
107
+ ignore = ["E501"]
108
+
109
+ [tool.ruff.lint.per-file-ignores]
110
+ "tests/**" = ["T20"]
111
+ "docs/**" = ["T20", "E", "F"]
112
+ # showcase notebooks print their results by design (like tests); F/E/I rules still apply.
113
+ "notebooks/**" = ["T20"]
114
+
115
+ # ---------------------------------------------------------------------------
116
+ # Mypy — strict on the domain core, lenient elsewhere (ADR-0004)
117
+ # ---------------------------------------------------------------------------
118
+ [tool.mypy]
119
+ # 3.12: numpy>=2.5 ships stubs using PEP 695 `type` statements that mypy parses only under a
120
+ # 3.12 target; runtime 3.10 support is enforced by the test matrix + ruff (UP, target py310).
121
+ python_version = "3.12"
122
+ warn_unused_configs = true
123
+ ignore_missing_imports = true
124
+ warn_redundant_casts = true
125
+
126
+ [[tool.mypy.overrides]]
127
+ module = "honestml.core.*"
128
+ disallow_untyped_defs = true
129
+ disallow_incomplete_defs = true
130
+ check_untyped_defs = true
131
+ no_implicit_optional = true
132
+ warn_return_any = true
133
+ strict_equality = true
134
+
135
+ # ---------------------------------------------------------------------------
136
+ # Pytest
137
+ # ---------------------------------------------------------------------------
138
+ [tool.pytest.ini_options]
139
+ testpaths = ["tests"]
140
+ markers = [
141
+ "unit: fast pure-core tests (default)",
142
+ "property: hypothesis property tests (leakage/monotonicity invariants)",
143
+ "golden: determinism of metrics on synthetic data with fixed seed",
144
+ "slow: full pipeline run on synthetic data",
145
+ ]