honestml 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- honestml-1.0.0/.gitignore +45 -0
- honestml-1.0.0/CHANGELOG.md +79 -0
- honestml-1.0.0/LICENSE +21 -0
- honestml-1.0.0/PKG-INFO +190 -0
- honestml-1.0.0/README.md +105 -0
- honestml-1.0.0/pyproject.toml +145 -0
- honestml-1.0.0/src/honestml/__init__.py +124 -0
- honestml-1.0.0/src/honestml/adapters/__init__.py +157 -0
- honestml-1.0.0/src/honestml/adapters/boosting.py +474 -0
- honestml-1.0.0/src/honestml/adapters/calibration.py +93 -0
- honestml-1.0.0/src/honestml/adapters/candidate_cache.py +97 -0
- honestml-1.0.0/src/honestml/adapters/dtype_tokens.py +54 -0
- honestml-1.0.0/src/honestml/adapters/ensembling.py +211 -0
- honestml-1.0.0/src/honestml/adapters/estimators.py +217 -0
- honestml-1.0.0/src/honestml/adapters/feature_rankers.py +321 -0
- honestml-1.0.0/src/honestml/adapters/feature_selectors.py +72 -0
- honestml-1.0.0/src/honestml/adapters/loader.py +60 -0
- honestml-1.0.0/src/honestml/adapters/metrics.py +291 -0
- honestml-1.0.0/src/honestml/adapters/onnx_export.py +153 -0
- honestml-1.0.0/src/honestml/adapters/polars_dataset.py +172 -0
- honestml-1.0.0/src/honestml/adapters/reader.py +791 -0
- honestml-1.0.0/src/honestml/adapters/run_budget.py +129 -0
- honestml-1.0.0/src/honestml/adapters/serializers.py +228 -0
- honestml-1.0.0/src/honestml/adapters/significance.py +183 -0
- honestml-1.0.0/src/honestml/adapters/splitters.py +712 -0
- honestml-1.0.0/src/honestml/adapters/tracking.py +211 -0
- honestml-1.0.0/src/honestml/adapters/tuning.py +79 -0
- honestml-1.0.0/src/honestml/application/__init__.py +125 -0
- honestml-1.0.0/src/honestml/application/calibration.py +143 -0
- honestml-1.0.0/src/honestml/application/ensemble.py +249 -0
- honestml-1.0.0/src/honestml/application/feature_compare.py +1144 -0
- honestml-1.0.0/src/honestml/application/feature_encoding.py +109 -0
- honestml-1.0.0/src/honestml/application/feature_selection.py +197 -0
- honestml-1.0.0/src/honestml/application/oof_scorer.py +219 -0
- honestml-1.0.0/src/honestml/application/projection.py +87 -0
- honestml-1.0.0/src/honestml/application/run_report.py +379 -0
- honestml-1.0.0/src/honestml/application/slice.py +1059 -0
- honestml-1.0.0/src/honestml/application/tuning.py +174 -0
- honestml-1.0.0/src/honestml/composition/__init__.py +60 -0
- honestml-1.0.0/src/honestml/composition/artifact.py +524 -0
- honestml-1.0.0/src/honestml/composition/build.py +947 -0
- honestml-1.0.0/src/honestml/composition/facade.py +1136 -0
- honestml-1.0.0/src/honestml/composition/onnx_bundle.py +211 -0
- honestml-1.0.0/src/honestml/composition/presets.py +117 -0
- honestml-1.0.0/src/honestml/composition/registry.py +244 -0
- honestml-1.0.0/src/honestml/composition/run_report.py +407 -0
- honestml-1.0.0/src/honestml/core/__init__.py +171 -0
- honestml-1.0.0/src/honestml/core/config.py +373 -0
- honestml-1.0.0/src/honestml/core/context.py +52 -0
- honestml-1.0.0/src/honestml/core/dataset.py +93 -0
- honestml-1.0.0/src/honestml/core/exceptions.py +149 -0
- honestml-1.0.0/src/honestml/core/logging.py +37 -0
- honestml-1.0.0/src/honestml/core/ports/__init__.py +83 -0
- honestml-1.0.0/src/honestml/core/ports/budget.py +38 -0
- honestml-1.0.0/src/honestml/core/ports/cache.py +33 -0
- honestml-1.0.0/src/honestml/core/ports/calibration.py +34 -0
- honestml-1.0.0/src/honestml/core/ports/ensembler.py +79 -0
- honestml-1.0.0/src/honestml/core/ports/estimator.py +106 -0
- honestml-1.0.0/src/honestml/core/ports/feature_ranker.py +55 -0
- honestml-1.0.0/src/honestml/core/ports/feature_subset_selector.py +53 -0
- honestml-1.0.0/src/honestml/core/ports/metric.py +46 -0
- honestml-1.0.0/src/honestml/core/ports/model_serializer.py +42 -0
- honestml-1.0.0/src/honestml/core/ports/model_spec.py +52 -0
- honestml-1.0.0/src/honestml/core/ports/significance.py +61 -0
- honestml-1.0.0/src/honestml/core/ports/splitter.py +112 -0
- honestml-1.0.0/src/honestml/core/ports/tracker.py +29 -0
- honestml-1.0.0/src/honestml/core/ports/tuner.py +131 -0
- honestml-1.0.0/src/honestml/core/schema.py +357 -0
- honestml-1.0.0/src/honestml/core/selection_policy.py +209 -0
- honestml-1.0.0/src/honestml/core/task.py +83 -0
- honestml-1.0.0/src/honestml/py.typed +0 -0
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
build/
|
|
6
|
+
dist/
|
|
7
|
+
.eggs/
|
|
8
|
+
|
|
9
|
+
# Tooling caches
|
|
10
|
+
.ruff_cache/
|
|
11
|
+
.mypy_cache/
|
|
12
|
+
.pytest_cache/
|
|
13
|
+
.hypothesis/
|
|
14
|
+
.import_linter_cache/
|
|
15
|
+
|
|
16
|
+
# Docs build
|
|
17
|
+
site/
|
|
18
|
+
# docs/automl-productionization/
|
|
19
|
+
|
|
20
|
+
# Environments
|
|
21
|
+
.venv/
|
|
22
|
+
venv/
|
|
23
|
+
.env
|
|
24
|
+
|
|
25
|
+
# IDE / OS
|
|
26
|
+
.idea/
|
|
27
|
+
.vscode/
|
|
28
|
+
.DS_Store
|
|
29
|
+
.CLAUDE/
|
|
30
|
+
|
|
31
|
+
# local working files, not for publication
|
|
32
|
+
_legacy/
|
|
33
|
+
catboost_info/
|
|
34
|
+
benchmarks/results.json
|
|
35
|
+
benchmarks/native_cat_gate_results.json
|
|
36
|
+
|
|
37
|
+
# showcase notebook inputs/outputs (large Kaggle datasets, generated submissions)
|
|
38
|
+
notebooks/data/
|
|
39
|
+
notebooks/results/
|
|
40
|
+
notebooks/.ipynb_checkpoints/
|
|
41
|
+
|
|
42
|
+
# internal design/architecture packages (kept local, not published)
|
|
43
|
+
docs/architecture/
|
|
44
|
+
docs/audit/
|
|
45
|
+
docs/implementation/
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project are documented here. The format follows
|
|
4
|
+
[Keep a Changelog](https://keepachangelog.com/) and the project adheres to
|
|
5
|
+
[Semantic Versioning](https://semver.org/) (see `docs/versioning-policy.md`).
|
|
6
|
+
|
|
7
|
+
## [Unreleased]
|
|
8
|
+
|
|
9
|
+
## [1.0.0] - 2026-06-24
|
|
10
|
+
|
|
11
|
+
First public release: a tabular AutoML library for binary/multiclass classification and
|
|
12
|
+
regression, built around honest model selection — the score you see is the score you can
|
|
13
|
+
expect in production.
|
|
14
|
+
|
|
15
|
+
The honesty-benchmark baseline (`benchmarks/baseline.json`) is bootstrapped for this release;
|
|
16
|
+
subsequent releases are gated against it so that they do not regress (see `docs/releasing.md`).
|
|
17
|
+
|
|
18
|
+
### Added
|
|
19
|
+
- **Task and data input contract:** sklearn-compatible `AutoML` facade
|
|
20
|
+
(`fit`/`predict`/`predict_proba`/`score`, works in `Pipeline`) over pandas/polars/numpy
|
|
21
|
+
inputs, with typed schema inference, stable train↔inference categorical encoding,
|
|
22
|
+
boundary validation, and row metadata via `fit(..., sample_weight=, groups=, time=,
|
|
23
|
+
label_time=)`.
|
|
24
|
+
- **CV schemes:** stratified / kfold / group / holdout / timeseries (purge+embargo, value-based
|
|
25
|
+
time ordering) / timeseries_period (calendar or Δt periods with wall-clock purge/embargo,
|
|
26
|
+
optional per-period weighting and a rolling max-train window) (`CVConfig(scheme=...)`; `"auto"`
|
|
27
|
+
picks the task default); every fold passes an anti-leakage validation.
|
|
28
|
+
- **Honest selection:** out-of-fold scoring on a shared CV split with a seeded
|
|
29
|
+
paired-bootstrap **equivalence band** (`significance="bootstrap"`, default) — the
|
|
30
|
+
simplest model statistically indistinguishable from the best wins and ties are
|
|
31
|
+
disclosed, never hidden; opt-in probability calibration (`CVConfig(calibrate=...)`)
|
|
32
|
+
gated by cross-fitted improvement.
|
|
33
|
+
- **Outer holdout + finalize:** `CVConfig(outer_holdout=...)` carves a scheme-aware,
|
|
34
|
+
untouched holdout scored exactly once for an unbiased final estimate; `finalize=True`
|
|
35
|
+
then refits the shipped model on all data after scoring.
|
|
36
|
+
- **Presets:** `AutoML(preset="fast" | "balanced" | "best")` — declarative partial configs
|
|
37
|
+
that fill only unset parameters; an explicit argument always wins, and honesty settings
|
|
38
|
+
are not presettable.
|
|
39
|
+
- **Budget + resume/cache:** `budget=<seconds>` or `BudgetConfig(...)` (time / trial /
|
|
40
|
+
memory limits) with graceful degradation to the best model so far; `cache="dir/"`
|
|
41
|
+
reuses per-candidate results keyed by a deterministic run fingerprint and resumes
|
|
42
|
+
interrupted runs.
|
|
43
|
+
- **Feature engineering + selection:** opt-in `FEConfig` (leakage-controlled out-of-fold
|
|
44
|
+
target encoding, frequency encoding, datetime deltas, categorical intersections) and
|
|
45
|
+
`FeatureSelectionConfig` (importance / random-probe / null-importance / sequential /
|
|
46
|
+
SHAP strategies with honest multi-strategy arbitration on a holdout or nested CV). The
|
|
47
|
+
`sequential` wrapper chooses its feature count honestly: it explores the full backward
|
|
48
|
+
trajectory and the **fewest features statistically indistinguishable from the best**
|
|
49
|
+
(significance band + Occam tie-break, default `significance="bootstrap"`) win, instead of
|
|
50
|
+
the raw out-of-fold argmax; `significance="off"` reproduces the plain argmax. The band is
|
|
51
|
+
scored on the selection folds and is strictly more conservative than argmax (residual
|
|
52
|
+
optimism documented; independent-OOF scoring is a future improvement).
|
|
53
|
+
- **Hyperparameter optimization:** `hpo=HPOConfig(...)` — seeded Optuna search over
|
|
54
|
+
per-model spaces on an inner CV of the dev data; tuned candidates then compete in the
|
|
55
|
+
regular honest selection, sharing the run's time budget.
|
|
56
|
+
- **Ensembling:** `ensemble=EnsembleConfig(...)` — greedy (Caruana) or weighted blend over
|
|
57
|
+
the out-of-fold predictions, shipped only if significantly better than the best single
|
|
58
|
+
model; the gate decision is always reported.
|
|
59
|
+
- **Run report + rendering:** versioned, tracker-independent JSON `run_report_` with full
|
|
60
|
+
provenance (resolved config, run fingerprint, leaderboard, equivalence band, timings,
|
|
61
|
+
budget/FS/HPO/ensemble outcomes); `save_run_report` plus `render_report` to markdown or
|
|
62
|
+
self-contained HTML (charts via the `report` extra).
|
|
63
|
+
- **Experiment tracking:** opt-in `tracker="mlflow"` / `TrackerConfig(...)` logs the run
|
|
64
|
+
report after fit — fail-soft and free of global MLflow state; custom backends plug in
|
|
65
|
+
via the `ExperimentTracker` port.
|
|
66
|
+
- **Artifacts + serving:** `save_artifact` / `load_artifact` — a self-contained, versioned
|
|
67
|
+
artifact directory with a sha256 integrity manifest; native boosting bodies
|
|
68
|
+
(`model_format="native"`, no pickle); the slim `honestml[inference]` extra serves
|
|
69
|
+
`load_artifact(...).predict(...)` without importing the training stack.
|
|
70
|
+
- **ONNX export:** `export_onnx(model, directory, sample=...)` — a parity-gated,
|
|
71
|
+
export-only bundle (linear and boosting models) for external runtimes.
|
|
72
|
+
- **Models and plugins:** lightweight built-in zoo (baseline, linear) plus
|
|
73
|
+
catboost/lightgbm/xgboost via the `boosting` extra; third-party estimators via
|
|
74
|
+
`honestml.models` entry points (see `docs/plugin-contract.md`), discovered lazily with
|
|
75
|
+
fail-fast name-conflict detection.
|
|
76
|
+
- **Logging, exceptions, typing:** silent-by-default `honestml` logger (`NullHandler`);
|
|
77
|
+
one exception taxonomy rooted at `AutoMLError`; fully typed (`py.typed`);
|
|
78
|
+
`import honestml` stays lightweight — optional extras load lazily and a missing one
|
|
79
|
+
fails fast with the exact `pip install honestml[...]` hint.
|
honestml-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 honestml contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
honestml-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: honestml
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: General tabular AutoML library (classification + regression), production-grade.
|
|
5
|
+
Project-URL: Homepage, https://github.com/sukhov-is/HonestML
|
|
6
|
+
Project-URL: Documentation, https://sukhov-is.github.io/HonestML/
|
|
7
|
+
Project-URL: Repository, https://github.com/sukhov-is/HonestML
|
|
8
|
+
Author: honestml contributors
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: automl,boosting,feature-selection,honestml,machine-learning,tabular
|
|
12
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
18
|
+
Classifier: Typing :: Typed
|
|
19
|
+
Requires-Python: >=3.10
|
|
20
|
+
Requires-Dist: numpy>=1.24
|
|
21
|
+
Requires-Dist: pandas>=2.0
|
|
22
|
+
Requires-Dist: polars>=1.0
|
|
23
|
+
Requires-Dist: pydantic>=2
|
|
24
|
+
Requires-Dist: scikit-learn>=1.3
|
|
25
|
+
Provides-Extra: all
|
|
26
|
+
Requires-Dist: catboost>=1.2; extra == 'all'
|
|
27
|
+
Requires-Dist: lightgbm>=4.0; extra == 'all'
|
|
28
|
+
Requires-Dist: matplotlib>=3.7; extra == 'all'
|
|
29
|
+
Requires-Dist: mlflow>=2.9; extra == 'all'
|
|
30
|
+
Requires-Dist: onnx>=1.21; extra == 'all'
|
|
31
|
+
Requires-Dist: onnxmltools>=1.16; extra == 'all'
|
|
32
|
+
Requires-Dist: onnxruntime>=1.23; extra == 'all'
|
|
33
|
+
Requires-Dist: optuna>=3.4; extra == 'all'
|
|
34
|
+
Requires-Dist: psutil>=5.9; extra == 'all'
|
|
35
|
+
Requires-Dist: pyarrow>=14.0; extra == 'all'
|
|
36
|
+
Requires-Dist: shap>=0.44; extra == 'all'
|
|
37
|
+
Requires-Dist: skl2onnx>=1.20; extra == 'all'
|
|
38
|
+
Requires-Dist: xgboost>=2.0; extra == 'all'
|
|
39
|
+
Provides-Extra: boosting
|
|
40
|
+
Requires-Dist: catboost>=1.2; extra == 'boosting'
|
|
41
|
+
Requires-Dist: lightgbm>=4.0; extra == 'boosting'
|
|
42
|
+
Requires-Dist: xgboost>=2.0; extra == 'boosting'
|
|
43
|
+
Provides-Extra: catboost
|
|
44
|
+
Requires-Dist: catboost>=1.2; extra == 'catboost'
|
|
45
|
+
Provides-Extra: dev
|
|
46
|
+
Requires-Dist: hypothesis>=6.90; extra == 'dev'
|
|
47
|
+
Requires-Dist: import-linter>=2.0; extra == 'dev'
|
|
48
|
+
Requires-Dist: mkdocs-material>=9.5; extra == 'dev'
|
|
49
|
+
Requires-Dist: mkdocstrings[python]>=0.27; extra == 'dev'
|
|
50
|
+
Requires-Dist: mypy>=1.8; extra == 'dev'
|
|
51
|
+
Requires-Dist: optuna>=3.4; extra == 'dev'
|
|
52
|
+
Requires-Dist: pre-commit>=3.5; extra == 'dev'
|
|
53
|
+
Requires-Dist: pytest>=7.4; extra == 'dev'
|
|
54
|
+
Requires-Dist: pyyaml>=6; extra == 'dev'
|
|
55
|
+
Requires-Dist: ruff==0.15.16; extra == 'dev'
|
|
56
|
+
Provides-Extra: inference
|
|
57
|
+
Requires-Dist: joblib>=1.3; extra == 'inference'
|
|
58
|
+
Requires-Dist: numpy>=1.24; extra == 'inference'
|
|
59
|
+
Requires-Dist: pandas>=2.0; extra == 'inference'
|
|
60
|
+
Requires-Dist: polars>=1.0; extra == 'inference'
|
|
61
|
+
Requires-Dist: pydantic>=2; extra == 'inference'
|
|
62
|
+
Requires-Dist: scikit-learn>=1.3; extra == 'inference'
|
|
63
|
+
Provides-Extra: lightgbm
|
|
64
|
+
Requires-Dist: lightgbm>=4.0; extra == 'lightgbm'
|
|
65
|
+
Provides-Extra: memory
|
|
66
|
+
Requires-Dist: psutil>=5.9; extra == 'memory'
|
|
67
|
+
Provides-Extra: mlflow
|
|
68
|
+
Requires-Dist: mlflow>=2.9; extra == 'mlflow'
|
|
69
|
+
Provides-Extra: onnx
|
|
70
|
+
Requires-Dist: onnx>=1.21; extra == 'onnx'
|
|
71
|
+
Requires-Dist: onnxmltools>=1.16; extra == 'onnx'
|
|
72
|
+
Requires-Dist: onnxruntime>=1.23; extra == 'onnx'
|
|
73
|
+
Requires-Dist: skl2onnx>=1.20; extra == 'onnx'
|
|
74
|
+
Provides-Extra: optuna
|
|
75
|
+
Requires-Dist: optuna>=3.4; extra == 'optuna'
|
|
76
|
+
Provides-Extra: pyarrow
|
|
77
|
+
Requires-Dist: pyarrow>=14.0; extra == 'pyarrow'
|
|
78
|
+
Provides-Extra: report
|
|
79
|
+
Requires-Dist: matplotlib>=3.7; extra == 'report'
|
|
80
|
+
Provides-Extra: shap
|
|
81
|
+
Requires-Dist: shap>=0.44; extra == 'shap'
|
|
82
|
+
Provides-Extra: xgboost
|
|
83
|
+
Requires-Dist: xgboost>=2.0; extra == 'xgboost'
|
|
84
|
+
Description-Content-Type: text/markdown
|
|
85
|
+
|
|
86
|
+
# honestml
|
|
87
|
+
|
|
88
|
+
[PyPI](https://pypi.org/project/honestml/)
|
|
89
|
+
[CI](https://github.com/sukhov-is/HonestML/actions/workflows/ci.yml)
|
|
90
|
+
[Python versions](https://pypi.org/project/honestml/)
|
|
91
|
+
[License: MIT](LICENSE)
|
|
92
|
+
|
|
93
|
+
**Tabular AutoML where the leaderboard doesn't lie.** Most AutoML frameworks ship the
|
|
94
|
+
model with the best validation score — but that number is optimistic, because you
|
|
95
|
+
selected for it. honestml is built so that the score you see is the score you can
|
|
96
|
+
expect in production.
|
|
97
|
+
|
|
98
|
+
It covers binary / multiclass classification and regression behind a clean, extensible
|
|
99
|
+
core. The honesty is in *how it selects*: out-of-fold scoring on a shared CV split; a
|
|
100
|
+
bootstrap **equivalence band** that, among the statistically indistinguishable best
|
|
101
|
+
candidates, ships the simplest one; leakage-controlled feature engineering and
|
|
102
|
+
selection; an optional untouched outer holdout scored exactly once; and reproducible,
|
|
103
|
+
fingerprinted runs.
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
from honestml import AutoML
|
|
107
|
+
|
|
108
|
+
model = AutoML(task="binary").fit(X, y)
|
|
109
|
+
proba = model.predict_proba(X_new)
|
|
110
|
+
print(model.best_model_id_, model.leaderboard_)
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
The library is silent by default (a `NullHandler` on the `honestml` logger); enable
|
|
114
|
+
progress with `logging.getLogger("honestml").setLevel(logging.INFO)` plus
|
|
115
|
+
`logging.basicConfig()`.
|
|
116
|
+
|
|
117
|
+
## Install
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
pip install honestml # lightweight core (baseline/linear models)
|
|
121
|
+
pip install "honestml[boosting]" # core + catboost, lightgbm, xgboost
|
|
122
|
+
pip install "honestml[all]" # boosting + optuna (HPO), mlflow (tracking), onnx, shap, report and the rest
|
|
123
|
+
pip install "honestml[inference]" # slim serving runtime (load_artifact + predict only)
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Requires Python >= 3.10. Heavy dependencies are optional extras and imported
|
|
127
|
+
lazily — `import honestml` stays light, and a missing extra fails fast with the
|
|
128
|
+
exact `pip install honestml[...]` hint.
|
|
129
|
+
|
|
130
|
+
## What you get
|
|
131
|
+
|
|
132
|
+
| Capability | How |
|
|
133
|
+
|---|---|
|
|
134
|
+
| Honest model selection | OOF scoring on a shared CV split; a seeded bootstrap **equivalence band** (`significance="bootstrap"`, the default) collects candidates statistically indistinguishable from the best, and the simplest member of the band wins — ties are disclosed, not hidden |
|
|
135
|
+
| CV schemes | stratified / kfold / group / holdout / **timeseries** (purge+embargo, value-based time order) / **timeseries_period** (calendar or Δt period folds, wall-clock gaps, optional per-period weighting, rolling train window) — `fit(..., time=, label_time=, groups=)` |
|
|
136
|
+
| Outer holdout + finalize | `cv=CVConfig(outer_holdout=0.2)`: selection sees only DEV, the holdout is scored once; the shipped model is refit on all data after scoring (`finalize=True`) |
|
|
137
|
+
| Presets | `AutoML(preset="fast" / "balanced" / "best")` — declarative, data-driven partial configs; an explicit argument always wins; honesty parameters are not presettable |
|
|
138
|
+
| Budget + resume | `budget=600` (seconds) or `BudgetConfig(...)` with graceful degradation; `cache="runs/"` resumes by run fingerprint |
|
|
139
|
+
| Feature engineering / selection | OOF-honest target encoding (binary-only), frequency encoding, datetime deltas, categorical intersections; importance / null-importance / random-probe / sequential / SHAP selection with honest arbitration |
|
|
140
|
+
| HPO + ensembling | `hpo=HPOConfig(...)` (Optuna, per-model search before the honest selection); `ensemble=EnsembleConfig()` — a Caruana/weighted blend ships **only if significantly better** |
|
|
141
|
+
| Run report | `model.run_report_` (versioned JSON, tracker-independent); `save_run_report` and `render_report` produce markdown or self-contained HTML (charts via the `report` extra) |
|
|
142
|
+
| Experiment tracking | `tracker="mlflow"` or `TrackerConfig(...)` — post-fit, fail-soft, no global mlflow state; custom backends via the `ExperimentTracker` port |
|
|
143
|
+
| Artifacts + serving | `save_artifact` / `load_artifact` — versioned, integrity-checked artifact directory (see Standalone inference below) |
|
|
144
|
+
| ONNX export | `honestml.export_onnx(model, dir, sample=...)` — parity-gated, export-only bundle for external runtimes |
|
|
145
|
+
| Plugins | third-party models via `honestml.models` entry points (`docs/plugin-contract.md`) |
|
|
146
|
+
|
|
147
|
+
## Standalone inference
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
from honestml import load_artifact
|
|
151
|
+
|
|
152
|
+
model = load_artifact("artifact_dir/") # integrity-checked against the sha256 manifest
|
|
153
|
+
predictions = model.predict(X_new)
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
The artifact directory is self-contained — manifest, preprocessing schema and the
|
|
157
|
+
model body — and loads under the slim `honestml[inference]` install: no training
|
|
158
|
+
stack is imported. Boosting models can be saved with structural native bodies
|
|
159
|
+
(`model_format="native"`). **Trust model:** the default body is joblib/pickle —
|
|
160
|
+
load only artifacts you trust; native bodies contain no pickle (a non-boosting
|
|
161
|
+
estimator and the optional calibrator still ship as joblib).
|
|
162
|
+
|
|
163
|
+
## Reproducibility
|
|
164
|
+
|
|
165
|
+
Every run computes a **fingerprint** over the resolved config, data signature,
|
|
166
|
+
estimator set and library versions; the run report carries it together with the
|
|
167
|
+
full provenance (leaderboard, band, budget outcome, FS/HPO/ensemble decisions,
|
|
168
|
+
timings). Same inputs → same selection.
|
|
169
|
+
|
|
170
|
+
## Documentation
|
|
171
|
+
|
|
172
|
+
Documentation lives in `docs/` — quickstart, API reference, correctness guide and
|
|
173
|
+
the plugin contract; build it locally with `mkdocs serve`. Source and issue
|
|
174
|
+
tracker: <https://github.com/sukhov-is/HonestML>.
|
|
175
|
+
|
|
176
|
+
## Development
|
|
177
|
+
|
|
178
|
+
```bash
|
|
179
|
+
uv sync --extra dev --extra boosting --extra shap --extra pyarrow --extra mlflow
|
|
180
|
+
uv run pytest # full suite (onnx export tests also need `--extra onnx`, Python >=3.11)
|
|
181
|
+
uv run ruff check src tests; uv run mypy src/honestml; uv run lint-imports
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
The layered architecture (core ← adapters ← application ← composition) is enforced
|
|
185
|
+
by import-linter. See `docs/releasing.md` for the release pipeline and
|
|
186
|
+
`benchmarks/` for the honesty benchmark suite.
|
|
187
|
+
|
|
188
|
+
## License
|
|
189
|
+
|
|
190
|
+
MIT (see `LICENSE`).
|
honestml-1.0.0/README.md
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# honestml
|
|
2
|
+
|
|
3
|
+
[PyPI](https://pypi.org/project/honestml/)
|
|
4
|
+
[CI](https://github.com/sukhov-is/HonestML/actions/workflows/ci.yml)
|
|
5
|
+
[Python versions](https://pypi.org/project/honestml/)
|
|
6
|
+
[License: MIT](LICENSE)
|
|
7
|
+
|
|
8
|
+
**Tabular AutoML where the leaderboard doesn't lie.** Most AutoML frameworks ship the
|
|
9
|
+
model with the best validation score — but that number is optimistic, because you
|
|
10
|
+
selected for it. honestml is built so that the score you see is the score you can
|
|
11
|
+
expect in production.
|
|
12
|
+
|
|
13
|
+
It covers binary / multiclass classification and regression behind a clean, extensible
|
|
14
|
+
core. The honesty is in *how it selects*: out-of-fold scoring on a shared CV split; a
|
|
15
|
+
bootstrap **equivalence band** that, among the statistically indistinguishable best
|
|
16
|
+
candidates, ships the simplest one; leakage-controlled feature engineering and
|
|
17
|
+
selection; an optional untouched outer holdout scored exactly once; and reproducible,
|
|
18
|
+
fingerprinted runs.
|
|
19
|
+
|
|
20
|
+
```python
|
|
21
|
+
from honestml import AutoML
|
|
22
|
+
|
|
23
|
+
model = AutoML(task="binary").fit(X, y)
|
|
24
|
+
proba = model.predict_proba(X_new)
|
|
25
|
+
print(model.best_model_id_, model.leaderboard_)
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
The library is silent by default (a `NullHandler` on the `honestml` logger); enable
|
|
29
|
+
progress with `logging.getLogger("honestml").setLevel(logging.INFO)` plus
|
|
30
|
+
`logging.basicConfig()`.
|
|
31
|
+
|
|
32
|
+
## Install
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install honestml # lightweight core (baseline/linear models)
|
|
36
|
+
pip install "honestml[boosting]" # core + catboost, lightgbm, xgboost
|
|
37
|
+
pip install "honestml[all]" # boosting + optuna (HPO), mlflow (tracking), onnx, shap, report and the rest
|
|
38
|
+
pip install "honestml[inference]" # slim serving runtime (load_artifact + predict only)
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Requires Python >= 3.10. Heavy dependencies are optional extras and imported
|
|
42
|
+
lazily — `import honestml` stays light, and a missing extra fails fast with the
|
|
43
|
+
exact `pip install honestml[...]` hint.
|
|
44
|
+
|
|
45
|
+
## What you get
|
|
46
|
+
|
|
47
|
+
| Capability | How |
|
|
48
|
+
|---|---|
|
|
49
|
+
| Honest model selection | OOF scoring on a shared CV split; a seeded bootstrap **equivalence band** (`significance="bootstrap"`, the default) collects candidates statistically indistinguishable from the best, and the simplest member of the band wins — ties are disclosed, not hidden |
|
|
50
|
+
| CV schemes | stratified / kfold / group / holdout / **timeseries** (purge+embargo, value-based time order) / **timeseries_period** (calendar or Δt period folds, wall-clock gaps, optional per-period weighting, rolling train window) — `fit(..., time=, label_time=, groups=)` |
|
|
51
|
+
| Outer holdout + finalize | `cv=CVConfig(outer_holdout=0.2)`: selection sees only DEV, the holdout is scored once; the shipped model is refit on all data after scoring (`finalize=True`) |
|
|
52
|
+
| Presets | `AutoML(preset="fast" / "balanced" / "best")` — declarative, data-driven partial configs; an explicit argument always wins; honesty parameters are not presettable |
|
|
53
|
+
| Budget + resume | `budget=600` (seconds) or `BudgetConfig(...)` with graceful degradation; `cache="runs/"` resumes by run fingerprint |
|
|
54
|
+
| Feature engineering / selection | OOF-honest target encoding (binary-only), frequency encoding, datetime deltas, categorical intersections; importance / null-importance / random-probe / sequential / SHAP selection with honest arbitration |
|
|
55
|
+
| HPO + ensembling | `hpo=HPOConfig(...)` (Optuna, per-model search before the honest selection); `ensemble=EnsembleConfig()` — a Caruana/weighted blend ships **only if significantly better** |
|
|
56
|
+
| Run report | `model.run_report_` (versioned JSON, tracker-independent); `save_run_report` and `render_report` produce markdown or self-contained HTML (charts via the `report` extra) |
|
|
57
|
+
| Experiment tracking | `tracker="mlflow"` or `TrackerConfig(...)` — post-fit, fail-soft, no global mlflow state; custom backends via the `ExperimentTracker` port |
|
|
58
|
+
| Artifacts + serving | `save_artifact` / `load_artifact` — versioned, integrity-checked artifact directory (see Standalone inference below) |
|
|
59
|
+
| ONNX export | `honestml.export_onnx(model, dir, sample=...)` — parity-gated, export-only bundle for external runtimes |
|
|
60
|
+
| Plugins | third-party models via `honestml.models` entry points (`docs/plugin-contract.md`) |
|
|
61
|
+
|
|
62
|
+
## Standalone inference
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
from honestml import load_artifact
|
|
66
|
+
|
|
67
|
+
model = load_artifact("artifact_dir/") # integrity-checked against the sha256 manifest
|
|
68
|
+
predictions = model.predict(X_new)
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
The artifact directory is self-contained — manifest, preprocessing schema and the
|
|
72
|
+
model body — and loads under the slim `honestml[inference]` install: no training
|
|
73
|
+
stack is imported. Boosting models can be saved with structural native bodies
|
|
74
|
+
(`model_format="native"`). **Trust model:** the default body is joblib/pickle —
|
|
75
|
+
load only artifacts you trust; native bodies contain no pickle (a non-boosting
|
|
76
|
+
estimator and the optional calibrator still ship as joblib).
|
|
77
|
+
|
|
78
|
+
## Reproducibility
|
|
79
|
+
|
|
80
|
+
Every run computes a **fingerprint** over the resolved config, data signature,
|
|
81
|
+
estimator set and library versions; the run report carries it together with the
|
|
82
|
+
full provenance (leaderboard, band, budget outcome, FS/HPO/ensemble decisions,
|
|
83
|
+
timings). Same inputs → same selection.
|
|
84
|
+
|
|
85
|
+
## Documentation
|
|
86
|
+
|
|
87
|
+
Documentation lives in `docs/` — quickstart, API reference, correctness guide and
|
|
88
|
+
the plugin contract; build it locally with `mkdocs serve`. Source and issue
|
|
89
|
+
tracker: <https://github.com/sukhov-is/HonestML>.
|
|
90
|
+
|
|
91
|
+
## Development
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
uv sync --extra dev --extra boosting --extra shap --extra pyarrow --extra mlflow
|
|
95
|
+
uv run pytest # full suite (onnx export tests also need `--extra onnx`, Python >=3.11)
|
|
96
|
+
uv run ruff check src tests; uv run mypy src/honestml; uv run lint-imports
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
The layered architecture (core ← adapters ← application ← composition) is enforced
|
|
100
|
+
by import-linter. See `docs/releasing.md` for the release pipeline and
|
|
101
|
+
`benchmarks/` for the honesty benchmark suite.
|
|
102
|
+
|
|
103
|
+
## License
|
|
104
|
+
|
|
105
|
+
MIT (see `LICENSE`).
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "honestml"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
description = "General tabular AutoML library (classification + regression), production-grade."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
# PEP 639 (ADR-0077 §1): SPDX expression + the license file shipped in the dist
|
|
12
|
+
license = "MIT"
|
|
13
|
+
license-files = ["LICENSE"]
|
|
14
|
+
authors = [{ name = "honestml contributors" }]
|
|
15
|
+
keywords = ["honestml", "automl", "machine-learning", "tabular", "boosting", "feature-selection"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 5 - Production/Stable",
|
|
18
|
+
"Intended Audience :: Science/Research",
|
|
19
|
+
"Programming Language :: Python :: 3.10",
|
|
20
|
+
"Programming Language :: Python :: 3.11",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
23
|
+
"Typing :: Typed",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
# Lightweight core: only what is needed to import `honestml` and the domain ports.
|
|
27
|
+
dependencies = [
|
|
28
|
+
"numpy>=1.24",
|
|
29
|
+
"pandas>=2.0",
|
|
30
|
+
"polars>=1.0",
|
|
31
|
+
"scikit-learn>=1.3",
|
|
32
|
+
"pydantic>=2",
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
[project.optional-dependencies]
|
|
36
|
+
catboost = ["catboost>=1.2"]
|
|
37
|
+
lightgbm = ["lightgbm>=4.0"]
|
|
38
|
+
xgboost = ["xgboost>=2.0"]
|
|
39
|
+
boosting = ["catboost>=1.2", "lightgbm>=4.0", "xgboost>=2.0"]
|
|
40
|
+
# Slim serving runtime: load a ModelArtifact and predict without the training stack (ADR-0066 §3).
|
|
41
|
+
# pandas is REQUIRED — the Reader inference path imports it. Slim-ness vs [all] comes from NOT installing
|
|
42
|
+
# the training-only extras (optuna/shap/mlflow/onnx) and from the lazy import cone never importing them; the
|
|
43
|
+
# set mirrors core + an explicit joblib (a boosting artifact additionally needs its library's extra: catboost/lightgbm/xgboost).
|
|
44
|
+
inference = ["numpy>=1.24", "pandas>=2.0", "polars>=1.0", "scikit-learn>=1.3", "pydantic>=2", "joblib>=1.3"]
|
|
45
|
+
shap = ["shap>=0.44"]
|
|
46
|
+
optuna = ["optuna>=3.4"]
|
|
47
|
+
mlflow = ["mlflow>=2.9"]
|
|
48
|
+
# floors validated against the locked boosting zoo (onnx 1.21 / onnxruntime 1.23 /
|
|
49
|
+
# skl2onnx 1.20 / onnxmltools 1.16); onnxruntime powers the export-time
|
|
50
|
+
# parity gate (ADR-0071 §3/§6) and the external consumer.
|
|
51
|
+
onnx = ["onnx>=1.21", "onnxruntime>=1.23", "onnxmltools>=1.16", "skl2onnx>=1.20"]
|
|
52
|
+
report = ["matplotlib>=3.7"]
|
|
53
|
+
# psutil powers the cross-platform RSS probe for memory_limit_mb (ADR-0039); imported lazily in
|
|
54
|
+
# RunBudget only when a memory limit is set, never at top-level import.
|
|
55
|
+
memory = ["psutil>=5.9"]
|
|
56
|
+
# pyarrow accelerates native pandas materialization in string-heavy adapters
|
|
57
|
+
# (ADR-0005); the numpy+codes path does not require it.
|
|
58
|
+
pyarrow = ["pyarrow>=14.0"]
|
|
59
|
+
all = [
|
|
60
|
+
"catboost>=1.2", "lightgbm>=4.0", "xgboost>=2.0", "shap>=0.44",
|
|
61
|
+
"optuna>=3.4", "mlflow>=2.9", "onnx>=1.21", "onnxruntime>=1.23",
|
|
62
|
+
"onnxmltools>=1.16", "skl2onnx>=1.20", "matplotlib>=3.7", "pyarrow>=14.0", "psutil>=5.9",
|
|
63
|
+
]
|
|
64
|
+
dev = [
|
|
65
|
+
"ruff==0.15.16",
|
|
66
|
+
"mypy>=1.8",
|
|
67
|
+
"import-linter>=2.0",
|
|
68
|
+
"pytest>=7.4",
|
|
69
|
+
"hypothesis>=6.90",
|
|
70
|
+
"pre-commit>=3.5",
|
|
71
|
+
"mkdocs-material>=9.5",
|
|
72
|
+
# API reference is generated from docstrings (ADR-0077 §4) — prose would drift
|
|
73
|
+
"mkdocstrings[python]>=0.27",
|
|
74
|
+
# the release-engineering yaml-parse tests — explicit, not transitive
|
|
75
|
+
"pyyaml>=6",
|
|
76
|
+
# optuna powers the HPO adapter (ADR-0061); in dev so the suite exercises the HPO path
|
|
77
|
+
"optuna>=3.4",
|
|
78
|
+
]
|
|
79
|
+
|
|
80
|
+
[project.urls]
|
|
81
|
+
Homepage = "https://github.com/sukhov-is/HonestML"
|
|
82
|
+
Documentation = "https://sukhov-is.github.io/HonestML/"
|
|
83
|
+
Repository = "https://github.com/sukhov-is/HonestML"
|
|
84
|
+
|
|
85
|
+
[tool.hatch.build.targets.wheel]
|
|
86
|
+
packages = ["src/honestml"]
|
|
87
|
+
|
|
88
|
+
# Ship only the package + release metadata in the sdist; keep tests/docs/notebooks/benchmarks out
|
|
89
|
+
# of the published tarball (the wheel already ships only `src/honestml`).
|
|
90
|
+
[tool.hatch.build.targets.sdist]
|
|
91
|
+
# A leading slash anchors the pattern to the repo root (gitignore semantics) so a bare README.md/CHANGELOG.md
|
|
92
|
+
# inside docs/tests/notebooks is not pulled in recursively.
|
|
93
|
+
include = ["src/honestml", "/README.md", "/CHANGELOG.md", "/LICENSE", "/pyproject.toml"]
|
|
94
|
+
|
|
95
|
+
# ---------------------------------------------------------------------------
|
|
96
|
+
# Ruff (lint + format)
|
|
97
|
+
# ---------------------------------------------------------------------------
|
|
98
|
+
[tool.ruff]
|
|
99
|
+
line-length = 100
|
|
100
|
+
target-version = "py310"
|
|
101
|
+
src = ["src", "tests"]
|
|
102
|
+
|
|
103
|
+
[tool.ruff.lint]
|
|
104
|
+
select = ["E", "F", "W", "I", "UP", "T20"]
|
|
105
|
+
# T20 (flake8-print): no `print` in library code (ADR-0003).
|
|
106
|
+
# E501 is left to the formatter (do not fail on legacy long lines during migration).
|
|
107
|
+
ignore = ["E501"]
|
|
108
|
+
|
|
109
|
+
[tool.ruff.lint.per-file-ignores]
|
|
110
|
+
"tests/**" = ["T20"]
|
|
111
|
+
"docs/**" = ["T20", "E", "F"]
|
|
112
|
+
# showcase notebooks print their results by design (like tests); F/E/I rules still apply.
|
|
113
|
+
"notebooks/**" = ["T20"]
|
|
114
|
+
|
|
115
|
+
# ---------------------------------------------------------------------------
|
|
116
|
+
# Mypy — strict on the domain core, lenient elsewhere (ADR-0004)
|
|
117
|
+
# ---------------------------------------------------------------------------
|
|
118
|
+
[tool.mypy]
|
|
119
|
+
# 3.12: numpy>=2.5 ships stubs using PEP 695 `type` statements that mypy parses only under a
|
|
120
|
+
# 3.12 target; runtime 3.10 support is enforced by the test matrix + ruff (UP, target py310).
|
|
121
|
+
python_version = "3.12"
|
|
122
|
+
warn_unused_configs = true
|
|
123
|
+
ignore_missing_imports = true
|
|
124
|
+
warn_redundant_casts = true
|
|
125
|
+
|
|
126
|
+
[[tool.mypy.overrides]]
|
|
127
|
+
module = "honestml.core.*"
|
|
128
|
+
disallow_untyped_defs = true
|
|
129
|
+
disallow_incomplete_defs = true
|
|
130
|
+
check_untyped_defs = true
|
|
131
|
+
no_implicit_optional = true
|
|
132
|
+
warn_return_any = true
|
|
133
|
+
strict_equality = true
|
|
134
|
+
|
|
135
|
+
# ---------------------------------------------------------------------------
|
|
136
|
+
# Pytest
|
|
137
|
+
# ---------------------------------------------------------------------------
|
|
138
|
+
[tool.pytest.ini_options]
|
|
139
|
+
testpaths = ["tests"]
|
|
140
|
+
markers = [
|
|
141
|
+
"unit: fast pure-core tests (default)",
|
|
142
|
+
"property: hypothesis property tests (leakage/monotonicity invariants)",
|
|
143
|
+
"golden: determinism of metrics on synthetic data with fixed seed",
|
|
144
|
+
"slow: full pipeline run on synthetic data",
|
|
145
|
+
]
|