alloygbm 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. alloygbm-0.1.0/Cargo.lock +284 -0
  2. alloygbm-0.1.0/Cargo.toml +15 -0
  3. alloygbm-0.1.0/LICENSE +21 -0
  4. alloygbm-0.1.0/PKG-INFO +210 -0
  5. alloygbm-0.1.0/README.md +175 -0
  6. alloygbm-0.1.0/bindings/python/Cargo.toml +30 -0
  7. alloygbm-0.1.0/bindings/python/alloygbm/__init__.py +28 -0
  8. alloygbm-0.1.0/bindings/python/alloygbm/evaluation.py +190 -0
  9. alloygbm-0.1.0/bindings/python/alloygbm/regressor.py +1747 -0
  10. alloygbm-0.1.0/bindings/python/alloygbm/validation.py +150 -0
  11. alloygbm-0.1.0/bindings/python/src/lib.rs +1162 -0
  12. alloygbm-0.1.0/bindings/python/tests/test_evaluation_metrics.py +193 -0
  13. alloygbm-0.1.0/bindings/python/tests/test_native_runtime_integration.py +594 -0
  14. alloygbm-0.1.0/bindings/python/tests/test_regressor_contract.py +1181 -0
  15. alloygbm-0.1.0/bindings/python/tests/test_validation_splits.py +142 -0
  16. alloygbm-0.1.0/crates/backend_cpu/Cargo.toml +22 -0
  17. alloygbm-0.1.0/crates/backend_cpu/benches/histogram_kernels.rs +273 -0
  18. alloygbm-0.1.0/crates/backend_cpu/src/lib.rs +1397 -0
  19. alloygbm-0.1.0/crates/categorical/Cargo.toml +16 -0
  20. alloygbm-0.1.0/crates/categorical/src/lib.rs +495 -0
  21. alloygbm-0.1.0/crates/core/Cargo.toml +13 -0
  22. alloygbm-0.1.0/crates/core/src/lib.rs +1909 -0
  23. alloygbm-0.1.0/crates/engine/Cargo.toml +17 -0
  24. alloygbm-0.1.0/crates/engine/src/lib.rs +4452 -0
  25. alloygbm-0.1.0/crates/predictor/Cargo.toml +21 -0
  26. alloygbm-0.1.0/crates/predictor/src/lib.rs +738 -0
  27. alloygbm-0.1.0/crates/shap/Cargo.toml +20 -0
  28. alloygbm-0.1.0/crates/shap/src/lib.rs +836 -0
  29. alloygbm-0.1.0/pyproject.toml +54 -0
@@ -0,0 +1,284 @@
1
+ # This file is automatically @generated by Cargo.
2
+ # It is not intended for manual editing.
3
+ version = 4
4
+
5
+ [[package]]
6
+ name = "alloygbm-backend-cpu"
7
+ version = "0.1.0"
8
+ dependencies = [
9
+ "alloygbm-core",
10
+ "alloygbm-engine",
11
+ "rayon",
12
+ ]
13
+
14
+ [[package]]
15
+ name = "alloygbm-categorical"
16
+ version = "0.1.0"
17
+ dependencies = [
18
+ "alloygbm-core",
19
+ ]
20
+
21
+ [[package]]
22
+ name = "alloygbm-core"
23
+ version = "0.1.0"
24
+
25
+ [[package]]
26
+ name = "alloygbm-engine"
27
+ version = "0.1.0"
28
+ dependencies = [
29
+ "alloygbm-categorical",
30
+ "alloygbm-core",
31
+ ]
32
+
33
+ [[package]]
34
+ name = "alloygbm-predictor"
35
+ version = "0.1.0"
36
+ dependencies = [
37
+ "alloygbm-backend-cpu",
38
+ "alloygbm-core",
39
+ "alloygbm-engine",
40
+ "rayon",
41
+ ]
42
+
43
+ [[package]]
44
+ name = "alloygbm-python"
45
+ version = "0.1.0"
46
+ dependencies = [
47
+ "alloygbm-backend-cpu",
48
+ "alloygbm-categorical",
49
+ "alloygbm-core",
50
+ "alloygbm-engine",
51
+ "alloygbm-predictor",
52
+ "alloygbm-shap",
53
+ "pyo3",
54
+ ]
55
+
56
+ [[package]]
57
+ name = "alloygbm-shap"
58
+ version = "0.1.0"
59
+ dependencies = [
60
+ "alloygbm-core",
61
+ "alloygbm-engine",
62
+ "alloygbm-predictor",
63
+ ]
64
+
65
+ [[package]]
66
+ name = "autocfg"
67
+ version = "1.5.0"
68
+ source = "registry+https://github.com/rust-lang/crates.io-index"
69
+ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
70
+
71
+ [[package]]
72
+ name = "cfg-if"
73
+ version = "1.0.4"
74
+ source = "registry+https://github.com/rust-lang/crates.io-index"
75
+ checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
76
+
77
+ [[package]]
78
+ name = "crossbeam-deque"
79
+ version = "0.8.6"
80
+ source = "registry+https://github.com/rust-lang/crates.io-index"
81
+ checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
82
+ dependencies = [
83
+ "crossbeam-epoch",
84
+ "crossbeam-utils",
85
+ ]
86
+
87
+ [[package]]
88
+ name = "crossbeam-epoch"
89
+ version = "0.9.18"
90
+ source = "registry+https://github.com/rust-lang/crates.io-index"
91
+ checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
92
+ dependencies = [
93
+ "crossbeam-utils",
94
+ ]
95
+
96
+ [[package]]
97
+ name = "crossbeam-utils"
98
+ version = "0.8.21"
99
+ source = "registry+https://github.com/rust-lang/crates.io-index"
100
+ checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
101
+
102
+ [[package]]
103
+ name = "either"
104
+ version = "1.15.0"
105
+ source = "registry+https://github.com/rust-lang/crates.io-index"
106
+ checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
107
+
108
+ [[package]]
109
+ name = "heck"
110
+ version = "0.5.0"
111
+ source = "registry+https://github.com/rust-lang/crates.io-index"
112
+ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
113
+
114
+ [[package]]
115
+ name = "indoc"
116
+ version = "2.0.7"
117
+ source = "registry+https://github.com/rust-lang/crates.io-index"
118
+ checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706"
119
+ dependencies = [
120
+ "rustversion",
121
+ ]
122
+
123
+ [[package]]
124
+ name = "libc"
125
+ version = "0.2.183"
126
+ source = "registry+https://github.com/rust-lang/crates.io-index"
127
+ checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d"
128
+
129
+ [[package]]
130
+ name = "memoffset"
131
+ version = "0.9.1"
132
+ source = "registry+https://github.com/rust-lang/crates.io-index"
133
+ checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
134
+ dependencies = [
135
+ "autocfg",
136
+ ]
137
+
138
+ [[package]]
139
+ name = "once_cell"
140
+ version = "1.21.4"
141
+ source = "registry+https://github.com/rust-lang/crates.io-index"
142
+ checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
143
+
144
+ [[package]]
145
+ name = "portable-atomic"
146
+ version = "1.13.1"
147
+ source = "registry+https://github.com/rust-lang/crates.io-index"
148
+ checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
149
+
150
+ [[package]]
151
+ name = "proc-macro2"
152
+ version = "1.0.106"
153
+ source = "registry+https://github.com/rust-lang/crates.io-index"
154
+ checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
155
+ dependencies = [
156
+ "unicode-ident",
157
+ ]
158
+
159
+ [[package]]
160
+ name = "pyo3"
161
+ version = "0.23.5"
162
+ source = "registry+https://github.com/rust-lang/crates.io-index"
163
+ checksum = "7778bffd85cf38175ac1f545509665d0b9b92a198ca7941f131f85f7a4f9a872"
164
+ dependencies = [
165
+ "cfg-if",
166
+ "indoc",
167
+ "libc",
168
+ "memoffset",
169
+ "once_cell",
170
+ "portable-atomic",
171
+ "pyo3-build-config",
172
+ "pyo3-ffi",
173
+ "pyo3-macros",
174
+ "unindent",
175
+ ]
176
+
177
+ [[package]]
178
+ name = "pyo3-build-config"
179
+ version = "0.23.5"
180
+ source = "registry+https://github.com/rust-lang/crates.io-index"
181
+ checksum = "94f6cbe86ef3bf18998d9df6e0f3fc1050a8c5efa409bf712e661a4366e010fb"
182
+ dependencies = [
183
+ "once_cell",
184
+ "target-lexicon",
185
+ ]
186
+
187
+ [[package]]
188
+ name = "pyo3-ffi"
189
+ version = "0.23.5"
190
+ source = "registry+https://github.com/rust-lang/crates.io-index"
191
+ checksum = "e9f1b4c431c0bb1c8fb0a338709859eed0d030ff6daa34368d3b152a63dfdd8d"
192
+ dependencies = [
193
+ "libc",
194
+ "pyo3-build-config",
195
+ ]
196
+
197
+ [[package]]
198
+ name = "pyo3-macros"
199
+ version = "0.23.5"
200
+ source = "registry+https://github.com/rust-lang/crates.io-index"
201
+ checksum = "fbc2201328f63c4710f68abdf653c89d8dbc2858b88c5d88b0ff38a75288a9da"
202
+ dependencies = [
203
+ "proc-macro2",
204
+ "pyo3-macros-backend",
205
+ "quote",
206
+ "syn",
207
+ ]
208
+
209
+ [[package]]
210
+ name = "pyo3-macros-backend"
211
+ version = "0.23.5"
212
+ source = "registry+https://github.com/rust-lang/crates.io-index"
213
+ checksum = "fca6726ad0f3da9c9de093d6f116a93c1a38e417ed73bf138472cf4064f72028"
214
+ dependencies = [
215
+ "heck",
216
+ "proc-macro2",
217
+ "pyo3-build-config",
218
+ "quote",
219
+ "syn",
220
+ ]
221
+
222
+ [[package]]
223
+ name = "quote"
224
+ version = "1.0.45"
225
+ source = "registry+https://github.com/rust-lang/crates.io-index"
226
+ checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
227
+ dependencies = [
228
+ "proc-macro2",
229
+ ]
230
+
231
+ [[package]]
232
+ name = "rayon"
233
+ version = "1.11.0"
234
+ source = "registry+https://github.com/rust-lang/crates.io-index"
235
+ checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
236
+ dependencies = [
237
+ "either",
238
+ "rayon-core",
239
+ ]
240
+
241
+ [[package]]
242
+ name = "rayon-core"
243
+ version = "1.13.0"
244
+ source = "registry+https://github.com/rust-lang/crates.io-index"
245
+ checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
246
+ dependencies = [
247
+ "crossbeam-deque",
248
+ "crossbeam-utils",
249
+ ]
250
+
251
+ [[package]]
252
+ name = "rustversion"
253
+ version = "1.0.22"
254
+ source = "registry+https://github.com/rust-lang/crates.io-index"
255
+ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
256
+
257
+ [[package]]
258
+ name = "syn"
259
+ version = "2.0.117"
260
+ source = "registry+https://github.com/rust-lang/crates.io-index"
261
+ checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
262
+ dependencies = [
263
+ "proc-macro2",
264
+ "quote",
265
+ "unicode-ident",
266
+ ]
267
+
268
+ [[package]]
269
+ name = "target-lexicon"
270
+ version = "0.12.16"
271
+ source = "registry+https://github.com/rust-lang/crates.io-index"
272
+ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1"
273
+
274
+ [[package]]
275
+ name = "unicode-ident"
276
+ version = "1.0.24"
277
+ source = "registry+https://github.com/rust-lang/crates.io-index"
278
+ checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
279
+
280
+ [[package]]
281
+ name = "unindent"
282
+ version = "0.2.4"
283
+ source = "registry+https://github.com/rust-lang/crates.io-index"
284
+ checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3"
@@ -0,0 +1,15 @@
1
+ [workspace]
2
+ members = ["crates/core", "crates/engine", "crates/backend_cpu", "crates/predictor", "crates/shap", "crates/categorical", "bindings/python"]
3
+ resolver = "2"
4
+
5
+ [workspace.package]
6
+ version = "0.1.0"
7
+ edition = "2024"
8
+ license = "MIT"
9
+ rust-version = "1.92.0"
10
+ homepage = "https://github.com/LGA-Personal/AlloyGBM"
11
+ repository = "https://github.com/LGA-Personal/AlloyGBM"
12
+ documentation = "https://alloygbm.readthedocs.io/en/latest/"
13
+
14
+ [workspace.lints.rust]
15
+ unsafe_code = "forbid"
alloygbm-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Logan Ashby
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,210 @@
1
+ Metadata-Version: 2.4
2
+ Name: alloygbm
3
+ Version: 0.1.0
4
+ Classifier: Development Status :: 3 - Alpha
5
+ Classifier: Intended Audience :: Developers
6
+ Classifier: Intended Audience :: Financial and Insurance Industry
7
+ Classifier: Intended Audience :: Science/Research
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Operating System :: MacOS
10
+ Classifier: Operating System :: POSIX :: Linux
11
+ Classifier: Programming Language :: Python
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3 :: Only
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Programming Language :: Rust
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ License-File: LICENSE
22
+ Summary: Rust-first gradient boosting for structured regression with time-aware validation utilities and Python bindings
23
+ Keywords: gradient boosting,gbdt,machine learning,tabular,time series,finance
24
+ Home-Page: https://github.com/LGA-Personal/AlloyGBM
25
+ Author-email: Logan Ashby <ashbylogan12@gmail.com>
26
+ License: MIT
27
+ Requires-Python: >=3.10
28
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
29
+ Project-URL: Benchmarks, https://github.com/LGA-Personal/AlloyGBM/tree/main/benchmarks
30
+ Project-URL: Documentation, https://alloygbm.readthedocs.io/en/latest/
31
+ Project-URL: Homepage, https://github.com/LGA-Personal/AlloyGBM
32
+ Project-URL: Issues, https://github.com/LGA-Personal/AlloyGBM/issues
33
+ Project-URL: Repository, https://github.com/LGA-Personal/AlloyGBM
34
+
35
+ # AlloyGBM
36
+
37
+ AlloyGBM is a Rust-first gradient boosting library for structured regression, with a Python API focused on fast native execution, deterministic training, and time-aware tabular workflows.
38
+
39
+ It is currently strongest on panel and finance-style regression problems where leakage-aware validation and practical iteration speed matter. It also includes native artifact prediction, SHAP explanations, and purged time-series split helpers in the Python package.
40
+
41
+ ## When To Use AlloyGBM
42
+
43
+ AlloyGBM is a good fit when you want:
44
+
45
+ - a native-backed gradient boosting regressor with a small Python API surface
46
+ - deterministic CPU training and inference
47
+ - time-aware validation helpers for forecasting or panel-style workflows
48
+ - native prediction from serialized artifacts
49
+ - SHAP-based local explanations and global feature importances
50
+
51
+ If you need the broadest possible objective support, classification, ranking, multiple categorical columns, or the strongest out-of-the-box results on generic tabular benchmarks, you should still expect XGBoost, LightGBM, or CatBoost to be stronger today.
52
+
53
+ ## Installation
54
+
55
+ PyPI:
56
+
57
+ ```bash
58
+ pip install alloygbm
59
+ ```
60
+
61
+ From source:
62
+
63
+ ```bash
64
+ python -m pip install --upgrade maturin
65
+ maturin develop --manifest-path bindings/python/Cargo.toml --release
66
+ ```
67
+
68
+ AlloyGBM currently targets Python `3.10+` and uses a native Rust extension module.
69
+
70
+ Initial `0.1.0` packaging policy:
71
+
72
+ - tested directly on macOS Apple Silicon
73
+ - planned wheel targets: macOS `arm64` and Linux `x86_64`
74
+ - Windows support is deferred until after `0.1.0`
75
+ - source distribution remains the fallback for unsupported environments
76
+
77
+ ## Minimal Example
78
+
79
+ ```python
80
+ from alloygbm import GBMRegressor, rmse
81
+
82
+ X_train = [
83
+ [0.0, 1.0],
84
+ [1.0, 0.0],
85
+ [2.0, 1.0],
86
+ [3.0, 0.0],
87
+ ]
88
+ y_train = [0.2, 0.9, 1.8, 2.7]
89
+
90
+ X_test = [
91
+ [1.5, 1.0],
92
+ [2.5, 0.0],
93
+ ]
94
+ y_test = [1.3, 2.3]
95
+
96
+ model = GBMRegressor(
97
+ learning_rate=0.05,
98
+ max_depth=6,
99
+ n_estimators=1200,
100
+ training_policy="auto",
101
+ deterministic=True,
102
+ seed=7,
103
+ )
104
+ model.fit(X_train, y_train)
105
+
106
+ predictions = model.predict(X_test)
107
+ print(predictions)
108
+ print(rmse(y_test, predictions))
109
+ ```
110
+
111
+ ## Time-Aware Validation Example
112
+
113
+ ```python
114
+ from alloygbm import GBMRegressor, purged_time_series_splits, rmse
115
+
116
+ rows = [
117
+ [0.1, 1.0],
118
+ [0.2, 1.1],
119
+ [0.4, 0.9],
120
+ [0.6, 1.2],
121
+ [0.8, 1.3],
122
+ [1.0, 1.4],
123
+ ]
124
+ targets = [0.0, 0.1, 0.2, 0.5, 0.8, 1.0]
125
+ time_index = [0, 0, 1, 1, 2, 2]
126
+
127
+ splits = purged_time_series_splits(
128
+ time_index,
129
+ n_splits=3,
130
+ purge_gap=0,
131
+ embargo=0,
132
+ )
133
+
134
+ fold_scores = []
135
+ for train_idx, test_idx in splits:
136
+     model = GBMRegressor(
137
+         learning_rate=0.05,
138
+         max_depth=6,
139
+         n_estimators=400,
140
+         deterministic=True,
141
+         seed=7,
142
+     )
143
+     X_train = [rows[i] for i in train_idx]
144
+     y_train = [targets[i] for i in train_idx]
145
+     X_test = [rows[i] for i in test_idx]
146
+     y_test = [targets[i] for i in test_idx]
147
+
148
+     model.fit(X_train, y_train)
149
+     fold_scores.append(rmse(y_test, model.predict(X_test)))
150
+
151
+ print(fold_scores)
152
+ ```
153
+
154
+ For panel data, use `purged_panel_splits(...)`.
155
+
156
+ ## Feature Summary
157
+
158
+ - Native Rust-backed training and prediction from Python
159
+ - `GBMRegressor` with deterministic training controls and dataset-aware `training_policy`
160
+ - Continuous-feature binning strategies: `linear`, `rank`, `quantile`
161
+ - Optional single-column categorical encoding path
162
+ - Artifact-backed prediction via `predict_from_artifact(...)`
163
+ - SHAP row explanations via `shap_values(...)`
164
+ - SHAP global feature importance via `feature_importances(...)`
165
+ - Time-aware validation helpers:
166
+   - `purged_time_series_splits(...)`
167
+   - `purged_panel_splits(...)`
168
+ - Metric helpers:
169
+   - `rmse`, `mae`, `r2_score`
170
+   - `pearson_correlation`, `rank_ic`, `hit_rate`, `icir`
171
+
172
+ ## Benchmark Snapshot
173
+
174
+ The current public benchmark suite compares AlloyGBM against XGBoost, LightGBM, and CatBoost on synthetic and real regression datasets.
175
+
176
+ Current headline results from the expanded suite:
177
+
178
+ - AlloyGBM is best on the `panel_time_series` benchmark across the tested profiles.
179
+ - AlloyGBM is strong on `dow_jones_financial`, with its best showing under the deeper low-learning-rate profile.
180
+ - AlloyGBM is competitive on `dense_numeric`, but still trails XGBoost and CatBoost on RMSE.
181
+ - AlloyGBM currently lags all three libraries on `california_housing` and `bike_sharing`.
182
+ - LightGBM is usually the fastest trainer in the comparison set.
183
+
184
+ The honest short version is:
185
+
186
+ - strong on `panel_time_series`
187
+ - strong on `dow_jones_financial`
188
+ - weaker on `california_housing` and `bike_sharing`
189
+
190
+ Benchmark tooling and methodology live in [benchmarks/README.md](https://github.com/LGA-Personal/AlloyGBM/blob/main/benchmarks/README.md).
191
+
192
+ ## Current Limitations
193
+
194
+ - Regression-only. Classification and ranking are not implemented yet.
195
+ - CPU-only runtime today.
196
+ - Single categorical feature support only.
197
+ - Best performance is still concentrated in time-aware and finance-style structured regression, not broad tabular dominance.
198
+ - The API is intentionally small and still evolving toward a more complete `0.x` user-facing surface.
199
+
200
+ ## Documentation
201
+
202
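+ - Docs index: [docs/README.md](https://github.com/LGA-Personal/AlloyGBM/blob/main/docs/README.md)
203
+ - Benchmark guide: [benchmarks/README.md](https://github.com/LGA-Personal/AlloyGBM/blob/main/benchmarks/README.md)
204
+ - Current roadmap: [docs/roadmap/current.md](https://github.com/LGA-Personal/AlloyGBM/blob/main/docs/roadmap/current.md)
205
+ - Archive: [docs/archive/README.md](https://github.com/LGA-Personal/AlloyGBM/blob/main/docs/archive/README.md)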
206
+
207
+ ## License
208
+
209
+ MIT. See [LICENSE](LICENSE).
210
+
@@ -0,0 +1,175 @@
1
+ # AlloyGBM
2
+
3
+ AlloyGBM is a Rust-first gradient boosting library for structured regression, with a Python API focused on fast native execution, deterministic training, and time-aware tabular workflows.
4
+
5
+ It is currently strongest on panel and finance-style regression problems where leakage-aware validation and practical iteration speed matter. It also includes native artifact prediction, SHAP explanations, and purged time-series split helpers in the Python package.
6
+
7
+ ## When To Use AlloyGBM
8
+
9
+ AlloyGBM is a good fit when you want:
10
+
11
+ - a native-backed gradient boosting regressor with a small Python API surface
12
+ - deterministic CPU training and inference
13
+ - time-aware validation helpers for forecasting or panel-style workflows
14
+ - native prediction from serialized artifacts
15
+ - SHAP-based local explanations and global feature importances
16
+
17
+ If you need the broadest possible objective support, classification, ranking, multiple categorical columns, or the strongest out-of-the-box results on generic tabular benchmarks, you should still expect XGBoost, LightGBM, or CatBoost to be stronger today.
18
+
19
+ ## Installation
20
+
21
+ PyPI:
22
+
23
+ ```bash
24
+ pip install alloygbm
25
+ ```
26
+
27
+ From source:
28
+
29
+ ```bash
30
+ python -m pip install --upgrade maturin
31
+ maturin develop --manifest-path bindings/python/Cargo.toml --release
32
+ ```
33
+
34
+ AlloyGBM currently targets Python `3.10+` and uses a native Rust extension module.
35
+
36
+ Initial `0.1.0` packaging policy:
37
+
38
+ - tested directly on macOS Apple Silicon
39
+ - planned wheel targets: macOS `arm64` and Linux `x86_64`
40
+ - Windows support is deferred until after `0.1.0`
41
+ - source distribution remains the fallback for unsupported environments
42
+
43
+ ## Minimal Example
44
+
45
+ ```python
46
+ from alloygbm import GBMRegressor, rmse
47
+
48
+ X_train = [
49
+ [0.0, 1.0],
50
+ [1.0, 0.0],
51
+ [2.0, 1.0],
52
+ [3.0, 0.0],
53
+ ]
54
+ y_train = [0.2, 0.9, 1.8, 2.7]
55
+
56
+ X_test = [
57
+ [1.5, 1.0],
58
+ [2.5, 0.0],
59
+ ]
60
+ y_test = [1.3, 2.3]
61
+
62
+ model = GBMRegressor(
63
+ learning_rate=0.05,
64
+ max_depth=6,
65
+ n_estimators=1200,
66
+ training_policy="auto",
67
+ deterministic=True,
68
+ seed=7,
69
+ )
70
+ model.fit(X_train, y_train)
71
+
72
+ predictions = model.predict(X_test)
73
+ print(predictions)
74
+ print(rmse(y_test, predictions))
75
+ ```
76
+
77
+ ## Time-Aware Validation Example
78
+
79
+ ```python
80
+ from alloygbm import GBMRegressor, purged_time_series_splits, rmse
81
+
82
+ rows = [
83
+ [0.1, 1.0],
84
+ [0.2, 1.1],
85
+ [0.4, 0.9],
86
+ [0.6, 1.2],
87
+ [0.8, 1.3],
88
+ [1.0, 1.4],
89
+ ]
90
+ targets = [0.0, 0.1, 0.2, 0.5, 0.8, 1.0]
91
+ time_index = [0, 0, 1, 1, 2, 2]
92
+
93
+ splits = purged_time_series_splits(
94
+ time_index,
95
+ n_splits=3,
96
+ purge_gap=0,
97
+ embargo=0,
98
+ )
99
+
100
+ fold_scores = []
101
+ for train_idx, test_idx in splits:
102
+     model = GBMRegressor(
103
+         learning_rate=0.05,
104
+         max_depth=6,
105
+         n_estimators=400,
106
+         deterministic=True,
107
+         seed=7,
108
+     )
109
+     X_train = [rows[i] for i in train_idx]
110
+     y_train = [targets[i] for i in train_idx]
111
+     X_test = [rows[i] for i in test_idx]
112
+     y_test = [targets[i] for i in test_idx]
113
+
114
+     model.fit(X_train, y_train)
115
+     fold_scores.append(rmse(y_test, model.predict(X_test)))
116
+
117
+ print(fold_scores)
118
+ ```
119
+
120
+ For panel data, use `purged_panel_splits(...)`.
121
+
122
+ ## Feature Summary
123
+
124
+ - Native Rust-backed training and prediction from Python
125
+ - `GBMRegressor` with deterministic training controls and dataset-aware `training_policy`
126
+ - Continuous-feature binning strategies: `linear`, `rank`, `quantile`
127
+ - Optional single-column categorical encoding path
128
+ - Artifact-backed prediction via `predict_from_artifact(...)`
129
+ - SHAP row explanations via `shap_values(...)`
130
+ - SHAP global feature importance via `feature_importances(...)`
131
+ - Time-aware validation helpers:
132
+   - `purged_time_series_splits(...)`
133
+   - `purged_panel_splits(...)`
134
+ - Metric helpers:
135
+   - `rmse`, `mae`, `r2_score`
136
+   - `pearson_correlation`, `rank_ic`, `hit_rate`, `icir`
137
+
138
+ ## Benchmark Snapshot
139
+
140
+ The current public benchmark suite compares AlloyGBM against XGBoost, LightGBM, and CatBoost on synthetic and real regression datasets.
141
+
142
+ Current headline results from the expanded suite:
143
+
144
+ - AlloyGBM is best on the `panel_time_series` benchmark across the tested profiles.
145
+ - AlloyGBM is strong on `dow_jones_financial`, with its best showing under the deeper low-learning-rate profile.
146
+ - AlloyGBM is competitive on `dense_numeric`, but still trails XGBoost and CatBoost on RMSE.
147
+ - AlloyGBM currently lags all three libraries on `california_housing` and `bike_sharing`.
148
+ - LightGBM is usually the fastest trainer in the comparison set.
149
+
150
+ The honest short version is:
151
+
152
+ - strong on `panel_time_series`
153
+ - strong on `dow_jones_financial`
154
+ - weaker on `california_housing` and `bike_sharing`
155
+
156
+ Benchmark tooling and methodology live in [benchmarks/README.md](https://github.com/LGA-Personal/AlloyGBM/blob/main/benchmarks/README.md).
157
+
158
+ ## Current Limitations
159
+
160
+ - Regression-only. Classification and ranking are not implemented yet.
161
+ - CPU-only runtime today.
162
+ - Single categorical feature support only.
163
+ - Best performance is still concentrated in time-aware and finance-style structured regression, not broad tabular dominance.
164
+ - The API is intentionally small and still evolving toward a more complete `0.x` user-facing surface.
165
+
166
+ ## Documentation
167
+
168
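+ - Docs index: [docs/README.md](https://github.com/LGA-Personal/AlloyGBM/blob/main/docs/README.md)
169
+ - Benchmark guide: [benchmarks/README.md](https://github.com/LGA-Personal/AlloyGBM/blob/main/benchmarks/README.md)
170
+ - Current roadmap: [docs/roadmap/current.md](https://github.com/LGA-Personal/AlloyGBM/blob/main/docs/roadmap/current.md)
171
+ - Archive: [docs/archive/README.md](https://github.com/LGA-Personal/AlloyGBM/blob/main/docs/archive/README.md)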
172
+
173
+ ## License
174
+
175
+ MIT. See [LICENSE](LICENSE).