alloygbm 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alloygbm-0.1.0/Cargo.lock +284 -0
- alloygbm-0.1.0/Cargo.toml +15 -0
- alloygbm-0.1.0/LICENSE +21 -0
- alloygbm-0.1.0/PKG-INFO +210 -0
- alloygbm-0.1.0/README.md +175 -0
- alloygbm-0.1.0/bindings/python/Cargo.toml +30 -0
- alloygbm-0.1.0/bindings/python/alloygbm/__init__.py +28 -0
- alloygbm-0.1.0/bindings/python/alloygbm/evaluation.py +190 -0
- alloygbm-0.1.0/bindings/python/alloygbm/regressor.py +1747 -0
- alloygbm-0.1.0/bindings/python/alloygbm/validation.py +150 -0
- alloygbm-0.1.0/bindings/python/src/lib.rs +1162 -0
- alloygbm-0.1.0/bindings/python/tests/test_evaluation_metrics.py +193 -0
- alloygbm-0.1.0/bindings/python/tests/test_native_runtime_integration.py +594 -0
- alloygbm-0.1.0/bindings/python/tests/test_regressor_contract.py +1181 -0
- alloygbm-0.1.0/bindings/python/tests/test_validation_splits.py +142 -0
- alloygbm-0.1.0/crates/backend_cpu/Cargo.toml +22 -0
- alloygbm-0.1.0/crates/backend_cpu/benches/histogram_kernels.rs +273 -0
- alloygbm-0.1.0/crates/backend_cpu/src/lib.rs +1397 -0
- alloygbm-0.1.0/crates/categorical/Cargo.toml +16 -0
- alloygbm-0.1.0/crates/categorical/src/lib.rs +495 -0
- alloygbm-0.1.0/crates/core/Cargo.toml +13 -0
- alloygbm-0.1.0/crates/core/src/lib.rs +1909 -0
- alloygbm-0.1.0/crates/engine/Cargo.toml +17 -0
- alloygbm-0.1.0/crates/engine/src/lib.rs +4452 -0
- alloygbm-0.1.0/crates/predictor/Cargo.toml +21 -0
- alloygbm-0.1.0/crates/predictor/src/lib.rs +738 -0
- alloygbm-0.1.0/crates/shap/Cargo.toml +20 -0
- alloygbm-0.1.0/crates/shap/src/lib.rs +836 -0
- alloygbm-0.1.0/pyproject.toml +54 -0
alloygbm-0.1.0/Cargo.lock
ADDED
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
# This file is automatically @generated by Cargo.
|
|
2
|
+
# It is not intended for manual editing.
|
|
3
|
+
version = 4
|
|
4
|
+
|
|
5
|
+
[[package]]
|
|
6
|
+
name = "alloygbm-backend-cpu"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
dependencies = [
|
|
9
|
+
"alloygbm-core",
|
|
10
|
+
"alloygbm-engine",
|
|
11
|
+
"rayon",
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
[[package]]
|
|
15
|
+
name = "alloygbm-categorical"
|
|
16
|
+
version = "0.1.0"
|
|
17
|
+
dependencies = [
|
|
18
|
+
"alloygbm-core",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
[[package]]
|
|
22
|
+
name = "alloygbm-core"
|
|
23
|
+
version = "0.1.0"
|
|
24
|
+
|
|
25
|
+
[[package]]
|
|
26
|
+
name = "alloygbm-engine"
|
|
27
|
+
version = "0.1.0"
|
|
28
|
+
dependencies = [
|
|
29
|
+
"alloygbm-categorical",
|
|
30
|
+
"alloygbm-core",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
[[package]]
|
|
34
|
+
name = "alloygbm-predictor"
|
|
35
|
+
version = "0.1.0"
|
|
36
|
+
dependencies = [
|
|
37
|
+
"alloygbm-backend-cpu",
|
|
38
|
+
"alloygbm-core",
|
|
39
|
+
"alloygbm-engine",
|
|
40
|
+
"rayon",
|
|
41
|
+
]
|
|
42
|
+
|
|
43
|
+
[[package]]
|
|
44
|
+
name = "alloygbm-python"
|
|
45
|
+
version = "0.1.0"
|
|
46
|
+
dependencies = [
|
|
47
|
+
"alloygbm-backend-cpu",
|
|
48
|
+
"alloygbm-categorical",
|
|
49
|
+
"alloygbm-core",
|
|
50
|
+
"alloygbm-engine",
|
|
51
|
+
"alloygbm-predictor",
|
|
52
|
+
"alloygbm-shap",
|
|
53
|
+
"pyo3",
|
|
54
|
+
]
|
|
55
|
+
|
|
56
|
+
[[package]]
|
|
57
|
+
name = "alloygbm-shap"
|
|
58
|
+
version = "0.1.0"
|
|
59
|
+
dependencies = [
|
|
60
|
+
"alloygbm-core",
|
|
61
|
+
"alloygbm-engine",
|
|
62
|
+
"alloygbm-predictor",
|
|
63
|
+
]
|
|
64
|
+
|
|
65
|
+
[[package]]
|
|
66
|
+
name = "autocfg"
|
|
67
|
+
version = "1.5.0"
|
|
68
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
69
|
+
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
|
|
70
|
+
|
|
71
|
+
[[package]]
|
|
72
|
+
name = "cfg-if"
|
|
73
|
+
version = "1.0.4"
|
|
74
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
75
|
+
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
|
|
76
|
+
|
|
77
|
+
[[package]]
|
|
78
|
+
name = "crossbeam-deque"
|
|
79
|
+
version = "0.8.6"
|
|
80
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
81
|
+
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
|
|
82
|
+
dependencies = [
|
|
83
|
+
"crossbeam-epoch",
|
|
84
|
+
"crossbeam-utils",
|
|
85
|
+
]
|
|
86
|
+
|
|
87
|
+
[[package]]
|
|
88
|
+
name = "crossbeam-epoch"
|
|
89
|
+
version = "0.9.18"
|
|
90
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
91
|
+
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
|
|
92
|
+
dependencies = [
|
|
93
|
+
"crossbeam-utils",
|
|
94
|
+
]
|
|
95
|
+
|
|
96
|
+
[[package]]
|
|
97
|
+
name = "crossbeam-utils"
|
|
98
|
+
version = "0.8.21"
|
|
99
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
100
|
+
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
|
|
101
|
+
|
|
102
|
+
[[package]]
|
|
103
|
+
name = "either"
|
|
104
|
+
version = "1.15.0"
|
|
105
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
106
|
+
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
|
107
|
+
|
|
108
|
+
[[package]]
|
|
109
|
+
name = "heck"
|
|
110
|
+
version = "0.5.0"
|
|
111
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
112
|
+
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
|
113
|
+
|
|
114
|
+
[[package]]
|
|
115
|
+
name = "indoc"
|
|
116
|
+
version = "2.0.7"
|
|
117
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
118
|
+
checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706"
|
|
119
|
+
dependencies = [
|
|
120
|
+
"rustversion",
|
|
121
|
+
]
|
|
122
|
+
|
|
123
|
+
[[package]]
|
|
124
|
+
name = "libc"
|
|
125
|
+
version = "0.2.183"
|
|
126
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
127
|
+
checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d"
|
|
128
|
+
|
|
129
|
+
[[package]]
|
|
130
|
+
name = "memoffset"
|
|
131
|
+
version = "0.9.1"
|
|
132
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
133
|
+
checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
|
|
134
|
+
dependencies = [
|
|
135
|
+
"autocfg",
|
|
136
|
+
]
|
|
137
|
+
|
|
138
|
+
[[package]]
|
|
139
|
+
name = "once_cell"
|
|
140
|
+
version = "1.21.4"
|
|
141
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
142
|
+
checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
|
|
143
|
+
|
|
144
|
+
[[package]]
|
|
145
|
+
name = "portable-atomic"
|
|
146
|
+
version = "1.13.1"
|
|
147
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
148
|
+
checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
|
|
149
|
+
|
|
150
|
+
[[package]]
|
|
151
|
+
name = "proc-macro2"
|
|
152
|
+
version = "1.0.106"
|
|
153
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
154
|
+
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
|
|
155
|
+
dependencies = [
|
|
156
|
+
"unicode-ident",
|
|
157
|
+
]
|
|
158
|
+
|
|
159
|
+
[[package]]
|
|
160
|
+
name = "pyo3"
|
|
161
|
+
version = "0.23.5"
|
|
162
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
163
|
+
checksum = "7778bffd85cf38175ac1f545509665d0b9b92a198ca7941f131f85f7a4f9a872"
|
|
164
|
+
dependencies = [
|
|
165
|
+
"cfg-if",
|
|
166
|
+
"indoc",
|
|
167
|
+
"libc",
|
|
168
|
+
"memoffset",
|
|
169
|
+
"once_cell",
|
|
170
|
+
"portable-atomic",
|
|
171
|
+
"pyo3-build-config",
|
|
172
|
+
"pyo3-ffi",
|
|
173
|
+
"pyo3-macros",
|
|
174
|
+
"unindent",
|
|
175
|
+
]
|
|
176
|
+
|
|
177
|
+
[[package]]
|
|
178
|
+
name = "pyo3-build-config"
|
|
179
|
+
version = "0.23.5"
|
|
180
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
181
|
+
checksum = "94f6cbe86ef3bf18998d9df6e0f3fc1050a8c5efa409bf712e661a4366e010fb"
|
|
182
|
+
dependencies = [
|
|
183
|
+
"once_cell",
|
|
184
|
+
"target-lexicon",
|
|
185
|
+
]
|
|
186
|
+
|
|
187
|
+
[[package]]
|
|
188
|
+
name = "pyo3-ffi"
|
|
189
|
+
version = "0.23.5"
|
|
190
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
191
|
+
checksum = "e9f1b4c431c0bb1c8fb0a338709859eed0d030ff6daa34368d3b152a63dfdd8d"
|
|
192
|
+
dependencies = [
|
|
193
|
+
"libc",
|
|
194
|
+
"pyo3-build-config",
|
|
195
|
+
]
|
|
196
|
+
|
|
197
|
+
[[package]]
|
|
198
|
+
name = "pyo3-macros"
|
|
199
|
+
version = "0.23.5"
|
|
200
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
201
|
+
checksum = "fbc2201328f63c4710f68abdf653c89d8dbc2858b88c5d88b0ff38a75288a9da"
|
|
202
|
+
dependencies = [
|
|
203
|
+
"proc-macro2",
|
|
204
|
+
"pyo3-macros-backend",
|
|
205
|
+
"quote",
|
|
206
|
+
"syn",
|
|
207
|
+
]
|
|
208
|
+
|
|
209
|
+
[[package]]
|
|
210
|
+
name = "pyo3-macros-backend"
|
|
211
|
+
version = "0.23.5"
|
|
212
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
213
|
+
checksum = "fca6726ad0f3da9c9de093d6f116a93c1a38e417ed73bf138472cf4064f72028"
|
|
214
|
+
dependencies = [
|
|
215
|
+
"heck",
|
|
216
|
+
"proc-macro2",
|
|
217
|
+
"pyo3-build-config",
|
|
218
|
+
"quote",
|
|
219
|
+
"syn",
|
|
220
|
+
]
|
|
221
|
+
|
|
222
|
+
[[package]]
|
|
223
|
+
name = "quote"
|
|
224
|
+
version = "1.0.45"
|
|
225
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
226
|
+
checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
|
|
227
|
+
dependencies = [
|
|
228
|
+
"proc-macro2",
|
|
229
|
+
]
|
|
230
|
+
|
|
231
|
+
[[package]]
|
|
232
|
+
name = "rayon"
|
|
233
|
+
version = "1.11.0"
|
|
234
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
235
|
+
checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
|
|
236
|
+
dependencies = [
|
|
237
|
+
"either",
|
|
238
|
+
"rayon-core",
|
|
239
|
+
]
|
|
240
|
+
|
|
241
|
+
[[package]]
|
|
242
|
+
name = "rayon-core"
|
|
243
|
+
version = "1.13.0"
|
|
244
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
245
|
+
checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
|
|
246
|
+
dependencies = [
|
|
247
|
+
"crossbeam-deque",
|
|
248
|
+
"crossbeam-utils",
|
|
249
|
+
]
|
|
250
|
+
|
|
251
|
+
[[package]]
|
|
252
|
+
name = "rustversion"
|
|
253
|
+
version = "1.0.22"
|
|
254
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
255
|
+
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
|
|
256
|
+
|
|
257
|
+
[[package]]
|
|
258
|
+
name = "syn"
|
|
259
|
+
version = "2.0.117"
|
|
260
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
261
|
+
checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
|
|
262
|
+
dependencies = [
|
|
263
|
+
"proc-macro2",
|
|
264
|
+
"quote",
|
|
265
|
+
"unicode-ident",
|
|
266
|
+
]
|
|
267
|
+
|
|
268
|
+
[[package]]
|
|
269
|
+
name = "target-lexicon"
|
|
270
|
+
version = "0.12.16"
|
|
271
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
272
|
+
checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1"
|
|
273
|
+
|
|
274
|
+
[[package]]
|
|
275
|
+
name = "unicode-ident"
|
|
276
|
+
version = "1.0.24"
|
|
277
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
278
|
+
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
|
279
|
+
|
|
280
|
+
[[package]]
|
|
281
|
+
name = "unindent"
|
|
282
|
+
version = "0.2.4"
|
|
283
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
284
|
+
checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3"
|
alloygbm-0.1.0/Cargo.toml
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
[workspace]
|
|
2
|
+
members = ["crates/core", "crates/engine", "crates/backend_cpu", "crates/predictor", "crates/shap", "crates/categorical", "bindings/python"]
|
|
3
|
+
resolver = "2"
|
|
4
|
+
|
|
5
|
+
[workspace.package]
|
|
6
|
+
version = "0.1.0"
|
|
7
|
+
edition = "2024"
|
|
8
|
+
license = "MIT"
|
|
9
|
+
rust-version = "1.92.0"
|
|
10
|
+
homepage = "https://github.com/LGA-Personal/AlloyGBM"
|
|
11
|
+
repository = "https://github.com/LGA-Personal/AlloyGBM"
|
|
12
|
+
documentation = "https://alloygbm.readthedocs.io/en/latest/"
|
|
13
|
+
|
|
14
|
+
[workspace.lints.rust]
|
|
15
|
+
unsafe_code = "forbid"
|
alloygbm-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Logan Ashby
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
alloygbm-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: alloygbm
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Classifier: Development Status :: 3 - Alpha
|
|
5
|
+
Classifier: Intended Audience :: Developers
|
|
6
|
+
Classifier: Intended Audience :: Financial and Insurance Industry
|
|
7
|
+
Classifier: Intended Audience :: Science/Research
|
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
9
|
+
Classifier: Operating System :: MacOS
|
|
10
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
11
|
+
Classifier: Programming Language :: Python
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Programming Language :: Rust
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Summary: Rust-first gradient boosting for structured regression with time-aware validation utilities and Python bindings
|
|
23
|
+
Keywords: gradient boosting,gbdt,machine learning,tabular,time series,finance
|
|
24
|
+
Home-Page: https://github.com/LGA-Personal/AlloyGBM
|
|
25
|
+
Author-email: Logan Ashby <ashbylogan12@gmail.com>
|
|
26
|
+
License: MIT
|
|
27
|
+
Requires-Python: >=3.10
|
|
28
|
+
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
29
|
+
Project-URL: Benchmarks, https://github.com/LGA-Personal/AlloyGBM/tree/main/benchmarks
|
|
30
|
+
Project-URL: Documentation, https://alloygbm.readthedocs.io/en/latest/
|
|
31
|
+
Project-URL: Homepage, https://github.com/LGA-Personal/AlloyGBM
|
|
32
|
+
Project-URL: Issues, https://github.com/LGA-Personal/AlloyGBM/issues
|
|
33
|
+
Project-URL: Repository, https://github.com/LGA-Personal/AlloyGBM
|
|
34
|
+
|
|
35
|
+
# AlloyGBM
|
|
36
|
+
|
|
37
|
+
AlloyGBM is a Rust-first gradient boosting library for structured regression, with a Python API focused on fast native execution, deterministic training, and time-aware tabular workflows.
|
|
38
|
+
|
|
39
|
+
It is currently strongest on panel and finance-style regression problems where leakage-aware validation and practical iteration speed matter. It also includes native artifact prediction, SHAP explanations, and purged time-series split helpers in the Python package.
|
|
40
|
+
|
|
41
|
+
## When To Use AlloyGBM
|
|
42
|
+
|
|
43
|
+
AlloyGBM is a good fit when you want:
|
|
44
|
+
|
|
45
|
+
- a native-backed gradient boosting regressor with a small Python API surface
|
|
46
|
+
- deterministic CPU training and inference
|
|
47
|
+
- time-aware validation helpers for forecasting or panel-style workflows
|
|
48
|
+
- native prediction from serialized artifacts
|
|
49
|
+
- SHAP-based local explanations and global feature importances
|
|
50
|
+
|
|
51
|
+
If you need the broadest possible objective support, classification, ranking, multiple categorical columns, or the strongest out-of-the-box results on generic tabular benchmarks, you should still expect XGBoost, LightGBM, or CatBoost to be stronger today.
|
|
52
|
+
|
|
53
|
+
## Installation
|
|
54
|
+
|
|
55
|
+
PyPI:
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
pip install alloygbm
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
From source:
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
python -m pip install --upgrade maturin
|
|
65
|
+
maturin develop --manifest-path bindings/python/Cargo.toml --release
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
AlloyGBM currently targets Python `3.10+` and uses a native Rust extension module.
|
|
69
|
+
|
|
70
|
+
Initial `0.1.0` packaging policy:
|
|
71
|
+
|
|
72
|
+
- tested directly on macOS Apple Silicon
|
|
73
|
+
- planned wheel targets: macOS `arm64` and Linux `x86_64`
|
|
74
|
+
- Windows support is deferred until after `0.1.0`
|
|
75
|
+
- source distribution remains the fallback for unsupported environments
|
|
76
|
+
|
|
77
|
+
## Minimal Example
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
from alloygbm import GBMRegressor, rmse
|
|
81
|
+
|
|
82
|
+
X_train = [
|
|
83
|
+
[0.0, 1.0],
|
|
84
|
+
[1.0, 0.0],
|
|
85
|
+
[2.0, 1.0],
|
|
86
|
+
[3.0, 0.0],
|
|
87
|
+
]
|
|
88
|
+
y_train = [0.2, 0.9, 1.8, 2.7]
|
|
89
|
+
|
|
90
|
+
X_test = [
|
|
91
|
+
[1.5, 1.0],
|
|
92
|
+
[2.5, 0.0],
|
|
93
|
+
]
|
|
94
|
+
y_test = [1.3, 2.3]
|
|
95
|
+
|
|
96
|
+
model = GBMRegressor(
|
|
97
|
+
learning_rate=0.05,
|
|
98
|
+
max_depth=6,
|
|
99
|
+
n_estimators=1200,
|
|
100
|
+
training_policy="auto",
|
|
101
|
+
deterministic=True,
|
|
102
|
+
seed=7,
|
|
103
|
+
)
|
|
104
|
+
model.fit(X_train, y_train)
|
|
105
|
+
|
|
106
|
+
predictions = model.predict(X_test)
|
|
107
|
+
print(predictions)
|
|
108
|
+
print(rmse(y_test, predictions))
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
## Time-Aware Validation Example
|
|
112
|
+
|
|
113
|
+
```python
|
|
114
|
+
from alloygbm import GBMRegressor, purged_time_series_splits, rmse
|
|
115
|
+
|
|
116
|
+
rows = [
|
|
117
|
+
[0.1, 1.0],
|
|
118
|
+
[0.2, 1.1],
|
|
119
|
+
[0.4, 0.9],
|
|
120
|
+
[0.6, 1.2],
|
|
121
|
+
[0.8, 1.3],
|
|
122
|
+
[1.0, 1.4],
|
|
123
|
+
]
|
|
124
|
+
targets = [0.0, 0.1, 0.2, 0.5, 0.8, 1.0]
|
|
125
|
+
time_index = [0, 0, 1, 1, 2, 2]
|
|
126
|
+
|
|
127
|
+
splits = purged_time_series_splits(
|
|
128
|
+
time_index,
|
|
129
|
+
n_splits=3,
|
|
130
|
+
purge_gap=0,
|
|
131
|
+
embargo=0,
|
|
132
|
+
)
|
|
133
|
+
|
|
134
|
+
fold_scores = []
|
|
135
|
+
for train_idx, test_idx in splits:
|
|
136
|
+
    model = GBMRegressor(
|
|
137
|
+
        learning_rate=0.05,
|
|
138
|
+
        max_depth=6,
|
|
139
|
+
        n_estimators=400,
|
|
140
|
+
        deterministic=True,
|
|
141
|
+
        seed=7,
|
|
142
|
+
    )
|
|
143
|
+
    X_train = [rows[i] for i in train_idx]
|
|
144
|
+
    y_train = [targets[i] for i in train_idx]
|
|
145
|
+
    X_test = [rows[i] for i in test_idx]
|
|
146
|
+
    y_test = [targets[i] for i in test_idx]
|
|
147
|
+
|
|
148
|
+
    model.fit(X_train, y_train)
|
|
149
|
+
    fold_scores.append(rmse(y_test, model.predict(X_test)))
|
|
150
|
+
|
|
151
|
+
print(fold_scores)
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
For panel data, use `purged_panel_splits(...)`.
|
|
155
|
+
|
|
156
|
+
## Feature Summary
|
|
157
|
+
|
|
158
|
+
- Native Rust-backed training and prediction from Python
|
|
159
|
+
- `GBMRegressor` with deterministic training controls and dataset-aware `training_policy`
|
|
160
|
+
- Continuous-feature binning strategies: `linear`, `rank`, `quantile`
|
|
161
|
+
- Optional single-column categorical encoding path
|
|
162
|
+
- Artifact-backed prediction via `predict_from_artifact(...)`
|
|
163
|
+
- SHAP row explanations via `shap_values(...)`
|
|
164
|
+
- SHAP global feature importance via `feature_importances(...)`
|
|
165
|
+
- Time-aware validation helpers:
|
|
166
|
+
- `purged_time_series_splits(...)`
|
|
167
|
+
- `purged_panel_splits(...)`
|
|
168
|
+
- Metric helpers:
|
|
169
|
+
- `rmse`, `mae`, `r2_score`
|
|
170
|
+
- `pearson_correlation`, `rank_ic`, `hit_rate`, `icir`
|
|
171
|
+
|
|
172
|
+
## Benchmark Snapshot
|
|
173
|
+
|
|
174
|
+
The current public benchmark suite compares AlloyGBM against XGBoost, LightGBM, and CatBoost on synthetic and real regression datasets.
|
|
175
|
+
|
|
176
|
+
Current headline results from the expanded suite:
|
|
177
|
+
|
|
178
|
+
- AlloyGBM is best on the `panel_time_series` benchmark across the tested profiles.
|
|
179
|
+
- AlloyGBM is strong on `dow_jones_financial`, with its best showing under the deeper low-learning-rate profile.
|
|
180
|
+
- AlloyGBM is competitive on `dense_numeric`, but still trails XGBoost and CatBoost on RMSE.
|
|
181
|
+
- AlloyGBM currently lags all three libraries on `california_housing` and `bike_sharing`.
|
|
182
|
+
- LightGBM is usually the fastest trainer in the comparison set.
|
|
183
|
+
|
|
184
|
+
The honest short version is:
|
|
185
|
+
|
|
186
|
+
- strong on `panel_time_series`
|
|
187
|
+
- strong on `dow_jones_financial`
|
|
188
|
+
- weaker on `california_housing` and `bike_sharing`
|
|
189
|
+
|
|
190
|
+
Benchmark tooling and methodology live in [benchmarks/README.md](https://github.com/LGA-Personal/AlloyGBM/tree/main/benchmarks).
|
|
191
|
+
|
|
192
|
+
## Current Limitations
|
|
193
|
+
|
|
194
|
+
- Regression-only. Classification and ranking are not implemented yet.
|
|
195
|
+
- CPU-only runtime today.
|
|
196
|
+
- Single categorical feature support only.
|
|
197
|
+
- Best performance is still concentrated in time-aware and finance-style structured regression, not broad tabular dominance.
|
|
198
|
+
- The API is intentionally small and still evolving toward a more complete `0.x` user-facing surface.
|
|
199
|
+
|
|
200
|
+
## Documentation
|
|
201
|
+
|
|
202
|
+
- Docs index: [docs/README.md](docs/README.md)
|
|
203
|
+
- Benchmark guide: [benchmarks/README.md](https://github.com/LGA-Personal/AlloyGBM/tree/main/benchmarks)
|
|
204
|
+
- Current roadmap: [docs/roadmap/current.md](docs/roadmap/current.md)
|
|
205
|
+
- Archive: [docs/archive/README.md](docs/archive/README.md)
|
|
206
|
+
|
|
207
|
+
## License
|
|
208
|
+
|
|
209
|
+
MIT. See [LICENSE](LICENSE).
|
|
210
|
+
|
alloygbm-0.1.0/README.md
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
# AlloyGBM
|
|
2
|
+
|
|
3
|
+
AlloyGBM is a Rust-first gradient boosting library for structured regression, with a Python API focused on fast native execution, deterministic training, and time-aware tabular workflows.
|
|
4
|
+
|
|
5
|
+
It is currently strongest on panel and finance-style regression problems where leakage-aware validation and practical iteration speed matter. It also includes native artifact prediction, SHAP explanations, and purged time-series split helpers in the Python package.
|
|
6
|
+
|
|
7
|
+
## When To Use AlloyGBM
|
|
8
|
+
|
|
9
|
+
AlloyGBM is a good fit when you want:
|
|
10
|
+
|
|
11
|
+
- a native-backed gradient boosting regressor with a small Python API surface
|
|
12
|
+
- deterministic CPU training and inference
|
|
13
|
+
- time-aware validation helpers for forecasting or panel-style workflows
|
|
14
|
+
- native prediction from serialized artifacts
|
|
15
|
+
- SHAP-based local explanations and global feature importances
|
|
16
|
+
|
|
17
|
+
If you need the broadest possible objective support, classification, ranking, multiple categorical columns, or the strongest out-of-the-box results on generic tabular benchmarks, you should still expect XGBoost, LightGBM, or CatBoost to be stronger today.
|
|
18
|
+
|
|
19
|
+
## Installation
|
|
20
|
+
|
|
21
|
+
PyPI:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
pip install alloygbm
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
From source:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
python -m pip install --upgrade maturin
|
|
31
|
+
maturin develop --manifest-path bindings/python/Cargo.toml --release
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
AlloyGBM currently targets Python `3.10+` and uses a native Rust extension module.
|
|
35
|
+
|
|
36
|
+
Initial `0.1.0` packaging policy:
|
|
37
|
+
|
|
38
|
+
- tested directly on macOS Apple Silicon
|
|
39
|
+
- planned wheel targets: macOS `arm64` and Linux `x86_64`
|
|
40
|
+
- Windows support is deferred until after `0.1.0`
|
|
41
|
+
- source distribution remains the fallback for unsupported environments
|
|
42
|
+
|
|
43
|
+
## Minimal Example
|
|
44
|
+
|
|
45
|
+
```python
|
|
46
|
+
from alloygbm import GBMRegressor, rmse
|
|
47
|
+
|
|
48
|
+
X_train = [
|
|
49
|
+
[0.0, 1.0],
|
|
50
|
+
[1.0, 0.0],
|
|
51
|
+
[2.0, 1.0],
|
|
52
|
+
[3.0, 0.0],
|
|
53
|
+
]
|
|
54
|
+
y_train = [0.2, 0.9, 1.8, 2.7]
|
|
55
|
+
|
|
56
|
+
X_test = [
|
|
57
|
+
[1.5, 1.0],
|
|
58
|
+
[2.5, 0.0],
|
|
59
|
+
]
|
|
60
|
+
y_test = [1.3, 2.3]
|
|
61
|
+
|
|
62
|
+
model = GBMRegressor(
|
|
63
|
+
learning_rate=0.05,
|
|
64
|
+
max_depth=6,
|
|
65
|
+
n_estimators=1200,
|
|
66
|
+
training_policy="auto",
|
|
67
|
+
deterministic=True,
|
|
68
|
+
seed=7,
|
|
69
|
+
)
|
|
70
|
+
model.fit(X_train, y_train)
|
|
71
|
+
|
|
72
|
+
predictions = model.predict(X_test)
|
|
73
|
+
print(predictions)
|
|
74
|
+
print(rmse(y_test, predictions))
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Time-Aware Validation Example
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
from alloygbm import GBMRegressor, purged_time_series_splits, rmse
|
|
81
|
+
|
|
82
|
+
rows = [
|
|
83
|
+
[0.1, 1.0],
|
|
84
|
+
[0.2, 1.1],
|
|
85
|
+
[0.4, 0.9],
|
|
86
|
+
[0.6, 1.2],
|
|
87
|
+
[0.8, 1.3],
|
|
88
|
+
[1.0, 1.4],
|
|
89
|
+
]
|
|
90
|
+
targets = [0.0, 0.1, 0.2, 0.5, 0.8, 1.0]
|
|
91
|
+
time_index = [0, 0, 1, 1, 2, 2]
|
|
92
|
+
|
|
93
|
+
splits = purged_time_series_splits(
|
|
94
|
+
time_index,
|
|
95
|
+
n_splits=3,
|
|
96
|
+
purge_gap=0,
|
|
97
|
+
embargo=0,
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
fold_scores = []
|
|
101
|
+
for train_idx, test_idx in splits:
|
|
102
|
+
    model = GBMRegressor(
|
|
103
|
+
        learning_rate=0.05,
|
|
104
|
+
        max_depth=6,
|
|
105
|
+
        n_estimators=400,
|
|
106
|
+
        deterministic=True,
|
|
107
|
+
        seed=7,
|
|
108
|
+
    )
|
|
109
|
+
    X_train = [rows[i] for i in train_idx]
|
|
110
|
+
    y_train = [targets[i] for i in train_idx]
|
|
111
|
+
    X_test = [rows[i] for i in test_idx]
|
|
112
|
+
    y_test = [targets[i] for i in test_idx]
|
|
113
|
+
|
|
114
|
+
    model.fit(X_train, y_train)
|
|
115
|
+
    fold_scores.append(rmse(y_test, model.predict(X_test)))
|
|
116
|
+
|
|
117
|
+
print(fold_scores)
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
For panel data, use `purged_panel_splits(...)`.
|
|
121
|
+
|
|
122
|
+
## Feature Summary
|
|
123
|
+
|
|
124
|
+
- Native Rust-backed training and prediction from Python
|
|
125
|
+
- `GBMRegressor` with deterministic training controls and dataset-aware `training_policy`
|
|
126
|
+
- Continuous-feature binning strategies: `linear`, `rank`, `quantile`
|
|
127
|
+
- Optional single-column categorical encoding path
|
|
128
|
+
- Artifact-backed prediction via `predict_from_artifact(...)`
|
|
129
|
+
- SHAP row explanations via `shap_values(...)`
|
|
130
|
+
- SHAP global feature importance via `feature_importances(...)`
|
|
131
|
+
- Time-aware validation helpers:
|
|
132
|
+
- `purged_time_series_splits(...)`
|
|
133
|
+
- `purged_panel_splits(...)`
|
|
134
|
+
- Metric helpers:
|
|
135
|
+
- `rmse`, `mae`, `r2_score`
|
|
136
|
+
- `pearson_correlation`, `rank_ic`, `hit_rate`, `icir`
|
|
137
|
+
|
|
138
|
+
## Benchmark Snapshot
|
|
139
|
+
|
|
140
|
+
The current public benchmark suite compares AlloyGBM against XGBoost, LightGBM, and CatBoost on synthetic and real regression datasets.
|
|
141
|
+
|
|
142
|
+
Current headline results from the expanded suite:
|
|
143
|
+
|
|
144
|
+
- AlloyGBM is best on the `panel_time_series` benchmark across the tested profiles.
|
|
145
|
+
- AlloyGBM is strong on `dow_jones_financial`, with its best showing under the deeper low-learning-rate profile.
|
|
146
|
+
- AlloyGBM is competitive on `dense_numeric`, but still trails XGBoost and CatBoost on RMSE.
|
|
147
|
+
- AlloyGBM currently lags all three libraries on `california_housing` and `bike_sharing`.
|
|
148
|
+
- LightGBM is usually the fastest trainer in the comparison set.
|
|
149
|
+
|
|
150
|
+
The honest short version is:
|
|
151
|
+
|
|
152
|
+
- strong on `panel_time_series`
|
|
153
|
+
- strong on `dow_jones_financial`
|
|
154
|
+
- weaker on `california_housing` and `bike_sharing`
|
|
155
|
+
|
|
156
|
+
Benchmark tooling and methodology live in [benchmarks/README.md](benchmarks/README.md).
|
|
157
|
+
|
|
158
|
+
## Current Limitations
|
|
159
|
+
|
|
160
|
+
- Regression-only. Classification and ranking are not implemented yet.
|
|
161
|
+
- CPU-only runtime today.
|
|
162
|
+
- Single categorical feature support only.
|
|
163
|
+
- Best performance is still concentrated in time-aware and finance-style structured regression, not broad tabular dominance.
|
|
164
|
+
- The API is intentionally small and still evolving toward a more complete `0.x` user-facing surface.
|
|
165
|
+
|
|
166
|
+
## Documentation
|
|
167
|
+
|
|
168
|
+
- Docs index: [docs/README.md](docs/README.md)
|
|
169
|
+
- Benchmark guide: [benchmarks/README.md](benchmarks/README.md)
|
|
170
|
+
- Current roadmap: [docs/roadmap/current.md](docs/roadmap/current.md)
|
|
171
|
+
- Archive: [docs/archive/README.md](docs/archive/README.md)
|
|
172
|
+
|
|
173
|
+
## License
|
|
174
|
+
|
|
175
|
+
MIT. See [LICENSE](LICENSE).
|