fastbinning 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fastbinning-0.0.1/.github/workflows/release.yaml +64 -0
- fastbinning-0.0.1/.gitignore +30 -0
- fastbinning-0.0.1/Cargo.lock +313 -0
- fastbinning-0.0.1/Cargo.toml +16 -0
- fastbinning-0.0.1/LICENSE +21 -0
- fastbinning-0.0.1/PKG-INFO +43 -0
- fastbinning-0.0.1/README.md +31 -0
- fastbinning-0.0.1/examples/categorical.py +108 -0
- fastbinning-0.0.1/examples/numerical.py +65 -0
- fastbinning-0.0.1/pyproject.toml +31 -0
- fastbinning-0.0.1/python/.DS_Store +0 -0
- fastbinning-0.0.1/python/fastbinning/__init__.py +3 -0
- fastbinning-0.0.1/python/fastbinning/__init__.pyi +72 -0
- fastbinning-0.0.1/python/fastbinning/py.typed +0 -0
- fastbinning-0.0.1/src/core/categorical.rs +212 -0
- fastbinning-0.0.1/src/core/mod.rs +5 -0
- fastbinning-0.0.1/src/core/numerical.rs +218 -0
- fastbinning-0.0.1/src/core/precategorical.rs +63 -0
- fastbinning-0.0.1/src/core/prenumerical.rs +68 -0
- fastbinning-0.0.1/src/core/woeiv.rs +18 -0
- fastbinning-0.0.1/src/lib.rs +116 -0
- fastbinning-0.0.1/uv.lock +1056 -0
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- 'v*.*.*'
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
contents: read
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
build_wheels:
|
|
13
|
+
name: Build wheels on ${{ matrix.os }}
|
|
14
|
+
runs-on: ${{ matrix.os }}
|
|
15
|
+
strategy:
|
|
16
|
+
matrix:
|
|
17
|
+
os: [ubuntu-latest, windows-latest, macos-latest]
|
|
18
|
+
|
|
19
|
+
steps:
|
|
20
|
+
- uses: actions/checkout@v4
|
|
21
|
+
- uses: actions/setup-python@v5
|
|
22
|
+
with:
|
|
23
|
+
python-version: '3.10'
|
|
24
|
+
|
|
25
|
+
- name: Build wheels
|
|
26
|
+
uses: PyO3/maturin-action@v1
|
|
27
|
+
with:
|
|
28
|
+
command: build
|
|
29
|
+
args: --release --out dist
|
|
30
|
+
sccache: 'true'
|
|
31
|
+
|
|
32
|
+
- name: Upload wheels
|
|
33
|
+
uses: actions/upload-artifact@v4
|
|
34
|
+
with:
|
|
35
|
+
name: wheels-${{ matrix.os }}
|
|
36
|
+
path: dist
|
|
37
|
+
|
|
38
|
+
publish:
|
|
39
|
+
name: Publish to PyPI
|
|
40
|
+
runs-on: ubuntu-latest
|
|
41
|
+
needs: [build_wheels]
|
|
42
|
+
steps:
|
|
43
|
+
- uses: actions/checkout@v4
|
|
44
|
+
|
|
45
|
+
- name: Download all wheels
|
|
46
|
+
uses: actions/download-artifact@v4
|
|
47
|
+
with:
|
|
48
|
+
path: dist
|
|
49
|
+
pattern: wheels-*
|
|
50
|
+
merge-multiple: true
|
|
51
|
+
|
|
52
|
+
- name: Build sdist
|
|
53
|
+
uses: PyO3/maturin-action@v1
|
|
54
|
+
with:
|
|
55
|
+
command: sdist
|
|
56
|
+
args: --out dist
|
|
57
|
+
|
|
58
|
+
- name: Publish to PyPI
|
|
59
|
+
uses: PyO3/maturin-action@v1
|
|
60
|
+
env:
|
|
61
|
+
MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
|
|
62
|
+
with:
|
|
63
|
+
command: upload
|
|
64
|
+
args: --non-interactive --skip-existing dist/*
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Generated by Cargo
|
|
2
|
+
# will have compiled files and executables
|
|
3
|
+
debug
|
|
4
|
+
target
|
|
5
|
+
|
|
6
|
+
# These are backup files generated by rustfmt
|
|
7
|
+
**/*.rs.bk
|
|
8
|
+
|
|
9
|
+
# MSVC Windows builds of rustc generate these, which store debugging information
|
|
10
|
+
*.pdb
|
|
11
|
+
|
|
12
|
+
# Generated by cargo mutants
|
|
13
|
+
# Contains mutation testing data
|
|
14
|
+
**/mutants.out*/
|
|
15
|
+
|
|
16
|
+
# RustRover
|
|
17
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
18
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
19
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
20
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
21
|
+
#.idea/
|
|
22
|
+
|
|
23
|
+
__pycache__/
|
|
24
|
+
*.py[cod]
|
|
25
|
+
*$py.class
|
|
26
|
+
*.so
|
|
27
|
+
*.whl
|
|
28
|
+
.python-version
|
|
29
|
+
|
|
30
|
+
dist/
|
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
# This file is automatically @generated by Cargo.
|
|
2
|
+
# It is not intended for manual editing.
|
|
3
|
+
version = 4
|
|
4
|
+
|
|
5
|
+
[[package]]
|
|
6
|
+
name = "autocfg"
|
|
7
|
+
version = "1.5.0"
|
|
8
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
9
|
+
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
|
|
10
|
+
|
|
11
|
+
[[package]]
|
|
12
|
+
name = "crossbeam-deque"
|
|
13
|
+
version = "0.8.6"
|
|
14
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
15
|
+
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
|
|
16
|
+
dependencies = [
|
|
17
|
+
"crossbeam-epoch",
|
|
18
|
+
"crossbeam-utils",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
[[package]]
|
|
22
|
+
name = "crossbeam-epoch"
|
|
23
|
+
version = "0.9.18"
|
|
24
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
25
|
+
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
|
|
26
|
+
dependencies = [
|
|
27
|
+
"crossbeam-utils",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[[package]]
|
|
31
|
+
name = "crossbeam-utils"
|
|
32
|
+
version = "0.8.21"
|
|
33
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
34
|
+
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
|
|
35
|
+
|
|
36
|
+
[[package]]
|
|
37
|
+
name = "either"
|
|
38
|
+
version = "1.15.0"
|
|
39
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
40
|
+
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
|
41
|
+
|
|
42
|
+
[[package]]
|
|
43
|
+
name = "fastbinning"
|
|
44
|
+
version = "0.0.1"
|
|
45
|
+
dependencies = [
|
|
46
|
+
"ndarray",
|
|
47
|
+
"numpy",
|
|
48
|
+
"pyo3",
|
|
49
|
+
"rayon",
|
|
50
|
+
"serde",
|
|
51
|
+
]
|
|
52
|
+
|
|
53
|
+
[[package]]
|
|
54
|
+
name = "heck"
|
|
55
|
+
version = "0.5.0"
|
|
56
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
57
|
+
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
|
58
|
+
|
|
59
|
+
[[package]]
|
|
60
|
+
name = "libc"
|
|
61
|
+
version = "0.2.182"
|
|
62
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
63
|
+
checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"
|
|
64
|
+
|
|
65
|
+
[[package]]
|
|
66
|
+
name = "matrixmultiply"
|
|
67
|
+
version = "0.3.10"
|
|
68
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
69
|
+
checksum = "a06de3016e9fae57a36fd14dba131fccf49f74b40b7fbdb472f96e361ec71a08"
|
|
70
|
+
dependencies = [
|
|
71
|
+
"autocfg",
|
|
72
|
+
"rawpointer",
|
|
73
|
+
]
|
|
74
|
+
|
|
75
|
+
[[package]]
|
|
76
|
+
name = "ndarray"
|
|
77
|
+
version = "0.17.2"
|
|
78
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
79
|
+
checksum = "520080814a7a6b4a6e9070823bb24b4531daac8c4627e08ba5de8c5ef2f2752d"
|
|
80
|
+
dependencies = [
|
|
81
|
+
"matrixmultiply",
|
|
82
|
+
"num-complex",
|
|
83
|
+
"num-integer",
|
|
84
|
+
"num-traits",
|
|
85
|
+
"portable-atomic",
|
|
86
|
+
"portable-atomic-util",
|
|
87
|
+
"rawpointer",
|
|
88
|
+
]
|
|
89
|
+
|
|
90
|
+
[[package]]
|
|
91
|
+
name = "num-complex"
|
|
92
|
+
version = "0.4.6"
|
|
93
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
94
|
+
checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495"
|
|
95
|
+
dependencies = [
|
|
96
|
+
"num-traits",
|
|
97
|
+
]
|
|
98
|
+
|
|
99
|
+
[[package]]
|
|
100
|
+
name = "num-integer"
|
|
101
|
+
version = "0.1.46"
|
|
102
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
103
|
+
checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
|
|
104
|
+
dependencies = [
|
|
105
|
+
"num-traits",
|
|
106
|
+
]
|
|
107
|
+
|
|
108
|
+
[[package]]
|
|
109
|
+
name = "num-traits"
|
|
110
|
+
version = "0.2.19"
|
|
111
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
112
|
+
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
|
|
113
|
+
dependencies = [
|
|
114
|
+
"autocfg",
|
|
115
|
+
]
|
|
116
|
+
|
|
117
|
+
[[package]]
|
|
118
|
+
name = "numpy"
|
|
119
|
+
version = "0.28.0"
|
|
120
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
121
|
+
checksum = "778da78c64ddc928ebf5ad9df5edf0789410ff3bdbf3619aed51cd789a6af1e2"
|
|
122
|
+
dependencies = [
|
|
123
|
+
"libc",
|
|
124
|
+
"ndarray",
|
|
125
|
+
"num-complex",
|
|
126
|
+
"num-integer",
|
|
127
|
+
"num-traits",
|
|
128
|
+
"pyo3",
|
|
129
|
+
"pyo3-build-config",
|
|
130
|
+
"rustc-hash",
|
|
131
|
+
]
|
|
132
|
+
|
|
133
|
+
[[package]]
|
|
134
|
+
name = "once_cell"
|
|
135
|
+
version = "1.21.3"
|
|
136
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
137
|
+
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
|
|
138
|
+
|
|
139
|
+
[[package]]
|
|
140
|
+
name = "portable-atomic"
|
|
141
|
+
version = "1.13.1"
|
|
142
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
143
|
+
checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
|
|
144
|
+
|
|
145
|
+
[[package]]
|
|
146
|
+
name = "portable-atomic-util"
|
|
147
|
+
version = "0.2.5"
|
|
148
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
149
|
+
checksum = "7a9db96d7fa8782dd8c15ce32ffe8680bbd1e978a43bf51a34d39483540495f5"
|
|
150
|
+
dependencies = [
|
|
151
|
+
"portable-atomic",
|
|
152
|
+
]
|
|
153
|
+
|
|
154
|
+
[[package]]
|
|
155
|
+
name = "proc-macro2"
|
|
156
|
+
version = "1.0.106"
|
|
157
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
158
|
+
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
|
|
159
|
+
dependencies = [
|
|
160
|
+
"unicode-ident",
|
|
161
|
+
]
|
|
162
|
+
|
|
163
|
+
[[package]]
|
|
164
|
+
name = "pyo3"
|
|
165
|
+
version = "0.28.2"
|
|
166
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
167
|
+
checksum = "cf85e27e86080aafd5a22eae58a162e133a589551542b3e5cee4beb27e54f8e1"
|
|
168
|
+
dependencies = [
|
|
169
|
+
"libc",
|
|
170
|
+
"once_cell",
|
|
171
|
+
"portable-atomic",
|
|
172
|
+
"pyo3-build-config",
|
|
173
|
+
"pyo3-ffi",
|
|
174
|
+
"pyo3-macros",
|
|
175
|
+
]
|
|
176
|
+
|
|
177
|
+
[[package]]
|
|
178
|
+
name = "pyo3-build-config"
|
|
179
|
+
version = "0.28.2"
|
|
180
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
181
|
+
checksum = "8bf94ee265674bf76c09fa430b0e99c26e319c945d96ca0d5a8215f31bf81cf7"
|
|
182
|
+
dependencies = [
|
|
183
|
+
"target-lexicon",
|
|
184
|
+
]
|
|
185
|
+
|
|
186
|
+
[[package]]
|
|
187
|
+
name = "pyo3-ffi"
|
|
188
|
+
version = "0.28.2"
|
|
189
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
190
|
+
checksum = "491aa5fc66d8059dd44a75f4580a2962c1862a1c2945359db36f6c2818b748dc"
|
|
191
|
+
dependencies = [
|
|
192
|
+
"libc",
|
|
193
|
+
"pyo3-build-config",
|
|
194
|
+
]
|
|
195
|
+
|
|
196
|
+
[[package]]
|
|
197
|
+
name = "pyo3-macros"
|
|
198
|
+
version = "0.28.2"
|
|
199
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
200
|
+
checksum = "f5d671734e9d7a43449f8480f8b38115df67bef8d21f76837fa75ee7aaa5e52e"
|
|
201
|
+
dependencies = [
|
|
202
|
+
"proc-macro2",
|
|
203
|
+
"pyo3-macros-backend",
|
|
204
|
+
"quote",
|
|
205
|
+
"syn",
|
|
206
|
+
]
|
|
207
|
+
|
|
208
|
+
[[package]]
|
|
209
|
+
name = "pyo3-macros-backend"
|
|
210
|
+
version = "0.28.2"
|
|
211
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
212
|
+
checksum = "22faaa1ce6c430a1f71658760497291065e6450d7b5dc2bcf254d49f66ee700a"
|
|
213
|
+
dependencies = [
|
|
214
|
+
"heck",
|
|
215
|
+
"proc-macro2",
|
|
216
|
+
"pyo3-build-config",
|
|
217
|
+
"quote",
|
|
218
|
+
"syn",
|
|
219
|
+
]
|
|
220
|
+
|
|
221
|
+
[[package]]
|
|
222
|
+
name = "quote"
|
|
223
|
+
version = "1.0.44"
|
|
224
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
225
|
+
checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4"
|
|
226
|
+
dependencies = [
|
|
227
|
+
"proc-macro2",
|
|
228
|
+
]
|
|
229
|
+
|
|
230
|
+
[[package]]
|
|
231
|
+
name = "rawpointer"
|
|
232
|
+
version = "0.2.1"
|
|
233
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
234
|
+
checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
|
|
235
|
+
|
|
236
|
+
[[package]]
|
|
237
|
+
name = "rayon"
|
|
238
|
+
version = "1.11.0"
|
|
239
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
240
|
+
checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
|
|
241
|
+
dependencies = [
|
|
242
|
+
"either",
|
|
243
|
+
"rayon-core",
|
|
244
|
+
]
|
|
245
|
+
|
|
246
|
+
[[package]]
|
|
247
|
+
name = "rayon-core"
|
|
248
|
+
version = "1.13.0"
|
|
249
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
250
|
+
checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
|
|
251
|
+
dependencies = [
|
|
252
|
+
"crossbeam-deque",
|
|
253
|
+
"crossbeam-utils",
|
|
254
|
+
]
|
|
255
|
+
|
|
256
|
+
[[package]]
|
|
257
|
+
name = "rustc-hash"
|
|
258
|
+
version = "2.1.1"
|
|
259
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
260
|
+
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
|
|
261
|
+
|
|
262
|
+
[[package]]
|
|
263
|
+
name = "serde"
|
|
264
|
+
version = "1.0.228"
|
|
265
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
266
|
+
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
|
|
267
|
+
dependencies = [
|
|
268
|
+
"serde_core",
|
|
269
|
+
"serde_derive",
|
|
270
|
+
]
|
|
271
|
+
|
|
272
|
+
[[package]]
|
|
273
|
+
name = "serde_core"
|
|
274
|
+
version = "1.0.228"
|
|
275
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
276
|
+
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
|
|
277
|
+
dependencies = [
|
|
278
|
+
"serde_derive",
|
|
279
|
+
]
|
|
280
|
+
|
|
281
|
+
[[package]]
|
|
282
|
+
name = "serde_derive"
|
|
283
|
+
version = "1.0.228"
|
|
284
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
285
|
+
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
|
|
286
|
+
dependencies = [
|
|
287
|
+
"proc-macro2",
|
|
288
|
+
"quote",
|
|
289
|
+
"syn",
|
|
290
|
+
]
|
|
291
|
+
|
|
292
|
+
[[package]]
|
|
293
|
+
name = "syn"
|
|
294
|
+
version = "2.0.117"
|
|
295
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
296
|
+
checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
|
|
297
|
+
dependencies = [
|
|
298
|
+
"proc-macro2",
|
|
299
|
+
"quote",
|
|
300
|
+
"unicode-ident",
|
|
301
|
+
]
|
|
302
|
+
|
|
303
|
+
[[package]]
|
|
304
|
+
name = "target-lexicon"
|
|
305
|
+
version = "0.13.5"
|
|
306
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
307
|
+
checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca"
|
|
308
|
+
|
|
309
|
+
[[package]]
|
|
310
|
+
name = "unicode-ident"
|
|
311
|
+
version = "1.0.24"
|
|
312
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
313
|
+
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
[package]
|
|
2
|
+
name = "fastbinning"
|
|
3
|
+
version = "0.0.1"
|
|
4
|
+
edition = "2024"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
|
|
7
|
+
[lib]
|
|
8
|
+
name = "fastbinning"
|
|
9
|
+
crate-type = ["cdylib"]
|
|
10
|
+
|
|
11
|
+
[dependencies]
|
|
12
|
+
rayon = "1.10"
|
|
13
|
+
serde = { version = "1.0", features = ["derive"] }
|
|
14
|
+
pyo3 = { version = "0.28", features = ["extension-module", "abi3-py38"] }
|
|
15
|
+
ndarray = "0.17"
|
|
16
|
+
numpy="0.28"
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 RektPunk
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: fastbinning
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Requires-Dist: numpy>=1.21.6
|
|
5
|
+
License-File: LICENSE
|
|
6
|
+
Summary: A high-performance binning library specifically designed for Credit Risk Modeling and Scorecard Development.
|
|
7
|
+
Author-email: RektPunk <rektpunk@gmail.com>
|
|
8
|
+
Requires-Python: >=3.8
|
|
9
|
+
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
10
|
+
Project-URL: repository, https://github.com/RektPunk/fastbinning
|
|
11
|
+
|
|
12
|
+
<div style="text-align: center;">
|
|
13
|
+
<img src="https://capsule-render.vercel.app/api?type=transparent&height=300&color=gradient&text=fastbinning&section=header&reversal=false&height=120&fontSize=90">
|
|
14
|
+
</div>
|
|
15
|
+
<p align="center">
|
|
16
|
+
<a href="https://github.com/RektPunk/fastbinning/releases/latest">
|
|
17
|
+
<img alt="release" src="https://img.shields.io/github/v/release/RektPunk/fastbinning.svg">
|
|
18
|
+
</a>
|
|
19
|
+
<a href="https://github.com/RektPunk/fastbinning/blob/main/LICENSE">
|
|
20
|
+
<img alt="License" src="https://img.shields.io/github/license/RektPunk/fastbinning.svg">
|
|
21
|
+
</a>
|
|
22
|
+
</p>
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
A high-performance binning library specifically designed for **Credit Risk Modeling** and **Scorecard Development**.
|
|
26
|
+
|
|
27
|
+
In financial risk modeling, **Weight of Evidence (WoE)** and **Information Value (IV)** are gold standards for feature engineering. `fastbinning` ensures mathematical rigor with extreme speed.
|
|
28
|
+
|
|
29
|
+
# Why fastbinning for Credit Scoring?
|
|
30
|
+
|
|
31
|
+
* **Monotonicity Guaranteed**: In credit scoring, features like 'Utilization Rate' or 'Age' must have a monotonic relationship with default risk to be explainable and compliant.
|
|
32
|
+
* **Built for Big Data**: While traditional tools struggle with millions of rows, `fastbinning` handles 10M+ records in milliseconds.
|
|
33
|
+
* **Robustness**: Prevents overfitting by enforcing minimum sample constraints (`min_bin_pct`), ensuring each bin is statistically significant.
|
|
34
|
+
|
|
35
|
+
# Installation
|
|
36
|
+
Install using pip:
|
|
37
|
+
```bash
|
|
38
|
+
pip install fastbinning
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
# Example
|
|
42
|
+
Please refer to the [**Examples**](https://github.com/RektPunk/fastbinning/tree/main/examples) provided for further clarification.
|
|
43
|
+
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
<div style="text-align: center;">
|
|
2
|
+
<img src="https://capsule-render.vercel.app/api?type=transparent&height=300&color=gradient&text=fastbinning&section=header&reversal=false&height=120&fontSize=90">
|
|
3
|
+
</div>
|
|
4
|
+
<p align="center">
|
|
5
|
+
<a href="https://github.com/RektPunk/fastbinning/releases/latest">
|
|
6
|
+
<img alt="release" src="https://img.shields.io/github/v/release/RektPunk/fastbinning.svg">
|
|
7
|
+
</a>
|
|
8
|
+
<a href="https://github.com/RektPunk/fastbinning/blob/main/LICENSE">
|
|
9
|
+
<img alt="License" src="https://img.shields.io/github/license/RektPunk/fastbinning.svg">
|
|
10
|
+
</a>
|
|
11
|
+
</p>
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
A high-performance binning library specifically designed for **Credit Risk Modeling** and **Scorecard Development**.
|
|
15
|
+
|
|
16
|
+
In financial risk modeling, **Weight of Evidence (WoE)** and **Information Value (IV)** are gold standards for feature engineering. `fastbinning` ensures mathematical rigor with extreme speed.
|
|
17
|
+
|
|
18
|
+
# Why fastbinning for Credit Scoring?
|
|
19
|
+
|
|
20
|
+
* **Monotonicity Guaranteed**: In credit scoring, features like 'Utilization Rate' or 'Age' must have a monotonic relationship with default risk to be explainable and compliant.
|
|
21
|
+
* **Built for Big Data**: While traditional tools struggle with millions of rows, `fastbinning` handles 10M+ records in milliseconds.
|
|
22
|
+
* **Robustness**: Prevents overfitting by enforcing minimum sample constraints (`min_bin_pct`), ensuring each bin is statistically significant.
|
|
23
|
+
|
|
24
|
+
# Installation
|
|
25
|
+
Install using pip:
|
|
26
|
+
```bash
|
|
27
|
+
pip install fastbinning
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
# Example
|
|
31
|
+
Please refer to the [**Examples**](https://github.com/RektPunk/fastbinning/tree/main/examples) provided for further clarification.
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
import time
|
|
2
|
+
|
|
3
|
+
import fastbinning
|
|
4
|
+
import numpy as np
|
|
5
|
+
import pandas as pd
|
|
6
|
+
from sklearn.preprocessing import OrdinalEncoder
|
|
7
|
+
|
|
8
|
+
# -------------------------------------------------------------------------
# Helpers shared by both tests
# -------------------------------------------------------------------------
def _timed_fit(binner, codes, target, labels):
    """Run ``binner.fit`` and return ``(bins, elapsed_ms)``.

    Only the ``fit`` call itself sits between the two clock reads; callers
    are expected to do any dtype conversion beforehand so that array copies
    are not counted as binning time.
    """
    started = time.perf_counter()
    bins = binner.fit(codes, target, labels)
    elapsed_ms = (time.perf_counter() - started) * 1000
    return bins, elapsed_ms


def _print_bin_report(bins, pos_label, neg_label):
    """Print one table row per bin and return the summed IV.

    ``pos_label`` / ``neg_label`` are the column captions for the positive
    and negative counts; they are parameters because the two tests use
    different capitalisation in their headers.
    """
    rule = "-" * 100
    print(rule)
    print(
        f"{'ID':<3} | {'Categories':<25} | {pos_label:<10} | {neg_label:<10} | {'WoE':<8} | {'IV':<8} | {'Missing'}"
    )
    print(rule)

    total_iv = 0
    for b in bins:
        raw_cat = ", ".join(b.categories)
        # Keep the category column narrow: cut long lists at 20 characters.
        cat_str = (raw_cat[:20] + "...") if len(raw_cat) > 20 else raw_cat
        print(
            f"{b.bin_id:<3} | {cat_str:<25} | {b.pos:<10} | {b.neg:<10} | {b.woe:>8.4f} | {b.iv:>8.4f} | {b.is_missing}"
        )
        total_iv += b.iv
    print(rule)
    return total_iv


# -------------------------------------------------------------------------
# Data Generation: 10 Million Samples
# -------------------------------------------------------------------------
n_samples = 10_000_000
df = pd.DataFrame({"grade": np.random.choice(["A", "B", "C", "D", "E"], n_samples)})

# Map target probability with clear differentiation for WoE testing
prob_map = {"A": 0.01, "B": 0.05, "C": 0.1, "D": 0.3, "E": 0.5}
df["target"] = (np.random.rand(n_samples) < df["grade"].map(prob_map)).astype(np.int32)

# Inject intentional Missing (NaN) values
# Set target to 1 for all NaNs to create a high-risk 'Missing' bin
df.loc[df.sample(100_000).index, "grade"] = np.nan
df.loc[df["grade"].isna(), "target"] = 1

# Extract the target once; both tests pass the same array to fit().
target = df["target"].values


# -------------------------------------------------------------------------
# Configure Categorical Binning
# -------------------------------------------------------------------------
# max_bins: Final number of bins to produce
# min_bin_pct: Minimum sample size required for each bin (5%)
categorical_binning = fastbinning.CategoricalBinning(max_bins=3, min_bin_pct=0.05)


# -------------------------------------------------------------------------
# TEST 1: Using pd.factorize (Natural Order Mapping)
# -------------------------------------------------------------------------
print("--- Test 1: pandas.factorize (Natural Appearance Order) ---")

# pd.factorize maps NaNs to -1 by default
codes, uniques = pd.factorize(df["grade"])
uniques_list = uniques.astype(str).tolist()

# Cast to int32 *before* starting the clock so the 10M-element copy is not
# reported as part of the binning time.
codes = codes.astype(np.int32)

categorical_bins, elapsed_ms = _timed_fit(
    categorical_binning, codes, target, uniques_list
)

print(f"Execution Time: {elapsed_ms:.2f} ms")
total_iv = _print_bin_report(categorical_bins, "Pos", "Neg")
print(f"Total IV: {total_iv:.4f}\n")

# -------------------------------------------------------------------------
# TEST 2: Using Scikit-Learn OrdinalEncoder (Lexicographical Mapping)
# -------------------------------------------------------------------------
print("--- Test 2: sklearn OrdinalEncoder (Alphabetical Order) ---")

# Configure encoder to treat NaNs as -1 to match the engine's missing logic
enc = OrdinalEncoder(
    handle_unknown="use_encoded_value",
    unknown_value=-1,
    encoded_missing_value=-1,
)

# Reshape for sklearn and flatten back to 1D (already int32 after this line,
# so no further cast is needed before fit()).
codes = enc.fit_transform(df[["grade"]].to_numpy()).astype(np.int32).flatten()

# Extract category names while removing 'nan' from the unique labels list
uniques_list = enc.categories_[0].astype(str).tolist()
uniques_list = [c for c in uniques_list if c not in ["nan", "None", "NoneType"]]

print("Encoder Categories (Actual Mapping):", enc.categories_[0])
print("Processed Unique Labels List:", uniques_list)

categorical_bins, elapsed_ms = _timed_fit(
    categorical_binning, codes, target, uniques_list
)

print(f"Execution Time: {elapsed_ms:.2f} ms")
total_iv = _print_bin_report(categorical_bins, "POS", "NEG")
print(f"Total IV: {total_iv:.4f}")
|