bun-scikit 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +187 -0
- package/binding.gyp +21 -0
- package/docs/README.md +7 -0
- package/docs/native-abi.md +53 -0
- package/index.ts +1 -0
- package/package.json +76 -0
- package/scripts/build-node-addon.ts +26 -0
- package/scripts/build-zig-kernels.ts +50 -0
- package/scripts/check-api-docs-coverage.ts +52 -0
- package/scripts/check-benchmark-health.ts +140 -0
- package/scripts/install-native.ts +160 -0
- package/scripts/package-native-artifacts.ts +62 -0
- package/scripts/sync-benchmark-readme.ts +181 -0
- package/scripts/update-benchmark-history.ts +91 -0
- package/src/ensemble/RandomForestClassifier.ts +136 -0
- package/src/ensemble/RandomForestRegressor.ts +136 -0
- package/src/index.ts +32 -0
- package/src/linear_model/LinearRegression.ts +136 -0
- package/src/linear_model/LogisticRegression.ts +260 -0
- package/src/linear_model/SGDClassifier.ts +161 -0
- package/src/linear_model/SGDRegressor.ts +104 -0
- package/src/metrics/classification.ts +294 -0
- package/src/metrics/regression.ts +51 -0
- package/src/model_selection/GridSearchCV.ts +244 -0
- package/src/model_selection/KFold.ts +82 -0
- package/src/model_selection/RepeatedKFold.ts +49 -0
- package/src/model_selection/RepeatedStratifiedKFold.ts +50 -0
- package/src/model_selection/StratifiedKFold.ts +112 -0
- package/src/model_selection/StratifiedShuffleSplit.ts +211 -0
- package/src/model_selection/crossValScore.ts +165 -0
- package/src/model_selection/trainTestSplit.ts +82 -0
- package/src/naive_bayes/GaussianNB.ts +148 -0
- package/src/native/node-addon/bun_scikit_addon.cpp +450 -0
- package/src/native/zigKernels.ts +576 -0
- package/src/neighbors/KNeighborsClassifier.ts +85 -0
- package/src/pipeline/ColumnTransformer.ts +203 -0
- package/src/pipeline/FeatureUnion.ts +123 -0
- package/src/pipeline/Pipeline.ts +168 -0
- package/src/preprocessing/MinMaxScaler.ts +113 -0
- package/src/preprocessing/OneHotEncoder.ts +91 -0
- package/src/preprocessing/PolynomialFeatures.ts +158 -0
- package/src/preprocessing/RobustScaler.ts +149 -0
- package/src/preprocessing/SimpleImputer.ts +150 -0
- package/src/preprocessing/StandardScaler.ts +92 -0
- package/src/svm/LinearSVC.ts +117 -0
- package/src/tree/DecisionTreeClassifier.ts +394 -0
- package/src/tree/DecisionTreeRegressor.ts +407 -0
- package/src/types.ts +18 -0
- package/src/utils/linalg.ts +209 -0
- package/src/utils/validation.ts +78 -0
- package/zig/kernels.zig +1327 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Seyamalam
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
# bun-scikit
|
|
2
|
+
|
|
3
|
+
[CI](https://github.com/Seyamalam/bun-scikit/actions/workflows/ci.yml)
|
|
4
|
+
[Benchmark Snapshot](https://github.com/Seyamalam/bun-scikit/actions/workflows/benchmark-snapshot.yml)
|
|
5
|
+
|
|
6
|
+
`bun-scikit` is a scikit-learn-inspired machine learning library for Bun + TypeScript.
|
|
7
|
+
|
|
8
|
+
## Features
|
|
9
|
+
|
|
10
|
+
- `StandardScaler`
|
|
11
|
+
- `LinearRegression` (native Zig `normal` solver)
|
|
12
|
+
- `LogisticRegression` (binary classification, native Zig)
|
|
13
|
+
- `KNeighborsClassifier`
|
|
14
|
+
- `DecisionTreeClassifier`
|
|
15
|
+
- `RandomForestClassifier`
|
|
16
|
+
- `trainTestSplit`
|
|
17
|
+
- Regression metrics: `meanSquaredError`, `meanAbsoluteError`, `r2Score`
|
|
18
|
+
- Classification metrics: `accuracyScore`, `precisionScore`, `recallScore`, `f1Score`
|
|
19
|
+
- Dataset-driven benchmark and CI comparison against Python `scikit-learn`
|
|
20
|
+
|
|
21
|
+
`test_data/heart.csv` is used for integration testing and benchmark comparison.
|
|
22
|
+
|
|
23
|
+
## Native Zig Backend
|
|
24
|
+
|
|
25
|
+
`LinearRegression` (`solver: "normal"`) and `LogisticRegression` require native Zig kernels.
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
bun run native:build
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
Optional Node-API bridge (experimental):
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
bun run native:build:node-addon
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
```ts
|
|
38
|
+
const linear = new LinearRegression({ solver: "normal" });
|
|
39
|
+
const logistic = new LogisticRegression();
|
|
40
|
+
|
|
41
|
+
linear.fit(XTrain, yTrain);
|
|
42
|
+
logistic.fit(XTrain, yTrain);
|
|
43
|
+
console.log(linear.fitBackend_, linear.fitBackendLibrary_);
|
|
44
|
+
console.log(logistic.fitBackend_, logistic.fitBackendLibrary_);
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
If native kernels are missing, `fit()` throws with guidance to run `bun run native:build`.
|
|
48
|
+
|
|
49
|
+
Bridge selection:
|
|
50
|
+
|
|
51
|
+
- `BUN_SCIKIT_NATIVE_BRIDGE=node-api|ffi` (`node-api` is attempted first when available)
|
|
52
|
+
- `BUN_SCIKIT_NODE_ADDON=/absolute/path/to/bun_scikit_node_addon.node`
|
|
53
|
+
- `BUN_SCIKIT_ZIG_LIB=/absolute/path/to/bun_scikit_kernels.<ext>`
|
|
54
|
+
|
|
55
|
+
Native ABI contract: `docs/native-abi.md`
|
|
56
|
+
|
|
57
|
+
## Install
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
bun install bun-scikit
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Postinstall behavior:
|
|
64
|
+
|
|
65
|
+
- Downloads prebuilt native binaries from GitHub Releases for `linux-x64` and `windows-x64`.
|
|
66
|
+
- If prebuilt binaries are unavailable, it falls back to local native build.
|
|
67
|
+
- macOS prebuilt binaries are currently not published.
|
|
68
|
+
|
|
69
|
+
## Usage
|
|
70
|
+
|
|
71
|
+
```ts
|
|
72
|
+
import {
|
|
73
|
+
LinearRegression,
|
|
74
|
+
StandardScaler,
|
|
75
|
+
meanSquaredError,
|
|
76
|
+
trainTestSplit,
|
|
77
|
+
} from "bun-scikit";
|
|
78
|
+
|
|
79
|
+
const X = [
|
|
80
|
+
[1, 2],
|
|
81
|
+
[2, 3],
|
|
82
|
+
[3, 4],
|
|
83
|
+
[4, 5],
|
|
84
|
+
];
|
|
85
|
+
const y = [5, 7, 9, 11];
|
|
86
|
+
|
|
87
|
+
const scaler = new StandardScaler();
|
|
88
|
+
const XScaled = scaler.fitTransform(X);
|
|
89
|
+
const { XTrain, XTest, yTrain, yTest } = trainTestSplit(XScaled, y, {
|
|
90
|
+
testSize: 0.25,
|
|
91
|
+
randomState: 42,
|
|
92
|
+
});
|
|
93
|
+
|
|
94
|
+
const model = new LinearRegression({ solver: "normal" });
|
|
95
|
+
model.fit(XTrain, yTrain);
|
|
96
|
+
const predictions = model.predict(XTest);
|
|
97
|
+
|
|
98
|
+
console.log("MSE:", meanSquaredError(yTest, predictions));
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## Benchmarks
|
|
102
|
+
|
|
103
|
+
The table below is generated from `bench/results/heart-ci-latest.json`.
|
|
104
|
+
That snapshot is produced by CI in `.github/workflows/benchmark-snapshot.yml`.
|
|
105
|
+
|
|
106
|
+
<!-- BENCHMARK_TABLE_START -->
|
|
107
|
+
Benchmark snapshot source: `bench/results/heart-ci-latest.json` (generated in CI workflow `Benchmark Snapshot`).
|
|
108
|
+
Dataset: `test_data/heart.csv` (1025 samples, 13 features, test fraction 0.2).
|
|
109
|
+
|
|
110
|
+
### Regression
|
|
111
|
+
|
|
112
|
+
| Implementation | Model | Fit median (ms) | Predict median (ms) | MSE | R2 |
|
|
113
|
+
|---|---|---:|---:|---:|---:|
|
|
114
|
+
| bun-scikit | StandardScaler + LinearRegression(normal) | 0.2103 | 0.0216 | 0.117545 | 0.529539 |
|
|
115
|
+
| python-scikit-learn | StandardScaler + LinearRegression | 0.3201 | 0.0365 | 0.117545 | 0.529539 |
|
|
116
|
+
|
|
117
|
+
Bun fit speedup vs scikit-learn: 1.522x
|
|
118
|
+
Bun predict speedup vs scikit-learn: 1.684x
|
|
119
|
+
MSE delta (bun - sklearn): 6.362e-14
|
|
120
|
+
R2 delta (bun - sklearn): -2.539e-13
|
|
121
|
+
|
|
122
|
+
### Classification
|
|
123
|
+
|
|
124
|
+
| Implementation | Model | Fit median (ms) | Predict median (ms) | Accuracy | F1 |
|
|
125
|
+
|---|---|---:|---:|---:|---:|
|
|
126
|
+
| bun-scikit | StandardScaler + LogisticRegression(gd,zig) | 0.4868 | 0.0282 | 0.863415 | 0.876106 |
|
|
127
|
+
| python-scikit-learn | StandardScaler + LogisticRegression(lbfgs) | 1.1246 | 0.0724 | 0.863415 | 0.875000 |
|
|
128
|
+
|
|
129
|
+
Bun fit speedup vs scikit-learn: 2.310x
|
|
130
|
+
Bun predict speedup vs scikit-learn: 2.574x
|
|
131
|
+
Accuracy delta (bun - sklearn): 0.000e+0
|
|
132
|
+
F1 delta (bun - sklearn): 1.106e-3
|
|
133
|
+
|
|
134
|
+
### Tree Classification
|
|
135
|
+
|
|
136
|
+
| Model | Implementation | Fit median (ms) | Predict median (ms) | Accuracy | F1 |
|
|
137
|
+
|---|---|---:|---:|---:|---:|
|
|
138
|
+
| DecisionTreeClassifier(maxDepth=8) | bun-scikit | 0.8062 | 0.0190 | 0.946341 | 0.948837 |
|
|
139
|
+
| DecisionTreeClassifier | python-scikit-learn | 1.4781 | 0.0999 | 0.931707 | 0.933962 |
|
|
140
|
+
| RandomForestClassifier(nEstimators=80,maxDepth=8) | bun-scikit | 27.6225 | 1.8535 | 0.990244 | 0.990566 |
|
|
141
|
+
| RandomForestClassifier | python-scikit-learn | 172.9585 | 6.4850 | 0.995122 | 0.995261 |
|
|
142
|
+
|
|
143
|
+
DecisionTree fit speedup vs scikit-learn: 1.833x
|
|
144
|
+
DecisionTree predict speedup vs scikit-learn: 5.244x
|
|
145
|
+
DecisionTree accuracy delta (bun - sklearn): 1.463e-2
|
|
146
|
+
DecisionTree f1 delta (bun - sklearn): 1.487e-2
|
|
147
|
+
|
|
148
|
+
RandomForest fit speedup vs scikit-learn: 6.262x
|
|
149
|
+
RandomForest predict speedup vs scikit-learn: 3.499x
|
|
150
|
+
RandomForest accuracy delta (bun - sklearn): -4.878e-3
|
|
151
|
+
RandomForest f1 delta (bun - sklearn): -4.695e-3
|
|
152
|
+
|
|
153
|
+
Snapshot generated at: 2026-02-23T14:55:51.251Z
|
|
154
|
+
<!-- BENCHMARK_TABLE_END -->
|
|
155
|
+
|
|
156
|
+
## Documentation
|
|
157
|
+
|
|
158
|
+
- Docs index: `docs/README.md`
|
|
159
|
+
- Getting started: `docs/getting-started.md`
|
|
160
|
+
- API reference: `docs/api.md`
|
|
161
|
+
- Benchmarking flow: `docs/benchmarking.md`
|
|
162
|
+
- Zig acceleration: `docs/zig-acceleration.md`
|
|
163
|
+
|
|
164
|
+
## Maintainer Files
|
|
165
|
+
|
|
166
|
+
- Changelog: `CHANGELOG.md`
|
|
167
|
+
- Contributing guide: `CONTRIBUTING.md`
|
|
168
|
+
- Code of Conduct: `CODE_OF_CONDUCT.md`
|
|
169
|
+
- Security policy: `SECURITY.md`
|
|
170
|
+
- Support policy: `SUPPORT.md`
|
|
171
|
+
- License: `LICENSE`
|
|
172
|
+
|
|
173
|
+
## Local Commands
|
|
174
|
+
|
|
175
|
+
```bash
|
|
176
|
+
bun run test
|
|
177
|
+
bun run typecheck
|
|
178
|
+
bun run docs:api:generate
|
|
179
|
+
bun run docs:coverage:check
|
|
180
|
+
bun run bench
|
|
181
|
+
bun run bench:heart:classification
|
|
182
|
+
bun run bench:heart:tree
|
|
183
|
+
bun run bench:ci
|
|
184
|
+
bun run bench:ci:native
|
|
185
|
+
bun run bench:snapshot
|
|
186
|
+
bun run native:build
|
|
187
|
+
```
|
package/binding.gyp
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
{
|
|
2
|
+
"targets": [
|
|
3
|
+
{
|
|
4
|
+
"target_name": "bun_scikit_node_addon",
|
|
5
|
+
"sources": [ "src/native/node-addon/bun_scikit_addon.cpp" ],
|
|
6
|
+
"include_dirs": [
|
|
7
|
+
"<!@(node -p \"require('node-addon-api').include\")"
|
|
8
|
+
],
|
|
9
|
+
"dependencies": [
|
|
10
|
+
"<!(node -p \"require('node-addon-api').gyp\")"
|
|
11
|
+
],
|
|
12
|
+
"defines": [ "NAPI_DISABLE_CPP_EXCEPTIONS" ],
|
|
13
|
+
"cflags_cc!": [ "-fno-exceptions" ],
|
|
14
|
+
"msvs_settings": {
|
|
15
|
+
"VCCLCompilerTool": {
|
|
16
|
+
"ExceptionHandling": 0
|
|
17
|
+
}
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
]
|
|
21
|
+
}
|
package/docs/README.md
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
# Documentation
|
|
2
|
+
|
|
3
|
+
- Getting started: `docs/getting-started.md`
|
|
4
|
+
- API reference: `docs/api.md`
|
|
5
|
+
- Generated API docs (Typedoc output): `docs/api-reference/`
|
|
6
|
+
- Benchmarking and CI snapshot flow: `docs/benchmarking.md`
|
|
7
|
+
- Zig native acceleration: `docs/zig-acceleration.md`
|
package/docs/native-abi.md
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# Native ABI Contract
|
|
2
|
+
|
|
3
|
+
This document defines the stable ABI boundary between JavaScript runtimes (Bun/Node) and the Zig compute core.
|
|
4
|
+
|
|
5
|
+
## ABI Version
|
|
6
|
+
|
|
7
|
+
- Exported symbol: `bun_scikit_abi_version() -> u32`
|
|
8
|
+
- Current version: `1`
|
|
9
|
+
- JavaScript bridges must refuse to load mismatched ABI versions.
|
|
10
|
+
|
|
11
|
+
## Status Codes
|
|
12
|
+
|
|
13
|
+
Zig exports numeric status constants:
|
|
14
|
+
|
|
15
|
+
- `bun_scikit_status_ok()`
|
|
16
|
+
- `bun_scikit_status_invalid_handle()`
|
|
17
|
+
- `bun_scikit_status_invalid_shape()`
|
|
18
|
+
- `bun_scikit_status_allocation_failed()`
|
|
19
|
+
- `bun_scikit_status_fit_failed()`
|
|
20
|
+
- `bun_scikit_status_symbol_unavailable()`
|
|
21
|
+
|
|
22
|
+
## Handle Lifecycle
|
|
23
|
+
|
|
24
|
+
All model handles are opaque native pointers represented as `usize` in native code and `BigInt` in JS.
|
|
25
|
+
|
|
26
|
+
Lifecycle:
|
|
27
|
+
|
|
28
|
+
1. `*_model_create(...) -> handle`
|
|
29
|
+
2. `*_model_fit(...)` / `*_model_predict(...)` / `*_model_copy_coefficients(...)`
|
|
30
|
+
3. `*_model_destroy(handle)` exactly once
|
|
31
|
+
|
|
32
|
+
## Memory Ownership Rules
|
|
33
|
+
|
|
34
|
+
- Input tensors (`x`, `y`) are caller-owned contiguous typed arrays.
|
|
35
|
+
- Output tensors (`out`) are caller-owned typed arrays preallocated to required size.
|
|
36
|
+
- Native code does not own caller buffers and must never free them.
|
|
37
|
+
- Native model state is owned by Zig and released only via `*_model_destroy`.
|
|
38
|
+
|
|
39
|
+
## Tensor Layout
|
|
40
|
+
|
|
41
|
+
- `x` must be row-major contiguous `Float64Array` with shape `[n_samples, n_features]`.
|
|
42
|
+
- `y` is contiguous `Float64Array` (`LinearRegression`/`LogisticRegression`) or `Uint8Array` for classifier labels where required.
|
|
43
|
+
|
|
44
|
+
## Runtime Bridges
|
|
45
|
+
|
|
46
|
+
- Bun FFI bridge: `src/native/zigKernels.ts` (`bun:ffi`).
|
|
47
|
+
- Node-API bridge addon: `src/native/node-addon/bun_scikit_addon.cpp`.
|
|
48
|
+
|
|
49
|
+
Environment controls:
|
|
50
|
+
|
|
51
|
+
- `BUN_SCIKIT_NATIVE_BRIDGE=node-api|ffi` (default tries Node-API then FFI)
|
|
52
|
+
- `BUN_SCIKIT_NODE_ADDON=/absolute/path/to/bun_scikit_node_addon.node`
|
|
53
|
+
- `BUN_SCIKIT_ZIG_LIB=/absolute/path/to/bun_scikit_kernels.<ext>`
|
package/index.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from "./src/index";
|
package/package.json
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "bun-scikit",
|
|
3
|
+
"version": "0.1.1",
|
|
4
|
+
"description": "A scikit-learn-inspired machine learning library for Bun/TypeScript.",
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"module": "index.ts",
|
|
7
|
+
"repository": {
|
|
8
|
+
"type": "git",
|
|
9
|
+
"url": "git+https://github.com/Seyamalam/bun-scikit.git"
|
|
10
|
+
},
|
|
11
|
+
"homepage": "https://github.com/Seyamalam/bun-scikit#readme",
|
|
12
|
+
"bugs": {
|
|
13
|
+
"url": "https://github.com/Seyamalam/bun-scikit/issues"
|
|
14
|
+
},
|
|
15
|
+
"keywords": [
|
|
16
|
+
"machine-learning",
|
|
17
|
+
"scikit-learn",
|
|
18
|
+
"bun",
|
|
19
|
+
"typescript",
|
|
20
|
+
"linear-regression",
|
|
21
|
+
"logistic-regression",
|
|
22
|
+
"decision-tree",
|
|
23
|
+
"random-forest",
|
|
24
|
+
"knn",
|
|
25
|
+
"classification",
|
|
26
|
+
"preprocessing"
|
|
27
|
+
],
|
|
28
|
+
"exports": {
|
|
29
|
+
".": "./index.ts"
|
|
30
|
+
},
|
|
31
|
+
"files": [
|
|
32
|
+
"index.ts",
|
|
33
|
+
"src",
|
|
34
|
+
"zig",
|
|
35
|
+
"scripts",
|
|
36
|
+
"binding.gyp",
|
|
37
|
+
"docs/native-abi.md",
|
|
38
|
+
"README.md",
|
|
39
|
+
"LICENSE"
|
|
40
|
+
],
|
|
41
|
+
"type": "module",
|
|
42
|
+
"engines": {
|
|
43
|
+
"bun": ">=1.3.9"
|
|
44
|
+
},
|
|
45
|
+
"scripts": {
|
|
46
|
+
"install": "bun run scripts/install-native.ts",
|
|
47
|
+
"test": "bun test",
|
|
48
|
+
"typecheck": "bunx tsc --noEmit",
|
|
49
|
+
"bench": "bun run bench/heart.bench.ts",
|
|
50
|
+
"bench:heart": "bun run bench/heart.bench.ts",
|
|
51
|
+
"bench:heart:classification": "bun run bench/heart-classification.bench.ts",
|
|
52
|
+
"bench:heart:tree": "bun run bench/heart-tree-classification.bench.ts",
|
|
53
|
+
"bench:synthetic": "bun run bench/linear-regression.bench.ts",
|
|
54
|
+
"bench:ci": "bun run bench/run-ci-benchmarks.ts --output bench/results/heart-ci-current.json",
|
|
55
|
+
"bench:ci:native": "bun run native:build && bun run bench:ci",
|
|
56
|
+
"bench:snapshot": "bun run bench/run-ci-benchmarks.ts --output bench/results/heart-ci-latest.json && bun run bench:sync-readme && bun run bench:history:update",
|
|
57
|
+
"bench:sync-readme": "bun run scripts/sync-benchmark-readme.ts",
|
|
58
|
+
"bench:readme:check": "bun run scripts/sync-benchmark-readme.ts --check",
|
|
59
|
+
"bench:health": "bun run scripts/check-benchmark-health.ts",
|
|
60
|
+
"bench:history:update": "bun run scripts/update-benchmark-history.ts",
|
|
61
|
+
"native:build": "bun run scripts/build-zig-kernels.ts",
|
|
62
|
+
"native:build:node-addon": "bun run scripts/build-node-addon.ts",
|
|
63
|
+
"native:build:all": "bun run native:build && bun run native:build:node-addon",
|
|
64
|
+
"native:package:assets": "bun run scripts/package-native-artifacts.ts",
|
|
65
|
+
"docs:api:generate": "typedoc --entryPointStrategy resolve --entryPoints src/index.ts --out docs/api-reference --readme none",
|
|
66
|
+
"docs:coverage:check": "bun run scripts/check-api-docs-coverage.ts",
|
|
67
|
+
"build:bench:bytecode": "bun build ./bench/run-ci-benchmarks.ts --target=bun --bytecode --minify --outdir=./dist/bench",
|
|
68
|
+
"build:bench:compiled": "bun build ./bench/run-ci-benchmarks.ts --target=bun --compile --bytecode --minify --outfile=./dist/bench/bench-ci"
|
|
69
|
+
},
|
|
70
|
+
"devDependencies": {
|
|
71
|
+
"@types/bun": "latest",
|
|
72
|
+
"node-addon-api": "^8.3.1",
|
|
73
|
+
"typedoc": "^0.28.14",
|
|
74
|
+
"typescript": "^5.9.2"
|
|
75
|
+
}
|
|
76
|
+
}
|
package/scripts/build-node-addon.ts
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { cp, mkdir } from "node:fs/promises";
|
|
2
|
+
import { resolve } from "node:path";
|
|
3
|
+
|
|
4
|
+
/**
 * Rebuilds the Node-API addon with `node-gyp` and copies the resulting
 * binary into `dist/native`.
 *
 * The child process inherits stdout/stderr so compiler output is visible.
 *
 * @returns A promise that resolves once the addon has been copied.
 * @throws {Error} When `node-gyp rebuild` exits with a non-zero code.
 */
async function main(): Promise<void> {
  const rebuild = Bun.spawn(["bunx", "node-gyp", "rebuild"], {
    stdout: "inherit",
    stderr: "inherit",
  });

  const status = await rebuild.exited;
  if (status !== 0) {
    throw new Error(`node-gyp rebuild failed with exit code ${status}.`);
  }

  // node-gyp writes the binary to build/Release; it is copied to dist/native.
  const nativeDir = resolve("dist", "native");
  const builtAddon = resolve("build", "Release", "bun_scikit_node_addon.node");
  const installedAddon = resolve(nativeDir, "bun_scikit_node_addon.node");

  await mkdir(nativeDir, { recursive: true });
  await cp(builtAddon, installedAddon, { force: true });
  console.log(`Built Node-API addon: ${installedAddon}`);
}
|
|
22
|
+
|
|
23
|
+
// Run the build; on any failure print the error and exit with status 1.
main().catch((failure) => {
  console.error(failure);
  process.exit(1);
});
|
package/scripts/build-zig-kernels.ts
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import { mkdir } from "node:fs/promises";
|
|
2
|
+
import { resolve } from "node:path";
|
|
3
|
+
|
|
4
|
+
/**
 * Returns the shared-library file extension (without a leading dot) for the
 * current `process.platform`.
 *
 * @returns `"dll"` on win32, `"dylib"` on darwin, and `"so"` everywhere else.
 */
function sharedLibraryExtension(): string {
  if (process.platform === "win32") {
    return "dll";
  }
  if (process.platform === "darwin") {
    return "dylib";
  }
  return "so";
}
|
|
14
|
+
|
|
15
|
+
/**
 * Compiles `zig/kernels.zig` into a dynamic library under `dist/native`.
 *
 * The output file is named `bun_scikit_kernels.<ext>`, where `<ext>` comes
 * from `sharedLibraryExtension()`. The Zig process inherits stdout/stderr.
 *
 * @returns A promise that resolves once the library has been built.
 * @throws {Error} When `zig build-lib` exits with a non-zero code.
 */
async function main(): Promise<void> {
  const nativeDir = resolve("dist/native");
  const libraryPath = resolve(nativeDir, `bun_scikit_kernels.${sharedLibraryExtension()}`);

  // The output directory must exist before Zig is asked to emit into it.
  await mkdir(nativeDir, { recursive: true });

  const zigCommand = [
    "zig",
    "build-lib",
    "zig/kernels.zig",
    "-dynamic",
    "-O",
    "ReleaseFast",
    "-fstrip",
    `-femit-bin=${libraryPath}`,
  ];
  const compile = Bun.spawn(zigCommand, {
    stdout: "inherit",
    stderr: "inherit",
  });

  const status = await compile.exited;
  if (status !== 0) {
    throw new Error(`zig build-lib failed with exit code ${status}.`);
  }

  console.log(`Built Zig kernels: ${libraryPath}`);
}
|
|
46
|
+
|
|
47
|
+
// Run the build; on any failure print the error and exit with status 1.
main().catch((failure) => {
  console.error(failure);
  process.exit(1);
});
|
package/scripts/check-api-docs-coverage.ts
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { readFile } from "node:fs/promises";
|
|
2
|
+
import { resolve } from "node:path";
|
|
3
|
+
|
|
4
|
+
// Absolute path (resolved against the current working directory) of the
// barrel file whose `export * from` lines are scanned.
const INDEX_PATH = resolve("src/index.ts");
// Absolute path of the hand-written API document that must mention each symbol.
const DOCS_PATH = resolve("docs/api.md");
|
|
6
|
+
|
|
7
|
+
/**
 * Removes a single leading `./` or `/` from a module path literal.
 *
 * @param pathLiteral - Module path as written in an export statement.
 * @returns The path without its leading `./` or `/`; other inputs are
 *   returned unchanged.
 */
function normalizeModulePath(pathLiteral: string): string {
  if (pathLiteral.startsWith("./")) {
    return pathLiteral.slice(2);
  }
  if (pathLiteral.startsWith("/")) {
    return pathLiteral.slice(1);
  }
  return pathLiteral;
}
|
|
10
|
+
|
|
11
|
+
/**
 * Collects the identifiers introduced by `export <declaration>` statements
 * in a module's source text.
 *
 * Recognised declaration forms: `class` / `abstract class`, `function` /
 * `async function`, `const`, `const enum`, `let`, `var`, `type`, `interface`
 * and `enum`, each optionally preceded by `declare`. Re-exports
 * (`export { ... }`, `export * from`) and `export default` are not matched.
 *
 * This is a textual scan, not a parse, so an `export ...` inside a comment or
 * string is also picked up.
 *
 * @param source - Full text of a TypeScript module.
 * @returns Declared names in order of appearance (duplicates are kept).
 */
function extractExportedSymbolNames(source: string): string[] {
  // `const\s+enum` is listed before `const` so that `export const enum Foo`
  // yields `Foo` instead of treating the keyword `enum` as the symbol name.
  const exportDeclaration =
    /export\s+(?:declare\s+)?(?:(?:abstract\s+)?class|(?:async\s+)?function|const\s+enum|const|let|var|type|interface|enum)\s+([A-Za-z0-9_]+)/g;
  return Array.from(source.matchAll(exportDeclaration), (match) => match[1]);
}
|
|
17
|
+
|
|
18
|
+
// Load the barrel file and the API document once, up front.
const indexSource = await readFile(INDEX_PATH, "utf-8");
const docsSource = await readFile(DOCS_PATH, "utf-8");

// Every `export * from "./<module>";` line in the barrel names a module
// (relative to `src/`) whose own exports must be documented.
const modulePaths: string[] = [];
for (const reExport of indexSource.matchAll(/export\s+\*\s+from\s+"\.\/(.+)";/g)) {
  modulePaths.push(normalizeModulePath(reExport[1]));
}

// Union of declared export names across all re-exported modules.
const symbolNames = new Set<string>();
for (const modulePath of modulePaths) {
  const moduleFile = resolve("src", `${modulePath}.ts`);
  const moduleSource = await readFile(moduleFile, "utf-8");
  extractExportedSymbolNames(moduleSource).forEach((name) => {
    symbolNames.add(name);
  });
}
|
|
32
|
+
|
|
33
|
+
/**
 * Escapes regular-expression metacharacters so `literal` can be embedded in
 * a `RegExp` pattern and matched verbatim.
 *
 * @param literal - Text to be matched literally.
 * @returns `literal` with each of `. * + ? ^ $ { } ( ) | [ ] \` prefixed by a
 *   backslash.
 */
function escapeRegExp(literal: string): string {
  return literal.replace(/[.*+?^${}()|[\]\\]/g, (special) => `\\${special}`);
}
|
|
36
|
+
|
|
37
|
+
// A symbol counts as documented when it appears as a whole word somewhere
// inside a backtick-delimited span of the docs.
const missing: string[] = [];
for (const name of symbolNames) {
  const mention = new RegExp("`[^`]*\\b" + escapeRegExp(name) + "\\b[^`]*`");
  if (!mention.test(docsSource)) {
    missing.push(name);
  }
}
missing.sort((a, b) => a.localeCompare(b));

if (missing.length > 0) {
  console.error("docs/api.md is missing exported API symbols:");
  missing.forEach((name) => {
    console.error(`- ${name}`);
  });
  process.exit(1);
}

console.log(`API docs coverage check passed for ${symbolNames.size} exported symbols.`);
|
package/scripts/check-benchmark-health.ts
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
import { readFile } from "node:fs/promises";
|
|
2
|
+
import { resolve } from "node:path";
|
|
3
|
+
|
|
4
|
+
/** Timing fields common to every benchmark result entry. */
interface SharedBenchmarkResult {
  /** Label of the implementation that produced this result. */
  implementation: string;
  /** Median fit time; this script requires it to be > 0. */
  fitMsMedian: number;
  /** Median predict time; this script requires it to be > 0. */
  predictMsMedian: number;
}

/** Result entry of the regression suite. */
interface RegressionBenchmarkResult extends SharedBenchmarkResult {
  mse: number;
  r2: number;
}

/** Result entry of the classification and tree-classification suites. */
interface ClassificationBenchmarkResult extends SharedBenchmarkResult {
  accuracy: number;
  f1: number;
}

/** Regression metric differences relative to scikit-learn. */
interface RegressionDeltas {
  mseDeltaVsSklearn: number;
  r2DeltaVsSklearn: number;
}

/** Classification metric differences relative to scikit-learn. */
interface ClassificationDeltas {
  accuracyDeltaVsSklearn: number;
  f1DeltaVsSklearn: number;
}

/** One tree-based model benchmarked in both implementations. */
interface TreeModelComparison {
  bun: ClassificationBenchmarkResult;
  sklearn: ClassificationBenchmarkResult;
  comparison: ClassificationDeltas;
}

/**
 * Shape of the benchmark snapshot JSON read by this script. Result tuples
 * are destructured as `[bun, sklearn]`, and `models` as
 * `[decisionTree, randomForest]`.
 */
interface BenchmarkSnapshot {
  suites: {
    regression: {
      results: [RegressionBenchmarkResult, RegressionBenchmarkResult];
      comparison: RegressionDeltas;
    };
    classification: {
      results: [ClassificationBenchmarkResult, ClassificationBenchmarkResult];
      comparison: ClassificationDeltas;
    };
    treeClassification: {
      models: [TreeModelComparison, TreeModelComparison];
    };
  };
}
|
|
50
|
+
|
|
51
|
+
/**
 * Throws unless `delta` is a real number whose magnitude is at most `limit`.
 *
 * A bare `Math.abs(delta) > limit` guard is not enough: it is false for
 * `NaN` and `undefined`, and also for `null` because `Math.abs(null)` is 0.
 * `JSON.stringify` writes `NaN` as `null`, so a broken delta in the snapshot
 * would otherwise pass the health check silently.
 *
 * @param label - Human-readable metric name used in the error message.
 * @param delta - Value read from the snapshot (may be non-numeric at runtime).
 * @param limit - Largest accepted absolute difference.
 * @throws {Error} When `delta` is missing, not a number, or exceeds `limit`.
 */
function assertDeltaWithin(label: string, delta: number, limit: number): void {
  if (typeof delta !== "number" || Number.isNaN(delta)) {
    throw new Error(`${label} delta is missing or not a number: ${delta}.`);
  }
  if (Math.abs(delta) > limit) {
    throw new Error(`${label} delta too large: ${delta}.`);
  }
}

// `--input <path>` selects the snapshot; otherwise the current CI result is used.
const pathArgIndex = Bun.argv.indexOf("--input");
const inputPath =
  pathArgIndex !== -1 && pathArgIndex + 1 < Bun.argv.length
    ? resolve(Bun.argv[pathArgIndex + 1])
    : resolve("bench/results/heart-ci-current.json");

const snapshot = JSON.parse(await readFile(inputPath, "utf-8")) as BenchmarkSnapshot;

const [bunRegression, sklearnRegression] = snapshot.suites.regression.results;
const [bunClassification, sklearnClassification] = snapshot.suites.classification.results;
const [decisionTree, randomForest] = snapshot.suites.treeClassification.models;

// Every timing must be strictly positive. The negated form also rejects
// NaN, null and undefined.
for (const result of [
  bunRegression,
  sklearnRegression,
  bunClassification,
  sklearnClassification,
  decisionTree.bun,
  decisionTree.sklearn,
  randomForest.bun,
  randomForest.sklearn,
]) {
  if (!(result.fitMsMedian > 0 && result.predictMsMedian > 0)) {
    throw new Error(`Benchmark timings must be positive for ${result.implementation}.`);
  }
}

if (
  !Number.isFinite(bunRegression.mse) ||
  !Number.isFinite(sklearnRegression.mse) ||
  !Number.isFinite(bunRegression.r2) ||
  !Number.isFinite(sklearnRegression.r2)
) {
  throw new Error("Regression metrics must be finite for both implementations.");
}

if (
  !Number.isFinite(bunClassification.accuracy) ||
  !Number.isFinite(sklearnClassification.accuracy) ||
  !Number.isFinite(bunClassification.f1) ||
  !Number.isFinite(sklearnClassification.f1)
) {
  throw new Error("Classification metrics must be finite for both implementations.");
}

// Accepted absolute difference between bun-scikit and scikit-learn per
// metric, checked in this order; the first violation aborts the script.
const regressionDeltas = snapshot.suites.regression.comparison;
const classificationDeltas = snapshot.suites.classification.comparison;
const deltaChecks: Array<[label: string, delta: number, limit: number]> = [
  ["Regression MSE", regressionDeltas.mseDeltaVsSklearn, 0.01],
  ["Regression R2", regressionDeltas.r2DeltaVsSklearn, 0.01],
  ["Classification accuracy", classificationDeltas.accuracyDeltaVsSklearn, 0.05],
  ["Classification F1", classificationDeltas.f1DeltaVsSklearn, 0.05],
  ["DecisionTree accuracy", decisionTree.comparison.accuracyDeltaVsSklearn, 0.08],
  ["DecisionTree F1", decisionTree.comparison.f1DeltaVsSklearn, 0.08],
  ["RandomForest accuracy", randomForest.comparison.accuracyDeltaVsSklearn, 0.08],
  ["RandomForest F1", randomForest.comparison.f1DeltaVsSklearn, 0.08],
];

for (const [label, delta, limit] of deltaChecks) {
  assertDeltaWithin(label, delta, limit);
}

console.log("Benchmark comparison health checks passed.");
|