bun-scikit 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +73 -137
- package/package.json +3 -2
- package/scripts/build-node-addon.ts +17 -1
- package/scripts/check-benchmark-health.ts +112 -6
- package/scripts/sync-benchmark-readme.ts +56 -0
- package/src/dummy/DummyClassifier.ts +190 -0
- package/src/dummy/DummyRegressor.ts +108 -0
- package/src/ensemble/RandomForestClassifier.ts +154 -8
- package/src/ensemble/RandomForestRegressor.ts +12 -8
- package/src/feature_selection/VarianceThreshold.ts +88 -0
- package/src/index.ts +23 -0
- package/src/metrics/classification.ts +30 -0
- package/src/metrics/regression.ts +40 -0
- package/src/model_selection/RandomizedSearchCV.ts +269 -0
- package/src/native/node-addon/bun_scikit_addon.cpp +307 -0
- package/src/native/zigKernels.ts +122 -4
- package/src/preprocessing/Binarizer.ts +46 -0
- package/src/preprocessing/LabelEncoder.ts +62 -0
- package/src/preprocessing/MaxAbsScaler.ts +77 -0
- package/src/preprocessing/Normalizer.ts +66 -0
- package/src/tree/DecisionTreeClassifier.ts +159 -4
- package/zig/kernels.zig +333 -89
package/README.md
CHANGED
|
@@ -3,185 +3,121 @@
|
|
|
3
3
|
[](https://github.com/Seyamalam/bun-scikit/actions/workflows/ci.yml)
|
|
4
4
|
[](https://github.com/Seyamalam/bun-scikit/actions/workflows/benchmark-snapshot.yml)
|
|
5
5
|
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
## Features
|
|
9
|
-
|
|
10
|
-
- `StandardScaler`
|
|
11
|
-
- `LinearRegression` (native Zig `normal` solver)
|
|
12
|
-
- `LogisticRegression` (binary classification, native Zig)
|
|
13
|
-
- `KNeighborsClassifier`
|
|
14
|
-
- `DecisionTreeClassifier`
|
|
15
|
-
- `RandomForestClassifier`
|
|
16
|
-
- `trainTestSplit`
|
|
17
|
-
- Regression metrics: `meanSquaredError`, `meanAbsoluteError`, `r2Score`
|
|
18
|
-
- Classification metrics: `accuracyScore`, `precisionScore`, `recallScore`, `f1Score`
|
|
19
|
-
- Dataset-driven benchmark and CI comparison against Python `scikit-learn`
|
|
20
|
-
|
|
21
|
-
`test_data/heart.csv` is used for integration testing and benchmark comparison.
|
|
22
|
-
|
|
23
|
-
## Native Zig Backend
|
|
24
|
-
|
|
25
|
-
`LinearRegression` (`solver: "normal"`) and `LogisticRegression` require native Zig kernels.
|
|
26
|
-
|
|
27
|
-
```bash
|
|
28
|
-
bun run native:build
|
|
29
|
-
```
|
|
30
|
-
|
|
31
|
-
Optional Node-API bridge (experimental):
|
|
32
|
-
|
|
33
|
-
```bash
|
|
34
|
-
bun run native:build:node-addon
|
|
35
|
-
```
|
|
36
|
-
|
|
37
|
-
```ts
|
|
38
|
-
const linear = new LinearRegression({ solver: "normal" });
|
|
39
|
-
const logistic = new LogisticRegression();
|
|
40
|
-
|
|
41
|
-
linear.fit(XTrain, yTrain);
|
|
42
|
-
logistic.fit(XTrain, yTrain);
|
|
43
|
-
console.log(linear.fitBackend_, linear.fitBackendLibrary_);
|
|
44
|
-
console.log(logistic.fitBackend_, logistic.fitBackendLibrary_);
|
|
45
|
-
```
|
|
46
|
-
|
|
47
|
-
If native kernels are missing, `fit()` throws with guidance to run `bun run native:build`.
|
|
48
|
-
|
|
49
|
-
Bridge selection:
|
|
50
|
-
|
|
51
|
-
- `BUN_SCIKIT_NATIVE_BRIDGE=node-api|ffi` (`node-api` is attempted first when available)
|
|
52
|
-
- `BUN_SCIKIT_NODE_ADDON=/absolute/path/to/bun_scikit_node_addon.node`
|
|
53
|
-
- `BUN_SCIKIT_ZIG_LIB=/absolute/path/to/bun_scikit_kernels.<ext>`
|
|
54
|
-
|
|
55
|
-
Native ABI contract: `docs/native-abi.md`
|
|
6
|
+
Scikit-learn-inspired machine learning for Bun + TypeScript, with native Zig acceleration for core training paths.
|
|
56
7
|
|
|
57
8
|
## Install
|
|
58
9
|
|
|
59
10
|
```bash
|
|
60
|
-
bun
|
|
11
|
+
bun add bun-scikit
|
|
61
12
|
```
|
|
62
13
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
- Prebuilt native binaries for `linux-x64` and `windows-x64` are bundled in the npm package.
|
|
66
|
-
- No `bun pm trust` step is required for normal install/use.
|
|
67
|
-
- macOS prebuilt binaries are currently not published.
|
|
68
|
-
|
|
69
|
-
## Usage
|
|
14
|
+
## Quick Start
|
|
70
15
|
|
|
71
16
|
```ts
|
|
72
17
|
import {
|
|
73
18
|
LinearRegression,
|
|
19
|
+
LogisticRegression,
|
|
74
20
|
StandardScaler,
|
|
75
|
-
meanSquaredError,
|
|
76
21
|
trainTestSplit,
|
|
22
|
+
meanSquaredError,
|
|
23
|
+
accuracyScore,
|
|
77
24
|
} from "bun-scikit";
|
|
78
25
|
|
|
79
|
-
const X = [
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
[3, 4],
|
|
83
|
-
[4, 5],
|
|
84
|
-
];
|
|
85
|
-
const y = [5, 7, 9, 11];
|
|
26
|
+
const X = [[1], [2], [3], [4], [5], [6]];
|
|
27
|
+
const yReg = [3, 5, 7, 9, 11, 13];
|
|
28
|
+
const yCls = [0, 0, 0, 1, 1, 1];
|
|
86
29
|
|
|
87
30
|
const scaler = new StandardScaler();
|
|
88
|
-
const
|
|
89
|
-
|
|
90
|
-
|
|
31
|
+
const Xs = scaler.fitTransform(X);
|
|
32
|
+
|
|
33
|
+
const { XTrain, XTest, yTrain, yTest } = trainTestSplit(Xs, yReg, {
|
|
34
|
+
testSize: 0.33,
|
|
91
35
|
randomState: 42,
|
|
92
36
|
});
|
|
93
37
|
|
|
94
|
-
const
|
|
95
|
-
|
|
96
|
-
|
|
38
|
+
const reg = new LinearRegression({ solver: "normal" });
|
|
39
|
+
reg.fit(XTrain, yTrain);
|
|
40
|
+
console.log("MSE:", meanSquaredError(yTest, reg.predict(XTest)));
|
|
97
41
|
|
|
98
|
-
|
|
42
|
+
const clf = new LogisticRegression({
|
|
43
|
+
solver: "gd",
|
|
44
|
+
learningRate: 0.8,
|
|
45
|
+
maxIter: 100,
|
|
46
|
+
tolerance: 1e-5,
|
|
47
|
+
});
|
|
48
|
+
clf.fit(Xs, yCls);
|
|
49
|
+
console.log("Accuracy:", accuracyScore(yCls, clf.predict(Xs)));
|
|
99
50
|
```
|
|
100
51
|
|
|
101
|
-
##
|
|
52
|
+
## Included APIs
|
|
102
53
|
|
|
103
|
-
|
|
104
|
-
|
|
54
|
+
- Models: `LinearRegression`, `LogisticRegression`, `KNeighborsClassifier`, `DecisionTreeClassifier`, `RandomForestClassifier`, plus additional parity models (`LinearSVC`, `GaussianNB`, `SGDClassifier`, `SGDRegressor`, regressors for tree/forest).
|
|
55
|
+
- Baselines: `DummyClassifier`, `DummyRegressor`.
|
|
56
|
+
- Preprocessing: `StandardScaler`, `MinMaxScaler`, `RobustScaler`, `MaxAbsScaler`, `Normalizer`, `Binarizer`, `LabelEncoder`, `PolynomialFeatures`, `SimpleImputer`, `OneHotEncoder`.
|
|
57
|
+
- Composition: `Pipeline`, `ColumnTransformer`, `FeatureUnion`.
|
|
58
|
+
- Feature selection: `VarianceThreshold`.
|
|
59
|
+
- Model selection: `trainTestSplit`, `KFold`, stratified/repeated splitters, `crossValScore`, `GridSearchCV`, `RandomizedSearchCV`.
|
|
60
|
+
- Metrics: regression and classification metrics, including `logLoss`, `rocAucScore`, `confusionMatrix`, `classificationReport`, `balancedAccuracyScore`, `matthewsCorrcoef`, `brierScoreLoss`, `meanAbsolutePercentageError`, and `explainedVarianceScore`.
|
|
105
61
|
|
|
106
|
-
|
|
107
|
-
Benchmark snapshot source: `bench/results/heart-ci-latest.json` (generated in CI workflow `Benchmark Snapshot`).
|
|
108
|
-
Dataset: `test_data/heart.csv` (1025 samples, 13 features, test fraction 0.2).
|
|
62
|
+
## Scikit Parity Matrix
|
|
109
63
|
|
|
110
|
-
|
|
64
|
+
| Area | Status |
|
|
65
|
+
| --- | --- |
|
|
66
|
+
| Linear models | `LinearRegression`, `LogisticRegression`, `SGDClassifier`, `SGDRegressor`, `LinearSVC` |
|
|
67
|
+
| Tree/ensemble | `DecisionTreeClassifier`, `DecisionTreeRegressor`, `RandomForestClassifier`, `RandomForestRegressor` |
|
|
68
|
+
| Neighbors / Bayes | `KNeighborsClassifier`, `GaussianNB` |
|
|
69
|
+
| Baselines | `DummyClassifier`, `DummyRegressor` |
|
|
70
|
+
| Preprocessing | `StandardScaler`, `MinMaxScaler`, `RobustScaler`, `MaxAbsScaler`, `Normalizer`, `Binarizer`, `LabelEncoder`, `PolynomialFeatures`, `SimpleImputer`, `OneHotEncoder` |
|
|
71
|
+
| Feature selection | `VarianceThreshold` |
|
|
72
|
+
| Model selection | `trainTestSplit`, `KFold`, `StratifiedKFold`, `StratifiedShuffleSplit`, `RepeatedKFold`, `RepeatedStratifiedKFold`, `crossValScore`, `GridSearchCV`, `RandomizedSearchCV` |
|
|
73
|
+
| Metrics (regression) | `meanSquaredError`, `meanAbsoluteError`, `r2Score`, `meanAbsolutePercentageError`, `explainedVarianceScore` |
|
|
74
|
+
| Metrics (classification) | `accuracyScore`, `precisionScore`, `recallScore`, `f1Score`, `balancedAccuracyScore`, `matthewsCorrcoef`, `logLoss`, `brierScoreLoss`, `rocAucScore`, `confusionMatrix`, `classificationReport` |
|
|
111
75
|
|
|
112
|
-
|
|
113
|
-
|---|---|---:|---:|---:|---:|
|
|
114
|
-
| bun-scikit | StandardScaler + LinearRegression(normal) | 0.2103 | 0.0216 | 0.117545 | 0.529539 |
|
|
115
|
-
| python-scikit-learn | StandardScaler + LinearRegression | 0.3201 | 0.0365 | 0.117545 | 0.529539 |
|
|
76
|
+
Near-term parity gaps vs scikit-learn include clustering, decomposition, calibration, advanced feature selection, and probability calibration/meta-estimators.
|
|
116
77
|
|
|
117
|
-
|
|
118
|
-
Bun predict speedup vs scikit-learn: 1.684x
|
|
119
|
-
MSE delta (bun - sklearn): 6.362e-14
|
|
120
|
-
R2 delta (bun - sklearn): -2.539e-13
|
|
78
|
+
## Native Runtime
|
|
121
79
|
|
|
122
|
-
|
|
80
|
+
- Prebuilt binaries are bundled in the npm package for:
|
|
81
|
+
- `linux-x64`
|
|
82
|
+
- `windows-x64`
|
|
83
|
+
- No `bun pm trust` step is required for standard install/use.
|
|
84
|
+
- macOS prebuilt binaries are not published yet.
|
|
123
85
|
|
|
124
|
-
|
|
125
|
-
|---|---|---:|---:|---:|---:|
|
|
126
|
-
| bun-scikit | StandardScaler + LogisticRegression(gd,zig) | 0.4868 | 0.0282 | 0.863415 | 0.876106 |
|
|
127
|
-
| python-scikit-learn | StandardScaler + LogisticRegression(lbfgs) | 1.1246 | 0.0724 | 0.863415 | 0.875000 |
|
|
86
|
+
Optional env vars:
|
|
128
87
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
88
|
+
- `BUN_SCIKIT_NATIVE_BRIDGE=node-api|ffi`
|
|
89
|
+
- `BUN_SCIKIT_NODE_ADDON=/absolute/path/to/bun_scikit_node_addon.node`
|
|
90
|
+
- `BUN_SCIKIT_ZIG_LIB=/absolute/path/to/bun_scikit_kernels.<ext>`
|
|
91
|
+
- `BUN_SCIKIT_TREE_BACKEND=zig` (opt-in native tree/forest training path; default keeps JS-fast tree splitter)
|
|
133
92
|
|
|
134
|
-
|
|
93
|
+
## Performance Snapshot
|
|
135
94
|
|
|
136
|
-
|
|
137
|
-
|---|---|---:|---:|---:|---:|
|
|
138
|
-
| DecisionTreeClassifier(maxDepth=8) | bun-scikit | 0.8062 | 0.0190 | 0.946341 | 0.948837 |
|
|
139
|
-
| DecisionTreeClassifier | python-scikit-learn | 1.4781 | 0.0999 | 0.931707 | 0.933962 |
|
|
140
|
-
| RandomForestClassifier(nEstimators=80,maxDepth=8) | bun-scikit | 27.6225 | 1.8535 | 0.990244 | 0.990566 |
|
|
141
|
-
| RandomForestClassifier | python-scikit-learn | 172.9585 | 6.4850 | 0.995122 | 0.995261 |
|
|
95
|
+
Latest CI snapshot on `test_data/heart.csv` vs Python scikit-learn:
|
|
142
96
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
DecisionTree
|
|
146
|
-
|
|
97
|
+
- Regression: fit `1.67x`, predict `1.84x`
|
|
98
|
+
- Classification: fit `1.78x`, predict `2.66x`
|
|
99
|
+
- DecisionTree (`js-fast`): fit `1.54x`, predict `4.06x`
|
|
100
|
+
- RandomForest (`js-fast`): fit `2.59x`, predict `1.29x`
|
|
101
|
+
- Tree backend matrix (`js-fast` vs `zig-tree` vs `sklearn`) is included in `bench/results/heart-ci-latest.md`
|
|
147
102
|
|
|
148
|
-
|
|
149
|
-
RandomForest predict speedup vs scikit-learn: 3.499x
|
|
150
|
-
RandomForest accuracy delta (bun - sklearn): -4.878e-3
|
|
151
|
-
RandomForest f1 delta (bun - sklearn): -4.695e-3
|
|
103
|
+
Raw benchmark artifacts:
|
|
152
104
|
|
|
153
|
-
|
|
154
|
-
|
|
105
|
+
- `bench/results/heart-ci-latest.json`
|
|
106
|
+
- `bench/results/heart-ci-latest.md`
|
|
155
107
|
|
|
156
108
|
## Documentation
|
|
157
109
|
|
|
158
|
-
- Docs index: `docs/README.md`
|
|
159
110
|
- Getting started: `docs/getting-started.md`
|
|
160
111
|
- API reference: `docs/api.md`
|
|
161
|
-
- Benchmarking
|
|
112
|
+
- Benchmarking: `docs/benchmarking.md`
|
|
162
113
|
- Zig acceleration: `docs/zig-acceleration.md`
|
|
114
|
+
- Native ABI: `docs/native-abi.md`
|
|
115
|
+
- Release checklist: `docs/release-checklist.md`
|
|
163
116
|
|
|
164
|
-
##
|
|
117
|
+
## Contributing / Project Files
|
|
165
118
|
|
|
166
119
|
- Changelog: `CHANGELOG.md`
|
|
167
|
-
- Contributing
|
|
120
|
+
- Contributing: `CONTRIBUTING.md`
|
|
121
|
+
- Security: `SECURITY.md`
|
|
168
122
|
- Code of Conduct: `CODE_OF_CONDUCT.md`
|
|
169
|
-
-
|
|
170
|
-
- Support policy: `SUPPORT.md`
|
|
171
|
-
- License: `LICENSE`
|
|
172
|
-
|
|
173
|
-
## Local Commands
|
|
174
|
-
|
|
175
|
-
```bash
|
|
176
|
-
bun run test
|
|
177
|
-
bun run typecheck
|
|
178
|
-
bun run docs:api:generate
|
|
179
|
-
bun run docs:coverage:check
|
|
180
|
-
bun run bench
|
|
181
|
-
bun run bench:heart:classification
|
|
182
|
-
bun run bench:heart:tree
|
|
183
|
-
bun run bench:ci
|
|
184
|
-
bun run bench:ci:native
|
|
185
|
-
bun run bench:snapshot
|
|
186
|
-
bun run native:build
|
|
187
|
-
```
|
|
123
|
+
- Support: `SUPPORT.md`
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "bun-scikit",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.5",
|
|
4
4
|
"description": "A scikit-learn-inspired machine learning library for Bun/TypeScript.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"module": "index.ts",
|
|
@@ -52,7 +52,7 @@
|
|
|
52
52
|
"bench:synthetic": "bun run bench/linear-regression.bench.ts",
|
|
53
53
|
"bench:ci": "bun run bench/run-ci-benchmarks.ts --output bench/results/heart-ci-current.json",
|
|
54
54
|
"bench:ci:native": "bun run native:build && bun run bench:ci",
|
|
55
|
-
"bench:snapshot": "bun run bench/run-ci-benchmarks.ts --output bench/results/heart-ci-latest.json && bun run bench:
|
|
55
|
+
"bench:snapshot": "bun run bench/run-ci-benchmarks.ts --output bench/results/heart-ci-latest.json && bun run bench:history:update",
|
|
56
56
|
"bench:sync-readme": "bun run scripts/sync-benchmark-readme.ts",
|
|
57
57
|
"bench:readme:check": "bun run scripts/sync-benchmark-readme.ts --check",
|
|
58
58
|
"bench:health": "bun run scripts/check-benchmark-health.ts",
|
|
@@ -69,6 +69,7 @@
|
|
|
69
69
|
"devDependencies": {
|
|
70
70
|
"@types/bun": "latest",
|
|
71
71
|
"node-addon-api": "^8.3.1",
|
|
72
|
+
"node-gyp": "^12.2.0",
|
|
72
73
|
"typedoc": "^0.28.14",
|
|
73
74
|
"typescript": "^5.9.2"
|
|
74
75
|
}
|
|
package/scripts/build-node-addon.ts
CHANGED
|
@@ -1,8 +1,24 @@
|
|
|
1
1
|
import { cp, mkdir } from "node:fs/promises";
|
|
2
|
+
import { createRequire } from "node:module";
|
|
2
3
|
import { resolve } from "node:path";
|
|
3
4
|
|
|
5
|
+
function resolveNodeGypCommand(): string[] {
|
|
6
|
+
const npmNodeGyp = process.env.npm_config_node_gyp?.trim();
|
|
7
|
+
if (npmNodeGyp) {
|
|
8
|
+
return ["node", npmNodeGyp, "rebuild"];
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
try {
|
|
12
|
+
const require = createRequire(import.meta.url);
|
|
13
|
+
const nodeGypScript = require.resolve("node-gyp/bin/node-gyp.js");
|
|
14
|
+
return ["node", nodeGypScript, "rebuild"];
|
|
15
|
+
} catch {
|
|
16
|
+
return ["node-gyp", "rebuild"];
|
|
17
|
+
}
|
|
18
|
+
}
|
|
19
|
+
|
|
4
20
|
async function main(): Promise<void> {
|
|
5
|
-
const child = Bun.spawn(
|
|
21
|
+
const child = Bun.spawn(resolveNodeGypCommand(), {
|
|
6
22
|
stdout: "inherit",
|
|
7
23
|
stderr: "inherit",
|
|
8
24
|
});
|
|
package/scripts/check-benchmark-health.ts
CHANGED
|
@@ -26,6 +26,17 @@ interface TreeModelComparison {
|
|
|
26
26
|
};
|
|
27
27
|
}
|
|
28
28
|
|
|
29
|
+
interface TreeBackendModeComparison {
|
|
30
|
+
comparison: {
|
|
31
|
+
zigFitSpeedupVsJs: number;
|
|
32
|
+
zigPredictSpeedupVsJs: number;
|
|
33
|
+
jsFitSpeedupVsSklearn: number;
|
|
34
|
+
jsPredictSpeedupVsSklearn: number;
|
|
35
|
+
zigFitSpeedupVsSklearn: number;
|
|
36
|
+
zigPredictSpeedupVsSklearn: number;
|
|
37
|
+
};
|
|
38
|
+
}
|
|
39
|
+
|
|
29
40
|
interface BenchmarkSnapshot {
|
|
30
41
|
suites: {
|
|
31
42
|
regression: {
|
|
@@ -62,9 +73,21 @@ interface BenchmarkSnapshot {
|
|
|
62
73
|
},
|
|
63
74
|
];
|
|
64
75
|
};
|
|
76
|
+
treeBackendModes: {
|
|
77
|
+
enabled: boolean;
|
|
78
|
+
models: [TreeBackendModeComparison, TreeBackendModeComparison] | [];
|
|
79
|
+
};
|
|
65
80
|
};
|
|
66
81
|
}
|
|
67
82
|
|
|
83
|
+
function parseArgValue(flag: string): string | null {
|
|
84
|
+
const index = Bun.argv.indexOf(flag);
|
|
85
|
+
if (index === -1 || index + 1 >= Bun.argv.length) {
|
|
86
|
+
return null;
|
|
87
|
+
}
|
|
88
|
+
return Bun.argv[index + 1];
|
|
89
|
+
}
|
|
90
|
+
|
|
68
91
|
function speedupThreshold(
|
|
69
92
|
envName: string,
|
|
70
93
|
defaultValue: number,
|
|
@@ -80,13 +103,18 @@ function speedupThreshold(
|
|
|
80
103
|
return parsed;
|
|
81
104
|
}
|
|
82
105
|
|
|
83
|
-
const
|
|
84
|
-
const
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
106
|
+
const inputPath = resolve(parseArgValue("--input") ?? "bench/results/heart-ci-current.json");
|
|
107
|
+
const baselinePath = resolve(
|
|
108
|
+
parseArgValue("--baseline") ?? process.env.BENCH_BASELINE_INPUT ?? "bench/results/heart-ci-latest.json",
|
|
109
|
+
);
|
|
110
|
+
const baselineInputEnabled = inputPath !== baselinePath;
|
|
88
111
|
|
|
89
112
|
const snapshot = JSON.parse(await readFile(inputPath, "utf-8")) as BenchmarkSnapshot;
|
|
113
|
+
const baselineSnapshot = baselineInputEnabled
|
|
114
|
+
? ((await readFile(baselinePath, "utf-8").then((raw) => JSON.parse(raw) as BenchmarkSnapshot).catch(
|
|
115
|
+
() => null,
|
|
116
|
+
)) as BenchmarkSnapshot | null)
|
|
117
|
+
: null;
|
|
90
118
|
|
|
91
119
|
const [bunRegression, sklearnRegression] = snapshot.suites.regression.results;
|
|
92
120
|
const [bunClassification, sklearnClassification] = snapshot.suites.classification.results;
|
|
@@ -106,7 +134,28 @@ const minDecisionTreePredictSpeedup = speedupThreshold(
|
|
|
106
134
|
const minRandomForestFitSpeedup = speedupThreshold("BENCH_MIN_RANDOM_FOREST_FIT_SPEEDUP", 2.0);
|
|
107
135
|
const minRandomForestPredictSpeedup = speedupThreshold(
|
|
108
136
|
"BENCH_MIN_RANDOM_FOREST_PREDICT_SPEEDUP",
|
|
109
|
-
2
|
|
137
|
+
1.2,
|
|
138
|
+
);
|
|
139
|
+
const maxZigTreeFitSlowdownVsJs = speedupThreshold("BENCH_MAX_ZIG_TREE_FIT_SLOWDOWN_VS_JS", 20);
|
|
140
|
+
const maxZigTreePredictSlowdownVsJs = speedupThreshold(
|
|
141
|
+
"BENCH_MAX_ZIG_TREE_PREDICT_SLOWDOWN_VS_JS",
|
|
142
|
+
20,
|
|
143
|
+
);
|
|
144
|
+
const maxZigForestFitSlowdownVsJs = speedupThreshold(
|
|
145
|
+
"BENCH_MAX_ZIG_FOREST_FIT_SLOWDOWN_VS_JS",
|
|
146
|
+
20,
|
|
147
|
+
);
|
|
148
|
+
const maxZigForestPredictSlowdownVsJs = speedupThreshold(
|
|
149
|
+
"BENCH_MAX_ZIG_FOREST_PREDICT_SLOWDOWN_VS_JS",
|
|
150
|
+
20,
|
|
151
|
+
);
|
|
152
|
+
const minZigTreeFitRetentionVsBaseline = speedupThreshold(
|
|
153
|
+
"BENCH_MIN_ZIG_TREE_FIT_RETENTION_VS_BASELINE",
|
|
154
|
+
0.9,
|
|
155
|
+
);
|
|
156
|
+
const minZigForestFitRetentionVsBaseline = speedupThreshold(
|
|
157
|
+
"BENCH_MIN_ZIG_FOREST_FIT_RETENTION_VS_BASELINE",
|
|
158
|
+
0.9,
|
|
110
159
|
);
|
|
111
160
|
|
|
112
161
|
for (const result of [
|
|
@@ -237,4 +286,61 @@ if (randomForest.comparison.predictSpeedupVsSklearn < minRandomForestPredictSpee
|
|
|
237
286
|
);
|
|
238
287
|
}
|
|
239
288
|
|
|
289
|
+
if (snapshot.suites.treeBackendModes.enabled) {
|
|
290
|
+
const [decisionTreeModes, randomForestModes] = snapshot.suites.treeBackendModes.models;
|
|
291
|
+
if (!decisionTreeModes || !randomForestModes) {
|
|
292
|
+
throw new Error("Tree backend mode suite is enabled but missing model comparisons.");
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
const decisionTreeFitSlowdown = 1 / decisionTreeModes.comparison.zigFitSpeedupVsJs;
|
|
296
|
+
const decisionTreePredictSlowdown = 1 / decisionTreeModes.comparison.zigPredictSpeedupVsJs;
|
|
297
|
+
const randomForestFitSlowdown = 1 / randomForestModes.comparison.zigFitSpeedupVsJs;
|
|
298
|
+
const randomForestPredictSlowdown = 1 / randomForestModes.comparison.zigPredictSpeedupVsJs;
|
|
299
|
+
|
|
300
|
+
if (decisionTreeFitSlowdown > maxZigTreeFitSlowdownVsJs) {
|
|
301
|
+
throw new Error(
|
|
302
|
+
`DecisionTree zig fit slowdown too large vs js-fast: ${decisionTreeFitSlowdown} > ${maxZigTreeFitSlowdownVsJs}.`,
|
|
303
|
+
);
|
|
304
|
+
}
|
|
305
|
+
if (decisionTreePredictSlowdown > maxZigTreePredictSlowdownVsJs) {
|
|
306
|
+
throw new Error(
|
|
307
|
+
`DecisionTree zig predict slowdown too large vs js-fast: ${decisionTreePredictSlowdown} > ${maxZigTreePredictSlowdownVsJs}.`,
|
|
308
|
+
);
|
|
309
|
+
}
|
|
310
|
+
if (randomForestFitSlowdown > maxZigForestFitSlowdownVsJs) {
|
|
311
|
+
throw new Error(
|
|
312
|
+
`RandomForest zig fit slowdown too large vs js-fast: ${randomForestFitSlowdown} > ${maxZigForestFitSlowdownVsJs}.`,
|
|
313
|
+
);
|
|
314
|
+
}
|
|
315
|
+
if (randomForestPredictSlowdown > maxZigForestPredictSlowdownVsJs) {
|
|
316
|
+
throw new Error(
|
|
317
|
+
`RandomForest zig predict slowdown too large vs js-fast: ${randomForestPredictSlowdown} > ${maxZigForestPredictSlowdownVsJs}.`,
|
|
318
|
+
);
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
if (baselineSnapshot?.suites?.treeBackendModes?.enabled) {
|
|
322
|
+
const [baselineDecisionTreeModes, baselineRandomForestModes] =
|
|
323
|
+
baselineSnapshot.suites.treeBackendModes.models;
|
|
324
|
+
if (baselineDecisionTreeModes && baselineRandomForestModes) {
|
|
325
|
+
const decisionTreeFitRetention =
|
|
326
|
+
decisionTreeModes.comparison.zigFitSpeedupVsJs /
|
|
327
|
+
baselineDecisionTreeModes.comparison.zigFitSpeedupVsJs;
|
|
328
|
+
const randomForestFitRetention =
|
|
329
|
+
randomForestModes.comparison.zigFitSpeedupVsJs /
|
|
330
|
+
baselineRandomForestModes.comparison.zigFitSpeedupVsJs;
|
|
331
|
+
|
|
332
|
+
if (decisionTreeFitRetention < minZigTreeFitRetentionVsBaseline) {
|
|
333
|
+
throw new Error(
|
|
334
|
+
`DecisionTree zig/js fit retention too low vs baseline: ${decisionTreeFitRetention} < ${minZigTreeFitRetentionVsBaseline}.`,
|
|
335
|
+
);
|
|
336
|
+
}
|
|
337
|
+
if (randomForestFitRetention < minZigForestFitRetentionVsBaseline) {
|
|
338
|
+
throw new Error(
|
|
339
|
+
`RandomForest zig/js fit retention too low vs baseline: ${randomForestFitRetention} < ${minZigForestFitRetentionVsBaseline}.`,
|
|
340
|
+
);
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
}
|
|
344
|
+
}
|
|
345
|
+
|
|
240
346
|
console.log("Benchmark comparison health checks passed.");
|
|
package/scripts/sync-benchmark-readme.ts
CHANGED
|
@@ -62,6 +62,19 @@ interface BenchmarkSnapshot {
|
|
|
62
62
|
treeClassification: {
|
|
63
63
|
models: [TreeModelComparison, TreeModelComparison];
|
|
64
64
|
};
|
|
65
|
+
treeBackendModes?: {
|
|
66
|
+
enabled: boolean;
|
|
67
|
+
models: Array<{
|
|
68
|
+
key: TreeModelKey;
|
|
69
|
+
jsFast: ClassificationBenchmarkResult;
|
|
70
|
+
zigTree: ClassificationBenchmarkResult;
|
|
71
|
+
sklearn: ClassificationBenchmarkResult;
|
|
72
|
+
comparison: {
|
|
73
|
+
zigFitSpeedupVsJs: number;
|
|
74
|
+
zigPredictSpeedupVsJs: number;
|
|
75
|
+
};
|
|
76
|
+
}>;
|
|
77
|
+
};
|
|
65
78
|
};
|
|
66
79
|
}
|
|
67
80
|
|
|
@@ -89,6 +102,11 @@ function renderBenchmarkSection(snapshot: BenchmarkSnapshot): string {
|
|
|
89
102
|
const [bunReg, sklearnReg] = regression.results;
|
|
90
103
|
const [bunCls, sklearnCls] = classification.results;
|
|
91
104
|
const [decisionTree, randomForest] = treeClassification.models;
|
|
105
|
+
const treeBackendModes = snapshot.suites.treeBackendModes;
|
|
106
|
+
const hasTreeBackendModes =
|
|
107
|
+
treeBackendModes?.enabled === true && Array.isArray(treeBackendModes.models) && treeBackendModes.models.length === 2;
|
|
108
|
+
const decisionTreeModes = hasTreeBackendModes ? treeBackendModes.models[0] : null;
|
|
109
|
+
const randomForestModes = hasTreeBackendModes ? treeBackendModes.models[1] : null;
|
|
92
110
|
|
|
93
111
|
return [
|
|
94
112
|
START_MARKER,
|
|
@@ -138,6 +156,44 @@ function renderBenchmarkSection(snapshot: BenchmarkSnapshot): string {
|
|
|
138
156
|
`RandomForest accuracy delta (bun - sklearn): ${randomForest.comparison.accuracyDeltaVsSklearn.toExponential(3)}`,
|
|
139
157
|
`RandomForest f1 delta (bun - sklearn): ${randomForest.comparison.f1DeltaVsSklearn.toExponential(3)}`,
|
|
140
158
|
"",
|
|
159
|
+
"### Tree Backend Modes (Bun vs Bun vs sklearn)",
|
|
160
|
+
"",
|
|
161
|
+
hasTreeBackendModes
|
|
162
|
+
? "| Model | Backend | Fit median (ms) | Predict median (ms) | Accuracy | F1 |"
|
|
163
|
+
: "Tree backend mode matrix disabled (`BENCH_TREE_BACKEND_MATRIX=0`).",
|
|
164
|
+
hasTreeBackendModes ? "|---|---|---:|---:|---:|---:|" : "",
|
|
165
|
+
hasTreeBackendModes
|
|
166
|
+
? `| DecisionTreeClassifier(maxDepth=8) | js-fast | ${decisionTreeModes!.jsFast.fitMsMedian.toFixed(4)} | ${decisionTreeModes!.jsFast.predictMsMedian.toFixed(4)} | ${decisionTreeModes!.jsFast.accuracy.toFixed(6)} | ${decisionTreeModes!.jsFast.f1.toFixed(6)} |`
|
|
167
|
+
: "",
|
|
168
|
+
hasTreeBackendModes
|
|
169
|
+
? `| DecisionTreeClassifier(maxDepth=8) | zig-tree | ${decisionTreeModes!.zigTree.fitMsMedian.toFixed(4)} | ${decisionTreeModes!.zigTree.predictMsMedian.toFixed(4)} | ${decisionTreeModes!.zigTree.accuracy.toFixed(6)} | ${decisionTreeModes!.zigTree.f1.toFixed(6)} |`
|
|
170
|
+
: "",
|
|
171
|
+
hasTreeBackendModes
|
|
172
|
+
? `| DecisionTreeClassifier | python-scikit-learn | ${decisionTreeModes!.sklearn.fitMsMedian.toFixed(4)} | ${decisionTreeModes!.sklearn.predictMsMedian.toFixed(4)} | ${decisionTreeModes!.sklearn.accuracy.toFixed(6)} | ${decisionTreeModes!.sklearn.f1.toFixed(6)} |`
|
|
173
|
+
: "",
|
|
174
|
+
hasTreeBackendModes
|
|
175
|
+
? `| RandomForestClassifier(nEstimators=80,maxDepth=8) | js-fast | ${randomForestModes!.jsFast.fitMsMedian.toFixed(4)} | ${randomForestModes!.jsFast.predictMsMedian.toFixed(4)} | ${randomForestModes!.jsFast.accuracy.toFixed(6)} | ${randomForestModes!.jsFast.f1.toFixed(6)} |`
|
|
176
|
+
: "",
|
|
177
|
+
hasTreeBackendModes
|
|
178
|
+
? `| RandomForestClassifier(nEstimators=80,maxDepth=8) | zig-tree | ${randomForestModes!.zigTree.fitMsMedian.toFixed(4)} | ${randomForestModes!.zigTree.predictMsMedian.toFixed(4)} | ${randomForestModes!.zigTree.accuracy.toFixed(6)} | ${randomForestModes!.zigTree.f1.toFixed(6)} |`
|
|
179
|
+
: "",
|
|
180
|
+
hasTreeBackendModes
|
|
181
|
+
? `| RandomForestClassifier | python-scikit-learn | ${randomForestModes!.sklearn.fitMsMedian.toFixed(4)} | ${randomForestModes!.sklearn.predictMsMedian.toFixed(4)} | ${randomForestModes!.sklearn.accuracy.toFixed(6)} | ${randomForestModes!.sklearn.f1.toFixed(6)} |`
|
|
182
|
+
: "",
|
|
183
|
+
"",
|
|
184
|
+
hasTreeBackendModes
|
|
185
|
+
? `DecisionTree zig/js fit speedup: ${decisionTreeModes!.comparison.zigFitSpeedupVsJs.toFixed(3)}x`
|
|
186
|
+
: "",
|
|
187
|
+
hasTreeBackendModes
|
|
188
|
+
? `DecisionTree zig/js predict speedup: ${decisionTreeModes!.comparison.zigPredictSpeedupVsJs.toFixed(3)}x`
|
|
189
|
+
: "",
|
|
190
|
+
hasTreeBackendModes
|
|
191
|
+
? `RandomForest zig/js fit speedup: ${randomForestModes!.comparison.zigFitSpeedupVsJs.toFixed(3)}x`
|
|
192
|
+
: "",
|
|
193
|
+
hasTreeBackendModes
|
|
194
|
+
? `RandomForest zig/js predict speedup: ${randomForestModes!.comparison.zigPredictSpeedupVsJs.toFixed(3)}x`
|
|
195
|
+
: "",
|
|
196
|
+
"",
|
|
141
197
|
`Snapshot generated at: ${snapshot.generatedAt}`,
|
|
142
198
|
END_MARKER,
|
|
143
199
|
].join("\n");
|