bun-scikit 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/LICENSE +21 -0
  2. package/README.md +187 -0
  3. package/binding.gyp +21 -0
  4. package/docs/README.md +7 -0
  5. package/docs/native-abi.md +53 -0
  6. package/index.ts +1 -0
  7. package/package.json +76 -0
  8. package/scripts/build-node-addon.ts +26 -0
  9. package/scripts/build-zig-kernels.ts +50 -0
  10. package/scripts/check-api-docs-coverage.ts +52 -0
  11. package/scripts/check-benchmark-health.ts +140 -0
  12. package/scripts/install-native.ts +160 -0
  13. package/scripts/package-native-artifacts.ts +62 -0
  14. package/scripts/sync-benchmark-readme.ts +181 -0
  15. package/scripts/update-benchmark-history.ts +91 -0
  16. package/src/ensemble/RandomForestClassifier.ts +136 -0
  17. package/src/ensemble/RandomForestRegressor.ts +136 -0
  18. package/src/index.ts +32 -0
  19. package/src/linear_model/LinearRegression.ts +136 -0
  20. package/src/linear_model/LogisticRegression.ts +260 -0
  21. package/src/linear_model/SGDClassifier.ts +161 -0
  22. package/src/linear_model/SGDRegressor.ts +104 -0
  23. package/src/metrics/classification.ts +294 -0
  24. package/src/metrics/regression.ts +51 -0
  25. package/src/model_selection/GridSearchCV.ts +244 -0
  26. package/src/model_selection/KFold.ts +82 -0
  27. package/src/model_selection/RepeatedKFold.ts +49 -0
  28. package/src/model_selection/RepeatedStratifiedKFold.ts +50 -0
  29. package/src/model_selection/StratifiedKFold.ts +112 -0
  30. package/src/model_selection/StratifiedShuffleSplit.ts +211 -0
  31. package/src/model_selection/crossValScore.ts +165 -0
  32. package/src/model_selection/trainTestSplit.ts +82 -0
  33. package/src/naive_bayes/GaussianNB.ts +148 -0
  34. package/src/native/node-addon/bun_scikit_addon.cpp +450 -0
  35. package/src/native/zigKernels.ts +576 -0
  36. package/src/neighbors/KNeighborsClassifier.ts +85 -0
  37. package/src/pipeline/ColumnTransformer.ts +203 -0
  38. package/src/pipeline/FeatureUnion.ts +123 -0
  39. package/src/pipeline/Pipeline.ts +168 -0
  40. package/src/preprocessing/MinMaxScaler.ts +113 -0
  41. package/src/preprocessing/OneHotEncoder.ts +91 -0
  42. package/src/preprocessing/PolynomialFeatures.ts +158 -0
  43. package/src/preprocessing/RobustScaler.ts +149 -0
  44. package/src/preprocessing/SimpleImputer.ts +150 -0
  45. package/src/preprocessing/StandardScaler.ts +92 -0
  46. package/src/svm/LinearSVC.ts +117 -0
  47. package/src/tree/DecisionTreeClassifier.ts +394 -0
  48. package/src/tree/DecisionTreeRegressor.ts +407 -0
  49. package/src/types.ts +18 -0
  50. package/src/utils/linalg.ts +209 -0
  51. package/src/utils/validation.ts +78 -0
  52. package/zig/kernels.zig +1327 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Seyamalam
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,187 @@
1
+ # bun-scikit
2
+
3
+ [![CI](https://github.com/Seyamalam/bun-scikit/actions/workflows/ci.yml/badge.svg)](https://github.com/Seyamalam/bun-scikit/actions/workflows/ci.yml)
4
+ [![Benchmark Snapshot](https://github.com/Seyamalam/bun-scikit/actions/workflows/benchmark-snapshot.yml/badge.svg)](https://github.com/Seyamalam/bun-scikit/actions/workflows/benchmark-snapshot.yml)
5
+
6
+ `bun-scikit` is a scikit-learn-inspired machine learning library for Bun + TypeScript.
7
+
8
+ ## Features
9
+
10
+ - `StandardScaler`
11
+ - `LinearRegression` (native Zig `normal` solver)
12
+ - `LogisticRegression` (binary classification, native Zig)
13
+ - `KNeighborsClassifier`
14
+ - `DecisionTreeClassifier`
15
+ - `RandomForestClassifier`
16
+ - `trainTestSplit`
17
+ - Regression metrics: `meanSquaredError`, `meanAbsoluteError`, `r2Score`
18
+ - Classification metrics: `accuracyScore`, `precisionScore`, `recallScore`, `f1Score`
19
+ - Dataset-driven benchmark and CI comparison against Python `scikit-learn`
20
+
21
+ `test_data/heart.csv` is used for integration testing and benchmark comparison.
22
+
23
+ ## Native Zig Backend
24
+
25
+ `LinearRegression` (`solver: "normal"`) and `LogisticRegression` require native Zig kernels.
26
+
27
+ ```bash
28
+ bun run native:build
29
+ ```
30
+
31
+ Optional Node-API bridge (experimental):
32
+
33
+ ```bash
34
+ bun run native:build:node-addon
35
+ ```
36
+
37
+ ```ts
38
+ const linear = new LinearRegression({ solver: "normal" });
39
+ const logistic = new LogisticRegression();
40
+
41
+ linear.fit(XTrain, yTrain);
42
+ logistic.fit(XTrain, yTrain);
43
+ console.log(linear.fitBackend_, linear.fitBackendLibrary_);
44
+ console.log(logistic.fitBackend_, logistic.fitBackendLibrary_);
45
+ ```
46
+
47
+ If native kernels are missing, `fit()` throws with guidance to run `bun run native:build`.
48
+
49
+ Bridge selection:
50
+
51
+ - `BUN_SCIKIT_NATIVE_BRIDGE=node-api|ffi` (`node-api` is attempted first when available)
52
+ - `BUN_SCIKIT_NODE_ADDON=/absolute/path/to/bun_scikit_node_addon.node`
53
+ - `BUN_SCIKIT_ZIG_LIB=/absolute/path/to/bun_scikit_kernels.<ext>`
54
+
55
+ Native ABI contract: `docs/native-abi.md`
56
+
57
+ ## Install
58
+
59
+ ```bash
60
+ bun install bun-scikit
61
+ ```
62
+
63
+ Postinstall behavior:
64
+
65
+ - Downloads prebuilt native binaries from GitHub Releases for `linux-x64` and `windows-x64`.
66
+ - If prebuilt binaries are unavailable, installation falls back to a local native build.
67
+ - macOS prebuilt binaries are currently not published.
68
+
69
+ ## Usage
70
+
71
+ ```ts
72
+ import {
73
+ LinearRegression,
74
+ StandardScaler,
75
+ meanSquaredError,
76
+ trainTestSplit,
77
+ } from "bun-scikit";
78
+
79
+ const X = [
80
+ [1, 2],
81
+ [2, 3],
82
+ [3, 4],
83
+ [4, 5],
84
+ ];
85
+ const y = [5, 7, 9, 11];
86
+
87
+ const scaler = new StandardScaler();
88
+ const XScaled = scaler.fitTransform(X);
89
+ const { XTrain, XTest, yTrain, yTest } = trainTestSplit(XScaled, y, {
90
+ testSize: 0.25,
91
+ randomState: 42,
92
+ });
93
+
94
+ const model = new LinearRegression({ solver: "normal" });
95
+ model.fit(XTrain, yTrain);
96
+ const predictions = model.predict(XTest);
97
+
98
+ console.log("MSE:", meanSquaredError(yTest, predictions));
99
+ ```
100
+
101
+ ## Benchmarks
102
+
103
+ The table below is generated from `bench/results/heart-ci-latest.json`.
104
+ That snapshot is produced by CI in `.github/workflows/benchmark-snapshot.yml`.
105
+
106
+ <!-- BENCHMARK_TABLE_START -->
107
+ Benchmark snapshot source: `bench/results/heart-ci-latest.json` (generated in CI workflow `Benchmark Snapshot`).
108
+ Dataset: `test_data/heart.csv` (1025 samples, 13 features, test fraction 0.2).
109
+
110
+ ### Regression
111
+
112
+ | Implementation | Model | Fit median (ms) | Predict median (ms) | MSE | R2 |
113
+ |---|---|---:|---:|---:|---:|
114
+ | bun-scikit | StandardScaler + LinearRegression(normal) | 0.2103 | 0.0216 | 0.117545 | 0.529539 |
115
+ | python-scikit-learn | StandardScaler + LinearRegression | 0.3201 | 0.0365 | 0.117545 | 0.529539 |
116
+
117
+ Bun fit speedup vs scikit-learn: 1.522x
118
+ Bun predict speedup vs scikit-learn: 1.684x
119
+ MSE delta (bun - sklearn): 6.362e-14
120
+ R2 delta (bun - sklearn): -2.539e-13
121
+
122
+ ### Classification
123
+
124
+ | Implementation | Model | Fit median (ms) | Predict median (ms) | Accuracy | F1 |
125
+ |---|---|---:|---:|---:|---:|
126
+ | bun-scikit | StandardScaler + LogisticRegression(gd,zig) | 0.4868 | 0.0282 | 0.863415 | 0.876106 |
127
+ | python-scikit-learn | StandardScaler + LogisticRegression(lbfgs) | 1.1246 | 0.0724 | 0.863415 | 0.875000 |
128
+
129
+ Bun fit speedup vs scikit-learn: 2.310x
130
+ Bun predict speedup vs scikit-learn: 2.574x
131
+ Accuracy delta (bun - sklearn): 0.000e+0
132
+ F1 delta (bun - sklearn): 1.106e-3
133
+
134
+ ### Tree Classification
135
+
136
+ | Model | Implementation | Fit median (ms) | Predict median (ms) | Accuracy | F1 |
137
+ |---|---|---:|---:|---:|---:|
138
+ | DecisionTreeClassifier(maxDepth=8) | bun-scikit | 0.8062 | 0.0190 | 0.946341 | 0.948837 |
139
+ | DecisionTreeClassifier | python-scikit-learn | 1.4781 | 0.0999 | 0.931707 | 0.933962 |
140
+ | RandomForestClassifier(nEstimators=80,maxDepth=8) | bun-scikit | 27.6225 | 1.8535 | 0.990244 | 0.990566 |
141
+ | RandomForestClassifier | python-scikit-learn | 172.9585 | 6.4850 | 0.995122 | 0.995261 |
142
+
143
+ DecisionTree fit speedup vs scikit-learn: 1.833x
144
+ DecisionTree predict speedup vs scikit-learn: 5.244x
145
+ DecisionTree accuracy delta (bun - sklearn): 1.463e-2
146
+ DecisionTree f1 delta (bun - sklearn): 1.487e-2
147
+
148
+ RandomForest fit speedup vs scikit-learn: 6.262x
149
+ RandomForest predict speedup vs scikit-learn: 3.499x
150
+ RandomForest accuracy delta (bun - sklearn): -4.878e-3
151
+ RandomForest f1 delta (bun - sklearn): -4.695e-3
152
+
153
+ Snapshot generated at: 2026-02-23T14:55:51.251Z
154
+ <!-- BENCHMARK_TABLE_END -->
155
+
156
+ ## Documentation
157
+
158
+ - Docs index: `docs/README.md`
159
+ - Getting started: `docs/getting-started.md`
160
+ - API reference: `docs/api.md`
161
+ - Benchmarking flow: `docs/benchmarking.md`
162
+ - Zig acceleration: `docs/zig-acceleration.md`
163
+
164
+ ## Maintainer Files
165
+
166
+ - Changelog: `CHANGELOG.md`
167
+ - Contributing guide: `CONTRIBUTING.md`
168
+ - Code of Conduct: `CODE_OF_CONDUCT.md`
169
+ - Security policy: `SECURITY.md`
170
+ - Support policy: `SUPPORT.md`
171
+ - License: `LICENSE`
172
+
173
+ ## Local Commands
174
+
175
+ ```bash
176
+ bun run test
177
+ bun run typecheck
178
+ bun run docs:api:generate
179
+ bun run docs:coverage:check
180
+ bun run bench
181
+ bun run bench:heart:classification
182
+ bun run bench:heart:tree
183
+ bun run bench:ci
184
+ bun run bench:ci:native
185
+ bun run bench:snapshot
186
+ bun run native:build
187
+ ```
package/binding.gyp ADDED
@@ -0,0 +1,21 @@
1
+ {
2
+ "targets": [
3
+ {
4
+ "target_name": "bun_scikit_node_addon",
5
+ "sources": [ "src/native/node-addon/bun_scikit_addon.cpp" ],
6
+ "include_dirs": [
7
+ "<!@(node -p \"require('node-addon-api').include\")"
8
+ ],
9
+ "dependencies": [
10
+ "<!(node -p \"require('node-addon-api').gyp\")"
11
+ ],
12
+ "defines": [ "NAPI_DISABLE_CPP_EXCEPTIONS" ],
13
+ "cflags_cc!": [ "-fno-exceptions" ],
14
+ "msvs_settings": {
15
+ "VCCLCompilerTool": {
16
+ "ExceptionHandling": 0
17
+ }
18
+ }
19
+ }
20
+ ]
21
+ }
package/docs/README.md ADDED
@@ -0,0 +1,7 @@
1
+ # Documentation
2
+
3
+ - Getting started: `docs/getting-started.md`
4
+ - API reference: `docs/api.md`
5
+ - Generated API docs (Typedoc output): `docs/api-reference/`
6
+ - Benchmarking and CI snapshot flow: `docs/benchmarking.md`
7
+ - Zig native acceleration: `docs/zig-acceleration.md`
package/docs/native-abi.md ADDED
@@ -0,0 +1,53 @@
1
+ # Native ABI Contract
2
+
3
+ This document defines the stable ABI boundary between JavaScript runtimes (Bun/Node) and the Zig compute core.
4
+
5
+ ## ABI Version
6
+
7
+ - Exported symbol: `bun_scikit_abi_version() -> u32`
8
+ - Current version: `1`
9
+ - JavaScript bridges must refuse to load mismatched ABI versions.
10
+
11
+ ## Status Codes
12
+
13
+ Zig exports numeric status constants:
14
+
15
+ - `bun_scikit_status_ok()`
16
+ - `bun_scikit_status_invalid_handle()`
17
+ - `bun_scikit_status_invalid_shape()`
18
+ - `bun_scikit_status_allocation_failed()`
19
+ - `bun_scikit_status_fit_failed()`
20
+ - `bun_scikit_status_symbol_unavailable()`
21
+
22
+ ## Handle Lifecycle
23
+
24
+ All model handles are opaque native pointers represented as `usize` in native code and `BigInt` in JS.
25
+
26
+ Lifecycle:
27
+
28
+ 1. `*_model_create(...) -> handle`
29
+ 2. `*_model_fit(...)` / `*_model_predict(...)` / `*_model_copy_coefficients(...)`
30
+ 3. `*_model_destroy(handle)` exactly once
31
+
32
+ ## Memory Ownership Rules
33
+
34
+ - Input tensors (`x`, `y`) are caller-owned contiguous typed arrays.
35
+ - Output tensors (`out`) are caller-owned typed arrays preallocated to required size.
36
+ - Native code does not own caller buffers and must never free them.
37
+ - Native model state is owned by Zig and released only via `*_model_destroy`.
38
+
39
+ ## Tensor Layout
40
+
41
+ - `x` must be row-major contiguous `Float64Array` with shape `[n_samples, n_features]`.
42
+ - `y` is a contiguous `Float64Array` (`LinearRegression`/`LogisticRegression`) or a `Uint8Array` for classifier labels where required.
43
+
44
+ ## Runtime Bridges
45
+
46
+ - Bun FFI bridge: `src/native/zigKernels.ts` (`bun:ffi`).
47
+ - Node-API bridge addon: `src/native/node-addon/bun_scikit_addon.cpp`.
48
+
49
+ Environment controls:
50
+
51
+ - `BUN_SCIKIT_NATIVE_BRIDGE=node-api|ffi` (default tries Node-API then FFI)
52
+ - `BUN_SCIKIT_NODE_ADDON=/absolute/path/to/bun_scikit_node_addon.node`
53
+ - `BUN_SCIKIT_ZIG_LIB=/absolute/path/to/bun_scikit_kernels.<ext>`
package/index.ts ADDED
@@ -0,0 +1 @@
1
+ export * from "./src/index";
package/package.json ADDED
@@ -0,0 +1,76 @@
1
+ {
2
+ "name": "bun-scikit",
3
+ "version": "0.1.1",
4
+ "description": "A scikit-learn-inspired machine learning library for Bun/TypeScript.",
5
+ "license": "MIT",
6
+ "module": "index.ts",
7
+ "repository": {
8
+ "type": "git",
9
+ "url": "git+https://github.com/Seyamalam/bun-scikit.git"
10
+ },
11
+ "homepage": "https://github.com/Seyamalam/bun-scikit#readme",
12
+ "bugs": {
13
+ "url": "https://github.com/Seyamalam/bun-scikit/issues"
14
+ },
15
+ "keywords": [
16
+ "machine-learning",
17
+ "scikit-learn",
18
+ "bun",
19
+ "typescript",
20
+ "linear-regression",
21
+ "logistic-regression",
22
+ "decision-tree",
23
+ "random-forest",
24
+ "knn",
25
+ "classification",
26
+ "preprocessing"
27
+ ],
28
+ "exports": {
29
+ ".": "./index.ts"
30
+ },
31
+ "files": [
32
+ "index.ts",
33
+ "src",
34
+ "zig",
35
+ "scripts",
36
+ "binding.gyp",
37
+ "docs/native-abi.md",
38
+ "README.md",
39
+ "LICENSE"
40
+ ],
41
+ "type": "module",
42
+ "engines": {
43
+ "bun": ">=1.3.9"
44
+ },
45
+ "scripts": {
46
+ "install": "bun run scripts/install-native.ts",
47
+ "test": "bun test",
48
+ "typecheck": "bunx tsc --noEmit",
49
+ "bench": "bun run bench/heart.bench.ts",
50
+ "bench:heart": "bun run bench/heart.bench.ts",
51
+ "bench:heart:classification": "bun run bench/heart-classification.bench.ts",
52
+ "bench:heart:tree": "bun run bench/heart-tree-classification.bench.ts",
53
+ "bench:synthetic": "bun run bench/linear-regression.bench.ts",
54
+ "bench:ci": "bun run bench/run-ci-benchmarks.ts --output bench/results/heart-ci-current.json",
55
+ "bench:ci:native": "bun run native:build && bun run bench:ci",
56
+ "bench:snapshot": "bun run bench/run-ci-benchmarks.ts --output bench/results/heart-ci-latest.json && bun run bench:sync-readme && bun run bench:history:update",
57
+ "bench:sync-readme": "bun run scripts/sync-benchmark-readme.ts",
58
+ "bench:readme:check": "bun run scripts/sync-benchmark-readme.ts --check",
59
+ "bench:health": "bun run scripts/check-benchmark-health.ts",
60
+ "bench:history:update": "bun run scripts/update-benchmark-history.ts",
61
+ "native:build": "bun run scripts/build-zig-kernels.ts",
62
+ "native:build:node-addon": "bun run scripts/build-node-addon.ts",
63
+ "native:build:all": "bun run native:build && bun run native:build:node-addon",
64
+ "native:package:assets": "bun run scripts/package-native-artifacts.ts",
65
+ "docs:api:generate": "typedoc --entryPointStrategy resolve --entryPoints src/index.ts --out docs/api-reference --readme none",
66
+ "docs:coverage:check": "bun run scripts/check-api-docs-coverage.ts",
67
+ "build:bench:bytecode": "bun build ./bench/run-ci-benchmarks.ts --target=bun --bytecode --minify --outdir=./dist/bench",
68
+ "build:bench:compiled": "bun build ./bench/run-ci-benchmarks.ts --target=bun --compile --bytecode --minify --outfile=./dist/bench/bench-ci"
69
+ },
70
+ "devDependencies": {
71
+ "@types/bun": "latest",
72
+ "node-addon-api": "^8.3.1",
73
+ "typedoc": "^0.28.14",
74
+ "typescript": "^5.9.2"
75
+ }
76
+ }
package/scripts/build-node-addon.ts ADDED
@@ -0,0 +1,26 @@
1
+ import { cp, mkdir } from "node:fs/promises";
2
+ import { resolve } from "node:path";
3
+
4
+ async function main(): Promise<void> {
5
+ const child = Bun.spawn(["bunx", "node-gyp", "rebuild"], {
6
+ stdout: "inherit",
7
+ stderr: "inherit",
8
+ });
9
+
10
+ const exitCode = await child.exited;
11
+ if (exitCode !== 0) {
12
+ throw new Error(`node-gyp rebuild failed with exit code ${exitCode}.`);
13
+ }
14
+
15
+ const source = resolve("build", "Release", "bun_scikit_node_addon.node");
16
+ const outputDir = resolve("dist", "native");
17
+ const destination = resolve(outputDir, "bun_scikit_node_addon.node");
18
+ await mkdir(outputDir, { recursive: true });
19
+ await cp(source, destination, { force: true });
20
+ console.log(`Built Node-API addon: ${destination}`);
21
+ }
22
+
23
+ main().catch((error) => {
24
+ console.error(error);
25
+ process.exit(1);
26
+ });
package/scripts/build-zig-kernels.ts ADDED
@@ -0,0 +1,50 @@
1
+ import { mkdir } from "node:fs/promises";
2
+ import { resolve } from "node:path";
3
+
4
+ function sharedLibraryExtension(): string {
5
+ switch (process.platform) {
6
+ case "win32":
7
+ return "dll";
8
+ case "darwin":
9
+ return "dylib";
10
+ default:
11
+ return "so";
12
+ }
13
+ }
14
+
15
+ async function main(): Promise<void> {
16
+ const extension = sharedLibraryExtension();
17
+ const outputDir = resolve("dist/native");
18
+ const outputFile = resolve(outputDir, `bun_scikit_kernels.${extension}`);
19
+
20
+ await mkdir(outputDir, { recursive: true });
21
+
22
+ const child = Bun.spawn(
23
+ [
24
+ "zig",
25
+ "build-lib",
26
+ "zig/kernels.zig",
27
+ "-dynamic",
28
+ "-O",
29
+ "ReleaseFast",
30
+ "-fstrip",
31
+ `-femit-bin=${outputFile}`,
32
+ ],
33
+ {
34
+ stdout: "inherit",
35
+ stderr: "inherit",
36
+ },
37
+ );
38
+
39
+ const exitCode = await child.exited;
40
+ if (exitCode !== 0) {
41
+ throw new Error(`zig build-lib failed with exit code ${exitCode}.`);
42
+ }
43
+
44
+ console.log(`Built Zig kernels: ${outputFile}`);
45
+ }
46
+
47
+ main().catch((error) => {
48
+ console.error(error);
49
+ process.exit(1);
50
+ });
package/scripts/check-api-docs-coverage.ts ADDED
@@ -0,0 +1,52 @@
1
+ import { readFile } from "node:fs/promises";
2
+ import { resolve } from "node:path";
3
+
4
+ const INDEX_PATH = resolve("src/index.ts");
5
+ const DOCS_PATH = resolve("docs/api.md");
6
+
7
+ function normalizeModulePath(pathLiteral: string): string {
8
+ return pathLiteral.replace(/^\.?\//, "");
9
+ }
10
+
11
+ function extractExportedSymbolNames(source: string): string[] {
12
+ const exportMatches = source.matchAll(
13
+ /export\s+(?:class|function|const|type|interface|enum)\s+([A-Za-z0-9_]+)/g,
14
+ );
15
+ return Array.from(exportMatches, (match) => match[1]);
16
+ }
17
+
18
+ const indexSource = await readFile(INDEX_PATH, "utf-8");
19
+ const docsSource = await readFile(DOCS_PATH, "utf-8");
20
+
21
+ const modulePaths = Array.from(indexSource.matchAll(/export\s+\*\s+from\s+"\.\/(.+)";/g)).map(
22
+ (match) => normalizeModulePath(match[1]),
23
+ );
24
+
25
+ const symbolNames = new Set<string>();
26
+ for (const modulePath of modulePaths) {
27
+ const moduleSource = await readFile(resolve("src", `${modulePath}.ts`), "utf-8");
28
+ for (const name of extractExportedSymbolNames(moduleSource)) {
29
+ symbolNames.add(name);
30
+ }
31
+ }
32
+
33
+ function escapeRegExp(literal: string): string {
34
+ return literal.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
35
+ }
36
+
37
+ const missing = Array.from(symbolNames)
38
+ .filter((name) => {
39
+ const pattern = new RegExp("`[^`]*\\b" + escapeRegExp(name) + "\\b[^`]*`");
40
+ return !pattern.test(docsSource);
41
+ })
42
+ .sort((a, b) => a.localeCompare(b));
43
+
44
+ if (missing.length > 0) {
45
+ console.error("docs/api.md is missing exported API symbols:");
46
+ for (const name of missing) {
47
+ console.error(`- ${name}`);
48
+ }
49
+ process.exit(1);
50
+ }
51
+
52
+ console.log(`API docs coverage check passed for ${symbolNames.size} exported symbols.`);
package/scripts/check-benchmark-health.ts ADDED
@@ -0,0 +1,140 @@
1
+ import { readFile } from "node:fs/promises";
2
+ import { resolve } from "node:path";
3
+
4
+ interface SharedBenchmarkResult {
5
+ implementation: string;
6
+ fitMsMedian: number;
7
+ predictMsMedian: number;
8
+ }
9
+
10
+ interface RegressionBenchmarkResult extends SharedBenchmarkResult {
11
+ mse: number;
12
+ r2: number;
13
+ }
14
+
15
+ interface ClassificationBenchmarkResult extends SharedBenchmarkResult {
16
+ accuracy: number;
17
+ f1: number;
18
+ }
19
+
20
+ interface TreeModelComparison {
21
+ bun: ClassificationBenchmarkResult;
22
+ sklearn: ClassificationBenchmarkResult;
23
+ comparison: {
24
+ accuracyDeltaVsSklearn: number;
25
+ f1DeltaVsSklearn: number;
26
+ };
27
+ }
28
+
29
+ interface BenchmarkSnapshot {
30
+ suites: {
31
+ regression: {
32
+ results: [RegressionBenchmarkResult, RegressionBenchmarkResult];
33
+ comparison: {
34
+ mseDeltaVsSklearn: number;
35
+ r2DeltaVsSklearn: number;
36
+ };
37
+ };
38
+ classification: {
39
+ results: [ClassificationBenchmarkResult, ClassificationBenchmarkResult];
40
+ comparison: {
41
+ accuracyDeltaVsSklearn: number;
42
+ f1DeltaVsSklearn: number;
43
+ };
44
+ };
45
+ treeClassification: {
46
+ models: [TreeModelComparison, TreeModelComparison];
47
+ };
48
+ };
49
+ }
50
+
51
+ const pathArgIndex = Bun.argv.indexOf("--input");
52
+ const inputPath =
53
+ pathArgIndex !== -1 && pathArgIndex + 1 < Bun.argv.length
54
+ ? resolve(Bun.argv[pathArgIndex + 1])
55
+ : resolve("bench/results/heart-ci-current.json");
56
+
57
+ const snapshot = JSON.parse(await readFile(inputPath, "utf-8")) as BenchmarkSnapshot;
58
+
59
+ const [bunRegression, sklearnRegression] = snapshot.suites.regression.results;
60
+ const [bunClassification, sklearnClassification] = snapshot.suites.classification.results;
61
+ const [decisionTree, randomForest] = snapshot.suites.treeClassification.models;
62
+
63
+ for (const result of [
64
+ bunRegression,
65
+ sklearnRegression,
66
+ bunClassification,
67
+ sklearnClassification,
68
+ decisionTree.bun,
69
+ decisionTree.sklearn,
70
+ randomForest.bun,
71
+ randomForest.sklearn,
72
+ ]) {
73
+ if (!(result.fitMsMedian > 0 && result.predictMsMedian > 0)) {
74
+ throw new Error(`Benchmark timings must be positive for ${result.implementation}.`);
75
+ }
76
+ }
77
+
78
+ if (
79
+ !Number.isFinite(bunRegression.mse) ||
80
+ !Number.isFinite(sklearnRegression.mse) ||
81
+ !Number.isFinite(bunRegression.r2) ||
82
+ !Number.isFinite(sklearnRegression.r2)
83
+ ) {
84
+ throw new Error("Regression metrics must be finite for both implementations.");
85
+ }
86
+
87
+ if (
88
+ !Number.isFinite(bunClassification.accuracy) ||
89
+ !Number.isFinite(sklearnClassification.accuracy) ||
90
+ !Number.isFinite(bunClassification.f1) ||
91
+ !Number.isFinite(sklearnClassification.f1)
92
+ ) {
93
+ throw new Error("Classification metrics must be finite for both implementations.");
94
+ }
95
+
96
+ if (Math.abs(snapshot.suites.regression.comparison.mseDeltaVsSklearn) > 0.01) {
97
+ throw new Error(
98
+ `Regression MSE delta too large: ${snapshot.suites.regression.comparison.mseDeltaVsSklearn}.`,
99
+ );
100
+ }
101
+
102
+ if (Math.abs(snapshot.suites.regression.comparison.r2DeltaVsSklearn) > 0.01) {
103
+ throw new Error(
104
+ `Regression R2 delta too large: ${snapshot.suites.regression.comparison.r2DeltaVsSklearn}.`,
105
+ );
106
+ }
107
+
108
+ if (Math.abs(snapshot.suites.classification.comparison.accuracyDeltaVsSklearn) > 0.05) {
109
+ throw new Error(
110
+ `Classification accuracy delta too large: ${snapshot.suites.classification.comparison.accuracyDeltaVsSklearn}.`,
111
+ );
112
+ }
113
+
114
+ if (Math.abs(snapshot.suites.classification.comparison.f1DeltaVsSklearn) > 0.05) {
115
+ throw new Error(
116
+ `Classification F1 delta too large: ${snapshot.suites.classification.comparison.f1DeltaVsSklearn}.`,
117
+ );
118
+ }
119
+
120
+ if (Math.abs(decisionTree.comparison.accuracyDeltaVsSklearn) > 0.08) {
121
+ throw new Error(
122
+ `DecisionTree accuracy delta too large: ${decisionTree.comparison.accuracyDeltaVsSklearn}.`,
123
+ );
124
+ }
125
+
126
+ if (Math.abs(decisionTree.comparison.f1DeltaVsSklearn) > 0.08) {
127
+ throw new Error(`DecisionTree F1 delta too large: ${decisionTree.comparison.f1DeltaVsSklearn}.`);
128
+ }
129
+
130
+ if (Math.abs(randomForest.comparison.accuracyDeltaVsSklearn) > 0.08) {
131
+ throw new Error(
132
+ `RandomForest accuracy delta too large: ${randomForest.comparison.accuracyDeltaVsSklearn}.`,
133
+ );
134
+ }
135
+
136
+ if (Math.abs(randomForest.comparison.f1DeltaVsSklearn) > 0.08) {
137
+ throw new Error(`RandomForest F1 delta too large: ${randomForest.comparison.f1DeltaVsSklearn}.`);
138
+ }
139
+
140
+ console.log("Benchmark comparison health checks passed.");