bun-scikit 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +187 -0
  3. package/binding.gyp +21 -0
  4. package/docs/README.md +7 -0
  5. package/docs/native-abi.md +53 -0
  6. package/index.ts +1 -0
  7. package/package.json +76 -0
  8. package/scripts/build-node-addon.ts +26 -0
  9. package/scripts/build-zig-kernels.ts +50 -0
  10. package/scripts/check-api-docs-coverage.ts +52 -0
  11. package/scripts/check-benchmark-health.ts +140 -0
  12. package/scripts/install-native.ts +160 -0
  13. package/scripts/package-native-artifacts.ts +62 -0
  14. package/scripts/sync-benchmark-readme.ts +181 -0
  15. package/scripts/update-benchmark-history.ts +91 -0
  16. package/src/ensemble/RandomForestClassifier.ts +136 -0
  17. package/src/ensemble/RandomForestRegressor.ts +136 -0
  18. package/src/index.ts +32 -0
  19. package/src/linear_model/LinearRegression.ts +136 -0
  20. package/src/linear_model/LogisticRegression.ts +260 -0
  21. package/src/linear_model/SGDClassifier.ts +161 -0
  22. package/src/linear_model/SGDRegressor.ts +104 -0
  23. package/src/metrics/classification.ts +294 -0
  24. package/src/metrics/regression.ts +51 -0
  25. package/src/model_selection/GridSearchCV.ts +244 -0
  26. package/src/model_selection/KFold.ts +82 -0
  27. package/src/model_selection/RepeatedKFold.ts +49 -0
  28. package/src/model_selection/RepeatedStratifiedKFold.ts +50 -0
  29. package/src/model_selection/StratifiedKFold.ts +112 -0
  30. package/src/model_selection/StratifiedShuffleSplit.ts +211 -0
  31. package/src/model_selection/crossValScore.ts +165 -0
  32. package/src/model_selection/trainTestSplit.ts +82 -0
  33. package/src/naive_bayes/GaussianNB.ts +148 -0
  34. package/src/native/node-addon/bun_scikit_addon.cpp +450 -0
  35. package/src/native/zigKernels.ts +576 -0
  36. package/src/neighbors/KNeighborsClassifier.ts +85 -0
  37. package/src/pipeline/ColumnTransformer.ts +203 -0
  38. package/src/pipeline/FeatureUnion.ts +123 -0
  39. package/src/pipeline/Pipeline.ts +168 -0
  40. package/src/preprocessing/MinMaxScaler.ts +113 -0
  41. package/src/preprocessing/OneHotEncoder.ts +91 -0
  42. package/src/preprocessing/PolynomialFeatures.ts +158 -0
  43. package/src/preprocessing/RobustScaler.ts +149 -0
  44. package/src/preprocessing/SimpleImputer.ts +150 -0
  45. package/src/preprocessing/StandardScaler.ts +92 -0
  46. package/src/svm/LinearSVC.ts +117 -0
  47. package/src/tree/DecisionTreeClassifier.ts +394 -0
  48. package/src/tree/DecisionTreeRegressor.ts +407 -0
  49. package/src/types.ts +18 -0
  50. package/src/utils/linalg.ts +209 -0
  51. package/src/utils/validation.ts +78 -0
  52. package/zig/kernels.zig +1327 -0
@@ -0,0 +1,407 @@
1
+ import type { Matrix, RegressionModel, Vector } from "../types";
2
+ import { r2Score } from "../metrics/regression";
3
+ import {
4
+ assertConsistentRowSize,
5
+ assertFiniteMatrix,
6
+ assertFiniteVector,
7
+ validateRegressionInputs,
8
+ } from "../utils/validation";
9
+ import type { MaxFeaturesOption } from "./DecisionTreeClassifier";
10
+
11
/**
 * Hyperparameters accepted by the `DecisionTreeRegressor` constructor.
 * Every field is optional; the constructor substitutes a default when a
 * field is omitted.
 */
export interface DecisionTreeRegressorOptions {
  /** Depth at which nodes are forced to become leaves (constructor default: 12). */
  maxDepth?: number;

  /** A node holding fewer samples than this is not split (constructor default: 2). */
  minSamplesSplit?: number;

  /** Minimum number of samples each side of a split must keep (constructor default: 1). */
  minSamplesLeaf?: number;

  /** How many features are examined per split; `null` means all (constructor default: null). */
  maxFeatures?: MaxFeaturesOption;

  /** Seed for the feature-subsampling generator; when omitted `Math.random` is used. */
  randomState?: number;
}
18
+
19
/**
 * One node of the fitted regression tree.
 * Leaves carry only `prediction`; internal nodes additionally carry the
 * split (`featureIndex`, `threshold`) and both children.
 */
interface TreeNode {
  /** Mean target value of the training samples that reached this node. */
  prediction: number;

  /** Column tested by this node (internal nodes only). */
  featureIndex?: number;

  /** Samples with feature value `<= threshold` descend to `left` (internal nodes only). */
  threshold?: number;

  /** Child taken when the feature value is `<= threshold`. */
  left?: TreeNode;

  /** Child taken when the feature value is `> threshold`. */
  right?: TreeNode;

  /** `true` when the node has no children. */
  isLeaf: boolean;
}
27
+
28
/** Best split found for a single feature. */
interface SplitEvaluation {
  /** Feature value separating the left (`<=`) side from the right (`>`) side. */
  threshold: number;

  /** Sample-weighted variance of the two sides; lower is better. */
  impurity: number;
}
32
+
33
/** Training-row indices of a node after they have been divided by a split. */
interface SplitPartition {
  /** Rows whose feature value is `<=` the threshold. */
  leftIndices: number[];

  /** Rows whose feature value is `>` the threshold. */
  rightIndices: number[];
}
37
+
38
// Upper bound on the number of histogram bins used when scanning a feature for
// split thresholds; it also fixes the size of the reusable per-bin buffers.
+ const MAX_THRESHOLD_BINS = 128;
39
+
40
/**
 * Creates a deterministic pseudo-random generator (mulberry32).
 *
 * @param seed Any number; it is coerced to an unsigned 32-bit integer.
 * @returns A function yielding a value in `[0, 1)` on every call. Two
 *   generators built from the same seed produce the same sequence.
 */
function mulberry32(seed: number): () => number {
  let state = seed >>> 0;
  return () => {
    // Keep the counter wrapped to 32 bits. Letting it grow as a plain double
    // makes it exceed 2^53 after a few million draws, after which the
    // additions round and the low bits fed to the mixer are wrong.
    state = (state + 0x6d2b79f5) >>> 0;
    let t = Math.imul(state ^ (state >>> 15), 1 | state);
    t ^= t + Math.imul(t ^ (t >>> 7), 61 | t);
    // Scale the final unsigned 32-bit word into [0, 1).
    return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
  };
}
49
+
50
/**
 * Population variance computed from running totals, clamped at zero.
 *
 * @param sum Sum of the values.
 * @param sumSquares Sum of the squared values.
 * @param count Number of values; a non-positive count yields 0.
 * @returns `sumSquares / count - (sum / count)^2`, or 0 when rounding makes
 *   that expression negative.
 */
function safeVariance(sum: number, sumSquares: number, count: number): number {
  if (count <= 0) {
    return 0;
  }

  const average = sum / count;
  const spread = sumSquares / count - average * average;

  // Cancellation can push the result slightly below zero.
  if (spread < 0) {
    return 0;
  }
  return spread;
}
58
+
59
/**
 * Regression tree that greedily minimises the sample-weighted variance of the
 * two sides of each split.
 *
 * Candidate thresholds for a feature are found with a fixed-size histogram
 * (at most `MAX_THRESHOLD_BINS` bins) rather than by sorting the feature.
 */
export class DecisionTreeRegressor implements RegressionModel {
  private readonly maxDepth: number;
  private readonly minSamplesSplit: number;
  private readonly minSamplesLeaf: number;
  private readonly maxFeatures: MaxFeaturesOption;
  private readonly randomState?: number;
  private random: () => number = Math.random;
  private root: TreeNode | null = null;
  private flattenedXTrain: Float64Array | null = null;
  private yTrain: Float64Array | null = null;
  private featureCount = 0;
  private allFeatureIndices: number[] = [];
  private featureSelectionMarks: Uint8Array | null = null;
  // Per-bin accumulators, allocated once and reused by every threshold search.
  private binTotals: Uint32Array = new Uint32Array(MAX_THRESHOLD_BINS);
  private binSums: Float64Array = new Float64Array(MAX_THRESHOLD_BINS);
  private binSumsSquares: Float64Array = new Float64Array(MAX_THRESHOLD_BINS);
  private binMinValues: Float64Array = new Float64Array(MAX_THRESHOLD_BINS);
  private binMaxValues: Float64Array = new Float64Array(MAX_THRESHOLD_BINS);

  /**
   * @param options Hyperparameters. Omitted fields default to `maxDepth = 12`,
   *   `minSamplesSplit = 2`, `minSamplesLeaf = 1`, `maxFeatures = null` (all
   *   features) and an unseeded generator.
   */
  constructor(options: DecisionTreeRegressorOptions = {}) {
    this.maxDepth = options.maxDepth ?? 12;
    this.minSamplesSplit = options.minSamplesSplit ?? 2;
    this.minSamplesLeaf = options.minSamplesLeaf ?? 1;
    this.maxFeatures = options.maxFeatures ?? null;
    this.randomState = options.randomState;
  }

  /**
   * Builds the tree from the training data.
   *
   * @param X Training matrix, one row per sample.
   * @param y Training targets, one per row of `X`.
   * @param sampleIndices Optional row indices to train on (repeats allowed);
   *   when omitted every row is used once.
   * @param skipValidation When `true`, `validateRegressionInputs` is not run.
   * @param flattenedXTrain Optional row-major copy of `X` to reuse instead of
   *   flattening `X` again.
   * @param yTrain Optional `Float64Array` copy of `y` to reuse.
   * @returns This instance.
   * @throws Error when `sampleIndices` is empty or holds an index that is not
   *   an integer in `[0, X.length)`.
   */
  fit(
    X: Matrix,
    y: Vector,
    sampleIndices?: ArrayLike<number>,
    skipValidation = false,
    flattenedXTrain?: Float64Array,
    yTrain?: Float64Array,
  ): this {
    if (!skipValidation) {
      validateRegressionInputs(X, y);
    }

    this.featureCount = X[0].length;
    this.flattenedXTrain = flattenedXTrain ?? this.flattenTrainingMatrix(X);
    this.yTrain = yTrain ?? this.toFloat64Vector(y);
    this.allFeatureIndices = new Array<number>(this.featureCount);
    for (let i = 0; i < this.featureCount; i += 1) {
      this.allFeatureIndices[i] = i;
    }
    this.featureSelectionMarks = new Uint8Array(this.featureCount);
    // Re-seed on every fit so repeated fits with a randomState are reproducible.
    this.random = this.randomState === undefined ? Math.random : mulberry32(this.randomState);

    let rootIndices: number[];
    if (sampleIndices) {
      if (sampleIndices.length === 0) {
        throw new Error("sampleIndices must not be empty.");
      }
      rootIndices = new Array<number>(sampleIndices.length);
      for (let i = 0; i < sampleIndices.length; i += 1) {
        const index = sampleIndices[i];
        if (!Number.isInteger(index) || index < 0 || index >= X.length) {
          throw new Error(`sampleIndices contains invalid index: ${index}.`);
        }
        rootIndices[i] = index;
      }
    } else {
      rootIndices = new Array<number>(X.length);
      for (let i = 0; i < X.length; i += 1) {
        rootIndices[i] = i;
      }
    }

    this.root = this.buildTree(rootIndices, 0);
    return this;
  }

  /**
   * Predicts one target per row of `X`.
   *
   * @param X Samples to predict; an empty matrix yields an empty vector.
   * @returns Predicted values in row order.
   * @throws Error when the model is not fitted or the feature count differs
   *   from the one seen by `fit`.
   */
  predict(X: Matrix): Vector {
    const root = this.root;
    if (!root || this.featureCount === 0) {
      throw new Error("DecisionTreeRegressor has not been fitted.");
    }

    assertConsistentRowSize(X);
    assertFiniteMatrix(X);

    // Nothing to predict; also avoids dereferencing the missing first row.
    if (X.length === 0) {
      return [];
    }

    if (X[0].length !== this.featureCount) {
      throw new Error(
        `Feature size mismatch. Expected ${this.featureCount}, got ${X[0].length}.`,
      );
    }

    return X.map((sample) => this.predictOne(sample, root));
  }

  /**
   * Scores the model on `(X, y)` with `r2Score(y, predict(X))`.
   */
  score(X: Matrix, y: Vector): number {
    assertFiniteVector(y);
    return r2Score(y, this.predict(X));
  }

  /** Walks from `node` down to a leaf and returns that leaf's prediction. */
  private predictOne(sample: Vector, node: TreeNode): number {
    let current = node;
    while (
      !current.isLeaf &&
      current.featureIndex !== undefined &&
      current.threshold !== undefined
    ) {
      if (sample[current.featureIndex] <= current.threshold) {
        current = current.left!;
      } else {
        current = current.right!;
      }
    }
    return current.prediction;
  }

  /**
   * Recursively builds the subtree for the given training rows.
   *
   * @param indices Training-row indices that reached this node.
   * @param depth Depth of this node (root is 0).
   */
  private buildTree(indices: number[], depth: number): TreeNode {
    const y = this.yTrain!;
    const sampleCount = indices.length;

    let sum = 0;
    let sumSquares = 0;
    for (let i = 0; i < sampleCount; i += 1) {
      const value = y[indices[i]];
      sum += value;
      sumSquares += value * value;
    }
    const prediction = sum / sampleCount;
    const parentVariance = safeVariance(sum, sumSquares, sampleCount);

    const depthStop = depth >= this.maxDepth;
    const splitStop = sampleCount < this.minSamplesSplit;
    const pureEnough = parentVariance <= 1e-14;
    if (depthStop || splitStop || pureEnough) {
      return { isLeaf: true, prediction };
    }

    const candidateFeatures = this.selectFeatureIndices(this.featureCount);
    let bestFeature = -1;
    let bestSplit: SplitEvaluation | null = null;

    for (let i = 0; i < candidateFeatures.length; i += 1) {
      const featureIndex = candidateFeatures[i];
      const split = this.findBestThreshold(indices, featureIndex);
      if (!split) {
        continue;
      }
      if (!bestSplit || split.impurity < bestSplit.impurity) {
        bestFeature = featureIndex;
        bestSplit = split;
      }
    }

    // Only split when the weighted child variance is a real improvement.
    if (!bestSplit || bestFeature === -1 || bestSplit.impurity >= parentVariance - 1e-14) {
      return { isLeaf: true, prediction };
    }

    const partition = this.partitionIndices(indices, bestFeature, bestSplit.threshold);
    if (!partition) {
      return { isLeaf: true, prediction };
    }

    return {
      isLeaf: false,
      prediction,
      featureIndex: bestFeature,
      threshold: bestSplit.threshold,
      left: this.buildTree(partition.leftIndices, depth + 1),
      right: this.buildTree(partition.rightIndices, depth + 1),
    };
  }

  /**
   * Converts the `maxFeatures` option into a feature count in
   * `[1, featureCount]`.
   *
   * @throws Error when a numeric `maxFeatures` is NaN.
   */
  private resolveMaxFeatures(featureCount: number): number {
    if (this.maxFeatures === null || this.maxFeatures === undefined) {
      return featureCount;
    }
    if (this.maxFeatures === "sqrt") {
      return Math.max(1, Math.floor(Math.sqrt(featureCount)));
    }
    if (this.maxFeatures === "log2") {
      return Math.max(1, Math.floor(Math.log2(featureCount)));
    }
    const requested = Math.floor(this.maxFeatures);
    if (Number.isNaN(requested)) {
      // Without this guard NaN propagates into `new Array(NaN)` and surfaces
      // as an unrelated "Invalid array length" RangeError.
      throw new Error(`maxFeatures must be a number, "sqrt", "log2" or null.`);
    }
    return Math.max(1, Math.min(featureCount, requested));
  }

  /**
   * Chooses the features to examine at one node: all of them, or a random
   * subset without repetition when `maxFeatures` asks for fewer.
   */
  private selectFeatureIndices(featureCount: number): number[] {
    const k = this.resolveMaxFeatures(featureCount);
    if (k >= featureCount) {
      return this.allFeatureIndices;
    }

    const marks = this.featureSelectionMarks!;
    marks.fill(0);
    const selected = new Array<number>(k);
    let count = 0;
    // Rejection sampling: redraw whenever an already-chosen feature comes up.
    while (count < k) {
      const candidate = Math.floor(this.random() * featureCount);
      if (marks[candidate] !== 0) {
        continue;
      }
      marks[candidate] = 1;
      selected[count] = candidate;
      count += 1;
    }
    return selected;
  }

  /**
   * Finds the lowest-impurity split of `indices` on one feature.
   *
   * Samples are bucketed into equal-width bins over the feature's range and
   * every boundary between bins is scored. The returned threshold lies
   * between the largest value on the left and the smallest value on the
   * right, so partitioning with `value <= threshold` reproduces exactly the
   * two groups that were scored.
   *
   * @returns The best split, or `null` when the feature is constant, its
   *   range is not finite, or no boundary satisfies `minSamplesLeaf`.
   */
  private findBestThreshold(indices: number[], featureIndex: number): SplitEvaluation | null {
    const x = this.flattenedXTrain!;
    const y = this.yTrain!;
    const stride = this.featureCount;
    const sampleCount = indices.length;

    let minValue = Number.POSITIVE_INFINITY;
    let maxValue = Number.NEGATIVE_INFINITY;
    let totalSum = 0;
    let totalSumSquares = 0;

    for (let i = 0; i < sampleCount; i += 1) {
      const sampleIndex = indices[i];
      const xValue = x[sampleIndex * stride + featureIndex];
      const yValue = y[sampleIndex];
      if (xValue < minValue) {
        minValue = xValue;
      }
      if (xValue > maxValue) {
        maxValue = xValue;
      }
      totalSum += yValue;
      totalSumSquares += yValue * yValue;
    }

    if (!Number.isFinite(minValue) || !Number.isFinite(maxValue) || minValue === maxValue) {
      return null;
    }

    const range = maxValue - minValue;
    if (!Number.isFinite(range)) {
      // The span overflowed; bin positions cannot be computed.
      return null;
    }

    const dynamicBins = Math.floor(Math.sqrt(sampleCount));
    const binCount = Math.max(16, Math.min(MAX_THRESHOLD_BINS, dynamicBins));
    const binTotals = this.binTotals;
    const binSums = this.binSums;
    const binSumsSquares = this.binSumsSquares;
    const binMinValues = this.binMinValues;
    const binMaxValues = this.binMaxValues;
    binTotals.fill(0, 0, binCount);
    binSums.fill(0, 0, binCount);
    binSumsSquares.fill(0, 0, binCount);
    binMinValues.fill(Number.POSITIVE_INFINITY, 0, binCount);
    binMaxValues.fill(Number.NEGATIVE_INFINITY, 0, binCount);

    for (let i = 0; i < sampleCount; i += 1) {
      const sampleIndex = indices[i];
      const xValue = x[sampleIndex * stride + featureIndex];
      const yValue = y[sampleIndex];
      let bin = Math.floor(((xValue - minValue) / range) * binCount);
      if (bin < 0) {
        bin = 0;
      } else if (bin >= binCount) {
        // The maximum maps to `binCount`; fold it into the last bin.
        bin = binCount - 1;
      }
      binTotals[bin] += 1;
      binSums[bin] += yValue;
      binSumsSquares[bin] += yValue * yValue;
      if (xValue < binMinValues[bin]) {
        binMinValues[bin] = xValue;
      }
      if (xValue > binMaxValues[bin]) {
        binMaxValues[bin] = xValue;
      }
    }

    let leftCount = 0;
    let leftSum = 0;
    let leftSumSquares = 0;
    let bestImpurity = Number.POSITIVE_INFINITY;
    let bestBin = -1;

    // Score the boundary after each bin; the last bin has no boundary.
    for (let bin = 0; bin < binCount - 1; bin += 1) {
      leftCount += binTotals[bin];
      leftSum += binSums[bin];
      leftSumSquares += binSumsSquares[bin];

      const rightCount = sampleCount - leftCount;
      if (leftCount < this.minSamplesLeaf || rightCount < this.minSamplesLeaf) {
        continue;
      }

      const rightSum = totalSum - leftSum;
      const rightSumSquares = totalSumSquares - leftSumSquares;

      const leftVariance = safeVariance(leftSum, leftSumSquares, leftCount);
      const rightVariance = safeVariance(rightSum, rightSumSquares, rightCount);
      const impurity =
        (leftCount / sampleCount) * leftVariance + (rightCount / sampleCount) * rightVariance;

      if (impurity < bestImpurity) {
        bestImpurity = impurity;
        bestBin = bin;
      }
    }

    if (bestBin === -1 || !Number.isFinite(bestImpurity)) {
      return null;
    }

    // Bin index never decreases as the feature value grows, so the nearest
    // occupied bin on each side of the boundary holds the two values that
    // bracket the split. Bin 0 holds the minimum and the last bin holds the
    // maximum, which bounds both scans.
    let leftBin = bestBin;
    while (leftBin > 0 && binTotals[leftBin] === 0) {
      leftBin -= 1;
    }
    let rightBin = bestBin + 1;
    while (rightBin < binCount - 1 && binTotals[rightBin] === 0) {
      rightBin += 1;
    }

    const leftMax = binMaxValues[leftBin];
    const rightMin = binMinValues[rightBin];
    if (!(leftMax < rightMin)) {
      return null;
    }

    let threshold = leftMax + (rightMin - leftMax) / 2;
    if (!(threshold < rightMin)) {
      // The midpoint rounded up onto the right value; fall back to the left
      // value so `<= threshold` still excludes the right side.
      threshold = leftMax;
    }

    return {
      threshold,
      impurity: bestImpurity,
    };
  }

  /**
   * Divides `indices` into rows with feature value `<= threshold` (left) and
   * the rest (right).
   *
   * @returns The two index lists, or `null` when either side would hold fewer
   *   than `minSamplesLeaf` rows.
   */
  private partitionIndices(
    indices: number[],
    featureIndex: number,
    threshold: number,
  ): SplitPartition | null {
    const x = this.flattenedXTrain!;
    const stride = this.featureCount;
    const sampleCount = indices.length;
    const leftIndices = new Array<number>(sampleCount);
    const rightIndices = new Array<number>(sampleCount);
    let leftCount = 0;
    let rightCount = 0;

    for (let i = 0; i < sampleCount; i += 1) {
      const sampleIndex = indices[i];
      if (x[sampleIndex * stride + featureIndex] <= threshold) {
        leftIndices[leftCount] = sampleIndex;
        leftCount += 1;
      } else {
        rightIndices[rightCount] = sampleIndex;
        rightCount += 1;
      }
    }

    if (leftCount < this.minSamplesLeaf || rightCount < this.minSamplesLeaf) {
      return null;
    }

    return {
      leftIndices: leftIndices.slice(0, leftCount),
      rightIndices: rightIndices.slice(0, rightCount),
    };
  }

  /** Copies `X` into a row-major `Float64Array` with `featureCount` values per row. */
  private flattenTrainingMatrix(X: Matrix): Float64Array {
    const sampleCount = X.length;
    const flattened = new Float64Array(sampleCount * this.featureCount);
    for (let i = 0; i < sampleCount; i += 1) {
      const row = X[i];
      const rowOffset = i * this.featureCount;
      for (let j = 0; j < this.featureCount; j += 1) {
        flattened[rowOffset + j] = row[j];
      }
    }
    return flattened;
  }

  /** Copies `y` into a `Float64Array`. */
  private toFloat64Vector(y: Vector): Float64Array {
    const out = new Float64Array(y.length);
    for (let i = 0; i < y.length; i += 1) {
      out[i] = y[i];
    }
    return out;
  }
}
package/src/types.ts ADDED
@@ -0,0 +1,18 @@
1
/** A one-dimensional list of numbers (targets, weights, a single sample, ...). */
export type Vector = Array<number>;
2
/** A two-dimensional list of numbers stored as an array of rows. */
export type Matrix = Array<Array<number>>;
3
+
4
/** Contract for objects that are fitted on a matrix and then map matrices to matrices. */
export interface Transformer {
  /** Fits on `X` (and optionally `y`) and returns the same instance. */
  fit(X: Matrix, y?: Vector): this;

  /** Maps `X` to a new matrix. */
  transform(X: Matrix): Matrix;

  /** Optional combined fit-and-transform entry point. */
  fitTransform?(X: Matrix, y?: Vector): Matrix;
}
9
+
10
/** Contract for estimators that are fitted on `(X, y)` and predict one number per row. */
export interface RegressionModel {
  /** Fits the model and returns the same instance. */
  fit(X: Matrix, y: Vector): this;

  /** Returns one prediction per row of `X`. */
  predict(X: Matrix): Vector;
}
14
+
15
/** Contract for estimators that are fitted on `(X, y)` and predict one label per row. */
export interface ClassificationModel {
  /** Fits the model and returns the same instance. */
  fit(X: Matrix, y: Vector): this;

  /** Returns one predicted label per row of `X`. */
  predict(X: Matrix): Vector;
}
@@ -0,0 +1,209 @@
1
+ import type { Matrix, Vector } from "../types";
2
+
3
/**
 * Returns the transpose of `X` as a new matrix.
 *
 * @param X Matrix whose column count is taken from its first row.
 * @returns A matrix with one row per column of `X`; an empty input yields `[]`.
 */
export function transpose(X: Matrix): Matrix {
  const rows = X.length;
  if (rows === 0) {
    // No first row to read a column count from.
    return [];
  }

  const cols = X[0].length;
  const result: Matrix = Array.from({ length: cols }, () =>
    new Array<number>(rows).fill(0),
  );

  for (let i = 0; i < rows; i += 1) {
    for (let j = 0; j < cols; j += 1) {
      result[j][i] = X[i][j];
    }
  }

  return result;
}
18
+
19
/**
 * Computes the matrix product `A * B`.
 *
 * @param A Left operand; its column count is read from its first row.
 * @param B Right operand; its column count is read from its first row.
 * @returns A new matrix with `A.length` rows and `B[0].length` columns.
 * @throws Error when the column count of `A` differs from the row count of `B`.
 */
export function multiplyMatrices(A: Matrix, B: Matrix): Matrix {
  const aRows = A.length;
  const aCols = A[0].length;
  const bRows = B.length;
  const bCols = B[0].length;

  if (aCols !== bRows) {
    throw new Error(
      `Matrix dimensions do not align: ${aRows}x${aCols} times ${bRows}x${bCols}.`,
    );
  }

  const product: Matrix = [];
  for (let r = 0; r < aRows; r += 1) {
    const leftRow = A[r];
    const outRow = new Array<number>(bCols).fill(0);
    for (let c = 0; c < bCols; c += 1) {
      // Accumulate along the shared dimension in ascending order.
      let acc = 0;
      for (let k = 0; k < aCols; k += 1) {
        acc += leftRow[k] * B[k][c];
      }
      outRow[c] = acc;
    }
    product.push(outRow);
  }

  return product;
}
46
+
47
/**
 * Computes the matrix-vector product `A * x`.
 *
 * @param A Matrix whose column count is read from its first row.
 * @param x Vector with one entry per column of `A`.
 * @returns A new vector with one entry per row of `A`.
 * @throws Error when the column count of `A` differs from `x.length`.
 */
export function multiplyMatrixVector(A: Matrix, x: Vector): Vector {
  const rowCount = A.length;
  const colCount = A[0].length;

  if (colCount !== x.length) {
    throw new Error(
      `Matrix-vector dimensions do not align: ${rowCount}x${colCount} times ${x.length}.`,
    );
  }

  const product: Vector = [];
  for (let r = 0; r < rowCount; r += 1) {
    const row = A[r];
    let acc = 0;
    for (let c = 0; c < colCount; c += 1) {
      acc += row[c] * x[c];
    }
    product.push(acc);
  }

  return product;
}
68
+
69
/**
 * Returns a copy of `X` in which every row is prefixed with a constant 1.
 *
 * @param X Input matrix; it is not modified.
 */
export function addInterceptColumn(X: Matrix): Matrix {
  return X.map((row) => {
    const withIntercept: number[] = [1];
    for (const value of row) {
      withIntercept.push(value);
    }
    return withIntercept;
  });
}
72
+
73
/**
 * Builds the `size` x `size` identity matrix.
 *
 * @param size Number of rows and columns.
 */
export function identityMatrix(size: number): Matrix {
  return Array.from({ length: size }, (_, diagonal) => {
    const row = new Array<number>(size).fill(0);
    row[diagonal] = 1;
    return row;
  });
}
80
+
81
/**
 * Inverts a square matrix with Gauss-Jordan elimination and partial pivoting.
 *
 * @param A Square matrix; it is not modified.
 * @returns A new matrix holding the inverse of `A`.
 * @throws Error when `A` is empty, when any row's length differs from the
 *   number of rows, or when a pivot's magnitude falls below `1e-12`.
 */
export function inverseMatrix(A: Matrix): Matrix {
  const n = A.length;
  // Check every row, not just the first: a ragged matrix would otherwise be
  // processed with missing entries and yield a meaningless result.
  if (n === 0 || A.some((row) => row.length !== n)) {
    throw new Error("Only non-empty square matrices can be inverted.");
  }

  const EPSILON = 1e-12;
  const width = 2 * n;

  // Build [A | I] row by row instead of constructing a full identity matrix
  // for every row.
  const augmented: Matrix = A.map((row, i) => {
    const extended = new Array<number>(width).fill(0);
    for (let j = 0; j < n; j += 1) {
      extended[j] = row[j];
    }
    extended[n + i] = 1;
    return extended;
  });

  for (let col = 0; col < n; col += 1) {
    // Partial pivoting: use the row with the largest magnitude in this column.
    let pivotRow = col;
    let maxAbs = Math.abs(augmented[pivotRow][col]);

    for (let r = col + 1; r < n; r += 1) {
      const value = Math.abs(augmented[r][col]);
      if (value > maxAbs) {
        maxAbs = value;
        pivotRow = r;
      }
    }

    if (maxAbs < EPSILON) {
      throw new Error("Matrix is singular or near-singular and cannot be inverted.");
    }

    if (pivotRow !== col) {
      const tmp = augmented[col];
      augmented[col] = augmented[pivotRow];
      augmented[pivotRow] = tmp;
    }

    // Scale the pivot row so the pivot becomes 1.
    const pivot = augmented[col][col];
    for (let j = 0; j < width; j += 1) {
      augmented[col][j] /= pivot;
    }

    // Clear this column in every other row.
    for (let r = 0; r < n; r += 1) {
      if (r === col) {
        continue;
      }

      const factor = augmented[r][col];
      if (factor === 0) {
        continue;
      }

      for (let j = 0; j < width; j += 1) {
        augmented[r][j] -= factor * augmented[col][j];
      }
    }
  }

  // The right half of the augmented matrix now holds the inverse.
  return augmented.map((row) => row.slice(n));
}
135
+
136
/**
 * Solves `A x = b` through a Cholesky factorisation `A = L L^T` followed by a
 * forward and a backward substitution.
 *
 * Only the lower triangle of `A` (including the diagonal) is read.
 *
 * @param A Square coefficient matrix.
 * @param b Right-hand side with one entry per row of `A`.
 * @returns The solution vector `x`.
 * @throws Error when `A` is empty or its first row's length differs from its
 *   row count, when `b` has the wrong length, or when a diagonal term of the
 *   factorisation is `<= 1e-12`.
 */
export function solveSymmetricPositiveDefinite(A: Matrix, b: Vector): Vector {
  const size = A.length;
  if (size === 0 || A[0].length !== size) {
    throw new Error("A must be a non-empty square matrix.");
  }
  if (b.length !== size) {
    throw new Error(`b length must match matrix size ${size}. Got ${b.length}.`);
  }

  const TOLERANCE = 1e-12;
  const lower: Matrix = [];
  for (let r = 0; r < size; r += 1) {
    lower.push(new Array<number>(size).fill(0));
  }

  // Cholesky factorisation, filled row by row.
  for (let row = 0; row < size; row += 1) {
    for (let col = 0; col <= row; col += 1) {
      let residual = A[row][col];
      for (let k = 0; k < col; k += 1) {
        residual -= lower[row][k] * lower[col][k];
      }

      if (row !== col) {
        lower[row][col] = residual / lower[col][col];
        continue;
      }

      if (residual <= TOLERANCE) {
        throw new Error("Matrix is not positive definite.");
      }
      lower[row][col] = Math.sqrt(residual);
    }
  }

  // Forward substitution: L * forward = b.
  const forward = new Array<number>(size).fill(0);
  for (let row = 0; row < size; row += 1) {
    let acc = b[row];
    for (let k = 0; k < row; k += 1) {
      acc -= lower[row][k] * forward[k];
    }
    forward[row] = acc / lower[row][row];
  }

  // Backward substitution: L^T * solution = forward.
  const solution = new Array<number>(size).fill(0);
  for (let row = size - 1; row >= 0; row -= 1) {
    let acc = forward[row];
    for (let k = row + 1; k < size; k += 1) {
      acc -= lower[k][row] * solution[k];
    }
    solution[row] = acc / lower[row][row];
  }

  return solution;
}
186
+
187
/**
 * Computes the dot product of two equally sized vectors.
 *
 * @throws Error when the vectors differ in length.
 */
export function dot(a: Vector, b: Vector): number {
  const length = a.length;
  if (length !== b.length) {
    throw new Error(`Vector sizes do not match: ${a.length} vs ${b.length}.`);
  }

  let total = 0;
  let position = 0;
  while (position < length) {
    total += a[position] * b[position];
    position += 1;
  }
  return total;
}
198
+
199
/**
 * Computes the arithmetic mean of `values`.
 *
 * @throws Error when `values` is empty.
 */
export function mean(values: Vector): number {
  const count = values.length;
  if (count === 0) {
    throw new Error("Cannot compute mean of an empty vector.");
  }

  let running = 0;
  let position = 0;
  while (position < count) {
    running += values[position];
    position += 1;
  }
  return running / count;
}
@@ -0,0 +1,78 @@
1
+ import type { Matrix, Vector } from "../types";
2
+
3
/**
 * Ensures `X` is an array with at least one row and that its first row is an
 * array with at least one entry.
 *
 * @param X Value to check.
 * @param label Name used for `X` in error messages.
 * @throws Error when either condition fails.
 */
export function assertNonEmptyMatrix(X: Matrix, label = "X"): void {
  const hasRows = Array.isArray(X) && X.length !== 0;
  if (!hasRows) {
    throw new Error(`${label} must be a non-empty 2D array.`);
  }

  const firstRow = X[0];
  const hasColumns = Array.isArray(firstRow) && firstRow.length !== 0;
  if (!hasColumns) {
    throw new Error(`${label} must have at least one feature column.`);
  }
}
12
+
13
/**
 * Ensures every row of `X` is an array of the same length as the first row.
 *
 * @param X Matrix to check; an empty matrix passes.
 * @param label Name used for `X` in error messages.
 * @throws Error naming the first row that differs.
 */
export function assertConsistentRowSize(X: Matrix, label = "X"): void {
  const expectedWidth = X[0]?.length ?? 0;
  for (let rowIndex = 0; rowIndex < X.length; rowIndex += 1) {
    const candidate = X[rowIndex];
    const matches = Array.isArray(candidate) && candidate.length === expectedWidth;
    if (matches) {
      continue;
    }
    throw new Error(
      `${label} rows must all have the same length. Row ${rowIndex} differs.`,
    );
  }
}
24
+
25
/**
 * Ensures every entry of `X` is a finite number.
 *
 * @param X Matrix to check.
 * @param label Name used for `X` in error messages.
 * @throws Error naming the first offending `[row, column]` position.
 */
export function assertFiniteMatrix(X: Matrix, label = "X"): void {
  for (let rowIndex = 0; rowIndex < X.length; rowIndex += 1) {
    const row = X[rowIndex];
    for (let colIndex = 0; colIndex < row.length; colIndex += 1) {
      if (Number.isFinite(row[colIndex])) {
        continue;
      }
      throw new Error(`${label} contains a non-finite value at [${rowIndex}, ${colIndex}].`);
    }
  }
}
35
+
36
/**
 * Ensures `y` is an array holding exactly `expectedLength` entries.
 *
 * @param y Value to check.
 * @param expectedLength Required number of entries.
 * @param label Name used for `y` in error messages.
 * @throws Error when `y` is not an array or has a different length.
 */
export function assertVectorLength(
  y: Vector,
  expectedLength: number,
  label = "y",
): void {
  const lengthMatches = Array.isArray(y) && y.length === expectedLength;
  if (lengthMatches) {
    return;
  }
  throw new Error(`${label} length must equal ${expectedLength}.`);
}
45
+
46
/**
 * Ensures every entry of `y` is a finite number.
 *
 * @param y Vector to check.
 * @param label Name used for `y` in error messages.
 * @throws Error naming the first offending index.
 */
export function assertFiniteVector(y: Vector, label = "y"): void {
  let position = 0;
  while (position < y.length) {
    if (!Number.isFinite(y[position])) {
      throw new Error(`${label} contains a non-finite value at index ${position}.`);
    }
    position += 1;
  }
}
53
+
54
/**
 * Runs the standard checks on a regression training set: the feature matrix
 * is checked first (non-empty, rectangular, finite), then the targets
 * (one per row of `X`, finite).
 *
 * @throws Error from the first check that fails.
 */
export function validateRegressionInputs(X: Matrix, y: Vector): void {
  // Feature matrix.
  assertNonEmptyMatrix(X);
  assertConsistentRowSize(X);
  assertFiniteMatrix(X);

  // Targets.
  const expectedTargets = X.length;
  assertVectorLength(y, expectedTargets);
  assertFiniteVector(y);
}
61
+
62
/**
 * Ensures every entry of `y` is exactly 0 or 1.
 *
 * @param y Vector to check.
 * @param label Name used for `y` in error messages.
 * @throws Error naming the first offending value and its index.
 */
export function assertBinaryVector(y: Vector, label = "y"): void {
  for (let position = 0; position < y.length; position += 1) {
    const entry = y[position];
    if (entry !== 0 && entry !== 1) {
      throw new Error(`${label} must be binary (0 or 1). Found ${entry} at index ${position}.`);
    }
  }
}
70
+
71
/**
 * Runs the standard checks on a binary-classification training set: the same
 * matrix and target checks as `validateRegressionInputs`, in the same order,
 * followed by the requirement that every label is 0 or 1.
 *
 * @throws Error from the first check that fails.
 */
export function validateClassificationInputs(X: Matrix, y: Vector): void {
  validateRegressionInputs(X, y);
  assertBinaryVector(y);
}