bun-scikit 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +187 -0
- package/binding.gyp +21 -0
- package/docs/README.md +7 -0
- package/docs/native-abi.md +53 -0
- package/index.ts +1 -0
- package/package.json +76 -0
- package/scripts/build-node-addon.ts +26 -0
- package/scripts/build-zig-kernels.ts +50 -0
- package/scripts/check-api-docs-coverage.ts +52 -0
- package/scripts/check-benchmark-health.ts +140 -0
- package/scripts/install-native.ts +160 -0
- package/scripts/package-native-artifacts.ts +62 -0
- package/scripts/sync-benchmark-readme.ts +181 -0
- package/scripts/update-benchmark-history.ts +91 -0
- package/src/ensemble/RandomForestClassifier.ts +136 -0
- package/src/ensemble/RandomForestRegressor.ts +136 -0
- package/src/index.ts +32 -0
- package/src/linear_model/LinearRegression.ts +136 -0
- package/src/linear_model/LogisticRegression.ts +260 -0
- package/src/linear_model/SGDClassifier.ts +161 -0
- package/src/linear_model/SGDRegressor.ts +104 -0
- package/src/metrics/classification.ts +294 -0
- package/src/metrics/regression.ts +51 -0
- package/src/model_selection/GridSearchCV.ts +244 -0
- package/src/model_selection/KFold.ts +82 -0
- package/src/model_selection/RepeatedKFold.ts +49 -0
- package/src/model_selection/RepeatedStratifiedKFold.ts +50 -0
- package/src/model_selection/StratifiedKFold.ts +112 -0
- package/src/model_selection/StratifiedShuffleSplit.ts +211 -0
- package/src/model_selection/crossValScore.ts +165 -0
- package/src/model_selection/trainTestSplit.ts +82 -0
- package/src/naive_bayes/GaussianNB.ts +148 -0
- package/src/native/node-addon/bun_scikit_addon.cpp +450 -0
- package/src/native/zigKernels.ts +576 -0
- package/src/neighbors/KNeighborsClassifier.ts +85 -0
- package/src/pipeline/ColumnTransformer.ts +203 -0
- package/src/pipeline/FeatureUnion.ts +123 -0
- package/src/pipeline/Pipeline.ts +168 -0
- package/src/preprocessing/MinMaxScaler.ts +113 -0
- package/src/preprocessing/OneHotEncoder.ts +91 -0
- package/src/preprocessing/PolynomialFeatures.ts +158 -0
- package/src/preprocessing/RobustScaler.ts +149 -0
- package/src/preprocessing/SimpleImputer.ts +150 -0
- package/src/preprocessing/StandardScaler.ts +92 -0
- package/src/svm/LinearSVC.ts +117 -0
- package/src/tree/DecisionTreeClassifier.ts +394 -0
- package/src/tree/DecisionTreeRegressor.ts +407 -0
- package/src/types.ts +18 -0
- package/src/utils/linalg.ts +209 -0
- package/src/utils/validation.ts +78 -0
- package/zig/kernels.zig +1327 -0
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
import type { Matrix, Vector } from "../types";
|
|
2
|
+
import { assertConsistentRowSize, assertFiniteMatrix, assertNonEmptyMatrix } from "../utils/validation";
|
|
3
|
+
|
|
4
|
+
/**
 * Structural contract for the transformers a ColumnTransformer spec carries.
 * `fit` and `transform` are required. `fitTransform` is optional; when it is
 * present, this file's `fitTransform` helper calls it instead of `fit`
 * followed by `transform`.
 */
interface TransformerLike {
|
|
5
|
+
fit(X: Matrix, y?: Vector): unknown;
|
|
6
|
+
transform(X: Matrix): Matrix;
|
|
7
|
+
fitTransform?: (X: Matrix, y?: Vector) => Matrix;
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
/**
 * Columns a spec applies to: either an explicit list of column indices, or a
 * `{ start, end }` range covering `start` up to but not including `end`.
 */
export type ColumnSelector = number[] | { start: number; end: number };
|
|
11
|
+
|
|
12
|
+
/** One ColumnTransformer entry: a display name, the transformer, and the columns it receives. */
export type ColumnTransformerSpec = [name: string, transformer: TransformerLike, columns: ColumnSelector];
|
|
13
|
+
|
|
14
|
+
/**
 * Options for ColumnTransformer.
 * `remainder` controls the columns no spec selects: `"passthrough"` appends
 * them unchanged after the transformed blocks; the constructor falls back to
 * `"drop"` when the option is omitted.
 */
export interface ColumnTransformerOptions {
|
|
15
|
+
remainder?: "drop" | "passthrough";
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
/**
 * Resolves a column selector into a concrete, ascending list of column indices.
 *
 * - Array selectors are validated element by element, de-duplicated and sorted.
 * - Range selectors expand to every index from `start` up to (not including) `end`.
 *
 * @param selector - Explicit index list or `{ start, end }` range.
 * @param featureCount - Number of columns available in the input matrix.
 * @returns Ascending column indices.
 * @throws Error when an array selector is empty or holds an invalid index, or
 *   when a range does not satisfy `0 <= start < end <= featureCount`.
 */
function normalizeColumns(selector: ColumnSelector, featureCount: number): number[] {
  if (!Array.isArray(selector)) {
    const { start, end } = selector;
    const rangeIsInvalid =
      !Number.isInteger(start) ||
      !Number.isInteger(end) ||
      start < 0 ||
      end <= start ||
      end > featureCount;
    if (rangeIsInvalid) {
      throw new Error(
        `Range selector must satisfy 0 <= start < end <= featureCount. Got start=${start}, end=${end}, featureCount=${featureCount}.`,
      );
    }
    return Array.from({ length: end - start }, (_unused, offset) => start + offset);
  }

  if (selector.length === 0) {
    throw new Error("Column selector arrays must not be empty.");
  }

  const validated = selector.map((candidate) => {
    const outOfBounds = !Number.isInteger(candidate) || candidate < 0 || candidate >= featureCount;
    if (outOfBounds) {
      throw new Error(`Invalid column index ${candidate} for featureCount=${featureCount}.`);
    }
    return candidate;
  });

  // A Set drops repeated indices; the numeric comparator gives ascending order.
  const distinct = new Set(validated);
  return [...distinct].sort((left, right) => left - right);
}
|
|
44
|
+
|
|
45
|
+
/**
 * Builds a new matrix holding, for every row of `X`, the values found at the
 * given column indices (in the order the indices are listed).
 */
function selectColumns(X: Matrix, columns: number[]): Matrix {
  const picked: number[][] = [];
  for (const sourceRow of X) {
    const narrowed: number[] = [];
    for (const columnIndex of columns) {
      narrowed.push(sourceRow[columnIndex]);
    }
    picked.push(narrowed);
  }
  return picked;
}
|
|
48
|
+
|
|
49
|
+
/**
 * Fits `transformer` on `X` (and optional `y`) and returns the transformed `X`.
 * Uses the transformer's own `fitTransform` when it provides one; otherwise
 * calls `fit` and then `transform`.
 */
function fitTransform(transformer: TransformerLike, X: Matrix, y?: Vector): Matrix {
  if (typeof transformer.fitTransform !== "function") {
    transformer.fit(X, y);
    return transformer.transform(X);
  }
  return transformer.fitTransform(X, y);
}
|
|
56
|
+
|
|
57
|
+
/**
 * A ColumnTransformer spec after its selector has been resolved by
 * `normalizeColumns`: the name, the transformer, and the concrete column
 * indices that transformer is fed.
 */
interface RuntimeSpec {
|
|
58
|
+
name: string;
|
|
59
|
+
transformer: TransformerLike;
|
|
60
|
+
columns: number[];
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
/**
 * Applies a dedicated transformer to each selected group of columns and
 * concatenates the per-group outputs row by row, in spec order.
 *
 * With `remainder: "passthrough"` the columns not claimed by any spec are
 * appended, untransformed, after the transformed blocks; with the default
 * `"drop"` they are discarded.
 */
export class ColumnTransformer {
  /** `[name, transformer, columns]` per spec from the last successful fit; empty otherwise. */
  transformers_: ReadonlyArray<readonly [string, TransformerLike, number[]]> = [];

  private readonly specs: ColumnTransformerSpec[];
  private readonly remainder: "drop" | "passthrough";
  private runtimeSpecs: RuntimeSpec[] = [];
  private passthroughColumns: number[] = [];
  private nFeaturesIn: number | null = null;
  private isFitted = false;

  /**
   * @param specs - Non-empty list of `[name, transformer, columns]` entries.
   * @param options - `remainder` handling; defaults to `"drop"`.
   * @throws Error when `specs` is not a non-empty array.
   */
  constructor(specs: ColumnTransformerSpec[], options: ColumnTransformerOptions = {}) {
    if (!Array.isArray(specs) || specs.length === 0) {
      throw new Error("ColumnTransformer requires at least one transformer spec.");
    }
    this.specs = specs;
    this.remainder = options.remainder ?? "drop";
  }

  /**
   * Fits every transformer on its column subset of `X`.
   *
   * @returns `this`, for chaining.
   * @throws Error on invalid input, an invalid spec name or selector, or any
   *   error raised by a transformer's `fit`.
   */
  fit(X: Matrix, y?: Vector): this {
    this.fitSpecs(X, y, false);
    return this;
  }

  /**
   * Transforms `X` with the fitted transformers and concatenates the results.
   *
   * @throws Error when the instance is not fitted or when `X` does not have
   *   the feature count seen during fitting.
   */
  transform(X: Matrix): Matrix {
    if (!this.isFitted || this.nFeaturesIn === null) {
      throw new Error("ColumnTransformer has not been fitted.");
    }
    assertNonEmptyMatrix(X);
    assertConsistentRowSize(X);
    assertFiniteMatrix(X);
    if (X[0].length !== this.nFeaturesIn) {
      throw new Error(`Feature size mismatch. Expected ${this.nFeaturesIn}, got ${X[0].length}.`);
    }

    const transformedBlocks = this.runtimeSpecs.map((spec) =>
      spec.transformer.transform(selectColumns(X, spec.columns)),
    );
    return this.assemble(X, transformedBlocks);
  }

  /**
   * Fits every transformer and returns the concatenated transformed output.
   * Each transformer's own `fitTransform` is used when it has one.
   *
   * @throws Error under the same conditions as {@link ColumnTransformer.fit}.
   */
  fitTransform(X: Matrix, y?: Vector): Matrix {
    const transformedBlocks = this.fitSpecs(X, y, true);
    return this.assemble(X, transformedBlocks);
  }

  /**
   * Shared fitting routine behind `fit` and `fitTransform`.
   *
   * All spec names and selectors are validated before any transformer is
   * touched, so a configuration error leaves both the transformers and a
   * previous fit intact. Once fitting starts the instance is marked unfitted
   * until every transformer has succeeded, because transformers are mutated
   * in place and a partial refit cannot be rolled back.
   *
   * @param collectOutput - When true, fit through the `fitTransform` helper
   *   and return each transformer's output block; when false, only call `fit`.
   * @returns The transformed blocks in spec order (empty when not collecting).
   */
  private fitSpecs(X: Matrix, y: Vector | undefined, collectOutput: boolean): Matrix[] {
    assertNonEmptyMatrix(X);
    assertConsistentRowSize(X);
    assertFiniteMatrix(X);

    const featureCount = X[0].length;
    const used = new Uint8Array(featureCount);

    // Phase 1: pure validation and column resolution, no side effects.
    const resolved: RuntimeSpec[] = [];
    for (let i = 0; i < this.specs.length; i += 1) {
      const [name, transformer, selector] = this.specs[i];
      if (typeof name !== "string" || name.trim().length === 0) {
        throw new Error("ColumnTransformer spec names must be non-empty strings.");
      }
      const columns = normalizeColumns(selector, featureCount);
      for (const column of columns) {
        used[column] = 1;
      }
      resolved.push({ name, transformer, columns });
    }

    const passthrough: number[] = [];
    if (this.remainder === "passthrough") {
      for (let i = 0; i < featureCount; i += 1) {
        if (used[i] === 0) {
          passthrough.push(i);
        }
      }
    }

    // Phase 2: fit. Invalidate first so a throwing transformer cannot leave
    // the instance claiming to be fitted with half-updated transformers.
    this.isFitted = false;
    this.transformers_ = [];

    const blocks: Matrix[] = [];
    for (const spec of resolved) {
      const subX = selectColumns(X, spec.columns);
      if (collectOutput) {
        blocks.push(fitTransform(spec.transformer, subX, y));
      } else {
        spec.transformer.fit(subX, y);
      }
    }

    // Phase 3: commit the new state in one go.
    this.runtimeSpecs = resolved;
    this.passthroughColumns = passthrough;
    this.nFeaturesIn = featureCount;
    this.transformers_ = resolved.map(
      (spec) => [spec.name, spec.transformer, spec.columns] as const,
    );
    this.isFitted = true;
    return blocks;
  }

  /**
   * Concatenates the transformed blocks (and the passthrough columns of `X`,
   * if any) into one output row per input row.
   */
  private assemble(X: Matrix, transformedBlocks: Matrix[]): Matrix {
    const parts =
      this.passthroughColumns.length > 0
        ? [...transformedBlocks, selectColumns(X, this.passthroughColumns)]
        : transformedBlocks;

    const out = new Array<number[]>(X.length);
    for (let rowIndex = 0; rowIndex < X.length; rowIndex += 1) {
      const row: number[] = [];
      for (const part of parts) {
        // Element-wise push instead of push(...spread) so very wide blocks
        // cannot exceed the engine's argument-count limit.
        for (const value of part[rowIndex]) {
          row.push(value);
        }
      }
      out[rowIndex] = row;
    }
    return out;
  }
}
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import type { Matrix, Vector } from "../types";
|
|
2
|
+
import { assertConsistentRowSize, assertFiniteMatrix, assertNonEmptyMatrix } from "../utils/validation";
|
|
3
|
+
|
|
4
|
+
/**
 * Structural contract for the transformers a FeatureUnion combines.
 * `fit` and `transform` are required. `fitTransform` is optional; when it is
 * present, this file's `fitTransform` helper calls it instead of `fit`
 * followed by `transform`.
 */
interface TransformerLike {
|
|
5
|
+
fit(X: Matrix, y?: Vector): unknown;
|
|
6
|
+
transform(X: Matrix): Matrix;
|
|
7
|
+
fitTransform?: (X: Matrix, y?: Vector) => Matrix;
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
/** One FeatureUnion entry: a name (checked for uniqueness when fitting) and its transformer. */
export type FeatureUnionSpec = [name: string, transformer: TransformerLike];
|
|
11
|
+
|
|
12
|
+
/**
 * Fits `transformer` and returns its output for the same `X`.
 * Prefers the transformer's own `fitTransform`; falls back to `fit` + `transform`.
 */
function fitTransform(transformer: TransformerLike, X: Matrix, y?: Vector): Matrix {
  if (typeof transformer.fitTransform === "function") {
    const fusedOutput = transformer.fitTransform(X, y);
    return fusedOutput;
  }

  transformer.fit(X, y);
  const projected = transformer.transform(X);
  return projected;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Runs several transformers on the same input matrix and concatenates their
 * outputs column-wise, in the order the transformers were given.
 */
export class FeatureUnion {
  /** `[name, transformer]` pairs from the last successful fit; empty otherwise. */
  transformerList_: ReadonlyArray<readonly [string, TransformerLike]> = [];

  private readonly specs: FeatureUnionSpec[];
  private runtimeSpecs: FeatureUnionSpec[] = [];
  private nFeaturesIn: number | null = null;
  private isFitted = false;

  /**
   * @param specs - Non-empty list of `[name, transformer]` entries.
   * @throws Error when `specs` is not a non-empty array.
   */
  constructor(specs: FeatureUnionSpec[]) {
    if (!Array.isArray(specs) || specs.length === 0) {
      throw new Error("FeatureUnion requires at least one transformer.");
    }
    this.specs = specs;
  }

  /**
   * Fits every transformer on `X`.
   *
   * @returns `this`, for chaining.
   * @throws Error on invalid input, an empty or duplicate name, or any error
   *   raised by a transformer's `fit`.
   */
  fit(X: Matrix, y?: Vector): this {
    this.fitAll(X, y, false);
    return this;
  }

  /**
   * Transforms `X` with every fitted transformer and concatenates the outputs.
   *
   * @throws Error when the instance is not fitted or when `X` does not have
   *   the feature count seen during fitting.
   */
  transform(X: Matrix): Matrix {
    if (!this.isFitted || this.nFeaturesIn === null) {
      throw new Error("FeatureUnion has not been fitted.");
    }
    assertNonEmptyMatrix(X);
    assertConsistentRowSize(X);
    assertFiniteMatrix(X);
    if (X[0].length !== this.nFeaturesIn) {
      throw new Error(`Feature size mismatch. Expected ${this.nFeaturesIn}, got ${X[0].length}.`);
    }

    const blocks = this.runtimeSpecs.map(([, transformer]) => transformer.transform(X));
    return FeatureUnion.concatenate(X.length, blocks);
  }

  /**
   * Fits every transformer and returns the concatenated transformed output.
   * Each transformer's own `fitTransform` is used when it has one.
   *
   * @throws Error under the same conditions as {@link FeatureUnion.fit}.
   */
  fitTransform(X: Matrix, y?: Vector): Matrix {
    const blocks = this.fitAll(X, y, true);
    return FeatureUnion.concatenate(X.length, blocks);
  }

  /**
   * Shared fitting routine behind `fit` and `fitTransform`.
   *
   * Names are validated for every spec before any transformer is fitted, so a
   * blank or duplicate name leaves the transformers and a previous fit
   * untouched. Once fitting starts the instance is marked unfitted until all
   * transformers succeed, because they are mutated in place and a partial
   * refit cannot be rolled back.
   *
   * @param collectOutput - When true, fit through the `fitTransform` helper
   *   and return each output block; when false, only call `fit`.
   */
  private fitAll(X: Matrix, y: Vector | undefined, collectOutput: boolean): Matrix[] {
    assertNonEmptyMatrix(X);
    assertConsistentRowSize(X);
    assertFiniteMatrix(X);

    const seen = new Set<string>();
    const validated: FeatureUnionSpec[] = [];
    for (let i = 0; i < this.specs.length; i += 1) {
      const [name, transformer] = this.specs[i];
      if (typeof name !== "string" || name.trim().length === 0) {
        throw new Error("FeatureUnion transformer names must be non-empty strings.");
      }
      if (seen.has(name)) {
        throw new Error(`FeatureUnion transformer names must be unique. Duplicate '${name}'.`);
      }
      seen.add(name);
      validated.push([name, transformer]);
    }

    // Invalidate before mutating any transformer; re-validated on success below.
    this.isFitted = false;
    this.transformerList_ = [];

    const blocks: Matrix[] = [];
    for (const [, transformer] of validated) {
      if (collectOutput) {
        blocks.push(fitTransform(transformer, X, y));
      } else {
        transformer.fit(X, y);
      }
    }

    this.runtimeSpecs = validated;
    this.nFeaturesIn = X[0].length;
    this.transformerList_ = validated.map(
      ([name, transformer]) => [name, transformer] as const,
    );
    this.isFitted = true;
    return blocks;
  }

  /** Joins the blocks side by side, producing one output row per input row. */
  private static concatenate(rowCount: number, blocks: Matrix[]): Matrix {
    const out = new Array<number[]>(rowCount);
    for (let rowIndex = 0; rowIndex < rowCount; rowIndex += 1) {
      const row: number[] = [];
      for (const block of blocks) {
        // Element-wise push instead of push(...spread) so very wide blocks
        // cannot exceed the engine's argument-count limit.
        for (const value of block[rowIndex]) {
          row.push(value);
        }
      }
      out[rowIndex] = row;
    }
    return out;
  }
}
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
import type { Matrix, Vector } from "../types";
|
|
2
|
+
import { assertConsistentRowSize, assertNonEmptyMatrix } from "../utils/validation";
|
|
3
|
+
|
|
4
|
+
/** Loosely typed property bag holding a pipeline step until the `has*` type guards probe its methods. */
type StepValue = Record<string, unknown>;
|
|
5
|
+
|
|
6
|
+
/** A pipeline step as stored internally: its name plus the step object. */
interface RuntimeStep {
|
|
7
|
+
name: string;
|
|
8
|
+
value: StepValue;
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
/** Anything exposing `fit(X, y?)`; the return value is ignored by the pipeline. */
interface Fittable {
|
|
12
|
+
fit(X: Matrix, y?: Vector): unknown;
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
/**
 * A fittable step that can also transform a matrix. `fitTransform` is
 * optional; `fitTransformStep` prefers it over `fit` followed by `transform`.
 */
interface TransformLike extends Fittable {
|
|
16
|
+
transform(X: Matrix): Matrix;
|
|
17
|
+
fitTransform?: (X: Matrix, y?: Vector) => Matrix;
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
/** A fittable step that can produce one prediction vector for a matrix. */
interface PredictLike extends Fittable {
|
|
21
|
+
predict(X: Matrix): Vector;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
/** Type guard: true for any non-null value whose `typeof` is `"object"`. */
function isObject(value: unknown): value is StepValue {
  if (value === null) {
    return false;
  }
  return typeof value === "object";
}
|
|
27
|
+
|
|
28
|
+
/** Type guard: true when the step's `fit` property is a function. */
function hasFit(value: StepValue): value is StepValue & Fittable {
  const candidate = value.fit;
  return typeof candidate === "function";
}
|
|
31
|
+
|
|
32
|
+
/** Type guard: true when the step has a `transform` function and also passes `hasFit`. */
function hasTransform(value: StepValue): value is StepValue & TransformLike {
  if (typeof value.transform !== "function") {
    return false;
  }
  return hasFit(value);
}
|
|
35
|
+
|
|
36
|
+
/** Type guard: true when the step has a `predict` function and also passes `hasFit`. */
function hasPredict(value: StepValue): value is StepValue & PredictLike {
  return typeof value.predict === "function" ? hasFit(value) : false;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Fits one pipeline step and returns its output for the same `X`.
 * Calls the step's `fitTransform` when it is a function; otherwise calls
 * `fit` and then `transform`.
 */
function fitTransformStep(step: TransformLike, X: Matrix, y?: Vector): Matrix {
  const { fitTransform: combined } = step;
  if (typeof combined === "function") {
    // Invoke with the step as receiver, exactly like a method call would.
    return combined.call(step, X, y);
  }
  step.fit(X, y);
  return step.transform(X);
}
|
|
47
|
+
|
|
48
|
+
/**
 * One pipeline entry: a name and the step object. The step is typed `unknown`
 * here; the Pipeline constructor rejects non-object steps and blank or
 * duplicate names at runtime.
 */
export type PipelineStep = [name: string, step: unknown];
|
|
49
|
+
|
|
50
|
+
/**
 * Chains steps so that each intermediate step's transformed output feeds the
 * next one. Every step except the last must implement `fit` and `transform`;
 * the last step must implement `fit` and may implement `predict` and/or
 * `transform`.
 */
export class Pipeline {
  /** `[name, step]` pairs in execution order. */
  readonly steps_: ReadonlyArray<readonly [string, unknown]>;
  /** Steps keyed by name. */
  readonly namedSteps_: Record<string, unknown>;
  private readonly runtimeSteps: RuntimeStep[];
  private isFitted = false;

  /**
   * @param steps - Non-empty list of `[name, step]` entries.
   * @throws Error when there are no steps, a name is blank or duplicated, or
   *   a step is not an object.
   */
  constructor(steps: PipelineStep[]) {
    if (steps.length === 0) {
      throw new Error("Pipeline requires at least one step.");
    }

    const seen = new Set<string>();
    const runtime: RuntimeStep[] = [];

    for (const [name, step] of steps) {
      if (typeof name !== "string" || name.trim().length === 0) {
        throw new Error("Pipeline step names must be non-empty strings.");
      }
      if (seen.has(name)) {
        throw new Error(`Pipeline step names must be unique. Duplicate step: '${name}'.`);
      }
      if (!isObject(step)) {
        throw new Error(`Pipeline step '${name}' must be an object.`);
      }
      seen.add(name);
      runtime.push({ name, value: step });
    }

    this.runtimeSteps = runtime;
    this.steps_ = runtime.map((step) => [step.name, step.value] as const);
    this.namedSteps_ = Object.fromEntries(runtime.map((step) => [step.name, step.value]));
  }

  /**
   * Fits each intermediate step on the output of the previous one, then fits
   * the final step on the fully transformed data.
   *
   * The step chain is checked before anything is fitted, so a structural
   * error (a step missing `fit`/`transform`, or missing `y` for a final step
   * that is not a transformer) leaves every step untouched. Once fitting
   * starts, the pipeline is marked unfitted until the final step succeeds.
   *
   * @returns `this`, for chaining.
   * @throws Error on invalid input, a structurally invalid step, missing `y`
   *   for a non-transformer final step, or any error raised by a step.
   */
  fit(X: Matrix, y?: Vector): this {
    assertNonEmptyMatrix(X);
    assertConsistentRowSize(X);

    const lastIndex = this.runtimeSteps.length - 1;

    // Structural validation first: nothing is mutated until this passes.
    const intermediates: TransformLike[] = [];
    for (let i = 0; i < lastIndex; i += 1) {
      const current = this.runtimeSteps[i];
      if (!hasTransform(current.value)) {
        throw new Error(
          `Pipeline step '${current.name}' must implement fit() and transform() because it is not the final step.`,
        );
      }
      intermediates.push(current.value);
    }

    const finalStep = this.runtimeSteps[lastIndex];
    const finalValue = finalStep.value;
    if (!hasFit(finalValue)) {
      throw new Error(`Pipeline final step '${finalStep.name}' must implement fit().`);
    }
    if (!hasTransform(finalValue) && y === undefined) {
      throw new Error(
        `Pipeline final step '${finalStep.name}' requires target labels y for fit().`,
      );
    }

    // Steps are mutated in place from here on; a failure part-way must not
    // leave the pipeline claiming to be fitted from an earlier call.
    this.isFitted = false;

    let transformedX = X;
    for (const step of intermediates) {
      transformedX = fitTransformStep(step, transformedX, y);
    }
    finalValue.fit(transformedX, y);

    this.isFitted = true;
    return this;
  }

  /**
   * Transforms `X` through the intermediate steps and returns the final
   * step's predictions.
   *
   * @throws Error when the pipeline is not fitted or the final step has no `predict`.
   */
  predict(X: Matrix): Vector {
    this.assertFitted();

    const lastStep = this.runtimeSteps[this.runtimeSteps.length - 1];
    if (!hasPredict(lastStep.value)) {
      throw new Error(`Pipeline final step '${lastStep.name}' does not implement predict().`);
    }

    const transformed = this.transformThroughIntermediates(X);
    return lastStep.value.predict(transformed);
  }

  /**
   * Transforms `X` through every step, including the final one.
   *
   * @throws Error when the pipeline is not fitted or the final step has no `transform`.
   */
  transform(X: Matrix): Matrix {
    this.assertFitted();

    const transformed = this.transformThroughIntermediates(X);
    const lastStep = this.runtimeSteps[this.runtimeSteps.length - 1];
    if (!hasTransform(lastStep.value)) {
      throw new Error(`Pipeline final step '${lastStep.name}' does not implement transform().`);
    }

    return lastStep.value.transform(transformed);
  }

  /** Fits the pipeline on `X` and returns `transform(X)`. */
  fitTransform(X: Matrix, y?: Vector): Matrix {
    this.fit(X, y);
    return this.transform(X);
  }

  /** Validates `X` and applies `transform` of every step except the last. */
  private transformThroughIntermediates(X: Matrix): Matrix {
    assertNonEmptyMatrix(X);
    assertConsistentRowSize(X);

    let transformedX = X;
    const lastIndex = this.runtimeSteps.length - 1;
    for (let i = 0; i < lastIndex; i += 1) {
      const current = this.runtimeSteps[i];
      if (!hasTransform(current.value)) {
        throw new Error(
          `Pipeline step '${current.name}' must implement transform() for inference.`,
        );
      }
      transformedX = current.value.transform(transformedX);
    }
    return transformedX;
  }

  /** Throws unless the most recent `fit` call completed successfully. */
  private assertFitted(): void {
    if (!this.isFitted) {
      throw new Error("Pipeline has not been fitted.");
    }
  }
}
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
import type { Matrix, Vector } from "../types";
|
|
2
|
+
import {
|
|
3
|
+
assertConsistentRowSize,
|
|
4
|
+
assertFiniteMatrix,
|
|
5
|
+
assertNonEmptyMatrix,
|
|
6
|
+
} from "../utils/validation";
|
|
7
|
+
|
|
8
|
+
/**
 * Options for MinMaxScaler.
 * `featureRange` is the `[min, max]` target interval; the constructor falls
 * back to `[0, 1]` and rejects non-finite bounds or `min >= max`.
 */
export interface MinMaxScalerOptions {
|
|
9
|
+
featureRange?: [number, number];
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
/**
 * Rescales each feature linearly so that the minimum and maximum seen during
 * `fit` map onto the configured `featureRange` (default `[0, 1]`).
 *
 * For every feature: `scale = (rangeMax - rangeMin) / (dataMax - dataMin)` and
 * `min = rangeMin - dataMin * scale`; `transform` computes `x * scale + min`.
 * A feature with zero spread uses a denominator of 1, so all of its training
 * values map to `rangeMin`.
 */
export class MinMaxScaler {
  /** Per-feature minimum seen during fit. */
  dataMin_: Vector | null = null;
  /** Per-feature maximum seen during fit. */
  dataMax_: Vector | null = null;
  /** Per-feature `dataMax_ - dataMin_`. */
  dataRange_: Vector | null = null;
  /** Per-feature multiplicative factor applied by `transform`. */
  scale_: Vector | null = null;
  /** Per-feature additive offset applied by `transform`. */
  min_: Vector | null = null;

  private readonly featureRange: [number, number];

  /**
   * @param options - Optional `featureRange`; defaults to `[0, 1]`.
   * @throws Error when the range bounds are not finite or `min >= max`.
   */
  constructor(options: MinMaxScalerOptions = {}) {
    const [rangeMin, rangeMax] = options.featureRange ?? [0, 1];
    if (!Number.isFinite(rangeMin) || !Number.isFinite(rangeMax) || rangeMin >= rangeMax) {
      throw new Error(
        `featureRange must be finite and satisfy min < max. Got [${rangeMin}, ${rangeMax}].`,
      );
    }
    // Store a private copy so later mutation of the caller's array cannot
    // bypass the validation above.
    this.featureRange = [rangeMin, rangeMax];
  }

  /**
   * Learns per-feature minimum, maximum, scale and offset from `X`.
   *
   * @returns `this`, for chaining.
   */
  fit(X: Matrix): this {
    assertNonEmptyMatrix(X);
    assertConsistentRowSize(X);
    assertFiniteMatrix(X);

    const nFeatures = X[0].length;
    const dataMin: Vector = new Array(nFeatures).fill(Number.POSITIVE_INFINITY);
    const dataMax: Vector = new Array(nFeatures).fill(Number.NEGATIVE_INFINITY);

    for (let i = 0; i < X.length; i += 1) {
      for (let j = 0; j < nFeatures; j += 1) {
        const value = X[i][j];
        if (value < dataMin[j]) {
          dataMin[j] = value;
        }
        if (value > dataMax[j]) {
          dataMax[j] = value;
        }
      }
    }

    const [rangeMin, rangeMax] = this.featureRange;
    const targetRange = rangeMax - rangeMin;
    const dataRange: Vector = new Array(nFeatures).fill(0);
    const scale: Vector = new Array(nFeatures).fill(1);
    const min: Vector = new Array(nFeatures).fill(rangeMin);

    for (let j = 0; j < nFeatures; j += 1) {
      const featureDataRange = dataMax[j] - dataMin[j];
      dataRange[j] = featureDataRange;
      // Constant features would divide by zero; use 1 so they map to rangeMin.
      const safeDenominator = featureDataRange === 0 ? 1 : featureDataRange;
      scale[j] = targetRange / safeDenominator;
      min[j] = rangeMin - dataMin[j] * scale[j];
    }

    this.dataMin_ = dataMin;
    this.dataMax_ = dataMax;
    this.dataRange_ = dataRange;
    this.scale_ = scale;
    this.min_ = min;
    return this;
  }

  /**
   * Applies `x * scale_ + min_` to every value.
   *
   * @throws Error when the scaler is not fitted or the feature count differs
   *   from the one seen during fit.
   */
  transform(X: Matrix): Matrix {
    const { scale, min } = this.fittedParams();
    this.validateInput(X, scale.length);
    return X.map((row) => row.map((value, featureIdx) => value * scale[featureIdx] + min[featureIdx]));
  }

  /** Fits on `X` and returns the transformed `X`. */
  fitTransform(X: Matrix): Matrix {
    return this.fit(X).transform(X);
  }

  /**
   * Reverses `transform` by applying `(x - min_) / scale_` to every value.
   *
   * @throws Error when the scaler is not fitted or the feature count differs
   *   from the one seen during fit.
   */
  inverseTransform(X: Matrix): Matrix {
    const { scale, min } = this.fittedParams();
    this.validateInput(X, scale.length);
    return X.map((row) => row.map((value, featureIdx) => (value - min[featureIdx]) / scale[featureIdx]));
  }

  /** Returns the fitted scale and offset vectors, or throws when unfitted. */
  private fittedParams(): { scale: Vector; min: Vector } {
    if (!this.scale_ || !this.min_) {
      throw new Error("MinMaxScaler has not been fitted.");
    }
    return { scale: this.scale_, min: this.min_ };
  }

  /** Runs the matrix validators, then checks the feature count. */
  private validateInput(X: Matrix, expectedFeatures: number): void {
    assertNonEmptyMatrix(X);
    assertConsistentRowSize(X);
    assertFiniteMatrix(X);

    if (X[0].length !== expectedFeatures) {
      throw new Error(`Feature size mismatch. Expected ${expectedFeatures}, got ${X[0].length}.`);
    }
  }
}
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import type { Matrix } from "../types";
|
|
2
|
+
import { assertConsistentRowSize, assertFiniteMatrix, assertNonEmptyMatrix } from "../utils/validation";
|
|
3
|
+
|
|
4
|
+
/**
 * Options for OneHotEncoder.
 * `handleUnknown` decides what `transform` does with a value not seen during
 * fit: `"error"` (the constructor's fallback) throws, `"ignore"` leaves that
 * feature's output columns at zero.
 */
export interface OneHotEncoderOptions {
|
|
5
|
+
handleUnknown?: "error" | "ignore";
|
|
6
|
+
}
|
|
7
|
+
|
|
8
|
+
/**
 * One-hot encodes numeric categorical features.
 *
 * `fit` records the sorted distinct values of every input column. `transform`
 * emits, for each input column, one output column per recorded value and sets
 * to 1 the column matching the row's value. Output columns are grouped by
 * input feature, in feature order.
 */
export class OneHotEncoder {
  /** Sorted distinct values per input feature, learned by `fit`. */
  categories_: number[][] | null = null;
  /** Number of input features seen during `fit`. */
  nFeaturesIn_: number | null = null;
  /** Total number of output columns (sum of category counts). */
  nOutputFeatures_: number | null = null;
  /** Index of the first output column belonging to each input feature. */
  featureOffsets_: number[] | null = null;

  private readonly handleUnknown: "error" | "ignore";

  /** @param options - Optional `handleUnknown` policy; defaults to `"error"`. */
  constructor(options: OneHotEncoderOptions = {}) {
    this.handleUnknown = options.handleUnknown ?? "error";
  }

  /**
   * Learns the category list and output offset of every feature.
   *
   * @returns `this`, for chaining.
   */
  fit(X: Matrix): this {
    assertNonEmptyMatrix(X);
    assertConsistentRowSize(X);
    assertFiniteMatrix(X);

    const nFeatures = X[0].length;
    const categories = new Array<number[]>(nFeatures);
    const offsets = new Array<number>(nFeatures);
    let offset = 0;

    for (let featureIndex = 0; featureIndex < nFeatures; featureIndex += 1) {
      const unique = new Set<number>();
      for (let i = 0; i < X.length; i += 1) {
        unique.add(X[i][featureIndex]);
      }
      const values = Array.from(unique).sort((a, b) => a - b);
      categories[featureIndex] = values;
      offsets[featureIndex] = offset;
      offset += values.length;
    }

    this.categories_ = categories;
    this.nFeaturesIn_ = nFeatures;
    this.nOutputFeatures_ = offset;
    this.featureOffsets_ = offsets;
    return this;
  }

  /**
   * Encodes `X` into a 0/1 matrix with `nOutputFeatures_` columns.
   *
   * @throws Error when the encoder is not fitted, when the feature count
   *   differs from the one seen during fit, or when an unseen value is met
   *   and `handleUnknown` is `"error"`.
   */
  transform(X: Matrix): Matrix {
    const categories = this.categories_;
    const nFeaturesIn = this.nFeaturesIn_;
    const nOutputFeatures = this.nOutputFeatures_;
    const featureOffsets = this.featureOffsets_;
    if (
      categories === null ||
      nFeaturesIn === null ||
      nOutputFeatures === null ||
      featureOffsets === null
    ) {
      throw new Error("OneHotEncoder has not been fitted.");
    }

    assertNonEmptyMatrix(X);
    assertConsistentRowSize(X);
    assertFiniteMatrix(X);

    if (X[0].length !== nFeaturesIn) {
      throw new Error(`Feature size mismatch. Expected ${nFeaturesIn}, got ${X[0].length}.`);
    }

    // Build one value -> position map per feature up front so each cell is a
    // constant-time lookup instead of a linear scan of the category list.
    // Keeping the first position of a repeated value mirrors indexOf.
    const lookups = categories.map((values) => {
      const positions = new Map<number, number>();
      values.forEach((value, position) => {
        if (!positions.has(value)) {
          positions.set(value, position);
        }
      });
      return positions;
    });

    const encoded = new Array<number[]>(X.length);
    for (let rowIndex = 0; rowIndex < X.length; rowIndex += 1) {
      const row = new Array<number>(nOutputFeatures).fill(0);
      for (let featureIndex = 0; featureIndex < nFeaturesIn; featureIndex += 1) {
        const value = X[rowIndex][featureIndex];
        const categoryIndex = lookups[featureIndex].get(value);
        if (categoryIndex === undefined) {
          if (this.handleUnknown === "error") {
            throw new Error(
              `Unknown category ${value} in feature ${featureIndex}. Set handleUnknown='ignore' to skip.`,
            );
          }
          // "ignore": leave this feature's output columns at zero.
          continue;
        }
        row[featureOffsets[featureIndex] + categoryIndex] = 1;
      }
      encoded[rowIndex] = row;
    }
    return encoded;
  }

  /** Fits on `X` and returns the encoded `X`. */
  fitTransform(X: Matrix): Matrix {
    return this.fit(X).transform(X);
  }
}
|