deepbox 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +344 -0
- package/dist/CSRMatrix-CwGwQRea.d.cts +219 -0
- package/dist/CSRMatrix-KzNt6QpS.d.ts +219 -0
- package/dist/Tensor-BQLk1ltW.d.cts +147 -0
- package/dist/Tensor-g8mUClel.d.ts +147 -0
- package/dist/chunk-4S73VUBD.js +677 -0
- package/dist/chunk-4S73VUBD.js.map +1 -0
- package/dist/chunk-5R4S63PF.js +2925 -0
- package/dist/chunk-5R4S63PF.js.map +1 -0
- package/dist/chunk-6AE5FKKQ.cjs +9264 -0
- package/dist/chunk-6AE5FKKQ.cjs.map +1 -0
- package/dist/chunk-AD436M45.js +3854 -0
- package/dist/chunk-AD436M45.js.map +1 -0
- package/dist/chunk-ALS7ETWZ.cjs +4263 -0
- package/dist/chunk-ALS7ETWZ.cjs.map +1 -0
- package/dist/chunk-AU7XHGKJ.js +2092 -0
- package/dist/chunk-AU7XHGKJ.js.map +1 -0
- package/dist/chunk-B5TNKUEY.js +1481 -0
- package/dist/chunk-B5TNKUEY.js.map +1 -0
- package/dist/chunk-BCR7G3A6.js +9136 -0
- package/dist/chunk-BCR7G3A6.js.map +1 -0
- package/dist/chunk-C4PKXY74.cjs +1917 -0
- package/dist/chunk-C4PKXY74.cjs.map +1 -0
- package/dist/chunk-DWZY6PIP.cjs +6400 -0
- package/dist/chunk-DWZY6PIP.cjs.map +1 -0
- package/dist/chunk-E3EU5FZO.cjs +2113 -0
- package/dist/chunk-E3EU5FZO.cjs.map +1 -0
- package/dist/chunk-F3JWBINJ.js +1054 -0
- package/dist/chunk-F3JWBINJ.js.map +1 -0
- package/dist/chunk-FJYLIGJX.js +1940 -0
- package/dist/chunk-FJYLIGJX.js.map +1 -0
- package/dist/chunk-JSCDE774.cjs +729 -0
- package/dist/chunk-JSCDE774.cjs.map +1 -0
- package/dist/chunk-LWECRCW2.cjs +2412 -0
- package/dist/chunk-LWECRCW2.cjs.map +1 -0
- package/dist/chunk-MLBMYKCG.js +6379 -0
- package/dist/chunk-MLBMYKCG.js.map +1 -0
- package/dist/chunk-OX6QXFMV.cjs +3874 -0
- package/dist/chunk-OX6QXFMV.cjs.map +1 -0
- package/dist/chunk-PHV2DKRS.cjs +1072 -0
- package/dist/chunk-PHV2DKRS.cjs.map +1 -0
- package/dist/chunk-PL7TAYKI.js +4056 -0
- package/dist/chunk-PL7TAYKI.js.map +1 -0
- package/dist/chunk-PR647I7R.js +1898 -0
- package/dist/chunk-PR647I7R.js.map +1 -0
- package/dist/chunk-QERHVCHC.cjs +2960 -0
- package/dist/chunk-QERHVCHC.cjs.map +1 -0
- package/dist/chunk-XEG44RF6.cjs +1514 -0
- package/dist/chunk-XEG44RF6.cjs.map +1 -0
- package/dist/chunk-XMWVME2W.js +2377 -0
- package/dist/chunk-XMWVME2W.js.map +1 -0
- package/dist/chunk-ZB75FESB.cjs +1979 -0
- package/dist/chunk-ZB75FESB.cjs.map +1 -0
- package/dist/chunk-ZLW62TJG.cjs +4061 -0
- package/dist/chunk-ZLW62TJG.cjs.map +1 -0
- package/dist/chunk-ZXKBDFP3.js +4235 -0
- package/dist/chunk-ZXKBDFP3.js.map +1 -0
- package/dist/core/index.cjs +204 -0
- package/dist/core/index.cjs.map +1 -0
- package/dist/core/index.d.cts +2 -0
- package/dist/core/index.d.ts +2 -0
- package/dist/core/index.js +3 -0
- package/dist/core/index.js.map +1 -0
- package/dist/dataframe/index.cjs +22 -0
- package/dist/dataframe/index.cjs.map +1 -0
- package/dist/dataframe/index.d.cts +3 -0
- package/dist/dataframe/index.d.ts +3 -0
- package/dist/dataframe/index.js +5 -0
- package/dist/dataframe/index.js.map +1 -0
- package/dist/datasets/index.cjs +134 -0
- package/dist/datasets/index.cjs.map +1 -0
- package/dist/datasets/index.d.cts +3 -0
- package/dist/datasets/index.d.ts +3 -0
- package/dist/datasets/index.js +5 -0
- package/dist/datasets/index.js.map +1 -0
- package/dist/index-74AB8Cyh.d.cts +1126 -0
- package/dist/index-9oQx1HgV.d.cts +1180 -0
- package/dist/index-BJY2SI4i.d.ts +483 -0
- package/dist/index-BWGhrDlr.d.ts +733 -0
- package/dist/index-B_DK4FKY.d.cts +242 -0
- package/dist/index-BbA2Gxfl.d.ts +456 -0
- package/dist/index-BgHYAoSS.d.cts +837 -0
- package/dist/index-BndMbqsM.d.ts +1439 -0
- package/dist/index-C1mfVYoo.d.ts +2517 -0
- package/dist/index-CCvlwAmL.d.cts +809 -0
- package/dist/index-CDw5CnOU.d.ts +785 -0
- package/dist/index-Cn3SdB0O.d.ts +1126 -0
- package/dist/index-CrqLlS-a.d.ts +776 -0
- package/dist/index-D61yaSMY.d.cts +483 -0
- package/dist/index-D9Loo1_A.d.cts +2517 -0
- package/dist/index-DIT_OO9C.d.cts +785 -0
- package/dist/index-DIp_RrRt.d.ts +242 -0
- package/dist/index-DbultU6X.d.cts +1427 -0
- package/dist/index-DmEg_LCm.d.cts +776 -0
- package/dist/index-DoPWVxPo.d.cts +1439 -0
- package/dist/index-DuCxd-8d.d.ts +837 -0
- package/dist/index-Dx42TZaY.d.ts +809 -0
- package/dist/index-DyZ4QQf5.d.cts +456 -0
- package/dist/index-GFAVyOWO.d.ts +1427 -0
- package/dist/index-WHQLn0e8.d.cts +733 -0
- package/dist/index-ZtI1Iy4L.d.ts +1180 -0
- package/dist/index-eJgeni9c.d.cts +1911 -0
- package/dist/index-tk4lSYod.d.ts +1911 -0
- package/dist/index.cjs +72 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +17 -0
- package/dist/index.d.ts +17 -0
- package/dist/index.js +15 -0
- package/dist/index.js.map +1 -0
- package/dist/linalg/index.cjs +86 -0
- package/dist/linalg/index.cjs.map +1 -0
- package/dist/linalg/index.d.cts +3 -0
- package/dist/linalg/index.d.ts +3 -0
- package/dist/linalg/index.js +5 -0
- package/dist/linalg/index.js.map +1 -0
- package/dist/metrics/index.cjs +158 -0
- package/dist/metrics/index.cjs.map +1 -0
- package/dist/metrics/index.d.cts +3 -0
- package/dist/metrics/index.d.ts +3 -0
- package/dist/metrics/index.js +5 -0
- package/dist/metrics/index.js.map +1 -0
- package/dist/ml/index.cjs +87 -0
- package/dist/ml/index.cjs.map +1 -0
- package/dist/ml/index.d.cts +3 -0
- package/dist/ml/index.d.ts +3 -0
- package/dist/ml/index.js +6 -0
- package/dist/ml/index.js.map +1 -0
- package/dist/ndarray/index.cjs +501 -0
- package/dist/ndarray/index.cjs.map +1 -0
- package/dist/ndarray/index.d.cts +5 -0
- package/dist/ndarray/index.d.ts +5 -0
- package/dist/ndarray/index.js +4 -0
- package/dist/ndarray/index.js.map +1 -0
- package/dist/nn/index.cjs +142 -0
- package/dist/nn/index.cjs.map +1 -0
- package/dist/nn/index.d.cts +6 -0
- package/dist/nn/index.d.ts +6 -0
- package/dist/nn/index.js +5 -0
- package/dist/nn/index.js.map +1 -0
- package/dist/optim/index.cjs +77 -0
- package/dist/optim/index.cjs.map +1 -0
- package/dist/optim/index.d.cts +4 -0
- package/dist/optim/index.d.ts +4 -0
- package/dist/optim/index.js +4 -0
- package/dist/optim/index.js.map +1 -0
- package/dist/plot/index.cjs +114 -0
- package/dist/plot/index.cjs.map +1 -0
- package/dist/plot/index.d.cts +6 -0
- package/dist/plot/index.d.ts +6 -0
- package/dist/plot/index.js +5 -0
- package/dist/plot/index.js.map +1 -0
- package/dist/preprocess/index.cjs +82 -0
- package/dist/preprocess/index.cjs.map +1 -0
- package/dist/preprocess/index.d.cts +4 -0
- package/dist/preprocess/index.d.ts +4 -0
- package/dist/preprocess/index.js +5 -0
- package/dist/preprocess/index.js.map +1 -0
- package/dist/random/index.cjs +74 -0
- package/dist/random/index.cjs.map +1 -0
- package/dist/random/index.d.cts +3 -0
- package/dist/random/index.d.ts +3 -0
- package/dist/random/index.js +5 -0
- package/dist/random/index.js.map +1 -0
- package/dist/stats/index.cjs +142 -0
- package/dist/stats/index.cjs.map +1 -0
- package/dist/stats/index.d.cts +3 -0
- package/dist/stats/index.d.ts +3 -0
- package/dist/stats/index.js +5 -0
- package/dist/stats/index.js.map +1 -0
- package/dist/tensor-B96jjJLQ.d.cts +205 -0
- package/dist/tensor-B96jjJLQ.d.ts +205 -0
- package/package.json +226 -0
|
@@ -0,0 +1,3874 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
var chunk6AE5FKKQ_cjs = require('./chunk-6AE5FKKQ.cjs');
|
|
4
|
+
var chunkJSCDE774_cjs = require('./chunk-JSCDE774.cjs');
|
|
5
|
+
|
|
6
|
+
// src/preprocess/index.ts
|
|
7
|
+
// Namespace object for the preprocess module. Every public symbol is handed to
// the bundler helper `__export` as a zero-argument arrow function that returns
// the binding, so the bindings (declared further down in this file) are only
// read when a thunk is invoked.
// NOTE(review): `__export` lives in chunk-JSCDE774 and is not visible here;
// presumably it installs these thunks as getters on the target — confirm.
var preprocess_exports = {};
|
|
8
|
+
chunkJSCDE774_cjs.__export(preprocess_exports, {
|
|
9
|
+
GroupKFold: () => GroupKFold,
|
|
10
|
+
KFold: () => KFold,
|
|
11
|
+
LabelBinarizer: () => LabelBinarizer,
|
|
12
|
+
LabelEncoder: () => LabelEncoder,
|
|
13
|
+
LeaveOneOut: () => LeaveOneOut,
|
|
14
|
+
LeavePOut: () => LeavePOut,
|
|
15
|
+
MaxAbsScaler: () => MaxAbsScaler,
|
|
16
|
+
MinMaxScaler: () => MinMaxScaler,
|
|
17
|
+
MultiLabelBinarizer: () => MultiLabelBinarizer,
|
|
18
|
+
Normalizer: () => Normalizer,
|
|
19
|
+
OneHotEncoder: () => OneHotEncoder,
|
|
20
|
+
OrdinalEncoder: () => OrdinalEncoder,
|
|
21
|
+
PowerTransformer: () => PowerTransformer,
|
|
22
|
+
QuantileTransformer: () => QuantileTransformer,
|
|
23
|
+
RobustScaler: () => RobustScaler,
|
|
24
|
+
StandardScaler: () => StandardScaler,
|
|
25
|
+
StratifiedKFold: () => StratifiedKFold,
|
|
26
|
+
trainTestSplit: () => trainTestSplit
|
|
27
|
+
});
|
|
28
|
+
|
|
29
|
+
// src/preprocess/_internal.ts
|
|
30
|
+
/**
 * Guard that rejects string-typed tensors.
 *
 * @param X - Object exposing a `dtype` property
 * @param name - Label interpolated into the error message
 * @throws {DTypeError} If `X.dtype` is exactly "string"
 */
function assertNumericTensor(X, name) {
  const isStringTyped = X.dtype === "string";
  if (!isStringTyped) {
    return;
  }
  throw new chunkJSCDE774_cjs.DTypeError(`${name} must be numeric`);
}
|
|
35
|
+
/**
 * Guard that only lets rank-2 tensors through.
 *
 * @param X - Object exposing an `ndim` property
 * @param name - Label interpolated into the error message
 * @throws {ShapeError} If `X.ndim` is not 2
 */
function assert2D(X, name) {
  const rank = X.ndim;
  if (rank === 2) {
    return;
  }
  throw new chunkJSCDE774_cjs.ShapeError(`${name} must be a 2D tensor, got ${X.ndim}D`);
}
|
|
40
|
+
/**
 * Extract `[rows, cols]` from a rank-2 tensor.
 *
 * @param X - Object exposing `ndim` and an indexable `shape`
 * @returns A fresh two-element array `[shape[0], shape[1]]`
 * @throws {ShapeError} If `ndim` is not 2 or either of the first two extents is undefined
 */
function getShape2D(X) {
  const hasBothExtents = X.ndim === 2 && X.shape[0] !== void 0 && X.shape[1] !== void 0;
  if (hasBothExtents) {
    return [X.shape[0], X.shape[1]];
  }
  throw new chunkJSCDE774_cjs.ShapeError(`Expected 2D tensor with valid shape, got shape [${X.shape.join(", ")}]`);
}
|
|
46
|
+
/**
 * Fetch the leading stride of a tensor.
 *
 * @param X - Object exposing an indexable `strides`
 * @returns `X.strides[0]`
 * @throws {DeepboxError} If the leading stride is undefined
 */
function getStride1D(X) {
  const leading = X.strides[0];
  if (leading !== void 0) {
    return leading;
  }
  throw new chunkJSCDE774_cjs.DeepboxError("Internal error: missing stride for 1D tensor");
}
|
|
53
|
+
/**
 * Fetch the first two strides of a tensor as a pair.
 *
 * @param X - Object exposing an indexable `strides`
 * @returns A fresh array `[strides[0], strides[1]]`
 * @throws {DeepboxError} If either stride is undefined
 */
function getStrides2D(X) {
  const rowStep = X.strides[0];
  const colStep = X.strides[1];
  const bothPresent = rowStep !== void 0 && colStep !== void 0;
  if (bothPresent) {
    return [rowStep, colStep];
  }
  throw new chunkJSCDE774_cjs.DeepboxError("Internal error: missing strides for 2D tensor");
}
|
|
61
|
+
/**
 * Create a deterministic pseudo-random number generator from an integer seed.
 *
 * The generator is a linear congruential generator
 * `state = (a * state + c) mod m` with a = 1103515245, c = 12345, m = 2^31.
 * Each call returns `state / m`, i.e. a value in [0, 1).
 *
 * @param seed - Non-negative safe integer used as the initial state (reduced mod 2^31)
 * @returns A zero-argument function producing the next value in [0, 1)
 * @throws {InvalidParameterError} If seed is not a non-negative safe integer
 */
function createSeededRandom(seed) {
  const a = 1103515245;
  const c = 12345;
  const m = 2 ** 31;
  // isSafeInteger already implies "is a finite integer number".
  if (!Number.isSafeInteger(seed) || seed < 0) {
    throw new chunkJSCDE774_cjs.InvalidParameterError(
      "randomState must be a non-negative safe integer",
      "randomState",
      seed
    );
  }
  let state = seed % m;
  return () => {
    // a * state can reach ~2^61, which a double cannot represent exactly, so a
    // plain `(a * state + c) % m` silently rounds away the low bits.
    // Math.imul yields the exact low 32 bits of the product; masking the sum to
    // 31 bits is then exactly the reduction modulo 2^31.
    state = (Math.imul(a, state) + c) & (m - 1);
    return state / m;
  };
}
|
|
78
|
+
function shuffleIndicesInPlace(indices, random) {
|
|
79
|
+
for (let i = indices.length - 1; i > 0; i--) {
|
|
80
|
+
const j = Math.floor(random() * (i + 1));
|
|
81
|
+
const temp = indices[i];
|
|
82
|
+
if (temp === void 0) {
|
|
83
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: shuffle source index missing");
|
|
84
|
+
}
|
|
85
|
+
const swap = indices[j];
|
|
86
|
+
if (swap === void 0) {
|
|
87
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: shuffle target index missing");
|
|
88
|
+
}
|
|
89
|
+
indices[i] = swap;
|
|
90
|
+
indices[j] = temp;
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
// src/preprocess/encoders.ts
|
|
95
|
+
/**
 * Return the backing storage of a string tensor.
 *
 * @param t - Object exposing `dtype` and `data`
 * @returns `t.data` (the same reference)
 * @throws {DTypeError} If `t.dtype` is not "string"
 * @throws {DeepboxError} If `t.data` is not a plain array
 */
function getStringData(t) {
  if (t.dtype === "string") {
    const storage = t.data;
    if (Array.isArray(storage)) {
      return storage;
    }
    throw new chunkJSCDE774_cjs.DeepboxError("Internal error: invalid string tensor storage");
  }
  throw new chunkJSCDE774_cjs.DTypeError("Expected string tensor");
}
|
|
104
|
+
/**
 * Return the backing storage of a non-string tensor.
 *
 * @param t - Object exposing `dtype` and `data`
 * @returns `t.data` (the same reference)
 * @throws {DTypeError} If `t.dtype` is "string"
 * @throws {DeepboxError} If `t.data` is a plain array
 */
function getNumericData(t) {
  if (t.dtype !== "string") {
    const storage = t.data;
    if (!Array.isArray(storage)) {
      return storage;
    }
    throw new chunkJSCDE774_cjs.DeepboxError("Internal error: invalid numeric tensor storage");
  }
  throw new chunkJSCDE774_cjs.DTypeError("Expected numeric tensor");
}
|
|
113
|
+
/**
 * Determine which single primitive kind a collection of category values uses.
 *
 * Only strings, numbers and bigints are counted; values of any other type are
 * ignored. A collection with no counted values is reported as "number".
 *
 * @param values - Iterable of category values
 * @param paramName - Parameter name forwarded to thrown errors
 * @returns "string", "bigint" or "number"
 * @throws {InvalidParameterError} If a number is not finite, or more than one kind is present
 */
function inferCategoryType(values, paramName) {
  const kindsSeen = /* @__PURE__ */ new Set();
  for (const entry of values) {
    switch (typeof entry) {
      case "string":
        kindsSeen.add("string");
        break;
      case "number":
        if (!Number.isFinite(entry)) {
          throw new chunkJSCDE774_cjs.InvalidParameterError("Category values must be finite numbers", paramName, entry);
        }
        kindsSeen.add("number");
        break;
      case "bigint":
        kindsSeen.add("bigint");
        break;
      default:
        break;
    }
  }
  if (kindsSeen.size > 1) {
    throw new chunkJSCDE774_cjs.InvalidParameterError("Mixed category types are not supported", paramName);
  }
  if (kindsSeen.has("string")) {
    return "string";
  }
  return kindsSeen.has("bigint") ? "bigint" : "number";
}
|
|
140
|
+
/**
 * Copy category values into an array and sort them according to their kind.
 *
 * Strings are ordered with `localeCompare`, bigints by relational comparison,
 * and numbers by numeric difference. The kind is decided by `inferCategoryType`.
 *
 * @param values - Iterable of category values
 * @param paramName - Parameter name forwarded to `inferCategoryType`
 * @returns A new sorted array (empty input yields an empty array)
 * @throws {DeepboxError} If a compared value does not match the inferred kind
 */
function sortCategories(values, paramName) {
  const sorted = Array.from(values);
  if (sorted.length === 0) {
    return sorted;
  }
  const kind = inferCategoryType(sorted, paramName);
  const orderings = {
    string: (left, right) => left.localeCompare(right),
    bigint: (left, right) => (left < right ? -1 : left > right ? 1 : 0),
    number: (left, right) => left - right,
  };
  const order = orderings[kind];
  sorted.sort((left, right) => {
    if (typeof left !== kind || typeof right !== kind) {
      throw new chunkJSCDE774_cjs.DeepboxError("Internal error: inconsistent category types");
    }
    return order(left, right);
  });
  return sorted;
}
|
|
172
|
+
/**
 * Validate a user-supplied category list: non-empty, a single value kind
 * (checked via `inferCategoryType`), and no repeated values.
 *
 * @param values - Array-like, iterable list of category values
 * @param paramName - Parameter name forwarded to thrown errors
 * @returns A new array holding the values in their given order
 * @throws {InvalidParameterError} If the list is empty or contains a duplicate
 */
function validateCategoryValues(values, paramName) {
  if (values.length === 0) {
    throw new chunkJSCDE774_cjs.InvalidParameterError("categories must contain at least one value", paramName);
  }
  const list = Array.from(values);
  inferCategoryType(list, paramName);
  const distinct = /* @__PURE__ */ new Set();
  for (let k = 0; k < list.length; k += 1) {
    const candidate = list[k];
    const sizeBefore = distinct.size;
    distinct.add(candidate);
    // The set did not grow, so this value was already present.
    if (distinct.size === sizeBefore) {
      throw new chunkJSCDE774_cjs.InvalidParameterError(
        `categories must be unique; duplicate value ${String(candidate)}`,
        paramName,
        candidate
      );
    }
  }
  return list;
}
|
|
191
|
+
/**
 * Normalize the `categories` option.
 *
 * @param categoriesOption - Either the string "auto" or an array with one entry per feature
 * @param nFeatures - Expected number of entries
 * @param paramName - Parameter name forwarded to thrown errors
 * @returns null for "auto", otherwise the array that was passed in
 * @throws {InvalidParameterError} If the option is neither "auto" nor an array, or its length differs from nFeatures
 */
function resolveCategoriesOption(categoriesOption, nFeatures, paramName) {
  if (categoriesOption === "auto") {
    return null;
  }
  const isList = Array.isArray(categoriesOption);
  if (isList && categoriesOption.length === nFeatures) {
    return categoriesOption;
  }
  if (!isList) {
    throw new chunkJSCDE774_cjs.InvalidParameterError(
      "categories must be 'auto' or an array of category arrays",
      paramName,
      categoriesOption
    );
  }
  throw new chunkJSCDE774_cjs.InvalidParameterError(
    "categories length must match number of features",
    paramName,
    categoriesOption.length
  );
}
|
|
211
|
+
/**
 * Read element `i` of a rank-1 tensor, honoring its offset and stride.
 *
 * @param t - Object exposing `offset`, `strides`, `dtype` and `data`
 * @param i - Logical element index
 * @returns The string for string tensors; otherwise the bigint as-is, or the value passed through `Number()`
 * @throws {DeepboxError} If the computed storage position holds undefined
 */
function read1DValue(t, i) {
  const step = getStride1D(t);
  const position = t.offset + i * step;
  const isText = t.dtype === "string";
  const element = isText ? getStringData(t)[position] : getNumericData(t)[position];
  if (element === void 0) {
    throw new chunkJSCDE774_cjs.DeepboxError(
      isText
        ? "Internal error: string tensor access out of bounds"
        : "Internal error: numeric tensor access out of bounds"
    );
  }
  if (isText || typeof element === "bigint") {
    return element;
  }
  return Number(element);
}
|
|
227
|
+
/**
 * Read element `(row, col)` of a rank-2 tensor, honoring its offset and strides.
 *
 * @param t - Object exposing `offset`, `strides`, `dtype` and `data`
 * @param row - Logical row index
 * @param col - Logical column index
 * @returns The string for string tensors; otherwise the bigint as-is, or the value passed through `Number()`
 * @throws {DeepboxError} If the computed storage position holds undefined
 */
function read2DValue(t, row, col) {
  const [rowStep, colStep] = getStrides2D(t);
  const position = t.offset + row * rowStep + col * colStep;
  const isText = t.dtype === "string";
  const element = isText ? getStringData(t)[position] : getNumericData(t)[position];
  if (element === void 0) {
    throw new chunkJSCDE774_cjs.DeepboxError(
      isText
        ? "Internal error: string tensor access out of bounds"
        : "Internal error: numeric tensor access out of bounds"
    );
  }
  if (isText || typeof element === "bigint") {
    return element;
  }
  return Number(element);
}
|
|
243
|
+
/**
 * Guard that only lets rank-1 tensors through.
 *
 * @param t - Object exposing an `ndim` property
 * @param name - Label interpolated into the error message
 * @throws {ShapeError} If `t.ndim` is not 1
 */
function assert1D(t, name) {
  const isVector = t.ndim === 1;
  if (isVector) {
    return;
  }
  throw new chunkJSCDE774_cjs.ShapeError(`${name} must be a 1D tensor`);
}
|
|
248
|
+
/**
 * Indexed read that refuses to return undefined.
 *
 * @param values - Indexable collection of categories
 * @param index - Position to read
 * @param context - Caller description included in the error message
 * @returns `values[index]`
 * @throws {DeepboxError} If the slot holds undefined
 */
function categoryValueAt(values, index, context) {
  const found = values[index];
  if (found !== void 0) {
    return found;
  }
  throw new chunkJSCDE774_cjs.DeepboxError(`Internal error: missing category at index ${index} (${context})`);
}
|
|
255
|
+
/**
 * Infer the category kind of a nested collection by flattening it one level
 * and delegating to `inferCategoryType`.
 *
 * @param rows - Iterable of iterables of category values
 * @param paramName - Parameter name forwarded to `inferCategoryType`
 * @returns Whatever `inferCategoryType` returns for the flattened values
 */
function inferCategoryTypeFromRows(rows, paramName) {
  // Lazily walk every cell in row order; Array.from materializes the flat list.
  function* eachCell() {
    for (const row of rows) {
      yield* row;
    }
  }
  return inferCategoryType(Array.from(eachCell()), paramName);
}
|
|
264
|
+
/**
 * Build a zero-length rank-1 tensor whose dtype matches the kind of `classes`.
 *
 * @param classes - Category values used only to infer the kind
 * @param paramName - Parameter name forwarded to `inferCategoryType`
 * @returns `empty([0])` with dtype "string" or "int64", or `zeros([0])` with dtype "float64"
 */
function emptyCategoryVectorFromClasses(classes, paramName) {
  switch (inferCategoryType(classes, paramName)) {
    case "string":
      return chunk6AE5FKKQ_cjs.empty([0], { dtype: "string" });
    case "bigint":
      return chunk6AE5FKKQ_cjs.empty([0], { dtype: "int64" });
    default:
      return chunk6AE5FKKQ_cjs.zeros([0], { dtype: "float64" });
  }
}
|
|
274
|
+
/**
 * Build a `[0, nFeatures]` tensor whose dtype matches the kind of the categories.
 *
 * @param categories - Nested category lists used only to infer the kind
 * @param nFeatures - Size of the second dimension
 * @param paramName - Parameter name forwarded to `inferCategoryTypeFromRows`
 * @returns `empty` with dtype "string" or "int64", or `zeros` with dtype "float64"
 */
function emptyCategoryMatrixFromCategories(categories, nFeatures, paramName) {
  switch (inferCategoryTypeFromRows(categories, paramName)) {
    case "string":
      return chunk6AE5FKKQ_cjs.empty([0, nFeatures], { dtype: "string" });
    case "bigint":
      return chunk6AE5FKKQ_cjs.empty([0, nFeatures], { dtype: "int64" });
    default:
      return chunk6AE5FKKQ_cjs.zeros([0, nFeatures], { dtype: "float64" });
  }
}
|
|
284
|
+
/**
 * Pack a flat list of category values into a tensor.
 *
 * The kind reported by `inferCategoryType` selects the buffer: a plain array
 * for strings, a BigInt64Array for bigints, and a Float64Array otherwise.
 * Every element is re-checked against that kind while copying.
 *
 * @param values - Indexable, iterable list of category values
 * @param paramName - Parameter name forwarded to `inferCategoryType` (default "y")
 * @returns Result of `tensor(buffer)`
 * @throws {DeepboxError} If an element does not match the inferred kind
 */
function toCategoryVectorTensor(values, paramName = "y") {
  const kind = inferCategoryType(values, paramName);
  const plans = {
    string: {
      allocate: (count) => new Array(count),
      mismatch: "Internal error: expected string category value",
    },
    bigint: {
      allocate: (count) => new BigInt64Array(count),
      mismatch: "Internal error: expected bigint category value",
    },
    number: {
      allocate: (count) => new Float64Array(count),
      mismatch: "Internal error: expected numeric category value",
    },
  };
  const plan = plans[kind];
  const buffer = plan.allocate(values.length);
  for (let k = 0; k < values.length; k += 1) {
    const entry = values[k];
    if (typeof entry !== kind) {
      throw new chunkJSCDE774_cjs.DeepboxError(plan.mismatch);
    }
    buffer[k] = entry;
  }
  return chunk6AE5FKKQ_cjs.tensor(buffer);
}
|
|
318
|
+
/**
 * Pack a rectangular nested list of category values into a rank-2 tensor.
 *
 * The column count is taken from the first row. Every row must exist and have
 * that length. The kind of the flattened values (via `inferCategoryType`)
 * selects the output:
 *  - string: nested arrays passed to `tensor(out)`
 *  - number: nested arrays passed to `tensor(out, { dtype: "float64" })`
 *  - bigint: a BigInt64Array passed to `Tensor.fromTypedArray` with dtype
 *    "int64" and the configured default device
 *
 * @param values - Indexable, iterable list of rows
 * @param paramName - Parameter name forwarded to `inferCategoryType` (default "X")
 * @returns The constructed tensor
 * @throws {ShapeError} If rows have differing lengths
 * @throws {DeepboxError} If a row is missing or a cell does not match the inferred kind
 */
function toCategoryMatrixTensor(values, paramName = "X") {
  const rowCount = values.length;
  const colCount = rowCount > 0 ? values[0]?.length ?? 0 : 0;

  // Shared row accessor: every read of a row goes through the same falsy check.
  const requireRow = (r) => {
    const current = values[r];
    if (!current) {
      throw new chunkJSCDE774_cjs.DeepboxError("Internal error: missing row in category matrix");
    }
    return current;
  };

  for (let r = 0; r < rowCount; r += 1) {
    if (requireRow(r).length !== colCount) {
      throw new chunkJSCDE774_cjs.ShapeError("Ragged category matrix cannot be converted to tensor");
    }
  }

  const flattened = [];
  for (const current of values) {
    for (const cell of current) {
      flattened.push(cell);
    }
  }

  const kind = inferCategoryType(flattened, paramName);

  if (kind !== "string" && kind !== "number") {
    const packed = new BigInt64Array(rowCount * colCount);
    for (let k = 0; k < flattened.length; k += 1) {
      const cell = flattened[k];
      if (typeof cell !== "bigint") {
        throw new chunkJSCDE774_cjs.DeepboxError("Internal error: expected bigint category value");
      }
      packed[k] = cell;
    }
    const { defaultDevice } = chunkJSCDE774_cjs.getConfig();
    return chunk6AE5FKKQ_cjs.Tensor.fromTypedArray({
      data: packed,
      shape: [rowCount, colCount],
      dtype: "int64",
      device: defaultDevice
    });
  }

  const mismatch = kind === "string"
    ? "Internal error: expected string category value"
    : "Internal error: expected numeric category value";
  const nested = [];
  for (let r = 0; r < rowCount; r += 1) {
    const current = requireRow(r);
    const copy = [];
    for (let c = 0; c < colCount; c += 1) {
      const cell = current[c];
      if (typeof cell !== kind) {
        throw new chunkJSCDE774_cjs.DeepboxError(mismatch);
      }
      copy.push(cell);
    }
    nested.push(copy);
  }
  return kind === "string"
    ? chunk6AE5FKKQ_cjs.tensor(nested)
    : chunk6AE5FKKQ_cjs.tensor(nested, { dtype: "float64" });
}
|
|
391
|
+
var LabelEncoder = class {
  /** True once fit() has completed */
  fitted = false;
  /** Sorted array of the distinct labels collected by fit() */
  classes_;
  /** Map from label value to its position in classes_ */
  classToIndex_;
  /**
   * Learn the set of distinct labels in y.
   * The labels are de-duplicated, sorted, and indexed by position.
   *
   * @param y - Target labels (1D tensor)
   * @returns this - The encoder itself, to allow chaining
   * @throws {InvalidParameterError} If y has no elements
   */
  fit(y) {
    assert1D(y, "y");
    if (y.size === 0) {
      throw new chunkJSCDE774_cjs.InvalidParameterError("Cannot fit LabelEncoder on empty array", "y");
    }
    const distinct = /* @__PURE__ */ new Set();
    let position = 0;
    while (position < y.size) {
      distinct.add(read1DValue(y, position));
      position += 1;
    }
    this.classes_ = sortCategories(distinct, "y");
    this.classToIndex_ = /* @__PURE__ */ new Map();
    for (let code = 0; code < this.classes_.length; code += 1) {
      const label = categoryValueAt(this.classes_, code, "LabelEncoder.fit");
      this.classToIndex_.set(label, code);
    }
    this.fitted = true;
    return this;
  }
  /**
   * Replace every label in y with its position in the fitted class list.
   *
   * @param y - Labels to encode (1D tensor)
   * @returns Tensor of class positions, created with dtype "float64"
   *   (an empty y yields `tensor([])`)
   * @throws {NotFittedError} If fit() has not been called
   * @throws {InvalidParameterError} If y contains a label that was not seen by fit()
   */
  transform(y) {
    if (!this.fitted) {
      throw new chunkJSCDE774_cjs.NotFittedError("LabelEncoder must be fitted before transform");
    }
    assert1D(y, "y");
    if (y.size === 0) {
      return chunk6AE5FKKQ_cjs.tensor([]);
    }
    const table = this.classToIndex_;
    if (!(this.classes_ && table)) {
      throw new chunkJSCDE774_cjs.DeepboxError("LabelEncoder internal error: missing fitted state");
    }
    const codes = new Array(y.size);
    let position = 0;
    while (position < y.size) {
      const label = read1DValue(y, position);
      const code = table.get(label);
      if (code === void 0) {
        throw new chunkJSCDE774_cjs.InvalidParameterError(
          `Unknown label: ${String(label)}. Label must be present during fit.`,
          "y",
          label
        );
      }
      codes[position] = code;
      position += 1;
    }
    return chunk6AE5FKKQ_cjs.tensor(codes, { dtype: "float64" });
  }
  /**
   * Shorthand for `fit(y).transform(y)`.
   *
   * @param y - Target labels (1D tensor)
   * @returns Encoded labels
   */
  fitTransform(y) {
    const fittedSelf = this.fit(y);
    return fittedSelf.transform(y);
  }
  /**
   * Map class positions back to the labels they stand for.
   *
   * @param y - Encoded labels (1D numeric tensor)
   * @returns Tensor of the original labels
   * @throws {NotFittedError} If fit() has not been called
   * @throws {InvalidParameterError} If an entry is not an integer in [0, n_classes-1]
   */
  inverseTransform(y) {
    if (!this.fitted) {
      throw new chunkJSCDE774_cjs.NotFittedError("LabelEncoder must be fitted before inverse_transform");
    }
    assert1D(y, "y");
    assertNumericTensor(y, "y");
    const classes = this.classes_;
    if (!classes) {
      throw new chunkJSCDE774_cjs.DeepboxError("LabelEncoder internal error: missing fitted state");
    }
    if (y.size === 0) {
      return emptyCategoryVectorFromClasses(classes, "y");
    }
    const classCount = classes.length;
    const labels = new Array(y.size);
    const step = getStride1D(y);
    const storage = getNumericData(y);
    let position = 0;
    while (position < y.size) {
      const stored = storage[y.offset + position * step];
      if (stored === void 0) {
        throw new chunkJSCDE774_cjs.DeepboxError("Internal error: numeric tensor access out of bounds");
      }
      const idx = Number(stored);
      const isValidCode = Number.isInteger(idx) && idx >= 0 && idx < classCount;
      if (!isValidCode) {
        throw new chunkJSCDE774_cjs.InvalidParameterError(
          `Invalid label index: ${idx}. Must be integer in [0, ${classCount - 1}]`,
          "y",
          idx
        );
      }
      labels[position] = categoryValueAt(classes, idx, "LabelEncoder.inverseTransform");
      position += 1;
    }
    return toCategoryVectorTensor(labels, "y");
  }
};
|
|
513
|
+
var OneHotEncoder = class {
|
|
514
|
+
/** Indicates whether the encoder has been fitted to data */
|
|
515
|
+
fitted = false;
|
|
516
|
+
/** Array of unique categories for each feature */
|
|
517
|
+
categories_;
|
|
518
|
+
/** Maps from category value to index for each feature (for O(1) lookup) */
|
|
519
|
+
categoryToIndex_;
|
|
520
|
+
/** Whether to return sparse matrix (CSR) or dense array */
|
|
521
|
+
sparse;
|
|
522
|
+
/** How to handle unknown categories during transform */
|
|
523
|
+
handleUnknown;
|
|
524
|
+
/** Drop policy to avoid collinearity */
|
|
525
|
+
drop;
|
|
526
|
+
/** Per-feature dropped category index */
|
|
527
|
+
dropIndices_;
|
|
528
|
+
/** Categories configuration */
|
|
529
|
+
categoriesOption;
|
|
530
|
+
/**
|
|
531
|
+
* Creates a new OneHotEncoder instance.
|
|
532
|
+
*
|
|
533
|
+
* @param options - Configuration options
|
|
534
|
+
* @param options.sparse - If true, returns CSRMatrix; if false, returns dense Tensor (default: false)
|
|
535
|
+
* @param options.sparseOutput - Alias for sparse (default: false)
|
|
536
|
+
* @param options.handleUnknown - How to handle unknown categories (default: "error")
|
|
537
|
+
* @param options.drop - If set, drops the first or binary category per feature
|
|
538
|
+
* @param options.categories - "auto" or explicit category list per feature
|
|
539
|
+
*/
|
|
540
|
+
constructor(options = {}) {
|
|
541
|
+
const sparseOption = options.sparse ?? options.sparseOutput ?? false;
|
|
542
|
+
if (options.sparse !== void 0 && options.sparseOutput !== void 0) {
|
|
543
|
+
if (options.sparse !== options.sparseOutput) {
|
|
544
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
545
|
+
"sparse and sparseOutput must match when both are provided",
|
|
546
|
+
"sparse",
|
|
547
|
+
options.sparse
|
|
548
|
+
);
|
|
549
|
+
}
|
|
550
|
+
}
|
|
551
|
+
this.sparse = sparseOption;
|
|
552
|
+
this.handleUnknown = options.handleUnknown ?? "error";
|
|
553
|
+
this.drop = options.drop ?? null;
|
|
554
|
+
this.categoriesOption = options.categories ?? "auto";
|
|
555
|
+
if (typeof this.sparse !== "boolean") {
|
|
556
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError("sparse must be a boolean", "sparse", this.sparse);
|
|
557
|
+
}
|
|
558
|
+
if (this.handleUnknown !== "error" && this.handleUnknown !== "ignore") {
|
|
559
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
560
|
+
"handleUnknown must be 'error' or 'ignore'",
|
|
561
|
+
"handleUnknown",
|
|
562
|
+
this.handleUnknown
|
|
563
|
+
);
|
|
564
|
+
}
|
|
565
|
+
if (this.drop !== null && this.drop !== "first" && this.drop !== "if_binary") {
|
|
566
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
567
|
+
"drop must be 'first', 'if_binary', or null",
|
|
568
|
+
"drop",
|
|
569
|
+
this.drop
|
|
570
|
+
);
|
|
571
|
+
}
|
|
572
|
+
}
|
|
573
|
+
/**
|
|
574
|
+
* Fit OneHotEncoder to X.
|
|
575
|
+
* Learns the unique categories for each feature.
|
|
576
|
+
*
|
|
577
|
+
* @param X - Training data (2D tensor of categorical features)
|
|
578
|
+
* @returns this - Returns self for method chaining
|
|
579
|
+
* @throws {ShapeError} If X is not a 2D tensor
|
|
580
|
+
* @throws {InvalidParameterError} If X is empty
|
|
581
|
+
*/
|
|
582
|
+
fit(X) {
|
|
583
|
+
assert2D(X, "X");
|
|
584
|
+
const [nSamples, nFeatures] = getShape2D(X);
|
|
585
|
+
if (nSamples === 0 || nFeatures === 0) {
|
|
586
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError("Cannot fit OneHotEncoder on empty array", "X");
|
|
587
|
+
}
|
|
588
|
+
this.categories_ = [];
|
|
589
|
+
this.categoryToIndex_ = [];
|
|
590
|
+
const explicitCategories = resolveCategoriesOption(
|
|
591
|
+
this.categoriesOption,
|
|
592
|
+
nFeatures,
|
|
593
|
+
"categories"
|
|
594
|
+
);
|
|
595
|
+
for (let j = 0; j < nFeatures; j++) {
|
|
596
|
+
let cats;
|
|
597
|
+
if (explicitCategories) {
|
|
598
|
+
const featureCats = explicitCategories[j];
|
|
599
|
+
if (!featureCats) {
|
|
600
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError("Missing categories for feature", "categories", j);
|
|
601
|
+
}
|
|
602
|
+
if (!Array.isArray(featureCats)) {
|
|
603
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
604
|
+
"categories must be an array of category arrays",
|
|
605
|
+
"categories",
|
|
606
|
+
featureCats
|
|
607
|
+
);
|
|
608
|
+
}
|
|
609
|
+
cats = validateCategoryValues(featureCats, "categories");
|
|
610
|
+
} else {
|
|
611
|
+
const uniqueSet = /* @__PURE__ */ new Set();
|
|
612
|
+
for (let i = 0; i < nSamples; i++) {
|
|
613
|
+
uniqueSet.add(read2DValue(X, i, j));
|
|
614
|
+
}
|
|
615
|
+
cats = sortCategories(uniqueSet, "X");
|
|
616
|
+
}
|
|
617
|
+
if (cats.length === 0) {
|
|
618
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError("Each feature must have at least one category", "X", j);
|
|
619
|
+
}
|
|
620
|
+
this.categories_.push(cats);
|
|
621
|
+
const map = /* @__PURE__ */ new Map();
|
|
622
|
+
for (let k = 0; k < cats.length; k++) {
|
|
623
|
+
map.set(categoryValueAt(cats, k, "OneHotEncoder.fit"), k);
|
|
624
|
+
}
|
|
625
|
+
this.categoryToIndex_.push(map);
|
|
626
|
+
if (explicitCategories) {
|
|
627
|
+
for (let i = 0; i < nSamples; i++) {
|
|
628
|
+
const val = read2DValue(X, i, j);
|
|
629
|
+
if (!map.has(val)) {
|
|
630
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
631
|
+
`Unknown category: ${String(val)} in feature ${j}`,
|
|
632
|
+
"X",
|
|
633
|
+
val
|
|
634
|
+
);
|
|
635
|
+
}
|
|
636
|
+
}
|
|
637
|
+
}
|
|
638
|
+
}
|
|
639
|
+
this.dropIndices_ = this.categories_.map((cats) => {
|
|
640
|
+
if (this.drop === null) return null;
|
|
641
|
+
if (this.drop === "first") return cats.length > 0 ? 0 : null;
|
|
642
|
+
if (this.drop === "if_binary") return cats.length === 2 ? 0 : null;
|
|
643
|
+
return null;
|
|
644
|
+
});
|
|
645
|
+
this.fitted = true;
|
|
646
|
+
return this;
|
|
647
|
+
}
|
|
648
|
+
/**
|
|
649
|
+
* Transform X using one-hot encoding.
|
|
650
|
+
* Each categorical value is converted to a binary vector.
|
|
651
|
+
*
|
|
652
|
+
* @param X - Data to transform (2D tensor)
|
|
653
|
+
* @returns Encoded data as dense Tensor or sparse CSRMatrix
|
|
654
|
+
* @throws {NotFittedError} If encoder is not fitted
|
|
655
|
+
* @throws {InvalidParameterError} If X contains unknown categories
|
|
656
|
+
*/
|
|
657
|
+
transform(X) {
|
|
658
|
+
if (!this.fitted) {
|
|
659
|
+
throw new chunkJSCDE774_cjs.NotFittedError("OneHotEncoder must be fitted before transform");
|
|
660
|
+
}
|
|
661
|
+
assert2D(X, "X");
|
|
662
|
+
const [nSamples, nFeatures] = getShape2D(X);
|
|
663
|
+
const categories = this.categories_;
|
|
664
|
+
const categoryMaps = this.categoryToIndex_;
|
|
665
|
+
if (!categories || !categoryMaps) {
|
|
666
|
+
throw new chunkJSCDE774_cjs.DeepboxError("OneHotEncoder internal error: missing fitted state");
|
|
667
|
+
}
|
|
668
|
+
const fittedFeatures = categories.length;
|
|
669
|
+
if (nFeatures !== fittedFeatures) {
|
|
670
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
671
|
+
"X has a different feature count than during fit",
|
|
672
|
+
"X",
|
|
673
|
+
nFeatures
|
|
674
|
+
);
|
|
675
|
+
}
|
|
676
|
+
const dropIndices = this.dropIndices_ ?? categories.map(() => null);
|
|
677
|
+
let totalCols = 0;
|
|
678
|
+
for (let j = 0; j < categories.length; j++) {
|
|
679
|
+
const cats = categories[j];
|
|
680
|
+
if (!cats) continue;
|
|
681
|
+
const dropIndex = dropIndices[j] ?? null;
|
|
682
|
+
totalCols += cats.length - (dropIndex === null ? 0 : 1);
|
|
683
|
+
}
|
|
684
|
+
if (nSamples === 0 || nFeatures === 0) {
|
|
685
|
+
return this.sparse ? chunk6AE5FKKQ_cjs.CSRMatrix.fromCOO({
|
|
686
|
+
rows: 0,
|
|
687
|
+
cols: totalCols,
|
|
688
|
+
rowIndices: new Int32Array(0),
|
|
689
|
+
colIndices: new Int32Array(0),
|
|
690
|
+
values: new Float64Array(0)
|
|
691
|
+
}) : chunk6AE5FKKQ_cjs.zeros([0, totalCols], { dtype: "float64" });
|
|
692
|
+
}
|
|
693
|
+
if (this.sparse) {
|
|
694
|
+
const rowIdx = [];
|
|
695
|
+
const colIdx = [];
|
|
696
|
+
const vals = [];
|
|
697
|
+
for (let i = 0; i < nSamples; i++) {
|
|
698
|
+
let colOffset = 0;
|
|
699
|
+
for (let j = 0; j < nFeatures; j++) {
|
|
700
|
+
const cats = categories[j];
|
|
701
|
+
const map = categoryMaps[j];
|
|
702
|
+
const dropIndex = dropIndices[j] ?? null;
|
|
703
|
+
if (!cats || !map) {
|
|
704
|
+
throw new chunkJSCDE774_cjs.DeepboxError("OneHotEncoder internal error: missing fitted categories");
|
|
705
|
+
}
|
|
706
|
+
const outSize = cats.length - (dropIndex === null ? 0 : 1);
|
|
707
|
+
const val = read2DValue(X, i, j);
|
|
708
|
+
const idx = map.get(val);
|
|
709
|
+
if (idx === void 0) {
|
|
710
|
+
if (this.handleUnknown === "ignore") {
|
|
711
|
+
colOffset += outSize;
|
|
712
|
+
continue;
|
|
713
|
+
}
|
|
714
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(`Unknown category: ${String(val)}`, "X", val);
|
|
715
|
+
}
|
|
716
|
+
if (dropIndex !== null && idx === dropIndex) {
|
|
717
|
+
colOffset += outSize;
|
|
718
|
+
continue;
|
|
719
|
+
}
|
|
720
|
+
const adjusted = dropIndex !== null && idx > dropIndex ? idx - 1 : idx;
|
|
721
|
+
rowIdx.push(i);
|
|
722
|
+
colIdx.push(colOffset + adjusted);
|
|
723
|
+
vals.push(1);
|
|
724
|
+
colOffset += outSize;
|
|
725
|
+
}
|
|
726
|
+
}
|
|
727
|
+
return chunk6AE5FKKQ_cjs.CSRMatrix.fromCOO({
|
|
728
|
+
rows: nSamples,
|
|
729
|
+
cols: totalCols,
|
|
730
|
+
rowIndices: Int32Array.from(rowIdx),
|
|
731
|
+
colIndices: Int32Array.from(colIdx),
|
|
732
|
+
values: Float64Array.from(vals)
|
|
733
|
+
});
|
|
734
|
+
}
|
|
735
|
+
const result = Array.from({ length: nSamples }, () => new Array(totalCols).fill(0));
|
|
736
|
+
for (let i = 0; i < nSamples; i++) {
|
|
737
|
+
let colOffset = 0;
|
|
738
|
+
for (let j = 0; j < nFeatures; j++) {
|
|
739
|
+
const cats = categories[j];
|
|
740
|
+
const map = categoryMaps[j];
|
|
741
|
+
const dropIndex = dropIndices[j] ?? null;
|
|
742
|
+
if (!cats || !map) {
|
|
743
|
+
throw new chunkJSCDE774_cjs.DeepboxError("OneHotEncoder internal error: missing fitted categories");
|
|
744
|
+
}
|
|
745
|
+
const outSize = cats.length - (dropIndex === null ? 0 : 1);
|
|
746
|
+
const val = read2DValue(X, i, j);
|
|
747
|
+
const idx = map.get(val);
|
|
748
|
+
if (idx === void 0) {
|
|
749
|
+
if (this.handleUnknown === "ignore") {
|
|
750
|
+
colOffset += outSize;
|
|
751
|
+
continue;
|
|
752
|
+
}
|
|
753
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(`Unknown category: ${String(val)}`, "X", val);
|
|
754
|
+
}
|
|
755
|
+
if (dropIndex !== null && idx === dropIndex) {
|
|
756
|
+
colOffset += outSize;
|
|
757
|
+
continue;
|
|
758
|
+
}
|
|
759
|
+
const row = result[i];
|
|
760
|
+
if (row === void 0) {
|
|
761
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: result row access failed");
|
|
762
|
+
}
|
|
763
|
+
const adjusted = dropIndex !== null && idx > dropIndex ? idx - 1 : idx;
|
|
764
|
+
row[colOffset + adjusted] = 1;
|
|
765
|
+
colOffset += outSize;
|
|
766
|
+
}
|
|
767
|
+
}
|
|
768
|
+
return chunk6AE5FKKQ_cjs.tensor(result, { dtype: "float64", device: X.device });
|
|
769
|
+
}
|
|
770
|
+
fitTransform(X) {
|
|
771
|
+
return this.fit(X).transform(X);
|
|
772
|
+
}
|
|
773
|
+
inverseTransform(X) {
|
|
774
|
+
if (!this.fitted) {
|
|
775
|
+
throw new chunkJSCDE774_cjs.NotFittedError("OneHotEncoder must be fitted before inverse_transform");
|
|
776
|
+
}
|
|
777
|
+
const dense = X instanceof chunk6AE5FKKQ_cjs.CSRMatrix ? X.toDense() : X;
|
|
778
|
+
assert2D(dense, "X");
|
|
779
|
+
assertNumericTensor(dense, "X");
|
|
780
|
+
const [nSamples, nCols] = getShape2D(dense);
|
|
781
|
+
const categories = this.categories_;
|
|
782
|
+
if (!categories) {
|
|
783
|
+
throw new chunkJSCDE774_cjs.DeepboxError("OneHotEncoder internal error: missing fitted categories");
|
|
784
|
+
}
|
|
785
|
+
const nFeatures = categories.length;
|
|
786
|
+
const dropIndices = this.dropIndices_ ?? categories.map(() => null);
|
|
787
|
+
const totalCols = categories.reduce((sum, cats, idx) => {
|
|
788
|
+
const dropIndex = dropIndices[idx] ?? null;
|
|
789
|
+
return sum + cats.length - (dropIndex === null ? 0 : 1);
|
|
790
|
+
}, 0);
|
|
791
|
+
if (nCols !== totalCols) {
|
|
792
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError("column count does not match fitted categories", "X", nCols);
|
|
793
|
+
}
|
|
794
|
+
if (nSamples === 0) {
|
|
795
|
+
return emptyCategoryMatrixFromCategories(categories, nFeatures, "X");
|
|
796
|
+
}
|
|
797
|
+
const result = new Array(nSamples);
|
|
798
|
+
for (let i = 0; i < nSamples; i++) {
|
|
799
|
+
result[i] = new Array(nFeatures);
|
|
800
|
+
}
|
|
801
|
+
const denseData = getNumericData(dense);
|
|
802
|
+
const [stride0, stride1] = getStrides2D(dense);
|
|
803
|
+
for (let i = 0; i < nSamples; i++) {
|
|
804
|
+
let colOffset = 0;
|
|
805
|
+
for (let j = 0; j < nFeatures; j++) {
|
|
806
|
+
const cats = categories[j];
|
|
807
|
+
const dropIndex = dropIndices[j] ?? null;
|
|
808
|
+
if (!cats) {
|
|
809
|
+
throw new chunkJSCDE774_cjs.DeepboxError("OneHotEncoder internal error: missing fitted categories");
|
|
810
|
+
}
|
|
811
|
+
const outSize = cats.length - (dropIndex === null ? 0 : 1);
|
|
812
|
+
if (outSize === 0) {
|
|
813
|
+
const row2 = result[i];
|
|
814
|
+
if (!row2) {
|
|
815
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: result row access failed");
|
|
816
|
+
}
|
|
817
|
+
row2[j] = categoryValueAt(cats, dropIndex ?? 0, "OneHotEncoder.inverseTransform");
|
|
818
|
+
continue;
|
|
819
|
+
}
|
|
820
|
+
let maxIdx = 0;
|
|
821
|
+
const rowBase = dense.offset + i * stride0 + colOffset * stride1;
|
|
822
|
+
const first = denseData[rowBase];
|
|
823
|
+
if (first === void 0) {
|
|
824
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: numeric tensor access out of bounds");
|
|
825
|
+
}
|
|
826
|
+
let maxVal = Number(first);
|
|
827
|
+
let hasPositive = maxVal > 0;
|
|
828
|
+
for (let k = 1; k < outSize; k++) {
|
|
829
|
+
const raw = denseData[rowBase + k * stride1];
|
|
830
|
+
if (raw === void 0) {
|
|
831
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: numeric tensor access out of bounds");
|
|
832
|
+
}
|
|
833
|
+
const val = Number(raw);
|
|
834
|
+
if (val > maxVal) {
|
|
835
|
+
maxVal = val;
|
|
836
|
+
maxIdx = k;
|
|
837
|
+
}
|
|
838
|
+
if (val > 0) {
|
|
839
|
+
hasPositive = true;
|
|
840
|
+
}
|
|
841
|
+
}
|
|
842
|
+
const row = result[i];
|
|
843
|
+
if (row === void 0) {
|
|
844
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: result row access failed");
|
|
845
|
+
}
|
|
846
|
+
if (!hasPositive) {
|
|
847
|
+
if (dropIndex !== null) {
|
|
848
|
+
row[j] = categoryValueAt(cats, dropIndex, "OneHotEncoder.inverseTransform");
|
|
849
|
+
} else if (this.handleUnknown === "ignore") {
|
|
850
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
851
|
+
"Cannot inverse-transform: sample contains no active category (all zeros). This may happen if unknown categories were ignored during transform.",
|
|
852
|
+
"X"
|
|
853
|
+
);
|
|
854
|
+
} else {
|
|
855
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError("Invalid one-hot encoding: all zeros", "X");
|
|
856
|
+
}
|
|
857
|
+
} else {
|
|
858
|
+
const actualIdx = dropIndex !== null && maxIdx >= dropIndex ? maxIdx + 1 : maxIdx;
|
|
859
|
+
row[j] = categoryValueAt(cats, actualIdx, "OneHotEncoder.inverseTransform");
|
|
860
|
+
}
|
|
861
|
+
colOffset += outSize;
|
|
862
|
+
}
|
|
863
|
+
}
|
|
864
|
+
return toCategoryMatrixTensor(result, "X");
|
|
865
|
+
}
|
|
866
|
+
};
|
|
867
|
+
var OrdinalEncoder = class {
  /** Indicates whether the encoder has been fitted to data */
  fitted = false;
  /** Array of unique categories for each feature, sorted */
  categories_;
  /** Maps from category value to index for each feature (for O(1) lookup) */
  categoryToIndex_;
  /** How to handle unknown categories during transform */
  handleUnknown;
  /** Value used for unknown categories when handleUnknown = "useEncodedValue" */
  unknownValue;
  /** Categories configuration */
  categoriesOption;
  /**
   * Creates a new OrdinalEncoder instance.
   *
   * @param options - Configuration options
   * @param options.handleUnknown - "error" (default) or "useEncodedValue"
   * @param options.unknownValue - Encoded value for unknown categories when
   *   handleUnknown="useEncodedValue" (default: -1); must be an integer or NaN
   * @param options.categories - "auto" (default) or explicit categories per feature
   * @throws {InvalidParameterError} If handleUnknown or unknownValue is invalid
   */
  constructor(options = {}) {
    this.handleUnknown = options.handleUnknown ?? "error";
    this.unknownValue = options.unknownValue ?? -1;
    this.categoriesOption = options.categories ?? "auto";
    if (this.handleUnknown !== "error" && this.handleUnknown !== "useEncodedValue") {
      throw new chunkJSCDE774_cjs.InvalidParameterError(
        "handleUnknown must be 'error' or 'useEncodedValue'",
        "handleUnknown",
        this.handleUnknown
      );
    }
    // Number.isFinite / Number.isNaN do not coerce, so non-number values are rejected here too.
    if (!Number.isFinite(this.unknownValue) && !Number.isNaN(this.unknownValue)) {
      throw new chunkJSCDE774_cjs.InvalidParameterError(
        "unknownValue must be a finite number or NaN",
        "unknownValue",
        this.unknownValue
      );
    }
    if (Number.isFinite(this.unknownValue) && !Number.isInteger(this.unknownValue)) {
      throw new chunkJSCDE774_cjs.InvalidParameterError(
        "unknownValue must be an integer when finite",
        "unknownValue",
        this.unknownValue
      );
    }
  }
  /**
   * Fit OrdinalEncoder to X.
   * Learns the categories for each feature and their ordering.
   *
   * Learned state is built in local arrays and committed only after every
   * feature validates, so a failing re-fit leaves a previously fitted encoder
   * unchanged.
   *
   * @param X - Training data (2D tensor of categorical features)
   * @returns this - Returns self for method chaining
   * @throws {InvalidParameterError} If X has no samples, categories are
   *   invalid, X contains a value outside explicit categories, or
   *   unknownValue collides with an encoded index
   */
  fit(X) {
    assert2D(X, "X");
    const [nSamples, nFeatures] = getShape2D(X);
    if (nSamples === 0) {
      throw new chunkJSCDE774_cjs.InvalidParameterError("Cannot fit OrdinalEncoder on empty array", "X");
    }
    const explicitCategories = resolveCategoriesOption(
      this.categoriesOption,
      nFeatures,
      "categories"
    );
    const categories = [];
    const categoryToIndex = [];
    for (let j = 0; j < nFeatures; j++) {
      let sorted;
      if (explicitCategories) {
        const featureCats = explicitCategories[j];
        if (!featureCats) {
          throw new chunkJSCDE774_cjs.InvalidParameterError("Missing categories for feature", "categories", j);
        }
        if (!Array.isArray(featureCats)) {
          throw new chunkJSCDE774_cjs.InvalidParameterError(
            "categories must be an array of category arrays",
            "categories",
            featureCats
          );
        }
        sorted = validateCategoryValues(featureCats, "categories");
      } else {
        // "auto": collect the distinct values observed in column j.
        const uniqueSet = /* @__PURE__ */ new Set();
        for (let i = 0; i < nSamples; i++) {
          uniqueSet.add(read2DValue(X, i, j));
        }
        sorted = sortCategories(uniqueSet, "X");
      }
      if (sorted.length === 0) {
        throw new chunkJSCDE774_cjs.InvalidParameterError("Each feature must have at least one category", "X", j);
      }
      const map = /* @__PURE__ */ new Map();
      for (let k = 0; k < sorted.length; k++) {
        map.set(categoryValueAt(sorted, k, "OrdinalEncoder.fit"), k);
      }
      if (explicitCategories) {
        // With explicit categories, every training value must be one of them.
        for (let i = 0; i < nSamples; i++) {
          const val = read2DValue(X, i, j);
          if (!map.has(val)) {
            throw new chunkJSCDE774_cjs.InvalidParameterError(
              `Unknown category: ${String(val)} in feature ${j}`,
              "X",
              val
            );
          }
        }
      }
      // The sentinel for unknown values must not be a valid encoded index.
      if (this.handleUnknown === "useEncodedValue") {
        if (Number.isFinite(this.unknownValue) && this.unknownValue >= 0 && this.unknownValue < sorted.length) {
          throw new chunkJSCDE774_cjs.InvalidParameterError(
            "unknownValue must be outside the range of encoded categories",
            "unknownValue",
            this.unknownValue
          );
        }
      }
      categories.push(sorted);
      categoryToIndex.push(map);
    }
    // Commit the fully validated state in one step.
    this.categories_ = categories;
    this.categoryToIndex_ = categoryToIndex;
    this.fitted = true;
    return this;
  }
  /**
   * Transform X using ordinal encoding.
   * Each category is mapped to its index in the fitted categories array.
   *
   * @param X - Data to transform (2D tensor)
   * @returns Encoded data (float64 tensor of category indices)
   * @throws {NotFittedError} If encoder is not fitted
   * @throws {InvalidParameterError} If the feature count differs from fit, or
   *   X contains unknown categories while handleUnknown="error"
   */
  transform(X) {
    if (!this.fitted) {
      throw new chunkJSCDE774_cjs.NotFittedError("OrdinalEncoder must be fitted before transform");
    }
    assert2D(X, "X");
    const [nSamples, nFeatures] = getShape2D(X);
    const fittedFeatures = this.categories_?.length ?? 0;
    if (nFeatures !== fittedFeatures) {
      throw new chunkJSCDE774_cjs.InvalidParameterError(
        "X has a different feature count than during fit",
        "X",
        nFeatures
      );
    }
    if (nSamples === 0) {
      return chunk6AE5FKKQ_cjs.zeros([0, nFeatures], { dtype: "float64" });
    }
    const maps = this.categoryToIndex_;
    const result = [];
    for (let i = 0; i < nSamples; i++) {
      const row = new Array(nFeatures);
      for (let j = 0; j < nFeatures; j++) {
        const val = read2DValue(X, i, j);
        const map = maps?.[j];
        if (!map) {
          throw new chunkJSCDE774_cjs.DeepboxError("OrdinalEncoder internal error: missing fitted categories");
        }
        const idx = map.get(val);
        if (idx !== void 0) {
          row[j] = idx;
          continue;
        }
        if (this.handleUnknown === "useEncodedValue") {
          row[j] = this.unknownValue;
          continue;
        }
        throw new chunkJSCDE774_cjs.InvalidParameterError(
          `Unknown category: ${String(val)} in feature ${j}`,
          "X",
          val
        );
      }
      result.push(row);
    }
    return chunk6AE5FKKQ_cjs.tensor(result, { dtype: "float64" });
  }
  /**
   * Fit encoder and transform X in one step.
   * Convenience method equivalent to calling fit(X).transform(X).
   *
   * @param X - Training data (2D tensor)
   * @returns Encoded data
   */
  fitTransform(X) {
    return this.fit(X).transform(X);
  }
  /**
   * Transform ordinal integers back to original categories.
   * Reverses the encoding performed by transform().
   *
   * @param X - Encoded data (2D numeric tensor of category indices)
   * @returns Original categorical data
   * @throws {NotFittedError} If encoder is not fitted
   * @throws {InvalidParameterError} If X contains invalid indices or the
   *   configured unknownValue sentinel
   */
  inverseTransform(X) {
    if (!this.fitted) {
      throw new chunkJSCDE774_cjs.NotFittedError("OrdinalEncoder must be fitted before inverse_transform");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    const [nSamples, nFeatures] = getShape2D(X);
    const fittedFeatures = this.categories_?.length ?? 0;
    if (nFeatures !== fittedFeatures) {
      throw new chunkJSCDE774_cjs.InvalidParameterError(
        "X has a different feature count than during fit",
        "X",
        nFeatures
      );
    }
    if (nSamples === 0 || nFeatures === 0) {
      // Empty result: choose the dtype from the kind of category values that were fitted.
      const categoryRows = this.categories_ ?? [];
      const categoryType = inferCategoryTypeFromRows(categoryRows, "X");
      if (categoryType === "string") {
        return chunk6AE5FKKQ_cjs.empty([0, nFeatures], { dtype: "string" });
      }
      if (categoryType === "bigint") {
        return chunk6AE5FKKQ_cjs.empty([0, nFeatures], { dtype: "int64" });
      }
      return chunk6AE5FKKQ_cjs.zeros([0, nFeatures], { dtype: "float64" });
    }
    const [stride0, stride1] = getStrides2D(X);
    const data = getNumericData(X);
    const result = [];
    for (let i = 0; i < nSamples; i++) {
      const row = new Array(nFeatures);
      for (let j = 0; j < nFeatures; j++) {
        const raw = data[X.offset + i * stride0 + j * stride1];
        if (raw === void 0) {
          throw new chunkJSCDE774_cjs.DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        const idx = Number(raw);
        // NaN never equals itself, so a NaN sentinel needs an explicit isNaN comparison.
        const isUnknownValue = this.handleUnknown === "useEncodedValue" && (Number.isNaN(idx) ? Number.isNaN(this.unknownValue) : idx === this.unknownValue);
        if (isUnknownValue) {
          throw new chunkJSCDE774_cjs.InvalidParameterError(
            "Cannot inverse-transform unknown encoded value",
            "X",
            idx
          );
        }
        const cats = this.categories_?.[j];
        if (!cats || idx < 0 || idx >= cats.length || !Number.isInteger(idx)) {
          throw new chunkJSCDE774_cjs.InvalidParameterError(
            `Invalid encoded value: ${idx} for feature ${j}. Must be integer in [0, ${(cats?.length ?? 0) - 1}]`,
            "X",
            idx
          );
        }
        const catVal = cats[idx];
        if (catVal === void 0) {
          throw new chunkJSCDE774_cjs.DeepboxError("Internal error: category value missing");
        }
        row[j] = catVal;
      }
      result.push(row);
    }
    return toCategoryMatrixTensor(result, "X");
  }
};
|
|
1137
|
+
var LabelBinarizer = class {
|
|
1138
|
+
/** Indicates whether the binarizer has been fitted to data */
|
|
1139
|
+
fitted = false;
|
|
1140
|
+
/** Array of unique classes found during fitting, sorted */
|
|
1141
|
+
classes_;
|
|
1142
|
+
/** Map from class value to index for O(1) lookups */
|
|
1143
|
+
classToIndex_;
|
|
1144
|
+
/** Value used for positive class */
|
|
1145
|
+
posLabel;
|
|
1146
|
+
/** Value used for negative class */
|
|
1147
|
+
negLabel;
|
|
1148
|
+
/** Whether to return sparse matrix output */
|
|
1149
|
+
sparse;
|
|
1150
|
+
/**
|
|
1151
|
+
* Creates a new LabelBinarizer instance.
|
|
1152
|
+
*
|
|
1153
|
+
* @param options - Configuration options
|
|
1154
|
+
* @param options.posLabel - Value for positive class (default: 1)
|
|
1155
|
+
* @param options.negLabel - Value for negative class (default: 0)
|
|
1156
|
+
* @param options.sparse - If true, returns CSRMatrix (default: false)
|
|
1157
|
+
* @param options.sparseOutput - Alias for sparse (default: false)
|
|
1158
|
+
*/
|
|
1159
|
+
constructor(options = {}) {
|
|
1160
|
+
this.posLabel = options.posLabel ?? 1;
|
|
1161
|
+
this.negLabel = options.negLabel ?? 0;
|
|
1162
|
+
const sparseOption = options.sparse ?? options.sparseOutput ?? false;
|
|
1163
|
+
if (!Number.isFinite(this.posLabel) || !Number.isFinite(this.negLabel)) {
|
|
1164
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError("posLabel and negLabel must be finite numbers", "posLabel");
|
|
1165
|
+
}
|
|
1166
|
+
if (this.posLabel <= this.negLabel) {
|
|
1167
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
1168
|
+
"posLabel must be greater than negLabel",
|
|
1169
|
+
"posLabel",
|
|
1170
|
+
this.posLabel
|
|
1171
|
+
);
|
|
1172
|
+
}
|
|
1173
|
+
if (options.sparse !== void 0 && options.sparseOutput !== void 0) {
|
|
1174
|
+
if (options.sparse !== options.sparseOutput) {
|
|
1175
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
1176
|
+
"sparse and sparseOutput must match when both are provided",
|
|
1177
|
+
"sparse",
|
|
1178
|
+
options.sparse
|
|
1179
|
+
);
|
|
1180
|
+
}
|
|
1181
|
+
}
|
|
1182
|
+
if (typeof sparseOption !== "boolean") {
|
|
1183
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError("sparse must be a boolean", "sparse", sparseOption);
|
|
1184
|
+
}
|
|
1185
|
+
if (sparseOption && this.negLabel !== 0) {
|
|
1186
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
1187
|
+
"sparse output requires negLabel to be 0",
|
|
1188
|
+
"negLabel",
|
|
1189
|
+
this.negLabel
|
|
1190
|
+
);
|
|
1191
|
+
}
|
|
1192
|
+
this.sparse = sparseOption;
|
|
1193
|
+
}
|
|
1194
|
+
/**
|
|
1195
|
+
* Fit label binarizer to a set of labels.
|
|
1196
|
+
* Learns the unique classes present in the data.
|
|
1197
|
+
*
|
|
1198
|
+
* @param y - Target labels (1D tensor)
|
|
1199
|
+
* @returns this - Returns self for method chaining
|
|
1200
|
+
* @throws {InvalidParameterError} If y is empty
|
|
1201
|
+
*/
|
|
1202
|
+
fit(y) {
|
|
1203
|
+
assert1D(y, "y");
|
|
1204
|
+
if (y.size === 0) {
|
|
1205
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError("Cannot fit LabelBinarizer on empty array", "y");
|
|
1206
|
+
}
|
|
1207
|
+
const uniqueSet = /* @__PURE__ */ new Set();
|
|
1208
|
+
for (let i = 0; i < y.size; i++) {
|
|
1209
|
+
uniqueSet.add(read1DValue(y, i));
|
|
1210
|
+
}
|
|
1211
|
+
this.classes_ = sortCategories(uniqueSet, "y");
|
|
1212
|
+
this.classToIndex_ = /* @__PURE__ */ new Map();
|
|
1213
|
+
for (let i = 0; i < this.classes_.length; i++) {
|
|
1214
|
+
this.classToIndex_.set(categoryValueAt(this.classes_, i, "LabelBinarizer.fit"), i);
|
|
1215
|
+
}
|
|
1216
|
+
this.fitted = true;
|
|
1217
|
+
return this;
|
|
1218
|
+
}
|
|
1219
|
+
/**
|
|
1220
|
+
* Transform labels to binary matrix.
|
|
1221
|
+
* Each label is converted to a binary vector with a single 1.
|
|
1222
|
+
*
|
|
1223
|
+
* @param y - Labels to transform (1D tensor)
|
|
1224
|
+
* @returns Binary matrix (Tensor or CSRMatrix) with shape [n_samples, n_classes]
|
|
1225
|
+
* @throws {NotFittedError} If binarizer is not fitted
|
|
1226
|
+
* @throws {InvalidParameterError} If y contains unknown labels
|
|
1227
|
+
*/
|
|
1228
|
+
transform(y) {
|
|
1229
|
+
if (!this.fitted) {
|
|
1230
|
+
throw new chunkJSCDE774_cjs.NotFittedError("LabelBinarizer must be fitted before transform");
|
|
1231
|
+
}
|
|
1232
|
+
assert1D(y, "y");
|
|
1233
|
+
if (y.size === 0) {
|
|
1234
|
+
const nClasses2 = this.classes_?.length ?? 0;
|
|
1235
|
+
return this.sparse ? chunk6AE5FKKQ_cjs.CSRMatrix.fromCOO({
|
|
1236
|
+
rows: 0,
|
|
1237
|
+
cols: nClasses2,
|
|
1238
|
+
rowIndices: new Int32Array(0),
|
|
1239
|
+
colIndices: new Int32Array(0),
|
|
1240
|
+
values: new Float64Array(0)
|
|
1241
|
+
}) : chunk6AE5FKKQ_cjs.zeros([0, nClasses2], { dtype: "float64" });
|
|
1242
|
+
}
|
|
1243
|
+
const nSamples = y.size;
|
|
1244
|
+
const nClasses = this.classes_?.length ?? 0;
|
|
1245
|
+
const lookup = this.classToIndex_;
|
|
1246
|
+
if (!lookup) {
|
|
1247
|
+
throw new chunkJSCDE774_cjs.DeepboxError("LabelBinarizer internal error: missing fitted lookup");
|
|
1248
|
+
}
|
|
1249
|
+
if (this.sparse) {
|
|
1250
|
+
const rowIdx = [];
|
|
1251
|
+
const colIdx = [];
|
|
1252
|
+
const vals = [];
|
|
1253
|
+
for (let i = 0; i < nSamples; i++) {
|
|
1254
|
+
const val = read1DValue(y, i);
|
|
1255
|
+
const idx = lookup.get(val);
|
|
1256
|
+
if (idx === void 0) {
|
|
1257
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
1258
|
+
`Unknown label: ${String(val)}. Label must be present during fit.`,
|
|
1259
|
+
"y",
|
|
1260
|
+
val
|
|
1261
|
+
);
|
|
1262
|
+
}
|
|
1263
|
+
rowIdx.push(i);
|
|
1264
|
+
colIdx.push(idx);
|
|
1265
|
+
vals.push(this.posLabel);
|
|
1266
|
+
}
|
|
1267
|
+
return chunk6AE5FKKQ_cjs.CSRMatrix.fromCOO({
|
|
1268
|
+
rows: nSamples,
|
|
1269
|
+
cols: nClasses,
|
|
1270
|
+
rowIndices: Int32Array.from(rowIdx),
|
|
1271
|
+
colIndices: Int32Array.from(colIdx),
|
|
1272
|
+
values: Float64Array.from(vals)
|
|
1273
|
+
});
|
|
1274
|
+
}
|
|
1275
|
+
const result = new Array(nSamples);
|
|
1276
|
+
for (let i = 0; i < nSamples; i++) {
|
|
1277
|
+
result[i] = new Array(nClasses).fill(this.negLabel);
|
|
1278
|
+
}
|
|
1279
|
+
for (let i = 0; i < nSamples; i++) {
|
|
1280
|
+
const val = read1DValue(y, i);
|
|
1281
|
+
const idx = lookup.get(val);
|
|
1282
|
+
if (idx === void 0) {
|
|
1283
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
1284
|
+
`Unknown label: ${String(val)}. Label must be present during fit.`,
|
|
1285
|
+
"y",
|
|
1286
|
+
val
|
|
1287
|
+
);
|
|
1288
|
+
}
|
|
1289
|
+
const row = result[i];
|
|
1290
|
+
if (!row) {
|
|
1291
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: result row access failed");
|
|
1292
|
+
}
|
|
1293
|
+
row[idx] = this.posLabel;
|
|
1294
|
+
}
|
|
1295
|
+
return chunk6AE5FKKQ_cjs.tensor(result, { dtype: "float64" });
|
|
1296
|
+
}
|
|
1297
|
+
/**
|
|
1298
|
+
* Fit binarizer and transform labels in one step.
|
|
1299
|
+
* Convenience method equivalent to calling fit(y).transform(y).
|
|
1300
|
+
*
|
|
1301
|
+
* @param y - Target labels (1D tensor)
|
|
1302
|
+
* @returns Binary matrix (Tensor or CSRMatrix)
|
|
1303
|
+
*/
|
|
1304
|
+
fitTransform(y) {
|
|
1305
|
+
return this.fit(y).transform(y);
|
|
1306
|
+
}
|
|
1307
|
+
/**
|
|
1308
|
+
* Transform binary matrix back to labels.
|
|
1309
|
+
* Finds the column with maximum value for each row.
|
|
1310
|
+
*
|
|
1311
|
+
* @param Y - Binary matrix (2D tensor or CSRMatrix)
|
|
1312
|
+
* @returns Original labels (1D tensor)
|
|
1313
|
+
* @throws {NotFittedError} If binarizer is not fitted
|
|
1314
|
+
* @throws {InvalidParameterError} If Y has invalid shape
|
|
1315
|
+
*/
|
|
1316
|
+
inverseTransform(Y) {
|
|
1317
|
+
if (!this.fitted) {
|
|
1318
|
+
throw new chunkJSCDE774_cjs.NotFittedError("LabelBinarizer must be fitted before inverse_transform");
|
|
1319
|
+
}
|
|
1320
|
+
if (Y instanceof chunk6AE5FKKQ_cjs.CSRMatrix) {
|
|
1321
|
+
if (this.negLabel !== 0) {
|
|
1322
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
1323
|
+
"Sparse inverse transform requires negLabel to be 0",
|
|
1324
|
+
"negLabel",
|
|
1325
|
+
this.negLabel
|
|
1326
|
+
);
|
|
1327
|
+
}
|
|
1328
|
+
const [rows, cols] = Y.shape;
|
|
1329
|
+
if (rows === void 0 || cols === void 0) {
|
|
1330
|
+
throw new chunkJSCDE774_cjs.ShapeError("Y must have valid shape");
|
|
1331
|
+
}
|
|
1332
|
+
const nClasses2 = this.classes_?.length ?? 0;
|
|
1333
|
+
if (cols !== nClasses2) {
|
|
1334
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError("column count does not match number of classes", "Y", cols);
|
|
1335
|
+
}
|
|
1336
|
+
const classes2 = this.classes_;
|
|
1337
|
+
if (!classes2) {
|
|
1338
|
+
throw new chunkJSCDE774_cjs.DeepboxError("LabelBinarizer internal error: missing fitted classes");
|
|
1339
|
+
}
|
|
1340
|
+
if (rows === 0) {
|
|
1341
|
+
return emptyCategoryVectorFromClasses(classes2, "y");
|
|
1342
|
+
}
|
|
1343
|
+
const result2 = new Array(rows);
|
|
1344
|
+
for (let i = 0; i < rows; i++) {
|
|
1345
|
+
let maxIdx = 0;
|
|
1346
|
+
let maxVal = this.negLabel;
|
|
1347
|
+
const start = Y.indptr[i] ?? 0;
|
|
1348
|
+
const end = Y.indptr[i + 1] ?? start;
|
|
1349
|
+
for (let p = start; p < end; p++) {
|
|
1350
|
+
const col = Y.indices[p];
|
|
1351
|
+
if (col === void 0) {
|
|
1352
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: sparse column index missing");
|
|
1353
|
+
}
|
|
1354
|
+
if (col < 0 || col >= nClasses2) {
|
|
1355
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
1356
|
+
"column index out of bounds for fitted classes",
|
|
1357
|
+
"Y",
|
|
1358
|
+
col
|
|
1359
|
+
);
|
|
1360
|
+
}
|
|
1361
|
+
const raw = Y.data[p];
|
|
1362
|
+
if (raw === void 0) {
|
|
1363
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: sparse value missing");
|
|
1364
|
+
}
|
|
1365
|
+
const val = Number(raw);
|
|
1366
|
+
if (val > maxVal) {
|
|
1367
|
+
maxVal = val;
|
|
1368
|
+
maxIdx = col;
|
|
1369
|
+
}
|
|
1370
|
+
}
|
|
1371
|
+
if (maxVal <= this.negLabel) {
|
|
1372
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
1373
|
+
`No active label found for sample ${i}. LabelBinarizer expects exactly one active label.`,
|
|
1374
|
+
"Y"
|
|
1375
|
+
);
|
|
1376
|
+
}
|
|
1377
|
+
result2[i] = categoryValueAt(classes2, maxIdx, "LabelBinarizer.inverseTransform");
|
|
1378
|
+
}
|
|
1379
|
+
return toCategoryVectorTensor(result2, "y");
|
|
1380
|
+
}
|
|
1381
|
+
assert2D(Y, "Y");
|
|
1382
|
+
assertNumericTensor(Y, "Y");
|
|
1383
|
+
const [nSamples, nCols] = getShape2D(Y);
|
|
1384
|
+
const nClasses = this.classes_?.length ?? 0;
|
|
1385
|
+
if (nCols !== nClasses) {
|
|
1386
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError("column count does not match number of classes", "Y", nCols);
|
|
1387
|
+
}
|
|
1388
|
+
const classes = this.classes_;
|
|
1389
|
+
if (!classes) {
|
|
1390
|
+
throw new chunkJSCDE774_cjs.DeepboxError("LabelBinarizer internal error: missing fitted classes");
|
|
1391
|
+
}
|
|
1392
|
+
if (nSamples === 0) {
|
|
1393
|
+
return emptyCategoryVectorFromClasses(classes, "y");
|
|
1394
|
+
}
|
|
1395
|
+
const result = new Array(nSamples);
|
|
1396
|
+
const [stride0, stride1] = getStrides2D(Y);
|
|
1397
|
+
const data = getNumericData(Y);
|
|
1398
|
+
for (let i = 0; i < nSamples; i++) {
|
|
1399
|
+
let maxIdx = 0;
|
|
1400
|
+
const rowBase = Y.offset + i * stride0;
|
|
1401
|
+
const first = data[rowBase];
|
|
1402
|
+
if (first === void 0) {
|
|
1403
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: numeric tensor access out of bounds");
|
|
1404
|
+
}
|
|
1405
|
+
let maxVal = Number(first);
|
|
1406
|
+
for (let j = 1; j < nCols; j++) {
|
|
1407
|
+
const raw = data[rowBase + j * stride1];
|
|
1408
|
+
if (raw === void 0) {
|
|
1409
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: numeric tensor access out of bounds");
|
|
1410
|
+
}
|
|
1411
|
+
const val = Number(raw);
|
|
1412
|
+
if (val > maxVal) {
|
|
1413
|
+
maxVal = val;
|
|
1414
|
+
maxIdx = j;
|
|
1415
|
+
}
|
|
1416
|
+
}
|
|
1417
|
+
if (maxVal <= this.negLabel) {
|
|
1418
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
1419
|
+
`No active label found for sample ${i}. LabelBinarizer expects exactly one active label.`,
|
|
1420
|
+
"Y"
|
|
1421
|
+
);
|
|
1422
|
+
}
|
|
1423
|
+
result[i] = categoryValueAt(classes, maxIdx, "LabelBinarizer.inverseTransform");
|
|
1424
|
+
}
|
|
1425
|
+
return toCategoryVectorTensor(result, "y");
|
|
1426
|
+
}
|
|
1427
|
+
};
|
|
1428
|
+
var MultiLabelBinarizer = class {
  /** Set to true once fit() has run to completion */
  fitted = false;
  /** Fitted classes: the explicit `classes` option when given, otherwise the sorted unique labels seen by fit() */
  classes_;
  /** Lookup table from class value to its column index */
  classToIndex_;
  /** When true, transform() emits a CSRMatrix; otherwise a dense tensor */
  sparse;
  /** Validated explicit class ordering passed to the constructor, if any */
  classesOption;
  /**
   * Builds an unfitted MultiLabelBinarizer.
   *
   * @param options - Configuration options
   * @param options.sparse - If true, transform() returns a CSRMatrix; otherwise a dense Tensor (default: false)
   * @param options.sparseOutput - Alias for sparse; must agree with it when both are given (default: false)
   * @param options.classes - Explicit class ordering to use instead of sorting the observed labels
   * @throws {InvalidParameterError} If sparse and sparseOutput disagree, or the resolved flag is not a boolean
   */
  constructor(options = {}) {
    const bothFlagsGiven = options.sparse !== void 0 && options.sparseOutput !== void 0;
    if (bothFlagsGiven && options.sparse !== options.sparseOutput) {
      throw new chunkJSCDE774_cjs.InvalidParameterError(
        "sparse and sparseOutput must match when both are provided",
        "sparse",
        options.sparse
      );
    }
    this.sparse = options.sparse ?? options.sparseOutput ?? false;
    if (typeof this.sparse !== "boolean") {
      throw new chunkJSCDE774_cjs.InvalidParameterError("sparse must be a boolean", "sparse", this.sparse);
    }
    if (options.classes !== void 0) {
      this.classesOption = validateCategoryValues(options.classes, "classes");
    }
  }
  /**
   * Learn the set of classes from the given label sets.
   * With an explicit `classes` option that ordering is used as-is and every label
   * in `y` must belong to it; otherwise the unique labels of `y` are sorted.
   *
   * @param y - Array of label sets, each an array of string/number/bigint labels
   * @returns this - Returns self for method chaining
   * @throws {InvalidParameterError} If y is empty, contains a non-array entry or an
   *   unsupported label type, if `classes` is empty, or if a label is not in `classes`
   */
  fit(y) {
    if (y.length === 0) {
      throw new chunkJSCDE774_cjs.InvalidParameterError("Cannot fit MultiLabelBinarizer on empty array", "y");
    }
    for (const labelSet of y) {
      if (!Array.isArray(labelSet)) {
        throw new chunkJSCDE774_cjs.InvalidParameterError("MultiLabelBinarizer expects label arrays", "y", labelSet);
      }
      for (const label of labelSet) {
        const kind = typeof label;
        if (kind !== "string" && kind !== "number" && kind !== "bigint") {
          throw new chunkJSCDE774_cjs.InvalidParameterError(
            "MultiLabelBinarizer labels must be strings, numbers, or bigints",
            "y",
            label
          );
        }
      }
    }
    const explicitClasses = this.classesOption;
    if (explicitClasses && explicitClasses.length === 0) {
      throw new chunkJSCDE774_cjs.InvalidParameterError("classes must contain at least one value", "classes");
    }
    if (explicitClasses) {
      this.classes_ = Array.from(explicitClasses);
    } else {
      const observed = /* @__PURE__ */ new Set();
      for (const labelSet of y) {
        for (const label of labelSet) {
          observed.add(label);
        }
      }
      this.classes_ = sortCategories(observed, "y");
    }
    const fittedClasses = this.classes_;
    const indexByClass = /* @__PURE__ */ new Map();
    this.classToIndex_ = indexByClass;
    for (let position = 0; position < fittedClasses.length; position += 1) {
      indexByClass.set(categoryValueAt(fittedClasses, position, "MultiLabelBinarizer.fit"), position);
    }
    if (explicitClasses) {
      // With a caller-supplied vocabulary, the training labels must all be part of it.
      for (const labelSet of y) {
        for (const label of labelSet) {
          if (!indexByClass.has(label)) {
            throw new chunkJSCDE774_cjs.InvalidParameterError(
              `Unknown label: ${String(label)}. Label must be present in classes.`,
              "y",
              label
            );
          }
        }
      }
    }
    this.fitted = true;
    return this;
  }
  /**
   * Encode label sets as a binary indicator matrix.
   * A row may have several active (1) columns; repeated labels in one set count once.
   *
   * @param y - Array of label sets to transform (string/number/bigint labels)
   * @returns Binary matrix (Tensor or CSRMatrix) with shape [n_samples, n_classes]
   * @throws {NotFittedError} If binarizer is not fitted
   * @throws {InvalidParameterError} If y is malformed or contains unknown labels
   */
  transform(y) {
    if (!this.fitted) {
      throw new chunkJSCDE774_cjs.NotFittedError("MultiLabelBinarizer must be fitted before transform");
    }
    for (const labelSet of y) {
      if (!Array.isArray(labelSet)) {
        throw new chunkJSCDE774_cjs.InvalidParameterError("MultiLabelBinarizer expects label arrays", "y", labelSet);
      }
      for (const label of labelSet) {
        const kind = typeof label;
        if (kind !== "string" && kind !== "number" && kind !== "bigint") {
          throw new chunkJSCDE774_cjs.InvalidParameterError(
            "MultiLabelBinarizer labels must be strings, numbers, or bigints",
            "y",
            label
          );
        }
      }
    }
    const sampleCount = y.length;
    const classCount = this.classes_?.length ?? 0;
    if (sampleCount === 0) {
      // No samples: hand back an empty matrix that still has one column per class.
      if (this.sparse) {
        return chunk6AE5FKKQ_cjs.CSRMatrix.fromCOO({
          rows: 0,
          cols: classCount,
          rowIndices: new Int32Array(0),
          colIndices: new Int32Array(0),
          values: new Float64Array(0)
        });
      }
      return chunk6AE5FKKQ_cjs.zeros([0, classCount], { dtype: "float64" });
    }
    const indexByClass = this.classToIndex_;
    if (!indexByClass) {
      throw new chunkJSCDE774_cjs.DeepboxError("MultiLabelBinarizer internal error: missing fitted lookup");
    }
    const unknownLabelError = (label) => new chunkJSCDE774_cjs.InvalidParameterError(
      `Unknown label: ${String(label)}. Label must be present during fit.`,
      "y",
      label
    );
    if (this.sparse) {
      const cooRows = [];
      const cooCols = [];
      const cooValues = [];
      for (let sample = 0; sample < sampleCount; sample += 1) {
        const labelSet = y[sample];
        if (!labelSet) continue;
        // Each (row, column) pair is emitted once, even if the label repeats.
        const emitted = /* @__PURE__ */ new Set();
        for (const label of labelSet) {
          const column = indexByClass.get(label);
          if (column === void 0) {
            throw unknownLabelError(label);
          }
          if (!emitted.has(column)) {
            emitted.add(column);
            cooRows.push(sample);
            cooCols.push(column);
            cooValues.push(1);
          }
        }
      }
      return chunk6AE5FKKQ_cjs.CSRMatrix.fromCOO({
        rows: sampleCount,
        cols: classCount,
        rowIndices: Int32Array.from(cooRows),
        colIndices: Int32Array.from(cooCols),
        values: Float64Array.from(cooValues)
      });
    }
    const indicator = Array.from({ length: sampleCount }, () => new Array(classCount).fill(0));
    for (let sample = 0; sample < sampleCount; sample += 1) {
      const labelSet = y[sample];
      if (!labelSet) continue;
      for (const label of labelSet) {
        const column = indexByClass.get(label);
        if (column === void 0) {
          throw unknownLabelError(label);
        }
        const indicatorRow = indicator[sample];
        if (!indicatorRow) {
          throw new chunkJSCDE774_cjs.DeepboxError("Internal error: result row access failed");
        }
        indicatorRow[column] = 1;
      }
    }
    return chunk6AE5FKKQ_cjs.tensor(indicator, { dtype: "float64" });
  }
  /**
   * Learn the classes from `y`, then encode that same `y`.
   * Shorthand for calling fit(y) followed by transform(y).
   *
   * @param y - Array of label sets (string/number/bigint labels)
   * @returns Binary matrix (Tensor or CSRMatrix)
   */
  fitTransform(y) {
    const fittedBinarizer = this.fit(y);
    return fittedBinarizer.transform(y);
  }
  /**
   * Decode a binary indicator matrix back into label sets.
   * Every entry greater than 0 contributes its column's class to the row's set.
   *
   * @param Y - Binary matrix (Tensor or CSRMatrix)
   * @returns Array of label sets, one per sample
   * @throws {NotFittedError} If binarizer is not fitted
   * @throws {InvalidParameterError} If Y's column count differs from the fitted classes
   * @throws {ShapeError} If a sparse Y does not expose both dimensions
   */
  inverseTransform(Y) {
    if (!this.fitted) {
      throw new chunkJSCDE774_cjs.NotFittedError("MultiLabelBinarizer must be fitted before inverse_transform");
    }
    const isSparse = Y instanceof chunk6AE5FKKQ_cjs.CSRMatrix;
    let rowCount;
    let colCount;
    if (isSparse) {
      [rowCount, colCount] = Y.shape;
      if (rowCount === void 0 || colCount === void 0) {
        throw new chunkJSCDE774_cjs.ShapeError("Y must have valid shape");
      }
    } else {
      assert2D(Y, "Y");
      assertNumericTensor(Y, "Y");
      rowCount = Y.shape[0] ?? 0;
      colCount = Y.shape[1] ?? 0;
    }
    const expectedCols = this.classes_?.length ?? 0;
    if (colCount !== expectedCols) {
      throw new chunkJSCDE774_cjs.InvalidParameterError(
        "column count does not match number of classes",
        "Y",
        colCount
      );
    }
    if (rowCount === 0) {
      return [];
    }
    const fittedClasses = this.classes_;
    if (!fittedClasses) {
      throw new chunkJSCDE774_cjs.DeepboxError("MultiLabelBinarizer internal error: missing fitted classes");
    }
    const decoded = [];
    if (isSparse) {
      for (let row = 0; row < rowCount; row += 1) {
        const begin = Y.indptr[row] ?? 0;
        const stop = Y.indptr[row + 1] ?? begin;
        const active = [];
        for (let pos = begin; pos < stop; pos += 1) {
          const column = Y.indices[pos];
          if (column === void 0) {
            throw new chunkJSCDE774_cjs.DeepboxError("Internal error: sparse column index missing");
          }
          if (column < 0 || column >= expectedCols) {
            throw new chunkJSCDE774_cjs.InvalidParameterError(
              "column index out of bounds for fitted classes",
              "Y",
              column
            );
          }
          const stored = Y.data[pos];
          if (stored === void 0) {
            throw new chunkJSCDE774_cjs.DeepboxError("Internal error: sparse value missing");
          }
          if (Number(stored) > 0) {
            active.push(categoryValueAt(fittedClasses, column, "MultiLabelBinarizer.inverseTransform"));
          }
        }
        decoded.push(active);
      }
      return decoded;
    }
    const [rowStride, colStride] = getStrides2D(Y);
    const buffer = getNumericData(Y);
    for (let row = 0; row < rowCount; row += 1) {
      const base = Y.offset + row * rowStride;
      const active = [];
      for (let col = 0; col < colCount; col += 1) {
        const cell = buffer[base + col * colStride];
        if (cell === void 0) {
          throw new chunkJSCDE774_cjs.DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        if (Number(cell) > 0) {
          active.push(categoryValueAt(fittedClasses, col, "MultiLabelBinarizer.inverseTransform"));
        }
      }
      decoded.push(active);
    }
    return decoded;
  }
};
|
|
1735
|
+
|
|
1736
|
+
// src/preprocess/scalers.ts
|
|
1737
|
+
/**
 * Return the backing storage of a numeric tensor.
 *
 * @param X - Tensor-like object exposing `dtype` and `data`
 * @param name - Argument name used in the error message
 * @returns `X.data`, unchanged
 * @throws {DTypeError} If `X.dtype` is "string"
 * @throws {DeepboxError} If `X.data` is a plain Array
 */
function getNumericData2(X, name) {
  const isStringTensor = X.dtype === "string";
  if (isStringTensor) {
    throw new chunkJSCDE774_cjs.DTypeError(`${name} must be numeric`);
  }
  const storage = X.data;
  if (Array.isArray(storage)) {
    throw new chunkJSCDE774_cjs.DeepboxError("Internal error: invalid numeric tensor storage");
  }
  return storage;
}
|
|
1746
|
+
/**
 * Resolve an optional boolean option.
 *
 * @param value - Caller-supplied option value
 * @param name - Option name used in the error message and error metadata
 * @param defaultValue - Returned as-is when `value` is undefined
 * @returns `defaultValue` for undefined input, otherwise the boolean `value`
 * @throws {InvalidParameterError} If `value` is defined but not a boolean
 */
function parseBooleanOption(value, name, defaultValue) {
  switch (typeof value) {
    case "undefined":
      return defaultValue;
    case "boolean":
      return value;
    default:
      throw new chunkJSCDE774_cjs.InvalidParameterError(`${name} must be a boolean`, name, value);
  }
}
|
|
1755
|
+
/**
 * Verify that every element of a 2D numeric tensor is a finite number.
 * Elements are visited row by row through the tensor's offset and strides.
 *
 * @param X - 2D numeric tensor to scan
 * @param name - Argument name used in error messages
 * @throws {DataValidationError} On the first NaN/Infinity; the message reports its
 *   row-major position (row * nFeatures + column)
 * @throws {DeepboxError} If a computed storage index has no element
 */
function validateFiniteData(X, name) {
  const [rowCount, colCount] = getShape2D(X);
  const buffer = getNumericData2(X, name);
  const [rowStride, colStride] = getStrides2D(X);
  for (let row = 0; row < rowCount; row += 1) {
    const base = X.offset + row * rowStride;
    for (let col = 0; col < colCount; col += 1) {
      const cell = buffer[base + col * colStride];
      if (cell === void 0) {
        throw new chunkJSCDE774_cjs.DeepboxError("Internal error: numeric tensor access out of bounds");
      }
      if (!Number.isFinite(Number(cell))) {
        throw new chunkJSCDE774_cjs.DataValidationError(
          `${name} contains NaN or Infinity at index ${row * colCount + col}`
        );
      }
    }
  }
}
|
|
1775
|
+
/**
 * Remove tiny floating-point residue from an inverse-transformed value.
 * Tries the nearest integer first, then the value rounded to 12 decimal places,
 * and returns the first candidate lying within 1e-12 of the input.
 *
 * @param value - Value to clean up
 * @returns The snapped candidate, or `value` itself when it is non-finite or no candidate is close enough
 */
function snapInverseValue(value) {
  if (!Number.isFinite(value)) return value;
  const tolerance = 1e-12;
  const candidates = [
    Math.round(value),
    Math.round(value * 1e12) / 1e12
  ];
  for (const candidate of candidates) {
    if (Math.abs(value - candidate) < tolerance) return candidate;
  }
  return value;
}
|
|
1783
|
+
/**
 * Approximate the quantile function (inverse CDF) of the standard normal distribution.
 * Uses one rational polynomial for the central region and another, in
 * sqrt(-2 * ln(tail probability)), for the two tails below 0.02425 and above 1 - 0.02425.
 *
 * @param p - Probability strictly between 0 and 1
 * @returns The z value whose standard-normal CDF is approximately `p`
 * @throws {InvalidParameterError} If `p` is not a finite number in the open interval (0, 1)
 */
function normalQuantile(p) {
  const insideOpenUnitInterval = Number.isFinite(p) && p > 0 && p < 1;
  if (!insideOpenUnitInterval) {
    throw new chunkJSCDE774_cjs.InvalidParameterError(
      "normalQuantile requires p in the open interval (0, 1)",
      "p",
      p
    );
  }
  // Coefficients are listed highest degree first, ready for Horner evaluation.
  const centralNumerator = [
    -39.69683028665376,
    220.9460984245205,
    -275.9285104469687,
    138.357751867269,
    -30.66479806614716,
    2.506628277459239
  ];
  const centralDenominator = [
    -54.47609879822406,
    161.5858368580409,
    -155.6989798598866,
    66.80131188771972,
    -13.28068155288572,
    1
  ];
  const tailNumerator = [
    -0.007784894002430293,
    -0.3223964580411365,
    -2.400758277161838,
    -2.549732539343734,
    4.374664141464968,
    2.938163982698783
  ];
  const tailDenominator = [
    0.007784695709041462,
    0.3224671290700398,
    2.445134137142996,
    3.754408661907416,
    1
  ];
  const horner = (coefficients, x) => {
    let acc = coefficients[0];
    for (let k = 1; k < coefficients.length; k += 1) {
      acc = acc * x + coefficients[k];
    }
    return acc;
  };
  // Lower-tail value for a small tail probability; the upper tail is its mirror image.
  const lowerTail = (tailProbability) => {
    const t = Math.sqrt(-2 * Math.log(tailProbability));
    return horner(tailNumerator, t) / horner(tailDenominator, t);
  };
  const lowerBreak = 0.02425;
  const upperBreak = 1 - lowerBreak;
  if (p < lowerBreak) {
    return lowerTail(p);
  }
  if (p > upperBreak) {
    return -lowerTail(1 - p);
  }
  const centered = p - 0.5;
  const squared = centered * centered;
  return horner(centralNumerator, squared) * centered / horner(centralDenominator, squared);
}
|
|
1826
|
+
var StandardScaler = class {
  /** True once fit() has completed */
  fitted = false;
  /** Per-feature means (float64 tensor); left undefined when withMean is false */
  mean_;
  /** Per-feature population standard deviations (float64 tensor); left undefined when withStd is false */
  scale_;
  /** Whether transform() subtracts the fitted mean */
  withMean;
  /** Whether transform() divides by the fitted standard deviation */
  withStd;
  /**
   * Creates a new StandardScaler.
   *
   * @param options - Configuration options
   * @param options.withMean - Center data before scaling (default: true)
   * @param options.withStd - Scale data to unit variance (default: true)
   * @param options.copy - Accepted for API parity; transforms are always out-of-place (default: true)
   * @throws {InvalidParameterError} If any provided option is not a boolean
   */
  constructor(options = {}) {
    this.withMean = parseBooleanOption(options.withMean, "withMean", true);
    this.withStd = parseBooleanOption(options.withStd, "withStd", true);
    // `copy` is only validated; its value is never stored.
    parseBooleanOption(options.copy, "copy", true);
  }
  /**
   * Compute the per-feature mean and population standard deviation of X.
   *
   * @param X - 2D numeric tensor of finite values
   * @returns this - Returns self for method chaining
   * @throws {InvalidParameterError} If X has no elements
   */
  fit(X) {
    if (X.size === 0) {
      throw new chunkJSCDE774_cjs.InvalidParameterError("X must contain at least one sample", "X");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    validateFiniteData(X, "X");
    const [nSamples, nFeatures] = getShape2D(X);
    const data = getNumericData2(X, "X");
    const [stride0, stride1] = getStrides2D(X);
    const readCell = (i, j) => {
      const raw = data[X.offset + i * stride0 + j * stride1];
      if (raw === void 0) {
        throw new chunkJSCDE774_cjs.DeepboxError("Internal error: numeric tensor access out of bounds");
      }
      return Number(raw);
    };
    // The means are needed for centering and also as the reference point of
    // the variance, so they are computed whenever either option is enabled.
    let means;
    if (this.withMean || this.withStd) {
      means = new Array(nFeatures).fill(0);
      for (let j = 0; j < nFeatures; j++) {
        let sum = 0;
        for (let i = 0; i < nSamples; i++) {
          sum += readCell(i, j);
        }
        means[j] = sum / nSamples;
      }
    }
    if (this.withStd) {
      const stds = new Array(nFeatures).fill(0);
      for (let j = 0; j < nFeatures; j++) {
        const mean = means?.[j] ?? 0;
        let sumSq = 0;
        for (let i = 0; i < nSamples; i++) {
          const centered = readCell(i, j) - mean;
          sumSq += centered * centered;
        }
        // Population standard deviation: divide by nSamples, not nSamples - 1.
        stds[j] = Math.sqrt(sumSq / nSamples);
      }
      this.scale_ = chunk6AE5FKKQ_cjs.tensor(stds, { dtype: "float64" });
    } else {
      this.scale_ = void 0;
    }
    this.mean_ = this.withMean && means ? chunk6AE5FKKQ_cjs.tensor(means, { dtype: "float64" }) : void 0;
    this.fitted = true;
    return this;
  }
  /**
   * Standardize X with the fitted statistics: subtract the mean (if withMean)
   * and divide by the standard deviation (if withStd). A fitted standard
   * deviation of 0 is treated as 1, so constant features are only centered.
   *
   * @param X - 2D numeric tensor of finite values with the same number of features as the fitted data
   * @returns New float64 tensor on X's device
   * @throws {NotFittedError} If the scaler is not fitted
   * @throws {InvalidParameterError} If X's feature count differs from the fitted statistics
   */
  transform(X) {
    if (!this.fitted) {
      throw new chunkJSCDE774_cjs.NotFittedError("StandardScaler must be fitted before transform");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    validateFiniteData(X, "X");
    const [nSamples, nFeatures] = getShape2D(X);
    const data = getNumericData2(X, "X");
    const [stride0, stride1] = getStrides2D(X);
    const mean = this.mean_;
    const scale = this.scale_;
    const meanData = mean ? getNumericData2(mean, "mean_") : void 0;
    const scaleData = scale ? getNumericData2(scale, "scale_") : void 0;
    const meanStride = mean ? getStride1D(mean) : 0;
    const scaleStride = scale ? getStride1D(scale) : 0;
    if (this.withMean && !mean) {
      throw new chunkJSCDE774_cjs.DeepboxError("StandardScaler internal error: missing mean_");
    }
    if (this.withStd && !scale) {
      throw new chunkJSCDE774_cjs.DeepboxError("StandardScaler internal error: missing scale_");
    }
    // Reject a feature-count mismatch up front. Without this check, fewer
    // columns are silently scaled with the wrong statistics and more columns
    // surface as a misleading internal out-of-bounds error.
    // NOTE(review): assumes a 1D tensor's `size` is its element count — confirm.
    const fittedFeatures = (scale ?? mean)?.size;
    if (fittedFeatures !== void 0 && fittedFeatures !== nFeatures) {
      throw new chunkJSCDE774_cjs.InvalidParameterError(
        `X has ${nFeatures} features, but StandardScaler was fitted with ${fittedFeatures} features`,
        "X",
        nFeatures
      );
    }
    const result = Array.from({ length: nSamples }, () => new Array(nFeatures).fill(0));
    for (let i = 0; i < nSamples; i++) {
      const rowBase = X.offset + i * stride0;
      for (let j = 0; j < nFeatures; j++) {
        const raw = data[rowBase + j * stride1];
        if (raw === void 0) {
          throw new chunkJSCDE774_cjs.DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        let val = Number(raw);
        if (this.withMean && mean && meanData) {
          const meanValue = meanData[mean.offset + j * meanStride];
          if (meanValue === void 0) {
            throw new chunkJSCDE774_cjs.DeepboxError("Internal error: mean tensor access out of bounds");
          }
          val -= Number(meanValue);
        }
        if (this.withStd && scale && scaleData) {
          const rawScale = scaleData[scale.offset + j * scaleStride];
          if (rawScale === void 0) {
            throw new chunkJSCDE774_cjs.DeepboxError("Internal error: scale tensor access out of bounds");
          }
          const std = Number(rawScale);
          // A zero spread would divide by zero; fall back to a unit scale.
          const safeStd = std === 0 ? 1 : std;
          val /= safeStd;
        }
        const row = result[i];
        if (row === void 0) {
          throw new chunkJSCDE774_cjs.DeepboxError("Internal error: result row access failed");
        }
        row[j] = val;
      }
    }
    return chunk6AE5FKKQ_cjs.tensor(result, { dtype: "float64", device: X.device });
  }
  /**
   * Fit to X, then standardize that same X.
   * Shorthand for calling fit(X) followed by transform(X).
   *
   * @param X - 2D numeric tensor of finite values
   * @returns New float64 tensor on X's device
   */
  fitTransform(X) {
    return this.fit(X).transform(X);
  }
  /**
   * Undo transform(): multiply by the fitted standard deviation (if withStd),
   * then add the fitted mean (if withMean). Each output value is passed
   * through snapInverseValue.
   *
   * @param X - 2D numeric tensor with the same number of features as the fitted data
   * @returns New float64 tensor on X's device
   * @throws {NotFittedError} If the scaler is not fitted
   * @throws {InvalidParameterError} If X's feature count differs from the fitted statistics
   */
  inverseTransform(X) {
    if (!this.fitted) {
      throw new chunkJSCDE774_cjs.NotFittedError("StandardScaler must be fitted before inverse_transform");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    const [nSamples, nFeatures] = getShape2D(X);
    const data = getNumericData2(X, "X");
    const [stride0, stride1] = getStrides2D(X);
    const mean = this.mean_;
    const scale = this.scale_;
    const meanData = mean ? getNumericData2(mean, "mean_") : void 0;
    const scaleData = scale ? getNumericData2(scale, "scale_") : void 0;
    const meanStride = mean ? getStride1D(mean) : 0;
    const scaleStride = scale ? getStride1D(scale) : 0;
    if (this.withMean && !mean) {
      throw new chunkJSCDE774_cjs.DeepboxError("StandardScaler internal error: missing mean_");
    }
    if (this.withStd && !scale) {
      throw new chunkJSCDE774_cjs.DeepboxError("StandardScaler internal error: missing scale_");
    }
    // Same feature-count guard as transform().
    // NOTE(review): assumes a 1D tensor's `size` is its element count — confirm.
    const fittedFeatures = (scale ?? mean)?.size;
    if (fittedFeatures !== void 0 && fittedFeatures !== nFeatures) {
      throw new chunkJSCDE774_cjs.InvalidParameterError(
        `X has ${nFeatures} features, but StandardScaler was fitted with ${fittedFeatures} features`,
        "X",
        nFeatures
      );
    }
    const result = Array.from({ length: nSamples }, () => new Array(nFeatures).fill(0));
    for (let i = 0; i < nSamples; i++) {
      const rowBase = X.offset + i * stride0;
      for (let j = 0; j < nFeatures; j++) {
        const raw = data[rowBase + j * stride1];
        if (raw === void 0) {
          throw new chunkJSCDE774_cjs.DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        let val = Number(raw);
        if (this.withStd && scale && scaleData) {
          const rawScale = scaleData[scale.offset + j * scaleStride];
          if (rawScale === void 0) {
            throw new chunkJSCDE774_cjs.DeepboxError("Internal error: scale tensor access out of bounds");
          }
          const std = Number(rawScale);
          // Mirror transform(): a zero spread was treated as a unit scale.
          const safeStd = std === 0 ? 1 : std;
          val *= safeStd;
        }
        if (this.withMean && mean && meanData) {
          const meanValue = meanData[mean.offset + j * meanStride];
          if (meanValue === void 0) {
            throw new chunkJSCDE774_cjs.DeepboxError("Internal error: mean tensor access out of bounds");
          }
          val += Number(meanValue);
        }
        const resultRow = result[i];
        if (resultRow === void 0) {
          throw new chunkJSCDE774_cjs.DeepboxError("Internal error: result row access failed");
        }
        resultRow[j] = snapInverseValue(val);
      }
    }
    return chunk6AE5FKKQ_cjs.tensor(result, { dtype: "float64", device: X.device });
  }
};
|
|
2010
|
+
/**
 * Copies the per-feature bounds learned by a MinMaxScaler into plain arrays.
 *
 * Reading them once per call (instead of once per element) also gives a single
 * place to verify that the incoming data has the same number of features as the
 * data seen during `fit`.
 *
 * @param scaler - Fitted MinMaxScaler instance
 * @param nFeatures - Number of columns of the tensor being transformed
 * @returns `{ mins, maxs }`, two arrays of length `nFeatures`
 * @throws {DeepboxError} If the fitted tensors are missing or unreadable
 * @throws {InvalidParameterError} If `nFeatures` differs from the fitted feature count
 */
function readMinMaxScalerBounds(scaler, nFeatures) {
  const dataMin = scaler.dataMin_;
  const dataMax = scaler.dataMax_;
  if (!dataMin || !dataMax) {
    throw new chunkJSCDE774_cjs.DeepboxError("MinMaxScaler internal error: missing fitted min/max");
  }
  // Without this guard a narrower X is silently scaled with the wrong columns'
  // statistics and a wider X fails with a misleading "Internal error".
  if (dataMin.size !== nFeatures || dataMax.size !== nFeatures) {
    throw new chunkJSCDE774_cjs.InvalidParameterError(
      `X has ${nFeatures} features, but MinMaxScaler was fitted with ${dataMin.size} features`,
      "X"
    );
  }
  const minData = getNumericData2(dataMin, "dataMin_");
  const maxData = getNumericData2(dataMax, "dataMax_");
  const minStride = getStride1D(dataMin);
  const maxStride = getStride1D(dataMax);
  const mins = new Array(nFeatures);
  const maxs = new Array(nFeatures);
  for (let j = 0; j < nFeatures; j++) {
    const rawMin = minData[dataMin.offset + j * minStride];
    const rawMax = maxData[dataMax.offset + j * maxStride];
    if (rawMin === void 0 || rawMax === void 0) {
      throw new chunkJSCDE774_cjs.DeepboxError("Internal error: min/max tensor access out of bounds");
    }
    mins[j] = Number(rawMin);
    maxs[j] = Number(rawMax);
  }
  return { mins, maxs };
}
var MinMaxScaler = class {
  /** True once `fit` has completed. */
  fitted = false;
  /** Per-feature minimum observed by `fit` (1D float64 tensor). */
  dataMin_;
  /** Per-feature maximum observed by `fit` (1D float64 tensor). */
  dataMax_;
  /** Target output range as `[min, max]`. */
  featureRange;
  /** Whether `transform` clamps its output to `featureRange`. */
  clip;
  /**
   * Creates a new MinMaxScaler.
   *
   * @param options - Configuration options
   * @param options.featureRange - Desired feature range [min, max] (default: [0, 1])
   * @param options.clip - Clip transformed values to featureRange (default: false)
   * @param options.copy - Accepted for API parity; transforms are always out-of-place (default: true)
   * @throws {InvalidParameterError} If featureRange is not finite or not strictly ascending
   */
  constructor(options = {}) {
    const requestedRange = options.featureRange ?? [0, 1];
    this.clip = parseBooleanOption(options.clip, "clip", false);
    parseBooleanOption(options.copy, "copy", true);
    const [minRange, maxRange] = requestedRange;
    if (!Number.isFinite(minRange) || !Number.isFinite(maxRange) || minRange >= maxRange) {
      throw new chunkJSCDE774_cjs.InvalidParameterError(
        "featureRange must be [min, max] with min < max",
        "featureRange",
        requestedRange
      );
    }
    // Keep a private copy so later mutation of the caller's array cannot
    // bypass the validation above (e.g. by collapsing the range to zero width).
    this.featureRange = [minRange, maxRange];
  }
  /**
   * Learns the per-feature minimum and maximum of X.
   *
   * @param X - Training data (2D numeric tensor with at least one element)
   * @returns This scaler, to allow chaining
   * @throws {InvalidParameterError} If X is empty
   */
  fit(X) {
    if (X.size === 0) {
      throw new chunkJSCDE774_cjs.InvalidParameterError("X must contain at least one sample", "X");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    validateFiniteData(X, "X");
    const [nSamples, nFeatures] = getShape2D(X);
    const data = getNumericData2(X, "X");
    const [stride0, stride1] = getStrides2D(X);
    const mins = new Array(nFeatures);
    const maxs = new Array(nFeatures);
    for (let j = 0; j < nFeatures; j++) {
      let columnMin = Number.POSITIVE_INFINITY;
      let columnMax = Number.NEGATIVE_INFINITY;
      for (let i = 0; i < nSamples; i++) {
        const raw = data[X.offset + i * stride0 + j * stride1];
        if (raw === void 0) {
          throw new chunkJSCDE774_cjs.DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        const val = Number(raw);
        columnMin = Math.min(columnMin, val);
        columnMax = Math.max(columnMax, val);
      }
      mins[j] = columnMin;
      maxs[j] = columnMax;
    }
    this.dataMin_ = chunk6AE5FKKQ_cjs.tensor(mins, { dtype: "float64" });
    this.dataMax_ = chunk6AE5FKKQ_cjs.tensor(maxs, { dtype: "float64" });
    this.fitted = true;
    return this;
  }
  /**
   * Rescales each feature of X from its fitted [min, max] to `featureRange`.
   *
   * Features that were constant during `fit` (zero range) map to the lower
   * bound of `featureRange`.
   *
   * @param X - Data to scale (2D numeric tensor)
   * @returns New float64 tensor with the scaled values
   * @throws {NotFittedError} If the scaler is not fitted
   * @throws {InvalidParameterError} If X has a different feature count than the fitted data
   */
  transform(X) {
    if (!this.fitted) {
      throw new chunkJSCDE774_cjs.NotFittedError("MinMaxScaler must be fitted before transform");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    validateFiniteData(X, "X");
    const [nSamples, nFeatures] = getShape2D(X);
    const [minRange, maxRange] = this.featureRange;
    const data = getNumericData2(X, "X");
    const [stride0, stride1] = getStrides2D(X);
    const { mins, maxs } = readMinMaxScalerBounds(this, nFeatures);
    const result = [];
    for (let i = 0; i < nSamples; i++) {
      const rowBase = X.offset + i * stride0;
      const row = new Array(nFeatures);
      for (let j = 0; j < nFeatures; j++) {
        const raw = data[rowBase + j * stride1];
        if (raw === void 0) {
          throw new chunkJSCDE774_cjs.DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        const val = Number(raw);
        const min = mins[j];
        const range = maxs[j] - min;
        let scaled = range !== 0 ? (val - min) / range * (maxRange - minRange) + minRange : minRange;
        if (this.clip) {
          scaled = Math.max(minRange, Math.min(maxRange, scaled));
        }
        row[j] = scaled;
      }
      result.push(row);
    }
    return chunk6AE5FKKQ_cjs.tensor(result, { dtype: "float64", device: X.device });
  }
  /**
   * Fits the scaler on X and returns the scaled X.
   *
   * @param X - Training data (2D numeric tensor)
   * @returns New float64 tensor with the scaled values
   */
  fitTransform(X) {
    return this.fit(X).transform(X);
  }
  /**
   * Maps values from `featureRange` back to the fitted [min, max] of each feature.
   *
   * @param X - Scaled data (2D numeric tensor)
   * @returns New float64 tensor in the original feature space
   * @throws {NotFittedError} If the scaler is not fitted
   * @throws {InvalidParameterError} If X has a different feature count than the fitted data
   */
  inverseTransform(X) {
    if (!this.fitted) {
      throw new chunkJSCDE774_cjs.NotFittedError("MinMaxScaler must be fitted before inverse_transform");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    const [nSamples, nFeatures] = getShape2D(X);
    const [minRange, maxRange] = this.featureRange;
    const data = getNumericData2(X, "X");
    const [stride0, stride1] = getStrides2D(X);
    const { mins, maxs } = readMinMaxScalerBounds(this, nFeatures);
    const result = [];
    for (let i = 0; i < nSamples; i++) {
      const rowBase = X.offset + i * stride0;
      const row = new Array(nFeatures);
      for (let j = 0; j < nFeatures; j++) {
        const raw = data[rowBase + j * stride1];
        if (raw === void 0) {
          throw new chunkJSCDE774_cjs.DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        const val = Number(raw);
        const min = mins[j];
        const range = maxs[j] - min;
        row[j] = (val - minRange) / (maxRange - minRange) * range + min;
      }
      result.push(row);
    }
    return chunk6AE5FKKQ_cjs.tensor(result, { dtype: "float64", device: X.device });
  }
};
|
|
2169
|
+
/**
 * Copies the per-feature maximum absolute values learned by a MaxAbsScaler
 * into a plain array, verifying that the incoming data has the fitted width.
 *
 * @param scaler - Fitted MaxAbsScaler instance
 * @param nFeatures - Number of columns of the tensor being transformed
 * @returns Array of length `nFeatures` with the fitted scales
 * @throws {DeepboxError} If the fitted tensor is missing or unreadable
 * @throws {InvalidParameterError} If `nFeatures` differs from the fitted feature count
 */
function readMaxAbsScalerScales(scaler, nFeatures) {
  const maxAbs = scaler.maxAbs_;
  if (!maxAbs) {
    throw new chunkJSCDE774_cjs.DeepboxError("MaxAbsScaler internal error: missing fitted maxAbs");
  }
  // Without this guard a narrower X is silently scaled with the wrong columns'
  // statistics and a wider X fails with a misleading "Internal error".
  if (maxAbs.size !== nFeatures) {
    throw new chunkJSCDE774_cjs.InvalidParameterError(
      `X has ${nFeatures} features, but MaxAbsScaler was fitted with ${maxAbs.size} features`,
      "X"
    );
  }
  const maxData = getNumericData2(maxAbs, "maxAbs_");
  const maxStride = getStride1D(maxAbs);
  const scales = new Array(nFeatures);
  for (let j = 0; j < nFeatures; j++) {
    const rawScale = maxData[maxAbs.offset + j * maxStride];
    if (rawScale === void 0) {
      throw new chunkJSCDE774_cjs.DeepboxError("Internal error: maxAbs tensor access out of bounds");
    }
    scales[j] = Number(rawScale);
  }
  return scales;
}
var MaxAbsScaler = class {
  /** True once `fit` has completed. */
  fitted = false;
  /** Per-feature maximum absolute value observed by `fit` (1D float64 tensor). */
  maxAbs_;
  /**
   * Creates a new MaxAbsScaler.
   *
   * @param options - Configuration options
   * @param options.copy - Accepted for API parity; transforms are always out-of-place (default: true)
   */
  constructor(options = {}) {
    parseBooleanOption(options.copy, "copy", true);
  }
  /**
   * Learns the maximum absolute value of every feature of X.
   *
   * @param X - Training data (2D numeric tensor with at least one element)
   * @returns This scaler, to allow chaining
   * @throws {InvalidParameterError} If X is empty
   */
  fit(X) {
    if (X.size === 0) {
      throw new chunkJSCDE774_cjs.InvalidParameterError("X must contain at least one sample", "X");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    validateFiniteData(X, "X");
    const [nSamples, nFeatures] = getShape2D(X);
    const data = getNumericData2(X, "X");
    const [stride0, stride1] = getStrides2D(X);
    const maxAbs = new Array(nFeatures);
    for (let j = 0; j < nFeatures; j++) {
      let columnPeak = 0;
      for (let i = 0; i < nSamples; i++) {
        const raw = data[X.offset + i * stride0 + j * stride1];
        if (raw === void 0) {
          throw new chunkJSCDE774_cjs.DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        columnPeak = Math.max(columnPeak, Math.abs(Number(raw)));
      }
      maxAbs[j] = columnPeak;
    }
    this.maxAbs_ = chunk6AE5FKKQ_cjs.tensor(maxAbs, { dtype: "float64" });
    this.fitted = true;
    return this;
  }
  /**
   * Divides each feature of X by its fitted maximum absolute value.
   *
   * Features whose fitted maximum is 0 are divided by 1, i.e. left unchanged.
   *
   * @param X - Data to scale (2D numeric tensor)
   * @returns New float64 tensor with the scaled values
   * @throws {NotFittedError} If the scaler is not fitted
   * @throws {InvalidParameterError} If X has a different feature count than the fitted data
   */
  transform(X) {
    if (!this.fitted) {
      throw new chunkJSCDE774_cjs.NotFittedError("MaxAbsScaler must be fitted before transform");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    validateFiniteData(X, "X");
    const [nSamples, nFeatures] = getShape2D(X);
    const data = getNumericData2(X, "X");
    const [stride0, stride1] = getStrides2D(X);
    const scales = readMaxAbsScalerScales(this, nFeatures);
    const result = [];
    for (let i = 0; i < nSamples; i++) {
      const rowBase = X.offset + i * stride0;
      const row = new Array(nFeatures);
      for (let j = 0; j < nFeatures; j++) {
        const raw = data[rowBase + j * stride1];
        if (raw === void 0) {
          throw new chunkJSCDE774_cjs.DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        const scale = scales[j];
        const safeScale = scale === 0 ? 1 : scale;
        row[j] = Number(raw) / safeScale;
      }
      result.push(row);
    }
    return chunk6AE5FKKQ_cjs.tensor(result, { dtype: "float64", device: X.device });
  }
  /**
   * Fits the scaler on X and returns the scaled X.
   *
   * @param X - Training data (2D numeric tensor)
   * @returns New float64 tensor with the scaled values
   */
  fitTransform(X) {
    return this.fit(X).transform(X);
  }
  /**
   * Multiplies each feature of X by its fitted maximum absolute value.
   *
   * @param X - Scaled data (2D numeric tensor)
   * @returns New float64 tensor in the original feature space
   * @throws {NotFittedError} If the scaler is not fitted
   * @throws {InvalidParameterError} If X has a different feature count than the fitted data
   */
  inverseTransform(X) {
    if (!this.fitted) {
      throw new chunkJSCDE774_cjs.NotFittedError("MaxAbsScaler must be fitted before inverse_transform");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    const [nSamples, nFeatures] = getShape2D(X);
    const data = getNumericData2(X, "X");
    const [stride0, stride1] = getStrides2D(X);
    const scales = readMaxAbsScalerScales(this, nFeatures);
    const result = [];
    for (let i = 0; i < nSamples; i++) {
      const rowBase = X.offset + i * stride0;
      const row = new Array(nFeatures);
      for (let j = 0; j < nFeatures; j++) {
        const raw = data[rowBase + j * stride1];
        if (raw === void 0) {
          throw new chunkJSCDE774_cjs.DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        row[j] = Number(raw) * scales[j];
      }
      result.push(row);
    }
    return chunk6AE5FKKQ_cjs.tensor(result, { dtype: "float64", device: X.device });
  }
};
|
|
2291
|
+
/**
 * Linearly interpolated quantile of an ascending-sorted array.
 *
 * @param sorted - Values sorted in ascending order
 * @param q - Quantile as a fraction in [0, 1]
 * @returns Interpolated value at position `q * (sorted.length - 1)`
 * @throws {DeepboxError} If `sorted` is empty or an index falls outside it
 */
function robustScalerQuantile(sorted, q) {
  if (sorted.length === 0) {
    throw new chunkJSCDE774_cjs.DeepboxError("Internal error: cannot interpolate empty values");
  }
  if (sorted.length === 1) {
    const only = sorted[0];
    if (only === void 0) {
      throw new chunkJSCDE774_cjs.DeepboxError("Internal error: missing sorted value");
    }
    return only;
  }
  const position = q * (sorted.length - 1);
  const lower = Math.floor(position);
  const upper = Math.ceil(position);
  const lowerValue = sorted[lower];
  const upperValue = sorted[upper];
  if (lowerValue === void 0 || upperValue === void 0) {
    throw new chunkJSCDE774_cjs.DeepboxError("Internal error: quantile interpolation index out of bounds");
  }
  if (upper === lower) {
    return lowerValue;
  }
  const weight = position - lower;
  return lowerValue * (1 - weight) + upperValue * weight;
}
/**
 * Copies one fitted RobustScaler statistic (`center_` or `scale_`) into a plain
 * array, verifying that the incoming data has the fitted width.
 *
 * @param stat - Fitted 1D tensor, or undefined if it was never produced
 * @param label - "center" or "scale"; used in error messages and as the tensor name
 * @param nFeatures - Number of columns of the tensor being transformed
 * @returns Array of length `nFeatures` with the fitted values
 * @throws {DeepboxError} If the statistic is missing or unreadable
 * @throws {InvalidParameterError} If `nFeatures` differs from the fitted feature count
 */
function readRobustScalerStat(stat, label, nFeatures) {
  if (!stat) {
    throw new chunkJSCDE774_cjs.DeepboxError(`RobustScaler internal error: missing ${label}_`);
  }
  // Without this guard a narrower X is silently scaled with the wrong columns'
  // statistics and a wider X fails with a misleading "Internal error".
  if (stat.size !== nFeatures) {
    throw new chunkJSCDE774_cjs.InvalidParameterError(
      `X has ${nFeatures} features, but RobustScaler was fitted with ${stat.size} features`,
      "X"
    );
  }
  const statData = getNumericData2(stat, `${label}_`);
  const stride = getStride1D(stat);
  const values = new Array(nFeatures);
  for (let j = 0; j < nFeatures; j++) {
    const raw = statData[stat.offset + j * stride];
    if (raw === void 0) {
      throw new chunkJSCDE774_cjs.DeepboxError(`Internal error: ${label} tensor access out of bounds`);
    }
    values[j] = Number(raw);
  }
  return values;
}
var RobustScaler = class {
  /** True once `fit` has completed. */
  fitted = false;
  /** Per-feature median (1D float64 tensor); undefined when centering is disabled. */
  center_;
  /** Per-feature quantile range (1D float64 tensor); undefined when scaling is disabled. */
  scale_;
  /** Whether the median is subtracted. */
  withCentering;
  /** Whether values are divided by the quantile range. */
  withScaling;
  /** Percentile pair `[lower, upper]` that defines the quantile range. */
  quantileRange;
  /** Whether the quantile range is divided by the matching normal-quantile spread. */
  unitVariance;
  /**
   * Creates a new RobustScaler.
   *
   * @param options - Configuration options
   * @param options.withCentering - Center data using median (default: true)
   * @param options.withScaling - Scale data using IQR (default: true)
   * @param options.quantileRange - Quantile range for IQR as percentiles (default: [25, 75])
   * @param options.unitVariance - Scale so that features have unit variance under normality (default: false)
   * @param options.copy - Accepted for API parity; transforms are always out-of-place (default: true)
   * @throws {InvalidParameterError} If quantileRange is not a finite ascending pair within [0, 100]
   */
  constructor(options = {}) {
    this.withCentering = parseBooleanOption(options.withCentering, "withCentering", true);
    this.withScaling = parseBooleanOption(options.withScaling, "withScaling", true);
    const requestedRange = options.quantileRange ?? [25, 75];
    this.unitVariance = parseBooleanOption(options.unitVariance, "unitVariance", false);
    parseBooleanOption(options.copy, "copy", true);
    const [lower, upper] = requestedRange;
    if (!Number.isFinite(lower) || !Number.isFinite(upper) || lower < 0 || upper > 100 || lower >= upper) {
      throw new chunkJSCDE774_cjs.InvalidParameterError(
        "quantileRange must be a valid ascending percentile range",
        "quantileRange",
        requestedRange
      );
    }
    // Keep a private copy so later mutation of the caller's array cannot
    // bypass the validation above.
    this.quantileRange = [lower, upper];
  }
  /**
   * Learns the per-feature median and quantile range of X.
   *
   * @param X - Training data (2D numeric tensor with at least one element)
   * @returns This scaler, to allow chaining
   * @throws {InvalidParameterError} If X is empty
   * @throws {DeepboxError} If unitVariance is set and the normal-quantile spread is not positive and finite
   */
  fit(X) {
    if (X.size === 0) {
      throw new chunkJSCDE774_cjs.InvalidParameterError("X must contain at least one sample", "X");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    validateFiniteData(X, "X");
    const [nSamples, nFeatures] = getShape2D(X);
    const data = getNumericData2(X, "X");
    const [stride0, stride1] = getStrides2D(X);
    const centers = new Array(nFeatures).fill(0);
    const scales = new Array(nFeatures).fill(0);
    const [lowerPercentile, upperPercentile] = this.quantileRange;
    const lowerFraction = lowerPercentile / 100;
    const upperFraction = upperPercentile / 100;
    const normalizer = this.unitVariance ? normalQuantile(upperFraction) - normalQuantile(lowerFraction) : 1;
    if (this.unitVariance && (!Number.isFinite(normalizer) || normalizer <= 0)) {
      throw new chunkJSCDE774_cjs.DeepboxError("RobustScaler internal error: invalid unit variance normalizer");
    }
    for (let j = 0; j < nFeatures; j++) {
      const column = [];
      for (let i = 0; i < nSamples; i++) {
        const raw = data[X.offset + i * stride0 + j * stride1];
        if (raw === void 0) {
          throw new chunkJSCDE774_cjs.DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        column.push(Number(raw));
      }
      column.sort((a, b) => a - b);
      centers[j] = robustScalerQuantile(column, 0.5);
      const iqr = robustScalerQuantile(column, upperFraction) - robustScalerQuantile(column, lowerFraction);
      scales[j] = this.unitVariance ? iqr / normalizer : iqr;
    }
    this.center_ = this.withCentering ? chunk6AE5FKKQ_cjs.tensor(centers, { dtype: "float64" }) : void 0;
    this.scale_ = this.withScaling ? chunk6AE5FKKQ_cjs.tensor(scales, { dtype: "float64" }) : void 0;
    this.fitted = true;
    return this;
  }
  /**
   * Subtracts the fitted median and divides by the fitted quantile range,
   * each step only when the corresponding option is enabled.
   *
   * Features whose fitted scale is 0 are divided by 1, i.e. only centered.
   *
   * @param X - Data to scale (2D numeric tensor)
   * @returns New float64 tensor with the scaled values
   * @throws {NotFittedError} If the scaler is not fitted
   * @throws {InvalidParameterError} If X has a different feature count than the fitted data
   */
  transform(X) {
    if (!this.fitted) {
      throw new chunkJSCDE774_cjs.NotFittedError("RobustScaler must be fitted before transform");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    validateFiniteData(X, "X");
    const [nSamples, nFeatures] = getShape2D(X);
    const data = getNumericData2(X, "X");
    const [stride0, stride1] = getStrides2D(X);
    const centers = this.withCentering ? readRobustScalerStat(this.center_, "center", nFeatures) : void 0;
    const scales = this.withScaling ? readRobustScalerStat(this.scale_, "scale", nFeatures) : void 0;
    const result = [];
    for (let i = 0; i < nSamples; i++) {
      const rowBase = X.offset + i * stride0;
      const row = new Array(nFeatures);
      for (let j = 0; j < nFeatures; j++) {
        const raw = data[rowBase + j * stride1];
        if (raw === void 0) {
          throw new chunkJSCDE774_cjs.DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        let val = Number(raw);
        if (centers) {
          val -= centers[j];
        }
        if (scales) {
          const scaleValue = scales[j];
          val /= scaleValue === 0 ? 1 : scaleValue;
        }
        row[j] = val;
      }
      result.push(row);
    }
    return chunk6AE5FKKQ_cjs.tensor(result, { dtype: "float64", device: X.device });
  }
  /**
   * Fits the scaler on X and returns the scaled X.
   *
   * @param X - Training data (2D numeric tensor)
   * @returns New float64 tensor with the scaled values
   */
  fitTransform(X) {
    return this.fit(X).transform(X);
  }
  /**
   * Undoes `transform`: multiplies by the fitted scale, then adds the fitted
   * median, each step only when the corresponding option is enabled.
   *
   * @param X - Scaled data (2D numeric tensor)
   * @returns New float64 tensor in the original feature space
   * @throws {NotFittedError} If the scaler is not fitted
   * @throws {InvalidParameterError} If X has a different feature count than the fitted data
   */
  inverseTransform(X) {
    if (!this.fitted) {
      throw new chunkJSCDE774_cjs.NotFittedError("RobustScaler must be fitted before inverse_transform");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    const [nSamples, nFeatures] = getShape2D(X);
    const data = getNumericData2(X, "X");
    const [stride0, stride1] = getStrides2D(X);
    const centers = this.withCentering ? readRobustScalerStat(this.center_, "center", nFeatures) : void 0;
    const scales = this.withScaling ? readRobustScalerStat(this.scale_, "scale", nFeatures) : void 0;
    const result = [];
    for (let i = 0; i < nSamples; i++) {
      const rowBase = X.offset + i * stride0;
      const row = new Array(nFeatures);
      for (let j = 0; j < nFeatures; j++) {
        const raw = data[rowBase + j * stride1];
        if (raw === void 0) {
          throw new chunkJSCDE774_cjs.DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        let val = Number(raw);
        if (scales) {
          const scaleValue = scales[j];
          val *= scaleValue === 0 ? 1 : scaleValue;
        }
        if (centers) {
          val += centers[j];
        }
        row[j] = val;
      }
      result.push(row);
    }
    return chunk6AE5FKKQ_cjs.tensor(result, { dtype: "float64", device: X.device });
  }
};
|
|
2504
|
+
var Normalizer = class {
  /** Norm applied to every row: "l1", "l2" or "max". */
  norm;
  /**
   * Creates a new Normalizer.
   *
   * @param options - Configuration options
   * @param options.norm - Norm to use (default: "l2")
   * @param options.copy - Accepted for API parity; transforms are always out-of-place (default: true)
   * @throws {InvalidParameterError} If norm is not one of "l1", "l2", "max"
   */
  constructor(options = {}) {
    this.norm = options.norm ?? "l2";
    if (this.norm !== "l1" && this.norm !== "l2" && this.norm !== "max") {
      throw new chunkJSCDE774_cjs.InvalidParameterError("norm must be one of: l1, l2, max", "norm", this.norm);
    }
    parseBooleanOption(options.copy, "copy", true);
  }
  /**
   * No-op: the normalizer keeps no fitted state.
   *
   * @param _X - Ignored
   * @returns This normalizer, to allow chaining
   */
  fit(_X) {
    return this;
  }
  /**
   * Scales every row of X to unit norm.
   *
   * Rows whose norm is 0 are returned unchanged. When the straightforward
   * accumulation overflows (e.g. squaring 1e200) or underflows (e.g. squaring
   * 1e-200) the row is first divided by its largest magnitude, so finite
   * non-zero rows are always normalized instead of collapsing to zeros or
   * being returned unscaled. All other rows use the plain accumulation.
   *
   * @param X - Data to normalize (2D numeric tensor)
   * @returns New float64 tensor with unit-norm rows
   */
  transform(X) {
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    validateFiniteData(X, "X");
    const [nSamples, nFeatures] = getShape2D(X);
    const data = getNumericData2(X, "X");
    const [stride0, stride1] = getStrides2D(X);
    const kind = this.norm;
    // Square root of the smallest normal double (2 ** -1022). An L2 norm below
    // this means the sum of squares was subnormal and has lost precision.
    const l2UnderflowLimit = 2 ** -511;
    const measure = (values) => {
      let acc = 0;
      for (const v of values) {
        if (kind === "l2") {
          acc += v * v;
        } else if (kind === "l1") {
          acc += Math.abs(v);
        } else if (kind === "max") {
          acc = Math.max(acc, Math.abs(v));
        }
      }
      return kind === "l2" ? Math.sqrt(acc) : acc;
    };
    const result = [];
    for (let i = 0; i < nSamples; i++) {
      const rowBase = X.offset + i * stride0;
      const values = new Array(nFeatures);
      for (let j = 0; j < nFeatures; j++) {
        const raw = data[rowBase + j * stride1];
        if (raw === void 0) {
          throw new chunkJSCDE774_cjs.DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        values[j] = Number(raw);
      }
      const norm = measure(values);
      let unreliable = false;
      if (kind === "l2") {
        unreliable = !Number.isFinite(norm) || norm < l2UnderflowLimit;
      } else if (kind === "l1") {
        unreliable = !Number.isFinite(norm);
      }
      if (unreliable) {
        let peak = 0;
        for (const v of values) {
          peak = Math.max(peak, Math.abs(v));
        }
        if (peak > 0 && Number.isFinite(peak)) {
          // After dividing by the peak every magnitude is <= 1 and at least one
          // equals 1, so the rescaled norm lies in [1, nFeatures].
          const rescaled = values.map((v) => v / peak);
          const rescaledNorm = measure(rescaled);
          result.push(rescaled.map((v) => v / rescaledNorm));
          continue;
        }
      }
      result.push(values.map((v) => norm === 0 ? v : v / norm));
    }
    return chunk6AE5FKKQ_cjs.tensor(result, { dtype: "float64", device: X.device });
  }
  /**
   * Equivalent to `transform`; provided for API symmetry with the scalers.
   *
   * @param X - Data to normalize (2D numeric tensor)
   * @returns New float64 tensor with unit-norm rows
   */
  fitTransform(X) {
    return this.transform(X);
  }
};
|
|
2580
|
+
var QuantileTransformer = class {
|
|
2581
|
+
fitted = false;
|
|
2582
|
+
nQuantiles;
|
|
2583
|
+
outputDistribution;
|
|
2584
|
+
quantiles_;
|
|
2585
|
+
subsample;
|
|
2586
|
+
randomState;
|
|
2587
|
+
/**
|
|
2588
|
+
* Creates a new QuantileTransformer.
|
|
2589
|
+
*
|
|
2590
|
+
* @param options - Configuration options
|
|
2591
|
+
* @param options.nQuantiles - Number of quantiles to use (default: 1000)
|
|
2592
|
+
* @param options.outputDistribution - "uniform" or "normal" (default: "uniform")
|
|
2593
|
+
* @param options.subsample - Subsample size for quantile estimation (default: use all samples)
|
|
2594
|
+
* @param options.randomState - Seed for subsampling reproducibility
|
|
2595
|
+
* @param options.copy - Accepted for API parity; transforms are always out-of-place (default: true)
|
|
2596
|
+
*/
|
|
2597
|
+
constructor(options = {}) {
|
|
2598
|
+
this.nQuantiles = options.nQuantiles ?? 1e3;
|
|
2599
|
+
this.outputDistribution = options.outputDistribution ?? "uniform";
|
|
2600
|
+
this.subsample = options.subsample;
|
|
2601
|
+
this.randomState = options.randomState;
|
|
2602
|
+
parseBooleanOption(options.copy, "copy", true);
|
|
2603
|
+
if (!Number.isFinite(this.nQuantiles) || !Number.isInteger(this.nQuantiles) || this.nQuantiles < 2) {
|
|
2604
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
2605
|
+
"nQuantiles must be at least 2",
|
|
2606
|
+
"nQuantiles",
|
|
2607
|
+
this.nQuantiles
|
|
2608
|
+
);
|
|
2609
|
+
}
|
|
2610
|
+
if (this.outputDistribution !== "uniform" && this.outputDistribution !== "normal") {
|
|
2611
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
2612
|
+
"outputDistribution must be 'uniform' or 'normal'",
|
|
2613
|
+
"outputDistribution",
|
|
2614
|
+
this.outputDistribution
|
|
2615
|
+
);
|
|
2616
|
+
}
|
|
2617
|
+
if (this.subsample !== void 0) {
|
|
2618
|
+
if (!Number.isFinite(this.subsample) || !Number.isInteger(this.subsample) || this.subsample < 2) {
|
|
2619
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError(
|
|
2620
|
+
"subsample must be an integer >= 2",
|
|
2621
|
+
"subsample",
|
|
2622
|
+
this.subsample
|
|
2623
|
+
);
|
|
2624
|
+
}
|
|
2625
|
+
}
|
|
2626
|
+
}
|
|
2627
|
+
fit(X) {
|
|
2628
|
+
if (X.size === 0) {
|
|
2629
|
+
throw new chunkJSCDE774_cjs.InvalidParameterError("X must contain at least one sample", "X");
|
|
2630
|
+
}
|
|
2631
|
+
assert2D(X, "X");
|
|
2632
|
+
assertNumericTensor(X, "X");
|
|
2633
|
+
validateFiniteData(X, "X");
|
|
2634
|
+
const [nSamples, nFeatures] = getShape2D(X);
|
|
2635
|
+
const data = getNumericData2(X, "X");
|
|
2636
|
+
const [stride0, stride1] = getStrides2D(X);
|
|
2637
|
+
this.quantiles_ = /* @__PURE__ */ new Map();
|
|
2638
|
+
const sampleCount = this.subsample !== void 0 ? Math.min(this.subsample, nSamples) : nSamples;
|
|
2639
|
+
const nQuantilesEffective = Math.min(this.nQuantiles, sampleCount);
|
|
2640
|
+
const references = nQuantilesEffective <= 1 ? [0.5] : Array.from({ length: nQuantilesEffective }, (_, i) => i / (nQuantilesEffective - 1));
|
|
2641
|
+
let sampleIndices;
|
|
2642
|
+
if (sampleCount < nSamples) {
|
|
2643
|
+
sampleIndices = Array.from({ length: nSamples }, (_, i) => i);
|
|
2644
|
+
const random = this.randomState !== void 0 ? createSeededRandom(this.randomState) : Math.random;
|
|
2645
|
+
shuffleIndicesInPlace(sampleIndices, random);
|
|
2646
|
+
sampleIndices = sampleIndices.slice(0, sampleCount);
|
|
2647
|
+
}
|
|
2648
|
+
for (let j = 0; j < nFeatures; j++) {
|
|
2649
|
+
const values = [];
|
|
2650
|
+
if (sampleIndices) {
|
|
2651
|
+
for (const idx of sampleIndices) {
|
|
2652
|
+
const raw = data[X.offset + idx * stride0 + j * stride1];
|
|
2653
|
+
if (raw === void 0) {
|
|
2654
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: numeric tensor access out of bounds");
|
|
2655
|
+
}
|
|
2656
|
+
values.push(Number(raw));
|
|
2657
|
+
}
|
|
2658
|
+
} else {
|
|
2659
|
+
for (let i = 0; i < nSamples; i++) {
|
|
2660
|
+
const raw = data[X.offset + i * stride0 + j * stride1];
|
|
2661
|
+
if (raw === void 0) {
|
|
2662
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: numeric tensor access out of bounds");
|
|
2663
|
+
}
|
|
2664
|
+
values.push(Number(raw));
|
|
2665
|
+
}
|
|
2666
|
+
}
|
|
2667
|
+
const sorted = [...values].sort((a, b) => a - b);
|
|
2668
|
+
const quantiles = references.map((q) => this.interpolateFromSorted(sorted, q));
|
|
2669
|
+
this.quantiles_.set(j, { quantiles, references });
|
|
2670
|
+
}
|
|
2671
|
+
this.fitted = true;
|
|
2672
|
+
return this;
|
|
2673
|
+
}
|
|
2674
|
+
transform(X) {
|
|
2675
|
+
if (!this.fitted || !this.quantiles_) {
|
|
2676
|
+
throw new chunkJSCDE774_cjs.NotFittedError("QuantileTransformer must be fitted before transform");
|
|
2677
|
+
}
|
|
2678
|
+
assert2D(X, "X");
|
|
2679
|
+
assertNumericTensor(X, "X");
|
|
2680
|
+
validateFiniteData(X, "X");
|
|
2681
|
+
const [nSamples, nFeatures] = getShape2D(X);
|
|
2682
|
+
const data = getNumericData2(X, "X");
|
|
2683
|
+
const [stride0, stride1] = getStrides2D(X);
|
|
2684
|
+
if (nSamples === 0) {
|
|
2685
|
+
return chunk6AE5FKKQ_cjs.zeros([0, nFeatures], { dtype: "float64" });
|
|
2686
|
+
}
|
|
2687
|
+
const result = new Array(nSamples);
|
|
2688
|
+
for (let i = 0; i < nSamples; i++) {
|
|
2689
|
+
result[i] = new Array(nFeatures);
|
|
2690
|
+
}
|
|
2691
|
+
for (let j = 0; j < nFeatures; j++) {
|
|
2692
|
+
const feature = this.quantiles_.get(j);
|
|
2693
|
+
if (!feature) {
|
|
2694
|
+
throw new chunkJSCDE774_cjs.DeepboxError(`Internal error: missing fitted quantiles for feature ${j}`);
|
|
2695
|
+
}
|
|
2696
|
+
for (let i = 0; i < nSamples; i++) {
|
|
2697
|
+
const raw = data[X.offset + i * stride0 + j * stride1];
|
|
2698
|
+
if (raw === void 0) {
|
|
2699
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: numeric tensor access out of bounds");
|
|
2700
|
+
}
|
|
2701
|
+
const val = Number(raw);
|
|
2702
|
+
const quantile = this.mapValueToQuantile(val, feature.quantiles, feature.references);
|
|
2703
|
+
const row = result[i];
|
|
2704
|
+
if (!row) {
|
|
2705
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: result row access failed");
|
|
2706
|
+
}
|
|
2707
|
+
if (this.outputDistribution === "uniform") {
|
|
2708
|
+
row[j] = quantile;
|
|
2709
|
+
} else {
|
|
2710
|
+
const clampedQuantile = Math.max(1e-7, Math.min(1 - 1e-7, quantile));
|
|
2711
|
+
const z = Math.sqrt(2) * this.erfInv(2 * clampedQuantile - 1);
|
|
2712
|
+
row[j] = z;
|
|
2713
|
+
}
|
|
2714
|
+
}
|
|
2715
|
+
}
|
|
2716
|
+
return chunk6AE5FKKQ_cjs.tensor(result, { dtype: "float64", device: X.device });
|
|
2717
|
+
}
|
|
2718
|
+
/**
|
|
2719
|
+
* Inverse transform data back to the original feature space.
|
|
2720
|
+
*
|
|
2721
|
+
* If `outputDistribution="normal"`, values are first mapped back to uniform
|
|
2722
|
+
* quantiles before being projected into the original data distribution.
|
|
2723
|
+
*
|
|
2724
|
+
* @param X - Transformed data (2D tensor)
|
|
2725
|
+
* @returns Data in the original feature space
|
|
2726
|
+
* @throws {NotFittedError} If transformer is not fitted
|
|
2727
|
+
*/
|
|
2728
|
+
inverseTransform(X) {
|
|
2729
|
+
if (!this.fitted || !this.quantiles_) {
|
|
2730
|
+
throw new chunkJSCDE774_cjs.NotFittedError("QuantileTransformer must be fitted before inverse_transform");
|
|
2731
|
+
}
|
|
2732
|
+
assert2D(X, "X");
|
|
2733
|
+
assertNumericTensor(X, "X");
|
|
2734
|
+
validateFiniteData(X, "X");
|
|
2735
|
+
const [nSamples, nFeatures] = getShape2D(X);
|
|
2736
|
+
const data = getNumericData2(X, "X");
|
|
2737
|
+
const [stride0, stride1] = getStrides2D(X);
|
|
2738
|
+
if (nSamples === 0) {
|
|
2739
|
+
return chunk6AE5FKKQ_cjs.zeros([0, nFeatures], { dtype: "float64" });
|
|
2740
|
+
}
|
|
2741
|
+
const result = new Array(nSamples);
|
|
2742
|
+
for (let i = 0; i < nSamples; i++) {
|
|
2743
|
+
result[i] = new Array(nFeatures);
|
|
2744
|
+
}
|
|
2745
|
+
for (let j = 0; j < nFeatures; j++) {
|
|
2746
|
+
const feature = this.quantiles_.get(j);
|
|
2747
|
+
if (!feature) {
|
|
2748
|
+
throw new chunkJSCDE774_cjs.DeepboxError(`Internal error: missing fitted quantiles for feature ${j}`);
|
|
2749
|
+
}
|
|
2750
|
+
for (let i = 0; i < nSamples; i++) {
|
|
2751
|
+
const raw = data[X.offset + i * stride0 + j * stride1];
|
|
2752
|
+
if (raw === void 0) {
|
|
2753
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: numeric tensor access out of bounds");
|
|
2754
|
+
}
|
|
2755
|
+
const value = Number(raw);
|
|
2756
|
+
let quantile = this.outputDistribution === "normal" ? this.normalCdf(value) : value;
|
|
2757
|
+
quantile = Math.max(0, Math.min(1, quantile));
|
|
2758
|
+
const row = result[i];
|
|
2759
|
+
if (!row) {
|
|
2760
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: result row access failed");
|
|
2761
|
+
}
|
|
2762
|
+
row[j] = this.mapQuantileToValue(quantile, feature.quantiles, feature.references);
|
|
2763
|
+
}
|
|
2764
|
+
}
|
|
2765
|
+
return chunk6AE5FKKQ_cjs.tensor(result, { dtype: "float64", device: X.device });
|
|
2766
|
+
}
|
|
2767
|
+
erf(x) {
|
|
2768
|
+
const sign = x < 0 ? -1 : 1;
|
|
2769
|
+
const absX = Math.abs(x);
|
|
2770
|
+
const t = 1 / (1 + 0.3275911 * absX);
|
|
2771
|
+
const a1 = 0.254829592;
|
|
2772
|
+
const a2 = -0.284496736;
|
|
2773
|
+
const a3 = 1.421413741;
|
|
2774
|
+
const a4 = -1.453152027;
|
|
2775
|
+
const a5 = 1.061405429;
|
|
2776
|
+
const poly = ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t;
|
|
2777
|
+
return sign * (1 - poly * Math.exp(-absX * absX));
|
|
2778
|
+
}
|
|
2779
|
+
normalCdf(z) {
|
|
2780
|
+
return 0.5 * (1 + this.erf(z / Math.sqrt(2)));
|
|
2781
|
+
}
|
|
2782
|
+
erfInv(x) {
|
|
2783
|
+
const a = 0.147;
|
|
2784
|
+
const b = 2 / (Math.PI * a) + Math.log(1 - x * x) / 2;
|
|
2785
|
+
const sign = x < 0 ? -1 : 1;
|
|
2786
|
+
return sign * Math.sqrt(Math.sqrt(b * b - Math.log(1 - x * x) / a) - b);
|
|
2787
|
+
}
|
|
2788
|
+
interpolateFromSorted(sorted, q) {
|
|
2789
|
+
if (sorted.length === 0) {
|
|
2790
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: cannot interpolate empty sorted values");
|
|
2791
|
+
}
|
|
2792
|
+
if (sorted.length === 1) {
|
|
2793
|
+
const only = sorted[0];
|
|
2794
|
+
if (only === void 0) {
|
|
2795
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: missing sorted value");
|
|
2796
|
+
}
|
|
2797
|
+
return only;
|
|
2798
|
+
}
|
|
2799
|
+
const position = q * (sorted.length - 1);
|
|
2800
|
+
const lower = Math.floor(position);
|
|
2801
|
+
const upper = Math.ceil(position);
|
|
2802
|
+
const lowerValue = sorted[lower];
|
|
2803
|
+
const upperValue = sorted[upper];
|
|
2804
|
+
if (lowerValue === void 0 || upperValue === void 0) {
|
|
2805
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: quantile interpolation index out of bounds");
|
|
2806
|
+
}
|
|
2807
|
+
if (upper === lower) {
|
|
2808
|
+
return lowerValue;
|
|
2809
|
+
}
|
|
2810
|
+
const weight = position - lower;
|
|
2811
|
+
return lowerValue * (1 - weight) + upperValue * weight;
|
|
2812
|
+
}
|
|
2813
|
+
mapValueToQuantile(value, quantiles, references) {
|
|
2814
|
+
const n = quantiles.length;
|
|
2815
|
+
if (n === 0) {
|
|
2816
|
+
return 0;
|
|
2817
|
+
}
|
|
2818
|
+
if (n === 1) {
|
|
2819
|
+
const onlyReference = references[0];
|
|
2820
|
+
if (onlyReference === void 0) {
|
|
2821
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: missing quantile reference");
|
|
2822
|
+
}
|
|
2823
|
+
return onlyReference;
|
|
2824
|
+
}
|
|
2825
|
+
const firstQuantile = quantiles[0];
|
|
2826
|
+
const lastQuantile = quantiles[n - 1];
|
|
2827
|
+
if (firstQuantile === void 0 || lastQuantile === void 0) {
|
|
2828
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: missing quantile endpoints");
|
|
2829
|
+
}
|
|
2830
|
+
if (value <= firstQuantile) {
|
|
2831
|
+
return 0;
|
|
2832
|
+
}
|
|
2833
|
+
if (value >= lastQuantile) {
|
|
2834
|
+
return 1;
|
|
2835
|
+
}
|
|
2836
|
+
let left = 0;
|
|
2837
|
+
let right = n - 1;
|
|
2838
|
+
while (left + 1 < right) {
|
|
2839
|
+
const mid = Math.floor((left + right) / 2);
|
|
2840
|
+
const midValue = quantiles[mid];
|
|
2841
|
+
if (midValue === void 0) {
|
|
2842
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: missing quantile midpoint");
|
|
2843
|
+
}
|
|
2844
|
+
if (midValue <= value) {
|
|
2845
|
+
left = mid;
|
|
2846
|
+
} else {
|
|
2847
|
+
right = mid;
|
|
2848
|
+
}
|
|
2849
|
+
}
|
|
2850
|
+
const qLeft = quantiles[left];
|
|
2851
|
+
const qRight = quantiles[right];
|
|
2852
|
+
const rLeft = references[left];
|
|
2853
|
+
const rRight = references[right];
|
|
2854
|
+
if (qLeft === void 0 || qRight === void 0 || rLeft === void 0 || rRight === void 0) {
|
|
2855
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: missing quantile interpolation points");
|
|
2856
|
+
}
|
|
2857
|
+
if (qRight <= qLeft) {
|
|
2858
|
+
return (rLeft + rRight) / 2;
|
|
2859
|
+
}
|
|
2860
|
+
const ratio = (value - qLeft) / (qRight - qLeft);
|
|
2861
|
+
return rLeft + ratio * (rRight - rLeft);
|
|
2862
|
+
}
|
|
2863
|
+
mapQuantileToValue(quantile, quantiles, references) {
|
|
2864
|
+
const n = references.length;
|
|
2865
|
+
if (n === 0) {
|
|
2866
|
+
return 0;
|
|
2867
|
+
}
|
|
2868
|
+
if (n === 1) {
|
|
2869
|
+
const onlyQuantile = quantiles[0];
|
|
2870
|
+
if (onlyQuantile === void 0) {
|
|
2871
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: missing quantile value");
|
|
2872
|
+
}
|
|
2873
|
+
return onlyQuantile;
|
|
2874
|
+
}
|
|
2875
|
+
const firstRef = references[0];
|
|
2876
|
+
const lastRef = references[n - 1];
|
|
2877
|
+
if (firstRef === void 0 || lastRef === void 0) {
|
|
2878
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: missing reference endpoints");
|
|
2879
|
+
}
|
|
2880
|
+
if (quantile <= firstRef) {
|
|
2881
|
+
const firstQuantile = quantiles[0];
|
|
2882
|
+
if (firstQuantile === void 0) {
|
|
2883
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: missing quantile endpoints");
|
|
2884
|
+
}
|
|
2885
|
+
return firstQuantile;
|
|
2886
|
+
}
|
|
2887
|
+
if (quantile >= lastRef) {
|
|
2888
|
+
const lastQuantile = quantiles[n - 1];
|
|
2889
|
+
if (lastQuantile === void 0) {
|
|
2890
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: missing quantile endpoints");
|
|
2891
|
+
}
|
|
2892
|
+
return lastQuantile;
|
|
2893
|
+
}
|
|
2894
|
+
let left = 0;
|
|
2895
|
+
let right = n - 1;
|
|
2896
|
+
while (left + 1 < right) {
|
|
2897
|
+
const mid = Math.floor((left + right) / 2);
|
|
2898
|
+
const midRef = references[mid];
|
|
2899
|
+
if (midRef === void 0) {
|
|
2900
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: missing quantile reference");
|
|
2901
|
+
}
|
|
2902
|
+
if (midRef <= quantile) {
|
|
2903
|
+
left = mid;
|
|
2904
|
+
} else {
|
|
2905
|
+
right = mid;
|
|
2906
|
+
}
|
|
2907
|
+
}
|
|
2908
|
+
const rLeft = references[left];
|
|
2909
|
+
const rRight = references[right];
|
|
2910
|
+
const qLeft = quantiles[left];
|
|
2911
|
+
const qRight = quantiles[right];
|
|
2912
|
+
if (rLeft === void 0 || rRight === void 0 || qLeft === void 0 || qRight === void 0) {
|
|
2913
|
+
throw new chunkJSCDE774_cjs.DeepboxError("Internal error: missing quantile interpolation points");
|
|
2914
|
+
}
|
|
2915
|
+
if (rRight <= rLeft) {
|
|
2916
|
+
return (qLeft + qRight) / 2;
|
|
2917
|
+
}
|
|
2918
|
+
const ratio = (quantile - rLeft) / (rRight - rLeft);
|
|
2919
|
+
return qLeft + ratio * (qRight - qLeft);
|
|
2920
|
+
}
|
|
2921
|
+
fitTransform(X) {
|
|
2922
|
+
return this.fit(X).transform(X);
|
|
2923
|
+
}
|
|
2924
|
+
};
|
|
2925
|
+
var PowerTransformer = class {
  /** True once `fit` has completed. */
  fitted = false;
  /** Selected transform: "box-cox" or "yeo-johnson". */
  method;
  /** Per-feature lambda chosen by `fit`. */
  lambdas_;
  /** Whether transformed features are standardized with `mean_` / `scale_`. */
  standardize;
  /** Per-feature mean of the transformed fit data (set only when `standardize` is true). */
  mean_;
  /**
   * Per-feature population standard deviation of the transformed fit data,
   * with 0 replaced by 1 (set only when `standardize` is true).
   */
  scale_;
  /**
   * Creates a new PowerTransformer.
   *
   * @param options - Configuration options
   * @param options.method - "box-cox" or "yeo-johnson" (default: "yeo-johnson")
   * @param options.standardize - Whether to standardize transformed features (default: false)
   * @param options.copy - Accepted for API parity; transforms are always out-of-place (default: true)
   * @throws {InvalidParameterError} If `method` is neither "box-cox" nor "yeo-johnson"
   */
  constructor(options = {}) {
    this.method = options.method ?? "yeo-johnson";
    if (this.method !== "box-cox" && this.method !== "yeo-johnson") {
      throw new chunkJSCDE774_cjs.InvalidParameterError(
        "method must be 'box-cox' or 'yeo-johnson'",
        "method",
        this.method
      );
    }
    this.standardize = parseBooleanOption(options.standardize, "standardize", false);
    // `copy` is validated but its value is not stored.
    parseBooleanOption(options.copy, "copy", true);
  }
  /**
   * Choose one lambda per feature by maximizing the profile log-likelihood
   * and, when `standardize` is true, record the mean and population standard
   * deviation of the transformed fit data.
   *
   * @param X - 2D numeric tensor with at least one element
   * @returns This transformer
   * @throws {InvalidParameterError} If X is empty, or if Box-Cox is selected and X holds a value <= 0
   * @throws {DeepboxError} If an element lookup yields undefined
   */
  fit(X) {
    if (X.size === 0) {
      throw new chunkJSCDE774_cjs.InvalidParameterError("X must contain at least one sample", "X");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    validateFiniteData(X, "X");
    const [nSamples, nFeatures] = getShape2D(X);
    const data = getNumericData2(X, "X");
    const [stride0, stride1] = getStrides2D(X);
    const lambdas = new Array(nFeatures);
    const means = this.standardize ? new Array(nFeatures).fill(0) : void 0;
    const scales = this.standardize ? new Array(nFeatures).fill(0) : void 0;
    for (let j = 0; j < nFeatures; j++) {
      const featureValues = new Array(nSamples);
      for (let i = 0; i < nSamples; i++) {
        const raw = data[X.offset + i * stride0 + j * stride1];
        if (raw === void 0) {
          throw new chunkJSCDE774_cjs.DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        const value = Number(raw);
        if (this.method === "box-cox" && value <= 0) {
          throw new chunkJSCDE774_cjs.InvalidParameterError(
            `Box-Cox requires strictly positive values in fit data (feature ${j})`,
            "X",
            value
          );
        }
        featureValues[i] = value;
      }
      const lambda = this.optimizeLambda(featureValues);
      lambdas[j] = lambda;
      if (this.standardize && means && scales) {
        // Transform each sample once and reuse the result for both the mean
        // and the variance pass.
        const transformedValues = featureValues.map(
          (value) => this.method === "box-cox" ? this.boxCoxTransformValue(value, lambda) : this.yeoJohnsonTransformValue(value, lambda)
        );
        let sum = 0;
        for (const transformed of transformedValues) {
          sum += transformed;
        }
        const mean = sum / nSamples;
        means[j] = mean;
        let sumSqDiff = 0;
        for (const transformed of transformedValues) {
          const diff = transformed - mean;
          sumSqDiff += diff * diff;
        }
        const variance = sumSqDiff / nSamples;
        const std = Math.sqrt(Math.max(variance, 0));
        // A constant transformed feature keeps scale 1 so transform never divides by zero.
        scales[j] = std === 0 ? 1 : std;
      }
    }
    this.lambdas_ = lambdas;
    this.mean_ = this.standardize ? means : void 0;
    this.scale_ = this.standardize ? scales : void 0;
    this.fitted = true;
    return this;
  }
  /**
   * Apply the fitted power transform to every element of X and, when
   * `standardize` is true, subtract the fitted mean and divide by the fitted scale.
   *
   * @param X - 2D numeric tensor
   * @returns float64 tensor of transformed values; a `[0, nFeatures]` zeros tensor when X has no rows
   * @throws {NotFittedError} If the transformer is not fitted
   * @throws {InvalidParameterError} If Box-Cox is selected and X holds a value <= 0
   * @throws {DeepboxError} If standardization stats or a fitted lambda are missing
   */
  transform(X) {
    if (!this.fitted || !this.lambdas_) {
      throw new chunkJSCDE774_cjs.NotFittedError("PowerTransformer must be fitted before transform");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    validateFiniteData(X, "X");
    const [nSamples, nFeatures] = getShape2D(X);
    const data = getNumericData2(X, "X");
    const [stride0, stride1] = getStrides2D(X);
    if (this.standardize && (!this.mean_ || !this.scale_)) {
      throw new chunkJSCDE774_cjs.DeepboxError("PowerTransformer internal error: missing standardization stats");
    }
    if (nSamples === 0) {
      // An empty row list carries no feature dimension, so build the empty
      // result with an explicit shape, as QuantileTransformer does.
      return chunk6AE5FKKQ_cjs.zeros([0, nFeatures], { dtype: "float64" });
    }
    const result = Array.from({ length: nSamples }, () => new Array(nFeatures).fill(0));
    for (let i = 0; i < nSamples; i++) {
      const rowBase = X.offset + i * stride0;
      for (let j = 0; j < nFeatures; j++) {
        const raw = data[rowBase + j * stride1];
        if (raw === void 0) {
          throw new chunkJSCDE774_cjs.DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        const val = Number(raw);
        const lambda = this.lambdas_[j];
        if (lambda === void 0) {
          throw new chunkJSCDE774_cjs.DeepboxError(`Internal error: missing fitted lambda for feature ${j}`);
        }
        let transformed;
        if (this.method === "box-cox") {
          if (val <= 0) {
            throw new chunkJSCDE774_cjs.InvalidParameterError("Box-Cox requires strictly positive values", "X", val);
          }
          transformed = this.boxCoxTransformValue(val, lambda);
        } else {
          transformed = this.yeoJohnsonTransformValue(val, lambda);
        }
        if (this.standardize && this.mean_ && this.scale_) {
          const mean = this.mean_[j] ?? 0;
          const scale = this.scale_[j] ?? 1;
          transformed = (transformed - mean) / scale;
        }
        const row = result[i];
        if (row === void 0) {
          throw new chunkJSCDE774_cjs.DeepboxError("Internal error: result row access failed");
        }
        row[j] = transformed;
      }
    }
    return chunk6AE5FKKQ_cjs.tensor(result, { dtype: "float64", device: X.device });
  }
  /**
   * Inverse transform data back to the original feature space.
   * If `standardize=true`, de-standardizes before applying the inverse power transform.
   *
   * @param X - Transformed data (2D tensor)
   * @returns Data in the original feature space; a `[0, nFeatures]` zeros tensor when X has no rows
   * @throws {NotFittedError} If transformer is not fitted
   * @throws {InvalidParameterError} If a value lies outside the range the inverse can represent
   * @throws {DeepboxError} If standardization stats or a fitted lambda are missing
   */
  inverseTransform(X) {
    if (!this.fitted || !this.lambdas_) {
      throw new chunkJSCDE774_cjs.NotFittedError("PowerTransformer must be fitted before inverse_transform");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    validateFiniteData(X, "X");
    const [nSamples, nFeatures] = getShape2D(X);
    const data = getNumericData2(X, "X");
    const [stride0, stride1] = getStrides2D(X);
    if (this.standardize && (!this.mean_ || !this.scale_)) {
      throw new chunkJSCDE774_cjs.DeepboxError("PowerTransformer internal error: missing standardization stats");
    }
    if (nSamples === 0) {
      // Same explicit-shape empty result as transform().
      return chunk6AE5FKKQ_cjs.zeros([0, nFeatures], { dtype: "float64" });
    }
    const result = Array.from({ length: nSamples }, () => new Array(nFeatures).fill(0));
    for (let i = 0; i < nSamples; i++) {
      const rowBase = X.offset + i * stride0;
      for (let j = 0; j < nFeatures; j++) {
        const raw = data[rowBase + j * stride1];
        if (raw === void 0) {
          throw new chunkJSCDE774_cjs.DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        let val = Number(raw);
        if (this.standardize && this.mean_ && this.scale_) {
          const mean = this.mean_[j] ?? 0;
          const scale = this.scale_[j] ?? 1;
          val = val * scale + mean;
        }
        const lambda = this.lambdas_[j];
        if (lambda === void 0) {
          throw new chunkJSCDE774_cjs.DeepboxError(`Internal error: missing fitted lambda for feature ${j}`);
        }
        let inverted;
        if (this.method === "box-cox") {
          inverted = this.boxCoxInverseValue(val, lambda);
        } else {
          inverted = this.yeoJohnsonInverseValue(val, lambda);
        }
        const row = result[i];
        if (row === void 0) {
          throw new chunkJSCDE774_cjs.DeepboxError("Internal error: result row access failed");
        }
        row[j] = inverted;
      }
    }
    return chunk6AE5FKKQ_cjs.tensor(result, { dtype: "float64", device: X.device });
  }
  /**
   * Box-Cox transform of one value: `log(value)` when |lambda| < 1e-12,
   * otherwise `(value^lambda - 1) / lambda`. Callers guarantee value > 0.
   */
  boxCoxTransformValue(value, lambda) {
    return Math.abs(lambda) < 1e-12 ? Math.log(value) : (value ** lambda - 1) / lambda;
  }
  /**
   * Yeo-Johnson transform of one value. Non-negative inputs use the Box-Cox
   * form on `value + 1`; negative inputs use the mirrored form on `1 - value`
   * with exponent `2 - lambda`. Each branch falls back to a logarithm when
   * its exponent is within 1e-12 of zero.
   */
  yeoJohnsonTransformValue(value, lambda) {
    if (value >= 0) {
      return Math.abs(lambda) < 1e-12 ? Math.log(value + 1) : ((value + 1) ** lambda - 1) / lambda;
    }
    const twoMinusLambda = 2 - lambda;
    return Math.abs(twoMinusLambda) < 1e-12 ? -Math.log(1 - value) : -((1 - value) ** twoMinusLambda - 1) / twoMinusLambda;
  }
  /**
   * Inverse of `boxCoxTransformValue`.
   *
   * @throws {InvalidParameterError} If `value * lambda + 1` is not positive
   */
  boxCoxInverseValue(value, lambda) {
    if (Math.abs(lambda) < 1e-12) {
      return Math.exp(value);
    }
    const base = value * lambda + 1;
    if (base <= 0) {
      throw new chunkJSCDE774_cjs.InvalidParameterError("Box-Cox inverse encountered invalid value", "X", value);
    }
    return base ** (1 / lambda);
  }
  /**
   * Inverse of `yeoJohnsonTransformValue`. The branch is chosen from the sign
   * of the transformed value.
   *
   * @throws {InvalidParameterError} If the power base of the chosen branch is not positive
   */
  yeoJohnsonInverseValue(value, lambda) {
    if (value >= 0) {
      if (Math.abs(lambda) < 1e-12) {
        return Math.exp(value) - 1;
      }
      const base2 = value * lambda + 1;
      if (base2 <= 0) {
        throw new chunkJSCDE774_cjs.InvalidParameterError(
          "Yeo-Johnson inverse encountered invalid value",
          "X",
          value
        );
      }
      return base2 ** (1 / lambda) - 1;
    }
    const twoMinusLambda = 2 - lambda;
    if (Math.abs(twoMinusLambda) < 1e-12) {
      return 1 - Math.exp(-value);
    }
    const base = 1 - value * twoMinusLambda;
    if (base <= 0) {
      throw new chunkJSCDE774_cjs.InvalidParameterError("Yeo-Johnson inverse encountered invalid value", "X", value);
    }
    return 1 - base ** (1 / twoMinusLambda);
  }
  /**
   * Profile log-likelihood of `lambda` for one feature:
   * `-0.5 * n * log(variance of transformed values) + jacobian`.
   *
   * @param values - Raw feature values
   * @param lambda - Candidate lambda
   * @returns The score, or -Infinity when a value is invalid for Box-Cox, a
   *   transformed value is not finite, or the variance is non-finite or <= 1e-15
   * @throws {DeepboxError} If an entry of `values` is undefined
   */
  logLikelihood(values, lambda) {
    const transformed = new Array(values.length);
    let jacobian = 0;
    for (let i = 0; i < values.length; i++) {
      const value = values[i];
      if (value === void 0) {
        throw new chunkJSCDE774_cjs.DeepboxError("Internal error: missing feature value during optimization");
      }
      let transformedValue;
      if (this.method === "box-cox") {
        if (value <= 0) {
          return Number.NEGATIVE_INFINITY;
        }
        transformedValue = this.boxCoxTransformValue(value, lambda);
        jacobian += (lambda - 1) * Math.log(value);
      } else {
        transformedValue = this.yeoJohnsonTransformValue(value, lambda);
        jacobian += value >= 0 ? (lambda - 1) * Math.log(value + 1) : (1 - lambda) * Math.log(1 - value);
      }
      if (!Number.isFinite(transformedValue)) {
        return Number.NEGATIVE_INFINITY;
      }
      transformed[i] = transformedValue;
    }
    let sum = 0;
    for (const value of transformed) {
      sum += value;
    }
    const mean = sum / transformed.length;
    let varianceSum = 0;
    for (const value of transformed) {
      const delta = value - mean;
      varianceSum += delta * delta;
    }
    const variance = varianceSum / transformed.length;
    if (!Number.isFinite(variance) || variance <= 1e-15) {
      return Number.NEGATIVE_INFINITY;
    }
    return -0.5 * transformed.length * Math.log(variance) + jacobian;
  }
  /**
   * Pick the lambda that maximizes `logLikelihood` for one feature.
   *
   * Runs a golden-section search on [-5, 5] (at most 80 iterations, stopping
   * once the bracket is narrower than 1e-6), then scores the bracket ends,
   * its midpoint and the fixed candidates 0, 1, 2, -2, and keeps the best.
   *
   * @param values - Raw feature values
   * @returns Best lambda; 1 when there are fewer than two values, the value
   *   range is non-finite or <= 1e-15, or no candidate scores above -Infinity
   */
  optimizeLambda(values) {
    if (values.length < 2) {
      return 1;
    }
    let minValue = Number.POSITIVE_INFINITY;
    let maxValue = Number.NEGATIVE_INFINITY;
    for (const value of values) {
      if (value < minValue) minValue = value;
      if (value > maxValue) maxValue = value;
    }
    if (!Number.isFinite(minValue) || !Number.isFinite(maxValue) || maxValue - minValue <= 1e-15) {
      return 1;
    }
    let left = -5;
    let right = 5;
    const phi = (Math.sqrt(5) - 1) / 2;
    // c < d are the two interior probe points of the current bracket.
    let c = right - phi * (right - left);
    let d = left + phi * (right - left);
    let fc = this.logLikelihood(values, c);
    let fd = this.logLikelihood(values, d);
    for (let iter = 0; iter < 80; iter++) {
      if (Math.abs(right - left) < 1e-6) break;
      if (fc > fd) {
        // The better probe is c: drop (d, right] and reuse c as the new d.
        right = d;
        d = c;
        fd = fc;
        c = right - phi * (right - left);
        fc = this.logLikelihood(values, c);
      } else {
        // Otherwise drop [left, c) and reuse d as the new c.
        left = c;
        c = d;
        fc = fd;
        d = left + phi * (right - left);
        fd = this.logLikelihood(values, d);
      }
    }
    const candidates = [left, right, (left + right) / 2, 0, 1, 2, -2];
    let bestLambda = 1;
    let bestScore = Number.NEGATIVE_INFINITY;
    for (const lambda of candidates) {
      const score = this.logLikelihood(values, lambda);
      if (score > bestScore) {
        bestScore = score;
        bestLambda = lambda;
      }
    }
    return Number.isFinite(bestLambda) ? bestLambda : 1;
  }
  /**
   * Fit on X, then transform the same X.
   *
   * @param X - 2D numeric tensor
   * @returns Result of `transform(X)` after fitting
   */
  fitTransform(X) {
    return this.fit(X).transform(X);
  }
};
|
|
3249
|
+
|
|
3250
|
+
// src/preprocess/split.ts
|
|
3251
|
+
/**
 * Ensure `nSplits` is a finite integer of at least 2.
 *
 * @param nSplits - Requested number of splits
 * @throws {InvalidParameterError} If the value is not a finite integer >= 2
 */
function validateNSplits(nSplits) {
  const isUsable = Number.isFinite(nSplits) && Number.isInteger(nSplits) && nSplits >= 2;
  if (isUsable) {
    return;
  }
  throw new chunkJSCDE774_cjs.InvalidParameterError("nSplits must be an integer at least 2", "nSplits", nSplits);
}
|
|
3256
|
+
/**
 * Classify a train/test size option.
 *
 * @param value - `undefined`, a fraction in (0, 1), or an integer count >= 1
 * @param name - Option name used in error messages
 * @returns `undefined` when no value was given, `{ kind: "fraction", value }`
 *   for values below 1, `{ kind: "count", value }` for integers >= 1
 * @throws {InvalidParameterError} If the value is not finite, not positive,
 *   or is >= 1 without being an integer
 */
function parseSplitSpec(value, name) {
  if (value === void 0) {
    return void 0;
  }
  const isPositiveFinite = Number.isFinite(value) && value > 0;
  if (!isPositiveFinite) {
    throw new chunkJSCDE774_cjs.InvalidParameterError(`${name} must be a positive number`, name, value);
  }
  if (value < 1) {
    return { kind: "fraction", value };
  }
  if (Number.isInteger(value)) {
    return { kind: "count", value };
  }
  throw new chunkJSCDE774_cjs.InvalidParameterError(
    `${name} must be an integer when provided as an absolute size`,
    name,
    value
  );
}
|
|
3275
|
+
/**
 * Turn a parsed split spec into a sample count.
 *
 * @param spec - `{ kind: "count" | "fraction", value }`
 * @param nSamples - Total number of samples
 * @param isTrain - When true a fractional size is rounded down, otherwise up
 * @returns `spec.value` for counts; the rounded `nSamples * spec.value` for fractions
 */
function resolveSplitCount(spec, nSamples, isTrain) {
  if (spec.kind === "count") {
    return spec.value;
  }
  const exact = nSamples * spec.value;
  if (isTrain) {
    return Math.floor(exact);
  }
  return Math.ceil(exact);
}
|
|
3282
|
+
/**
 * Resolve the number of train and test samples from the user options.
 *
 * When neither size is given the test size defaults to 0.25. A size that is
 * left unspecified is derived as the remainder of the other one.
 *
 * @param nSamples - Total number of samples
 * @param trainSize - Optional fraction (< 1) or integer count for the train split
 * @param testSize - Optional fraction (< 1) or integer count for the test split
 * @returns `[nTrain, nTest]`
 * @throws {InvalidParameterError} If a count exceeds `nSamples`, two fractions
 *   sum above 1, the combined counts exceed `nSamples`, or either split is empty
 * @throws {DeepboxError} If neither count could be resolved
 */
function resolveTrainTestCounts(nSamples, trainSize, testSize) {
  const nothingSpecified = trainSize === void 0 && testSize === void 0;
  const effectiveTestSize = nothingSpecified ? 0.25 : testSize;
  const trainSpec = parseSplitSpec(trainSize, "trainSize");
  const testSpec = parseSplitSpec(effectiveTestSize, "testSize");
  const trainKind = trainSpec?.kind;
  const testKind = testSpec?.kind;
  if (trainKind === "count" && trainSpec.value > nSamples) {
    throw new chunkJSCDE774_cjs.InvalidParameterError(
      "trainSize must not exceed number of samples",
      "trainSize",
      trainSpec.value
    );
  }
  if (testKind === "count" && testSpec.value > nSamples) {
    throw new chunkJSCDE774_cjs.InvalidParameterError(
      "testSize must not exceed number of samples",
      "testSize",
      testSpec.value
    );
  }
  const bothFractions = trainKind === "fraction" && testKind === "fraction";
  if (bothFractions && trainSpec.value + testSpec.value > 1) {
    throw new chunkJSCDE774_cjs.InvalidParameterError(
      "trainSize and testSize fractions must sum to at most 1",
      "trainSize",
      trainSpec.value
    );
  }
  const requestedTrain = trainSpec === void 0 ? void 0 : resolveSplitCount(trainSpec, nSamples, true);
  const requestedTest = testSpec === void 0 ? void 0 : resolveSplitCount(testSpec, nSamples, false);
  if (requestedTrain === void 0 && requestedTest === void 0) {
    throw new chunkJSCDE774_cjs.DeepboxError("Internal error: failed to resolve split sizes");
  }
  // Whichever side was not requested takes the samples the other side leaves over.
  const nTrain = requestedTrain === void 0 ? nSamples - (requestedTest ?? 0) : requestedTrain;
  const nTest = requestedTest === void 0 ? nSamples - nTrain : requestedTest;
  if (nTrain + nTest > nSamples) {
    throw new chunkJSCDE774_cjs.InvalidParameterError(
      "trainSize and testSize exceed number of samples",
      "trainSize",
      trainSize
    );
  }
  if (nTrain < 1) {
    throw new chunkJSCDE774_cjs.InvalidParameterError("trainSize must be at least 1 sample", "trainSize", trainSize);
  }
  if (nTest < 1) {
    throw new chunkJSCDE774_cjs.InvalidParameterError("testSize must be at least 1 sample", "testSize", testSize);
  }
  return [nTrain, nTest];
}
|
|
3333
|
+
/**
 * Comparator for class labels.
 *
 * Two numbers compare by difference, two bigints by ordering, and every
 * other combination by `localeCompare` on their string forms.
 *
 * @param a - First label
 * @param b - Second label
 * @returns Negative, zero, or positive, as expected by `Array.prototype.sort`
 */
function compareLabels(a, b) {
  const kindA = typeof a;
  const kindB = typeof b;
  if (kindA === "number" && kindB === "number") {
    return a - b;
  }
  if (kindA === "bigint" && kindB === "bigint") {
    if (a === b) {
      return 0;
    }
    return a < b ? -1 : 1;
  }
  return String(a).localeCompare(String(b));
}
|
|
3342
|
+
/**
 * Split `total` items into `nSplits` fold sizes that differ by at most one.
 *
 * @param total - Number of items to distribute
 * @param nSplits - Number of folds
 * @returns Array of fold sizes; the first `total % nSplits` folds get one extra item
 */
function makeFoldSizes(total, nSplits) {
  const baseSize = Math.floor(total / nSplits);
  const extras = total % nSplits;
  const sizeOfFold = (_unused, foldIndex) => (foldIndex < extras ? baseSize + 1 : baseSize);
  return Array.from({ length: nSplits }, sizeOfFold);
}
|
|
3347
|
+
/**
 * Read one element through the tensor's `at(...)` accessor.
 *
 * @param t - Object exposing `at(...indices)`
 * @param indices - Index list, spread into `at`
 * @returns The element when it is a string, number, or bigint
 * @throws {DeepboxError} If the element has any other type
 */
function readTensorValue(t, indices) {
  const element = t.at(...indices);
  switch (typeof element) {
    case "string":
    case "number":
    case "bigint":
      return element;
    default:
      throw new chunkJSCDE774_cjs.DeepboxError("Internal error: unsupported tensor value type");
  }
}
|
|
3354
|
+
/**
 * Store one element into the tensor's backing `data` at a flat index.
 *
 * String tensors accept only strings. Other tensors reject strings, convert
 * to bigint when `data` is a BigInt64Array, and convert with `Number(...)`
 * for every other storage.
 *
 * @param t - Object exposing `dtype` and a writable `data` buffer
 * @param flatIndex - Position inside `t.data`
 * @param value - String, number, or bigint to store
 * @throws {DeepboxError} If a string/non-string value does not match the tensor kind
 */
function writeTensorValue(t, flatIndex, value) {
  const valueIsString = typeof value === "string";
  if (t.dtype === "string") {
    if (!valueIsString) {
      throw new chunkJSCDE774_cjs.DeepboxError("Internal error: expected string value for string tensor");
    }
    t.data[flatIndex] = value;
    return;
  }
  if (valueIsString) {
    throw new chunkJSCDE774_cjs.DeepboxError("Internal error: encountered string value in numeric tensor");
  }
  if (t.data instanceof BigInt64Array) {
    t.data[flatIndex] = typeof value === "bigint" ? value : BigInt(value);
  } else {
    t.data[flatIndex] = Number(value);
  }
}
|
|
3371
|
+
/**
 * Copy the selected rows of a 2D tensor into a new tensor of the same dtype.
 *
 * Elements are written into the output row by row, at flat position
 * `out.offset + outputRow * nFeatures + column`.
 *
 * @param X - Source 2D tensor
 * @param sampleIndices - Row indices to copy, in output order
 * @returns New tensor of shape `[sampleIndices.length, nFeatures]`
 * @throws {DeepboxError} If an entry of `sampleIndices` is undefined
 */
function takeRows2D(X, sampleIndices) {
  const [, featureCount] = getShape2D(X);
  const rowCount = sampleIndices.length;
  const out = chunk6AE5FKKQ_cjs.zeros([rowCount, featureCount], { dtype: X.dtype });
  for (let outRow = 0; outRow < rowCount; outRow += 1) {
    const sourceRow = sampleIndices[outRow];
    if (sourceRow === void 0) {
      throw new chunkJSCDE774_cjs.DeepboxError("Internal error: sample index access failed");
    }
    for (let col = 0; col < featureCount; col += 1) {
      const flatTarget = out.offset + outRow * featureCount + col;
      writeTensorValue(out, flatTarget, readTensorValue(X, [sourceRow, col]));
    }
  }
  return out;
}
|
|
3386
|
+
/**
 * Copy the selected entries of a 1D tensor into a new tensor of the same dtype.
 *
 * @param y - Source tensor; must be one-dimensional
 * @param sampleIndices - Entry indices to copy, in output order
 * @returns New tensor of shape `[sampleIndices.length]`
 * @throws {ShapeError} If `y` is not 1D
 * @throws {DeepboxError} If an entry of `sampleIndices` is undefined
 */
function takeVector(y, sampleIndices) {
  if (y.ndim !== 1) {
    throw new chunkJSCDE774_cjs.ShapeError(`y must be a 1D tensor, got ${y.ndim}D`);
  }
  const count = sampleIndices.length;
  const out = chunk6AE5FKKQ_cjs.zeros([count], { dtype: y.dtype });
  for (let position = 0; position < count; position += 1) {
    const sourceIndex = sampleIndices[position];
    if (sourceIndex === void 0) {
      throw new chunkJSCDE774_cjs.DeepboxError("Internal error: sample index access failed");
    }
    const element = readTensorValue(y, [sourceIndex]);
    writeTensorValue(out, out.offset + position, element);
  }
  return out;
}
|
|
3401
|
+
/**
 * Split a feature tensor (and optionally a target tensor) into train and test subsets.
 *
 * Without `options.stratify` the sample indices are optionally shuffled and cut
 * into a leading train part and a following test part. With `options.stratify`
 * the test set is allocated per label proportionally to the label frequency
 * (largest-remainder rounding, clamped by per-class min/max), and the train set
 * is drawn from the samples left over.
 *
 * @param X - Feature tensor; `getShape2D(X)` supplies the sample count.
 * @param y - Optional target tensor whose `shape[0]` must equal the sample count.
 * @param options - Optional settings:
 *   - `shuffle` (default `true`): shuffle indices before splitting.
 *   - `randomState`: seed handed to `createSeededRandom`; `Math.random` is used when omitted.
 *   - `trainSize` / `testSize`: forwarded to `resolveTrainTestCounts`.
 *   - `stratify`: 1D tensor of labels, one per sample.
 * @returns `[XTrain, XTest, yTrain, yTest]` when `y` is given, otherwise `[XTrain, XTest]`.
 * @throws InvalidParameterError for an empty `X`, mismatched sample counts, or
 *   stratification constraints that cannot be met.
 * @throws ShapeError if `stratify` is not one-dimensional.
 * @throws DeepboxError if the internal allocation cannot reach the requested sizes.
 */
function trainTestSplit(X, y, options) {
  const opts = options ?? {};
  const shuffle = opts.shuffle ?? true;
  const randomState = opts.randomState;
  const [nSamples] = getShape2D(X);
  if (nSamples === 0) {
    throw new chunkJSCDE774_cjs.InvalidParameterError("Cannot split empty array", "X");
  }
  if (y) {
    const yShape0 = y.shape[0];
    if (yShape0 === void 0 || yShape0 !== nSamples) {
      throw new chunkJSCDE774_cjs.InvalidParameterError("X and y must have same number of samples", "y", yShape0);
    }
  }
  if (opts.stratify) {
    if (opts.stratify.ndim !== 1) {
      throw new chunkJSCDE774_cjs.ShapeError(`stratify must be a 1D tensor, got ${opts.stratify.ndim}D`);
    }
    const stratifyShape0 = opts.stratify.shape[0];
    if (stratifyShape0 === void 0 || stratifyShape0 !== nSamples) {
      throw new chunkJSCDE774_cjs.InvalidParameterError(
        "stratify must have same number of samples as X",
        "stratify",
        stratifyShape0
      );
    }
  }
  const [nTrain, nTest] = resolveTrainTestCounts(nSamples, opts.trainSize, opts.testSize);
  const indices = Array.from({ length: nSamples }, (_, i) => i);
  const random = randomState !== void 0 ? createSeededRandom(randomState) : Math.random;
  const maybeShuffle = (arr) => {
    if (!shuffle) return;
    shuffleIndicesInPlace(arr, random);
  };
  // Element-wise append. `target.push(...source)` passes every element as a call
  // argument and throws RangeError once `source` exceeds the engine's argument limit.
  const appendRange = (target, source, start, end) => {
    for (let k = start; k < end; k++) {
      target.push(source[k]);
    }
  };
  let trainIndices = [];
  let testIndices = [];
  if (opts.stratify) {
    const stratify = opts.stratify;
    // label -> sample indices carrying that label, in original order.
    const labelMap = /* @__PURE__ */ new Map();
    for (let i = 0; i < nSamples; i++) {
      const label = readTensorValue(stratify, [i]);
      let bucket = labelMap.get(label);
      if (bucket === void 0) {
        bucket = [];
        labelMap.set(label, bucket);
      }
      bucket.push(i);
    }
    const labels = Array.from(labelMap.keys()).sort(compareLabels);
    const nClasses = labels.length;
    const classSizes = labels.map((label) => labelMap.get(label)?.length ?? 0);
    const hasSingleton = classSizes.some((size) => size < 2);
    // Singleton classes are rejected only for unseeded shuffled splits.
    if (hasSingleton && shuffle && randomState === void 0) {
      throw new chunkJSCDE774_cjs.InvalidParameterError(
        "stratify requires at least 2 samples per class",
        "stratify",
        classSizes
      );
    }
    if (opts.trainSize !== void 0 && nTrain < nClasses) {
      throw new chunkJSCDE774_cjs.InvalidParameterError(
        "trainSize must be at least the number of classes when stratifying",
        "trainSize",
        nTrain
      );
    }
    if (nTest < nClasses) {
      throw new chunkJSCDE774_cjs.InvalidParameterError(
        "testSize must be at least the number of classes when stratifying",
        "testSize",
        nTest
      );
    }
    const testFraction = nTest / nSamples;
    // When the train set cannot hold one sample per class, a class may end up
    // entirely in the test set (or entirely out of it).
    const allowEmptyClassSplits = nTrain < nClasses;
    const counts = labels.map((label) => {
      const size = labelMap.get(label)?.length ?? 0;
      const exact = size * testFraction;
      let testCount = Math.floor(exact);
      let remainder = exact - testCount;
      let min = allowEmptyClassSplits ? 0 : 1;
      let max = allowEmptyClassSplits ? size : size - 1;
      if (size < 2) {
        // A class with fewer than two samples starts with no test samples.
        min = 0;
        max = allowEmptyClassSplits ? size : 0;
        testCount = 0;
        remainder = 0;
      } else {
        if (testCount < min) testCount = min;
        if (testCount > max) testCount = max;
      }
      return { label, size, testCount, remainder, min, max };
    });
    // Difference between the requested test size and what flooring/clamping produced.
    let remaining = nTest - counts.reduce((sum, c) => sum + c.testCount, 0);
    if (remaining !== 0) {
      // Hand out (or take back) one sample at a time: largest fractional
      // remainder first when adding, smallest first when removing.
      const order = remaining > 0 ? [...counts].sort((a, b) => {
        if (b.remainder !== a.remainder) return b.remainder - a.remainder;
        return compareLabels(a.label, b.label);
      }) : [...counts].sort((a, b) => {
        if (a.remainder !== b.remainder) return a.remainder - b.remainder;
        return compareLabels(a.label, b.label);
      });
      // The guard bounds the number of passes so an unsatisfiable allocation
      // terminates with an error instead of looping forever.
      let guard = 0;
      while (remaining !== 0 && guard < counts.length * 2) {
        for (const entry of order) {
          if (remaining === 0) break;
          if (remaining > 0 && entry.testCount < entry.max) {
            entry.testCount += 1;
            remaining -= 1;
          } else if (remaining < 0 && entry.testCount > entry.min) {
            entry.testCount -= 1;
            remaining += 1;
          }
        }
        guard += 1;
      }
      if (remaining !== 0) {
        throw new chunkJSCDE774_cjs.DeepboxError("Internal error: unable to allocate stratified split sizes");
      }
    }
    const remainingTrainPool = [];
    for (const entry of counts) {
      const labelIndices = [...labelMap.get(entry.label) ?? []];
      maybeShuffle(labelIndices);
      // Same bounds as slice(0, testCount) / slice(testCount).
      const cut = Math.max(0, Math.min(entry.testCount, labelIndices.length));
      appendRange(testIndices, labelIndices, 0, cut);
      appendRange(remainingTrainPool, labelIndices, cut, labelIndices.length);
    }
    maybeShuffle(testIndices);
    maybeShuffle(remainingTrainPool);
    trainIndices = remainingTrainPool.slice(0, nTrain);
  } else {
    maybeShuffle(indices);
    trainIndices = indices.slice(0, nTrain);
    testIndices = indices.slice(nTrain, nTrain + nTest);
  }
  if (trainIndices.length !== nTrain || testIndices.length !== nTest) {
    throw new chunkJSCDE774_cjs.DeepboxError("Internal error: resolved split indices do not match requested sizes");
  }
  const XTrain = takeRows2D(X, trainIndices);
  const XTest = takeRows2D(X, testIndices);
  if (y) {
    const yTrain = takeVector(y, trainIndices);
    const yTest = takeVector(y, testIndices);
    return [XTrain, XTest, yTrain, yTest];
  }
  return [XTrain, XTest];
}
|
|
3548
|
+
/**
 * K-fold cross-validation splitter.
 *
 * Sample indices `0..n-1` are optionally shuffled, then cut into `nSplits`
 * consecutive folds whose sizes come from `makeFoldSizes`. Each fold serves
 * once as the test set while the remaining indices form the train set.
 */
var KFold = class {
  nSplits;
  shuffle;
  randomState;
  /**
   * @param options - `nSplits` (default 5), `shuffle` (default false) and an
   *   optional `randomState` seed used only when shuffling.
   */
  constructor(options = {}) {
    const { nSplits, shuffle, randomState } = options;
    this.nSplits = nSplits ?? 5;
    this.shuffle = shuffle ?? false;
    this.randomState = randomState;
  }
  /**
   * Produce the train/test index pairs for `X`.
   *
   * @param X - Object exposing `shape`; `shape[0]` is taken as the sample count.
   * @returns Array of `[trainIndices, testIndices]`, one entry per fold.
   * @throws ShapeError if `X.shape[0]` is undefined.
   * @throws InvalidParameterError if `nSplits` exceeds the sample count.
   */
  split(X) {
    const nSamples = X.shape[0];
    if (nSamples === undefined) {
      throw new chunkJSCDE774_cjs.ShapeError("X must have valid shape[0]");
    }
    validateNSplits(this.nSplits);
    if (this.nSplits > nSamples) {
      throw new chunkJSCDE774_cjs.InvalidParameterError(
        "nSplits must not be greater than number of samples",
        "nSplits",
        this.nSplits
      );
    }
    const order = Array.from({ length: nSamples }, (_, position) => position);
    if (this.shuffle) {
      const seeded = this.randomState !== undefined;
      const rng = seeded ? createSeededRandom(this.randomState) : Math.random;
      shuffleIndicesInPlace(order, rng);
    }
    const result = [];
    const sizes = makeFoldSizes(nSamples, this.nSplits);
    let cursor = 0;
    for (let fold = 0; fold < this.nSplits; fold += 1) {
      const stop = cursor + (sizes[fold] ?? 0);
      const heldOut = order.slice(cursor, stop);
      // Everything before the fold followed by everything after it.
      const kept = order.slice(0, cursor).concat(order.slice(stop));
      result.push([kept, heldOut]);
      cursor = stop;
    }
    return result;
  }
  /** @returns The configured number of folds. */
  getNSplits() {
    return this.nSplits;
  }
};
|
|
3594
|
+
/**
 * Stratified K-fold cross-validation splitter.
 *
 * Samples are grouped by their label in `y`; each label's indices are
 * distributed over the folds using the sizes returned by `makeFoldSizes`, so
 * every fold receives a share of every class.
 */
var StratifiedKFold = class {
  nSplits;
  shuffle;
  randomState;
  /**
   * @param options - `nSplits` (default 5), `shuffle` (default false) and an
   *   optional `randomState` seed for the per-class shuffle.
   */
  constructor(options = {}) {
    this.nSplits = options.nSplits ?? 5;
    this.shuffle = options.shuffle ?? false;
    this.randomState = options.randomState;
  }
  /**
   * Produce the train/test index pairs for `X` stratified by `y`.
   *
   * @param X - Object exposing `shape`; `shape[0]` is taken as the sample count.
   * @param y - 1D label tensor with one entry per sample.
   * @returns Array of `[trainIndices, testIndices]`, one entry per fold.
   * @throws ShapeError if `X.shape[0]` is undefined or `y` is not 1D.
   * @throws InvalidParameterError if `nSplits` exceeds the sample count, `y`
   *   has a different length, or a class has fewer than `nSplits` samples.
   */
  split(X, y) {
    const shape0 = X.shape[0];
    if (shape0 === void 0) {
      throw new chunkJSCDE774_cjs.ShapeError("X must have valid shape[0]");
    }
    const nSamples = shape0;
    validateNSplits(this.nSplits);
    if (this.nSplits > nSamples) {
      throw new chunkJSCDE774_cjs.InvalidParameterError(
        "nSplits must not be greater than number of samples",
        "nSplits",
        this.nSplits
      );
    }
    const yShape0 = y.shape[0];
    if (yShape0 === void 0 || yShape0 !== nSamples) {
      throw new chunkJSCDE774_cjs.InvalidParameterError("X and y must have same number of samples", "y", yShape0);
    }
    if (y.ndim !== 1) {
      throw new chunkJSCDE774_cjs.ShapeError(`y must be a 1D tensor, got ${y.ndim}D`);
    }
    // label -> sample indices carrying that label, in first-seen label order.
    const labelMap = /* @__PURE__ */ new Map();
    const random = this.randomState !== void 0 ? createSeededRandom(this.randomState) : Math.random;
    for (let i = 0; i < nSamples; i++) {
      const label = readTensorValue(y, [i]);
      let bucket = labelMap.get(label);
      if (bucket === void 0) {
        bucket = [];
        labelMap.set(label, bucket);
      }
      bucket.push(i);
    }
    for (const [label, indices] of labelMap.entries()) {
      if (this.shuffle) {
        shuffleIndicesInPlace(indices, random);
      }
      if (indices.length < this.nSplits) {
        throw new chunkJSCDE774_cjs.InvalidParameterError(
          `Each class must have at least nSplits samples; class ${label} has ${indices.length}`,
          "nSplits",
          this.nSplits
        );
      }
    }
    const foldIndices = Array.from({ length: this.nSplits }, () => []);
    for (const indices of labelMap.values()) {
      const foldSizes = makeFoldSizes(indices.length, this.nSplits);
      let start = 0;
      for (let fold = 0; fold < this.nSplits; fold++) {
        const size = foldSizes[fold] ?? 0;
        const end = start + size;
        const target = foldIndices[fold];
        if (!target) {
          throw new chunkJSCDE774_cjs.DeepboxError("Internal error: stratified fold storage missing");
        }
        // Append one by one: push(...slice) passes every index as a call
        // argument and throws RangeError for very large classes.
        const stop = Math.min(end, indices.length);
        for (let k = start; k < stop; k++) {
          target.push(indices[k]);
        }
        start = end;
      }
    }
    const splits = [];
    for (let fold = 0; fold < this.nSplits; fold++) {
      const testIndices = foldIndices[fold] ?? [];
      const trainIndices = [];
      for (let other = 0; other < this.nSplits; other++) {
        if (other === fold) continue;
        // Element-wise append for the same argument-limit reason as above.
        for (const index of foldIndices[other] ?? []) {
          trainIndices.push(index);
        }
      }
      splits.push([trainIndices, testIndices]);
    }
    return splits;
  }
  /** @returns The configured number of folds. */
  getNSplits() {
    return this.nSplits;
  }
};
|
|
3678
|
+
/**
 * Group K-fold cross-validation splitter.
 *
 * All samples sharing a group value are kept in the same fold. Groups are
 * assigned greedily, largest first, to whichever fold is currently smallest.
 */
var GroupKFold = class {
  nSplits;
  /** @param options - `nSplits` (default 5). */
  constructor(options = {}) {
    this.nSplits = options.nSplits ?? 5;
  }
  /**
   * Produce the train/test index pairs for `X` partitioned by `groups`.
   *
   * @param X - Object exposing `shape`; `shape[0]` is taken as the sample count.
   * @param _y - Unused; present to keep the splitter signatures parallel.
   * @param groups - 1D tensor with one group value per sample.
   * @returns Array of `[trainIndices, testIndices]`, one entry per fold.
   * @throws ShapeError if `X.shape[0]` is undefined or `groups` is not 1D.
   * @throws InvalidParameterError if `groups` has a different length or there
   *   are fewer distinct groups than `nSplits`.
   */
  split(X, _y, groups) {
    const shape0 = X.shape[0];
    if (shape0 === void 0) {
      throw new chunkJSCDE774_cjs.ShapeError("X must have valid shape[0]");
    }
    const nSamples = shape0;
    validateNSplits(this.nSplits);
    if (groups.ndim !== 1) {
      throw new chunkJSCDE774_cjs.ShapeError(`groups must be a 1D tensor, got ${groups.ndim}D`);
    }
    const groupsShape0 = groups.shape[0];
    if (groupsShape0 === void 0 || groupsShape0 !== nSamples) {
      throw new chunkJSCDE774_cjs.InvalidParameterError(
        "X and groups must have same number of samples",
        "groups",
        groupsShape0
      );
    }
    // group value -> sample indices belonging to that group.
    const groupMap = /* @__PURE__ */ new Map();
    for (let i = 0; i < nSamples; i++) {
      const group = readTensorValue(groups, [i]);
      let bucket = groupMap.get(group);
      if (bucket === void 0) {
        bucket = [];
        groupMap.set(group, bucket);
      }
      bucket.push(i);
    }
    const groupEntries = Array.from(groupMap.entries()).map(([group, indices]) => ({
      group,
      indices,
      size: indices.length
    }));
    if (this.nSplits > groupEntries.length) {
      throw new chunkJSCDE774_cjs.InvalidParameterError(
        "Number of groups must be at least nSplits",
        "nSplits",
        this.nSplits
      );
    }
    // Largest groups first; ties broken by group value for determinism.
    groupEntries.sort((a, b) => {
      if (b.size !== a.size) return b.size - a.size;
      return compareLabels(a.group, b.group);
    });
    const foldIndices = Array.from({ length: this.nSplits }, () => []);
    const foldSizes = new Array(this.nSplits).fill(0);
    for (const entry of groupEntries) {
      // Pick the currently smallest fold (lowest fold number wins ties).
      let bestFold = 0;
      let bestSize = foldSizes[0] ?? 0;
      for (let fold = 1; fold < this.nSplits; fold++) {
        const size = foldSizes[fold] ?? 0;
        if (size < bestSize) {
          bestSize = size;
          bestFold = fold;
        }
      }
      const target = foldIndices[bestFold];
      if (!target) {
        throw new chunkJSCDE774_cjs.DeepboxError("Internal error: group fold storage missing");
      }
      // Append one by one: push(...indices) passes every index as a call
      // argument and throws RangeError for very large groups.
      for (const index of entry.indices) {
        target.push(index);
      }
      foldSizes[bestFold] = bestSize + entry.size;
    }
    const splits = [];
    for (let fold = 0; fold < this.nSplits; fold++) {
      const testIndices = foldIndices[fold] ?? [];
      const trainIndices = [];
      for (let other = 0; other < this.nSplits; other++) {
        if (other === fold) continue;
        // Element-wise append for the same argument-limit reason as above.
        for (const index of foldIndices[other] ?? []) {
          trainIndices.push(index);
        }
      }
      splits.push([trainIndices, testIndices]);
    }
    return splits;
  }
  /** @returns The configured number of folds. */
  getNSplits() {
    return this.nSplits;
  }
};
|
|
3762
|
+
/**
 * Leave-one-out cross-validation splitter: every sample is the sole test
 * element exactly once, with all other samples forming the train set.
 */
var LeaveOneOut = class {
  /**
   * @param X - Object exposing `shape`; `shape[0]` is taken as the sample count.
   * @returns Array of `[trainIndices, [testIndex]]`, one entry per sample.
   * @throws ShapeError if `X.shape[0]` is undefined.
   */
  split(X) {
    const nSamples = X.shape[0];
    if (nSamples === undefined) {
      throw new chunkJSCDE774_cjs.ShapeError("X must have valid shape[0]");
    }
    const result = [];
    for (let heldOut = 0; heldOut < nSamples; heldOut += 1) {
      // All indices in ascending order except the held-out one.
      const kept = [];
      for (let index = 0; index < heldOut; index += 1) {
        kept.push(index);
      }
      for (let index = heldOut + 1; index < nSamples; index += 1) {
        kept.push(index);
      }
      result.push([kept, [heldOut]]);
    }
    return result;
  }
  /**
   * @param X - Object exposing `shape`.
   * @returns The number of splits, which equals `X.shape[0]`.
   * @throws ShapeError if `X.shape[0]` is undefined.
   */
  getNSplits(X) {
    const count = X.shape[0];
    if (count === undefined) {
      throw new chunkJSCDE774_cjs.ShapeError("X must have valid shape[0]");
    }
    return count;
  }
};
|
|
3788
|
+
/**
 * Leave-p-out cross-validation splitter: every combination of `p` samples is
 * used once as the test set, in lexicographic order of the test indices.
 */
var LeavePOut = class {
  p;
  /**
   * @param p - Number of samples held out per split; must be a positive integer.
   * @throws InvalidParameterError if `p` is not a positive integer.
   */
  constructor(p) {
    // Number.isInteger already rejects NaN and +/-Infinity.
    if (!Number.isInteger(p) || p <= 0) {
      throw new chunkJSCDE774_cjs.InvalidParameterError("p must be a positive integer", "p", p);
    }
    this.p = p;
  }
  /**
   * Binomial coefficient C(nSamples, p), computed multiplicatively over the
   * smaller of `p` and `nSamples - p` to keep intermediate values small.
   * The result is a float; callers round as they need.
   */
  #countCombinations(nSamples) {
    const k = this.p > nSamples / 2 ? nSamples - this.p : this.p;
    let count = 1;
    for (let i = 0; i < k; i++) {
      count = count * (nSamples - i) / (i + 1);
    }
    return count;
  }
  /**
   * Enumerate all train/test index pairs.
   *
   * @param X - Object exposing `shape`; `shape[0]` is taken as the sample count.
   * @returns Array of `[trainIndices, testIndices]`, one per combination.
   * @throws ShapeError if `X.shape[0]` is undefined.
   * @throws InvalidParameterError if `p` exceeds the sample count or the number
   *   of combinations exceeds 100,000.
   */
  split(X) {
    const shape0 = X.shape[0];
    if (shape0 === void 0) {
      throw new chunkJSCDE774_cjs.ShapeError("X must have valid shape[0]");
    }
    const nSamples = shape0;
    if (this.p > nSamples) {
      throw new chunkJSCDE774_cjs.InvalidParameterError("p must not be greater than number of samples", "p", this.p);
    }
    const nCombos = this.#countCombinations(nSamples);
    if (nCombos > 1e5) {
      throw new chunkJSCDE774_cjs.InvalidParameterError(
        `LeavePOut produces ${Math.floor(nCombos)} splits, which exceeds memory safety limit of 100,000`,
        "p",
        this.p
      );
    }
    const p = this.p;
    const splits = [];
    const allIndices = Array.from({ length: nSamples }, (_, i) => i);
    const currentCombo = [];
    const combine = (start) => {
      if (currentCombo.length === p) {
        const testSet = new Set(currentCombo);
        const testIndices = [...currentCombo];
        const trainIndices = allIndices.filter((i) => !testSet.has(i));
        splits.push([trainIndices, testIndices]);
        return;
      }
      // Prune: stop at the last start position that still leaves enough
      // indices to complete the combination. Without this bound the search
      // visits on the order of 2^nSamples dead-end prefixes when p is close to
      // nSamples, even though only a few combinations exist.
      const lastStart = nSamples - (p - currentCombo.length);
      for (let i = start; i <= lastStart; i++) {
        currentCombo.push(i);
        combine(i + 1);
        currentCombo.pop();
      }
    };
    combine(0);
    return splits;
  }
  /**
   * @param X - Object exposing `shape`; `shape[0]` is taken as the sample count.
   * @returns The number of splits, C(nSamples, p), rounded to an integer.
   * @throws ShapeError if `X.shape[0]` is undefined.
   * @throws InvalidParameterError if `p` exceeds the sample count.
   */
  getNSplits(X) {
    const shape0 = X.shape[0];
    if (shape0 === void 0) {
      throw new chunkJSCDE774_cjs.ShapeError("X must have valid shape[0]");
    }
    const n = shape0;
    if (this.p > n) {
      throw new chunkJSCDE774_cjs.InvalidParameterError("p must not be greater than number of samples", "p", this.p);
    }
    return Math.round(this.#countCombinations(n));
  }
};
|
|
3853
|
+
|
|
3854
|
+
// CommonJS export assignments for this chunk, listed alphabetically.
exports.GroupKFold = GroupKFold;
exports.KFold = KFold;
exports.LabelBinarizer = LabelBinarizer;
exports.LabelEncoder = LabelEncoder;
exports.LeaveOneOut = LeaveOneOut;
exports.LeavePOut = LeavePOut;
exports.MaxAbsScaler = MaxAbsScaler;
exports.MinMaxScaler = MinMaxScaler;
exports.MultiLabelBinarizer = MultiLabelBinarizer;
exports.Normalizer = Normalizer;
exports.OneHotEncoder = OneHotEncoder;
exports.OrdinalEncoder = OrdinalEncoder;
exports.PowerTransformer = PowerTransformer;
exports.QuantileTransformer = QuantileTransformer;
exports.RobustScaler = RobustScaler;
exports.StandardScaler = StandardScaler;
exports.StratifiedKFold = StratifiedKFold;
exports.preprocess_exports = preprocess_exports;
exports.trainTestSplit = trainTestSplit;
|
|
3873
|
+
//# sourceMappingURL=chunk-OX6QXFMV.cjs.map
|
|
3874
|
+
//# sourceMappingURL=chunk-OX6QXFMV.cjs.map
|