deepbox 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +344 -0
  3. package/dist/CSRMatrix-CwGwQRea.d.cts +219 -0
  4. package/dist/CSRMatrix-KzNt6QpS.d.ts +219 -0
  5. package/dist/Tensor-BQLk1ltW.d.cts +147 -0
  6. package/dist/Tensor-g8mUClel.d.ts +147 -0
  7. package/dist/chunk-4S73VUBD.js +677 -0
  8. package/dist/chunk-4S73VUBD.js.map +1 -0
  9. package/dist/chunk-5R4S63PF.js +2925 -0
  10. package/dist/chunk-5R4S63PF.js.map +1 -0
  11. package/dist/chunk-6AE5FKKQ.cjs +9264 -0
  12. package/dist/chunk-6AE5FKKQ.cjs.map +1 -0
  13. package/dist/chunk-AD436M45.js +3854 -0
  14. package/dist/chunk-AD436M45.js.map +1 -0
  15. package/dist/chunk-ALS7ETWZ.cjs +4263 -0
  16. package/dist/chunk-ALS7ETWZ.cjs.map +1 -0
  17. package/dist/chunk-AU7XHGKJ.js +2092 -0
  18. package/dist/chunk-AU7XHGKJ.js.map +1 -0
  19. package/dist/chunk-B5TNKUEY.js +1481 -0
  20. package/dist/chunk-B5TNKUEY.js.map +1 -0
  21. package/dist/chunk-BCR7G3A6.js +9136 -0
  22. package/dist/chunk-BCR7G3A6.js.map +1 -0
  23. package/dist/chunk-C4PKXY74.cjs +1917 -0
  24. package/dist/chunk-C4PKXY74.cjs.map +1 -0
  25. package/dist/chunk-DWZY6PIP.cjs +6400 -0
  26. package/dist/chunk-DWZY6PIP.cjs.map +1 -0
  27. package/dist/chunk-E3EU5FZO.cjs +2113 -0
  28. package/dist/chunk-E3EU5FZO.cjs.map +1 -0
  29. package/dist/chunk-F3JWBINJ.js +1054 -0
  30. package/dist/chunk-F3JWBINJ.js.map +1 -0
  31. package/dist/chunk-FJYLIGJX.js +1940 -0
  32. package/dist/chunk-FJYLIGJX.js.map +1 -0
  33. package/dist/chunk-JSCDE774.cjs +729 -0
  34. package/dist/chunk-JSCDE774.cjs.map +1 -0
  35. package/dist/chunk-LWECRCW2.cjs +2412 -0
  36. package/dist/chunk-LWECRCW2.cjs.map +1 -0
  37. package/dist/chunk-MLBMYKCG.js +6379 -0
  38. package/dist/chunk-MLBMYKCG.js.map +1 -0
  39. package/dist/chunk-OX6QXFMV.cjs +3874 -0
  40. package/dist/chunk-OX6QXFMV.cjs.map +1 -0
  41. package/dist/chunk-PHV2DKRS.cjs +1072 -0
  42. package/dist/chunk-PHV2DKRS.cjs.map +1 -0
  43. package/dist/chunk-PL7TAYKI.js +4056 -0
  44. package/dist/chunk-PL7TAYKI.js.map +1 -0
  45. package/dist/chunk-PR647I7R.js +1898 -0
  46. package/dist/chunk-PR647I7R.js.map +1 -0
  47. package/dist/chunk-QERHVCHC.cjs +2960 -0
  48. package/dist/chunk-QERHVCHC.cjs.map +1 -0
  49. package/dist/chunk-XEG44RF6.cjs +1514 -0
  50. package/dist/chunk-XEG44RF6.cjs.map +1 -0
  51. package/dist/chunk-XMWVME2W.js +2377 -0
  52. package/dist/chunk-XMWVME2W.js.map +1 -0
  53. package/dist/chunk-ZB75FESB.cjs +1979 -0
  54. package/dist/chunk-ZB75FESB.cjs.map +1 -0
  55. package/dist/chunk-ZLW62TJG.cjs +4061 -0
  56. package/dist/chunk-ZLW62TJG.cjs.map +1 -0
  57. package/dist/chunk-ZXKBDFP3.js +4235 -0
  58. package/dist/chunk-ZXKBDFP3.js.map +1 -0
  59. package/dist/core/index.cjs +204 -0
  60. package/dist/core/index.cjs.map +1 -0
  61. package/dist/core/index.d.cts +2 -0
  62. package/dist/core/index.d.ts +2 -0
  63. package/dist/core/index.js +3 -0
  64. package/dist/core/index.js.map +1 -0
  65. package/dist/dataframe/index.cjs +22 -0
  66. package/dist/dataframe/index.cjs.map +1 -0
  67. package/dist/dataframe/index.d.cts +3 -0
  68. package/dist/dataframe/index.d.ts +3 -0
  69. package/dist/dataframe/index.js +5 -0
  70. package/dist/dataframe/index.js.map +1 -0
  71. package/dist/datasets/index.cjs +134 -0
  72. package/dist/datasets/index.cjs.map +1 -0
  73. package/dist/datasets/index.d.cts +3 -0
  74. package/dist/datasets/index.d.ts +3 -0
  75. package/dist/datasets/index.js +5 -0
  76. package/dist/datasets/index.js.map +1 -0
  77. package/dist/index-74AB8Cyh.d.cts +1126 -0
  78. package/dist/index-9oQx1HgV.d.cts +1180 -0
  79. package/dist/index-BJY2SI4i.d.ts +483 -0
  80. package/dist/index-BWGhrDlr.d.ts +733 -0
  81. package/dist/index-B_DK4FKY.d.cts +242 -0
  82. package/dist/index-BbA2Gxfl.d.ts +456 -0
  83. package/dist/index-BgHYAoSS.d.cts +837 -0
  84. package/dist/index-BndMbqsM.d.ts +1439 -0
  85. package/dist/index-C1mfVYoo.d.ts +2517 -0
  86. package/dist/index-CCvlwAmL.d.cts +809 -0
  87. package/dist/index-CDw5CnOU.d.ts +785 -0
  88. package/dist/index-Cn3SdB0O.d.ts +1126 -0
  89. package/dist/index-CrqLlS-a.d.ts +776 -0
  90. package/dist/index-D61yaSMY.d.cts +483 -0
  91. package/dist/index-D9Loo1_A.d.cts +2517 -0
  92. package/dist/index-DIT_OO9C.d.cts +785 -0
  93. package/dist/index-DIp_RrRt.d.ts +242 -0
  94. package/dist/index-DbultU6X.d.cts +1427 -0
  95. package/dist/index-DmEg_LCm.d.cts +776 -0
  96. package/dist/index-DoPWVxPo.d.cts +1439 -0
  97. package/dist/index-DuCxd-8d.d.ts +837 -0
  98. package/dist/index-Dx42TZaY.d.ts +809 -0
  99. package/dist/index-DyZ4QQf5.d.cts +456 -0
  100. package/dist/index-GFAVyOWO.d.ts +1427 -0
  101. package/dist/index-WHQLn0e8.d.cts +733 -0
  102. package/dist/index-ZtI1Iy4L.d.ts +1180 -0
  103. package/dist/index-eJgeni9c.d.cts +1911 -0
  104. package/dist/index-tk4lSYod.d.ts +1911 -0
  105. package/dist/index.cjs +72 -0
  106. package/dist/index.cjs.map +1 -0
  107. package/dist/index.d.cts +17 -0
  108. package/dist/index.d.ts +17 -0
  109. package/dist/index.js +15 -0
  110. package/dist/index.js.map +1 -0
  111. package/dist/linalg/index.cjs +86 -0
  112. package/dist/linalg/index.cjs.map +1 -0
  113. package/dist/linalg/index.d.cts +3 -0
  114. package/dist/linalg/index.d.ts +3 -0
  115. package/dist/linalg/index.js +5 -0
  116. package/dist/linalg/index.js.map +1 -0
  117. package/dist/metrics/index.cjs +158 -0
  118. package/dist/metrics/index.cjs.map +1 -0
  119. package/dist/metrics/index.d.cts +3 -0
  120. package/dist/metrics/index.d.ts +3 -0
  121. package/dist/metrics/index.js +5 -0
  122. package/dist/metrics/index.js.map +1 -0
  123. package/dist/ml/index.cjs +87 -0
  124. package/dist/ml/index.cjs.map +1 -0
  125. package/dist/ml/index.d.cts +3 -0
  126. package/dist/ml/index.d.ts +3 -0
  127. package/dist/ml/index.js +6 -0
  128. package/dist/ml/index.js.map +1 -0
  129. package/dist/ndarray/index.cjs +501 -0
  130. package/dist/ndarray/index.cjs.map +1 -0
  131. package/dist/ndarray/index.d.cts +5 -0
  132. package/dist/ndarray/index.d.ts +5 -0
  133. package/dist/ndarray/index.js +4 -0
  134. package/dist/ndarray/index.js.map +1 -0
  135. package/dist/nn/index.cjs +142 -0
  136. package/dist/nn/index.cjs.map +1 -0
  137. package/dist/nn/index.d.cts +6 -0
  138. package/dist/nn/index.d.ts +6 -0
  139. package/dist/nn/index.js +5 -0
  140. package/dist/nn/index.js.map +1 -0
  141. package/dist/optim/index.cjs +77 -0
  142. package/dist/optim/index.cjs.map +1 -0
  143. package/dist/optim/index.d.cts +4 -0
  144. package/dist/optim/index.d.ts +4 -0
  145. package/dist/optim/index.js +4 -0
  146. package/dist/optim/index.js.map +1 -0
  147. package/dist/plot/index.cjs +114 -0
  148. package/dist/plot/index.cjs.map +1 -0
  149. package/dist/plot/index.d.cts +6 -0
  150. package/dist/plot/index.d.ts +6 -0
  151. package/dist/plot/index.js +5 -0
  152. package/dist/plot/index.js.map +1 -0
  153. package/dist/preprocess/index.cjs +82 -0
  154. package/dist/preprocess/index.cjs.map +1 -0
  155. package/dist/preprocess/index.d.cts +4 -0
  156. package/dist/preprocess/index.d.ts +4 -0
  157. package/dist/preprocess/index.js +5 -0
  158. package/dist/preprocess/index.js.map +1 -0
  159. package/dist/random/index.cjs +74 -0
  160. package/dist/random/index.cjs.map +1 -0
  161. package/dist/random/index.d.cts +3 -0
  162. package/dist/random/index.d.ts +3 -0
  163. package/dist/random/index.js +5 -0
  164. package/dist/random/index.js.map +1 -0
  165. package/dist/stats/index.cjs +142 -0
  166. package/dist/stats/index.cjs.map +1 -0
  167. package/dist/stats/index.d.cts +3 -0
  168. package/dist/stats/index.d.ts +3 -0
  169. package/dist/stats/index.js +5 -0
  170. package/dist/stats/index.js.map +1 -0
  171. package/dist/tensor-B96jjJLQ.d.cts +205 -0
  172. package/dist/tensor-B96jjJLQ.d.ts +205 -0
  173. package/package.json +226 -0
@@ -0,0 +1,3854 @@
1
+ import { tensor, CSRMatrix, zeros, empty, Tensor } from './chunk-BCR7G3A6.js';
2
+ import { __export, InvalidParameterError, NotFittedError, DeepboxError, ShapeError, DTypeError, getConfig, DataValidationError } from './chunk-4S73VUBD.js';
3
+
4
+ // src/preprocess/index.ts
5
// Namespace object for the preprocess module. Every entry is registered as a
// thunk so the class/function it names is only looked up when the export is
// read, not at registration time (the classes are assigned further down).
var preprocess_exports = {};
__export(preprocess_exports, {
  // Cross-validation splitters
  GroupKFold: () => GroupKFold,
  KFold: () => KFold,
  // Label / category encoders
  LabelBinarizer: () => LabelBinarizer,
  LabelEncoder: () => LabelEncoder,
  LeaveOneOut: () => LeaveOneOut,
  LeavePOut: () => LeavePOut,
  // Scalers and transformers
  MaxAbsScaler: () => MaxAbsScaler,
  MinMaxScaler: () => MinMaxScaler,
  MultiLabelBinarizer: () => MultiLabelBinarizer,
  Normalizer: () => Normalizer,
  OneHotEncoder: () => OneHotEncoder,
  OrdinalEncoder: () => OrdinalEncoder,
  PowerTransformer: () => PowerTransformer,
  QuantileTransformer: () => QuantileTransformer,
  RobustScaler: () => RobustScaler,
  StandardScaler: () => StandardScaler,
  StratifiedKFold: () => StratifiedKFold,
  trainTestSplit: () => trainTestSplit,
});
26
+
27
+ // src/preprocess/_internal.ts
28
/**
 * Guard that rejects string tensors where numeric data is required.
 *
 * @param X - Object exposing a `dtype` property
 * @param name - Argument name interpolated into the error message
 * @throws {DTypeError} If `X.dtype` is "string"
 */
function assertNumericTensor(X, name) {
  const isStringTensor = X.dtype === "string";
  if (!isStringTensor) {
    return;
  }
  throw new DTypeError(`${name} must be numeric`);
}
33
/**
 * Guard that accepts only tensors whose `ndim` is exactly 2.
 *
 * @param X - Object exposing an `ndim` property
 * @param name - Argument name interpolated into the error message
 * @throws {ShapeError} If `X.ndim` is not 2
 */
function assert2D(X, name) {
  if (X.ndim === 2) {
    return;
  }
  throw new ShapeError(`${name} must be a 2D tensor, got ${X.ndim}D`);
}
38
/**
 * Return the two dimensions of a 2D tensor as a fresh `[rows, cols]` pair.
 *
 * @param X - Object exposing `ndim` and an indexable `shape`
 * @returns Two-element array `[shape[0], shape[1]]`
 * @throws {ShapeError} If `X.ndim` is not 2 or either dimension is undefined
 */
function getShape2D(X) {
  const isMatrix = X.ndim === 2;
  // Dimensions are only read once the rank check has passed.
  const rowCount = isMatrix ? X.shape[0] : undefined;
  const colCount = isMatrix ? X.shape[1] : undefined;
  if (rowCount === undefined || colCount === undefined) {
    throw new ShapeError(`Expected 2D tensor with valid shape, got shape [${X.shape.join(", ")}]`);
  }
  return [rowCount, colCount];
}
44
/**
 * Return the first entry of a tensor's `strides`.
 *
 * @param X - Object exposing an indexable `strides`
 * @returns `X.strides[0]`
 * @throws {DeepboxError} If that entry is undefined
 */
function getStride1D(X) {
  const leadingStride = X.strides[0];
  if (leadingStride !== undefined) {
    return leadingStride;
  }
  throw new DeepboxError("Internal error: missing stride for 1D tensor");
}
51
/**
 * Return the first two entries of a tensor's `strides` as a fresh pair.
 *
 * @param X - Object exposing an indexable `strides`
 * @returns Two-element array `[strides[0], strides[1]]`
 * @throws {DeepboxError} If either entry is undefined
 */
function getStrides2D(X) {
  const rowStride = X.strides[0];
  const colStride = X.strides[1];
  const bothPresent = rowStride !== undefined && colStride !== undefined;
  if (!bothPresent) {
    throw new DeepboxError("Internal error: missing strides for 2D tensor");
  }
  return [rowStride, colStride];
}
59
/**
 * Create a deterministic pseudo-random number generator from an integer seed.
 *
 * The generator is a linear congruential generator with the recurrence
 * `state = (1103515245 * state + 12345) mod 2^31`; every call advances the
 * state once and returns `state / 2^31`, a value in [0, 1).
 *
 * @param seed - Non-negative safe integer used as the initial state (reduced mod 2^31)
 * @returns A zero-argument function producing the next value in [0, 1)
 * @throws {InvalidParameterError} If seed is not a non-negative safe integer
 */
function createSeededRandom(seed) {
  const multiplier = 1103515245;
  const increment = 12345;
  const modulus = 2 ** 31;
  // Number.isSafeInteger already rejects non-numbers, NaN, ±Infinity and
  // fractional values, so no separate isFinite/isInteger checks are needed.
  if (!Number.isSafeInteger(seed) || seed < 0) {
    throw new InvalidParameterError(
      "randomState must be a non-negative safe integer",
      "randomState",
      seed
    );
  }
  let state = seed % modulus;
  return () => {
    // multiplier * state can reach ~2.4e18, beyond 2^53, so a plain double
    // multiplication silently drops the low bits of the product. Math.imul
    // yields the exact low 32 bits; masking with 0x7fffffff then applies the
    // reduction mod 2^31 and keeps the state non-negative.
    state = (Math.imul(multiplier, state) + increment) & 0x7fffffff;
    return state / modulus;
  };
}
76
/**
 * Shuffle an index array in place, walking from the last position down to 1
 * and swapping each position with one chosen at or below it.
 *
 * @param indices - Array to permute (mutated)
 * @param random - Zero-argument function; its result is scaled by (position + 1) and floored to pick the swap partner
 * @throws {DeepboxError} If either slot involved in a swap holds undefined
 */
function shuffleIndicesInPlace(indices, random) {
  let position = indices.length - 1;
  while (position > 0) {
    const partner = Math.floor(random() * (position + 1));
    const current = indices[position];
    if (current === undefined) {
      throw new DeepboxError("Internal error: shuffle source index missing");
    }
    const picked = indices[partner];
    if (picked === undefined) {
      throw new DeepboxError("Internal error: shuffle target index missing");
    }
    indices[position] = picked;
    indices[partner] = current;
    position -= 1;
  }
}
91
+
92
+ // src/preprocess/encoders.ts
93
/**
 * Return the backing array of a string tensor.
 *
 * @param t - Object exposing `dtype` and `data`
 * @returns `t.data` (the same array instance)
 * @throws {DTypeError} If `t.dtype` is not "string"
 * @throws {DeepboxError} If `t.data` is not a plain array
 */
function getStringData(t) {
  if (t.dtype !== "string") {
    throw new DTypeError("Expected string tensor");
  }
  const storage = t.data;
  if (Array.isArray(storage)) {
    return storage;
  }
  throw new DeepboxError("Internal error: invalid string tensor storage");
}
102
/**
 * Return the backing storage of a non-string tensor.
 *
 * @param t - Object exposing `dtype` and `data`
 * @returns `t.data` (the same instance)
 * @throws {DTypeError} If `t.dtype` is "string"
 * @throws {DeepboxError} If `t.data` is a plain array
 */
function getNumericData(t) {
  if (t.dtype === "string") {
    throw new DTypeError("Expected numeric tensor");
  }
  const storage = t.data;
  if (!Array.isArray(storage)) {
    return storage;
  }
  throw new DeepboxError("Internal error: invalid numeric tensor storage");
}
111
/**
 * Determine which single primitive kind a collection of category values uses.
 *
 * Only strings, numbers and bigints are counted; values of any other type are
 * ignored. When nothing is counted the result defaults to "number".
 *
 * @param values - Iterable of category values
 * @param paramName - Parameter name attached to thrown errors
 * @returns "string", "bigint" or "number"
 * @throws {InvalidParameterError} If a number is not finite, or more than one kind is present
 */
function inferCategoryType(values, paramName) {
  const kindsSeen = new Set();
  for (const value of values) {
    switch (typeof value) {
      case "string":
        kindsSeen.add("string");
        break;
      case "number":
        if (!Number.isFinite(value)) {
          throw new InvalidParameterError("Category values must be finite numbers", paramName, value);
        }
        kindsSeen.add("number");
        break;
      case "bigint":
        kindsSeen.add("bigint");
        break;
      default:
        // Other value types do not take part in the inference.
        break;
    }
  }
  if (kindsSeen.size === 0) {
    return "number";
  }
  if (kindsSeen.size > 1) {
    throw new InvalidParameterError("Mixed category types are not supported", paramName);
  }
  const [onlyKind] = kindsSeen;
  return onlyKind;
}
138
/**
 * Copy a collection of category values into an array and sort it.
 *
 * Strings are ordered with `localeCompare`, bigints by relational comparison
 * and numbers by subtraction. An empty input yields an empty array without
 * any type inference.
 *
 * @param values - Iterable of category values
 * @param paramName - Parameter name forwarded to inferCategoryType
 * @returns New sorted array
 * @throws {DeepboxError} If a comparator meets a value of an unexpected type
 */
function sortCategories(values, paramName) {
  const sorted = Array.from(values);
  if (sorted.length === 0) return sorted;
  const kind = inferCategoryType(sorted, paramName);
  // Shared type guard used by all three comparators.
  const requireKind = (a, b, expected) => {
    if (typeof a !== expected || typeof b !== expected) {
      throw new DeepboxError("Internal error: inconsistent category types");
    }
  };
  let compare;
  if (kind === "string") {
    compare = (a, b) => {
      requireKind(a, b, "string");
      return a.localeCompare(b);
    };
  } else if (kind === "bigint") {
    compare = (a, b) => {
      requireKind(a, b, "bigint");
      if (a < b) return -1;
      return a > b ? 1 : 0;
    };
  } else {
    compare = (a, b) => {
      requireKind(a, b, "number");
      return a - b;
    };
  }
  sorted.sort(compare);
  return sorted;
}
170
/**
 * Validate an explicit category list and return it as a new array.
 *
 * The list must be non-empty, pass inferCategoryType, and contain no
 * repeated value.
 *
 * @param values - Category values for one feature
 * @param paramName - Parameter name attached to thrown errors
 * @returns Copy of `values` in the original order
 * @throws {InvalidParameterError} If the list is empty or contains a duplicate
 */
function validateCategoryValues(values, paramName) {
  if (values.length === 0) {
    throw new InvalidParameterError("categories must contain at least one value", paramName);
  }
  const categories = Array.from(values);
  inferCategoryType(categories, paramName);
  const seen = /* @__PURE__ */ new Set();
  for (const category of categories) {
    const sizeBefore = seen.size;
    seen.add(category);
    // The set did not grow, so this value was already present.
    if (seen.size === sizeBefore) {
      throw new InvalidParameterError(
        `categories must be unique; duplicate value ${String(category)}`,
        paramName,
        category
      );
    }
  }
  return categories;
}
189
/**
 * Interpret the `categories` option against the number of features.
 *
 * @param categoriesOption - Either the string "auto" or an array with one entry per feature
 * @param nFeatures - Expected number of entries
 * @param paramName - Parameter name attached to thrown errors
 * @returns null for "auto", otherwise the array that was passed in
 * @throws {InvalidParameterError} If the option is neither "auto" nor an array, or its length differs from nFeatures
 */
function resolveCategoriesOption(categoriesOption, nFeatures, paramName) {
  if (categoriesOption === "auto") {
    return null;
  }
  if (!Array.isArray(categoriesOption)) {
    throw new InvalidParameterError(
      "categories must be 'auto' or an array of category arrays",
      paramName,
      categoriesOption
    );
  }
  const providedCount = categoriesOption.length;
  if (providedCount === nFeatures) {
    return categoriesOption;
  }
  throw new InvalidParameterError(
    "categories length must match number of features",
    paramName,
    providedCount
  );
}
209
/**
 * Read element `i` of a 1D tensor, honouring its offset and stride.
 *
 * @param t - Tensor to read from
 * @param i - Logical element index
 * @returns The string element for string tensors; otherwise the stored value, kept as-is when it is a bigint and passed through Number() when it is not
 * @throws {DeepboxError} If the computed storage slot holds undefined
 */
function read1DValue(t, i) {
  const slot = t.offset + i * getStride1D(t);
  const isString = t.dtype === "string";
  const element = isString ? getStringData(t)[slot] : getNumericData(t)[slot];
  if (element === undefined) {
    throw new DeepboxError(
      isString
        ? "Internal error: string tensor access out of bounds"
        : "Internal error: numeric tensor access out of bounds"
    );
  }
  if (isString || typeof element === "bigint") {
    return element;
  }
  return Number(element);
}
225
/**
 * Read element `(row, col)` of a 2D tensor, honouring its offset and strides.
 *
 * @param t - Tensor to read from
 * @param row - Logical row index
 * @param col - Logical column index
 * @returns The string element for string tensors; otherwise the stored value, kept as-is when it is a bigint and passed through Number() when it is not
 * @throws {DeepboxError} If the computed storage slot holds undefined
 */
function read2DValue(t, row, col) {
  const [rowStride, colStride] = getStrides2D(t);
  const slot = t.offset + row * rowStride + col * colStride;
  const isString = t.dtype === "string";
  const element = isString ? getStringData(t)[slot] : getNumericData(t)[slot];
  if (element === undefined) {
    throw new DeepboxError(
      isString
        ? "Internal error: string tensor access out of bounds"
        : "Internal error: numeric tensor access out of bounds"
    );
  }
  if (isString || typeof element === "bigint") {
    return element;
  }
  return Number(element);
}
241
/**
 * Guard that accepts only tensors whose `ndim` is exactly 1.
 *
 * @param t - Object exposing an `ndim` property
 * @param name - Argument name interpolated into the error message
 * @throws {ShapeError} If `t.ndim` is not 1
 */
function assert1D(t, name) {
  if (t.ndim === 1) {
    return;
  }
  throw new ShapeError(`${name} must be a 1D tensor`);
}
246
/**
 * Fetch a category by position, failing loudly when the slot is undefined.
 *
 * @param values - Indexable list of categories
 * @param index - Position to read
 * @param context - Caller description included in the error message
 * @returns `values[index]`
 * @throws {DeepboxError} If `values[index]` is undefined
 */
function categoryValueAt(values, index, context) {
  const found = values[index];
  if (found !== undefined) {
    return found;
  }
  throw new DeepboxError(`Internal error: missing category at index ${index} (${context})`);
}
253
/**
 * Infer the category kind shared by every value in a list of rows.
 *
 * All rows are concatenated into one array, which is then handed to
 * inferCategoryType.
 *
 * @param rows - Iterable of iterables of category values
 * @param paramName - Parameter name forwarded to inferCategoryType
 * @returns The result of inferCategoryType on the concatenated values
 */
function inferCategoryTypeFromRows(rows, paramName) {
  function* everyValue() {
    for (const row of rows) {
      yield* row;
    }
  }
  return inferCategoryType(Array.from(everyValue()), paramName);
}
262
/**
 * Build a zero-length 1D tensor whose dtype follows the kind of `classes`.
 *
 * String classes give a "string" tensor, bigint classes an "int64" tensor,
 * and everything else a "float64" tensor.
 *
 * @param classes - Fitted class values
 * @param paramName - Parameter name forwarded to inferCategoryType
 * @returns Tensor of shape [0]
 */
function emptyCategoryVectorFromClasses(classes, paramName) {
  switch (inferCategoryType(classes, paramName)) {
    case "string":
      return empty([0], { dtype: "string" });
    case "bigint":
      return empty([0], { dtype: "int64" });
    default:
      return zeros([0], { dtype: "float64" });
  }
}
272
/**
 * Build a zero-row 2D tensor whose dtype follows the kind of `categories`.
 *
 * String categories give a "string" tensor, bigint categories an "int64"
 * tensor, and everything else a "float64" tensor.
 *
 * @param categories - Per-feature category lists
 * @param nFeatures - Number of columns of the result
 * @param paramName - Parameter name forwarded to inferCategoryTypeFromRows
 * @returns Tensor of shape [0, nFeatures]
 */
function emptyCategoryMatrixFromCategories(categories, nFeatures, paramName) {
  const shape = [0, nFeatures];
  switch (inferCategoryTypeFromRows(categories, paramName)) {
    case "string":
      return empty(shape, { dtype: "string" });
    case "bigint":
      return empty(shape, { dtype: "int64" });
    default:
      return zeros(shape, { dtype: "float64" });
  }
}
282
/**
 * Convert a list of category values into a 1D tensor.
 *
 * The values are copied into a plain array (strings), a BigInt64Array
 * (bigints) or a Float64Array (numbers) and passed to `tensor`.
 *
 * @param values - Indexable list of category values
 * @param paramName - Parameter name forwarded to inferCategoryType (default: "y")
 * @returns Tensor built from the copied values
 * @throws {DeepboxError} If an element does not match the inferred kind
 */
function toCategoryVectorTensor(values, paramName = "y") {
  const kind = inferCategoryType(values, paramName);
  const count = values.length;
  // Copy every element into `target`, checking its runtime type on the way.
  const copyChecked = (target, expectedType, mismatchMessage) => {
    for (let i = 0; i < count; i += 1) {
      const element = values[i];
      if (typeof element !== expectedType) {
        throw new DeepboxError(mismatchMessage);
      }
      target[i] = element;
    }
    return target;
  };
  if (kind === "string") {
    return tensor(
      copyChecked(new Array(count), "string", "Internal error: expected string category value")
    );
  }
  if (kind === "bigint") {
    return tensor(
      copyChecked(new BigInt64Array(count), "bigint", "Internal error: expected bigint category value")
    );
  }
  return tensor(
    copyChecked(new Float64Array(count), "number", "Internal error: expected numeric category value")
  );
}
316
/**
 * Convert a rectangular array of category rows into a 2D tensor.
 *
 * String and number matrices are rebuilt as nested arrays and passed to
 * `tensor` (numbers with dtype "float64"). Bigint matrices are packed into a
 * BigInt64Array and wrapped with `Tensor.fromTypedArray` on the configured
 * default device.
 *
 * @param values - Array of rows; the column count is taken from the first row
 * @param paramName - Parameter name forwarded to inferCategoryType (default: "X")
 * @returns Tensor of shape [rows, cols]
 * @throws {ShapeError} If a row's length differs from the first row's
 * @throws {DeepboxError} If a row is missing or an element does not match the inferred kind
 */
function toCategoryMatrixTensor(values, paramName = "X") {
  const rows = values.length;
  const cols = rows === 0 ? 0 : values[0]?.length ?? 0;
  const rowAt = (index) => {
    const row = values[index];
    if (!row) {
      throw new DeepboxError("Internal error: missing row in category matrix");
    }
    return row;
  };

  // Every row must exist and be as long as the first one.
  for (let r = 0; r < rows; r += 1) {
    if (rowAt(r).length !== cols) {
      throw new ShapeError("Ragged category matrix cannot be converted to tensor");
    }
  }

  const flat = [];
  for (const row of values) {
    for (const cell of row) {
      flat.push(cell);
    }
  }
  const kind = inferCategoryType(flat, paramName);

  if (kind === "bigint") {
    const data = new BigInt64Array(rows * cols);
    flat.forEach((cell, position) => {
      if (typeof cell !== "bigint") {
        throw new DeepboxError("Internal error: expected bigint category value");
      }
      data[position] = cell;
    });
    const { defaultDevice } = getConfig();
    return Tensor.fromTypedArray({
      data,
      shape: [rows, cols],
      dtype: "int64",
      device: defaultDevice
    });
  }

  // Remaining kinds are "string" and "number"; both are rebuilt as nested arrays.
  const mismatchMessage = kind === "string"
    ? "Internal error: expected string category value"
    : "Internal error: expected numeric category value";
  const nested = [];
  for (let r = 0; r < rows; r += 1) {
    const source = rowAt(r);
    const copy = [];
    for (let c = 0; c < cols; c += 1) {
      const cell = source[c];
      if (typeof cell !== kind) {
        throw new DeepboxError(mismatchMessage);
      }
      copy.push(cell);
    }
    nested.push(copy);
  }
  return kind === "string" ? tensor(nested) : tensor(nested, { dtype: "float64" });
}
389
var LabelEncoder = class {
  /** True once fit() has completed */
  fitted = false;
  /** Sorted distinct label values collected by fit() */
  classes_;
  /** Map from label value to its position in classes_ */
  classToIndex_;
  /**
   * Learn the set of distinct labels in y.
   * The labels are deduplicated, sorted, and indexed by position.
   *
   * @param y - Target labels (1D tensor)
   * @returns this - The encoder itself, for chaining
   * @throws {ShapeError} If y is not 1D
   * @throws {InvalidParameterError} If y is empty
   */
  fit(y) {
    assert1D(y, "y");
    if (y.size === 0) {
      throw new InvalidParameterError("Cannot fit LabelEncoder on empty array", "y");
    }
    const distinct = /* @__PURE__ */ new Set();
    for (let pos = 0; pos < y.size; pos += 1) {
      distinct.add(read1DValue(y, pos));
    }
    this.classes_ = sortCategories(distinct, "y");
    const lookup = /* @__PURE__ */ new Map();
    this.classToIndex_ = lookup;
    for (let code = 0; code < this.classes_.length; code += 1) {
      lookup.set(categoryValueAt(this.classes_, code, "LabelEncoder.fit"), code);
    }
    this.fitted = true;
    return this;
  }
  /**
   * Replace each label with its position in the fitted class list.
   * An empty input short-circuits to `tensor([])`.
   *
   * @param y - Labels to encode (1D tensor)
   * @returns Tensor of class positions, created with dtype "float64"
   * @throws {NotFittedError} If the encoder has not been fitted
   * @throws {InvalidParameterError} If y contains a label not seen during fit
   */
  transform(y) {
    if (!this.fitted) {
      throw new NotFittedError("LabelEncoder must be fitted before transform");
    }
    assert1D(y, "y");
    if (y.size === 0) {
      return tensor([]);
    }
    const { classes_: classes, classToIndex_: lookup } = this;
    if (!classes || !lookup) {
      throw new DeepboxError("LabelEncoder internal error: missing fitted state");
    }
    const encoded = [];
    for (let pos = 0; pos < y.size; pos += 1) {
      const label = read1DValue(y, pos);
      const code = lookup.get(label);
      if (code === void 0) {
        throw new InvalidParameterError(
          `Unknown label: ${String(label)}. Label must be present during fit.`,
          "y",
          label
        );
      }
      encoded.push(code);
    }
    return tensor(encoded, { dtype: "float64" });
  }
  /**
   * Fit on y and encode it in a single call.
   * Same as fit(y) followed by transform(y).
   *
   * @param y - Target labels (1D tensor)
   * @returns Encoded labels
   */
  fitTransform(y) {
    this.fit(y);
    return this.transform(y);
  }
  /**
   * Map class positions back to the original label values.
   *
   * @param y - Encoded labels (1D numeric tensor)
   * @returns Tensor of original labels; for empty input, an empty tensor typed after the fitted classes
   * @throws {NotFittedError} If the encoder has not been fitted
   * @throws {InvalidParameterError} If an entry is not an integer in [0, number of classes - 1]
   */
  inverseTransform(y) {
    if (!this.fitted) {
      throw new NotFittedError("LabelEncoder must be fitted before inverse_transform");
    }
    assert1D(y, "y");
    assertNumericTensor(y, "y");
    const classes = this.classes_;
    if (!classes) {
      throw new DeepboxError("LabelEncoder internal error: missing fitted state");
    }
    if (y.size === 0) {
      return emptyCategoryVectorFromClasses(classes, "y");
    }
    const classCount = classes.length;
    const decoded = new Array(y.size);
    const stride = getStride1D(y);
    const storage = getNumericData(y);
    for (let pos = 0; pos < y.size; pos += 1) {
      const raw = storage[y.offset + pos * stride];
      if (raw === void 0) {
        throw new DeepboxError("Internal error: numeric tensor access out of bounds");
      }
      const idx = Number(raw);
      const isValidIndex = Number.isInteger(idx) && idx >= 0 && idx < classCount;
      if (!isValidIndex) {
        throw new InvalidParameterError(
          `Invalid label index: ${idx}. Must be integer in [0, ${classCount - 1}]`,
          "y",
          idx
        );
      }
      decoded[pos] = categoryValueAt(classes, idx, "LabelEncoder.inverseTransform");
    }
    return toCategoryVectorTensor(decoded, "y");
  }
};
511
+ var OneHotEncoder = class {
512
+ /** Indicates whether the encoder has been fitted to data */
513
+ fitted = false;
514
+ /** Array of unique categories for each feature */
515
+ categories_;
516
+ /** Maps from category value to index for each feature (for O(1) lookup) */
517
+ categoryToIndex_;
518
+ /** Whether to return sparse matrix (CSR) or dense array */
519
+ sparse;
520
+ /** How to handle unknown categories during transform */
521
+ handleUnknown;
522
+ /** Drop policy to avoid collinearity */
523
+ drop;
524
+ /** Per-feature dropped category index */
525
+ dropIndices_;
526
+ /** Categories configuration */
527
+ categoriesOption;
528
+ /**
529
+ * Creates a new OneHotEncoder instance.
530
+ *
531
+ * @param options - Configuration options
532
+ * @param options.sparse - If true, returns CSRMatrix; if false, returns dense Tensor (default: false)
533
+ * @param options.sparseOutput - Alias for sparse (default: false)
534
+ * @param options.handleUnknown - How to handle unknown categories (default: "error")
535
+ * @param options.drop - If set, drops the first or binary category per feature
536
+ * @param options.categories - "auto" or explicit category list per feature
537
+ */
538
+ constructor(options = {}) {
539
+ const sparseOption = options.sparse ?? options.sparseOutput ?? false;
540
+ if (options.sparse !== void 0 && options.sparseOutput !== void 0) {
541
+ if (options.sparse !== options.sparseOutput) {
542
+ throw new InvalidParameterError(
543
+ "sparse and sparseOutput must match when both are provided",
544
+ "sparse",
545
+ options.sparse
546
+ );
547
+ }
548
+ }
549
+ this.sparse = sparseOption;
550
+ this.handleUnknown = options.handleUnknown ?? "error";
551
+ this.drop = options.drop ?? null;
552
+ this.categoriesOption = options.categories ?? "auto";
553
+ if (typeof this.sparse !== "boolean") {
554
+ throw new InvalidParameterError("sparse must be a boolean", "sparse", this.sparse);
555
+ }
556
+ if (this.handleUnknown !== "error" && this.handleUnknown !== "ignore") {
557
+ throw new InvalidParameterError(
558
+ "handleUnknown must be 'error' or 'ignore'",
559
+ "handleUnknown",
560
+ this.handleUnknown
561
+ );
562
+ }
563
+ if (this.drop !== null && this.drop !== "first" && this.drop !== "if_binary") {
564
+ throw new InvalidParameterError(
565
+ "drop must be 'first', 'if_binary', or null",
566
+ "drop",
567
+ this.drop
568
+ );
569
+ }
570
+ }
571
+ /**
572
+ * Fit OneHotEncoder to X.
573
+ * Learns the unique categories for each feature.
574
+ *
575
+ * @param X - Training data (2D tensor of categorical features)
576
+ * @returns this - Returns self for method chaining
577
+ * @throws {ShapeError} If X is not a 2D tensor
578
+ * @throws {InvalidParameterError} If X is empty
579
+ */
580
+ fit(X) {
581
+ assert2D(X, "X");
582
+ const [nSamples, nFeatures] = getShape2D(X);
583
+ if (nSamples === 0 || nFeatures === 0) {
584
+ throw new InvalidParameterError("Cannot fit OneHotEncoder on empty array", "X");
585
+ }
586
+ this.categories_ = [];
587
+ this.categoryToIndex_ = [];
588
+ const explicitCategories = resolveCategoriesOption(
589
+ this.categoriesOption,
590
+ nFeatures,
591
+ "categories"
592
+ );
593
+ for (let j = 0; j < nFeatures; j++) {
594
+ let cats;
595
+ if (explicitCategories) {
596
+ const featureCats = explicitCategories[j];
597
+ if (!featureCats) {
598
+ throw new InvalidParameterError("Missing categories for feature", "categories", j);
599
+ }
600
+ if (!Array.isArray(featureCats)) {
601
+ throw new InvalidParameterError(
602
+ "categories must be an array of category arrays",
603
+ "categories",
604
+ featureCats
605
+ );
606
+ }
607
+ cats = validateCategoryValues(featureCats, "categories");
608
+ } else {
609
+ const uniqueSet = /* @__PURE__ */ new Set();
610
+ for (let i = 0; i < nSamples; i++) {
611
+ uniqueSet.add(read2DValue(X, i, j));
612
+ }
613
+ cats = sortCategories(uniqueSet, "X");
614
+ }
615
+ if (cats.length === 0) {
616
+ throw new InvalidParameterError("Each feature must have at least one category", "X", j);
617
+ }
618
+ this.categories_.push(cats);
619
+ const map = /* @__PURE__ */ new Map();
620
+ for (let k = 0; k < cats.length; k++) {
621
+ map.set(categoryValueAt(cats, k, "OneHotEncoder.fit"), k);
622
+ }
623
+ this.categoryToIndex_.push(map);
624
+ if (explicitCategories) {
625
+ for (let i = 0; i < nSamples; i++) {
626
+ const val = read2DValue(X, i, j);
627
+ if (!map.has(val)) {
628
+ throw new InvalidParameterError(
629
+ `Unknown category: ${String(val)} in feature ${j}`,
630
+ "X",
631
+ val
632
+ );
633
+ }
634
+ }
635
+ }
636
+ }
637
+ this.dropIndices_ = this.categories_.map((cats) => {
638
+ if (this.drop === null) return null;
639
+ if (this.drop === "first") return cats.length > 0 ? 0 : null;
640
+ if (this.drop === "if_binary") return cats.length === 2 ? 0 : null;
641
+ return null;
642
+ });
643
+ this.fitted = true;
644
+ return this;
645
+ }
646
+ /**
647
+ * Transform X using one-hot encoding.
648
+ * Each categorical value is converted to a binary vector.
649
+ *
650
+ * @param X - Data to transform (2D tensor)
651
+ * @returns Encoded data as dense Tensor or sparse CSRMatrix
652
+ * @throws {NotFittedError} If encoder is not fitted
653
+ * @throws {InvalidParameterError} If X contains unknown categories
654
+ */
655
+ transform(X) {
656
+ if (!this.fitted) {
657
+ throw new NotFittedError("OneHotEncoder must be fitted before transform");
658
+ }
659
+ assert2D(X, "X");
660
+ const [nSamples, nFeatures] = getShape2D(X);
661
+ const categories = this.categories_;
662
+ const categoryMaps = this.categoryToIndex_;
663
+ if (!categories || !categoryMaps) {
664
+ throw new DeepboxError("OneHotEncoder internal error: missing fitted state");
665
+ }
666
+ const fittedFeatures = categories.length;
667
+ if (nFeatures !== fittedFeatures) {
668
+ throw new InvalidParameterError(
669
+ "X has a different feature count than during fit",
670
+ "X",
671
+ nFeatures
672
+ );
673
+ }
674
+ const dropIndices = this.dropIndices_ ?? categories.map(() => null);
675
+ let totalCols = 0;
676
+ for (let j = 0; j < categories.length; j++) {
677
+ const cats = categories[j];
678
+ if (!cats) continue;
679
+ const dropIndex = dropIndices[j] ?? null;
680
+ totalCols += cats.length - (dropIndex === null ? 0 : 1);
681
+ }
682
+ if (nSamples === 0 || nFeatures === 0) {
683
+ return this.sparse ? CSRMatrix.fromCOO({
684
+ rows: 0,
685
+ cols: totalCols,
686
+ rowIndices: new Int32Array(0),
687
+ colIndices: new Int32Array(0),
688
+ values: new Float64Array(0)
689
+ }) : zeros([0, totalCols], { dtype: "float64" });
690
+ }
691
+ if (this.sparse) {
692
+ const rowIdx = [];
693
+ const colIdx = [];
694
+ const vals = [];
695
+ for (let i = 0; i < nSamples; i++) {
696
+ let colOffset = 0;
697
+ for (let j = 0; j < nFeatures; j++) {
698
+ const cats = categories[j];
699
+ const map = categoryMaps[j];
700
+ const dropIndex = dropIndices[j] ?? null;
701
+ if (!cats || !map) {
702
+ throw new DeepboxError("OneHotEncoder internal error: missing fitted categories");
703
+ }
704
+ const outSize = cats.length - (dropIndex === null ? 0 : 1);
705
+ const val = read2DValue(X, i, j);
706
+ const idx = map.get(val);
707
+ if (idx === void 0) {
708
+ if (this.handleUnknown === "ignore") {
709
+ colOffset += outSize;
710
+ continue;
711
+ }
712
+ throw new InvalidParameterError(`Unknown category: ${String(val)}`, "X", val);
713
+ }
714
+ if (dropIndex !== null && idx === dropIndex) {
715
+ colOffset += outSize;
716
+ continue;
717
+ }
718
+ const adjusted = dropIndex !== null && idx > dropIndex ? idx - 1 : idx;
719
+ rowIdx.push(i);
720
+ colIdx.push(colOffset + adjusted);
721
+ vals.push(1);
722
+ colOffset += outSize;
723
+ }
724
+ }
725
+ return CSRMatrix.fromCOO({
726
+ rows: nSamples,
727
+ cols: totalCols,
728
+ rowIndices: Int32Array.from(rowIdx),
729
+ colIndices: Int32Array.from(colIdx),
730
+ values: Float64Array.from(vals)
731
+ });
732
+ }
733
+ const result = Array.from({ length: nSamples }, () => new Array(totalCols).fill(0));
734
+ for (let i = 0; i < nSamples; i++) {
735
+ let colOffset = 0;
736
+ for (let j = 0; j < nFeatures; j++) {
737
+ const cats = categories[j];
738
+ const map = categoryMaps[j];
739
+ const dropIndex = dropIndices[j] ?? null;
740
+ if (!cats || !map) {
741
+ throw new DeepboxError("OneHotEncoder internal error: missing fitted categories");
742
+ }
743
+ const outSize = cats.length - (dropIndex === null ? 0 : 1);
744
+ const val = read2DValue(X, i, j);
745
+ const idx = map.get(val);
746
+ if (idx === void 0) {
747
+ if (this.handleUnknown === "ignore") {
748
+ colOffset += outSize;
749
+ continue;
750
+ }
751
+ throw new InvalidParameterError(`Unknown category: ${String(val)}`, "X", val);
752
+ }
753
+ if (dropIndex !== null && idx === dropIndex) {
754
+ colOffset += outSize;
755
+ continue;
756
+ }
757
+ const row = result[i];
758
+ if (row === void 0) {
759
+ throw new DeepboxError("Internal error: result row access failed");
760
+ }
761
+ const adjusted = dropIndex !== null && idx > dropIndex ? idx - 1 : idx;
762
+ row[colOffset + adjusted] = 1;
763
+ colOffset += outSize;
764
+ }
765
+ }
766
+ return tensor(result, { dtype: "float64", device: X.device });
767
+ }
768
+ fitTransform(X) {
769
+ return this.fit(X).transform(X);
770
+ }
771
+ inverseTransform(X) {
772
+ if (!this.fitted) {
773
+ throw new NotFittedError("OneHotEncoder must be fitted before inverse_transform");
774
+ }
775
+ const dense = X instanceof CSRMatrix ? X.toDense() : X;
776
+ assert2D(dense, "X");
777
+ assertNumericTensor(dense, "X");
778
+ const [nSamples, nCols] = getShape2D(dense);
779
+ const categories = this.categories_;
780
+ if (!categories) {
781
+ throw new DeepboxError("OneHotEncoder internal error: missing fitted categories");
782
+ }
783
+ const nFeatures = categories.length;
784
+ const dropIndices = this.dropIndices_ ?? categories.map(() => null);
785
+ const totalCols = categories.reduce((sum, cats, idx) => {
786
+ const dropIndex = dropIndices[idx] ?? null;
787
+ return sum + cats.length - (dropIndex === null ? 0 : 1);
788
+ }, 0);
789
+ if (nCols !== totalCols) {
790
+ throw new InvalidParameterError("column count does not match fitted categories", "X", nCols);
791
+ }
792
+ if (nSamples === 0) {
793
+ return emptyCategoryMatrixFromCategories(categories, nFeatures, "X");
794
+ }
795
+ const result = new Array(nSamples);
796
+ for (let i = 0; i < nSamples; i++) {
797
+ result[i] = new Array(nFeatures);
798
+ }
799
+ const denseData = getNumericData(dense);
800
+ const [stride0, stride1] = getStrides2D(dense);
801
+ for (let i = 0; i < nSamples; i++) {
802
+ let colOffset = 0;
803
+ for (let j = 0; j < nFeatures; j++) {
804
+ const cats = categories[j];
805
+ const dropIndex = dropIndices[j] ?? null;
806
+ if (!cats) {
807
+ throw new DeepboxError("OneHotEncoder internal error: missing fitted categories");
808
+ }
809
+ const outSize = cats.length - (dropIndex === null ? 0 : 1);
810
+ if (outSize === 0) {
811
+ const row2 = result[i];
812
+ if (!row2) {
813
+ throw new DeepboxError("Internal error: result row access failed");
814
+ }
815
+ row2[j] = categoryValueAt(cats, dropIndex ?? 0, "OneHotEncoder.inverseTransform");
816
+ continue;
817
+ }
818
+ let maxIdx = 0;
819
+ const rowBase = dense.offset + i * stride0 + colOffset * stride1;
820
+ const first = denseData[rowBase];
821
+ if (first === void 0) {
822
+ throw new DeepboxError("Internal error: numeric tensor access out of bounds");
823
+ }
824
+ let maxVal = Number(first);
825
+ let hasPositive = maxVal > 0;
826
+ for (let k = 1; k < outSize; k++) {
827
+ const raw = denseData[rowBase + k * stride1];
828
+ if (raw === void 0) {
829
+ throw new DeepboxError("Internal error: numeric tensor access out of bounds");
830
+ }
831
+ const val = Number(raw);
832
+ if (val > maxVal) {
833
+ maxVal = val;
834
+ maxIdx = k;
835
+ }
836
+ if (val > 0) {
837
+ hasPositive = true;
838
+ }
839
+ }
840
+ const row = result[i];
841
+ if (row === void 0) {
842
+ throw new DeepboxError("Internal error: result row access failed");
843
+ }
844
+ if (!hasPositive) {
845
+ if (dropIndex !== null) {
846
+ row[j] = categoryValueAt(cats, dropIndex, "OneHotEncoder.inverseTransform");
847
+ } else if (this.handleUnknown === "ignore") {
848
+ throw new InvalidParameterError(
849
+ "Cannot inverse-transform: sample contains no active category (all zeros). This may happen if unknown categories were ignored during transform.",
850
+ "X"
851
+ );
852
+ } else {
853
+ throw new InvalidParameterError("Invalid one-hot encoding: all zeros", "X");
854
+ }
855
+ } else {
856
+ const actualIdx = dropIndex !== null && maxIdx >= dropIndex ? maxIdx + 1 : maxIdx;
857
+ row[j] = categoryValueAt(cats, actualIdx, "OneHotEncoder.inverseTransform");
858
+ }
859
+ colOffset += outSize;
860
+ }
861
+ }
862
+ return toCategoryMatrixTensor(result, "X");
863
+ }
864
+ };
865
/**
 * Encodes each categorical feature of a 2D tensor as the index of the value
 * within that feature's category list.
 */
var OrdinalEncoder = class {
  /** Indicates whether the encoder has been fitted to data */
  fitted = false;
  /** Array of unique categories for each feature, sorted */
  categories_;
  /** Maps from category value to index for each feature (for O(1) lookup) */
  categoryToIndex_;
  /** How to handle unknown categories during transform */
  handleUnknown;
  /** Value used for unknown categories when handleUnknown = "useEncodedValue" */
  unknownValue;
  /** Categories configuration */
  categoriesOption;
  /**
   * Creates a new OrdinalEncoder instance.
   *
   * @param options - Configuration options
   * @param options.handleUnknown - How to handle unknown categories ("error" or "useEncodedValue", default "error")
   * @param options.unknownValue - Encoded value for unknown categories when handleUnknown="useEncodedValue" (default -1)
   * @param options.categories - "auto" or explicit categories per feature (default "auto")
   * @throws {InvalidParameterError} If handleUnknown is not a supported mode, or
   *   unknownValue is neither NaN nor a finite integer
   */
  constructor(options = {}) {
    this.handleUnknown = options.handleUnknown ?? "error";
    this.unknownValue = options.unknownValue ?? -1;
    this.categoriesOption = options.categories ?? "auto";
    if (this.handleUnknown !== "error" && this.handleUnknown !== "useEncodedValue") {
      throw new InvalidParameterError(
        "handleUnknown must be 'error' or 'useEncodedValue'",
        "handleUnknown",
        this.handleUnknown
      );
    }
    if (!Number.isFinite(this.unknownValue) && !Number.isNaN(this.unknownValue)) {
      throw new InvalidParameterError(
        "unknownValue must be a finite number or NaN",
        "unknownValue",
        this.unknownValue
      );
    }
    if (Number.isFinite(this.unknownValue) && !Number.isInteger(this.unknownValue)) {
      throw new InvalidParameterError(
        "unknownValue must be an integer when finite",
        "unknownValue",
        this.unknownValue
      );
    }
  }
  /**
   * Fit OrdinalEncoder to X.
   * Learns the unique categories for each feature and their ordering, or
   * validates X against the explicitly configured categories.
   *
   * @param X - Training data (2D tensor of categorical features)
   * @returns this - Returns self for method chaining
   * @throws {InvalidParameterError} If X is empty, a feature has no category,
   *   X holds a value missing from explicit categories, or unknownValue
   *   collides with an encoded index
   */
  fit(X) {
    assert2D(X, "X");
    const [nSamples, nFeatures] = getShape2D(X);
    if (nSamples === 0) {
      throw new InvalidParameterError("Cannot fit OrdinalEncoder on empty array", "X");
    }
    this.categories_ = [];
    this.categoryToIndex_ = [];
    const explicitCategories = resolveCategoriesOption(
      this.categoriesOption,
      nFeatures,
      "categories"
    );
    for (let j = 0; j < nFeatures; j++) {
      let sorted;
      if (explicitCategories) {
        const featureCats = explicitCategories[j];
        if (!featureCats) {
          throw new InvalidParameterError("Missing categories for feature", "categories", j);
        }
        if (!Array.isArray(featureCats)) {
          throw new InvalidParameterError(
            "categories must be an array of category arrays",
            "categories",
            featureCats
          );
        }
        sorted = validateCategoryValues(featureCats, "categories");
      } else {
        const uniqueSet = /* @__PURE__ */ new Set();
        for (let i = 0; i < nSamples; i++) {
          uniqueSet.add(read2DValue(X, i, j));
        }
        sorted = sortCategories(uniqueSet, "X");
      }
      if (sorted.length === 0) {
        throw new InvalidParameterError("Each feature must have at least one category", "X", j);
      }
      this.categories_.push(sorted);
      const map = /* @__PURE__ */ new Map();
      for (let k = 0; k < sorted.length; k++) {
        map.set(categoryValueAt(sorted, k, "OrdinalEncoder.fit"), k);
      }
      this.categoryToIndex_.push(map);
      if (explicitCategories) {
        // With user-supplied categories the training data itself may contain
        // values outside the list; reject them at fit time.
        for (let i = 0; i < nSamples; i++) {
          const val = read2DValue(X, i, j);
          if (!map.has(val)) {
            throw new InvalidParameterError(
              `Unknown category: ${String(val)} in feature ${j}`,
              "X",
              val
            );
          }
        }
      }
      if (this.handleUnknown === "useEncodedValue") {
        // The sentinel must not be a valid index, otherwise unknowns would be
        // indistinguishable from a real category.
        if (Number.isFinite(this.unknownValue) && this.unknownValue >= 0 && this.unknownValue < sorted.length) {
          throw new InvalidParameterError(
            "unknownValue must be outside the range of encoded categories",
            "unknownValue",
            this.unknownValue
          );
        }
      }
    }
    this.fitted = true;
    return this;
  }
  /**
   * Transform X using ordinal encoding.
   * Each category is mapped to its index in the fitted categories array.
   *
   * @param X - Data to transform (2D tensor)
   * @returns Encoded data as a float64 tensor of indices (or unknownValue)
   * @throws {NotFittedError} If encoder is not fitted
   * @throws {InvalidParameterError} If the feature count differs from fit, or X
   *   contains unknown categories while handleUnknown is "error"
   */
  transform(X) {
    if (!this.fitted) {
      throw new NotFittedError("OrdinalEncoder must be fitted before transform");
    }
    assert2D(X, "X");
    const [nSamples, nFeatures] = getShape2D(X);
    const fittedFeatures = this.categories_?.length ?? 0;
    if (nFeatures !== fittedFeatures) {
      throw new InvalidParameterError(
        "X has a different feature count than during fit",
        "X",
        nFeatures
      );
    }
    if (nSamples === 0) {
      return zeros([0, nFeatures], { dtype: "float64" });
    }
    const result = new Array(nSamples);
    for (let i = 0; i < nSamples; i++) {
      result[i] = new Array(nFeatures);
    }
    for (let i = 0; i < nSamples; i++) {
      for (let j = 0; j < nFeatures; j++) {
        const val = read2DValue(X, i, j);
        const map = this.categoryToIndex_?.[j];
        if (!map) {
          throw new DeepboxError("OrdinalEncoder internal error: missing fitted categories");
        }
        const idx = map.get(val);
        const row = result[i];
        if (!row) {
          throw new DeepboxError("Internal error: result row access failed");
        }
        if (idx === undefined) {
          if (this.handleUnknown === "useEncodedValue") {
            row[j] = this.unknownValue;
            continue;
          }
          throw new InvalidParameterError(
            `Unknown category: ${String(val)} in feature ${j}`,
            "X",
            val
          );
        }
        row[j] = idx;
      }
    }
    return tensor(result, { dtype: "float64" });
  }
  /**
   * Fit encoder and transform X in one step.
   * Convenience method equivalent to calling fit(X).transform(X).
   *
   * @param X - Training data (2D tensor)
   * @returns Encoded data
   */
  fitTransform(X) {
    return this.fit(X).transform(X);
  }
  /**
   * Transform ordinal integers back to original categories.
   * Reverses the encoding performed by transform().
   *
   * @param X - Encoded data (2D numeric tensor of integer indices)
   * @returns Original categorical data with the same [samples, features] shape as X
   * @throws {NotFittedError} If encoder is not fitted
   * @throws {InvalidParameterError} If the feature count differs from fit, or X
   *   contains the unknown sentinel or an index outside the fitted categories
   */
  inverseTransform(X) {
    if (!this.fitted) {
      throw new NotFittedError("OrdinalEncoder must be fitted before inverse_transform");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    const [nSamples, nFeatures] = getShape2D(X);
    const fittedFeatures = this.categories_?.length ?? 0;
    if (nFeatures !== fittedFeatures) {
      throw new InvalidParameterError(
        "X has a different feature count than during fit",
        "X",
        nFeatures
      );
    }
    if (nSamples === 0 || nFeatures === 0) {
      // No cells to decode. Keep both dimensions of the input so that an
      // [n, 0] matrix decodes to [n, 0] instead of losing its sample count.
      const emptyShape = [nSamples, nFeatures];
      const categoryRows = this.categories_ ?? [];
      const categoryType = inferCategoryTypeFromRows(categoryRows, "X");
      if (categoryType === "string") {
        return empty(emptyShape, { dtype: "string" });
      }
      if (categoryType === "bigint") {
        return empty(emptyShape, { dtype: "int64" });
      }
      return zeros(emptyShape, { dtype: "float64" });
    }
    const result = new Array(nSamples);
    for (let i = 0; i < nSamples; i++) {
      result[i] = new Array(nFeatures);
    }
    const [stride0, stride1] = getStrides2D(X);
    const data = getNumericData(X);
    for (let i = 0; i < nSamples; i++) {
      for (let j = 0; j < nFeatures; j++) {
        const raw = data[X.offset + i * stride0 + j * stride1];
        if (raw === undefined) {
          throw new DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        const idx = Number(raw);
        // NaN never equals itself, so a NaN sentinel needs its own comparison.
        const isUnknownValue = this.handleUnknown === "useEncodedValue" && (Number.isNaN(idx) ? Number.isNaN(this.unknownValue) : idx === this.unknownValue);
        if (isUnknownValue) {
          throw new InvalidParameterError(
            "Cannot inverse-transform unknown encoded value",
            "X",
            idx
          );
        }
        const cats = this.categories_?.[j];
        if (!cats || idx < 0 || idx >= cats.length || !Number.isInteger(idx)) {
          throw new InvalidParameterError(
            `Invalid encoded value: ${idx} for feature ${j}. Must be integer in [0, ${(cats?.length ?? 0) - 1}]`,
            "X",
            idx
          );
        }
        const row = result[i];
        if (!row) {
          throw new DeepboxError("Internal error: result row access failed");
        }
        const catVal = cats[idx];
        if (catVal === undefined) {
          throw new DeepboxError("Internal error: category value missing");
        }
        row[j] = catVal;
      }
    }
    return toCategoryMatrixTensor(result, "X");
  }
};
1135
/**
 * Converts a 1D vector of class labels into an indicator matrix with one
 * column per class, and back again.
 */
var LabelBinarizer = class {
  /** True once fit() has learned the class list */
  fitted = false;
  /** Distinct labels seen by fit(), in sorted order */
  classes_;
  /** Label -> column index lookup built by fit() */
  classToIndex_;
  /** Matrix entry written for the active class */
  posLabel;
  /** Matrix entry written for every inactive class */
  negLabel;
  /** When true, transform() produces a CSRMatrix instead of a dense Tensor */
  sparse;
  /**
   * Builds a LabelBinarizer.
   *
   * @param options - Configuration options
   * @param options.posLabel - Entry for the active class (default: 1)
   * @param options.negLabel - Entry for inactive classes (default: 0)
   * @param options.sparse - Request CSRMatrix output (default: false)
   * @param options.sparseOutput - Alias for sparse (default: false)
   * @throws {InvalidParameterError} If the labels are not finite, posLabel is
   *   not greater than negLabel, the sparse flags disagree or are not boolean,
   *   or sparse output is requested with a non-zero negLabel
   */
  constructor(options = {}) {
    this.posLabel = options.posLabel ?? 1;
    this.negLabel = options.negLabel ?? 0;
    const wantsSparse = options.sparse ?? options.sparseOutput ?? false;

    const labelsAreFinite = Number.isFinite(this.posLabel) && Number.isFinite(this.negLabel);
    if (!labelsAreFinite) {
      throw new InvalidParameterError("posLabel and negLabel must be finite numbers", "posLabel");
    }
    if (this.negLabel >= this.posLabel) {
      throw new InvalidParameterError(
        "posLabel must be greater than negLabel",
        "posLabel",
        this.posLabel
      );
    }

    const bothFlagsGiven = options.sparse !== undefined && options.sparseOutput !== undefined;
    if (bothFlagsGiven && options.sparse !== options.sparseOutput) {
      throw new InvalidParameterError(
        "sparse and sparseOutput must match when both are provided",
        "sparse",
        options.sparse
      );
    }
    if (typeof wantsSparse !== "boolean") {
      throw new InvalidParameterError("sparse must be a boolean", "sparse", wantsSparse);
    }
    // A sparse matrix stores only the active entries; everything else reads as 0.
    if (wantsSparse && this.negLabel !== 0) {
      throw new InvalidParameterError(
        "sparse output requires negLabel to be 0",
        "negLabel",
        this.negLabel
      );
    }
    this.sparse = wantsSparse;
  }
  /**
   * Learn the set of distinct labels in y.
   *
   * @param y - Target labels (1D tensor)
   * @returns this - Returns self for method chaining
   * @throws {InvalidParameterError} If y is empty
   */
  fit(y) {
    assert1D(y, "y");
    if (y.size === 0) {
      throw new InvalidParameterError("Cannot fit LabelBinarizer on empty array", "y");
    }
    const distinct = /* @__PURE__ */ new Set();
    let pos = 0;
    while (pos < y.size) {
      distinct.add(read1DValue(y, pos));
      pos += 1;
    }
    const classes = sortCategories(distinct, "y");
    this.classes_ = classes;
    const indexByClass = /* @__PURE__ */ new Map();
    this.classToIndex_ = indexByClass;
    for (let c = 0; c < classes.length; c++) {
      indexByClass.set(categoryValueAt(classes, c, "LabelBinarizer.fit"), c);
    }
    this.fitted = true;
    return this;
  }
  /**
   * Encode labels as an indicator matrix.
   * Row i holds posLabel in the column of y[i]'s class and negLabel elsewhere.
   *
   * @param y - Labels to transform (1D tensor)
   * @returns Matrix (Tensor or CSRMatrix) with shape [n_samples, n_classes]
   * @throws {NotFittedError} If binarizer is not fitted
   * @throws {InvalidParameterError} If y contains a label not seen during fit
   */
  transform(y) {
    if (!this.fitted) {
      throw new NotFittedError("LabelBinarizer must be fitted before transform");
    }
    assert1D(y, "y");
    const sampleCount = y.size;
    if (sampleCount === 0) {
      const width = this.classes_?.length ?? 0;
      if (!this.sparse) {
        return zeros([0, width], { dtype: "float64" });
      }
      return CSRMatrix.fromCOO({
        rows: 0,
        cols: width,
        rowIndices: new Int32Array(0),
        colIndices: new Int32Array(0),
        values: new Float64Array(0)
      });
    }
    const classCount = this.classes_?.length ?? 0;
    const lookup = this.classToIndex_;
    if (!lookup) {
      throw new DeepboxError("LabelBinarizer internal error: missing fitted lookup");
    }
    // Column of the class stored at position i of y.
    const columnOf = (i) => {
      const label = read1DValue(y, i);
      const column = lookup.get(label);
      if (column === undefined) {
        throw new InvalidParameterError(
          `Unknown label: ${String(label)}. Label must be present during fit.`,
          "y",
          label
        );
      }
      return column;
    };

    if (this.sparse) {
      // Exactly one stored entry per sample, so the COO arrays have a known size.
      const rowIndices = new Int32Array(sampleCount);
      const colIndices = new Int32Array(sampleCount);
      for (let i = 0; i < sampleCount; i++) {
        const column = columnOf(i);
        rowIndices[i] = i;
        colIndices[i] = column;
      }
      return CSRMatrix.fromCOO({
        rows: sampleCount,
        cols: classCount,
        rowIndices,
        colIndices,
        values: new Float64Array(sampleCount).fill(this.posLabel)
      });
    }

    const matrix = new Array(sampleCount);
    for (let i = 0; i < sampleCount; i++) {
      const column = columnOf(i);
      const encoded = new Array(classCount).fill(this.negLabel);
      encoded[column] = this.posLabel;
      matrix[i] = encoded;
    }
    return tensor(matrix, { dtype: "float64" });
  }
  /**
   * Learn the classes of y and encode y in a single call.
   * Same as fit(y) followed by transform(y).
   *
   * @param y - Target labels (1D tensor)
   * @returns Indicator matrix (Tensor or CSRMatrix)
   */
  fitTransform(y) {
    const fittedBinarizer = this.fit(y);
    return fittedBinarizer.transform(y);
  }
  /**
   * Decode an indicator matrix into labels.
   * Each row decodes to the class of its largest entry; that entry has to
   * exceed negLabel.
   *
   * @param Y - Indicator matrix (2D tensor or CSRMatrix)
   * @returns Original labels (1D tensor)
   * @throws {NotFittedError} If binarizer is not fitted
   * @throws {InvalidParameterError} If Y has the wrong column count, a row has
   *   no entry above negLabel, or Y is sparse while negLabel is non-zero
   */
  inverseTransform(Y) {
    if (!this.fitted) {
      throw new NotFittedError("LabelBinarizer must be fitted before inverse_transform");
    }
    const source = "LabelBinarizer.inverseTransform";

    if (Y instanceof CSRMatrix) {
      if (this.negLabel !== 0) {
        throw new InvalidParameterError(
          "Sparse inverse transform requires negLabel to be 0",
          "negLabel",
          this.negLabel
        );
      }
      const [rowCount, colCount] = Y.shape;
      if (rowCount === undefined || colCount === undefined) {
        throw new ShapeError("Y must have valid shape");
      }
      const classCount = this.classes_?.length ?? 0;
      if (colCount !== classCount) {
        throw new InvalidParameterError("column count does not match number of classes", "Y", colCount);
      }
      const fittedClasses = this.classes_;
      if (!fittedClasses) {
        throw new DeepboxError("LabelBinarizer internal error: missing fitted classes");
      }
      if (rowCount === 0) {
        return emptyCategoryVectorFromClasses(fittedClasses, "y");
      }
      const decoded = new Array(rowCount);
      for (let r = 0; r < rowCount; r++) {
        // Unstored entries equal negLabel (0), so the scan starts from there.
        let bestVal = this.negLabel;
        let bestCol = 0;
        const from = Y.indptr[r] ?? 0;
        const to = Y.indptr[r + 1] ?? from;
        let p = from;
        while (p < to) {
          const col = Y.indices[p];
          if (col === undefined) {
            throw new DeepboxError("Internal error: sparse column index missing");
          }
          if (col < 0 || col >= classCount) {
            throw new InvalidParameterError(
              "column index out of bounds for fitted classes",
              "Y",
              col
            );
          }
          const stored = Y.data[p];
          if (stored === undefined) {
            throw new DeepboxError("Internal error: sparse value missing");
          }
          const candidate = Number(stored);
          if (candidate > bestVal) {
            bestVal = candidate;
            bestCol = col;
          }
          p += 1;
        }
        if (bestVal <= this.negLabel) {
          throw new InvalidParameterError(
            `No active label found for sample ${r}. LabelBinarizer expects exactly one active label.`,
            "Y"
          );
        }
        decoded[r] = categoryValueAt(fittedClasses, bestCol, source);
      }
      return toCategoryVectorTensor(decoded, "y");
    }

    assert2D(Y, "Y");
    assertNumericTensor(Y, "Y");
    const [sampleCount, columnCount] = getShape2D(Y);
    const expectedColumns = this.classes_?.length ?? 0;
    if (columnCount !== expectedColumns) {
      throw new InvalidParameterError("column count does not match number of classes", "Y", columnCount);
    }
    const knownClasses = this.classes_;
    if (!knownClasses) {
      throw new DeepboxError("LabelBinarizer internal error: missing fitted classes");
    }
    if (sampleCount === 0) {
      return emptyCategoryVectorFromClasses(knownClasses, "y");
    }
    const labels = new Array(sampleCount);
    const [rowStride, colStride] = getStrides2D(Y);
    const buffer = getNumericData(Y);
    for (let r = 0; r < sampleCount; r++) {
      const base = Y.offset + r * rowStride;
      const head = buffer[base];
      if (head === undefined) {
        throw new DeepboxError("Internal error: numeric tensor access out of bounds");
      }
      let bestVal = Number(head);
      let bestCol = 0;
      let c = 1;
      while (c < columnCount) {
        const cell = buffer[base + c * colStride];
        if (cell === undefined) {
          throw new DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        const candidate = Number(cell);
        if (candidate > bestVal) {
          bestVal = candidate;
          bestCol = c;
        }
        c += 1;
      }
      if (bestVal <= this.negLabel) {
        throw new InvalidParameterError(
          `No active label found for sample ${r}. LabelBinarizer expects exactly one active label.`,
          "Y"
        );
      }
      labels[r] = categoryValueAt(knownClasses, bestCol, source);
    }
    return toCategoryVectorTensor(labels, "y");
  }
};
1426
+ var MultiLabelBinarizer = class {
1427
+ /** Indicates whether the binarizer has been fitted to data */
1428
+ fitted = false;
1429
+ /** Array of all unique classes found across all samples, sorted */
1430
+ classes_;
1431
+ /** Map from class value to index for O(1) lookups */
1432
+ classToIndex_;
1433
+ /** Whether to return sparse matrix (CSR) or dense array */
1434
+ sparse;
1435
+ /** Optional explicit class ordering */
1436
+ classesOption;
1437
+ /**
1438
+ * Creates a new MultiLabelBinarizer instance.
1439
+ *
1440
+ * @param options - Configuration options
1441
+ * @param options.sparse - If true, returns CSRMatrix; if false, returns dense Tensor (default: false)
1442
+ * @param options.sparseOutput - Alias for sparse (default: false)
1443
+ * @param options.classes - Explicit class ordering to use instead of sorting
1444
+ */
1445
+ constructor(options = {}) {
1446
+ const sparseOption = options.sparse ?? options.sparseOutput ?? false;
1447
+ if (options.sparse !== void 0 && options.sparseOutput !== void 0) {
1448
+ if (options.sparse !== options.sparseOutput) {
1449
+ throw new InvalidParameterError(
1450
+ "sparse and sparseOutput must match when both are provided",
1451
+ "sparse",
1452
+ options.sparse
1453
+ );
1454
+ }
1455
+ }
1456
+ this.sparse = sparseOption;
1457
+ if (typeof this.sparse !== "boolean") {
1458
+ throw new InvalidParameterError("sparse must be a boolean", "sparse", this.sparse);
1459
+ }
1460
+ if (options.classes !== void 0) {
1461
+ this.classesOption = validateCategoryValues(options.classes, "classes");
1462
+ }
1463
+ }
1464
+ /**
1465
+ * Fit multi-label binarizer to label sets.
1466
+ * Learns all unique classes present across all samples.
1467
+ *
1468
+ * @param y - Array of label sets, where each element is an array of string/number/bigint labels
1469
+ * @returns this - Returns self for method chaining
1470
+ * @throws {InvalidParameterError} If y is empty
1471
+ */
1472
+ fit(y) {
1473
+ if (y.length === 0) {
1474
+ throw new InvalidParameterError("Cannot fit MultiLabelBinarizer on empty array", "y");
1475
+ }
1476
+ for (const labels of y) {
1477
+ if (!Array.isArray(labels)) {
1478
+ throw new InvalidParameterError("MultiLabelBinarizer expects label arrays", "y", labels);
1479
+ }
1480
+ for (const label of labels) {
1481
+ if (typeof label !== "string" && typeof label !== "number" && typeof label !== "bigint") {
1482
+ throw new InvalidParameterError(
1483
+ "MultiLabelBinarizer labels must be strings, numbers, or bigints",
1484
+ "y",
1485
+ label
1486
+ );
1487
+ }
1488
+ }
1489
+ }
1490
+ if (this.classesOption && this.classesOption.length === 0) {
1491
+ throw new InvalidParameterError("classes must contain at least one value", "classes");
1492
+ }
1493
+ if (this.classesOption) {
1494
+ this.classes_ = Array.from(this.classesOption);
1495
+ } else {
1496
+ const uniqueSet = /* @__PURE__ */ new Set();
1497
+ for (const labels of y) {
1498
+ for (const label of labels) {
1499
+ uniqueSet.add(label);
1500
+ }
1501
+ }
1502
+ this.classes_ = sortCategories(uniqueSet, "y");
1503
+ }
1504
+ this.classToIndex_ = /* @__PURE__ */ new Map();
1505
+ for (let i = 0; i < this.classes_.length; i++) {
1506
+ this.classToIndex_.set(categoryValueAt(this.classes_, i, "MultiLabelBinarizer.fit"), i);
1507
+ }
1508
+ if (this.classesOption) {
1509
+ for (const labels of y) {
1510
+ for (const label of labels) {
1511
+ if (!this.classToIndex_.has(label)) {
1512
+ throw new InvalidParameterError(
1513
+ `Unknown label: ${String(label)}. Label must be present in classes.`,
1514
+ "y",
1515
+ label
1516
+ );
1517
+ }
1518
+ }
1519
+ }
1520
+ }
1521
+ this.fitted = true;
1522
+ return this;
1523
+ }
1524
+ /**
1525
+ * Transform label sets to binary matrix.
1526
+ * Each sample can have multiple active (1) columns.
1527
+ *
1528
+ * @param y - Array of label sets to transform (string/number/bigint labels)
1529
+ * @returns Binary matrix (Tensor or CSRMatrix) with shape [n_samples, n_classes]
1530
+ * @throws {NotFittedError} If binarizer is not fitted
1531
+ * @throws {InvalidParameterError} If y contains unknown labels
1532
+ */
1533
+ transform(y) {
1534
+ if (!this.fitted) {
1535
+ throw new NotFittedError("MultiLabelBinarizer must be fitted before transform");
1536
+ }
1537
+ for (const labels of y) {
1538
+ if (!Array.isArray(labels)) {
1539
+ throw new InvalidParameterError("MultiLabelBinarizer expects label arrays", "y", labels);
1540
+ }
1541
+ for (const label of labels) {
1542
+ if (typeof label !== "string" && typeof label !== "number" && typeof label !== "bigint") {
1543
+ throw new InvalidParameterError(
1544
+ "MultiLabelBinarizer labels must be strings, numbers, or bigints",
1545
+ "y",
1546
+ label
1547
+ );
1548
+ }
1549
+ }
1550
+ }
1551
+ if (y.length === 0) {
1552
+ const nClasses2 = this.classes_?.length ?? 0;
1553
+ return this.sparse ? CSRMatrix.fromCOO({
1554
+ rows: 0,
1555
+ cols: nClasses2,
1556
+ rowIndices: new Int32Array(0),
1557
+ colIndices: new Int32Array(0),
1558
+ values: new Float64Array(0)
1559
+ }) : zeros([0, nClasses2], { dtype: "float64" });
1560
+ }
1561
+ const nSamples = y.length;
1562
+ const nClasses = this.classes_?.length ?? 0;
1563
+ const lookup = this.classToIndex_;
1564
+ if (!lookup) {
1565
+ throw new DeepboxError("MultiLabelBinarizer internal error: missing fitted lookup");
1566
+ }
1567
+ if (this.sparse) {
1568
+ const rowIdx = [];
1569
+ const colIdx = [];
1570
+ const vals = [];
1571
+ for (let i = 0; i < nSamples; i++) {
1572
+ const yRow = y[i];
1573
+ if (!yRow) continue;
1574
+ const seen = /* @__PURE__ */ new Set();
1575
+ for (const label of yRow) {
1576
+ const idx = lookup.get(label);
1577
+ if (idx === void 0) {
1578
+ throw new InvalidParameterError(
1579
+ `Unknown label: ${String(label)}. Label must be present during fit.`,
1580
+ "y",
1581
+ label
1582
+ );
1583
+ }
1584
+ if (seen.has(idx)) continue;
1585
+ seen.add(idx);
1586
+ rowIdx.push(i);
1587
+ colIdx.push(idx);
1588
+ vals.push(1);
1589
+ }
1590
+ }
1591
+ return CSRMatrix.fromCOO({
1592
+ rows: nSamples,
1593
+ cols: nClasses,
1594
+ rowIndices: Int32Array.from(rowIdx),
1595
+ colIndices: Int32Array.from(colIdx),
1596
+ values: Float64Array.from(vals)
1597
+ });
1598
+ }
1599
+ const result = new Array(nSamples);
1600
+ for (let i = 0; i < nSamples; i++) {
1601
+ result[i] = new Array(nClasses).fill(0);
1602
+ }
1603
+ for (let i = 0; i < nSamples; i++) {
1604
+ const yRow = y[i];
1605
+ if (!yRow) continue;
1606
+ for (const label of yRow) {
1607
+ const idx = lookup.get(label);
1608
+ if (idx === void 0) {
1609
+ throw new InvalidParameterError(
1610
+ `Unknown label: ${String(label)}. Label must be present during fit.`,
1611
+ "y",
1612
+ label
1613
+ );
1614
+ }
1615
+ const row = result[i];
1616
+ if (!row) {
1617
+ throw new DeepboxError("Internal error: result row access failed");
1618
+ }
1619
+ row[idx] = 1;
1620
+ }
1621
+ }
1622
+ return tensor(result, { dtype: "float64" });
1623
+ }
1624
+ /**
1625
+ * Fit binarizer and transform label sets in one step.
1626
+ * Convenience method equivalent to calling fit(y).transform(y).
1627
+ *
1628
+ * @param y - Array of label sets (string/number/bigint labels)
1629
+ * @returns Binary matrix (Tensor or CSRMatrix)
1630
+ */
1631
+ fitTransform(y) {
1632
+ return this.fit(y).transform(y);
1633
+ }
1634
+ /**
1635
+ * Transform binary matrix back to label sets.
1636
+ * Finds all active (1) columns for each row.
1637
+ *
1638
+ * @param Y - Binary matrix (Tensor or CSRMatrix)
1639
+ * @returns Array of label sets, one per sample
1640
+ * @throws {NotFittedError} If binarizer is not fitted
1641
+ * @throws {InvalidParameterError} If Y has invalid shape
1642
+ */
1643
+ inverseTransform(Y) {
1644
+ if (!this.fitted) {
1645
+ throw new NotFittedError("MultiLabelBinarizer must be fitted before inverse_transform");
1646
+ }
1647
+ if (Y instanceof CSRMatrix) {
1648
+ const [rows, cols] = Y.shape;
1649
+ if (rows === void 0 || cols === void 0) {
1650
+ throw new ShapeError("Y must have valid shape");
1651
+ }
1652
+ const fittedClasses2 = this.classes_?.length ?? 0;
1653
+ if (cols !== fittedClasses2) {
1654
+ throw new InvalidParameterError("column count does not match number of classes", "Y", cols);
1655
+ }
1656
+ if (rows === 0) {
1657
+ return [];
1658
+ }
1659
+ const classes2 = this.classes_;
1660
+ if (!classes2) {
1661
+ throw new DeepboxError("MultiLabelBinarizer internal error: missing fitted classes");
1662
+ }
1663
+ const result2 = [];
1664
+ for (let i = 0; i < rows; i++) {
1665
+ const labels = [];
1666
+ const start = Y.indptr[i] ?? 0;
1667
+ const end = Y.indptr[i + 1] ?? start;
1668
+ for (let p = start; p < end; p++) {
1669
+ const col = Y.indices[p];
1670
+ if (col === void 0) {
1671
+ throw new DeepboxError("Internal error: sparse column index missing");
1672
+ }
1673
+ if (col < 0 || col >= fittedClasses2) {
1674
+ throw new InvalidParameterError(
1675
+ "column index out of bounds for fitted classes",
1676
+ "Y",
1677
+ col
1678
+ );
1679
+ }
1680
+ const raw = Y.data[p];
1681
+ if (raw === void 0) {
1682
+ throw new DeepboxError("Internal error: sparse value missing");
1683
+ }
1684
+ const value = Number(raw);
1685
+ if (value > 0) {
1686
+ labels.push(categoryValueAt(classes2, col, "MultiLabelBinarizer.inverseTransform"));
1687
+ }
1688
+ }
1689
+ result2.push(labels);
1690
+ }
1691
+ return result2;
1692
+ }
1693
+ assert2D(Y, "Y");
1694
+ assertNumericTensor(Y, "Y");
1695
+ const nSamples = Y.shape[0] ?? 0;
1696
+ const nClasses = Y.shape[1] ?? 0;
1697
+ const fittedClasses = this.classes_?.length ?? 0;
1698
+ if (nClasses !== fittedClasses) {
1699
+ throw new InvalidParameterError(
1700
+ "column count does not match number of classes",
1701
+ "Y",
1702
+ nClasses
1703
+ );
1704
+ }
1705
+ if (nSamples === 0) {
1706
+ return [];
1707
+ }
1708
+ const classes = this.classes_;
1709
+ if (!classes) {
1710
+ throw new DeepboxError("MultiLabelBinarizer internal error: missing fitted classes");
1711
+ }
1712
+ const result = [];
1713
+ const [stride0, stride1] = getStrides2D(Y);
1714
+ const data = getNumericData(Y);
1715
+ for (let i = 0; i < nSamples; i++) {
1716
+ const labels = [];
1717
+ const rowBase = Y.offset + i * stride0;
1718
+ for (let j = 0; j < nClasses; j++) {
1719
+ const raw = data[rowBase + j * stride1];
1720
+ if (raw === void 0) {
1721
+ throw new DeepboxError("Internal error: numeric tensor access out of bounds");
1722
+ }
1723
+ const val = Number(raw);
1724
+ if (val > 0) {
1725
+ labels.push(categoryValueAt(classes, j, "MultiLabelBinarizer.inverseTransform"));
1726
+ }
1727
+ }
1728
+ result.push(labels);
1729
+ }
1730
+ return result;
1731
+ }
1732
+ };
1733
+
1734
+ // src/preprocess/scalers.ts
1735
/**
 * Return the backing storage of a numeric tensor.
 *
 * @param X - Tensor-like object exposing `dtype` and `data`
 * @param name - Argument name used in the error message
 * @returns `X.data` unchanged
 * @throws {DTypeError} If `X.dtype` is "string"
 * @throws {DeepboxError} If `X.data` is a plain array rather than numeric storage
 */
function getNumericData2(X, name) {
  const isStringTensor = X.dtype === "string";
  if (isStringTensor) {
    throw new DTypeError(`${name} must be numeric`);
  }
  const storage = X.data;
  if (Array.isArray(storage)) {
    throw new DeepboxError("Internal error: invalid numeric tensor storage");
  }
  return storage;
}
1744
/**
 * Resolve an optional boolean option.
 *
 * @param value - Caller-supplied option value (may be undefined)
 * @param name - Option name, used in the error message and error metadata
 * @param defaultValue - Value returned when `value` is undefined
 * @returns `defaultValue` when the option is omitted, otherwise `value`
 * @throws {InvalidParameterError} If `value` is neither undefined nor a boolean
 */
function parseBooleanOption(value, name, defaultValue) {
  switch (typeof value) {
    case "undefined":
      return defaultValue;
    case "boolean":
      return value;
    default:
      throw new InvalidParameterError(`${name} must be a boolean`, name, value);
  }
}
1753
/**
 * Ensure every element of a 2D numeric tensor is a finite number.
 *
 * Elements are visited row by row via the tensor's offset and strides; the
 * index reported on failure is the logical row-major position
 * (`row * nFeatures + column`), not the storage position.
 *
 * @param X - 2D tensor to scan
 * @param name - Argument name used in error messages
 * @throws {DataValidationError} If an element is NaN or +/-Infinity
 * @throws {DeepboxError} If a computed storage position has no value
 */
function validateFiniteData(X, name) {
  const [rowCount, columnCount] = getShape2D(X);
  const buffer = getNumericData2(X, name);
  const [rowStride, colStride] = getStrides2D(X);
  for (let row = 0; row < rowCount; row += 1) {
    const base = X.offset + row * rowStride;
    for (let column = 0; column < columnCount; column += 1) {
      const entry = buffer[base + column * colStride];
      if (entry === undefined) {
        throw new DeepboxError("Internal error: numeric tensor access out of bounds");
      }
      if (!Number.isFinite(Number(entry))) {
        throw new DataValidationError(
          `${name} contains NaN or Infinity at index ${row * columnCount + column}`
        );
      }
    }
  }
}
1773
/**
 * Remove tiny floating-point residue from an inverse-transformed value.
 *
 * Tries two candidates in order: the nearest integer, then the value rounded
 * to 12 decimal places. The first candidate within 1e-12 of the input is
 * returned. Non-finite inputs and values with no close candidate are returned
 * unchanged.
 *
 * @param value - Number to clean up
 * @returns The snapped value, or `value` itself when no snapping applies
 */
function snapInverseValue(value) {
  if (!Number.isFinite(value)) {
    return value;
  }
  const tolerance = 1e-12;
  const candidates = [Math.round(value), Math.round(value * 1e12) / 1e12];
  for (const candidate of candidates) {
    if (Math.abs(value - candidate) < tolerance) {
      return candidate;
    }
  }
  return value;
}
1781
/**
 * Approximate the quantile function (inverse CDF) of the standard normal
 * distribution with a piecewise rational approximation.
 *
 * Three regions are used: a lower tail (p < 0.02425), an upper tail
 * (p > 1 - 0.02425) that mirrors the lower-tail formula with the sign flipped,
 * and a central region. Each region is a ratio of two polynomials evaluated
 * with Horner's scheme.
 *
 * @param p - Probability strictly between 0 and 1
 * @returns z such that the standard normal CDF at z is approximately p
 * @throws {InvalidParameterError} If p is not a finite number in (0, 1)
 */
function normalQuantile(p) {
  const insideOpenUnitInterval = Number.isFinite(p) && p > 0 && p < 1;
  if (!insideOpenUnitInterval) {
    throw new InvalidParameterError(
      "normalQuantile requires p in the open interval (0, 1)",
      "p",
      p
    );
  }
  // Coefficients are listed from the highest power down; each denominator ends in the constant 1.
  const centralNumerator = [
    -39.69683028665376,
    220.9460984245205,
    -275.9285104469687,
    138.357751867269,
    -30.66479806614716,
    2.506628277459239
  ];
  const centralDenominator = [
    -54.47609879822406,
    161.5858368580409,
    -155.6989798598866,
    66.80131188771972,
    -13.28068155288572,
    1
  ];
  const tailNumerator = [
    -0.007784894002430293,
    -0.3223964580411365,
    -2.400758277161838,
    -2.549732539343734,
    4.374664141464968,
    2.938163982698783
  ];
  const tailDenominator = [
    0.007784695709041462,
    0.3224671290700398,
    2.445134137142996,
    3.754408661907416,
    1
  ];
  // Horner evaluation: (((c0 * x + c1) * x + c2) ... ) * x + cn.
  const horner = ([lead, ...rest], x) => rest.reduce((acc, coeff) => acc * x + coeff, lead);
  const tailRatio = (x) => horner(tailNumerator, x) / horner(tailDenominator, x);
  const lowerBreak = 0.02425;
  const upperBreak = 1 - lowerBreak;
  if (p < lowerBreak) {
    return tailRatio(Math.sqrt(-2 * Math.log(p)));
  }
  if (p > upperBreak) {
    return -tailRatio(Math.sqrt(-2 * Math.log(1 - p)));
  }
  const centered = p - 0.5;
  const squared = centered * centered;
  return horner(centralNumerator, squared) * centered / horner(centralDenominator, squared);
}
1824
/**
 * Standardizes features by subtracting the per-feature mean and dividing by
 * the per-feature population standard deviation (divisor n, not n - 1).
 * Features whose standard deviation is exactly 0 are divided by 1 instead.
 */
var StandardScaler = class {
  fitted = false;
  mean_;
  scale_;
  withMean;
  withStd;
  /**
   * Creates a new StandardScaler.
   *
   * @param options - Configuration options
   * @param options.withMean - Center data before scaling (default: true)
   * @param options.withStd - Scale data to unit variance (default: true)
   * @param options.copy - Accepted for API parity; transforms are always out-of-place (default: true)
   */
  constructor(options = {}) {
    this.withMean = parseBooleanOption(options.withMean, "withMean", true);
    this.withStd = parseBooleanOption(options.withStd, "withStd", true);
    // `copy` is validated for type only; its value is not stored.
    parseBooleanOption(options.copy, "copy", true);
  }
  /**
   * Compute per-feature mean and standard deviation from X.
   *
   * @param X - 2D numeric tensor with at least one element and only finite values
   * @returns This scaler, for chaining
   * @throws {InvalidParameterError} If X is empty
   */
  fit(X) {
    if (X.size === 0) {
      throw new InvalidParameterError("X must contain at least one sample", "X");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    validateFiniteData(X, "X");
    const [sampleCount, featureCount] = getShape2D(X);
    const buffer = getNumericData2(X, "X");
    const [rowStride, colStride] = getStrides2D(X);
    const readCell = (row, column) => {
      const cell = buffer[X.offset + row * rowStride + column * colStride];
      if (cell === undefined) {
        throw new DeepboxError("Internal error: numeric tensor access out of bounds");
      }
      return Number(cell);
    };
    // Means are needed for centering and also as the reference point for the deviation.
    let columnMeans;
    if (this.withMean || this.withStd) {
      columnMeans = new Array(featureCount).fill(0);
      for (let column = 0; column < featureCount; column += 1) {
        let total = 0;
        for (let row = 0; row < sampleCount; row += 1) {
          total += readCell(row, column);
        }
        columnMeans[column] = total / sampleCount;
      }
    }
    if (this.withStd) {
      const deviations = new Array(featureCount).fill(0);
      for (let column = 0; column < featureCount; column += 1) {
        const reference = columnMeans?.[column] ?? 0;
        let squaredTotal = 0;
        for (let row = 0; row < sampleCount; row += 1) {
          const delta = readCell(row, column) - reference;
          squaredTotal += delta * delta;
        }
        deviations[column] = Math.sqrt(squaredTotal / sampleCount);
      }
      this.scale_ = tensor(deviations, { dtype: "float64" });
    } else {
      this.scale_ = undefined;
    }
    this.mean_ = this.withMean && columnMeans ? tensor(columnMeans, { dtype: "float64" }) : undefined;
    this.fitted = true;
    return this;
  }
  /**
   * Apply the fitted centering and scaling to X.
   *
   * @param X - 2D numeric tensor containing only finite values
   * @returns New float64 tensor created on X's device
   * @throws {NotFittedError} If called before fit
   */
  transform(X) {
    if (!this.fitted) {
      throw new NotFittedError("StandardScaler must be fitted before transform");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    validateFiniteData(X, "X");
    const [sampleCount, featureCount] = getShape2D(X);
    const buffer = getNumericData2(X, "X");
    const [rowStride, colStride] = getStrides2D(X);
    const center = this.mean_;
    const spread = this.scale_;
    const centerData = center ? getNumericData2(center, "mean_") : undefined;
    const spreadData = spread ? getNumericData2(spread, "scale_") : undefined;
    const centerStride = center ? getStride1D(center) : 0;
    const spreadStride = spread ? getStride1D(spread) : 0;
    if (this.withMean && !center) {
      throw new DeepboxError("StandardScaler internal error: missing mean_");
    }
    if (this.withStd && !spread) {
      throw new DeepboxError("StandardScaler internal error: missing scale_");
    }
    const shiftActive = Boolean(this.withMean && center && centerData);
    const scaleActive = Boolean(this.withStd && spread && spreadData);
    const output = [];
    for (let row = 0; row < sampleCount; row += 1) {
      const base = X.offset + row * rowStride;
      const outRow = new Array(featureCount).fill(0);
      for (let column = 0; column < featureCount; column += 1) {
        const cell = buffer[base + column * colStride];
        if (cell === undefined) {
          throw new DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        let value = Number(cell);
        if (shiftActive) {
          const storedMean = centerData[center.offset + column * centerStride];
          if (storedMean === undefined) {
            throw new DeepboxError("Internal error: mean tensor access out of bounds");
          }
          value -= Number(storedMean);
        }
        if (scaleActive) {
          const storedScale = spreadData[spread.offset + column * spreadStride];
          if (storedScale === undefined) {
            throw new DeepboxError("Internal error: scale tensor access out of bounds");
          }
          const deviation = Number(storedScale);
          // Constant features have zero deviation; divide by 1 to avoid NaN/Infinity.
          value /= deviation === 0 ? 1 : deviation;
        }
        outRow[column] = value;
      }
      output.push(outRow);
    }
    return tensor(output, { dtype: "float64", device: X.device });
  }
  /**
   * Fit to X, then transform the same X.
   *
   * @param X - 2D numeric tensor
   * @returns Transformed tensor
   */
  fitTransform(X) {
    const fittedScaler = this.fit(X);
    return fittedScaler.transform(X);
  }
  /**
   * Undo the scaling: multiply by the stored deviation, then add the stored mean.
   * Each output value is passed through `snapInverseValue`.
   *
   * @param X - 2D numeric tensor in scaled space
   * @returns New float64 tensor created on X's device
   * @throws {NotFittedError} If called before fit
   */
  inverseTransform(X) {
    if (!this.fitted) {
      throw new NotFittedError("StandardScaler must be fitted before inverse_transform");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    const [sampleCount, featureCount] = getShape2D(X);
    const buffer = getNumericData2(X, "X");
    const [rowStride, colStride] = getStrides2D(X);
    const center = this.mean_;
    const spread = this.scale_;
    const centerData = center ? getNumericData2(center, "mean_") : undefined;
    const spreadData = spread ? getNumericData2(spread, "scale_") : undefined;
    const centerStride = center ? getStride1D(center) : 0;
    const spreadStride = spread ? getStride1D(spread) : 0;
    if (this.withMean && !center) {
      throw new DeepboxError("StandardScaler internal error: missing mean_");
    }
    if (this.withStd && !spread) {
      throw new DeepboxError("StandardScaler internal error: missing scale_");
    }
    const shiftActive = Boolean(this.withMean && center && centerData);
    const scaleActive = Boolean(this.withStd && spread && spreadData);
    const output = [];
    for (let row = 0; row < sampleCount; row += 1) {
      const base = X.offset + row * rowStride;
      const outRow = new Array(featureCount).fill(0);
      for (let column = 0; column < featureCount; column += 1) {
        const cell = buffer[base + column * colStride];
        if (cell === undefined) {
          throw new DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        let value = Number(cell);
        if (scaleActive) {
          const storedScale = spreadData[spread.offset + column * spreadStride];
          if (storedScale === undefined) {
            throw new DeepboxError("Internal error: scale tensor access out of bounds");
          }
          const deviation = Number(storedScale);
          // Mirror transform(): a zero deviation was treated as 1 there.
          value *= deviation === 0 ? 1 : deviation;
        }
        if (shiftActive) {
          const storedMean = centerData[center.offset + column * centerStride];
          if (storedMean === undefined) {
            throw new DeepboxError("Internal error: mean tensor access out of bounds");
          }
          value += Number(storedMean);
        }
        outRow[column] = snapInverseValue(value);
      }
      output.push(outRow);
    }
    return tensor(output, { dtype: "float64", device: X.device });
  }
};
2008
/**
 * Rescales each feature linearly so the fitted minimum maps to the lower
 * bound of `featureRange` and the fitted maximum maps to the upper bound.
 * Features with zero fitted range map every value to the lower bound.
 */
var MinMaxScaler = class {
  fitted = false;
  dataMin_;
  dataMax_;
  featureRange;
  clip;
  /**
   * Creates a new MinMaxScaler.
   *
   * @param options - Configuration options
   * @param options.featureRange - Desired feature range [min, max] (default: [0, 1])
   * @param options.clip - Clip transformed values to featureRange (default: false)
   * @param options.copy - Accepted for API parity; transforms are always out-of-place (default: true)
   * @throws {InvalidParameterError} If featureRange bounds are not finite or not strictly ascending
   */
  constructor(options = {}) {
    this.featureRange = options.featureRange ?? [0, 1];
    this.clip = parseBooleanOption(options.clip, "clip", false);
    // `copy` is validated for type only; its value is not stored.
    parseBooleanOption(options.copy, "copy", true);
    const [lowerBound, upperBound] = this.featureRange;
    const boundsAreValid =
      Number.isFinite(lowerBound) && Number.isFinite(upperBound) && lowerBound < upperBound;
    if (!boundsAreValid) {
      throw new InvalidParameterError(
        "featureRange must be [min, max] with min < max",
        "featureRange",
        this.featureRange
      );
    }
  }
  /**
   * Record the per-feature minimum and maximum of X.
   *
   * @param X - 2D numeric tensor with at least one element and only finite values
   * @returns This scaler, for chaining
   * @throws {InvalidParameterError} If X is empty
   */
  fit(X) {
    if (X.size === 0) {
      throw new InvalidParameterError("X must contain at least one sample", "X");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    validateFiniteData(X, "X");
    const [sampleCount, featureCount] = getShape2D(X);
    const buffer = getNumericData2(X, "X");
    const [rowStride, colStride] = getStrides2D(X);
    const lows = new Array(featureCount).fill(Number.POSITIVE_INFINITY);
    const highs = new Array(featureCount).fill(Number.NEGATIVE_INFINITY);
    for (let column = 0; column < featureCount; column += 1) {
      let low = Number.POSITIVE_INFINITY;
      let high = Number.NEGATIVE_INFINITY;
      for (let row = 0; row < sampleCount; row += 1) {
        const cell = buffer[X.offset + row * rowStride + column * colStride];
        if (cell === undefined) {
          throw new DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        const value = Number(cell);
        low = Math.min(low, value);
        high = Math.max(high, value);
      }
      lows[column] = low;
      highs[column] = high;
    }
    this.dataMin_ = tensor(lows, { dtype: "float64" });
    this.dataMax_ = tensor(highs, { dtype: "float64" });
    this.fitted = true;
    return this;
  }
  /**
   * Map X into `featureRange` using the fitted minima and maxima.
   *
   * @param X - 2D numeric tensor containing only finite values
   * @returns New float64 tensor created on X's device
   * @throws {NotFittedError} If called before fit
   */
  transform(X) {
    if (!this.fitted) {
      throw new NotFittedError("MinMaxScaler must be fitted before transform");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    validateFiniteData(X, "X");
    const [sampleCount, featureCount] = getShape2D(X);
    const [lowerBound, upperBound] = this.featureRange;
    const buffer = getNumericData2(X, "X");
    const [rowStride, colStride] = getStrides2D(X);
    const fittedMin = this.dataMin_;
    const fittedMax = this.dataMax_;
    if (!fittedMin || !fittedMax) {
      throw new DeepboxError("MinMaxScaler internal error: missing fitted min/max");
    }
    const minBuffer = getNumericData2(fittedMin, "dataMin_");
    const maxBuffer = getNumericData2(fittedMax, "dataMax_");
    const minStep = getStride1D(fittedMin);
    const maxStep = getStride1D(fittedMax);
    const output = [];
    for (let row = 0; row < sampleCount; row += 1) {
      const base = X.offset + row * rowStride;
      const outRow = new Array(featureCount).fill(0);
      for (let column = 0; column < featureCount; column += 1) {
        const cell = buffer[base + column * colStride];
        if (cell === undefined) {
          throw new DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        const value = Number(cell);
        const storedMin = minBuffer[fittedMin.offset + column * minStep];
        const storedMax = maxBuffer[fittedMax.offset + column * maxStep];
        if (storedMin === undefined || storedMax === undefined) {
          throw new DeepboxError("Internal error: min/max tensor access out of bounds");
        }
        const low = Number(storedMin);
        const span = Number(storedMax) - low;
        let mapped;
        if (span !== 0) {
          mapped = (value - low) / span * (upperBound - lowerBound) + lowerBound;
        } else {
          // Constant feature: there is no spread to scale, so pin to the lower bound.
          mapped = lowerBound;
        }
        if (this.clip) {
          mapped = Math.max(lowerBound, Math.min(upperBound, mapped));
        }
        outRow[column] = mapped;
      }
      output.push(outRow);
    }
    return tensor(output, { dtype: "float64", device: X.device });
  }
  /**
   * Fit to X, then transform the same X.
   *
   * @param X - 2D numeric tensor
   * @returns Transformed tensor
   */
  fitTransform(X) {
    const fittedScaler = this.fit(X);
    return fittedScaler.transform(X);
  }
  /**
   * Map values from `featureRange` back to the fitted data range.
   *
   * @param X - 2D numeric tensor in scaled space
   * @returns New float64 tensor created on X's device
   * @throws {NotFittedError} If called before fit
   */
  inverseTransform(X) {
    if (!this.fitted) {
      throw new NotFittedError("MinMaxScaler must be fitted before inverse_transform");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    const [sampleCount, featureCount] = getShape2D(X);
    const [lowerBound, upperBound] = this.featureRange;
    const buffer = getNumericData2(X, "X");
    const [rowStride, colStride] = getStrides2D(X);
    const fittedMin = this.dataMin_;
    const fittedMax = this.dataMax_;
    if (!fittedMin || !fittedMax) {
      throw new DeepboxError("MinMaxScaler internal error: missing fitted min/max");
    }
    const minBuffer = getNumericData2(fittedMin, "dataMin_");
    const maxBuffer = getNumericData2(fittedMax, "dataMax_");
    const minStep = getStride1D(fittedMin);
    const maxStep = getStride1D(fittedMax);
    const output = [];
    for (let row = 0; row < sampleCount; row += 1) {
      const base = X.offset + row * rowStride;
      const outRow = new Array(featureCount).fill(0);
      for (let column = 0; column < featureCount; column += 1) {
        const cell = buffer[base + column * colStride];
        if (cell === undefined) {
          throw new DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        const value = Number(cell);
        const storedMin = minBuffer[fittedMin.offset + column * minStep];
        const storedMax = maxBuffer[fittedMax.offset + column * maxStep];
        if (storedMin === undefined || storedMax === undefined) {
          throw new DeepboxError("Internal error: min/max tensor access out of bounds");
        }
        const low = Number(storedMin);
        const span = Number(storedMax) - low;
        outRow[column] = (value - lowerBound) / (upperBound - lowerBound) * span + low;
      }
      output.push(outRow);
    }
    return tensor(output, { dtype: "float64", device: X.device });
  }
};
2167
/**
 * Scales each feature by its maximum absolute value seen during fit, so
 * fitted data lands in [-1, 1] without shifting or centering.
 *
 * A feature whose fitted maximum absolute value is 0 uses a scale of 1 in
 * both `transform` and `inverseTransform`, so the two remain exact inverses
 * of each other for that feature.
 */
var MaxAbsScaler = class {
  fitted = false;
  maxAbs_;
  /**
   * Creates a new MaxAbsScaler.
   *
   * @param options - Configuration options
   * @param options.copy - Accepted for API parity; transforms are always out-of-place (default: true)
   */
  constructor(options = {}) {
    // `copy` is validated for type only; its value is not stored.
    parseBooleanOption(options.copy, "copy", true);
  }
  /**
   * Record the per-feature maximum absolute value of X.
   *
   * @param X - 2D numeric tensor with at least one element and only finite values
   * @returns This scaler, for chaining
   * @throws {InvalidParameterError} If X is empty
   */
  fit(X) {
    if (X.size === 0) {
      throw new InvalidParameterError("X must contain at least one sample", "X");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    validateFiniteData(X, "X");
    const [nSamples, nFeatures] = getShape2D(X);
    const data = getNumericData2(X, "X");
    const [stride0, stride1] = getStrides2D(X);
    const maxAbs = new Array(nFeatures).fill(0);
    for (let j = 0; j < nFeatures; j++) {
      for (let i = 0; i < nSamples; i++) {
        const raw = data[X.offset + i * stride0 + j * stride1];
        if (raw === undefined) {
          throw new DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        const currentMax = maxAbs[j];
        if (currentMax === undefined) {
          throw new DeepboxError("Internal error: maxAbs array access failed");
        }
        maxAbs[j] = Math.max(currentMax, Math.abs(Number(raw)));
      }
    }
    this.maxAbs_ = tensor(maxAbs, { dtype: "float64" });
    this.fitted = true;
    return this;
  }
  /**
   * Divide each feature of X by its fitted maximum absolute value.
   *
   * @param X - 2D numeric tensor containing only finite values
   * @returns New float64 tensor created on X's device
   * @throws {NotFittedError} If called before fit
   */
  transform(X) {
    if (!this.fitted) {
      throw new NotFittedError("MaxAbsScaler must be fitted before transform");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    validateFiniteData(X, "X");
    const [nSamples, nFeatures] = getShape2D(X);
    const data = getNumericData2(X, "X");
    const [stride0, stride1] = getStrides2D(X);
    const maxAbs = this.maxAbs_;
    if (!maxAbs) {
      throw new DeepboxError("MaxAbsScaler internal error: missing fitted maxAbs");
    }
    const maxData = getNumericData2(maxAbs, "maxAbs_");
    const maxStride = getStride1D(maxAbs);
    const result = Array.from({ length: nSamples }, () => new Array(nFeatures).fill(0));
    for (let i = 0; i < nSamples; i++) {
      const rowBase = X.offset + i * stride0;
      for (let j = 0; j < nFeatures; j++) {
        const raw = data[rowBase + j * stride1];
        if (raw === undefined) {
          throw new DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        const val = Number(raw);
        const rawScale = maxData[maxAbs.offset + j * maxStride];
        if (rawScale === undefined) {
          throw new DeepboxError("Internal error: maxAbs tensor access out of bounds");
        }
        const scale = Number(rawScale);
        // All-zero training column: divide by 1 rather than 0.
        const safeScale = scale === 0 ? 1 : scale;
        const row = result[i];
        if (row === undefined) {
          throw new DeepboxError("Internal error: result row access failed");
        }
        row[j] = val / safeScale;
      }
    }
    return tensor(result, { dtype: "float64", device: X.device });
  }
  /**
   * Fit to X, then transform the same X.
   *
   * @param X - 2D numeric tensor
   * @returns Transformed tensor
   */
  fitTransform(X) {
    return this.fit(X).transform(X);
  }
  /**
   * Multiply each feature of X by its fitted maximum absolute value, undoing
   * `transform`.
   *
   * @param X - 2D numeric tensor in scaled space
   * @returns New float64 tensor created on X's device
   * @throws {NotFittedError} If called before fit
   */
  inverseTransform(X) {
    if (!this.fitted) {
      throw new NotFittedError("MaxAbsScaler must be fitted before inverse_transform");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    const [nSamples, nFeatures] = getShape2D(X);
    const data = getNumericData2(X, "X");
    const [stride0, stride1] = getStrides2D(X);
    const maxAbs = this.maxAbs_;
    if (!maxAbs) {
      throw new DeepboxError("MaxAbsScaler internal error: missing fitted maxAbs");
    }
    const maxData = getNumericData2(maxAbs, "maxAbs_");
    const maxStride = getStride1D(maxAbs);
    const result = Array.from({ length: nSamples }, () => new Array(nFeatures).fill(0));
    for (let i = 0; i < nSamples; i++) {
      const rowBase = X.offset + i * stride0;
      for (let j = 0; j < nFeatures; j++) {
        const raw = data[rowBase + j * stride1];
        if (raw === undefined) {
          throw new DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        const val = Number(raw);
        const rawScale = maxData[maxAbs.offset + j * maxStride];
        if (rawScale === undefined) {
          throw new DeepboxError("Internal error: maxAbs tensor access out of bounds");
        }
        const scale = Number(rawScale);
        // transform() divided by 1 when the fitted scale was 0, so the inverse
        // must multiply by 1 too; multiplying by the raw 0 would erase the value.
        const safeScale = scale === 0 ? 1 : scale;
        const row = result[i];
        if (row === undefined) {
          throw new DeepboxError("Internal error: result row access failed");
        }
        row[j] = val * safeScale;
      }
    }
    return tensor(result, { dtype: "float64", device: X.device });
  }
};
2289
+ var RobustScaler = class {
2290
+ fitted = false;
2291
+ center_;
2292
+ scale_;
2293
+ withCentering;
2294
+ withScaling;
2295
+ quantileRange;
2296
+ unitVariance;
2297
+ /**
2298
+ * Creates a new RobustScaler.
2299
+ *
2300
+ * @param options - Configuration options
2301
+ * @param options.withCentering - Center data using median (default: true)
2302
+ * @param options.withScaling - Scale data using IQR (default: true)
2303
+ * @param options.quantileRange - Quantile range for IQR as percentiles (default: [25, 75])
2304
+ * @param options.unitVariance - Scale so that features have unit variance under normality (default: false)
2305
+ * @param options.copy - Accepted for API parity; transforms are always out-of-place (default: true)
2306
+ */
2307
+ constructor(options = {}) {
2308
+ this.withCentering = parseBooleanOption(options.withCentering, "withCentering", true);
2309
+ this.withScaling = parseBooleanOption(options.withScaling, "withScaling", true);
2310
+ this.quantileRange = options.quantileRange ?? [25, 75];
2311
+ this.unitVariance = parseBooleanOption(options.unitVariance, "unitVariance", false);
2312
+ parseBooleanOption(options.copy, "copy", true);
2313
+ const [lower, upper] = this.quantileRange;
2314
+ if (!Number.isFinite(lower) || !Number.isFinite(upper) || lower < 0 || upper > 100 || lower >= upper) {
2315
+ throw new InvalidParameterError(
2316
+ "quantileRange must be a valid ascending percentile range",
2317
+ "quantileRange",
2318
+ this.quantileRange
2319
+ );
2320
+ }
2321
+ }
2322
+ fit(X) {
2323
+ if (X.size === 0) {
2324
+ throw new InvalidParameterError("X must contain at least one sample", "X");
2325
+ }
2326
+ assert2D(X, "X");
2327
+ assertNumericTensor(X, "X");
2328
+ validateFiniteData(X, "X");
2329
+ const [nSamples, nFeatures] = getShape2D(X);
2330
+ const data = getNumericData2(X, "X");
2331
+ const [stride0, stride1] = getStrides2D(X);
2332
+ const centers = new Array(nFeatures).fill(0);
2333
+ const scales = new Array(nFeatures).fill(0);
2334
+ const [lowerPercentile, upperPercentile] = this.quantileRange;
2335
+ const lowerFraction = lowerPercentile / 100;
2336
+ const upperFraction = upperPercentile / 100;
2337
+ const normalizer = this.unitVariance ? normalQuantile(upperFraction) - normalQuantile(lowerFraction) : 1;
2338
+ if (this.unitVariance && (!Number.isFinite(normalizer) || normalizer <= 0)) {
2339
+ throw new DeepboxError("RobustScaler internal error: invalid unit variance normalizer");
2340
+ }
2341
+ for (let j = 0; j < nFeatures; j++) {
2342
+ const values = [];
2343
+ for (let i = 0; i < nSamples; i++) {
2344
+ const raw = data[X.offset + i * stride0 + j * stride1];
2345
+ if (raw === void 0) {
2346
+ throw new DeepboxError("Internal error: numeric tensor access out of bounds");
2347
+ }
2348
+ values.push(Number(raw));
2349
+ }
2350
+ values.sort((a, b) => a - b);
2351
+ const interpolate = (q) => {
2352
+ if (values.length === 0) {
2353
+ throw new DeepboxError("Internal error: cannot interpolate empty values");
2354
+ }
2355
+ if (values.length === 1) {
2356
+ const only = values[0];
2357
+ if (only === void 0) {
2358
+ throw new DeepboxError("Internal error: missing sorted value");
2359
+ }
2360
+ return only;
2361
+ }
2362
+ const position = q * (values.length - 1);
2363
+ const lower = Math.floor(position);
2364
+ const upper = Math.ceil(position);
2365
+ const lowerValue = values[lower];
2366
+ const upperValue = values[upper];
2367
+ if (lowerValue === void 0 || upperValue === void 0) {
2368
+ throw new DeepboxError("Internal error: quantile interpolation index out of bounds");
2369
+ }
2370
+ if (upper === lower) {
2371
+ return lowerValue;
2372
+ }
2373
+ const weight = position - lower;
2374
+ return lowerValue * (1 - weight) + upperValue * weight;
2375
+ };
2376
+ centers[j] = interpolate(0.5);
2377
+ const qLower = interpolate(lowerFraction);
2378
+ const qUpper = interpolate(upperFraction);
2379
+ const iqr = qUpper - qLower;
2380
+ scales[j] = this.unitVariance ? iqr / normalizer : iqr;
2381
+ }
2382
+ this.center_ = this.withCentering ? tensor(centers, { dtype: "float64" }) : void 0;
2383
+ this.scale_ = this.withScaling ? tensor(scales, { dtype: "float64" }) : void 0;
2384
+ this.fitted = true;
2385
+ return this;
2386
+ }
2387
+ transform(X) {
2388
+ if (!this.fitted) {
2389
+ throw new NotFittedError("RobustScaler must be fitted before transform");
2390
+ }
2391
+ assert2D(X, "X");
2392
+ assertNumericTensor(X, "X");
2393
+ validateFiniteData(X, "X");
2394
+ const [nSamples, nFeatures] = getShape2D(X);
2395
+ const data = getNumericData2(X, "X");
2396
+ const [stride0, stride1] = getStrides2D(X);
2397
+ const center = this.center_;
2398
+ const scale = this.scale_;
2399
+ const centerData = center ? getNumericData2(center, "center_") : void 0;
2400
+ const scaleData = scale ? getNumericData2(scale, "scale_") : void 0;
2401
+ const centerStride = center ? getStride1D(center) : 0;
2402
+ const scaleStride = scale ? getStride1D(scale) : 0;
2403
+ if (this.withCentering && !center) {
2404
+ throw new DeepboxError("RobustScaler internal error: missing center_");
2405
+ }
2406
+ if (this.withScaling && !scale) {
2407
+ throw new DeepboxError("RobustScaler internal error: missing scale_");
2408
+ }
2409
+ const result = Array.from({ length: nSamples }, () => new Array(nFeatures).fill(0));
2410
+ for (let i = 0; i < nSamples; i++) {
2411
+ const rowBase = X.offset + i * stride0;
2412
+ for (let j = 0; j < nFeatures; j++) {
2413
+ const raw = data[rowBase + j * stride1];
2414
+ if (raw === void 0) {
2415
+ throw new DeepboxError("Internal error: numeric tensor access out of bounds");
2416
+ }
2417
+ let val = Number(raw);
2418
+ if (this.withCentering && center && centerData) {
2419
+ const rawCenter = centerData[center.offset + j * centerStride];
2420
+ if (rawCenter === void 0) {
2421
+ throw new DeepboxError("Internal error: center tensor access out of bounds");
2422
+ }
2423
+ val -= Number(rawCenter);
2424
+ }
2425
+ if (this.withScaling && scale && scaleData) {
2426
+ const rawScale = scaleData[scale.offset + j * scaleStride];
2427
+ if (rawScale === void 0) {
2428
+ throw new DeepboxError("Internal error: scale tensor access out of bounds");
2429
+ }
2430
+ const scaleValue = Number(rawScale);
2431
+ const safeScale = scaleValue === 0 ? 1 : scaleValue;
2432
+ val /= safeScale;
2433
+ }
2434
+ const resultRow = result[i];
2435
+ if (resultRow === void 0) {
2436
+ throw new DeepboxError("Internal error: result row access failed");
2437
+ }
2438
+ resultRow[j] = val;
2439
+ }
2440
+ }
2441
+ return tensor(result, { dtype: "float64", device: X.device });
2442
+ }
2443
+ fitTransform(X) {
2444
+ return this.fit(X).transform(X);
2445
+ }
2446
+ inverseTransform(X) {
2447
+ if (!this.fitted) {
2448
+ throw new NotFittedError("RobustScaler must be fitted before inverse_transform");
2449
+ }
2450
+ assert2D(X, "X");
2451
+ assertNumericTensor(X, "X");
2452
+ const [nSamples, nFeatures] = getShape2D(X);
2453
+ const data = getNumericData2(X, "X");
2454
+ const [stride0, stride1] = getStrides2D(X);
2455
+ const center = this.center_;
2456
+ const scale = this.scale_;
2457
+ const centerData = center ? getNumericData2(center, "center_") : void 0;
2458
+ const scaleData = scale ? getNumericData2(scale, "scale_") : void 0;
2459
+ const centerStride = center ? getStride1D(center) : 0;
2460
+ const scaleStride = scale ? getStride1D(scale) : 0;
2461
+ if (this.withCentering && !center) {
2462
+ throw new DeepboxError("RobustScaler internal error: missing center_");
2463
+ }
2464
+ if (this.withScaling && !scale) {
2465
+ throw new DeepboxError("RobustScaler internal error: missing scale_");
2466
+ }
2467
+ const result = Array.from({ length: nSamples }, () => new Array(nFeatures).fill(0));
2468
+ for (let i = 0; i < nSamples; i++) {
2469
+ const rowBase = X.offset + i * stride0;
2470
+ for (let j = 0; j < nFeatures; j++) {
2471
+ const raw = data[rowBase + j * stride1];
2472
+ if (raw === void 0) {
2473
+ throw new DeepboxError("Internal error: numeric tensor access out of bounds");
2474
+ }
2475
+ let val = Number(raw);
2476
+ if (this.withScaling && scale && scaleData) {
2477
+ const rawScale = scaleData[scale.offset + j * scaleStride];
2478
+ if (rawScale === void 0) {
2479
+ throw new DeepboxError("Internal error: scale tensor access out of bounds");
2480
+ }
2481
+ const scaleValue = Number(rawScale);
2482
+ const safeScale = scaleValue === 0 ? 1 : scaleValue;
2483
+ val *= safeScale;
2484
+ }
2485
+ if (this.withCentering && center && centerData) {
2486
+ const rawCenter = centerData[center.offset + j * centerStride];
2487
+ if (rawCenter === void 0) {
2488
+ throw new DeepboxError("Internal error: center tensor access out of bounds");
2489
+ }
2490
+ val += Number(rawCenter);
2491
+ }
2492
+ const resultRow = result[i];
2493
+ if (resultRow === void 0) {
2494
+ throw new DeepboxError("Internal error: result row access failed");
2495
+ }
2496
+ resultRow[j] = val;
2497
+ }
2498
+ }
2499
+ return tensor(result, { dtype: "float64", device: X.device });
2500
+ }
2501
+ };
2502
var Normalizer = class {
  norm;
  /**
   * Builds a row-wise normalizer.
   *
   * @param options - Configuration options
   * @param options.norm - Which norm each row is divided by: "l1", "l2" or "max" (default: "l2")
   * @param options.copy - Passed through parseBooleanOption only; the result is not stored (default: true)
   * @throws {InvalidParameterError} If `norm` is not one of the three supported names
   */
  constructor(options = {}) {
    this.norm = options.norm ?? "l2";
    const supported = ["l1", "l2", "max"];
    if (!supported.includes(this.norm)) {
      throw new InvalidParameterError("norm must be one of: l1, l2, max", "norm", this.norm);
    }
    parseBooleanOption(options.copy, "copy", true);
  }
  /**
   * No-op fit: nothing is learned from the data.
   *
   * @param _X - Ignored
   * @returns This instance
   */
  fit(_X) {
    return this;
  }
  /**
   * Divides every row of X by that row's norm.
   *
   * Rows whose norm is exactly 0 are copied through unchanged.
   *
   * @param X - Input data; checked with assert2D, assertNumericTensor and validateFiniteData
   * @returns Tensor built from the normalized rows with dtype "float64" and X's device
   * @throws {DeepboxError} If an element lookup yields undefined
   */
  transform(X) {
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    validateFiniteData(X, "X");
    const [rowCount, columnCount] = getShape2D(X);
    const source = getNumericData2(X, "X");
    const [rowStride, columnStride] = getStrides2D(X);
    const output = Array.from({ length: rowCount }, () => new Array(columnCount).fill(0));
    for (let r = 0; r < rowCount; r += 1) {
      const outputRow = output[r];
      const rowStart = X.offset + r * rowStride;
      // Reads one element of the current row as a number, failing loudly on a bad index.
      const readElement = (c) => {
        const raw = source[rowStart + c * columnStride];
        if (raw === undefined) {
          throw new DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        return Number(raw);
      };
      let magnitude = 0;
      switch (this.norm) {
        case "l2": {
          for (let c = 0; c < columnCount; c += 1) {
            const entry = readElement(c);
            magnitude += entry * entry;
          }
          magnitude = Math.sqrt(magnitude);
          break;
        }
        case "l1": {
          for (let c = 0; c < columnCount; c += 1) {
            magnitude += Math.abs(readElement(c));
          }
          break;
        }
        case "max": {
          for (let c = 0; c < columnCount; c += 1) {
            magnitude = Math.max(magnitude, Math.abs(readElement(c)));
          }
          break;
        }
        // Any other value leaves the magnitude at 0, so the row is copied as-is below.
      }
      for (let c = 0; c < columnCount; c += 1) {
        const entry = readElement(c);
        if (outputRow === undefined) {
          throw new DeepboxError("Internal error: result row access failed");
        }
        outputRow[c] = magnitude === 0 ? entry : entry / magnitude;
      }
    }
    return tensor(output, { dtype: "float64", device: X.device });
  }
  /**
   * Same as `transform`; there is nothing to fit.
   *
   * @param X - Input data
   * @returns The result of `transform(X)`
   */
  fitTransform(X) {
    return this.transform(X);
  }
};
2578
var QuantileTransformer = class {
  fitted = false;
  nQuantiles;
  outputDistribution;
  quantiles_;
  subsample;
  randomState;
  /**
   * Builds a quantile-based transformer.
   *
   * @param options - Configuration options
   * @param options.nQuantiles - Number of quantile landmarks; must be an integer >= 2 (default: 1000)
   * @param options.outputDistribution - "uniform" or "normal" (default: "uniform")
   * @param options.subsample - Optional integer >= 2 capping how many rows are used when fitting
   * @param options.randomState - Optional seed handed to createSeededRandom when subsampling
   * @param options.copy - Passed through parseBooleanOption only; the result is not stored (default: true)
   * @throws {InvalidParameterError} If any option fails validation
   */
  constructor(options = {}) {
    this.nQuantiles = options.nQuantiles ?? 1e3;
    this.outputDistribution = options.outputDistribution ?? "uniform";
    this.subsample = options.subsample;
    this.randomState = options.randomState;
    parseBooleanOption(options.copy, "copy", true);
    const isIntegerAtLeastTwo = (candidate) => Number.isInteger(candidate) && candidate >= 2;
    if (!isIntegerAtLeastTwo(this.nQuantiles)) {
      throw new InvalidParameterError(
        "nQuantiles must be at least 2",
        "nQuantiles",
        this.nQuantiles
      );
    }
    if (!["uniform", "normal"].includes(this.outputDistribution)) {
      throw new InvalidParameterError(
        "outputDistribution must be 'uniform' or 'normal'",
        "outputDistribution",
        this.outputDistribution
      );
    }
    if (this.subsample !== undefined && !isIntegerAtLeastTwo(this.subsample)) {
      throw new InvalidParameterError(
        "subsample must be an integer >= 2",
        "subsample",
        this.subsample
      );
    }
  }
  /**
   * Learns, for every feature, the data values sitting at evenly spaced quantile levels.
   *
   * The number of levels is the smaller of `nQuantiles` and the number of rows
   * actually used. When `subsample` is smaller than the row count, a shuffled
   * subset of rows is used for every feature.
   *
   * @param X - Training data; checked with assert2D, assertNumericTensor and validateFiniteData
   * @returns This instance
   * @throws {InvalidParameterError} If `X.size` is 0
   * @throws {DeepboxError} If an element lookup yields undefined
   */
  fit(X) {
    if (X.size === 0) {
      throw new InvalidParameterError("X must contain at least one sample", "X");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    validateFiniteData(X, "X");
    const [rowCount, featureCount] = getShape2D(X);
    const buffer = getNumericData2(X, "X");
    const [rowStride, columnStride] = getStrides2D(X);
    const learned = /* @__PURE__ */ new Map();
    this.quantiles_ = learned;
    const sampleCount = this.subsample === undefined ? rowCount : Math.min(this.subsample, rowCount);
    const levelCount = Math.min(this.nQuantiles, sampleCount);
    let references;
    if (levelCount <= 1) {
      references = [0.5];
    } else {
      references = Array.from({ length: levelCount }, (_, k) => k / (levelCount - 1));
    }
    // Rows to read for every feature: either a random subset or all rows in order.
    let rowIndices = Array.from({ length: rowCount }, (_, k) => k);
    if (sampleCount < rowCount) {
      const random = this.randomState === undefined ? Math.random : createSeededRandom(this.randomState);
      shuffleIndicesInPlace(rowIndices, random);
      rowIndices = rowIndices.slice(0, sampleCount);
    }
    for (let col = 0; col < featureCount; col += 1) {
      const columnValues = rowIndices.map((row) => {
        const raw = buffer[X.offset + row * rowStride + col * columnStride];
        if (raw === undefined) {
          throw new DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        return Number(raw);
      });
      const ascending = columnValues.slice().sort((a, b) => a - b);
      const quantiles = references.map((level) => this.interpolateFromSorted(ascending, level));
      learned.set(col, { quantiles, references });
    }
    this.fitted = true;
    return this;
  }
  /**
   * Maps each value to its quantile level, optionally re-expressed as a normal score.
   *
   * For "normal" output the level is clamped to [1e-7, 1 - 1e-7] before it is
   * pushed through the approximate inverse error function.
   *
   * @param X - Input data; checked with assert2D, assertNumericTensor and validateFiniteData
   * @returns Tensor of mapped values with dtype "float64" (a zeros tensor when X has no rows)
   * @throws {NotFittedError} If the transformer has not been fitted
   * @throws {DeepboxError} If a feature has no fitted quantiles or an element lookup yields undefined
   */
  transform(X) {
    if (!this.fitted || !this.quantiles_) {
      throw new NotFittedError("QuantileTransformer must be fitted before transform");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    validateFiniteData(X, "X");
    const [rowCount, featureCount] = getShape2D(X);
    const buffer = getNumericData2(X, "X");
    const [rowStride, columnStride] = getStrides2D(X);
    if (rowCount === 0) {
      return zeros([0, featureCount], { dtype: "float64" });
    }
    const output = Array.from({ length: rowCount }, () => new Array(featureCount));
    // Column-major walk so each feature's landmarks are looked up once.
    for (let col = 0; col < featureCount; col += 1) {
      const landmarks = this.quantiles_.get(col);
      if (!landmarks) {
        throw new DeepboxError(`Internal error: missing fitted quantiles for feature ${col}`);
      }
      for (let r = 0; r < rowCount; r += 1) {
        const raw = buffer[X.offset + r * rowStride + col * columnStride];
        if (raw === undefined) {
          throw new DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        const level = this.mapValueToQuantile(Number(raw), landmarks.quantiles, landmarks.references);
        const outputRow = output[r];
        if (!outputRow) {
          throw new DeepboxError("Internal error: result row access failed");
        }
        if (this.outputDistribution === "uniform") {
          outputRow[col] = level;
          continue;
        }
        const clampedLevel = Math.max(1e-7, Math.min(1 - 1e-7, level));
        outputRow[col] = Math.sqrt(2) * this.erfInv(2 * clampedLevel - 1);
      }
    }
    return tensor(output, { dtype: "float64", device: X.device });
  }
  /**
   * Maps transformed values back into the original feature space.
   *
   * With `outputDistribution="normal"` each value is first converted to a
   * quantile level through the approximate normal CDF. Levels are clamped to
   * [0, 1] before being projected onto the fitted landmarks.
   *
   * @param X - Transformed data (2D tensor)
   * @returns Tensor of restored values with dtype "float64" (a zeros tensor when X has no rows)
   * @throws {NotFittedError} If the transformer has not been fitted
   * @throws {DeepboxError} If a feature has no fitted quantiles or an element lookup yields undefined
   */
  inverseTransform(X) {
    if (!this.fitted || !this.quantiles_) {
      throw new NotFittedError("QuantileTransformer must be fitted before inverse_transform");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    validateFiniteData(X, "X");
    const [rowCount, featureCount] = getShape2D(X);
    const buffer = getNumericData2(X, "X");
    const [rowStride, columnStride] = getStrides2D(X);
    if (rowCount === 0) {
      return zeros([0, featureCount], { dtype: "float64" });
    }
    const output = Array.from({ length: rowCount }, () => new Array(featureCount));
    for (let col = 0; col < featureCount; col += 1) {
      const landmarks = this.quantiles_.get(col);
      if (!landmarks) {
        throw new DeepboxError(`Internal error: missing fitted quantiles for feature ${col}`);
      }
      for (let r = 0; r < rowCount; r += 1) {
        const raw = buffer[X.offset + r * rowStride + col * columnStride];
        if (raw === undefined) {
          throw new DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        const incoming = Number(raw);
        const unclamped = this.outputDistribution === "normal" ? this.normalCdf(incoming) : incoming;
        const level = Math.max(0, Math.min(1, unclamped));
        const outputRow = output[r];
        if (!outputRow) {
          throw new DeepboxError("Internal error: result row access failed");
        }
        outputRow[col] = this.mapQuantileToValue(level, landmarks.quantiles, landmarks.references);
      }
    }
    return tensor(output, { dtype: "float64", device: X.device });
  }
  /**
   * Polynomial-times-exponential approximation of the error function.
   *
   * @param x - Argument
   * @returns Approximate erf(x); odd symmetry is applied through the sign of x
   */
  erf(x) {
    const sign = x < 0 ? -1 : 1;
    const magnitude = Math.abs(x);
    const t = 1 / (1 + 0.3275911 * magnitude);
    // Horner evaluation, highest-order coefficient first.
    const lowerCoefficients = [-1.453152027, 1.421413741, -0.284496736, 0.254829592];
    let poly = 1.061405429;
    for (const coefficient of lowerCoefficients) {
      poly = poly * t + coefficient;
    }
    poly *= t;
    return sign * (1 - poly * Math.exp(-magnitude * magnitude));
  }
  /**
   * Standard normal cumulative distribution built on `erf`.
   *
   * @param z - Standard score
   * @returns Approximate probability that a standard normal variable is <= z
   */
  normalCdf(z) {
    return 0.5 * (1 + this.erf(z / Math.sqrt(2)));
  }
  /**
   * Closed-form approximation of the inverse error function (constant a = 0.147).
   *
   * @param x - Argument; callers in this class keep it strictly inside (-1, 1)
   * @returns Approximate erfInv(x)
   */
  erfInv(x) {
    const a = 0.147;
    const logTerm = Math.log(1 - x * x);
    const b = 2 / (Math.PI * a) + logTerm / 2;
    const sign = x < 0 ? -1 : 1;
    return sign * Math.sqrt(Math.sqrt(b * b - logTerm / a) - b);
  }
  /**
   * Linearly interpolated quantile of an ascending array.
   *
   * @param sorted - Values in ascending order
   * @param q - Quantile level; the position used is q * (sorted.length - 1)
   * @returns The interpolated value
   * @throws {DeepboxError} If `sorted` is empty or an index lands outside the array
   */
  interpolateFromSorted(sorted, q) {
    const count = sorted.length;
    if (count === 0) {
      throw new DeepboxError("Internal error: cannot interpolate empty sorted values");
    }
    if (count === 1) {
      const single = sorted[0];
      if (single === undefined) {
        throw new DeepboxError("Internal error: missing sorted value");
      }
      return single;
    }
    const position = q * (count - 1);
    const below = Math.floor(position);
    const above = Math.ceil(position);
    const belowValue = sorted[below];
    const aboveValue = sorted[above];
    if (belowValue === undefined || aboveValue === undefined) {
      throw new DeepboxError("Internal error: quantile interpolation index out of bounds");
    }
    if (above === below) {
      return belowValue;
    }
    const weight = position - below;
    return belowValue * (1 - weight) + aboveValue * weight;
  }
  /**
   * Converts a data value into a quantile level by interpolating between landmarks.
   *
   * Values at or below the first landmark map to 0; values at or above the
   * last landmark map to 1.
   *
   * @param value - Data value
   * @param quantiles - Landmark data values (ascending)
   * @param references - Quantile level of each landmark
   * @returns The interpolated quantile level
   * @throws {DeepboxError} If a required landmark entry is undefined
   */
  mapValueToQuantile(value, quantiles, references) {
    const count = quantiles.length;
    if (count === 0) {
      return 0;
    }
    if (count === 1) {
      const soleReference = references[0];
      if (soleReference === undefined) {
        throw new DeepboxError("Internal error: missing quantile reference");
      }
      return soleReference;
    }
    const lowest = quantiles[0];
    const highest = quantiles[count - 1];
    if (lowest === undefined || highest === undefined) {
      throw new DeepboxError("Internal error: missing quantile endpoints");
    }
    if (value <= lowest) {
      return 0;
    }
    if (value >= highest) {
      return 1;
    }
    // Bisection down to two adjacent landmarks that bracket the value.
    let lo = 0;
    let hi = count - 1;
    while (hi - lo > 1) {
      const mid = lo + Math.floor((hi - lo) / 2);
      const probe = quantiles[mid];
      if (probe === undefined) {
        throw new DeepboxError("Internal error: missing quantile midpoint");
      }
      if (probe <= value) {
        lo = mid;
      } else {
        hi = mid;
      }
    }
    const valueLo = quantiles[lo];
    const valueHi = quantiles[hi];
    const levelLo = references[lo];
    const levelHi = references[hi];
    if (valueLo === undefined || valueHi === undefined || levelLo === undefined || levelHi === undefined) {
      throw new DeepboxError("Internal error: missing quantile interpolation points");
    }
    if (valueHi <= valueLo) {
      return (levelLo + levelHi) / 2;
    }
    const fraction = (value - valueLo) / (valueHi - valueLo);
    return levelLo + fraction * (levelHi - levelLo);
  }
  /**
   * Converts a quantile level into a data value by interpolating between landmarks.
   *
   * Levels at or below the first reference return the first landmark value;
   * levels at or above the last reference return the last landmark value.
   *
   * @param quantile - Quantile level
   * @param quantiles - Landmark data values
   * @param references - Quantile level of each landmark (ascending)
   * @returns The interpolated data value
   * @throws {DeepboxError} If a required landmark entry is undefined
   */
  mapQuantileToValue(quantile, quantiles, references) {
    const count = references.length;
    if (count === 0) {
      return 0;
    }
    if (count === 1) {
      const soleQuantile = quantiles[0];
      if (soleQuantile === undefined) {
        throw new DeepboxError("Internal error: missing quantile value");
      }
      return soleQuantile;
    }
    const lowestLevel = references[0];
    const highestLevel = references[count - 1];
    if (lowestLevel === undefined || highestLevel === undefined) {
      throw new DeepboxError("Internal error: missing reference endpoints");
    }
    if (quantile <= lowestLevel) {
      const firstValue = quantiles[0];
      if (firstValue === undefined) {
        throw new DeepboxError("Internal error: missing quantile endpoints");
      }
      return firstValue;
    }
    if (quantile >= highestLevel) {
      const lastValue = quantiles[count - 1];
      if (lastValue === undefined) {
        throw new DeepboxError("Internal error: missing quantile endpoints");
      }
      return lastValue;
    }
    // Bisection down to two adjacent references that bracket the level.
    let lo = 0;
    let hi = count - 1;
    while (hi - lo > 1) {
      const mid = lo + Math.floor((hi - lo) / 2);
      const probe = references[mid];
      if (probe === undefined) {
        throw new DeepboxError("Internal error: missing quantile reference");
      }
      if (probe <= quantile) {
        lo = mid;
      } else {
        hi = mid;
      }
    }
    const levelLo = references[lo];
    const levelHi = references[hi];
    const valueLo = quantiles[lo];
    const valueHi = quantiles[hi];
    if (levelLo === undefined || levelHi === undefined || valueLo === undefined || valueHi === undefined) {
      throw new DeepboxError("Internal error: missing quantile interpolation points");
    }
    if (levelHi <= levelLo) {
      return (valueLo + valueHi) / 2;
    }
    const fraction = (quantile - levelLo) / (levelHi - levelLo);
    return valueLo + fraction * (valueHi - valueLo);
  }
  /**
   * Fits on X and then transforms that same X.
   *
   * @param X - Input data passed to both `fit` and `transform`
   * @returns The result of `transform(X)` on the fitted transformer
   */
  fitTransform(X) {
    const fittedTransformer = this.fit(X);
    return fittedTransformer.transform(X);
  }
};
2923
var PowerTransformer = class {
  fitted = false;
  method;
  lambdas_;
  standardize;
  mean_;
  scale_;
  /**
   * Creates a new PowerTransformer.
   *
   * @param options - Configuration options
   * @param options.method - "box-cox" or "yeo-johnson" (default: "yeo-johnson")
   * @param options.standardize - Whether to standardize transformed features (default: false)
   * @param options.copy - Accepted for API parity; transforms are always out-of-place (default: true)
   * @throws {InvalidParameterError} If `method` is not one of the two supported names
   */
  constructor(options = {}) {
    this.method = options.method ?? "yeo-johnson";
    if (this.method !== "box-cox" && this.method !== "yeo-johnson") {
      throw new InvalidParameterError(
        "method must be 'box-cox' or 'yeo-johnson'",
        "method",
        this.method
      );
    }
    this.standardize = parseBooleanOption(options.standardize, "standardize", false);
    parseBooleanOption(options.copy, "copy", true);
  }
  /**
   * Estimates one power parameter (lambda) per feature and, when
   * `standardize` is enabled, the mean and population standard deviation of
   * each transformed feature.
   *
   * @param X - Training data; checked with assert2D, assertNumericTensor and validateFiniteData
   * @returns This instance
   * @throws {InvalidParameterError} If `X.size` is 0, or Box-Cox is selected and a value is <= 0
   * @throws {DeepboxError} If an element lookup yields undefined
   */
  fit(X) {
    if (X.size === 0) {
      throw new InvalidParameterError("X must contain at least one sample", "X");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    validateFiniteData(X, "X");
    const [nSamples, nFeatures] = getShape2D(X);
    const data = getNumericData2(X, "X");
    const [stride0, stride1] = getStrides2D(X);
    const lambdas = new Array(nFeatures);
    const means = this.standardize ? new Array(nFeatures).fill(0) : void 0;
    const scales = this.standardize ? new Array(nFeatures).fill(0) : void 0;
    for (let j = 0; j < nFeatures; j++) {
      const featureValues = new Array(nSamples);
      for (let i = 0; i < nSamples; i++) {
        const raw = data[X.offset + i * stride0 + j * stride1];
        if (raw === void 0) {
          throw new DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        const value = Number(raw);
        if (this.method === "box-cox" && value <= 0) {
          throw new InvalidParameterError(
            `Box-Cox requires strictly positive values in fit data (feature ${j})`,
            "X",
            value
          );
        }
        featureValues[i] = value;
      }
      const lambda = this.optimizeLambda(featureValues);
      lambdas[j] = lambda;
      if (this.standardize && means && scales) {
        // Transform each value once and reuse it for both the mean and the variance pass.
        const transformedValues = featureValues.map(
          (value) => this.method === "box-cox" ? this.boxCoxTransformValue(value, lambda) : this.yeoJohnsonTransformValue(value, lambda)
        );
        let sum = 0;
        for (const transformed of transformedValues) {
          sum += transformed;
        }
        const mean = sum / nSamples;
        means[j] = mean;
        let sumSqDiff = 0;
        for (const transformed of transformedValues) {
          const diff = transformed - mean;
          sumSqDiff += diff * diff;
        }
        const variance = sumSqDiff / nSamples;
        const std = Math.sqrt(Math.max(variance, 0));
        // A zero spread is stored as 1 so transform never divides by zero.
        scales[j] = std === 0 ? 1 : std;
      }
    }
    this.lambdas_ = lambdas;
    this.mean_ = this.standardize ? means : void 0;
    this.scale_ = this.standardize ? scales : void 0;
    this.fitted = true;
    return this;
  }
  /**
   * Applies the fitted power transform (and optional standardization) to X.
   *
   * @param X - Input data; checked with assert2D, assertNumericTensor and validateFiniteData
   * @returns Tensor built from the transformed rows with dtype "float64" and X's device
   * @throws {NotFittedError} If the transformer has not been fitted
   * @throws {InvalidParameterError} If Box-Cox is selected and a value is <= 0
   * @throws {DeepboxError} If fitted state is missing or an element lookup yields undefined
   */
  transform(X) {
    if (!this.fitted || !this.lambdas_) {
      throw new NotFittedError("PowerTransformer must be fitted before transform");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    validateFiniteData(X, "X");
    const [nSamples, nFeatures] = getShape2D(X);
    const data = getNumericData2(X, "X");
    const [stride0, stride1] = getStrides2D(X);
    if (this.standardize && (!this.mean_ || !this.scale_)) {
      throw new DeepboxError("PowerTransformer internal error: missing standardization stats");
    }
    const result = Array.from({ length: nSamples }, () => new Array(nFeatures).fill(0));
    for (let i = 0; i < nSamples; i++) {
      const rowBase = X.offset + i * stride0;
      for (let j = 0; j < nFeatures; j++) {
        const raw = data[rowBase + j * stride1];
        if (raw === void 0) {
          throw new DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        const val = Number(raw);
        const lambda = this.lambdas_[j];
        if (lambda === void 0) {
          throw new DeepboxError(`Internal error: missing fitted lambda for feature ${j}`);
        }
        let transformed;
        if (this.method === "box-cox") {
          if (val <= 0) {
            throw new InvalidParameterError("Box-Cox requires strictly positive values", "X", val);
          }
          transformed = this.boxCoxTransformValue(val, lambda);
        } else {
          transformed = this.yeoJohnsonTransformValue(val, lambda);
        }
        if (this.standardize && this.mean_ && this.scale_) {
          const mean = this.mean_[j] ?? 0;
          const scale = this.scale_[j] ?? 1;
          transformed = (transformed - mean) / scale;
        }
        const row = result[i];
        if (row === void 0) {
          throw new DeepboxError("Internal error: result row access failed");
        }
        row[j] = transformed;
      }
    }
    return tensor(result, { dtype: "float64", device: X.device });
  }
  /**
   * Inverse transform data back to the original feature space.
   * If `standardize=true`, de-standardizes before applying the inverse power transform.
   *
   * @param X - Transformed data (2D tensor)
   * @returns Data in the original feature space
   * @throws {NotFittedError} If transformer is not fitted
   * @throws {InvalidParameterError} If a value has no real-valued inverse for the fitted lambda
   * @throws {DeepboxError} If fitted state is missing or an element lookup yields undefined
   */
  inverseTransform(X) {
    if (!this.fitted || !this.lambdas_) {
      throw new NotFittedError("PowerTransformer must be fitted before inverse_transform");
    }
    assert2D(X, "X");
    assertNumericTensor(X, "X");
    validateFiniteData(X, "X");
    const [nSamples, nFeatures] = getShape2D(X);
    const data = getNumericData2(X, "X");
    const [stride0, stride1] = getStrides2D(X);
    if (this.standardize && (!this.mean_ || !this.scale_)) {
      throw new DeepboxError("PowerTransformer internal error: missing standardization stats");
    }
    const result = Array.from({ length: nSamples }, () => new Array(nFeatures).fill(0));
    for (let i = 0; i < nSamples; i++) {
      const rowBase = X.offset + i * stride0;
      for (let j = 0; j < nFeatures; j++) {
        const raw = data[rowBase + j * stride1];
        if (raw === void 0) {
          throw new DeepboxError("Internal error: numeric tensor access out of bounds");
        }
        let val = Number(raw);
        if (this.standardize && this.mean_ && this.scale_) {
          const mean = this.mean_[j] ?? 0;
          const scale = this.scale_[j] ?? 1;
          val = val * scale + mean;
        }
        const lambda = this.lambdas_[j];
        if (lambda === void 0) {
          throw new DeepboxError(`Internal error: missing fitted lambda for feature ${j}`);
        }
        let inverted;
        if (this.method === "box-cox") {
          inverted = this.boxCoxInverseValue(val, lambda);
        } else {
          inverted = this.yeoJohnsonInverseValue(val, lambda);
        }
        const row = result[i];
        if (row === void 0) {
          throw new DeepboxError("Internal error: result row access failed");
        }
        row[j] = inverted;
      }
    }
    return tensor(result, { dtype: "float64", device: X.device });
  }
  /**
   * Box-Cox transform of a single value: ln(v) when lambda is (numerically)
   * zero, otherwise (v^lambda - 1) / lambda.
   *
   * @param value - Value to transform (callers in this class pass only values > 0)
   * @param lambda - Power parameter
   * @returns The transformed value
   */
  boxCoxTransformValue(value, lambda) {
    if (Math.abs(lambda) < 1e-12) {
      return Math.log(value);
    }
    // When lambda * ln(v) is small, v^lambda is close to 1 and subtracting 1
    // cancels most significant digits; expm1 evaluates the same quantity
    // without that loss. Larger exponents (and NaN/Infinity) keep the direct form.
    const exponent = lambda * Math.log(value);
    if (Math.abs(exponent) < 0.5) {
      return Math.expm1(exponent) / lambda;
    }
    return (value ** lambda - 1) / lambda;
  }
  /**
   * Yeo-Johnson transform of a single value. Non-negative values use the
   * Box-Cox form on (v + 1) with `lambda`; negative values use the mirrored
   * form on (1 - v) with `2 - lambda`.
   *
   * @param value - Value to transform
   * @param lambda - Power parameter
   * @returns The transformed value
   */
  yeoJohnsonTransformValue(value, lambda) {
    if (value >= 0) {
      if (Math.abs(lambda) < 1e-12) {
        return Math.log1p(value);
      }
      // Same cancellation guard as in boxCoxTransformValue, on ln(1 + v).
      const exponent2 = lambda * Math.log1p(value);
      if (Math.abs(exponent2) < 0.5) {
        return Math.expm1(exponent2) / lambda;
      }
      return ((value + 1) ** lambda - 1) / lambda;
    }
    const twoMinusLambda = 2 - lambda;
    if (Math.abs(twoMinusLambda) < 1e-12) {
      return -Math.log1p(-value);
    }
    const exponent = twoMinusLambda * Math.log1p(-value);
    if (Math.abs(exponent) < 0.5) {
      return -Math.expm1(exponent) / twoMinusLambda;
    }
    return -((1 - value) ** twoMinusLambda - 1) / twoMinusLambda;
  }
  /**
   * Inverse of `boxCoxTransformValue`.
   *
   * @param value - Transformed value
   * @param lambda - Power parameter
   * @returns The value in the original space
   * @throws {InvalidParameterError} If value * lambda + 1 is not positive
   */
  boxCoxInverseValue(value, lambda) {
    if (Math.abs(lambda) < 1e-12) {
      return Math.exp(value);
    }
    const scaled = value * lambda;
    const base = scaled + 1;
    if (base <= 0) {
      throw new InvalidParameterError("Box-Cox inverse encountered invalid value", "X", value);
    }
    // Near base = 1, the rounding of `1 + scaled` is magnified by the 1/lambda
    // exponent; log1p keeps the low-order digits of `scaled`.
    if (Math.abs(scaled) < 0.5) {
      return Math.exp(Math.log1p(scaled) / lambda);
    }
    return base ** (1 / lambda);
  }
  /**
   * Inverse of `yeoJohnsonTransformValue`. The branch is selected by the sign
   * of the transformed value.
   *
   * @param value - Transformed value
   * @param lambda - Power parameter
   * @returns The value in the original space
   * @throws {InvalidParameterError} If the power base for the selected branch is not positive
   */
  yeoJohnsonInverseValue(value, lambda) {
    if (value >= 0) {
      if (Math.abs(lambda) < 1e-12) {
        return Math.expm1(value);
      }
      const scaled2 = value * lambda;
      const base2 = scaled2 + 1;
      if (base2 <= 0) {
        throw new InvalidParameterError(
          "Yeo-Johnson inverse encountered invalid value",
          "X",
          value
        );
      }
      if (Math.abs(scaled2) < 0.5) {
        return Math.expm1(Math.log1p(scaled2) / lambda);
      }
      return base2 ** (1 / lambda) - 1;
    }
    const twoMinusLambda = 2 - lambda;
    if (Math.abs(twoMinusLambda) < 1e-12) {
      return -Math.expm1(-value);
    }
    const base = 1 - value * twoMinusLambda;
    if (base <= 0) {
      throw new InvalidParameterError("Yeo-Johnson inverse encountered invalid value", "X", value);
    }
    const scaled = -(value * twoMinusLambda);
    if (Math.abs(scaled) < 0.5) {
      return -Math.expm1(Math.log1p(scaled) / twoMinusLambda);
    }
    return 1 - base ** (1 / twoMinusLambda);
  }
  /**
   * Score used to choose lambda: -n/2 * ln(variance of the transformed
   * values) plus the log-Jacobian term of the selected transform.
   *
   * @param values - Feature values
   * @param lambda - Candidate power parameter
   * @returns The score, or -Infinity when the candidate is unusable
   *   (non-positive Box-Cox input, non-finite transformed value, or variance <= 1e-15)
   * @throws {DeepboxError} If an entry of `values` is undefined
   */
  logLikelihood(values, lambda) {
    const transformed = new Array(values.length);
    let jacobian = 0;
    for (let i = 0; i < values.length; i++) {
      const value = values[i];
      if (value === void 0) {
        throw new DeepboxError("Internal error: missing feature value during optimization");
      }
      let transformedValue;
      if (this.method === "box-cox") {
        if (value <= 0) {
          return Number.NEGATIVE_INFINITY;
        }
        transformedValue = this.boxCoxTransformValue(value, lambda);
        jacobian += (lambda - 1) * Math.log(value);
      } else {
        transformedValue = this.yeoJohnsonTransformValue(value, lambda);
        jacobian += value >= 0 ? (lambda - 1) * Math.log(value + 1) : (1 - lambda) * Math.log(1 - value);
      }
      if (!Number.isFinite(transformedValue)) {
        return Number.NEGATIVE_INFINITY;
      }
      transformed[i] = transformedValue;
    }
    let sum = 0;
    for (const value of transformed) {
      sum += value;
    }
    const mean = sum / transformed.length;
    let varianceSum = 0;
    for (const value of transformed) {
      const delta = value - mean;
      varianceSum += delta * delta;
    }
    const variance = varianceSum / transformed.length;
    if (!Number.isFinite(variance) || variance <= 1e-15) {
      return Number.NEGATIVE_INFINITY;
    }
    return -0.5 * transformed.length * Math.log(variance) + jacobian;
  }
  /**
   * Searches [-5, 5] for the lambda that maximizes `logLikelihood`.
   *
   * Runs up to 80 golden-section steps (stopping once the bracket is narrower
   * than 1e-6), then picks the best-scoring value among the bracket ends, the
   * bracket midpoint and the fixed candidates 0, 1, 2 and -2.
   *
   * @param values - Feature values
   * @returns The selected lambda; 1 when there are fewer than two values or
   *   the values span no more than 1e-15
   */
  optimizeLambda(values) {
    if (values.length < 2) {
      return 1;
    }
    let minValue = Number.POSITIVE_INFINITY;
    let maxValue = Number.NEGATIVE_INFINITY;
    for (const value of values) {
      if (value < minValue) minValue = value;
      if (value > maxValue) maxValue = value;
    }
    if (!Number.isFinite(minValue) || !Number.isFinite(maxValue) || maxValue - minValue <= 1e-15) {
      return 1;
    }
    let left = -5;
    let right = 5;
    const phi = (Math.sqrt(5) - 1) / 2;
    // c is the lower interior probe and d the upper one.
    let c = right - phi * (right - left);
    let d = left + phi * (right - left);
    let fc = this.logLikelihood(values, c);
    let fd = this.logLikelihood(values, d);
    for (let iter = 0; iter < 80; iter++) {
      if (Math.abs(right - left) < 1e-6) break;
      if (fc > fd) {
        // Maximum lies in [left, d]: reuse c as the new upper probe.
        right = d;
        d = c;
        fd = fc;
        c = right - phi * (right - left);
        fc = this.logLikelihood(values, c);
      } else {
        // Maximum lies in [c, right]: reuse d as the new lower probe.
        left = c;
        c = d;
        fc = fd;
        d = left + phi * (right - left);
        fd = this.logLikelihood(values, d);
      }
    }
    const candidates = [left, right, (left + right) / 2, 0, 1, 2, -2];
    let bestLambda = 1;
    let bestScore = Number.NEGATIVE_INFINITY;
    for (const lambda of candidates) {
      const score = this.logLikelihood(values, lambda);
      if (score > bestScore) {
        bestScore = score;
        bestLambda = lambda;
      }
    }
    return Number.isFinite(bestLambda) ? bestLambda : 1;
  }
  /**
   * Fits on X and then transforms that same X.
   *
   * @param X - Input data passed to both `fit` and `transform`
   * @returns The result of `transform(X)` on the fitted transformer
   */
  fitTransform(X) {
    return this.fit(X).transform(X);
  }
};
3247
+
3248
+ // src/preprocess/split.ts
3249
/**
 * Ensure `nSplits` is a finite integer of at least 2.
 *
 * @param nSplits - Value to validate.
 * @throws {InvalidParameterError} When the value is not an integer >= 2.
 */
function validateNSplits(nSplits) {
  const isValid = Number.isFinite(nSplits) && Number.isInteger(nSplits) && nSplits >= 2;
  if (isValid) {
    return;
  }
  throw new InvalidParameterError("nSplits must be an integer at least 2", "nSplits", nSplits);
}
3254
/**
 * Classify a train/test size option.
 *
 * @param value - `undefined`, a fraction in (0, 1), or an integer count >= 1.
 * @param name - Option name used in error messages.
 * @returns `undefined` when no value was given, `{ kind: "fraction", value }`
 *   for values below 1, or `{ kind: "count", value }` for integers >= 1.
 * @throws {InvalidParameterError} For non-finite or non-positive values, and
 *   for non-integer values of 1 or more.
 */
function parseSplitSpec(value, name) {
  if (value === undefined) {
    return undefined;
  }
  const isPositiveFinite = Number.isFinite(value) && value > 0;
  if (!isPositiveFinite) {
    throw new InvalidParameterError(`${name} must be a positive number`, name, value);
  }
  if (value >= 1) {
    if (Number.isInteger(value)) {
      return { kind: "count", value };
    }
    throw new InvalidParameterError(
      `${name} must be an integer when provided as an absolute size`,
      name,
      value
    );
  }
  return { kind: "fraction", value };
}
3273
/**
 * Turn a parsed split spec into a sample count.
 *
 * @param spec - `{ kind: "count" | "fraction", value }`.
 * @param nSamples - Total number of samples.
 * @param isTrain - Fractions are rounded down when truthy, up otherwise.
 * @returns The count as given, or `nSamples * value` rounded as described.
 */
function resolveSplitCount(spec, nSamples, isTrain) {
  const { kind, value } = spec;
  if (kind === "count") {
    return value;
  }
  const round = isTrain ? Math.floor : Math.ceil;
  return round(nSamples * value);
}
3280
/**
 * Resolve the requested train/test sizes into concrete sample counts.
 *
 * When neither size is given the test size defaults to the fraction 0.25.
 * When only one side is given, the other side receives the remaining samples.
 *
 * @param nSamples - Total number of samples.
 * @param trainSize - Optional fraction (< 1) or absolute count.
 * @param testSize - Optional fraction (< 1) or absolute count.
 * @returns `[nTrain, nTest]`.
 * @throws {InvalidParameterError} When a size is invalid, exceeds the number
 *   of samples, the two sizes together exceed it, or either side ends up
 *   with fewer than one sample.
 */
function resolveTrainTestCounts(nSamples, trainSize, testSize) {
  const nothingRequested = trainSize === undefined && testSize === undefined;
  const trainSpec = parseSplitSpec(trainSize, "trainSize");
  const testSpec = parseSplitSpec(nothingRequested ? 0.25 : testSize, "testSize");

  const isOversizedCount = (spec) =>
    spec !== undefined && spec.kind === "count" && spec.value > nSamples;

  if (isOversizedCount(trainSpec)) {
    throw new InvalidParameterError(
      "trainSize must not exceed number of samples",
      "trainSize",
      trainSpec.value
    );
  }
  if (isOversizedCount(testSpec)) {
    throw new InvalidParameterError(
      "testSize must not exceed number of samples",
      "testSize",
      testSpec.value
    );
  }

  const bothFractions = trainSpec?.kind === "fraction" && testSpec?.kind === "fraction";
  if (bothFractions && trainSpec.value + testSpec.value > 1) {
    throw new InvalidParameterError(
      "trainSize and testSize fractions must sum to at most 1",
      "trainSize",
      trainSpec.value
    );
  }

  let nTrain;
  if (trainSpec !== undefined) {
    nTrain = resolveSplitCount(trainSpec, nSamples, true);
  }
  let nTest;
  if (testSpec !== undefined) {
    nTest = resolveSplitCount(testSpec, nSamples, false);
  }

  if (nTrain === undefined && nTest === undefined) {
    throw new DeepboxError("Internal error: failed to resolve split sizes");
  }
  // Whichever side was left unspecified takes the complement of the other.
  if (nTrain === undefined) {
    nTrain = nSamples - (nTest ?? 0);
  }
  if (nTest === undefined) {
    nTest = nSamples - nTrain;
  }

  if (nTrain + nTest > nSamples) {
    throw new InvalidParameterError(
      "trainSize and testSize exceed number of samples",
      "trainSize",
      trainSize
    );
  }
  if (nTrain < 1) {
    throw new InvalidParameterError("trainSize must be at least 1 sample", "trainSize", trainSize);
  }
  if (nTest < 1) {
    throw new InvalidParameterError("testSize must be at least 1 sample", "testSize", testSize);
  }
  return [nTrain, nTest];
}
3331
/**
 * Comparator for labels.
 *
 * Two numbers compare by difference, two bigints by ordering (-1 / 0 / 1),
 * and every other pairing by `localeCompare` of their string forms.
 *
 * @param a - First label.
 * @param b - Second label.
 * @returns Negative, zero or positive, as expected by `Array.prototype.sort`.
 */
function compareLabels(a, b) {
  const kindA = typeof a;
  if (kindA === typeof b) {
    if (kindA === "number") {
      return a - b;
    }
    if (kindA === "bigint") {
      return a < b ? -1 : a > b ? 1 : 0;
    }
  }
  return String(a).localeCompare(String(b));
}
3340
/**
 * Spread `total` items over `nSplits` folds as evenly as possible.
 *
 * Every fold gets `floor(total / nSplits)` items and the first
 * `total % nSplits` folds get one extra.
 *
 * @param total - Number of items to distribute.
 * @param nSplits - Number of folds.
 * @returns Array of `nSplits` fold sizes.
 */
function makeFoldSizes(total, nSplits) {
  const floorSize = Math.floor(total / nSplits);
  const extra = total % nSplits;
  const sizes = Array.from({ length: nSplits }, () => floorSize);
  for (let fold = 0; fold < extra && fold < sizes.length; fold++) {
    sizes[fold] = floorSize + 1;
  }
  return sizes;
}
3345
/**
 * Read one element through `t.at(...indices)`.
 *
 * @param t - Object exposing an `at(...indices)` accessor.
 * @param indices - Index list spread into `at`.
 * @returns The element when it is a string, number or bigint.
 * @throws {DeepboxError} When the element has any other type.
 */
function readTensorValue(t, indices) {
  const value = t.at(...indices);
  switch (typeof value) {
    case "string":
    case "number":
    case "bigint":
      return value;
    default:
      throw new DeepboxError("Internal error: unsupported tensor value type");
  }
}
3352
/**
 * Store `value` at `t.data[flatIndex]`, converting it to fit the storage.
 *
 * String tensors (`t.dtype === "string"`) accept only strings. For any other
 * dtype, strings are rejected; values go through `BigInt(...)` when `t.data`
 * is a `BigInt64Array` (bigints are stored as is) and through `Number(...)`
 * otherwise.
 *
 * @param t - Object with `dtype` and an indexable `data` buffer.
 * @param flatIndex - Position inside `t.data`.
 * @param value - String, number or bigint to store.
 * @throws {DeepboxError} When the value kind does not match the tensor kind.
 */
function writeTensorValue(t, flatIndex, value) {
  const valueIsString = typeof value === "string";
  if (t.dtype === "string") {
    if (!valueIsString) {
      throw new DeepboxError("Internal error: expected string value for string tensor");
    }
    t.data[flatIndex] = value;
    return;
  }
  if (valueIsString) {
    throw new DeepboxError("Internal error: encountered string value in numeric tensor");
  }
  const { data } = t;
  if (data instanceof BigInt64Array) {
    data[flatIndex] = typeof value === "bigint" ? value : BigInt(value);
  } else {
    data[flatIndex] = Number(value);
  }
}
3369
/**
 * Copy the selected rows of `X` into a freshly allocated tensor.
 *
 * @param X - Source tensor; its second dimension (from `getShape2D`) is the
 *   number of features.
 * @param sampleIndices - Row indices to copy, in output order.
 * @returns Tensor of shape `[sampleIndices.length, nFeatures]` with `X.dtype`.
 * @throws {DeepboxError} When an entry of `sampleIndices` is `undefined`.
 */
function takeRows2D(X, sampleIndices) {
  const nFeatures = getShape2D(X)[1];
  const nRows = sampleIndices.length;
  const out = zeros([nRows, nFeatures], { dtype: X.dtype });
  for (let row = 0; row < nRows; row++) {
    const sourceRow = sampleIndices[row];
    if (sourceRow === undefined) {
      throw new DeepboxError("Internal error: sample index access failed");
    }
    for (let col = 0; col < nFeatures; col++) {
      const cell = readTensorValue(X, [sourceRow, col]);
      // Output rows are written back to back, starting at out.offset.
      writeTensorValue(out, out.offset + row * nFeatures + col, cell);
    }
  }
  return out;
}
3384
/**
 * Copy the selected elements of a 1D tensor into a freshly allocated tensor.
 *
 * @param y - Source tensor; must have `ndim === 1`.
 * @param sampleIndices - Element indices to copy, in output order.
 * @returns Tensor of shape `[sampleIndices.length]` with `y.dtype`.
 * @throws {ShapeError} When `y` is not 1D.
 * @throws {DeepboxError} When an entry of `sampleIndices` is `undefined`.
 */
function takeVector(y, sampleIndices) {
  if (y.ndim !== 1) {
    throw new ShapeError(`y must be a 1D tensor, got ${y.ndim}D`);
  }
  const count = sampleIndices.length;
  const out = zeros([count], { dtype: y.dtype });
  for (let position = 0; position < count; position++) {
    const sourceIndex = sampleIndices[position];
    if (sourceIndex === undefined) {
      throw new DeepboxError("Internal error: sample index access failed");
    }
    const element = readTensorValue(y, [sourceIndex]);
    writeTensorValue(out, out.offset + position, element);
  }
  return out;
}
3399
/**
 * Split `X` (and optionally `y`) into a train part and a test part.
 *
 * Options read from `options`: `shuffle` (defaults to true), `randomState`
 * (seed handed to `createSeededRandom`; `Math.random` is used when absent),
 * `trainSize`, `testSize` and `stratify` (a 1D label tensor with one entry
 * per sample).
 *
 * With `stratify`, every class contributes a test share proportional to
 * `nTest / nSamples`. Shares are floored, clamped to the per-class bounds and
 * then adjusted one sample at a time until they sum to `nTest`.
 *
 * @param X - 2D tensor of samples.
 * @param y - Optional tensor with one entry per sample.
 * @param options - Optional settings, see above.
 * @returns `[XTrain, XTest]`, or `[XTrain, XTest, yTrain, yTest]` when `y`
 *   is given.
 * @throws {InvalidParameterError} On empty input, mismatched lengths or
 *   sizes that cannot be satisfied.
 * @throws {ShapeError} When `stratify` is not 1D.
 */
function trainTestSplit(X, y, options) {
  const opts = options ?? {};
  const shouldShuffle = opts.shuffle ?? true;
  const seed = opts.randomState;
  const strata = opts.stratify;

  const [nSamples] = getShape2D(X);
  if (nSamples === 0) {
    throw new InvalidParameterError("Cannot split empty array", "X");
  }
  if (y) {
    const yRows = y.shape[0];
    if (yRows === undefined || yRows !== nSamples) {
      throw new InvalidParameterError("X and y must have same number of samples", "y", yRows);
    }
  }
  if (strata) {
    if (strata.ndim !== 1) {
      throw new ShapeError(`stratify must be a 1D tensor, got ${strata.ndim}D`);
    }
    const strataRows = strata.shape[0];
    if (strataRows === undefined || strataRows !== nSamples) {
      throw new InvalidParameterError(
        "stratify must have same number of samples as X",
        "stratify",
        strataRows
      );
    }
  }

  const [nTrain, nTest] = resolveTrainTestCounts(nSamples, opts.trainSize, opts.testSize);
  const random = seed !== undefined ? createSeededRandom(seed) : Math.random;
  const shuffleIfEnabled = (arr) => {
    if (shouldShuffle) {
      shuffleIndicesInPlace(arr, random);
    }
  };

  let trainIndices;
  let testIndices;
  if (strata) {
    // Bucket sample indices by label, in order of appearance.
    const membersByLabel = new Map();
    for (let i = 0; i < nSamples; i++) {
      const label = readTensorValue(strata, [i]);
      const members = membersByLabel.get(label);
      if (members === undefined) {
        membersByLabel.set(label, [i]);
      } else {
        members.push(i);
      }
    }
    const labels = [...membersByLabel.keys()].sort(compareLabels);
    const nClasses = labels.length;
    const classSizes = labels.map((label) => membersByLabel.get(label)?.length ?? 0);

    if (shouldShuffle && seed === undefined && classSizes.some((size) => size < 2)) {
      throw new InvalidParameterError(
        "stratify requires at least 2 samples per class",
        "stratify",
        classSizes
      );
    }
    if (opts.trainSize !== undefined && nTrain < nClasses) {
      throw new InvalidParameterError(
        "trainSize must be at least the number of classes when stratifying",
        "trainSize",
        nTrain
      );
    }
    if (nTest < nClasses) {
      throw new InvalidParameterError(
        "testSize must be at least the number of classes when stratifying",
        "testSize",
        nTest
      );
    }

    const testFraction = nTest / nSamples;
    const allowEmptyClassSplits = nTrain < nClasses;
    const plan = labels.map((label, k) => {
      const size = classSizes[k];
      if (size < 2) {
        // Classes with fewer than two samples start with no test samples.
        return {
          label,
          size,
          testCount: 0,
          remainder: 0,
          min: 0,
          max: allowEmptyClassSplits ? size : 0
        };
      }
      const exact = size * testFraction;
      const floored = Math.floor(exact);
      const min = allowEmptyClassSplits ? 0 : 1;
      const max = allowEmptyClassSplits ? size : size - 1;
      return {
        label,
        size,
        testCount: Math.min(Math.max(floored, min), max),
        remainder: exact - floored,
        min,
        max
      };
    });

    let shortfall = nTest;
    for (const entry of plan) {
      shortfall -= entry.testCount;
    }
    if (shortfall !== 0) {
      // Adding samples favours the largest fractional remainders, removing
      // favours the smallest; ties are broken by label order.
      const direction = shortfall > 0 ? -1 : 1;
      const order = [...plan].sort((a, b) =>
        a.remainder !== b.remainder
          ? direction * (a.remainder - b.remainder)
          : compareLabels(a.label, b.label)
      );
      const maxPasses = plan.length * 2;
      for (let pass = 0; shortfall !== 0 && pass < maxPasses; pass++) {
        for (const entry of order) {
          if (shortfall === 0) {
            break;
          }
          if (shortfall > 0) {
            if (entry.testCount < entry.max) {
              entry.testCount += 1;
              shortfall -= 1;
            }
          } else if (entry.testCount > entry.min) {
            entry.testCount -= 1;
            shortfall += 1;
          }
        }
      }
      if (shortfall !== 0) {
        throw new DeepboxError("Internal error: unable to allocate stratified split sizes");
      }
    }

    const testPicked = [];
    const trainPool = [];
    for (const { label, testCount } of plan) {
      const members = [...(membersByLabel.get(label) ?? [])];
      shuffleIfEnabled(members);
      testPicked.push(...members.slice(0, testCount));
      trainPool.push(...members.slice(testCount));
    }
    shuffleIfEnabled(testPicked);
    shuffleIfEnabled(trainPool);
    testIndices = testPicked;
    trainIndices = trainPool.slice(0, nTrain);
  } else {
    const indices = Array.from({ length: nSamples }, (_, i) => i);
    shuffleIfEnabled(indices);
    trainIndices = indices.slice(0, nTrain);
    testIndices = indices.slice(nTrain, nTrain + nTest);
  }

  if (trainIndices.length !== nTrain || testIndices.length !== nTest) {
    throw new DeepboxError("Internal error: resolved split indices do not match requested sizes");
  }

  const XTrain = takeRows2D(X, trainIndices);
  const XTest = takeRows2D(X, testIndices);
  if (!y) {
    return [XTrain, XTest];
  }
  const yTrain = takeVector(y, trainIndices);
  const yTest = takeVector(y, testIndices);
  return [XTrain, XTest, yTrain, yTest];
}
3546
/**
 * K-fold splitter: consecutive slices of the (optionally shuffled) sample
 * index list each serve once as the test set.
 *
 * Options: `nSplits` (default 5), `shuffle` (default false) and
 * `randomState` (seed used when shuffling; `Math.random` otherwise).
 */
var KFold = class {
  nSplits;
  shuffle;
  randomState;

  constructor(options = {}) {
    const { nSplits, shuffle, randomState } = options;
    this.nSplits = nSplits ?? 5;
    this.shuffle = shuffle ?? false;
    this.randomState = randomState;
  }

  /**
   * @param X - Object whose `shape[0]` is the number of samples.
   * @returns Array of `[trainIndices, testIndices]` pairs, one per fold.
   * @throws {ShapeError} When `X.shape[0]` is undefined.
   * @throws {InvalidParameterError} When `nSplits` is invalid or exceeds the
   *   number of samples.
   */
  split(X) {
    const nSamples = X.shape[0];
    if (nSamples === undefined) {
      throw new ShapeError("X must have valid shape[0]");
    }
    validateNSplits(this.nSplits);
    if (this.nSplits > nSamples) {
      throw new InvalidParameterError(
        "nSplits must not be greater than number of samples",
        "nSplits",
        this.nSplits
      );
    }

    const order = Array.from({ length: nSamples }, (_, i) => i);
    if (this.shuffle) {
      const random =
        this.randomState !== undefined ? createSeededRandom(this.randomState) : Math.random;
      shuffleIndicesInPlace(order, random);
    }

    const splits = [];
    let cursor = 0;
    for (const foldSize of makeFoldSizes(nSamples, this.nSplits)) {
      const stop = cursor + foldSize;
      const held = order.slice(cursor, stop);
      const kept = order.slice(0, cursor).concat(order.slice(stop));
      splits.push([kept, held]);
      cursor = stop;
    }
    return splits;
  }

  /** @returns The configured number of folds. */
  getNSplits() {
    return this.nSplits;
  }
};
3592
/**
 * K-fold splitter that deals the samples of every class across the folds,
 * so each fold receives a share of each class.
 *
 * Options: `nSplits` (default 5), `shuffle` (default false) and
 * `randomState` (seed used when shuffling; `Math.random` otherwise).
 */
var StratifiedKFold = class {
  nSplits;
  shuffle;
  randomState;

  constructor(options = {}) {
    const { nSplits, shuffle, randomState } = options;
    this.nSplits = nSplits ?? 5;
    this.shuffle = shuffle ?? false;
    this.randomState = randomState;
  }

  /**
   * @param X - Object whose `shape[0]` is the number of samples.
   * @param y - 1D label tensor with one entry per sample.
   * @returns Array of `[trainIndices, testIndices]` pairs, one per fold.
   * @throws {ShapeError} When `X.shape[0]` is undefined or `y` is not 1D.
   * @throws {InvalidParameterError} When `nSplits` is invalid, exceeds the
   *   number of samples, `y` has a different length, or a class has fewer
   *   than `nSplits` members.
   */
  split(X, y) {
    const nSamples = X.shape[0];
    if (nSamples === undefined) {
      throw new ShapeError("X must have valid shape[0]");
    }
    validateNSplits(this.nSplits);
    if (this.nSplits > nSamples) {
      throw new InvalidParameterError(
        "nSplits must not be greater than number of samples",
        "nSplits",
        this.nSplits
      );
    }
    const yRows = y.shape[0];
    if (yRows === undefined || yRows !== nSamples) {
      throw new InvalidParameterError("X and y must have same number of samples", "y", yRows);
    }
    if (y.ndim !== 1) {
      throw new ShapeError(`y must be a 1D tensor, got ${y.ndim}D`);
    }

    // Bucket sample indices by label, in order of first appearance.
    const membersByLabel = new Map();
    const random =
      this.randomState !== undefined ? createSeededRandom(this.randomState) : Math.random;
    for (let i = 0; i < nSamples; i++) {
      const label = readTensorValue(y, [i]);
      const members = membersByLabel.get(label);
      if (members === undefined) {
        membersByLabel.set(label, [i]);
      } else {
        members.push(i);
      }
    }

    for (const [label, members] of membersByLabel) {
      if (this.shuffle) {
        shuffleIndicesInPlace(members, random);
      }
      if (members.length < this.nSplits) {
        throw new InvalidParameterError(
          `Each class must have at least nSplits samples; class ${label} has ${members.length}`,
          "nSplits",
          this.nSplits
        );
      }
    }

    // Cut every class into nSplits consecutive chunks, one chunk per fold.
    const folds = Array.from({ length: this.nSplits }, () => []);
    for (const members of membersByLabel.values()) {
      let start = 0;
      makeFoldSizes(members.length, this.nSplits).forEach((size, fold) => {
        const target = folds[fold];
        if (!target) {
          throw new DeepboxError("Internal error: stratified fold storage missing");
        }
        target.push(...members.slice(start, start + size));
        start += size;
      });
    }

    return folds.map((testIndices, fold) => {
      const trainIndices = folds.flatMap((members, other) => (other === fold ? [] : members));
      return [trainIndices, testIndices];
    });
  }

  /** @returns The configured number of folds. */
  getNSplits() {
    return this.nSplits;
  }
};
3676
/**
 * K-fold splitter that keeps all samples of a group in the same fold.
 *
 * Groups are taken largest first (ties broken by `compareLabels`) and each
 * is placed in the fold that currently holds the fewest samples.
 *
 * Options: `nSplits` (default 5).
 */
var GroupKFold = class {
  nSplits;

  constructor(options = {}) {
    const { nSplits } = options;
    this.nSplits = nSplits ?? 5;
  }

  /**
   * @param X - Object whose `shape[0]` is the number of samples.
   * @param _y - Unused.
   * @param groups - 1D tensor with one group label per sample.
   * @returns Array of `[trainIndices, testIndices]` pairs, one per fold.
   * @throws {ShapeError} When `X.shape[0]` is undefined or `groups` is not 1D.
   * @throws {InvalidParameterError} When `nSplits` is invalid, `groups` has a
   *   different length, or there are fewer groups than folds.
   */
  split(X, _y, groups) {
    const nSamples = X.shape[0];
    if (nSamples === undefined) {
      throw new ShapeError("X must have valid shape[0]");
    }
    validateNSplits(this.nSplits);
    if (groups.ndim !== 1) {
      throw new ShapeError(`groups must be a 1D tensor, got ${groups.ndim}D`);
    }
    const groupRows = groups.shape[0];
    if (groupRows === undefined || groupRows !== nSamples) {
      throw new InvalidParameterError(
        "X and groups must have same number of samples",
        "groups",
        groupRows
      );
    }

    // Bucket sample indices by group label, in order of first appearance.
    const membersByGroup = new Map();
    for (let i = 0; i < nSamples; i++) {
      const group = readTensorValue(groups, [i]);
      const members = membersByGroup.get(group);
      if (members === undefined) {
        membersByGroup.set(group, [i]);
      } else {
        members.push(i);
      }
    }

    const entries = [];
    for (const [group, indices] of membersByGroup) {
      entries.push({ group, indices, size: indices.length });
    }
    if (this.nSplits > entries.length) {
      throw new InvalidParameterError(
        "Number of groups must be at least nSplits",
        "nSplits",
        this.nSplits
      );
    }
    entries.sort((a, b) => (b.size !== a.size ? b.size - a.size : compareLabels(a.group, b.group)));

    const folds = Array.from({ length: this.nSplits }, () => []);
    const loads = new Array(this.nSplits).fill(0);
    for (const entry of entries) {
      // Strict "<" keeps the lowest-numbered fold when several are tied.
      let lightest = 0;
      for (let fold = 1; fold < loads.length; fold++) {
        if (loads[fold] < loads[lightest]) {
          lightest = fold;
        }
      }
      const target = folds[lightest];
      if (!target) {
        throw new DeepboxError("Internal error: group fold storage missing");
      }
      target.push(...entry.indices);
      loads[lightest] += entry.size;
    }

    return folds.map((testIndices, fold) => {
      const trainIndices = folds.flatMap((members, other) => (other === fold ? [] : members));
      return [trainIndices, testIndices];
    });
  }

  /** @returns The configured number of folds. */
  getNSplits() {
    return this.nSplits;
  }
};
3760
/**
 * Leave-one-out splitter: each sample index is the single test index once,
 * with every other index in the train set.
 */
var LeaveOneOut = class {
  /**
   * @param X - Object whose `shape[0]` is the number of samples.
   * @returns Array of `[trainIndices, [testIndex]]` pairs, one per sample.
   * @throws {ShapeError} When `X.shape[0]` is undefined.
   */
  split(X) {
    const nSamples = X.shape[0];
    if (nSamples === undefined) {
      throw new ShapeError("X must have valid shape[0]");
    }
    const splits = [];
    for (let held = 0; held < nSamples; held++) {
      const kept = [];
      for (let j = 0; j < nSamples; j++) {
        if (j !== held) {
          kept.push(j);
        }
      }
      splits.push([kept, [held]]);
    }
    return splits;
  }

  /**
   * @param X - Object whose `shape[0]` is the number of samples.
   * @returns The number of splits, which equals `X.shape[0]`.
   * @throws {ShapeError} When `X.shape[0]` is undefined.
   */
  getNSplits(X) {
    const nSamples = X.shape[0];
    if (nSamples === undefined) {
      throw new ShapeError("X must have valid shape[0]");
    }
    return nSamples;
  }
};
3786
/**
 * Leave-P-out splitter: every combination of `p` sample indices is used once
 * as the test set, with the remaining indices as the train set.
 */
var LeavePOut = class {
  p;

  /**
   * @param p - Number of samples held out per split.
   * @throws {InvalidParameterError} When `p` is not a positive integer.
   */
  constructor(p) {
    if (!Number.isFinite(p) || !Number.isInteger(p) || p <= 0) {
      throw new InvalidParameterError("p must be a positive integer", "p", p);
    }
    this.p = p;
  }

  /**
   * Enumerate all splits, test sets in lexicographic order.
   *
   * Combinations are generated iteratively, so the work is proportional to
   * the number of splits produced. A recursive search without pruning would
   * instead visit every partial combination (roughly 2^n of them when `p` is
   * close to the number of samples) and recurse `p` levels deep.
   *
   * @param X - Object whose `shape[0]` is the number of samples.
   * @returns Array of `[trainIndices, testIndices]` pairs.
   * @throws {ShapeError} When `X.shape[0]` is undefined.
   * @throws {InvalidParameterError} When `p` exceeds the number of samples or
   *   more than 100,000 splits would be produced.
   */
  split(X) {
    const nSamples = X.shape[0];
    if (nSamples === undefined) {
      throw new ShapeError("X must have valid shape[0]");
    }
    const p = this.p;
    if (p > nSamples) {
      throw new InvalidParameterError("p must not be greater than number of samples", "p", p);
    }

    // C(n, p) computed through the smaller of p and n - p.
    let nCombos = 1;
    const k = p > nSamples / 2 ? nSamples - p : p;
    for (let i = 0; i < k; i++) {
      nCombos = nCombos * (nSamples - i) / (i + 1);
    }
    if (nCombos > 1e5) {
      throw new InvalidParameterError(
        `LeavePOut produces ${Math.floor(nCombos)} splits, which exceeds memory safety limit of 100,000`,
        "p",
        p
      );
    }

    const splits = [];
    // Current test combination, always strictly increasing.
    const combo = Array.from({ length: p }, (_, i) => i);
    for (;;) {
      const held = new Set(combo);
      const trainIndices = [];
      for (let i = 0; i < nSamples; i++) {
        if (!held.has(i)) {
          trainIndices.push(i);
        }
      }
      splits.push([trainIndices, [...combo]]);

      // Advance to the next combination: find the rightmost slot that is not
      // yet at its maximum, bump it, and reset every slot to its right.
      let pos = p - 1;
      while (pos >= 0 && combo[pos] === nSamples - p + pos) {
        pos -= 1;
      }
      if (pos < 0) {
        break;
      }
      combo[pos] += 1;
      for (let j = pos + 1; j < p; j++) {
        combo[j] = combo[j - 1] + 1;
      }
    }
    return splits;
  }

  /**
   * @param X - Object whose `shape[0]` is the number of samples.
   * @returns The number of splits, C(n, p), rounded to the nearest integer.
   * @throws {ShapeError} When `X.shape[0]` is undefined.
   * @throws {InvalidParameterError} When `p` exceeds the number of samples.
   */
  getNSplits(X) {
    const n = X.shape[0];
    if (n === undefined) {
      throw new ShapeError("X must have valid shape[0]");
    }
    if (this.p > n) {
      throw new InvalidParameterError("p must not be greater than number of samples", "p", this.p);
    }
    let result = 1;
    const k = this.p > n / 2 ? n - this.p : this.p;
    for (let i = 0; i < k; i++) {
      result = result * (n - i) / (i + 1);
    }
    return Math.round(result);
  }
};
3851
+
3852
+ export { GroupKFold, KFold, LabelBinarizer, LabelEncoder, LeaveOneOut, LeavePOut, MaxAbsScaler, MinMaxScaler, MultiLabelBinarizer, Normalizer, OneHotEncoder, OrdinalEncoder, PowerTransformer, QuantileTransformer, RobustScaler, StandardScaler, StratifiedKFold, preprocess_exports, trainTestSplit };
3853
+ //# sourceMappingURL=chunk-AD436M45.js.map
3854
+ //# sourceMappingURL=chunk-AD436M45.js.map