deepbox 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173)
  1. package/LICENSE +21 -0
  2. package/README.md +344 -0
  3. package/dist/CSRMatrix-CwGwQRea.d.cts +219 -0
  4. package/dist/CSRMatrix-KzNt6QpS.d.ts +219 -0
  5. package/dist/Tensor-BQLk1ltW.d.cts +147 -0
  6. package/dist/Tensor-g8mUClel.d.ts +147 -0
  7. package/dist/chunk-4S73VUBD.js +677 -0
  8. package/dist/chunk-4S73VUBD.js.map +1 -0
  9. package/dist/chunk-5R4S63PF.js +2925 -0
  10. package/dist/chunk-5R4S63PF.js.map +1 -0
  11. package/dist/chunk-6AE5FKKQ.cjs +9264 -0
  12. package/dist/chunk-6AE5FKKQ.cjs.map +1 -0
  13. package/dist/chunk-AD436M45.js +3854 -0
  14. package/dist/chunk-AD436M45.js.map +1 -0
  15. package/dist/chunk-ALS7ETWZ.cjs +4263 -0
  16. package/dist/chunk-ALS7ETWZ.cjs.map +1 -0
  17. package/dist/chunk-AU7XHGKJ.js +2092 -0
  18. package/dist/chunk-AU7XHGKJ.js.map +1 -0
  19. package/dist/chunk-B5TNKUEY.js +1481 -0
  20. package/dist/chunk-B5TNKUEY.js.map +1 -0
  21. package/dist/chunk-BCR7G3A6.js +9136 -0
  22. package/dist/chunk-BCR7G3A6.js.map +1 -0
  23. package/dist/chunk-C4PKXY74.cjs +1917 -0
  24. package/dist/chunk-C4PKXY74.cjs.map +1 -0
  25. package/dist/chunk-DWZY6PIP.cjs +6400 -0
  26. package/dist/chunk-DWZY6PIP.cjs.map +1 -0
  27. package/dist/chunk-E3EU5FZO.cjs +2113 -0
  28. package/dist/chunk-E3EU5FZO.cjs.map +1 -0
  29. package/dist/chunk-F3JWBINJ.js +1054 -0
  30. package/dist/chunk-F3JWBINJ.js.map +1 -0
  31. package/dist/chunk-FJYLIGJX.js +1940 -0
  32. package/dist/chunk-FJYLIGJX.js.map +1 -0
  33. package/dist/chunk-JSCDE774.cjs +729 -0
  34. package/dist/chunk-JSCDE774.cjs.map +1 -0
  35. package/dist/chunk-LWECRCW2.cjs +2412 -0
  36. package/dist/chunk-LWECRCW2.cjs.map +1 -0
  37. package/dist/chunk-MLBMYKCG.js +6379 -0
  38. package/dist/chunk-MLBMYKCG.js.map +1 -0
  39. package/dist/chunk-OX6QXFMV.cjs +3874 -0
  40. package/dist/chunk-OX6QXFMV.cjs.map +1 -0
  41. package/dist/chunk-PHV2DKRS.cjs +1072 -0
  42. package/dist/chunk-PHV2DKRS.cjs.map +1 -0
  43. package/dist/chunk-PL7TAYKI.js +4056 -0
  44. package/dist/chunk-PL7TAYKI.js.map +1 -0
  45. package/dist/chunk-PR647I7R.js +1898 -0
  46. package/dist/chunk-PR647I7R.js.map +1 -0
  47. package/dist/chunk-QERHVCHC.cjs +2960 -0
  48. package/dist/chunk-QERHVCHC.cjs.map +1 -0
  49. package/dist/chunk-XEG44RF6.cjs +1514 -0
  50. package/dist/chunk-XEG44RF6.cjs.map +1 -0
  51. package/dist/chunk-XMWVME2W.js +2377 -0
  52. package/dist/chunk-XMWVME2W.js.map +1 -0
  53. package/dist/chunk-ZB75FESB.cjs +1979 -0
  54. package/dist/chunk-ZB75FESB.cjs.map +1 -0
  55. package/dist/chunk-ZLW62TJG.cjs +4061 -0
  56. package/dist/chunk-ZLW62TJG.cjs.map +1 -0
  57. package/dist/chunk-ZXKBDFP3.js +4235 -0
  58. package/dist/chunk-ZXKBDFP3.js.map +1 -0
  59. package/dist/core/index.cjs +204 -0
  60. package/dist/core/index.cjs.map +1 -0
  61. package/dist/core/index.d.cts +2 -0
  62. package/dist/core/index.d.ts +2 -0
  63. package/dist/core/index.js +3 -0
  64. package/dist/core/index.js.map +1 -0
  65. package/dist/dataframe/index.cjs +22 -0
  66. package/dist/dataframe/index.cjs.map +1 -0
  67. package/dist/dataframe/index.d.cts +3 -0
  68. package/dist/dataframe/index.d.ts +3 -0
  69. package/dist/dataframe/index.js +5 -0
  70. package/dist/dataframe/index.js.map +1 -0
  71. package/dist/datasets/index.cjs +134 -0
  72. package/dist/datasets/index.cjs.map +1 -0
  73. package/dist/datasets/index.d.cts +3 -0
  74. package/dist/datasets/index.d.ts +3 -0
  75. package/dist/datasets/index.js +5 -0
  76. package/dist/datasets/index.js.map +1 -0
  77. package/dist/index-74AB8Cyh.d.cts +1126 -0
  78. package/dist/index-9oQx1HgV.d.cts +1180 -0
  79. package/dist/index-BJY2SI4i.d.ts +483 -0
  80. package/dist/index-BWGhrDlr.d.ts +733 -0
  81. package/dist/index-B_DK4FKY.d.cts +242 -0
  82. package/dist/index-BbA2Gxfl.d.ts +456 -0
  83. package/dist/index-BgHYAoSS.d.cts +837 -0
  84. package/dist/index-BndMbqsM.d.ts +1439 -0
  85. package/dist/index-C1mfVYoo.d.ts +2517 -0
  86. package/dist/index-CCvlwAmL.d.cts +809 -0
  87. package/dist/index-CDw5CnOU.d.ts +785 -0
  88. package/dist/index-Cn3SdB0O.d.ts +1126 -0
  89. package/dist/index-CrqLlS-a.d.ts +776 -0
  90. package/dist/index-D61yaSMY.d.cts +483 -0
  91. package/dist/index-D9Loo1_A.d.cts +2517 -0
  92. package/dist/index-DIT_OO9C.d.cts +785 -0
  93. package/dist/index-DIp_RrRt.d.ts +242 -0
  94. package/dist/index-DbultU6X.d.cts +1427 -0
  95. package/dist/index-DmEg_LCm.d.cts +776 -0
  96. package/dist/index-DoPWVxPo.d.cts +1439 -0
  97. package/dist/index-DuCxd-8d.d.ts +837 -0
  98. package/dist/index-Dx42TZaY.d.ts +809 -0
  99. package/dist/index-DyZ4QQf5.d.cts +456 -0
  100. package/dist/index-GFAVyOWO.d.ts +1427 -0
  101. package/dist/index-WHQLn0e8.d.cts +733 -0
  102. package/dist/index-ZtI1Iy4L.d.ts +1180 -0
  103. package/dist/index-eJgeni9c.d.cts +1911 -0
  104. package/dist/index-tk4lSYod.d.ts +1911 -0
  105. package/dist/index.cjs +72 -0
  106. package/dist/index.cjs.map +1 -0
  107. package/dist/index.d.cts +17 -0
  108. package/dist/index.d.ts +17 -0
  109. package/dist/index.js +15 -0
  110. package/dist/index.js.map +1 -0
  111. package/dist/linalg/index.cjs +86 -0
  112. package/dist/linalg/index.cjs.map +1 -0
  113. package/dist/linalg/index.d.cts +3 -0
  114. package/dist/linalg/index.d.ts +3 -0
  115. package/dist/linalg/index.js +5 -0
  116. package/dist/linalg/index.js.map +1 -0
  117. package/dist/metrics/index.cjs +158 -0
  118. package/dist/metrics/index.cjs.map +1 -0
  119. package/dist/metrics/index.d.cts +3 -0
  120. package/dist/metrics/index.d.ts +3 -0
  121. package/dist/metrics/index.js +5 -0
  122. package/dist/metrics/index.js.map +1 -0
  123. package/dist/ml/index.cjs +87 -0
  124. package/dist/ml/index.cjs.map +1 -0
  125. package/dist/ml/index.d.cts +3 -0
  126. package/dist/ml/index.d.ts +3 -0
  127. package/dist/ml/index.js +6 -0
  128. package/dist/ml/index.js.map +1 -0
  129. package/dist/ndarray/index.cjs +501 -0
  130. package/dist/ndarray/index.cjs.map +1 -0
  131. package/dist/ndarray/index.d.cts +5 -0
  132. package/dist/ndarray/index.d.ts +5 -0
  133. package/dist/ndarray/index.js +4 -0
  134. package/dist/ndarray/index.js.map +1 -0
  135. package/dist/nn/index.cjs +142 -0
  136. package/dist/nn/index.cjs.map +1 -0
  137. package/dist/nn/index.d.cts +6 -0
  138. package/dist/nn/index.d.ts +6 -0
  139. package/dist/nn/index.js +5 -0
  140. package/dist/nn/index.js.map +1 -0
  141. package/dist/optim/index.cjs +77 -0
  142. package/dist/optim/index.cjs.map +1 -0
  143. package/dist/optim/index.d.cts +4 -0
  144. package/dist/optim/index.d.ts +4 -0
  145. package/dist/optim/index.js +4 -0
  146. package/dist/optim/index.js.map +1 -0
  147. package/dist/plot/index.cjs +114 -0
  148. package/dist/plot/index.cjs.map +1 -0
  149. package/dist/plot/index.d.cts +6 -0
  150. package/dist/plot/index.d.ts +6 -0
  151. package/dist/plot/index.js +5 -0
  152. package/dist/plot/index.js.map +1 -0
  153. package/dist/preprocess/index.cjs +82 -0
  154. package/dist/preprocess/index.cjs.map +1 -0
  155. package/dist/preprocess/index.d.cts +4 -0
  156. package/dist/preprocess/index.d.ts +4 -0
  157. package/dist/preprocess/index.js +5 -0
  158. package/dist/preprocess/index.js.map +1 -0
  159. package/dist/random/index.cjs +74 -0
  160. package/dist/random/index.cjs.map +1 -0
  161. package/dist/random/index.d.cts +3 -0
  162. package/dist/random/index.d.ts +3 -0
  163. package/dist/random/index.js +5 -0
  164. package/dist/random/index.js.map +1 -0
  165. package/dist/stats/index.cjs +142 -0
  166. package/dist/stats/index.cjs.map +1 -0
  167. package/dist/stats/index.d.cts +3 -0
  168. package/dist/stats/index.d.ts +3 -0
  169. package/dist/stats/index.js +5 -0
  170. package/dist/stats/index.js.map +1 -0
  171. package/dist/tensor-B96jjJLQ.d.cts +205 -0
  172. package/dist/tensor-B96jjJLQ.d.ts +205 -0
  173. package/package.json +226 -0
@@ -0,0 +1,837 @@
+ import { T as Tensor } from './Tensor-BQLk1ltW.cjs';
+ import { C as CSRMatrix } from './CSRMatrix-CwGwQRea.cjs';
+
+ /**
+ * Type representing a category value that can be a string, number, or bigint.
+ * Used for categorical encoding operations.
+ */
+ type Category = number | string | bigint;
+ type CategoriesOption = "auto" | ReadonlyArray<ReadonlyArray<Category>>;
+ /**
+ * Encode target labels with value between 0 and n_classes-1.
+ *
+ * This transformer encodes categorical labels (strings or numbers) into integers
+ * in the range [0, n_classes-1]. It maintains a mapping of unique classes to
+ * their integer representations and can reverse the transformation.
+ *
+ * **Time Complexity:**
+ * - fit: O(n) where n is the number of samples
+ * - transform: O(n) with O(1) lookup per sample
+ * - inverseTransform: O(n)
+ *
+ * **Space Complexity:** O(k) where k is the number of unique classes
+ *
+ * @example
+ * ```js
+ * import { LabelEncoder } from 'deepbox/preprocess';
+ * import { tensor } from 'deepbox/ndarray';
+ *
+ * const y = tensor(['cat', 'dog', 'cat', 'bird']);
+ * const encoder = new LabelEncoder();
+ * encoder.fit(y);
+ * const yEncoded = encoder.transform(y); // [1, 2, 1, 0]
+ * const yDecoded = encoder.inverseTransform(yEncoded); // ['cat', 'dog', 'cat', 'bird']
+ * ```
+ *
+ * @see {@link https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.LabelEncoder.html | Scikit-learn LabelEncoder}
+ */
+ declare class LabelEncoder {
+ /** Indicates whether the encoder has been fitted to data */
+ private fitted;
+ /** Array of unique classes found during fitting, sorted for consistency */
+ private classes_?;
+ /** Map from class value to encoded integer index for O(1) lookup */
+ private classToIndex_?;
+ /**
+ * Fit label encoder to a set of labels.
+ * Extracts unique classes and creates an index mapping.
+ *
+ * @param y - Target labels (1D tensor of strings or numbers)
+ * @returns this - Returns self for method chaining
+ * @throws {InvalidParameterError} If y is empty
+ */
+ fit(y: Tensor): this;
+ /**
+ * Transform labels to normalized encoding.
+ * Each unique label is mapped to an integer in [0, n_classes-1].
+ *
+ * @param y - Target labels to encode (1D tensor)
+ * @returns Encoded labels as integer tensor
+ * @throws {NotFittedError} If encoder is not fitted
+ * @throws {InvalidParameterError} If y contains labels not seen during fit
+ */
+ transform(y: Tensor): Tensor;
+ /**
+ * Fit label encoder and return encoded labels in one step.
+ * Convenience method equivalent to calling fit(y).transform(y).
+ *
+ * @param y - Target labels (1D tensor)
+ * @returns Encoded labels as integer tensor
+ */
+ fitTransform(y: Tensor): Tensor;
+ /**
+ * Transform integer labels back to original encoding.
+ * Reverses the encoding performed by transform().
+ *
+ * @param y - Encoded labels (1D integer tensor)
+ * @returns Original labels (strings or numbers)
+ * @throws {NotFittedError} If encoder is not fitted
+ * @throws {InvalidParameterError} If y contains invalid indices
+ */
+ inverseTransform(y: Tensor): Tensor;
+ }
+ /**
+ * Encode categorical features as one-hot numeric array.
+ *
+ * This encoder transforms categorical features into a binary one-hot encoding.
+ * Each categorical feature with n unique values is transformed into n binary features,
+ * with only one active (set to 1) per sample.
+ *
+ * **Time Complexity:**
+ * - fit: O(n*m) where n is samples, m is features
+ * - transform: O(n*m*k) where k is average categories per feature
+ * - Sparse mode is more efficient for high-cardinality features
+ *
+ * **Space Complexity:**
+ * - Dense: O(n * sum(k_i)) where k_i is unique categories for feature i
+ * - Sparse: O(nnz) where nnz is number of non-zero elements
+ *
+ * @example
+ * ```js
+ * const X = tensor([['red', 'S'], ['blue', 'M'], ['red', 'L']]);
+ * const encoder = new OneHotEncoder({ sparse: false });
+ * encoder.fit(X);
+ * const encoded = encoder.transform(X);
+ * // Result: [[1,0,1,0,0], [0,1,0,1,0], [1,0,0,0,1]]
+ * ```
+ *
+ * @see {@link https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OneHotEncoder.html | Scikit-learn OneHotEncoder}
+ */
+ declare class OneHotEncoder {
+ /** Indicates whether the encoder has been fitted to data */
+ private fitted;
+ /** Array of unique categories for each feature */
+ private categories_?;
+ /** Maps from category value to index for each feature (for O(1) lookup) */
+ private categoryToIndex_?;
+ /** Whether to return sparse matrix (CSR) or dense array */
+ private sparse;
+ /** How to handle unknown categories during transform */
+ private handleUnknown;
+ /** Drop policy to avoid collinearity */
+ private drop;
+ /** Per-feature dropped category index */
+ private dropIndices_?;
+ /** Categories configuration */
+ private categoriesOption;
+ /**
+ * Creates a new OneHotEncoder instance.
+ *
+ * @param options - Configuration options
+ * @param options.sparse - If true, returns CSRMatrix; if false, returns dense Tensor (default: false)
+ * @param options.sparseOutput - Alias for sparse (default: false)
+ * @param options.handleUnknown - How to handle unknown categories (default: "error")
+ * @param options.drop - If set, drops the first or binary category per feature
+ * @param options.categories - "auto" or explicit category list per feature
+ */
+ constructor(options?: {
+ sparse?: boolean;
+ sparseOutput?: boolean;
+ handleUnknown?: "error" | "ignore";
+ drop?: "first" | "if_binary" | null;
+ categories?: CategoriesOption;
+ });
+ /**
+ * Fit OneHotEncoder to X.
+ * Learns the unique categories for each feature.
+ *
+ * @param X - Training data (2D tensor of categorical features)
+ * @returns this - Returns self for method chaining
+ * @throws {ShapeError} If X is not a 2D tensor
+ * @throws {InvalidParameterError} If X is empty
+ */
+ fit(X: Tensor): this;
+ /**
+ * Transform X using one-hot encoding.
+ * Each categorical value is converted to a binary vector.
+ *
+ * @param X - Data to transform (2D tensor)
+ * @returns Encoded data as dense Tensor or sparse CSRMatrix
+ * @throws {NotFittedError} If encoder is not fitted
+ * @throws {InvalidParameterError} If X contains unknown categories
+ */
+ transform(X: Tensor): Tensor | CSRMatrix;
+ fitTransform(X: Tensor): Tensor | CSRMatrix;
+ inverseTransform(X: Tensor | CSRMatrix): Tensor;
+ }
+ /**
+ * Encode categorical features as integer array.
+ *
+ * This encoder transforms categorical features into ordinal integers.
+ * Each feature's categories are mapped to integers [0, n_categories-1]
+ * based on their sorted order. Unlike OneHotEncoder, this maintains
+ * a single column per feature.
+ *
+ * **Time Complexity:**
+ * - fit: O(n*m*log(k)) where n=samples, m=features, k=avg categories
+ * - transform: O(n*m*log(k)) due to indexOf lookup
+ *
+ * **Space Complexity:** O(m*k) where m=features, k=avg categories per feature
+ *
+ * @example
+ * ```js
+ * const X = tensor([['low', 'red'], ['high', 'blue'], ['medium', 'red']]);
+ * const encoder = new OrdinalEncoder();
+ * encoder.fit(X);
+ * const encoded = encoder.transform(X);
+ * // Result: [[1, 1], [0, 0], [2, 1]] (alphabetically sorted)
+ * ```
+ *
+ * @see {@link https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OrdinalEncoder.html | Scikit-learn OrdinalEncoder}
+ */
+ declare class OrdinalEncoder {
+ /** Indicates whether the encoder has been fitted to data */
+ private fitted;
+ /** Array of unique categories for each feature, sorted */
+ private categories_?;
+ /** Maps from category value to index for each feature (for O(1) lookup) */
+ private categoryToIndex_?;
+ /** How to handle unknown categories during transform */
+ private handleUnknown;
+ /** Value used for unknown categories when handleUnknown = "useEncodedValue" */
+ private unknownValue;
+ /** Categories configuration */
+ private categoriesOption;
+ /**
+ * Creates a new OrdinalEncoder instance.
+ *
+ * @param options - Configuration options
+ * @param options.handleUnknown - How to handle unknown categories
+ * @param options.unknownValue - Encoded value for unknown categories when handleUnknown="useEncodedValue"
+ * @param options.categories - "auto" or explicit categories per feature
+ */
+ constructor(options?: {
+ handleUnknown?: "error" | "useEncodedValue";
+ unknownValue?: number;
+ categories?: CategoriesOption;
+ });
+ /**
+ * Fit OrdinalEncoder to X.
+ * Learns the unique categories for each feature and their ordering.
+ *
+ * @param X - Training data (2D tensor of categorical features)
+ * @returns this - Returns self for method chaining
+ * @throws {InvalidParameterError} If X is empty
+ */
+ fit(X: Tensor): this;
+ /**
+ * Transform X using ordinal encoding.
+ * Each category is mapped to its index in the sorted categories array.
+ *
+ * @param X - Data to transform (2D tensor)
+ * @returns Encoded data with integer values
+ * @throws {NotFittedError} If encoder is not fitted
+ * @throws {InvalidParameterError} If X contains unknown categories
+ */
+ transform(X: Tensor): Tensor;
+ /**
+ * Fit encoder and transform X in one step.
+ * Convenience method equivalent to calling fit(X).transform(X).
+ *
+ * @param X - Training data (2D tensor)
+ * @returns Encoded data
+ */
+ fitTransform(X: Tensor): Tensor;
+ /**
+ * Transform ordinal integers back to original categories.
+ * Reverses the encoding performed by transform().
+ *
+ * @param X - Encoded data (2D integer tensor)
+ * @returns Original categorical data
+ * @throws {NotFittedError} If encoder is not fitted
+ * @throws {InvalidParameterError} If X contains invalid indices
+ */
+ inverseTransform(X: Tensor): Tensor;
+ }
+ /**
+ * Binarize labels in a one-vs-all fashion.
+ *
+ * This transformer creates a binary matrix representation of labels where
+ * each class gets its own column. For multi-class problems, this creates
+ * a one-hot encoding of the labels.
+ *
+ * **Time Complexity:**
+ * - fit: O(n) where n is the number of samples
+ * - transform: O(n*k) where k is the number of classes
+ *
+ * **Space Complexity:** O(n*k) for the output matrix
+ *
+ * @example
+ * ```js
+ * const y = tensor([0, 1, 2, 0, 1]);
+ * const binarizer = new LabelBinarizer();
+ * const yBin = binarizer.fitTransform(y);
+ * // Result shape: [5, 3] with one-hot encoding
+ * ```
+ *
+ * @see {@link https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.LabelBinarizer.html | Scikit-learn LabelBinarizer}
+ */
+ declare class LabelBinarizer {
+ /** Indicates whether the binarizer has been fitted to data */
+ private fitted;
+ /** Array of unique classes found during fitting, sorted */
+ private classes_?;
+ /** Map from class value to index for O(1) lookups */
+ private classToIndex_?;
+ /** Value used for positive class */
+ private posLabel;
+ /** Value used for negative class */
+ private negLabel;
+ /** Whether to return sparse matrix output */
+ private sparse;
+ /**
+ * Creates a new LabelBinarizer instance.
+ *
+ * @param options - Configuration options
+ * @param options.posLabel - Value for positive class (default: 1)
+ * @param options.negLabel - Value for negative class (default: 0)
+ * @param options.sparse - If true, returns CSRMatrix (default: false)
+ * @param options.sparseOutput - Alias for sparse (default: false)
+ */
+ constructor(options?: {
+ posLabel?: number;
+ negLabel?: number;
+ sparse?: boolean;
+ sparseOutput?: boolean;
+ });
+ /**
+ * Fit label binarizer to a set of labels.
+ * Learns the unique classes present in the data.
+ *
+ * @param y - Target labels (1D tensor)
+ * @returns this - Returns self for method chaining
+ * @throws {InvalidParameterError} If y is empty
+ */
+ fit(y: Tensor): this;
+ /**
+ * Transform labels to binary matrix.
+ * Each label is converted to a binary vector with a single 1.
+ *
+ * @param y - Labels to transform (1D tensor)
+ * @returns Binary matrix (Tensor or CSRMatrix) with shape [n_samples, n_classes]
+ * @throws {NotFittedError} If binarizer is not fitted
+ * @throws {InvalidParameterError} If y contains unknown labels
+ */
+ transform(y: Tensor): Tensor | CSRMatrix;
+ /**
+ * Fit binarizer and transform labels in one step.
+ * Convenience method equivalent to calling fit(y).transform(y).
+ *
+ * @param y - Target labels (1D tensor)
+ * @returns Binary matrix (Tensor or CSRMatrix)
+ */
+ fitTransform(y: Tensor): Tensor | CSRMatrix;
+ /**
+ * Transform binary matrix back to labels.
+ * Finds the column with maximum value for each row.
+ *
+ * @param Y - Binary matrix (2D tensor or CSRMatrix)
+ * @returns Original labels (1D tensor)
+ * @throws {NotFittedError} If binarizer is not fitted
+ * @throws {InvalidParameterError} If Y has invalid shape
+ */
+ inverseTransform(Y: Tensor | CSRMatrix): Tensor;
+ }
+ /**
+ * Transform multi-label classification data to binary format.
+ *
+ * This transformer handles multi-label classification where each sample
+ * can belong to multiple classes simultaneously. It creates a binary
+ * matrix where each column represents a class and multiple columns can
+ * be active (set to 1) for a single sample.
+ *
+ * **Time Complexity:**
+ * - fit: O(n*k) where n is samples, k is avg labels per sample
+ * - transform: O(n*k*c) where c is total unique classes
+ *
+ * **Space Complexity:** O(n*c) for the output matrix
+ *
+ * @example
+ * ```js
+ * const y = [['sci-fi', 'action'], ['comedy'], ['action', 'drama']];
+ * const binarizer = new MultiLabelBinarizer();
+ * const yBin = binarizer.fitTransform(y);
+ * // Each row can have multiple 1s
+ * ```
+ *
+ * @see {@link https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MultiLabelBinarizer.html | Scikit-learn MultiLabelBinarizer}
+ */
+ declare class MultiLabelBinarizer {
+ /** Indicates whether the binarizer has been fitted to data */
+ private fitted;
+ /** Array of all unique classes found across all samples, sorted */
+ private classes_?;
+ /** Map from class value to index for O(1) lookups */
+ private classToIndex_?;
+ /** Whether to return sparse matrix (CSR) or dense array */
+ private sparse;
+ /** Optional explicit class ordering */
+ private classesOption?;
+ /**
+ * Creates a new MultiLabelBinarizer instance.
+ *
+ * @param options - Configuration options
+ * @param options.sparse - If true, returns CSRMatrix; if false, returns dense Tensor (default: false)
+ * @param options.sparseOutput - Alias for sparse (default: false)
+ * @param options.classes - Explicit class ordering to use instead of sorting
+ */
+ constructor(options?: {
+ sparse?: boolean;
+ sparseOutput?: boolean;
+ classes?: ReadonlyArray<Category>;
+ });
+ /**
+ * Fit multi-label binarizer to label sets.
+ * Learns all unique classes present across all samples.
+ *
+ * @param y - Array of label sets, where each element is an array of string/number/bigint labels
+ * @returns this - Returns self for method chaining
+ * @throws {InvalidParameterError} If y is empty
+ */
+ fit(y: ReadonlyArray<ReadonlyArray<Category>>): this;
+ /**
+ * Transform label sets to binary matrix.
+ * Each sample can have multiple active (1) columns.
+ *
+ * @param y - Array of label sets to transform (string/number/bigint labels)
+ * @returns Binary matrix (Tensor or CSRMatrix) with shape [n_samples, n_classes]
+ * @throws {NotFittedError} If binarizer is not fitted
+ * @throws {InvalidParameterError} If y contains unknown labels
+ */
+ transform(y: ReadonlyArray<ReadonlyArray<Category>>): Tensor | CSRMatrix;
+ /**
+ * Fit binarizer and transform label sets in one step.
+ * Convenience method equivalent to calling fit(y).transform(y).
+ *
+ * @param y - Array of label sets (string/number/bigint labels)
+ * @returns Binary matrix (Tensor or CSRMatrix)
+ */
+ fitTransform(y: ReadonlyArray<ReadonlyArray<Category>>): Tensor | CSRMatrix;
+ /**
+ * Transform binary matrix back to label sets.
+ * Finds all active (1) columns for each row.
+ *
+ * @param Y - Binary matrix (Tensor or CSRMatrix)
+ * @returns Array of label sets, one per sample
+ * @throws {NotFittedError} If binarizer is not fitted
+ * @throws {InvalidParameterError} If Y has invalid shape
+ */
+ inverseTransform(Y: Tensor | CSRMatrix): Category[][];
+ }
+
+ /**
+ * Standardize features by removing mean and scaling to unit variance.
+ *
+ * **Formula**: z = (x - μ) / σ
+ *
+ * **Attributes** (after fitting):
+ * - `mean_`: Mean of each feature
+ * - `scale_`: Standard deviation of each feature
+ *
+ * @example
+ * ```js
+ * import { StandardScaler } from 'deepbox/preprocess';
+ * import { tensor } from 'deepbox/ndarray';
+ *
+ * const X = tensor([[1, 2], [3, 4], [5, 6]]);
+ * const scaler = new StandardScaler();
+ * scaler.fit(X);
+ * const XScaled = scaler.transform(X);
+ * ```
+ *
+ * @see {@link https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html | Scikit-learn StandardScaler}
+ */
+ declare class StandardScaler {
+ private fitted;
+ private mean_;
+ private scale_;
+ private withMean;
+ private withStd;
+ /**
+ * Creates a new StandardScaler.
+ *
+ * @param options - Configuration options
+ * @param options.withMean - Center data before scaling (default: true)
+ * @param options.withStd - Scale data to unit variance (default: true)
+ * @param options.copy - Accepted for API parity; transforms are always out-of-place (default: true)
+ */
+ constructor(options?: {
+ withMean?: boolean;
+ withStd?: boolean;
+ copy?: boolean;
+ });
+ fit(X: Tensor): this;
+ transform(X: Tensor): Tensor;
+ fitTransform(X: Tensor): Tensor;
+ inverseTransform(X: Tensor): Tensor;
+ }
+ /**
+ * Scale features to a range [min, max].
+ *
+ * **Formula**: X_scaled = (X - X.min) / (X.max - X.min) * (max - min) + min
+ *
+ * @see {@link https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MinMaxScaler.html | Scikit-learn MinMaxScaler}
+ */
+ declare class MinMaxScaler {
+ private fitted;
+ private dataMin_?;
+ private dataMax_?;
+ private featureRange;
+ private clip;
+ /**
+ * Creates a new MinMaxScaler.
+ *
+ * @param options - Configuration options
+ * @param options.featureRange - Desired feature range [min, max] (default: [0, 1])
+ * @param options.clip - Clip transformed values to featureRange (default: false)
+ * @param options.copy - Accepted for API parity; transforms are always out-of-place (default: true)
+ */
+ constructor(options?: {
+ featureRange?: [number, number];
+ clip?: boolean;
+ copy?: boolean;
+ });
+ fit(X: Tensor): this;
+ transform(X: Tensor): Tensor;
+ fitTransform(X: Tensor): Tensor;
+ inverseTransform(X: Tensor): Tensor;
+ }
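A minimal MinMaxScaler sketch based on the declared constructor and methods; the commented values follow directly from the formula above and are shown for illustration only.

```js
import { MinMaxScaler } from 'deepbox/preprocess';
import { tensor } from 'deepbox/ndarray';

// Per-column minima/maxima are learned during fit: mins [1, 2], maxes [5, 6]
const X = tensor([[1, 2], [3, 4], [5, 6]]);
const scaler = new MinMaxScaler({ featureRange: [0, 1] });
const XScaled = scaler.fitTransform(X);
// Applying the formula gives [[0, 0], [0.5, 0.5], [1, 1]]
const XBack = scaler.inverseTransform(XScaled); // recovers the original values
```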
+ /**
+ * Scale features by maximum absolute value.
+ *
+ * Scales to range [-1, 1]. Suitable for data that is already centered at zero.
+ *
+ * @see {@link https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MaxAbsScaler.html | Scikit-learn MaxAbsScaler}
+ */
+ declare class MaxAbsScaler {
+ private fitted;
+ private maxAbs_?;
+ /**
+ * Creates a new MaxAbsScaler.
+ *
+ * @param options - Configuration options
+ * @param options.copy - Accepted for API parity; transforms are always out-of-place (default: true)
+ */
+ constructor(options?: {
+ copy?: boolean;
+ });
+ fit(X: Tensor): this;
+ transform(X: Tensor): Tensor;
+ fitTransform(X: Tensor): Tensor;
+ inverseTransform(X: Tensor): Tensor;
+ }
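A short MaxAbsScaler sketch, assuming each column is divided by its maximum absolute value as the description says; the commented result is what that rule would produce.

```js
import { MaxAbsScaler } from 'deepbox/preprocess';
import { tensor } from 'deepbox/ndarray';

// Column-wise max absolute values: [4, 100]
const X = tensor([[1, -100], [-4, 50]]);
const scaler = new MaxAbsScaler();
const XScaled = scaler.fitTransform(X);
// Dividing by [4, 100] gives [[0.25, -1], [-1, 0.5]], all within [-1, 1]
```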
+ /**
+ * Robust scaler using median and IQR.
+ *
+ * Robust to outliers.
+ *
+ * @see {@link https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.RobustScaler.html | Scikit-learn RobustScaler}
+ */
+ declare class RobustScaler {
+ private fitted;
+ private center_;
+ private scale_;
+ private withCentering;
+ private withScaling;
+ private quantileRange;
+ private unitVariance;
+ /**
+ * Creates a new RobustScaler.
+ *
+ * @param options - Configuration options
+ * @param options.withCentering - Center data using median (default: true)
+ * @param options.withScaling - Scale data using IQR (default: true)
+ * @param options.quantileRange - Quantile range for IQR as percentiles (default: [25, 75])
+ * @param options.unitVariance - Scale so that features have unit variance under normality (default: false)
+ * @param options.copy - Accepted for API parity; transforms are always out-of-place (default: true)
+ */
+ constructor(options?: {
+ withCentering?: boolean;
+ withScaling?: boolean;
+ quantileRange?: [number, number];
+ unitVariance?: boolean;
+ copy?: boolean;
+ });
+ fit(X: Tensor): this;
+ transform(X: Tensor): Tensor;
+ fitTransform(X: Tensor): Tensor;
+ inverseTransform(X: Tensor): Tensor;
+ }
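A RobustScaler sketch using only the declared options; exact outputs depend on how the quantiles are interpolated internally, so the comment only indicates the general behaviour.

```js
import { RobustScaler } from 'deepbox/preprocess';
import { tensor } from 'deepbox/ndarray';

// A single feature with one extreme outlier (1000)
const X = tensor([[1], [2], [3], [4], [1000]]);
const scaler = new RobustScaler({ quantileRange: [25, 75] });
const XScaled = scaler.fitTransform(X);
// Centering uses the median (3) and scaling uses the IQR, so the inliers
// stay near zero while the outlier maps to a large value; the exact numbers
// depend on the quantile interpolation used by the implementation.
```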
+ /**
+ * Normalize samples to unit norm.
+ *
+ * Scales each sample (row) to have unit norm.
+ *
+ * @see {@link https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.Normalizer.html | Scikit-learn Normalizer}
+ */
+ declare class Normalizer {
+ private norm;
+ /**
+ * Creates a new Normalizer.
+ *
+ * @param options - Configuration options
+ * @param options.norm - Norm to use (default: "l2")
+ * @param options.copy - Accepted for API parity; transforms are always out-of-place (default: true)
+ */
+ constructor(options?: {
+ norm?: "l1" | "l2" | "max";
+ copy?: boolean;
+ });
+ fit(_X: Tensor): this;
+ transform(X: Tensor): Tensor;
+ fitTransform(X: Tensor): Tensor;
+ }
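A Normalizer sketch; with the declared default "l2" norm each row is divided by its Euclidean length, so the commented values can be checked by hand.

```js
import { Normalizer } from 'deepbox/preprocess';
import { tensor } from 'deepbox/ndarray';

const X = tensor([[3, 4], [0, 5]]);
const normalizer = new Normalizer({ norm: 'l2' });
const XNorm = normalizer.fitTransform(X);
// Row norms are 5 and 5, so the result is [[0.6, 0.8], [0, 1]]
```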
+ /**
+ * Transform features using quantiles.
+ *
+ * Maps to uniform or normal distribution.
+ *
+ * @see {@link https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.QuantileTransformer.html | Scikit-learn QuantileTransformer}
+ */
+ declare class QuantileTransformer {
+ private fitted;
+ private nQuantiles;
+ private outputDistribution;
+ private quantiles_?;
+ private subsample;
+ private randomState;
+ /**
+ * Creates a new QuantileTransformer.
+ *
+ * @param options - Configuration options
+ * @param options.nQuantiles - Number of quantiles to use (default: 1000)
+ * @param options.outputDistribution - "uniform" or "normal" (default: "uniform")
+ * @param options.subsample - Subsample size for quantile estimation (default: use all samples)
+ * @param options.randomState - Seed for subsampling reproducibility
+ * @param options.copy - Accepted for API parity; transforms are always out-of-place (default: true)
+ */
+ constructor(options?: {
+ nQuantiles?: number;
+ outputDistribution?: "uniform" | "normal";
+ subsample?: number;
+ randomState?: number;
+ copy?: boolean;
+ });
+ fit(X: Tensor): this;
+ transform(X: Tensor): Tensor;
+ /**
+ * Inverse transform data back to the original feature space.
+ *
+ * If `outputDistribution="normal"`, values are first mapped back to uniform
+ * quantiles before being projected into the original data distribution.
+ *
+ * @param X - Transformed data (2D tensor)
+ * @returns Data in the original feature space
+ * @throws {NotFittedError} If transformer is not fitted
+ */
+ inverseTransform(X: Tensor): Tensor;
+ private erf;
+ private normalCdf;
+ private erfInv;
+ private interpolateFromSorted;
+ private mapValueToQuantile;
+ private mapQuantileToValue;
+ fitTransform(X: Tensor): Tensor;
+ }
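A QuantileTransformer sketch based on the options declared above; with outputDistribution "uniform" the transformed values land in [0, 1] according to each feature's empirical quantiles, and no exact numbers are asserted.

```js
import { QuantileTransformer } from 'deepbox/preprocess';
import { tensor } from 'deepbox/ndarray';

const X = tensor([[1], [2], [3], [4], [1000]]);
const qt = new QuantileTransformer({ nQuantiles: 5, outputDistribution: 'uniform' });
const XUniform = qt.fitTransform(X);
// Ranks rather than magnitudes matter: the outlier 1000 simply maps to the
// top of the [0, 1] range instead of dominating the scale.
const XBack = qt.inverseTransform(XUniform); // approximately recovers X
```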
+ /**
+ * Apply power transform to make data more Gaussian-like.
+ *
+ * Supports Box-Cox and Yeo-Johnson transforms, with optional standardization.
+ *
+ * @see {@link https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PowerTransformer.html | Scikit-learn PowerTransformer}
+ */
+ declare class PowerTransformer {
+ private fitted;
+ private method;
+ private lambdas_;
+ private standardize;
+ private mean_;
+ private scale_;
+ /**
+ * Creates a new PowerTransformer.
+ *
+ * @param options - Configuration options
+ * @param options.method - "box-cox" or "yeo-johnson" (default: "yeo-johnson")
+ * @param options.standardize - Whether to standardize transformed features (default: false)
+ * @param options.copy - Accepted for API parity; transforms are always out-of-place (default: true)
+ */
+ constructor(options?: {
+ method?: "box-cox" | "yeo-johnson";
+ standardize?: boolean;
+ copy?: boolean;
+ });
+ fit(X: Tensor): this;
+ transform(X: Tensor): Tensor;
+ /**
+ * Inverse transform data back to the original feature space.
+ * If `standardize=true`, de-standardizes before applying the inverse power transform.
+ *
+ * @param X - Transformed data (2D tensor)
+ * @returns Data in the original feature space
+ * @throws {NotFittedError} If transformer is not fitted
+ */
+ inverseTransform(X: Tensor): Tensor;
+ private boxCoxTransformValue;
+ private yeoJohnsonTransformValue;
+ private boxCoxInverseValue;
+ private yeoJohnsonInverseValue;
+ private logLikelihood;
+ private optimizeLambda;
+ fitTransform(X: Tensor): Tensor;
+ }
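A PowerTransformer sketch; "yeo-johnson" is the declared default method, and because the lambdas are estimated from the data no exact output is asserted here.

```js
import { PowerTransformer } from 'deepbox/preprocess';
import { tensor } from 'deepbox/ndarray';

// Right-skewed data; the transform pulls in the long tail
const X = tensor([[1], [2], [3], [4], [50]]);
const pt = new PowerTransformer({ method: 'yeo-johnson', standardize: true });
const XGauss = pt.fitTransform(X);
const XBack = pt.inverseTransform(XGauss); // approximately recovers X
```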
+
+ /**
+ * Split arrays into random train and test subsets.
+ *
+ * @param X - Feature matrix (2D tensor)
+ * @param y - Optional target labels (1D tensor)
+ * @param options - Split configuration options
+ * @param options.testSize - Proportion or absolute number of test samples
+ * @param options.trainSize - Proportion or absolute number of train samples
+ * @param options.randomState - Random seed
+ * @param options.shuffle - Whether to shuffle data before splitting
+ * @param options.stratify - If not undefined, data is split in stratified fashion using this as class labels
+ *
+ * @example
+ * ```js
+ * import { trainTestSplit } from 'deepbox/preprocess';
+ * import { tensor } from 'deepbox/ndarray';
+ *
+ * const X = tensor([[1, 2], [3, 4], [5, 6], [7, 8]]);
+ * const y = tensor([0, 1, 0, 1]);
+ * const [XTrain, XTest, yTrain, yTest] = trainTestSplit(X, y, { testSize: 0.25 });
+ * ```
+ *
+ * @see {@link https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html | Scikit-learn train_test_split}
+ */
+ declare function trainTestSplit(X: Tensor, y?: Tensor, options?: {
+ testSize?: number;
+ trainSize?: number;
+ randomState?: number;
+ shuffle?: boolean;
+ stratify?: Tensor;
+ }): Tensor[];
+ /**
+ * K-Folds cross-validator.
+ *
+ * Provides train/test indices to split data in train/test sets.
+ *
+ * @see {@link https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html | Scikit-learn KFold}
+ */
+ declare class KFold {
+ private nSplits;
+ private shuffle;
+ private randomState;
+ constructor(options?: {
+ nSplits?: number;
+ shuffle?: boolean;
+ randomState?: number;
+ });
+ split(X: Tensor): Array<[number[], number[]]>;
+ getNSplits(): number;
+ }
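A KFold sketch; split returns an array of [trainIndices, testIndices] pairs as typed above, and the indices in the comment assume contiguous folds when shuffle is false.

```js
import { KFold } from 'deepbox/preprocess';
import { tensor } from 'deepbox/ndarray';

const X = tensor([[1], [2], [3], [4], [5], [6]]);
const kf = new KFold({ nSplits: 3, shuffle: false });
for (const [trainIdx, testIdx] of kf.split(X)) {
  // With 6 samples and 3 folds, each test set holds 2 indices,
  // e.g. [0, 1], [2, 3], [4, 5] when folds are contiguous.
  console.log(trainIdx, testIdx);
}
```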
+ /**
+ * Stratified K-Folds cross-validator.
+ *
+ * Provides train/test indices while preserving class distribution.
+ *
+ * @see {@link https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.StratifiedKFold.html | Scikit-learn StratifiedKFold}
+ */
+ declare class StratifiedKFold {
+ private nSplits;
+ private shuffle;
+ private randomState;
+ constructor(options?: {
+ nSplits?: number;
+ shuffle?: boolean;
+ randomState?: number;
+ });
+ split(X: Tensor, y: Tensor): Array<[number[], number[]]>;
+ getNSplits(): number;
+ }
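A StratifiedKFold sketch; unlike KFold.split, the declared signature also takes y so each fold can preserve the class proportions.

```js
import { StratifiedKFold } from 'deepbox/preprocess';
import { tensor } from 'deepbox/ndarray';

const X = tensor([[1], [2], [3], [4], [5], [6]]);
const y = tensor([0, 0, 0, 1, 1, 1]);
const skf = new StratifiedKFold({ nSplits: 3, shuffle: true, randomState: 42 });
for (const [trainIdx, testIdx] of skf.split(X, y)) {
  // Each test fold should contain one sample of class 0 and one of class 1.
  console.log(trainIdx, testIdx);
}
```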
+ /**
+ * Group K-Fold cross-validator.
+ *
+ * Ensures same group is not in both train and test.
+ *
+ * @see {@link https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GroupKFold.html | Scikit-learn GroupKFold}
+ */
+ declare class GroupKFold {
+ private nSplits;
+ constructor(options?: {
+ nSplits?: number;
+ });
+ split(X: Tensor, _y: Tensor | undefined, groups: Tensor): Array<[number[], number[]]>;
+ getNSplits(): number;
+ }
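A GroupKFold sketch; the second argument is unused per the declaration (_y), and groups decides which samples must stay on the same side of each split.

```js
import { GroupKFold } from 'deepbox/preprocess';
import { tensor } from 'deepbox/ndarray';

const X = tensor([[1], [2], [3], [4], [5], [6]]);
const groups = tensor([0, 0, 1, 1, 2, 2]);
const gkf = new GroupKFold({ nSplits: 3 });
for (const [trainIdx, testIdx] of gkf.split(X, undefined, groups)) {
  // Samples sharing a group id never appear in both train and test indices.
  console.log(trainIdx, testIdx);
}
```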
+ /**
+ * Leave-One-Out cross-validator.
+ *
+ * @see {@link https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.LeaveOneOut.html | Scikit-learn LeaveOneOut}
+ */
+ declare class LeaveOneOut {
+ split(X: Tensor): Array<[number[], number[]]>;
+ getNSplits(X: Tensor): number;
+ }
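A LeaveOneOut sketch; each split holds out exactly one sample, so getNSplits(X) should equal the number of rows.

```js
import { LeaveOneOut } from 'deepbox/preprocess';
import { tensor } from 'deepbox/ndarray';

const X = tensor([[1], [2], [3]]);
const loo = new LeaveOneOut();
console.log(loo.getNSplits(X)); // 3
for (const [trainIdx, testIdx] of loo.split(X)) {
  console.log(trainIdx, testIdx); // e.g. [1, 2] / [0], then [0, 2] / [1], ...
}
```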
+ /**
+ * Leave-P-Out cross-validator.
+ *
+ * @see {@link https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.LeavePOut.html | Scikit-learn LeavePOut}
+ */
+ declare class LeavePOut {
+ private p;
+ constructor(p: number);
+ split(X: Tensor): Array<[number[], number[]]>;
+ getNSplits(X: Tensor): number;
+ }
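A LeavePOut sketch; with p = 2 every 2-element subset is held out once, so getNSplits(X) should be C(n, 2).

```js
import { LeavePOut } from 'deepbox/preprocess';
import { tensor } from 'deepbox/ndarray';

const X = tensor([[1], [2], [3], [4]]);
const lpo = new LeavePOut(2);
console.log(lpo.getNSplits(X)); // C(4, 2) = 6
for (const [trainIdx, testIdx] of lpo.split(X)) {
  console.log(trainIdx, testIdx); // every pair of samples appears once as the test set
}
```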
+
+ type index_GroupKFold = GroupKFold;
+ declare const index_GroupKFold: typeof GroupKFold;
+ type index_KFold = KFold;
+ declare const index_KFold: typeof KFold;
+ type index_LabelBinarizer = LabelBinarizer;
+ declare const index_LabelBinarizer: typeof LabelBinarizer;
+ type index_LabelEncoder = LabelEncoder;
+ declare const index_LabelEncoder: typeof LabelEncoder;
+ type index_LeaveOneOut = LeaveOneOut;
+ declare const index_LeaveOneOut: typeof LeaveOneOut;
+ type index_LeavePOut = LeavePOut;
+ declare const index_LeavePOut: typeof LeavePOut;
+ type index_MaxAbsScaler = MaxAbsScaler;
+ declare const index_MaxAbsScaler: typeof MaxAbsScaler;
+ type index_MinMaxScaler = MinMaxScaler;
+ declare const index_MinMaxScaler: typeof MinMaxScaler;
+ type index_MultiLabelBinarizer = MultiLabelBinarizer;
+ declare const index_MultiLabelBinarizer: typeof MultiLabelBinarizer;
+ type index_Normalizer = Normalizer;
+ declare const index_Normalizer: typeof Normalizer;
+ type index_OneHotEncoder = OneHotEncoder;
+ declare const index_OneHotEncoder: typeof OneHotEncoder;
+ type index_OrdinalEncoder = OrdinalEncoder;
+ declare const index_OrdinalEncoder: typeof OrdinalEncoder;
+ type index_PowerTransformer = PowerTransformer;
+ declare const index_PowerTransformer: typeof PowerTransformer;
+ type index_QuantileTransformer = QuantileTransformer;
+ declare const index_QuantileTransformer: typeof QuantileTransformer;
+ type index_RobustScaler = RobustScaler;
+ declare const index_RobustScaler: typeof RobustScaler;
+ type index_StandardScaler = StandardScaler;
+ declare const index_StandardScaler: typeof StandardScaler;
+ type index_StratifiedKFold = StratifiedKFold;
+ declare const index_StratifiedKFold: typeof StratifiedKFold;
+ declare const index_trainTestSplit: typeof trainTestSplit;
+ declare namespace index {
+ export { index_GroupKFold as GroupKFold, index_KFold as KFold, index_LabelBinarizer as LabelBinarizer, index_LabelEncoder as LabelEncoder, index_LeaveOneOut as LeaveOneOut, index_LeavePOut as LeavePOut, index_MaxAbsScaler as MaxAbsScaler, index_MinMaxScaler as MinMaxScaler, index_MultiLabelBinarizer as MultiLabelBinarizer, index_Normalizer as Normalizer, index_OneHotEncoder as OneHotEncoder, index_OrdinalEncoder as OrdinalEncoder, index_PowerTransformer as PowerTransformer, index_QuantileTransformer as QuantileTransformer, index_RobustScaler as RobustScaler, index_StandardScaler as StandardScaler, index_StratifiedKFold as StratifiedKFold, index_trainTestSplit as trainTestSplit };
+ }
+
+ export { GroupKFold as G, KFold as K, LabelBinarizer as L, MultiLabelBinarizer as M, Normalizer as N, OneHotEncoder as O, PowerTransformer as P, QuantileTransformer as Q, RobustScaler as R, StandardScaler as S, LabelEncoder as a, OrdinalEncoder as b, MaxAbsScaler as c, MinMaxScaler as d, LeaveOneOut as e, LeavePOut as f, StratifiedKFold as g, index as i, trainTestSplit as t };