deepbox 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173)
  1. package/LICENSE +21 -0
  2. package/README.md +344 -0
  3. package/dist/CSRMatrix-CwGwQRea.d.cts +219 -0
  4. package/dist/CSRMatrix-KzNt6QpS.d.ts +219 -0
  5. package/dist/Tensor-BQLk1ltW.d.cts +147 -0
  6. package/dist/Tensor-g8mUClel.d.ts +147 -0
  7. package/dist/chunk-4S73VUBD.js +677 -0
  8. package/dist/chunk-4S73VUBD.js.map +1 -0
  9. package/dist/chunk-5R4S63PF.js +2925 -0
  10. package/dist/chunk-5R4S63PF.js.map +1 -0
  11. package/dist/chunk-6AE5FKKQ.cjs +9264 -0
  12. package/dist/chunk-6AE5FKKQ.cjs.map +1 -0
  13. package/dist/chunk-AD436M45.js +3854 -0
  14. package/dist/chunk-AD436M45.js.map +1 -0
  15. package/dist/chunk-ALS7ETWZ.cjs +4263 -0
  16. package/dist/chunk-ALS7ETWZ.cjs.map +1 -0
  17. package/dist/chunk-AU7XHGKJ.js +2092 -0
  18. package/dist/chunk-AU7XHGKJ.js.map +1 -0
  19. package/dist/chunk-B5TNKUEY.js +1481 -0
  20. package/dist/chunk-B5TNKUEY.js.map +1 -0
  21. package/dist/chunk-BCR7G3A6.js +9136 -0
  22. package/dist/chunk-BCR7G3A6.js.map +1 -0
  23. package/dist/chunk-C4PKXY74.cjs +1917 -0
  24. package/dist/chunk-C4PKXY74.cjs.map +1 -0
  25. package/dist/chunk-DWZY6PIP.cjs +6400 -0
  26. package/dist/chunk-DWZY6PIP.cjs.map +1 -0
  27. package/dist/chunk-E3EU5FZO.cjs +2113 -0
  28. package/dist/chunk-E3EU5FZO.cjs.map +1 -0
  29. package/dist/chunk-F3JWBINJ.js +1054 -0
  30. package/dist/chunk-F3JWBINJ.js.map +1 -0
  31. package/dist/chunk-FJYLIGJX.js +1940 -0
  32. package/dist/chunk-FJYLIGJX.js.map +1 -0
  33. package/dist/chunk-JSCDE774.cjs +729 -0
  34. package/dist/chunk-JSCDE774.cjs.map +1 -0
  35. package/dist/chunk-LWECRCW2.cjs +2412 -0
  36. package/dist/chunk-LWECRCW2.cjs.map +1 -0
  37. package/dist/chunk-MLBMYKCG.js +6379 -0
  38. package/dist/chunk-MLBMYKCG.js.map +1 -0
  39. package/dist/chunk-OX6QXFMV.cjs +3874 -0
  40. package/dist/chunk-OX6QXFMV.cjs.map +1 -0
  41. package/dist/chunk-PHV2DKRS.cjs +1072 -0
  42. package/dist/chunk-PHV2DKRS.cjs.map +1 -0
  43. package/dist/chunk-PL7TAYKI.js +4056 -0
  44. package/dist/chunk-PL7TAYKI.js.map +1 -0
  45. package/dist/chunk-PR647I7R.js +1898 -0
  46. package/dist/chunk-PR647I7R.js.map +1 -0
  47. package/dist/chunk-QERHVCHC.cjs +2960 -0
  48. package/dist/chunk-QERHVCHC.cjs.map +1 -0
  49. package/dist/chunk-XEG44RF6.cjs +1514 -0
  50. package/dist/chunk-XEG44RF6.cjs.map +1 -0
  51. package/dist/chunk-XMWVME2W.js +2377 -0
  52. package/dist/chunk-XMWVME2W.js.map +1 -0
  53. package/dist/chunk-ZB75FESB.cjs +1979 -0
  54. package/dist/chunk-ZB75FESB.cjs.map +1 -0
  55. package/dist/chunk-ZLW62TJG.cjs +4061 -0
  56. package/dist/chunk-ZLW62TJG.cjs.map +1 -0
  57. package/dist/chunk-ZXKBDFP3.js +4235 -0
  58. package/dist/chunk-ZXKBDFP3.js.map +1 -0
  59. package/dist/core/index.cjs +204 -0
  60. package/dist/core/index.cjs.map +1 -0
  61. package/dist/core/index.d.cts +2 -0
  62. package/dist/core/index.d.ts +2 -0
  63. package/dist/core/index.js +3 -0
  64. package/dist/core/index.js.map +1 -0
  65. package/dist/dataframe/index.cjs +22 -0
  66. package/dist/dataframe/index.cjs.map +1 -0
  67. package/dist/dataframe/index.d.cts +3 -0
  68. package/dist/dataframe/index.d.ts +3 -0
  69. package/dist/dataframe/index.js +5 -0
  70. package/dist/dataframe/index.js.map +1 -0
  71. package/dist/datasets/index.cjs +134 -0
  72. package/dist/datasets/index.cjs.map +1 -0
  73. package/dist/datasets/index.d.cts +3 -0
  74. package/dist/datasets/index.d.ts +3 -0
  75. package/dist/datasets/index.js +5 -0
  76. package/dist/datasets/index.js.map +1 -0
  77. package/dist/index-74AB8Cyh.d.cts +1126 -0
  78. package/dist/index-9oQx1HgV.d.cts +1180 -0
  79. package/dist/index-BJY2SI4i.d.ts +483 -0
  80. package/dist/index-BWGhrDlr.d.ts +733 -0
  81. package/dist/index-B_DK4FKY.d.cts +242 -0
  82. package/dist/index-BbA2Gxfl.d.ts +456 -0
  83. package/dist/index-BgHYAoSS.d.cts +837 -0
  84. package/dist/index-BndMbqsM.d.ts +1439 -0
  85. package/dist/index-C1mfVYoo.d.ts +2517 -0
  86. package/dist/index-CCvlwAmL.d.cts +809 -0
  87. package/dist/index-CDw5CnOU.d.ts +785 -0
  88. package/dist/index-Cn3SdB0O.d.ts +1126 -0
  89. package/dist/index-CrqLlS-a.d.ts +776 -0
  90. package/dist/index-D61yaSMY.d.cts +483 -0
  91. package/dist/index-D9Loo1_A.d.cts +2517 -0
  92. package/dist/index-DIT_OO9C.d.cts +785 -0
  93. package/dist/index-DIp_RrRt.d.ts +242 -0
  94. package/dist/index-DbultU6X.d.cts +1427 -0
  95. package/dist/index-DmEg_LCm.d.cts +776 -0
  96. package/dist/index-DoPWVxPo.d.cts +1439 -0
  97. package/dist/index-DuCxd-8d.d.ts +837 -0
  98. package/dist/index-Dx42TZaY.d.ts +809 -0
  99. package/dist/index-DyZ4QQf5.d.cts +456 -0
  100. package/dist/index-GFAVyOWO.d.ts +1427 -0
  101. package/dist/index-WHQLn0e8.d.cts +733 -0
  102. package/dist/index-ZtI1Iy4L.d.ts +1180 -0
  103. package/dist/index-eJgeni9c.d.cts +1911 -0
  104. package/dist/index-tk4lSYod.d.ts +1911 -0
  105. package/dist/index.cjs +72 -0
  106. package/dist/index.cjs.map +1 -0
  107. package/dist/index.d.cts +17 -0
  108. package/dist/index.d.ts +17 -0
  109. package/dist/index.js +15 -0
  110. package/dist/index.js.map +1 -0
  111. package/dist/linalg/index.cjs +86 -0
  112. package/dist/linalg/index.cjs.map +1 -0
  113. package/dist/linalg/index.d.cts +3 -0
  114. package/dist/linalg/index.d.ts +3 -0
  115. package/dist/linalg/index.js +5 -0
  116. package/dist/linalg/index.js.map +1 -0
  117. package/dist/metrics/index.cjs +158 -0
  118. package/dist/metrics/index.cjs.map +1 -0
  119. package/dist/metrics/index.d.cts +3 -0
  120. package/dist/metrics/index.d.ts +3 -0
  121. package/dist/metrics/index.js +5 -0
  122. package/dist/metrics/index.js.map +1 -0
  123. package/dist/ml/index.cjs +87 -0
  124. package/dist/ml/index.cjs.map +1 -0
  125. package/dist/ml/index.d.cts +3 -0
  126. package/dist/ml/index.d.ts +3 -0
  127. package/dist/ml/index.js +6 -0
  128. package/dist/ml/index.js.map +1 -0
  129. package/dist/ndarray/index.cjs +501 -0
  130. package/dist/ndarray/index.cjs.map +1 -0
  131. package/dist/ndarray/index.d.cts +5 -0
  132. package/dist/ndarray/index.d.ts +5 -0
  133. package/dist/ndarray/index.js +4 -0
  134. package/dist/ndarray/index.js.map +1 -0
  135. package/dist/nn/index.cjs +142 -0
  136. package/dist/nn/index.cjs.map +1 -0
  137. package/dist/nn/index.d.cts +6 -0
  138. package/dist/nn/index.d.ts +6 -0
  139. package/dist/nn/index.js +5 -0
  140. package/dist/nn/index.js.map +1 -0
  141. package/dist/optim/index.cjs +77 -0
  142. package/dist/optim/index.cjs.map +1 -0
  143. package/dist/optim/index.d.cts +4 -0
  144. package/dist/optim/index.d.ts +4 -0
  145. package/dist/optim/index.js +4 -0
  146. package/dist/optim/index.js.map +1 -0
  147. package/dist/plot/index.cjs +114 -0
  148. package/dist/plot/index.cjs.map +1 -0
  149. package/dist/plot/index.d.cts +6 -0
  150. package/dist/plot/index.d.ts +6 -0
  151. package/dist/plot/index.js +5 -0
  152. package/dist/plot/index.js.map +1 -0
  153. package/dist/preprocess/index.cjs +82 -0
  154. package/dist/preprocess/index.cjs.map +1 -0
  155. package/dist/preprocess/index.d.cts +4 -0
  156. package/dist/preprocess/index.d.ts +4 -0
  157. package/dist/preprocess/index.js +5 -0
  158. package/dist/preprocess/index.js.map +1 -0
  159. package/dist/random/index.cjs +74 -0
  160. package/dist/random/index.cjs.map +1 -0
  161. package/dist/random/index.d.cts +3 -0
  162. package/dist/random/index.d.ts +3 -0
  163. package/dist/random/index.js +5 -0
  164. package/dist/random/index.js.map +1 -0
  165. package/dist/stats/index.cjs +142 -0
  166. package/dist/stats/index.cjs.map +1 -0
  167. package/dist/stats/index.d.cts +3 -0
  168. package/dist/stats/index.d.ts +3 -0
  169. package/dist/stats/index.js +5 -0
  170. package/dist/stats/index.js.map +1 -0
  171. package/dist/tensor-B96jjJLQ.d.cts +205 -0
  172. package/dist/tensor-B96jjJLQ.d.ts +205 -0
  173. package/package.json +226 -0
@@ -0,0 +1,1439 @@
1
+ import { A as AnyTensor } from './index-eJgeni9c.cjs';
2
+ import { D as DType, a as Device, A as Axis } from './tensor-B96jjJLQ.cjs';
3
+ import { G as GradTensor } from './index-B_DK4FKY.cjs';
4
+ import { T as Tensor } from './Tensor-BQLk1ltW.cjs';
5
+
6
+ type StateEntry = {
7
+ data: Array<number | string | bigint>;
8
+ dtype: DType;
9
+ shape: number[];
10
+ };
11
+ /**
12
+ * Hook function called before the forward pass.
13
+ *
14
+ * @param module - The module being called
15
+ * @param inputs - The input tensors to the forward pass
16
+ * @returns Modified inputs array, or undefined to keep original inputs
17
+ */
18
+ type ForwardPreHook = (module: Module, inputs: AnyTensor[]) => AnyTensor[] | undefined;
19
+ /**
20
+ * Hook function called after the forward pass.
21
+ *
22
+ * @param module - The module being called
23
+ * @param inputs - The input tensors to the forward pass
24
+ * @param output - The output tensor from the forward pass
25
+ * @returns Modified output tensor, or undefined to keep original output
26
+ */
27
+ type ForwardHook = (module: Module, inputs: AnyTensor[], output: AnyTensor) => AnyTensor | undefined;
28
+ /**
29
+ * Base class for all neural network modules.
30
+ *
31
+ * All models should subclass this class. Modules can contain other modules,
32
+ * allowing them to be nested in a tree structure.
33
+ *
34
+ * This is analogous to PyTorch's nn.Module.
35
+ *
36
+ * @example
37
+ * ```ts
38
+ * import { Module, Linear, ReLU } from 'deepbox/nn';
39
+ * import type { Tensor } from 'deepbox/ndarray';
40
+ *
41
+ * class MyModel extends Module {
42
+ * private fc1: Linear;
43
+ * private relu: ReLU;
44
+ * private fc2: Linear;
45
+ *
46
+ * constructor() {
47
+ * super();
48
+ * this.fc1 = new Linear(10, 5);
49
+ * this.relu = new ReLU();
50
+ * this.fc2 = new Linear(5, 2);
51
+ * this.registerModule('fc1', this.fc1);
52
+ * this.registerModule('relu', this.relu);
53
+ * this.registerModule('fc2', this.fc2);
54
+ * }
55
+ *
56
+ * forward(x: Tensor): Tensor {
57
+ * let out = this.fc1.forward(x);
58
+ * out = this.relu.forward(out);
59
+ * out = this.fc2.forward(out);
60
+ * return out;
61
+ * }
62
+ * }
63
+ * ```
64
+ *
65
+ * References:
66
+ * - PyTorch nn.Module: https://pytorch.org/docs/stable/generated/torch.nn.Module.html
67
+ *
68
+ * @category Neural Networks
69
+ */
70
+ declare abstract class Module {
71
+ /** Child modules registered to this module - stores nested layers/modules */
72
+ protected _modules: Map<string, Module>;
73
+ /** Parameters of this module - trainable tensors (weights, biases) wrapped as GradTensor */
74
+ protected _parameters: Map<string, GradTensor>;
75
+ /** Buffers (non-trainable tensors) of this module - e.g., running stats in BatchNorm */
76
+ protected _buffers: Map<string, Tensor>;
77
+ /** Training mode flag - affects behavior of layers like Dropout and BatchNorm */
78
+ protected _training: boolean;
79
+ /** Forward pre-hooks registered on this module */
80
+ private _forwardPreHooks;
81
+ /** Forward hooks registered on this module */
82
+ private _forwardHooks;
83
+ /** Incrementing hook id */
84
+ private _nextHookId;
85
+ /**
86
+ * Forward pass of the module.
87
+ *
88
+ * Should be overridden by all subclasses. Accepts either regular Tensors
89
+ * or GradTensors for automatic differentiation support.
90
+ *
91
+ * @param inputs - Input tensors (Tensor or GradTensor)
92
+ * @returns Output tensor (Tensor or GradTensor depending on input and layer type)
93
+ *
94
+ * @example
95
+ * ```ts
96
+ * // Using with regular Tensor
97
+ * const output = model.forward(inputTensor);
98
+ *
99
+ * // Using with GradTensor for training
100
+ * const gradOutput = model.forward(gradInput);
101
+ * gradOutput.backward();
102
+ * ```
103
+ */
104
+ abstract forward(...inputs: AnyTensor[]): AnyTensor;
105
+ /**
106
+ * Makes the module callable (allows using `module(x)` instead of `module.forward(x)`).
107
+ *
108
+ * @param inputs - Input tensors (Tensor or GradTensor)
109
+ * @returns Output tensor
110
+ */
111
+ call(...inputs: AnyTensor[]): AnyTensor;
112
+ /**
113
+ * Register a child module.
114
+ *
115
+ * @param name - Name of the module
116
+ * @param module - The module to register
117
+ */
118
+ protected registerModule(name: string, module: Module): void;
119
+ /**
120
+ * Register a parameter (trainable tensor).
121
+ *
122
+ * Parameters must be GradTensor instances with requiresGrad=true for
123
+ * proper gradient computation during backpropagation.
124
+ *
125
+ * @param name - Name of the parameter
126
+ * @param param - The parameter tensor (must be GradTensor)
127
+ */
128
+ protected registerParameter(name: string, param: GradTensor): void;
129
+ /**
130
+ * Register a buffer (non-trainable tensor).
131
+ *
132
+ * Buffers are typically used for running statistics in batch normalization.
133
+ *
134
+ * @param name - Name of the buffer
135
+ * @param buffer - The buffer tensor
136
+ */
137
+ protected registerBuffer(name: string, buffer: Tensor): void;
138
+ /**
139
+ * Get all parameters of this module and its children.
140
+ *
141
+ * Returns GradTensor instances that are compatible with optimizers.
142
+ * This enables direct usage with optimizer constructors:
143
+ * ```ts
144
+ * const optimizer = new Adam(model.parameters());
145
+ * ```
146
+ *
147
+ * @param recurse - Whether to include parameters of child modules
148
+ * @returns Iterator of GradTensor parameters
149
+ */
150
+ parameters(recurse?: boolean): Generator<GradTensor>;
151
+ /**
152
+ * Get all named parameters of this module and its children.
153
+ *
154
+ * @param prefix - Prefix for parameter names
155
+ * @param recurse - Whether to include parameters of child modules
156
+ * @returns Iterator of [name, parameter] pairs
157
+ */
158
+ namedParameters(prefix?: string, recurse?: boolean): Generator<[string, GradTensor]>;
159
+ /**
160
+ * Get all child modules.
161
+ *
162
+ * @param recurse - Whether to include nested child modules
163
+ * @returns Iterator of modules
164
+ */
165
+ modules(recurse?: boolean): Generator<Module>;
166
+ /**
167
+ * Get all named child modules.
168
+ *
169
+ * @param prefix - Prefix for module names
170
+ * @param recurse - Whether to include nested child modules
171
+ * @returns Iterator of [name, module] pairs
172
+ */
173
+ namedModules(prefix?: string, recurse?: boolean): Generator<[string, Module]>;
174
+ /**
175
+ * Set the module in training mode.
176
+ *
177
+ * This affects certain layers like Dropout and BatchNorm.
178
+ *
179
+ * @param mode - Training mode (true) or evaluation mode (false)
180
+ * @returns this
181
+ */
182
+ train(mode?: boolean): this;
183
+ /**
184
+ * Set the module in evaluation mode.
185
+ *
186
+ * This is equivalent to calling `train(false)`.
187
+ *
188
+ * @returns this
189
+ */
190
+ eval(): this;
191
+ /**
192
+ * Check if the module is in training mode.
193
+ *
194
+ * @returns true if in training mode
195
+ */
196
+ get training(): boolean;
197
+ /**
198
+ * Zero out the gradients of all parameters.
199
+ *
200
+ * Call this before each training iteration to prevent gradient accumulation
201
+ * from previous iterations.
202
+ *
203
+ * For parameters wrapped in GradTensor, this calls zeroGrad() on each.
204
+ * For regular Tensors, this is a no-op until they are converted to GradTensor.
205
+ *
206
+ * @example
207
+ * ```ts
208
+ * model.zeroGrad();
209
+ * const output = model.forward(input);
210
+ * // ... compute loss and backward
211
+ * optimizer.step();
212
+ * ```
213
+ */
214
+ zeroGrad(): void;
215
+ /**
216
+ * Get all buffers of this module and its children.
217
+ */
218
+ buffers(recurse?: boolean): Generator<Tensor>;
219
+ /**
220
+ * Get all named buffers of this module and its children.
221
+ */
222
+ namedBuffers(prefix?: string, recurse?: boolean): Generator<[string, Tensor]>;
223
+ /**
224
+ * Freeze specific parameters by name (or all if none provided).
225
+ *
226
+ * **⚠️ IMPORTANT**: This method creates new GradTensor instances with updated
227
+ * `requiresGrad` flags. Any external references to the old parameter objects
228
+ * will become stale. If you're using an optimizer that holds parameter references,
229
+ * you should recreate the optimizer after freezing/unfreezing parameters.
230
+ *
231
+ * @param names - Array of parameter names to freeze (e.g., ['fc1.weight']). If undefined, freezes all parameters.
232
+ * @param recurse - Whether to include parameters from child modules (default: true)
233
+ *
234
+ * @example
235
+ * ```ts
236
+ * const model = new MyModel();
237
+ * // Freeze only the first layer's weights
238
+ * model.freezeParameters(['fc1.weight']);
239
+ * // Note: Recreate optimizer after freezing
240
+ * const optimizer = new Adam(model.parameters());
241
+ * ```
242
+ */
243
+ freezeParameters(names?: string[], recurse?: boolean): void;
244
+ /**
245
+ * Unfreeze specific parameters by name (or all if none provided).
246
+ *
247
+ * **⚠️ IMPORTANT**: This method creates new GradTensor instances with updated
248
+ * `requiresGrad` flags. Any external references to the old parameter objects
249
+ * will become stale. If you're using an optimizer that holds parameter references,
250
+ * you should recreate the optimizer after freezing/unfreezing parameters.
251
+ *
252
+ * @param names - Array of parameter names to unfreeze (e.g., ['fc1.weight']). If undefined, unfreezes all parameters.
253
+ * @param recurse - Whether to include parameters from child modules (default: true)
254
+ *
255
+ * @example
256
+ * ```ts
257
+ * const model = new MyModel();
258
+ * model.freezeParameters(); // Freeze all
259
+ * model.unfreezeParameters(['fc2.weight']); // Unfreeze only fc2 weights
260
+ * // Note: Recreate optimizer after unfreezing
261
+ * const optimizer = new Adam(model.parameters());
262
+ * ```
263
+ */
264
+ unfreezeParameters(names?: string[], recurse?: boolean): void;
265
+ private setRequiresGradForNames;
266
+ private resolveModuleAndName;
267
+ private static setTensorDeviceMetadata;
268
+ /**
269
+ * Get the state dictionary of the module.
270
+ */
271
+ stateDict(): {
272
+ parameters: Record<string, StateEntry>;
273
+ buffers: Record<string, StateEntry>;
274
+ };
275
+ /**
276
+ * Load state dictionary into the module.
277
+ */
278
+ loadStateDict(stateDict: {
279
+ parameters?: Record<string, StateEntry>;
280
+ buffers?: Record<string, StateEntry>;
281
+ }): void;
282
+ /**
283
+ * Move module to a specific device.
284
+ *
285
+ * **⚠️ WARNING**: This is a metadata-only operation. It updates the device
286
+ * property on parameters and buffers but does NOT actually transfer data
287
+ * between devices. Actual device data transfer requires device-specific
288
+ * memory management which is not yet implemented.
289
+ *
290
+ * This method is provided for API compatibility and future extensibility.
291
+ * Currently, it only updates the `device` metadata field.
292
+ *
293
+ * @param device - Target device identifier (e.g., 'cpu', 'webgpu', 'wasm')
294
+ * @returns this module for method chaining
295
+ *
296
+ * @example
297
+ * ```ts
298
+ * const model = new Linear(10, 5);
299
+ * model.to('webgpu'); // Updates device metadata only
300
+ * ```
301
+ */
302
+ to(device: Device): this;
303
+ /**
304
+ * Apply a function to all modules recursively.
305
+ */
306
+ apply(fn: (module: Module) => void): this;
307
+ /**
308
+ * Register a forward pre-hook.
309
+ */
310
+ registerForwardPreHook(hook: ForwardPreHook): () => void;
311
+ /**
312
+ * Register a forward hook.
313
+ */
314
+ registerForwardHook(hook: ForwardHook): () => void;
315
+ /**
316
+ * Get string representation of the module.
317
+ *
318
+ * @returns Hierarchical string representation showing module structure
319
+ */
320
+ toString(): string;
321
+ }
322
+
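The hook and state-dictionary methods declared above carry no usage example in the JSDoc. A minimal sketch, using only signatures that appear in this file (`Linear` is declared further below; `tensor` and the `deepbox/nn` / `deepbox/ndarray` import paths follow the examples elsewhere in this file):

```ts
import { Linear } from 'deepbox/nn';
import { tensor } from 'deepbox/ndarray';

const layer = new Linear(4, 2);

// registerForwardHook returns a disposer that removes the hook again.
const removeHook = layer.registerForwardHook((module, _inputs, _output) => {
  console.log(`${module.constructor.name} ran a forward pass`);
  return undefined; // keep the original output
});

layer.forward(tensor([[1, 2, 3, 4]]));

// Round-trip parameters and buffers through the plain-data state dictionary.
const state = layer.stateDict();
layer.loadStateDict(state);

removeHook();
```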
323
+ /**
324
+ * Sequential container for stacking layers in a linear pipeline.
325
+ *
326
+ * **Purpose:**
327
+ * - Simplifies model construction by chaining layers sequentially
328
+ * - Automatically manages forward pass through all layers
329
+ * - Provides clean API for building feedforward networks
330
+ *
331
+ * **Behavior:**
332
+ * The output of each layer becomes the input to the next layer.
333
+ * Layers are executed in the order they were added.
334
+ *
335
+ * @example
336
+ * ```ts
337
+ * import { Sequential, Linear, ReLU, Dropout } from 'deepbox/nn';
338
+ * import { tensor } from 'deepbox/ndarray';
339
+ *
340
+ * // Create a simple feedforward network
341
+ * const model = new Sequential(
342
+ * new Linear(784, 256),
343
+ * new ReLU(),
344
+ * new Dropout(0.5),
345
+ * new Linear(256, 10)
346
+ * );
347
+ *
348
+ * const input = tensor(new Array(784).fill(0));
349
+ * const output = model.forward(input);
350
+ * ```
351
+ *
352
+ * @example
353
+ * ```ts
354
+ * // Access individual layers
355
+ * const model = new Sequential(
356
+ * new Linear(10, 5),
357
+ * new ReLU()
358
+ * );
359
+ *
360
+ * const firstLayer = model.getLayer(0); // Linear layer
361
+ * const layerCount = model.length; // 2
362
+ * ```
363
+ *
364
+ * References:
365
+ * - PyTorch Sequential: https://pytorch.org/docs/stable/generated/torch.nn.Sequential.html
366
+ * - Keras Sequential: https://keras.io/guides/sequential_model/
367
+ *
368
+ * @category Neural Network Containers
369
+ */
370
+ declare class Sequential extends Module {
371
+ /** Array of layers in sequential order */
372
+ private readonly layers;
373
+ /**
374
+ * Create a new Sequential container.
375
+ *
376
+ * @param layers - Variable number of Module instances to stack sequentially
377
+ * @throws {InvalidParameterError} If no layers are provided
378
+ * @throws {DeepboxError} If a layer is undefined
379
+ */
380
+ constructor(...layers: Module[]);
381
+ /**
382
+ * Forward pass: sequentially apply all layers.
383
+ *
384
+ * The output of each layer becomes the input to the next layer.
385
+ *
386
+ * @param input - Input tensor (Tensor or GradTensor)
387
+ * @returns Output tensor after passing through all layers
388
+ * @throws {InvalidParameterError} If the input count is invalid or a layer returns multiple outputs
389
+ * @throws {DeepboxError} If a layer is undefined
390
+ */
391
+ forward(...inputs: AnyTensor[]): AnyTensor;
392
+ /**
393
+ * Get a layer by index.
394
+ *
395
+ * @param index - Zero-based index of the layer
396
+ * @returns The layer at the specified index
397
+ * @throws {IndexError} If index is out of bounds
398
+ * @throws {DeepboxError} If a layer is undefined
399
+ */
400
+ getLayer(index: number): Module;
401
+ /**
402
+ * Get the number of layers in the sequential container.
403
+ */
404
+ get length(): number;
405
+ /**
406
+ * Get string representation showing all layers.
407
+ *
408
+ * @returns Multi-line string with each layer on a separate line
409
+ */
410
+ toString(): string;
411
+ /**
412
+ * Iterate over all layers.
413
+ *
414
+ * @returns Iterator of layers
415
+ */
416
+ [Symbol.iterator](): Iterator<Module>;
417
+ }
418
+
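The Module docs above describe the `zeroGrad()` → forward → backward → `optimizer.step()` pattern and show `new Adam(model.parameters())`, but never in one place. The sketch below stitches those pieces together; the `deepbox/optim` import path for `Adam` is an assumption based on the `dist/optim` entry in the file list, since only the constructor call appears in the JSDoc above.

```ts
import { Sequential, Linear, ReLU } from 'deepbox/nn';
// Assumed import path for Adam; only `new Adam(model.parameters())` is shown above.
import { Adam } from 'deepbox/optim';

const model = new Sequential(
  new Linear(4, 8),
  new ReLU(),
  new Linear(8, 1)
);

// parameters() yields GradTensors that the optimizer can consume directly.
const optimizer = new Adam(model.parameters());

// One training step, following the pattern from the Module docs:
model.zeroGrad();
// const loss = ...;   // compute a GradTensor loss from model.forward(...)
// loss.backward();
optimizer.step();
```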
419
+ /**
420
+ * Applies the Rectified Linear Unit (ReLU) activation function element-wise.
421
+ *
422
+ * ReLU(x) = max(0, x)
423
+ *
424
+ * @category Neural Network Layers
425
+ */
426
+ declare class ReLU extends Module {
427
+ forward(input: GradTensor): GradTensor;
428
+ forward(input: Tensor): Tensor;
429
+ toString(): string;
430
+ }
431
+ /**
432
+ * Applies the Sigmoid activation function element-wise.
433
+ *
434
+ * Sigmoid(x) = 1 / (1 + exp(-x))
435
+ *
436
+ * @category Neural Network Layers
437
+ */
438
+ declare class Sigmoid extends Module {
439
+ forward(input: GradTensor): GradTensor;
440
+ forward(input: Tensor): Tensor;
441
+ toString(): string;
442
+ }
443
+ /**
444
+ * Applies the Hyperbolic Tangent (Tanh) activation function element-wise.
445
+ *
446
+ * Tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
447
+ *
448
+ * @category Neural Network Layers
449
+ */
450
+ declare class Tanh extends Module {
451
+ forward(input: GradTensor): GradTensor;
452
+ forward(input: Tensor): Tensor;
453
+ toString(): string;
454
+ }
455
+ /**
456
+ * Applies the Leaky Rectified Linear Unit (Leaky ReLU) activation.
457
+ *
458
+ * LeakyReLU(x) = max(alpha * x, x)
459
+ *
460
+ * @category Neural Network Layers
461
+ */
462
+ declare class LeakyReLU extends Module {
463
+ private readonly alpha;
464
+ constructor(alpha?: number);
465
+ forward(input: GradTensor): GradTensor;
466
+ forward(input: Tensor): Tensor;
467
+ toString(): string;
468
+ }
469
+ /**
470
+ * Applies the Exponential Linear Unit (ELU) activation.
471
+ *
472
+ * ELU(x) = x if x > 0, else alpha * (exp(x) - 1)
473
+ *
474
+ * @category Neural Network Layers
475
+ */
476
+ declare class ELU extends Module {
477
+ private readonly alpha;
478
+ constructor(alpha?: number);
479
+ forward(input: GradTensor): GradTensor;
480
+ forward(input: Tensor): Tensor;
481
+ toString(): string;
482
+ }
483
+ /**
484
+ * Applies the Gaussian Error Linear Unit (GELU) activation.
485
+ *
486
+ * GELU(x) = x * Phi(x), where Phi is the CDF of the standard normal distribution
487
+ *
488
+ * @category Neural Network Layers
489
+ */
490
+ declare class GELU extends Module {
491
+ forward(input: GradTensor): GradTensor;
492
+ forward(input: Tensor): Tensor;
493
+ toString(): string;
494
+ }
495
+ /**
496
+ * Applies the Softmax activation function.
497
+ *
498
+ * Softmax(x_i) = exp(x_i) / sum(exp(x_j))
499
+ *
500
+ * @category Neural Network Layers
501
+ */
502
+ declare class Softmax extends Module {
503
+ private readonly axis;
504
+ constructor(axis?: Axis);
505
+ forward(input: GradTensor): GradTensor;
506
+ forward(input: Tensor): Tensor;
507
+ toString(): string;
508
+ }
509
+ /**
510
+ * Applies the Log Softmax activation function.
511
+ *
512
+ * LogSoftmax(x_i) = log(exp(x_i) / sum(exp(x_j)))
513
+ *
514
+ * @category Neural Network Layers
515
+ */
516
+ declare class LogSoftmax extends Module {
517
+ private readonly axis;
518
+ constructor(axis?: Axis);
519
+ forward(input: GradTensor): GradTensor;
520
+ forward(input: Tensor): Tensor;
521
+ toString(): string;
522
+ }
523
+ /**
524
+ * Applies the Softplus activation function.
525
+ *
526
+ * Softplus(x) = log(1 + exp(x))
527
+ *
528
+ * @category Neural Network Layers
529
+ */
530
+ declare class Softplus extends Module {
531
+ forward(input: GradTensor): GradTensor;
532
+ forward(input: Tensor): Tensor;
533
+ toString(): string;
534
+ }
535
+ /**
536
+ * Applies the Swish activation function (also known as SiLU).
537
+ *
538
+ * Swish(x) = x * sigmoid(x)
539
+ *
540
+ * @category Neural Network Layers
541
+ */
542
+ declare class Swish extends Module {
543
+ forward(input: GradTensor): GradTensor;
544
+ forward(input: Tensor): Tensor;
545
+ toString(): string;
546
+ }
547
+ /**
548
+ * Applies the Mish activation function.
549
+ *
550
+ * Mish(x) = x * tanh(softplus(x))
551
+ *
552
+ * @category Neural Network Layers
553
+ */
554
+ declare class Mish extends Module {
555
+ forward(input: GradTensor): GradTensor;
556
+ forward(input: Tensor): Tensor;
557
+ toString(): string;
558
+ }
559
+
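None of the activation classes above ship a usage example. A minimal sketch using the same imports as the other examples in this file (the default `Softmax` axis is left to the library):

```ts
import { ReLU, LeakyReLU, Softmax } from 'deepbox/nn';
import { tensor } from 'deepbox/ndarray';

const x = tensor([[-1.0, 0.0, 2.0]]);

const relu = new ReLU();
const activated = relu.forward(x);   // max(0, x), element-wise

const leaky = new LeakyReLU(0.1);    // alpha = 0.1
const leaked = leaky.forward(x);

const softmax = new Softmax();       // default axis
const probs = softmax.forward(x);
```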
560
+ /**
561
+ * Multi-Head Attention mechanism.
562
+ *
563
+ * Allows the model to jointly attend to information from different representation
564
+ * subspaces at different positions. This is the core building block of Transformers.
565
+ *
566
+ * **Mathematical Formulation**:
567
+ * ```
568
+ * Attention(Q, K, V) = softmax(Q * K^T / sqrt(d_k)) * V
569
+ * MultiHead(Q, K, V) = Concat(head_1, ..., head_h) * W_O
570
+ * where head_i = Attention(Q * W_Q^i, K * W_K^i, V * W_V^i)
571
+ * ```
572
+ *
573
+ * @example
574
+ * ```ts
575
+ * import { MultiheadAttention } from 'deepbox/nn';
576
+ * import { tensor } from 'deepbox/ndarray';
577
+ *
578
+ * const mha = new MultiheadAttention(512, 8);
579
+ * const x = tensor([[/* ... sequence data ... *\/]]);
580
+ * const output = mha.forward(x, x, x);
581
+ * ```
582
+ *
583
+ * @see {@link https://pytorch.org/docs/stable/generated/torch.nn.MultiheadAttention.html | PyTorch MultiheadAttention}
584
+ * @see Vaswani et al. (2017) "Attention Is All You Need"
585
+ */
586
+ declare class MultiheadAttention extends Module {
587
+ /** Embedding dimension */
588
+ private readonly embedDim;
589
+ /** Number of attention heads */
590
+ private readonly numHeads;
591
+ /** Dimension of each head */
592
+ private readonly headDim;
593
+ /** Scaling factor for dot product attention */
594
+ private readonly scale;
595
+ /** Whether to add bias to projections */
596
+ private readonly useBias;
597
+ /** Dropout probability applied to attention weights */
598
+ private readonly dropout;
599
+ /** Query projection weights (embedDim, embedDim) */
600
+ private wQ;
601
+ private bQ?;
602
+ /** Key projection weights (embedDim, embedDim) */
603
+ private wK;
604
+ private bK?;
605
+ /** Value projection weights (embedDim, embedDim) */
606
+ private wV;
607
+ private bV?;
608
+ /** Output projection weights (embedDim, embedDim) */
609
+ private wO;
610
+ private bO?;
611
+ /**
612
+ * Create a new MultiheadAttention layer.
613
+ *
614
+ * @param embedDim - Total dimension of the model (must be divisible by numHeads)
615
+ * @param numHeads - Number of parallel attention heads
616
+ * @param options - Configuration options
617
+ * @param options.bias - Whether to add bias to projections (default: true)
618
+ * @param options.dropout - Dropout probability applied to attention weights (default: 0.0)
619
+ */
620
+ constructor(embedDim: number, numHeads: number, options?: {
621
+ readonly bias?: boolean;
622
+ readonly dropout?: number;
623
+ });
624
+ /**
625
+ * Forward pass of multi-head attention.
626
+ *
627
+ * @param query - Query tensor of shape (batch, seqLen, embedDim)
628
+ * @param key - Key tensor of shape (batch, seqLen, embedDim)
629
+ * @param value - Value tensor of shape (batch, seqLen, embedDim)
630
+ * @returns Output tensor of same shape as query
631
+ */
632
+ forward(...inputs: AnyTensor[]): GradTensor;
633
+ toString(): string;
634
+ }
635
+ /**
636
+ * Transformer Encoder Layer.
637
+ *
638
+ * A single layer of the Transformer encoder, consisting of:
639
+ * 1. Multi-head self-attention
640
+ * 2. Add & Norm (residual connection + layer normalization)
641
+ * 3. Feed-forward network (FFN)
642
+ * 4. Add & Norm
643
+ *
644
+ * @example
645
+ * ```ts
646
+ * import { TransformerEncoderLayer } from 'deepbox/nn';
647
+ * import { tensor } from 'deepbox/ndarray';
648
+ *
649
+ * const layer = new TransformerEncoderLayer(512, 8, 2048);
650
+ * const x = tensor([[/* sequence data *\/]]);
651
+ * const output = layer.forward(x);
652
+ * ```
653
+ *
654
+ * @see {@link https://pytorch.org/docs/stable/generated/torch.nn.TransformerEncoderLayer.html | PyTorch TransformerEncoderLayer}
655
+ */
656
+ declare class TransformerEncoderLayer extends Module {
657
+ private readonly dModel;
658
+ private readonly nHead;
659
+ private readonly dFF;
660
+ private readonly selfAttn;
661
+ private readonly linear1;
662
+ private readonly linear2;
663
+ private readonly norm1;
664
+ private readonly norm2;
665
+ private readonly dropout;
666
+ private readonly dropout1;
667
+ private readonly dropout2;
668
+ private readonly dropout3;
669
+ constructor(dModel: number, nHead: number, dFF: number, options?: {
670
+ readonly dropout?: number;
671
+ readonly eps?: number;
672
+ });
673
+ /**
674
+ * Forward pass of the Transformer encoder layer.
675
+ *
676
+ * @param src - Source sequence of shape (batch, seqLen, dModel)
677
+ * @returns Output of same shape as input
678
+ */
679
+ forward(src: AnyTensor): GradTensor;
680
+ toString(): string;
681
+ }
682
+
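The attention examples above elide the actual sequence data. Here is a concrete, hedged sketch with small shapes; it assumes `tensor` accepts the nested-array `(batch, seqLen, embedDim)` layout described in the parameter docs above.

```ts
import { MultiheadAttention, TransformerEncoderLayer } from 'deepbox/nn';
import { tensor } from 'deepbox/ndarray';

// One batch of 4 tokens with embedDim = 8 (divisible by numHeads = 2).
const seq = tensor([Array.from({ length: 4 }, () => new Array(8).fill(0.5))]);

const mha = new MultiheadAttention(8, 2, { dropout: 0 });
const attended = mha.forward(seq, seq, seq);   // self-attention: query = key = value

const encoder = new TransformerEncoderLayer(8, 2, 16);
const encoded = encoder.forward(seq);          // same (batch, seqLen, dModel) shape
```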
683
+ /**
684
+ * 1D Convolutional Layer.
685
+ *
686
+ * Applies a 1D convolution over an input signal composed of several input planes.
687
+ *
688
+ * @example
689
+ * ```ts
690
+ * import { Conv1d } from 'deepbox/nn';
691
+ *
692
+ * const conv = new Conv1d(16, 33, 3); // in_channels=16, out_channels=33, kernel_size=3
693
+ * ```
694
+ *
695
+ * @see {@link https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html | PyTorch Conv1d}
696
+ */
697
+ declare class Conv1d extends Module {
698
+ private readonly inChannels;
699
+ private readonly outChannels;
700
+ private readonly kernelSize;
701
+ private readonly stride;
702
+ private readonly padding;
703
+ private readonly bias;
704
+ private weight_?;
705
+ private bias_?;
706
+ constructor(inChannels: number, outChannels: number, kernelSize: number, options?: {
707
+ readonly stride?: number;
708
+ readonly padding?: number;
709
+ readonly bias?: boolean;
710
+ });
711
+ private initializeParameters;
712
+ forward(x: AnyTensor): GradTensor;
713
+ get weight(): GradTensor;
714
+ }
715
+ /**
716
+ * 2D Convolutional Layer.
717
+ *
718
+ * Applies a 2D convolution over an input signal composed of several input planes.
719
+ *
720
+ * @example
721
+ * ```ts
722
+ * import { Conv2d } from 'deepbox/nn';
723
+ *
724
+ * const conv = new Conv2d(3, 64, 3); // RGB to 64 channels, 3x3 kernel
725
+ * ```
726
+ *
727
+ * @see {@link https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html | PyTorch Conv2d}
728
+ */
729
+ declare class Conv2d extends Module {
730
+ private readonly inChannels;
731
+ private readonly outChannels;
732
+ private readonly kernelSize;
733
+ private readonly stride;
734
+ private readonly padding;
735
+ private readonly useBias;
736
+ private weight_?;
737
+ private bias_?;
738
+ constructor(inChannels: number, outChannels: number, kernelSize: number | [number, number], options?: {
739
+ readonly stride?: number | [number, number];
740
+ readonly padding?: number | [number, number];
741
+ readonly bias?: boolean;
742
+ });
743
+ private initializeParameters;
744
+ forward(x: AnyTensor): GradTensor;
745
+ get weight(): GradTensor;
746
+ }
747
+ /**
748
+ * 2D Max Pooling Layer.
749
+ *
750
+ * Applies a 2D max pooling over an input signal.
751
+ *
752
+ * @example
753
+ * ```ts
754
+ * import { MaxPool2d } from 'deepbox/nn';
755
+ *
756
+ * const pool = new MaxPool2d(2); // 2x2 pooling
757
+ * ```
758
+ *
759
+ * @see {@link https://pytorch.org/docs/stable/generated/torch.nn.MaxPool2d.html | PyTorch MaxPool2d}
760
+ */
761
+ declare class MaxPool2d extends Module {
762
+ private readonly kernelSizeValue;
763
+ private readonly stride;
764
+ private readonly padding;
765
+ constructor(kernelSize: number | [number, number], options?: {
766
+ readonly stride?: number | [number, number];
767
+ readonly padding?: number | [number, number];
768
+ });
769
+ forward(x: AnyTensor): GradTensor;
770
+ }
771
+ /**
772
+ * 2D Average Pooling Layer.
773
+ *
774
+ * Applies a 2D average pooling over an input signal.
775
+ *
776
+ * @example
777
+ * ```ts
778
+ * import { AvgPool2d } from 'deepbox/nn';
779
+ *
780
+ * const pool = new AvgPool2d(2); // 2x2 pooling
781
+ * ```
782
+ *
783
+ * @see {@link https://pytorch.org/docs/stable/generated/torch.nn.AvgPool2d.html | PyTorch AvgPool2d}
784
+ */
785
+ declare class AvgPool2d extends Module {
786
+ private readonly kernelSizeValue;
787
+ private readonly stride;
788
+ private readonly padding;
789
+ constructor(kernelSize: number | [number, number], options?: {
790
+ readonly stride?: number | [number, number];
791
+ readonly padding?: number | [number, number];
792
+ });
793
+ forward(x: AnyTensor): GradTensor;
794
+ }
795
+
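The convolution and pooling layers above document their constructors but not the expected input layout. The sketch below assumes the PyTorch-style NCHW layout `(batch, channels, height, width)` suggested by the linked PyTorch docs; treat that shape as an assumption rather than a guarantee.

```ts
import { Conv2d, MaxPool2d } from 'deepbox/nn';
import { tensor } from 'deepbox/ndarray';

// A single 1-channel 4x4 "image": shape (1, 1, 4, 4) under the assumed NCHW layout.
const img = tensor([[[
  [1, 2, 3, 4],
  [5, 6, 7, 8],
  [9, 10, 11, 12],
  [13, 14, 15, 16],
]]]);

const conv = new Conv2d(1, 2, 3, { padding: 1 }); // 1 -> 2 channels, 3x3 kernel
const pool = new MaxPool2d(2);                    // 2x2 max pooling

const features = pool.forward(conv.forward(img));
```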
796
+ /**
797
+ * Applies Dropout regularization during training.
798
+ *
799
+ * **Mathematical Formulation:**
800
+ * During training:
801
+ * ```
802
+ * y = x * mask / (1 - p)
803
+ * ```
804
+ * where mask is a binary tensor with probability (1-p) of being 1.
805
+ *
806
+ * During evaluation:
807
+ * ```
808
+ * y = x
809
+ * ```
810
+ *
811
+ * **Purpose:**
812
+ * - Prevents overfitting by randomly zeroing elements during training
813
+ * - Forces the network to learn redundant representations
814
+ * - Improves generalization performance
815
+ *
816
+ * **Scaling:**
817
+ * The output is scaled by 1/(1-p) during training to maintain expected value.
818
+ * This is called "inverted dropout" and eliminates the need for scaling during inference.
819
+ *
820
+ * @example
821
+ * ```ts
822
+ * import { Dropout } from 'deepbox/nn';
823
+ * import { tensor } from 'deepbox/ndarray';
824
+ *
825
+ * const dropout = new Dropout(0.5); // Drop 50% of neurons
826
+ * const input = tensor([[1, 2, 3, 4]]);
827
+ *
828
+ * // Training mode: randomly zeros ~50% of elements
829
+ * dropout.train();
830
+ * const output = dropout.forward(input);
831
+ *
832
+ * // Evaluation mode: passes input unchanged
833
+ * dropout.eval();
834
+ * const output2 = dropout.forward(input); // Same as input
835
+ * ```
836
+ *
837
+ * References:
838
+ * - Dropout paper: https://jmlr.org/papers/v15/srivastava14a.html
839
+ * - PyTorch Dropout: https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html
840
+ *
841
+ * @category Neural Network Layers
842
+ */
843
+ declare class Dropout extends Module {
844
+ /** Probability of an element being zeroed (dropout rate) */
845
+ private readonly p;
846
+ /**
847
+ * Create a new Dropout layer.
848
+ *
849
+ * @param p - Probability of an element being zeroed (0 <= p < 1)
850
+ * @throws {InvalidParameterError} If p is not in valid range [0, 1)
851
+ */
852
+ constructor(p?: number);
853
+ /**
854
+ * Forward pass: apply dropout during training, identity during evaluation.
855
+ *
856
+ * @param input - Input tensor of any shape (Tensor or GradTensor)
857
+ * @returns Output tensor with same shape as input
858
+ */
859
+ forward(input: AnyTensor): GradTensor;
860
+ /**
861
+ * Get string representation of the layer.
862
+ *
863
+ * @returns String representation with dropout probability
864
+ */
865
+ toString(): string;
866
+ /**
867
+ * Get the dropout probability.
868
+ */
869
+ get dropoutRate(): number;
870
+ }
871
+
872
+ /**
873
+ * Applies a linear transformation to the incoming data: y = xA^T + b
874
+ *
875
+ * This is also known as a fully connected layer or dense layer.
876
+ *
877
+ * **Mathematical Formulation:**
878
+ * ```
879
+ * y = x * W^T + b
880
+ * ```
881
+ *
882
+ * Where:
883
+ * - x is the input tensor of shape (*, in_features)
884
+ * - W is the weight matrix of shape (out_features, in_features)
885
+ * - b is the bias vector of shape (out_features,)
886
+ * - y is the output tensor of shape (*, out_features)
887
+ *
888
+ * **Shape Conventions:**
889
+ * - Input: `(*, in_features)` where `*` means any number of leading dimensions
890
+ * - 1D: `(in_features)` → Output: `(out_features)`
891
+ * - 2D: `(batch, in_features)` → Output: `(batch, out_features)`
892
+ * - 3D: `(batch, seq_len, in_features)` → Output: `(batch, seq_len, out_features)`
893
+ * - The last dimension must equal `in_features`
894
+ * - All leading dimensions are preserved in the output
895
+ *
896
+ * **Parameters:**
897
+ * - `inFeatures`: Size of each input sample
898
+ * - `outFeatures`: Size of each output sample
899
+ * - `bias`: If true, adds a learnable bias to the output
900
+ *
901
+ * **Attributes:**
902
+ * - `weight`: Learnable weights of shape (out_features, in_features)
903
+ * - `bias`: Learnable bias of shape (out_features,) if bias=true
904
+ *
905
+ * **Initialization:**
906
+ * Uses Kaiming/He initialization: weights ~ N(0, sqrt(2/in_features))
907
+ * Biases are initialized to zeros
908
+ *
909
+ * @example
910
+ * ```ts
911
+ * import { Linear } from 'deepbox/nn';
912
+ * import { tensor } from 'deepbox/ndarray';
913
+ *
914
+ * // Create a linear layer with 20 input features and 30 output features
915
+ * const layer = new Linear(20, 30);
916
+ *
917
+ * // Forward pass
918
+ * const input = tensor([[1, 2, ..., 20]]); // shape: (1, 20)
919
+ * const output = layer.forward(input); // shape: (1, 30)
920
+ *
921
+ * // Without bias
922
+ * const layerNoBias = new Linear(10, 5, { bias: false });
923
+ * ```
924
+ *
925
+ * References:
926
+ * - PyTorch Linear: https://pytorch.org/docs/stable/generated/torch.nn.Linear.html
927
+ * - Xavier/Glorot initialization: http://proceedings.mlr.press/v9/glorot10a.html
928
+ *
929
+ * @category Neural Network Layers
930
+ */
931
+ declare class Linear extends Module {
932
+ /** Weight matrix of shape (out_features, in_features) */
933
+ private weight;
934
+ private weightParam;
935
+ /** Bias vector of shape (out_features,) */
936
+ private bias?;
937
+ private biasParam?;
938
+ /** Number of input features */
939
+ private readonly inFeatures;
940
+ /** Number of output features */
941
+ private readonly outFeatures;
942
+ /** Whether this layer has a bias */
943
+ private readonly useBias;
944
+ /**
945
+ * Create a new Linear layer.
946
+ *
947
+ * @param inFeatures - Size of each input sample
948
+ * @param outFeatures - Size of each output sample
949
+ * @param options - Configuration options
950
+ * @param options.bias - If true, add learnable bias (default: true)
951
+ * @param options.dtype - Data type for weights (default: 'float32')
952
+ * @param options.device - Device to place tensors on (default: 'cpu')
953
+ */
954
+ constructor(inFeatures: number, outFeatures: number, options?: {
955
+ readonly bias?: boolean;
956
+ readonly dtype?: "float32" | "float64";
957
+ readonly device?: "cpu" | "webgpu" | "wasm";
958
+ });
959
+ /**
960
+ * Forward pass: compute y = x * W^T + b
961
+ *
962
+ * @param input - Input tensor of shape (*, in_features)
963
+ * @returns Output tensor of shape (*, out_features)
964
+ * @throws {ShapeError} If input shape is invalid
965
+ * @throws {DTypeError} If input dtype is unsupported
966
+ */
967
+ forward(input: GradTensor): GradTensor;
968
+ forward(input: Tensor): Tensor;
969
+ /**
970
+ * Get extra representation string for this layer.
971
+ *
972
+ * @returns String representation of layer parameters
973
+ */
974
+ toString(): string;
975
+ /**
976
+ * Get the weight matrix.
977
+ *
978
+ * @returns Weight tensor of shape (out_features, in_features)
979
+ */
980
+ getWeight(): Tensor;
981
+ /**
982
+ * Get the bias vector.
983
+ *
984
+ * @returns Bias tensor of shape (out_features,) or undefined if no bias
985
+ */
986
+ getBias(): Tensor | undefined;
987
+ /**
988
+ * Get the number of input features.
989
+ */
990
+ get inputSize(): number;
991
+ /**
992
+ * Get the number of output features.
993
+ */
994
+ get outputSize(): number;
995
+ }
996
+
997
+ /**
998
+ * Batch Normalization layer.
999
+ *
1000
+ * Normalizes the input over the batch dimension for faster and more stable training.
1001
+ *
1002
+ * **Formula**: y = (x - E[x]) / sqrt(Var[x] + eps) * gamma + beta
1003
+ *
1004
+ * During training, uses batch statistics. During evaluation, uses running statistics
1005
+ * unless `trackRunningStats=false`, in which case batch statistics are always used.
1006
+ *
1007
+ * @example
1008
+ * ```ts
1009
+ * import { BatchNorm1d } from 'deepbox/nn';
1010
+ * import { tensor } from 'deepbox/ndarray';
1011
+ *
1012
+ * const bn = new BatchNorm1d(3);
1013
+ * const x = tensor([[1, 2, 3], [4, 5, 6]]);
1014
+ * const y = bn.forward(x);
1015
+ * ```
1016
+ *
1017
+ * @see {@link https://pytorch.org/docs/stable/generated/torch.nn.BatchNorm1d.html | PyTorch BatchNorm1d}
1018
+ */
1019
+ declare class BatchNorm1d extends Module {
1020
+ private readonly numFeatures;
1021
+ private readonly eps;
1022
+ private readonly momentum;
1023
+ private readonly affine;
1024
+ private readonly trackRunningStats;
1025
+ private gamma?;
1026
+ private beta?;
1027
+ private runningMean;
1028
+ private runningVar;
1029
+ constructor(numFeatures: number, options?: {
1030
+ readonly eps?: number;
1031
+ readonly momentum?: number;
1032
+ readonly affine?: boolean;
1033
+ readonly trackRunningStats?: boolean;
1034
+ });
1035
+ forward(x: AnyTensor): GradTensor;
1036
+ toString(): string;
1037
+ }
1038
+ /**
1039
+ * Layer Normalization.
1040
+ *
1041
+ * Normalizes across the feature dimensions (trailing dimensions specified by `normalizedShape`)
1042
+ * for each sample independently. Unlike BatchNorm, LayerNorm works the same way during training
1043
+ * and evaluation.
1044
+ *
1045
+ * **Formula**: y = (x - E[x]) / sqrt(Var[x] + eps) * gamma + beta
1046
+ *
1047
+ * @example
1048
+ * ```ts
1049
+ * import { LayerNorm } from 'deepbox/nn';
1050
+ * import { tensor } from 'deepbox/ndarray';
1051
+ *
1052
+ * const ln = new LayerNorm([3]);
1053
+ * const x = tensor([[1, 2, 3]]);
1054
+ * const y = ln.forward(x);
1055
+ * ```
1056
+ *
1057
+ * @see {@link https://pytorch.org/docs/stable/generated/torch.nn.LayerNorm.html | PyTorch LayerNorm}
1058
+ */
1059
+ declare class LayerNorm extends Module {
1060
+ private readonly normalizedShape;
1061
+ private readonly eps;
1062
+ private readonly elementwiseAffine;
1063
+ private gamma?;
1064
+ private beta?;
1065
+ constructor(normalizedShape: number | readonly number[], options?: {
1066
+ readonly eps?: number;
1067
+ readonly elementwiseAffine?: boolean;
1068
+ });
1069
+ forward(x: AnyTensor): GradTensor;
1070
+ toString(): string;
1071
+ }
1072
+
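The BatchNorm1d docs above describe the train/eval distinction, but the example does not demonstrate it. A short sketch using the `train()`/`eval()` methods inherited from `Module`:

```ts
import { BatchNorm1d, LayerNorm } from 'deepbox/nn';
import { tensor } from 'deepbox/ndarray';

const batch = tensor([[1, 2, 3], [4, 5, 6]]); // (batch, numFeatures) with numFeatures = 3

const bn = new BatchNorm1d(3);
bn.train();                      // normalize with batch statistics, update running stats
const yTrain = bn.forward(batch);
bn.eval();                       // normalize with the tracked running statistics
const yEval = bn.forward(batch);

const ln = new LayerNorm([3]);   // LayerNorm behaves the same in train and eval
const yLn = ln.forward(batch);
```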
1073
+ /**
1074
+ * Simple RNN layer.
1075
+ *
1076
+ * Applies a simple recurrent neural network to an input sequence.
1077
+ *
1078
+ * **Formula**: h_t = tanh(W_ih * x_t + b_ih + W_hh * h_{t-1} + b_hh)
1079
+ *
1080
+ * @example
1081
+ * ```ts
1082
+ * import { RNN } from 'deepbox/nn';
1083
+ * import { tensor } from 'deepbox/ndarray';
1084
+ *
1085
+ * const rnn = new RNN(3, 20);
1086
+ * const x = tensor([[[1, 2, 3]]]); // (batch, seq_len, input_size)
1087
+ * const output = rnn.forward(x);
1088
+ * ```
1089
+ *
1090
+ * @see {@link https://pytorch.org/docs/stable/generated/torch.nn.RNN.html | PyTorch RNN}
1091
+ */
1092
+ declare class RNN extends Module {
1093
+ private readonly inputSize;
1094
+ private readonly hiddenSize;
1095
+ private readonly numLayers;
1096
+ private readonly nonlinearity;
1097
+ private readonly bias;
1098
+ private readonly batchFirst;
1099
+ private weightsIh;
1100
+ private weightsHh;
1101
+ private biasIh;
1102
+ private biasHh;
1103
+ constructor(inputSize: number, hiddenSize: number, options?: {
1104
+ readonly numLayers?: number;
1105
+ readonly nonlinearity?: "tanh" | "relu";
1106
+ readonly bias?: boolean;
1107
+ readonly batchFirst?: boolean;
1108
+ });
1109
+ private activation;
1110
+ private run;
1111
+ forward(...inputs: AnyTensor[]): Tensor;
1112
+ /**
1113
+ * Forward pass returning both output and hidden state.
1114
+ * Use this method when you need the hidden state.
1115
+ */
1116
+ forwardWithState(input: AnyTensor, hx?: AnyTensor): [Tensor, Tensor];
1117
+ toString(): string;
1118
+ }
1119
+ /**
1120
+ * LSTM (Long Short-Term Memory) layer.
1121
+ *
1122
+ * Applies a multi-layer LSTM to an input sequence.
1123
+ *
1124
+ * **Gates**:
1125
+ * - Input gate: i_t = σ(W_ii * x_t + b_ii + W_hi * h_{t-1} + b_hi)
1126
+ * - Forget gate: f_t = σ(W_if * x_t + b_if + W_hf * h_{t-1} + b_hf)
1127
+ * - Cell gate: g_t = tanh(W_ig * x_t + b_ig + W_hg * h_{t-1} + b_hg)
1128
+ * - Output gate: o_t = σ(W_io * x_t + b_io + W_ho * h_{t-1} + b_ho)
1129
+ * - Cell state: c_t = f_t * c_{t-1} + i_t * g_t
1130
+ * - Hidden state: h_t = o_t * tanh(c_t)
1131
+ *
1132
+ * @see {@link https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html | PyTorch LSTM}
1133
+ */
1134
+ declare class LSTM extends Module {
1135
+ private readonly inputSize;
1136
+ private readonly hiddenSize;
1137
+ private readonly numLayers;
1138
+ private readonly bias;
1139
+ private readonly batchFirst;
1140
+ private weightsIh;
1141
+ private weightsHh;
1142
+ private biasIh;
1143
+ private biasHh;
1144
+ constructor(inputSize: number, hiddenSize: number, options?: {
1145
+ readonly numLayers?: number;
1146
+ readonly bias?: boolean;
1147
+ readonly batchFirst?: boolean;
1148
+ });
1149
+ private sigmoid;
1150
+ private run;
1151
+ forward(...inputs: AnyTensor[]): Tensor;
1152
+ /**
1153
+ * Forward pass returning output, hidden state, and cell state.
1154
+ * Use this method when you need the hidden/cell states.
1155
+ */
1156
+ forwardWithState(input: AnyTensor, hx?: AnyTensor, cx?: AnyTensor): [Tensor, [Tensor, Tensor]];
1157
+ toString(): string;
1158
+ }
1159
+ /**
1160
+ * GRU (Gated Recurrent Unit) layer.
1161
+ *
1162
+ * Applies a multi-layer GRU to an input sequence.
1163
+ *
1164
+ * **Gates**:
1165
+ * - Reset gate: r_t = σ(W_ir * x_t + b_ir + W_hr * h_{t-1} + b_hr)
1166
+ * - Update gate: z_t = σ(W_iz * x_t + b_iz + W_hz * h_{t-1} + b_hz)
1167
+ * - New gate: n_t = tanh(W_in * x_t + b_in + r_t * (W_hn * h_{t-1} + b_hn))
1168
+ * - Hidden: h_t = (1 - z_t) * n_t + z_t * h_{t-1}
1169
+ *
1170
+ * @see {@link https://pytorch.org/docs/stable/generated/torch.nn.GRU.html | PyTorch GRU}
1171
+ */
1172
+ declare class GRU extends Module {
1173
+ private readonly inputSize;
1174
+ private readonly hiddenSize;
1175
+ private readonly numLayers;
1176
+ private readonly bias;
1177
+ private readonly batchFirst;
1178
+ private weightsIh;
1179
+ private weightsHh;
1180
+ private biasIh;
1181
+ private biasHh;
1182
+ constructor(inputSize: number, hiddenSize: number, options?: {
1183
+ readonly numLayers?: number;
1184
+ readonly bias?: boolean;
1185
+ readonly batchFirst?: boolean;
1186
+ });
1187
+ private sigmoid;
1188
+ private run;
1189
+ forward(...inputs: AnyTensor[]): Tensor;
1190
+ /**
1191
+ * Forward pass returning both output and hidden state.
1192
+ * Use this method when you need the hidden state.
1193
+ */
1194
+ forwardWithState(input: AnyTensor, hx?: AnyTensor): [Tensor, Tensor];
1195
+ toString(): string;
1196
+ }
1197
+
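LSTM and GRU have no usage example above, and `forwardWithState` is only described in prose. A minimal sketch; `batchFirst` is passed explicitly because its default is not documented here, so the `(batch, seq_len, input_size)` shape is an assumption tied to that flag.

```ts
import { LSTM, GRU } from 'deepbox/nn';
import { tensor } from 'deepbox/ndarray';

// (batch = 1, seq_len = 2, input_size = 3) with batchFirst enabled.
const seq = tensor([[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]]);

const lstm = new LSTM(3, 4, { batchFirst: true });
const lstmOut = lstm.forward(seq);                     // output sequence only
const [outputs, [h, c]] = lstm.forwardWithState(seq);  // plus hidden and cell state

const gru = new GRU(3, 4, { batchFirst: true });
const [gruOut, hidden] = gru.forwardWithState(seq);    // output and hidden state
```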
1198
+ /**
1199
+ * Cross Entropy Loss.
1200
+ *
1201
+ * Computes the cross entropy loss between predictions and targets.
1202
+ * Commonly used for multi-class classification problems.
1203
+ *
1204
+ * Supports both integer class indices and one-hot encoded probabilities for targets.
1205
+ *
1206
+ * **Formula**: L = -mean(sum(target * log_softmax(input), dim=1))
1207
+ *
1208
+ * @param input - Predicted logits of shape (n_samples, n_classes)
1209
+ * @param target - True labels. Either:
1210
+ * - Class indices of shape (n_samples,)
1211
+ * - Probabilities/One-hot of shape (n_samples, n_classes)
1212
+ * @returns Scalar loss value (number for Tensor inputs, GradTensor for GradTensor inputs)
1213
+ *
1214
+ * @example
1215
+ * ```ts
1216
+ * import { crossEntropyLoss } from 'deepbox/nn';
1217
+ * import { tensor } from 'deepbox/ndarray';
1218
+ *
1219
+ * const pred = tensor([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1]]);
1220
+ * const true_idx = tensor([0, 1]);
1221
+ * const loss = crossEntropyLoss(pred, true_idx);
1222
+ * ```
1223
+ */
1224
+ declare function crossEntropyLoss(input: Tensor, target: Tensor): number;
1225
+ declare function crossEntropyLoss(input: GradTensor, target: AnyTensor): GradTensor;
1226
+ /**
1227
+ * Binary Cross Entropy Loss with logits.
1228
+ *
1229
+ * Combines sigmoid activation and binary cross entropy loss for numerical stability.
1230
+ *
1231
+ * @param input - Predicted logits of shape (n_samples,) or (n_samples, 1)
1232
+ * @param target - True binary labels of same shape as input
1233
+ * @returns Scalar loss value (number for Tensor inputs, GradTensor for GradTensor inputs)
1234
+ */
1235
+ declare function binaryCrossEntropyWithLogitsLoss(input: Tensor, target: Tensor): number;
1236
+ declare function binaryCrossEntropyWithLogitsLoss(input: GradTensor, target: AnyTensor): GradTensor;
1237
+
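`binaryCrossEntropyWithLogitsLoss` has no example above; a minimal sketch using the plain-`Tensor` overload, which returns a number per the declaration:

```ts
import { binaryCrossEntropyWithLogitsLoss } from 'deepbox/nn';
import { tensor } from 'deepbox/ndarray';

// Raw scores (logits) and 0/1 targets of the same shape;
// the sigmoid is applied inside the loss for numerical stability.
const logits = tensor([2.0, -1.0, 0.5]);
const labels = tensor([1, 0, 1]);

const loss = binaryCrossEntropyWithLogitsLoss(logits, labels); // number
```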
1238
+ /**
1239
+ * Mean Squared Error (MSE) loss function.
1240
+ *
1241
+ * **Mathematical Formula:**
1242
+ * ```
1243
+ * MSE = mean((y_pred - y_true)^2)
1244
+ * ```
1245
+ *
1246
+ * **Use Cases:**
1247
+ * - Regression tasks
1248
+ * - Continuous value prediction
1249
+ * - Measuring distance between predictions and targets
1250
+ *
1251
+ * **Properties:**
1252
+ * - Always non-negative
1253
+ * - Penalizes large errors more heavily (quadratic)
1254
+ * - Differentiable everywhere
1255
+ *
1256
+ * @param predictions - Predicted values
1257
+ * @param targets - True target values
1258
+ * @param reduction - How to reduce the loss: 'mean', 'sum', or 'none'
1259
+ * @returns Scalar loss value (or tensor if reduction='none')
1260
+ *
1261
+ * @example
1262
+ * ```ts
1263
+ * import { mseLoss } from 'deepbox/nn/losses';
1264
+ * import { tensor } from 'deepbox/ndarray';
1265
+ *
1266
+ * const predictions = tensor([2.5, 0.0, 2.1, 7.8]);
1267
+ * const targets = tensor([3.0, -0.5, 2.0, 8.0]);
1268
+ * const loss = mseLoss(predictions, targets); // Scalar tensor
1269
+ * ```
1270
+ *
1271
+ * @category Loss Functions
1272
+ */
1273
+ declare function mseLoss(predictions: Tensor, targets: Tensor, reduction?: "mean" | "sum" | "none"): Tensor;
1274
+ /**
1275
+ * Mean Absolute Error (MAE) loss function, also known as L1 loss.
1276
+ *
1277
+ * **Mathematical Formula:**
1278
+ * ```
1279
+ * MAE = mean(|y_pred - y_true|)
1280
+ * ```
1281
+ *
1282
+ * **Use Cases:**
1283
+ * - Regression tasks where outliers should have less influence
1284
+ * - More robust to outliers than MSE
1285
+ *
1286
+ * **Properties:**
1287
+ * - Always non-negative
1288
+ * - Linear penalty for errors
1289
+ * - Less sensitive to outliers than MSE
1290
+ *
1291
+ * @param predictions - Predicted values
1292
+ * @param targets - True target values
1293
+ * @param reduction - How to reduce the loss: 'mean', 'sum', or 'none'
1294
+ * @returns Scalar loss value (or tensor if reduction='none')
1295
+ *
1296
+ * @category Loss Functions
1297
+ */
1298
+ declare function maeLoss(predictions: Tensor, targets: Tensor, reduction?: "mean" | "sum" | "none"): Tensor;
1299
+ /**
1300
+ * Binary Cross-Entropy (BCE) loss function.
1301
+ *
1302
+ * **Mathematical Formula:**
1303
+ * ```
1304
+ * BCE = -mean(y_true * log(y_pred) + (1 - y_true) * log(1 - y_pred))
1305
+ * ```
1306
+ *
1307
+ * **Use Cases:**
1308
+ * - Binary classification tasks
1309
+ * - Multi-label classification (independent binary decisions)
1310
+ * - Predictions should be probabilities in (0, 1)
1311
+ *
1312
+ * **Properties:**
1313
+ * - Requires predictions in range (0, 1) - use sigmoid activation
1314
+ * - Targets should be 0 or 1
1315
+ * - Numerically stable with epsilon for log
1316
+ *
1317
+ * @param predictions - Predicted probabilities (0 to 1)
1318
+ * @param targets - True binary labels (0 or 1)
1319
+ * @param reduction - How to reduce the loss: 'mean', 'sum', or 'none'
1320
+ * @returns Scalar loss value (or tensor if reduction='none')
1321
+ *
1322
+ * @category Loss Functions
1323
+ */
1324
+ declare function binaryCrossEntropyLoss(predictions: Tensor, targets: Tensor, reduction?: "mean" | "sum" | "none"): Tensor;
1325
+ /**
1326
+ * Root Mean Squared Error (RMSE) loss function.
1327
+ *
1328
+ * **Mathematical Formula:**
1329
+ * ```
1330
+ * RMSE = sqrt(mean((y_pred - y_true)^2))
1331
+ * ```
1332
+ *
1333
+ * **Use Cases:**
1334
+ * - Regression tasks
1335
+ * - When you want the error in the same units as the target
1336
+ * - More interpretable than MSE
1337
+ *
1338
+ * @param predictions - Predicted values
1339
+ * @param targets - True target values
1340
+ * @returns Scalar loss value
1341
+ *
1342
+ * @category Loss Functions
1343
+ */
1344
+ declare function rmseLoss(predictions: Tensor, targets: Tensor): Tensor;
1345
+ /**
1346
+ * Huber loss function - combines MSE and MAE.
1347
+ *
1348
+ * **Mathematical Formula:**
1349
+ * ```
1350
+ * Huber(a) = 0.5 * a^2 if |a| <= delta
1351
+ * = delta * (|a| - 0.5 * delta) otherwise
1352
+ * where a = y_pred - y_true
1353
+ * ```
1354
+ *
1355
+ * **Use Cases:**
1356
+ * - Regression with outliers
1357
+ * - Robust to outliers while maintaining MSE benefits for small errors
1358
+ *
1359
+ * **Properties:**
1360
+ * - Quadratic for small errors (like MSE)
1361
+ * - Linear for large errors (like MAE)
1362
+ * - Controlled by delta parameter
1363
+ *
1364
+ * @param predictions - Predicted values
1365
+ * @param targets - True target values
1366
+ * @param delta - Threshold where loss transitions from quadratic to linear
1367
+ * @param reduction - How to reduce the loss: 'mean', 'sum', or 'none'
1368
+ * @returns Scalar loss value (or tensor if reduction='none')
1369
+ *
1370
+ * @category Loss Functions
1371
+ */
1372
+ declare function huberLoss(predictions: Tensor, targets: Tensor, delta?: number, reduction?: "mean" | "sum" | "none"): Tensor;
1373
+
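The regression losses above share the optional `reduction` parameter, but only `mseLoss` shows an example. A short sketch of `huberLoss`, `maeLoss` with `reduction: 'none'`, and `rmseLoss`, reusing the values from the `mseLoss` example (imports follow the `crossEntropyLoss` example in this file):

```ts
import { huberLoss, maeLoss, rmseLoss } from 'deepbox/nn';
import { tensor } from 'deepbox/ndarray';

const pred = tensor([2.5, 0.0, 2.1, 7.8]);
const target = tensor([3.0, -0.5, 2.0, 8.0]);

const huber = huberLoss(pred, target, 1.0);       // delta = 1.0, default reduction
const perSample = maeLoss(pred, target, 'none');  // element-wise losses, no reduction
const rmse = rmseLoss(pred, target);              // error in the units of the target
```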
1374
+ type index_AvgPool2d = AvgPool2d;
1375
+ declare const index_AvgPool2d: typeof AvgPool2d;
1376
+ type index_BatchNorm1d = BatchNorm1d;
1377
+ declare const index_BatchNorm1d: typeof BatchNorm1d;
1378
+ type index_Conv1d = Conv1d;
1379
+ declare const index_Conv1d: typeof Conv1d;
1380
+ type index_Conv2d = Conv2d;
1381
+ declare const index_Conv2d: typeof Conv2d;
1382
+ type index_Dropout = Dropout;
1383
+ declare const index_Dropout: typeof Dropout;
1384
+ type index_ELU = ELU;
1385
+ declare const index_ELU: typeof ELU;
1386
+ type index_ForwardHook = ForwardHook;
1387
+ type index_ForwardPreHook = ForwardPreHook;
1388
+ type index_GELU = GELU;
1389
+ declare const index_GELU: typeof GELU;
1390
+ type index_GRU = GRU;
1391
+ declare const index_GRU: typeof GRU;
1392
+ type index_LSTM = LSTM;
1393
+ declare const index_LSTM: typeof LSTM;
1394
+ type index_LayerNorm = LayerNorm;
1395
+ declare const index_LayerNorm: typeof LayerNorm;
1396
+ type index_LeakyReLU = LeakyReLU;
1397
+ declare const index_LeakyReLU: typeof LeakyReLU;
1398
+ type index_Linear = Linear;
1399
+ declare const index_Linear: typeof Linear;
1400
+ type index_LogSoftmax = LogSoftmax;
1401
+ declare const index_LogSoftmax: typeof LogSoftmax;
1402
+ type index_MaxPool2d = MaxPool2d;
1403
+ declare const index_MaxPool2d: typeof MaxPool2d;
1404
+ type index_Mish = Mish;
1405
+ declare const index_Mish: typeof Mish;
1406
+ type index_Module = Module;
1407
+ declare const index_Module: typeof Module;
1408
+ type index_MultiheadAttention = MultiheadAttention;
1409
+ declare const index_MultiheadAttention: typeof MultiheadAttention;
1410
+ type index_RNN = RNN;
1411
+ declare const index_RNN: typeof RNN;
1412
+ type index_ReLU = ReLU;
1413
+ declare const index_ReLU: typeof ReLU;
1414
+ type index_Sequential = Sequential;
1415
+ declare const index_Sequential: typeof Sequential;
1416
+ type index_Sigmoid = Sigmoid;
1417
+ declare const index_Sigmoid: typeof Sigmoid;
1418
+ type index_Softmax = Softmax;
1419
+ declare const index_Softmax: typeof Softmax;
1420
+ type index_Softplus = Softplus;
1421
+ declare const index_Softplus: typeof Softplus;
1422
+ type index_Swish = Swish;
1423
+ declare const index_Swish: typeof Swish;
1424
+ type index_Tanh = Tanh;
1425
+ declare const index_Tanh: typeof Tanh;
1426
+ type index_TransformerEncoderLayer = TransformerEncoderLayer;
1427
+ declare const index_TransformerEncoderLayer: typeof TransformerEncoderLayer;
1428
+ declare const index_binaryCrossEntropyLoss: typeof binaryCrossEntropyLoss;
1429
+ declare const index_binaryCrossEntropyWithLogitsLoss: typeof binaryCrossEntropyWithLogitsLoss;
1430
+ declare const index_crossEntropyLoss: typeof crossEntropyLoss;
1431
+ declare const index_huberLoss: typeof huberLoss;
1432
+ declare const index_maeLoss: typeof maeLoss;
1433
+ declare const index_mseLoss: typeof mseLoss;
1434
+ declare const index_rmseLoss: typeof rmseLoss;
1435
+ declare namespace index {
1436
+ export { index_AvgPool2d as AvgPool2d, index_BatchNorm1d as BatchNorm1d, index_Conv1d as Conv1d, index_Conv2d as Conv2d, index_Dropout as Dropout, index_ELU as ELU, type index_ForwardHook as ForwardHook, type index_ForwardPreHook as ForwardPreHook, index_GELU as GELU, index_GRU as GRU, index_LSTM as LSTM, index_LayerNorm as LayerNorm, index_LeakyReLU as LeakyReLU, index_Linear as Linear, index_LogSoftmax as LogSoftmax, index_MaxPool2d as MaxPool2d, index_Mish as Mish, index_Module as Module, index_MultiheadAttention as MultiheadAttention, index_RNN as RNN, index_ReLU as ReLU, index_Sequential as Sequential, index_Sigmoid as Sigmoid, index_Softmax as Softmax, index_Softplus as Softplus, index_Swish as Swish, index_Tanh as Tanh, index_TransformerEncoderLayer as TransformerEncoderLayer, index_binaryCrossEntropyLoss as binaryCrossEntropyLoss, index_binaryCrossEntropyWithLogitsLoss as binaryCrossEntropyWithLogitsLoss, index_crossEntropyLoss as crossEntropyLoss, index_huberLoss as huberLoss, index_maeLoss as maeLoss, index_mseLoss as mseLoss, index_rmseLoss as rmseLoss };
1437
+ }
1438
+
1439
+ export { AvgPool2d as A, BatchNorm1d as B, Conv1d as C, Dropout as D, ELU as E, type ForwardHook as F, GELU as G, LeakyReLU as L, Mish as M, ReLU as R, Sequential as S, Tanh as T, LogSoftmax as a, Sigmoid as b, Softmax as c, Softplus as d, Swish as e, MultiheadAttention as f, TransformerEncoderLayer as g, Conv2d as h, index as i, MaxPool2d as j, Linear as k, LayerNorm as l, GRU as m, LSTM as n, RNN as o, binaryCrossEntropyLoss as p, binaryCrossEntropyWithLogitsLoss as q, crossEntropyLoss as r, huberLoss as s, maeLoss as t, mseLoss as u, rmseLoss as v, type ForwardPreHook as w, Module as x };