deepbox 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173)
  1. package/LICENSE +21 -0
  2. package/README.md +344 -0
  3. package/dist/CSRMatrix-CwGwQRea.d.cts +219 -0
  4. package/dist/CSRMatrix-KzNt6QpS.d.ts +219 -0
  5. package/dist/Tensor-BQLk1ltW.d.cts +147 -0
  6. package/dist/Tensor-g8mUClel.d.ts +147 -0
  7. package/dist/chunk-4S73VUBD.js +677 -0
  8. package/dist/chunk-4S73VUBD.js.map +1 -0
  9. package/dist/chunk-5R4S63PF.js +2925 -0
  10. package/dist/chunk-5R4S63PF.js.map +1 -0
  11. package/dist/chunk-6AE5FKKQ.cjs +9264 -0
  12. package/dist/chunk-6AE5FKKQ.cjs.map +1 -0
  13. package/dist/chunk-AD436M45.js +3854 -0
  14. package/dist/chunk-AD436M45.js.map +1 -0
  15. package/dist/chunk-ALS7ETWZ.cjs +4263 -0
  16. package/dist/chunk-ALS7ETWZ.cjs.map +1 -0
  17. package/dist/chunk-AU7XHGKJ.js +2092 -0
  18. package/dist/chunk-AU7XHGKJ.js.map +1 -0
  19. package/dist/chunk-B5TNKUEY.js +1481 -0
  20. package/dist/chunk-B5TNKUEY.js.map +1 -0
  21. package/dist/chunk-BCR7G3A6.js +9136 -0
  22. package/dist/chunk-BCR7G3A6.js.map +1 -0
  23. package/dist/chunk-C4PKXY74.cjs +1917 -0
  24. package/dist/chunk-C4PKXY74.cjs.map +1 -0
  25. package/dist/chunk-DWZY6PIP.cjs +6400 -0
  26. package/dist/chunk-DWZY6PIP.cjs.map +1 -0
  27. package/dist/chunk-E3EU5FZO.cjs +2113 -0
  28. package/dist/chunk-E3EU5FZO.cjs.map +1 -0
  29. package/dist/chunk-F3JWBINJ.js +1054 -0
  30. package/dist/chunk-F3JWBINJ.js.map +1 -0
  31. package/dist/chunk-FJYLIGJX.js +1940 -0
  32. package/dist/chunk-FJYLIGJX.js.map +1 -0
  33. package/dist/chunk-JSCDE774.cjs +729 -0
  34. package/dist/chunk-JSCDE774.cjs.map +1 -0
  35. package/dist/chunk-LWECRCW2.cjs +2412 -0
  36. package/dist/chunk-LWECRCW2.cjs.map +1 -0
  37. package/dist/chunk-MLBMYKCG.js +6379 -0
  38. package/dist/chunk-MLBMYKCG.js.map +1 -0
  39. package/dist/chunk-OX6QXFMV.cjs +3874 -0
  40. package/dist/chunk-OX6QXFMV.cjs.map +1 -0
  41. package/dist/chunk-PHV2DKRS.cjs +1072 -0
  42. package/dist/chunk-PHV2DKRS.cjs.map +1 -0
  43. package/dist/chunk-PL7TAYKI.js +4056 -0
  44. package/dist/chunk-PL7TAYKI.js.map +1 -0
  45. package/dist/chunk-PR647I7R.js +1898 -0
  46. package/dist/chunk-PR647I7R.js.map +1 -0
  47. package/dist/chunk-QERHVCHC.cjs +2960 -0
  48. package/dist/chunk-QERHVCHC.cjs.map +1 -0
  49. package/dist/chunk-XEG44RF6.cjs +1514 -0
  50. package/dist/chunk-XEG44RF6.cjs.map +1 -0
  51. package/dist/chunk-XMWVME2W.js +2377 -0
  52. package/dist/chunk-XMWVME2W.js.map +1 -0
  53. package/dist/chunk-ZB75FESB.cjs +1979 -0
  54. package/dist/chunk-ZB75FESB.cjs.map +1 -0
  55. package/dist/chunk-ZLW62TJG.cjs +4061 -0
  56. package/dist/chunk-ZLW62TJG.cjs.map +1 -0
  57. package/dist/chunk-ZXKBDFP3.js +4235 -0
  58. package/dist/chunk-ZXKBDFP3.js.map +1 -0
  59. package/dist/core/index.cjs +204 -0
  60. package/dist/core/index.cjs.map +1 -0
  61. package/dist/core/index.d.cts +2 -0
  62. package/dist/core/index.d.ts +2 -0
  63. package/dist/core/index.js +3 -0
  64. package/dist/core/index.js.map +1 -0
  65. package/dist/dataframe/index.cjs +22 -0
  66. package/dist/dataframe/index.cjs.map +1 -0
  67. package/dist/dataframe/index.d.cts +3 -0
  68. package/dist/dataframe/index.d.ts +3 -0
  69. package/dist/dataframe/index.js +5 -0
  70. package/dist/dataframe/index.js.map +1 -0
  71. package/dist/datasets/index.cjs +134 -0
  72. package/dist/datasets/index.cjs.map +1 -0
  73. package/dist/datasets/index.d.cts +3 -0
  74. package/dist/datasets/index.d.ts +3 -0
  75. package/dist/datasets/index.js +5 -0
  76. package/dist/datasets/index.js.map +1 -0
  77. package/dist/index-74AB8Cyh.d.cts +1126 -0
  78. package/dist/index-9oQx1HgV.d.cts +1180 -0
  79. package/dist/index-BJY2SI4i.d.ts +483 -0
  80. package/dist/index-BWGhrDlr.d.ts +733 -0
  81. package/dist/index-B_DK4FKY.d.cts +242 -0
  82. package/dist/index-BbA2Gxfl.d.ts +456 -0
  83. package/dist/index-BgHYAoSS.d.cts +837 -0
  84. package/dist/index-BndMbqsM.d.ts +1439 -0
  85. package/dist/index-C1mfVYoo.d.ts +2517 -0
  86. package/dist/index-CCvlwAmL.d.cts +809 -0
  87. package/dist/index-CDw5CnOU.d.ts +785 -0
  88. package/dist/index-Cn3SdB0O.d.ts +1126 -0
  89. package/dist/index-CrqLlS-a.d.ts +776 -0
  90. package/dist/index-D61yaSMY.d.cts +483 -0
  91. package/dist/index-D9Loo1_A.d.cts +2517 -0
  92. package/dist/index-DIT_OO9C.d.cts +785 -0
  93. package/dist/index-DIp_RrRt.d.ts +242 -0
  94. package/dist/index-DbultU6X.d.cts +1427 -0
  95. package/dist/index-DmEg_LCm.d.cts +776 -0
  96. package/dist/index-DoPWVxPo.d.cts +1439 -0
  97. package/dist/index-DuCxd-8d.d.ts +837 -0
  98. package/dist/index-Dx42TZaY.d.ts +809 -0
  99. package/dist/index-DyZ4QQf5.d.cts +456 -0
  100. package/dist/index-GFAVyOWO.d.ts +1427 -0
  101. package/dist/index-WHQLn0e8.d.cts +733 -0
  102. package/dist/index-ZtI1Iy4L.d.ts +1180 -0
  103. package/dist/index-eJgeni9c.d.cts +1911 -0
  104. package/dist/index-tk4lSYod.d.ts +1911 -0
  105. package/dist/index.cjs +72 -0
  106. package/dist/index.cjs.map +1 -0
  107. package/dist/index.d.cts +17 -0
  108. package/dist/index.d.ts +17 -0
  109. package/dist/index.js +15 -0
  110. package/dist/index.js.map +1 -0
  111. package/dist/linalg/index.cjs +86 -0
  112. package/dist/linalg/index.cjs.map +1 -0
  113. package/dist/linalg/index.d.cts +3 -0
  114. package/dist/linalg/index.d.ts +3 -0
  115. package/dist/linalg/index.js +5 -0
  116. package/dist/linalg/index.js.map +1 -0
  117. package/dist/metrics/index.cjs +158 -0
  118. package/dist/metrics/index.cjs.map +1 -0
  119. package/dist/metrics/index.d.cts +3 -0
  120. package/dist/metrics/index.d.ts +3 -0
  121. package/dist/metrics/index.js +5 -0
  122. package/dist/metrics/index.js.map +1 -0
  123. package/dist/ml/index.cjs +87 -0
  124. package/dist/ml/index.cjs.map +1 -0
  125. package/dist/ml/index.d.cts +3 -0
  126. package/dist/ml/index.d.ts +3 -0
  127. package/dist/ml/index.js +6 -0
  128. package/dist/ml/index.js.map +1 -0
  129. package/dist/ndarray/index.cjs +501 -0
  130. package/dist/ndarray/index.cjs.map +1 -0
  131. package/dist/ndarray/index.d.cts +5 -0
  132. package/dist/ndarray/index.d.ts +5 -0
  133. package/dist/ndarray/index.js +4 -0
  134. package/dist/ndarray/index.js.map +1 -0
  135. package/dist/nn/index.cjs +142 -0
  136. package/dist/nn/index.cjs.map +1 -0
  137. package/dist/nn/index.d.cts +6 -0
  138. package/dist/nn/index.d.ts +6 -0
  139. package/dist/nn/index.js +5 -0
  140. package/dist/nn/index.js.map +1 -0
  141. package/dist/optim/index.cjs +77 -0
  142. package/dist/optim/index.cjs.map +1 -0
  143. package/dist/optim/index.d.cts +4 -0
  144. package/dist/optim/index.d.ts +4 -0
  145. package/dist/optim/index.js +4 -0
  146. package/dist/optim/index.js.map +1 -0
  147. package/dist/plot/index.cjs +114 -0
  148. package/dist/plot/index.cjs.map +1 -0
  149. package/dist/plot/index.d.cts +6 -0
  150. package/dist/plot/index.d.ts +6 -0
  151. package/dist/plot/index.js +5 -0
  152. package/dist/plot/index.js.map +1 -0
  153. package/dist/preprocess/index.cjs +82 -0
  154. package/dist/preprocess/index.cjs.map +1 -0
  155. package/dist/preprocess/index.d.cts +4 -0
  156. package/dist/preprocess/index.d.ts +4 -0
  157. package/dist/preprocess/index.js +5 -0
  158. package/dist/preprocess/index.js.map +1 -0
  159. package/dist/random/index.cjs +74 -0
  160. package/dist/random/index.cjs.map +1 -0
  161. package/dist/random/index.d.cts +3 -0
  162. package/dist/random/index.d.ts +3 -0
  163. package/dist/random/index.js +5 -0
  164. package/dist/random/index.js.map +1 -0
  165. package/dist/stats/index.cjs +142 -0
  166. package/dist/stats/index.cjs.map +1 -0
  167. package/dist/stats/index.d.cts +3 -0
  168. package/dist/stats/index.d.ts +3 -0
  169. package/dist/stats/index.js +5 -0
  170. package/dist/stats/index.js.map +1 -0
  171. package/dist/tensor-B96jjJLQ.d.cts +205 -0
  172. package/dist/tensor-B96jjJLQ.d.ts +205 -0
  173. package/package.json +226 -0
package/dist/chunk-5R4S63PF.js (the only +2925-line file in the list above)
@@ -0,0 +1,2925 @@
+ import { GradTensor, relu, sigmoid, tanh, leakyRelu, elu, gelu, softmax2, softmax, logSoftmax2, logSoftmax, softplus, swish, mish, dropout, randn, mulScalar, parameter, zeros, reshape, dot, transpose, add, ones, variance2, noGrad, isContiguous, im2col2, Tensor, sub, pow, tensor, sum, mean, abs, clip, log, mul, neg, sqrt, computeStrides, offsetFromFlatIndex } from './chunk-BCR7G3A6.js';
+ import { __export, InvalidParameterError, DeepboxError, isDevice, IndexError, normalizeAxis, DTypeError, ShapeError, ensureNumericDType, NotFittedError, getBigIntElement, getNumericElement, dtypeToTypedArrayCtor, getElementAsNumber } from './chunk-4S73VUBD.js';
+
+ // src/nn/index.ts
+ var nn_exports = {};
+ __export(nn_exports, {
+   AvgPool2d: () => AvgPool2d,
+   BatchNorm1d: () => BatchNorm1d,
+   Conv1d: () => Conv1d,
+   Conv2d: () => Conv2d,
+   Dropout: () => Dropout,
+   ELU: () => ELU,
+   GELU: () => GELU,
+   GRU: () => GRU,
+   LSTM: () => LSTM,
+   LayerNorm: () => LayerNorm,
+   LeakyReLU: () => LeakyReLU,
+   Linear: () => Linear,
+   LogSoftmax: () => LogSoftmax,
+   MaxPool2d: () => MaxPool2d,
+   Mish: () => Mish,
+   Module: () => Module,
+   MultiheadAttention: () => MultiheadAttention,
+   RNN: () => RNN,
+   ReLU: () => ReLU,
+   Sequential: () => Sequential,
+   Sigmoid: () => Sigmoid,
+   Softmax: () => Softmax,
+   Softplus: () => Softplus,
+   Swish: () => Swish,
+   Tanh: () => Tanh,
+   TransformerEncoderLayer: () => TransformerEncoderLayer,
+   binaryCrossEntropyLoss: () => binaryCrossEntropyLoss,
+   binaryCrossEntropyWithLogitsLoss: () => binaryCrossEntropyWithLogitsLoss,
+   crossEntropyLoss: () => crossEntropyLoss,
+   huberLoss: () => huberLoss,
+   maeLoss: () => maeLoss,
+   mseLoss: () => mseLoss,
+   rmseLoss: () => rmseLoss
+ });
+
+ // src/nn/module/Module.ts
+ function shapesEqual(a, b) {
+   if (a.length !== b.length) return false;
+   for (let i = 0; i < a.length; i++) {
+     if ((a[i] ?? 0) !== (b[i] ?? 0)) return false;
+   }
+   return true;
+ }
+ function sizeFromShape(shape, context) {
+   let size = 1;
+   for (const dim of shape) {
+     if (!Number.isInteger(dim) || dim < 0) {
+       throw new ShapeError(`${context} contains invalid dimension ${String(dim)}`);
+     }
+     size *= dim;
+   }
+   return size;
+ }
+ function cloneTensorData(t) {
+   const data = t.data;
+   if (Array.isArray(data)) {
+     return data.slice();
+   }
+   if (data instanceof BigInt64Array) {
+     return Array.from(data);
+   }
+   const out = new Array(data.length);
+   for (let i = 0; i < data.length; i++) {
+     const value = data[i];
+     if (value === void 0) {
+       throw new DeepboxError("Internal error: tensor data access out of bounds");
+     }
+     out[i] = value;
+   }
+   return out;
+ }
+ function validateStateEntryShape(name, kind, entry) {
+   const size = sizeFromShape(entry.shape, `${kind} ${name} shape`);
+   if (entry.data.length !== size) {
+     throw new ShapeError(
+       `${kind} ${name} data length ${entry.data.length} does not match shape size ${size}`
+     );
+   }
+ }
+ function copyStateEntryIntoTensor(name, kind, target, entry) {
+   if (!shapesEqual(target.shape, entry.shape)) {
+     throw new ShapeError(
+       `${kind} ${name} shape mismatch: expected [${target.shape.join(", ")}], got [${entry.shape.join(", ")}]`
+     );
+   }
+   if (target.dtype !== entry.dtype) {
+     throw new DTypeError(
+       `${kind} ${name} dtype mismatch: expected ${target.dtype}, got ${entry.dtype}`
+     );
+   }
+   const size = sizeFromShape(entry.shape, `${kind} ${name} shape`);
+   const logicalStrides = computeStrides(target.shape);
+   const data = target.data;
+   if (target.dtype === "string") {
+     if (!Array.isArray(data)) {
+       throw new DTypeError(`${kind} ${name} expected string data`);
+     }
+     for (let i = 0; i < size; i++) {
+       const value = entry.data[i];
+       if (typeof value !== "string") {
+         throw new DTypeError(`${kind} ${name} expects string data`);
+       }
+       const offset = offsetFromFlatIndex(i, logicalStrides, target.strides, target.offset);
+       data[offset] = value;
+     }
+     return;
+   }
+   if (data instanceof BigInt64Array) {
+     for (let i = 0; i < size; i++) {
+       const value = entry.data[i];
+       if (typeof value !== "bigint") {
+         throw new DTypeError(`${kind} ${name} expects bigint data`);
+       }
+       const offset = offsetFromFlatIndex(i, logicalStrides, target.strides, target.offset);
+       data[offset] = value;
+     }
+     return;
+   }
+   if (Array.isArray(data)) {
+     throw new DTypeError(`${kind} ${name} expected numeric data`);
+   }
+   for (let i = 0; i < size; i++) {
+     const value = entry.data[i];
+     if (typeof value !== "number") {
+       throw new DTypeError(`${kind} ${name} expects numeric data`);
+     }
+     const offset = offsetFromFlatIndex(i, logicalStrides, target.strides, target.offset);
+     data[offset] = value;
+   }
+ }
+ var Module = class _Module {
+   /** Child modules registered to this module - stores nested layers/modules */
+   _modules = /* @__PURE__ */ new Map();
+   /** Parameters of this module - trainable tensors (weights, biases) wrapped as GradTensor */
+   _parameters = /* @__PURE__ */ new Map();
+   /** Buffers (non-trainable tensors) of this module - e.g., running stats in BatchNorm */
+   _buffers = /* @__PURE__ */ new Map();
+   /** Training mode flag - affects behavior of layers like Dropout and BatchNorm */
+   _training = true;
+   /** Forward pre-hooks registered on this module */
+   _forwardPreHooks = /* @__PURE__ */ new Map();
+   /** Forward hooks registered on this module */
+   _forwardHooks = /* @__PURE__ */ new Map();
+   /** Incrementing hook id */
+   _nextHookId = 0;
+   /**
+    * Makes the module callable (allows using `module(x)` instead of `module.forward(x)`).
+    *
+    * @param inputs - Input tensors (Tensor or GradTensor)
+    * @returns Output tensor
+    */
+   call(...inputs) {
+     let curInputs = inputs;
+     for (const hook of this._forwardPreHooks.values()) {
+       const result = hook(this, curInputs);
+       if (Array.isArray(result)) {
+         curInputs = result;
+       }
+     }
+     let output = this.forward(...curInputs);
+     for (const hook of this._forwardHooks.values()) {
+       const result = hook(this, curInputs, output);
+       if (result !== void 0) {
+         output = result;
+       }
+     }
+     return output;
+   }
+   /**
+    * Register a child module.
+    *
+    * @param name - Name of the module
+    * @param module - The module to register
+    */
+   registerModule(name, module) {
+     this._modules.set(name, module);
+   }
+   /**
+    * Register a parameter (trainable tensor).
+    *
+    * Parameters must be GradTensor instances with requiresGrad=true for
+    * proper gradient computation during backpropagation.
+    *
+    * @param name - Name of the parameter
+    * @param param - The parameter tensor (must be GradTensor)
+    */
+   registerParameter(name, param) {
+     this._parameters.set(name, param);
+   }
+   /**
+    * Register a buffer (non-trainable tensor).
+    *
+    * Buffers are typically used for running statistics in batch normalization.
+    *
+    * @param name - Name of the buffer
+    * @param buffer - The buffer tensor
+    */
+   registerBuffer(name, buffer) {
+     this._buffers.set(name, buffer);
+   }
+   /**
+    * Get all parameters of this module and its children.
+    *
+    * Returns GradTensor instances that are compatible with optimizers.
+    * This enables direct usage with optimizer constructors:
+    * ```ts
+    * const optimizer = new Adam(model.parameters());
+    * ```
+    *
+    * @param recurse - Whether to include parameters of child modules
+    * @returns Iterator of GradTensor parameters
+    */
+   *parameters(recurse = true) {
+     for (const param of this._parameters.values()) {
+       yield param;
+     }
+     if (recurse) {
+       for (const module of this._modules.values()) {
+         yield* module.parameters(true);
+       }
+     }
+   }
+   /**
+    * Get all named parameters of this module and its children.
+    *
+    * @param prefix - Prefix for parameter names
+    * @param recurse - Whether to include parameters of child modules
+    * @returns Iterator of [name, parameter] pairs
+    */
+   *namedParameters(prefix = "", recurse = true) {
+     for (const [name, param] of this._parameters.entries()) {
+       const fullName = prefix ? `${prefix}.${name}` : name;
+       yield [fullName, param];
+     }
+     if (recurse) {
+       for (const [moduleName, module] of this._modules.entries()) {
+         const fullPrefix = prefix ? `${prefix}.${moduleName}` : moduleName;
+         yield* module.namedParameters(fullPrefix, true);
+       }
+     }
+   }
+   /**
+    * Get all child modules.
+    *
+    * @param recurse - Whether to include nested child modules
+    * @returns Iterator of modules
+    */
+   *modules(recurse = true) {
+     yield this;
+     if (recurse) {
+       for (const module of this._modules.values()) {
+         yield* module.modules(true);
+       }
+     }
+   }
+   /**
+    * Get all named child modules.
+    *
+    * @param prefix - Prefix for module names
+    * @param recurse - Whether to include nested child modules
+    * @returns Iterator of [name, module] pairs
+    */
+   *namedModules(prefix = "", recurse = true) {
+     yield [prefix, this];
+     if (recurse) {
+       for (const [name, module] of this._modules.entries()) {
+         const fullName = prefix ? `${prefix}.${name}` : name;
+         yield* module.namedModules(fullName, true);
+       }
+     }
+   }
+   /**
+    * Set the module in training mode.
+    *
+    * This affects certain layers like Dropout and BatchNorm.
+    *
+    * @param mode - Training mode (true) or evaluation mode (false)
+    * @returns this
+    */
+   train(mode = true) {
+     this._training = mode;
+     for (const module of this._modules.values()) {
+       module.train(mode);
+     }
+     return this;
+   }
+   /**
+    * Set the module in evaluation mode.
+    *
+    * This is equivalent to calling `train(false)`.
+    *
+    * @returns this
+    */
+   eval() {
+     return this.train(false);
+   }
+   /**
+    * Check if the module is in training mode.
+    *
+    * @returns true if in training mode
+    */
+   get training() {
+     return this._training;
+   }
+   /**
+    * Zero out the gradients of all parameters.
+    *
+    * Call this before each training iteration to prevent gradient accumulation
+    * from previous iterations.
+    *
+    * For parameters wrapped in GradTensor, this calls zeroGrad() on each.
+    * For regular Tensors, this is a no-op until they are converted to GradTensor.
+    *
+    * @example
+    * ```ts
+    * model.zeroGrad();
+    * const output = model.forward(input);
+    * // ... compute loss and backward
+    * optimizer.step();
+    * ```
+    */
+   zeroGrad() {
+     for (const param of this.parameters()) {
+       param.zeroGrad();
+     }
+   }
+   /**
+    * Get all buffers of this module and its children.
+    */
+   *buffers(recurse = true) {
+     for (const buffer of this._buffers.values()) {
+       yield buffer;
+     }
+     if (recurse) {
+       for (const module of this._modules.values()) {
+         yield* module.buffers(true);
+       }
+     }
+   }
+   /**
+    * Get all named buffers of this module and its children.
+    */
+   *namedBuffers(prefix = "", recurse = true) {
+     for (const [name, buffer] of this._buffers.entries()) {
+       const fullName = prefix ? `${prefix}.${name}` : name;
+       yield [fullName, buffer];
+     }
+     if (recurse) {
+       for (const [moduleName, module] of this._modules.entries()) {
+         const fullPrefix = prefix ? `${prefix}.${moduleName}` : moduleName;
+         yield* module.namedBuffers(fullPrefix, true);
+       }
+     }
+   }
+   /**
+    * Freeze specific parameters by name (or all if none provided).
+    *
+    * **⚠️ IMPORTANT**: This method creates new GradTensor instances with updated
+    * `requiresGrad` flags. Any external references to the old parameter objects
+    * will become stale. If you're using an optimizer that holds parameter references,
+    * you should recreate the optimizer after freezing/unfreezing parameters.
+    *
+    * @param names - Array of parameter names to freeze (e.g., ['fc1.weight']). If undefined, freezes all parameters.
+    * @param recurse - Whether to include parameters from child modules (default: true)
+    *
+    * @example
+    * ```ts
+    * const model = new MyModel();
+    * // Freeze only the first layer's weights
+    * model.freezeParameters(['fc1.weight']);
+    * // Note: Recreate optimizer after freezing
+    * const optimizer = new Adam(model.parameters());
+    * ```
+    */
+   freezeParameters(names, recurse = true) {
+     this.setRequiresGradForNames(names, false, recurse);
+   }
+   /**
+    * Unfreeze specific parameters by name (or all if none provided).
+    *
+    * **⚠️ IMPORTANT**: This method creates new GradTensor instances with updated
+    * `requiresGrad` flags. Any external references to the old parameter objects
+    * will become stale. If you're using an optimizer that holds parameter references,
+    * you should recreate the optimizer after freezing/unfreezing parameters.
+    *
+    * @param names - Array of parameter names to unfreeze (e.g., ['fc1.weight']). If undefined, unfreezes all parameters.
+    * @param recurse - Whether to include parameters from child modules (default: true)
+    *
+    * @example
+    * ```ts
+    * const model = new MyModel();
+    * model.freezeParameters(); // Freeze all
+    * model.unfreezeParameters(['fc2.weight']); // Unfreeze only fc2 weights
+    * // Note: Recreate optimizer after unfreezing
+    * const optimizer = new Adam(model.parameters());
+    * ```
+    */
+   unfreezeParameters(names, recurse = true) {
+     this.setRequiresGradForNames(names, true, recurse);
+   }
+   setRequiresGradForNames(names, requiresGrad, recurse) {
+     const providedNames = names !== void 0;
+     const targetNames = names ?? Array.from(this.namedParameters("", recurse)).map(([name]) => name);
+     for (const name of targetNames) {
+       const resolved = this.resolveModuleAndName(name);
+       if (!resolved) {
+         if (providedNames) {
+           throw new InvalidParameterError(`Unknown parameter name: ${name}`, "names", name);
+         }
+         continue;
+       }
+       const { module, localName } = resolved;
+       const param = module._parameters.get(localName);
+       if (!param) {
+         if (providedNames) {
+           throw new InvalidParameterError(`Unknown parameter name: ${name}`, "names", name);
+         }
+         continue;
+       }
+       const nextParam = GradTensor.fromTensor(param.tensor, { requiresGrad });
+       module._parameters.set(localName, nextParam);
+       for (const [key, value] of Object.entries(module)) {
+         if (value === param) {
+           Reflect.set(module, key, nextParam);
+         }
+       }
+     }
+   }
+   resolveModuleAndName(fullName) {
+     const parts = fullName.split(".");
+     let module = this;
+     for (let i = 0; i < parts.length - 1; i++) {
+       const part = parts[i] ?? "";
+       const child = module._modules.get(part);
+       if (!child) return null;
+       module = child;
+     }
+     const localName = parts[parts.length - 1] ?? "";
+     return { module, localName };
+   }
+   static setTensorDeviceMetadata(target, device) {
+     if (!Reflect.set(target, "device", device)) {
+       throw new DeepboxError("Failed to update tensor device metadata");
+     }
+   }
+   /**
+    * Get the state dictionary of the module.
+    */
+   stateDict() {
+     const parameters = {};
+     const buffers = {};
+     for (const [name, param] of this.namedParameters()) {
+       const t = param.tensor;
+       const data = cloneTensorData(t);
+       parameters[name] = {
+         data,
+         shape: [...t.shape],
+         dtype: t.dtype
+       };
+     }
+     for (const [name, buffer] of this.namedBuffers()) {
+       const data = cloneTensorData(buffer);
+       buffers[name] = {
+         data,
+         shape: [...buffer.shape],
+         dtype: buffer.dtype
+       };
+     }
+     return { parameters, buffers };
+   }
+   /**
+    * Load state dictionary into the module.
+    */
+   loadStateDict(stateDict) {
+     const parameters = stateDict.parameters ?? {};
+     const buffers = stateDict.buffers ?? {};
+     const namedParams = new Map(this.namedParameters());
+     const namedBuffs = new Map(this.namedBuffers());
+     for (const name of namedParams.keys()) {
+       if (!(name in parameters)) {
+         throw new InvalidParameterError(`missing parameter: ${name}`, "stateDict.parameters", name);
+       }
+     }
+     for (const name of namedBuffs.keys()) {
+       if (!(name in buffers)) {
+         throw new InvalidParameterError(`missing buffer: ${name}`, "stateDict.buffers", name);
+       }
+     }
+     for (const name of Object.keys(parameters)) {
+       if (!namedParams.has(name)) {
+         throw new InvalidParameterError(
+           `unexpected parameter: ${name}`,
+           "stateDict.parameters",
+           name
+         );
+       }
+     }
+     for (const name of Object.keys(buffers)) {
+       if (!namedBuffs.has(name)) {
+         throw new InvalidParameterError(`unexpected buffer: ${name}`, "stateDict.buffers", name);
+       }
+     }
+     for (const [name, entry] of Object.entries(parameters)) {
+       const param = namedParams.get(name);
+       if (!param) continue;
+       validateStateEntryShape(name, "parameter", entry);
+       copyStateEntryIntoTensor(name, "parameter", param.tensor, entry);
+     }
+     for (const [name, entry] of Object.entries(buffers)) {
+       const buffer = namedBuffs.get(name);
+       if (!buffer) continue;
+       validateStateEntryShape(name, "buffer", entry);
+       copyStateEntryIntoTensor(name, "buffer", buffer, entry);
+     }
+   }
+   /**
+    * Move module to a specific device.
+    *
+    * **⚠️ WARNING**: This is a metadata-only operation. It updates the device
+    * property on parameters and buffers but does NOT actually transfer data
+    * between devices. Actual device data transfer requires device-specific
+    * memory management which is not yet implemented.
+    *
+    * This method is provided for API compatibility and future extensibility.
+    * Currently, it only updates the `device` metadata field.
+    *
+    * @param device - Target device identifier (e.g., 'cpu', 'webgpu', 'wasm')
+    * @returns this module for method chaining
+    *
+    * @example
+    * ```ts
+    * const model = new Linear(10, 5);
+    * model.to('webgpu'); // Updates device metadata only
+    * ```
+    */
+   to(device) {
+     if (!isDevice(device)) {
+       throw new InvalidParameterError("device must be one of: cpu, webgpu, wasm", "device", device);
+     }
+     for (const param of this.parameters()) {
+       _Module.setTensorDeviceMetadata(param.tensor, device);
+     }
+     for (const buffer of this.buffers()) {
+       _Module.setTensorDeviceMetadata(buffer, device);
+     }
+     return this;
+   }
+   /**
+    * Apply a function to all modules recursively.
+    */
+   apply(fn) {
+     for (const module of this.modules()) {
+       fn(module);
+     }
+     return this;
+   }
+   /**
+    * Register a forward pre-hook.
+    */
+   registerForwardPreHook(hook) {
+     const hookId = this._nextHookId++;
+     this._forwardPreHooks.set(hookId, hook);
+     return () => {
+       this._forwardPreHooks.delete(hookId);
+     };
+   }
+   /**
+    * Register a forward hook.
+    */
+   registerForwardHook(hook) {
+     const hookId = this._nextHookId++;
+     this._forwardHooks.set(hookId, hook);
+     return () => {
+       this._forwardHooks.delete(hookId);
+     };
+   }
+   /**
+    * Get string representation of the module.
+    *
+    * @returns Hierarchical string representation showing module structure
+    */
+   toString() {
+     const lines = [`${this.constructor.name}(`];
+     for (const [name, module] of this._modules.entries()) {
+       const childLines = module.toString().split("\n");
+       const moduleStr = childLines.map((line, i) => i === 0 ? line : `  ${line}`).join("\n");
+       lines.push(`  (${name}): ${moduleStr}`);
+     }
+     lines.push(")");
+     return lines.join("\n");
+   }
+ };
+
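For orientation, here is a minimal usage sketch of the `Module` registry above. It assumes the published `deepbox/nn` subpath export (per the `dist/nn/` entries in the file list) re-exports `Module` and `Linear`; the `TinyNet` class itself is hypothetical.

```ts
// Minimal sketch, assuming "deepbox/nn" re-exports Module and Linear.
import { Module, Linear } from "deepbox/nn";

class TinyNet extends Module {
  constructor() {
    super();
    this.fc1 = new Linear(4, 8);
    this.fc2 = new Linear(8, 1);
    // registerModule is what makes parameters()/train()/toString() recurse
    this.registerModule("fc1", this.fc1);
    this.registerModule("fc2", this.fc2);
  }
  forward(x) {
    // call() runs forward pre-hooks, then forward(), then forward hooks
    return this.fc2.call(this.fc1.call(x));
  }
}

const net = new TinyNet();
console.log([...net.namedParameters()].map(([name]) => name));
// -> ["fc1.weight", "fc1.bias", "fc2.weight", "fc2.bias"]
```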
+ // src/nn/containers/Sequential.ts
+ var Sequential = class extends Module {
+   /** Array of layers in sequential order */
+   layers;
+   /**
+    * Create a new Sequential container.
+    *
+    * @param layers - Variable number of Module instances to stack sequentially
+    * @throws {InvalidParameterError} If no layers are provided
+    * @throws {DeepboxError} If a layer is undefined
+    */
+   constructor(...layers) {
+     super();
+     if (layers.length === 0) {
+       throw new InvalidParameterError(
+         "Sequential requires at least one layer",
+         "layers",
+         layers.length
+       );
+     }
+     this.layers = layers;
+     for (let i = 0; i < layers.length; i++) {
+       const layer = layers[i];
+       if (!layer) {
+         throw new DeepboxError(`Layer at index ${i} is undefined`);
+       }
+       this.registerModule(String(i), layer);
+     }
+   }
+   /**
+    * Forward pass: sequentially apply all layers.
+    *
+    * The output of each layer becomes the input to the next layer.
+    *
+    * @param input - Input tensor (Tensor or GradTensor)
+    * @returns Output tensor after passing through all layers
+    * @throws {InvalidParameterError} If the input count is invalid or a layer returns multiple outputs
+    * @throws {DeepboxError} If a layer is undefined
+    */
+   forward(...inputs) {
+     if (inputs.length !== 1) {
+       throw new InvalidParameterError(
+         "Sequential.forward expects a single input tensor",
+         "inputs",
+         inputs.length
+       );
+     }
+     const input = inputs[0];
+     if (!input) {
+       throw new InvalidParameterError(
+         "Sequential.forward expects a single input tensor",
+         "input",
+         input
+       );
+     }
+     let output = input;
+     for (let i = 0; i < this.layers.length; i++) {
+       const layer = this.layers[i];
+       if (!layer) {
+         throw new DeepboxError(`Layer at index ${i} is undefined`);
+       }
+       const result = layer.call(output);
+       if (Array.isArray(result)) {
+         throw new InvalidParameterError(
+           `Sequential does not support layers that return multiple tensors (layer ${i})`,
+           "layer",
+           i
+         );
+       }
+       output = result;
+     }
+     return output;
+   }
+   /**
+    * Get a layer by index.
+    *
+    * @param index - Zero-based index of the layer
+    * @returns The layer at the specified index
+    * @throws {IndexError} If index is out of bounds
+    * @throws {DeepboxError} If a layer is undefined
+    */
+   getLayer(index) {
+     if (index < 0 || index >= this.layers.length) {
+       throw new IndexError(`Layer index ${index} out of bounds [0, ${this.layers.length})`, {
+         index,
+         validRange: [0, this.layers.length - 1]
+       });
+     }
+     const layer = this.layers[index];
+     if (!layer) {
+       throw new DeepboxError(`Layer at index ${index} is undefined`);
+     }
+     return layer;
+   }
+   /**
+    * Get the number of layers in the sequential container.
+    */
+   get length() {
+     return this.layers.length;
+   }
+   /**
+    * Get string representation showing all layers.
+    *
+    * @returns Multi-line string with each layer on a separate line
+    */
+   toString() {
+     const lines = ["Sequential("];
+     for (let i = 0; i < this.layers.length; i++) {
+       const layer = this.layers[i];
+       if (!layer) continue;
+       const childLines = layer.toString().split("\n");
+       const layerStr = childLines.map((line, idx) => idx === 0 ? line : `  ${line}`).join("\n");
+       lines.push(`  (${i}): ${layerStr}`);
+     }
+     lines.push(")");
+     return lines.join("\n");
+   }
+   /**
+    * Iterate over all layers.
+    *
+    * @returns Iterator of layers
+    */
+   *[Symbol.iterator]() {
+     for (const layer of this.layers) {
+       yield layer;
+     }
+   }
+ };
+
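A short usage sketch for `Sequential` (same assumed `deepbox/nn` import path as above):

```ts
import { Sequential, Linear, ReLU } from "deepbox/nn"; // assumed subpath export

const model = new Sequential(new Linear(4, 8), new ReLU(), new Linear(8, 1));
console.log(model.length); // 3
console.log(model.toString());
// Sequential(
//   (0): Linear(in_features=4, out_features=8, bias=true)
//   (1): ReLU()
//   (2): Linear(in_features=8, out_features=1, bias=true)
// )

// given some input tensor x of shape (batch, 4):
// const y = model.call(x); // threads x through layers 0, 1, 2 in order
```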
+ // src/nn/layers/activations.ts
+ var ReLU = class extends Module {
+   forward(input) {
+     if (input instanceof GradTensor) return input.relu();
+     return relu(input);
+   }
+   toString() {
+     return "ReLU()";
+   }
+ };
+ var Sigmoid = class extends Module {
+   forward(input) {
+     if (input instanceof GradTensor) return input.sigmoid();
+     return sigmoid(input);
+   }
+   toString() {
+     return "Sigmoid()";
+   }
+ };
+ var Tanh = class extends Module {
+   forward(input) {
+     if (input instanceof GradTensor) return input.tanh();
+     return tanh(input);
+   }
+   toString() {
+     return "Tanh()";
+   }
+ };
+ var LeakyReLU = class extends Module {
+   alpha;
+   constructor(alpha = 0.01) {
+     super();
+     this.alpha = alpha;
+   }
+   forward(input) {
+     if (input instanceof GradTensor) return input.leakyRelu(this.alpha);
+     return leakyRelu(input, this.alpha);
+   }
+   toString() {
+     return `LeakyReLU(alpha=${this.alpha})`;
+   }
+ };
+ var ELU = class extends Module {
+   alpha;
+   constructor(alpha = 1) {
+     super();
+     this.alpha = alpha;
+   }
+   forward(input) {
+     if (input instanceof GradTensor) return input.elu(this.alpha);
+     return elu(input, this.alpha);
+   }
+   toString() {
+     return `ELU(alpha=${this.alpha})`;
+   }
+ };
+ var GELU = class extends Module {
+   forward(input) {
+     if (input instanceof GradTensor) return input.gelu();
+     return gelu(input);
+   }
+   toString() {
+     return "GELU()";
+   }
+ };
+ var Softmax = class extends Module {
+   axis;
+   constructor(axis = -1) {
+     super();
+     this.axis = axis;
+   }
+   forward(input) {
+     if (input instanceof GradTensor) {
+       return softmax2(input, normalizeAxis(this.axis, input.tensor.ndim));
+     }
+     return softmax(input, this.axis);
+   }
+   toString() {
+     return `Softmax(axis=${this.axis})`;
+   }
+ };
+ var LogSoftmax = class extends Module {
+   axis;
+   constructor(axis = -1) {
+     super();
+     this.axis = axis;
+   }
+   forward(input) {
+     if (input instanceof GradTensor) {
+       return logSoftmax2(input, normalizeAxis(this.axis, input.tensor.ndim));
+     }
+     return logSoftmax(input, this.axis);
+   }
+   toString() {
+     return `LogSoftmax(axis=${this.axis})`;
+   }
+ };
+ var Softplus = class extends Module {
+   forward(input) {
+     if (input instanceof GradTensor) {
+       return GradTensor.fromTensor(softplus(input.tensor), {
+         requiresGrad: false
+       });
+     }
+     return softplus(input);
+   }
+   toString() {
+     return "Softplus()";
+   }
+ };
+ var Swish = class extends Module {
+   forward(input) {
+     if (input instanceof GradTensor) {
+       return GradTensor.fromTensor(swish(input.tensor), {
+         requiresGrad: false
+       });
+     }
+     return swish(input);
+   }
+   toString() {
+     return "Swish()";
+   }
+ };
+ var Mish = class extends Module {
+   forward(input) {
+     if (input instanceof GradTensor) {
+       return GradTensor.fromTensor(mish(input.tensor), {
+         requiresGrad: false
+       });
+     }
+     return mish(input);
+   }
+   toString() {
+     return "Mish()";
+   }
+ };
+
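One behavioral detail worth flagging in the activation classes above: ReLU, Sigmoid, Tanh, LeakyReLU, ELU, GELU, Softmax, and LogSoftmax route GradTensor inputs through GradTensor ops, whereas Softplus, Swish, and Mish wrap their result with `requiresGrad: false`, so they detach the autograd graph in 0.1.0. A sketch (import path assumed, as before):

```ts
import { GELU, Mish } from "deepbox/nn"; // assumed subpath export

// given a GradTensor x with requiresGrad=true:
// const a = new GELU().call(x); // stays on the autograd graph (input.gelu())
// const b = new Mish().call(x); // detached: result is wrapped with
//                               // requiresGrad: false, so no gradient flows
```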
+ // src/nn/layers/dropout.ts
+ var Dropout = class extends Module {
+   /** Probability of an element being zeroed (dropout rate) */
+   p;
+   /**
+    * Create a new Dropout layer.
+    *
+    * @param p - Probability of an element being zeroed (0 <= p < 1)
+    * @throws {InvalidParameterError} If p is not in valid range [0, 1)
+    */
+   constructor(p = 0.5) {
+     super();
+     if (!Number.isFinite(p) || p < 0 || p >= 1) {
+       throw new InvalidParameterError(`Dropout probability must be in [0, 1), got ${p}`, "p", p);
+     }
+     this.p = p;
+   }
+   /**
+    * Forward pass: apply dropout during training, identity during evaluation.
+    *
+    * @param input - Input tensor of any shape (Tensor or GradTensor)
+    * @returns Output tensor with same shape as input
+    */
+   forward(input) {
+     const inputTensor = input instanceof GradTensor ? input : GradTensor.fromTensor(input);
+     if (inputTensor.dtype === "string") {
+       throw new DTypeError("Dropout does not support string dtype");
+     }
+     return dropout(inputTensor, this.p, this.training);
+   }
+   /**
+    * Get string representation of the layer.
+    *
+    * @returns String representation with dropout probability
+    */
+   toString() {
+     return `Dropout(p=${this.p})`;
+   }
+   /**
+    * Get the dropout probability.
+    */
+   get dropoutRate() {
+     return this.p;
+   }
+ };
+
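Usage sketch for `Dropout`, showing how the training/evaluation split is driven by `Module.train()`/`eval()`. The import path is assumed as before; the `dropout` kernel itself lives in chunk-BCR7G3A6.js and is not shown in this diff, so only the mode switch is illustrated here:

```ts
import { Dropout } from "deepbox/nn"; // assumed subpath export

const drop = new Dropout(0.5);

// given a numeric tensor x:
drop.train();                  // this.training = true
// const noisy = drop.call(x); // dropout(x, 0.5, true): elements randomly zeroed

drop.eval();                   // this.training = false
// const same = drop.call(x);  // dropout(x, 0.5, false): identity pass-through
```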
+ // src/nn/layers/linear.ts
+ var Linear = class extends Module {
+   /** Weight matrix of shape (out_features, in_features) */
+   weight;
+   weightParam;
+   /** Bias vector of shape (out_features,) */
+   bias;
+   biasParam;
+   /** Number of input features */
+   inFeatures;
+   /** Number of output features */
+   outFeatures;
+   /** Whether this layer has a bias */
+   useBias;
+   /**
+    * Create a new Linear layer.
+    *
+    * @param inFeatures - Size of each input sample
+    * @param outFeatures - Size of each output sample
+    * @param options - Configuration options
+    * @param options.bias - If true, add learnable bias (default: true)
+    * @param options.dtype - Data type for weights (default: 'float32')
+    * @param options.device - Device to place tensors on (default: 'cpu')
+    */
+   constructor(inFeatures, outFeatures, options = {}) {
+     super();
+     if (inFeatures <= 0 || !Number.isInteger(inFeatures)) {
+       throw new InvalidParameterError(
+         "inFeatures must be a positive integer",
+         "inFeatures",
+         inFeatures
+       );
+     }
+     if (outFeatures <= 0 || !Number.isInteger(outFeatures)) {
+       throw new InvalidParameterError(
+         "outFeatures must be a positive integer",
+         "outFeatures",
+         outFeatures
+       );
+     }
+     this.inFeatures = inFeatures;
+     this.outFeatures = outFeatures;
+     this.useBias = options.bias ?? true;
+     const stdDev = Math.sqrt(2 / inFeatures);
+     const weightTensor = randn([outFeatures, inFeatures], {
+       dtype: options.dtype ?? "float32",
+       device: options.device ?? "cpu"
+     });
+     const scaledWeight = mulScalar(weightTensor, stdDev);
+     this.weightParam = parameter(scaledWeight);
+     this.weight = this.weightParam.tensor;
+     this.registerParameter("weight", this.weightParam);
+     if (this.useBias) {
+       const biasTensor = zeros([outFeatures], {
+         dtype: options.dtype ?? "float32",
+         device: options.device ?? "cpu"
+       });
+       this.biasParam = parameter(biasTensor);
+       this.bias = this.biasParam.tensor;
+       this.registerParameter("bias", this.biasParam);
+     }
+   }
+   forward(input) {
+     const inputTensor = input instanceof GradTensor ? input.tensor : input;
+     if (inputTensor.dtype === "string") {
+       throw new DTypeError("Linear layer does not support string dtype");
+     }
+     if (inputTensor.ndim < 1) {
+       throw new ShapeError(`Linear layer expects at least 1D input; got ndim=${inputTensor.ndim}`);
+     }
+     const inputFeatures = inputTensor.shape[inputTensor.shape.length - 1] ?? 0;
+     if (inputFeatures !== this.inFeatures) {
+       throw new ShapeError(
+         `Linear layer expects ${this.inFeatures} input features; got ${inputFeatures}`
+       );
+     }
+     const isVectorInput = inputTensor.ndim === 1;
+     const batchSize = inputTensor.size / this.inFeatures;
+     const outputShape = isVectorInput ? [this.outFeatures] : [...inputTensor.shape.slice(0, -1), this.outFeatures];
+     if (input instanceof GradTensor) {
+       const input2d2 = input.reshape([batchSize, this.inFeatures]);
+       const output2d2 = input2d2.matmul(this.weightParam.transpose());
+       let output2 = output2d2.reshape(outputShape);
+       if (this.useBias && this.biasParam) {
+         output2 = output2.add(this.biasParam);
+       }
+       return output2;
+     }
+     const input2d = reshape(inputTensor, [batchSize, this.inFeatures]);
+     const output2d = dot(input2d, transpose(this.weight));
+     const output = reshape(output2d, outputShape);
+     if (this.useBias && this.bias) {
+       return add(output, this.bias);
+     }
+     return output;
+   }
+   /**
+    * Get extra representation string for this layer.
+    *
+    * @returns String representation of layer parameters
+    */
+   toString() {
+     const biasStr = this.useBias ? "bias=true" : "bias=false";
+     return `Linear(in_features=${this.inFeatures}, out_features=${this.outFeatures}, ${biasStr})`;
+   }
+   /**
+    * Get the weight matrix.
+    *
+    * @returns Weight tensor of shape (out_features, in_features)
+    */
+   getWeight() {
+     return this.weight;
+   }
+   /**
+    * Get the bias vector.
+    *
+    * @returns Bias tensor of shape (out_features,) or undefined if no bias
+    */
+   getBias() {
+     return this.bias;
+   }
+   /**
+    * Get the number of input features.
+    */
+   get inputSize() {
+     return this.inFeatures;
+   }
+   /**
+    * Get the number of output features.
+    */
+   get outputSize() {
+     return this.outFeatures;
+   }
+ };
+
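Shape behavior of `Linear.forward` above, as a sketch: the trailing axis must equal `inFeatures`; leading axes are flattened into a batch for the matmul and restored afterwards. Weights are drawn as `randn * sqrt(2 / inFeatures)` (He-style init) and the bias starts at zeros.

```ts
import { Linear } from "deepbox/nn"; // assumed subpath export

const fc = new Linear(3, 2); // weight: (2, 3), bias: (2,)

// given tensors of these shapes:
//   x1: (5, 3)    -> fc.call(x1) has shape (5, 2)
//   x2: (4, 5, 3) -> fc.call(x2) has shape (4, 5, 2)
//   x3: (3,)      -> fc.call(x3) has shape (2,)   (1D input = single sample)
//   x4: (5, 4)    -> throws ShapeError: expects 3 input features, got 4
```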
+ // src/nn/layers/normalization.ts
+ function toContiguousTensor(t) {
+   if (isContiguous(t.shape, t.strides)) {
+     return t;
+   }
+   if (t.dtype === "string") {
+     throw new DTypeError("Normalization does not support string dtype");
+   }
+   const Ctor = dtypeToTypedArrayCtor(t.dtype);
+   const out = new Ctor(t.size);
+   const logicalStrides = computeStrides(t.shape);
+   const data = t.data;
+   if (Array.isArray(data)) {
+     throw new DTypeError("Normalization does not support string dtype");
+   }
+   if (data instanceof BigInt64Array) {
+     if (!(out instanceof BigInt64Array)) {
+       throw new DTypeError("Expected int64 output buffer for int64 tensor");
+     }
+     for (let i = 0; i < t.size; i++) {
+       const offset = offsetFromFlatIndex(i, logicalStrides, t.strides, t.offset);
+       out[i] = getBigIntElement(data, offset);
+     }
+   } else {
+     if (out instanceof BigInt64Array) {
+       throw new DTypeError("Unexpected int64 output buffer for numeric tensor");
+     }
+     for (let i = 0; i < t.size; i++) {
+       const offset = offsetFromFlatIndex(i, logicalStrides, t.strides, t.offset);
+       out[i] = getNumericElement(data, offset);
+     }
+   }
+   return Tensor.fromTypedArray({
+     data: out,
+     shape: t.shape,
+     dtype: t.dtype,
+     device: t.device
+   });
+ }
+ var BatchNorm1d = class extends Module {
+   numFeatures;
+   eps;
+   momentum;
+   affine;
+   trackRunningStats;
+   gamma;
+   beta;
+   runningMean;
+   runningVar;
+   constructor(numFeatures, options = {}) {
+     super();
+     if (!Number.isFinite(numFeatures) || numFeatures <= 0 || Math.trunc(numFeatures) !== numFeatures) {
+       throw new InvalidParameterError(
+         "numFeatures must be a positive integer",
+         "numFeatures",
+         numFeatures
+       );
+     }
+     this.numFeatures = numFeatures;
+     this.eps = options.eps ?? 1e-5;
+     if (!Number.isFinite(this.eps) || this.eps <= 0) {
+       throw new InvalidParameterError("eps must be a positive number", "eps", this.eps);
+     }
+     this.momentum = options.momentum ?? 0.1;
+     if (!Number.isFinite(this.momentum) || this.momentum < 0 || this.momentum > 1) {
+       throw new InvalidParameterError(
+         "momentum must be in range [0, 1]",
+         "momentum",
+         this.momentum
+       );
+     }
+     this.affine = options.affine ?? true;
+     this.trackRunningStats = options.trackRunningStats ?? true;
+     if (this.affine) {
+       const gamma = ones([numFeatures]);
+       const beta = zeros([numFeatures]);
+       this.gamma = parameter(gamma);
+       this.beta = parameter(beta);
+       this.registerParameter("weight", this.gamma);
+       this.registerParameter("bias", this.beta);
+     }
+     this.runningMean = GradTensor.fromTensor(zeros([numFeatures]), {
+       requiresGrad: false
+     });
+     this.runningVar = GradTensor.fromTensor(ones([numFeatures]), {
+       requiresGrad: false
+     });
+     if (this.trackRunningStats) {
+       this.registerBuffer("running_mean", this.runningMean.tensor);
+       this.registerBuffer("running_var", this.runningVar.tensor);
+     }
+   }
+   forward(x) {
+     const input = x instanceof GradTensor ? x : GradTensor.fromTensor(x);
+     const inputDtype = input.dtype;
+     if (inputDtype === "string") {
+       throw new DTypeError("BatchNorm1d does not support string dtype");
+     }
+     if (input.ndim !== 2 && input.ndim !== 3) {
+       throw new ShapeError(`BatchNorm1d expects 2D or 3D input; got ndim=${input.ndim}`);
+     }
+     const nFeatures = input.shape[1] ?? 0;
+     if (nFeatures !== this.numFeatures) {
+       throw new ShapeError(`Expected ${this.numFeatures} features, got ${nFeatures}`);
+     }
+     const useBatchStats = this.training || !this.trackRunningStats;
+     let mean2;
+     let varVal;
+     let inputReshaped = input;
+     if (input.ndim === 3) {
+       const batch = input.shape[0] ?? 0;
+       const length = input.shape[2] ?? 0;
+       const flat = batch * length;
+       const numericInputDtype = ensureNumericDType(inputDtype, "BatchNorm1d");
+       inputReshaped = input.transpose([0, 2, 1]).mul(GradTensor.scalar(1, { dtype: numericInputDtype })).reshape([flat, nFeatures]);
+     }
+     if (useBatchStats) {
+       if (inputReshaped.shape[0] === 0) {
+         throw new InvalidParameterError(
+           "BatchNorm requires at least one element",
+           "input",
+           input.shape
+         );
+       }
+       mean2 = inputReshaped.mean(0);
+       varVal = variance2(inputReshaped, 0, 0);
+       if (this.trackRunningStats) {
+         noGrad(() => {
+           const n = inputReshaped.shape[0] ?? 0;
+           const unbiasedVar = n > 1 ? variance2(inputReshaped, 0, 1) : variance2(inputReshaped, 0, 0);
+           const m = this.momentum;
+           const statsDtype = this.runningMean.dtype;
+           if (statsDtype === "string") {
+             throw new DTypeError("BatchNorm running statistics must be numeric");
+           }
+           const oneMinusM = GradTensor.scalar(1 - m, { dtype: statsDtype });
+           const mScalar = GradTensor.scalar(m, { dtype: statsDtype });
+           const newMean = this.runningMean.mul(oneMinusM).add(mean2.mul(mScalar));
+           const newVar = this.runningVar.mul(oneMinusM).add(unbiasedVar.mul(mScalar));
+           this.runningMean = GradTensor.fromTensor(newMean.tensor, {
+             requiresGrad: false
+           });
+           this.runningVar = GradTensor.fromTensor(newVar.tensor, {
+             requiresGrad: false
+           });
+           this.registerBuffer("running_mean", this.runningMean.tensor);
+           this.registerBuffer("running_var", this.runningVar.tensor);
+         });
+       }
+     } else {
+       mean2 = this.runningMean;
+       varVal = this.runningVar;
+     }
+     let meanBroadcast = mean2;
+     let varBroadcast = varVal;
+     if (input.ndim === 3) {
+       meanBroadcast = mean2.reshape([1, nFeatures, 1]);
+       varBroadcast = varVal.reshape([1, nFeatures, 1]);
+     } else {
+       meanBroadcast = mean2.reshape([1, nFeatures]);
+       varBroadcast = varVal.reshape([1, nFeatures]);
+     }
+     const epsTensor = GradTensor.scalar(this.eps, { dtype: inputDtype });
+     const denom = varBroadcast.add(epsTensor).sqrt();
+     let out = input.sub(meanBroadcast).div(denom);
+     if (this.affine && this.gamma && this.beta) {
+       let gammaB = this.gamma;
+       let betaB = this.beta;
+       if (input.ndim === 3) {
+         gammaB = this.gamma.reshape([1, nFeatures, 1]);
+         betaB = this.beta.reshape([1, nFeatures, 1]);
+       } else {
+         gammaB = this.gamma.reshape([1, nFeatures]);
+         betaB = this.beta.reshape([1, nFeatures]);
+       }
+       out = out.mul(gammaB).add(betaB);
+     }
+     return out;
+   }
+   toString() {
+     return `BatchNorm1d(${this.numFeatures}, eps=${this.eps}, momentum=${this.momentum}, affine=${this.affine})`;
+   }
+ };
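Sketch of the two `BatchNorm1d` regimes implemented above (import path assumed, as before):

```ts
import { BatchNorm1d } from "deepbox/nn"; // assumed subpath export

const bn = new BatchNorm1d(8, { momentum: 0.1 });

bn.train();
// const out = bn.call(x);  // x: (batch, 8) or (batch, 8, length)
// normalizes with the batch mean/variance, then updates the buffers:
//   running = (1 - momentum) * running + momentum * batchStat
// (the unbiased variance is used for the running estimate when batch > 1)

bn.eval();
// const out = bn.call(x);  // now normalizes with running_mean / running_var
```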
+ var LayerNorm = class extends Module {
+   normalizedShape;
+   eps;
+   elementwiseAffine;
+   gamma;
+   beta;
+   constructor(normalizedShape, options = {}) {
+     super();
+     this.normalizedShape = typeof normalizedShape === "number" ? [normalizedShape] : Array.from(normalizedShape);
+     if (this.normalizedShape.length === 0) {
+       throw new InvalidParameterError(
+         "normalizedShape must contain at least one dimension",
+         "normalizedShape",
+         normalizedShape
+       );
+     }
+     for (const dim of this.normalizedShape) {
+       if (!Number.isFinite(dim) || dim <= 0 || Math.trunc(dim) !== dim) {
+         throw new InvalidParameterError(
+           "All dimensions in normalizedShape must be positive integers",
+           "normalizedShape",
+           normalizedShape
+         );
+       }
+     }
+     this.eps = options.eps ?? 1e-5;
+     if (!Number.isFinite(this.eps) || this.eps <= 0) {
+       throw new InvalidParameterError("eps must be a positive number", "eps", this.eps);
+     }
+     this.elementwiseAffine = options.elementwiseAffine ?? true;
+     if (this.elementwiseAffine) {
+       this.gamma = parameter(ones(this.normalizedShape));
+       this.beta = parameter(zeros(this.normalizedShape));
+       this.registerParameter("weight", this.gamma);
+       this.registerParameter("bias", this.beta);
+     }
+   }
+   forward(x) {
+     const input = x instanceof GradTensor ? x : GradTensor.fromTensor(x);
+     const inputDtype = input.dtype;
+     if (inputDtype === "string") {
+       throw new DTypeError("LayerNorm does not support string dtype");
+     }
+     let workingInput = input;
+     if (!isContiguous(input.tensor.shape, input.tensor.strides)) {
+       const contiguous = toContiguousTensor(input.tensor);
+       workingInput = GradTensor.fromTensor(contiguous, {
+         requiresGrad: input.requiresGrad
+       });
+     }
+     const inputShape = workingInput.shape;
+     const normShape = this.normalizedShape;
+     if (normShape.length > inputShape.length) {
+       throw new ShapeError(`Input shape ${inputShape} too small for normalizedShape ${normShape}`);
+     }
+     const suffixStart = inputShape.length - normShape.length;
+     for (let i = 0; i < normShape.length; i++) {
+       if (inputShape[suffixStart + i] !== normShape[i]) {
+         throw new ShapeError(
+           `Input shape ${inputShape} does not end with normalizedShape ${normShape}`
+         );
+       }
+     }
+     const outerDims = inputShape.slice(0, suffixStart);
+     const normSize = normShape.reduce((a, b) => a * b, 1);
+     const flattenedShape = [...outerDims, normSize];
+     const inputReshaped = workingInput.reshape(flattenedShape);
+     const mean2 = inputReshaped.mean(-1, true);
+     const varVal = variance2(inputReshaped, -1, 0);
+     const varReshaped = varVal.reshape(mean2.shape);
+     const epsTensor = GradTensor.scalar(this.eps, { dtype: inputDtype });
+     const denom = varReshaped.add(epsTensor).sqrt();
+     const normalizedReshaped = inputReshaped.sub(mean2).div(denom);
+     let out = normalizedReshaped.reshape(inputShape);
+     if (this.elementwiseAffine && this.gamma && this.beta) {
+       out = out.mul(this.gamma).add(this.beta);
+     }
+     return out;
+   }
+   toString() {
+     return `LayerNorm(${this.normalizedShape}, eps=${this.eps}, elementwise_affine=${this.elementwiseAffine})`;
+   }
+ };
+
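And the corresponding `LayerNorm` sketch: statistics are computed per sample over the trailing `normalizedShape` axes rather than across the batch, so unlike BatchNorm1d it behaves identically in train and eval mode.

```ts
import { LayerNorm } from "deepbox/nn"; // assumed subpath export

const ln = new LayerNorm([16]); // gamma, beta: shape (16,)

// given x of shape (batch, seq, 16):
// each (batch, seq) slice is normalized by its own mean/variance over the
// last axis, then scaled by gamma and shifted by beta elementwise.
// const out = ln.call(x);     // same shape as x: (batch, seq, 16)
```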
+ // src/nn/layers/attention.ts
+ var MultiheadAttention = class extends Module {
+   /** Embedding dimension */
+   embedDim;
+   /** Number of attention heads */
+   numHeads;
+   /** Dimension of each head */
+   headDim;
+   /** Scaling factor for dot product attention */
+   scale;
+   /** Whether to add bias to projections */
+   useBias;
+   /** Dropout probability applied to attention weights */
+   dropout;
+   /** Query projection weights (embedDim, embedDim) */
+   wQ;
+   bQ;
+   /** Key projection weights (embedDim, embedDim) */
+   wK;
+   bK;
+   /** Value projection weights (embedDim, embedDim) */
+   wV;
+   bV;
+   /** Output projection weights (embedDim, embedDim) */
+   wO;
+   bO;
+   /**
+    * Create a new MultiheadAttention layer.
+    *
+    * @param embedDim - Total dimension of the model (must be divisible by numHeads)
+    * @param numHeads - Number of parallel attention heads
+    * @param options - Configuration options
+    * @param options.bias - Whether to add bias to projections (default: true)
+    * @param options.dropout - Dropout probability applied to attention weights (default: 0.0)
+    */
+   constructor(embedDim, numHeads, options = {}) {
+     super();
+     if (!Number.isInteger(embedDim) || embedDim <= 0) {
+       throw new InvalidParameterError("embedDim must be a positive integer", "embedDim", embedDim);
+     }
+     if (!Number.isInteger(numHeads) || numHeads <= 0) {
+       throw new InvalidParameterError("numHeads must be a positive integer", "numHeads", numHeads);
+     }
+     if (embedDim % numHeads !== 0) {
+       throw new InvalidParameterError(
+         `embedDim (${embedDim}) must be divisible by numHeads (${numHeads})`,
+         "embedDim",
+         embedDim
+       );
+     }
+     const dropout2 = options.dropout ?? 0;
+     if (!Number.isFinite(dropout2) || dropout2 < 0 || dropout2 >= 1) {
+       throw new InvalidParameterError("dropout must be in [0, 1)", "dropout", dropout2);
+     }
+     this.embedDim = embedDim;
+     this.numHeads = numHeads;
+     this.headDim = embedDim / numHeads;
+     this.scale = Math.sqrt(this.headDim);
+     this.useBias = options.bias ?? true;
+     this.dropout = dropout2;
+     const stdDev = Math.sqrt(2 / (embedDim + embedDim));
+     this.wQ = parameter(mulScalar(randn([embedDim, embedDim]), stdDev));
+     this.wK = parameter(mulScalar(randn([embedDim, embedDim]), stdDev));
+     this.wV = parameter(mulScalar(randn([embedDim, embedDim]), stdDev));
+     this.wO = parameter(mulScalar(randn([embedDim, embedDim]), stdDev));
+     this.registerParameter("in_proj_weight_q", this.wQ);
+     this.registerParameter("in_proj_weight_k", this.wK);
+     this.registerParameter("in_proj_weight_v", this.wV);
+     this.registerParameter("out_proj_weight", this.wO);
+     if (this.useBias) {
+       this.bQ = parameter(zeros([embedDim]));
+       this.bK = parameter(zeros([embedDim]));
+       this.bV = parameter(zeros([embedDim]));
+       this.bO = parameter(zeros([embedDim]));
+       this.registerParameter("in_proj_bias_q", this.bQ);
+       this.registerParameter("in_proj_bias_k", this.bK);
+       this.registerParameter("in_proj_bias_v", this.bV);
+       this.registerParameter("out_proj_bias", this.bO);
+     }
+   }
+   /**
+    * Forward pass of multi-head attention.
+    *
+    * @param query - Query tensor of shape (batch, seqLen, embedDim)
+    * @param key - Key tensor of shape (batch, seqLen, embedDim)
+    * @param value - Value tensor of shape (batch, seqLen, embedDim)
+    * @returns Output tensor of same shape as query
+    */
+   forward(...inputs) {
+     if (inputs.length < 1 || inputs.length > 3) {
+       throw new InvalidParameterError(
+         "MultiheadAttention.forward expects 1 to 3 input tensors",
+         "inputs",
+         inputs.length
+       );
+     }
+     const queryInput = inputs[0];
+     if (queryInput === void 0) {
+       throw new InvalidParameterError("Query tensor is required", "query", queryInput);
+     }
+     const query = queryInput instanceof GradTensor ? queryInput : GradTensor.fromTensor(queryInput);
+     const keyInput = inputs[1] ?? queryInput;
+     const key = keyInput instanceof GradTensor ? keyInput : GradTensor.fromTensor(keyInput);
+     const valueInput = inputs[2] ?? queryInput;
+     const value = valueInput instanceof GradTensor ? valueInput : GradTensor.fromTensor(valueInput);
+     if (query.dtype === "string") throw new DTypeError("String tensors are not supported");
+     if (query.ndim !== key.ndim || query.ndim !== value.ndim) {
+       throw new ShapeError("query, key, and value must have same rank");
+     }
+     if (query.ndim !== 2 && query.ndim !== 3) {
+       throw new ShapeError(`Query must be 2D or 3D; got ndim=${query.ndim}`);
+     }
+     if (key.ndim !== 2 && key.ndim !== 3) {
+       throw new ShapeError(`Key must be 2D or 3D; got ndim=${key.ndim}`);
+     }
+     if (value.ndim !== 2 && value.ndim !== 3) {
+       throw new ShapeError(`Value must be 2D or 3D; got ndim=${value.ndim}`);
+     }
+     let q = query;
+     let k = key;
+     let v = value;
+     if (q.ndim === 2) q = q.reshape([1, q.shape[0] ?? 0, q.shape[1] ?? 0]);
+     if (k.ndim === 2) k = k.reshape([1, k.shape[0] ?? 0, k.shape[1] ?? 0]);
+     if (v.ndim === 2) v = v.reshape([1, v.shape[0] ?? 0, v.shape[1] ?? 0]);
+     const batchSize = q.shape[0] ?? 0;
+     const seqLenQ = q.shape[1] ?? 0;
+     const seqLenK = k.shape[1] ?? 0;
+     const seqLenV = v.shape[1] ?? 0;
+     const embedDim = q.shape[2] ?? 0;
+     if (embedDim !== this.embedDim) {
+       throw new ShapeError(`Query embedDim mismatch: expected ${this.embedDim}, got ${embedDim}`);
+     }
+     if (k.shape[2] !== this.embedDim) {
+       throw new ShapeError(`Key embedDim mismatch: expected ${this.embedDim}, got ${k.shape[2]}`);
+     }
+     if (v.shape[2] !== this.embedDim) {
+       throw new ShapeError(`Value embedDim mismatch: expected ${this.embedDim}, got ${v.shape[2]}`);
+     }
+     if (k.shape[0] !== batchSize || v.shape[0] !== batchSize) {
+       throw new ShapeError(
+         `batch size mismatch: query=${batchSize}, key=${k.shape[0]}, value=${v.shape[0]}`
+       );
+     }
+     if (seqLenK !== seqLenV) {
+       throw new ShapeError(`Key/value sequence length mismatch: key=${seqLenK}, value=${seqLenV}`);
+     }
+     let Q = q.matmul(this.wQ.transpose());
+     if (this.bQ) Q = Q.add(this.bQ);
+     let K = k.matmul(this.wK.transpose());
+     if (this.bK) K = K.add(this.bK);
+     let V = v.matmul(this.wV.transpose());
+     if (this.bV) V = V.add(this.bV);
+     const H = this.numHeads;
+     const D = this.headDim;
+     Q = Q.reshape([batchSize, seqLenQ, H, D]).transpose([0, 2, 1, 3]);
+     K = K.reshape([batchSize, seqLenK, H, D]).transpose([0, 2, 1, 3]);
+     V = V.reshape([batchSize, seqLenV, H, D]).transpose([0, 2, 1, 3]);
+     let scores = Q.matmul(K.transpose([0, 1, 3, 2]));
+     scores = scores.div(GradTensor.scalar(this.scale));
+     let attn = softmax2(scores, -1);
+     attn = dropout(attn, this.dropout, this.training);
+     const context = attn.matmul(V);
+     const contextDtype = ensureNumericDType(context.dtype, "MultiheadAttention");
+     const contextReshaped = context.transpose([0, 2, 1, 3]).mul(GradTensor.scalar(1, { dtype: contextDtype })).reshape([batchSize, seqLenQ, this.embedDim]);
+     let output = contextReshaped.matmul(this.wO.transpose());
1479
+ if (this.bO) output = output.add(this.bO);
1480
+ if (query.ndim === 2) {
1481
+ output = output.reshape([seqLenQ, this.embedDim]);
1482
+ }
1483
+ return output;
1484
+ }
1485
+ toString() {
1486
+ return `MultiheadAttention(embed_dim=${this.embedDim}, num_heads=${this.numHeads})`;
1487
+ }
1488
+ };
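+ // Usage sketch (a minimal, never-called illustration using only names defined
+ // in this chunk; nothing executes at module load): constructing the layer and
+ // running self-attention. The example function name is purely illustrative.
+ function exampleMultiheadAttentionUsage() {
+   const mha = new MultiheadAttention(64, 8, { dropout: 0.1 });
+   const x = randn([2, 10, 64]); // (batch, seqLen, embedDim)
+   const out = mha.forward(x); // key/value default to the query (self-attention)
+   return out.shape; // [2, 10, 64] -- same shape as the query
+ }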
1489
+ var TransformerEncoderLayer = class extends Module {
+ dModel;
+ nHead;
+ dFF;
+ selfAttn;
+ linear1;
+ linear2;
+ norm1;
+ norm2;
+ // Dropout rate (reported by toString); the actual dropout is applied by the
+ // Dropout modules below.
+ dropout;
+ dropout1;
+ dropout2;
+ dropout3;
+ constructor(dModel, nHead, dFF, options = {}) {
+ super();
+ if (!Number.isInteger(dModel) || dModel <= 0) {
+ throw new InvalidParameterError("dModel must be a positive integer", "dModel", dModel);
+ }
+ if (!Number.isInteger(nHead) || nHead <= 0) {
+ throw new InvalidParameterError("nHead must be a positive integer", "nHead", nHead);
+ }
+ if (dModel % nHead !== 0) {
+ throw new InvalidParameterError(
+ `dModel (${dModel}) must be divisible by nHead (${nHead})`,
+ "dModel",
+ dModel
+ );
+ }
+ if (!Number.isInteger(dFF) || dFF <= 0) {
+ throw new InvalidParameterError("dFF must be a positive integer", "dFF", dFF);
+ }
+ const dropout2 = options.dropout ?? 0.1;
+ const eps = options.eps ?? 1e-5;
+ this.dModel = dModel;
+ this.nHead = nHead;
+ this.dFF = dFF;
+ this.dropout = dropout2;
+ this.selfAttn = new MultiheadAttention(dModel, nHead, { dropout: dropout2 });
+ this.linear1 = new Linear(dModel, dFF);
+ this.linear2 = new Linear(dFF, dModel);
+ this.norm1 = new LayerNorm(dModel, { eps });
+ this.norm2 = new LayerNorm(dModel, { eps });
+ this.dropout1 = new Dropout(dropout2);
+ this.dropout2 = new Dropout(dropout2);
+ this.dropout3 = new Dropout(dropout2);
+ this.registerModule("self_attn", this.selfAttn);
+ this.registerModule("linear1", this.linear1);
+ this.registerModule("linear2", this.linear2);
+ this.registerModule("norm1", this.norm1);
+ this.registerModule("norm2", this.norm2);
+ this.registerModule("dropout1", this.dropout1);
+ this.registerModule("dropout2", this.dropout2);
+ this.registerModule("dropout3", this.dropout3);
+ }
+ /**
+ * Forward pass of the Transformer encoder layer.
+ *
+ * @param src - Source sequence of shape (batch, seqLen, dModel)
+ * @returns Output of same shape as input
+ */
+ forward(src) {
+ const input = src instanceof GradTensor ? src : GradTensor.fromTensor(src);
+ if (input.dtype === "string") {
+ throw new DTypeError("TransformerEncoderLayer does not support string dtype");
+ }
+ // Post-norm residual blocks: out = LayerNorm(x + Sublayer(x)).
+ let src2 = this.selfAttn.forward(input, input, input);
+ src2 = this.dropout1.forward(src2);
+ let out = input.add(src2);
+ out = this.norm1.forward(out);
+ let ffn = this.linear1.forward(out);
+ ffn = ffn.relu();
+ ffn = this.dropout2.forward(ffn);
+ ffn = this.linear2.forward(ffn);
+ ffn = this.dropout3.forward(ffn);
+ out = out.add(ffn);
+ out = this.norm2.forward(out);
+ return out;
+ }
+ toString() {
+ return `TransformerEncoderLayer(d_model=${this.dModel}, nhead=${this.nHead}, dim_feedforward=${this.dFF}, dropout=${this.dropout})`;
+ }
+ };
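+ // Usage sketch: one post-norm encoder block over a (batch, seqLen, dModel)
+ // sequence. A minimal, never-called sketch using only names defined in this
+ // chunk, so the bundle's behavior is unchanged.
+ function exampleTransformerEncoderLayerUsage() {
+   const layer = new TransformerEncoderLayer(64, 8, 256, { dropout: 0.1 });
+   const src = randn([2, 10, 64]);
+   return layer.forward(src).shape; // [2, 10, 64]
+ }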
1573
+
+ // src/nn/layers/conv.ts
+ function normalizePair(name, value, allowZero, description) {
+ const arr = typeof value === "number" ? [value, value] : value;
+ const first = arr[0];
+ const second = arr[1];
+ if (arr.length !== 2 || first === void 0 || second === void 0 || !Number.isInteger(first) || !Number.isInteger(second) || (allowZero ? first < 0 || second < 0 : first <= 0 || second <= 0)) {
+ throw new InvalidParameterError(`${name} must be ${description}`, name, value);
+ }
+ return [first, second];
+ }
+ var Conv1d = class extends Module {
+ inChannels;
+ outChannels;
+ kernelSize;
+ stride;
+ padding;
+ bias;
+ weight_;
+ bias_;
+ constructor(inChannels, outChannels, kernelSize, options = {}) {
+ super();
+ if (inChannels <= 0 || !Number.isInteger(inChannels)) {
+ throw new InvalidParameterError(
+ "inChannels must be a positive integer",
+ "inChannels",
+ inChannels
+ );
+ }
+ if (outChannels <= 0 || !Number.isInteger(outChannels)) {
+ throw new InvalidParameterError(
+ "outChannels must be a positive integer",
+ "outChannels",
+ outChannels
+ );
+ }
+ if (kernelSize <= 0 || !Number.isInteger(kernelSize)) {
+ throw new InvalidParameterError(
+ "kernelSize must be a positive integer",
+ "kernelSize",
+ kernelSize
+ );
+ }
+ const stride = options.stride ?? 1;
+ if (stride <= 0 || !Number.isInteger(stride)) {
+ throw new InvalidParameterError("stride must be a positive integer", "stride", stride);
+ }
+ const padding = options.padding ?? 0;
+ if (padding < 0 || !Number.isInteger(padding)) {
+ throw new InvalidParameterError("padding must be a non-negative integer", "padding", padding);
+ }
+ this.inChannels = inChannels;
+ this.outChannels = outChannels;
+ this.kernelSize = kernelSize;
+ this.stride = stride;
+ this.padding = padding;
+ this.bias = options.bias ?? true;
+ this.initializeParameters();
+ }
+ initializeParameters() {
+ const k = 1 / Math.sqrt(this.inChannels * this.kernelSize);
+ const weight = randn([this.outChannels, this.inChannels, this.kernelSize]);
+ this.weight_ = parameter(mulScalar(weight, k));
+ this.registerParameter("weight", this.weight_);
+ if (this.bias) {
+ const biasInit = randn([this.outChannels]);
+ this.bias_ = parameter(mulScalar(biasInit, k));
+ this.registerParameter("bias", this.bias_);
+ }
+ }
+ forward(x) {
+ const input = x instanceof GradTensor ? x : GradTensor.fromTensor(x);
+ if (input.dtype === "string") {
+ throw new DTypeError("String tensors are not supported");
+ }
+ if (input.ndim !== 3) {
+ throw new ShapeError(`Conv1d expects 3D input (batch, channels, length), got ${input.ndim}D`);
+ }
+ const batch = input.shape[0] ?? 0;
+ const inC = input.shape[1] ?? 0;
+ const inL = input.shape[2] ?? 0;
+ if (inC !== this.inChannels) {
+ throw new ShapeError(`Expected ${this.inChannels} input channels, got ${inC}`);
+ }
+ const weight = this.weight_;
+ if (!weight) throw new NotFittedError("Weight not initialized");
+ // Reuse the 2D im2col path by treating the sequence as a height-1 image.
+ const input2d = input.reshape([batch, inC, 1, inL]);
+ const kernelSize = [1, this.kernelSize];
+ const stride = [1, this.stride];
+ const padding = [0, this.padding];
+ const cols = im2col2(input2d, kernelSize, stride, padding);
+ const weightFlat = weight.reshape([this.outChannels, this.inChannels * this.kernelSize]);
+ const out = cols.matmul(weightFlat.transpose());
+ const outTransposed = out.transpose([0, 2, 1]);
+ if (this.bias && this.bias_) {
+ const biasReshaped = this.bias_.reshape([1, this.outChannels, 1]);
+ return outTransposed.add(biasReshaped);
+ }
+ return outTransposed;
+ }
+ get weight() {
+ if (!this.weight_) {
+ throw new NotFittedError("Weight not initialized");
+ }
+ return this.weight_;
+ }
+ };
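+ // Usage sketch for Conv1d (never called). With padding p, kernel k, stride s,
+ // the output length is floor((L + 2p - k) / s) + 1, so k=3, p=1, s=1 preserves L.
+ function exampleConv1dUsage() {
+   const conv = new Conv1d(4, 8, 3, { padding: 1 });
+   const x = randn([2, 4, 16]); // (batch, channels, length)
+   return conv.forward(x).shape; // [2, 8, 16]
+ }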
1680
+ var Conv2d = class extends Module {
+ inChannels;
+ outChannels;
+ kernelSize;
+ stride;
+ padding;
+ useBias;
+ weight_;
+ bias_;
+ constructor(inChannels, outChannels, kernelSize, options = {}) {
+ super();
+ if (inChannels <= 0 || !Number.isInteger(inChannels)) {
+ throw new InvalidParameterError(
+ "inChannels must be a positive integer",
+ "inChannels",
+ inChannels
+ );
+ }
+ if (outChannels <= 0 || !Number.isInteger(outChannels)) {
+ throw new InvalidParameterError(
+ "outChannels must be a positive integer",
+ "outChannels",
+ outChannels
+ );
+ }
+ const kernelArr = normalizePair(
+ "kernelSize",
+ kernelSize,
+ false,
+ "a positive integer or a tuple of two positive integers"
+ );
+ const stride = options.stride ?? 1;
+ const strideArr = normalizePair(
+ "stride",
+ stride,
+ false,
+ "a positive integer or a tuple of two positive integers"
+ );
+ const padding = options.padding ?? 0;
+ const paddingArr = normalizePair(
+ "padding",
+ padding,
+ true,
+ "a non-negative integer or a tuple of two non-negative integers"
+ );
+ this.inChannels = inChannels;
+ this.outChannels = outChannels;
+ this.kernelSize = kernelArr;
+ this.stride = strideArr;
+ this.padding = paddingArr;
+ this.useBias = options.bias ?? true;
+ this.initializeParameters();
+ }
+ initializeParameters() {
+ const kH = this.kernelSize[0] ?? 1;
+ const kW = this.kernelSize[1] ?? 1;
+ const k = 1 / Math.sqrt(this.inChannels * kH * kW);
+ const weight = randn([this.outChannels, this.inChannels, kH, kW]);
+ this.weight_ = parameter(mulScalar(weight, k));
+ this.registerParameter("weight", this.weight_);
+ if (this.useBias) {
+ const biasInit = randn([this.outChannels]);
+ this.bias_ = parameter(mulScalar(biasInit, k));
+ this.registerParameter("bias", this.bias_);
+ }
+ }
+ forward(x) {
+ const input = x instanceof GradTensor ? x : GradTensor.fromTensor(x);
+ if (input.dtype === "string") {
+ throw new DTypeError("String tensors are not supported");
+ }
+ if (input.ndim !== 4) {
+ throw new ShapeError(
+ `Conv2d expects 4D input (batch, channels, height, width), got ${input.ndim}D`
+ );
+ }
+ const batch = input.shape[0] ?? 0;
+ const inC = input.shape[1] ?? 0;
+ const inH = input.shape[2] ?? 0;
+ const inW = input.shape[3] ?? 0;
+ if (inC !== this.inChannels) {
+ throw new ShapeError(`Expected ${this.inChannels} input channels, got ${inC}`);
+ }
+ const weight = this.weight_;
+ if (!weight) throw new NotFittedError("Weight not initialized");
+ const [kH, kW] = this.kernelSize;
+ const [sH, sW] = this.stride;
+ const [pH, pW] = this.padding;
+ const cols = im2col2(input, [kH, kW], [sH, sW], [pH, pW]);
+ const outH = Math.floor((inH + 2 * pH - kH) / sH) + 1;
+ const outW = Math.floor((inW + 2 * pW - kW) / sW) + 1;
+ const weightFlat = weight.reshape([this.outChannels, this.inChannels * kH * kW]);
+ const out = cols.matmul(weightFlat.transpose());
+ const outTransposed = out.transpose([0, 2, 1]);
+ const outReshaped = outTransposed.reshape([batch, this.outChannels, outH, outW]);
+ if (this.useBias && this.bias_) {
+ const biasReshaped = this.bias_.reshape([1, this.outChannels, 1, 1]);
+ return outReshaped.add(biasReshaped);
+ }
+ return outReshaped;
+ }
+ get weight() {
+ if (!this.weight_) {
+ throw new NotFittedError("Weight not initialized");
+ }
+ return this.weight_;
+ }
+ };
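+ // Usage sketch for Conv2d (never called); the same output-size rule applies
+ // per spatial axis: floor((H + 2p - k) / s) + 1. Here (28 + 2 - 3) / 1 + 1 = 28.
+ function exampleConv2dUsage() {
+   const conv = new Conv2d(3, 16, 3, { stride: 1, padding: 1 });
+   const img = randn([1, 3, 28, 28]); // (batch, channels, height, width)
+   return conv.forward(img).shape; // [1, 16, 28, 28]
+ }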
1788
+ var MaxPool2d = class extends Module {
+ kernelSizeValue;
+ stride;
+ padding;
+ constructor(kernelSize, options = {}) {
+ super();
+ const kernelArr = normalizePair(
+ "kernelSize",
+ kernelSize,
+ false,
+ "a positive integer or a tuple of two positive integers"
+ );
+ this.kernelSizeValue = kernelArr;
+ const strideArr = normalizePair(
+ "stride",
+ options.stride ?? kernelSize,
+ false,
+ "a positive integer or a tuple of two positive integers"
+ );
+ this.stride = strideArr;
+ const paddingArr = normalizePair(
+ "padding",
+ options.padding ?? 0,
+ true,
+ "a non-negative integer or a tuple of two non-negative integers"
+ );
+ this.padding = paddingArr;
+ }
+ forward(x) {
+ const input = x instanceof GradTensor ? x : GradTensor.fromTensor(x);
+ if (input.dtype === "string") {
+ throw new DTypeError("String tensors are not supported");
+ }
+ if (input.ndim !== 4) {
+ throw new ShapeError(
+ `MaxPool2d expects 4D input (batch, channels, height, width), got ${input.ndim}D`
+ );
+ }
+ const batch = input.shape[0] ?? 0;
+ const channels = input.shape[1] ?? 0;
+ const inH = input.shape[2] ?? 0;
+ const inW = input.shape[3] ?? 0;
+ const [kH, kW] = this.kernelSizeValue;
+ const [sH, sW] = this.stride;
+ const [pH, pW] = this.padding;
+ const inputReshaped = input.reshape([batch * channels, 1, inH, inW]);
+ const cols = im2col2(inputReshaped, [kH, kW], [sH, sW], [pH, pW]);
+ const maxVals = cols.max(2);
+ const outH = Math.floor((inH + 2 * pH - kH) / sH) + 1;
+ const outW = Math.floor((inW + 2 * pW - kW) / sW) + 1;
+ return maxVals.reshape([batch, channels, outH, outW]);
+ }
+ };
+ var AvgPool2d = class extends Module {
+ kernelSizeValue;
+ stride;
+ padding;
+ constructor(kernelSize, options = {}) {
+ super();
+ const kernelArr = normalizePair(
+ "kernelSize",
+ kernelSize,
+ false,
+ "a positive integer or a tuple of two positive integers"
+ );
+ this.kernelSizeValue = kernelArr;
+ const strideArr = normalizePair(
+ "stride",
+ options.stride ?? kernelSize,
+ false,
+ "a positive integer or a tuple of two positive integers"
+ );
+ this.stride = strideArr;
+ const paddingArr = normalizePair(
+ "padding",
+ options.padding ?? 0,
+ true,
+ "a non-negative integer or a tuple of two non-negative integers"
+ );
+ this.padding = paddingArr;
+ }
+ forward(x) {
+ const input = x instanceof GradTensor ? x : GradTensor.fromTensor(x);
+ if (input.dtype === "string") {
+ throw new DTypeError("String tensors are not supported");
+ }
+ if (input.ndim !== 4) {
+ throw new ShapeError(
+ `AvgPool2d expects 4D input (batch, channels, height, width), got ${input.ndim}D`
+ );
+ }
+ const batch = input.shape[0] ?? 0;
+ const channels = input.shape[1] ?? 0;
+ const inH = input.shape[2] ?? 0;
+ const inW = input.shape[3] ?? 0;
+ const [kH, kW] = this.kernelSizeValue;
+ const [sH, sW] = this.stride;
+ const [pH, pW] = this.padding;
+ const inputReshaped = input.reshape([batch * channels, 1, inH, inW]);
+ const cols = im2col2(inputReshaped, [kH, kW], [sH, sW], [pH, pW]);
+ const meanVals = cols.mean(2);
+ const outH = Math.floor((inH + 2 * pH - kH) / sH) + 1;
+ const outW = Math.floor((inW + 2 * pW - kW) / sW) + 1;
+ return meanVals.reshape([batch, channels, outH, outW]);
+ }
+ };
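+ // Usage sketch for the pooling layers (never called); stride defaults to the
+ // kernel size, so MaxPool2d(2) halves each spatial dimension.
+ function exampleMaxPool2dUsage() {
+   const pool = new MaxPool2d(2);
+   const x = randn([1, 8, 28, 28]);
+   return pool.forward(x).shape; // [1, 8, 14, 14]
+ }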
1894
+
+ // src/nn/layers/recurrent.ts
+ function ensureFloatTensor(t, context) {
+ if (t.dtype === "string") {
+ throw new DTypeError(`${context} does not support string dtype`);
+ }
+ if (t.dtype !== "float32" && t.dtype !== "float64") {
+ throw new DTypeError(`${context} expects float32 or float64 dtype`);
+ }
+ }
+ function readNumeric(t, offset) {
+ const data = t.data;
+ if (Array.isArray(data)) {
+ throw new DTypeError("String tensors are not supported");
+ }
+ return getElementAsNumber(data, offset);
+ }
+ function createFloatBuffer(size, dtype) {
+ return dtype === "float64" ? new Float64Array(size) : new Float32Array(size);
+ }
+ function validatePositiveInt(name, value) {
+ if (!Number.isInteger(value) || value <= 0) {
+ throw new InvalidParameterError(`${name} must be a positive integer`, name, value);
+ }
+ }
+ function parseInput(input, batchFirst) {
+ if (input.ndim === 2) {
+ const seqLen = input.shape[0] ?? 0;
+ const inputDim = input.shape[1] ?? 0;
+ return {
+ batch: 1,
+ seqLen,
+ inputDim,
+ isUnbatched: true,
+ batchStride: 0,
+ seqStride: input.strides[0] ?? 0,
+ featStride: input.strides[1] ?? 0
+ };
+ }
+ if (input.ndim !== 3) {
+ throw new ShapeError(`Recurrent layers expect 2D or 3D input; got ndim=${input.ndim}`);
+ }
+ if (batchFirst) {
+ return {
+ batch: input.shape[0] ?? 0,
+ seqLen: input.shape[1] ?? 0,
+ inputDim: input.shape[2] ?? 0,
+ isUnbatched: false,
+ batchStride: input.strides[0] ?? 0,
+ seqStride: input.strides[1] ?? 0,
+ featStride: input.strides[2] ?? 0
+ };
+ }
+ return {
+ batch: input.shape[1] ?? 0,
+ seqLen: input.shape[0] ?? 0,
+ inputDim: input.shape[2] ?? 0,
+ isUnbatched: false,
+ batchStride: input.strides[1] ?? 0,
+ seqStride: input.strides[0] ?? 0,
+ featStride: input.strides[2] ?? 0
+ };
+ }
+ function outputIndex(batchFirst, isUnbatched, batch, seqLen, hiddenSize, b, t, j) {
+ if (isUnbatched) {
+ return t * hiddenSize + j;
+ }
+ if (batchFirst) {
+ return b * (seqLen * hiddenSize) + t * hiddenSize + j;
+ }
+ return t * (batch * hiddenSize) + b * hiddenSize + j;
+ }
+ function extractTensor(arg, _name) {
+ if (arg instanceof GradTensor) {
+ return arg.tensor;
+ }
+ return arg;
+ }
+ function buildState(state, numLayers, batch, hiddenSize, isUnbatched, name) {
+ const result = new Array(numLayers);
+ for (let l = 0; l < numLayers; l++) {
+ result[l] = new Float64Array(batch * hiddenSize);
+ }
+ if (!state) {
+ return result;
+ }
+ ensureFloatTensor(state, name);
+ if (state.ndim === 2) {
+ if (!isUnbatched) {
+ throw new ShapeError(`Expected ${name} with 3 dimensions for batched input`);
+ }
+ if ((state.shape[0] ?? 0) !== numLayers || (state.shape[1] ?? 0) !== hiddenSize) {
+ throw new ShapeError(
+ `Expected ${name} shape [${numLayers}, ${hiddenSize}], got [${state.shape.join(", ")}]`
+ );
+ }
+ const stride02 = state.strides[0] ?? 0;
+ const stride12 = state.strides[1] ?? 0;
+ for (let l = 0; l < numLayers; l++) {
+ const layerState = result[l];
+ if (!layerState) {
+ throw new ShapeError(`Internal error: missing ${name} layer state`);
+ }
+ const base = state.offset + l * stride02;
+ for (let j = 0; j < hiddenSize; j++) {
+ layerState[j] = readNumeric(state, base + j * stride12);
+ }
+ }
+ return result;
+ }
+ if (state.ndim !== 3) {
+ throw new ShapeError(`Expected ${name} with 2 or 3 dimensions; got ndim=${state.ndim}`);
+ }
+ const expectedBatch = isUnbatched ? 1 : batch;
+ if ((state.shape[0] ?? 0) !== numLayers || (state.shape[1] ?? 0) !== expectedBatch || (state.shape[2] ?? 0) !== hiddenSize) {
+ const expected = isUnbatched ? [numLayers, 1, hiddenSize] : [numLayers, batch, hiddenSize];
+ throw new ShapeError(
+ `Expected ${name} shape [${expected.join(", ")}], got [${state.shape.join(", ")}]`
+ );
+ }
+ const stride0 = state.strides[0] ?? 0;
+ const stride1 = state.strides[1] ?? 0;
+ const stride2 = state.strides[2] ?? 0;
+ for (let l = 0; l < numLayers; l++) {
+ const layerState = result[l];
+ if (!layerState) {
+ throw new ShapeError(`Internal error: missing ${name} layer state`);
+ }
+ const baseLayer = state.offset + l * stride0;
+ for (let b = 0; b < batch; b++) {
+ const baseBatch = baseLayer + b * stride1;
+ for (let j = 0; j < hiddenSize; j++) {
+ layerState[b * hiddenSize + j] = readNumeric(state, baseBatch + j * stride2);
+ }
+ }
+ }
+ return result;
+ }
+ function packState(state, numLayers, batch, hiddenSize, dtype, device, isUnbatched) {
+ const size = isUnbatched ? numLayers * hiddenSize : numLayers * batch * hiddenSize;
+ const data = createFloatBuffer(size, dtype);
+ if (isUnbatched) {
+ for (let l = 0; l < numLayers; l++) {
+ const layer = state[l];
+ if (!layer) {
+ throw new ShapeError("Internal error: missing packed state layer");
+ }
+ for (let j = 0; j < hiddenSize; j++) {
+ data[l * hiddenSize + j] = layer[j] ?? 0;
+ }
+ }
+ return Tensor.fromTypedArray({
+ data,
+ shape: [numLayers, hiddenSize],
+ dtype,
+ device
+ });
+ }
+ for (let l = 0; l < numLayers; l++) {
+ const layer = state[l];
+ if (!layer) {
+ throw new ShapeError("Internal error: missing packed state layer");
+ }
+ const layerOffset = l * batch * hiddenSize;
+ for (let b = 0; b < batch; b++) {
+ const batchOffset = layerOffset + b * hiddenSize;
+ for (let j = 0; j < hiddenSize; j++) {
+ data[batchOffset + j] = layer[b * hiddenSize + j] ?? 0;
+ }
+ }
+ }
+ return Tensor.fromTypedArray({
+ data,
+ shape: [numLayers, batch, hiddenSize],
+ dtype,
+ device
+ });
+ }
2072
+ var RNN = class extends Module {
+ inputSize;
+ hiddenSize;
+ numLayers;
+ nonlinearity;
+ bias;
+ batchFirst;
+ weightsIh;
+ weightsHh;
+ biasIh;
+ biasHh;
+ constructor(inputSize, hiddenSize, options = {}) {
+ super();
+ validatePositiveInt("inputSize", inputSize);
+ validatePositiveInt("hiddenSize", hiddenSize);
+ const numLayers = options.numLayers ?? 1;
+ validatePositiveInt("numLayers", numLayers);
+ this.inputSize = inputSize;
+ this.hiddenSize = hiddenSize;
+ this.numLayers = numLayers;
+ this.nonlinearity = options.nonlinearity ?? "tanh";
+ this.bias = options.bias ?? true;
+ this.batchFirst = options.batchFirst ?? true;
+ const stdv = 1 / Math.sqrt(hiddenSize);
+ this.weightsIh = [];
+ this.weightsHh = [];
+ this.biasIh = [];
+ this.biasHh = [];
+ for (let layer = 0; layer < this.numLayers; layer++) {
+ const inputDim = layer === 0 ? inputSize : hiddenSize;
+ const wIh = mulScalar(randn([hiddenSize, inputDim]), stdv);
+ const wHh = mulScalar(randn([hiddenSize, hiddenSize]), stdv);
+ this.weightsIh.push(wIh);
+ this.weightsHh.push(wHh);
+ this.registerParameter(`weight_ih_l${layer}`, parameter(wIh));
+ this.registerParameter(`weight_hh_l${layer}`, parameter(wHh));
+ if (this.bias) {
+ const bIh = zeros([hiddenSize]);
+ const bHh = zeros([hiddenSize]);
+ this.biasIh.push(bIh);
+ this.biasHh.push(bHh);
+ this.registerParameter(`bias_ih_l${layer}`, parameter(bIh));
+ this.registerParameter(`bias_hh_l${layer}`, parameter(bHh));
+ }
+ }
+ }
+ activation(x) {
+ return this.nonlinearity === "tanh" ? Math.tanh(x) : Math.max(0, x);
+ }
+ run(input, hx) {
+ ensureFloatTensor(input, "RNN");
+ const parsed = parseInput(input, this.batchFirst);
+ const { batch, seqLen, inputDim, isUnbatched, batchStride, seqStride, featStride } = parsed;
+ if (inputDim !== this.inputSize) {
+ throw new ShapeError(`Expected input size ${this.inputSize}, got ${inputDim}`);
+ }
+ if (seqLen <= 0) {
+ throw new InvalidParameterError("Sequence length must be positive", "seqLen", seqLen);
+ }
+ if (!isUnbatched && batch <= 0) {
+ throw new InvalidParameterError("Batch size must be positive", "batch", batch);
+ }
+ const h = buildState(hx, this.numLayers, batch, this.hiddenSize, isUnbatched, "hx");
+ const outSize = (isUnbatched ? seqLen : batch * seqLen) * this.hiddenSize;
+ const out = createFloatBuffer(outSize, input.dtype);
+ const inputVec = new Float64Array(inputDim);
+ for (let t = 0; t < seqLen; t++) {
+ for (let b = 0; b < batch; b++) {
+ const baseOffset = input.offset + b * batchStride + t * seqStride;
+ for (let i = 0; i < inputDim; i++) {
+ inputVec[i] = readNumeric(input, baseOffset + i * featStride);
+ }
+ let layerInput = inputVec;
+ for (let l = 0; l < this.numLayers; l++) {
+ const wIh = this.weightsIh[l];
+ const wHh = this.weightsHh[l];
+ if (!wIh || !wHh) {
+ throw new ShapeError("Internal error: missing RNN weights");
+ }
+ const curInputSize = l === 0 ? this.inputSize : this.hiddenSize;
+ const newH = new Float64Array(this.hiddenSize);
+ const hLayer = h[l];
+ if (!hLayer) {
+ throw new ShapeError("Internal error: missing RNN hidden state");
+ }
+ const wIhStride0 = wIh.strides[0] ?? 0;
+ const wIhStride1 = wIh.strides[1] ?? 0;
+ const wHhStride0 = wHh.strides[0] ?? 0;
+ const wHhStride1 = wHh.strides[1] ?? 0;
+ const biasIh = this.biasIh[l];
+ const biasHh = this.biasHh[l];
+ const biasIhStride = biasIh ? biasIh.strides[0] ?? 0 : 0;
+ const biasHhStride = biasHh ? biasHh.strides[0] ?? 0 : 0;
+ for (let j = 0; j < this.hiddenSize; j++) {
+ let sum2 = 0;
+ const wIhBase = wIh.offset + j * wIhStride0;
+ for (let k = 0; k < curInputSize; k++) {
+ sum2 += (layerInput[k] ?? 0) * readNumeric(wIh, wIhBase + k * wIhStride1);
+ }
+ const wHhBase = wHh.offset + j * wHhStride0;
+ for (let k = 0; k < this.hiddenSize; k++) {
+ sum2 += (hLayer[b * this.hiddenSize + k] ?? 0) * readNumeric(wHh, wHhBase + k * wHhStride1);
+ }
+ if (this.bias && biasIh && biasHh) {
+ sum2 += readNumeric(biasIh, biasIh.offset + j * biasIhStride);
+ sum2 += readNumeric(biasHh, biasHh.offset + j * biasHhStride);
+ }
+ newH[j] = this.activation(sum2);
+ }
+ for (let j = 0; j < this.hiddenSize; j++) {
+ hLayer[b * this.hiddenSize + j] = newH[j] ?? 0;
+ }
+ layerInput = newH;
+ }
+ for (let j = 0; j < this.hiddenSize; j++) {
+ const idx = outputIndex(
+ this.batchFirst,
+ isUnbatched,
+ batch,
+ seqLen,
+ this.hiddenSize,
+ b,
+ t,
+ j
+ );
+ out[idx] = layerInput[j] ?? 0;
+ }
+ }
+ }
+ const outShape = isUnbatched ? [seqLen, this.hiddenSize] : this.batchFirst ? [batch, seqLen, this.hiddenSize] : [seqLen, batch, this.hiddenSize];
+ return {
+ output: Tensor.fromTypedArray({
+ data: out,
+ shape: outShape,
+ dtype: input.dtype,
+ device: input.device
+ }),
+ h: packState(
+ h,
+ this.numLayers,
+ batch,
+ this.hiddenSize,
+ input.dtype,
+ input.device,
+ isUnbatched
+ )
+ };
+ }
+ forward(...inputs) {
+ if (inputs.length < 1 || inputs.length > 2) {
+ throw new InvalidParameterError("RNN.forward expects 1 or 2 inputs", "inputs", inputs.length);
+ }
+ const inputArg = inputs[0];
+ if (inputArg === void 0) {
+ throw new InvalidParameterError("RNN.forward requires an input tensor", "input", inputArg);
+ }
+ const input = extractTensor(inputArg);
+ const hxArg = inputs.length === 2 ? inputs[1] : void 0;
+ const hx = hxArg === void 0 ? void 0 : extractTensor(hxArg);
+ return this.run(input, hx).output;
+ }
+ /**
+ * Forward pass returning both output and hidden state.
+ * Use this method when you need the hidden state.
+ */
+ forwardWithState(input, hx) {
+ const inputTensor = extractTensor(input);
+ const hxTensor = hx === void 0 ? void 0 : extractTensor(hx);
+ const { output, h } = this.run(inputTensor, hxTensor);
+ return [output, h];
+ }
+ toString() {
+ return `RNN(${this.inputSize}, ${this.hiddenSize}, num_layers=${this.numLayers})`;
+ }
+ };
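+ // Usage sketch for RNN with the default batchFirst=true layout (never
+ // called); forwardWithState also returns the final hidden state.
+ function exampleRNNUsage() {
+   const rnn = new RNN(10, 20, { numLayers: 2 });
+   const x = randn([3, 5, 10]); // (batch, seqLen, inputSize)
+   const [output, h] = rnn.forwardWithState(x);
+   return [output.shape, h.shape]; // [3, 5, 20] and [2, 3, 20]
+ }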
2247
+ var LSTM = class extends Module {
+ inputSize;
+ hiddenSize;
+ numLayers;
+ bias;
+ batchFirst;
+ weightsIh;
+ weightsHh;
+ biasIh;
+ biasHh;
+ constructor(inputSize, hiddenSize, options = {}) {
+ super();
+ validatePositiveInt("inputSize", inputSize);
+ validatePositiveInt("hiddenSize", hiddenSize);
+ const numLayers = options.numLayers ?? 1;
+ validatePositiveInt("numLayers", numLayers);
+ this.inputSize = inputSize;
+ this.hiddenSize = hiddenSize;
+ this.numLayers = numLayers;
+ this.bias = options.bias ?? true;
+ this.batchFirst = options.batchFirst ?? true;
+ const stdv = 1 / Math.sqrt(hiddenSize);
+ this.weightsIh = [];
+ this.weightsHh = [];
+ this.biasIh = [];
+ this.biasHh = [];
+ for (let layer = 0; layer < this.numLayers; layer++) {
+ const inputDim = layer === 0 ? inputSize : hiddenSize;
+ const wIh = mulScalar(randn([4 * hiddenSize, inputDim]), stdv);
+ const wHh = mulScalar(randn([4 * hiddenSize, hiddenSize]), stdv);
+ this.weightsIh.push(wIh);
+ this.weightsHh.push(wHh);
+ this.registerParameter(`weight_ih_l${layer}`, parameter(wIh));
+ this.registerParameter(`weight_hh_l${layer}`, parameter(wHh));
+ if (this.bias) {
+ const bIh = zeros([4 * hiddenSize]);
+ const bHh = zeros([4 * hiddenSize]);
+ this.biasIh.push(bIh);
+ this.biasHh.push(bHh);
+ this.registerParameter(`bias_ih_l${layer}`, parameter(bIh));
+ this.registerParameter(`bias_hh_l${layer}`, parameter(bHh));
+ }
+ }
+ }
+ sigmoid(x) {
+ return 1 / (1 + Math.exp(-x));
+ }
+ run(input, hx, cx) {
+ ensureFloatTensor(input, "LSTM");
+ const parsed = parseInput(input, this.batchFirst);
+ const { batch, seqLen, inputDim, isUnbatched, batchStride, seqStride, featStride } = parsed;
+ if (inputDim !== this.inputSize) {
+ throw new ShapeError(`Expected input size ${this.inputSize}, got ${inputDim}`);
+ }
+ if (seqLen <= 0) {
+ throw new InvalidParameterError("Sequence length must be positive", "seqLen", seqLen);
+ }
+ if (!isUnbatched && batch <= 0) {
+ throw new InvalidParameterError("Batch size must be positive", "batch", batch);
+ }
+ const h = buildState(hx, this.numLayers, batch, this.hiddenSize, isUnbatched, "hx");
+ const c = buildState(cx, this.numLayers, batch, this.hiddenSize, isUnbatched, "cx");
+ const outSize = (isUnbatched ? seqLen : batch * seqLen) * this.hiddenSize;
+ const out = createFloatBuffer(outSize, input.dtype);
+ const inputVec = new Float64Array(inputDim);
+ // Gate pre-activations packed along the first weight axis as
+ // [input | forget | cell | output], matching the 4 * hiddenSize rows.
+ const gates = new Float64Array(4 * this.hiddenSize);
+ for (let t = 0; t < seqLen; t++) {
+ for (let b = 0; b < batch; b++) {
+ const baseOffset = input.offset + b * batchStride + t * seqStride;
+ for (let i = 0; i < inputDim; i++) {
+ inputVec[i] = readNumeric(input, baseOffset + i * featStride);
+ }
+ let layerInput = inputVec;
+ for (let l = 0; l < this.numLayers; l++) {
+ const wIh = this.weightsIh[l];
+ const wHh = this.weightsHh[l];
+ if (!wIh || !wHh) {
+ throw new ShapeError("Internal error: missing LSTM weights");
+ }
+ const curInputSize = l === 0 ? this.inputSize : this.hiddenSize;
+ const hLayer = h[l];
+ const cLayer = c[l];
+ if (!hLayer || !cLayer) {
+ throw new ShapeError("Internal error: missing LSTM state");
+ }
+ const wIhStride0 = wIh.strides[0] ?? 0;
+ const wIhStride1 = wIh.strides[1] ?? 0;
+ const wHhStride0 = wHh.strides[0] ?? 0;
+ const wHhStride1 = wHh.strides[1] ?? 0;
+ const biasIh = this.biasIh[l];
+ const biasHh = this.biasHh[l];
+ const biasIhStride = biasIh ? biasIh.strides[0] ?? 0 : 0;
+ const biasHhStride = biasHh ? biasHh.strides[0] ?? 0 : 0;
+ for (let g = 0; g < 4 * this.hiddenSize; g++) {
+ let sum2 = 0;
+ const wIhBase = wIh.offset + g * wIhStride0;
+ for (let k = 0; k < curInputSize; k++) {
+ sum2 += (layerInput[k] ?? 0) * readNumeric(wIh, wIhBase + k * wIhStride1);
+ }
+ const wHhBase = wHh.offset + g * wHhStride0;
+ for (let k = 0; k < this.hiddenSize; k++) {
+ sum2 += (hLayer[b * this.hiddenSize + k] ?? 0) * readNumeric(wHh, wHhBase + k * wHhStride1);
+ }
+ if (this.bias && biasIh && biasHh) {
+ sum2 += readNumeric(biasIh, biasIh.offset + g * biasIhStride);
+ sum2 += readNumeric(biasHh, biasHh.offset + g * biasHhStride);
+ }
+ gates[g] = sum2;
+ }
+ const newH = new Float64Array(this.hiddenSize);
+ const newC = new Float64Array(this.hiddenSize);
+ // c' = f * c + i * g;  h' = o * tanh(c')
+ for (let j = 0; j < this.hiddenSize; j++) {
+ const iGate = this.sigmoid(gates[j] ?? 0);
+ const fGate = this.sigmoid(gates[this.hiddenSize + j] ?? 0);
+ const gGate = Math.tanh(gates[2 * this.hiddenSize + j] ?? 0);
+ const oGate = this.sigmoid(gates[3 * this.hiddenSize + j] ?? 0);
+ const prevC = cLayer[b * this.hiddenSize + j] ?? 0;
+ const nextC = fGate * prevC + iGate * gGate;
+ const nextH = oGate * Math.tanh(nextC);
+ newC[j] = nextC;
+ newH[j] = nextH;
+ }
+ for (let j = 0; j < this.hiddenSize; j++) {
+ hLayer[b * this.hiddenSize + j] = newH[j] ?? 0;
+ cLayer[b * this.hiddenSize + j] = newC[j] ?? 0;
+ }
+ layerInput = newH;
+ }
+ for (let j = 0; j < this.hiddenSize; j++) {
+ const idx = outputIndex(
+ this.batchFirst,
+ isUnbatched,
+ batch,
+ seqLen,
+ this.hiddenSize,
+ b,
+ t,
+ j
+ );
+ out[idx] = layerInput[j] ?? 0;
+ }
+ }
+ }
+ const outShape = isUnbatched ? [seqLen, this.hiddenSize] : this.batchFirst ? [batch, seqLen, this.hiddenSize] : [seqLen, batch, this.hiddenSize];
+ return {
+ output: Tensor.fromTypedArray({
+ data: out,
+ shape: outShape,
+ dtype: input.dtype,
+ device: input.device
+ }),
+ h: packState(
+ h,
+ this.numLayers,
+ batch,
+ this.hiddenSize,
+ input.dtype,
+ input.device,
+ isUnbatched
+ ),
+ c: packState(
+ c,
+ this.numLayers,
+ batch,
+ this.hiddenSize,
+ input.dtype,
+ input.device,
+ isUnbatched
+ )
+ };
+ }
+ forward(...inputs) {
+ if (inputs.length < 1 || inputs.length > 3) {
+ throw new InvalidParameterError(
+ "LSTM.forward expects 1 to 3 inputs",
+ "inputs",
+ inputs.length
+ );
+ }
+ const inputArg = inputs[0];
+ if (inputArg === void 0) {
+ throw new InvalidParameterError("LSTM.forward requires an input tensor", "input", inputArg);
+ }
+ const input = extractTensor(inputArg);
+ const hxArg = inputs.length >= 2 ? inputs[1] : void 0;
+ const cxArg = inputs.length >= 3 ? inputs[2] : void 0;
+ const hx = hxArg === void 0 ? void 0 : extractTensor(hxArg);
+ const cx = cxArg === void 0 ? void 0 : extractTensor(cxArg);
+ return this.run(input, hx, cx).output;
+ }
+ /**
+ * Forward pass returning output, hidden state, and cell state.
+ * Use this method when you need the hidden/cell states.
+ */
+ forwardWithState(input, hx, cx) {
+ const inputTensor = extractTensor(input);
+ const hxTensor = hx === void 0 ? void 0 : extractTensor(hx);
+ const cxTensor = cx === void 0 ? void 0 : extractTensor(cx);
+ const { output, h, c } = this.run(inputTensor, hxTensor, cxTensor);
+ return [output, [h, c]];
+ }
+ toString() {
+ return `LSTM(${this.inputSize}, ${this.hiddenSize}, num_layers=${this.numLayers})`;
+ }
+ };
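+ // Usage sketch for LSTM (never called); forwardWithState returns
+ // [output, [h, c]] with h and c shaped [numLayers, batch, hiddenSize].
+ function exampleLSTMUsage() {
+   const lstm = new LSTM(10, 20);
+   const x = randn([3, 5, 10]); // (batch, seqLen, inputSize)
+   const [output, [h, c]] = lstm.forwardWithState(x);
+   return [output.shape, h.shape, c.shape]; // [3, 5, 20], [1, 3, 20], [1, 3, 20]
+ }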
2452
+ var GRU = class extends Module {
+ inputSize;
+ hiddenSize;
+ numLayers;
+ bias;
+ batchFirst;
+ weightsIh;
+ weightsHh;
+ biasIh;
+ biasHh;
+ constructor(inputSize, hiddenSize, options = {}) {
+ super();
+ validatePositiveInt("inputSize", inputSize);
+ validatePositiveInt("hiddenSize", hiddenSize);
+ const numLayers = options.numLayers ?? 1;
+ validatePositiveInt("numLayers", numLayers);
+ this.inputSize = inputSize;
+ this.hiddenSize = hiddenSize;
+ this.numLayers = numLayers;
+ this.bias = options.bias ?? true;
+ this.batchFirst = options.batchFirst ?? true;
+ const stdv = 1 / Math.sqrt(hiddenSize);
+ this.weightsIh = [];
+ this.weightsHh = [];
+ this.biasIh = [];
+ this.biasHh = [];
+ for (let layer = 0; layer < this.numLayers; layer++) {
+ const inputDim = layer === 0 ? inputSize : hiddenSize;
+ const wIh = mulScalar(randn([3 * hiddenSize, inputDim]), stdv);
+ const wHh = mulScalar(randn([3 * hiddenSize, hiddenSize]), stdv);
+ this.weightsIh.push(wIh);
+ this.weightsHh.push(wHh);
+ this.registerParameter(`weight_ih_l${layer}`, parameter(wIh));
+ this.registerParameter(`weight_hh_l${layer}`, parameter(wHh));
+ if (this.bias) {
+ const bIh = zeros([3 * hiddenSize]);
+ const bHh = zeros([3 * hiddenSize]);
+ this.biasIh.push(bIh);
+ this.biasHh.push(bHh);
+ this.registerParameter(`bias_ih_l${layer}`, parameter(bIh));
+ this.registerParameter(`bias_hh_l${layer}`, parameter(bHh));
+ }
+ }
+ }
+ sigmoid(x) {
+ return 1 / (1 + Math.exp(-x));
+ }
+ run(input, hx) {
+ ensureFloatTensor(input, "GRU");
+ const parsed = parseInput(input, this.batchFirst);
+ const { batch, seqLen, inputDim, isUnbatched, batchStride, seqStride, featStride } = parsed;
+ if (inputDim !== this.inputSize) {
+ throw new ShapeError(`Expected input size ${this.inputSize}, got ${inputDim}`);
+ }
+ if (seqLen <= 0) {
+ throw new InvalidParameterError("Sequence length must be positive", "seqLen", seqLen);
+ }
+ if (!isUnbatched && batch <= 0) {
+ throw new InvalidParameterError("Batch size must be positive", "batch", batch);
+ }
+ const h = buildState(hx, this.numLayers, batch, this.hiddenSize, isUnbatched, "hx");
+ const outSize = (isUnbatched ? seqLen : batch * seqLen) * this.hiddenSize;
+ const out = createFloatBuffer(outSize, input.dtype);
+ const inputVec = new Float64Array(inputDim);
+ // Input-side and hidden-side pre-activations are kept separate because the
+ // candidate gate applies r only to the hidden-side term.
+ const gatesIh = new Float64Array(3 * this.hiddenSize);
+ const gatesHh = new Float64Array(3 * this.hiddenSize);
+ for (let t = 0; t < seqLen; t++) {
+ for (let b = 0; b < batch; b++) {
+ const baseOffset = input.offset + b * batchStride + t * seqStride;
+ for (let i = 0; i < inputDim; i++) {
+ inputVec[i] = readNumeric(input, baseOffset + i * featStride);
+ }
+ let layerInput = inputVec;
+ for (let l = 0; l < this.numLayers; l++) {
+ const wIh = this.weightsIh[l];
+ const wHh = this.weightsHh[l];
+ if (!wIh || !wHh) {
+ throw new ShapeError("Internal error: missing GRU weights");
+ }
+ const curInputSize = l === 0 ? this.inputSize : this.hiddenSize;
+ const hLayer = h[l];
+ if (!hLayer) {
+ throw new ShapeError("Internal error: missing GRU hidden state");
+ }
+ const wIhStride0 = wIh.strides[0] ?? 0;
+ const wIhStride1 = wIh.strides[1] ?? 0;
+ const wHhStride0 = wHh.strides[0] ?? 0;
+ const wHhStride1 = wHh.strides[1] ?? 0;
+ const biasIh = this.biasIh[l];
+ const biasHh = this.biasHh[l];
+ const biasIhStride = biasIh ? biasIh.strides[0] ?? 0 : 0;
+ const biasHhStride = biasHh ? biasHh.strides[0] ?? 0 : 0;
+ for (let g = 0; g < 3 * this.hiddenSize; g++) {
+ let sumIh = 0;
+ let sumHh = 0;
+ const wIhBase = wIh.offset + g * wIhStride0;
+ for (let k = 0; k < curInputSize; k++) {
+ sumIh += (layerInput[k] ?? 0) * readNumeric(wIh, wIhBase + k * wIhStride1);
+ }
+ const wHhBase = wHh.offset + g * wHhStride0;
+ for (let k = 0; k < this.hiddenSize; k++) {
+ sumHh += (hLayer[b * this.hiddenSize + k] ?? 0) * readNumeric(wHh, wHhBase + k * wHhStride1);
+ }
+ if (this.bias && biasIh && biasHh) {
+ sumIh += readNumeric(biasIh, biasIh.offset + g * biasIhStride);
+ sumHh += readNumeric(biasHh, biasHh.offset + g * biasHhStride);
+ }
+ gatesIh[g] = sumIh;
+ gatesHh[g] = sumHh;
+ }
+ const newH = new Float64Array(this.hiddenSize);
+ // GRU gates packed as [r (reset) | z (update) | n (candidate)];
+ // h' = (1 - z) * n + z * h
+ for (let j = 0; j < this.hiddenSize; j++) {
+ const r = this.sigmoid((gatesIh[j] ?? 0) + (gatesHh[j] ?? 0));
+ const z = this.sigmoid(
+ (gatesIh[this.hiddenSize + j] ?? 0) + (gatesHh[this.hiddenSize + j] ?? 0)
+ );
+ const n = Math.tanh(
+ (gatesIh[2 * this.hiddenSize + j] ?? 0) + r * (gatesHh[2 * this.hiddenSize + j] ?? 0)
+ );
+ newH[j] = (1 - z) * n + z * (hLayer[b * this.hiddenSize + j] ?? 0);
+ }
+ for (let j = 0; j < this.hiddenSize; j++) {
+ hLayer[b * this.hiddenSize + j] = newH[j] ?? 0;
+ }
+ layerInput = newH;
+ }
+ for (let j = 0; j < this.hiddenSize; j++) {
+ const idx = outputIndex(
+ this.batchFirst,
+ isUnbatched,
+ batch,
+ seqLen,
+ this.hiddenSize,
+ b,
+ t,
+ j
+ );
+ out[idx] = layerInput[j] ?? 0;
+ }
+ }
+ }
+ const outShape = isUnbatched ? [seqLen, this.hiddenSize] : this.batchFirst ? [batch, seqLen, this.hiddenSize] : [seqLen, batch, this.hiddenSize];
+ return {
+ output: Tensor.fromTypedArray({
+ data: out,
+ shape: outShape,
+ dtype: input.dtype,
+ device: input.device
+ }),
+ h: packState(
+ h,
+ this.numLayers,
+ batch,
+ this.hiddenSize,
+ input.dtype,
+ input.device,
+ isUnbatched
+ )
+ };
+ }
+ forward(...inputs) {
+ if (inputs.length < 1 || inputs.length > 2) {
+ throw new InvalidParameterError("GRU.forward expects 1 or 2 inputs", "inputs", inputs.length);
+ }
+ const inputArg = inputs[0];
+ if (inputArg === void 0) {
+ throw new InvalidParameterError("GRU.forward requires an input tensor", "input", inputArg);
+ }
+ const input = extractTensor(inputArg);
+ const hxArg = inputs.length === 2 ? inputs[1] : void 0;
+ const hx = hxArg === void 0 ? void 0 : extractTensor(hxArg);
+ return this.run(input, hx).output;
+ }
+ /**
+ * Forward pass returning both output and hidden state.
+ * Use this method when you need the hidden state.
+ */
+ forwardWithState(input, hx) {
+ const inputTensor = extractTensor(input);
+ const hxTensor = hx === void 0 ? void 0 : extractTensor(hx);
+ const { output, h } = this.run(inputTensor, hxTensor);
+ return [output, h];
+ }
+ toString() {
+ return `GRU(${this.inputSize}, ${this.hiddenSize}, num_layers=${this.numLayers})`;
+ }
+ };
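+ // Usage sketch for GRU with batchFirst=false (never called), where the input
+ // is laid out (seqLen, batch, inputSize) and the output follows the same layout.
+ function exampleGRUUsage() {
+   const gru = new GRU(10, 20, { batchFirst: false });
+   const x = randn([5, 3, 10]); // (seqLen, batch, inputSize)
+   const [output, h] = gru.forwardWithState(x);
+   return [output.shape, h.shape]; // [5, 3, 20] and [1, 3, 20]
+ }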
2639
+
+ // src/nn/losses/crossEntropy.ts
+ function toOneHot(indices, numClasses) {
+ const nSamples = indices.size;
+ const outData = new Float32Array(nSamples * numClasses);
+ const data = indices.data;
+ if (Array.isArray(data)) {
+ throw new DTypeError("crossEntropyLoss target indices must be numeric");
+ }
+ const stride0 = indices.strides[0] ?? 0;
+ const base = indices.offset;
+ for (let i = 0; i < nSamples; i++) {
+ const offset = base + i * stride0;
+ let idx;
+ if (data instanceof BigInt64Array) {
+ const raw = getBigIntElement(data, offset);
+ const asNumber = Number(raw);
+ if (!Number.isSafeInteger(asNumber)) {
+ throw new InvalidParameterError(
+ `Class index ${raw.toString()} exceeds safe integer range`,
+ "target",
+ raw.toString()
+ );
+ }
+ idx = asNumber;
+ } else {
+ idx = Number(getNumericElement(data, offset));
+ }
+ if (!Number.isFinite(idx) || !Number.isInteger(idx)) {
+ throw new InvalidParameterError(`Class index ${idx} is not a valid integer`, "target", idx);
+ }
+ if (idx < 0 || idx >= numClasses) {
+ throw new InvalidParameterError(
+ `Class index ${idx} out of range [0, ${numClasses})`,
+ "target",
+ idx
+ );
+ }
+ outData[i * numClasses + idx] = 1;
+ }
+ return Tensor.fromTypedArray({
+ data: outData,
+ shape: [nSamples, numClasses],
+ dtype: "float32",
+ device: indices.device
+ });
+ }
2686
+ function crossEntropyLoss(input, target) {
+ const yPred = input instanceof GradTensor ? input : GradTensor.fromTensor(input);
+ const targetIsGrad = target instanceof GradTensor;
+ const yTrue = target instanceof GradTensor ? target : GradTensor.fromTensor(target, { requiresGrad: false });
+ if (yPred.ndim !== 2) {
+ throw new ShapeError(`Input must be 2-dimensional (batch, classes); got ${yPred.ndim}`);
+ }
+ const nSamples = yPred.shape[0] ?? 0;
+ const nClasses = yPred.shape[1] ?? 0;
+ let targetTensor = yTrue;
+ if (yTrue.ndim === 1) {
+ if (targetIsGrad) {
+ throw new ShapeError("Target must be 2-dimensional when provided as GradTensor");
+ }
+ if (yTrue.shape[0] !== nSamples) {
+ throw new ShapeError(
+ `Target must have same number of samples as input; got ${yTrue.shape[0]} and ${nSamples}`
+ );
+ }
+ const oneHot = toOneHot(yTrue.tensor, nClasses);
+ targetTensor = GradTensor.fromTensor(oneHot, { requiresGrad: false });
+ } else if (yTrue.ndim === 2) {
+ if (yTrue.shape[0] !== nSamples || yTrue.shape[1] !== nClasses) {
+ throw new ShapeError(
+ "Target must be 1-dimensional class indices or have the same shape as input"
+ );
+ }
+ } else {
+ throw new ShapeError(`Target must be 1D (indices) or 2D (probs); got ${yTrue.ndim}D`);
+ }
+ // Mean over the batch of -sum(target * logSoftmax(input)) along the class axis.
+ const logProbs = logSoftmax2(yPred, 1);
+ const weighted = logProbs.mul(targetTensor);
+ const sampleLoss = weighted.sum(1);
+ const meanLoss = sampleLoss.mean().neg();
+ // Plain-Tensor inputs get a plain number back; GradTensor inputs keep the
+ // autograd graph and get a GradTensor.
+ if (!(input instanceof GradTensor) && !targetIsGrad) {
+ const data = meanLoss.tensor.data;
+ if (Array.isArray(data)) {
+ throw new DTypeError("crossEntropyLoss does not support string dtype");
+ }
+ if (data instanceof BigInt64Array) {
+ const raw = getBigIntElement(data, meanLoss.tensor.offset);
+ return Number(raw);
+ }
+ return getNumericElement(data, meanLoss.tensor.offset);
+ }
+ return meanLoss;
+ }
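+ // Usage sketch (never called): integer class indices are one-hot encoded
+ // internally, and plain Tensor inputs reduce to a plain number. The target
+ // construction mirrors Tensor.fromTypedArray calls elsewhere in this chunk.
+ function exampleCrossEntropyUsage() {
+   const logits = randn([4, 3]); // (batch, classes)
+   const target = Tensor.fromTypedArray({
+     data: new Float32Array([0, 2, 1, 0]),
+     shape: [4],
+     dtype: "float32",
+     device: logits.device
+   });
+   return crossEntropyLoss(logits, target); // plain number (mean NLL)
+ }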
2733
+ function binaryCrossEntropyWithLogitsLoss(input, target) {
+ const yPred = input instanceof GradTensor ? input : GradTensor.fromTensor(input);
+ const yTrue = target instanceof GradTensor ? target : GradTensor.fromTensor(target, { requiresGrad: false });
+ let pred = yPred;
+ let truth = yTrue;
+ if (pred.ndim !== 1 && pred.ndim !== 2) {
+ throw new ShapeError("Input must be 1 or 2-dimensional");
+ }
+ if (truth.ndim !== 1 && truth.ndim !== 2) {
+ throw new ShapeError("Target must be 1 or 2-dimensional");
+ }
+ if (pred.ndim === 1) {
+ pred = pred.reshape([pred.shape[0] ?? 0, 1]);
+ }
+ if (truth.ndim === 1) {
+ truth = truth.reshape([truth.shape[0] ?? 0, 1]);
+ }
+ if (pred.ndim !== 2 || pred.shape[1] !== 1) {
+ throw new ShapeError(`Input must have shape (N,) or (N, 1)`);
+ }
+ if (truth.ndim !== 2 || truth.shape[1] !== 1) {
+ throw new ShapeError(`Target must be 1-dimensional or have shape (N, 1)`);
+ }
+ if ((pred.shape[0] ?? 0) !== (truth.shape[0] ?? 0)) {
+ throw new ShapeError(`Batch size mismatch`);
+ }
+ const predDtype = pred.dtype;
+ if (predDtype === "string") {
+ throw new DTypeError("Binary cross entropy does not support string dtype");
+ }
+ // Numerically stable form: loss = max(x, 0) - x * y + log(1 + exp(-|x|)),
+ // which avoids overflow in exp for large-magnitude logits.
+ const term1 = pred.relu();
+ const term2 = pred.mul(truth);
+ const negPred = pred.neg();
+ // |x| assembled as relu(x) + relu(-x).
+ const absPred = pred.relu().add(negPred.relu());
+ const expNegAbs = absPred.neg().exp();
+ const scalarDtype = expNegAbs.dtype;
+ if (scalarDtype === "string") {
+ throw new DTypeError("binaryCrossEntropyWithLogitsLoss does not support string dtype");
+ }
+ const one = GradTensor.scalar(1, { dtype: scalarDtype });
+ const term3 = one.add(expNegAbs).log();
+ const loss = term1.sub(term2).add(term3).mean();
+ if (!(input instanceof GradTensor) && !(target instanceof GradTensor)) {
+ const data = loss.tensor.data;
+ if (Array.isArray(data)) {
+ throw new DTypeError("binaryCrossEntropyWithLogitsLoss does not support string dtype");
+ }
+ if (data instanceof BigInt64Array) {
+ const raw = getBigIntElement(data, loss.tensor.offset);
+ return Number(raw);
+ }
+ return getNumericElement(data, loss.tensor.offset);
+ }
+ return loss;
+ }
2788
+
+ // src/nn/losses/index.ts
+ function shapesEqual2(a, b) {
+ if (a.length !== b.length) return false;
+ for (let i = 0; i < a.length; i++) {
+ if ((a[i] ?? 0) !== (b[i] ?? 0)) return false;
+ }
+ return true;
+ }
+ function ensureSameShape(a, b, context) {
+ if (!shapesEqual2(a.shape, b.shape)) {
+ throw new ShapeError(`Shape mismatch in ${context}: [${a.shape}] vs [${b.shape}]`);
+ }
+ }
+ function ensureNumeric(t, context) {
+ if (t.dtype === "string") {
+ throw new DTypeError(`${context} does not support string dtype`);
+ }
+ }
+ function validateReduction(reduction, context) {
+ if (reduction !== "mean" && reduction !== "sum" && reduction !== "none") {
+ throw new InvalidParameterError(
+ `${context} reduction must be 'mean', 'sum', or 'none'`,
+ "reduction",
+ reduction
+ );
+ }
+ }
+ function readNumericFlat(data, flat, logicalStrides, strides, offset) {
+ const dataOffset = offsetFromFlatIndex(flat, logicalStrides, strides, offset);
+ return getElementAsNumber(data, dataOffset);
+ }
2820
+ function mseLoss(predictions, targets, reduction = "mean") {
+ validateReduction(reduction, "mseLoss");
+ ensureNumeric(predictions, "mseLoss");
+ ensureNumeric(targets, "mseLoss");
+ ensureSameShape(predictions, targets, "mseLoss");
+ const diff = sub(predictions, targets);
+ const squaredDiff = pow(diff, tensor(2, { dtype: diff.dtype, device: diff.device }));
+ if (reduction === "none") {
+ return squaredDiff;
+ }
+ if (reduction === "sum") {
+ return sum(squaredDiff);
+ }
+ return mean(squaredDiff);
+ }
+ function maeLoss(predictions, targets, reduction = "mean") {
+ validateReduction(reduction, "maeLoss");
+ ensureNumeric(predictions, "maeLoss");
+ ensureNumeric(targets, "maeLoss");
+ ensureSameShape(predictions, targets, "maeLoss");
+ const diff = sub(predictions, targets);
+ const absDiff = abs(diff);
+ if (reduction === "none") {
+ return absDiff;
+ }
+ if (reduction === "sum") {
+ return sum(absDiff);
+ }
+ return mean(absDiff);
+ }
+ function binaryCrossEntropyLoss(predictions, targets, reduction = "mean") {
+ validateReduction(reduction, "binaryCrossEntropyLoss");
+ ensureNumeric(predictions, "binaryCrossEntropyLoss");
+ ensureNumeric(targets, "binaryCrossEntropyLoss");
+ ensureSameShape(predictions, targets, "binaryCrossEntropyLoss");
+ const epsilon = 1e-7;
+ const predClamped = clip(predictions, epsilon, 1 - epsilon);
+ const logPred = log(predClamped);
+ const term1 = mul(targets, logPred);
+ const one = tensor(1, {
+ dtype: predictions.dtype === "float64" ? "float64" : "float32",
+ device: predictions.device
+ });
+ const oneMinusTargets = sub(one, targets);
+ const oneMinusPred = sub(one, predClamped);
+ const logOneMinusPred = log(oneMinusPred);
+ const term2 = mul(oneMinusTargets, logOneMinusPred);
+ const loss = neg(add(term1, term2));
+ if (reduction === "none") {
+ return loss;
+ }
+ if (reduction === "sum") {
+ return sum(loss);
+ }
+ return mean(loss);
+ }
+ function rmseLoss(predictions, targets) {
+ ensureNumeric(predictions, "rmseLoss");
+ ensureNumeric(targets, "rmseLoss");
+ ensureSameShape(predictions, targets, "rmseLoss");
+ const mse = mseLoss(predictions, targets, "mean");
+ return sqrt(mse);
+ }
2883
+ function huberLoss(predictions, targets, delta = 1, reduction = "mean") {
+ validateReduction(reduction, "huberLoss");
+ ensureNumeric(predictions, "huberLoss");
+ ensureNumeric(targets, "huberLoss");
+ ensureSameShape(predictions, targets, "huberLoss");
+ if (!Number.isFinite(delta) || delta <= 0) {
+ throw new InvalidParameterError(`delta must be positive; got ${delta}`, "delta", delta);
+ }
+ const diff = sub(predictions, targets);
+ const absDiff = abs(diff);
+ const absData = absDiff.data;
+ if (Array.isArray(absData)) {
+ throw new DTypeError("huberLoss does not support string dtype");
+ }
+ const dtype = predictions.dtype === "float64" ? "float64" : "float32";
+ const lossData = dtype === "float64" ? new Float64Array(diff.size) : new Float32Array(diff.size);
+ const logicalStrides = computeStrides(absDiff.shape);
+ // Huber: quadratic 0.5 * e^2 for |e| <= delta, linear delta * (|e| - delta / 2) beyond.
+ for (let i = 0; i < diff.size; i++) {
+ const absVal = readNumericFlat(absData, i, logicalStrides, absDiff.strides, absDiff.offset);
+ if (absVal <= delta) {
+ lossData[i] = 0.5 * absVal * absVal;
+ } else {
+ lossData[i] = delta * (absVal - 0.5 * delta);
+ }
+ }
+ const loss = Tensor.fromTypedArray({
+ data: lossData,
+ shape: predictions.shape,
+ dtype,
+ device: predictions.device
+ });
+ if (reduction === "none") {
+ return loss;
+ }
+ if (reduction === "sum") {
+ return sum(loss);
+ }
+ return mean(loss);
+ }
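+ // Worked example (never called): with delta = 1, errors 0.5 and 2 give
+ // 0.5 * 0.5^2 = 0.125 and 1 * (2 - 0.5) = 1.5. The "cpu" device string is an
+ // assumption made for this sketch; any valid device identifier works.
+ function exampleHuberLossUsage() {
+   const mk = (values) => Tensor.fromTypedArray({
+     data: new Float32Array(values),
+     shape: [values.length],
+     dtype: "float32",
+     device: "cpu"
+   });
+   return huberLoss(mk([0.5, 2]), mk([0, 0]), 1, "none"); // values [0.125, 1.5]
+ }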
2922
+
+ export { AvgPool2d, BatchNorm1d, Conv1d, Conv2d, Dropout, ELU, GELU, GRU, LSTM, LayerNorm, LeakyReLU, Linear, LogSoftmax, MaxPool2d, Mish, Module, MultiheadAttention, RNN, ReLU, Sequential, Sigmoid, Softmax, Softplus, Swish, Tanh, TransformerEncoderLayer, binaryCrossEntropyLoss, binaryCrossEntropyWithLogitsLoss, crossEntropyLoss, huberLoss, maeLoss, mseLoss, nn_exports, rmseLoss };
+ //# sourceMappingURL=chunk-5R4S63PF.js.map