deepbox 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173)
  1. package/LICENSE +21 -0
  2. package/README.md +344 -0
  3. package/dist/CSRMatrix-CwGwQRea.d.cts +219 -0
  4. package/dist/CSRMatrix-KzNt6QpS.d.ts +219 -0
  5. package/dist/Tensor-BQLk1ltW.d.cts +147 -0
  6. package/dist/Tensor-g8mUClel.d.ts +147 -0
  7. package/dist/chunk-4S73VUBD.js +677 -0
  8. package/dist/chunk-4S73VUBD.js.map +1 -0
  9. package/dist/chunk-5R4S63PF.js +2925 -0
  10. package/dist/chunk-5R4S63PF.js.map +1 -0
  11. package/dist/chunk-6AE5FKKQ.cjs +9264 -0
  12. package/dist/chunk-6AE5FKKQ.cjs.map +1 -0
  13. package/dist/chunk-AD436M45.js +3854 -0
  14. package/dist/chunk-AD436M45.js.map +1 -0
  15. package/dist/chunk-ALS7ETWZ.cjs +4263 -0
  16. package/dist/chunk-ALS7ETWZ.cjs.map +1 -0
  17. package/dist/chunk-AU7XHGKJ.js +2092 -0
  18. package/dist/chunk-AU7XHGKJ.js.map +1 -0
  19. package/dist/chunk-B5TNKUEY.js +1481 -0
  20. package/dist/chunk-B5TNKUEY.js.map +1 -0
  21. package/dist/chunk-BCR7G3A6.js +9136 -0
  22. package/dist/chunk-BCR7G3A6.js.map +1 -0
  23. package/dist/chunk-C4PKXY74.cjs +1917 -0
  24. package/dist/chunk-C4PKXY74.cjs.map +1 -0
  25. package/dist/chunk-DWZY6PIP.cjs +6400 -0
  26. package/dist/chunk-DWZY6PIP.cjs.map +1 -0
  27. package/dist/chunk-E3EU5FZO.cjs +2113 -0
  28. package/dist/chunk-E3EU5FZO.cjs.map +1 -0
  29. package/dist/chunk-F3JWBINJ.js +1054 -0
  30. package/dist/chunk-F3JWBINJ.js.map +1 -0
  31. package/dist/chunk-FJYLIGJX.js +1940 -0
  32. package/dist/chunk-FJYLIGJX.js.map +1 -0
  33. package/dist/chunk-JSCDE774.cjs +729 -0
  34. package/dist/chunk-JSCDE774.cjs.map +1 -0
  35. package/dist/chunk-LWECRCW2.cjs +2412 -0
  36. package/dist/chunk-LWECRCW2.cjs.map +1 -0
  37. package/dist/chunk-MLBMYKCG.js +6379 -0
  38. package/dist/chunk-MLBMYKCG.js.map +1 -0
  39. package/dist/chunk-OX6QXFMV.cjs +3874 -0
  40. package/dist/chunk-OX6QXFMV.cjs.map +1 -0
  41. package/dist/chunk-PHV2DKRS.cjs +1072 -0
  42. package/dist/chunk-PHV2DKRS.cjs.map +1 -0
  43. package/dist/chunk-PL7TAYKI.js +4056 -0
  44. package/dist/chunk-PL7TAYKI.js.map +1 -0
  45. package/dist/chunk-PR647I7R.js +1898 -0
  46. package/dist/chunk-PR647I7R.js.map +1 -0
  47. package/dist/chunk-QERHVCHC.cjs +2960 -0
  48. package/dist/chunk-QERHVCHC.cjs.map +1 -0
  49. package/dist/chunk-XEG44RF6.cjs +1514 -0
  50. package/dist/chunk-XEG44RF6.cjs.map +1 -0
  51. package/dist/chunk-XMWVME2W.js +2377 -0
  52. package/dist/chunk-XMWVME2W.js.map +1 -0
  53. package/dist/chunk-ZB75FESB.cjs +1979 -0
  54. package/dist/chunk-ZB75FESB.cjs.map +1 -0
  55. package/dist/chunk-ZLW62TJG.cjs +4061 -0
  56. package/dist/chunk-ZLW62TJG.cjs.map +1 -0
  57. package/dist/chunk-ZXKBDFP3.js +4235 -0
  58. package/dist/chunk-ZXKBDFP3.js.map +1 -0
  59. package/dist/core/index.cjs +204 -0
  60. package/dist/core/index.cjs.map +1 -0
  61. package/dist/core/index.d.cts +2 -0
  62. package/dist/core/index.d.ts +2 -0
  63. package/dist/core/index.js +3 -0
  64. package/dist/core/index.js.map +1 -0
  65. package/dist/dataframe/index.cjs +22 -0
  66. package/dist/dataframe/index.cjs.map +1 -0
  67. package/dist/dataframe/index.d.cts +3 -0
  68. package/dist/dataframe/index.d.ts +3 -0
  69. package/dist/dataframe/index.js +5 -0
  70. package/dist/dataframe/index.js.map +1 -0
  71. package/dist/datasets/index.cjs +134 -0
  72. package/dist/datasets/index.cjs.map +1 -0
  73. package/dist/datasets/index.d.cts +3 -0
  74. package/dist/datasets/index.d.ts +3 -0
  75. package/dist/datasets/index.js +5 -0
  76. package/dist/datasets/index.js.map +1 -0
  77. package/dist/index-74AB8Cyh.d.cts +1126 -0
  78. package/dist/index-9oQx1HgV.d.cts +1180 -0
  79. package/dist/index-BJY2SI4i.d.ts +483 -0
  80. package/dist/index-BWGhrDlr.d.ts +733 -0
  81. package/dist/index-B_DK4FKY.d.cts +242 -0
  82. package/dist/index-BbA2Gxfl.d.ts +456 -0
  83. package/dist/index-BgHYAoSS.d.cts +837 -0
  84. package/dist/index-BndMbqsM.d.ts +1439 -0
  85. package/dist/index-C1mfVYoo.d.ts +2517 -0
  86. package/dist/index-CCvlwAmL.d.cts +809 -0
  87. package/dist/index-CDw5CnOU.d.ts +785 -0
  88. package/dist/index-Cn3SdB0O.d.ts +1126 -0
  89. package/dist/index-CrqLlS-a.d.ts +776 -0
  90. package/dist/index-D61yaSMY.d.cts +483 -0
  91. package/dist/index-D9Loo1_A.d.cts +2517 -0
  92. package/dist/index-DIT_OO9C.d.cts +785 -0
  93. package/dist/index-DIp_RrRt.d.ts +242 -0
  94. package/dist/index-DbultU6X.d.cts +1427 -0
  95. package/dist/index-DmEg_LCm.d.cts +776 -0
  96. package/dist/index-DoPWVxPo.d.cts +1439 -0
  97. package/dist/index-DuCxd-8d.d.ts +837 -0
  98. package/dist/index-Dx42TZaY.d.ts +809 -0
  99. package/dist/index-DyZ4QQf5.d.cts +456 -0
  100. package/dist/index-GFAVyOWO.d.ts +1427 -0
  101. package/dist/index-WHQLn0e8.d.cts +733 -0
  102. package/dist/index-ZtI1Iy4L.d.ts +1180 -0
  103. package/dist/index-eJgeni9c.d.cts +1911 -0
  104. package/dist/index-tk4lSYod.d.ts +1911 -0
  105. package/dist/index.cjs +72 -0
  106. package/dist/index.cjs.map +1 -0
  107. package/dist/index.d.cts +17 -0
  108. package/dist/index.d.ts +17 -0
  109. package/dist/index.js +15 -0
  110. package/dist/index.js.map +1 -0
  111. package/dist/linalg/index.cjs +86 -0
  112. package/dist/linalg/index.cjs.map +1 -0
  113. package/dist/linalg/index.d.cts +3 -0
  114. package/dist/linalg/index.d.ts +3 -0
  115. package/dist/linalg/index.js +5 -0
  116. package/dist/linalg/index.js.map +1 -0
  117. package/dist/metrics/index.cjs +158 -0
  118. package/dist/metrics/index.cjs.map +1 -0
  119. package/dist/metrics/index.d.cts +3 -0
  120. package/dist/metrics/index.d.ts +3 -0
  121. package/dist/metrics/index.js +5 -0
  122. package/dist/metrics/index.js.map +1 -0
  123. package/dist/ml/index.cjs +87 -0
  124. package/dist/ml/index.cjs.map +1 -0
  125. package/dist/ml/index.d.cts +3 -0
  126. package/dist/ml/index.d.ts +3 -0
  127. package/dist/ml/index.js +6 -0
  128. package/dist/ml/index.js.map +1 -0
  129. package/dist/ndarray/index.cjs +501 -0
  130. package/dist/ndarray/index.cjs.map +1 -0
  131. package/dist/ndarray/index.d.cts +5 -0
  132. package/dist/ndarray/index.d.ts +5 -0
  133. package/dist/ndarray/index.js +4 -0
  134. package/dist/ndarray/index.js.map +1 -0
  135. package/dist/nn/index.cjs +142 -0
  136. package/dist/nn/index.cjs.map +1 -0
  137. package/dist/nn/index.d.cts +6 -0
  138. package/dist/nn/index.d.ts +6 -0
  139. package/dist/nn/index.js +5 -0
  140. package/dist/nn/index.js.map +1 -0
  141. package/dist/optim/index.cjs +77 -0
  142. package/dist/optim/index.cjs.map +1 -0
  143. package/dist/optim/index.d.cts +4 -0
  144. package/dist/optim/index.d.ts +4 -0
  145. package/dist/optim/index.js +4 -0
  146. package/dist/optim/index.js.map +1 -0
  147. package/dist/plot/index.cjs +114 -0
  148. package/dist/plot/index.cjs.map +1 -0
  149. package/dist/plot/index.d.cts +6 -0
  150. package/dist/plot/index.d.ts +6 -0
  151. package/dist/plot/index.js +5 -0
  152. package/dist/plot/index.js.map +1 -0
  153. package/dist/preprocess/index.cjs +82 -0
  154. package/dist/preprocess/index.cjs.map +1 -0
  155. package/dist/preprocess/index.d.cts +4 -0
  156. package/dist/preprocess/index.d.ts +4 -0
  157. package/dist/preprocess/index.js +5 -0
  158. package/dist/preprocess/index.js.map +1 -0
  159. package/dist/random/index.cjs +74 -0
  160. package/dist/random/index.cjs.map +1 -0
  161. package/dist/random/index.d.cts +3 -0
  162. package/dist/random/index.d.ts +3 -0
  163. package/dist/random/index.js +5 -0
  164. package/dist/random/index.js.map +1 -0
  165. package/dist/stats/index.cjs +142 -0
  166. package/dist/stats/index.cjs.map +1 -0
  167. package/dist/stats/index.d.cts +3 -0
  168. package/dist/stats/index.d.ts +3 -0
  169. package/dist/stats/index.js +5 -0
  170. package/dist/stats/index.js.map +1 -0
  171. package/dist/tensor-B96jjJLQ.d.cts +205 -0
  172. package/dist/tensor-B96jjJLQ.d.ts +205 -0
  173. package/package.json +226 -0
@@ -0,0 +1,1126 @@
1
+ import { G as GradTensor } from './index-B_DK4FKY.cjs';
2
+
3
+ /**
4
+ * Base class for all optimizers.
5
+ *
6
+ * This abstract class provides the foundation for implementing optimization algorithms
7
+ * used in training machine learning models. All concrete optimizers (SGD, Adam, etc.)
8
+ * must extend this class and implement the abstract `step()` method.
9
+ *
10
+ * **Key Features:**
11
+ * - Parameter groups with per-group hyperparameters
12
+ * - State management for stateful optimizers (momentum, adaptive learning rates)
13
+ * - Gradient zeroing utilities
14
+ * - State serialization for checkpointing
15
+ *
16
+ * **Design Pattern:**
17
+ * The optimizer maintains a list of parameter groups, where each group can have
18
+ * different hyperparameters (e.g., different learning rates for different layers).
19
+ * This enables fine-grained control over the optimization process.
20
+ *
21
+ * @example
22
+ * ```ts
23
+ * import { SGD } from 'deepbox/optim';
24
+ *
25
+ * const optimizer = new SGD(model.parameters(), { lr: 0.01 });
26
+ *
27
+ * // Training loop
28
+ * for (let epoch = 0; epoch < 100; epoch++) {
29
+ * optimizer.zeroGrad();
30
+ * const loss = computeLoss();
31
+ * loss.backward();
32
+ * optimizer.step();
33
+ * }
34
+ * ```
35
+ *
36
+ * @example
37
+ * ```ts
38
+ * // Using parameter groups with different learning rates
39
+ * const optimizer = new SGD([
40
+ * { params: model.layer1.parameters(), lr: 0.01 },
41
+ * { params: model.layer2.parameters(), lr: 0.001 }
42
+ * ], { lr: 0.01 });
43
+ * ```
44
+ *
45
+ * References:
46
+ * - PyTorch Optimizer: https://pytorch.org/docs/stable/optim.html
47
+ *
48
+ * @category Optimization
49
+ */
50
+ /**
51
+ * Represents a group of parameters with optional per-group hyperparameters.
52
+ *
53
+ * @template Options - Type of optimizer-specific options
54
+ * @property params - Iterable of parameters to optimize in this group
55
+ */
56
+ type ParamGroup<Options extends Record<string, unknown>> = {
57
+ readonly params: Iterable<GradTensor>;
58
+ } & Partial<Options>;
59
+ /**
60
+ * Abstract base class for all optimization algorithms.
61
+ *
62
+ * @template Options - Type defining optimizer-specific hyperparameters
63
+ * @template State - Type defining per-parameter state (e.g., momentum buffers)
64
+ */
65
+ declare abstract class Optimizer<Options extends Record<string, unknown>, State extends Record<string, unknown>> {
66
+ protected readonly defaults: Readonly<Options>;
67
+ /**
68
+ * Groups of parameters with their associated hyperparameters.
69
+ * Each group can have different options (e.g., learning rates).
70
+ * Exposed publicly to enable scheduler integrations.
71
+ */
72
+ paramGroups: Array<{
73
+ params: GradTensor[];
74
+ options: Options;
75
+ }>;
76
+ /**
77
+ * Per-parameter state storage.
78
+ * Maps each parameter to its optimizer-specific state (momentum, adaptive rates, etc.).
79
+ */
80
+ protected state: Map<GradTensor, State>;
81
+ /**
82
+ * Create a new optimizer.
83
+ *
84
+ * Initializes the optimizer with either a simple list of parameters or
85
+ * multiple parameter groups with per-group hyperparameters.
86
+ *
87
+ * @param params - Either an iterable of parameters or array of parameter groups
88
+ * @param defaults - Default hyperparameters applied to all groups
89
+ */
90
+ constructor(params: Iterable<GradTensor> | ReadonlyArray<ParamGroup<Options>>, defaults: Readonly<Options>);
91
+ /**
92
+ * Perform a single optimization step (parameter update).
93
+ *
94
+ * This abstract method must be implemented by all optimizer subclasses.
95
+ * It applies the optimization algorithm to update all parameters based on
96
+ * their gradients.
97
+ *
98
+ * @param closure - Optional closure that reevaluates the model and returns the loss.
99
+ * Used by some optimizers (e.g., LBFGS) that require multiple
100
+ * function evaluations per step.
101
+ * @returns Loss value if closure is provided, undefined otherwise
102
+ */
103
+ abstract step(closure?: () => number): number | undefined;
104
+ /**
105
+ * Zero out the gradients of all optimized parameters.
106
+ *
107
+ * This method should be called at the beginning of each training iteration,
108
+ * before computing new gradients. Without this call, gradients would accumulate
109
+ * across iterations, leading to incorrect updates.
110
+ *
111
+ * **Implementation Note:**
112
+ * For parameters wrapped in GradTensor, this calls zeroGrad() on each parameter,
113
+ * which either sets the gradient to zero or initializes it if not yet created.
114
+ *
115
+ * @example
116
+ * ```ts
117
+ * // Typical training loop
118
+ * optimizer.zeroGrad(); // Clear previous gradients
119
+ * const output = model.forward(input);
120
+ * const loss = criterion(output, target);
121
+ * loss.backward(); // Compute new gradients
122
+ * optimizer.step(); // Update parameters
123
+ * ```
124
+ */
125
+ zeroGrad(): void;
126
+ /**
127
+ * Add a parameter group to the optimizer.
128
+ *
129
+ * This method allows adding new parameters to optimize after the optimizer
130
+ * has been created. This is particularly useful for:
131
+ * - Fine-tuning: adding pre-trained layers with different learning rates
132
+ * - Progressive training: gradually unfreezing layers
133
+ * - Dynamic architectures: adding parameters while the model grows
134
+ *
135
+ * @param paramGroup - Parameter group to add with optional per-group options
136
+ *
137
+ * @example
138
+ * ```ts
139
+ * const optimizer = new SGD(model.backbone.parameters(), { lr: 0.001 });
140
+ * // Later, add classifier with higher learning rate
141
+ * optimizer.addParamGroup({
142
+ * params: model.classifier.parameters(),
143
+ * lr: 0.01
144
+ * });
145
+ * ```
146
+ */
147
+ addParamGroup(paramGroup: ParamGroup<Options>): void;
148
+ /**
149
+ * Validate that a given state object matches the optimizer's state type.
150
+ *
151
+ * @param state - The state object to validate
152
+ * @returns True if the state object is valid, false otherwise
153
+ */
154
+ protected abstract isState(state: Record<string, unknown>): state is State;
155
+ /**
156
+ * Get the current state of the optimizer.
157
+ *
158
+ * Returns a dictionary containing all optimizer state that needs to be
159
+ * saved for checkpointing. This includes per-parameter state (momentum buffers,
160
+ * adaptive learning rates, etc.) and parameter group configurations.
161
+ *
162
+ * **Note:** In a production implementation, parameters would be identified by
163
+ * unique IDs rather than object references for proper serialization.
164
+ *
165
+ * @returns Optimizer state dictionary containing state and parameter groups
166
+ *
167
+ * @example
168
+ * ```ts
169
+ * // Save checkpoint
170
+ * const checkpoint = {
171
+ * model: model.stateDict(),
172
+ * optimizer: optimizer.stateDict(),
173
+ * epoch: currentEpoch
174
+ * };
175
+ * ```
176
+ */
177
+ stateDict(): {
178
+ state: {
179
+ paramId: number;
180
+ param: GradTensor;
181
+ state: State;
182
+ }[];
183
+ paramGroups: {
184
+ params: GradTensor[];
185
+ paramIds: number[];
186
+ options: Options;
187
+ }[];
188
+ };
189
+ /**
190
+ * Load optimizer state from a state dictionary.
191
+ *
192
+ * Restores the optimizer to a previously saved state, including all
193
+ * per-parameter state and parameter group configurations. This is essential
194
+ * for resuming training from checkpoints.
195
+ *
196
+ * **Important:** The loaded state must be compatible with the current
197
+ * optimizer configuration (same parameters, same optimizer type).
198
+ *
199
+ * @param stateDict - State dictionary previously returned by stateDict()
200
+ *
201
+ * @example
202
+ * ```ts
203
+ * // Resume from checkpoint
204
+ * const checkpoint = loadCheckpoint('checkpoint.json');
205
+ * model.loadStateDict(checkpoint.model);
206
+ * optimizer.loadStateDict(checkpoint.optimizer);
207
+ * ```
208
+ */
209
+ loadStateDict(stateDict: Record<string, unknown>): void;
210
+ }
211
+
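The abstract members above define the full contract a concrete optimizer has to satisfy: the constructor forwards parameters and defaults to the base class, `step()` applies the update across `paramGroups`, and `isState()` guards state restored by `loadStateDict()`. The sketch below illustrates that contract with a stateless, SGD-like subclass. It is a minimal example under assumptions, not package code: the `deepbox/optim` and `deepbox/core` import locations for `Optimizer` and `GradTensor` are assumed, and the actual tensor update is left as a comment because the tensor mutation API is not part of this declaration file.

```ts
import { Optimizer } from 'deepbox/optim';        // assumed re-export of the base class
import type { GradTensor } from 'deepbox/core';   // assumed public location of GradTensor

type PlainSGDOptions = { lr: number };
type PlainSGDState = Record<string, never>;       // this optimizer keeps no per-parameter state

class PlainSGD extends Optimizer<PlainSGDOptions, PlainSGDState> {
  constructor(params: Iterable<GradTensor>, options?: { readonly lr?: number }) {
    super(params, { lr: options?.lr ?? 0.01 });
  }

  // Stateless: only an empty record is a valid state entry.
  protected isState(state: Record<string, unknown>): state is PlainSGDState {
    return Object.keys(state).length === 0;
  }

  step(closure?: () => number): number | undefined {
    const loss = closure?.();
    for (const group of this.paramGroups) {
      for (const param of group.params) {
        // Real update: param <- param - group.options.lr * (param's gradient).
        // Left as a comment; the in-place tensor API is not declared here.
        void param;
      }
    }
    return loss;
  }
}
```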
212
+ type AdaDeltaOptions = {
213
+ lr: number;
214
+ readonly rho: number;
215
+ readonly eps: number;
216
+ readonly weightDecay: number;
217
+ };
218
+ type AdaDeltaState = {
219
+ squareAvg: Float64Array;
220
+ accDelta: Float64Array;
221
+ };
222
+ /**
223
+ * AdaDelta optimizer.
224
+ *
225
+ * Implements the AdaDelta algorithm - an extension of Adagrad that seeks to reduce
226
+ * its aggressive, monotonically decreasing learning rate. AdaDelta adapts learning
227
+ * rates based on a moving window of gradient updates, rather than accumulating all
228
+ * past gradients.
229
+ *
230
+ * @example
231
+ * ```ts
232
+ * import { AdaDelta } from 'deepbox/optim';
233
+ *
234
+ * const optimizer = new AdaDelta(model.parameters(), {
235
+ * lr: 1.0,
236
+ * rho: 0.9,
237
+ * eps: 1e-6
238
+ * });
239
+ *
240
+ * // Training loop
241
+ * for (let epoch = 0; epoch < numEpochs; epoch++) {
242
+ * optimizer.zeroGrad();
243
+ * // ...
244
+ * optimizer.step();
245
+ * }
246
+ * ```
247
+ *
248
+ * @category Optimizers
249
+ */
250
+ declare class AdaDelta extends Optimizer<AdaDeltaOptions, AdaDeltaState> {
251
+ private _stepCount;
252
+ get stepCount(): number;
253
+ constructor(params: Iterable<GradTensor> | ReadonlyArray<ParamGroup<AdaDeltaOptions>>, options?: {
254
+ readonly lr?: number;
255
+ readonly rho?: number;
256
+ readonly eps?: number;
257
+ readonly weightDecay?: number;
258
+ });
259
+ /**
260
+ * Get the current learning rate.
261
+ *
262
+ * @param groupIdx - Parameter group index (default: 0)
263
+ * @returns Current learning rate
264
+ */
265
+ getLearningRate(groupIdx?: number): number;
266
+ /**
267
+ * Set the learning rate for all parameter groups.
268
+ *
269
+ * @param lr - New learning rate
270
+ */
271
+ setLearningRate(lr: number): void;
272
+ protected isState(state: Record<string, unknown>): state is AdaDeltaState;
273
+ step(closure?: () => number): number | undefined;
274
+ }
275
+
276
+ type AdagradOptions = {
277
+ lr: number;
278
+ eps: number;
279
+ weightDecay: number;
280
+ lrDecay: number;
281
+ };
282
+ type AdagradState = {
283
+ step: number;
284
+ sum: Float64Array;
285
+ };
286
+ /**
287
+ * Adagrad (Adaptive Gradient Algorithm) optimizer.
288
+ *
289
+ * Adagrad adapts the learning rate for each parameter based on the historical
290
+ * sum of squared gradients. Parameters with larger gradients receive smaller
291
+ * effective learning rates, while parameters with smaller gradients receive
292
+ * larger effective learning rates.
293
+ *
294
+ * @example
295
+ * ```ts
296
+ * import { Adagrad } from 'deepbox/optim';
297
+ *
298
+ * const optimizer = new Adagrad(model.parameters(), {
299
+ * lr: 0.01,
300
+ * eps: 1e-10
301
+ * });
302
+ *
303
+ * // Training loop
304
+ * for (let epoch = 0; epoch < numEpochs; epoch++) {
305
+ * optimizer.zeroGrad();
306
+ * // ...
307
+ * optimizer.step();
308
+ * }
309
+ * ```
310
+ *
311
+ * @category Optimizers
312
+ */
313
+ declare class Adagrad extends Optimizer<AdagradOptions, AdagradState> {
314
+ private _stepCount;
315
+ get stepCount(): number;
316
+ constructor(params: Iterable<GradTensor> | ReadonlyArray<ParamGroup<AdagradOptions>>, options?: {
317
+ readonly lr?: number;
318
+ readonly eps?: number;
319
+ readonly weightDecay?: number;
320
+ readonly lrDecay?: number;
321
+ });
322
+ /**
323
+ * Get the current learning rate.
324
+ *
325
+ * @param groupIdx - Parameter group index (default: 0)
326
+ * @returns Current learning rate
327
+ */
328
+ getLearningRate(groupIdx?: number): number;
329
+ /**
330
+ * Set the learning rate for all parameter groups.
331
+ *
332
+ * @param lr - New learning rate
333
+ */
334
+ setLearningRate(lr: number): void;
335
+ protected isState(state: Record<string, unknown>): state is AdagradState;
336
+ step(closure?: () => number): number | undefined;
337
+ }
338
+
339
+ type AdamOptions = {
340
+ lr: number;
341
+ beta1: number;
342
+ beta2: number;
343
+ eps: number;
344
+ weightDecay: number;
345
+ amsgrad: boolean;
346
+ };
347
+ type AdamState = {
348
+ step: number;
349
+ expAvg: Float64Array;
350
+ expAvgSq: Float64Array;
351
+ maxExpAvgSq?: Float64Array;
352
+ };
353
+ /**
354
+ * Adam (Adaptive Moment Estimation) optimizer.
355
+ *
356
+ * Computes adaptive learning rates for each parameter by maintaining
357
+ * running averages of both the gradients and their squared values.
358
+ *
359
+ * @example
360
+ * ```ts
361
+ * import { Adam } from 'deepbox/optim';
362
+ *
363
+ * const optimizer = new Adam(model.parameters(), {
364
+ * lr: 0.001,
365
+ * beta1: 0.9,
366
+ * beta2: 0.999
367
+ * });
368
+ * ```
369
+ *
370
+ * @category Optimizers
371
+ */
372
+ declare class Adam extends Optimizer<AdamOptions, AdamState> {
373
+ private _stepCount;
374
+ get stepCount(): number;
375
+ constructor(params: Iterable<GradTensor> | ReadonlyArray<ParamGroup<AdamOptions>>, options?: {
376
+ readonly lr?: number;
377
+ readonly beta1?: number;
378
+ readonly beta2?: number;
379
+ readonly eps?: number;
380
+ readonly weightDecay?: number;
381
+ readonly amsgrad?: boolean;
382
+ });
383
+ /**
384
+ * Get the current learning rate.
385
+ *
386
+ * @param groupIdx - Parameter group index (default: 0)
387
+ * @returns Current learning rate
388
+ */
389
+ getLearningRate(groupIdx?: number): number;
390
+ /**
391
+ * Set the learning rate for all parameter groups.
392
+ *
393
+ * @param lr - New learning rate
394
+ */
395
+ setLearningRate(lr: number): void;
396
+ protected isState(state: Record<string, unknown>): state is AdamState;
397
+ step(closure?: () => number): number | undefined;
398
+ }
399
+
400
+ /**
401
+ * Options for the AdamW optimizer.
402
+ *
403
+ * @property lr - Learning rate (step size)
404
+ * @property beta1 - Exponential decay rate for first moment estimates
405
+ * @property beta2 - Exponential decay rate for second moment estimates
406
+ * @property eps - Small constant for numerical stability
407
+ * @property weightDecay - Weight decay coefficient (L2 penalty)
408
+ * @property amsgrad - Whether to use the AMSGrad variant
409
+ */
410
+ type AdamWOptions = {
411
+ lr: number;
412
+ beta1: number;
413
+ beta2: number;
414
+ eps: number;
415
+ weightDecay: number;
416
+ amsgrad: boolean;
417
+ };
418
+ /**
419
+ * State maintained per parameter by AdamW.
420
+ *
421
+ * @property step - Number of optimization steps taken
422
+ * @property expAvg - Exponentially weighted average of gradients (first moment)
423
+ * @property expAvgSq - Exponentially weighted average of squared gradients (second moment)
424
+ * @property maxExpAvgSq - Maximum of exponentially weighted average of squared gradients (AMSGrad only)
425
+ */
426
+ type AdamWState = {
427
+ step: number;
428
+ expAvg: Float64Array;
429
+ expAvgSq: Float64Array;
430
+ maxExpAvgSq?: Float64Array;
431
+ };
432
+ /**
433
+ * AdamW (Adam with decoupled Weight decay) optimizer.
434
+ *
435
+ * AdamW fixes the weight decay implementation in Adam by decoupling it from the
436
+ * gradient-based update. This leads to better generalization and is the recommended
437
+ * variant for most applications.
438
+ *
439
+ * @example
440
+ * ```ts
441
+ * import { AdamW } from 'deepbox/optim';
442
+ *
443
+ * const optimizer = new AdamW(model.parameters(), {
444
+ * lr: 0.001,
445
+ * weightDecay: 0.01, // Typical value for AdamW
446
+ * beta1: 0.9,
447
+ * beta2: 0.999
448
+ * });
449
+ *
450
+ * // Training loop
451
+ * for (let epoch = 0; epoch < numEpochs; epoch++) {
452
+ * optimizer.zeroGrad();
453
+ * // ...
454
+ * optimizer.step();
455
+ * }
456
+ * ```
457
+ *
458
+ * @category Optimizers
459
+ */
460
+ declare class AdamW extends Optimizer<AdamWOptions, AdamWState> {
461
+ /** Internal counter tracking total number of optimization steps */
462
+ private _stepCount;
463
+ /**
464
+ * Get the total number of optimization steps performed.
465
+ *
466
+ * @returns Number of steps taken
467
+ */
468
+ get stepCount(): number;
469
+ /**
470
+ * Create a new AdamW optimizer.
471
+ *
472
+ * @param params - Iterable of parameters or parameter groups to optimize
473
+ * @param options - Optimization options
474
+ * @param options.lr - Learning rate (default: 0.001)
475
+ * @param options.beta1 - First moment decay rate (default: 0.9)
476
+ * @param options.beta2 - Second moment decay rate (default: 0.999)
477
+ * @param options.eps - Numerical stability constant (default: 1e-8)
478
+ * @param options.weightDecay - Weight decay coefficient (default: 0.01)
479
+ * @param options.amsgrad - Enable AMSGrad variant (default: false)
480
+ * @throws {InvalidParameterError} If a parameter is invalid
481
+ */
482
+ constructor(params: Iterable<GradTensor> | ReadonlyArray<ParamGroup<AdamWOptions>>, options?: {
483
+ readonly lr?: number;
484
+ readonly beta1?: number;
485
+ readonly beta2?: number;
486
+ readonly eps?: number;
487
+ readonly weightDecay?: number;
488
+ readonly amsgrad?: boolean;
489
+ });
490
+ /**
491
+ * Get the current learning rate.
492
+ *
493
+ * @param groupIdx - Parameter group index (default: 0)
494
+ * @returns Current learning rate
495
+ */
496
+ getLearningRate(groupIdx?: number): number;
497
+ /**
498
+ * Set the learning rate for all parameter groups.
499
+ *
500
+ * @param lr - New learning rate
501
+ */
502
+ setLearningRate(lr: number): void;
503
+ /**
504
+ * Perform a single optimization step (parameter update).
505
+ *
506
+ * Implements the AdamW update rule with decoupled weight decay.
507
+ *
508
+ * @param closure - Optional closure that reevaluates the model and returns the loss
509
+ * @returns Loss value if closure is provided, undefined otherwise
510
+ */
511
+ protected isState(state: Record<string, unknown>): state is AdamWState;
512
+ step(closure?: () => number): number | undefined;
513
+ }
514
+
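To make "decoupled weight decay" concrete, here is a scalar walk-through of one AdamW update using the option and state names declared in `AdamWOptions`/`AdamWState`. It illustrates the published update rule rather than the package's internal code: the moment estimates are built from the raw gradient, and `weightDecay` acts directly on the parameter, scaled only by `lr`.

```ts
function adamWScalarStep(
  theta: number,
  grad: number,
  state: { step: number; expAvg: number; expAvgSq: number },
  opts = { lr: 0.001, beta1: 0.9, beta2: 0.999, eps: 1e-8, weightDecay: 0.01 }
): number {
  state.step += 1;
  // First and second moments use the raw gradient (no weight-decay term mixed in).
  state.expAvg = opts.beta1 * state.expAvg + (1 - opts.beta1) * grad;
  state.expAvgSq = opts.beta2 * state.expAvgSq + (1 - opts.beta2) * grad * grad;
  // Bias correction.
  const mHat = state.expAvg / (1 - opts.beta1 ** state.step);
  const vHat = state.expAvgSq / (1 - opts.beta2 ** state.step);
  // Decoupled decay: shrink the parameter directly, then apply the Adam step.
  return theta - opts.lr * opts.weightDecay * theta - (opts.lr * mHat) / (Math.sqrt(vHat) + opts.eps);
}
```

In plain Adam, weight decay is conventionally folded into the gradient before the moments are computed, which is the coupling that AdamW's formulation removes.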
515
+ type NadamOptions = {
516
+ lr: number;
517
+ readonly beta1: number;
518
+ readonly beta2: number;
519
+ readonly eps: number;
520
+ readonly weightDecay: number;
521
+ readonly momentumDecay: number;
522
+ };
523
+ type NadamState = {
524
+ step: number;
525
+ expAvg: Float64Array;
526
+ expAvgSq: Float64Array;
527
+ muProduct: number;
528
+ };
529
+ /**
530
+ * Nadam (Nesterov-accelerated Adam) optimizer.
531
+ *
532
+ * Implements the Nadam algorithm, which combines Adam's adaptive learning rates with
533
+ * Nesterov momentum for potentially faster convergence. Nadam applies Nesterov
534
+ * acceleration to the momentum term, providing a "look-ahead" gradient.
535
+ *
536
+ * @example
537
+ * ```ts
538
+ * import { Nadam } from 'deepbox/optim';
539
+ *
540
+ * const optimizer = new Nadam(model.parameters(), {
541
+ * lr: 0.002,
542
+ * beta1: 0.9,
543
+ * beta2: 0.999
544
+ * });
545
+ *
546
+ * // Training loop
547
+ * for (let epoch = 0; epoch < numEpochs; epoch++) {
548
+ * optimizer.zeroGrad();
549
+ * // ...
550
+ * optimizer.step();
551
+ * }
552
+ * ```
553
+ *
554
+ * @category Optimizers
555
+ */
556
+ declare class Nadam extends Optimizer<NadamOptions, NadamState> {
557
+ private _stepCount;
558
+ get stepCount(): number;
559
+ constructor(params: Iterable<GradTensor> | ReadonlyArray<ParamGroup<NadamOptions>>, options?: {
560
+ readonly lr?: number;
561
+ readonly beta1?: number;
562
+ readonly beta2?: number;
563
+ readonly eps?: number;
564
+ readonly weightDecay?: number;
565
+ readonly momentumDecay?: number;
566
+ });
567
+ /**
568
+ * Get the current learning rate.
569
+ *
570
+ * @param groupIdx - Parameter group index (default: 0)
571
+ * @returns Current learning rate
572
+ */
573
+ getLearningRate(groupIdx?: number): number;
574
+ /**
575
+ * Set the learning rate for all parameter groups.
576
+ *
577
+ * @param lr - New learning rate
578
+ */
579
+ setLearningRate(lr: number): void;
580
+ protected isState(state: Record<string, unknown>): state is NadamState;
581
+ step(closure?: () => number): number | undefined;
582
+ }
583
+
584
+ /**
585
+ * Options for the RMSprop optimizer.
586
+ *
587
+ * @property lr - Learning rate (step size)
588
+ * @property alpha - Smoothing constant for moving average of squared gradients
589
+ * @property eps - Small constant for numerical stability
590
+ * @property weightDecay - Weight decay coefficient (L2 penalty)
591
+ * @property momentum - Momentum factor
592
+ * @property centered - Whether to use centered RMSprop variant
593
+ */
594
+ type RMSpropOptions = {
595
+ lr: number;
596
+ alpha: number;
597
+ eps: number;
598
+ weightDecay: number;
599
+ momentum: number;
600
+ centered: boolean;
601
+ };
602
+ /**
603
+ * State maintained per parameter by RMSprop.
604
+ *
605
+ * @property squareAvg - Exponentially weighted average of squared gradients
606
+ * @property momentumBuffer - Momentum buffer (if momentum > 0)
607
+ * @property gradAvg - Exponentially weighted average of gradients (centered variant only)
608
+ */
609
+ type RMSpropState = {
610
+ squareAvg: Float64Array;
611
+ momentumBuffer?: Float64Array;
612
+ gradAvg?: Float64Array;
613
+ };
614
+ /**
615
+ * RMSprop (Root Mean Square Propagation) optimizer.
616
+ *
617
+ * RMSprop adapts the learning rate for each parameter by dividing by a running
618
+ * average of recent gradient magnitudes. This helps with non-stationary objectives
619
+ * and is particularly effective for RNNs.
620
+ *
621
+ * @example
622
+ * ```ts
623
+ * import { RMSprop } from 'deepbox/optim';
624
+ *
625
+ * const optimizer = new RMSprop(model.parameters(), {
626
+ * lr: 0.01,
627
+ * alpha: 0.99,
628
+ * momentum: 0.9,
629
+ * centered: true
630
+ * });
631
+ *
632
+ * // Training loop
633
+ * for (let epoch = 0; epoch < numEpochs; epoch++) {
634
+ * optimizer.zeroGrad();
635
+ * // ...
636
+ * optimizer.step();
637
+ * }
638
+ * ```
639
+ *
640
+ * @category Optimizers
641
+ */
642
+ declare class RMSprop extends Optimizer<RMSpropOptions, RMSpropState> {
643
+ /** Internal counter tracking total number of optimization steps */
644
+ private _stepCount;
645
+ /**
646
+ * Get the total number of optimization steps performed.
647
+ *
648
+ * @returns Number of steps taken
649
+ */
650
+ get stepCount(): number;
651
+ /**
652
+ * Create a new RMSprop optimizer.
653
+ *
654
+ * @param params - Iterable of parameters or parameter groups to optimize
655
+ * @param options - Optimization options
656
+ * @param options.lr - Learning rate (default: 0.01)
657
+ * @param options.alpha - Smoothing constant (default: 0.99)
658
+ * @param options.eps - Numerical stability constant (default: 1e-8)
659
+ * @param options.weightDecay - Weight decay coefficient (default: 0)
660
+ * @param options.momentum - Momentum factor (default: 0)
661
+ * @param options.centered - Use centered variant (default: false)
662
+ * @throws {InvalidParameterError} If a parameter is invalid
663
+ */
664
+ constructor(params: Iterable<GradTensor> | ReadonlyArray<ParamGroup<RMSpropOptions>>, options?: {
665
+ readonly lr?: number;
666
+ readonly alpha?: number;
667
+ readonly eps?: number;
668
+ readonly weightDecay?: number;
669
+ readonly momentum?: number;
670
+ readonly centered?: boolean;
671
+ });
672
+ /**
673
+ * Get the current learning rate.
674
+ *
675
+ * @param groupIdx - Parameter group index (default: 0)
676
+ * @returns Current learning rate
677
+ */
678
+ getLearningRate(groupIdx?: number): number;
679
+ /**
680
+ * Set the learning rate for all parameter groups.
681
+ *
682
+ * @param lr - New learning rate
683
+ */
684
+ setLearningRate(lr: number): void;
685
+ protected isState(state: Record<string, unknown>): state is RMSpropState;
686
+ step(closure?: () => number): number | undefined;
687
+ }
688
+
689
+ type SGDOptions = {
690
+ lr: number;
691
+ momentum: number;
692
+ dampening: number;
693
+ weightDecay: number;
694
+ nesterov: boolean;
695
+ };
696
+ type SGDState = {
697
+ momentumBuffer?: Float64Array;
698
+ };
699
+ /**
700
+ * Stochastic Gradient Descent (SGD) optimizer.
701
+ *
702
+ * Implements vanilla SGD with optional momentum, weight decay, and Nesterov acceleration.
703
+ *
704
+ * @example
705
+ * ```ts
706
+ * import { SGD } from 'deepbox/optim';
707
+ * import { Module } from 'deepbox/nn';
708
+ *
709
+ * const model: Module = ...;
710
+ * const optimizer = new SGD(model.parameters(), {
711
+ * lr: 0.01,
712
+ * momentum: 0.9,
713
+ * weightDecay: 5e-4,
714
+ * nesterov: true
715
+ * });
716
+ *
717
+ * // Training loop
718
+ * for (let epoch = 0; epoch < numEpochs; epoch++) {
719
+ * for (const [inputs, targets] of dataLoader) {
720
+ * optimizer.zeroGrad();
721
+ * const outputs = model.forward(inputs);
722
+ * const loss = criterion(outputs, targets);
723
+ * loss.backward();
724
+ * optimizer.step();
725
+ * }
726
+ * }
727
+ * ```
728
+ *
729
+ * @category Optimizers
730
+ */
731
+ declare class SGD extends Optimizer<SGDOptions, SGDState> {
732
+ /** Internal counter tracking total number of optimization steps */
733
+ private _stepCount;
734
+ get stepCount(): number;
735
+ /**
736
+ * Create a new SGD optimizer.
737
+ *
738
+ * @param params - Iterable of parameters or parameter groups to optimize
739
+ * @param options - Optimization options
740
+ * @param options.lr - Learning rate (default: 0.01)
741
+ * @param options.momentum - Momentum factor (default: 0)
742
+ * @param options.dampening - Dampening for momentum (default: 0)
743
+ * @param options.weightDecay - Weight decay (L2 penalty) (default: 0)
744
+ * @param options.nesterov - Enable Nesterov momentum (default: false)
745
+ */
746
+ constructor(params: Iterable<GradTensor> | ReadonlyArray<ParamGroup<SGDOptions>>, options?: {
747
+ readonly lr?: number;
748
+ readonly momentum?: number;
749
+ readonly dampening?: number;
750
+ readonly weightDecay?: number;
751
+ readonly nesterov?: boolean;
752
+ });
753
+ /**
754
+ * Perform a single optimization step.
755
+ *
756
+ * Implements the SGD update rule with optional momentum and weight decay.
757
+ *
758
+ * @param closure - Optional closure that reevaluates the model and returns the loss
759
+ * @returns Loss value if closure is provided
760
+ */
761
+ protected isState(state: Record<string, unknown>): state is SGDState;
762
+ step(closure?: () => number): number | undefined;
763
+ /**
764
+ * Get the current learning rate.
765
+ *
766
+ * @param groupIdx - Parameter group index (default: 0)
767
+ * @returns Current learning rate
768
+ */
769
+ getLearningRate(groupIdx?: number): number;
770
+ /**
771
+ * Set the learning rate for all parameter groups.
772
+ *
773
+ * @param lr - New learning rate
774
+ */
775
+ setLearningRate(lr: number): void;
776
+ }
777
+
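The `momentum`, `dampening`, `weightDecay` and `nesterov` options interact in a fixed order. The scalar sketch below follows the common PyTorch-style formulation (the base-class docs above reference the PyTorch optimizer documentation); the package's exact internals are not visible in this declaration file, so read it as an approximation of what each option does.

```ts
function sgdScalarStep(
  theta: number,
  grad: number,
  state: { momentumBuffer?: number },
  opts = { lr: 0.01, momentum: 0.9, dampening: 0, weightDecay: 0, nesterov: false }
): number {
  // Weight decay enters as an L2 penalty on the gradient.
  let g = grad + opts.weightDecay * theta;
  if (opts.momentum !== 0) {
    // First step seeds the buffer with the gradient; later steps blend with dampening.
    state.momentumBuffer =
      state.momentumBuffer === undefined
        ? g
        : opts.momentum * state.momentumBuffer + (1 - opts.dampening) * g;
    // Nesterov looks ahead along the momentum direction.
    g = opts.nesterov ? g + opts.momentum * state.momentumBuffer : state.momentumBuffer;
  }
  return theta - opts.lr * g;
}
```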
778
+ /**
779
+ * Interface for optimizer-like objects that schedulers can work with.
780
+ * This allows schedulers to work with different optimizer implementations.
781
+ * Parameter groups may expose `lr` directly or via `options.lr`.
782
+ */
783
+ interface SchedulerOptimizer {
784
+ paramGroups: SchedulerParamGroup[];
785
+ }
786
+ type SchedulerParamGroup = {
787
+ params: unknown[];
788
+ lr?: number;
789
+ options?: Record<string, unknown>;
790
+ };
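Because `SchedulerOptimizer` is purely structural (and is not part of the export list at the end of this file), any object exposing a `paramGroups` array can be scheduled. Each group may carry `lr` at the top level, as in the sketch below, or nested under `options.lr` as the built-in optimizers do. Importing `StepLR` from `deepbox/optim` follows the examples elsewhere in this file.

```ts
import { StepLR } from 'deepbox/optim';

// A duck-typed "optimizer": schedulers only read and write paramGroups.
const duckOptimizer = {
  paramGroups: [{ params: [], lr: 0.1 }],
};

const scheduler = new StepLR(duckOptimizer, { stepSize: 10, gamma: 0.5 });
scheduler.step();
console.log(scheduler.getLastLr()); // [0.1]; unchanged until the epoch count reaches stepSize
```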
791
+ /**
792
+ * Base class for learning rate schedulers.
793
+ *
794
+ * Learning rate schedulers adjust the learning rate during training according
795
+ * to a predefined schedule. This can help improve convergence and prevent
796
+ * overshooting optimal solutions.
797
+ *
798
+ * @example
799
+ * ```ts
800
+ * import { SGD, StepLR } from 'deepbox/optim';
801
+ *
802
+ * const optimizer = new SGD(model.parameters(), { lr: 0.1 });
803
+ * const scheduler = new StepLR(optimizer, { stepSize: 10, gamma: 0.1 });
804
+ *
805
+ * for (let epoch = 0; epoch < 100; epoch++) {
806
+ * train();
807
+ * scheduler.step();
808
+ * }
809
+ * ```
810
+ *
811
+ * @category Optimization
812
+ */
813
+ declare abstract class LRScheduler {
814
+ protected optimizer: SchedulerOptimizer;
815
+ protected lastEpoch: number;
816
+ protected baseLrs: number[];
817
+ constructor(optimizer: SchedulerOptimizer, lastEpoch?: number);
818
+ protected initializeFromLastEpoch(lastEpoch: number): void;
819
+ /**
820
+ * Compute the learning rate for the current epoch.
821
+ * Must be implemented by subclasses.
822
+ *
823
+ * @returns Array of learning rates for each parameter group
824
+ */
825
+ abstract getLr(): number[];
826
+ /**
827
+ * Perform a scheduler step, updating learning rates.
828
+ *
829
+ * Should be called once per epoch after the optimizer step.
830
+ */
831
+ step(): void;
832
+ /**
833
+ * Get the current learning rates for all parameter groups.
834
+ */
835
+ getLastLr(): number[];
836
+ /**
837
+ * Get current epoch number.
838
+ */
839
+ get epoch(): number;
840
+ }
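Since `getLr()` is the only abstract member, adding a new schedule amounts to mapping over `baseLrs` with the current `epoch`. A minimal sketch, assuming `LRScheduler` is re-exported from `deepbox/optim` as the export list at the end of this file suggests:

```ts
import { LRScheduler } from 'deepbox/optim';

// Inverse-square-root decay: lr = baseLr / sqrt(epoch + 1). The clamp keeps the
// very first evaluation finite if the internal epoch counter starts below zero.
class InverseSqrtLR extends LRScheduler {
  getLr(): number[] {
    const t = Math.max(1, this.epoch + 1);
    return this.baseLrs.map((baseLr) => baseLr / Math.sqrt(t));
  }
}
```

It is driven exactly like the built-in schedulers: construct it with an optimizer and call `step()` once per epoch.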
841
+ /**
842
+ * Step learning rate scheduler.
843
+ *
844
+ * Decays the learning rate by gamma every stepSize epochs.
845
+ * lr = baseLr * gamma^(epoch // stepSize)
846
+ *
847
+ * @example
848
+ * ```ts
849
+ * const scheduler = new StepLR(optimizer, { stepSize: 30, gamma: 0.1 });
850
+ * // lr = 0.1 for epochs 0-29
851
+ * // lr = 0.01 for epochs 30-59
852
+ * // lr = 0.001 for epochs 60-89
853
+ * ```
854
+ *
855
+ * @see {@link https://pytorch.org/docs/stable/generated/torch.optim.lr_scheduler.StepLR.html | PyTorch StepLR}
856
+ */
857
+ declare class StepLR extends LRScheduler {
858
+ private stepSize;
859
+ private gamma;
860
+ constructor(optimizer: SchedulerOptimizer, options: {
861
+ stepSize: number;
862
+ gamma?: number;
863
+ lastEpoch?: number;
864
+ });
865
+ getLr(): number[];
866
+ }
867
+ /**
868
+ * Exponential learning rate scheduler.
869
+ *
870
+ * Decays the learning rate exponentially every epoch.
871
+ * lr = baseLr * gamma^epoch
872
+ *
873
+ * @example
874
+ * ```ts
875
+ * const scheduler = new ExponentialLR(optimizer, { gamma: 0.95 });
876
+ * // lr *= 0.95 each epoch
877
+ * ```
878
+ *
879
+ * @see {@link https://pytorch.org/docs/stable/generated/torch.optim.lr_scheduler.ExponentialLR.html | PyTorch ExponentialLR}
880
+ */
881
+ declare class ExponentialLR extends LRScheduler {
882
+ private gamma;
883
+ constructor(optimizer: SchedulerOptimizer, options: {
884
+ gamma: number;
885
+ lastEpoch?: number;
886
+ });
887
+ getLr(): number[];
888
+ }
889
+ /**
890
+ * Cosine annealing learning rate scheduler.
891
+ *
892
+ * Sets the learning rate using a cosine annealing schedule.
893
+ * lr = etaMin + (baseLr - etaMin) * (1 + cos(π * epoch / T_max)) / 2
894
+ *
895
+ * @example
896
+ * ```ts
897
+ * const scheduler = new CosineAnnealingLR(optimizer, { T_max: 100, etaMin: 0.001 });
898
+ * ```
899
+ *
900
+ * @see {@link https://pytorch.org/docs/stable/generated/torch.optim.lr_scheduler.CosineAnnealingLR.html | PyTorch CosineAnnealingLR}
901
+ */
902
+ declare class CosineAnnealingLR extends LRScheduler {
903
+ private T_max;
904
+ private etaMin;
905
+ constructor(optimizer: SchedulerOptimizer, options: {
906
+ T_max: number;
907
+ etaMin?: number;
908
+ lastEpoch?: number;
909
+ });
910
+ getLr(): number[];
911
+ }
912
+ /**
913
+ * Multi-step learning rate scheduler.
914
+ *
915
+ * Decays the learning rate by gamma once the epoch reaches one of the milestones.
916
+ *
917
+ * @example
918
+ * ```ts
919
+ * const scheduler = new MultiStepLR(optimizer, { milestones: [30, 80], gamma: 0.1 });
920
+ * // lr = 0.1 for epochs 0-29
921
+ * // lr = 0.01 for epochs 30-79
922
+ * // lr = 0.001 for epochs 80+
923
+ * ```
924
+ *
925
+ * @see {@link https://pytorch.org/docs/stable/generated/torch.optim.lr_scheduler.MultiStepLR.html | PyTorch MultiStepLR}
926
+ */
927
+ declare class MultiStepLR extends LRScheduler {
928
+ private sortedMilestones;
929
+ private gamma;
930
+ constructor(optimizer: SchedulerOptimizer, options: {
931
+ milestones: number[];
932
+ gamma?: number;
933
+ lastEpoch?: number;
934
+ });
935
+ getLr(): number[];
936
+ }
937
+ /**
938
+ * Linear learning rate scheduler.
939
+ *
940
+ * Linearly interpolates the learning rate multiplicative factor from startFactor
941
+ * to endFactor over totalIters epochs. After totalIters, the factor remains at endFactor.
942
+ *
943
+ * lr = baseLr * (startFactor + (endFactor - startFactor) * epoch / totalIters)
944
+ *
945
+ * @example
946
+ * ```ts
947
+ * const scheduler = new LinearLR(optimizer, {
948
+ * startFactor: 0.1,
949
+ * endFactor: 0.01,
950
+ * totalIters: 100
951
+ * });
952
+ * ```
953
+ *
954
+ * @see {@link https://pytorch.org/docs/stable/generated/torch.optim.lr_scheduler.LinearLR.html | PyTorch LinearLR}
955
+ */
956
+ declare class LinearLR extends LRScheduler {
957
+ private startFactor;
958
+ private endFactor;
959
+ private totalIters;
960
+ constructor(optimizer: SchedulerOptimizer, options: {
961
+ startFactor?: number;
962
+ endFactor?: number;
963
+ totalIters: number;
964
+ lastEpoch?: number;
965
+ });
966
+ getLr(): number[];
967
+ }
968
+ /**
969
+ * Reduce learning rate on plateau.
970
+ *
971
+ * Reduces learning rate when a metric has stopped improving.
972
+ * This scheduler reads a metric value and if no improvement is seen
973
+ * for 'patience' epochs, the learning rate is reduced.
974
+ *
975
+ * @example
976
+ * ```ts
977
+ * const scheduler = new ReduceLROnPlateau(optimizer, {
978
+ * mode: 'min',
979
+ * factor: 0.1,
980
+ * patience: 10
981
+ * });
982
+ *
983
+ * for (let epoch = 0; epoch < 100; epoch++) {
984
+ * const valLoss = validate();
985
+ * scheduler.step(valLoss);
986
+ * }
987
+ * ```
988
+ *
989
+ * @see {@link https://pytorch.org/docs/stable/generated/torch.optim.lr_scheduler.ReduceLROnPlateau.html | PyTorch ReduceLROnPlateau}
990
+ */
991
+ declare class ReduceLROnPlateau {
992
+ private optimizer;
993
+ private mode;
994
+ private factor;
995
+ private patience;
996
+ private threshold;
997
+ private cooldown;
998
+ private minLr;
999
+ private best;
1000
+ private numBadEpochs;
1001
+ private cooldownCounter;
1002
+ constructor(optimizer: SchedulerOptimizer, options?: {
1003
+ mode?: "min" | "max";
1004
+ factor?: number;
1005
+ patience?: number;
1006
+ threshold?: number;
1007
+ cooldown?: number;
1008
+ minLr?: number;
1009
+ });
1010
+ /**
1011
+ * Check if metric improved.
1012
+ */
1013
+ private isBetter;
1014
+ /**
1015
+ * Perform a scheduler step based on the metric value.
1016
+ *
1017
+ * @param metric - Current value of the metric being monitored
1018
+ */
1019
+ step(metric: number): void;
1020
+ /**
1021
+ * Reduce learning rate for all parameter groups.
1022
+ */
1023
+ private reduceLr;
1024
+ /**
1025
+ * Get the current learning rates for all parameter groups.
1026
+ */
1027
+ getLastLr(): number[];
1028
+ }
1029
+ /**
1030
+ * Warmup scheduler that wraps another scheduler.
1031
+ *
1032
+ * Linearly increases the learning rate from 0 to the base lr over warmupEpochs,
1033
+ * then delegates to the wrapped scheduler.
1034
+ *
1035
+ * @example
1036
+ * ```ts
1037
+ * const baseScheduler = new CosineAnnealingLR(optimizer, { T_max: 100 });
1038
+ * const scheduler = new WarmupLR(optimizer, baseScheduler, { warmupEpochs: 5 });
1039
+ * ```
1040
+ */
1041
+ declare class WarmupLR extends LRScheduler {
1042
+ private warmupEpochs;
1043
+ private afterScheduler;
1044
+ constructor(optimizer: SchedulerOptimizer, afterScheduler: LRScheduler | null, options: {
1045
+ warmupEpochs: number;
1046
+ lastEpoch?: number;
1047
+ });
1048
+ getLr(): number[];
1049
+ step(): void;
1050
+ }
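Putting the wrapper into a loop makes the hand-off visible: for the first `warmupEpochs` calls to `step()` the learning rate ramps up linearly, after which the wrapped cosine schedule takes over. As in the other examples in this file, `model` and `trainOneEpoch` stand in for the usual training code and are not defined here.

```ts
import { SGD, CosineAnnealingLR, WarmupLR } from 'deepbox/optim';

const optimizer = new SGD(model.parameters(), { lr: 0.1 });
const scheduler = new WarmupLR(
  optimizer,
  new CosineAnnealingLR(optimizer, { T_max: 100 }),
  { warmupEpochs: 5 }
);

for (let epoch = 0; epoch < 100; epoch++) {
  trainOneEpoch();                           // zeroGrad / forward / backward / optimizer.step()
  scheduler.step();                          // once per epoch, after the optimizer has stepped
  console.log(epoch, scheduler.getLastLr());
}
```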
1051
+ /**
1052
+ * One-cycle learning rate scheduler.
1053
+ *
1054
+ * Implements the 1cycle policy: lr starts at maxLr/divFactor, increases to maxLr
1055
+ * over pctStart of the training, then decreases to maxLr/finalDivFactor.
1056
+ *
1057
+ * @example
1058
+ * ```ts
1059
+ * const scheduler = new OneCycleLR(optimizer, {
1060
+ * maxLr: 0.1,
1061
+ * totalSteps: 1000,
1062
+ * pctStart: 0.3
1063
+ * });
1064
+ * ```
1065
+ *
1066
+ * @see {@link https://pytorch.org/docs/stable/generated/torch.optim.lr_scheduler.OneCycleLR.html | PyTorch OneCycleLR}
1067
+ */
1068
+ declare class OneCycleLR extends LRScheduler {
1069
+ private maxLr;
1070
+ private totalSteps;
1071
+ private pctStart;
1072
+ private divFactor;
1073
+ private finalDivFactor;
1074
+ private annealStrategy;
1075
+ constructor(optimizer: SchedulerOptimizer, options: {
1076
+ maxLr: number;
1077
+ totalSteps: number;
1078
+ pctStart?: number;
1079
+ divFactor?: number;
1080
+ finalDivFactor?: number;
1081
+ annealStrategy?: "cos" | "linear";
1082
+ lastEpoch?: number;
1083
+ });
1084
+ getLr(): number[];
1085
+ }
1086
+
1087
+ type index_AdaDelta = AdaDelta;
1088
+ declare const index_AdaDelta: typeof AdaDelta;
1089
+ type index_Adagrad = Adagrad;
1090
+ declare const index_Adagrad: typeof Adagrad;
1091
+ type index_Adam = Adam;
1092
+ declare const index_Adam: typeof Adam;
1093
+ type index_AdamW = AdamW;
1094
+ declare const index_AdamW: typeof AdamW;
1095
+ type index_CosineAnnealingLR = CosineAnnealingLR;
1096
+ declare const index_CosineAnnealingLR: typeof CosineAnnealingLR;
1097
+ type index_ExponentialLR = ExponentialLR;
1098
+ declare const index_ExponentialLR: typeof ExponentialLR;
1099
+ type index_LRScheduler = LRScheduler;
1100
+ declare const index_LRScheduler: typeof LRScheduler;
1101
+ type index_LinearLR = LinearLR;
1102
+ declare const index_LinearLR: typeof LinearLR;
1103
+ type index_MultiStepLR = MultiStepLR;
1104
+ declare const index_MultiStepLR: typeof MultiStepLR;
1105
+ type index_Nadam = Nadam;
1106
+ declare const index_Nadam: typeof Nadam;
1107
+ type index_OneCycleLR = OneCycleLR;
1108
+ declare const index_OneCycleLR: typeof OneCycleLR;
1109
+ type index_Optimizer<Options extends Record<string, unknown>, State extends Record<string, unknown>> = Optimizer<Options, State>;
1110
+ declare const index_Optimizer: typeof Optimizer;
1111
+ type index_ParamGroup<Options extends Record<string, unknown>> = ParamGroup<Options>;
1112
+ type index_RMSprop = RMSprop;
1113
+ declare const index_RMSprop: typeof RMSprop;
1114
+ type index_ReduceLROnPlateau = ReduceLROnPlateau;
1115
+ declare const index_ReduceLROnPlateau: typeof ReduceLROnPlateau;
1116
+ type index_SGD = SGD;
1117
+ declare const index_SGD: typeof SGD;
1118
+ type index_StepLR = StepLR;
1119
+ declare const index_StepLR: typeof StepLR;
1120
+ type index_WarmupLR = WarmupLR;
1121
+ declare const index_WarmupLR: typeof WarmupLR;
1122
+ declare namespace index {
1123
+ export { index_AdaDelta as AdaDelta, index_Adagrad as Adagrad, index_Adam as Adam, index_AdamW as AdamW, index_CosineAnnealingLR as CosineAnnealingLR, index_ExponentialLR as ExponentialLR, index_LRScheduler as LRScheduler, index_LinearLR as LinearLR, index_MultiStepLR as MultiStepLR, index_Nadam as Nadam, index_OneCycleLR as OneCycleLR, index_Optimizer as Optimizer, type index_ParamGroup as ParamGroup, index_RMSprop as RMSprop, index_ReduceLROnPlateau as ReduceLROnPlateau, index_SGD as SGD, index_StepLR as StepLR, index_WarmupLR as WarmupLR };
1124
+ }
1125
+
1126
+ export { AdaDelta as A, CosineAnnealingLR as C, ExponentialLR as E, LinearLR as L, MultiStepLR as M, Nadam as N, Optimizer as O, type ParamGroup as P, RMSprop as R, SGD as S, WarmupLR as W, Adagrad as a, Adam as b, AdamW as c, LRScheduler as d, OneCycleLR as e, ReduceLROnPlateau as f, StepLR as g, index as i };