catniff 0.8.13 → 0.8.15
This diff shows the changes between publicly released versions of the package as published to a supported registry, and is provided for informational purposes only.
- package/README.md +2 -3
- package/dist/core.js +12 -12
- package/dist/lrscheduler.d.ts +4 -3
- package/dist/lrscheduler.js +8 -1
- package/dist/optim.d.ts +33 -15
- package/dist/optim.js +170 -133
- package/package.json +1 -1
package/README.md
CHANGED

```diff
@@ -143,9 +143,8 @@ All available APIs are in [`./src/`](./src/) if you want to dig deeper.
 
 * More general tensor ops.
 * More general neural net APIs.
-* GPU acceleration.
-*
-* Bug fixes.
+* GPU acceleration, possibly through WebGPU, Libtorch bindings, or CUDA.
+* Proper optimization.
 * More detailed documentation.
 * Code refactoring.
 * Proper tests.
```
package/dist/core.js
CHANGED

```diff
@@ -20,21 +20,21 @@ class Tensor {
     static createGraph = false;
     constructor(value, options = {}) {
         // Memory buffer
-        this.dtype = options.dtype
+        this.dtype = options.dtype ?? "float32";
         const flatValue = Tensor.flattenValue(value);
         const TypedArrayConstructor = dtype_1.TypedArray[this.dtype];
         this.value = flatValue instanceof TypedArrayConstructor ? flatValue : TypedArrayConstructor.from(flatValue);
         // Tensor metadata
-        this.shape = options.shape
-        this.strides = options.strides
-        this.offset = options.offset
-        this.numel = options.numel
-        this.device = options.device
+        this.shape = options.shape ?? Tensor.getShape(value);
+        this.strides = options.strides ?? Tensor.getStrides(this.shape);
+        this.offset = options.offset ?? 0;
+        this.numel = options.numel ?? Tensor.shapeToSize(this.shape);
+        this.device = options.device ?? "cpu";
         // Autograd data
         this.grad = options.grad;
         this.requiresGrad = options.requiresGrad ?? false;
-        this.gradFn = options.gradFn
-        this.children = options.children
+        this.gradFn = options.gradFn ?? (() => { });
+        this.children = options.children ?? [];
         // Move to device in-place
         this.to_(this.device);
     }
@@ -622,14 +622,14 @@ class Tensor {
         return this;
         const newShape = [];
         const newStrides = [];
-        let newOffset = this.offset
+        let newOffset = this.offset;
         // Pad ranges to match tensor dimensions
         const paddedRanges = [...ranges];
         while (paddedRanges.length < this.shape.length) {
             paddedRanges.push([]);
         }
         for (let i = 0; i < this.shape.length; i++) {
-            const range = paddedRanges[i]
+            const range = paddedRanges[i] ?? [];
             const dimSize = this.shape[i];
             const stride = this.strides[i];
             // Default values
@@ -675,7 +675,7 @@ class Tensor {
         const originalCoords = new Array(slicedCoords.length);
         for (let dim = 0; dim < slicedCoords.length; dim++) {
             const coord = slicedCoords[dim];
-            const range = paddedRanges[dim]
+            const range = paddedRanges[dim] ?? [];
             const start = range[0] ?? 0;
             const step = range[2] ?? 1;
             const normalizedStart = start < 0 ? start + this.shape[dim] : start;
@@ -2504,7 +2504,7 @@ class Tensor {
            visited.add(node);
            // Reset grad to zeros if specified
            if (zeroGrad) {
-                node.grad
+                delete node.grad;
            }
            for (let child of node.children)
                build(child);
```
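In 0.8.15 every constructor option now falls back through `??` to an explicit default instead of being assigned as-is. A minimal usage sketch of what that means for callers (the top-level import path is an assumption; the diff only shows `./dist/core.js`):

```js
// Hypothetical usage sketch; the require path is assumed, not shown in the diff.
const { Tensor } = require("catniff");

// No options passed: each field resolves through its new `??` fallback.
const t = new Tensor([[1, 2], [3, 4]]);
console.log(t.dtype);  // "float32" (options.dtype ?? "float32")
console.log(t.device); // "cpu"     (options.device ?? "cpu")
console.log(t.offset); // 0         (options.offset ?? 0)
// t.shape, t.strides and t.numel are likewise derived from the value via
// Tensor.getShape, Tensor.getStrides and Tensor.shapeToSize.
```

Since `??` only triggers on `null`/`undefined`, explicitly passed falsy options such as `offset: 0` are still honored.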
package/dist/lrscheduler.d.ts
CHANGED

```diff
@@ -1,11 +1,12 @@
-import {
+import { OptimizerWithLR } from "./optim";
 export declare class StepLR {
-    optimizer:
+    optimizer: OptimizerWithLR;
     stepSize: number;
     gamma: number;
     lastEpoch: number;
     baseLR: number;
-
+    baseGroupLRs: number[];
+    constructor(optimizer: OptimizerWithLR, stepSize: number, gamma?: number, lastEpoch?: number);
     step(epoch?: number): void;
 }
 export declare const LRScheduler: {
```
package/dist/lrscheduler.js
CHANGED

```diff
@@ -7,12 +7,14 @@ class StepLR {
     gamma;
     lastEpoch;
     baseLR;
+    baseGroupLRs;
     constructor(optimizer, stepSize, gamma = 0.1, lastEpoch = -1) {
         this.optimizer = optimizer;
         this.stepSize = stepSize;
         this.gamma = gamma;
         this.lastEpoch = lastEpoch;
-        this.baseLR =
+        this.baseLR = optimizer.lr;
+        this.baseGroupLRs = this.optimizer.paramGroups.map(paramGroup => paramGroup.lr ?? this.optimizer.lr);
     }
     step(epoch) {
         if (typeof epoch === "undefined") {
@@ -22,6 +24,11 @@ class StepLR {
         else {
             this.lastEpoch = epoch;
         }
+        // Update LR of each group
+        for (let index = 0; index < this.baseGroupLRs.length; index++) {
+            this.optimizer.paramGroups[index].lr = this.baseGroupLRs[index] * this.gamma ** Math.floor(epoch / this.stepSize);
+        }
+        // Update default LR
         this.optimizer.lr = this.baseLR * this.gamma ** Math.floor(epoch / this.stepSize);
     }
 }
```
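Taken together, the lrscheduler changes make `StepLR` capture a base LR per param group at construction (`baseGroupLRs`) and decay each group alongside the optimizer's default LR. A sketch of the resulting behavior, assuming `Tensor`, `SGD` and `StepLR` are reachable from the package root:

```js
// Hypothetical usage sketch; import path assumed.
const { Tensor, SGD, StepLR } = require("catniff");

const w = new Tensor([1, 2, 3], { requiresGrad: true });
const b = new Tensor([0], { requiresGrad: true });
const opt = new SGD([
    { params: [w], lr: 0.1 }, // group LR, captured into baseGroupLRs
    { params: [b] },          // no group LR: falls back to opt.lr (0.01)
], { lr: 0.01 });
const sched = new StepLR(opt, 10, 0.5); // stepSize = 10, gamma = 0.5

sched.step(10); // decay factor: 0.5 ** Math.floor(10 / 10) = 0.5
// opt.paramGroups[0].lr === 0.05, opt.paramGroups[1].lr === 0.005, opt.lr === 0.005
```

Because each decay is computed from the base LRs captured at construction rather than the current values, repeated `step` calls at the same epoch do not compound.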
package/dist/optim.d.ts
CHANGED

```diff
@@ -1,12 +1,15 @@
 import { Tensor } from "./core";
-export interface
-
+export interface BaseParamGroup {
+    params: Tensor[];
+    [key: string]: any;
 }
 export declare abstract class BaseOptimizer {
-
+    paramGroups: BaseParamGroup[];
+    constructor(params: Tensor[] | BaseParamGroup[]);
+    zeroGrad(del?: boolean): void;
+}
+export interface OptimizerWithLR extends BaseOptimizer {
     lr: number;
-    constructor(params: Tensor[], options?: BaseOptimizerOptions);
-    zeroGrad(): void;
 }
 export interface SGDOptions {
     lr?: number;
@@ -15,13 +18,18 @@ export interface SGDOptions {
     weightDecay?: number;
     nesterov?: boolean;
 }
+export interface SGDParamGroup extends SGDOptions {
+    params: Tensor[];
+}
 export declare class SGD extends BaseOptimizer {
-
+    paramGroups: SGDParamGroup[];
+    lr: number;
     momentum: number;
     dampening: number;
     weightDecay: number;
     nesterov: boolean;
-
+    momentumBuffers: Map<Tensor, Tensor>;
+    constructor(params: Tensor[] | SGDParamGroup[], options?: SGDOptions);
     step(): void;
 }
 export interface AdamOptions {
@@ -30,14 +38,19 @@ export interface AdamOptions {
     eps?: number;
     weightDecay?: number;
 }
+export interface AdamParamGroup extends AdamOptions {
+    params: Tensor[];
+}
 export declare class Adam extends BaseOptimizer {
-
-
-    stepCount: number;
+    paramGroups: AdamParamGroup[];
+    lr: number;
     betas: [number, number];
     eps: number;
     weightDecay: number;
-
+    momentumBuffers: Map<Tensor, Tensor>;
+    velocityBuffers: Map<Tensor, Tensor>;
+    stepCounts: Map<Tensor, number>;
+    constructor(params: Tensor[] | AdamParamGroup[], options?: AdamOptions);
     step(): void;
 }
 export interface AdamWOptions {
@@ -46,14 +59,19 @@ export interface AdamWOptions {
     eps?: number;
     weightDecay?: number;
 }
+export interface AdamWParamGroup extends AdamWOptions {
+    params: Tensor[];
+}
 export declare class AdamW extends BaseOptimizer {
-
-
-    stepCount: number;
+    paramGroups: AdamWParamGroup[];
+    lr: number;
     betas: [number, number];
     eps: number;
     weightDecay: number;
-
+    momentumBuffers: Map<Tensor, Tensor>;
+    velocityBuffers: Map<Tensor, Tensor>;
+    stepCounts: Map<Tensor, number>;
+    constructor(params: Tensor[] | AdamWParamGroup[], options?: AdamWOptions);
     step(): void;
 }
 export declare const Optim: {
```
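The new declarations formalize PyTorch-style param groups: every optimizer constructor accepts either a flat `Tensor[]` or an array of `*ParamGroup` objects whose per-group fields override the instance-wide defaults. A minimal sketch of the two call forms (tensor values are placeholders):

```js
// Hypothetical sketch; import path assumed.
const { Tensor, SGD, Adam } = require("catniff");
const w = new Tensor([1, 2], { requiresGrad: true });
const b = new Tensor([0], { requiresGrad: true });

// 1. Flat tensor array: BaseOptimizer wraps it as a single group,
//    i.e. paramGroups becomes [{ params: [w, b] }].
const opt1 = new SGD([w, b], { lr: 0.01 });

// 2. Explicit groups: step() reads each hyperparameter via
//    `paramGroup.lr ?? this.lr` and so on, so unset fields fall back
//    to the optimizer-wide values.
const opt2 = new Adam([
    { params: [w], lr: 3e-4 },
    { params: [b], weightDecay: 0 },
]);
```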
package/dist/optim.js
CHANGED

```diff
@@ -3,183 +3,220 @@ Object.defineProperty(exports, "__esModule", { value: true });
 exports.Optim = exports.AdamW = exports.Adam = exports.SGD = exports.BaseOptimizer = void 0;
 const core_1 = require("./core");
 class BaseOptimizer {
-
-
-
-
-
+    paramGroups;
+    constructor(params) {
+        if (params[0] instanceof core_1.Tensor) {
+            this.paramGroups = [{ params: params }];
+        }
+        else {
+            this.paramGroups = params;
+        }
     }
-    zeroGrad() {
-        for (let index = 0; index < this.
-        const
-        param
+    zeroGrad(del = true) {
+        for (let index = 0; index < this.paramGroups.length; index++) {
+            const paramGroup = this.paramGroups[index];
+            for (const param of paramGroup.params) {
+                if (del) {
+                    delete param.grad;
+                }
+                else {
+                    param.grad = core_1.Tensor.zerosLike(param);
+                }
+            }
         }
     }
 }
 exports.BaseOptimizer = BaseOptimizer;
 class SGD extends BaseOptimizer {
-
+    lr;
     momentum;
     dampening;
     weightDecay;
     nesterov;
+    momentumBuffers = new Map();
     constructor(params, options) {
-        super(params
-        this.
-        this.
-        this.
-        this.
+        super(params);
+        this.lr = options?.lr ?? 0.001;
+        this.momentum = options?.momentum ?? 0;
+        this.dampening = options?.dampening ?? 0;
+        this.weightDecay = options?.weightDecay ?? 0;
+        this.nesterov = options?.nesterov ?? false;
     }
     step() {
-        for (const
-
-
-
-
-
-
-
-
-
-        if (
-
-                buf = grad.clone();
-                this.momentumBuffers.set(param, buf);
-            }
-            else {
-                // Update momentum buffer: buf = momentum * buf + (1 - dampening) * grad
-                buf = buf.mul(this.momentum).add(grad.mul(1 - this.dampening));
-                this.momentumBuffers.set(param, buf);
-            }
-            if (this.nesterov) {
-                // Nesterov momentum: grad = grad + momentum * buf
-                grad = grad.add(buf.mul(this.momentum));
+        for (const paramGroup of this.paramGroups) {
+            const lr = paramGroup.lr ?? this.lr;
+            const momentum = paramGroup.momentum ?? this.momentum;
+            const dampening = paramGroup.dampening ?? this.dampening;
+            const weightDecay = paramGroup.weightDecay ?? this.weightDecay;
+            const nesterov = paramGroup.nesterov ?? this.nesterov;
+            for (const param of paramGroup.params) {
+                if (!param.grad || !param.requiresGrad)
+                    continue;
+                let grad = param.grad.detach(), detachedParam = param.detach();
+                // Apply weight decay (L2 regularization)
+                if (weightDecay !== 0) {
+                    grad = grad.add(detachedParam.mul(weightDecay));
                 }
-
-
-
+                // Apply momentum
+                if (momentum !== 0) {
+                    let buf = this.momentumBuffers.get(param);
+                    if (!buf) {
+                        // First time: initialize momentum buffer with current gradient
+                        buf = grad.clone();
+                        this.momentumBuffers.set(param, buf);
+                    }
+                    else {
+                        // Update momentum buffer: buf = momentum * buf + (1 - dampening) * grad
+                        buf = buf.mul(momentum).add(grad.mul(1 - dampening));
+                        this.momentumBuffers.set(param, buf);
+                    }
+                    if (nesterov) {
+                        // Nesterov momentum: grad = grad + momentum * buf
+                        grad = grad.add(buf.mul(momentum));
+                    }
+                    else {
+                        // Standard momentum: use momentum buffer as gradient
+                        grad = buf;
+                    }
                 }
+                // Update parameter: param = param - lr * grad
+                const newParam = detachedParam.sub(grad.mul(lr));
+                param.replace(newParam);
             }
-            // Update parameter: param = param - lr * grad
-            const newParam = detachedParam.sub(grad.mul(this.lr));
-            param.replace(newParam);
         }
     }
 }
 exports.SGD = SGD;
 class Adam extends BaseOptimizer {
-
-    velocityBuffers = new Map(); // Second moment (v_t)
-    stepCount = 0;
+    lr;
     betas;
     eps;
     weightDecay;
+    momentumBuffers = new Map(); // First moment (m_t)
+    velocityBuffers = new Map(); // Second moment (v_t)
+    stepCounts = new Map();
     constructor(params, options) {
-        super(params
-        this.
-        this.
-        this.
+        super(params);
+        this.lr = options?.lr ?? 0.001;
+        this.betas = options?.betas ?? [0.9, 0.999];
+        this.eps = options?.eps ?? 1e-8;
+        this.weightDecay = options?.weightDecay ?? 0;
     }
     step() {
-        this.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        for (const paramGroup of this.paramGroups) {
+            const lr = paramGroup.lr ?? this.lr;
+            const betas = paramGroup.betas ?? this.betas;
+            const eps = paramGroup.eps ?? this.eps;
+            const weightDecay = paramGroup.weightDecay ?? this.weightDecay;
+            for (const param of paramGroup.params) {
+                if (!param.grad || !param.requiresGrad)
+                    continue;
+                // Get current step for param, initialize if has not step before
+                const stepCount = (this.stepCounts.get(param) ?? 0) + 1;
+                this.stepCounts.set(param, stepCount);
+                // Bias correction factors
+                const [beta1, beta2] = betas;
+                const biasCorrection1 = 1 - Math.pow(beta1, stepCount);
+                const biasCorrection2 = 1 - Math.pow(beta2, stepCount);
+                let grad = param.grad.detach(), detachedParam = param.detach();
+                // Apply weight decay (L2 regularization)
+                if (weightDecay !== 0) {
+                    grad = grad.add(detachedParam.mul(weightDecay));
+                }
+                // Get or initialize first moment buffer (momentum)
+                let momentumBuffer = this.momentumBuffers.get(param);
+                if (!momentumBuffer) {
+                    momentumBuffer = core_1.Tensor.zerosLike(grad); // Initialize with zeros (same shape as grad)
+                    this.momentumBuffers.set(param, momentumBuffer);
+                }
+                // Get or initialize second moment buffer (velocity)
+                let velocityBuffer = this.velocityBuffers.get(param);
+                if (!velocityBuffer) {
+                    velocityBuffer = core_1.Tensor.zerosLike(grad); // Initialize with zeros (same shape as grad)
+                    this.velocityBuffers.set(param, velocityBuffer);
+                }
+                // Update biased first moment estimate: m_t = beta1 * m_{t-1} + (1 - beta1) * g_t
+                momentumBuffer = momentumBuffer.mul(beta1).add(grad.mul(1 - beta1));
                 this.momentumBuffers.set(param, momentumBuffer);
-
-
-        let velocityBuffer = this.velocityBuffers.get(param);
-        if (!velocityBuffer) {
-            velocityBuffer = core_1.Tensor.zerosLike(grad); // Initialize with zeros (same shape as grad)
+                // Update biased second moment estimate: v_t = beta2 * v_{t-1} + (1 - beta2) * g_t^2
+                velocityBuffer = velocityBuffer.mul(beta2).add(grad.pow(2).mul(1 - beta2));
                 this.velocityBuffers.set(param, velocityBuffer);
+                // Compute bias-corrected first moment: m_hat_t = m_t / (1 - beta1^t)
+                const correctedMomentum = momentumBuffer.div(biasCorrection1);
+                // Compute bias-corrected second moment: v_hat_t = v_t / (1 - beta2^t)
+                const correctedVelocity = velocityBuffer.div(biasCorrection2);
+                // Update parameters: theta_t = theta_{t-1} - alpha * m_hat_t / (sqrt(v_hat_t) + epsilon)
+                const denom = correctedVelocity.sqrt().add(eps);
+                const stepSize = correctedMomentum.div(denom).mul(lr);
+                const newParam = detachedParam.sub(stepSize);
+                param.replace(newParam);
             }
-        // Update biased first moment estimate: m_t = beta1 * m_{t-1} + (1 - beta1) * g_t
-        momentumBuffer = momentumBuffer.mul(beta1).add(grad.mul(1 - beta1));
-        this.momentumBuffers.set(param, momentumBuffer);
-        // Update biased second moment estimate: v_t = beta2 * v_{t-1} + (1 - beta2) * g_t^2
-        velocityBuffer = velocityBuffer.mul(beta2).add(grad.pow(2).mul(1 - beta2));
-        this.velocityBuffers.set(param, velocityBuffer);
-        // Compute bias-corrected first moment: m_hat_t = m_t / (1 - beta1^t)
-        const correctedMomentum = momentumBuffer.div(biasCorrection1);
-        // Compute bias-corrected second moment: v_hat_t = v_t / (1 - beta2^t)
-        const correctedVelocity = velocityBuffer.div(biasCorrection2);
-        // Update parameters: theta_t = theta_{t-1} - alpha * m_hat_t / (sqrt(v_hat_t) + epsilon)
-        const denom = correctedVelocity.sqrt().add(this.eps);
-        const stepSize = correctedMomentum.div(denom).mul(this.lr);
-        const newParam = detachedParam.sub(stepSize);
-        param.replace(newParam);
         }
     }
 }
 exports.Adam = Adam;
 class AdamW extends BaseOptimizer {
-
-    velocityBuffers = new Map(); // Second moment (v_t)
-    stepCount = 0;
+    lr;
     betas;
     eps;
     weightDecay;
+    momentumBuffers = new Map(); // First moment (m_t)
+    velocityBuffers = new Map(); // Second moment (v_t)
+    stepCounts = new Map();
     constructor(params, options) {
-        super(params
-        this.
-        this.
-        this.
+        super(params);
+        this.lr = options?.lr ?? 0.001;
+        this.betas = options?.betas ?? [0.9, 0.999];
+        this.eps = options?.eps ?? 1e-8;
+        this.weightDecay = options?.weightDecay ?? 0.01;
     }
     step() {
-        this.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        for (const paramGroup of this.paramGroups) {
+            const lr = paramGroup.lr ?? this.lr;
+            const betas = paramGroup.betas ?? this.betas;
+            const eps = paramGroup.eps ?? this.eps;
+            const weightDecay = paramGroup.weightDecay ?? this.weightDecay;
+            for (const param of paramGroup.params) {
+                if (!param.grad || !param.requiresGrad)
+                    continue;
+                // Get current step for param, initialize if has not step before
+                const stepCount = (this.stepCounts.get(param) ?? 0) + 1;
+                this.stepCounts.set(param, stepCount);
+                // Bias correction factors
+                const [beta1, beta2] = betas;
+                const biasCorrection1 = 1 - Math.pow(beta1, stepCount);
+                const biasCorrection2 = 1 - Math.pow(beta2, stepCount);
+                let grad = param.grad.detach(), detachedParam = param.detach();
+                // Apply weight decay (L2 regularization)
+                detachedParam = detachedParam.sub(detachedParam.mul(weightDecay).mul(lr));
+                // Get or initialize first moment buffer (momentum)
+                let momentumBuffer = this.momentumBuffers.get(param);
+                if (!momentumBuffer) {
+                    momentumBuffer = core_1.Tensor.zerosLike(grad); // Initialize with zeros (same shape as grad)
+                    this.momentumBuffers.set(param, momentumBuffer);
+                }
+                // Get or initialize second moment buffer (velocity)
+                let velocityBuffer = this.velocityBuffers.get(param);
+                if (!velocityBuffer) {
+                    velocityBuffer = core_1.Tensor.zerosLike(grad); // Initialize with zeros (same shape as grad)
+                    this.velocityBuffers.set(param, velocityBuffer);
+                }
+                // Update biased first moment estimate: m_t = beta1 * m_{t-1} + (1 - beta1) * g_t
+                momentumBuffer = momentumBuffer.mul(beta1).add(grad.mul(1 - beta1));
                 this.momentumBuffers.set(param, momentumBuffer);
-
-
-        let velocityBuffer = this.velocityBuffers.get(param);
-        if (!velocityBuffer) {
-            velocityBuffer = core_1.Tensor.zerosLike(grad); // Initialize with zeros (same shape as grad)
+                // Update biased second moment estimate: v_t = beta2 * v_{t-1} + (1 - beta2) * g_t^2
+                velocityBuffer = velocityBuffer.mul(beta2).add(grad.pow(2).mul(1 - beta2));
                 this.velocityBuffers.set(param, velocityBuffer);
+                // Compute bias-corrected first moment: m_hat_t = m_t / (1 - beta1^t)
+                const correctedMomentum = momentumBuffer.div(biasCorrection1);
+                // Compute bias-corrected second moment: v_hat_t = v_t / (1 - beta2^t)
+                const correctedVelocity = velocityBuffer.div(biasCorrection2);
+                // Update parameters: theta_t = theta_{t-1} - alpha * m_hat_t / (sqrt(v_hat_t) + epsilon)
+                const denom = correctedVelocity.sqrt().add(eps);
+                const stepSize = correctedMomentum.div(denom).mul(lr);
+                const newParam = detachedParam.sub(stepSize);
+                param.replace(newParam);
             }
-        // Update biased first moment estimate: m_t = beta1 * m_{t-1} + (1 - beta1) * g_t
-        momentumBuffer = momentumBuffer.mul(beta1).add(grad.mul(1 - beta1));
-        this.momentumBuffers.set(param, momentumBuffer);
-        // Update biased second moment estimate: v_t = beta2 * v_{t-1} + (1 - beta2) * g_t^2
-        velocityBuffer = velocityBuffer.mul(beta2).add(grad.pow(2).mul(1 - beta2));
-        this.velocityBuffers.set(param, velocityBuffer);
-        // Compute bias-corrected first moment: m_hat_t = m_t / (1 - beta1^t)
-        const correctedMomentum = momentumBuffer.div(biasCorrection1);
-        // Compute bias-corrected second moment: v_hat_t = v_t / (1 - beta2^t)
-        const correctedVelocity = velocityBuffer.div(biasCorrection2);
-        // Update parameters: theta_t = theta_{t-1} - alpha * m_hat_t / (sqrt(v_hat_t) + epsilon)
-        const denom = correctedVelocity.sqrt().add(this.eps);
-        const stepSize = correctedMomentum.div(denom).mul(this.lr);
-        const newParam = detachedParam.sub(stepSize);
-        param.replace(newParam);
         }
     }
 }
```