catniff 0.5.9 → 0.5.10
- package/dist/core.d.ts +2 -2
- package/dist/core.js +16 -11
- package/dist/nn.d.ts +2 -1
- package/dist/nn.js +14 -9
- package/dist/optim.d.ts +18 -0
- package/dist/optim.js +59 -0
- package/package.json +1 -1
package/dist/core.d.ts
CHANGED
@@ -38,7 +38,7 @@ export declare class Tensor {
     static elementWiseSelf(tA: Tensor, op: (tA: number) => number): Tensor;
     elementWiseABDAG(other: TensorValue | Tensor, op: (a: number, b: number) => number, thisGrad?: (self: Tensor, other: Tensor, outGrad: Tensor) => Tensor, otherGrad?: (self: Tensor, other: Tensor, outGrad: Tensor) => Tensor): Tensor;
     elementWiseSelfDAG(op: (a: number) => number, thisGrad?: (self: Tensor, outGrad: Tensor) => Tensor): Tensor;
-
+    handleOther(other: Tensor | TensorValue): Tensor;
     static addGrad(tensor: Tensor, accumGrad: Tensor): void;
     isContiguous(): boolean;
     contiguous(): Tensor;
@@ -174,7 +174,7 @@ export declare class Tensor {
     withGrad(requiresGrad: boolean): Tensor;
     detach(): Tensor;
     clone(): Tensor;
-    replace(other: Tensor, allowShapeMismatch?: boolean): Tensor;
+    replace(other: Tensor | TensorValue, allowShapeMismatch?: boolean): Tensor;
     static backends: Map<string, Backend>;
     to(device: string): Tensor;
     to_(device: string): Tensor;
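Note: the practical effect of the widened declarations is that replace() now accepts a plain TensorValue (number or nested array) as well as a Tensor, and the second-operand handling is exposed as handleOther(). A minimal sketch, assuming the package's main entry re-exports Tensor:

    import { Tensor } from "catniff";

    const w = new Tensor([[1, 2], [3, 4]]);
    // 0.5.10: a raw value is accepted and wrapped internally via handleOther()
    w.replace([[0, 0], [0, 0]]);
    const five = w.handleOther(5); // a scalar Tensor on the same device as w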
package/dist/core.js
CHANGED
@@ -183,7 +183,7 @@ class Tensor {
     }
     // Utility to do element-wise operation and build a dag node with another tensor
     elementWiseABDAG(other, op, thisGrad = () => new Tensor(0), otherGrad = () => new Tensor(0)) {
-        other =
+        other = this.handleOther(other);
         const out = Tensor.elementWiseAB(this, other, op);
         if (this.requiresGrad) {
             out.requiresGrad = true;
@@ -225,11 +225,15 @@ class Tensor {
         }
         return out;
     }
-    // Utility to
-
-        if (
-
-
+    // Utility to handle other tensor if an op needs a second operand
+    handleOther(other) {
+        if (other instanceof Tensor) {
+            if (this.device !== other.device) {
+                throw new Error("Can not operate on tensors that are not on the same device");
+            }
+            return other;
+        }
+        return new Tensor(other, { device: this.device });
     }
     // Utility to add to gradient of tensor
     static addGrad(tensor, accumGrad) {
@@ -1199,7 +1203,7 @@ class Tensor {
     }
     // 1D tensor dot product
     dot(other) {
-        other =
+        other = this.handleOther(other);
         // Verify 1D shape
         if (this.shape.length !== 1 || other.shape.length !== 1) {
             throw new Error("Inputs are not 1D tensors");
@@ -1237,7 +1241,7 @@ class Tensor {
     }
     // Matrix multiplication
     mm(other) {
-        other =
+        other = this.handleOther(other);
         // Verify 2D shape
         if (this.shape.length !== 2 || other.shape.length !== 2) {
             throw new Error("Inputs are not matrices");
@@ -1292,7 +1296,7 @@ class Tensor {
     }
     // Batched 3D tensor matmul
     bmm(other) {
-        other =
+        other = this.handleOther(other);
         // Verify 3D shape
         if (this.shape.length !== 3 || other.shape.length !== 3 || this.shape[0] !== other.shape[0]) {
             throw new Error("Inputs are not 3D tensors with the same first dim size");
@@ -1350,7 +1354,7 @@ class Tensor {
     }
     // Convert right-side 1D tensor to a vector (nx1 tensor) to do matmul
     mv(other) {
-        other =
+        other = this.handleOther(other);
         // Verify 2D shape
         if (this.shape.length !== 2 || other.shape.length !== 1) {
             throw new Error("Input is not a 2D and 1D tensor pair");
@@ -1359,7 +1363,7 @@ class Tensor {
     }
     // General matrix multiplication with different shapes
     matmul(other) {
-        other =
+        other = this.handleOther(other);
         const isThis1D = this.shape.length === 1;
         const isOther1D = other.shape.length === 1;
         if (isThis1D && isOther1D) {
@@ -1692,6 +1696,7 @@ class Tensor {
     }
     // Returns this tensor with value replaced with the value of another tensor
     replace(other, allowShapeMismatch = false) {
+        other = this.handleOther(other);
         // Verify shape
         if (!allowShapeMismatch) {
             for (let index = 0; index < this.shape.length; index++) {
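Note: handleOther() centralizes what the binary ops (elementWiseABDAG, dot, mm, bmm, mv, matmul, replace) do with their second operand: a non-Tensor value is wrapped into a Tensor on the receiver's device, while a Tensor on a different device now throws. A hedged sketch (the import path and the "gpu" device name are assumptions, not taken from this diff):

    import { Tensor } from "catniff";

    const a = new Tensor([1, 2, 3]);

    // Raw arrays are fine: they are wrapped onto a's device before the op
    const d = a.dot([4, 5, 6]);

    // But mixing devices is rejected by handleOther():
    // const b = new Tensor([4, 5, 6], { device: "gpu" });
    // a.dot(b); // throws "Can not operate on tensors that are not on the same device"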
package/dist/nn.d.ts
CHANGED
@@ -55,7 +55,7 @@ declare class LayerNorm {
     eps: number;
     normalizedShape: number[];
     constructor(normalizedShape: number | number[], eps?: number, elementwiseAffine?: boolean, bias?: boolean, device?: string);
-    forward(input: Tensor
+    forward(input: Tensor): Tensor;
 }
 export interface StateDict {
     [key: string]: any;
@@ -68,6 +68,7 @@ export declare const nn: {
     LayerNorm: typeof LayerNorm;
     state: {
         getParameters(model: any, visited?: WeakSet<object>): Tensor[];
+        moveParameters(model: any, device: string): void;
         getStateDict(model: any, prefix?: string, visited?: WeakSet<object>): StateDict;
         loadStateDict(model: any, stateDict: StateDict, prefix?: string, visited?: WeakSet<object>): void;
     };
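Note: the LayerNorm change narrows forward() to take a Tensor (the old declaration is truncated in this view, but the matching nn.js hunk below removes the forceTensor coercion), so callers should wrap raw values themselves. A small sketch, assuming Tensor and nn are re-exported from the package root:

    import { Tensor, nn } from "catniff";

    const ln = new nn.LayerNorm(4);
    // 0.5.10: pass a Tensor explicitly; raw arrays are no longer coerced here
    const out = ln.forward(new Tensor([1, 2, 3, 4]));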
package/dist/nn.js
CHANGED
@@ -20,7 +20,7 @@ class Linear {
         }
     }
     forward(input) {
-        input =
+        input = this.weight.handleOther(input);
         return linearTransform(input, this.weight, this.bias);
     }
 }
@@ -49,8 +49,8 @@ class RNNCell {
         }
     }
     forward(input, hidden) {
-        input =
-        hidden =
+        input = this.weightIH.handleOther(input);
+        hidden = this.weightHH.handleOther(hidden);
         return rnnTransform(input, hidden, this.weightIH, this.weightHH, this.biasIH, this.biasHH).tanh();
     }
 }
@@ -85,8 +85,8 @@ class GRUCell {
         }
     }
     forward(input, hidden) {
-        input =
-        hidden =
+        input = this.weightIN.handleOther(input);
+        hidden = this.weightHN.handleOther(hidden);
         const r = rnnTransform(input, hidden, this.weightIR, this.weightHR, this.biasIR, this.biasHR).sigmoid();
         const z = rnnTransform(input, hidden, this.weightIZ, this.weightHZ, this.biasIZ, this.biasHZ).sigmoid();
         const n = linearTransform(input, this.weightIN, this.biasIN).add(r.mul(linearTransform(hidden, this.weightHN, this.biasHN))).tanh();
@@ -132,9 +132,9 @@ class LSTMCell {
         }
     }
     forward(input, hidden, cell) {
-        input =
-        hidden =
-        cell =
+        input = this.weightII.handleOther(input);
+        hidden = this.weightHI.handleOther(hidden);
+        cell = this.weightHI.handleOther(cell);
         const i = rnnTransform(input, hidden, this.weightII, this.weightHI, this.biasII, this.biasHI).sigmoid();
         const f = rnnTransform(input, hidden, this.weightIF, this.weightHF, this.biasIF, this.biasHF).sigmoid();
         const g = rnnTransform(input, hidden, this.weightIG, this.weightHG, this.biasIG, this.biasHG).tanh();
@@ -163,7 +163,6 @@ class LayerNorm {
         }
     }
     forward(input) {
-        input = core_1.Tensor.forceTensor(input);
         // Normalize over the specified dimensions
         const normalizedDims = this.normalizedShape.length;
         const startDim = input.shape.length - normalizedDims;
@@ -208,6 +207,12 @@ const state = {
         }
         return parameters;
     },
+    moveParameters(model, device) {
+        const params = state.getParameters(model);
+        for (const param of params) {
+            param.to_(device);
+        }
+    },
     getStateDict(model, prefix = "", visited = new WeakSet()) {
         if (visited.has(model))
             return {};
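Note: nn.state.moveParameters() is a convenience wrapper that collects parameters with getParameters() and calls to_() on each one in place. A hedged usage sketch (the Linear constructor arguments and the "gpu" device name are assumptions; use whatever backend name is registered in Tensor.backends):

    import { nn } from "catniff";

    const model = {
        fc1: new nn.Linear(8, 4), // assuming the usual (inFeatures, outFeatures) signature
        fc2: new nn.Linear(4, 1),
    };

    // Moves every parameter reachable from the model object, in place
    nn.state.moveParameters(model, "gpu");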
package/dist/optim.d.ts
CHANGED
@@ -38,9 +38,27 @@ declare class Adam extends BaseOptimizer {
     constructor(params: Tensor[], options?: AdamOptions);
     step(): void;
 }
+export interface AdamWOptions {
+    lr?: number;
+    betas?: [number, number];
+    eps?: number;
+    weightDecay?: number;
+}
+declare class AdamW extends BaseOptimizer {
+    momentumBuffers: Map<Tensor, Tensor>;
+    velocityBuffers: Map<Tensor, Tensor>;
+    stepCount: number;
+    lr: number;
+    betas: [number, number];
+    eps: number;
+    weightDecay: number;
+    constructor(params: Tensor[], options?: AdamWOptions);
+    step(): void;
+}
 export declare class Optim {
     static BaseOptimizer: typeof BaseOptimizer;
     static SGD: typeof SGD;
     static Adam: typeof Adam;
+    static AdamW: typeof AdamW;
 }
 export {};
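Note: AdamW mirrors the existing Adam surface, with a separate weightDecay option (defaulting to 0 per the implementation below). A construction sketch, assuming Tensor and Optim come from the package root:

    import { Tensor, Optim } from "catniff";

    const w = new Tensor([[0.1, 0.2], [0.3, 0.4]]).withGrad(true);
    const opt = new Optim.AdamW([w], {
        lr: 3e-4,              // default 0.001
        betas: [0.9, 0.999],   // default
        eps: 1e-8,             // default
        weightDecay: 0.01,     // default 0
    });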
package/dist/optim.js
CHANGED
@@ -126,9 +126,68 @@ class Adam extends BaseOptimizer {
         }
     }
 }
+class AdamW extends BaseOptimizer {
+    momentumBuffers = new Map(); // First moment (m_t)
+    velocityBuffers = new Map(); // Second moment (v_t)
+    stepCount = 0;
+    lr;
+    betas;
+    eps;
+    weightDecay;
+    constructor(params, options) {
+        super(params);
+        this.lr = options?.lr || 0.001;
+        this.betas = options?.betas || [0.9, 0.999];
+        this.eps = options?.eps || 1e-8;
+        this.weightDecay = options?.weightDecay || 0;
+    }
+    step() {
+        this.stepCount++;
+        const beta1 = this.betas[0];
+        const beta2 = this.betas[1];
+        // Bias correction factors
+        const biasCorrection1 = 1 - Math.pow(beta1, this.stepCount);
+        const biasCorrection2 = 1 - Math.pow(beta2, this.stepCount);
+        for (const param of this.params) {
+            if (!param.grad || !param.requiresGrad)
+                continue;
+            let grad = param.grad.detach(), detachedParam = param.detach();
+            // Apply weight decay (L2 regularization)
+            detachedParam = detachedParam.sub(detachedParam.mul(this.weightDecay).mul(this.lr));
+            // Get or initialize first moment buffer (momentum)
+            let momentumBuffer = this.momentumBuffers.get(param);
+            if (!momentumBuffer) {
+                momentumBuffer = core_1.Tensor.zerosLike(grad); // Initialize with zeros (same shape as grad)
+                this.momentumBuffers.set(param, momentumBuffer);
+            }
+            // Get or initialize second moment buffer (velocity)
+            let velocityBuffer = this.velocityBuffers.get(param);
+            if (!velocityBuffer) {
+                velocityBuffer = core_1.Tensor.zerosLike(grad); // Initialize with zeros (same shape as grad)
+                this.velocityBuffers.set(param, velocityBuffer);
+            }
+            // Update biased first moment estimate: m_t = β1 * m_{t-1} + (1 - β1) * g_t
+            momentumBuffer = momentumBuffer.mul(beta1).add(grad.mul(1 - beta1));
+            this.momentumBuffers.set(param, momentumBuffer);
+            // Update biased second moment estimate: v_t = β2 * v_{t-1} + (1 - β2) * g_t^2
+            velocityBuffer = velocityBuffer.mul(beta2).add(grad.pow(2).mul(1 - beta2));
+            this.velocityBuffers.set(param, velocityBuffer);
+            // Compute bias-corrected first moment: m̂_t = m_t / (1 - β1^t)
+            const correctedMomentum = momentumBuffer.div(biasCorrection1);
+            // Compute bias-corrected second moment: v̂_t = v_t / (1 - β2^t)
+            const correctedVelocity = velocityBuffer.div(biasCorrection2);
+            // Update parameters: θ_t = θ_t - α * m̂_t / (√v̂_t + ε)
+            const denom = correctedVelocity.sqrt().add(this.eps);
+            const stepSize = correctedMomentum.div(denom).mul(this.lr);
+            const newParam = detachedParam.sub(stepSize);
+            param.replace(newParam);
+        }
+    }
+}
 class Optim {
     static BaseOptimizer = BaseOptimizer;
     static SGD = SGD;
     static Adam = Adam;
+    static AdamW = AdamW;
 }
 exports.Optim = Optim;
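Note: each step() call skips parameters without gradients, applies the weight-decay shrink to the detached parameter, runs the bias-corrected Adam update, and writes the result back through the (now TensorValue-friendly) replace(). A minimal sketch; how gradients get populated (the autograd entry point) is outside this diff and assumed:

    import { Tensor, Optim } from "catniff";

    const w = new Tensor([1, 2, 3]).withGrad(true);
    const opt = new Optim.AdamW([w], { lr: 0.01, weightDecay: 0.01 });

    // ...forward pass + backprop so that w.grad is populated (not shown in this diff)...
    opt.step(); // updates w in place via param.replace(newParam)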