catniff 0.5.9 → 0.5.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core.d.ts +4 -2
- package/dist/core.js +46 -33
- package/dist/nn.d.ts +2 -1
- package/dist/nn.js +14 -9
- package/dist/optim.d.ts +18 -0
- package/dist/optim.js +59 -0
- package/package.json +1 -1
package/dist/core.d.ts
CHANGED
@@ -38,7 +38,7 @@ export declare class Tensor {
     static elementWiseSelf(tA: Tensor, op: (tA: number) => number): Tensor;
     elementWiseABDAG(other: TensorValue | Tensor, op: (a: number, b: number) => number, thisGrad?: (self: Tensor, other: Tensor, outGrad: Tensor) => Tensor, otherGrad?: (self: Tensor, other: Tensor, outGrad: Tensor) => Tensor): Tensor;
     elementWiseSelfDAG(op: (a: number) => number, thisGrad?: (self: Tensor, outGrad: Tensor) => Tensor): Tensor;
-
+    handleOther(other: Tensor | TensorValue): Tensor;
     static addGrad(tensor: Tensor, accumGrad: Tensor): void;
     isContiguous(): boolean;
     contiguous(): Tensor;
@@ -50,6 +50,8 @@ export declare class Tensor {
     mean(dims?: number[] | number, keepDims?: boolean): Tensor;
     max(dims?: number[] | number, keepDims?: boolean): Tensor;
     min(dims?: number[] | number, keepDims?: boolean): Tensor;
+    all(dims?: number[] | number, keepDims?: boolean): Tensor;
+    any(dims?: number[] | number, keepDims?: boolean): Tensor;
     var(dims?: number[] | number, keepDims?: boolean): Tensor;
     std(dims?: number[] | number, keepDims?: boolean): Tensor;
     softmax(dims?: number[] | number): Tensor;
@@ -174,7 +176,7 @@ export declare class Tensor {
     withGrad(requiresGrad: boolean): Tensor;
     detach(): Tensor;
     clone(): Tensor;
-    replace(other: Tensor, allowShapeMismatch?: boolean): Tensor;
+    replace(other: Tensor | TensorValue, allowShapeMismatch?: boolean): Tensor;
     static backends: Map<string, Backend>;
     to(device: string): Tensor;
     to_(device: string): Tensor;
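The headline additions here are the `all`/`any` reductions, the now-public `handleOther` helper, and `replace` accepting a plain `TensorValue`. A minimal usage sketch (the `catniff` import path and the example values are assumptions, not taken from the package's docs):

import { Tensor } from "catniff";

const t = new Tensor([[1, 0], [1, 1]]);
const rowsAll = t.all(1); // 1 per row only where every element is non-zero -> [0, 1]
const rowsAny = t.any(1); // 1 per row where at least one element is non-zero -> [1, 1]

// replace() now also accepts a plain TensorValue, not just a Tensor
const p = new Tensor([1, 2, 3]);
p.replace([4, 5, 6]);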
package/dist/core.js
CHANGED
@@ -183,7 +183,7 @@ class Tensor {
     }
     // Utility to do element-wise operation and build a dag node with another tensor
     elementWiseABDAG(other, op, thisGrad = () => new Tensor(0), otherGrad = () => new Tensor(0)) {
-        other =
+        other = this.handleOther(other);
         const out = Tensor.elementWiseAB(this, other, op);
         if (this.requiresGrad) {
             out.requiresGrad = true;
@@ -225,11 +225,15 @@ class Tensor {
         }
         return out;
     }
-    // Utility to
-
-        if (
-
-
+    // Utility to handle other tensor if an op needs a second operand
+    handleOther(other) {
+        if (other instanceof Tensor) {
+            if (this.device !== other.device) {
+                throw new Error("Can not operate on tensors that are not on the same device");
+            }
+            return other;
+        }
+        return new Tensor(other, { device: this.device });
     }
     // Utility to add to gradient of tensor
     static addGrad(tensor, accumGrad) {
@@ -428,9 +432,9 @@ class Tensor {
         }
         // Calculate new value after sum
         for (let realFlatIndex = 0; realFlatIndex < originalSize; realFlatIndex++) {
-            const coords = Tensor.indexToCoords(realFlatIndex, this.strides);
             // Force 0 on reduced axes to collapse into size-1 dims
-            const outCoords =
+            const outCoords = Tensor.indexToCoords(realFlatIndex, this.strides);
+            outCoords[dims] = 0;
             // Convert output coordinates to flat index
             const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
             // Add into sum
@@ -479,9 +483,9 @@ class Tensor {
         const originalSize = Tensor.shapeToSize(this.shape);
         // Calculate new value after multiplying
         for (let realFlatIndex = 0; realFlatIndex < originalSize; realFlatIndex++) {
-            const coords = Tensor.indexToCoords(realFlatIndex, this.strides);
             // Force 0 on reduced axes to collapse into size-1 dims
-            const outCoords =
+            const outCoords = Tensor.indexToCoords(realFlatIndex, this.strides);
+            outCoords[dims] = 0;
             // Convert output coordinates to flat index
             const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
             // Multiply into product
@@ -498,9 +502,9 @@ class Tensor {
         out.gradFn = () => {
             const gradShape = this.shape, gradStrides = this.strides, gradValue = new Array(originalSize).fill(0);
             for (let realFlatIndex = 0; realFlatIndex < originalSize; realFlatIndex++) {
-                const coords = Tensor.indexToCoords(realFlatIndex, this.strides);
                 // Force 0 on reduced axes to collapse into size-1 dims
-                const outCoords =
+                const outCoords = Tensor.indexToCoords(realFlatIndex, this.strides);
+                outCoords[dims] = 0;
                 // Convert output coordinates to flat index
                 const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
                 // Grad is the product of other elements of the same axis, which is product of all els divided by the current value
@@ -537,9 +541,9 @@ class Tensor {
         const originalSize = Tensor.shapeToSize(this.shape);
         // Calculate sums and how many elements contribute to specific positions
         for (let realFlatIndex = 0; realFlatIndex < originalSize; realFlatIndex++) {
-            const coords = Tensor.indexToCoords(realFlatIndex, this.strides);
             // Force 0 on reduced axes to collapse into size-1 dims
-            const outCoords =
+            const outCoords = Tensor.indexToCoords(realFlatIndex, this.strides);
+            outCoords[dims] = 0;
             // Convert output coordinates to flat index
             const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
             // Calculate sum and contributors to the sum
@@ -562,9 +566,9 @@ class Tensor {
             const gradShape = this.shape, gradStrides = this.strides, gradValue = new Array(originalSize).fill(0);
             // Calculate grad by assigning 1 divided by the number of contributors to the position
             for (let realFlatIndex = 0; realFlatIndex < originalSize; realFlatIndex++) {
-                const coords = Tensor.indexToCoords(realFlatIndex, this.strides);
                 // Force 0 on reduced axes to collapse into size-1 dims
-                const outCoords =
+                const outCoords = Tensor.indexToCoords(realFlatIndex, this.strides);
+                outCoords[dims] = 0;
                 // Convert output coordinates to flat index
                 const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
                 // Mean = 1/n * (el1 + el2 + ... + eln) so grad = 1/n
@@ -600,9 +604,9 @@ class Tensor {
         const originalSize = Tensor.shapeToSize(this.shape);
         // Calculate maximum values of axes
         for (let realFlatIndex = 0; realFlatIndex < originalSize; realFlatIndex++) {
-            const coords = Tensor.indexToCoords(realFlatIndex, this.strides);
             // Force 0 on reduced axes to collapse into size-1 dims
-            const outCoords =
+            const outCoords = Tensor.indexToCoords(realFlatIndex, this.strides);
+            outCoords[dims] = 0;
             // Convert output coordinates to flat index
             const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
             // Get max over time
@@ -623,18 +627,18 @@ class Tensor {
             const shareCounts = new Array(outputSize).fill(0);
             const originalValue = this.value;
             for (let realFlatIndex = 0; realFlatIndex < originalSize; realFlatIndex++) {
-                const coords = Tensor.indexToCoords(realFlatIndex, this.strides);
                 // Force 0 on reduced axes to collapse into size-1 dims
-                const outCoords =
+                const outCoords = Tensor.indexToCoords(realFlatIndex, this.strides);
+                outCoords[dims] = 0;
                 // Convert output coordinates to flat index
                 const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
                 // We collect how many elements share the same max value first
                 shareCounts[outFlatIndex] += outputValue[outFlatIndex] === originalValue[realFlatIndex] ? 1 : 0;
             }
             for (let realFlatIndex = 0; realFlatIndex < originalSize; realFlatIndex++) {
-                const coords = Tensor.indexToCoords(realFlatIndex, this.strides);
                 // Force 0 on reduced axes to collapse into size-1 dims
-                const outCoords =
+                const outCoords = Tensor.indexToCoords(realFlatIndex, this.strides);
+                outCoords[dims] = 0;
                 // Convert output coordinates to flat index
                 const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
                 // Here we share the grad between the elements that share the same max value
@@ -670,9 +674,9 @@ class Tensor {
         const originalSize = Tensor.shapeToSize(this.shape);
         // Calculate minimum values of axes
        for (let realFlatIndex = 0; realFlatIndex < originalSize; realFlatIndex++) {
-            const coords = Tensor.indexToCoords(realFlatIndex, this.strides);
             // Force 0 on reduced axes to collapse into size-1 dims
-            const outCoords =
+            const outCoords = Tensor.indexToCoords(realFlatIndex, this.strides);
+            outCoords[dims] = 0;
             // Convert output coordinates to flat index
             const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
             // Get min over time
@@ -693,18 +697,18 @@ class Tensor {
             const shareCounts = new Array(outputSize).fill(0);
             const originalValue = this.value;
             for (let realFlatIndex = 0; realFlatIndex < originalSize; realFlatIndex++) {
-                const coords = Tensor.indexToCoords(realFlatIndex, this.strides);
                 // Force 0 on reduced axes to collapse into size-1 dims
-                const outCoords =
+                const outCoords = Tensor.indexToCoords(realFlatIndex, this.strides);
+                outCoords[dims] = 0;
                 // Convert output coordinates to flat index
                 const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
                 // We collect how many elements share the same min value first
                 shareCounts[outFlatIndex] += outputValue[outFlatIndex] === originalValue[realFlatIndex] ? 1 : 0;
             }
             for (let realFlatIndex = 0; realFlatIndex < originalSize; realFlatIndex++) {
-                const coords = Tensor.indexToCoords(realFlatIndex, this.strides);
                 // Force 0 on reduced axes to collapse into size-1 dims
-                const outCoords =
+                const outCoords = Tensor.indexToCoords(realFlatIndex, this.strides);
+                outCoords[dims] = 0;
                 // Convert output coordinates to flat index
                 const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
                 // Here we share the grad between the elements that share the same min value
@@ -716,6 +720,14 @@ class Tensor {
         }
         return keepDims ? out : out.squeeze(dims);
     }
+    // Tensor all condition reduction
+    all(dims, keepDims = false) {
+        return this.min(dims, keepDims).ne(0);
+    }
+    // Tensor any condition reduction
+    any(dims, keepDims = false) {
+        return this.max(dims, keepDims).ne(0);
+    }
     // Tensor variance reduction
     var(dims, keepDims = false) {
         const meanXSquared = this.square().mean(dims, keepDims);
@@ -1199,7 +1211,7 @@ class Tensor {
     }
     // 1D tensor dot product
     dot(other) {
-        other =
+        other = this.handleOther(other);
         // Verify 1D shape
         if (this.shape.length !== 1 || other.shape.length !== 1) {
             throw new Error("Inputs are not 1D tensors");
@@ -1237,7 +1249,7 @@ class Tensor {
     }
     // Matrix multiplication
     mm(other) {
-        other =
+        other = this.handleOther(other);
         // Verify 2D shape
         if (this.shape.length !== 2 || other.shape.length !== 2) {
             throw new Error("Inputs are not matrices");
@@ -1292,7 +1304,7 @@ class Tensor {
     }
     // Batched 3D tensor matmul
     bmm(other) {
-        other =
+        other = this.handleOther(other);
         // Verify 3D shape
         if (this.shape.length !== 3 || other.shape.length !== 3 || this.shape[0] !== other.shape[0]) {
             throw new Error("Inputs are not 3D tensors with the same first dim size");
@@ -1350,7 +1362,7 @@ class Tensor {
     }
     // Convert right-side 1D tensor to a vector (nx1 tensor) to do matmul
     mv(other) {
-        other =
+        other = this.handleOther(other);
         // Verify 2D shape
         if (this.shape.length !== 2 || other.shape.length !== 1) {
             throw new Error("Input is not a 2D and 1D tensor pair");
@@ -1359,7 +1371,7 @@ class Tensor {
     }
     // General matrix multiplication with different shapes
     matmul(other) {
-        other =
+        other = this.handleOther(other);
         const isThis1D = this.shape.length === 1;
         const isOther1D = other.shape.length === 1;
         if (isThis1D && isOther1D) {
@@ -1692,6 +1704,7 @@ class Tensor {
     }
     // Returns this tensor with value replaced with the value of another tensor
     replace(other, allowShapeMismatch = false) {
+        other = this.handleOther(other);
         // Verify shape
         if (!allowShapeMismatch) {
             for (let index = 0; index < this.shape.length; index++) {
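Implementation-wise, `handleOther` centralizes what each binary and matmul op previously did inline: promote a plain value to a `Tensor` on the caller's device, and reject mixed-device operands. A sketch of the observable behavior (the "gpu" device name is hypothetical; real names depend on what is registered in `Tensor.backends`):

import { Tensor } from "catniff";

const a = new Tensor([1, 2, 3]);
a.add(2); // a plain value is promoted to a Tensor on a's device

const b = new Tensor([4, 5, 6], { device: "gpu" }); // hypothetical backend name
// a.add(b);              // throws: operands must be on the same device
// a.add(b.to(a.device)); // fine: move b explicitly first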
package/dist/nn.d.ts
CHANGED
@@ -55,7 +55,7 @@ declare class LayerNorm {
     eps: number;
     normalizedShape: number[];
     constructor(normalizedShape: number | number[], eps?: number, elementwiseAffine?: boolean, bias?: boolean, device?: string);
-    forward(input: Tensor
+    forward(input: Tensor): Tensor;
 }
 export interface StateDict {
     [key: string]: any;
@@ -68,6 +68,7 @@ export declare const nn: {
     LayerNorm: typeof LayerNorm;
     state: {
         getParameters(model: any, visited?: WeakSet<object>): Tensor[];
+        moveParameters(model: any, device: string): void;
         getStateDict(model: any, prefix?: string, visited?: WeakSet<object>): StateDict;
         loadStateDict(model: any, stateDict: StateDict, prefix?: string, visited?: WeakSet<object>): void;
     };
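`LayerNorm.forward` is now a complete, Tensor-only signature; the implementation's `forceTensor` coercion is removed in the nn.js diff below, so callers must pass a real `Tensor`. A brief sketch (import path assumed):

import { Tensor, nn } from "catniff";

const ln = new nn.LayerNorm(4); // normalizedShape = [4]
const x = new Tensor([1, 2, 3, 4]);
const y = ln.forward(x); // input must already be a Tensor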
package/dist/nn.js
CHANGED
@@ -20,7 +20,7 @@ class Linear {
         }
     }
     forward(input) {
-        input =
+        input = this.weight.handleOther(input);
         return linearTransform(input, this.weight, this.bias);
     }
 }
@@ -49,8 +49,8 @@ class RNNCell {
         }
     }
     forward(input, hidden) {
-        input =
-        hidden =
+        input = this.weightIH.handleOther(input);
+        hidden = this.weightHH.handleOther(hidden);
         return rnnTransform(input, hidden, this.weightIH, this.weightHH, this.biasIH, this.biasHH).tanh();
     }
 }
@@ -85,8 +85,8 @@ class GRUCell {
         }
     }
     forward(input, hidden) {
-        input =
-        hidden =
+        input = this.weightIN.handleOther(input);
+        hidden = this.weightHN.handleOther(hidden);
         const r = rnnTransform(input, hidden, this.weightIR, this.weightHR, this.biasIR, this.biasHR).sigmoid();
         const z = rnnTransform(input, hidden, this.weightIZ, this.weightHZ, this.biasIZ, this.biasHZ).sigmoid();
         const n = linearTransform(input, this.weightIN, this.biasIN).add(r.mul(linearTransform(hidden, this.weightHN, this.biasHN))).tanh();
@@ -132,9 +132,9 @@ class LSTMCell {
         }
     }
     forward(input, hidden, cell) {
-        input =
-        hidden =
-        cell =
+        input = this.weightII.handleOther(input);
+        hidden = this.weightHI.handleOther(hidden);
+        cell = this.weightHI.handleOther(cell);
         const i = rnnTransform(input, hidden, this.weightII, this.weightHI, this.biasII, this.biasHI).sigmoid();
         const f = rnnTransform(input, hidden, this.weightIF, this.weightHF, this.biasIF, this.biasHF).sigmoid();
         const g = rnnTransform(input, hidden, this.weightIG, this.weightHG, this.biasIG, this.biasHG).tanh();
@@ -163,7 +163,6 @@ class LayerNorm {
         }
     }
     forward(input) {
-        input = core_1.Tensor.forceTensor(input);
         // Normalize over the specified dimensions
         const normalizedDims = this.normalizedShape.length;
         const startDim = input.shape.length - normalizedDims;
@@ -208,6 +207,12 @@ const state = {
        }
        return parameters;
    },
+    moveParameters(model, device) {
+        const params = state.getParameters(model);
+        for (const param of params) {
+            param.to_(device);
+        }
+    },
    getStateDict(model, prefix = "", visited = new WeakSet()) {
        if (visited.has(model))
            return {};
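`moveParameters` is a thin convenience over the existing helpers: it collects every parameter with `getParameters` and calls the in-place `to_` on each. A hedged sketch (import path, model shape, and device name are assumptions):

import { Tensor, nn } from "catniff";

// Any object tree whose leaves are Tensors works with the nn.state helpers
const model = { w: new Tensor([[0, 1], [1, 0]]).withGrad(true) };
nn.state.moveParameters(model, "gpu"); // in-place: each parameter is moved via to_("gpu")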
package/dist/optim.d.ts
CHANGED
@@ -38,9 +38,27 @@ declare class Adam extends BaseOptimizer {
     constructor(params: Tensor[], options?: AdamOptions);
     step(): void;
 }
+export interface AdamWOptions {
+    lr?: number;
+    betas?: [number, number];
+    eps?: number;
+    weightDecay?: number;
+}
+declare class AdamW extends BaseOptimizer {
+    momentumBuffers: Map<Tensor, Tensor>;
+    velocityBuffers: Map<Tensor, Tensor>;
+    stepCount: number;
+    lr: number;
+    betas: [number, number];
+    eps: number;
+    weightDecay: number;
+    constructor(params: Tensor[], options?: AdamWOptions);
+    step(): void;
+}
 export declare class Optim {
     static BaseOptimizer: typeof BaseOptimizer;
     static SGD: typeof SGD;
     static Adam: typeof Adam;
+    static AdamW: typeof AdamW;
 }
 export {};
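Every `AdamWOptions` field is optional; as the implementation below shows, the defaults are lr = 0.001, betas = [0.9, 0.999], eps = 1e-8, and weightDecay = 0. A construction sketch (import path assumed):

import { Tensor, Optim } from "catniff";

const w = new Tensor([0.5, -0.3]).withGrad(true);
const opt = new Optim.AdamW([w], { lr: 1e-3, weightDecay: 0.01 });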
package/dist/optim.js
CHANGED
@@ -126,9 +126,68 @@ class Adam extends BaseOptimizer {
         }
     }
 }
+class AdamW extends BaseOptimizer {
+    momentumBuffers = new Map(); // First moment (m_t)
+    velocityBuffers = new Map(); // Second moment (v_t)
+    stepCount = 0;
+    lr;
+    betas;
+    eps;
+    weightDecay;
+    constructor(params, options) {
+        super(params);
+        this.lr = options?.lr || 0.001;
+        this.betas = options?.betas || [0.9, 0.999];
+        this.eps = options?.eps || 1e-8;
+        this.weightDecay = options?.weightDecay || 0;
+    }
+    step() {
+        this.stepCount++;
+        const beta1 = this.betas[0];
+        const beta2 = this.betas[1];
+        // Bias correction factors
+        const biasCorrection1 = 1 - Math.pow(beta1, this.stepCount);
+        const biasCorrection2 = 1 - Math.pow(beta2, this.stepCount);
+        for (const param of this.params) {
+            if (!param.grad || !param.requiresGrad)
+                continue;
+            let grad = param.grad.detach(), detachedParam = param.detach();
+            // Apply weight decay (L2 regularization)
+            detachedParam = detachedParam.sub(detachedParam.mul(this.weightDecay).mul(this.lr));
+            // Get or initialize first moment buffer (momentum)
+            let momentumBuffer = this.momentumBuffers.get(param);
+            if (!momentumBuffer) {
+                momentumBuffer = core_1.Tensor.zerosLike(grad); // Initialize with zeros (same shape as grad)
+                this.momentumBuffers.set(param, momentumBuffer);
+            }
+            // Get or initialize second moment buffer (velocity)
+            let velocityBuffer = this.velocityBuffers.get(param);
+            if (!velocityBuffer) {
+                velocityBuffer = core_1.Tensor.zerosLike(grad); // Initialize with zeros (same shape as grad)
+                this.velocityBuffers.set(param, velocityBuffer);
+            }
+            // Update biased first moment estimate: m_t = β1 * m_{t-1} + (1 - β1) * g_t
+            momentumBuffer = momentumBuffer.mul(beta1).add(grad.mul(1 - beta1));
+            this.momentumBuffers.set(param, momentumBuffer);
+            // Update biased second moment estimate: v_t = β2 * v_{t-1} + (1 - β2) * g_t^2
+            velocityBuffer = velocityBuffer.mul(beta2).add(grad.pow(2).mul(1 - beta2));
+            this.velocityBuffers.set(param, velocityBuffer);
+            // Compute bias-corrected first moment: m̂_t = m_t / (1 - β1^t)
+            const correctedMomentum = momentumBuffer.div(biasCorrection1);
+            // Compute bias-corrected second moment: v̂_t = v_t / (1 - β2^t)
+            const correctedVelocity = velocityBuffer.div(biasCorrection2);
+            // Update parameters: θ_t = θ_t - α * m̂_t / (√v̂_t + ε)
+            const denom = correctedVelocity.sqrt().add(this.eps);
+            const stepSize = correctedMomentum.div(denom).mul(this.lr);
+            const newParam = detachedParam.sub(stepSize);
+            param.replace(newParam);
+        }
+    }
+}
 class Optim {
     static BaseOptimizer = BaseOptimizer;
     static SGD = SGD;
     static Adam = Adam;
+    static AdamW = AdamW;
 }
 exports.Optim = Optim;
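Note the decoupled weight decay: `step()` shrinks the parameter by lr * weightDecay before applying the bias-corrected Adam update, rather than folding the decay into the gradient, which is what distinguishes AdamW from Adam with plain L2 regularization. A hedged end-to-end sketch (`backward()` is an assumption, inferred from the `gradFn`/`requiresGrad` autograd machinery in core.js rather than shown in this diff):

import { Tensor, Optim } from "catniff";

const w = new Tensor([0.5, -0.3]).withGrad(true);
const opt = new Optim.AdamW([w], { lr: 1e-3, weightDecay: 0.01 });

const loss = w.mul(w).sum(); // toy scalar loss
loss.backward();             // assumed autograd entry point
opt.step();                  // decay w, then apply the bias-corrected update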