npm - catniff - Versions diffs - 0.2.14 → 0.2.16 - Mend

catniff 0.2.14 → 0.2.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/dist/core.d.ts CHANGED

@@ -43,6 +43,7 @@ export declare class Tensor {
     mean(dims?: number[] | number, keepDims?: boolean): Tensor;
     max(dims?: number[] | number, keepDims?: boolean): Tensor;
     min(dims?: number[] | number, keepDims?: boolean): Tensor;
+    softmax(dims?: number[] | number): Tensor;
     add(other: TensorValue | Tensor): Tensor;
     sub(other: TensorValue | Tensor): Tensor;
     subtract: (other: TensorValue | Tensor) => Tensor;

package/dist/core.js CHANGED

@@ -355,14 +355,12 @@ class Tensor {
             gradValue = new Array(originalSize).fill(0);
         }
         // Calculate new value after sum
-        for (let index = 0; index < originalSize; index++) {
-            const coords = Tensor.indexToCoords(index, this.strides);
+        for (let realFlatIndex = 0; realFlatIndex < originalSize; realFlatIndex++) {
+            const coords = Tensor.indexToCoords(realFlatIndex, this.strides);
             // Force 0 on reduced axes to collapse into size-1 dims
             const outCoords = coords.map((val, i) => dims.includes(i) ? 0 : val);
             // Convert output coordinates to flat index
             const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
-            // Accumulate, outFlatIndex should match multiple realFlatIndexes
-            const realFlatIndex = Tensor.coordsToIndex(coords, this.strides);
             // Add into sum
             outputValue[outFlatIndex] += this.value[realFlatIndex];
             // Mark for gradient if needed
@@ -402,14 +400,12 @@ class Tensor {
         const outputValue = new Array(outputSize).fill(1);
         const originalSize = Tensor.shapeToSize(this.shape);
         // Calculate new value after multiplying
-        for (let index = 0; index < originalSize; index++) {
-            const coords = Tensor.indexToCoords(index, this.strides);
+        for (let realFlatIndex = 0; realFlatIndex < originalSize; realFlatIndex++) {
+            const coords = Tensor.indexToCoords(realFlatIndex, this.strides);
             // Force 0 on reduced axes to collapse into size-1 dims
             const outCoords = coords.map((val, i) => dims.includes(i) ? 0 : val);
             // Convert output coordinates to flat index
             const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
-            // Accumulate, outFlatIndex should match multiple realFlatIndexes
-            const realFlatIndex = Tensor.coordsToIndex(coords, this.strides);
             // Multiply into product
             outputValue[outFlatIndex] *= this.value[realFlatIndex];
         }
@@ -419,21 +415,19 @@ class Tensor {
         });
         // Set up gradient if needed
         if (this.requiresGrad) {
-            const gradShape = this.shape, gradStrides = this.strides, gradValue = new Array(originalSize).fill(0);
-            for (let index = 0; index < originalSize; index++) {
-                const coords = Tensor.indexToCoords(index, this.strides);
-                // Force 0 on reduced axes to collapse into size-1 dims
-                const outCoords = coords.map((val, i) => dims.includes(i) ? 0 : val);
-                // Convert output coordinates to flat index
-                const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
-                // Accumulate, outFlatIndex should match multiple realFlatIndexes
-                const realFlatIndex = Tensor.coordsToIndex(coords, this.strides);
-                // Grad is the product of other elements of the same axis, which is product of all els divided by the current value
-                gradValue[realFlatIndex] = outputValue[outFlatIndex] / this.value[realFlatIndex];
-            }
             out.requiresGrad = true;
             out.children.push(this);
             out.gradFn = () => {
+                const gradShape = this.shape, gradStrides = this.strides, gradValue = new Array(originalSize).fill(0);
+                for (let realFlatIndex = 0; realFlatIndex < originalSize; realFlatIndex++) {
+                    const coords = Tensor.indexToCoords(realFlatIndex, this.strides);
+                    // Force 0 on reduced axes to collapse into size-1 dims
+                    const outCoords = coords.map((val, i) => dims.includes(i) ? 0 : val);
+                    // Convert output coordinates to flat index
+                    const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
+                    // Grad is the product of other elements of the same axis, which is product of all els divided by the current value
+                    gradValue[realFlatIndex] = outputValue[outFlatIndex] / this.value[realFlatIndex];
+                }
                 const localGrad = new Tensor(gradValue, { shape: gradShape, strides: gradStrides });
                 Tensor.addGrad(this, out.grad.withGrad(false).mul(localGrad));
             };
@@ -458,14 +452,12 @@ class Tensor {
         const outputFeeders = new Array(outputSize).fill(0);
         const originalSize = Tensor.shapeToSize(this.shape);
         // Calculate sums and how many elements contribute to specific positions
-        for (let index = 0; index < originalSize; index++) {
-            const coords = Tensor.indexToCoords(index, this.strides);
+        for (let realFlatIndex = 0; realFlatIndex < originalSize; realFlatIndex++) {
+            const coords = Tensor.indexToCoords(realFlatIndex, this.strides);
             // Force 0 on reduced axes to collapse into size-1 dims
             const outCoords = coords.map((val, i) => dims.includes(i) ? 0 : val);
             // Convert output coordinates to flat index
             const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
-            // Accumulate, outFlatIndex should match multiple realFlatIndexes
-            const realFlatIndex = Tensor.coordsToIndex(coords, this.strides);
             // Calculate sum and contributors to the sum
             outputValue[outFlatIndex] += this.value[realFlatIndex];
             outputFeeders[outFlatIndex]++;
@@ -480,22 +472,20 @@ class Tensor {
         });
         // Set up gradient if needed
         if (this.requiresGrad) {
-            const gradShape = this.shape, gradStrides = this.strides, gradValue = new Array(originalSize).fill(0);
-            // Calculate grad by assigning 1 divided by the number of contributors to the position
-            for (let index = 0; index < originalSize; index++) {
-                const coords = Tensor.indexToCoords(index, this.strides);
-                // Force 0 on reduced axes to collapse into size-1 dims
-                const outCoords = coords.map((val, i) => dims.includes(i) ? 0 : val);
-                // Convert output coordinates to flat index
-                const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
-                // Accumulate, outFlatIndex should match multiple realFlatIndexes
-                const realFlatIndex = Tensor.coordsToIndex(coords, this.strides);
-                // Mean = 1/n * (el1 + el2 + ... + eln) so grad = 1/n
-                gradValue[realFlatIndex] = 1 / outputFeeders[outFlatIndex];
-            }
             out.requiresGrad = true;
             out.children.push(this);
             out.gradFn = () => {
+                const gradShape = this.shape, gradStrides = this.strides, gradValue = new Array(originalSize).fill(0);
+                // Calculate grad by assigning 1 divided by the number of contributors to the position
+                for (let realFlatIndex = 0; realFlatIndex < originalSize; realFlatIndex++) {
+                    const coords = Tensor.indexToCoords(realFlatIndex, this.strides);
+                    // Force 0 on reduced axes to collapse into size-1 dims
+                    const outCoords = coords.map((val, i) => dims.includes(i) ? 0 : val);
+                    // Convert output coordinates to flat index
+                    const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
+                    // Mean = 1/n * (el1 + el2 + ... + eln) so grad = 1/n
+                    gradValue[realFlatIndex] = 1 / outputFeeders[outFlatIndex];
+                }
                 const localGrad = new Tensor(gradValue, { shape: gradShape, strides: gradStrides });
                 Tensor.addGrad(this, out.grad.withGrad(false).mul(localGrad));
             };
@@ -519,14 +509,12 @@ class Tensor {
         const outputValue = new Array(outputSize).fill(-Infinity);
         const originalSize = Tensor.shapeToSize(this.shape);
         // Calculate maximum values of axes
-        for (let index = 0; index < originalSize; index++) {
-            const coords = Tensor.indexToCoords(index, this.strides);
+        for (let realFlatIndex = 0; realFlatIndex < originalSize; realFlatIndex++) {
+            const coords = Tensor.indexToCoords(realFlatIndex, this.strides);
             // Force 0 on reduced axes to collapse into size-1 dims
             const outCoords = coords.map((val, i) => dims.includes(i) ? 0 : val);
             // Convert output coordinates to flat index
             const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
-            // Accumulate, outFlatIndex should match multiple realFlatIndexes
-            const realFlatIndex = Tensor.coordsToIndex(coords, this.strides);
             // Get max over time
             if (this.value[realFlatIndex] > outputValue[outFlatIndex]) {
                 outputValue[outFlatIndex] = this.value[realFlatIndex];
@@ -538,21 +526,19 @@ class Tensor {
         });
         // Set up gradient if needed
         if (this.requiresGrad) {
-            const gradShape = this.shape, gradStrides = this.strides, gradValue = new Array(originalSize).fill(0);
-            for (let index = 0; index < originalSize; index++) {
-                const coords = Tensor.indexToCoords(index, this.strides);
-                // Force 0 on reduced axes to collapse into size-1 dims
-                const outCoords = coords.map((val, i) => dims.includes(i) ? 0 : val);
-                // Convert output coordinates to flat index
-                const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
-                // Accumulate, outFlatIndex should match multiple realFlatIndexes
-                const realFlatIndex = Tensor.coordsToIndex(coords, this.strides);
-                // Calculate grad by checking if a positon holds a value equal to the max value
-                gradValue[realFlatIndex] = outputValue[outFlatIndex] === this.value[realFlatIndex] ? 1 : 0;
-            }
             out.requiresGrad = true;
             out.children.push(this);
             out.gradFn = () => {
+                const gradShape = this.shape, gradStrides = this.strides, gradValue = new Array(originalSize).fill(0);
+                for (let realFlatIndex = 0; realFlatIndex < originalSize; realFlatIndex++) {
+                    const coords = Tensor.indexToCoords(realFlatIndex, this.strides);
+                    // Force 0 on reduced axes to collapse into size-1 dims
+                    const outCoords = coords.map((val, i) => dims.includes(i) ? 0 : val);
+                    // Convert output coordinates to flat index
+                    const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
+                    // Calculate grad by checking if a positon holds a value equal to the max value
+                    gradValue[realFlatIndex] = outputValue[outFlatIndex] === this.value[realFlatIndex] ? 1 : 0;
+                }
                 const localGrad = new Tensor(gradValue, { shape: gradShape, strides: gradStrides });
                 Tensor.addGrad(this, out.grad.withGrad(false).mul(localGrad));
             };
@@ -576,14 +562,12 @@ class Tensor {
         const outputValue = new Array(outputSize).fill(Infinity);
         const originalSize = Tensor.shapeToSize(this.shape);
         // Calculate minimum values of axes
-        for (let index = 0; index < originalSize; index++) {
-            const coords = Tensor.indexToCoords(index, this.strides);
+        for (let realFlatIndex = 0; realFlatIndex < originalSize; realFlatIndex++) {
+            const coords = Tensor.indexToCoords(realFlatIndex, this.strides);
             // Force 0 on reduced axes to collapse into size-1 dims
             const outCoords = coords.map((val, i) => dims.includes(i) ? 0 : val);
             // Convert output coordinates to flat index
             const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
-            // Accumulate, outFlatIndex should match multiple realFlatIndexes
-            const realFlatIndex = Tensor.coordsToIndex(coords, this.strides);
             // Get min over time
             if (this.value[realFlatIndex] < outputValue[outFlatIndex]) {
                 outputValue[outFlatIndex] = this.value[realFlatIndex];
@@ -595,27 +579,89 @@ class Tensor {
         });
         // Set up gradient if needed
         if (this.requiresGrad) {
-            const gradShape = this.shape, gradStrides = this.strides, gradValue = new Array(originalSize).fill(0);
-            for (let index = 0; index < originalSize; index++) {
-                const coords = Tensor.indexToCoords(index, this.strides);
-                // Force 0 on reduced axes to collapse into size-1 dims
-                const outCoords = coords.map((val, i) => dims.includes(i) ? 0 : val);
-                // Convert output coordinates to flat index
-                const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
-                // Accumulate, outFlatIndex should match multiple realFlatIndexes
-                const realFlatIndex = Tensor.coordsToIndex(coords, this.strides);
-                // Calculate grad by checking if a positon holds a value equal to the min value
-                gradValue[realFlatIndex] = outputValue[outFlatIndex] === this.value[realFlatIndex] ? 1 : 0;
-            }
             out.requiresGrad = true;
             out.children.push(this);
             out.gradFn = () => {
+                const gradShape = this.shape, gradStrides = this.strides, gradValue = new Array(originalSize).fill(0);
+                for (let realFlatIndex = 0; realFlatIndex < originalSize; realFlatIndex++) {
+                    const coords = Tensor.indexToCoords(realFlatIndex, this.strides);
+                    // Force 0 on reduced axes to collapse into size-1 dims
+                    const outCoords = coords.map((val, i) => dims.includes(i) ? 0 : val);
+                    // Convert output coordinates to flat index
+                    const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
+                    // Calculate grad by checking if a positon holds a value equal to the min value
+                    gradValue[realFlatIndex] = outputValue[outFlatIndex] === this.value[realFlatIndex] ? 1 : 0;
+                }
                 const localGrad = new Tensor(gradValue, { shape: gradShape, strides: gradStrides });
                 Tensor.addGrad(this, out.grad.withGrad(false).mul(localGrad));
             };
         }
         return keepDims ? out : out.squeeze(dims);
     }
+    // Tensor product reduction
+    softmax(dims) {
+        if (typeof this.value === "number")
+            return this;
+        if (typeof dims === "number") {
+            dims = [dims];
+        }
+        if (typeof dims === "undefined") {
+            dims = Array.from({ length: this.shape.length }, (_, index) => index);
+        }
+        // Dims that are reduced now have size-1
+        const expSumShape = this.shape.map((dim, i) => dims.includes(i) ? 1 : dim);
+        const expSumStrides = Tensor.getStrides(expSumShape);
+        const expSumSize = Tensor.shapeToSize(expSumShape);
+        const expSumValue = new Array(expSumSize).fill(0);
+        const outputShape = this.shape;
+        const outputStrides = this.strides;
+        const outputSize = Tensor.shapeToSize(outputShape);
+        const outputValue = new Array(outputSize);
+        // Calculate sums of e^xi over axes
+        for (let realFlatIndex = 0; realFlatIndex < outputSize; realFlatIndex++) {
+            const coords = Tensor.indexToCoords(realFlatIndex, outputStrides);
+            // Force 0 on reduced axes to collapse into size-1 dims
+            const expSumCoords = coords.map((val, i) => dims.includes(i) ? 0 : val);
+            // Convert exp sum coordinates to flat index
+            const expSumFlatIndex = Tensor.coordsToIndex(expSumCoords, expSumStrides);
+            // Add e^x to the sum cache
+            expSumValue[expSumFlatIndex] += Math.exp(this.value[realFlatIndex]);
+        }
+        // Calculate e^xi / sum over axes
+        for (let realFlatIndex = 0; realFlatIndex < outputSize; realFlatIndex++) {
+            const coords = Tensor.indexToCoords(realFlatIndex, outputStrides);
+            // Force 0 on reduced axes to collapse into size-1 dims
+            const expSumCoords = coords.map((val, i) => dims.includes(i) ? 0 : val);
+            // Convert exp sum coordinates to flat index
+            const expSumFlatIndex = Tensor.coordsToIndex(expSumCoords, expSumStrides);
+            // Calculate e^xi / sum
+            outputValue[realFlatIndex] = Math.exp(this.value[realFlatIndex]) / expSumValue[expSumFlatIndex];
+        }
+        const out = new Tensor(outputValue, {
+            shape: outputShape,
+            strides: outputStrides
+        });
+        // Set up gradient if needed
+        if (this.requiresGrad) {
+            out.requiresGrad = true;
+            out.children.push(this);
+            out.gradFn = () => {
+                const upstreamGrad = out.grad.withGrad(false);
+                const softmaxOutput = out.withGrad(false);
+                // Compute element-wise product: ∂L/∂σᵢ × σᵢ
+                const gradTimesOutput = upstreamGrad.mul(softmaxOutput);
+                // Sum over softmax dimensions: Σᵢ(∂L/∂σᵢ × σᵢ)
+                const sumGradOutput = gradTimesOutput.sum(dims, true); // keepDims=true for broadcasting
+                // Apply softmax gradient formula:
+                // ∂L/∂zⱼ = (∂L/∂σⱼ × σⱼ) - (σⱼ × Σᵢ(∂L/∂σᵢ × σᵢ))
+                const term1 = upstreamGrad.mul(softmaxOutput); // ∂L/∂σⱼ × σⱼ
+                const term2 = softmaxOutput.mul(sumGradOutput); // σⱼ × Σᵢ(∂L/∂σᵢ × σᵢ)
+                const localGrad = term1.sub(term2);
+                Tensor.addGrad(this, localGrad);
+            };
+        }
+        return out;
+    }
     // Tensor element-wise addition
     add(other) {
         return this.elementWiseABDAG(other, (a, b) => a + b, (self, other, outGrad) => outGrad, (self, other, outGrad) => outGrad);
@@ -1144,19 +1190,94 @@ class Tensor {
     // General matrix multiplication with different shapes
     matmul(other) {
         other = Tensor.forceTensor(other);
-        if (this.shape.length === 1 && other.shape.length === 1) {
+        const isThis1D = this.shape.length === 1;
+        const isOther1D = other.shape.length === 1;
+        if (isThis1D && isOther1D) {
             return this.dot(other);
         }
-        else if (this.shape.length === 1 && other.shape.length === 2) {
+        else if (isThis1D && other.shape.length === 2) {
             return this.unsqueeze(0).mm(other).squeeze(0);
         }
-        else if (this.shape.length === 2 && other.shape.length === 1) {
+        else if (this.shape.length === 2 && isOther1D) {
             return this.mv(other);
         }
         else if (this.shape.length === 2 && other.shape.length === 2) {
             return this.mm(other);
         }
-        // Too lazy for batched matmul
+        else if ((isThis1D && other.shape.length > 2) ||
+            (isOther1D && this.shape.length > 2) ||
+            (other.shape.length > 2 && this.shape.length > 2)) {
+            // Append/prepend dims if needed
+            const self = isThis1D ? this.unsqueeze(0) : this;
+            other = isOther1D ? other.unsqueeze(1) : other;
+            // Padding
+            const [selfStrides, otherStrides, selfShape, otherShape] = Tensor.padShape(self.strides, other.strides, self.shape, other.shape);
+            const lastDim = selfShape.length - 1;
+            // Prepare data for broadcasting
+            const batchA = self.value;
+            const batchB = other.value;
+            const batchARows = selfShape[lastDim - 1];
+            const batchACols = selfShape[lastDim];
+            const batchBRows = otherShape[lastDim - 1];
+            const batchBCols = otherShape[lastDim];
+            // Verify if can do matmul
+            if (batchACols !== batchBRows)
+                throw new Error("Invalid matrices shape for multiplication");
+            // Prepare shape, strides, size info, but more importantly the offset-related data to loop through the outer, non-matrix dims
+            // Self and other's offset data
+            const selfOffsetShape = selfShape.slice(0, -2);
+            const otherOffsetShape = otherShape.slice(0, -2);
+            const selfOffsetStrides = selfStrides.slice(0, -2);
+            const otherOffsetStrides = otherStrides.slice(0, -2);
+            // The output's offset data
+            const offsetShape = Tensor.broadcastShapes(selfOffsetShape, otherOffsetShape);
+            const offsetSize = Tensor.shapeToSize(offsetShape);
+            const offsetStrides = Tensor.getStrides(offsetShape);
+            // Output shape, strides, size, value
+            const outputShape = [...offsetShape, batchARows, batchBCols];
+            const outputStrides = Tensor.getStrides(outputShape);
+            const outputSize = Tensor.shapeToSize(outputShape);
+            const outputValue = new Array(outputSize).fill(0);
+            // Loop through outer dims and do matmul on two outer-most dims
+            for (let index = 0; index < offsetSize; index++) {
+                const coords = Tensor.indexToCoords(index, offsetStrides);
+                const offset = Tensor.coordsToIndex(coords, outputStrides.slice(0, -2));
+                const selfOffset = Tensor.coordsToUnbroadcastedIndex(coords, selfOffsetShape, selfOffsetStrides);
+                const otherOffset = Tensor.coordsToUnbroadcastedIndex(coords, otherOffsetShape, otherOffsetStrides);
+                for (let i = 0; i < batchARows; i++) {
+                    for (let j = 0; j < batchBCols; j++) {
+                        for (let k = 0; k < batchACols; k++) {
+                            const outputIdx = offset + i * outputStrides[lastDim - 1] + j * outputStrides[lastDim];
+                            const selfIdx = selfOffset + i * selfStrides[lastDim - 1] + k * selfStrides[lastDim];
+                            const otherIdx = otherOffset + k * otherStrides[lastDim - 1] + j * otherStrides[lastDim];
+                            outputValue[outputIdx] += batchA[selfIdx] * batchB[otherIdx];
+                        }
+                    }
+                }
+            }
+            const out = new Tensor(outputValue, { shape: outputShape, strides: outputStrides });
+            if (this.requiresGrad) {
+                out.requiresGrad = true;
+                out.children.push(this);
+            }
+            if (other.requiresGrad) {
+                out.requiresGrad = true;
+                out.children.push(other);
+            }
+            if (out.requiresGrad) {
+                out.gradFn = () => {
+                    other = other;
+                    const outGrad = out.grad.withGrad(false);
+                    const selfNoGrad = self.withGrad(false);
+                    const otherNoGrad = other.withGrad(false);
+                    if (this.requiresGrad)
+                        Tensor.addGrad(this, outGrad.matmul(otherNoGrad.transpose(lastDim - 1, lastDim)));
+                    if (other.requiresGrad)
+                        Tensor.addGrad(other, selfNoGrad.transpose(lastDim - 1, lastDim).matmul(outGrad));
+                };
+            }
+            return out;
+        }
         throw new Error(`Shapes [${this.shape}] and [${other.shape}] are not supported`);
     }
     // Utility to create a new tensor filled with a number

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "catniff",
-  "version": "0.2.14",
+  "version": "0.2.16",
   "description": "A small Torch-like deep learning framework for Javascript with tensor and autograd support",
   "main": "index.js",
   "scripts": {