catniff 0.5.4 → 0.5.6
This diff covers the content of publicly released versions of the package as published to its public registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the registry.
- package/README.md +4 -3
- package/dist/core.d.ts +5 -0
- package/dist/core.js +148 -30
- package/package.json +1 -1
package/README.md (CHANGED)

@@ -120,10 +120,11 @@ All available APIs are in [`./src/`](./src/) if you want to dig deeper.
 
 ## Todos
 
-*
-* More tensor ops.
-* GPU acceleration.
+* More general tensor ops.
 * More general neural net APIs.
+* GPU acceleration.
+* Comprehensive caching.
+* Bug fixes.
 * More detailed documentation.
 * Code refactoring.
 * Proper tests.
package/dist/core.d.ts (CHANGED)

@@ -40,6 +40,9 @@ export declare class Tensor {
     elementWiseSelfDAG(op: (a: number) => number, thisGrad?: (self: Tensor, outGrad: Tensor) => Tensor): Tensor;
     static forceTensor(value: TensorValue | Tensor): Tensor;
     static addGrad(tensor: Tensor, accumGrad: Tensor): void;
+    isContiguous(): boolean;
+    contiguous(): Tensor;
+    reshape(newShape: readonly number[]): Tensor;
     squeeze(dims?: number[] | number): Tensor;
     unsqueeze(dim: number): Tensor;
     sum(dims?: number[] | number, keepDims?: boolean): Tensor;
@@ -125,6 +128,7 @@ export declare class Tensor {
     softsign(): Tensor;
     silu(): Tensor;
     mish(): Tensor;
+    gelu(approximate?: string): Tensor;
     maximum(other: TensorValue | Tensor): Tensor;
     minimum(other: TensorValue | Tensor): Tensor;
     round(): Tensor;
@@ -142,6 +146,7 @@ export declare class Tensor {
     swapaxes: (dim1: number, dim2: number) => Tensor;
     swapdims: (dim1: number, dim2: number) => Tensor;
     t(): Tensor;
+    permute(dims: number[]): Tensor;
     dot(other: TensorValue | Tensor): Tensor;
     mm(other: TensorValue | Tensor): Tensor;
     bmm(other: TensorValue | Tensor): Tensor;
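Taken together, these declaration changes add a contiguity/reshape group (isContiguous, contiguous, reshape), a gelu activation with an optional approximation mode, and permute. Below is a rough usage sketch, not taken from the package docs: the import name, array-based construction, and the view behaviour of t() are assumptions based on the compiled code further down.

```ts
// Hedged usage sketch; the "catniff" import and nested-array construction
// are assumptions, not shown in this diff.
import { Tensor } from "catniff";

const x = new Tensor([[1, 2, 3], [4, 5, 6]]); // assumed 2×3 construction

const flat = x.reshape([6]);       // same 6 elements, new shape
const viewT = x.t();               // transpose; typically a strided, non-contiguous view
console.log(viewT.isContiguous()); // expected: false for such a view
const packed = viewT.contiguous(); // repacks the view into row-major order

const g = x.gelu();                // exact, erf-based GELU
const gTanh = x.gelu("tanh");      // tanh approximation

const p = x.unsqueeze(0).permute([2, 0, 1]); // 1×2×3 → 3×1×2
```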
package/dist/core.js (CHANGED)

@@ -201,9 +201,9 @@ class Tensor {
         if (out.requiresGrad) {
             out.gradFn = () => {
                 // Disable gradient collecting of gradients themselves
-                const outGrad = out.grad
-                const selfNoGrad = this.
-                const otherNoGrad = other.
+                const outGrad = out.grad;
+                const selfNoGrad = this.detach();
+                const otherNoGrad = other.detach();
                 if (this.requiresGrad)
                     Tensor.addGrad(this, thisGrad(selfNoGrad, otherNoGrad, outGrad));
                 if (other.requiresGrad)
@@ -222,8 +222,8 @@
         if (out.requiresGrad) {
             out.gradFn = () => {
                 // Disable gradient collecting of gradients themselves
-                const outGrad = out.grad
-                const selfNoGrad = this.
+                const outGrad = out.grad;
+                const selfNoGrad = this.detach();
                 if (this.requiresGrad)
                     Tensor.addGrad(this, thisGrad(selfNoGrad, outGrad));
             };
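The rewritten closures above now terminate their statements with semicolons and detach() the operands before computing gradients, so the gradient math itself is not recorded in the autograd graph (the surrounding comment's "disable gradient collecting of gradients themselves"). The toy snippet below, plain TypeScript rather than catniff code, illustrates the idea behind that detach() call: a detached node carries the same value but registers no children when used in further ops.

```ts
// Minimal toy autograd node; not catniff's implementation.
class ToyNode {
  children: ToyNode[] = [];
  grad = 0;
  constructor(public value: number, public requiresGrad = false) {}
  detach(): ToyNode {
    return new ToyNode(this.value, false); // same value, no tracking
  }
  mul(other: ToyNode): ToyNode {
    const out = new ToyNode(this.value * other.value, this.requiresGrad || other.requiresGrad);
    if (out.requiresGrad) out.children.push(this, other); // graph grows here
    return out;
  }
}

const x = new ToyNode(3, true);
const tracked = x.mul(x);                          // tracked: records children
const gradTerm = x.detach().mul(new ToyNode(2));   // untracked: no children recorded
console.log(tracked.children.length, gradTerm.children.length); // 2 0
```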
@@ -261,6 +261,64 @@
             tensor.grad = tensor.grad.add(squeezedGrad);
         }
     }
+    // Contiguity-related ops
+    isContiguous() {
+        const expectedStrides = Tensor.getStrides(this.shape);
+        if (expectedStrides.length !== this.strides.length) {
+            return false;
+        }
+        for (let i = 0; i < this.strides.length; i++) {
+            if (this.strides[i] !== expectedStrides[i]) {
+                return false;
+            }
+        }
+        return true;
+    }
+    contiguous() {
+        // Check if scalar
+        if (typeof this.value === "number")
+            return this;
+        // Check if already contiguous
+        if (this.isContiguous())
+            return this;
+        const outputStrides = Tensor.getStrides(this.shape);
+        const outputSize = Tensor.shapeToSize(this.shape);
+        const outputValue = new Array(outputSize);
+        for (let index = 0; index < outputSize; index++) {
+            const outputCoords = Tensor.indexToCoords(index, outputStrides);
+            const originalIndex = Tensor.coordsToIndex(outputCoords, this.strides);
+            outputValue[index] = this.value[originalIndex];
+        }
+        const out = new Tensor(outputValue, { shape: this.shape, strides: outputStrides });
+        // Gradient flow back to the original tensor
+        if (this.requiresGrad) {
+            out.requiresGrad = true;
+            out.children.push(this);
+            out.gradFn = () => {
+                Tensor.addGrad(this, out.grad);
+            };
+        }
+        return out;
+    }
+    reshape(newShape) {
+        // Verify shape size
+        const originalSize = Tensor.shapeToSize(this.shape);
+        const outputSize = Tensor.shapeToSize(newShape);
+        if (originalSize !== outputSize) {
+            throw new Error("Cannot reshape: incompatible sizes");
+        }
+        const outputStrides = Tensor.getStrides(newShape);
+        const out = new Tensor(this.contiguous().value, { shape: newShape, strides: outputStrides });
+        // Gradient reshaped and flow back to the original tensor
+        if (this.requiresGrad) {
+            out.requiresGrad = true;
+            out.children.push(this);
+            out.gradFn = () => {
+                Tensor.addGrad(this, out.grad.reshape(this.shape));
+            };
+        }
+        return out;
+    }
     // Tensor squeeze
     squeeze(dims) {
         if (typeof this.value === "number")
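The new contiguous() walks every output index, converts it to coordinates using the row-major strides of the target layout, and reads the source element through the tensor's own (possibly permuted) strides; reshape() then operates on that packed buffer. The standalone sketch below reimplements that stride arithmetic for a transposed 2×3 view, using hypothetical helper names rather than catniff's static methods.

```ts
// Standalone sketch of the stride math contiguous() relies on (not catniff code).
function getStrides(shape: number[]): number[] {
  const strides = new Array(shape.length).fill(1);
  for (let i = shape.length - 2; i >= 0; i--) strides[i] = strides[i + 1] * shape[i + 1];
  return strides;
}
function indexToCoords(index: number, strides: number[]): number[] {
  return strides.map((s) => { const c = Math.floor(index / s); index %= s; return c; });
}
function coordsToIndex(coords: number[], strides: number[]): number {
  return coords.reduce((acc, c, i) => acc + c * strides[i], 0);
}

// A row-major 2×3 buffer viewed as its 3×2 transpose via swapped strides:
const data = [1, 2, 3, 4, 5, 6];
const viewShape = [3, 2];
const viewStrides = [1, 3]; // transposed view, non-contiguous
const packed = new Array(6);
for (let i = 0; i < 6; i++) {
  const coords = indexToCoords(i, getStrides(viewShape));
  packed[i] = data[coordsToIndex(coords, viewStrides)];
}
console.log(packed); // [1, 4, 2, 5, 3, 6], the transpose, now contiguous
```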
@@ -302,7 +360,7 @@
             out.requiresGrad = true;
             out.children.push(this);
             out.gradFn = () => {
-                let restoredGrad = out.grad
+                let restoredGrad = out.grad;
                 for (let i = dims.length - 1; i >= 0; i--) {
                     restoredGrad = restoredGrad.unsqueeze(dims[i]);
                 }
@@ -338,7 +396,7 @@
             out.requiresGrad = true;
             out.children.push(this);
             out.gradFn = () => {
-                Tensor.addGrad(this, out.grad.
+                Tensor.addGrad(this, out.grad.squeeze(dim));
             };
         }
         return out;
@@ -397,7 +455,7 @@
             out.children.push(this);
             out.gradFn = () => {
                 const localGrad = new Tensor(gradValue, { shape: gradShape, strides: gradStrides });
-                Tensor.addGrad(this, out.grad.
+                Tensor.addGrad(this, out.grad.mul(localGrad));
             };
         }
         return keepDims ? out : out.squeeze(dims);
@@ -454,7 +512,7 @@
                     gradValue[realFlatIndex] = outputValue[outFlatIndex] / this.value[realFlatIndex];
                 }
                 const localGrad = new Tensor(gradValue, { shape: gradShape, strides: gradStrides });
-                Tensor.addGrad(this, out.grad.
+                Tensor.addGrad(this, out.grad.mul(localGrad));
             };
         }
         return keepDims ? out : out.squeeze(dims);
@@ -518,7 +576,7 @@
                     gradValue[realFlatIndex] = 1 / outputFeeders[outFlatIndex];
                 }
                 const localGrad = new Tensor(gradValue, { shape: gradShape, strides: gradStrides });
-                Tensor.addGrad(this, out.grad.
+                Tensor.addGrad(this, out.grad.mul(localGrad));
             };
         }
         return keepDims ? out : out.squeeze(dims);
@@ -588,7 +646,7 @@
                     gradValue[realFlatIndex] = outputValue[outFlatIndex] === originalValue[realFlatIndex] ? 1 / shareCounts[outFlatIndex] : 0;
                 }
                 const localGrad = new Tensor(gradValue, { shape: gradShape, strides: gradStrides });
-                Tensor.addGrad(this, out.grad.
+                Tensor.addGrad(this, out.grad.mul(localGrad));
             };
         }
         return keepDims ? out : out.squeeze(dims);
@@ -658,7 +716,7 @@
                     gradValue[realFlatIndex] = outputValue[outFlatIndex] === originalValue[realFlatIndex] ? 1 / shareCounts[outFlatIndex] : 0;
                 }
                 const localGrad = new Tensor(gradValue, { shape: gradShape, strides: gradStrides });
-                Tensor.addGrad(this, out.grad.
+                Tensor.addGrad(this, out.grad.mul(localGrad));
             };
         }
         return keepDims ? out : out.squeeze(dims);
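The reduction hunks above all restore the same truncated line: the upstream gradient out.grad is multiplied element-wise by a precomputed local gradient (output divided by each input for the product-style reduction, 1 over the feeder count for the averaging one, and a 1/shareCount split between elements that attain an extremum). A tiny numeric sketch of that chain rule for the product case, in plain TypeScript rather than catniff code:

```ts
// Chain rule these reductions apply: upstream grad times a precomputed local
// gradient. Shown for a product over a flat array, where d(prod)/dx_i = prod / x_i.
const x = [2, 3, 4];
const prod = x.reduce((a, b) => a * b, 1);   // 24
const upstream = 1;                          // dL/d(prod)
const localGrad = x.map((xi) => prod / xi);  // [12, 8, 6]
const dLdx = localGrad.map((g) => g * upstream);
console.log(dLdx); // [12, 8, 6]
```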
@@ -727,8 +785,8 @@
             out.requiresGrad = true;
             out.children.push(this);
             out.gradFn = () => {
-                const upstreamGrad = out.grad
-                const softmaxOutput = out.
+                const upstreamGrad = out.grad;
+                const softmaxOutput = out.detach();
                 // Compute element-wise product: ∂L/∂σᵢ × σᵢ
                 const gradTimesOutput = upstreamGrad.mul(softmaxOutput);
                 // Sum over softmax dimensions: Σᵢ(∂L/∂σᵢ × σᵢ)
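These restored lines feed the standard softmax Jacobian-vector product that the surrounding comments describe: with upstream gradient g and softmax output σ, the input gradient is σᵢ · (gᵢ − Σⱼ gⱼσⱼ). A standalone numeric check of that rule (not catniff code):

```ts
// Softmax backward rule: dL/dx_i = sigma_i * (g_i - sum_j g_j * sigma_j)
const logits = [1, 2, 3];
const exps = logits.map(Math.exp);
const Z = exps.reduce((a, b) => a + b, 0);
const sigma = exps.map((e) => e / Z);

const g = [0.1, -0.2, 0.3]; // upstream gradient
const dot = g.reduce((acc, gi, i) => acc + gi * sigma[i], 0); // Σ g⊙σ
const dx = sigma.map((si, i) => si * (g[i] - dot));
console.log(dx);
```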
@@ -1000,6 +1058,34 @@
             return outGrad.mul(derivative);
         });
     }
+    // Tensor element-wise gelu
+    gelu(approximate = "none") {
+        if (approximate === "none") {
+            return this.elementWiseSelfDAG((a) => 0.5 * a * (1 + (0, utils_1.erf)(a / Math.sqrt(2))), (self, outGrad) => {
+                const sqrt2 = Math.sqrt(2);
+                const sqrt2OverPi = Math.sqrt(2 / Math.PI);
+                const xOverSqrt2 = self.div(sqrt2);
+                const erfVal = xOverSqrt2.erf();
+                const phi = xOverSqrt2.square().neg().exp().div(sqrt2OverPi);
+                const derivative = erfVal.add(1).mul(0.5).add(self.mul(phi));
+                return outGrad.mul(derivative);
+            });
+        }
+        else if (approximate === "tanh") {
+            return this.elementWiseSelfDAG((a) => 0.5 * a * (1 + Math.tanh(Math.sqrt(2 / Math.PI) * (a + 0.044715 * a * a * a))), (self, outGrad) => {
+                const sqrt2OverPi = Math.sqrt(2 / Math.PI);
+                const c = 0.044715;
+                const tanhArg = self.add(self.pow(3).mul(c)).mul(sqrt2OverPi);
+                const tanhVal = tanhArg.tanh();
+                const sechSquared = tanhVal.square().neg().add(1);
+                const term1 = tanhVal.add(1).mul(0.5);
+                const term2 = self.mul(sechSquared).mul(sqrt2OverPi).mul(self.square().mul(c * 3).add(1)).mul(0.5);
+                const derivative = term1.add(term2);
+                return outGrad.mul(derivative);
+            });
+        }
+        throw new Error("Specified approximation does not exist");
+    }
     // Tensor element-wise maximum
     maximum(other) {
         return this.elementWiseABDAG(other, (a, b) => Math.max(a, b), (self, other, outGrad) => outGrad.mul(self.gt(other).add(self.eq(other).mul(0.5))), (self, other, outGrad) => outGrad.mul(other.gt(self).add(other.eq(self).mul(0.5))));
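The new gelu() dispatches between the exact erf form, 0.5·x·(1 + erf(x/√2)), and the tanh approximation, 0.5·x·(1 + tanh(√(2/π)·(x + 0.044715·x³))). The sketch below compares the two forward formulas using a generic numeric erf approximation; it does not use catniff's own utils erf, and the error bound is a property of the approximation, not of the package.

```ts
// Compare the two forward GELU forms the new method selects between.
function erf(x: number): number {
  // Abramowitz & Stegun formula 7.1.26 (absolute error around 1.5e-7)
  const sign = Math.sign(x);
  const ax = Math.abs(x);
  const t = 1 / (1 + 0.3275911 * ax);
  const poly =
    ((((1.061405429 * t - 1.453152027) * t + 1.421413741) * t - 0.284496736) * t + 0.254829592) * t;
  return sign * (1 - poly * Math.exp(-ax * ax));
}

const geluExact = (x: number) => 0.5 * x * (1 + erf(x / Math.sqrt(2)));
const geluTanh = (x: number) =>
  0.5 * x * (1 + Math.tanh(Math.sqrt(2 / Math.PI) * (x + 0.044715 * x ** 3)));

for (const x of [-2, -0.5, 0, 0.5, 2]) {
  console.log(x, geluExact(x).toFixed(5), geluTanh(x).toFixed(5));
}
```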
@@ -1068,7 +1154,7 @@
         if (this.requiresGrad) {
             out.children.push(this);
             out.gradFn = () => {
-                Tensor.addGrad(this, out.grad.
+                Tensor.addGrad(this, out.grad.transpose(dim1, dim2));
             };
         }
         return out;
@@ -1083,6 +1169,39 @@
         }
         return this.transpose(0, 1);
     }
+    // Permute
+    permute(dims) {
+        if (dims.length !== this.shape.length) {
+            throw new Error("Permutation must specify all dimensions");
+        }
+        // Compute new shape and strides
+        const newShape = new Array(dims.length);
+        const newStrides = new Array(dims.length);
+        for (let index = 0; index < dims.length; index++) {
+            const dim = dims[index];
+            newShape[index] = this.shape[dim];
+            newStrides[index] = this.strides[dim];
+        }
+        const out = new Tensor(this.value, {
+            shape: newShape,
+            strides: newStrides
+        });
+        if (this.requiresGrad) {
+            out.requiresGrad = true;
+            out.children.push(this);
+            out.gradFn = () => {
+                // Compute inverse permutation
+                const inverseAxes = new Array(dims.length);
+                for (let i = 0; i < dims.length; i++) {
+                    inverseAxes[dims[i]] = i;
+                }
+                // Permute gradient back to original order
+                const permutedGrad = out.grad.permute(inverseAxes);
+                Tensor.addGrad(this, permutedGrad);
+            };
+        }
+        return out;
+    }
     // 1D tensor dot product
     dot(other) {
         other = Tensor.forceTensor(other);
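permute() reorders shape and strides without copying data, and its gradFn sends out.grad back through the inverse permutation (inverseAxes[dims[i]] = i). A small standalone sketch of that inverse-permutation bookkeeping, in plain TypeScript rather than through the Tensor API:

```ts
// If dims maps output axis i to input axis dims[i], then inverse[dims[i]] = i
// maps the permuted axes back to their original order.
function invertPermutation(dims: number[]): number[] {
  const inverse = new Array<number>(dims.length);
  for (let i = 0; i < dims.length; i++) inverse[dims[i]] = i;
  return inverse;
}

const dims = [2, 0, 1];                   // e.g. x.permute([2, 0, 1])
const inverse = invertPermutation(dims);  // [1, 2, 0]

// Applying dims and then inverse restores the identity ordering:
const axes = [0, 1, 2];
const permuted = dims.map((d) => axes[d]);        // [2, 0, 1]
const restored = inverse.map((d) => permuted[d]); // [0, 1, 2]
console.log(inverse, restored);
```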
@@ -1110,9 +1229,9 @@
         if (out.requiresGrad) {
             out.gradFn = () => {
                 // Disable gradient collecting of gradients themselves
-                const outGrad = out.grad
-                const selfNoGrad = this.
-                const otherNoGrad = other.
+                const outGrad = out.grad;
+                const selfNoGrad = this.detach();
+                const otherNoGrad = other.detach();
                 if (this.requiresGrad)
                     Tensor.addGrad(this, outGrad.mul(otherNoGrad));
                 if (other.requiresGrad)
@@ -1165,9 +1284,9 @@
         if (out.requiresGrad) {
             out.gradFn = () => {
                 // Disable gradient collecting of gradients themselves
-                const outGrad = out.grad
-                const selfNoGrad = this.
-                const otherNoGrad = other.
+                const outGrad = out.grad;
+                const selfNoGrad = this.detach();
+                const otherNoGrad = other.detach();
                 if (this.requiresGrad)
                     Tensor.addGrad(this, outGrad.mm(otherNoGrad.t()));
                 if (other.requiresGrad)
@@ -1223,9 +1342,9 @@
         if (out.requiresGrad) {
             out.gradFn = () => {
                 // Disable gradient collecting of gradients themselves
-                const outGrad = out.grad
-                const selfNoGrad = this.
-                const otherNoGrad = other.
+                const outGrad = out.grad;
+                const selfNoGrad = this.detach();
+                const otherNoGrad = other.detach();
                 if (this.requiresGrad)
                     Tensor.addGrad(this, outGrad.bmm(otherNoGrad.transpose(1, 2)));
                 if (other.requiresGrad)
@@ -1260,9 +1379,8 @@
         else if (this.shape.length === 2 && other.shape.length === 2) {
             return this.mm(other);
         }
-        else if ((
-            (
-            (other.shape.length > 2 && this.shape.length > 2)) {
+        else if ((this.shape.length > 0 && other.shape.length >= 2) ||
+            (this.shape.length >= 2 && other.shape.length > 0)) {
             // Append/prepend dims if needed
             const self = isThis1D ? this.unsqueeze(0) : this;
             other = isOther1D ? other.unsqueeze(1) : other;
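The rewritten condition routes any pairing where one operand is at least 2-D and the other is at least 1-D into the broadcasting path, after unsqueezing 1-D operands (a row vector on the left, a column vector on the right). The sketch below mirrors that branch structure in plain TypeScript; the 1-D·1-D dot branch is assumed from the surrounding methods rather than shown in this hunk.

```ts
// Dispatch sketch mirroring the matmul branches, by operand rank only.
function matmulRoute(aDims: number, bDims: number): string {
  if (aDims === 1 && bDims === 1) return "dot";   // assumed earlier branch, not in this hunk
  if (aDims === 2 && bDims === 2) return "mm";    // plain matrix product
  if ((aDims > 0 && bDims >= 2) || (aDims >= 2 && bDims > 0))
    return "broadcast path (1-D sides get unsqueezed)";
  return "unsupported";
}

console.log(matmulRoute(1, 3)); // vector × batched matrices → broadcast path
console.log(matmulRoute(3, 1)); // batched matrices × vector → broadcast path
console.log(matmulRoute(2, 2)); // mm
```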
@@ -1323,9 +1441,9 @@
         if (out.requiresGrad) {
             out.gradFn = () => {
                 other = other;
-                const outGrad = out.grad
-                const selfNoGrad = self.
-                const otherNoGrad = other.
+                const outGrad = out.grad;
+                const selfNoGrad = self.detach();
+                const otherNoGrad = other.detach();
                 if (this.requiresGrad)
                     Tensor.addGrad(this, outGrad.matmul(otherNoGrad.transpose(lastDim - 1, lastDim)));
                 if (other.requiresGrad)
|