catniff 0.5.5 → 0.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
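In summary, 0.5.5 → 0.5.7 adds stride-aware `isContiguous()`, `contiguous()`, `reshape()`, and `permute()` methods to `Tensor`; switches gradient closures from `withGrad(false)` to `detach()`; moves `LSTMCell` from a top-level export into the `nn` namespace and adds a new `nn.LayerNorm` module; fixes a `this`-binding bug in the recursive `nn.state.getStateDict`/`loadStateDict` calls; and bumps the package version.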
package/README.md CHANGED
@@ -120,10 +120,11 @@ All available APIs are in [`./src/`](./src/) if you want to dig deeper.
 
 ## Todos
 
-* Bug fixes.
-* More tensor ops.
-* GPU acceleration.
+* More general tensor ops.
 * More general neural net APIs.
+* GPU acceleration.
+* Comprehensive caching.
+* Bug fixes.
 * More detailed documentation.
 * Code refactoring.
 * Proper tests.
package/dist/core.d.ts CHANGED
@@ -40,6 +40,9 @@ export declare class Tensor {
     elementWiseSelfDAG(op: (a: number) => number, thisGrad?: (self: Tensor, outGrad: Tensor) => Tensor): Tensor;
     static forceTensor(value: TensorValue | Tensor): Tensor;
     static addGrad(tensor: Tensor, accumGrad: Tensor): void;
+    isContiguous(): boolean;
+    contiguous(): Tensor;
+    reshape(newShape: readonly number[]): Tensor;
     squeeze(dims?: number[] | number): Tensor;
     unsqueeze(dim: number): Tensor;
     sum(dims?: number[] | number, keepDims?: boolean): Tensor;
@@ -143,6 +146,7 @@ export declare class Tensor {
     swapaxes: (dim1: number, dim2: number) => Tensor;
     swapdims: (dim1: number, dim2: number) => Tensor;
     t(): Tensor;
+    permute(dims: number[]): Tensor;
     dot(other: TensorValue | Tensor): Tensor;
     mm(other: TensorValue | Tensor): Tensor;
     bmm(other: TensorValue | Tensor): Tensor;
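The four declarations added above give `Tensor` stride-aware shape manipulation. A minimal usage sketch, assuming the package's Torch-like construction from nested arrays and a top-level `Tensor` export (not part of this diff):

```js
const { Tensor } = require("catniff");

// Freshly constructed tensors are laid out row-major, hence contiguous
const t = new Tensor([[1, 2, 3], [4, 5, 6]]); // shape [2, 3]
console.log(t.isContiguous()); // true

// t() swaps the two axes by reordering strides, not data,
// so the transpose is a non-contiguous view over the same buffer
const tt = t.t(); // shape [3, 2]
console.log(tt.isContiguous()); // false

// contiguous() materializes the view into row-major storage;
// reshape() requires the element counts to match (3 * 2 = 2 * 3)
const r = tt.contiguous().reshape([2, 3]);
```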
package/dist/core.js CHANGED
@@ -201,9 +201,9 @@ class Tensor {
         if (out.requiresGrad) {
             out.gradFn = () => {
                 // Disable gradient collecting of gradients themselves
-                const outGrad = out.grad.withGrad(false);
-                const selfNoGrad = this.withGrad(false);
-                const otherNoGrad = other.withGrad(false);
+                const outGrad = out.grad;
+                const selfNoGrad = this.detach();
+                const otherNoGrad = other.detach();
                 if (this.requiresGrad)
                     Tensor.addGrad(this, thisGrad(selfNoGrad, otherNoGrad, outGrad));
                 if (other.requiresGrad)
@@ -222,8 +222,8 @@ class Tensor {
         if (out.requiresGrad) {
             out.gradFn = () => {
                 // Disable gradient collecting of gradients themselves
-                const outGrad = out.grad.withGrad(false);
-                const selfNoGrad = this.withGrad(false);
+                const outGrad = out.grad;
+                const selfNoGrad = this.detach();
                 if (this.requiresGrad)
                     Tensor.addGrad(this, thisGrad(selfNoGrad, outGrad));
             };
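This hunk and the matching ones below make two related changes inside every `gradFn` closure: the upstream `out.grad` is used as-is, and the forward operands are passed through `detach()` (mirroring PyTorch's naming) instead of `withGrad(false)`. The point of both spellings is the same: tensors fed into gradient arithmetic must not record autograd history, or each backward pass would keep growing the graph. A self-contained toy illustration of that contract (not catniff code):

```js
// Toy autograd node: if gradient math ran on requiresGrad inputs,
// every backward pass would append new nodes to the graph.
class Node {
    constructor(value, requiresGrad = false) {
        this.value = value;
        this.requiresGrad = requiresGrad;
        this.children = [];
    }
    detach() {
        // Same value, no autograd bookkeeping -- the contract gradFn relies on
        return new Node(this.value, false);
    }
    mul(other) {
        const out = new Node(this.value * other.value, this.requiresGrad || other.requiresGrad);
        if (out.requiresGrad) out.children.push(this, other);
        return out;
    }
}
const a = new Node(2, true), b = new Node(3, true);
console.log(a.mul(b).children.length);                   // 2 -- graph grows
console.log(a.detach().mul(b.detach()).children.length); // 0 -- grad math stays off-graph
```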
@@ -261,6 +261,64 @@ class Tensor {
             tensor.grad = tensor.grad.add(squeezedGrad);
         }
     }
+    // Contiguity-related ops
+    isContiguous() {
+        const expectedStrides = Tensor.getStrides(this.shape);
+        if (expectedStrides.length !== this.strides.length) {
+            return false;
+        }
+        for (let i = 0; i < this.strides.length; i++) {
+            if (this.strides[i] !== expectedStrides[i]) {
+                return false;
+            }
+        }
+        return true;
+    }
+    contiguous() {
+        // Check if scalar
+        if (typeof this.value === "number")
+            return this;
+        // Check if already contiguous
+        if (this.isContiguous())
+            return this;
+        const outputStrides = Tensor.getStrides(this.shape);
+        const outputSize = Tensor.shapeToSize(this.shape);
+        const outputValue = new Array(outputSize);
+        for (let index = 0; index < outputSize; index++) {
+            const outputCoords = Tensor.indexToCoords(index, outputStrides);
+            const originalIndex = Tensor.coordsToIndex(outputCoords, this.strides);
+            outputValue[index] = this.value[originalIndex];
+        }
+        const out = new Tensor(outputValue, { shape: this.shape, strides: outputStrides });
+        // Gradient flow back to the original tensor
+        if (this.requiresGrad) {
+            out.requiresGrad = true;
+            out.children.push(this);
+            out.gradFn = () => {
+                Tensor.addGrad(this, out.grad);
+            };
+        }
+        return out;
+    }
+    reshape(newShape) {
+        // Verify shape size
+        const originalSize = Tensor.shapeToSize(this.shape);
+        const outputSize = Tensor.shapeToSize(newShape);
+        if (originalSize !== outputSize) {
+            throw new Error("Cannot reshape: incompatible sizes");
+        }
+        const outputStrides = Tensor.getStrides(newShape);
+        const out = new Tensor(this.contiguous().value, { shape: newShape, strides: outputStrides });
+        // Gradient reshaped and flow back to the original tensor
+        if (this.requiresGrad) {
+            out.requiresGrad = true;
+            out.children.push(this);
+            out.gradFn = () => {
+                Tensor.addGrad(this, out.grad.reshape(this.shape));
+            };
+        }
+        return out;
+    }
     // Tensor squeeze
     squeeze(dims) {
         if (typeof this.value === "number")
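`isContiguous()` compares the tensor's stored strides against the canonical row-major strides for its shape (what `Tensor.getStrides` evidently computes), and `contiguous()` gathers elements through the coordinate round-trip into a fresh row-major buffer. A standalone sketch of the stride arithmetic, with an illustrative helper name:

```js
// Row-major strides for a shape: the last dimension moves fastest.
// For shape [2, 3], strides = [3, 1], so element (i, j) lives at i*3 + j*1.
function rowMajorStrides(shape) {
    const strides = new Array(shape.length);
    let step = 1;
    for (let d = shape.length - 1; d >= 0; d--) {
        strides[d] = step;
        step *= shape[d];
    }
    return strides;
}

console.log(rowMajorStrides([2, 3])); // [3, 1]
console.log(rowMajorStrides([3, 2])); // [2, 1]
```

A transposed `[2, 3]` tensor keeps strides `[1, 3]` for its new shape `[3, 2]`, which differs from the canonical `[2, 1]`, so `isContiguous()` correctly reports `false` and `contiguous()` copies the data.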
@@ -302,7 +360,7 @@ class Tensor {
             out.requiresGrad = true;
             out.children.push(this);
             out.gradFn = () => {
-                let restoredGrad = out.grad.withGrad(false);
+                let restoredGrad = out.grad;
                 for (let i = dims.length - 1; i >= 0; i--) {
                     restoredGrad = restoredGrad.unsqueeze(dims[i]);
                 }
@@ -338,7 +396,7 @@ class Tensor {
             out.requiresGrad = true;
             out.children.push(this);
             out.gradFn = () => {
-                Tensor.addGrad(this, out.grad.withGrad(false).squeeze(dim));
+                Tensor.addGrad(this, out.grad.squeeze(dim));
             };
         }
         return out;
@@ -397,7 +455,7 @@ class Tensor {
             out.children.push(this);
             out.gradFn = () => {
                 const localGrad = new Tensor(gradValue, { shape: gradShape, strides: gradStrides });
-                Tensor.addGrad(this, out.grad.withGrad(false).mul(localGrad));
+                Tensor.addGrad(this, out.grad.mul(localGrad));
             };
         }
         return keepDims ? out : out.squeeze(dims);
@@ -454,7 +512,7 @@ class Tensor {
                     gradValue[realFlatIndex] = outputValue[outFlatIndex] / this.value[realFlatIndex];
                 }
                 const localGrad = new Tensor(gradValue, { shape: gradShape, strides: gradStrides });
-                Tensor.addGrad(this, out.grad.withGrad(false).mul(localGrad));
+                Tensor.addGrad(this, out.grad.mul(localGrad));
             };
         }
         return keepDims ? out : out.squeeze(dims);
@@ -518,7 +576,7 @@ class Tensor {
                     gradValue[realFlatIndex] = 1 / outputFeeders[outFlatIndex];
                 }
                 const localGrad = new Tensor(gradValue, { shape: gradShape, strides: gradStrides });
-                Tensor.addGrad(this, out.grad.withGrad(false).mul(localGrad));
+                Tensor.addGrad(this, out.grad.mul(localGrad));
             };
         }
         return keepDims ? out : out.squeeze(dims);
@@ -588,7 +646,7 @@ class Tensor {
                     gradValue[realFlatIndex] = outputValue[outFlatIndex] === originalValue[realFlatIndex] ? 1 / shareCounts[outFlatIndex] : 0;
                 }
                 const localGrad = new Tensor(gradValue, { shape: gradShape, strides: gradStrides });
-                Tensor.addGrad(this, out.grad.withGrad(false).mul(localGrad));
+                Tensor.addGrad(this, out.grad.mul(localGrad));
             };
         }
         return keepDims ? out : out.squeeze(dims);
@@ -658,7 +716,7 @@ class Tensor {
                     gradValue[realFlatIndex] = outputValue[outFlatIndex] === originalValue[realFlatIndex] ? 1 / shareCounts[outFlatIndex] : 0;
                 }
                 const localGrad = new Tensor(gradValue, { shape: gradShape, strides: gradStrides });
-                Tensor.addGrad(this, out.grad.withGrad(false).mul(localGrad));
+                Tensor.addGrad(this, out.grad.mul(localGrad));
             };
         }
         return keepDims ? out : out.squeeze(dims);
@@ -727,8 +785,8 @@ class Tensor {
             out.requiresGrad = true;
             out.children.push(this);
             out.gradFn = () => {
-                const upstreamGrad = out.grad.withGrad(false);
-                const softmaxOutput = out.withGrad(false);
+                const upstreamGrad = out.grad;
+                const softmaxOutput = out.detach();
                 // Compute element-wise product: ∂L/∂σᵢ × σᵢ
                 const gradTimesOutput = upstreamGrad.mul(softmaxOutput);
                 // Sum over softmax dimensions: Σᵢ(∂L/∂σᵢ × σᵢ)
@@ -1096,7 +1154,7 @@ class Tensor {
         if (this.requiresGrad) {
             out.children.push(this);
             out.gradFn = () => {
-                Tensor.addGrad(this, out.grad.withGrad(false).transpose(dim1, dim2));
+                Tensor.addGrad(this, out.grad.transpose(dim1, dim2));
             };
         }
         return out;
@@ -1111,6 +1169,39 @@ class Tensor {
         }
         return this.transpose(0, 1);
     }
+    // Permute
+    permute(dims) {
+        if (dims.length !== this.shape.length) {
+            throw new Error("Permutation must specify all dimensions");
+        }
+        // Compute new shape and strides
+        const newShape = new Array(dims.length);
+        const newStrides = new Array(dims.length);
+        for (let index = 0; index < dims.length; index++) {
+            const dim = dims[index];
+            newShape[index] = this.shape[dim];
+            newStrides[index] = this.strides[dim];
+        }
+        const out = new Tensor(this.value, {
+            shape: newShape,
+            strides: newStrides
+        });
+        if (this.requiresGrad) {
+            out.requiresGrad = true;
+            out.children.push(this);
+            out.gradFn = () => {
+                // Compute inverse permutation
+                const inverseAxes = new Array(dims.length);
+                for (let i = 0; i < dims.length; i++) {
+                    inverseAxes[dims[i]] = i;
+                }
+                // Permute gradient back to original order
+                const permutedGrad = out.grad.permute(inverseAxes);
+                Tensor.addGrad(this, permutedGrad);
+            };
+        }
+        return out;
+    }
     // 1D tensor dot product
     dot(other) {
         other = Tensor.forceTensor(other);
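`permute` is a zero-copy view: it reorders `shape` and `strides` without touching `value`, and its backward pass routes the gradient through the inverse permutation so axes line up with the original order again. The inverse satisfies `inverse[dims[i]] = i`, as in this standalone check:

```js
// Permuting shape [2, 3, 4] by [2, 0, 1] yields shape [4, 2, 3];
// permuting that result by the inverse restores [2, 3, 4].
const dims = [2, 0, 1];
const inverse = new Array(dims.length);
for (let i = 0; i < dims.length; i++) inverse[dims[i]] = i;
console.log(inverse); // [1, 2, 0]
```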
@@ -1138,9 +1229,9 @@ class Tensor {
         if (out.requiresGrad) {
             out.gradFn = () => {
                 // Disable gradient collecting of gradients themselves
-                const outGrad = out.grad.withGrad(false);
-                const selfNoGrad = this.withGrad(false);
-                const otherNoGrad = other.withGrad(false);
+                const outGrad = out.grad;
+                const selfNoGrad = this.detach();
+                const otherNoGrad = other.detach();
                 if (this.requiresGrad)
                     Tensor.addGrad(this, outGrad.mul(otherNoGrad));
                 if (other.requiresGrad)
@@ -1193,9 +1284,9 @@ class Tensor {
         if (out.requiresGrad) {
             out.gradFn = () => {
                 // Disable gradient collecting of gradients themselves
-                const outGrad = out.grad.withGrad(false);
-                const selfNoGrad = this.withGrad(false);
-                const otherNoGrad = other.withGrad(false);
+                const outGrad = out.grad;
+                const selfNoGrad = this.detach();
+                const otherNoGrad = other.detach();
                 if (this.requiresGrad)
                     Tensor.addGrad(this, outGrad.mm(otherNoGrad.t()));
                 if (other.requiresGrad)
@@ -1251,9 +1342,9 @@ class Tensor {
         if (out.requiresGrad) {
             out.gradFn = () => {
                 // Disable gradient collecting of gradients themselves
-                const outGrad = out.grad.withGrad(false);
-                const selfNoGrad = this.withGrad(false);
-                const otherNoGrad = other.withGrad(false);
+                const outGrad = out.grad;
+                const selfNoGrad = this.detach();
+                const otherNoGrad = other.detach();
                 if (this.requiresGrad)
                     Tensor.addGrad(this, outGrad.bmm(otherNoGrad.transpose(1, 2)));
                 if (other.requiresGrad)
@@ -1350,9 +1441,9 @@ class Tensor {
         if (out.requiresGrad) {
             out.gradFn = () => {
                 other = other;
-                const outGrad = out.grad.withGrad(false);
-                const selfNoGrad = self.withGrad(false);
-                const otherNoGrad = other.withGrad(false);
+                const outGrad = out.grad;
+                const selfNoGrad = self.detach();
+                const otherNoGrad = other.detach();
                 if (this.requiresGrad)
                     Tensor.addGrad(this, outGrad.matmul(otherNoGrad.transpose(lastDim - 1, lastDim)));
                 if (other.requiresGrad)
package/dist/nn.d.ts CHANGED
@@ -29,7 +29,7 @@ declare class GRUCell {
     constructor(inputSize: number, hiddenSize: number, bias?: boolean, device?: string);
     forward(input: Tensor | TensorValue, hidden: Tensor | TensorValue): Tensor;
 }
-export declare class LSTMCell {
+declare class LSTMCell {
     weightII: Tensor;
     weightIF: Tensor;
     weightIG: Tensor;
@@ -49,6 +49,14 @@ export declare class LSTMCell {
     constructor(inputSize: number, hiddenSize: number, bias?: boolean, device?: string);
     forward(input: Tensor | TensorValue, hidden: Tensor | TensorValue, cell: Tensor | TensorValue): [Tensor, Tensor];
 }
+declare class LayerNorm {
+    weight?: Tensor;
+    bias?: Tensor;
+    eps: number;
+    normalizedShape: number[];
+    constructor(normalizedShape: number | number[], eps?: number, elementwiseAffine?: boolean, bias?: boolean, device?: string);
+    forward(input: Tensor | TensorValue): Tensor;
+}
 interface StateDict {
     [key: string]: any;
 }
@@ -56,6 +64,8 @@ export declare const nn: {
     Linear: typeof Linear;
     RNNCell: typeof RNNCell;
     GRUCell: typeof GRUCell;
+    LSTMCell: typeof LSTMCell;
+    LayerNorm: typeof LayerNorm;
     state: {
         getParameters(model: any, visited?: WeakSet<object>): Tensor[];
         getStateDict(model: any, prefix?: string, visited?: WeakSet<object>): StateDict;
package/dist/nn.js CHANGED
@@ -1,6 +1,6 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.nn = exports.LSTMCell = void 0;
+exports.nn = void 0;
 const core_1 = require("./core");
 function linearTransform(input, weight, bias) {
     let output = input.matmul(weight.t());
@@ -144,7 +144,51 @@ class LSTMCell {
         return [h, c];
     }
 }
-exports.LSTMCell = LSTMCell;
+class LayerNorm {
+    weight;
+    bias;
+    eps;
+    normalizedShape;
+    constructor(normalizedShape, eps = 1e-5, elementwiseAffine = true, bias = true, device) {
+        this.eps = eps;
+        this.normalizedShape = Array.isArray(normalizedShape) ? normalizedShape : [normalizedShape];
+        if (this.normalizedShape.length === 0) {
+            throw new Error("Normalized shape cannot be empty");
+        }
+        if (elementwiseAffine) {
+            this.weight = core_1.Tensor.ones(this.normalizedShape, { requiresGrad: true, device });
+            if (bias) {
+                this.bias = core_1.Tensor.zeros(this.normalizedShape, { requiresGrad: true, device });
+            }
+        }
+    }
+    forward(input) {
+        input = core_1.Tensor.forceTensor(input);
+        // Normalize over the specified dimensions
+        const normalizedDims = this.normalizedShape.length;
+        const startDim = input.shape.length - normalizedDims;
+        if (startDim < 0) {
+            throw new Error("Input does not have enough dims to normalize");
+        }
+        const dims = [];
+        for (let i = 0; i < normalizedDims; i++) {
+            if (input.shape[startDim + i] !== this.normalizedShape[i]) {
+                throw new Error(`Shape mismatch at dim ${startDim + i}: expected ${this.normalizedShape[i]}, got ${input.shape[startDim + i]}`);
+            }
+            dims.push(startDim + i);
+        }
+        const mean = input.mean(dims, true);
+        const variance = input.sub(mean).pow(2).mean(dims, true);
+        let normalized = input.sub(mean).div(variance.add(this.eps).sqrt());
+        if (this.weight) {
+            normalized = normalized.mul(this.weight);
+        }
+        if (this.bias) {
+            normalized = normalized.add(this.bias);
+        }
+        return normalized;
+    }
+}
 const state = {
     getParameters(model, visited = new WeakSet()) {
         if (visited.has(model))
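The new `LayerNorm` follows PyTorch's semantics: it normalizes over the trailing `normalizedShape` dimensions with the biased variance, then applies the optional elementwise affine `weight`/`bias`. A hedged usage sketch, again assuming construction from nested arrays and top-level `Tensor`/`nn` exports:

```js
const { Tensor, nn } = require("catniff");

// Normalize the last dimension (size 4) of a [2, 4] input
const ln = new nn.LayerNorm(4);
const x = new Tensor([[1, 2, 3, 4], [2, 4, 6, 8]]);
const y = ln.forward(x);
// Each row of y now has roughly zero mean and unit variance,
// scaled by ln.weight (initialized to ones) and shifted by ln.bias (zeros).
```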
@@ -178,7 +222,7 @@ const state = {
             stateDict[fullKey] = value.val();
         }
         else if (typeof value === "object" && value !== null) {
-            Object.assign(stateDict, this.getStateDict(value, fullKey, visited));
+            Object.assign(stateDict, state.getStateDict(value, fullKey, visited));
         }
     }
     return stateDict;
@@ -196,7 +240,7 @@ const state = {
             value.replace(new core_1.Tensor(stateDict[fullKey], { device: value.device }));
         }
         else if (typeof value === "object" && value !== null) {
-            this.loadStateDict(value, stateDict, fullKey, visited);
+            state.loadStateDict(value, stateDict, fullKey, visited);
         }
     }
 }
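The two hunks above fix a latent `this`-binding bug: the recursive calls previously went through `this`, which is only correct while the methods are invoked as `state.getStateDict(...)`. If either function is destructured or passed as a callback, `this` becomes `undefined` in strict mode and the recursion throws. Referencing the `state` object directly makes the recursion binding-independent; the failure mode in miniature:

```js
"use strict";
const obj = {
    label: "demo",
    whoAmI() { return this; }
};
console.log(obj.whoAmI() === obj); // true -- called through the object
const { whoAmI } = obj;
console.log(whoAmI());             // undefined -- detached call loses `this`
```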
@@ -205,5 +249,7 @@ exports.nn = {
205
249
  Linear,
206
250
  RNNCell,
207
251
  GRUCell,
252
+ LSTMCell,
253
+ LayerNorm,
208
254
  state
209
255
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "catniff",
-  "version": "0.5.5",
+  "version": "0.5.7",
   "description": "A small Torch-like deep learning framework for Javascript",
   "main": "index.js",
   "scripts": {