npm - catniff - Versions diffs - 0.8.21 → 0.8.23 - Mend

catniff 0.8.21 → 0.8.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/core.d.ts CHANGED Viewed

@@ -1,5 +1,8 @@
 import { Backend } from "./backend";
 import { dtype, MemoryBuffer } from "./dtype";
+/**
+ * A step that maps one tensor to another: either a plain function taking and returning a
+ * `Tensor`, or an object exposing such a function as its `forward` property.
+ */
+export type Callable = ((input: Tensor) => Tensor) | {
+    forward: (input: Tensor) => Tensor;
+};
 export type TensorValue = number | ArrayLike<TensorValue>;
 export interface TensorOptions {
     shape?: number[];
@@ -67,6 +70,7 @@ export declare class Tensor {
     chunk(chunks: number, dim?: number): Tensor[];
     expand(newShape: number[]): Tensor;
     unfold(dim: number, size: number, step: number): Tensor;
+    pad(pad: number[], mode?: string, value?: number): Tensor;
     cat(other: Tensor | TensorValue, dim?: number): Tensor;
     stack(others: (Tensor | TensorValue)[], dim?: number): Tensor;
     squeeze(dims?: number[] | number): Tensor;
@@ -220,6 +224,13 @@ export declare class Tensor {
     tril(diagonal?: number): Tensor;
     maskedFill(mask: Tensor | TensorValue, value: number): Tensor;
     multinomial(numSamples: number, replacement?: boolean): Tensor;
+    linear(weight: Tensor | TensorValue, bias?: Tensor | TensorValue): Tensor;
+    sequential(callables: Callable[]): Tensor;
+    layerNorm(normalizedShape: number[], weight?: Tensor | TensorValue, bias?: Tensor | TensorValue, eps?: number): Tensor;
+    rmsNorm(normalizedShape: number[], weight?: Tensor | TensorValue, eps?: number): Tensor;
+    instanceNorm(weight?: Tensor | TensorValue, bias?: Tensor | TensorValue, eps?: number): Tensor;
+    groupNorm(numGroups: number, weight?: Tensor | TensorValue, bias?: Tensor | TensorValue, eps?: number): Tensor;
+    scaledDotProductAttention(key: Tensor | TensorValue, value: Tensor | TensorValue, attnMask?: Tensor, dropout?: number, isCausal?: boolean, scale?: number): Tensor;
     static full(shape: number[], num: number, options?: TensorOptions): Tensor;
     static fullLike(tensor: Tensor, num: number, options?: TensorOptions): Tensor;
     static ones(shape?: number[], options?: TensorOptions): Tensor;

package/dist/core.js CHANGED Viewed

@@ -328,8 +328,14 @@ class Tensor {
         }
         const reducedGrad = accumGrad.sum(axesToReduce, true);
         const squeezedGrad = reducedGrad.squeeze(axesToSqueeze);
+        // Enforce 0-offset contiguous grads and correct dtype
         if (typeof tensor.grad === "undefined") {
-            tensor.grad = squeezedGrad;
+            let grad = squeezedGrad;
+            // Handle potentially contiguous tensors with non zero offset
+            if (grad.offset !== 0) {
+                grad = grad.clone();
+            }
+            tensor.grad = grad.contiguous().cast(tensor.dtype);
         }
         else {
             tensor.grad = tensor.grad.add(squeezedGrad.cast(tensor.dtype));
@@ -808,6 +814,70 @@ class Tensor {
         }
         return out;
     }
+    // Tensor padding
+    pad(pad, mode = "constant", value = 0) {
+        const original = this.clone().contiguous(); // This is needed for index padding to work
+        const outputShape = [...original.shape];
+        const paddingPerDim = [];
+        for (let i = 0; i < original.shape.length; i++) {
+            const left = pad[(original.shape.length - 1 - i) * 2] || 0;
+            const right = pad[(original.shape.length - 1 - i) * 2 + 1] || 0;
+            paddingPerDim[i] = { left, right };
+            outputShape[i] += left + right;
+        }
+        const outputSize = Tensor.shapeToSize(outputShape);
+        if (mode === "constant") {
+            const outputValue = new dtype_1.TypedArray[original.dtype](outputSize).fill(value);
+            const outputStrides = Tensor.getStrides(outputShape);
+            for (let index = 0; index < original.numel; index++) {
+                const coords = Tensor.indexToCoords(index, original.strides);
+                let paddedIndex = 0;
+                // Pad each coord
+                for (let j = 0; j < original.shape.length; j++) {
+                    const shiftedCoord = coords[j] + paddingPerDim[j].left;
+                    paddedIndex += shiftedCoord * outputStrides[j];
+                }
+                outputValue[paddedIndex] = original.value[index];
+            }
+            const out = new Tensor(outputValue, {
+                shape: outputShape,
+                strides: outputStrides,
+                offset: 0,
+                dtype: original.dtype,
+                device: original.device
+            });
+            if (original.requiresGrad) {
+                out.requiresGrad = true;
+                out.children.push(original);
+                out.gradFn = () => {
+                    const outGrad = out.grad;
+                    const gradValue = new dtype_1.TypedArray[original.dtype](original.numel);
+                    const gradStrides = Tensor.getStrides(original.shape);
+                    for (let index = 0; index < gradValue.length; index++) {
+                        const coords = Tensor.indexToCoords(index, gradStrides);
+                        let paddedIndex = 0;
+                        // Pad each coord
+                        for (let j = 0; j < original.shape.length; j++) {
+                            const shiftedCoord = coords[j] + paddingPerDim[j].left;
+                            paddedIndex += shiftedCoord * outputStrides[j];
+                        }
+                        gradValue[index] = outGrad.value[paddedIndex];
+                    }
+                    Tensor.addGrad(original, new Tensor(gradValue, {
+                        shape: original.shape,
+                        strides: gradStrides,
+                        offset: 0,
+                        dtype: original.dtype,
+                        device: original.device
+                    }));
+                };
+            }
+            return out;
+        }
+        else {
+            throw new Error(`Padding mode not supported: "${mode}"`);
+        }
+    }
     // Tensor concatentation
     cat(other, dim = 0) {
         other = this.handleOther(other);
@@ -2302,6 +2372,174 @@ class Tensor {
             dtype: "int32"
         });
     }
+    // Functional linear projection
+    linear(weight, bias) {
+        weight = this.handleOther(weight);
+        let output = this.matmul(weight.transpose(-1, -2));
+        if (bias) {
+            bias = this.handleOther(bias);
+            output = output.add(bias);
+        }
+        return output;
+    }
+    // Functional sequential chaining
+    sequential(callables) {
+        let res = this;
+        for (let index = 0; index < callables.length; index++) {
+            const callable = callables[index];
+            if (typeof callable === "function") {
+                res = callable(res);
+            }
+            else if (typeof callable === "object" && typeof callable.forward === "function") {
+                res = callable.forward(res);
+            }
+        }
+        return res;
+    }
+    // Functional layer norm
+    layerNorm(normalizedShape, weight, bias, eps = 1e-05) {
+        // Normalize over the specified dimensions
+        const normalizedDims = normalizedShape.length;
+        const startDim = this.shape.length - normalizedDims;
+        if (startDim < 0) {
+            throw new Error("Input does not have enough dims to normalize");
+        }
+        const dims = [];
+        for (let i = 0; i < normalizedDims; i++) {
+            if (this.shape[startDim + i] !== normalizedShape[i]) {
+                throw new Error(`Shape mismatch at dim ${startDim + i}: expected ${normalizedShape[i]}, got ${this.shape[startDim + i]}`);
+            }
+            dims.push(startDim + i);
+        }
+        const mean = this.mean(dims, true);
+        const centered = this.sub(mean);
+        const variance = centered.pow(2).mean(dims, true);
+        let normalized = centered.div(variance.add(eps).sqrt());
+        if (weight) {
+            normalized = normalized.mul(weight);
+        }
+        if (bias) {
+            normalized = normalized.add(bias);
+        }
+        return normalized;
+    }
+    // Functional RMS norm
+    rmsNorm(normalizedShape, weight, eps = 1e-5) {
+        // Normalize over the specified dimensions
+        const normalizedDims = normalizedShape.length;
+        const startDim = this.shape.length - normalizedDims;
+        if (startDim < 0) {
+            throw new Error("Input does not have enough dims to normalize");
+        }
+        const dims = [];
+        for (let i = 0; i < normalizedDims; i++) {
+            if (this.shape[startDim + i] !== normalizedShape[i]) {
+                throw new Error(`Shape mismatch at dim ${startDim + i}: expected ${normalizedShape[i]}, got ${this.shape[startDim + i]}`);
+            }
+            dims.push(startDim + i);
+        }
+        let rms = this.square().mean(dims, true).add(eps).sqrt();
+        let normalized = this.div(rms);
+        if (weight) {
+            normalized = normalized.mul(weight);
+        }
+        return normalized;
+    }
+    // Functional instance norm
+    instanceNorm(weight, bias, eps = 1e-5) {
+        // Input should be at least 3D: [N, C, ...spatial dims]
+        if (this.shape.length < 3) {
+            throw new Error("InstanceNorm expects at least 3D input [N, C, ...spatial]");
+        }
+        // Normalize across spatial dimensions (all dims after channel dim)
+        const dims = [];
+        for (let i = 2; i < this.shape.length; i++) {
+            dims.push(i);
+        }
+        const mean = this.mean(dims, true);
+        const centered = this.sub(mean);
+        const variance = centered.pow(2).mean(dims, true);
+        let normalized = centered.div(variance.add(eps).sqrt());
+        const numFeatures = this.shape[1];
+        if (weight) {
+            // Reshape weight to [1, C, 1, 1, ...] for broadcasting
+            weight = this.handleOther(weight);
+            const weightShape = [1, numFeatures, ...Array(this.shape.length - 2).fill(1)];
+            const weightReshaped = weight.reshape(weightShape);
+            normalized = normalized.mul(weightReshaped);
+        }
+        if (bias) {
+            // Reshape bias to [1, C, 1, 1, ...] for broadcasting
+            bias = this.handleOther(bias);
+            const biasShape = [1, numFeatures, ...Array(this.shape.length - 2).fill(1)];
+            const biasReshaped = bias.reshape(biasShape);
+            normalized = normalized.add(biasReshaped);
+        }
+        return normalized;
+    }
+    // Functional group norm
+    groupNorm(numGroups, weight, bias, eps = 1e-5) {
+        // Input should be at least 3D: [N, C, ...spatial dims]
+        if (this.shape.length < 3) {
+            throw new Error("GroupNorm expects at least 3D input [N, C, ...spatial]");
+        }
+        const N = this.shape[0];
+        const C = this.shape[1];
+        const spatialDims = this.shape.slice(2);
+        const channelsPerGroup = C / numGroups;
+        // Reshape: [N, C, ...spatial] -> [N, G, C//G, ...spatial]
+        const reshapedInput = this.reshape([N, numGroups, channelsPerGroup, ...spatialDims]);
+        // Normalize across (C//G, ...spatial) dimensions for each group
+        // That's dims [2, 3, 4, ...] in the reshaped tensor
+        const dims = [];
+        for (let i = 2; i < reshapedInput.shape.length; i++) {
+            dims.push(i);
+        }
+        const mean = reshapedInput.mean(dims, true);
+        const centered = reshapedInput.sub(mean);
+        const variance = centered.pow(2).mean(dims, true);
+        let normalized = centered.div(variance.add(eps).sqrt());
+        // Reshape back: [N, G, C//G, ...spatial] -> [N, C, ...spatial]
+        normalized = normalized.reshape(this.shape);
+        const numChannels = this.shape[1];
+        if (weight) {
+            // Reshape weight to [1, C, 1, 1, ...] for broadcasting
+            weight = this.handleOther(weight);
+            const weightShape = [1, numChannels, ...Array(spatialDims.length).fill(1)];
+            const weightReshaped = weight.reshape(weightShape);
+            normalized = normalized.mul(weightReshaped);
+        }
+        if (bias) {
+            // Reshape bias to [1, C, 1, 1, ...] for broadcasting
+            bias = this.handleOther(bias);
+            const biasShape = [1, numChannels, ...Array(spatialDims.length).fill(1)];
+            const biasReshaped = bias.reshape(biasShape);
+            normalized = normalized.add(biasReshaped);
+        }
+        return normalized;
+    }
+    // Functional scaled dot product attention
+    scaledDotProductAttention(key, value, attnMask, dropout = 0, isCausal = false, scale) {
+        key = this.handleOther(key);
+        value = this.handleOther(value);
+        const targetLen = this.shape[this.shape.length - 2];
+        const sourceLen = key.shape[key.shape.length - 2];
+        const dimSize = this.shape[this.shape.length - 1];
+        // Attention scores
+        let scores = this.matmul(key.transpose(-2, -1)).div(scale ?? Math.sqrt(dimSize));
+        // Set attention mask to causal mask if specified
+        if (isCausal) {
+            attnMask = Tensor.ones([targetLen, sourceLen], { device: this.device }).triu(1);
+        }
+        // Apply attention mask if specified
+        if (attnMask) {
+            scores = scores.maskedFill(attnMask, -Infinity);
+        }
+        // Calculate attention weights
+        let attnWeights = scores.softmax().dropout(dropout);
+        // Apply attention to values
+        return attnWeights.matmul(value);
+    }
     // Utility to create a new tensor filled with a number
     static full(shape, num, options = {}) {
         if (shape.length === 0)

package/dist/nn.d.ts CHANGED Viewed

@@ -1,10 +1,15 @@
-import { Tensor, TensorValue } from "./core";
+import { Callable, Tensor } from "./core";
 import { dtype } from "./dtype";
 export declare class Linear {
     weight: Tensor;
     bias?: Tensor;
     constructor(inFeatures: number, outFeatures: number, bias?: boolean, device?: string, dtype?: dtype);
-    forward(input: Tensor | TensorValue): Tensor;
+    forward(input: Tensor): Tensor;
+}
+export declare class Sequential {
+    callables: Callable[];
+    constructor(callables: Callable[]);
+    forward(input: Tensor): Tensor;
 }
 export declare class RNNCell {
     weightIH: Tensor;
@@ -12,7 +17,7 @@ export declare class RNNCell {
     biasIH?: Tensor;
     biasHH?: Tensor;
     constructor(inputSize: number, hiddenSize: number, bias?: boolean, device?: string, dtype?: dtype);
-    forward(input: Tensor | TensorValue, hidden: Tensor | TensorValue): Tensor;
+    forward(input: Tensor, hidden: Tensor): Tensor;
 }
 export declare class GRUCell {
     weightIR: Tensor;
@@ -28,7 +33,7 @@ export declare class GRUCell {
     biasHZ?: Tensor;
     biasHN?: Tensor;
     constructor(inputSize: number, hiddenSize: number, bias?: boolean, device?: string, dtype?: dtype);
-    forward(input: Tensor | TensorValue, hidden: Tensor | TensorValue): Tensor;
+    forward(input: Tensor, hidden: Tensor): Tensor;
 }
 export declare class LSTMCell {
     weightII: Tensor;
@@ -48,7 +53,7 @@ export declare class LSTMCell {
     biasHG?: Tensor;
     biasHO?: Tensor;
     constructor(inputSize: number, hiddenSize: number, bias?: boolean, device?: string, dtype?: dtype);
-    forward(input: Tensor | TensorValue, hidden: Tensor | TensorValue, cell: Tensor | TensorValue): [Tensor, Tensor];
+    forward(input: Tensor, hidden: Tensor, cell: Tensor): [Tensor, Tensor];
 }
 export declare class BatchNorm {
     weight?: Tensor;
@@ -99,9 +104,8 @@ export declare class RMSNorm {
 export declare class Embedding {
     weight: Tensor;
     constructor(numEmbeddings: number, embeddingDim: number, device?: string, dtype?: dtype);
-    forward(input: Tensor | TensorValue): Tensor;
+    forward(input: Tensor): Tensor;
 }
-export declare function scaledDotProductAttention(query: Tensor, key: Tensor, value: Tensor, attnMask?: Tensor, dropout?: number, isCausal?: boolean, scale?: number): Tensor;
 export declare class MultiheadAttention {
     qProjection: Linear;
     kProjection: Linear;
@@ -119,6 +123,7 @@ export interface StateDict {
 }
 export declare const nn: {
     Linear: typeof Linear;
+    Sequential: typeof Sequential;
     RNNCell: typeof RNNCell;
     GRUCell: typeof GRUCell;
     LSTMCell: typeof LSTMCell;
@@ -128,7 +133,6 @@ export declare const nn: {
     LayerNorm: typeof LayerNorm;
     RMSNorm: typeof RMSNorm;
     Embedding: typeof Embedding;
-    scaledDotProductAttention: typeof scaledDotProductAttention;
     MultiheadAttention: typeof MultiheadAttention;
     state: {
         getParameters(model: any, visited?: WeakSet<object>): Tensor[];

package/dist/nn.js CHANGED Viewed

@@ -1,15 +1,7 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.nn = exports.MultiheadAttention = exports.Embedding = exports.RMSNorm = exports.LayerNorm = exports.GroupNorm = exports.InstanceNorm = exports.BatchNorm = exports.LSTMCell = exports.GRUCell = exports.RNNCell = exports.Linear = void 0;
-exports.scaledDotProductAttention = scaledDotProductAttention;
+exports.nn = exports.MultiheadAttention = exports.Embedding = exports.RMSNorm = exports.LayerNorm = exports.GroupNorm = exports.InstanceNorm = exports.BatchNorm = exports.LSTMCell = exports.GRUCell = exports.RNNCell = exports.Sequential = exports.Linear = void 0;
 const core_1 = require("./core");
-function linearTransform(input, weight, bias) {
-    let output = input.matmul(weight.t());
-    if (bias) {
-        output = output.add(bias);
-    }
-    return output;
-}
 class Linear {
     weight;
     bias;
@@ -21,20 +13,22 @@ class Linear {
         }
     }
     forward(input) {
-        input = this.weight.handleOther(input);
-        return linearTransform(input, this.weight, this.bias);
+        return input.linear(this.weight, this.bias);
     }
 }
 exports.Linear = Linear;
-function rnnTransform(input, hidden, inputWeight, hiddenWeight, inputBias, hiddenBias) {
-    let output = input.matmul(inputWeight.t()).add(hidden.matmul(hiddenWeight.t()));
-    if (inputBias) {
-        output = output.add(inputBias);
+class Sequential {
+    callables;
+    constructor(callables) {
+        this.callables = callables;
     }
-    if (hiddenBias) {
-        output = output.add(hiddenBias);
+    forward(input) {
+        return input.sequential(this.callables);
     }
-    return output;
+}
+exports.Sequential = Sequential;
+function rnnTransform(input, hidden, inputWeight, hiddenWeight, inputBias, hiddenBias) {
+    return input.linear(inputWeight, inputBias).add(hidden.linear(hiddenWeight, hiddenBias));
 }
 class RNNCell {
     weightIH;
@@ -51,8 +45,6 @@ class RNNCell {
         }
     }
     forward(input, hidden) {
-        input = this.weightIH.handleOther(input);
-        hidden = this.weightHH.handleOther(hidden);
         return rnnTransform(input, hidden, this.weightIH, this.weightHH, this.biasIH, this.biasHH).tanh();
     }
 }
@@ -88,11 +80,9 @@ class GRUCell {
         }
     }
     forward(input, hidden) {
-        input = this.weightIN.handleOther(input);
-        hidden = this.weightHN.handleOther(hidden);
         const r = rnnTransform(input, hidden, this.weightIR, this.weightHR, this.biasIR, this.biasHR).sigmoid();
         const z = rnnTransform(input, hidden, this.weightIZ, this.weightHZ, this.biasIZ, this.biasHZ).sigmoid();
-        const n = linearTransform(input, this.weightIN, this.biasIN).add(r.mul(linearTransform(hidden, this.weightHN, this.biasHN))).tanh();
+        const n = input.linear(this.weightIN, this.biasIN).add(r.mul(hidden.linear(this.weightHN, this.biasHN))).tanh();
         return (z.neg().add(1).mul(n).add(z.mul(hidden)));
     }
 }
@@ -136,9 +126,6 @@ class LSTMCell {
         }
     }
     forward(input, hidden, cell) {
-        input = this.weightII.handleOther(input);
-        hidden = this.weightHI.handleOther(hidden);
-        cell = this.weightHI.handleOther(cell);
         const i = rnnTransform(input, hidden, this.weightII, this.weightHI, this.biasII, this.biasHI).sigmoid();
         const f = rnnTransform(input, hidden, this.weightIF, this.weightHF, this.biasIF, this.biasHF).sigmoid();
         const g = rnnTransform(input, hidden, this.weightIG, this.weightHG, this.biasIG, this.biasHG).tanh();
@@ -240,34 +227,10 @@ class InstanceNorm {
         }
     }
     forward(input) {
-        // Input should be at least 3D: [N, C, ...spatial dims]
-        if (input.shape.length < 3) {
-            throw new Error("InstanceNorm expects at least 3D input [N, C, ...spatial]");
-        }
         if (input.shape[1] !== this.numFeatures) {
             throw new Error(`Expected ${this.numFeatures} channels, got ${input.shape[1]}`);
         }
-        // Normalize across spatial dimensions (all dims after channel dim)
-        const dims = [];
-        for (let i = 2; i < input.shape.length; i++) {
-            dims.push(i);
-        }
-        const mean = input.mean(dims, true);
-        const variance = input.sub(mean).pow(2).mean(dims, true);
-        let normalized = input.sub(mean).div(variance.add(this.eps).sqrt());
-        if (this.weight) {
-            // Reshape weight to [1, C, 1, 1, ...] for broadcasting
-            const weightShape = [1, this.numFeatures, ...Array(input.shape.length - 2).fill(1)];
-            const weightReshaped = this.weight.reshape(weightShape);
-            normalized = normalized.mul(weightReshaped);
-        }
-        if (this.bias) {
-            // Reshape bias to [1, C, 1, 1, ...] for broadcasting
-            const biasShape = [1, this.numFeatures, ...Array(input.shape.length - 2).fill(1)];
-            const biasReshaped = this.bias.reshape(biasShape);
-            normalized = normalized.add(biasReshaped);
-        }
-        return normalized;
+        return input.instanceNorm(this.weight, this.bias, this.eps);
     }
 }
 exports.InstanceNorm = InstanceNorm;
@@ -290,43 +253,10 @@ class GroupNorm {
         }
     }
     forward(input) {
-        // Input should be at least 3D: [N, C, ...spatial dims]
-        if (input.shape.length < 3) {
-            throw new Error("GroupNorm expects at least 3D input [N, C, ...spatial]");
-        }
         if (input.shape[1] !== this.numChannels) {
             throw new Error(`Expected ${this.numChannels} channels, got ${input.shape[1]}`);
         }
-        const N = input.shape[0];
-        const C = input.shape[1];
-        const spatialDims = input.shape.slice(2);
-        const channelsPerGroup = C / this.numGroups;
-        // Reshape: [N, C, ...spatial] -> [N, G, C//G, ...spatial]
-        const reshapedInput = input.reshape([N, this.numGroups, channelsPerGroup, ...spatialDims]);
-        // Normalize across (C//G, ...spatial) dimensions for each group
-        // That's dims [2, 3, 4, ...] in the reshaped tensor
-        const dims = [];
-        for (let i = 2; i < reshapedInput.shape.length; i++) {
-            dims.push(i);
-        }
-        const mean = reshapedInput.mean(dims, true);
-        const variance = reshapedInput.sub(mean).pow(2).mean(dims, true);
-        let normalized = reshapedInput.sub(mean).div(variance.add(this.eps).sqrt());
-        // Reshape back: [N, G, C//G, ...spatial] -> [N, C, ...spatial]
-        normalized = normalized.reshape(input.shape);
-        if (this.weight) {
-            // Reshape weight to [1, C, 1, 1, ...] for broadcasting
-            const weightShape = [1, this.numChannels, ...Array(spatialDims.length).fill(1)];
-            const weightReshaped = this.weight.reshape(weightShape);
-            normalized = normalized.mul(weightReshaped);
-        }
-        if (this.bias) {
-            // Reshape bias to [1, C, 1, 1, ...] for broadcasting
-            const biasShape = [1, this.numChannels, ...Array(spatialDims.length).fill(1)];
-            const biasReshaped = this.bias.reshape(biasShape);
-            normalized = normalized.add(biasReshaped);
-        }
-        return normalized;
+        return input.groupNorm(this.numGroups, this.weight, this.bias, this.eps);
     }
 }
 exports.GroupNorm = GroupNorm;
@@ -349,29 +279,7 @@ class LayerNorm {
         }
     }
     forward(input) {
-        // Normalize over the specified dimensions
-        const normalizedDims = this.normalizedShape.length;
-        const startDim = input.shape.length - normalizedDims;
-        if (startDim < 0) {
-            throw new Error("Input does not have enough dims to normalize");
-        }
-        const dims = [];
-        for (let i = 0; i < normalizedDims; i++) {
-            if (input.shape[startDim + i] !== this.normalizedShape[i]) {
-                throw new Error(`Shape mismatch at dim ${startDim + i}: expected ${this.normalizedShape[i]}, got ${input.shape[startDim + i]}`);
-            }
-            dims.push(startDim + i);
-        }
-        const mean = input.mean(dims, true);
-        const variance = input.sub(mean).pow(2).mean(dims, true);
-        let normalized = input.sub(mean).div(variance.add(this.eps).sqrt());
-        if (this.weight) {
-            normalized = normalized.mul(this.weight);
-        }
-        if (this.bias) {
-            normalized = normalized.add(this.bias);
-        }
-        return normalized;
+        return input.layerNorm(this.normalizedShape, this.weight, this.bias, this.eps);
     }
 }
 exports.LayerNorm = LayerNorm;
@@ -390,25 +298,7 @@ class RMSNorm {
         }
     }
     forward(input) {
-        // Normalize over the specified dimensions
-        const normalizedDims = this.normalizedShape.length;
-        const startDim = input.shape.length - normalizedDims;
-        if (startDim < 0) {
-            throw new Error("Input does not have enough dims to normalize");
-        }
-        const dims = [];
-        for (let i = 0; i < normalizedDims; i++) {
-            if (input.shape[startDim + i] !== this.normalizedShape[i]) {
-                throw new Error(`Shape mismatch at dim ${startDim + i}: expected ${this.normalizedShape[i]}, got ${input.shape[startDim + i]}`);
-            }
-            dims.push(startDim + i);
-        }
-        let rms = input.square().mean(dims, true).add(this.eps).sqrt();
-        let normalized = input.div(rms);
-        if (this.weight) {
-            normalized = normalized.mul(this.weight);
-        }
-        return normalized;
+        return input.rmsNorm(this.normalizedShape, this.weight, this.eps);
     }
 }
 exports.RMSNorm = RMSNorm;
@@ -422,25 +312,6 @@ class Embedding {
     }
 }
 exports.Embedding = Embedding;
-function scaledDotProductAttention(query, key, value, attnMask, dropout = 0, isCausal = false, scale) {
-    const targetLen = query.shape[query.shape.length - 2];
-    const sourceLen = key.shape[key.shape.length - 2];
-    const dimSize = query.shape[query.shape.length - 1];
-    // Attention scores
-    let scores = query.matmul(key.transpose(-2, -1)).div(scale ?? Math.sqrt(dimSize));
-    // Set attention mask to causal mask if specified
-    if (isCausal) {
-        attnMask = core_1.Tensor.ones([targetLen, sourceLen], { device: query.device }).triu(1);
-    }
-    // Apply attention mask if specified
-    if (attnMask) {
-        scores = scores.maskedFill(attnMask, -Infinity);
-    }
-    // Calculate attention weights
-    let attnWeights = scores.softmax().dropout(dropout);
-    // Apply attention to values
-    return attnWeights.matmul(value);
-}
 class MultiheadAttention {
     qProjection;
     kProjection;
@@ -561,6 +432,7 @@ const state = {
 };
 exports.nn = {
     Linear,
+    Sequential,
     RNNCell,
     GRUCell,
     LSTMCell,
@@ -570,7 +442,6 @@ exports.nn = {
     LayerNorm,
     RMSNorm,
     Embedding,
-    scaledDotProductAttention,
     MultiheadAttention,
     state
 };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "catniff",
-  "version": "0.8.21",
+  "version": "0.8.23",
   "description": "Torch-like deep learning framework for Javascript",
   "main": "./dist/index.js",
   "scripts": {