npm - catniff - Versions diffs - 0.6.13 → 0.6.15 - Mend

catniff 0.6.13 → 0.6.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/dist/nn.d.ts CHANGED Viewed

@@ -57,6 +57,13 @@ declare class LayerNorm {
     constructor(normalizedShape: number | number[], eps?: number, elementwiseAffine?: boolean, bias?: boolean, device?: string);
     forward(input: Tensor): Tensor;
 }
+declare class RMSNorm {
+    weight?: Tensor;
+    eps: number;
+    normalizedShape: number[];
+    constructor(normalizedShape: number | number[], eps?: number, elementwiseAffine?: boolean, device?: string);
+    forward(input: Tensor): Tensor;
+}
 declare class Embedding {
     weight: Tensor;
     constructor(numEmbeddings: number, embeddingDim: number, device: string);
@@ -83,6 +90,7 @@ export declare const nn: {
     GRUCell: typeof GRUCell;
     LSTMCell: typeof LSTMCell;
     LayerNorm: typeof LayerNorm;
+    RMSNorm: typeof RMSNorm;
     Embedding: typeof Embedding;
     MultiheadAttention: typeof MultiheadAttention;
     state: {

package/dist/nn.js CHANGED Viewed

@@ -188,6 +188,42 @@ class LayerNorm {
         return normalized;
     }
 }
+class RMSNorm {
+    weight;
+    eps;
+    normalizedShape;
+    constructor(normalizedShape, eps = 1e-5, elementwiseAffine = true, device) {
+        this.eps = eps;
+        this.normalizedShape = Array.isArray(normalizedShape) ? normalizedShape : [normalizedShape];
+        if (this.normalizedShape.length === 0) {
+            throw new Error("Normalized shape cannot be empty");
+        }
+        if (elementwiseAffine) {
+            this.weight = core_1.Tensor.ones(this.normalizedShape, { requiresGrad: true, device });
+        }
+    }
+    forward(input) {
+        // Normalize over the specified dimensions
+        const normalizedDims = this.normalizedShape.length;
+        const startDim = input.shape.length - normalizedDims;
+        if (startDim < 0) {
+            throw new Error("Input does not have enough dims to normalize");
+        }
+        const dims = [];
+        for (let i = 0; i < normalizedDims; i++) {
+            if (input.shape[startDim + i] !== this.normalizedShape[i]) {
+                throw new Error(`Shape mismatch at dim ${startDim + i}: expected ${this.normalizedShape[i]}, got ${input.shape[startDim + i]}`);
+            }
+            dims.push(startDim + i);
+        }
+        let rms = input.square().mean(dims, true).add(this.eps).sqrt();
+        let normalized = input.div(rms);
+        if (this.weight) {
+            normalized = normalized.mul(this.weight);
+        }
+        return normalized;
+    }
+}
 class Embedding {
     weight;
     constructor(numEmbeddings, embeddingDim, device) {
@@ -316,6 +352,7 @@ exports.nn = {
     GRUCell,
     LSTMCell,
     LayerNorm,
+    RMSNorm,
     Embedding,
     MultiheadAttention,
     state

package/dist/optim.js CHANGED Viewed

@@ -139,7 +139,7 @@ class AdamW extends BaseOptimizer {
         this.lr = options?.lr || 0.001;
         this.betas = options?.betas || [0.9, 0.999];
         this.eps = options?.eps || 1e-8;
-        this.weightDecay = options?.weightDecay || 0;
+        this.weightDecay = options?.weightDecay || 0.01;
     }
     step() {
         this.stepCount++;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "catniff",
-  "version": "0.6.13",
+  "version": "0.6.15",
   "description": "A small Torch-like deep learning framework for Javascript",
   "main": "index.js",
   "scripts": {