catniff 0.6.14 → 0.7.0
- package/dist/lrscheduler.d.ts +13 -0
- package/dist/lrscheduler.js +31 -0
- package/dist/nn.d.ts +15 -8
- package/dist/nn.js +46 -1
- package/dist/optim.d.ts +9 -9
- package/dist/optim.js +11 -11
- package/index.d.ts +1 -0
- package/index.js +2 -1
- package/package.json +1 -1

package/dist/lrscheduler.d.ts
ADDED
@@ -0,0 +1,13 @@
+import { BaseOptimizer } from "./optim";
+export declare class StepLR {
+    optimizer: BaseOptimizer;
+    stepSize: number;
+    gamma: number;
+    lastEpoch: number;
+    baseLR: number;
+    constructor(optimizer: BaseOptimizer, stepSize: number, gamma?: number, lastEpoch?: number);
+    step(epoch?: number): void;
+}
+export declare const LRScheduler: {
+    StepLR: typeof StepLR;
+};

package/dist/lrscheduler.js
ADDED
@@ -0,0 +1,31 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.LRScheduler = exports.StepLR = void 0;
+class StepLR {
+    optimizer;
+    stepSize;
+    gamma;
+    lastEpoch;
+    baseLR;
+    constructor(optimizer, stepSize, gamma = 0.1, lastEpoch = -1) {
+        this.optimizer = optimizer;
+        this.stepSize = stepSize;
+        this.gamma = gamma;
+        this.lastEpoch = lastEpoch;
+        this.baseLR = this.optimizer.lr;
+    }
+    step(epoch) {
+        if (typeof epoch === "undefined") {
+            this.lastEpoch++;
+            epoch = this.lastEpoch;
+        }
+        else {
+            this.lastEpoch = epoch;
+        }
+        this.optimizer.lr = this.baseLR * this.gamma ** Math.floor(epoch / this.stepSize);
+    }
+}
+exports.StepLR = StepLR;
+exports.LRScheduler = {
+    StepLR
+};
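
The new StepLR scheduler decays the wrapped optimizer's learning rate by a factor of gamma every stepSize epochs: lr = baseLR * gamma ** floor(epoch / stepSize). A minimal usage sketch; it assumes the package root re-exports Optim, LRScheduler, and Tensor (index.js also changed in this release), and the empty params array stands in for parameters gathered from a model:

    import { Optim, LRScheduler } from "catniff";
    import type { Tensor } from "catniff";

    const params: Tensor[] = [];                        // e.g. nn.state.getParameters(model)
    const opt = new Optim.Adam(params, { lr: 0.01 });
    const sched = new LRScheduler.StepLR(opt, 10, 0.5); // stepSize = 10, gamma = 0.5

    for (let epoch = 0; epoch < 30; epoch++) {
        // ...forward, backward, opt.step()...
        sched.step(); // after the step at epoch e: opt.lr === 0.01 * 0.5 ** Math.floor(e / 10)
    }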

package/dist/nn.d.ts
CHANGED
@@ -1,11 +1,11 @@
 import { Tensor, TensorValue } from "./core";
-declare class Linear {
+export declare class Linear {
     weight: Tensor;
     bias?: Tensor;
     constructor(inFeatures: number, outFeatures: number, bias?: boolean, device?: string);
     forward(input: Tensor | TensorValue): Tensor;
 }
-declare class RNNCell {
+export declare class RNNCell {
     weightIH: Tensor;
     weightHH: Tensor;
     biasIH?: Tensor;
@@ -13,7 +13,7 @@ declare class RNNCell {
     constructor(inputSize: number, hiddenSize: number, bias?: boolean, device?: string);
     forward(input: Tensor | TensorValue, hidden: Tensor | TensorValue): Tensor;
 }
-declare class GRUCell {
+export declare class GRUCell {
     weightIR: Tensor;
     weightIZ: Tensor;
     weightIN: Tensor;
@@ -29,7 +29,7 @@ declare class GRUCell {
     constructor(inputSize: number, hiddenSize: number, bias?: boolean, device?: string);
     forward(input: Tensor | TensorValue, hidden: Tensor | TensorValue): Tensor;
 }
-declare class LSTMCell {
+export declare class LSTMCell {
     weightII: Tensor;
     weightIF: Tensor;
     weightIG: Tensor;
@@ -49,7 +49,7 @@ declare class LSTMCell {
     constructor(inputSize: number, hiddenSize: number, bias?: boolean, device?: string);
     forward(input: Tensor | TensorValue, hidden: Tensor | TensorValue, cell: Tensor | TensorValue): [Tensor, Tensor];
 }
-declare class LayerNorm {
+export declare class LayerNorm {
     weight?: Tensor;
     bias?: Tensor;
     eps: number;
@@ -57,12 +57,19 @@ declare class LayerNorm {
     constructor(normalizedShape: number | number[], eps?: number, elementwiseAffine?: boolean, bias?: boolean, device?: string);
     forward(input: Tensor): Tensor;
 }
-declare class Embedding {
+export declare class RMSNorm {
+    weight?: Tensor;
+    eps: number;
+    normalizedShape: number[];
+    constructor(normalizedShape: number | number[], eps?: number, elementwiseAffine?: boolean, device?: string);
+    forward(input: Tensor): Tensor;
+}
+export declare class Embedding {
     weight: Tensor;
     constructor(numEmbeddings: number, embeddingDim: number, device: string);
     forward(input: Tensor | TensorValue): Tensor;
 }
-declare class MultiheadAttention {
+export declare class MultiheadAttention {
     qProjection: Linear;
     kProjection: Linear;
     vProjection: Linear;
@@ -83,6 +90,7 @@ export declare const nn: {
     GRUCell: typeof GRUCell;
     LSTMCell: typeof LSTMCell;
     LayerNorm: typeof LayerNorm;
+    RMSNorm: typeof RMSNorm;
     Embedding: typeof Embedding;
     MultiheadAttention: typeof MultiheadAttention;
     state: {
@@ -92,4 +100,3 @@ export declare const nn: {
         loadStateDict(model: any, stateDict: StateDict, prefix?: string, visited?: WeakSet<object>): void;
     };
 };
-export {};
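
The declaration changes make every layer class a named export usable as both a value and a type (previously they were module-private declare class statements, and the trailing export {} kept the file's named surface down to nn alone), and RMSNorm joins the declared API. A small sketch of what this enables; the deep-import path is an assumption:

    import { nn } from "catniff";                       // works in 0.6.x and 0.7.0
    import { Linear, RMSNorm } from "catniff/dist/nn";  // named imports, new in 0.7.0

    const layer: Linear = new nn.Linear(8, 4);          // Linear is now usable as a type annotation
    const norm = new RMSNorm(4);                        // eps, elementwiseAffine, device use defaults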

package/dist/nn.js
CHANGED
@@ -1,6 +1,6 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.nn = void 0;
+exports.nn = exports.MultiheadAttention = exports.Embedding = exports.RMSNorm = exports.LayerNorm = exports.LSTMCell = exports.GRUCell = exports.RNNCell = exports.Linear = void 0;
 const core_1 = require("./core");
 function linearTransform(input, weight, bias) {
     let output = input.matmul(weight.t());
@@ -24,6 +24,7 @@ class Linear {
         return linearTransform(input, this.weight, this.bias);
     }
 }
+exports.Linear = Linear;
 function rnnTransform(input, hidden, inputWeight, hiddenWeight, inputBias, hiddenBias) {
     let output = input.matmul(inputWeight.t()).add(hidden.matmul(hiddenWeight.t()));
     if (inputBias) {
@@ -54,6 +55,7 @@ class RNNCell {
         return rnnTransform(input, hidden, this.weightIH, this.weightHH, this.biasIH, this.biasHH).tanh();
     }
 }
+exports.RNNCell = RNNCell;
 class GRUCell {
     weightIR;
     weightIZ;
@@ -93,6 +95,7 @@ class GRUCell {
         return (z.neg().add(1).mul(n).add(z.mul(hidden)));
     }
 }
+exports.GRUCell = GRUCell;
 class LSTMCell {
     weightII;
     weightIF;
@@ -144,6 +147,7 @@ class LSTMCell {
         return [h, c];
     }
 }
+exports.LSTMCell = LSTMCell;
 class LayerNorm {
     weight;
     bias;
@@ -188,6 +192,44 @@ class LayerNorm {
         return normalized;
     }
 }
+exports.LayerNorm = LayerNorm;
+class RMSNorm {
+    weight;
+    eps;
+    normalizedShape;
+    constructor(normalizedShape, eps = 1e-5, elementwiseAffine = true, device) {
+        this.eps = eps;
+        this.normalizedShape = Array.isArray(normalizedShape) ? normalizedShape : [normalizedShape];
+        if (this.normalizedShape.length === 0) {
+            throw new Error("Normalized shape cannot be empty");
+        }
+        if (elementwiseAffine) {
+            this.weight = core_1.Tensor.ones(this.normalizedShape, { requiresGrad: true, device });
+        }
+    }
+    forward(input) {
+        // Normalize over the specified dimensions
+        const normalizedDims = this.normalizedShape.length;
+        const startDim = input.shape.length - normalizedDims;
+        if (startDim < 0) {
+            throw new Error("Input does not have enough dims to normalize");
+        }
+        const dims = [];
+        for (let i = 0; i < normalizedDims; i++) {
+            if (input.shape[startDim + i] !== this.normalizedShape[i]) {
+                throw new Error(`Shape mismatch at dim ${startDim + i}: expected ${this.normalizedShape[i]}, got ${input.shape[startDim + i]}`);
+            }
+            dims.push(startDim + i);
+        }
+        let rms = input.square().mean(dims, true).add(this.eps).sqrt();
+        let normalized = input.div(rms);
+        if (this.weight) {
+            normalized = normalized.mul(this.weight);
+        }
+        return normalized;
+    }
+}
+exports.RMSNorm = RMSNorm;
 class Embedding {
     weight;
     constructor(numEmbeddings, embeddingDim, device) {
@@ -197,6 +239,7 @@ class Embedding {
         return this.weight.index(input);
     }
 }
+exports.Embedding = Embedding;
 class MultiheadAttention {
     qProjection;
     kProjection;
@@ -248,6 +291,7 @@ class MultiheadAttention {
         return [output, needWeights ? attnWeights : undefined];
     }
 }
+exports.MultiheadAttention = MultiheadAttention;
 const state = {
     getParameters(model, visited = new WeakSet()) {
         if (visited.has(model))
@@ -316,6 +360,7 @@ exports.nn = {
     GRUCell,
     LSTMCell,
     LayerNorm,
+    RMSNorm,
     Embedding,
     MultiheadAttention,
     state
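
RMSNorm implements root-mean-square normalization over the trailing normalizedShape dimensions: y = x / sqrt(mean(x^2) + eps), optionally scaled elementwise by a learnable weight initialized to ones. Unlike LayerNorm it neither subtracts the mean nor carries a bias term. A minimal sketch, assuming Tensor is re-exported from the package root (Tensor.ones is the same call nn.js itself uses):

    import { nn, Tensor } from "catniff";

    const norm = new nn.RMSNorm([4]);   // normalize over a trailing dimension of size 4
    const x = Tensor.ones([2, 4]);      // shape [batch, features]
    const y = norm.forward(x);          // divides by sqrt(mean(x ** 2) + 1e-5), then scales by weight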

package/dist/optim.d.ts
CHANGED
@@ -1,7 +1,11 @@
 import { Tensor } from "./core";
-declare abstract class BaseOptimizer {
+export interface BaseOptimizerOptions {
+    lr?: number;
+}
+export declare abstract class BaseOptimizer {
     params: Tensor[];
-    constructor(params: Tensor[]);
+    lr: number;
+    constructor(params: Tensor[], options?: BaseOptimizerOptions);
     zeroGrad(): void;
 }
 export interface SGDOptions {
@@ -11,9 +15,8 @@ export interface SGDOptions {
     weightDecay?: number;
     nesterov?: boolean;
 }
-declare class SGD extends BaseOptimizer {
+export declare class SGD extends BaseOptimizer {
     momentumBuffers: Map<Tensor, Tensor>;
-    lr: number;
     momentum: number;
     dampening: number;
     weightDecay: number;
@@ -27,11 +30,10 @@ export interface AdamOptions {
     eps?: number;
     weightDecay?: number;
 }
-declare class Adam extends BaseOptimizer {
+export declare class Adam extends BaseOptimizer {
     momentumBuffers: Map<Tensor, Tensor>;
     velocityBuffers: Map<Tensor, Tensor>;
     stepCount: number;
-    lr: number;
     betas: [number, number];
     eps: number;
     weightDecay: number;
@@ -44,11 +46,10 @@ export interface AdamWOptions {
     eps?: number;
     weightDecay?: number;
 }
-declare class AdamW extends BaseOptimizer {
+export declare class AdamW extends BaseOptimizer {
     momentumBuffers: Map<Tensor, Tensor>;
     velocityBuffers: Map<Tensor, Tensor>;
     stepCount: number;
-    lr: number;
     betas: [number, number];
     eps: number;
     weightDecay: number;
@@ -61,4 +62,3 @@ export declare const Optim: {
     Adam: typeof Adam;
     AdamW: typeof AdamW;
 };
-export {};
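
The lr field and its option now live on the abstract base class via the new BaseOptimizerOptions interface instead of being redeclared by SGD, Adam, and AdamW individually; a uniform lr property on BaseOptimizer is exactly the contract StepLR depends on. A small typing sketch; the deep-import path and the decay helper are illustrative, not part of the package:

    import type { BaseOptimizer } from "catniff/dist/optim";

    // Any optimizer, built-in or custom, can be driven through the base type.
    function decay(opt: BaseOptimizer, factor: number): void {
        opt.lr *= factor;
    }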

package/dist/optim.js
CHANGED
@@ -1,11 +1,13 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.Optim = void 0;
+exports.Optim = exports.AdamW = exports.Adam = exports.SGD = exports.BaseOptimizer = void 0;
 const core_1 = require("./core");
 class BaseOptimizer {
     params;
-    constructor(params) {
+    lr;
+    constructor(params, options) {
         this.params = params;
+        this.lr = options?.lr || 0.001;
     }
     zeroGrad() {
         for (let index = 0; index < this.params.length; index++) {
@@ -14,16 +16,15 @@ class BaseOptimizer {
         }
     }
 }
+exports.BaseOptimizer = BaseOptimizer;
 class SGD extends BaseOptimizer {
     momentumBuffers = new Map();
-    lr;
     momentum;
     dampening;
     weightDecay;
     nesterov;
     constructor(params, options) {
-        super(params);
-        this.lr = options?.lr || 0.001;
+        super(params, options);
         this.momentum = options?.momentum || 0;
         this.dampening = options?.dampening || 0;
         this.weightDecay = options?.weightDecay || 0;
@@ -66,17 +67,16 @@ class SGD extends BaseOptimizer {
         }
     }
 }
+exports.SGD = SGD;
 class Adam extends BaseOptimizer {
     momentumBuffers = new Map(); // First moment (m_t)
     velocityBuffers = new Map(); // Second moment (v_t)
     stepCount = 0;
-    lr;
     betas;
     eps;
     weightDecay;
     constructor(params, options) {
-        super(params);
-        this.lr = options?.lr || 0.001;
+        super(params, options);
         this.betas = options?.betas || [0.9, 0.999];
         this.eps = options?.eps || 1e-8;
         this.weightDecay = options?.weightDecay || 0;
@@ -126,17 +126,16 @@ class Adam extends BaseOptimizer {
         }
     }
 }
+exports.Adam = Adam;
 class AdamW extends BaseOptimizer {
     momentumBuffers = new Map(); // First moment (m_t)
     velocityBuffers = new Map(); // Second moment (v_t)
     stepCount = 0;
-    lr;
     betas;
     eps;
     weightDecay;
     constructor(params, options) {
-        super(params);
-        this.lr = options?.lr || 0.001;
+        super(params, options);
         this.betas = options?.betas || [0.9, 0.999];
         this.eps = options?.eps || 1e-8;
         this.weightDecay = options?.weightDecay || 0.01;
@@ -184,6 +183,7 @@ class AdamW extends BaseOptimizer {
         }
     }
 }
+exports.AdamW = AdamW;
 exports.Optim = {
     BaseOptimizer,
     SGD,
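
At runtime the duplicated this.lr = options?.lr || 0.001 lines collapse into the base constructor, and each optimizer class becomes a named CommonJS export alongside the Optim namespace object. One caveat carried over from 0.6.x: the || default means an explicit lr of 0 still falls back to 0.001. A quick sketch of the shared default (empty params arrays for brevity; root re-export assumed):

    import { Optim } from "catniff";

    const sgd = new Optim.SGD([], { momentum: 0.9 }); // lr defaults to 0.001 in BaseOptimizer
    const adamw = new Optim.AdamW([], { lr: 3e-4 });
    console.log(sgd.lr, adamw.lr);                    // 0.001 0.0003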
package/index.d.ts
CHANGED
package/index.js
CHANGED