npm - catniff - Versions diffs - 0.6.3 → 0.6.5 - Mend

catniff 0.6.3 → 0.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/README.md CHANGED Viewed

@@ -76,9 +76,9 @@ optim.step();
 console.log("Updated weight:", w.data);  // Should move toward 3.0
 ```
-## Neural networks
+## Neural networks & Deep learning
-There are built-in neural network constructs in Catniff as well:
+There are built-in neural network constructs in Catniff as well, from simple prebuilt nn layers:
 ```js
 const { Tensor, nn } = require("catniff");
@@ -102,6 +102,24 @@ gruCell.forward(b, c);
 lstmCell.forward(b, c, c);
 ```
+to more advanced constructs like normalization, embedding, and attention:
+```js
+// 1. Embedding: tokens -> vectors
+const embedding = new nn.Embedding(100, 64);
+const tokens = new Tensor([[1, 5, 23], [8, 2, 15]]);
+const embedded = embedding.forward(tokens);
+// 2. Self-Attention
+const attention = new nn.MultiheadAttention(64, 8, 0.1);
+const [output, weights] = attention.forward(embedded, embedded, embedded);
+// 3. Layer Normalization
+const layerNorm = new nn.LayerNorm(64);
+const normalized = layerNorm.forward(output);
+console.log(normalized.val());
+```
 And it can still do much more, check out the docs and examples below for more information.
 ## Documentation

package/dist/core.d.ts CHANGED Viewed

@@ -182,6 +182,9 @@ export declare class Tensor {
     mv(other: TensorValue | Tensor): Tensor;
     matmul(other: TensorValue | Tensor): Tensor;
     dropout(rate: number): Tensor;
+    triu(diagonal?: number): Tensor;
+    tril(diagonal?: number): Tensor;
+    maskedFill(mask: Tensor | TensorValue, value: number): Tensor;
     static full(shape: readonly number[], num: number, options?: TensorOptions): Tensor;
     static fullLike(tensor: Tensor, num: number, options?: TensorOptions): Tensor;
     static ones(shape?: readonly number[], options?: TensorOptions): Tensor;
@@ -194,6 +197,7 @@ export declare class Tensor {
     static randnLike(tensor: Tensor, options?: TensorOptions): Tensor;
     static randint(shape: readonly number[], low: number, high: number, options?: TensorOptions): Tensor;
     static randintLike(tensor: Tensor, low: number, high: number, options?: TensorOptions): Tensor;
+    static randperm(n: number, options?: TensorOptions): Tensor;
     static normal(shape: number[], mean: number, stdDev: number, options?: TensorOptions): Tensor;
     static uniform(shape: number[], low: number, high: number, options?: TensorOptions): Tensor;
     static arange(start: number, stop?: number, step?: number, options?: TensorOptions): Tensor;

package/dist/core.js CHANGED Viewed

@@ -1417,6 +1417,59 @@ class Tensor {
         const mask = uniform.lt(keepRate);
         return this.mul(mask).div(keepRate);
     }
+    // Get the upper triangular part with respect to main diagonal
+    triu(diagonal = 0) {
+        if (this.shape.length < 2) {
+            throw new Error("triu requires at least 2 dimensions");
+        }
+        const maskShape = this.shape.slice(-2);
+        const maskStrides = Tensor.getStrides(maskShape);
+        const maskSize = Tensor.shapeToSize(maskShape);
+        const maskValue = new Array(maskSize).fill(1);
+        const [rows, cols] = maskShape;
+        for (let i = 0; i < rows; i++) {
+            const maxJ = Math.min(cols, i + diagonal);
+            for (let j = 0; j < maxJ; j++) {
+                maskValue[i * maskStrides[0] + j * maskStrides[1]] = 0;
+            }
+        }
+        const mask = new Tensor(maskValue, {
+            shape: maskShape,
+            strides: maskStrides,
+            numel: maskSize,
+            device: this.device
+        });
+        return this.mul(mask);
+    }
+    // Get the lower triangular part with respect to main diagonal
+    tril(diagonal = 0) {
+        if (this.shape.length < 2) {
+            throw new Error("triu requires at least 2 dimensions");
+        }
+        const maskShape = this.shape.slice(-2);
+        const maskStrides = Tensor.getStrides(maskShape);
+        const maskSize = Tensor.shapeToSize(maskShape);
+        const maskValue = new Array(maskSize).fill(0);
+        const [rows, cols] = maskShape;
+        for (let i = 0; i < rows; i++) {
+            const maxJ = Math.min(cols, i + diagonal + 1);
+            for (let j = 0; j < maxJ; j++) {
+                maskValue[i * maskStrides[0] + j * maskStrides[1]] = 1;
+            }
+        }
+        const mask = new Tensor(maskValue, {
+            shape: maskShape,
+            strides: maskStrides,
+            numel: maskSize,
+            device: this.device
+        });
+        return this.mul(mask);
+    }
+    // Fill specific positions of this tensor with a value through a mask
+    maskedFill(mask, value) {
+        mask = this.handleOther(mask);
+        return this.mul(mask.logicalNot()).add(mask.mul(value));
+    }
     // Utility to create a new tensor filled with a number
     static full(shape, num, options = {}) {
         if (shape.length === 0)
@@ -1552,6 +1605,15 @@ class Tensor {
             ...options
         });
     }
+    // Utility to create a new tensor filled with integers from 0 to n, randomly shuffled
+    static randperm(n, options = {}) {
+        const outputValue = new Array(n);
+        for (let i = 0; i < n; i++) {
+            outputValue[i] = i;
+        }
+        (0, utils_1.fyShuffle)(outputValue);
+        return new Tensor(outputValue, { shape: [n], numel: n, ...options });
+    }
     // Utility to create a new tensor filled with a random number with normal distribution of custom mean and stddev
     static normal(shape, mean, stdDev, options = {}) {
         if (shape.length === 0)

package/dist/nn.d.ts CHANGED Viewed

@@ -62,6 +62,18 @@ declare class Embedding {
     constructor(numEmbeddings: number, embeddingDim: number, device: string);
     forward(input: Tensor | TensorValue): Tensor;
 }
+declare class MultiheadAttention {
+    qProjection: Linear;
+    kProjection: Linear;
+    vProjection: Linear;
+    oProjection: Linear;
+    embedDim: number;
+    numHeads: number;
+    headDim: number;
+    dropout: number;
+    constructor(embedDim: number, numHeads: number, dropout?: number, bias?: boolean, device?: string);
+    forward(query: Tensor, key: Tensor, value: Tensor, needWeights?: boolean, attnMask?: Tensor, averageAttnWeights?: boolean): [Tensor, Tensor | undefined];
+}
 export interface StateDict {
     [key: string]: any;
 }
@@ -72,6 +84,7 @@ export declare const nn: {
     LSTMCell: typeof LSTMCell;
     LayerNorm: typeof LayerNorm;
     Embedding: typeof Embedding;
+    MultiheadAttention: typeof MultiheadAttention;
     state: {
         getParameters(model: any, visited?: WeakSet<object>): Tensor[];
         moveParameters(model: any, device: string): void;

package/dist/nn.js CHANGED Viewed

@@ -197,6 +197,57 @@ class Embedding {
         return this.weight.index(input);
     }
 }
+class MultiheadAttention {
+    qProjection;
+    kProjection;
+    vProjection;
+    oProjection;
+    embedDim;
+    numHeads;
+    headDim;
+    dropout;
+    constructor(embedDim, numHeads, dropout = 0, bias = true, device) {
+        this.qProjection = new exports.nn.Linear(embedDim, embedDim, bias, device);
+        this.kProjection = new exports.nn.Linear(embedDim, embedDim, bias, device);
+        this.vProjection = new exports.nn.Linear(embedDim, embedDim, bias, device);
+        this.oProjection = new exports.nn.Linear(embedDim, embedDim, bias, device);
+        this.embedDim = embedDim;
+        this.numHeads = numHeads;
+        this.headDim = Math.floor(embedDim / numHeads);
+        this.dropout = dropout;
+    }
+    forward(query, key, value, needWeights = true, attnMask, averageAttnWeights = true) {
+        // Batch-first
+        const [batchSize, targetLen, embedDim] = query.shape;
+        const sourceLen = key.shape[1];
+        let Q = this.qProjection.forward(query); // (batchSize, targetLen, embedDim)
+        let K = this.kProjection.forward(key); // (batchSize, sourceLen, embedDim)
+        let V = this.vProjection.forward(value); // (batchSize, sourceLen, embedDim)
+        // (batchSize, numHeads, targetLen/sourceLen, headDim)
+        Q = Q.reshape([batchSize, targetLen, this.numHeads, this.headDim]).transpose(1, 2);
+        K = K.reshape([batchSize, sourceLen, this.numHeads, this.headDim]).transpose(1, 2);
+        V = V.reshape([batchSize, sourceLen, this.numHeads, this.headDim]).transpose(1, 2);
+        // Attention scores
+        let scores = Q.matmul(K.transpose(-2, -1)).div(Math.sqrt(this.headDim));
+        // Apply attention mask if specified
+        if (attnMask) {
+            scores = scores.maskedFill(attnMask, -Infinity);
+        }
+        // Calculate attention weights
+        let attnWeights = scores.softmax().dropout(this.dropout);
+        // Apply attention to values
+        let attnOutput = attnWeights.matmul(V); // (batchSize, numHeads, targetLen, headDim)
+        // (batchSize, targetLen, embedDim)
+        attnOutput = attnOutput.transpose(1, 2).reshape([batchSize, targetLen, embedDim]);
+        // Output
+        const output = this.oProjection.forward(attnOutput);
+        // Average weights if needed
+        if (averageAttnWeights) {
+            attnWeights = attnWeights.mean(1);
+        }
+        return [output, needWeights ? attnWeights : undefined];
+    }
+}
 const state = {
     getParameters(model, visited = new WeakSet()) {
         if (visited.has(model))
@@ -266,5 +317,6 @@ exports.nn = {
     LSTMCell,
     LayerNorm,
     Embedding,
+    MultiheadAttention,
     state
 };

package/dist/utils.d.ts CHANGED Viewed

@@ -4,3 +4,4 @@ export declare function erfinv(x: number): number;
 export declare function randUniform(low?: number, high?: number): number;
 export declare function randNormal(mean?: number, stdDev?: number): number;
 export declare function randInt(low: number, high: number): number;
+export declare function fyShuffle(array: any[]): void;

package/dist/utils.js CHANGED Viewed

@@ -6,6 +6,7 @@ exports.erfinv = erfinv;
 exports.randUniform = randUniform;
 exports.randNormal = randNormal;
 exports.randInt = randInt;
+exports.fyShuffle = fyShuffle;
 // Error function using Abramowitz and Stegun approximation
 function erf(x) {
     const a1 = 0.254829592;
@@ -36,15 +37,26 @@ function erfinv(x) {
     const sign = x >= 0 ? 1 : -1;
     return sign * Math.sqrt(-part1 + Math.sqrt(part1 * part1 - part2));
 }
+// Generate a random number with uniform distribution
 function randUniform(low = 0, high = 1) {
     return Math.random() * (high - low) + low;
 }
+// Generate a random number with normal distribution
 function randNormal(mean = 0, stdDev = 1) {
     const u = 1 - Math.random();
     const v = 1 - Math.random();
     const z = Math.sqrt(-2.0 * Math.log(u)) * Math.cos(2.0 * Math.PI * v);
     return z * stdDev + mean;
 }
+// Generate a random integer
 function randInt(low, high) {
     return Math.floor(Math.random() * (high - low) + low);
 }
+// Randomly shuffle an array with fisher-yates algorithm
+function fyShuffle(array) {
+    for (let i = array.length - 1; i > 0; i--) {
+        const j = Math.floor(Math.random() * (i + 1));
+        [array[i], array[j]] = [array[j], array[i]];
+    }
+}
+;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "catniff",
-  "version": "0.6.3",
+  "version": "0.6.5",
   "description": "A small Torch-like deep learning framework for Javascript",
   "main": "index.js",
   "scripts": {