catniff 0.8.2 → 0.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core.js +1 -1
- package/dist/nn.d.ts +3 -1
- package/dist/nn.js +26 -1
- package/package.json +1 -1
package/dist/core.js CHANGED
package/dist/nn.d.ts CHANGED
@@ -70,6 +70,7 @@ export declare class Embedding {
     constructor(numEmbeddings: number, embeddingDim: number, device?: string, dtype?: dtype);
     forward(input: Tensor | TensorValue): Tensor;
 }
+export declare function scaledDotProductAttention(query: Tensor, key: Tensor, value: Tensor, attnMask?: Tensor, dropout?: number, isCausal?: boolean, scale?: number): Tensor;
 export declare class MultiheadAttention {
     qProjection: Linear;
     kProjection: Linear;
@@ -80,7 +81,7 @@ export declare class MultiheadAttention {
     headDim: number;
     dropout: number;
     constructor(embedDim: number, numHeads: number, dropout?: number, bias?: boolean, device?: string, dtype?: dtype);
-    forward(query: Tensor, key: Tensor, value: Tensor, needWeights?: boolean, attnMask?: Tensor, averageAttnWeights?: boolean): [Tensor, Tensor | undefined];
+    forward(query: Tensor, key: Tensor, value: Tensor, needWeights?: boolean, attnMask?: Tensor, averageAttnWeights?: boolean, isCausal?: boolean): [Tensor, Tensor | undefined];
 }
 export interface StateDict {
     [key: string]: any;
@@ -93,6 +94,7 @@ export declare const nn: {
     LayerNorm: typeof LayerNorm;
     RMSNorm: typeof RMSNorm;
     Embedding: typeof Embedding;
+    scaledDotProductAttention: typeof scaledDotProductAttention;
     MultiheadAttention: typeof MultiheadAttention;
     state: {
         getParameters(model: any, visited?: WeakSet<object>): Tensor[];
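A minimal usage sketch of the new 0.8.3 surface (not from the package docs): the call signatures come from the declarations above, but the root import path and Tensor.ones defaulting device/dtype when called with a shape alone are assumptions.

import { Tensor, nn } from "catniff"; // assumed root re-export of dist/core and dist/nn

// Batch-first shapes, matching MultiheadAttention.forward in dist/nn.js:
// query/key/value are [batchSize, seqLen, embedDim].
const q = Tensor.ones([2, 4, 16]); // assumes the options argument is optional
const k = Tensor.ones([2, 4, 16]);
const v = Tensor.ones([2, 4, 16]);

// New functional API: scaled dot-product attention with a causal mask
// (attnMask omitted, dropout 0, isCausal true, scale left at its default).
const out = nn.scaledDotProductAttention(q, k, v, undefined, 0, true);

// New trailing isCausal flag on MultiheadAttention.forward.
const mha = new nn.MultiheadAttention(16, 4);
const [attnOut, attnWeights] = mha.forward(q, q, q, true, undefined, true, true);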
package/dist/nn.js CHANGED
@@ -1,6 +1,7 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.nn = exports.MultiheadAttention = exports.Embedding = exports.RMSNorm = exports.LayerNorm = exports.LSTMCell = exports.GRUCell = exports.RNNCell = exports.Linear = void 0;
+exports.scaledDotProductAttention = scaledDotProductAttention;
 const core_1 = require("./core");
 function linearTransform(input, weight, bias) {
     let output = input.matmul(weight.t());
@@ -240,6 +241,25 @@ class Embedding {
     }
 }
 exports.Embedding = Embedding;
+function scaledDotProductAttention(query, key, value, attnMask, dropout = 0, isCausal = false, scale) {
+    const targetLen = query.shape[query.shape.length - 2];
+    const sourceLen = key.shape[key.shape.length - 2];
+    const dimSize = query.shape[query.shape.length - 1];
+    // Attention scores
+    let scores = query.matmul(key.transpose(-2, -1)).div(scale ?? Math.sqrt(dimSize));
+    // Set attention mask to causal mask if specified
+    if (isCausal) {
+        attnMask = core_1.Tensor.ones([targetLen, sourceLen], { device: query.device }).triu(1);
+    }
+    // Apply attention mask if specified
+    if (attnMask) {
+        scores = scores.maskedFill(attnMask, -Infinity);
+    }
+    // Calculate attention weights
+    let attnWeights = scores.softmax().dropout(dropout);
+    // Apply attention to values
+    return attnWeights.matmul(value);
+}
 class MultiheadAttention {
     qProjection;
     kProjection;
@@ -259,7 +279,7 @@ class MultiheadAttention {
         this.headDim = Math.floor(embedDim / numHeads);
         this.dropout = dropout;
     }
-    forward(query, key, value, needWeights = true, attnMask, averageAttnWeights = true) {
+    forward(query, key, value, needWeights = true, attnMask, averageAttnWeights = true, isCausal = false) {
         // Batch-first
         const [batchSize, targetLen, embedDim] = query.shape;
         const sourceLen = key.shape[1];
@@ -272,6 +292,10 @@ class MultiheadAttention {
         V = V.reshape([batchSize, sourceLen, this.numHeads, this.headDim]).transpose(1, 2);
         // Attention scores
         let scores = Q.matmul(K.transpose(-2, -1)).div(Math.sqrt(this.headDim));
+        // Set attention mask to causal mask if specified
+        if (isCausal) {
+            attnMask = core_1.Tensor.ones([targetLen, sourceLen], { device: this.qProjection.weight.device }).triu(1);
+        }
         // Apply attention mask if specified
         if (attnMask) {
             scores = scores.maskedFill(attnMask, -Infinity);
@@ -362,6 +386,7 @@ exports.nn = {
     LayerNorm,
     RMSNorm,
     Embedding,
+    scaledDotProductAttention,
     MultiheadAttention,
     state
 };
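For reference, the masking convention these hunks implement (read off the code above, not from separate documentation): truthy attnMask entries are filled with -Infinity before the softmax, so the mask marks positions to exclude, and isCausal simply builds that mask internally as the strict upper triangle via Tensor.ones([targetLen, sourceLen]).triu(1). A sketch of the equivalence, under the same import-path and Tensor.ones assumptions as the earlier example:

import { Tensor, nn } from "catniff";

const q = Tensor.ones([1, 3, 8]);
const k = Tensor.ones([1, 3, 8]);
const v = Tensor.ones([1, 3, 8]);

// Explicit mask: ones(...).triu(1) keeps the strict upper triangle,
//   [[0, 1, 1],
//    [0, 0, 1],
//    [0, 0, 0]]
// i.e. the "future" key positions that maskedFill will set to -Infinity.
const causalMask = Tensor.ones([3, 3]).triu(1);
const a = nn.scaledDotProductAttention(q, k, v, causalMask);

// Should be equivalent per the implementation above: the flag builds the same mask internally.
const b = nn.scaledDotProductAttention(q, k, v, undefined, 0, true);

Note that whenever isCausal is true, both scaledDotProductAttention and MultiheadAttention.forward overwrite any attnMask that was passed in, so the flag and an explicit mask should not be combined.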