catniff 0.8.2 → 0.8.4

package/dist/core.d.ts CHANGED
@@ -201,6 +201,7 @@ export declare class Tensor {
     triu(diagonal?: number): Tensor;
     tril(diagonal?: number): Tensor;
     maskedFill(mask: Tensor | TensorValue, value: number): Tensor;
+    multinomial(numSamples: number, replacement?: boolean): Tensor;
     static full(shape: number[], num: number, options?: TensorOptions): Tensor;
     static fullLike(tensor: Tensor, num: number, options?: TensorOptions): Tensor;
     static ones(shape?: number[], options?: TensorOptions): Tensor;
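
Based on the declaration added above, `multinomial` draws integer category indices from a tensor whose values are treated as (unnormalized) probability weights, with `replacement` defaulting to false. A minimal TypeScript usage sketch — constructing a Tensor from a plain array and importing from the package root are assumptions not confirmed by this diff:

import { Tensor } from "catniff";

// Hypothetical construction: assumes the Tensor constructor accepts a plain array.
const probs = new Tensor([0.1, 0.2, 0.7]);

// Five draws with replacement: indices in 0..2, weighted by probs.
const a = probs.multinomial(5, true);

// Without replacement (the default), numSamples must not exceed the
// number of categories, so at most 3 draws are possible here.
const b = probs.multinomial(2);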
package/dist/core.js CHANGED
@@ -636,7 +636,7 @@ class Tensor {
         let start = range[0] ?? 0;
         let end = range[1] ?? dimSize;
         let step = range[2] ?? 1;
-        // Handle negative indicesoutGrad
+        // Handle negative indices
         if (start < 0)
             start += dimSize;
         if (end < 0)
@@ -949,14 +949,14 @@ class Tensor {
         // Copy if not contiguous
         const outputSize = this.numel;
         const outputShape = this.shape;
-        let outputValue, outputStrides;
+        const outputValue = new dtype_1.TypedArray[this.dtype](outputSize);
+        const outputStrides = Tensor.getStrides(outputShape);
         if (this.isContiguous()) {
-            outputValue = [...this.value];
-            outputStrides = this.strides;
+            // Fast path: direct copy
+            outputValue.set(this.value.subarray(this.offset, this.offset + outputSize));
         }
         else {
-            outputValue = new dtype_1.TypedArray[this.dtype](outputSize);
-            outputStrides = Tensor.getStrides(outputShape);
+            // Slow path: coordinate conversion
             for (let flatIndex = 0; flatIndex < outputSize; flatIndex++) {
                 const coords = Tensor.indexToCoords(flatIndex, outputStrides);
                 const originalIndex = Tensor.coordsToIndex(coords, this.strides);
@@ -1912,6 +1912,129 @@ class Tensor {
         mask = this.handleOther(mask);
         return this.mul(mask.logicalNot()).add(mask.mul(value));
     }
+    // Multinomial sampling
+    multinomial(numSamples, replacement = false) {
+        // Validate input dimensions (1D or 2D only)
+        if (this.shape.length === 0 || this.shape.length > 2) {
+            throw new Error("multinomial only supports 1D or 2D probability tensors");
+        }
+        const is1D = this.shape.length === 1;
+        const numDist = is1D ? 1 : this.shape[0];
+        const numCategories = is1D ? this.shape[0] : this.shape[1];
+        // Validate numSamples
+        if (numSamples <= 0) {
+            throw new Error("Number of samples must be positive");
+        }
+        if (!replacement && numSamples > numCategories) {
+            throw new Error(`Cannot sample ${numSamples} without replacement from ${numCategories} categories`);
+        }
+        // Make contiguous copy of probabilities
+        const probsSize = this.numel;
+        const probs = new dtype_1.TypedArray[this.dtype](probsSize);
+        if (this.isContiguous()) {
+            // Fast path: direct copy
+            probs.set(this.value.subarray(this.offset, this.offset + probsSize));
+        }
+        else {
+            // Slow path: coordinate conversion
+            const defaultStrides = Tensor.getStrides(this.shape);
+            for (let i = 0; i < probsSize; i++) {
+                const coords = Tensor.indexToCoords(i, defaultStrides);
+                const idx = Tensor.coordsToIndex(coords, this.strides);
+                probs[i] = this.value[idx + this.offset];
+            }
+        }
+        // Output setup
+        const outputShape = is1D ? [numSamples] : [numDist, numSamples];
+        const outputValue = new Int32Array(numDist * numSamples);
+        // Sample from each distribution
+        for (let dist = 0; dist < numDist; dist++) {
+            const offset = dist * numCategories;
+            // Extract this distribution's probabilities
+            const distProbs = probs.slice(offset, offset + numCategories);
+            // Validate and normalize
+            let sum = 0;
+            for (let i = 0; i < numCategories; i++) {
+                if (distProbs[i] < 0) {
+                    throw new Error("Probabilities cannot be negative");
+                }
+                sum += distProbs[i];
+            }
+            if (sum <= 0) {
+                throw new Error("Probabilities must sum to a positive value");
+            }
+            // Normalize
+            for (let i = 0; i < numCategories; i++) {
+                distProbs[i] /= sum;
+            }
+            if (replacement) {
+                // With replacement: use CDF for efficient sampling
+                const cdf = new Array(numCategories);
+                let cumSum = 0;
+                for (let i = 0; i < numCategories; i++) {
+                    cumSum += distProbs[i];
+                    cdf[i] = cumSum;
+                }
+                cdf[numCategories - 1] = 1;
+                for (let s = 0; s < numSamples; s++) {
+                    const r = Math.random();
+                    // Binary search for efficiency
+                    let left = 0;
+                    let right = numCategories - 1;
+                    while (left < right) {
+                        const mid = Math.floor((left + right) / 2);
+                        if (r <= cdf[mid]) {
+                            right = mid;
+                        }
+                        else {
+                            left = mid + 1;
+                        }
+                    }
+                    outputValue[dist * numSamples + s] = left;
+                }
+            }
+            else {
+                // Without replacement: weighted sampling without replacement
+                const available = Array.from({ length: numCategories }, (_, i) => ({
+                    idx: i,
+                    prob: distProbs[i]
+                }));
+                for (let s = 0; s < numSamples; s++) {
+                    // Compute sum of remaining probabilities
+                    let remainingSum = 0;
+                    for (const item of available) {
+                        remainingSum += item.prob;
+                    }
+                    // Sample from remaining
+                    const r = Math.random() * remainingSum;
+                    let cumSum = 0;
+                    let selectedIdx = -1;
+                    for (let i = 0; i < available.length; i++) {
+                        cumSum += available[i].prob;
+                        if (r <= cumSum) {
+                            selectedIdx = i;
+                            break;
+                        }
+                    }
+                    // Handle floating point edge case
+                    if (selectedIdx === -1) {
+                        selectedIdx = available.length - 1;
+                    }
+                    // Store result and remove from available
+                    outputValue[dist * numSamples + s] = available[selectedIdx].idx;
+                    available.splice(selectedIdx, 1);
+                }
+            }
+        }
+        return new Tensor(outputValue, {
+            shape: outputShape,
+            strides: Tensor.getStrides(outputShape),
+            offset: 0,
+            numel: numDist * numSamples,
+            device: this.device,
+            dtype: "int32"
+        });
+    }
     // Utility to create a new tensor filled with a number
     static full(shape, num, options = {}) {
         if (shape.length === 0)
@@ -2094,6 +2217,7 @@ class Tensor {
             shape,
             offset: 0,
             numel: outputSize,
+            dtype: "int32",
             ...options
         });
     }
@@ -2130,6 +2254,7 @@ class Tensor {
             shape: [n],
             offset: 0,
             numel: n,
+            dtype: "int32",
             ...options
         });
     }
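
The with-replacement branch of `multinomial` above is inverse-CDF sampling: normalize the weights, accumulate them into a cumulative distribution, then binary-search that CDF for each uniform draw. A self-contained TypeScript sketch of the same idea (a hypothetical standalone helper, not part of catniff):

// Inverse-CDF sampling with replacement, mirroring the branch above.
function sampleWithReplacement(probs: number[], numSamples: number): number[] {
    const n = probs.length;
    const total = probs.reduce((acc, p) => acc + p, 0);
    // Build the cumulative distribution once, in O(n).
    const cdf = new Array<number>(n);
    let cum = 0;
    for (let i = 0; i < n; i++) {
        cum += probs[i] / total;
        cdf[i] = cum;
    }
    cdf[n - 1] = 1; // guard against floating-point drift
    const out: number[] = [];
    for (let s = 0; s < numSamples; s++) {
        const r = Math.random();
        // Binary search for the first index with r <= cdf[index]: O(log n) per draw.
        let left = 0;
        let right = n - 1;
        while (left < right) {
            const mid = (left + right) >> 1;
            if (r <= cdf[mid]) right = mid;
            else left = mid + 1;
        }
        out.push(left);
    }
    return out;
}

// e.g. sampleWithReplacement([1, 1, 2], 4) picks index 2 with probability 1/2 per draw.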
package/dist/nn.d.ts CHANGED
@@ -70,6 +70,7 @@ export declare class Embedding {
     constructor(numEmbeddings: number, embeddingDim: number, device?: string, dtype?: dtype);
     forward(input: Tensor | TensorValue): Tensor;
 }
+export declare function scaledDotProductAttention(query: Tensor, key: Tensor, value: Tensor, attnMask?: Tensor, dropout?: number, isCausal?: boolean, scale?: number): Tensor;
 export declare class MultiheadAttention {
     qProjection: Linear;
     kProjection: Linear;
@@ -80,7 +81,7 @@ export declare class MultiheadAttention {
     headDim: number;
     dropout: number;
     constructor(embedDim: number, numHeads: number, dropout?: number, bias?: boolean, device?: string, dtype?: dtype);
-    forward(query: Tensor, key: Tensor, value: Tensor, needWeights?: boolean, attnMask?: Tensor, averageAttnWeights?: boolean): [Tensor, Tensor | undefined];
+    forward(query: Tensor, key: Tensor, value: Tensor, needWeights?: boolean, attnMask?: Tensor, averageAttnWeights?: boolean, isCausal?: boolean): [Tensor, Tensor | undefined];
 }
 export interface StateDict {
     [key: string]: any;
@@ -93,6 +94,7 @@ export declare const nn: {
     LayerNorm: typeof LayerNorm;
     RMSNorm: typeof RMSNorm;
     Embedding: typeof Embedding;
+    scaledDotProductAttention: typeof scaledDotProductAttention;
     MultiheadAttention: typeof MultiheadAttention;
     state: {
         getParameters(model: any, visited?: WeakSet<object>): Tensor[];
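
A usage sketch for the new functional API, following the signature declared above (query, key, value, attnMask?, dropout?, isCausal?, scale?). Importing `nn` and `Tensor` from the package root is an assumption; `Tensor.ones` is used only to produce correctly-shaped placeholder inputs:

import { nn, Tensor } from "catniff"; // root re-export assumed, see nn.d.ts

// Shapes [batch, heads, seqLen, headDim]; any [..., L, E] layout should work,
// since scores are computed via matmul against key.transpose(-2, -1).
const q = Tensor.ones([2, 4, 8, 16]);
const k = Tensor.ones([2, 4, 8, 16]);
const v = Tensor.ones([2, 4, 8, 16]);

// Causal attention with 10% dropout; the score divisor defaults to
// sqrt(headDim) when scale is omitted.
const out = nn.scaledDotProductAttention(q, k, v, undefined, 0.1, true);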
package/dist/nn.js CHANGED
@@ -1,6 +1,7 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.nn = exports.MultiheadAttention = exports.Embedding = exports.RMSNorm = exports.LayerNorm = exports.LSTMCell = exports.GRUCell = exports.RNNCell = exports.Linear = void 0;
+exports.scaledDotProductAttention = scaledDotProductAttention;
 const core_1 = require("./core");
 function linearTransform(input, weight, bias) {
     let output = input.matmul(weight.t());
@@ -240,6 +241,25 @@ class Embedding {
     }
 }
 exports.Embedding = Embedding;
+function scaledDotProductAttention(query, key, value, attnMask, dropout = 0, isCausal = false, scale) {
+    const targetLen = query.shape[query.shape.length - 2];
+    const sourceLen = key.shape[key.shape.length - 2];
+    const dimSize = query.shape[query.shape.length - 1];
+    // Attention scores
+    let scores = query.matmul(key.transpose(-2, -1)).div(scale ?? Math.sqrt(dimSize));
+    // Set attention mask to causal mask if specified
+    if (isCausal) {
+        attnMask = core_1.Tensor.ones([targetLen, sourceLen], { device: query.device }).triu(1);
+    }
+    // Apply attention mask if specified
+    if (attnMask) {
+        scores = scores.maskedFill(attnMask, -Infinity);
+    }
+    // Calculate attention weights
+    let attnWeights = scores.softmax().dropout(dropout);
+    // Apply attention to values
+    return attnWeights.matmul(value);
+}
 class MultiheadAttention {
     qProjection;
     kProjection;
@@ -259,7 +279,7 @@ class MultiheadAttention {
         this.headDim = Math.floor(embedDim / numHeads);
         this.dropout = dropout;
     }
-    forward(query, key, value, needWeights = true, attnMask, averageAttnWeights = true) {
+    forward(query, key, value, needWeights = true, attnMask, averageAttnWeights = true, isCausal = false) {
         // Batch-first
         const [batchSize, targetLen, embedDim] = query.shape;
         const sourceLen = key.shape[1];
@@ -272,6 +292,10 @@ class MultiheadAttention {
         V = V.reshape([batchSize, sourceLen, this.numHeads, this.headDim]).transpose(1, 2);
         // Attention scores
         let scores = Q.matmul(K.transpose(-2, -1)).div(Math.sqrt(this.headDim));
+        // Set attention mask to causal mask if specified
+        if (isCausal) {
+            attnMask = core_1.Tensor.ones([targetLen, sourceLen], { device: this.qProjection.weight.device }).triu(1);
+        }
         // Apply attention mask if specified
         if (attnMask) {
             scores = scores.maskedFill(attnMask, -Infinity);
@@ -362,6 +386,7 @@ exports.nn = {
     LayerNorm,
     RMSNorm,
     Embedding,
+    scaledDotProductAttention,
     MultiheadAttention,
     state
 };
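
And a sketch of the new `isCausal` flag on `MultiheadAttention.forward`, per the updated declaration in nn.d.ts (batch-first inputs, returning `[output, attnWeights | undefined]`). The root import is again an assumption:

import { nn, Tensor } from "catniff"; // root re-export assumed

// embedDim 32 split across 4 heads (headDim 8); dropout/bias use defaults.
const mha = new nn.MultiheadAttention(32, 4);

// Batch-first self-attention input: [batchSize, seqLen, embedDim].
const x = Tensor.ones([2, 10, 32]);

// isCausal = true builds the upper-triangular mask internally,
// so no attnMask needs to be passed.
const [output, weights] = mha.forward(x, x, x, true, undefined, true, true);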
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "catniff",
-  "version": "0.8.2",
+  "version": "0.8.4",
   "description": "Torch-like deep learning framework for Javascript",
   "main": "index.js",
   "scripts": {