npm - @oxide-js/spiking - Versions diffs - 1.1.0 → 1.3.0 - Mend

@oxide-js/spiking 1.1.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

package/CHANGELOG.md +19 -0
package/index.cjs +322 -0
package/index.d.ts +5 -13
package/index.js +6 -2
package/package.json +1 -1
package/spiking-native.linux-x64-gnu.node +0 -0
package/src/index.ts +4 -2
package/src/layers/SpikingDense.ts +71 -42
package/src/layers/SpikingDenseBPTT.ts +303 -0
package/src/layers/SpikingEmbedding.ts +154 -142
package/src/layers/SpikingSelfAttention.ts +335 -0
package/src/native_backend.ts +39 -3
package/src-rust/src/contrastive.rs +85 -0
package/src-rust/src/delta.rs +51 -0
package/src-rust/src/dot_product.rs +47 -0
package/src-rust/src/embedding.rs +28 -0
package/src-rust/src/lib.rs +16 -460
package/src-rust/src/lif.rs +44 -0
package/src-rust/src/surrogate.rs +28 -0
package/test/SpikingDenseBPTT.test.ts +151 -0
package/test/SpikingSelfAttention.test.ts +148 -0
package/test/test_embedding_overlap.ts +181 -0
package/examples/demo.ts +0 -101
package/src/models/SpikingSentenceEmbedder.ts +0 -135

package/src/layers/SpikingSelfAttention.ts ADDED Viewed

@@ -0,0 +1,335 @@
+import { BaseLayer, LayerConfig, ForwardOptions } from "@oxide-js/layers";
+import { Matrix, mj } from "@oxide-js/core";
+import { isNativeAvailable, lifStepNativeWrapper } from "../native_backend.js";
+import dotProductAddOnly from "../math/dotProductAddOnly.js";
+export interface SpikingSelfAttentionConfig extends LayerConfig { // Constructor options for SpikingSelfAttention.
+  d_model: number; // Width of the Q/K/V projections and of each output row.
+  sequenceLength: number; // Rows per sequence; compute() requires the input row count to be a multiple of this.
+  kernelInitializer?: string; // Initializer name for the three kernels; the layer falls back to "glorot_normal".
+  betaRange?: [number, number]; // [min, max] each Q/K/V unit's decay is drawn from; the layer falls back to [0.8, 0.99].
+  thresholdRange?: [number, number]; // [min, max] each Q/K/V unit's firing threshold is drawn from; the layer falls back to [0.1, 0.3].
+}
+/**
+ * Self-attention layer assembled from leaky integrate-and-fire (LIF) units.
+ *
+ * `compute` takes a flat `[batch * sequenceLength, inFeatures]` matrix, projects it through
+ * the Q, K and V kernels with `dotProductAddOnly`, thresholds each projection into spikes,
+ * scores every query row against every key row of the same sequence by counting the units
+ * that are active in both, and mixes the value spikes with those normalized scores.
+ * Membrane potentials persist across `compute` calls until `resetState()` clears them.
+ */
+export class SpikingSelfAttention extends BaseLayer {
+  /** Width of the Q/K/V projections and of every output row. */
+  public d_model: number;
+  /** Number of consecutive input rows that form one sequence. */
+  public sequenceLength: number;
+  /** Initializer name used for all three kernels. */
+  public kernelInitializer: string;
+  /** `[min, max]` interval the per-unit Q/K/V decay factors are drawn from. */
+  public betaRange: [number, number];
+  /** `[min, max]` interval the per-unit Q/K/V firing thresholds are drawn from. */
+  public thresholdRange: [number, number];
+  // Q, K, V kernels (undefined until build() has registered them).
+  public get kernelQ(): Matrix | undefined { return this.getParameter("kernelQ"); }
+  public get kernelK(): Matrix | undefined { return this.getParameter("kernelK"); }
+  public get kernelV(): Matrix | undefined { return this.getParameter("kernelV"); }
+  // LIF parameters and membrane potentials shared by the Q, K and V projections.
+  public betaQKV!: Float32Array;
+  public thresholdQKV!: Float32Array;
+  public potentialsQ!: Matrix;
+  public potentialsK!: Matrix;
+  public potentialsV!: Matrix;
+  // LIF parameters and membrane potentials of the attention-score units (the softmax stand-in).
+  public betaScores!: Float32Array;
+  public thresholdScores!: Float32Array;
+  public potentialsScores!: Matrix;
+  /** Input of the most recent `compute` call, kept (by reference) for `learnAttention`. */
+  public lastInputs?: Matrix;
+  // Scratch buffers, allocated by ensurePotentialsShape() and reused on every compute() call.
+  private sqDataBuffer?: Float32Array;
+  private skDataBuffer?: Float32Array;
+  private svDataBuffer?: Float32Array;
+  private dummyLpBuffer?: Float32Array;
+  private matchScoresBuffer?: Float32Array;
+  private qGatedVBuffer?: Float32Array;
+  private outSpikesBuffer?: Float32Array;
+  private sScoresDataBuffer?: Float32Array;
+  private dummyLpScoresBuffer?: Float32Array;
+  private tempMatchesBuffer?: Float32Array;
+  /**
+   * @param config Layer options; missing optional entries fall back to
+   *   "glorot_normal", `[0.8, 0.99]` (beta) and `[0.1, 0.3]` (threshold).
+   */
+  constructor(config: SpikingSelfAttentionConfig) {
+    super(config);
+    this.d_model = config.d_model;
+    this.sequenceLength = config.sequenceLength;
+    this.kernelInitializer = config.kernelInitializer || "glorot_normal";
+    this.betaRange = config.betaRange || [0.8, 0.99];
+    this.thresholdRange = config.thresholdRange || [0.1, 0.3];
+  }
+  /**
+   * @param inputShape Shape of the flat input, `[batch * sequenceLength, inFeatures]`.
+   * @returns `[inputShape[0], d_model]`; the row count is passed through (or -1 when absent).
+   */
+  public computeOutputShape(inputShape: number[]): number[] {
+    const rows = inputShape[0] ?? -1;
+    return [rows, this.d_model];
+  }
+  /**
+   * Registers the three kernels, draws the per-unit LIF parameters and installs empty
+   * potential matrices (their real size is only known at the first `compute` call).
+   *
+   * @param inputShape Shape of the input; its last entry is used as the kernel row count.
+   */
+  public build(inputShape: number[]): void {
+    super.build(inputShape);
+    // Last input dimension; expected to be equal to d_model.
+    const inFeatures = inputShape[inputShape.length - 1];
+    // 1. Q, K, V kernels, registered in that order.
+    for (const name of ["kernelQ", "kernelK", "kernelV"]) {
+      this.addParameter(name, this.createInitializer(this.kernelInitializer, [inFeatures, this.d_model]), true, [inFeatures, this.d_model]);
+    }
+    // Scale the initial weights up so the units actually spike (otherwise the layer stays silent).
+    const scale = Math.sqrt(inFeatures);
+    for (const kernel of [this.kernelQ!, this.kernelK!, this.kernelV!]) {
+      const weights = kernel._data;
+      for (let i = 0; i < weights.length; i++) {
+        weights[i] *= scale;
+      }
+    }
+    // 2. Per-unit decay and threshold for Q, K, V, drawn uniformly from the configured ranges.
+    const [betaMin, betaMax] = this.betaRange;
+    const [thresholdMin, thresholdMax] = this.thresholdRange;
+    this.betaQKV = new Float32Array(this.d_model);
+    this.thresholdQKV = new Float32Array(this.d_model);
+    for (let i = 0; i < this.d_model; i++) {
+      this.betaQKV[i] = betaMin + Math.random() * (betaMax - betaMin);
+      this.thresholdQKV[i] = thresholdMin + Math.random() * (thresholdMax - thresholdMin);
+    }
+    // 3. Fixed decay and threshold for the attention-score units. In the JS fallback
+    // (runLIF) potentials are capped at 1.0, so a score unit fires only once it saturates.
+    this.betaScores = new Float32Array(this.sequenceLength).fill(0.9);
+    this.thresholdScores = new Float32Array(this.sequenceLength).fill(1.0);
+    // Potentials start empty and are sized on demand by ensurePotentialsShape().
+    this.potentialsQ = Matrix.fromFlat(new Float32Array(0), [0, 0]);
+    this.potentialsK = Matrix.fromFlat(new Float32Array(0), [0, 0]);
+    this.potentialsV = Matrix.fromFlat(new Float32Array(0), [0, 0]);
+    this.potentialsScores = Matrix.fromFlat(new Float32Array(0), [0, 0]);
+  }
+  /**
+   * (Re)allocates potentials and scratch buffers when the row count changes or on first use.
+   * Reallocation discards any accumulated membrane potentials.
+   */
+  private ensurePotentialsShape(batchSeq: number, seqLen: number) {
+    if (this.potentialsQ._shape[0] === batchSeq && this.sqDataBuffer) {
+      return;
+    }
+    const projectionSize = batchSeq * this.d_model;
+    const scoreSize = batchSeq * seqLen;
+    this.potentialsQ = Matrix.fromFlat(new Float32Array(projectionSize), [batchSeq, this.d_model]);
+    this.potentialsK = Matrix.fromFlat(new Float32Array(projectionSize), [batchSeq, this.d_model]);
+    this.potentialsV = Matrix.fromFlat(new Float32Array(projectionSize), [batchSeq, this.d_model]);
+    this.potentialsScores = Matrix.fromFlat(new Float32Array(scoreSize), [batchSeq, seqLen]);
+    this.sqDataBuffer = new Float32Array(projectionSize);
+    this.skDataBuffer = new Float32Array(projectionSize);
+    this.svDataBuffer = new Float32Array(projectionSize);
+    this.dummyLpBuffer = new Float32Array(projectionSize);
+    this.matchScoresBuffer = new Float32Array(scoreSize);
+    this.qGatedVBuffer = new Float32Array(projectionSize);
+    this.outSpikesBuffer = new Float32Array(projectionSize);
+    this.sScoresDataBuffer = new Float32Array(scoreSize);
+    this.dummyLpScoresBuffer = new Float32Array(scoreSize);
+    this.tempMatchesBuffer = new Float32Array(seqLen);
+  }
+  /** Zeroes every membrane potential while keeping the allocated buffers. */
+  public resetState() {
+    if (this.potentialsQ) this.potentialsQ._data.fill(0);
+    if (this.potentialsK) this.potentialsK._data.fill(0);
+    if (this.potentialsV) this.potentialsV._data.fill(0);
+    if (this.potentialsScores) this.potentialsScores._data.fill(0);
+  }
+  /**
+   * Runs one attention step.
+   *
+   * @param inputs Flat `[batch * sequenceLength, inFeatures]` matrix.
+   * @param options Unused by this layer.
+   * @returns A `[batch * sequenceLength, d_model]` matrix with values capped at 1.0.
+   *   NOTE(review): the result wraps the reusable output buffer, so unless `Matrix.fromFlat`
+   *   copies its argument the returned data is overwritten by the next call.
+   * @throws Error when the row count is not a multiple of `sequenceLength`.
+   */
+  protected compute(inputs: Matrix, options?: ForwardOptions): Matrix {
+    const batchSeq = inputs._shape[0];
+    const seqLen = this.sequenceLength;
+    const batch = batchSeq / seqLen;
+    const d_model = this.d_model;
+    if (!Number.isInteger(batch)) {
+      throw new Error(`[SpikingSelfAttention] Jumlah baris input (${batchSeq}) harus merupakan kelipatan dari sequenceLength (${seqLen}).`);
+    }
+    this.ensurePotentialsShape(batchSeq, seqLen);
+    this.lastInputs = inputs; // kept for learnAttention()
+    // 1. Q, K, V projections.
+    const dotQ = dotProductAddOnly(inputs, this.kernelQ!);
+    const dotK = dotProductAddOnly(inputs, this.kernelK!);
+    const dotV = dotProductAddOnly(inputs, this.kernelV!);
+    // 2. LIF step turning the projections into the spike matrices S_Q, S_K, S_V.
+    const sqData = this.sqDataBuffer!;
+    const skData = this.skDataBuffer!;
+    const svData = this.svDataBuffer!;
+    const dummyLp = this.dummyLpBuffer!;
+    sqData.fill(0);
+    skData.fill(0);
+    svData.fill(0);
+    dummyLp.fill(0);
+    if (isNativeAvailable()) {
+      lifStepNativeWrapper(this.potentialsQ._data, dotQ._data, sqData, dummyLp, this.betaQKV, this.thresholdQKV);
+      lifStepNativeWrapper(this.potentialsK._data, dotK._data, skData, dummyLp, this.betaQKV, this.thresholdQKV);
+      lifStepNativeWrapper(this.potentialsV._data, dotV._data, svData, dummyLp, this.betaQKV, this.thresholdQKV);
+    } else {
+      this.runLIF(this.potentialsQ._data, dotQ._data, sqData, batchSeq, d_model, this.betaQKV, this.thresholdQKV);
+      this.runLIF(this.potentialsK._data, dotK._data, skData, batchSeq, d_model, this.betaQKV, this.thresholdQKV);
+      this.runLIF(this.potentialsV._data, dotV._data, svData, batchSeq, d_model, this.betaQKV, this.thresholdQKV);
+    }
+    // 3. Normalized query/key match scores, laid out as [batch * seqLen, seqLen].
+    const matchScores = this.fillMatchScores(batch, seqLen);
+    // 4. Softmax stand-in: feed the scores through the score LIF units. This advances
+    // potentialsScores; the resulting score spikes are not read by the value mixing below,
+    // which uses the graded scores directly.
+    const sScoresData = this.sScoresDataBuffer!;
+    const dummyLpScores = this.dummyLpScoresBuffer!;
+    sScoresData.fill(0);
+    dummyLpScores.fill(0);
+    if (isNativeAvailable()) {
+      lifStepNativeWrapper(this.potentialsScores._data, matchScores, sScoresData, dummyLpScores, this.betaScores, this.thresholdScores);
+    } else {
+      this.runLIF(this.potentialsScores._data, matchScores, sScoresData, batchSeq, seqLen, this.betaScores, this.thresholdScores);
+    }
+    // 5. Score-weighted sum of the value spikes.
+    const outData = this.mixValueSpikes(batch, seqLen);
+    return Matrix.fromFlat(outData, [batchSeq, d_model]);
+  }
+  /**
+   * Fills the match-score buffer: entry (i, j) of a sequence is the number of units active in
+   * both query row i and key row j, divided by the largest such count in row i. Rows whose
+   * query is silent, or that match no key at all, stay zero.
+   */
+  private fillMatchScores(batch: number, seqLen: number): Float32Array {
+    const d_model = this.d_model;
+    const querySpikes = this.sqDataBuffer!;
+    const keySpikes = this.skDataBuffer!;
+    const scores = this.matchScoresBuffer!;
+    const counts = this.tempMatchesBuffer!;
+    scores.fill(0);
+    for (let b = 0; b < batch; b++) {
+      const sequenceBase = b * seqLen * d_model;
+      const scoreBase = b * seqLen * seqLen;
+      for (let i = 0; i < seqLen; i++) {
+        const qBase = sequenceBase + i * d_model;
+        // Collect the active query units once so the inner loop only visits those.
+        const activeQ: number[] = [];
+        for (let d = 0; d < d_model; d++) {
+          if (querySpikes[qBase + d] > 0) activeQ.push(d);
+        }
+        if (activeQ.length === 0) continue;
+        let maxMatch = 0;
+        for (let j = 0; j < seqLen; j++) {
+          const kBase = sequenceBase + j * d_model;
+          let matchCount = 0;
+          for (let k = 0; k < activeQ.length; k++) {
+            if (keySpikes[kBase + activeQ[k]] > 0) matchCount++;
+          }
+          counts[j] = matchCount;
+          if (matchCount > maxMatch) maxMatch = matchCount;
+        }
+        if (maxMatch === 0) continue; // row already zeroed by the fill above
+        const rowBase = scoreBase + i * seqLen;
+        for (let j = 0; j < seqLen; j++) {
+          scores[rowBase + j] = counts[j] / maxMatch;
+        }
+      }
+    }
+    return scores;
+  }
+  /**
+   * Fills the output buffer: output row i accumulates, over every key/value row j of the same
+   * sequence with a positive score, `score(i, j) * valueSpikes(j)`; results are capped at 1.0.
+   */
+  private mixValueSpikes(batch: number, seqLen: number): Float32Array {
+    const d_model = this.d_model;
+    const valueSpikes = this.svDataBuffer!;
+    const scores = this.matchScoresBuffer!;
+    const out = this.outSpikesBuffer!;
+    out.fill(0);
+    for (let b = 0; b < batch; b++) {
+      const sequenceBase = b * seqLen * d_model;
+      const scoreBase = b * seqLen * seqLen;
+      for (let j = 0; j < seqLen; j++) {
+        const vBase = sequenceBase + j * d_model;
+        // Collect the active value units once so silent units are skipped.
+        const activeV: number[] = [];
+        for (let d = 0; d < d_model; d++) {
+          if (valueSpikes[vBase + d] > 0) activeV.push(d);
+        }
+        if (activeV.length === 0) continue;
+        for (let i = 0; i < seqLen; i++) {
+          const gradedScore = scores[scoreBase + i * seqLen + j];
+          if (gradedScore <= 0) continue;
+          const outBase = sequenceBase + i * d_model;
+          for (let k = 0; k < activeV.length; k++) {
+            const d = activeV[k];
+            out[outBase + d] += gradedScore * valueSpikes[vBase + d];
+          }
+        }
+      }
+    }
+    // Cap the output at 1.0 so it stays in spike range for the next layer.
+    for (let i = 0; i < out.length; i++) {
+      if (out[i] > 1.0) out[i] = 1.0;
+    }
+    return out;
+  }
+  /**
+   * JS fallback for one LIF step over a `[batch, dim]` buffer: each potential decays by its
+   * unit's beta, integrates the input, is capped at 1.0, and emits a spike (subtracting the
+   * threshold) when it reaches its unit's threshold.
+   */
+  private runLIF(pot: Float32Array, input: Float32Array, output: Float32Array, batch: number, dim: number, beta: Float32Array, threshold: Float32Array) {
+    for (let row = 0; row < batch; row++) {
+      const offset = row * dim;
+      for (let unit = 0; unit < dim; unit++) {
+        const idx = offset + unit;
+        pot[idx] = Math.min((pot[idx] * beta[unit]) + input[idx], 1.0);
+        // Compare the stored (float32-rounded) potential, not the double intermediate.
+        if (pot[idx] >= threshold[unit]) {
+          output[idx] = 1.0;
+          pot[idx] -= threshold[unit];
+        } else {
+          output[idx] = 0.0;
+        }
+      }
+    }
+  }
+  /**
+   * Local learning rule: for every active input feature of every cached input row, adds
+   * `learningRate * error * input + dopamine` to the matching row of all three kernels and
+   * clamps the weights to [-1, 1]. The same delta is applied to Q, K and V.
+   *
+   * @param errorSignal Error values, at least `rows x d_model` where `rows` is the row count
+   *   of the cached input.
+   * @param learningRate Step size applied to the error term.
+   * @throws Error when called before `compute`, or when `errorSignal` holds fewer values
+   *   than required (which would otherwise write NaN into the kernels).
+   */
+  public learnAttention(errorSignal: Matrix, learningRate: number = 0.01) {
+    if (!this.lastInputs) {
+      throw new Error("[SpikingSelfAttention] Cannot run learning before forward() is executed.");
+    }
+    const err = errorSignal._data;
+    const inputs = this.lastInputs._data;
+    const batchSeq = this.lastInputs._shape[0];
+    // The cached input is [batchSeq, inFeatures]; fall back to d_model if the shape lacks it.
+    const inFeatures = this.lastInputs._shape[1] || this.d_model;
+    const d_model = this.d_model;
+    const requiredErrors = batchSeq * d_model;
+    if (err.length < requiredErrors) {
+      throw new Error(`[SpikingSelfAttention] errorSignal holds ${err.length} values but ${requiredErrors} (${batchSeq} x ${d_model}) are required.`);
+    }
+    // Very small constant drive meant to revive silent units without saturating the weights.
+    const dopamine = 0.00005;
+    const clampWeight = (w: number): number => Math.max(-1.0, Math.min(1.0, w));
+    const kQ = this.kernelQ!._data;
+    const kK = this.kernelK!._data;
+    const kV = this.kernelV!._data;
+    for (let b = 0; b < batchSeq; b++) {
+      const errOffset = b * d_model;
+      const inOffset = b * inFeatures;
+      for (let i = 0; i < inFeatures; i++) {
+        const inVal = inputs[inOffset + i];
+        if (!(inVal > 0)) continue; // sparse update: only active inputs touch their kernel row
+        const kOffset = i * d_model;
+        for (let d = 0; d < d_model; d++) {
+          const delta = (learningRate * err[errOffset + d] * inVal) + dopamine;
+          kQ[kOffset + d] = clampWeight(kQ[kOffset + d] + delta);
+          kK[kOffset + d] = clampWeight(kK[kOffset + d] + delta);
+          kV[kOffset + d] = clampWeight(kV[kOffset + d] + delta);
+        }
+      }
+    }
+  }
+  /**
+   * @returns The base config plus every constructor option of this layer, so a layer rebuilt
+   *   from it keeps custom beta/threshold ranges.
+   */
+  public getConfig(): Record<string, any> {
+    return {
+      ...super.getConfig(),
+      d_model: this.d_model,
+      sequenceLength: this.sequenceLength,
+      kernelInitializer: this.kernelInitializer,
+      betaRange: [...this.betaRange],
+      thresholdRange: [...this.thresholdRange]
+    };
+  }
+}

package/src/native_backend.ts CHANGED Viewed

@@ -47,8 +47,8 @@ export const lifStepNativeWrapper = (
   dot: Float32Array,
   spikes: Float32Array,
   lastPotentials: Float32Array,
-  beta: number,
-  threshold: number
+  beta: Float32Array,
+  threshold: Float32Array
 ): void => {
   if (!native) throw new Error("Spiking Native backend not available");
   native.lifStepNative(potentials, dot, spikes, lastPotentials, beta, threshold);
@@ -57,7 +57,7 @@ export const lifStepNativeWrapper = (
 export const maskSurrogateNativeWrapper = (
   errorSignal: Float32Array,
   potentials: Float32Array,
-  threshold: number,
+  threshold: Float32Array,
   windowSize: number
 ): void => {
   if (!native) throw new Error("Spiking Native backend not available");
@@ -88,3 +88,39 @@ export const applyAddOnlyDeltaNativeWrapper = (
     useBias
   );
 };
+/**
+ * Hands an embedding-table update to the native addon's `applyEmbeddingDeltaNative`.
+ * All arguments are forwarded unchanged and in the same order.
+ *
+ * @throws Error when the native backend has not been loaded.
+ */
+export const applyEmbeddingDeltaNativeWrapper = (
+  embeddings: Float32Array,
+  inputs: Float32Array,
+  errorSignal: Float32Array,
+  learningRate: number,
+  inputDim: number,
+  outputDim: number
+): void => {
+  if (!native) {
+    throw new Error("Spiking Native backend not available");
+  }
+  native.applyEmbeddingDeltaNative(embeddings, inputs, errorSignal, learningRate, inputDim, outputDim);
+};
+/**
+ * Hands a contrastive Hebbian step to the native addon's `contrastiveHebbianNative`.
+ * All arguments are forwarded unchanged and in the same order.
+ *
+ * @returns Whatever the native call returns (a number).
+ * @throws Error when the native backend has not been loaded.
+ */
+export const contrastiveHebbianNativeWrapper = (
+  spikes: Float32Array,
+  errData: Float32Array,
+  numPairs: number,
+  sequenceLength: number,
+  dModel: number
+): number => {
+  if (!native) {
+    throw new Error("Spiking Native backend not available");
+  }
+  return native.contrastiveHebbianNative(spikes, errData, numPairs, sequenceLength, dModel);
+};

package/src-rust/src/contrastive.rs ADDED Viewed

@@ -0,0 +1,85 @@
+use napi_derive::napi;
+use napi::bindgen_prelude::Float32Array;
+use rayon::prelude::*;
+#[napi]
+pub fn contrastive_hebbian_native(
+    spikes: Float32Array,
+    mut err_data: Float32Array,
+    num_pairs: u32,
+    sequence_length: u32,
+    d_model: u32,
+) -> f64 {
+    let spikes_slice: &[f32] = &spikes;
+    let err_slice: &mut [f32] = &mut err_data;
+    let num_pairs = num_pairs as usize;
+    let seq_len = sequence_length as usize;
+    let d_model = d_model as usize;
+    let chunk_size = seq_len * d_model;
+    let total_loss: f32 = err_slice.par_chunks_mut(chunk_size).enumerate().map(|(b, chunk)| {
+        let mut local_loss = 0.0f32;
+        if b < num_pairs {
+            // Ini adalah vektor Q
+            let i = b;
+            let p_offset = (num_pairs + i) * chunk_size;
+            let n_offset = (num_pairs + ((i + 1) % num_pairs)) * chunk_size;
+            let q_offset = i * chunk_size;
+            for rem in 0..chunk_size {
+                let q_spike = spikes_slice[q_offset + rem];
+                let p_spike = spikes_slice[p_offset + rem];
+                let n_spike = spikes_slice[n_offset + rem];
+                let mut pull = p_spike - q_spike;
+                if q_spike == 0.0 && p_spike == 0.0 && n_spike == 0.0 {
+                    pull = 0.05; // Suntik energi
+                }
+                let push = (q_spike * n_spike) * 0.2;
+                chunk[rem] = pull - push;
+                if pull != 0.0 || push != 0.0 {
+                    local_loss += pull.abs() + push;
+                }
+            }
+        } else {
+            // Ini adalah vektor P atau N
+            let p_index = b - num_pairs;
+            // Peran sebagai P untuk i = p_index
+            let q_offset_p = p_index * chunk_size;
+            let n_offset_p = (num_pairs + ((p_index + 1) % num_pairs)) * chunk_size;
+            // Peran sebagai N untuk i = p_index - 1 (dengan wrap around)
+            let i_n = if p_index == 0 { num_pairs - 1 } else { p_index - 1 };
+            let q_offset_n = i_n * chunk_size;
+            for rem in 0..chunk_size {
+                let q_spike_p = spikes_slice[q_offset_p + rem];
+                let p_spike_p = spikes_slice[b * chunk_size + rem];
+                let n_spike_p = spikes_slice[n_offset_p + rem];
+                let mut pull_p = p_spike_p - q_spike_p;
+                if q_spike_p == 0.0 && p_spike_p == 0.0 && n_spike_p == 0.0 {
+                    pull_p = 0.05;
+                }
+                let contrib_p = -pull_p;
+                let q_spike_n = spikes_slice[q_offset_n + rem];
+                let n_spike_n = spikes_slice[b * chunk_size + rem];
+                let push_n = (q_spike_n * n_spike_n) * 0.2;
+                let contrib_n = -push_n;
+                chunk[rem] = contrib_p + contrib_n;
+            }
+        }
+        local_loss
+    }).sum();
+    total_loss as f64
+}

package/src-rust/src/delta.rs ADDED Viewed

@@ -0,0 +1,51 @@
+use napi_derive::napi;
+use napi::bindgen_prelude::Float32Array;
+use rayon::prelude::*;
+/// Applies an add-only delta update to `kernel` (and optionally `bias`) in place.
+///
+/// `kernel` is processed as rows of `units` values, in parallel. Row `k` receives
+/// `learning_rate * error_signal[b]` for every batch row `b` whose input feature `k` is
+/// active (`> 0.5`), and is then clamped to `[-1, 1]`. When `use_bias` is set, each bias gets
+/// `learning_rate` times the batch-mean error of its unit and is clamped to `[-1, 1]`.
+///
+/// Edge cases: with `units == 0` the call is a no-op (rayon rejects zero-sized chunks), and
+/// with `batch == 0` the bias is left untouched instead of being overwritten with the NaN
+/// that `0.0 / 0.0` produces.
+#[napi]
+pub fn apply_add_only_delta_native(
+    mut kernel: Float32Array,
+    mut bias: Float32Array,
+    inputs: Float32Array,
+    error_signal: Float32Array,
+    learning_rate: f64,
+    batch: u32,
+    in_features: u32,
+    units: u32,
+    use_bias: bool
+) {
+    let in_feat = in_features as usize;
+    let u = units as usize;
+    let b_size = batch as usize;
+    let lr = learning_rate as f32;
+    if u == 0 {
+        return;
+    }
+    let kernel_slice: &mut [f32] = &mut kernel;
+    let bias_slice: &mut [f32] = &mut bias;
+    let in_slice: &[f32] = &inputs;
+    let err_slice: &[f32] = &error_signal;
+    kernel_slice.par_chunks_mut(u).enumerate().for_each(|(k, kernel_row)| {
+        for b in 0..b_size {
+            // Inputs are treated as binary spikes: only active features update their row.
+            if in_slice[b * in_feat + k] > 0.5 {
+                let err_row = &err_slice[b * u..(b + 1) * u];
+                for (weight, err) in kernel_row.iter_mut().zip(err_row) {
+                    *weight += lr * *err;
+                }
+            }
+        }
+        for weight in kernel_row.iter_mut() {
+            *weight = weight.clamp(-1.0, 1.0);
+        }
+    });
+    // The mean over an empty batch is undefined, so skip the bias update in that case.
+    if use_bias && b_size > 0 {
+        bias_slice.par_iter_mut().enumerate().for_each(|(j, b_val)| {
+            let mut sum = 0.0f32;
+            for b in 0..b_size {
+                sum += err_slice[b * u + j];
+            }
+            *b_val += (lr * sum) / (b_size as f32);
+            *b_val = b_val.clamp(-1.0, 1.0);
+        });
+    }
+}

package/src-rust/src/dot_product.rs ADDED Viewed

@@ -0,0 +1,47 @@
+use napi_derive::napi;
+use napi::bindgen_prelude::Float32Array;
+use rayon::prelude::*;
+/// Accumulates `op(A) * op(B)` into `out_data`, treating A as a binary spike matrix.
+///
+/// `op` transposes its operand when the matching `trans_*` flag is set; both inputs are
+/// row-major with the given original dimensions. Only entries of `op(A)` greater than `0.5`
+/// contribute, and each contributes the whole matching row of `op(B)` by plain addition.
+/// Output rows are processed in parallel.
+///
+/// The result is added onto whatever `out_data` already holds; this function never zeroes it.
+/// With zero effective output columns the call is a no-op (rayon rejects zero-sized chunks).
+#[napi]
+pub fn dot_product_add_only_native(
+    a_data: Float32Array,
+    a_rows_orig: u32,
+    a_cols_orig: u32,
+    b_data: Float32Array,
+    b_rows_orig: u32,
+    b_cols_orig: u32,
+    trans_a: bool,
+    trans_b: bool,
+    mut out_data: Float32Array
+) {
+    // Effective dimensions after the optional transposes.
+    let a_rows = if trans_a { a_cols_orig } else { a_rows_orig } as usize;
+    let a_cols = if trans_a { a_rows_orig } else { a_cols_orig } as usize;
+    let b_cols = if trans_b { b_rows_orig } else { b_cols_orig } as usize;
+    if b_cols == 0 {
+        return;
+    }
+    let a_slice: &[f32] = &a_data;
+    let b_slice: &[f32] = &b_data;
+    let out_slice: &mut [f32] = &mut out_data;
+    out_slice.par_chunks_mut(b_cols).enumerate().for_each(|(i, out_row)| {
+        for k in 0..a_cols {
+            // Element (i, k) of op(A) in the row-major storage of the original A.
+            let a_index = if trans_a { k * a_rows + i } else { i * a_cols + k };
+            if a_slice[a_index] > 0.5 {
+                for j in 0..b_cols {
+                    // Element (k, j) of op(B) in the row-major storage of the original B.
+                    let b_index = if trans_b { j * a_cols + k } else { k * b_cols + j };
+                    out_row[j] += b_slice[b_index];
+                }
+            }
+        }
+    });
+}

package/src-rust/src/embedding.rs ADDED Viewed

@@ -0,0 +1,28 @@
+use napi_derive::napi;
+use napi::bindgen_prelude::Float32Array;
+#[napi]
+pub fn apply_embedding_delta_native(
+    mut embeddings: Float32Array,
+    inputs: Float32Array,
+    error_signal: Float32Array,
+    learning_rate: f64,
+    input_dim: u32,
+    output_dim: u32
+) {
+    let batch = inputs.len();
+    let out_dim = output_dim as usize;
+    for b in 0..batch {
+        let token_idx = inputs[b] as i32;
+        if token_idx >= 0 && token_idx < input_dim as i32 {
+            let token_idx = token_idx as usize;
+            let emb_offset = token_idx * out_dim;
+            let err_offset = b * out_dim;
+            for j in 0..out_dim {
+                embeddings[emb_offset + j] += learning_rate as f32 * error_signal[err_offset + j];
+                embeddings[emb_offset + j] = embeddings[emb_offset + j].clamp(-1.0, 1.0);
+            }
+        }
+    }
+}