npm - @oxide-js/spiking - Versions diffs - 1.1.0 - Mend

@oxide-js/spiking 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/CHANGELOG.md +12 -0
package/examples/demo.ts +101 -0
package/index.d.ts +19 -0
package/index.js +316 -0
package/package.json +47 -0
package/src/index.ts +5 -0
package/src/layers/SpikingDense.ts +237 -0
package/src/layers/SpikingEmbedding.ts +227 -0
package/src/math/dotProductAddOnly.ts +229 -0
package/src/models/SpikingSentenceEmbedder.ts +135 -0
package/src/native_backend.ts +90 -0
package/src-rust/Cargo.lock +324 -0
package/src-rust/Cargo.toml +17 -0
package/src-rust/build.rs +5 -0
package/src-rust/src/lib.rs +462 -0
package/test/test_embedding.ts +126 -0
package/test/test_xor.ts +122 -0
package/tsconfig.json +9 -0

package/src/layers/SpikingDense.ts ADDED Viewed

@@ -0,0 +1,237 @@
+import { BaseLayer, LayerConfig, ForwardOptions } from "@oxide-js/layers";
+import { Matrix, mj } from "@oxide-js/core";
+import {
+    isNativeAvailable,
+    lifStepNativeWrapper,
+    maskSurrogateNativeWrapper,
+    applyAddOnlyDeltaNativeWrapper
+} from "../native_backend.js";
+import dotProductAddOnly from "../math/dotProductAddOnly.js";
+/** Construction options for {@link SpikingDense}. */
+export interface SpikingDenseConfig extends LayerConfig {
+  /** Number of neurons; becomes the last dimension of the layer output. */
+  units: number;
+  /** Whether a trainable bias is added to the input current. Defaults to `true`. */
+  useBias?: boolean;
+  /** Name of the kernel initializer. Defaults to `"glorot_normal"`. */
+  kernelInitializer?: string;
+  /** Name of the bias initializer. Defaults to `"zeros"`. */
+  biasInitializer?: string;
+  /** Factor the previous membrane potential is multiplied by on every step. Defaults to `0.9`. */
+  beta?: number;
+  /** Membrane potential at or above which a neuron emits a spike. Defaults to `1.0`. */
+  threshold?: number;
+}
+/**
+ * Dense (fully connected) spiking layer with leaky integrate-and-fire neurons.
+ *
+ * Each forward step computes the input current as the add-only product of the
+ * inputs and the kernel (plus an optional bias), decays and integrates the
+ * membrane potentials, emits a spike (1) wherever a potential reaches the
+ * threshold, and subtracts the threshold from every neuron that fired.
+ * Membrane potentials persist between calls until `resetState()` is invoked or
+ * the batch size changes.
+ */
+export class SpikingDense extends BaseLayer {
+  /** Number of neurons, i.e. the size of the last output dimension. */
+  public units: number;
+  /** Whether a bias parameter is created and added to the input current. */
+  public useBias: boolean;
+  /** Name of the initializer used for the kernel. */
+  public kernelInitializer: string;
+  /** Name of the initializer used for the bias. */
+  public biasInitializer: string;
+  /** Multiplicative decay applied to the membrane potentials on every step. */
+  public beta: number;
+  /** Potential at or above which a neuron spikes. */
+  public threshold: number;
+  /** Membrane potentials carried across forward steps, shaped [batch, units]. */
+  public potentials!: Matrix;
+  /** Potentials of the latest step captured after integration but before the reset. */
+  public lastPotentials?: Matrix;
+  /** Inputs of the latest forward step (stored by reference, not copied). */
+  public lastInputs?: Matrix;
+  /** Spikes emitted by the latest forward step. */
+  public lastSpikes?: Matrix;
+  /** Trainable weight matrix of shape [inFeatures, units]; undefined before `build()`. */
+  public get kernel(): Matrix | undefined {
+    return this.getParameter("kernel");
+  }
+  /** Trainable bias registered with shape [units, 1]; undefined when unused or before `build()`. */
+  public get bias(): Matrix | undefined {
+    return this.getParameter("bias");
+  }
+  /**
+   * @param config Layer options; see {@link SpikingDenseConfig} for the defaults.
+   */
+  constructor(config: SpikingDenseConfig) {
+    super(config);
+    this.units = config.units;
+    this.useBias = config.useBias ?? true;
+    this.kernelInitializer = config.kernelInitializer || "glorot_normal";
+    this.biasInitializer = config.biasInitializer || "zeros";
+    this.beta = config.beta ?? 0.9;
+    this.threshold = config.threshold ?? 1.0;
+  }
+  /**
+   * @param inputShape Shape of the incoming matrix; only the first entry (batch) is used.
+   * @returns `[batch, units]`, with `-1` as the batch when it is not given.
+   */
+  public computeOutputShape(inputShape: number[]): number[] {
+    const batch = inputShape[0] ?? -1;
+    return [batch, this.units];
+  }
+  /**
+   * Creates the kernel (and bias when enabled) and the initial membrane state.
+   *
+   * @param inputShape Shape of the incoming matrix; its last entry is the feature count.
+   */
+  public build(inputShape: number[]): void {
+    super.build(inputShape);
+    const inFeatures = inputShape[inputShape.length - 1];
+    const kernelVal = this.createInitializer(this.kernelInitializer, [inFeatures, this.units]);
+    this.addParameter("kernel", kernelVal, true, [inFeatures, this.units]);
+    if (this.useBias) {
+      const biasVal = this.createInitializer(this.biasInitializer, [this.units, 1]);
+      this.addParameter("bias", biasVal, true, [this.units, 1]);
+    }
+    // Start with a zeroed state for a batch of one; compute() resizes it to the real batch size.
+    this.potentials = Matrix.fromFlat(new Float32Array(this.units), [1, this.units]);
+  }
+  /**
+   * Makes sure the membrane state exists and matches the batch size.
+   * Reallocating discards the accumulated potentials (they restart at zero).
+   */
+  private ensurePotentialsShape(batch: number) {
+    // The undefined check mirrors SpikingEmbedding and covers calls made before build().
+    if (!this.potentials || this.potentials._shape[0] !== batch) {
+      this.potentials = Matrix.fromFlat(new Float32Array(batch * this.units), [batch, this.units]);
+    }
+  }
+  /** Zeroes the membrane potentials and forgets the tensors cached for learning. */
+  public resetState() {
+    if (this.potentials) this.potentials._data.fill(0);
+    this.lastPotentials = undefined;
+    this.lastInputs = undefined;
+    this.lastSpikes = undefined;
+  }
+  /**
+   * Runs one simulation step.
+   *
+   * @param inputs Input matrix whose first dimension is the batch.
+   * @param options Forward options (not used by this layer).
+   * @returns Matrix of shape [batch, units] holding 1 where a neuron fired and 0 elsewhere.
+   */
+  protected compute(inputs: Matrix, options?: ForwardOptions): Matrix {
+    const kernel = this.kernel!;
+    const batch = inputs._shape[0];
+    this.ensurePotentialsShape(batch);
+    // 1. Input current through the spiking-optimised (add-only) matrix product.
+    const dot = dotProductAddOnly(inputs, kernel);
+    // 2. Add the bias; the return value is ignored, so this relies on `dot` being updated in place.
+    if (this.useBias && this.bias) {
+      mj.addBiasRow(dot, this.bias);
+    }
+    // 3 & 4. Leaky integrate, fire and reset.
+    const outData = new Float32Array(batch * this.units);
+    const outSpikes = Matrix.fromFlat(outData, [batch, this.units]);
+    this.lastPotentials = Matrix.fromFlat(new Float32Array(batch * this.units), [batch, this.units]);
+    if (isNativeAvailable()) {
+      // NOTE(review): presumably equivalent to the JavaScript fallback below - confirm against the native backend.
+      lifStepNativeWrapper(
+        this.potentials._data,
+        dot._data,
+        outSpikes._data,
+        this.lastPotentials._data,
+        this.beta,
+        this.threshold
+      );
+    } else {
+      const potData = this.potentials._data;
+      const dotData = dot._data;
+      const lpData = this.lastPotentials._data;
+      const thresh = this.threshold;
+      const beta = this.beta;
+      for (let i = 0; i < potData.length; i++) {
+        // Decay the previous potential, integrate the new current, and snapshot
+        // the pre-reset value for the surrogate mask used while learning.
+        potData[i] = (potData[i] * beta) + dotData[i];
+        lpData[i] = potData[i];
+        if (potData[i] >= thresh) {
+          outData[i] = 1;
+          // Soft reset: keep whatever exceeded the threshold.
+          potData[i] -= thresh;
+        } else {
+          outData[i] = 0;
+        }
+      }
+    }
+    // Remember this step so the learn* methods can update the weights.
+    this.lastInputs = inputs;
+    this.lastSpikes = outSpikes;
+    return outSpikes;
+  }
+  /**
+   * Updates this layer as an output layer, using the error signal directly.
+   *
+   * @param errorSignal Error matrix with one row per sample and one column per unit.
+   * @param learningRate Step size of the update.
+   * @returns The same `errorSignal`, unchanged.
+   * @throws Error when called before a forward step or with too few error values.
+   */
+  public learnOutput(errorSignal: Matrix, learningRate: number = 0.01): Matrix {
+    this.applyAddOnlyDelta(errorSignal, learningRate);
+    return errorSignal;
+  }
+  /**
+   * Updates this layer as a hidden layer: projects the downstream error through
+   * `B`, masks it with a boxcar surrogate around the threshold and applies the
+   * add-only delta rule.
+   *
+   * @param errorFromNext Error matrix produced by the following layer.
+   * @param B Feedback matrix the error is multiplied by.
+   * @param learningRate Step size of the update.
+   * @returns The projected and masked error for this layer.
+   * @throws Error when called before a forward step or with too few error values.
+   */
+  public learnHidden(errorFromNext: Matrix, B: Matrix, learningRate: number = 0.01): Matrix {
+    // Broadcast the error backwards: E * B.
+    const eHidden = mj.dotProduct(errorFromNext, B, undefined, false, false);
+    // Boxcar surrogate mask: a pure gate, no floating-point multiplication.
+    if (this.lastPotentials) {
+      if (isNativeAvailable()) {
+        maskSurrogateNativeWrapper(
+          eHidden._data,
+          this.lastPotentials._data,
+          this.threshold,
+          1.0
+        );
+      } else {
+        const eData = eHidden._data;
+        const pData = this.lastPotentials._data;
+        const thresh = this.threshold;
+        const windowSize = 1.0;
+        for (let i = 0; i < eData.length; i++) {
+          // Drop the error of neurons whose potential was far from the threshold.
+          if (Math.abs(pData[i] - thresh) > windowSize) {
+            eData[i] = 0;
+          }
+        }
+      }
+    }
+    this.applyAddOnlyDelta(eHidden, learningRate);
+    return eHidden;
+  }
+  /**
+   * Delta-rule update restricted to active inputs: for every sample, the kernel
+   * row of each input equal to 1 receives `learningRate * error`, and the bias
+   * (when enabled) receives the same increment.
+   *
+   * @throws Error when no forward step has been cached, or when `errorSignal`
+   *   holds fewer than batch x units values.
+   */
+  private applyAddOnlyDelta(errorSignal: Matrix, learningRate: number) {
+    if (!this.lastInputs || !this.lastSpikes) {
+      throw new Error("[SpikingDense] Cannot run learning before forward() is executed. 'lastInputs' or 'lastSpikes' is undefined.");
+    }
+    const kernel = this.kernel!._data;
+    const inputs = this.lastInputs._data;
+    const err = errorSignal._data;
+    const batch = this.lastInputs._shape[0];
+    const inFeatures = this.lastInputs._shape[1];
+    const units = this.units;
+    // A short error signal would read `undefined` below and silently write NaN into the weights.
+    if (err.length < batch * units) {
+      throw new Error(`[SpikingDense] Error signal has ${err.length} values but ${batch * units} are required (batch ${batch} x units ${units}).`);
+    }
+    if (isNativeAvailable()) {
+      // The native call always takes a bias buffer; pass an empty one when the layer has no bias.
+      const dummyBias = this.useBias && this.bias ? this.bias._data : new Float32Array(0);
+      applyAddOnlyDeltaNativeWrapper(
+        kernel,
+        dummyBias,
+        inputs,
+        err,
+        learningRate,
+        batch,
+        inFeatures,
+        units,
+        this.useBias
+      );
+    } else {
+      const biasData = this.useBias && this.bias ? this.bias._data : undefined;
+      for (let b = 0; b < batch; b++) {
+        const inOffset = b * inFeatures;
+        const errOffset = b * units;
+        for (let k = 0; k < inFeatures; k++) {
+          // Only inputs that spiked (value 1) contribute: an add-only update.
+          if (inputs[inOffset + k] === 1) {
+            const kOffset = k * units;
+            for (let j = 0; j < units; j++) {
+              kernel[kOffset + j] += learningRate * err[errOffset + j];
+            }
+          }
+        }
+        if (biasData) {
+          for (let j = 0; j < units; j++) {
+            biasData[j] += learningRate * err[errOffset + j];
+          }
+        }
+      }
+    }
+  }
+  /** @returns The base layer configuration extended with this layer's options. */
+  public getConfig(): Record<string, any> {
+    return {
+      ...super.getConfig(),
+      units: this.units,
+      useBias: this.useBias,
+      kernelInitializer: this.kernelInitializer,
+      biasInitializer: this.biasInitializer,
+      beta: this.beta,
+      threshold: this.threshold
+    };
+  }
+}

package/src/layers/SpikingEmbedding.ts ADDED Viewed

@@ -0,0 +1,227 @@
+import { BaseLayer, LayerConfig, ForwardOptions } from "@oxide-js/layers";
+import { Matrix, mj } from "@oxide-js/core";
+import {
+    isNativeAvailable,
+    lifStepNativeWrapper,
+    maskSurrogateNativeWrapper
+} from "../native_backend.js";
+/** Construction options for {@link SpikingEmbedding}. */
+export interface SpikingEmbeddingConfig extends LayerConfig {
+  inputDim: number; // Vocabulary size (number of rows in the embedding table)
+  outputDim: number; // Embedding dimension (number of neurons)
+  beta?: number; // LIF decay factor; defaults to 0.9
+  threshold?: number; // Spike threshold; defaults to 1.0
+  embeddingsInitializer?: string; // Weight initializer name; defaults to "glorot_normal"
+}
+/**
+ * Spiking embedding layer: each token id selects one row of the kernel, and
+ * that row is fed as input current into leaky integrate-and-fire neurons.
+ *
+ * Membrane potentials persist between calls until `resetState()` is invoked or
+ * the batch size changes.
+ */
+export class SpikingEmbedding extends BaseLayer {
+  /** Vocabulary size; token ids outside [0, inputDim) are ignored. */
+  public inputDim: number;
+  /** Embedding dimension, i.e. the number of neurons. */
+  public outputDim: number;
+  /** Multiplicative decay applied to the membrane potentials on every step. */
+  public beta: number;
+  /** Potential at or above which a neuron spikes. */
+  public threshold: number;
+  /** Membrane potentials carried across forward steps, shaped [batch, outputDim]. */
+  public potentials!: Matrix;
+  /** Potentials of the latest step captured after integration but before the reset. */
+  public lastPotentials?: Matrix;
+  /** Inputs of the latest forward step (stored by reference, not copied). */
+  public lastInputs?: Matrix;
+  /** Spikes emitted by the latest forward step. */
+  public lastSpikes?: Matrix;
+  /** Name of the initializer used for the embedding table. */
+  public embeddingsInitializer: string;
+  /** Embedding table of shape [inputDim, outputDim]; undefined before `build()`. */
+  public get kernel(): Matrix | undefined {
+    return this.getParameter("kernel");
+  }
+  /**
+   * @param config Layer options; see {@link SpikingEmbeddingConfig} for the defaults.
+   */
+  constructor(config: SpikingEmbeddingConfig) {
+    super(config);
+    this.inputDim = config.inputDim;
+    this.outputDim = config.outputDim;
+    this.beta = config.beta ?? 0.9;
+    this.threshold = config.threshold ?? 1.0;
+    this.embeddingsInitializer = config.embeddingsInitializer || "glorot_normal";
+  }
+  /**
+   * @param inputShape Shape of the incoming matrix; only the first entry (batch) is used.
+   * @returns `[batch, outputDim]`, with `-1` as the batch when it is not given.
+   */
+  public computeOutputShape(inputShape: number[]): number[] {
+    const batch = inputShape[0] ?? -1;
+    return [batch, this.outputDim];
+  }
+  /** Creates the embedding table as the trainable `kernel` parameter. */
+  public build(inputShape: number[]): void {
+    super.build(inputShape);
+    const kernelVal = this.createInitializer(this.embeddingsInitializer, [this.inputDim, this.outputDim]);
+    this.addParameter("kernel", kernelVal, true, [this.inputDim, this.outputDim]);
+  }
+  /** Zeroes the membrane potentials and forgets the tensors cached for learning. */
+  public resetState() {
+    if (this.potentials) this.potentials._data.fill(0);
+    this.lastPotentials = undefined;
+    this.lastInputs = undefined;
+    this.lastSpikes = undefined;
+  }
+  /**
+   * Makes sure the membrane state exists and matches the batch size.
+   * Reallocating discards the accumulated potentials (they restart at zero).
+   */
+  private ensurePotentialsShape(batch: number) {
+    if (!this.potentials || this.potentials._shape[0] !== batch) {
+      this.potentials = Matrix.fromFlat(
+        new Float32Array(batch * this.outputDim),
+        [batch, this.outputDim]
+      );
+    }
+  }
+  /**
+   * Runs one simulation step.
+   *
+   * @param inputs Token ids; element `b` of the flat data is read as the id of sample `b`,
+   *   which assumes a [batch, 1] input.
+   * @param options Forward options (not used by this layer).
+   * @returns Matrix of shape [batch, outputDim] holding 1 where a neuron fired and 0 elsewhere.
+   */
+  protected compute(inputs: Matrix, options?: ForwardOptions): Matrix {
+    const kernel = this.kernel!._data;
+    const batch = inputs._shape[0];
+    const inputData = inputs._data;
+    this.ensurePotentialsShape(batch);
+    // 1. Row lookup (replaces the dot product): a valid token's row becomes the input current.
+    const dotData = new Float32Array(batch * this.outputDim);
+    for (let b = 0; b < batch; b++) {
+      const tokenId = Math.round(inputData[b]);
+      // Out-of-range (or NaN) ids contribute no current for that sample.
+      if (tokenId >= 0 && tokenId < this.inputDim) {
+        const kernelOffset = tokenId * this.outputDim;
+        const dotOffset = b * this.outputDim;
+        for (let j = 0; j < this.outputDim; j++) {
+          dotData[dotOffset + j] = kernel[kernelOffset + j];
+        }
+      }
+    }
+    // 2 & 3. Leaky integrate, fire and reset.
+    const outData = new Float32Array(batch * this.outputDim);
+    const outSpikes = Matrix.fromFlat(outData, [batch, this.outputDim]);
+    this.lastPotentials = Matrix.fromFlat(new Float32Array(batch * this.outputDim), [batch, this.outputDim]);
+    if (isNativeAvailable()) {
+      // NOTE(review): presumably equivalent to the JavaScript fallback below - confirm against the native backend.
+      lifStepNativeWrapper(
+        this.potentials._data,
+        dotData,
+        outSpikes._data,
+        this.lastPotentials._data,
+        this.beta,
+        this.threshold
+      );
+    } else {
+      const potData = this.potentials._data;
+      const lpData = this.lastPotentials._data;
+      const thresh = this.threshold;
+      const beta = this.beta;
+      for (let i = 0; i < potData.length; i++) {
+        // Decay, integrate, and snapshot the pre-reset potential for the surrogate mask.
+        potData[i] = (potData[i] * beta) + dotData[i];
+        lpData[i] = potData[i];
+        if (potData[i] >= thresh) {
+          outData[i] = 1;
+          // Soft reset: keep whatever exceeded the threshold.
+          potData[i] -= thresh;
+        } else {
+          outData[i] = 0;
+        }
+      }
+    }
+    // Remember this step for the weight update.
+    this.lastInputs = inputs;
+    this.lastSpikes = outSpikes;
+    return outSpikes;
+  }
+  /**
+   * Updates the embedding rows from the error of the layer above.
+   *
+   * The error is projected through the feedback matrix (E * B, feedback
+   * alignment), gated by a boxcar surrogate around the threshold, and added -
+   * scaled by the learning rate - to the row of each sample's token.
+   *
+   * @param errorFromNext Error matrix produced by the following layer.
+   * @param B Feedback matrix the error is multiplied by.
+   * @param learningRate Step size of the update.
+   * @returns The projected and masked error.
+   * @throws Error when called before a forward step.
+   */
+  public learnEmbedding(errorFromNext: Matrix, B: Matrix, learningRate: number = 0.01): Matrix {
+    if (!this.lastInputs) {
+      throw new Error("[SpikingEmbedding] Cannot run learnEmbedding() before forward() is executed. 'lastInputs' is undefined.");
+    }
+    const kernel = this.kernel!._data;
+    const inputData = this.lastInputs._data;
+    const batch = this.lastInputs._shape[0];
+    // Standard (not add-only) product, because both the error and B are floating point:
+    // eHidden [batch, hiddenUnits] = errorFromNext [batch, outUnits] . B [outUnits, hiddenUnits]
+    const eHiddenMatrix = mj.dotProduct(errorFromNext, B, undefined, false, false);
+    // Boxcar surrogate mask.
+    if (this.lastPotentials) {
+      if (isNativeAvailable()) {
+        maskSurrogateNativeWrapper(
+          eHiddenMatrix._data,
+          this.lastPotentials._data,
+          this.threshold,
+          1.0
+        );
+      } else {
+        const eData = eHiddenMatrix._data;
+        const pData = this.lastPotentials._data;
+        const thresh = this.threshold;
+        const windowSize = 1.0;
+        for (let i = 0; i < eData.length; i++) {
+          // Drop the error of neurons whose potential was far from the threshold.
+          if (Math.abs(pData[i] - thresh) > windowSize) {
+            eData[i] = 0;
+          }
+        }
+      }
+    }
+    // Delta-rule update on the looked-up rows only.
+    const err = eHiddenMatrix._data;
+    for (let b = 0; b < batch; b++) {
+      const tokenId = Math.round(inputData[b]);
+      if (tokenId >= 0 && tokenId < this.inputDim) {
+        const kOffset = tokenId * this.outputDim;
+        const errOffset = b * this.outputDim;
+        for (let j = 0; j < this.outputDim; j++) {
+          kernel[kOffset + j] += learningRate * err[errOffset + j];
+        }
+      }
+    }
+    return eHiddenMatrix;
+  }
+  /**
+   * Word2Vec CBOW-style Hebbian contrastive update.
+   *
+   * Every valid token row is pulled towards `positiveContext` once and pushed
+   * away from each entry of `negativeContexts`. The positive pull is applied
+   * even when `negativeContexts` is empty.
+   *
+   * @param tokens Token ids whose rows are updated; invalid ids are skipped.
+   * @param positiveContext Target vector of length `outputDim` to move towards.
+   * @param negativeContexts Vectors of length `outputDim` to move away from.
+   * @param learningRate Step size of the update.
+   * @param marginPositive Weight of the attracting term.
+   * @param marginNegative Weight of the repelling term.
+   */
+  public learnHebbian(
+    tokens: number[] | Float32Array,
+    positiveContext: Float32Array,
+    negativeContexts: Float32Array[],
+    learningRate: number = 0.01,
+    marginPositive: number = 0.1,
+    marginNegative: number = 0.05
+  ): void {
+    const kernel = this.kernel!._data;
+    const dim = this.outputDim;
+    // Run at least one pass so the positive pull is not lost when there are no negatives.
+    const passes = Math.max(1, negativeContexts.length);
+    for (let n = 0; n < passes; n++) {
+      const negMean: Float32Array | undefined = negativeContexts[n];
+      for (let i = 0; i < tokens.length; i++) {
+        const tokenId = Math.round(tokens[i]);
+        if (tokenId >= 0 && tokenId < this.inputDim) {
+          const offset = tokenId * dim;
+          for (let j = 0; j < dim; j++) {
+            const current = kernel[offset + j];
+            // Pull the word towards its sentence context (positive) - once per token.
+            const posGradient = (n === 0) ? (positiveContext[j] - current) : 0;
+            // Push the word away from a random sentence context (negative).
+            const negGradient = negMean ? (current - negMean[j]) : 0;
+            const update = (posGradient * marginPositive) - (negGradient * marginNegative);
+            kernel[offset + j] += learningRate * update;
+          }
+        }
+      }
+    }
+  }
+  /** @returns The base layer configuration extended with this layer's options. */
+  public getConfig(): Record<string, any> {
+    return {
+      ...super.getConfig(),
+      inputDim: this.inputDim,
+      outputDim: this.outputDim,
+      beta: this.beta,
+      threshold: this.threshold,
+      embeddingsInitializer: this.embeddingsInitializer
+    };
+  }
+}

package/src/math/dotProductAddOnly.ts ADDED Viewed

@@ -0,0 +1,229 @@
+import { Matrix } from "@oxide-js/core";
+import { engine } from "@oxide-js/core";
+import { isNativeAvailable, dotProductAddOnlyNativeWrapper } from "../native_backend.js";
+/**
+ * Matrix product of `a` and `b` specialised for spiking networks ("add-only").
+ *
+ * At least one operand must be binary (contain only 0 and 1). Entries equal to
+ * 0 are skipped, and entries equal to 1 simply add the other operand's value,
+ * so no floating-point multiplication is performed.
+ *
+ * @param a Left operand.
+ * @param b Right operand.
+ * @param out Optional matrix that receives the result; its shape must match the product.
+ * @param transA When true, `a` is treated as its transpose.
+ * @param transB When true, `b` is treated as its transpose.
+ * @returns `out` when supplied, otherwise a newly allocated matrix.
+ * @throws Error when the inner dimensions differ, `out` has the wrong shape,
+ *   or neither operand is binary.
+ */
+export default function dotProductAddOnly(
+  a: Matrix,
+  b: Matrix,
+  out?: Matrix,
+  transA: boolean = false,
+  transB: boolean = false
+): Matrix {
+  const aRowsOrig = a._shape[0], aColsOrig = a._shape[1];
+  const bRowsOrig = b._shape[0], bColsOrig = b._shape[1];
+  // Effective dimensions once the optional transpositions are applied.
+  const aRows = transA ? aColsOrig : aRowsOrig;
+  const aCols = transA ? aRowsOrig : aColsOrig;
+  const bRows = transB ? bColsOrig : bRowsOrig;
+  const bCols = transB ? bRowsOrig : bColsOrig;
+  if (aCols !== bRows) {
+    throw new Error(`Dimensi matrix tidak cocok untuk dot product: [${aRows}x${aCols}] * [${bRows}x${bCols}]`);
+  }
+  if (out) {
+    if (out._shape[0] !== aRows || out._shape[1] !== bCols) {
+      throw new Error(`Output matrix shape mismatch: expected [${aRows}x${bCols}], got [${out._shape[0]}x${out._shape[1]}]`);
+    }
+  }
+  // Binary precondition: `b` is only scanned when `a` turns out not to be binary.
+  const aIsBinary = isBinaryData(a._data);
+  const bIsBinary = aIsBinary || isBinaryData(b._data);
+  if (!aIsBinary && !bIsBinary) {
+    throw new Error("SNN Error: Kedua matriks adalah floating-point. Setidaknya salah satu matriks harus hanya berisi 0 dan 1.");
+  }
+  const resultData = out ? out._data : new Float32Array(aRows * bCols);
+  const aData = a._data;
+  const bData = b._data;
+  if (isNativeAvailable()) {
+    // NOTE(review): `out` is not cleared before the native call - confirm the native kernel overwrites rather than accumulates.
+    dotProductAddOnlyNativeWrapper(
+      aData,
+      aRowsOrig,
+      aColsOrig,
+      bData,
+      bRowsOrig,
+      bColsOrig,
+      transA,
+      transB,
+      resultData
+    );
+  } else if (!transB) {
+    // A * B (or A^T * B): accumulate rows of B into the result, so a reused `out` must start at zero.
+    if (out) resultData.fill(0);
+    for (let i = 0; i < aRows; i++) {
+      const rOffset = i * bCols;
+      for (let k = 0; k < aCols; k++) {
+        const aik = transA ? aData[k * aRows + i] : aData[i * aCols + k];
+        // A zero entry contributes nothing, whichever operand is the binary one.
+        if (aik === 0) continue;
+        const kOffset = k * bCols;
+        let j = 0;
+        const jBound = bCols - 8;
+        if (aIsBinary) {
+          // aik is necessarily 1 here: add row k of B, unrolled eight columns at a time.
+          for (; j <= jBound; j += 8) {
+            resultData[rOffset + j] += bData[kOffset + j];
+            resultData[rOffset + j + 1] += bData[kOffset + j + 1];
+            resultData[rOffset + j + 2] += bData[kOffset + j + 2];
+            resultData[rOffset + j + 3] += bData[kOffset + j + 3];
+            resultData[rOffset + j + 4] += bData[kOffset + j + 4];
+            resultData[rOffset + j + 5] += bData[kOffset + j + 5];
+            resultData[rOffset + j + 6] += bData[kOffset + j + 6];
+            resultData[rOffset + j + 7] += bData[kOffset + j + 7];
+          }
+          for (; j < bCols; j++) {
+            resultData[rOffset + j] += bData[kOffset + j];
+          }
+        } else {
+          // B is the binary operand: add the float aik wherever B holds a 1.
+          for (; j <= jBound; j += 8) {
+            if (bData[kOffset + j] === 1) resultData[rOffset + j] += aik;
+            if (bData[kOffset + j + 1] === 1) resultData[rOffset + j + 1] += aik;
+            if (bData[kOffset + j + 2] === 1) resultData[rOffset + j + 2] += aik;
+            if (bData[kOffset + j + 3] === 1) resultData[rOffset + j + 3] += aik;
+            if (bData[kOffset + j + 4] === 1) resultData[rOffset + j + 4] += aik;
+            if (bData[kOffset + j + 5] === 1) resultData[rOffset + j + 5] += aik;
+            if (bData[kOffset + j + 6] === 1) resultData[rOffset + j + 6] += aik;
+            if (bData[kOffset + j + 7] === 1) resultData[rOffset + j + 7] += aik;
+          }
+          for (; j < bCols; j++) {
+            if (bData[kOffset + j] === 1) resultData[rOffset + j] += aik;
+          }
+        }
+      }
+    }
+  } else {
+    // A * B^T (or A^T * B^T): every output cell is a gated sum and is assigned, not accumulated.
+    for (let i = 0; i < aRows; i++) {
+      const rOffset = i * bCols;
+      for (let j = 0; j < bCols; j++) {
+        let sum = 0;
+        let k = 0;
+        const kBound = aCols - 8;
+        if (aIsBinary) {
+          for (; k <= kBound; k += 8) {
+            const aik0 = transA ? aData[k * aRows + i] : aData[i * aCols + k];
+            if (aik0 === 1) sum += bData[j * aCols + k];
+            const aik1 = transA ? aData[(k + 1) * aRows + i] : aData[i * aCols + (k + 1)];
+            if (aik1 === 1) sum += bData[j * aCols + (k + 1)];
+            const aik2 = transA ? aData[(k + 2) * aRows + i] : aData[i * aCols + (k + 2)];
+            if (aik2 === 1) sum += bData[j * aCols + (k + 2)];
+            const aik3 = transA ? aData[(k + 3) * aRows + i] : aData[i * aCols + (k + 3)];
+            if (aik3 === 1) sum += bData[j * aCols + (k + 3)];
+            const aik4 = transA ? aData[(k + 4) * aRows + i] : aData[i * aCols + (k + 4)];
+            if (aik4 === 1) sum += bData[j * aCols + (k + 4)];
+            const aik5 = transA ? aData[(k + 5) * aRows + i] : aData[i * aCols + (k + 5)];
+            if (aik5 === 1) sum += bData[j * aCols + (k + 5)];
+            const aik6 = transA ? aData[(k + 6) * aRows + i] : aData[i * aCols + (k + 6)];
+            if (aik6 === 1) sum += bData[j * aCols + (k + 6)];
+            const aik7 = transA ? aData[(k + 7) * aRows + i] : aData[i * aCols + (k + 7)];
+            if (aik7 === 1) sum += bData[j * aCols + (k + 7)];
+          }
+          for (; k < aCols; k++) {
+            const aik = transA ? aData[k * aRows + i] : aData[i * aCols + k];
+            if (aik === 1) sum += bData[j * aCols + k];
+          }
+        } else {
+          // B is the binary operand.
+          for (; k <= kBound; k += 8) {
+            if (bData[j * aCols + k] === 1) {
+              sum += transA ? aData[k * aRows + i] : aData[i * aCols + k];
+            }
+            if (bData[j * aCols + (k + 1)] === 1) {
+              sum += transA ? aData[(k + 1) * aRows + i] : aData[i * aCols + (k + 1)];
+            }
+            if (bData[j * aCols + (k + 2)] === 1) {
+              sum += transA ? aData[(k + 2) * aRows + i] : aData[i * aCols + (k + 2)];
+            }
+            if (bData[j * aCols + (k + 3)] === 1) {
+              sum += transA ? aData[(k + 3) * aRows + i] : aData[i * aCols + (k + 3)];
+            }
+            if (bData[j * aCols + (k + 4)] === 1) {
+              sum += transA ? aData[(k + 4) * aRows + i] : aData[i * aCols + (k + 4)];
+            }
+            if (bData[j * aCols + (k + 5)] === 1) {
+              sum += transA ? aData[(k + 5) * aRows + i] : aData[i * aCols + (k + 5)];
+            }
+            if (bData[j * aCols + (k + 6)] === 1) {
+              sum += transA ? aData[(k + 6) * aRows + i] : aData[i * aCols + (k + 6)];
+            }
+            if (bData[j * aCols + (k + 7)] === 1) {
+              sum += transA ? aData[(k + 7) * aRows + i] : aData[i * aCols + (k + 7)];
+            }
+          }
+          for (; k < aCols; k++) {
+            if (bData[j * aCols + k] === 1) {
+              sum += transA ? aData[k * aRows + i] : aData[i * aCols + k];
+            }
+          }
+        }
+        resultData[rOffset + j] = sum;
+      }
+    }
+  }
+  const res = out ? out : Matrix.fromFlat(resultData, [aRows, bCols]);
+  // Record for auto-diff (assumes `engine` is provided by the core package).
+  if (engine && engine.tape) {
+    engine.record([a, b], [res], (grad: Matrix) => {
+      // The incoming gradient is generally floating point, so pairing it with the
+      // non-binary operand cannot go through the add-only kernel unconditionally.
+      const gA = !transA
+        ? gradientProduct(grad, b, false, !transB)
+        : gradientProduct(b, grad, transB, true);
+      const gB = !transB
+        ? gradientProduct(a, grad, !transA, false)
+        : gradientProduct(grad, a, true, transA);
+      return [gA, gB];
+    }, { saveInput: false, saveOutput: false, requireInputStability: true });
+  }
+  return res;
+}
+/** Returns true when every element of `data` is exactly 0 or 1 (an empty array counts as binary). */
+function isBinaryData(data: ArrayLike<number>): boolean {
+  for (let i = 0; i < data.length; i++) {
+    const val = data[i];
+    if (val !== 0 && val !== 1) {
+      return false;
+    }
+  }
+  return true;
+}
+/**
+ * Product used by the backward pass: the add-only kernel when one operand is
+ * binary, otherwise a plain multiply-accumulate so the gradient is still
+ * produced instead of failing the binary precondition.
+ */
+function gradientProduct(x: Matrix, y: Matrix, transX: boolean, transY: boolean): Matrix {
+  if (isBinaryData(x._data) || isBinaryData(y._data)) {
+    return dotProductAddOnly(x, y, undefined, transX, transY);
+  }
+  const xCols = x._shape[1];
+  const yCols = y._shape[1];
+  const rows = transX ? xCols : x._shape[0];
+  const inner = transX ? x._shape[0] : xCols;
+  const innerY = transY ? yCols : y._shape[0];
+  const cols = transY ? y._shape[0] : yCols;
+  if (inner !== innerY) {
+    throw new Error(`Gradient product shape mismatch: [${rows}x${inner}] * [${innerY}x${cols}]`);
+  }
+  const xData = x._data;
+  const yData = y._data;
+  const result = new Float32Array(rows * cols);
+  for (let i = 0; i < rows; i++) {
+    const rOffset = i * cols;
+    for (let k = 0; k < inner; k++) {
+      // Both layouts are row-major with the original column count as the stride.
+      const xik = transX ? xData[k * xCols + i] : xData[i * xCols + k];
+      if (xik === 0) continue;
+      for (let j = 0; j < cols; j++) {
+        const ykj = transY ? yData[j * yCols + k] : yData[k * yCols + j];
+        result[rOffset + j] += xik * ykj;
+      }
+    }
+  }
+  return Matrix.fromFlat(result, [rows, cols]);
+}