npm - @dniskav/neuron - Versions diffs - 0.2.2 → 0.2.3 - Mend

@dniskav/neuron 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/README.md CHANGED Viewed

@@ -15,6 +15,7 @@ A minimal, dependency-free neural network library built from scratch in TypeScri
 | `LSTMLayer` | Recurrent layer with persistent hidden and cell state. Learns sequences via BPTT. |
 | `NetworkLSTM` | Wraps an `LSTMLayer` + dense layers. Maintains memory across steps within an episode. |
 | `NetworkTransformer` | Full token-classification Transformer: embeddings → N blocks → per-token logits. |
+| `NetworkTransformerRL` | Transformer for RL agents: continuous input projection → causal attention → Q-values. Attends over the last N steps, which are supplied as a sequence on each call. |
 | `TransformerBlock` | One Transformer block: multi-head attention + FFN + LayerNorm × 2 with residuals. |
 | `MultiHeadAttention` | N parallel attention heads concatenated and projected to `d_model`. |
 | `AttentionHead` | Single scaled dot-product self-attention head (Q / K / V projections + backprop). |
@@ -269,6 +270,43 @@ const weights = net.getAttentionWeights();
 Each head in each block learns a different type of relationship (row, column,
 3×3 box). The network figures this out by itself through training.
+### NetworkTransformerRL — Transformer for reinforcement learning
+`NetworkTransformerRL` uses causal self-attention over a sliding window of past states to output Q-values. Unlike `NetworkLSTM`, the agent attends to specific past moments rather than compressing them into a single hidden vector.
+```ts
+import { NetworkTransformerRL } from "@dniskav/neuron";
+// The agent sees the last 8 steps; each step is a 7-value sensor vector, and the network outputs Q-values for 4 actions
+const net = new NetworkTransformerRL(8, 7, {
+  d_model:  32,
+  nHeads:   2,
+  d_ff:     64,
+  nBlocks:  2,
+  nActions: 4,
+});
+// Each step: feed the last N states as a sequence
+const sequence = getLastNStates();      // number[][] — shape: [8, 7]
+const qValues  = net.predict(sequence); // number[4]
+// Q-learning update: train toward the Bellman target
+const action  = argmax(qValues);
+const reward  = env.step(action);
+const targets = qValues.slice();
+targets[action] = reward + 0.99 * Math.max(...net.predict(nextSequence));
+const loss = net.train(sequence, targets, 0.001);
+```
+The last step in the sequence is given 2× weight during pooling, so the most recent state contributes more to the decision.
+```ts
+// Inspect what the agent is attending to
+const attnWeights = net.getAttentionWeights();
+// attnWeights[blockIdx][headIdx] → seqLen × seqLen matrix
+```
 ## Possible improvements
 1. **Support for batches** in training to improve efficiency and gradient stability.

package/dist/index.d.mts CHANGED Viewed

@@ -296,6 +296,34 @@ declare class NetworkTransformerRL {
     predict(sequence: number[][]): number[];
     train(sequence: number[][], target: number[], lr: number): number;
     getAttentionWeights(): (number[][] | null)[][];
+    getWeights(): {
+        inputProj: number[][];
+        blocks: {
+            attn: {
+                heads: {
+                    Wq: number[][];
+                    Wk: number[][];
+                    Wv: number[][];
+                }[];
+                Wo: number[][];
+            };
+            norm1: {
+                gamma: number[];
+                beta: number[];
+            };
+            norm2: {
+                gamma: number[];
+                beta: number[];
+            };
+            ff1: number[][];
+            ff2: number[][];
+            b1: number[];
+            b2: number[];
+        }[];
+        outputProj: number[][];
+        outputBias: number[];
+    };
+    setWeights(data: ReturnType<NetworkTransformerRL['getWeights']>): void;
     private _forward;
     private _pool;
 }

package/dist/index.d.ts CHANGED Viewed

@@ -296,6 +296,34 @@ declare class NetworkTransformerRL {
     predict(sequence: number[][]): number[];
     train(sequence: number[][], target: number[], lr: number): number;
     getAttentionWeights(): (number[][] | null)[][];
+    getWeights(): {
+        inputProj: number[][];
+        blocks: {
+            attn: {
+                heads: {
+                    Wq: number[][];
+                    Wk: number[][];
+                    Wv: number[][];
+                }[];
+                Wo: number[][];
+            };
+            norm1: {
+                gamma: number[];
+                beta: number[];
+            };
+            norm2: {
+                gamma: number[];
+                beta: number[];
+            };
+            ff1: number[][];
+            ff2: number[][];
+            b1: number[];
+            b2: number[];
+        }[];
+        outputProj: number[][];
+        outputBias: number[];
+    };
+    setWeights(data: ReturnType<NetworkTransformerRL['getWeights']>): void;
     private _forward;
     private _pool;
 }

package/dist/index.js CHANGED Viewed

@@ -1190,6 +1190,54 @@ var NetworkTransformerRL = class {
   getAttentionWeights() {
     return this.blocks.map((b) => b.getAttentionWeights());
   }
+  // ── Serialization ──────────────────────────────────────────────────────────
+  getWeights() {
+    return {
+      inputProj: this.inputProj.W.map((r) => [...r]),
+      blocks: this.blocks.map((b) => ({
+        attn: {
+          heads: b.attn.heads.map((h) => ({
+            Wq: h.Wq.W.map((r) => [...r]),
+            Wk: h.Wk.W.map((r) => [...r]),
+            Wv: h.Wv.W.map((r) => [...r])
+          })),
+          Wo: b.attn.Wo.W.map((r) => [...r])
+        },
+        norm1: { gamma: [...b.norm1.gamma], beta: [...b.norm1.beta] },
+        norm2: { gamma: [...b.norm2.gamma], beta: [...b.norm2.beta] },
+        ff1: b.ff1.W.map((r) => [...r]),
+        ff2: b.ff2.W.map((r) => [...r]),
+        b1: [...b.b1],
+        b2: [...b.b2]
+      })),
+      outputProj: this.outputProj.W.map((r) => [...r]),
+      outputBias: [...this.outputBias]
+    };
+  }
+  setWeights(data) {
+    data.inputProj.forEach((row, i) => {
+      this.inputProj.W[i] = [...row];
+    });
+    data.blocks.forEach((bd, b) => {
+      const blk = this.blocks[b];
+      bd.attn.heads.forEach((hd, h) => {
+        blk.attn.heads[h].Wq.W = hd.Wq.map((r) => [...r]);
+        blk.attn.heads[h].Wk.W = hd.Wk.map((r) => [...r]);
+        blk.attn.heads[h].Wv.W = hd.Wv.map((r) => [...r]);
+      });
+      blk.attn.Wo.W = bd.attn.Wo.map((r) => [...r]);
+      blk.norm1.gamma = [...bd.norm1.gamma];
+      blk.norm1.beta = [...bd.norm1.beta];
+      blk.norm2.gamma = [...bd.norm2.gamma];
+      blk.norm2.beta = [...bd.norm2.beta];
+      blk.ff1.W = bd.ff1.map((r) => [...r]);
+      blk.ff2.W = bd.ff2.map((r) => [...r]);
+      blk.b1 = [...bd.b1];
+      blk.b2 = [...bd.b2];
+    });
+    this.outputProj.W = data.outputProj.map((r) => [...r]);
+    this.outputBias = [...data.outputBias];
+  }
   // ── Internal ────────────────────────────────────────────────────────────────
   _forward(sequence) {
     let h = sequence.map(

package/dist/index.mjs CHANGED Viewed

@@ -1130,6 +1130,54 @@ var NetworkTransformerRL = class {
   getAttentionWeights() {
     return this.blocks.map((b) => b.getAttentionWeights());
   }
+  // ── Serialization ──────────────────────────────────────────────────────────
+  getWeights() {
+    return {
+      inputProj: this.inputProj.W.map((r) => [...r]),
+      blocks: this.blocks.map((b) => ({
+        attn: {
+          heads: b.attn.heads.map((h) => ({
+            Wq: h.Wq.W.map((r) => [...r]),
+            Wk: h.Wk.W.map((r) => [...r]),
+            Wv: h.Wv.W.map((r) => [...r])
+          })),
+          Wo: b.attn.Wo.W.map((r) => [...r])
+        },
+        norm1: { gamma: [...b.norm1.gamma], beta: [...b.norm1.beta] },
+        norm2: { gamma: [...b.norm2.gamma], beta: [...b.norm2.beta] },
+        ff1: b.ff1.W.map((r) => [...r]),
+        ff2: b.ff2.W.map((r) => [...r]),
+        b1: [...b.b1],
+        b2: [...b.b2]
+      })),
+      outputProj: this.outputProj.W.map((r) => [...r]),
+      outputBias: [...this.outputBias]
+    };
+  }
+  setWeights(data) {
+    data.inputProj.forEach((row, i) => {
+      this.inputProj.W[i] = [...row];
+    });
+    data.blocks.forEach((bd, b) => {
+      const blk = this.blocks[b];
+      bd.attn.heads.forEach((hd, h) => {
+        blk.attn.heads[h].Wq.W = hd.Wq.map((r) => [...r]);
+        blk.attn.heads[h].Wk.W = hd.Wk.map((r) => [...r]);
+        blk.attn.heads[h].Wv.W = hd.Wv.map((r) => [...r]);
+      });
+      blk.attn.Wo.W = bd.attn.Wo.map((r) => [...r]);
+      blk.norm1.gamma = [...bd.norm1.gamma];
+      blk.norm1.beta = [...bd.norm1.beta];
+      blk.norm2.gamma = [...bd.norm2.gamma];
+      blk.norm2.beta = [...bd.norm2.beta];
+      blk.ff1.W = bd.ff1.map((r) => [...r]);
+      blk.ff2.W = bd.ff2.map((r) => [...r]);
+      blk.b1 = [...bd.b1];
+      blk.b2 = [...bd.b2];
+    });
+    this.outputProj.W = data.outputProj.map((r) => [...r]);
+    this.outputBias = [...data.outputBias];
+  }
   // ── Internal ────────────────────────────────────────────────────────────────
   _forward(sequence) {
     let h = sequence.map(

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@dniskav/neuron",
-  "version": "0.2.2",
+  "version": "0.2.3",
   "description": "Minimal neural network from scratch — neuron, layer, network, backpropagation. No dependencies.",
   "main": "dist/index.js",
   "module": "dist/index.mjs",