npm - @dniskav/neuron - Versions diffs - 0.1.0 → 0.1.2 - Mend

@dniskav/neuron 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/README.md CHANGED

@@ -1,5 +1,8 @@
 # @dniskav/neuron
+[![npm](https://img.shields.io/npm/v/@dniskav/neuron)](https://www.npmjs.com/package/@dniskav/neuron)
+[![license](https://img.shields.io/npm/l/@dniskav/neuron)](LICENSE)
 A minimal, dependency-free neural network library built from scratch in TypeScript. Designed for learning and experimentation — every line of math is readable.
 ## What's inside
@@ -11,6 +14,8 @@ A minimal, dependency-free neural network library built from scratch in TypeScri
 | `Layer` | A group of `NeuronN` neurons that share the same inputs. |
 | `Network` | Two-layer network (hidden + output) with backpropagation. |
 | `NetworkN` | Deep network of arbitrary depth. Define your architecture as `[inputs, ...hidden, outputs]`. |
+| `LSTMLayer` | Recurrent layer with persistent hidden and cell state. Learns sequences via BPTT. |
+| `NetworkLSTM` | Wraps an `LSTMLayer` + dense layers. Maintains memory across steps within an episode. |
 ## Install
@@ -95,6 +100,47 @@ const [out1, out2] = net.predict([0.5, 0.3, 0.8]);
 net.trainWithDeltas(inputs, [0.4, -0.2], 0.05);
 ```
+### NetworkLSTM — recurrent network with memory
+`NetworkLSTM` adds within-episode memory: the network can remember what happened in previous steps of the same sequence.
+```ts
+import { NetworkLSTM } from "@dniskav/neuron";
+// 1 input → LSTM(8 hidden) → Dense(4) → 1 output
+const net = new NetworkLSTM(1, 8, [4, 1]);
+// Task: predict 1 if we're past step 3 in the episode, else 0
+// A feedforward net can't do this — it has no memory of step count.
+for (let epoch = 0; epoch < 300; epoch++) {
+  net.resetState();             // clear memory at episode start
+  const targets: number[][] = [];
+  for (let step = 0; step < 6; step++) {
+    net.predict([1]);           // same input every step
+    targets.push([step >= 3 ? 1 : 0]);
+  }
+  net.train(targets, 0.05);    // BPTT across the full episode
+}
+// Run a fresh episode and check predictions
+net.resetState();
+for (let step = 0; step < 6; step++) {
+  const [out] = net.predict([1]);
+  console.log(`step ${step}: ${out.toFixed(2)}  (expected: ${step >= 3 ? 1 : 0})`);
+}
+// step 0: 0.07  (expected: 0)
+// step 1: 0.11  (expected: 0)
+// step 2: 0.18  (expected: 0)
+// step 3: 0.81  (expected: 1)
+// step 4: 0.89  (expected: 1)
+// step 5: 0.93  (expected: 1)
+```
+The network learns to count steps using its hidden state — no external counter needed.
 ## How it works
 Every class uses **sigmoid** as its activation function and **gradient descent** to update weights:

package/dist/index.d.mts CHANGED

@@ -37,4 +37,82 @@ declare class NetworkN {
     trainWithDeltas(inputs: number[], outputDeltas: number[], lr: number): void;
 }
-export { Layer, Network, NetworkN, Neuron, NeuronN };
+declare class Gate {
+    W: number[][];
+    b: number[];
+    constructor(inputSize: number, hSize: number, initBias?: number);
+    linear(combined: number[]): number[];
+}
+declare class LSTMLayer {
+    readonly inputSize: number;
+    readonly hSize: number;
+    h: number[];
+    c: number[];
+    forgetGate: Gate;
+    inputGate: Gate;
+    cellGate: Gate;
+    outputGate: Gate;
+    private _traj;
+    constructor(inputSize: number, hiddenSize: number);
+    reset(): void;
+    predict(inputs: number[]): number[];
+    backprop(dh_seq: number[][], lr: number): void;
+    getWeights(): {
+        forgetGate: {
+            W: number[][];
+            b: number[];
+        };
+        inputGate: {
+            W: number[][];
+            b: number[];
+        };
+        cellGate: {
+            W: number[][];
+            b: number[];
+        };
+        outputGate: {
+            W: number[][];
+            b: number[];
+        };
+    };
+    setWeights(data: ReturnType<LSTMLayer["getWeights"]>): void;
+}
+declare class NetworkLSTM {
+    readonly inputSize: number;
+    readonly hiddenSize: number;
+    lstm: LSTMLayer;
+    denseLayers: Layer[];
+    private _acts;
+    constructor(inputSize: number, hiddenSize: number, denseStructure: number[]);
+    resetState(): void;
+    predict(inputs: number[]): number[];
+    train(targets: number[][], lr: number): void;
+    getWeights(): {
+        lstm: {
+            forgetGate: {
+                W: number[][];
+                b: number[];
+            };
+            inputGate: {
+                W: number[][];
+                b: number[];
+            };
+            cellGate: {
+                W: number[][];
+                b: number[];
+            };
+            outputGate: {
+                W: number[][];
+                b: number[];
+            };
+        };
+        dense: {
+            weights: number[];
+            bias: number;
+        }[][];
+    };
+    setWeights(data: ReturnType<NetworkLSTM["getWeights"]>): void;
+}
+export { LSTMLayer, Layer, Network, NetworkLSTM, NetworkN, Neuron, NeuronN };

package/dist/index.d.ts CHANGED

@@ -37,4 +37,82 @@ declare class NetworkN {
     trainWithDeltas(inputs: number[], outputDeltas: number[], lr: number): void;
 }
-export { Layer, Network, NetworkN, Neuron, NeuronN };
+declare class Gate {
+    W: number[][];
+    b: number[];
+    constructor(inputSize: number, hSize: number, initBias?: number);
+    linear(combined: number[]): number[];
+}
+declare class LSTMLayer {
+    readonly inputSize: number;
+    readonly hSize: number;
+    h: number[];
+    c: number[];
+    forgetGate: Gate;
+    inputGate: Gate;
+    cellGate: Gate;
+    outputGate: Gate;
+    private _traj;
+    constructor(inputSize: number, hiddenSize: number);
+    reset(): void;
+    predict(inputs: number[]): number[];
+    backprop(dh_seq: number[][], lr: number): void;
+    getWeights(): {
+        forgetGate: {
+            W: number[][];
+            b: number[];
+        };
+        inputGate: {
+            W: number[][];
+            b: number[];
+        };
+        cellGate: {
+            W: number[][];
+            b: number[];
+        };
+        outputGate: {
+            W: number[][];
+            b: number[];
+        };
+    };
+    setWeights(data: ReturnType<LSTMLayer["getWeights"]>): void;
+}
+declare class NetworkLSTM {
+    readonly inputSize: number;
+    readonly hiddenSize: number;
+    lstm: LSTMLayer;
+    denseLayers: Layer[];
+    private _acts;
+    constructor(inputSize: number, hiddenSize: number, denseStructure: number[]);
+    resetState(): void;
+    predict(inputs: number[]): number[];
+    train(targets: number[][], lr: number): void;
+    getWeights(): {
+        lstm: {
+            forgetGate: {
+                W: number[][];
+                b: number[];
+            };
+            inputGate: {
+                W: number[][];
+                b: number[];
+            };
+            cellGate: {
+                W: number[][];
+                b: number[];
+            };
+            outputGate: {
+                W: number[][];
+                b: number[];
+            };
+        };
+        dense: {
+            weights: number[];
+            bias: number;
+        }[][];
+    };
+    setWeights(data: ReturnType<NetworkLSTM["getWeights"]>): void;
+}
+export { LSTMLayer, Layer, Network, NetworkLSTM, NetworkN, Neuron, NeuronN };

package/dist/index.js CHANGED

@@ -20,8 +20,10 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
 // src/index.ts
 var index_exports = {};
 __export(index_exports, {
+  LSTMLayer: () => LSTMLayer,
   Layer: () => Layer,
   Network: () => Network,
+  NetworkLSTM: () => NetworkLSTM,
   NetworkN: () => NetworkN,
   Neuron: () => Neuron,
   NeuronN: () => NeuronN
@@ -167,10 +169,257 @@ var NetworkN = class {
     }
   }
 };
+// src/LSTMLayer.ts
+function sigmoid3(x) {
+  return 1 / (1 + Math.exp(-x));
+}
+function tanh(x) {
+  const e = Math.exp(2 * x);
+  return (e - 1) / (e + 1);
+}
+var Gate = class {
+  // shape: [hSize]
+  constructor(inputSize, hSize, initBias = 0) {
+    const n = inputSize + hSize;
+    const limit = Math.sqrt(2 / n);
+    this.W = Array.from(
+      { length: hSize },
+      () => Array.from({ length: n }, () => (Math.random() * 2 - 1) * limit)
+    );
+    this.b = new Array(hSize).fill(initBias);
+  }
+  linear(combined) {
+    return this.W.map(
+      (row, i) => row.reduce((s, w, j) => s + w * combined[j], this.b[i])
+    );
+  }
+};
+var LSTMLayer = class {
+  constructor(inputSize, hiddenSize) {
+    this._traj = [];
+    this.inputSize = inputSize;
+    this.hSize = hiddenSize;
+    this.h = new Array(hiddenSize).fill(0);
+    this.c = new Array(hiddenSize).fill(0);
+    this.forgetGate = new Gate(inputSize, hiddenSize, 1);
+    this.inputGate = new Gate(inputSize, hiddenSize);
+    this.cellGate = new Gate(inputSize, hiddenSize);
+    this.outputGate = new Gate(inputSize, hiddenSize);
+  }
+  // ── Reset state and trajectory (call at episode start) ────────────────────
+  reset() {
+    this.h = new Array(this.hSize).fill(0);
+    this.c = new Array(this.hSize).fill(0);
+    this._traj = [];
+  }
+  // ── Forward pass ──────────────────────────────────────────────────────────
+  predict(inputs) {
+    const combined = [...inputs, ...this.h];
+    const c_prev = [...this.c];
+    const zf = this.forgetGate.linear(combined);
+    const zi = this.inputGate.linear(combined);
+    const zg = this.cellGate.linear(combined);
+    const zo = this.outputGate.linear(combined);
+    const zf_a = zf.map(sigmoid3);
+    const zi_a = zi.map(sigmoid3);
+    const zg_a = zg.map(tanh);
+    const zo_a = zo.map(sigmoid3);
+    const c = c_prev.map((cv, k) => zf_a[k] * cv + zi_a[k] * zg_a[k]);
+    const h = zo_a.map((o, k) => o * tanh(c[k]));
+    this._traj.push({ combined, c_prev, zf, zf_a, zi, zi_a, zg, zg_a, zo, zo_a, c, h });
+    this.h = h;
+    this.c = c;
+    return h;
+  }
+  // ── BPTT (Backpropagation Through Time) ────────────────────────────────────
+  // dh_seq: dL/dh for each timestep, same length as trajectory.
+  // Accumulates gradients across the full sequence, then applies them in one
+  // update (batch gradient) scaled by lr / T.
+  backprop(dh_seq, lr) {
+    const T = this._traj.length;
+    if (T === 0 || dh_seq.length !== T) return;
+    const hSize = this.hSize;
+    const combSize = this.inputSize + hSize;
+    const dWf = Array.from({ length: hSize }, () => new Array(combSize).fill(0));
+    const dWi = Array.from({ length: hSize }, () => new Array(combSize).fill(0));
+    const dWg = Array.from({ length: hSize }, () => new Array(combSize).fill(0));
+    const dWo = Array.from({ length: hSize }, () => new Array(combSize).fill(0));
+    const dbf = new Array(hSize).fill(0);
+    const dbi = new Array(hSize).fill(0);
+    const dbg = new Array(hSize).fill(0);
+    const dbo = new Array(hSize).fill(0);
+    let dh_next = new Array(hSize).fill(0);
+    let dc_next = new Array(hSize).fill(0);
+    for (let t = T - 1; t >= 0; t--) {
+      const s = this._traj[t];
+      const dh = dh_seq[t].map((d, k) => d + dh_next[k]);
+      const tanh_c = s.c.map(tanh);
+      const do_a = dh.map((d, k) => d * tanh_c[k]);
+      const dc = dh.map(
+        (d, k) => d * s.zo_a[k] * (1 - tanh_c[k] ** 2) + dc_next[k]
+      );
+      const df_a = dc.map((d, k) => d * s.c_prev[k]);
+      const di_a = dc.map((d, k) => d * s.zg_a[k]);
+      const dg_a = dc.map((d, k) => d * s.zi_a[k]);
+      const dzo = do_a.map((d, k) => d * s.zo_a[k] * (1 - s.zo_a[k]));
+      const dzf = df_a.map((d, k) => d * s.zf_a[k] * (1 - s.zf_a[k]));
+      const dzi = di_a.map((d, k) => d * s.zi_a[k] * (1 - s.zi_a[k]));
+      const dzg = dg_a.map((d, k) => d * (1 - s.zg_a[k] ** 2));
+      for (let k = 0; k < hSize; k++) {
+        for (let j = 0; j < combSize; j++) {
+          dWf[k][j] += dzf[k] * s.combined[j];
+          dWi[k][j] += dzi[k] * s.combined[j];
+          dWg[k][j] += dzg[k] * s.combined[j];
+          dWo[k][j] += dzo[k] * s.combined[j];
+        }
+        dbf[k] += dzf[k];
+        dbi[k] += dzi[k];
+        dbg[k] += dzg[k];
+        dbo[k] += dzo[k];
+      }
+      dh_next = new Array(hSize).fill(0);
+      for (let k = 0; k < hSize; k++) {
+        for (let j = this.inputSize; j < combSize; j++) {
+          dh_next[j - this.inputSize] += dzf[k] * this.forgetGate.W[k][j] + dzi[k] * this.inputGate.W[k][j] + dzg[k] * this.cellGate.W[k][j] + dzo[k] * this.outputGate.W[k][j];
+        }
+      }
+      dc_next = dc.map((d, k) => d * s.zf_a[k]);
+    }
+    const scale = lr / T;
+    for (let k = 0; k < hSize; k++) {
+      for (let j = 0; j < combSize; j++) {
+        this.forgetGate.W[k][j] += scale * dWf[k][j];
+        this.inputGate.W[k][j] += scale * dWi[k][j];
+        this.cellGate.W[k][j] += scale * dWg[k][j];
+        this.outputGate.W[k][j] += scale * dWo[k][j];
+      }
+      this.forgetGate.b[k] += scale * dbf[k];
+      this.inputGate.b[k] += scale * dbi[k];
+      this.cellGate.b[k] += scale * dbg[k];
+      this.outputGate.b[k] += scale * dbo[k];
+    }
+    this._traj = [];
+  }
+  // ── Serialization ─────────────────────────────────────────────────────────
+  getWeights() {
+    return {
+      forgetGate: { W: this.forgetGate.W, b: this.forgetGate.b },
+      inputGate: { W: this.inputGate.W, b: this.inputGate.b },
+      cellGate: { W: this.cellGate.W, b: this.cellGate.b },
+      outputGate: { W: this.outputGate.W, b: this.outputGate.b }
+    };
+  }
+  setWeights(data) {
+    this.forgetGate.W = data.forgetGate.W;
+    this.forgetGate.b = data.forgetGate.b;
+    this.inputGate.W = data.inputGate.W;
+    this.inputGate.b = data.inputGate.b;
+    this.cellGate.W = data.cellGate.W;
+    this.cellGate.b = data.cellGate.b;
+    this.outputGate.W = data.outputGate.W;
+    this.outputGate.b = data.outputGate.b;
+  }
+};
+// src/NetworkLSTM.ts
+var NetworkLSTM = class {
+  // [T][layer+1][neuron]
+  constructor(inputSize, hiddenSize, denseStructure) {
+    this.inputSize = inputSize;
+    this.hiddenSize = hiddenSize;
+    this.lstm = new LSTMLayer(inputSize, hiddenSize);
+    this.denseLayers = [];
+    const sizes = [hiddenSize, ...denseStructure];
+    for (let i = 1; i < sizes.length; i++) {
+      this.denseLayers.push(new Layer(sizes[i], sizes[i - 1]));
+    }
+    this._acts = [];
+  }
+  // ── Reset recurrent state (call at episode start) ─────────────────────────
+  resetState() {
+    this.lstm.reset();
+    this._acts = [];
+  }
+  // ── Forward pass ──────────────────────────────────────────────────────────
+  predict(inputs) {
+    const h = this.lstm.predict(inputs);
+    const acts = [h];
+    for (const layer of this.denseLayers) {
+      acts.push(layer.predict(acts[acts.length - 1]));
+    }
+    this._acts.push(acts);
+    return acts[acts.length - 1];
+  }
+  // ── Train on a full episode ────────────────────────────────────────────────
+  // targets: one target vector per step (same order as predict() calls).
+  // Accumulates gradients across all T steps before applying (batch update).
+  train(targets, lr) {
+    const T = this._acts.length;
+    if (T === 0 || targets.length !== T) return;
+    const denseGrads = this.denseLayers.map((layer) => ({
+      dW: layer.neurons.map((n) => new Array(n.weights.length).fill(0)),
+      db: new Array(layer.neurons.length).fill(0)
+    }));
+    const dh_seq = [];
+    for (let t = 0; t < T; t++) {
+      const acts = this._acts[t];
+      const pred = acts[acts.length - 1];
+      let deltas = pred.map((p, i) => (targets[t][i] - p) * p * (1 - p));
+      for (let l = this.denseLayers.length - 1; l >= 0; l--) {
+        const layer = this.denseLayers[l];
+        const layerIn = acts[l];
+        const grad = denseGrads[l];
+        const prevDeltas = layerIn.map((out, j) => {
+          const errProp = layer.neurons.reduce((s, n, k) => s + deltas[k] * n.weights[j], 0);
+          return errProp * out * (1 - out);
+        });
+        layer.neurons.forEach((n, k) => {
+          n.weights.forEach((_, j) => {
+            grad.dW[k][j] += deltas[k] * layerIn[j];
+          });
+          grad.db[k] += deltas[k];
+        });
+        deltas = prevDeltas;
+      }
+      dh_seq.push(deltas);
+    }
+    for (let l = 0; l < this.denseLayers.length; l++) {
+      const layer = this.denseLayers[l];
+      const grad = denseGrads[l];
+      layer.neurons.forEach((n, k) => {
+        n.weights = n.weights.map((w, j) => w + lr / T * grad.dW[k][j]);
+        n.bias += lr / T * grad.db[k];
+      });
+    }
+    this.lstm.backprop(dh_seq, lr);
+    this._acts = [];
+  }
+  // ── Serialization ─────────────────────────────────────────────────────────
+  getWeights() {
+    return {
+      lstm: this.lstm.getWeights(),
+      dense: this.denseLayers.map(
+        (layer) => layer.neurons.map((n) => ({ weights: [...n.weights], bias: n.bias }))
+      )
+    };
+  }
+  setWeights(data) {
+    this.lstm.setWeights(data.lstm);
+    data.dense.forEach((layerData, l) => {
+      layerData.forEach((neuronData, k) => {
+        this.denseLayers[l].neurons[k].weights = [...neuronData.weights];
+        this.denseLayers[l].neurons[k].bias = neuronData.bias;
+      });
+    });
+  }
+};
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
+  LSTMLayer,
   Layer,
   Network,
+  NetworkLSTM,
   NetworkN,
   Neuron,
   NeuronN

package/dist/index.mjs CHANGED

@@ -137,9 +137,256 @@ var NetworkN = class {
     }
   }
 };
+// src/LSTMLayer.ts
+function sigmoid3(x) {
+  return 1 / (1 + Math.exp(-x));
+}
+function tanh(x) {
+  const e = Math.exp(2 * x);
+  return (e - 1) / (e + 1);
+}
+var Gate = class {
+  // shape: [hSize]
+  constructor(inputSize, hSize, initBias = 0) {
+    const n = inputSize + hSize;
+    const limit = Math.sqrt(2 / n);
+    this.W = Array.from(
+      { length: hSize },
+      () => Array.from({ length: n }, () => (Math.random() * 2 - 1) * limit)
+    );
+    this.b = new Array(hSize).fill(initBias);
+  }
+  linear(combined) {
+    return this.W.map(
+      (row, i) => row.reduce((s, w, j) => s + w * combined[j], this.b[i])
+    );
+  }
+};
+var LSTMLayer = class {
+  constructor(inputSize, hiddenSize) {
+    this._traj = [];
+    this.inputSize = inputSize;
+    this.hSize = hiddenSize;
+    this.h = new Array(hiddenSize).fill(0);
+    this.c = new Array(hiddenSize).fill(0);
+    this.forgetGate = new Gate(inputSize, hiddenSize, 1);
+    this.inputGate = new Gate(inputSize, hiddenSize);
+    this.cellGate = new Gate(inputSize, hiddenSize);
+    this.outputGate = new Gate(inputSize, hiddenSize);
+  }
+  // ── Reset state and trajectory (call at episode start) ────────────────────
+  reset() {
+    this.h = new Array(this.hSize).fill(0);
+    this.c = new Array(this.hSize).fill(0);
+    this._traj = [];
+  }
+  // ── Forward pass ──────────────────────────────────────────────────────────
+  predict(inputs) {
+    const combined = [...inputs, ...this.h];
+    const c_prev = [...this.c];
+    const zf = this.forgetGate.linear(combined);
+    const zi = this.inputGate.linear(combined);
+    const zg = this.cellGate.linear(combined);
+    const zo = this.outputGate.linear(combined);
+    const zf_a = zf.map(sigmoid3);
+    const zi_a = zi.map(sigmoid3);
+    const zg_a = zg.map(tanh);
+    const zo_a = zo.map(sigmoid3);
+    const c = c_prev.map((cv, k) => zf_a[k] * cv + zi_a[k] * zg_a[k]);
+    const h = zo_a.map((o, k) => o * tanh(c[k]));
+    this._traj.push({ combined, c_prev, zf, zf_a, zi, zi_a, zg, zg_a, zo, zo_a, c, h });
+    this.h = h;
+    this.c = c;
+    return h;
+  }
+  // ── BPTT (Backpropagation Through Time) ────────────────────────────────────
+  // dh_seq: dL/dh for each timestep, same length as trajectory.
+  // Accumulates gradients across the full sequence, then applies them in one
+  // update (batch gradient) scaled by lr / T.
+  backprop(dh_seq, lr) {
+    const T = this._traj.length;
+    if (T === 0 || dh_seq.length !== T) return;
+    const hSize = this.hSize;
+    const combSize = this.inputSize + hSize;
+    const dWf = Array.from({ length: hSize }, () => new Array(combSize).fill(0));
+    const dWi = Array.from({ length: hSize }, () => new Array(combSize).fill(0));
+    const dWg = Array.from({ length: hSize }, () => new Array(combSize).fill(0));
+    const dWo = Array.from({ length: hSize }, () => new Array(combSize).fill(0));
+    const dbf = new Array(hSize).fill(0);
+    const dbi = new Array(hSize).fill(0);
+    const dbg = new Array(hSize).fill(0);
+    const dbo = new Array(hSize).fill(0);
+    let dh_next = new Array(hSize).fill(0);
+    let dc_next = new Array(hSize).fill(0);
+    for (let t = T - 1; t >= 0; t--) {
+      const s = this._traj[t];
+      const dh = dh_seq[t].map((d, k) => d + dh_next[k]);
+      const tanh_c = s.c.map(tanh);
+      const do_a = dh.map((d, k) => d * tanh_c[k]);
+      const dc = dh.map(
+        (d, k) => d * s.zo_a[k] * (1 - tanh_c[k] ** 2) + dc_next[k]
+      );
+      const df_a = dc.map((d, k) => d * s.c_prev[k]);
+      const di_a = dc.map((d, k) => d * s.zg_a[k]);
+      const dg_a = dc.map((d, k) => d * s.zi_a[k]);
+      const dzo = do_a.map((d, k) => d * s.zo_a[k] * (1 - s.zo_a[k]));
+      const dzf = df_a.map((d, k) => d * s.zf_a[k] * (1 - s.zf_a[k]));
+      const dzi = di_a.map((d, k) => d * s.zi_a[k] * (1 - s.zi_a[k]));
+      const dzg = dg_a.map((d, k) => d * (1 - s.zg_a[k] ** 2));
+      for (let k = 0; k < hSize; k++) {
+        for (let j = 0; j < combSize; j++) {
+          dWf[k][j] += dzf[k] * s.combined[j];
+          dWi[k][j] += dzi[k] * s.combined[j];
+          dWg[k][j] += dzg[k] * s.combined[j];
+          dWo[k][j] += dzo[k] * s.combined[j];
+        }
+        dbf[k] += dzf[k];
+        dbi[k] += dzi[k];
+        dbg[k] += dzg[k];
+        dbo[k] += dzo[k];
+      }
+      dh_next = new Array(hSize).fill(0);
+      for (let k = 0; k < hSize; k++) {
+        for (let j = this.inputSize; j < combSize; j++) {
+          dh_next[j - this.inputSize] += dzf[k] * this.forgetGate.W[k][j] + dzi[k] * this.inputGate.W[k][j] + dzg[k] * this.cellGate.W[k][j] + dzo[k] * this.outputGate.W[k][j];
+        }
+      }
+      dc_next = dc.map((d, k) => d * s.zf_a[k]);
+    }
+    const scale = lr / T;
+    for (let k = 0; k < hSize; k++) {
+      for (let j = 0; j < combSize; j++) {
+        this.forgetGate.W[k][j] += scale * dWf[k][j];
+        this.inputGate.W[k][j] += scale * dWi[k][j];
+        this.cellGate.W[k][j] += scale * dWg[k][j];
+        this.outputGate.W[k][j] += scale * dWo[k][j];
+      }
+      this.forgetGate.b[k] += scale * dbf[k];
+      this.inputGate.b[k] += scale * dbi[k];
+      this.cellGate.b[k] += scale * dbg[k];
+      this.outputGate.b[k] += scale * dbo[k];
+    }
+    this._traj = [];
+  }
+  // ── Serialization ─────────────────────────────────────────────────────────
+  getWeights() {
+    return {
+      forgetGate: { W: this.forgetGate.W, b: this.forgetGate.b },
+      inputGate: { W: this.inputGate.W, b: this.inputGate.b },
+      cellGate: { W: this.cellGate.W, b: this.cellGate.b },
+      outputGate: { W: this.outputGate.W, b: this.outputGate.b }
+    };
+  }
+  setWeights(data) {
+    this.forgetGate.W = data.forgetGate.W;
+    this.forgetGate.b = data.forgetGate.b;
+    this.inputGate.W = data.inputGate.W;
+    this.inputGate.b = data.inputGate.b;
+    this.cellGate.W = data.cellGate.W;
+    this.cellGate.b = data.cellGate.b;
+    this.outputGate.W = data.outputGate.W;
+    this.outputGate.b = data.outputGate.b;
+  }
+};
+// src/NetworkLSTM.ts
+var NetworkLSTM = class {
+  // [T][layer+1][neuron]
+  constructor(inputSize, hiddenSize, denseStructure) {
+    this.inputSize = inputSize;
+    this.hiddenSize = hiddenSize;
+    this.lstm = new LSTMLayer(inputSize, hiddenSize);
+    this.denseLayers = [];
+    const sizes = [hiddenSize, ...denseStructure];
+    for (let i = 1; i < sizes.length; i++) {
+      this.denseLayers.push(new Layer(sizes[i], sizes[i - 1]));
+    }
+    this._acts = [];
+  }
+  // ── Reset recurrent state (call at episode start) ─────────────────────────
+  resetState() {
+    this.lstm.reset();
+    this._acts = [];
+  }
+  // ── Forward pass ──────────────────────────────────────────────────────────
+  predict(inputs) {
+    const h = this.lstm.predict(inputs);
+    const acts = [h];
+    for (const layer of this.denseLayers) {
+      acts.push(layer.predict(acts[acts.length - 1]));
+    }
+    this._acts.push(acts);
+    return acts[acts.length - 1];
+  }
+  // ── Train on a full episode ────────────────────────────────────────────────
+  // targets: one target vector per step (same order as predict() calls).
+  // Accumulates gradients across all T steps before applying (batch update).
+  train(targets, lr) {
+    const T = this._acts.length;
+    if (T === 0 || targets.length !== T) return;
+    const denseGrads = this.denseLayers.map((layer) => ({
+      dW: layer.neurons.map((n) => new Array(n.weights.length).fill(0)),
+      db: new Array(layer.neurons.length).fill(0)
+    }));
+    const dh_seq = [];
+    for (let t = 0; t < T; t++) {
+      const acts = this._acts[t];
+      const pred = acts[acts.length - 1];
+      let deltas = pred.map((p, i) => (targets[t][i] - p) * p * (1 - p));
+      for (let l = this.denseLayers.length - 1; l >= 0; l--) {
+        const layer = this.denseLayers[l];
+        const layerIn = acts[l];
+        const grad = denseGrads[l];
+        const prevDeltas = layerIn.map((out, j) => {
+          const errProp = layer.neurons.reduce((s, n, k) => s + deltas[k] * n.weights[j], 0);
+          return errProp * out * (1 - out);
+        });
+        layer.neurons.forEach((n, k) => {
+          n.weights.forEach((_, j) => {
+            grad.dW[k][j] += deltas[k] * layerIn[j];
+          });
+          grad.db[k] += deltas[k];
+        });
+        deltas = prevDeltas;
+      }
+      dh_seq.push(deltas);
+    }
+    for (let l = 0; l < this.denseLayers.length; l++) {
+      const layer = this.denseLayers[l];
+      const grad = denseGrads[l];
+      layer.neurons.forEach((n, k) => {
+        n.weights = n.weights.map((w, j) => w + lr / T * grad.dW[k][j]);
+        n.bias += lr / T * grad.db[k];
+      });
+    }
+    this.lstm.backprop(dh_seq, lr);
+    this._acts = [];
+  }
+  // ── Serialization ─────────────────────────────────────────────────────────
+  getWeights() {
+    return {
+      lstm: this.lstm.getWeights(),
+      dense: this.denseLayers.map(
+        (layer) => layer.neurons.map((n) => ({ weights: [...n.weights], bias: n.bias }))
+      )
+    };
+  }
+  setWeights(data) {
+    this.lstm.setWeights(data.lstm);
+    data.dense.forEach((layerData, l) => {
+      layerData.forEach((neuronData, k) => {
+        this.denseLayers[l].neurons[k].weights = [...neuronData.weights];
+        this.denseLayers[l].neurons[k].bias = neuronData.bias;
+      });
+    });
+  }
+};
 export {
+  LSTMLayer,
   Layer,
   Network,
+  NetworkLSTM,
   NetworkN,
   Neuron,
   NeuronN

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@dniskav/neuron",
-  "version": "0.1.0",
+  "version": "0.1.2",
   "description": "Minimal neural network from scratch — neuron, layer, network, backpropagation. No dependencies.",
   "main": "dist/index.js",
   "module": "dist/index.mjs",