@dniskav/neuron 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,5 +1,8 @@
1
1
  # @dniskav/neuron
2
2
 
3
+ [![npm](https://img.shields.io/npm/v/@dniskav/neuron)](https://www.npmjs.com/package/@dniskav/neuron)
4
+ [![license](https://img.shields.io/npm/l/@dniskav/neuron)](LICENSE)
5
+
3
6
  A minimal, dependency-free neural network library built from scratch in TypeScript. Designed for learning and experimentation — every line of math is readable.
4
7
 
5
8
  ## What's inside
@@ -11,6 +14,8 @@ A minimal, dependency-free neural network library built from scratch in TypeScri
11
14
  | `Layer` | A group of `NeuronN` neurons that share the same inputs. |
12
15
  | `Network` | Two-layer network (hidden + output) with backpropagation. |
13
16
  | `NetworkN` | Deep network of arbitrary depth. Define your architecture as `[inputs, ...hidden, outputs]`. |
17
+ | `LSTMLayer` | Recurrent layer with persistent hidden and cell state. Learns sequences via BPTT. |
18
+ | `NetworkLSTM` | Wraps an `LSTMLayer` + dense layers. Maintains memory across steps within an episode. |
14
19
 
15
20
  ## Install
16
21
 
@@ -95,6 +100,47 @@ const [out1, out2] = net.predict([0.5, 0.3, 0.8]);
95
100
  net.trainWithDeltas(inputs, [0.4, -0.2], 0.05);
96
101
  ```
97
102
 
103
+ ### NetworkLSTM — recurrent network with memory
104
+
105
+ `NetworkLSTM` adds within-episode memory: the network can remember what happened in previous steps of the same sequence.
106
+
107
+ ```ts
108
+ import { NetworkLSTM } from "@dniskav/neuron";
109
+
110
+ // 1 input → LSTM(8 hidden) → Dense(4) → 1 output
111
+ const net = new NetworkLSTM(1, 8, [4, 1]);
112
+
113
+ // Task: predict 1 if we're past step 3 in the episode, else 0
114
+ // A feedforward net can't do this — it has no memory of step count.
115
+
116
+ for (let epoch = 0; epoch < 300; epoch++) {
117
+ net.resetState(); // clear memory at episode start
118
+
119
+ const targets: number[][] = [];
120
+ for (let step = 0; step < 6; step++) {
121
+ net.predict([1]); // same input every step
122
+ targets.push([step >= 3 ? 1 : 0]);
123
+ }
124
+
125
+ net.train(targets, 0.05); // BPTT across the full episode
126
+ }
127
+
128
+ // Run a fresh episode and check predictions
129
+ net.resetState();
130
+ for (let step = 0; step < 6; step++) {
131
+ const [out] = net.predict([1]);
132
+ console.log(`step ${step}: ${out.toFixed(2)} (expected: ${step >= 3 ? 1 : 0})`);
133
+ }
134
+ // step 0: 0.07 (expected: 0)
135
+ // step 1: 0.11 (expected: 0)
136
+ // step 2: 0.18 (expected: 0)
137
+ // step 3: 0.81 (expected: 1)
138
+ // step 4: 0.89 (expected: 1)
139
+ // step 5: 0.93 (expected: 1)
140
+ ```
141
+
142
+ The network learns to count steps using its hidden state — no external counter needed.
143
+
98
144
  ## How it works
99
145
 
100
146
  Every class uses **sigmoid** as its activation function (`LSTMLayer` additionally applies **tanh** to its cell candidate and cell state) and **gradient descent** to update weights:
package/dist/index.d.mts CHANGED
@@ -37,4 +37,82 @@ declare class NetworkN {
37
37
  trainWithDeltas(inputs: number[], outputDeltas: number[], lr: number): void;
38
38
  }
39
39
 
40
- export { Layer, Network, NetworkN, Neuron, NeuronN };
40
/**
 * A single LSTM gate: an affine map from the concatenated `[inputs, hidden]`
 * vector to one pre-activation per hidden unit. The activation itself is
 * applied by the caller.
 */
declare class Gate {
    /** Weight matrix: `hSize` rows, each with `inputSize + hSize` columns. */
    W: Array<number[]>;
    /** Bias vector with one entry per hidden unit. */
    b: Array<number>;
    /**
     * @param inputSize Number of external inputs.
     * @param hSize Number of hidden units (rows of `W`).
     * @param initBias Initial value for every bias entry; defaults to 0.
     */
    constructor(inputSize: number, hSize: number, initBias?: number);
    /**
     * Computes `W · combined + b`.
     * @param combined The `[inputs, hidden]` vector.
     * @returns One pre-activation per hidden unit.
     */
    linear(combined: number[]): number[];
}
46
/**
 * Recurrent LSTM layer that keeps its hidden state `h` and cell state `c`
 * between `predict()` calls and learns with backpropagation through time.
 */
declare class LSTMLayer {
    /** Number of external inputs per step. */
    readonly inputSize: number;
    /** Number of hidden units. */
    readonly hSize: number;
    /** Current hidden state. */
    h: Array<number>;
    /** Current cell state. */
    c: Array<number>;
    forgetGate: Gate;
    inputGate: Gate;
    cellGate: Gate;
    outputGate: Gate;
    /** Per-step forward-pass records consumed by `backprop()`. */
    private _traj;
    constructor(inputSize: number, hiddenSize: number);
    /** Zeroes `h` and `c` and discards the recorded trajectory. */
    reset(): void;
    /**
     * Runs one time step, updates `h`/`c`, and records the step for training.
     * @returns The new hidden state.
     */
    predict(inputs: number[]): number[];
    /**
     * Backpropagation through time over the recorded steps.
     * @param dh_seq One error vector per recorded step, in `predict()` order.
     *   The call is a no-op when its length differs from the number of
     *   recorded steps.
     * @param lr Learning rate (divided by the number of steps internally).
     */
    backprop(dh_seq: number[][], lr: number): void;
    /** Returns the weight matrix and bias vector of each of the four gates. */
    getWeights(): Record<"forgetGate" | "inputGate" | "cellGate" | "outputGate", {
        W: number[][];
        b: number[];
    }>;
    /** Loads gate weights in the shape produced by `getWeights()`. */
    setWeights(data: ReturnType<LSTMLayer["getWeights"]>): void;
}
80
+
81
/**
 * An `LSTMLayer` followed by a stack of dense `Layer`s. Keeps recurrent
 * memory across `predict()` calls until `resetState()` is called.
 */
declare class NetworkLSTM {
    readonly inputSize: number;
    readonly hiddenSize: number;
    /** The recurrent front end. */
    lstm: LSTMLayer;
    /** Dense layers applied, in order, to the LSTM hidden state. */
    denseLayers: Layer[];
    /** Activations recorded per step since the last reset/train. */
    private _acts;
    /**
     * @param inputSize Number of inputs per step.
     * @param hiddenSize Number of LSTM hidden units.
     * @param denseStructure Size of each dense layer; the last entry is the
     *   output size.
     */
    constructor(inputSize: number, hiddenSize: number, denseStructure: number[]);
    /** Clears the LSTM state and the recorded activations (call at episode start). */
    resetState(): void;
    /** Runs one step through the LSTM and dense layers and records it for training. */
    predict(inputs: number[]): number[];
    /**
     * Trains on the steps recorded since the last reset/train.
     * @param targets One target vector per recorded step, in `predict()`
     *   order. The call is a no-op when the lengths differ.
     * @param lr Learning rate.
     */
    train(targets: number[][], lr: number): void;
    /** Returns the LSTM gate weights plus, per dense layer, each neuron's weights and bias. */
    getWeights(): {
        lstm: ReturnType<LSTMLayer["getWeights"]>;
        dense: Array<Array<{
            weights: number[];
            bias: number;
        }>>;
    };
    /** Loads weights in the shape produced by `getWeights()`. */
    setWeights(data: ReturnType<NetworkLSTM["getWeights"]>): void;
}
117
+
118
+ export { LSTMLayer, Layer, Network, NetworkLSTM, NetworkN, Neuron, NeuronN };
package/dist/index.d.ts CHANGED
@@ -37,4 +37,82 @@ declare class NetworkN {
37
37
  trainWithDeltas(inputs: number[], outputDeltas: number[], lr: number): void;
38
38
  }
39
39
 
40
- export { Layer, Network, NetworkN, Neuron, NeuronN };
40
/**
 * A single LSTM gate: an affine map from the concatenated `[inputs, hidden]`
 * vector to one pre-activation per hidden unit. The activation itself is
 * applied by the caller.
 */
declare class Gate {
    /** Weight matrix: `hSize` rows, each with `inputSize + hSize` columns. */
    W: Array<number[]>;
    /** Bias vector with one entry per hidden unit. */
    b: Array<number>;
    /**
     * @param inputSize Number of external inputs.
     * @param hSize Number of hidden units (rows of `W`).
     * @param initBias Initial value for every bias entry; defaults to 0.
     */
    constructor(inputSize: number, hSize: number, initBias?: number);
    /**
     * Computes `W · combined + b`.
     * @param combined The `[inputs, hidden]` vector.
     * @returns One pre-activation per hidden unit.
     */
    linear(combined: number[]): number[];
}
46
/**
 * Recurrent LSTM layer that keeps its hidden state `h` and cell state `c`
 * between `predict()` calls and learns with backpropagation through time.
 */
declare class LSTMLayer {
    /** Number of external inputs per step. */
    readonly inputSize: number;
    /** Number of hidden units. */
    readonly hSize: number;
    /** Current hidden state. */
    h: Array<number>;
    /** Current cell state. */
    c: Array<number>;
    forgetGate: Gate;
    inputGate: Gate;
    cellGate: Gate;
    outputGate: Gate;
    /** Per-step forward-pass records consumed by `backprop()`. */
    private _traj;
    constructor(inputSize: number, hiddenSize: number);
    /** Zeroes `h` and `c` and discards the recorded trajectory. */
    reset(): void;
    /**
     * Runs one time step, updates `h`/`c`, and records the step for training.
     * @returns The new hidden state.
     */
    predict(inputs: number[]): number[];
    /**
     * Backpropagation through time over the recorded steps.
     * @param dh_seq One error vector per recorded step, in `predict()` order.
     *   The call is a no-op when its length differs from the number of
     *   recorded steps.
     * @param lr Learning rate (divided by the number of steps internally).
     */
    backprop(dh_seq: number[][], lr: number): void;
    /** Returns the weight matrix and bias vector of each of the four gates. */
    getWeights(): Record<"forgetGate" | "inputGate" | "cellGate" | "outputGate", {
        W: number[][];
        b: number[];
    }>;
    /** Loads gate weights in the shape produced by `getWeights()`. */
    setWeights(data: ReturnType<LSTMLayer["getWeights"]>): void;
}
80
+
81
/**
 * An `LSTMLayer` followed by a stack of dense `Layer`s. Keeps recurrent
 * memory across `predict()` calls until `resetState()` is called.
 */
declare class NetworkLSTM {
    readonly inputSize: number;
    readonly hiddenSize: number;
    /** The recurrent front end. */
    lstm: LSTMLayer;
    /** Dense layers applied, in order, to the LSTM hidden state. */
    denseLayers: Layer[];
    /** Activations recorded per step since the last reset/train. */
    private _acts;
    /**
     * @param inputSize Number of inputs per step.
     * @param hiddenSize Number of LSTM hidden units.
     * @param denseStructure Size of each dense layer; the last entry is the
     *   output size.
     */
    constructor(inputSize: number, hiddenSize: number, denseStructure: number[]);
    /** Clears the LSTM state and the recorded activations (call at episode start). */
    resetState(): void;
    /** Runs one step through the LSTM and dense layers and records it for training. */
    predict(inputs: number[]): number[];
    /**
     * Trains on the steps recorded since the last reset/train.
     * @param targets One target vector per recorded step, in `predict()`
     *   order. The call is a no-op when the lengths differ.
     * @param lr Learning rate.
     */
    train(targets: number[][], lr: number): void;
    /** Returns the LSTM gate weights plus, per dense layer, each neuron's weights and bias. */
    getWeights(): {
        lstm: ReturnType<LSTMLayer["getWeights"]>;
        dense: Array<Array<{
            weights: number[];
            bias: number;
        }>>;
    };
    /** Loads weights in the shape produced by `getWeights()`. */
    setWeights(data: ReturnType<NetworkLSTM["getWeights"]>): void;
}
117
+
118
+ export { LSTMLayer, Layer, Network, NetworkLSTM, NetworkN, Neuron, NeuronN };
package/dist/index.js CHANGED
@@ -20,8 +20,10 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
20
20
  // src/index.ts
21
21
  var index_exports = {};
22
22
  __export(index_exports, {
23
+ LSTMLayer: () => LSTMLayer,
23
24
  Layer: () => Layer,
24
25
  Network: () => Network,
26
+ NetworkLSTM: () => NetworkLSTM,
25
27
  NetworkN: () => NetworkN,
26
28
  Neuron: () => Neuron,
27
29
  NeuronN: () => NeuronN
@@ -167,10 +169,257 @@ var NetworkN = class {
167
169
  }
168
170
  }
169
171
  };
172
+
173
+ // src/LSTMLayer.ts
174
/**
 * Logistic sigmoid: maps any real number into the open interval (0, 1).
 * Saturates to exactly 0 or 1 for inputs of very large magnitude.
 */
function sigmoid3(x) {
  const expNeg = Math.exp(-x);
  return 1 / (1 + expNeg);
}
177
/**
 * Hyperbolic tangent, mapping any real number into [-1, 1].
 *
 * Delegates to the built-in `Math.tanh`. The hand-rolled form
 * `(e^(2x) - 1) / (e^(2x) + 1)` overflows once `e^(2x)` becomes `Infinity`
 * (x above roughly 355) and then evaluates to `Infinity / Infinity = NaN`,
 * which would poison every later cell state and weight update.
 */
function tanh(x) {
  return Math.tanh(x);
}
181
/**
 * One LSTM gate: holds a weight matrix and bias vector and computes the
 * affine pre-activation for a concatenated [inputs, hidden] vector.
 */
var Gate = class {
  // Weight matrix, shape: [hSize][inputSize + hSize]
  W;
  // Bias vector, shape: [hSize]
  b;

  /**
   * @param inputSize number of external inputs
   * @param hSize     number of hidden units (rows of W)
   * @param initBias  value every bias entry starts at (default 0)
   */
  constructor(inputSize, hSize, initBias = 0) {
    const fanIn = inputSize + hSize;
    // Weights are drawn uniformly from [-bound, bound), bound = sqrt(2 / fanIn).
    const bound = Math.sqrt(2 / fanIn);
    const drawWeight = () => bound * (Math.random() * 2 - 1);
    const drawRow = () => Array.from({ length: fanIn }, drawWeight);
    this.W = Array.from({ length: hSize }, drawRow);
    this.b = new Array(hSize).fill(initBias);
  }

  /**
   * Computes W · combined + b.
   * @param combined the [inputs, hidden] vector
   * @returns one pre-activation per row of W
   */
  linear(combined) {
    const out = [];
    for (let i = 0; i < this.W.length; i++) {
      const row = this.W[i];
      // Start from the bias, then add the weighted inputs left to right.
      let acc = this.b[i];
      for (let j = 0; j < row.length; j++) {
        acc += row[j] * combined[j];
      }
      out.push(acc);
    }
    return out;
  }
};
198
/**
 * Recurrent LSTM layer. Keeps its hidden state `h` and cell state `c` between
 * predict() calls, records every forward step, and learns from the recorded
 * steps with backpropagation through time (BPTT).
 */
var LSTMLayer = class {
  // Recorded forward-pass values, one entry per predict() call.
  _traj;
  inputSize;
  hSize;
  // Current hidden state and cell state, each of length hSize.
  h;
  c;
  forgetGate;
  inputGate;
  cellGate;
  outputGate;

  /**
   * @param inputSize  number of external inputs per step
   * @param hiddenSize number of hidden units
   */
  constructor(inputSize, hiddenSize) {
    this._traj = [];
    this.inputSize = inputSize;
    this.hSize = hiddenSize;
    this.h = new Array(hiddenSize).fill(0);
    this.c = new Array(hiddenSize).fill(0);
    // The forget gate starts with bias 1; the other gates use Gate's default bias.
    this.forgetGate = new Gate(inputSize, hiddenSize, 1);
    this.inputGate = new Gate(inputSize, hiddenSize);
    this.cellGate = new Gate(inputSize, hiddenSize);
    this.outputGate = new Gate(inputSize, hiddenSize);
  }

  // ── Reset state and trajectory (call at episode start) ────────────────────
  reset() {
    this.h = new Array(this.hSize).fill(0);
    this.c = new Array(this.hSize).fill(0);
    this._traj = [];
  }

  // ── Forward pass ──────────────────────────────────────────────────────────
  /**
   * Runs one time step and records it for a later backprop() call.
   * @param inputs input vector for this step (presumably of length inputSize;
   *               the length is not validated here)
   * @returns the new hidden state
   */
  predict(inputs) {
    const combined = [...inputs, ...this.h];
    const c_prev = [...this.c];
    // Gate pre-activations.
    const zf = this.forgetGate.linear(combined);
    const zi = this.inputGate.linear(combined);
    const zg = this.cellGate.linear(combined);
    const zo = this.outputGate.linear(combined);
    // Gate activations: sigmoid for forget/input/output, tanh for the candidate.
    const zf_a = zf.map(sigmoid3);
    const zi_a = zi.map(sigmoid3);
    const zg_a = zg.map(tanh);
    const zo_a = zo.map(sigmoid3);
    // c = f * c_prev + i * g ;  h = o * tanh(c)
    const c = c_prev.map((cv, k) => zf_a[k] * cv + zi_a[k] * zg_a[k]);
    const h = zo_a.map((o, k) => o * tanh(c[k]));
    this._traj.push({ combined, c_prev, zf, zf_a, zi, zi_a, zg, zg_a, zo, zo_a, c, h });
    this.h = h;
    this.c = c;
    return h;
  }

  // ── BPTT (Backpropagation Through Time) ────────────────────────────────────
  /**
   * Accumulates gradients across the full recorded sequence, then applies them
   * in one update scaled by lr / T, and clears the recorded trajectory.
   *
   * @param dh_seq error signal with respect to h for each timestep, in
   *               predict() order. The accumulated values are ADDED to the
   *               weights, so the signal must point in the direction that
   *               reduces the loss.
   * @param lr     learning rate
   *
   * Does nothing (and keeps the trajectory) when no steps are recorded or when
   * dh_seq has a different length than the trajectory.
   */
  backprop(dh_seq, lr) {
    const T = this._traj.length;
    if (T === 0 || dh_seq.length !== T) return;
    const hSize = this.hSize;
    const inSize = this.inputSize;
    const combSize = inSize + hSize;
    const zeroMatrix = () => Array.from({ length: hSize }, () => new Array(combSize).fill(0));
    const zeroVector = () => new Array(hSize).fill(0);
    const dWf = zeroMatrix();
    const dWi = zeroMatrix();
    const dWg = zeroMatrix();
    const dWo = zeroMatrix();
    const dbf = zeroVector();
    const dbi = zeroVector();
    const dbg = zeroVector();
    const dbo = zeroVector();
    // Signals flowing back from step t+1 into step t.
    let dh_next = zeroVector();
    let dc_next = zeroVector();
    for (let t = T - 1; t >= 0; t--) {
      const s = this._traj[t];
      // Total signal on h: this step's own error plus what came back through time.
      const dh = dh_seq[t].map((d, k) => d + dh_next[k]);
      const tanh_c = s.c.map(tanh);
      // h = o * tanh(c)
      const do_a = dh.map((d, k) => d * tanh_c[k]);
      const dc = dh.map(
        (d, k) => d * s.zo_a[k] * (1 - tanh_c[k] ** 2) + dc_next[k]
      );
      // c = f * c_prev + i * g
      const df_a = dc.map((d, k) => d * s.c_prev[k]);
      const di_a = dc.map((d, k) => d * s.zg_a[k]);
      const dg_a = dc.map((d, k) => d * s.zi_a[k]);
      // Through the activations: sigmoid' = a(1 - a), tanh' = 1 - a^2.
      const dzo = do_a.map((d, k) => d * s.zo_a[k] * (1 - s.zo_a[k]));
      const dzf = df_a.map((d, k) => d * s.zf_a[k] * (1 - s.zf_a[k]));
      const dzi = di_a.map((d, k) => d * s.zi_a[k] * (1 - s.zi_a[k]));
      const dzg = dg_a.map((d, k) => d * (1 - s.zg_a[k] ** 2));
      for (let k = 0; k < hSize; k++) {
        for (let j = 0; j < combSize; j++) {
          dWf[k][j] += dzf[k] * s.combined[j];
          dWi[k][j] += dzi[k] * s.combined[j];
          dWg[k][j] += dzg[k] * s.combined[j];
          dWo[k][j] += dzo[k] * s.combined[j];
        }
        dbf[k] += dzf[k];
        dbi[k] += dzi[k];
        dbg[k] += dzg[k];
        dbo[k] += dzo[k];
      }
      // Signal for the previous hidden state: only the columns of W that
      // multiply the h part of `combined` (indices inSize .. combSize - 1).
      dh_next = zeroVector();
      for (let k = 0; k < hSize; k++) {
        for (let j = inSize; j < combSize; j++) {
          dh_next[j - inSize] += dzf[k] * this.forgetGate.W[k][j] + dzi[k] * this.inputGate.W[k][j] + dzg[k] * this.cellGate.W[k][j] + dzo[k] * this.outputGate.W[k][j];
        }
      }
      // The cell state carries over to the previous step through the forget gate.
      dc_next = dc.map((d, k) => d * s.zf_a[k]);
    }
    // Single batch update, averaged over the sequence length.
    const scale = lr / T;
    for (let k = 0; k < hSize; k++) {
      for (let j = 0; j < combSize; j++) {
        this.forgetGate.W[k][j] += scale * dWf[k][j];
        this.inputGate.W[k][j] += scale * dWi[k][j];
        this.cellGate.W[k][j] += scale * dWg[k][j];
        this.outputGate.W[k][j] += scale * dWo[k][j];
      }
      this.forgetGate.b[k] += scale * dbf[k];
      this.inputGate.b[k] += scale * dbi[k];
      this.cellGate.b[k] += scale * dbg[k];
      this.outputGate.b[k] += scale * dbo[k];
    }
    this._traj = [];
  }

  // ── Serialization ─────────────────────────────────────────────────────────
  /**
   * Returns a snapshot of all gate weights. The arrays are copies: backprop()
   * mutates the live weights in place, so handing out the internal arrays
   * would let later training silently change a saved snapshot.
   */
  getWeights() {
    const snapshot = (gate) => ({
      W: gate.W.map((row) => [...row]),
      b: [...gate.b]
    });
    return {
      forgetGate: snapshot(this.forgetGate),
      inputGate: snapshot(this.inputGate),
      cellGate: snapshot(this.cellGate),
      outputGate: snapshot(this.outputGate)
    };
  }

  /**
   * Loads gate weights in the shape produced by getWeights(). The data is
   * copied so that training this layer never mutates the caller's arrays.
   */
  setWeights(data) {
    const load = (gate, src) => {
      gate.W = src.W.map((row) => [...row]);
      gate.b = [...src.b];
    };
    load(this.forgetGate, data.forgetGate);
    load(this.inputGate, data.inputGate);
    load(this.cellGate, data.cellGate);
    load(this.outputGate, data.outputGate);
  }
};
324
+
325
+ // src/NetworkLSTM.ts
326
/**
 * An LSTMLayer followed by a stack of dense layers. The LSTM keeps its memory
 * across predict() calls until resetState() is called; train() learns from all
 * steps recorded since the last reset/train.
 */
var NetworkLSTM = class {
  inputSize;
  hiddenSize;
  lstm;
  denseLayers;
  // Recorded activations, indexed [T][layer+1][neuron]; index 0 of each step
  // is the LSTM hidden state.
  _acts;

  /**
   * @param inputSize      number of inputs per step
   * @param hiddenSize     number of LSTM hidden units
   * @param denseStructure size of each dense layer; the last entry is the
   *                       output size
   */
  constructor(inputSize, hiddenSize, denseStructure) {
    this.inputSize = inputSize;
    this.hiddenSize = hiddenSize;
    this.lstm = new LSTMLayer(inputSize, hiddenSize);
    this.denseLayers = [];
    // Each dense layer is built from (its own size, size of the layer feeding it).
    const sizes = [hiddenSize, ...denseStructure];
    for (let i = 1; i < sizes.length; i++) {
      this.denseLayers.push(new Layer(sizes[i], sizes[i - 1]));
    }
    this._acts = [];
  }

  // ── Reset recurrent state (call at episode start) ─────────────────────────
  resetState() {
    this.lstm.reset();
    this._acts = [];
  }

  // ── Forward pass ──────────────────────────────────────────────────────────
  /**
   * Runs one step through the LSTM and then each dense layer, recording every
   * layer's activations for train().
   * @returns the output of the last layer
   */
  predict(inputs) {
    const h = this.lstm.predict(inputs);
    const acts = [h];
    for (const layer of this.denseLayers) {
      acts.push(layer.predict(acts[acts.length - 1]));
    }
    this._acts.push(acts);
    return acts[acts.length - 1];
  }

  // ── Train on a full episode ────────────────────────────────────────────────
  /**
   * Accumulates gradients across all T recorded steps before applying them
   * (batch update), then lets the LSTM run BPTT on the error reaching it.
   *
   * @param targets one target vector per step (same order as predict() calls)
   * @param lr      learning rate
   *
   * Does nothing when no steps are recorded or when targets has a different
   * length than the recorded sequence.
   */
  train(targets, lr) {
    const T = this._acts.length;
    if (T === 0 || targets.length !== T) return;
    // With no dense layers the prediction is the raw LSTM hidden state, which
    // is not a sigmoid output, so no sigmoid derivative applies to it.
    const outputIsSigmoid = this.denseLayers.length > 0;
    const denseGrads = this.denseLayers.map((layer) => ({
      dW: layer.neurons.map((n) => new Array(n.weights.length).fill(0)),
      db: new Array(layer.neurons.length).fill(0)
    }));
    const dh_seq = [];
    for (let t = 0; t < T; t++) {
      const acts = this._acts[t];
      const pred = acts[acts.length - 1];
      let deltas = pred.map((p, i) => {
        const err = targets[t][i] - p;
        return outputIsSigmoid ? err * p * (1 - p) : err;
      });
      for (let l = this.denseLayers.length - 1; l >= 0; l--) {
        const layer = this.denseLayers[l];
        const layerIn = acts[l];
        const grad = denseGrads[l];
        const prevDeltas = layerIn.map((out, j) => {
          const errProp = layer.neurons.reduce((s, n, k) => s + deltas[k] * n.weights[j], 0);
          // For l > 0 the input came from a sigmoid dense layer, so apply the
          // sigmoid derivative. For l === 0 the input is the LSTM hidden state
          // h = o * tanh(c): the LSTM applies its own derivatives in backprop(),
          // and h * (1 - h) would even flip the sign whenever h is negative.
          return l > 0 ? errProp * out * (1 - out) : errProp;
        });
        layer.neurons.forEach((n, k) => {
          n.weights.forEach((_, j) => {
            grad.dW[k][j] += deltas[k] * layerIn[j];
          });
          grad.db[k] += deltas[k];
        });
        deltas = prevDeltas;
      }
      // What is left after the dense stack is the error on the LSTM output.
      dh_seq.push(deltas);
    }
    for (let l = 0; l < this.denseLayers.length; l++) {
      const layer = this.denseLayers[l];
      const grad = denseGrads[l];
      layer.neurons.forEach((n, k) => {
        n.weights = n.weights.map((w, j) => w + lr / T * grad.dW[k][j]);
        n.bias += lr / T * grad.db[k];
      });
    }
    this.lstm.backprop(dh_seq, lr);
    this._acts = [];
  }

  // ── Serialization ─────────────────────────────────────────────────────────
  /** Returns the LSTM gate weights plus a copy of every dense neuron's weights and bias. */
  getWeights() {
    return {
      lstm: this.lstm.getWeights(),
      dense: this.denseLayers.map(
        (layer) => layer.neurons.map((n) => ({ weights: [...n.weights], bias: n.bias }))
      )
    };
  }

  /** Loads weights in the shape produced by getWeights(). */
  setWeights(data) {
    this.lstm.setWeights(data.lstm);
    data.dense.forEach((layerData, l) => {
      layerData.forEach((neuronData, k) => {
        this.denseLayers[l].neurons[k].weights = [...neuronData.weights];
        this.denseLayers[l].neurons[k].bias = neuronData.bias;
      });
    });
  }
};
170
417
  // Annotate the CommonJS export names for ESM import in node:
171
418
  0 && (module.exports = {
419
+ LSTMLayer,
172
420
  Layer,
173
421
  Network,
422
+ NetworkLSTM,
174
423
  NetworkN,
175
424
  Neuron,
176
425
  NeuronN
package/dist/index.mjs CHANGED
@@ -137,9 +137,256 @@ var NetworkN = class {
137
137
  }
138
138
  }
139
139
  };
140
+
141
+ // src/LSTMLayer.ts
142
/**
 * Logistic sigmoid: maps any real number into the open interval (0, 1).
 * Saturates to exactly 0 or 1 for inputs of very large magnitude.
 */
function sigmoid3(x) {
  const expNeg = Math.exp(-x);
  return 1 / (1 + expNeg);
}
145
/**
 * Hyperbolic tangent, mapping any real number into [-1, 1].
 *
 * Delegates to the built-in `Math.tanh`. The hand-rolled form
 * `(e^(2x) - 1) / (e^(2x) + 1)` overflows once `e^(2x)` becomes `Infinity`
 * (x above roughly 355) and then evaluates to `Infinity / Infinity = NaN`,
 * which would poison every later cell state and weight update.
 */
function tanh(x) {
  return Math.tanh(x);
}
149
/**
 * One LSTM gate: holds a weight matrix and bias vector and computes the
 * affine pre-activation for a concatenated [inputs, hidden] vector.
 */
var Gate = class {
  // Weight matrix, shape: [hSize][inputSize + hSize]
  W;
  // Bias vector, shape: [hSize]
  b;

  /**
   * @param inputSize number of external inputs
   * @param hSize     number of hidden units (rows of W)
   * @param initBias  value every bias entry starts at (default 0)
   */
  constructor(inputSize, hSize, initBias = 0) {
    const fanIn = inputSize + hSize;
    // Weights are drawn uniformly from [-bound, bound), bound = sqrt(2 / fanIn).
    const bound = Math.sqrt(2 / fanIn);
    const drawWeight = () => bound * (Math.random() * 2 - 1);
    const drawRow = () => Array.from({ length: fanIn }, drawWeight);
    this.W = Array.from({ length: hSize }, drawRow);
    this.b = new Array(hSize).fill(initBias);
  }

  /**
   * Computes W · combined + b.
   * @param combined the [inputs, hidden] vector
   * @returns one pre-activation per row of W
   */
  linear(combined) {
    const out = [];
    for (let i = 0; i < this.W.length; i++) {
      const row = this.W[i];
      // Start from the bias, then add the weighted inputs left to right.
      let acc = this.b[i];
      for (let j = 0; j < row.length; j++) {
        acc += row[j] * combined[j];
      }
      out.push(acc);
    }
    return out;
  }
};
166
/**
 * Recurrent LSTM layer. Keeps its hidden state `h` and cell state `c` between
 * predict() calls, records every forward step, and learns from the recorded
 * steps with backpropagation through time (BPTT).
 */
var LSTMLayer = class {
  // Recorded forward-pass values, one entry per predict() call.
  _traj;
  inputSize;
  hSize;
  // Current hidden state and cell state, each of length hSize.
  h;
  c;
  forgetGate;
  inputGate;
  cellGate;
  outputGate;

  /**
   * @param inputSize  number of external inputs per step
   * @param hiddenSize number of hidden units
   */
  constructor(inputSize, hiddenSize) {
    this._traj = [];
    this.inputSize = inputSize;
    this.hSize = hiddenSize;
    this.h = new Array(hiddenSize).fill(0);
    this.c = new Array(hiddenSize).fill(0);
    // The forget gate starts with bias 1; the other gates use Gate's default bias.
    this.forgetGate = new Gate(inputSize, hiddenSize, 1);
    this.inputGate = new Gate(inputSize, hiddenSize);
    this.cellGate = new Gate(inputSize, hiddenSize);
    this.outputGate = new Gate(inputSize, hiddenSize);
  }

  // ── Reset state and trajectory (call at episode start) ────────────────────
  reset() {
    this.h = new Array(this.hSize).fill(0);
    this.c = new Array(this.hSize).fill(0);
    this._traj = [];
  }

  // ── Forward pass ──────────────────────────────────────────────────────────
  /**
   * Runs one time step and records it for a later backprop() call.
   * @param inputs input vector for this step (presumably of length inputSize;
   *               the length is not validated here)
   * @returns the new hidden state
   */
  predict(inputs) {
    const combined = [...inputs, ...this.h];
    const c_prev = [...this.c];
    // Gate pre-activations.
    const zf = this.forgetGate.linear(combined);
    const zi = this.inputGate.linear(combined);
    const zg = this.cellGate.linear(combined);
    const zo = this.outputGate.linear(combined);
    // Gate activations: sigmoid for forget/input/output, tanh for the candidate.
    const zf_a = zf.map(sigmoid3);
    const zi_a = zi.map(sigmoid3);
    const zg_a = zg.map(tanh);
    const zo_a = zo.map(sigmoid3);
    // c = f * c_prev + i * g ;  h = o * tanh(c)
    const c = c_prev.map((cv, k) => zf_a[k] * cv + zi_a[k] * zg_a[k]);
    const h = zo_a.map((o, k) => o * tanh(c[k]));
    this._traj.push({ combined, c_prev, zf, zf_a, zi, zi_a, zg, zg_a, zo, zo_a, c, h });
    this.h = h;
    this.c = c;
    return h;
  }

  // ── BPTT (Backpropagation Through Time) ────────────────────────────────────
  /**
   * Accumulates gradients across the full recorded sequence, then applies them
   * in one update scaled by lr / T, and clears the recorded trajectory.
   *
   * @param dh_seq error signal with respect to h for each timestep, in
   *               predict() order. The accumulated values are ADDED to the
   *               weights, so the signal must point in the direction that
   *               reduces the loss.
   * @param lr     learning rate
   *
   * Does nothing (and keeps the trajectory) when no steps are recorded or when
   * dh_seq has a different length than the trajectory.
   */
  backprop(dh_seq, lr) {
    const T = this._traj.length;
    if (T === 0 || dh_seq.length !== T) return;
    const hSize = this.hSize;
    const inSize = this.inputSize;
    const combSize = inSize + hSize;
    const zeroMatrix = () => Array.from({ length: hSize }, () => new Array(combSize).fill(0));
    const zeroVector = () => new Array(hSize).fill(0);
    const dWf = zeroMatrix();
    const dWi = zeroMatrix();
    const dWg = zeroMatrix();
    const dWo = zeroMatrix();
    const dbf = zeroVector();
    const dbi = zeroVector();
    const dbg = zeroVector();
    const dbo = zeroVector();
    // Signals flowing back from step t+1 into step t.
    let dh_next = zeroVector();
    let dc_next = zeroVector();
    for (let t = T - 1; t >= 0; t--) {
      const s = this._traj[t];
      // Total signal on h: this step's own error plus what came back through time.
      const dh = dh_seq[t].map((d, k) => d + dh_next[k]);
      const tanh_c = s.c.map(tanh);
      // h = o * tanh(c)
      const do_a = dh.map((d, k) => d * tanh_c[k]);
      const dc = dh.map(
        (d, k) => d * s.zo_a[k] * (1 - tanh_c[k] ** 2) + dc_next[k]
      );
      // c = f * c_prev + i * g
      const df_a = dc.map((d, k) => d * s.c_prev[k]);
      const di_a = dc.map((d, k) => d * s.zg_a[k]);
      const dg_a = dc.map((d, k) => d * s.zi_a[k]);
      // Through the activations: sigmoid' = a(1 - a), tanh' = 1 - a^2.
      const dzo = do_a.map((d, k) => d * s.zo_a[k] * (1 - s.zo_a[k]));
      const dzf = df_a.map((d, k) => d * s.zf_a[k] * (1 - s.zf_a[k]));
      const dzi = di_a.map((d, k) => d * s.zi_a[k] * (1 - s.zi_a[k]));
      const dzg = dg_a.map((d, k) => d * (1 - s.zg_a[k] ** 2));
      for (let k = 0; k < hSize; k++) {
        for (let j = 0; j < combSize; j++) {
          dWf[k][j] += dzf[k] * s.combined[j];
          dWi[k][j] += dzi[k] * s.combined[j];
          dWg[k][j] += dzg[k] * s.combined[j];
          dWo[k][j] += dzo[k] * s.combined[j];
        }
        dbf[k] += dzf[k];
        dbi[k] += dzi[k];
        dbg[k] += dzg[k];
        dbo[k] += dzo[k];
      }
      // Signal for the previous hidden state: only the columns of W that
      // multiply the h part of `combined` (indices inSize .. combSize - 1).
      dh_next = zeroVector();
      for (let k = 0; k < hSize; k++) {
        for (let j = inSize; j < combSize; j++) {
          dh_next[j - inSize] += dzf[k] * this.forgetGate.W[k][j] + dzi[k] * this.inputGate.W[k][j] + dzg[k] * this.cellGate.W[k][j] + dzo[k] * this.outputGate.W[k][j];
        }
      }
      // The cell state carries over to the previous step through the forget gate.
      dc_next = dc.map((d, k) => d * s.zf_a[k]);
    }
    // Single batch update, averaged over the sequence length.
    const scale = lr / T;
    for (let k = 0; k < hSize; k++) {
      for (let j = 0; j < combSize; j++) {
        this.forgetGate.W[k][j] += scale * dWf[k][j];
        this.inputGate.W[k][j] += scale * dWi[k][j];
        this.cellGate.W[k][j] += scale * dWg[k][j];
        this.outputGate.W[k][j] += scale * dWo[k][j];
      }
      this.forgetGate.b[k] += scale * dbf[k];
      this.inputGate.b[k] += scale * dbi[k];
      this.cellGate.b[k] += scale * dbg[k];
      this.outputGate.b[k] += scale * dbo[k];
    }
    this._traj = [];
  }

  // ── Serialization ─────────────────────────────────────────────────────────
  /**
   * Returns a snapshot of all gate weights. The arrays are copies: backprop()
   * mutates the live weights in place, so handing out the internal arrays
   * would let later training silently change a saved snapshot.
   */
  getWeights() {
    const snapshot = (gate) => ({
      W: gate.W.map((row) => [...row]),
      b: [...gate.b]
    });
    return {
      forgetGate: snapshot(this.forgetGate),
      inputGate: snapshot(this.inputGate),
      cellGate: snapshot(this.cellGate),
      outputGate: snapshot(this.outputGate)
    };
  }

  /**
   * Loads gate weights in the shape produced by getWeights(). The data is
   * copied so that training this layer never mutates the caller's arrays.
   */
  setWeights(data) {
    const load = (gate, src) => {
      gate.W = src.W.map((row) => [...row]);
      gate.b = [...src.b];
    };
    load(this.forgetGate, data.forgetGate);
    load(this.inputGate, data.inputGate);
    load(this.cellGate, data.cellGate);
    load(this.outputGate, data.outputGate);
  }
};
292
+
293
+ // src/NetworkLSTM.ts
294
/**
 * An LSTMLayer followed by a stack of dense layers. The LSTM keeps its memory
 * across predict() calls until resetState() is called; train() learns from all
 * steps recorded since the last reset/train.
 */
var NetworkLSTM = class {
  inputSize;
  hiddenSize;
  lstm;
  denseLayers;
  // Recorded activations, indexed [T][layer+1][neuron]; index 0 of each step
  // is the LSTM hidden state.
  _acts;

  /**
   * @param inputSize      number of inputs per step
   * @param hiddenSize     number of LSTM hidden units
   * @param denseStructure size of each dense layer; the last entry is the
   *                       output size
   */
  constructor(inputSize, hiddenSize, denseStructure) {
    this.inputSize = inputSize;
    this.hiddenSize = hiddenSize;
    this.lstm = new LSTMLayer(inputSize, hiddenSize);
    this.denseLayers = [];
    // Each dense layer is built from (its own size, size of the layer feeding it).
    const sizes = [hiddenSize, ...denseStructure];
    for (let i = 1; i < sizes.length; i++) {
      this.denseLayers.push(new Layer(sizes[i], sizes[i - 1]));
    }
    this._acts = [];
  }

  // ── Reset recurrent state (call at episode start) ─────────────────────────
  resetState() {
    this.lstm.reset();
    this._acts = [];
  }

  // ── Forward pass ──────────────────────────────────────────────────────────
  /**
   * Runs one step through the LSTM and then each dense layer, recording every
   * layer's activations for train().
   * @returns the output of the last layer
   */
  predict(inputs) {
    const h = this.lstm.predict(inputs);
    const acts = [h];
    for (const layer of this.denseLayers) {
      acts.push(layer.predict(acts[acts.length - 1]));
    }
    this._acts.push(acts);
    return acts[acts.length - 1];
  }

  // ── Train on a full episode ────────────────────────────────────────────────
  /**
   * Accumulates gradients across all T recorded steps before applying them
   * (batch update), then lets the LSTM run BPTT on the error reaching it.
   *
   * @param targets one target vector per step (same order as predict() calls)
   * @param lr      learning rate
   *
   * Does nothing when no steps are recorded or when targets has a different
   * length than the recorded sequence.
   */
  train(targets, lr) {
    const T = this._acts.length;
    if (T === 0 || targets.length !== T) return;
    // With no dense layers the prediction is the raw LSTM hidden state, which
    // is not a sigmoid output, so no sigmoid derivative applies to it.
    const outputIsSigmoid = this.denseLayers.length > 0;
    const denseGrads = this.denseLayers.map((layer) => ({
      dW: layer.neurons.map((n) => new Array(n.weights.length).fill(0)),
      db: new Array(layer.neurons.length).fill(0)
    }));
    const dh_seq = [];
    for (let t = 0; t < T; t++) {
      const acts = this._acts[t];
      const pred = acts[acts.length - 1];
      let deltas = pred.map((p, i) => {
        const err = targets[t][i] - p;
        return outputIsSigmoid ? err * p * (1 - p) : err;
      });
      for (let l = this.denseLayers.length - 1; l >= 0; l--) {
        const layer = this.denseLayers[l];
        const layerIn = acts[l];
        const grad = denseGrads[l];
        const prevDeltas = layerIn.map((out, j) => {
          const errProp = layer.neurons.reduce((s, n, k) => s + deltas[k] * n.weights[j], 0);
          // For l > 0 the input came from a sigmoid dense layer, so apply the
          // sigmoid derivative. For l === 0 the input is the LSTM hidden state
          // h = o * tanh(c): the LSTM applies its own derivatives in backprop(),
          // and h * (1 - h) would even flip the sign whenever h is negative.
          return l > 0 ? errProp * out * (1 - out) : errProp;
        });
        layer.neurons.forEach((n, k) => {
          n.weights.forEach((_, j) => {
            grad.dW[k][j] += deltas[k] * layerIn[j];
          });
          grad.db[k] += deltas[k];
        });
        deltas = prevDeltas;
      }
      // What is left after the dense stack is the error on the LSTM output.
      dh_seq.push(deltas);
    }
    for (let l = 0; l < this.denseLayers.length; l++) {
      const layer = this.denseLayers[l];
      const grad = denseGrads[l];
      layer.neurons.forEach((n, k) => {
        n.weights = n.weights.map((w, j) => w + lr / T * grad.dW[k][j]);
        n.bias += lr / T * grad.db[k];
      });
    }
    this.lstm.backprop(dh_seq, lr);
    this._acts = [];
  }

  // ── Serialization ─────────────────────────────────────────────────────────
  /** Returns the LSTM gate weights plus a copy of every dense neuron's weights and bias. */
  getWeights() {
    return {
      lstm: this.lstm.getWeights(),
      dense: this.denseLayers.map(
        (layer) => layer.neurons.map((n) => ({ weights: [...n.weights], bias: n.bias }))
      )
    };
  }

  /** Loads weights in the shape produced by getWeights(). */
  setWeights(data) {
    this.lstm.setWeights(data.lstm);
    data.dense.forEach((layerData, l) => {
      layerData.forEach((neuronData, k) => {
        this.denseLayers[l].neurons[k].weights = [...neuronData.weights];
        this.denseLayers[l].neurons[k].bias = neuronData.bias;
      });
    });
  }
};
140
385
  export {
386
+ LSTMLayer,
141
387
  Layer,
142
388
  Network,
389
+ NetworkLSTM,
143
390
  NetworkN,
144
391
  Neuron,
145
392
  NeuronN
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@dniskav/neuron",
3
- "version": "0.1.0",
3
+ "version": "0.1.2",
4
4
  "description": "Minimal neural network from scratch — neuron, layer, network, backpropagation. No dependencies.",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.mjs",