@dniskav/neuron 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,5 +1,3 @@
1
- # @dniskav/neuron
2
-
3
1
  [![npm](https://img.shields.io/npm/v/@dniskav/neuron)](https://www.npmjs.com/package/@dniskav/neuron)
4
2
  [![license](https://img.shields.io/npm/l/@dniskav/neuron)](LICENSE)
5
3
 
@@ -7,15 +5,19 @@ A minimal, dependency-free neural network library built from scratch in TypeScri
7
5
 
8
6
  ## What's inside
9
7
 
10
- | Class | Description |
11
- |-------|-------------|
8
+ | Export | Description |
9
+ |--------|-------------|
12
10
  | `Neuron` | Single-input neuron. The simplest possible unit: one weight, one bias. |
13
- | `NeuronN` | N-input neuron with Xavier initialization and sigmoid activation. |
11
+ | `NeuronN` | N-input neuron with Xavier initialization and configurable activation. |
14
12
  | `Layer` | A group of `NeuronN` neurons that share the same inputs. |
15
13
  | `Network` | Two-layer network (hidden + output) with backpropagation. |
16
14
  | `NetworkN` | Deep network of arbitrary depth. Define your architecture as `[inputs, ...hidden, outputs]`. |
17
15
  | `LSTMLayer` | Recurrent layer with persistent hidden and cell state. Learns sequences via BPTT. |
18
16
  | `NetworkLSTM` | Wraps an `LSTMLayer` + dense layers. Maintains memory across steps within an episode. |
17
+ | `sigmoid` `relu` `tanh` `linear` | Built-in activation functions. |
18
+ | `SGD` `Momentum` `Adam` | Optimizers. Each instance tracks its own state per weight. |
19
+ | `mse` `crossEntropy` | Loss functions for evaluation and logging. |
20
+ | `mseDelta` `crossEntropyDelta` | Output-layer delta functions for use with `trainWithDeltas`. |
19
21
 
20
22
  ## Install
21
23
 
@@ -92,12 +94,73 @@ net.train([0.5, 0.3, 0.8], [1, 0], 0.05);
92
94
  const [out1, out2] = net.predict([0.5, 0.3, 0.8]);
93
95
  ```
94
96
 
97
+ ### Activations — ReLU, tanh, and more
98
+
99
+ Pass an activation per layer. The last layer typically uses `sigmoid` for binary output or `linear` for regression.
100
+
101
+ ```ts
102
+ import { NetworkN, relu, sigmoid } from "@dniskav/neuron";
103
+
104
+ const net = new NetworkN([3, 64, 32, 1], {
105
+ activations: [relu, relu, sigmoid],
106
+ });
107
+ ```
108
+
109
+ Available: `sigmoid`, `relu`, `tanh`, `linear`.
110
+
111
+ ### Optimizers — Adam, Momentum, SGD
112
+
113
+ Pass an optimizer factory. Each weight gets its own instance with independent state.
114
+
115
+ ```ts
116
+ import { NetworkN, relu, sigmoid, Adam } from "@dniskav/neuron";
117
+
118
+ const net = new NetworkN([2, 64, 1], {
119
+ activations: [relu, sigmoid],
120
+ optimizer: () => new Adam(), // default: beta1=0.9, beta2=0.999
121
+ });
122
+
123
+ // Momentum example
124
+ import { Momentum } from "@dniskav/neuron";
125
+ const net2 = new NetworkN([2, 32, 1], {
126
+ optimizer: () => new Momentum(0.9),
127
+ });
128
+ ```
129
+
130
+ Optimizers also work in `NetworkLSTM` (applied to the dense layers):
131
+
132
+ ```ts
133
+ import { NetworkLSTM, relu, Adam } from "@dniskav/neuron";
134
+
135
+ const net = new NetworkLSTM(1, 8, [4, 1], {
136
+ denseActivation: relu,
137
+ optimizer: () => new Adam(), // Adam's arguments are (beta1, beta2, epsilon); the learning rate is passed to train()
138
+ });
139
+ ```
140
+
141
+ ### Loss utilities
142
+
143
+ ```ts
144
+ import { mse, crossEntropy } from "@dniskav/neuron";
145
+
146
+ const predicted = net.predict([0.5, 0.3]);
147
+ console.log(mse(predicted, [1, 0]));
148
+ console.log(crossEntropy(predicted, [1, 0]));
149
+ ```
150
+
95
151
  ### trainWithDeltas — custom loss / physics-based gradients
96
152
 
97
153
  `NetworkN` also exposes `trainWithDeltas` for when you compute your own output-layer deltas (e.g., from a physics simulation or a custom loss function):
98
154
 
99
155
  ```ts
100
- net.trainWithDeltas(inputs, [0.4, -0.2], 0.05);
156
+ import { NetworkN, mseDelta } from "@dniskav/neuron";
157
+
158
+ const net = new NetworkN([3, 16, 2]);
159
+ const pred = net.predict(inputs);
160
+
161
+ // Compute deltas manually using a helper, or from any external signal
162
+ const deltas = pred.map((p, i) => mseDelta(p, targets[i]));
163
+ net.trainWithDeltas(inputs, deltas, 0.01);
101
164
  ```
102
165
 
103
166
  ### NetworkLSTM — recurrent network with memory
@@ -143,17 +206,19 @@ The network learns to count steps using its hidden state — no external counter
143
206
 
144
207
  ## How it works
145
208
 
146
- Every class uses **sigmoid** as its activation function and **gradient descent** to update weights:
209
+ Each class applies an **activation function** to the weighted sum of inputs and uses **gradient descent** to update weights:
147
210
 
148
211
  ```
149
- weight += lr × error × input
150
- bias += lr × error
212
+ weight += lr × delta × input
213
+ bias += lr × delta
151
214
  ```
152
215
 
153
- `NetworkN` implements full **backpropagation** across all layers, propagating deltas from the output back to the first layer using the chain rule.
216
+ `NetworkN` implements full **backpropagation** across all layers, propagating deltas from the output back to the first layer using the chain rule. The derivative of the chosen activation is applied at each layer.
154
217
 
155
218
  `NeuronN` uses simplified **Xavier initialization** — weights start in `[-√(1/n), +√(1/n)]` — so gradients flow well from the start of training.
156
219
 
220
+ Weight updates always go through an **optimizer**. The default, `SGD`, applies the rule above as-is; when another optimizer is used (e.g., Adam), the raw gradient is passed to it instead of being applied directly. Each weight maintains its own optimizer state (velocity, moments).
221
+
157
222
  ## Build
158
223
 
159
224
  ```bash
@@ -161,6 +226,15 @@ npm run build # outputs CJS + ESM + type declarations to dist/
161
226
  npm run dev # watch mode
162
227
  ```
163
228
 
229
+ ## For AI agents
230
+
231
+ If you are an AI agent or LLM working with this codebase, read [AGENTS.md](AGENTS.md) first. It contains the full class hierarchy, design constraints, and what this library does not do.
232
+
233
+ ## Possible improvements
234
+
235
+ 1. **Support for batches** in training to improve efficiency.
236
+ 2. **Improve documentation** with more advanced examples and use cases.
237
+
164
238
  ## License
165
239
 
166
240
  MIT
package/dist/index.d.mts CHANGED
@@ -6,17 +6,53 @@ declare class Neuron {
6
6
  train(input: number, target: number, lr: number): void;
7
7
  }
8
8
 
9
+ interface Activation {
10
+ fn(x: number): number;
11
+ dfn(out: number): number;
12
+ }
13
+ declare const sigmoid: Activation;
14
+ declare const tanh: Activation;
15
+ declare const relu: Activation;
16
+ declare const linear: Activation;
17
+
18
+ interface Optimizer {
19
+ step(weight: number, gradient: number, lr: number): number;
20
+ }
21
+ type OptimizerFactory = () => Optimizer;
22
+ declare class SGD implements Optimizer {
23
+ step(weight: number, gradient: number, lr: number): number;
24
+ }
25
+ declare class Momentum implements Optimizer {
26
+ readonly beta: number;
27
+ private v;
28
+ constructor(beta?: number);
29
+ step(weight: number, gradient: number, lr: number): number;
30
+ }
31
+ declare class Adam implements Optimizer {
32
+ readonly beta1: number;
33
+ readonly beta2: number;
34
+ readonly epsilon: number;
35
+ private m;
36
+ private v;
37
+ private t;
38
+ constructor(beta1?: number, beta2?: number, epsilon?: number);
39
+ step(weight: number, gradient: number, lr: number): number;
40
+ }
41
+
9
42
  declare class NeuronN {
10
43
  weights: number[];
11
44
  bias: number;
12
- constructor(nInputs: number);
45
+ readonly activation: Activation;
46
+ private _opts;
47
+ constructor(nInputs: number, activation?: Activation, optimizerFactory?: OptimizerFactory);
13
48
  predict(inputs: number[]): number;
49
+ _update(weightGrads: number[], biasGrad: number, lr: number): void;
14
50
  train(inputs: number[], target: number, lr: number): void;
15
51
  }
16
52
 
17
53
  declare class Layer {
18
54
  neurons: NeuronN[];
19
- constructor(nNeurons: number, nInputs: number);
55
+ constructor(nNeurons: number, nInputs: number, activation?: Activation, optimizerFactory?: OptimizerFactory);
20
56
  predict(inputs: number[]): number[];
21
57
  }
22
58
 
@@ -28,10 +64,14 @@ declare class Network {
28
64
  train(inputs: number[], target: number, lr: number): number;
29
65
  }
30
66
 
67
+ interface NetworkNOptions {
68
+ activations?: Activation[];
69
+ optimizer?: OptimizerFactory;
70
+ }
31
71
  declare class NetworkN {
32
72
  readonly structure: number[];
33
73
  layers: Layer[];
34
- constructor(structure: number[]);
74
+ constructor(structure: number[], options?: NetworkNOptions);
35
75
  predict(inputs: number[]): number[];
36
76
  train(inputs: number[], targets: number[], lr: number): number;
37
77
  trainWithDeltas(inputs: number[], outputDeltas: number[], lr: number): void;
@@ -78,13 +118,17 @@ declare class LSTMLayer {
78
118
  setWeights(data: ReturnType<LSTMLayer["getWeights"]>): void;
79
119
  }
80
120
 
121
+ interface NetworkLSTMOptions {
122
+ denseActivation?: Activation;
123
+ optimizer?: OptimizerFactory;
124
+ }
81
125
  declare class NetworkLSTM {
82
126
  readonly inputSize: number;
83
127
  readonly hiddenSize: number;
84
128
  lstm: LSTMLayer;
85
129
  denseLayers: Layer[];
86
130
  private _acts;
87
- constructor(inputSize: number, hiddenSize: number, denseStructure: number[]);
131
+ constructor(inputSize: number, hiddenSize: number, denseStructure: number[], options?: NetworkLSTMOptions);
88
132
  resetState(): void;
89
133
  predict(inputs: number[]): number[];
90
134
  train(targets: number[][], lr: number): void;
@@ -115,4 +159,10 @@ declare class NetworkLSTM {
115
159
  setWeights(data: ReturnType<NetworkLSTM["getWeights"]>): void;
116
160
  }
117
161
 
118
- export { LSTMLayer, Layer, Network, NetworkLSTM, NetworkN, Neuron, NeuronN };
162
+ declare function mse(predicted: number[], actual: number[]): number;
163
+ declare function crossEntropy(predicted: number[], actual: number[]): number;
164
+ declare function mseDelta(predicted: number, actual: number): number;
165
+ declare function crossEntropyDelta(predicted: number, actual: number): number;
166
+ declare function crossEntropyDeltaRaw(predicted: number, actual: number): number;
167
+
168
+ export { type Activation, Adam, LSTMLayer, Layer, Momentum, Network, NetworkLSTM, type NetworkLSTMOptions, NetworkN, type NetworkNOptions, Neuron, NeuronN, type Optimizer, type OptimizerFactory, SGD, crossEntropy, crossEntropyDelta, crossEntropyDeltaRaw, linear, mse, mseDelta, relu, sigmoid, tanh };
package/dist/index.d.ts CHANGED
@@ -6,17 +6,53 @@ declare class Neuron {
6
6
  train(input: number, target: number, lr: number): void;
7
7
  }
8
8
 
9
+ interface Activation {
10
+ fn(x: number): number;
11
+ dfn(out: number): number;
12
+ }
13
+ declare const sigmoid: Activation;
14
+ declare const tanh: Activation;
15
+ declare const relu: Activation;
16
+ declare const linear: Activation;
17
+
18
+ interface Optimizer {
19
+ step(weight: number, gradient: number, lr: number): number;
20
+ }
21
+ type OptimizerFactory = () => Optimizer;
22
+ declare class SGD implements Optimizer {
23
+ step(weight: number, gradient: number, lr: number): number;
24
+ }
25
+ declare class Momentum implements Optimizer {
26
+ readonly beta: number;
27
+ private v;
28
+ constructor(beta?: number);
29
+ step(weight: number, gradient: number, lr: number): number;
30
+ }
31
+ declare class Adam implements Optimizer {
32
+ readonly beta1: number;
33
+ readonly beta2: number;
34
+ readonly epsilon: number;
35
+ private m;
36
+ private v;
37
+ private t;
38
+ constructor(beta1?: number, beta2?: number, epsilon?: number);
39
+ step(weight: number, gradient: number, lr: number): number;
40
+ }
41
+
9
42
  declare class NeuronN {
10
43
  weights: number[];
11
44
  bias: number;
12
- constructor(nInputs: number);
45
+ readonly activation: Activation;
46
+ private _opts;
47
+ constructor(nInputs: number, activation?: Activation, optimizerFactory?: OptimizerFactory);
13
48
  predict(inputs: number[]): number;
49
+ _update(weightGrads: number[], biasGrad: number, lr: number): void;
14
50
  train(inputs: number[], target: number, lr: number): void;
15
51
  }
16
52
 
17
53
  declare class Layer {
18
54
  neurons: NeuronN[];
19
- constructor(nNeurons: number, nInputs: number);
55
+ constructor(nNeurons: number, nInputs: number, activation?: Activation, optimizerFactory?: OptimizerFactory);
20
56
  predict(inputs: number[]): number[];
21
57
  }
22
58
 
@@ -28,10 +64,14 @@ declare class Network {
28
64
  train(inputs: number[], target: number, lr: number): number;
29
65
  }
30
66
 
67
+ interface NetworkNOptions {
68
+ activations?: Activation[];
69
+ optimizer?: OptimizerFactory;
70
+ }
31
71
  declare class NetworkN {
32
72
  readonly structure: number[];
33
73
  layers: Layer[];
34
- constructor(structure: number[]);
74
+ constructor(structure: number[], options?: NetworkNOptions);
35
75
  predict(inputs: number[]): number[];
36
76
  train(inputs: number[], targets: number[], lr: number): number;
37
77
  trainWithDeltas(inputs: number[], outputDeltas: number[], lr: number): void;
@@ -78,13 +118,17 @@ declare class LSTMLayer {
78
118
  setWeights(data: ReturnType<LSTMLayer["getWeights"]>): void;
79
119
  }
80
120
 
121
+ interface NetworkLSTMOptions {
122
+ denseActivation?: Activation;
123
+ optimizer?: OptimizerFactory;
124
+ }
81
125
  declare class NetworkLSTM {
82
126
  readonly inputSize: number;
83
127
  readonly hiddenSize: number;
84
128
  lstm: LSTMLayer;
85
129
  denseLayers: Layer[];
86
130
  private _acts;
87
- constructor(inputSize: number, hiddenSize: number, denseStructure: number[]);
131
+ constructor(inputSize: number, hiddenSize: number, denseStructure: number[], options?: NetworkLSTMOptions);
88
132
  resetState(): void;
89
133
  predict(inputs: number[]): number[];
90
134
  train(targets: number[][], lr: number): void;
@@ -115,4 +159,10 @@ declare class NetworkLSTM {
115
159
  setWeights(data: ReturnType<NetworkLSTM["getWeights"]>): void;
116
160
  }
117
161
 
118
- export { LSTMLayer, Layer, Network, NetworkLSTM, NetworkN, Neuron, NeuronN };
162
+ declare function mse(predicted: number[], actual: number[]): number;
163
+ declare function crossEntropy(predicted: number[], actual: number[]): number;
164
+ declare function mseDelta(predicted: number, actual: number): number;
165
+ declare function crossEntropyDelta(predicted: number, actual: number): number;
166
+ declare function crossEntropyDeltaRaw(predicted: number, actual: number): number;
167
+
168
+ export { type Activation, Adam, LSTMLayer, Layer, Momentum, Network, NetworkLSTM, type NetworkLSTMOptions, NetworkN, type NetworkNOptions, Neuron, NeuronN, type Optimizer, type OptimizerFactory, SGD, crossEntropy, crossEntropyDelta, crossEntropyDeltaRaw, linear, mse, mseDelta, relu, sigmoid, tanh };
package/dist/index.js CHANGED
@@ -20,13 +20,25 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
20
20
  // src/index.ts
21
21
  var index_exports = {};
22
22
  __export(index_exports, {
23
+ Adam: () => Adam,
23
24
  LSTMLayer: () => LSTMLayer,
24
25
  Layer: () => Layer,
26
+ Momentum: () => Momentum,
25
27
  Network: () => Network,
26
28
  NetworkLSTM: () => NetworkLSTM,
27
29
  NetworkN: () => NetworkN,
28
30
  Neuron: () => Neuron,
29
- NeuronN: () => NeuronN
31
+ NeuronN: () => NeuronN,
32
+ SGD: () => SGD,
33
+ crossEntropy: () => crossEntropy,
34
+ crossEntropyDelta: () => crossEntropyDelta,
35
+ crossEntropyDeltaRaw: () => crossEntropyDeltaRaw,
36
+ linear: () => linear,
37
+ mse: () => mse,
38
+ mseDelta: () => mseDelta,
39
+ relu: () => relu,
40
+ sigmoid: () => sigmoid2,
41
+ tanh: () => tanh
30
42
  });
31
43
  module.exports = __toCommonJS(index_exports);
32
44
 
@@ -50,32 +62,97 @@ var Neuron = class {
50
62
  }
51
63
  };
52
64
 
65
+ // src/activations.ts
66
+ var sigmoid2 = {
67
+ fn: (x) => 1 / (1 + Math.exp(-x)),
68
+ dfn: (out) => out * (1 - out)
69
+ };
70
+ var tanh = {
71
+ fn: (x) => {
72
+ const e = Math.exp(2 * x);
73
+ return (e - 1) / (e + 1);
74
+ },
75
+ dfn: (out) => 1 - out * out
76
+ };
77
+ var relu = {
78
+ fn: (x) => Math.max(0, x),
79
+ dfn: (out) => out > 0 ? 1 : 0
80
+ };
81
+ var linear = {
82
+ fn: (x) => x,
83
+ dfn: () => 1
84
+ };
85
+
86
+ // src/optimizers.ts
87
+ var SGD = class {
88
+ step(weight, gradient, lr) {
89
+ return weight + lr * gradient;
90
+ }
91
+ };
92
+ var Momentum = class {
93
+ constructor(beta = 0.9) {
94
+ this.beta = beta;
95
+ this.v = 0;
96
+ }
97
+ step(weight, gradient, lr) {
98
+ this.v = this.beta * this.v + lr * gradient;
99
+ return weight + this.v;
100
+ }
101
+ };
102
+ var Adam = class {
103
+ constructor(beta1 = 0.9, beta2 = 0.999, epsilon = 1e-8) {
104
+ this.beta1 = beta1;
105
+ this.beta2 = beta2;
106
+ this.epsilon = epsilon;
107
+ this.m = 0;
108
+ this.v = 0;
109
+ this.t = 0;
110
+ }
111
+ step(weight, gradient, lr) {
112
+ this.t++;
113
+ this.m = this.beta1 * this.m + (1 - this.beta1) * gradient;
114
+ this.v = this.beta2 * this.v + (1 - this.beta2) * gradient * gradient;
115
+ const mHat = this.m / (1 - Math.pow(this.beta1, this.t));
116
+ const vHat = this.v / (1 - Math.pow(this.beta2, this.t));
117
+ return weight + lr * mHat / (Math.sqrt(vHat) + this.epsilon);
118
+ }
119
+ };
120
+
53
121
  // src/NeuronN.ts
54
- function sigmoid2(x) {
55
- return 1 / (1 + Math.exp(-x));
56
- }
122
+ var defaultOptimizer = () => new SGD();
57
123
  var NeuronN = class {
58
- constructor(nInputs) {
124
+ constructor(nInputs, activation = sigmoid2, optimizerFactory = defaultOptimizer) {
59
125
  const limit = Math.sqrt(1 / nInputs);
60
126
  this.weights = Array.from({ length: nInputs }, () => (Math.random() * 2 - 1) * limit);
61
127
  this.bias = 0;
128
+ this.activation = activation;
129
+ this._opts = Array.from({ length: nInputs + 1 }, optimizerFactory);
62
130
  }
63
131
  predict(inputs) {
64
132
  const sum = inputs.reduce((acc, e, i) => acc + e * this.weights[i], this.bias);
65
- return sigmoid2(sum);
133
+ return this.activation.fn(sum);
134
+ }
135
+ // Apply pre-computed gradients via the optimizer.
136
+ // Called internally by Layer / NetworkN / NetworkLSTM during backprop.
137
+ _update(weightGrads, biasGrad, lr) {
138
+ this.weights = this.weights.map((w, i) => this._opts[i].step(w, weightGrads[i], lr));
139
+ this.bias = this._opts[this.weights.length].step(this.bias, biasGrad, lr);
66
140
  }
67
141
  train(inputs, target, lr) {
68
142
  const prediction = this.predict(inputs);
69
143
  const error = target - prediction;
70
- this.weights = this.weights.map((w, i) => w + lr * error * inputs[i]);
71
- this.bias += lr * error;
144
+ this._update(inputs.map((inp) => error * inp), error, lr);
72
145
  }
73
146
  };
74
147
 
75
148
  // src/Layer.ts
149
+ var defaultOptimizer2 = () => new SGD();
76
150
  var Layer = class {
77
- constructor(nNeurons, nInputs) {
78
- this.neurons = Array.from({ length: nNeurons }, () => new NeuronN(nInputs));
151
+ constructor(nNeurons, nInputs, activation = sigmoid2, optimizerFactory = defaultOptimizer2) {
152
+ this.neurons = Array.from(
153
+ { length: nNeurons },
154
+ () => new NeuronN(nInputs, activation, optimizerFactory)
155
+ );
79
156
  }
80
157
  predict(inputs) {
81
158
  return this.neurons.map((n) => n.predict(inputs));
@@ -115,12 +192,16 @@ var Network = class {
115
192
  };
116
193
 
117
194
  // src/NetworkN.ts
195
+ var defaultOptimizer3 = () => new SGD();
118
196
  var NetworkN = class {
119
- constructor(structure) {
197
+ constructor(structure, options = {}) {
120
198
  this.structure = structure;
199
+ const nLayers = structure.length - 1;
200
+ const activations = options.activations ?? Array.from({ length: nLayers }, () => sigmoid2);
201
+ const optimizer = options.optimizer ?? defaultOptimizer3;
121
202
  this.layers = [];
122
203
  for (let i = 1; i < structure.length; i++) {
123
- this.layers.push(new Layer(structure[i], structure[i - 1]));
204
+ this.layers.push(new Layer(structure[i], structure[i - 1], activations[i - 1], optimizer));
124
205
  }
125
206
  }
126
207
  predict(inputs) {
@@ -132,17 +213,18 @@ var NetworkN = class {
132
213
  const act = [inputs];
133
214
  for (const layer of this.layers) act.push(layer.predict(act[act.length - 1]));
134
215
  const pred = act[act.length - 1];
135
- let deltas = pred.map((p, i) => (targets[i] - p) * p * (1 - p));
216
+ const outAct = this.layers[this.layers.length - 1].neurons[0].activation;
217
+ let deltas = pred.map((p, i) => (targets[i] - p) * outAct.dfn(p));
136
218
  for (let l = this.layers.length - 1; l >= 0; l--) {
137
219
  const layer = this.layers[l];
138
220
  const layerIn = act[l];
221
+ const prevAct = l > 0 ? this.layers[l - 1].neurons[0].activation : null;
139
222
  const prevDeltas = layerIn.map((out, j) => {
140
223
  const errProp = layer.neurons.reduce((s, n, k) => s + deltas[k] * n.weights[j], 0);
141
- return errProp * out * (1 - out);
224
+ return prevAct ? errProp * prevAct.dfn(out) : errProp;
142
225
  });
143
226
  layer.neurons.forEach((n, k) => {
144
- n.weights = n.weights.map((w, j) => w + lr * deltas[k] * layerIn[j]);
145
- n.bias += lr * deltas[k];
227
+ n._update(layerIn.map((inp) => deltas[k] * inp), deltas[k], lr);
146
228
  });
147
229
  deltas = prevDeltas;
148
230
  }
@@ -157,13 +239,13 @@ var NetworkN = class {
157
239
  for (let l = this.layers.length - 1; l >= 0; l--) {
158
240
  const layer = this.layers[l];
159
241
  const layerIn = act[l];
242
+ const prevAct = l > 0 ? this.layers[l - 1].neurons[0].activation : null;
160
243
  const prevDeltas = layerIn.map((out, j) => {
161
244
  const errProp = layer.neurons.reduce((s, n, k) => s + deltas[k] * n.weights[j], 0);
162
- return errProp * out * (1 - out);
245
+ return prevAct ? errProp * prevAct.dfn(out) : errProp;
163
246
  });
164
247
  layer.neurons.forEach((n, k) => {
165
- n.weights = n.weights.map((w, j) => w + lr * deltas[k] * layerIn[j]);
166
- n.bias += lr * deltas[k];
248
+ n._update(layerIn.map((inp) => deltas[k] * inp), deltas[k], lr);
167
249
  });
168
250
  deltas = prevDeltas;
169
251
  }
@@ -174,7 +256,7 @@ var NetworkN = class {
174
256
  function sigmoid3(x) {
175
257
  return 1 / (1 + Math.exp(-x));
176
258
  }
177
- function tanh(x) {
259
+ function tanh2(x) {
178
260
  const e = Math.exp(2 * x);
179
261
  return (e - 1) / (e + 1);
180
262
  }
@@ -223,10 +305,10 @@ var LSTMLayer = class {
223
305
  const zo = this.outputGate.linear(combined);
224
306
  const zf_a = zf.map(sigmoid3);
225
307
  const zi_a = zi.map(sigmoid3);
226
- const zg_a = zg.map(tanh);
308
+ const zg_a = zg.map(tanh2);
227
309
  const zo_a = zo.map(sigmoid3);
228
310
  const c = c_prev.map((cv, k) => zf_a[k] * cv + zi_a[k] * zg_a[k]);
229
- const h = zo_a.map((o, k) => o * tanh(c[k]));
311
+ const h = zo_a.map((o, k) => o * tanh2(c[k]));
230
312
  this._traj.push({ combined, c_prev, zf, zf_a, zi, zi_a, zg, zg_a, zo, zo_a, c, h });
231
313
  this.h = h;
232
314
  this.c = c;
@@ -254,7 +336,7 @@ var LSTMLayer = class {
254
336
  for (let t = T - 1; t >= 0; t--) {
255
337
  const s = this._traj[t];
256
338
  const dh = dh_seq[t].map((d, k) => d + dh_next[k]);
257
- const tanh_c = s.c.map(tanh);
339
+ const tanh_c = s.c.map(tanh2);
258
340
  const do_a = dh.map((d, k) => d * tanh_c[k]);
259
341
  const dc = dh.map(
260
342
  (d, k) => d * s.zo_a[k] * (1 - tanh_c[k] ** 2) + dc_next[k]
@@ -323,16 +405,19 @@ var LSTMLayer = class {
323
405
  };
324
406
 
325
407
  // src/NetworkLSTM.ts
408
+ var defaultOptimizer4 = () => new SGD();
326
409
  var NetworkLSTM = class {
327
410
  // [T][layer+1][neuron]
328
- constructor(inputSize, hiddenSize, denseStructure) {
411
+ constructor(inputSize, hiddenSize, denseStructure, options = {}) {
329
412
  this.inputSize = inputSize;
330
413
  this.hiddenSize = hiddenSize;
331
414
  this.lstm = new LSTMLayer(inputSize, hiddenSize);
415
+ const activation = options.denseActivation ?? sigmoid2;
416
+ const optimizer = options.optimizer ?? defaultOptimizer4;
332
417
  this.denseLayers = [];
333
418
  const sizes = [hiddenSize, ...denseStructure];
334
419
  for (let i = 1; i < sizes.length; i++) {
335
- this.denseLayers.push(new Layer(sizes[i], sizes[i - 1]));
420
+ this.denseLayers.push(new Layer(sizes[i], sizes[i - 1], activation, optimizer));
336
421
  }
337
422
  this._acts = [];
338
423
  }
@@ -365,14 +450,16 @@ var NetworkLSTM = class {
365
450
  for (let t = 0; t < T; t++) {
366
451
  const acts = this._acts[t];
367
452
  const pred = acts[acts.length - 1];
368
- let deltas = pred.map((p, i) => (targets[t][i] - p) * p * (1 - p));
453
+ const outAct = this.denseLayers[this.denseLayers.length - 1].neurons[0].activation;
454
+ let deltas = pred.map((p, i) => (targets[t][i] - p) * outAct.dfn(p));
369
455
  for (let l = this.denseLayers.length - 1; l >= 0; l--) {
370
456
  const layer = this.denseLayers[l];
371
457
  const layerIn = acts[l];
372
458
  const grad = denseGrads[l];
459
+ const prevAct = l > 0 ? this.denseLayers[l - 1].neurons[0].activation : null;
373
460
  const prevDeltas = layerIn.map((out, j) => {
374
461
  const errProp = layer.neurons.reduce((s, n, k) => s + deltas[k] * n.weights[j], 0);
375
- return l === 0 ? errProp : errProp * out * (1 - out);
462
+ return prevAct ? errProp * prevAct.dfn(out) : errProp;
376
463
  });
377
464
  layer.neurons.forEach((n, k) => {
378
465
  n.weights.forEach((_, j) => {
@@ -388,8 +475,11 @@ var NetworkLSTM = class {
388
475
  const layer = this.denseLayers[l];
389
476
  const grad = denseGrads[l];
390
477
  layer.neurons.forEach((n, k) => {
391
- n.weights = n.weights.map((w, j) => w + lr / T * grad.dW[k][j]);
392
- n.bias += lr / T * grad.db[k];
478
+ n._update(
479
+ grad.dW[k].map((g) => g / T),
480
+ grad.db[k] / T,
481
+ lr
482
+ );
393
483
  });
394
484
  }
395
485
  this.lstm.backprop(dh_seq, lr);
@@ -414,13 +504,48 @@ var NetworkLSTM = class {
414
504
  });
415
505
  }
416
506
  };
507
+
508
+ // src/losses.ts
509
+ function mse(predicted, actual) {
510
+ return predicted.reduce((sum, p, i) => sum + (actual[i] - p) ** 2, 0) / predicted.length;
511
+ }
512
+ function crossEntropy(predicted, actual) {
513
+ const eps = 1e-15;
514
+ return -predicted.reduce((sum, p, i) => {
515
+ const clipped = Math.max(eps, Math.min(1 - eps, p));
516
+ return sum + actual[i] * Math.log(clipped) + (1 - actual[i]) * Math.log(1 - clipped);
517
+ }, 0) / predicted.length;
518
+ }
519
+ function mseDelta(predicted, actual) {
520
+ return actual - predicted;
521
+ }
522
+ function crossEntropyDelta(predicted, actual) {
523
+ return actual - predicted;
524
+ }
525
+ function crossEntropyDeltaRaw(predicted, actual) {
526
+ const eps = 1e-15;
527
+ const p = Math.max(eps, Math.min(1 - eps, predicted));
528
+ return actual / p - (1 - actual) / (1 - p);
529
+ }
417
530
  // Annotate the CommonJS export names for ESM import in node:
418
531
  0 && (module.exports = {
532
+ Adam,
419
533
  LSTMLayer,
420
534
  Layer,
535
+ Momentum,
421
536
  Network,
422
537
  NetworkLSTM,
423
538
  NetworkN,
424
539
  Neuron,
425
- NeuronN
540
+ NeuronN,
541
+ SGD,
542
+ crossEntropy,
543
+ crossEntropyDelta,
544
+ crossEntropyDeltaRaw,
545
+ linear,
546
+ mse,
547
+ mseDelta,
548
+ relu,
549
+ sigmoid,
550
+ tanh
426
551
  });
package/dist/index.mjs CHANGED
@@ -18,32 +18,97 @@ var Neuron = class {
18
18
  }
19
19
  };
20
20
 
21
+ // src/activations.ts
22
+ var sigmoid2 = {
23
+ fn: (x) => 1 / (1 + Math.exp(-x)),
24
+ dfn: (out) => out * (1 - out)
25
+ };
26
+ var tanh = {
27
+ fn: (x) => {
28
+ const e = Math.exp(2 * x);
29
+ return (e - 1) / (e + 1);
30
+ },
31
+ dfn: (out) => 1 - out * out
32
+ };
33
+ var relu = {
34
+ fn: (x) => Math.max(0, x),
35
+ dfn: (out) => out > 0 ? 1 : 0
36
+ };
37
+ var linear = {
38
+ fn: (x) => x,
39
+ dfn: () => 1
40
+ };
41
+
42
+ // src/optimizers.ts
43
+ var SGD = class {
44
+ step(weight, gradient, lr) {
45
+ return weight + lr * gradient;
46
+ }
47
+ };
48
+ var Momentum = class {
49
+ constructor(beta = 0.9) {
50
+ this.beta = beta;
51
+ this.v = 0;
52
+ }
53
+ step(weight, gradient, lr) {
54
+ this.v = this.beta * this.v + lr * gradient;
55
+ return weight + this.v;
56
+ }
57
+ };
58
+ var Adam = class {
59
+ constructor(beta1 = 0.9, beta2 = 0.999, epsilon = 1e-8) {
60
+ this.beta1 = beta1;
61
+ this.beta2 = beta2;
62
+ this.epsilon = epsilon;
63
+ this.m = 0;
64
+ this.v = 0;
65
+ this.t = 0;
66
+ }
67
+ step(weight, gradient, lr) {
68
+ this.t++;
69
+ this.m = this.beta1 * this.m + (1 - this.beta1) * gradient;
70
+ this.v = this.beta2 * this.v + (1 - this.beta2) * gradient * gradient;
71
+ const mHat = this.m / (1 - Math.pow(this.beta1, this.t));
72
+ const vHat = this.v / (1 - Math.pow(this.beta2, this.t));
73
+ return weight + lr * mHat / (Math.sqrt(vHat) + this.epsilon);
74
+ }
75
+ };
76
+
21
77
  // src/NeuronN.ts
22
- function sigmoid2(x) {
23
- return 1 / (1 + Math.exp(-x));
24
- }
78
+ var defaultOptimizer = () => new SGD();
25
79
  var NeuronN = class {
26
- constructor(nInputs) {
80
+ constructor(nInputs, activation = sigmoid2, optimizerFactory = defaultOptimizer) {
27
81
  const limit = Math.sqrt(1 / nInputs);
28
82
  this.weights = Array.from({ length: nInputs }, () => (Math.random() * 2 - 1) * limit);
29
83
  this.bias = 0;
84
+ this.activation = activation;
85
+ this._opts = Array.from({ length: nInputs + 1 }, optimizerFactory);
30
86
  }
31
87
  predict(inputs) {
32
88
  const sum = inputs.reduce((acc, e, i) => acc + e * this.weights[i], this.bias);
33
- return sigmoid2(sum);
89
+ return this.activation.fn(sum);
90
+ }
91
+ // Apply pre-computed gradients via the optimizer.
92
+ // Called internally by Layer / NetworkN / NetworkLSTM during backprop.
93
+ _update(weightGrads, biasGrad, lr) {
94
+ this.weights = this.weights.map((w, i) => this._opts[i].step(w, weightGrads[i], lr));
95
+ this.bias = this._opts[this.weights.length].step(this.bias, biasGrad, lr);
34
96
  }
35
97
  train(inputs, target, lr) {
36
98
  const prediction = this.predict(inputs);
37
99
  const error = target - prediction;
38
- this.weights = this.weights.map((w, i) => w + lr * error * inputs[i]);
39
- this.bias += lr * error;
100
+ this._update(inputs.map((inp) => error * inp), error, lr);
40
101
  }
41
102
  };
42
103
 
43
104
  // src/Layer.ts
105
+ var defaultOptimizer2 = () => new SGD();
44
106
  var Layer = class {
45
- constructor(nNeurons, nInputs) {
46
- this.neurons = Array.from({ length: nNeurons }, () => new NeuronN(nInputs));
107
+ constructor(nNeurons, nInputs, activation = sigmoid2, optimizerFactory = defaultOptimizer2) {
108
+ this.neurons = Array.from(
109
+ { length: nNeurons },
110
+ () => new NeuronN(nInputs, activation, optimizerFactory)
111
+ );
47
112
  }
48
113
  predict(inputs) {
49
114
  return this.neurons.map((n) => n.predict(inputs));
@@ -83,12 +148,16 @@ var Network = class {
83
148
  };
84
149
 
85
150
  // src/NetworkN.ts
151
+ var defaultOptimizer3 = () => new SGD();
86
152
  var NetworkN = class {
87
- constructor(structure) {
153
+ constructor(structure, options = {}) {
88
154
  this.structure = structure;
155
+ const nLayers = structure.length - 1;
156
+ const activations = options.activations ?? Array.from({ length: nLayers }, () => sigmoid2);
157
+ const optimizer = options.optimizer ?? defaultOptimizer3;
89
158
  this.layers = [];
90
159
  for (let i = 1; i < structure.length; i++) {
91
- this.layers.push(new Layer(structure[i], structure[i - 1]));
160
+ this.layers.push(new Layer(structure[i], structure[i - 1], activations[i - 1], optimizer));
92
161
  }
93
162
  }
94
163
  predict(inputs) {
@@ -100,17 +169,18 @@ var NetworkN = class {
100
169
  const act = [inputs];
101
170
  for (const layer of this.layers) act.push(layer.predict(act[act.length - 1]));
102
171
  const pred = act[act.length - 1];
103
- let deltas = pred.map((p, i) => (targets[i] - p) * p * (1 - p));
172
+ const outAct = this.layers[this.layers.length - 1].neurons[0].activation;
173
+ let deltas = pred.map((p, i) => (targets[i] - p) * outAct.dfn(p));
104
174
  for (let l = this.layers.length - 1; l >= 0; l--) {
105
175
  const layer = this.layers[l];
106
176
  const layerIn = act[l];
177
+ const prevAct = l > 0 ? this.layers[l - 1].neurons[0].activation : null;
107
178
  const prevDeltas = layerIn.map((out, j) => {
108
179
  const errProp = layer.neurons.reduce((s, n, k) => s + deltas[k] * n.weights[j], 0);
109
- return errProp * out * (1 - out);
180
+ return prevAct ? errProp * prevAct.dfn(out) : errProp;
110
181
  });
111
182
  layer.neurons.forEach((n, k) => {
112
- n.weights = n.weights.map((w, j) => w + lr * deltas[k] * layerIn[j]);
113
- n.bias += lr * deltas[k];
183
+ n._update(layerIn.map((inp) => deltas[k] * inp), deltas[k], lr);
114
184
  });
115
185
  deltas = prevDeltas;
116
186
  }
@@ -125,13 +195,13 @@ var NetworkN = class {
125
195
  for (let l = this.layers.length - 1; l >= 0; l--) {
126
196
  const layer = this.layers[l];
127
197
  const layerIn = act[l];
198
+ const prevAct = l > 0 ? this.layers[l - 1].neurons[0].activation : null;
128
199
  const prevDeltas = layerIn.map((out, j) => {
129
200
  const errProp = layer.neurons.reduce((s, n, k) => s + deltas[k] * n.weights[j], 0);
130
- return errProp * out * (1 - out);
201
+ return prevAct ? errProp * prevAct.dfn(out) : errProp;
131
202
  });
132
203
  layer.neurons.forEach((n, k) => {
133
- n.weights = n.weights.map((w, j) => w + lr * deltas[k] * layerIn[j]);
134
- n.bias += lr * deltas[k];
204
+ n._update(layerIn.map((inp) => deltas[k] * inp), deltas[k], lr);
135
205
  });
136
206
  deltas = prevDeltas;
137
207
  }
@@ -142,7 +212,7 @@ var NetworkN = class {
142
212
  function sigmoid3(x) {
143
213
  return 1 / (1 + Math.exp(-x));
144
214
  }
145
- function tanh(x) {
215
+ function tanh2(x) {
146
216
  const e = Math.exp(2 * x);
147
217
  return (e - 1) / (e + 1);
148
218
  }
@@ -191,10 +261,10 @@ var LSTMLayer = class {
191
261
  const zo = this.outputGate.linear(combined);
192
262
  const zf_a = zf.map(sigmoid3);
193
263
  const zi_a = zi.map(sigmoid3);
194
- const zg_a = zg.map(tanh);
264
+ const zg_a = zg.map(tanh2);
195
265
  const zo_a = zo.map(sigmoid3);
196
266
  const c = c_prev.map((cv, k) => zf_a[k] * cv + zi_a[k] * zg_a[k]);
197
- const h = zo_a.map((o, k) => o * tanh(c[k]));
267
+ const h = zo_a.map((o, k) => o * tanh2(c[k]));
198
268
  this._traj.push({ combined, c_prev, zf, zf_a, zi, zi_a, zg, zg_a, zo, zo_a, c, h });
199
269
  this.h = h;
200
270
  this.c = c;
@@ -222,7 +292,7 @@ var LSTMLayer = class {
222
292
  for (let t = T - 1; t >= 0; t--) {
223
293
  const s = this._traj[t];
224
294
  const dh = dh_seq[t].map((d, k) => d + dh_next[k]);
225
- const tanh_c = s.c.map(tanh);
295
+ const tanh_c = s.c.map(tanh2);
226
296
  const do_a = dh.map((d, k) => d * tanh_c[k]);
227
297
  const dc = dh.map(
228
298
  (d, k) => d * s.zo_a[k] * (1 - tanh_c[k] ** 2) + dc_next[k]
@@ -291,16 +361,19 @@ var LSTMLayer = class {
291
361
  };
292
362
 
293
363
  // src/NetworkLSTM.ts
364
+ var defaultOptimizer4 = () => new SGD();
294
365
  var NetworkLSTM = class {
295
366
  // [T][layer+1][neuron]
296
- constructor(inputSize, hiddenSize, denseStructure) {
367
+ constructor(inputSize, hiddenSize, denseStructure, options = {}) {
297
368
  this.inputSize = inputSize;
298
369
  this.hiddenSize = hiddenSize;
299
370
  this.lstm = new LSTMLayer(inputSize, hiddenSize);
371
+ const activation = options.denseActivation ?? sigmoid2;
372
+ const optimizer = options.optimizer ?? defaultOptimizer4;
300
373
  this.denseLayers = [];
301
374
  const sizes = [hiddenSize, ...denseStructure];
302
375
  for (let i = 1; i < sizes.length; i++) {
303
- this.denseLayers.push(new Layer(sizes[i], sizes[i - 1]));
376
+ this.denseLayers.push(new Layer(sizes[i], sizes[i - 1], activation, optimizer));
304
377
  }
305
378
  this._acts = [];
306
379
  }
@@ -333,14 +406,16 @@ var NetworkLSTM = class {
333
406
  for (let t = 0; t < T; t++) {
334
407
  const acts = this._acts[t];
335
408
  const pred = acts[acts.length - 1];
336
- let deltas = pred.map((p, i) => (targets[t][i] - p) * p * (1 - p));
409
+ const outAct = this.denseLayers[this.denseLayers.length - 1].neurons[0].activation;
410
+ let deltas = pred.map((p, i) => (targets[t][i] - p) * outAct.dfn(p));
337
411
  for (let l = this.denseLayers.length - 1; l >= 0; l--) {
338
412
  const layer = this.denseLayers[l];
339
413
  const layerIn = acts[l];
340
414
  const grad = denseGrads[l];
415
+ const prevAct = l > 0 ? this.denseLayers[l - 1].neurons[0].activation : null;
341
416
  const prevDeltas = layerIn.map((out, j) => {
342
417
  const errProp = layer.neurons.reduce((s, n, k) => s + deltas[k] * n.weights[j], 0);
343
- return l === 0 ? errProp : errProp * out * (1 - out);
418
+ return prevAct ? errProp * prevAct.dfn(out) : errProp;
344
419
  });
345
420
  layer.neurons.forEach((n, k) => {
346
421
  n.weights.forEach((_, j) => {
@@ -356,8 +431,11 @@ var NetworkLSTM = class {
356
431
  const layer = this.denseLayers[l];
357
432
  const grad = denseGrads[l];
358
433
  layer.neurons.forEach((n, k) => {
359
- n.weights = n.weights.map((w, j) => w + lr / T * grad.dW[k][j]);
360
- n.bias += lr / T * grad.db[k];
434
+ n._update(
435
+ grad.dW[k].map((g) => g / T),
436
+ grad.db[k] / T,
437
+ lr
438
+ );
361
439
  });
362
440
  }
363
441
  this.lstm.backprop(dh_seq, lr);
@@ -382,12 +460,47 @@ var NetworkLSTM = class {
382
460
  });
383
461
  }
384
462
  };
463
+
464
+ // src/losses.ts
465
+ function mse(predicted, actual) {
466
+ return predicted.reduce((sum, p, i) => sum + (actual[i] - p) ** 2, 0) / predicted.length;
467
+ }
468
+ function crossEntropy(predicted, actual) {
469
+ const eps = 1e-15;
470
+ return -predicted.reduce((sum, p, i) => {
471
+ const clipped = Math.max(eps, Math.min(1 - eps, p));
472
+ return sum + actual[i] * Math.log(clipped) + (1 - actual[i]) * Math.log(1 - clipped);
473
+ }, 0) / predicted.length;
474
+ }
475
+ function mseDelta(predicted, actual) {
476
+ return actual - predicted;
477
+ }
478
+ function crossEntropyDelta(predicted, actual) {
479
+ return actual - predicted;
480
+ }
481
+ function crossEntropyDeltaRaw(predicted, actual) {
482
+ const eps = 1e-15;
483
+ const p = Math.max(eps, Math.min(1 - eps, predicted));
484
+ return actual / p - (1 - actual) / (1 - p);
485
+ }
385
486
  export {
487
+ Adam,
386
488
  LSTMLayer,
387
489
  Layer,
490
+ Momentum,
388
491
  Network,
389
492
  NetworkLSTM,
390
493
  NetworkN,
391
494
  Neuron,
392
- NeuronN
495
+ NeuronN,
496
+ SGD,
497
+ crossEntropy,
498
+ crossEntropyDelta,
499
+ crossEntropyDeltaRaw,
500
+ linear,
501
+ mse,
502
+ mseDelta,
503
+ relu,
504
+ sigmoid2 as sigmoid,
505
+ tanh
393
506
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@dniskav/neuron",
3
- "version": "0.1.3",
3
+ "version": "0.1.5",
4
4
  "description": "Minimal neural network from scratch — neuron, layer, network, backpropagation. No dependencies.",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.mjs",