@dniskav/neuron 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +80 -10
- package/dist/index.d.mts +55 -5
- package/dist/index.d.ts +55 -5
- package/dist/index.js +155 -30
- package/dist/index.mjs +142 -29
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
# @dniskav/neuron
|
|
2
|
-
|
|
3
1
|
[](https://www.npmjs.com/package/@dniskav/neuron)
|
|
4
2
|
[](LICENSE)
|
|
5
3
|
|
|
@@ -7,15 +5,19 @@ A minimal, dependency-free neural network library built from scratch in TypeScri
|
|
|
7
5
|
|
|
8
6
|
## What's inside
|
|
9
7
|
|
|
10
|
-
|
|
|
11
|
-
|
|
8
|
+
| Export | Description |
|
|
9
|
+
|--------|-------------|
|
|
12
10
|
| `Neuron` | Single-input neuron. The simplest possible unit: one weight, one bias. |
|
|
13
|
-
| `NeuronN` | N-input neuron with Xavier initialization and
|
|
11
|
+
| `NeuronN` | N-input neuron with Xavier initialization and configurable activation. |
|
|
14
12
|
| `Layer` | A group of `NeuronN` neurons that share the same inputs. |
|
|
15
13
|
| `Network` | Two-layer network (hidden + output) with backpropagation. |
|
|
16
14
|
| `NetworkN` | Deep network of arbitrary depth. Define your architecture as `[inputs, ...hidden, outputs]`. |
|
|
17
15
|
| `LSTMLayer` | Recurrent layer with persistent hidden and cell state. Learns sequences via BPTT. |
|
|
18
16
|
| `NetworkLSTM` | Wraps an `LSTMLayer` + dense layers. Maintains memory across steps within an episode. |
|
|
17
|
+
| `sigmoid` `relu` `tanh` `linear` | Built-in activation functions. |
|
|
18
|
+
| `SGD` `Momentum` `Adam` | Optimizers. Each weight (and bias) gets its own optimizer instance, which keeps that weight's state. |
|
|
19
|
+
| `mse` `crossEntropy` | Loss functions for evaluation and logging. |
|
|
20
|
+
| `mseDelta` `crossEntropyDelta` | Output-layer delta functions for use with `trainWithDeltas`. |
|
|
19
21
|
|
|
20
22
|
## Install
|
|
21
23
|
|
|
@@ -92,12 +94,73 @@ net.train([0.5, 0.3, 0.8], [1, 0], 0.05);
|
|
|
92
94
|
const [out1, out2] = net.predict([0.5, 0.3, 0.8]);
|
|
93
95
|
```
|
|
94
96
|
|
|
97
|
+
### Activations — ReLU, tanh, and more
|
|
98
|
+
|
|
99
|
+
Pass an activation per layer. The last layer typically uses `sigmoid` for binary output or `linear` for regression.
|
|
100
|
+
|
|
101
|
+
```ts
|
|
102
|
+
import { NetworkN, relu, sigmoid } from "@dniskav/neuron";
|
|
103
|
+
|
|
104
|
+
const net = new NetworkN([3, 64, 32, 1], {
|
|
105
|
+
activations: [relu, relu, sigmoid],
|
|
106
|
+
});
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Available: `sigmoid`, `relu`, `tanh`, `linear`.
|
|
110
|
+
|
|
111
|
+
### Optimizers — Adam, Momentum, SGD
|
|
112
|
+
|
|
113
|
+
Pass an optimizer factory. Each weight gets its own instance with independent state.
|
|
114
|
+
|
|
115
|
+
```ts
|
|
116
|
+
import { NetworkN, relu, sigmoid, Adam } from "@dniskav/neuron";
|
|
117
|
+
|
|
118
|
+
const net = new NetworkN([2, 64, 1], {
|
|
119
|
+
activations: [relu, sigmoid],
|
|
120
|
+
optimizer: () => new Adam(), // default: beta1=0.9, beta2=0.999
|
|
121
|
+
});
|
|
122
|
+
|
|
123
|
+
// Momentum example
|
|
124
|
+
import { Momentum } from "@dniskav/neuron";
|
|
125
|
+
const net2 = new NetworkN([2, 32, 1], {
|
|
126
|
+
optimizer: () => new Momentum(0.9),
|
|
127
|
+
});
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
Optimizers also work in `NetworkLSTM` (applied to the dense layers):
|
|
131
|
+
|
|
132
|
+
```ts
|
|
133
|
+
import { NetworkLSTM, relu, Adam } from "@dniskav/neuron";
|
|
134
|
+
|
|
135
|
+
const net = new NetworkLSTM(1, 8, [4, 1], {
|
|
136
|
+
denseActivation: relu,
|
|
137
|
+
optimizer: () => new Adam(), // constructor args are (beta1, beta2, epsilon); the learning rate is passed to train()
|
|
138
|
+
});
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
### Loss utilities
|
|
142
|
+
|
|
143
|
+
```ts
|
|
144
|
+
import { mse, crossEntropy } from "@dniskav/neuron";
|
|
145
|
+
|
|
146
|
+
const predicted = net.predict([0.5, 0.3]);
|
|
147
|
+
console.log(mse(predicted, [1, 0]));
|
|
148
|
+
console.log(crossEntropy(predicted, [1, 0]));
|
|
149
|
+
```
|
|
150
|
+
|
|
95
151
|
### trainWithDeltas — custom loss / physics-based gradients
|
|
96
152
|
|
|
97
153
|
`NetworkN` also exposes `trainWithDeltas` for when you compute your own output-layer deltas (e.g., from a physics simulation or a custom loss function):
|
|
98
154
|
|
|
99
155
|
```ts
|
|
100
|
-
|
|
156
|
+
import { NetworkN, mseDelta } from "@dniskav/neuron";
|
|
157
|
+
|
|
158
|
+
const net = new NetworkN([3, 16, 2]);
|
|
159
|
+
const pred = net.predict(inputs);
|
|
160
|
+
|
|
161
|
+
// Compute deltas manually using a helper, or from any external signal
|
|
162
|
+
const deltas = pred.map((p, i) => mseDelta(p, targets[i]));
|
|
163
|
+
net.trainWithDeltas(inputs, deltas, 0.01);
|
|
101
164
|
```
|
|
102
165
|
|
|
103
166
|
### NetworkLSTM — recurrent network with memory
|
|
@@ -143,17 +206,19 @@ The network learns to count steps using its hidden state — no external counter
|
|
|
143
206
|
|
|
144
207
|
## How it works
|
|
145
208
|
|
|
146
|
-
|
|
209
|
+
Each class applies an **activation function** to the weighted sum of inputs and uses **gradient descent** to update weights:
|
|
147
210
|
|
|
148
211
|
```
|
|
149
|
-
weight += lr ×
|
|
150
|
-
bias += lr ×
|
|
212
|
+
weight += lr × delta × input
|
|
213
|
+
bias += lr × delta
|
|
151
214
|
```
|
|
152
215
|
|
|
153
|
-
`NetworkN` implements full **backpropagation** across all layers, propagating deltas from the output back to the first layer using the chain rule.
|
|
216
|
+
`NetworkN` implements full **backpropagation** across all layers, propagating deltas from the output back to the first layer using the chain rule. The derivative of the chosen activation is applied at each layer.
|
|
154
217
|
|
|
155
218
|
`NeuronN` uses simplified **Xavier initialization** — weights start in `[-√(1/n), +√(1/n)]` — so gradients flow well from the start of training.
|
|
156
219
|
|
|
220
|
+
When an **optimizer** is used (e.g., Adam), the raw gradient is passed to the optimizer instead of being applied directly. Each weight maintains its own optimizer state (velocity, moments).
|
|
221
|
+
|
|
157
222
|
## Build
|
|
158
223
|
|
|
159
224
|
```bash
|
|
@@ -165,6 +230,11 @@ npm run dev # watch mode
|
|
|
165
230
|
|
|
166
231
|
If you are an AI agent or LLM working with this codebase, read [AGENTS.md](AGENTS.md) first. It contains the full class hierarchy, design constraints, and what this library does not do.
|
|
167
232
|
|
|
233
|
+
## Possible improvements
|
|
234
|
+
|
|
235
|
+
1. **Support for batches** in training to improve efficiency.
|
|
236
|
+
2. **Improve documentation** with more advanced examples and use cases.
|
|
237
|
+
|
|
168
238
|
## License
|
|
169
239
|
|
|
170
240
|
MIT
|
package/dist/index.d.mts
CHANGED
|
@@ -6,17 +6,53 @@ declare class Neuron {
|
|
|
6
6
|
train(input: number, target: number, lr: number): void;
|
|
7
7
|
}
|
|
8
8
|
|
|
9
|
+
interface Activation {
|
|
10
|
+
fn(x: number): number;
|
|
11
|
+
dfn(out: number): number;
|
|
12
|
+
}
|
|
13
|
+
declare const sigmoid: Activation;
|
|
14
|
+
declare const tanh: Activation;
|
|
15
|
+
declare const relu: Activation;
|
|
16
|
+
declare const linear: Activation;
|
|
17
|
+
|
|
18
|
+
interface Optimizer {
|
|
19
|
+
step(weight: number, gradient: number, lr: number): number;
|
|
20
|
+
}
|
|
21
|
+
type OptimizerFactory = () => Optimizer;
|
|
22
|
+
declare class SGD implements Optimizer {
|
|
23
|
+
step(weight: number, gradient: number, lr: number): number;
|
|
24
|
+
}
|
|
25
|
+
declare class Momentum implements Optimizer {
|
|
26
|
+
readonly beta: number;
|
|
27
|
+
private v;
|
|
28
|
+
constructor(beta?: number);
|
|
29
|
+
step(weight: number, gradient: number, lr: number): number;
|
|
30
|
+
}
|
|
31
|
+
declare class Adam implements Optimizer {
|
|
32
|
+
readonly beta1: number;
|
|
33
|
+
readonly beta2: number;
|
|
34
|
+
readonly epsilon: number;
|
|
35
|
+
private m;
|
|
36
|
+
private v;
|
|
37
|
+
private t;
|
|
38
|
+
constructor(beta1?: number, beta2?: number, epsilon?: number);
|
|
39
|
+
step(weight: number, gradient: number, lr: number): number;
|
|
40
|
+
}
|
|
41
|
+
|
|
9
42
|
declare class NeuronN {
|
|
10
43
|
weights: number[];
|
|
11
44
|
bias: number;
|
|
12
|
-
|
|
45
|
+
readonly activation: Activation;
|
|
46
|
+
private _opts;
|
|
47
|
+
constructor(nInputs: number, activation?: Activation, optimizerFactory?: OptimizerFactory);
|
|
13
48
|
predict(inputs: number[]): number;
|
|
49
|
+
_update(weightGrads: number[], biasGrad: number, lr: number): void;
|
|
14
50
|
train(inputs: number[], target: number, lr: number): void;
|
|
15
51
|
}
|
|
16
52
|
|
|
17
53
|
declare class Layer {
|
|
18
54
|
neurons: NeuronN[];
|
|
19
|
-
constructor(nNeurons: number, nInputs: number);
|
|
55
|
+
constructor(nNeurons: number, nInputs: number, activation?: Activation, optimizerFactory?: OptimizerFactory);
|
|
20
56
|
predict(inputs: number[]): number[];
|
|
21
57
|
}
|
|
22
58
|
|
|
@@ -28,10 +64,14 @@ declare class Network {
|
|
|
28
64
|
train(inputs: number[], target: number, lr: number): number;
|
|
29
65
|
}
|
|
30
66
|
|
|
67
|
+
interface NetworkNOptions {
|
|
68
|
+
activations?: Activation[];
|
|
69
|
+
optimizer?: OptimizerFactory;
|
|
70
|
+
}
|
|
31
71
|
declare class NetworkN {
|
|
32
72
|
readonly structure: number[];
|
|
33
73
|
layers: Layer[];
|
|
34
|
-
constructor(structure: number[]);
|
|
74
|
+
constructor(structure: number[], options?: NetworkNOptions);
|
|
35
75
|
predict(inputs: number[]): number[];
|
|
36
76
|
train(inputs: number[], targets: number[], lr: number): number;
|
|
37
77
|
trainWithDeltas(inputs: number[], outputDeltas: number[], lr: number): void;
|
|
@@ -78,13 +118,17 @@ declare class LSTMLayer {
|
|
|
78
118
|
setWeights(data: ReturnType<LSTMLayer["getWeights"]>): void;
|
|
79
119
|
}
|
|
80
120
|
|
|
121
|
+
interface NetworkLSTMOptions {
|
|
122
|
+
denseActivation?: Activation;
|
|
123
|
+
optimizer?: OptimizerFactory;
|
|
124
|
+
}
|
|
81
125
|
declare class NetworkLSTM {
|
|
82
126
|
readonly inputSize: number;
|
|
83
127
|
readonly hiddenSize: number;
|
|
84
128
|
lstm: LSTMLayer;
|
|
85
129
|
denseLayers: Layer[];
|
|
86
130
|
private _acts;
|
|
87
|
-
constructor(inputSize: number, hiddenSize: number, denseStructure: number[]);
|
|
131
|
+
constructor(inputSize: number, hiddenSize: number, denseStructure: number[], options?: NetworkLSTMOptions);
|
|
88
132
|
resetState(): void;
|
|
89
133
|
predict(inputs: number[]): number[];
|
|
90
134
|
train(targets: number[][], lr: number): void;
|
|
@@ -115,4 +159,10 @@ declare class NetworkLSTM {
|
|
|
115
159
|
setWeights(data: ReturnType<NetworkLSTM["getWeights"]>): void;
|
|
116
160
|
}
|
|
117
161
|
|
|
118
|
-
|
|
162
|
+
declare function mse(predicted: number[], actual: number[]): number;
|
|
163
|
+
declare function crossEntropy(predicted: number[], actual: number[]): number;
|
|
164
|
+
declare function mseDelta(predicted: number, actual: number): number;
|
|
165
|
+
declare function crossEntropyDelta(predicted: number, actual: number): number;
|
|
166
|
+
declare function crossEntropyDeltaRaw(predicted: number, actual: number): number;
|
|
167
|
+
|
|
168
|
+
export { type Activation, Adam, LSTMLayer, Layer, Momentum, Network, NetworkLSTM, type NetworkLSTMOptions, NetworkN, type NetworkNOptions, Neuron, NeuronN, type Optimizer, type OptimizerFactory, SGD, crossEntropy, crossEntropyDelta, crossEntropyDeltaRaw, linear, mse, mseDelta, relu, sigmoid, tanh };
|
package/dist/index.d.ts
CHANGED
|
@@ -6,17 +6,53 @@ declare class Neuron {
|
|
|
6
6
|
train(input: number, target: number, lr: number): void;
|
|
7
7
|
}
|
|
8
8
|
|
|
9
|
+
interface Activation {
|
|
10
|
+
fn(x: number): number;
|
|
11
|
+
dfn(out: number): number;
|
|
12
|
+
}
|
|
13
|
+
declare const sigmoid: Activation;
|
|
14
|
+
declare const tanh: Activation;
|
|
15
|
+
declare const relu: Activation;
|
|
16
|
+
declare const linear: Activation;
|
|
17
|
+
|
|
18
|
+
interface Optimizer {
|
|
19
|
+
step(weight: number, gradient: number, lr: number): number;
|
|
20
|
+
}
|
|
21
|
+
type OptimizerFactory = () => Optimizer;
|
|
22
|
+
declare class SGD implements Optimizer {
|
|
23
|
+
step(weight: number, gradient: number, lr: number): number;
|
|
24
|
+
}
|
|
25
|
+
declare class Momentum implements Optimizer {
|
|
26
|
+
readonly beta: number;
|
|
27
|
+
private v;
|
|
28
|
+
constructor(beta?: number);
|
|
29
|
+
step(weight: number, gradient: number, lr: number): number;
|
|
30
|
+
}
|
|
31
|
+
declare class Adam implements Optimizer {
|
|
32
|
+
readonly beta1: number;
|
|
33
|
+
readonly beta2: number;
|
|
34
|
+
readonly epsilon: number;
|
|
35
|
+
private m;
|
|
36
|
+
private v;
|
|
37
|
+
private t;
|
|
38
|
+
constructor(beta1?: number, beta2?: number, epsilon?: number);
|
|
39
|
+
step(weight: number, gradient: number, lr: number): number;
|
|
40
|
+
}
|
|
41
|
+
|
|
9
42
|
declare class NeuronN {
|
|
10
43
|
weights: number[];
|
|
11
44
|
bias: number;
|
|
12
|
-
|
|
45
|
+
readonly activation: Activation;
|
|
46
|
+
private _opts;
|
|
47
|
+
constructor(nInputs: number, activation?: Activation, optimizerFactory?: OptimizerFactory);
|
|
13
48
|
predict(inputs: number[]): number;
|
|
49
|
+
_update(weightGrads: number[], biasGrad: number, lr: number): void;
|
|
14
50
|
train(inputs: number[], target: number, lr: number): void;
|
|
15
51
|
}
|
|
16
52
|
|
|
17
53
|
declare class Layer {
|
|
18
54
|
neurons: NeuronN[];
|
|
19
|
-
constructor(nNeurons: number, nInputs: number);
|
|
55
|
+
constructor(nNeurons: number, nInputs: number, activation?: Activation, optimizerFactory?: OptimizerFactory);
|
|
20
56
|
predict(inputs: number[]): number[];
|
|
21
57
|
}
|
|
22
58
|
|
|
@@ -28,10 +64,14 @@ declare class Network {
|
|
|
28
64
|
train(inputs: number[], target: number, lr: number): number;
|
|
29
65
|
}
|
|
30
66
|
|
|
67
|
+
interface NetworkNOptions {
|
|
68
|
+
activations?: Activation[];
|
|
69
|
+
optimizer?: OptimizerFactory;
|
|
70
|
+
}
|
|
31
71
|
declare class NetworkN {
|
|
32
72
|
readonly structure: number[];
|
|
33
73
|
layers: Layer[];
|
|
34
|
-
constructor(structure: number[]);
|
|
74
|
+
constructor(structure: number[], options?: NetworkNOptions);
|
|
35
75
|
predict(inputs: number[]): number[];
|
|
36
76
|
train(inputs: number[], targets: number[], lr: number): number;
|
|
37
77
|
trainWithDeltas(inputs: number[], outputDeltas: number[], lr: number): void;
|
|
@@ -78,13 +118,17 @@ declare class LSTMLayer {
|
|
|
78
118
|
setWeights(data: ReturnType<LSTMLayer["getWeights"]>): void;
|
|
79
119
|
}
|
|
80
120
|
|
|
121
|
+
interface NetworkLSTMOptions {
|
|
122
|
+
denseActivation?: Activation;
|
|
123
|
+
optimizer?: OptimizerFactory;
|
|
124
|
+
}
|
|
81
125
|
declare class NetworkLSTM {
|
|
82
126
|
readonly inputSize: number;
|
|
83
127
|
readonly hiddenSize: number;
|
|
84
128
|
lstm: LSTMLayer;
|
|
85
129
|
denseLayers: Layer[];
|
|
86
130
|
private _acts;
|
|
87
|
-
constructor(inputSize: number, hiddenSize: number, denseStructure: number[]);
|
|
131
|
+
constructor(inputSize: number, hiddenSize: number, denseStructure: number[], options?: NetworkLSTMOptions);
|
|
88
132
|
resetState(): void;
|
|
89
133
|
predict(inputs: number[]): number[];
|
|
90
134
|
train(targets: number[][], lr: number): void;
|
|
@@ -115,4 +159,10 @@ declare class NetworkLSTM {
|
|
|
115
159
|
setWeights(data: ReturnType<NetworkLSTM["getWeights"]>): void;
|
|
116
160
|
}
|
|
117
161
|
|
|
118
|
-
|
|
162
|
+
declare function mse(predicted: number[], actual: number[]): number;
|
|
163
|
+
declare function crossEntropy(predicted: number[], actual: number[]): number;
|
|
164
|
+
declare function mseDelta(predicted: number, actual: number): number;
|
|
165
|
+
declare function crossEntropyDelta(predicted: number, actual: number): number;
|
|
166
|
+
declare function crossEntropyDeltaRaw(predicted: number, actual: number): number;
|
|
167
|
+
|
|
168
|
+
export { type Activation, Adam, LSTMLayer, Layer, Momentum, Network, NetworkLSTM, type NetworkLSTMOptions, NetworkN, type NetworkNOptions, Neuron, NeuronN, type Optimizer, type OptimizerFactory, SGD, crossEntropy, crossEntropyDelta, crossEntropyDeltaRaw, linear, mse, mseDelta, relu, sigmoid, tanh };
|
package/dist/index.js
CHANGED
|
@@ -20,13 +20,25 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
20
20
|
// src/index.ts
|
|
21
21
|
var index_exports = {};
|
|
22
22
|
__export(index_exports, {
|
|
23
|
+
Adam: () => Adam,
|
|
23
24
|
LSTMLayer: () => LSTMLayer,
|
|
24
25
|
Layer: () => Layer,
|
|
26
|
+
Momentum: () => Momentum,
|
|
25
27
|
Network: () => Network,
|
|
26
28
|
NetworkLSTM: () => NetworkLSTM,
|
|
27
29
|
NetworkN: () => NetworkN,
|
|
28
30
|
Neuron: () => Neuron,
|
|
29
|
-
NeuronN: () => NeuronN
|
|
31
|
+
NeuronN: () => NeuronN,
|
|
32
|
+
SGD: () => SGD,
|
|
33
|
+
crossEntropy: () => crossEntropy,
|
|
34
|
+
crossEntropyDelta: () => crossEntropyDelta,
|
|
35
|
+
crossEntropyDeltaRaw: () => crossEntropyDeltaRaw,
|
|
36
|
+
linear: () => linear,
|
|
37
|
+
mse: () => mse,
|
|
38
|
+
mseDelta: () => mseDelta,
|
|
39
|
+
relu: () => relu,
|
|
40
|
+
sigmoid: () => sigmoid2,
|
|
41
|
+
tanh: () => tanh
|
|
30
42
|
});
|
|
31
43
|
module.exports = __toCommonJS(index_exports);
|
|
32
44
|
|
|
@@ -50,32 +62,97 @@ var Neuron = class {
|
|
|
50
62
|
}
|
|
51
63
|
};
|
|
52
64
|
|
|
65
|
+
// src/activations.ts
|
|
66
|
+
var sigmoid2 = {
|
|
67
|
+
fn: (x) => 1 / (1 + Math.exp(-x)),
|
|
68
|
+
dfn: (out) => out * (1 - out)
|
|
69
|
+
};
|
|
70
|
+
// Hyperbolic-tangent activation: `fn` maps a pre-activation to (-1, 1),
// `dfn` gives the derivative expressed in terms of the activation OUTPUT.
var tanh = {
  /**
   * @param {number} x - weighted sum (pre-activation).
   * @returns {number} tanh(x).
   */
  // Math.tanh saturates cleanly at ±1. The hand-rolled (e^{2x}-1)/(e^{2x}+1)
  // form overflows to Infinity/Infinity = NaN once 2x exceeds ~709.
  fn: (x) => Math.tanh(x),
  /**
   * @param {number} out - a value previously returned by `fn`.
   * @returns {number} d tanh / dx evaluated at that point, i.e. 1 - out².
   */
  dfn: (out) => 1 - out * out
};
|
|
77
|
+
var relu = {
|
|
78
|
+
fn: (x) => Math.max(0, x),
|
|
79
|
+
dfn: (out) => out > 0 ? 1 : 0
|
|
80
|
+
};
|
|
81
|
+
var linear = {
|
|
82
|
+
fn: (x) => x,
|
|
83
|
+
dfn: () => 1
|
|
84
|
+
};
|
|
85
|
+
|
|
86
|
+
// src/optimizers.ts
|
|
87
|
+
var SGD = class {
|
|
88
|
+
step(weight, gradient, lr) {
|
|
89
|
+
return weight + lr * gradient;
|
|
90
|
+
}
|
|
91
|
+
};
|
|
92
|
+
var Momentum = class {
|
|
93
|
+
constructor(beta = 0.9) {
|
|
94
|
+
this.beta = beta;
|
|
95
|
+
this.v = 0;
|
|
96
|
+
}
|
|
97
|
+
step(weight, gradient, lr) {
|
|
98
|
+
this.v = this.beta * this.v + lr * gradient;
|
|
99
|
+
return weight + this.v;
|
|
100
|
+
}
|
|
101
|
+
};
|
|
102
|
+
var Adam = class {
|
|
103
|
+
constructor(beta1 = 0.9, beta2 = 0.999, epsilon = 1e-8) {
|
|
104
|
+
this.beta1 = beta1;
|
|
105
|
+
this.beta2 = beta2;
|
|
106
|
+
this.epsilon = epsilon;
|
|
107
|
+
this.m = 0;
|
|
108
|
+
this.v = 0;
|
|
109
|
+
this.t = 0;
|
|
110
|
+
}
|
|
111
|
+
step(weight, gradient, lr) {
|
|
112
|
+
this.t++;
|
|
113
|
+
this.m = this.beta1 * this.m + (1 - this.beta1) * gradient;
|
|
114
|
+
this.v = this.beta2 * this.v + (1 - this.beta2) * gradient * gradient;
|
|
115
|
+
const mHat = this.m / (1 - Math.pow(this.beta1, this.t));
|
|
116
|
+
const vHat = this.v / (1 - Math.pow(this.beta2, this.t));
|
|
117
|
+
return weight + lr * mHat / (Math.sqrt(vHat) + this.epsilon);
|
|
118
|
+
}
|
|
119
|
+
};
|
|
120
|
+
|
|
53
121
|
// src/NeuronN.ts
|
|
54
|
-
|
|
55
|
-
return 1 / (1 + Math.exp(-x));
|
|
56
|
-
}
|
|
122
|
+
var defaultOptimizer = () => new SGD();
|
|
57
123
|
// N-input neuron: weighted sum + bias passed through a pluggable activation,
// with one optimizer instance per trainable parameter.
var NeuronN = class {
  /**
   * @param {number} nInputs - number of input weights to create.
   * @param {{fn: (x: number) => number, dfn: (out: number) => number}} [activation]
   *   activation object; defaults to `sigmoid2`.
   * @param {() => {step: (weight: number, gradient: number, lr: number) => number}} [optimizerFactory]
   *   called once per parameter to create its optimizer; defaults to `defaultOptimizer`.
   */
  constructor(nInputs, activation = sigmoid2, optimizerFactory = defaultOptimizer) {
    // Weights are drawn uniformly from [-sqrt(1/nInputs), +sqrt(1/nInputs)].
    const limit = Math.sqrt(1 / nInputs);
    this.weights = Array.from({ length: nInputs }, () => (Math.random() * 2 - 1) * limit);
    this.bias = 0;
    this.activation = activation;
    // One optimizer per weight plus one (last slot) for the bias.
    // Wrap the call: handing the factory straight to Array.from would invoke
    // it as factory(undefined, index), leaking the index into any optional
    // parameters the caller's factory declares.
    this._opts = Array.from({ length: nInputs + 1 }, () => optimizerFactory());
  }
  /**
   * @param {number[]} inputs - one value per weight.
   * @returns {number} activation.fn(bias + Σ inputs[i] * weights[i]).
   */
  predict(inputs) {
    const sum = inputs.reduce((acc, e, i) => acc + e * this.weights[i], this.bias);
    return this.activation.fn(sum);
  }
  // Apply pre-computed gradients via the optimizer.
  // Called internally by Layer / NetworkN / NetworkLSTM during backprop.
  /**
   * @param {number[]} weightGrads - gradient for each weight, same order as `weights`.
   * @param {number} biasGrad - gradient for the bias.
   * @param {number} lr - learning rate forwarded to every optimizer step.
   */
  _update(weightGrads, biasGrad, lr) {
    this.weights = this.weights.map((w, i) => this._opts[i].step(w, weightGrads[i], lr));
    // The bias optimizer lives in the slot just past the last weight.
    this.bias = this._opts[this.weights.length].step(this.bias, biasGrad, lr);
  }
  /**
   * Single training step on one example.
   * The gradient used is (target - prediction) * input; the activation
   * derivative is not applied here.
   * @param {number[]} inputs
   * @param {number} target
   * @param {number} lr
   */
  train(inputs, target, lr) {
    const prediction = this.predict(inputs);
    const error = target - prediction;
    this._update(inputs.map((inp) => error * inp), error, lr);
  }
};
|
|
74
147
|
|
|
75
148
|
// src/Layer.ts
|
|
149
|
+
var defaultOptimizer2 = () => new SGD();
|
|
76
150
|
var Layer = class {
|
|
77
|
-
constructor(nNeurons, nInputs) {
|
|
78
|
-
this.neurons = Array.from(
|
|
151
|
+
constructor(nNeurons, nInputs, activation = sigmoid2, optimizerFactory = defaultOptimizer2) {
|
|
152
|
+
this.neurons = Array.from(
|
|
153
|
+
{ length: nNeurons },
|
|
154
|
+
() => new NeuronN(nInputs, activation, optimizerFactory)
|
|
155
|
+
);
|
|
79
156
|
}
|
|
80
157
|
predict(inputs) {
|
|
81
158
|
return this.neurons.map((n) => n.predict(inputs));
|
|
@@ -115,12 +192,16 @@ var Network = class {
|
|
|
115
192
|
};
|
|
116
193
|
|
|
117
194
|
// src/NetworkN.ts
|
|
195
|
+
var defaultOptimizer3 = () => new SGD();
|
|
118
196
|
var NetworkN = class {
|
|
119
|
-
constructor(structure) {
|
|
197
|
+
constructor(structure, options = {}) {
|
|
120
198
|
this.structure = structure;
|
|
199
|
+
const nLayers = structure.length - 1;
|
|
200
|
+
const activations = options.activations ?? Array.from({ length: nLayers }, () => sigmoid2);
|
|
201
|
+
const optimizer = options.optimizer ?? defaultOptimizer3;
|
|
121
202
|
this.layers = [];
|
|
122
203
|
for (let i = 1; i < structure.length; i++) {
|
|
123
|
-
this.layers.push(new Layer(structure[i], structure[i - 1]));
|
|
204
|
+
this.layers.push(new Layer(structure[i], structure[i - 1], activations[i - 1], optimizer));
|
|
124
205
|
}
|
|
125
206
|
}
|
|
126
207
|
predict(inputs) {
|
|
@@ -132,17 +213,18 @@ var NetworkN = class {
|
|
|
132
213
|
const act = [inputs];
|
|
133
214
|
for (const layer of this.layers) act.push(layer.predict(act[act.length - 1]));
|
|
134
215
|
const pred = act[act.length - 1];
|
|
135
|
-
|
|
216
|
+
const outAct = this.layers[this.layers.length - 1].neurons[0].activation;
|
|
217
|
+
let deltas = pred.map((p, i) => (targets[i] - p) * outAct.dfn(p));
|
|
136
218
|
for (let l = this.layers.length - 1; l >= 0; l--) {
|
|
137
219
|
const layer = this.layers[l];
|
|
138
220
|
const layerIn = act[l];
|
|
221
|
+
const prevAct = l > 0 ? this.layers[l - 1].neurons[0].activation : null;
|
|
139
222
|
const prevDeltas = layerIn.map((out, j) => {
|
|
140
223
|
const errProp = layer.neurons.reduce((s, n, k) => s + deltas[k] * n.weights[j], 0);
|
|
141
|
-
return
|
|
224
|
+
return prevAct ? errProp * prevAct.dfn(out) : errProp;
|
|
142
225
|
});
|
|
143
226
|
layer.neurons.forEach((n, k) => {
|
|
144
|
-
n.
|
|
145
|
-
n.bias += lr * deltas[k];
|
|
227
|
+
n._update(layerIn.map((inp) => deltas[k] * inp), deltas[k], lr);
|
|
146
228
|
});
|
|
147
229
|
deltas = prevDeltas;
|
|
148
230
|
}
|
|
@@ -157,13 +239,13 @@ var NetworkN = class {
|
|
|
157
239
|
for (let l = this.layers.length - 1; l >= 0; l--) {
|
|
158
240
|
const layer = this.layers[l];
|
|
159
241
|
const layerIn = act[l];
|
|
242
|
+
const prevAct = l > 0 ? this.layers[l - 1].neurons[0].activation : null;
|
|
160
243
|
const prevDeltas = layerIn.map((out, j) => {
|
|
161
244
|
const errProp = layer.neurons.reduce((s, n, k) => s + deltas[k] * n.weights[j], 0);
|
|
162
|
-
return
|
|
245
|
+
return prevAct ? errProp * prevAct.dfn(out) : errProp;
|
|
163
246
|
});
|
|
164
247
|
layer.neurons.forEach((n, k) => {
|
|
165
|
-
n.
|
|
166
|
-
n.bias += lr * deltas[k];
|
|
248
|
+
n._update(layerIn.map((inp) => deltas[k] * inp), deltas[k], lr);
|
|
167
249
|
});
|
|
168
250
|
deltas = prevDeltas;
|
|
169
251
|
}
|
|
@@ -174,7 +256,7 @@ var NetworkN = class {
|
|
|
174
256
|
function sigmoid3(x) {
|
|
175
257
|
return 1 / (1 + Math.exp(-x));
|
|
176
258
|
}
|
|
177
|
-
function
|
|
259
|
+
/**
 * Hyperbolic tangent used by the LSTM gates and cell-state squashing.
 * Delegates to Math.tanh, which saturates at ±1 for large |x|; the manual
 * (e^{2x}-1)/(e^{2x}+1) form returns NaN once Math.exp(2x) overflows.
 * @param {number} x
 * @returns {number} tanh(x) in [-1, 1].
 */
function tanh2(x) {
  return Math.tanh(x);
}
|
|
@@ -223,10 +305,10 @@ var LSTMLayer = class {
|
|
|
223
305
|
const zo = this.outputGate.linear(combined);
|
|
224
306
|
const zf_a = zf.map(sigmoid3);
|
|
225
307
|
const zi_a = zi.map(sigmoid3);
|
|
226
|
-
const zg_a = zg.map(
|
|
308
|
+
const zg_a = zg.map(tanh2);
|
|
227
309
|
const zo_a = zo.map(sigmoid3);
|
|
228
310
|
const c = c_prev.map((cv, k) => zf_a[k] * cv + zi_a[k] * zg_a[k]);
|
|
229
|
-
const h = zo_a.map((o, k) => o *
|
|
311
|
+
const h = zo_a.map((o, k) => o * tanh2(c[k]));
|
|
230
312
|
this._traj.push({ combined, c_prev, zf, zf_a, zi, zi_a, zg, zg_a, zo, zo_a, c, h });
|
|
231
313
|
this.h = h;
|
|
232
314
|
this.c = c;
|
|
@@ -254,7 +336,7 @@ var LSTMLayer = class {
|
|
|
254
336
|
for (let t = T - 1; t >= 0; t--) {
|
|
255
337
|
const s = this._traj[t];
|
|
256
338
|
const dh = dh_seq[t].map((d, k) => d + dh_next[k]);
|
|
257
|
-
const tanh_c = s.c.map(
|
|
339
|
+
const tanh_c = s.c.map(tanh2);
|
|
258
340
|
const do_a = dh.map((d, k) => d * tanh_c[k]);
|
|
259
341
|
const dc = dh.map(
|
|
260
342
|
(d, k) => d * s.zo_a[k] * (1 - tanh_c[k] ** 2) + dc_next[k]
|
|
@@ -323,16 +405,19 @@ var LSTMLayer = class {
|
|
|
323
405
|
};
|
|
324
406
|
|
|
325
407
|
// src/NetworkLSTM.ts
|
|
408
|
+
var defaultOptimizer4 = () => new SGD();
|
|
326
409
|
var NetworkLSTM = class {
|
|
327
410
|
// [T][layer+1][neuron]
|
|
328
|
-
constructor(inputSize, hiddenSize, denseStructure) {
|
|
411
|
+
constructor(inputSize, hiddenSize, denseStructure, options = {}) {
|
|
329
412
|
this.inputSize = inputSize;
|
|
330
413
|
this.hiddenSize = hiddenSize;
|
|
331
414
|
this.lstm = new LSTMLayer(inputSize, hiddenSize);
|
|
415
|
+
const activation = options.denseActivation ?? sigmoid2;
|
|
416
|
+
const optimizer = options.optimizer ?? defaultOptimizer4;
|
|
332
417
|
this.denseLayers = [];
|
|
333
418
|
const sizes = [hiddenSize, ...denseStructure];
|
|
334
419
|
for (let i = 1; i < sizes.length; i++) {
|
|
335
|
-
this.denseLayers.push(new Layer(sizes[i], sizes[i - 1]));
|
|
420
|
+
this.denseLayers.push(new Layer(sizes[i], sizes[i - 1], activation, optimizer));
|
|
336
421
|
}
|
|
337
422
|
this._acts = [];
|
|
338
423
|
}
|
|
@@ -365,14 +450,16 @@ var NetworkLSTM = class {
|
|
|
365
450
|
for (let t = 0; t < T; t++) {
|
|
366
451
|
const acts = this._acts[t];
|
|
367
452
|
const pred = acts[acts.length - 1];
|
|
368
|
-
|
|
453
|
+
const outAct = this.denseLayers[this.denseLayers.length - 1].neurons[0].activation;
|
|
454
|
+
let deltas = pred.map((p, i) => (targets[t][i] - p) * outAct.dfn(p));
|
|
369
455
|
for (let l = this.denseLayers.length - 1; l >= 0; l--) {
|
|
370
456
|
const layer = this.denseLayers[l];
|
|
371
457
|
const layerIn = acts[l];
|
|
372
458
|
const grad = denseGrads[l];
|
|
459
|
+
const prevAct = l > 0 ? this.denseLayers[l - 1].neurons[0].activation : null;
|
|
373
460
|
const prevDeltas = layerIn.map((out, j) => {
|
|
374
461
|
const errProp = layer.neurons.reduce((s, n, k) => s + deltas[k] * n.weights[j], 0);
|
|
375
|
-
return
|
|
462
|
+
return prevAct ? errProp * prevAct.dfn(out) : errProp;
|
|
376
463
|
});
|
|
377
464
|
layer.neurons.forEach((n, k) => {
|
|
378
465
|
n.weights.forEach((_, j) => {
|
|
@@ -388,8 +475,11 @@ var NetworkLSTM = class {
|
|
|
388
475
|
const layer = this.denseLayers[l];
|
|
389
476
|
const grad = denseGrads[l];
|
|
390
477
|
layer.neurons.forEach((n, k) => {
|
|
391
|
-
n.
|
|
392
|
-
|
|
478
|
+
n._update(
|
|
479
|
+
grad.dW[k].map((g) => g / T),
|
|
480
|
+
grad.db[k] / T,
|
|
481
|
+
lr
|
|
482
|
+
);
|
|
393
483
|
});
|
|
394
484
|
}
|
|
395
485
|
this.lstm.backprop(dh_seq, lr);
|
|
@@ -414,13 +504,48 @@ var NetworkLSTM = class {
|
|
|
414
504
|
});
|
|
415
505
|
}
|
|
416
506
|
};
|
|
507
|
+
|
|
508
|
+
// src/losses.ts
|
|
509
|
+
function mse(predicted, actual) {
|
|
510
|
+
return predicted.reduce((sum, p, i) => sum + (actual[i] - p) ** 2, 0) / predicted.length;
|
|
511
|
+
}
|
|
512
|
+
function crossEntropy(predicted, actual) {
|
|
513
|
+
const eps = 1e-15;
|
|
514
|
+
return -predicted.reduce((sum, p, i) => {
|
|
515
|
+
const clipped = Math.max(eps, Math.min(1 - eps, p));
|
|
516
|
+
return sum + actual[i] * Math.log(clipped) + (1 - actual[i]) * Math.log(1 - clipped);
|
|
517
|
+
}, 0) / predicted.length;
|
|
518
|
+
}
|
|
519
|
+
function mseDelta(predicted, actual) {
|
|
520
|
+
return actual - predicted;
|
|
521
|
+
}
|
|
522
|
+
function crossEntropyDelta(predicted, actual) {
|
|
523
|
+
return actual - predicted;
|
|
524
|
+
}
|
|
525
|
+
function crossEntropyDeltaRaw(predicted, actual) {
|
|
526
|
+
const eps = 1e-15;
|
|
527
|
+
const p = Math.max(eps, Math.min(1 - eps, predicted));
|
|
528
|
+
return actual / p - (1 - actual) / (1 - p);
|
|
529
|
+
}
|
|
417
530
|
// Annotate the CommonJS export names for ESM import in node:
|
|
418
531
|
0 && (module.exports = {
|
|
532
|
+
Adam,
|
|
419
533
|
LSTMLayer,
|
|
420
534
|
Layer,
|
|
535
|
+
Momentum,
|
|
421
536
|
Network,
|
|
422
537
|
NetworkLSTM,
|
|
423
538
|
NetworkN,
|
|
424
539
|
Neuron,
|
|
425
|
-
NeuronN
|
|
540
|
+
NeuronN,
|
|
541
|
+
SGD,
|
|
542
|
+
crossEntropy,
|
|
543
|
+
crossEntropyDelta,
|
|
544
|
+
crossEntropyDeltaRaw,
|
|
545
|
+
linear,
|
|
546
|
+
mse,
|
|
547
|
+
mseDelta,
|
|
548
|
+
relu,
|
|
549
|
+
sigmoid,
|
|
550
|
+
tanh
|
|
426
551
|
});
|
package/dist/index.mjs
CHANGED
|
@@ -18,32 +18,97 @@ var Neuron = class {
|
|
|
18
18
|
}
|
|
19
19
|
};
|
|
20
20
|
|
|
21
|
+
// src/activations.ts
|
|
22
|
+
var sigmoid2 = {
|
|
23
|
+
fn: (x) => 1 / (1 + Math.exp(-x)),
|
|
24
|
+
dfn: (out) => out * (1 - out)
|
|
25
|
+
};
|
|
26
|
+
// Hyperbolic-tangent activation: `fn` maps a pre-activation to (-1, 1),
// `dfn` gives the derivative expressed in terms of the activation OUTPUT.
var tanh = {
  /**
   * @param {number} x - weighted sum (pre-activation).
   * @returns {number} tanh(x).
   */
  // Math.tanh saturates cleanly at ±1. The hand-rolled (e^{2x}-1)/(e^{2x}+1)
  // form overflows to Infinity/Infinity = NaN once 2x exceeds ~709.
  fn: (x) => Math.tanh(x),
  /**
   * @param {number} out - a value previously returned by `fn`.
   * @returns {number} d tanh / dx evaluated at that point, i.e. 1 - out².
   */
  dfn: (out) => 1 - out * out
};
|
|
33
|
+
var relu = {
|
|
34
|
+
fn: (x) => Math.max(0, x),
|
|
35
|
+
dfn: (out) => out > 0 ? 1 : 0
|
|
36
|
+
};
|
|
37
|
+
var linear = {
|
|
38
|
+
fn: (x) => x,
|
|
39
|
+
dfn: () => 1
|
|
40
|
+
};
|
|
41
|
+
|
|
42
|
+
// src/optimizers.ts
|
|
43
|
+
var SGD = class {
|
|
44
|
+
step(weight, gradient, lr) {
|
|
45
|
+
return weight + lr * gradient;
|
|
46
|
+
}
|
|
47
|
+
};
|
|
48
|
+
var Momentum = class {
|
|
49
|
+
constructor(beta = 0.9) {
|
|
50
|
+
this.beta = beta;
|
|
51
|
+
this.v = 0;
|
|
52
|
+
}
|
|
53
|
+
step(weight, gradient, lr) {
|
|
54
|
+
this.v = this.beta * this.v + lr * gradient;
|
|
55
|
+
return weight + this.v;
|
|
56
|
+
}
|
|
57
|
+
};
|
|
58
|
+
var Adam = class {
|
|
59
|
+
constructor(beta1 = 0.9, beta2 = 0.999, epsilon = 1e-8) {
|
|
60
|
+
this.beta1 = beta1;
|
|
61
|
+
this.beta2 = beta2;
|
|
62
|
+
this.epsilon = epsilon;
|
|
63
|
+
this.m = 0;
|
|
64
|
+
this.v = 0;
|
|
65
|
+
this.t = 0;
|
|
66
|
+
}
|
|
67
|
+
step(weight, gradient, lr) {
|
|
68
|
+
this.t++;
|
|
69
|
+
this.m = this.beta1 * this.m + (1 - this.beta1) * gradient;
|
|
70
|
+
this.v = this.beta2 * this.v + (1 - this.beta2) * gradient * gradient;
|
|
71
|
+
const mHat = this.m / (1 - Math.pow(this.beta1, this.t));
|
|
72
|
+
const vHat = this.v / (1 - Math.pow(this.beta2, this.t));
|
|
73
|
+
return weight + lr * mHat / (Math.sqrt(vHat) + this.epsilon);
|
|
74
|
+
}
|
|
75
|
+
};
|
|
76
|
+
|
|
21
77
|
// src/NeuronN.ts
|
|
22
|
-
|
|
23
|
-
return 1 / (1 + Math.exp(-x));
|
|
24
|
-
}
|
|
78
|
+
// Fallback optimizer factory: every weight and the bias get a plain SGD instance.
var defaultOptimizer = () => new SGD();

// Neuron with `nInputs` weights, one bias and a pluggable activation.
var NeuronN = class {
  // nInputs:          number of input weights to create.
  // activation:       object with `fn` (forward) and `dfn` (derivative from output).
  // optimizerFactory: called once per weight plus once for the bias, so each
  //                   parameter owns an independent optimizer instance.
  constructor(nInputs, activation = sigmoid2, optimizerFactory = defaultOptimizer) {
    // Weights start uniformly distributed in [-sqrt(1 / nInputs), sqrt(1 / nInputs)).
    const limit = Math.sqrt(1 / nInputs);
    this.weights = Array.from({ length: nInputs }, () => (Math.random() * 2 - 1) * limit);
    this.bias = 0;
    this.activation = activation;
    // Call the factory with NO arguments. Passing it straight to Array.from
    // would hand it (undefined, index), so a factory with defaulted parameters
    // such as (b1 = 0.9, b2 = 0.999) => new Adam(b1, b2) would silently
    // receive the array index as its second argument.
    this._opts = Array.from({ length: nInputs + 1 }, () => optimizerFactory());
  }
  // Weighted sum of the inputs plus the bias, passed through the activation.
  predict(inputs) {
    const sum = inputs.reduce((acc, e, i) => acc + e * this.weights[i], this.bias);
    return this.activation.fn(sum);
  }
  // Apply pre-computed gradients via the optimizer.
  // Called internally by Layer / NetworkN / NetworkLSTM during backprop.
  // The optimizer at index i drives weight i; the last one drives the bias.
  _update(weightGrads, biasGrad, lr) {
    this.weights = this.weights.map((w, i) => this._opts[i].step(w, weightGrads[i], lr));
    this.bias = this._opts[this.weights.length].step(this.bias, biasGrad, lr);
  }
  // Single-sample update: the error (target - prediction) scaled by each
  // input is used as that weight's gradient, the raw error as the bias gradient.
  train(inputs, target, lr) {
    const prediction = this.predict(inputs);
    const error = target - prediction;
    this._update(inputs.map((inp) => error * inp), error, lr);
  }
};
|
|
42
103
|
|
|
43
104
|
// src/Layer.ts
|
|
105
|
+
// Fallback optimizer factory used as the Layer constructor's default:
// produces a fresh SGD instance on every call.
var defaultOptimizer2 = function () {
  return new SGD();
};
|
|
44
106
|
var Layer = class {
|
|
45
|
-
constructor(nNeurons, nInputs) {
|
|
46
|
-
this.neurons = Array.from(
|
|
107
|
+
constructor(nNeurons, nInputs, activation = sigmoid2, optimizerFactory = defaultOptimizer2) {
|
|
108
|
+
this.neurons = Array.from(
|
|
109
|
+
{ length: nNeurons },
|
|
110
|
+
() => new NeuronN(nInputs, activation, optimizerFactory)
|
|
111
|
+
);
|
|
47
112
|
}
|
|
48
113
|
predict(inputs) {
|
|
49
114
|
return this.neurons.map((n) => n.predict(inputs));
|
|
@@ -83,12 +148,16 @@ var Network = class {
|
|
|
83
148
|
};
|
|
84
149
|
|
|
85
150
|
// src/NetworkN.ts
|
|
151
|
+
// Fallback optimizer factory used by NetworkN when `options.optimizer` is
// not supplied: produces a fresh SGD instance on every call.
var defaultOptimizer3 = function () {
  return new SGD();
};
|
|
86
152
|
var NetworkN = class {
|
|
87
|
-
constructor(structure) {
|
|
153
|
+
constructor(structure, options = {}) {
|
|
88
154
|
this.structure = structure;
|
|
155
|
+
const nLayers = structure.length - 1;
|
|
156
|
+
const activations = options.activations ?? Array.from({ length: nLayers }, () => sigmoid2);
|
|
157
|
+
const optimizer = options.optimizer ?? defaultOptimizer3;
|
|
89
158
|
this.layers = [];
|
|
90
159
|
for (let i = 1; i < structure.length; i++) {
|
|
91
|
-
this.layers.push(new Layer(structure[i], structure[i - 1]));
|
|
160
|
+
this.layers.push(new Layer(structure[i], structure[i - 1], activations[i - 1], optimizer));
|
|
92
161
|
}
|
|
93
162
|
}
|
|
94
163
|
predict(inputs) {
|
|
@@ -100,17 +169,18 @@ var NetworkN = class {
|
|
|
100
169
|
const act = [inputs];
|
|
101
170
|
for (const layer of this.layers) act.push(layer.predict(act[act.length - 1]));
|
|
102
171
|
const pred = act[act.length - 1];
|
|
103
|
-
|
|
172
|
+
const outAct = this.layers[this.layers.length - 1].neurons[0].activation;
|
|
173
|
+
let deltas = pred.map((p, i) => (targets[i] - p) * outAct.dfn(p));
|
|
104
174
|
for (let l = this.layers.length - 1; l >= 0; l--) {
|
|
105
175
|
const layer = this.layers[l];
|
|
106
176
|
const layerIn = act[l];
|
|
177
|
+
const prevAct = l > 0 ? this.layers[l - 1].neurons[0].activation : null;
|
|
107
178
|
const prevDeltas = layerIn.map((out, j) => {
|
|
108
179
|
const errProp = layer.neurons.reduce((s, n, k) => s + deltas[k] * n.weights[j], 0);
|
|
109
|
-
return
|
|
180
|
+
return prevAct ? errProp * prevAct.dfn(out) : errProp;
|
|
110
181
|
});
|
|
111
182
|
layer.neurons.forEach((n, k) => {
|
|
112
|
-
n.
|
|
113
|
-
n.bias += lr * deltas[k];
|
|
183
|
+
n._update(layerIn.map((inp) => deltas[k] * inp), deltas[k], lr);
|
|
114
184
|
});
|
|
115
185
|
deltas = prevDeltas;
|
|
116
186
|
}
|
|
@@ -125,13 +195,13 @@ var NetworkN = class {
|
|
|
125
195
|
for (let l = this.layers.length - 1; l >= 0; l--) {
|
|
126
196
|
const layer = this.layers[l];
|
|
127
197
|
const layerIn = act[l];
|
|
198
|
+
const prevAct = l > 0 ? this.layers[l - 1].neurons[0].activation : null;
|
|
128
199
|
const prevDeltas = layerIn.map((out, j) => {
|
|
129
200
|
const errProp = layer.neurons.reduce((s, n, k) => s + deltas[k] * n.weights[j], 0);
|
|
130
|
-
return
|
|
201
|
+
return prevAct ? errProp * prevAct.dfn(out) : errProp;
|
|
131
202
|
});
|
|
132
203
|
layer.neurons.forEach((n, k) => {
|
|
133
|
-
n.
|
|
134
|
-
n.bias += lr * deltas[k];
|
|
204
|
+
n._update(layerIn.map((inp) => deltas[k] * inp), deltas[k], lr);
|
|
135
205
|
});
|
|
136
206
|
deltas = prevDeltas;
|
|
137
207
|
}
|
|
@@ -142,7 +212,7 @@ var NetworkN = class {
|
|
|
142
212
|
// Logistic sigmoid 1 / (1 + e^-x); applied to the LSTM gate pre-activations.
function sigmoid3(x) {
  const decay = Math.exp(-x);
  return 1 / (1 + decay);
}
|
|
145
|
-
function
|
|
215
|
+
// Hyperbolic tangent used for the LSTM candidate gate and cell-state squashing.
// Delegates to Math.tanh: the hand-rolled (e^2x - 1) / (e^2x + 1) evaluates to
// Infinity / Infinity = NaN once Math.exp(2 * x) overflows, and a single NaN
// would then propagate through the stored cell and hidden state.
// Takes only `x` so it stays safe to pass directly to Array.prototype.map.
function tanh2(x) {
  return Math.tanh(x);
}
|
|
@@ -191,10 +261,10 @@ var LSTMLayer = class {
|
|
|
191
261
|
const zo = this.outputGate.linear(combined);
|
|
192
262
|
const zf_a = zf.map(sigmoid3);
|
|
193
263
|
const zi_a = zi.map(sigmoid3);
|
|
194
|
-
const zg_a = zg.map(
|
|
264
|
+
const zg_a = zg.map(tanh2);
|
|
195
265
|
const zo_a = zo.map(sigmoid3);
|
|
196
266
|
const c = c_prev.map((cv, k) => zf_a[k] * cv + zi_a[k] * zg_a[k]);
|
|
197
|
-
const h = zo_a.map((o, k) => o *
|
|
267
|
+
const h = zo_a.map((o, k) => o * tanh2(c[k]));
|
|
198
268
|
this._traj.push({ combined, c_prev, zf, zf_a, zi, zi_a, zg, zg_a, zo, zo_a, c, h });
|
|
199
269
|
this.h = h;
|
|
200
270
|
this.c = c;
|
|
@@ -222,7 +292,7 @@ var LSTMLayer = class {
|
|
|
222
292
|
for (let t = T - 1; t >= 0; t--) {
|
|
223
293
|
const s = this._traj[t];
|
|
224
294
|
const dh = dh_seq[t].map((d, k) => d + dh_next[k]);
|
|
225
|
-
const tanh_c = s.c.map(
|
|
295
|
+
const tanh_c = s.c.map(tanh2);
|
|
226
296
|
const do_a = dh.map((d, k) => d * tanh_c[k]);
|
|
227
297
|
const dc = dh.map(
|
|
228
298
|
(d, k) => d * s.zo_a[k] * (1 - tanh_c[k] ** 2) + dc_next[k]
|
|
@@ -291,16 +361,19 @@ var LSTMLayer = class {
|
|
|
291
361
|
};
|
|
292
362
|
|
|
293
363
|
// src/NetworkLSTM.ts
|
|
364
|
+
// Fallback optimizer factory used by NetworkLSTM's dense layers when
// `options.optimizer` is not supplied: produces a fresh SGD instance per call.
var defaultOptimizer4 = function () {
  return new SGD();
};
|
|
294
365
|
var NetworkLSTM = class {
|
|
295
366
|
// [T][layer+1][neuron]
|
|
296
|
-
constructor(inputSize, hiddenSize, denseStructure) {
|
|
367
|
+
constructor(inputSize, hiddenSize, denseStructure, options = {}) {
|
|
297
368
|
this.inputSize = inputSize;
|
|
298
369
|
this.hiddenSize = hiddenSize;
|
|
299
370
|
this.lstm = new LSTMLayer(inputSize, hiddenSize);
|
|
371
|
+
const activation = options.denseActivation ?? sigmoid2;
|
|
372
|
+
const optimizer = options.optimizer ?? defaultOptimizer4;
|
|
300
373
|
this.denseLayers = [];
|
|
301
374
|
const sizes = [hiddenSize, ...denseStructure];
|
|
302
375
|
for (let i = 1; i < sizes.length; i++) {
|
|
303
|
-
this.denseLayers.push(new Layer(sizes[i], sizes[i - 1]));
|
|
376
|
+
this.denseLayers.push(new Layer(sizes[i], sizes[i - 1], activation, optimizer));
|
|
304
377
|
}
|
|
305
378
|
this._acts = [];
|
|
306
379
|
}
|
|
@@ -333,14 +406,16 @@ var NetworkLSTM = class {
|
|
|
333
406
|
for (let t = 0; t < T; t++) {
|
|
334
407
|
const acts = this._acts[t];
|
|
335
408
|
const pred = acts[acts.length - 1];
|
|
336
|
-
|
|
409
|
+
const outAct = this.denseLayers[this.denseLayers.length - 1].neurons[0].activation;
|
|
410
|
+
let deltas = pred.map((p, i) => (targets[t][i] - p) * outAct.dfn(p));
|
|
337
411
|
for (let l = this.denseLayers.length - 1; l >= 0; l--) {
|
|
338
412
|
const layer = this.denseLayers[l];
|
|
339
413
|
const layerIn = acts[l];
|
|
340
414
|
const grad = denseGrads[l];
|
|
415
|
+
const prevAct = l > 0 ? this.denseLayers[l - 1].neurons[0].activation : null;
|
|
341
416
|
const prevDeltas = layerIn.map((out, j) => {
|
|
342
417
|
const errProp = layer.neurons.reduce((s, n, k) => s + deltas[k] * n.weights[j], 0);
|
|
343
|
-
return
|
|
418
|
+
return prevAct ? errProp * prevAct.dfn(out) : errProp;
|
|
344
419
|
});
|
|
345
420
|
layer.neurons.forEach((n, k) => {
|
|
346
421
|
n.weights.forEach((_, j) => {
|
|
@@ -356,8 +431,11 @@ var NetworkLSTM = class {
|
|
|
356
431
|
const layer = this.denseLayers[l];
|
|
357
432
|
const grad = denseGrads[l];
|
|
358
433
|
layer.neurons.forEach((n, k) => {
|
|
359
|
-
n.
|
|
360
|
-
|
|
434
|
+
n._update(
|
|
435
|
+
grad.dW[k].map((g) => g / T),
|
|
436
|
+
grad.db[k] / T,
|
|
437
|
+
lr
|
|
438
|
+
);
|
|
361
439
|
});
|
|
362
440
|
}
|
|
363
441
|
this.lstm.backprop(dh_seq, lr);
|
|
@@ -382,12 +460,47 @@ var NetworkLSTM = class {
|
|
|
382
460
|
});
|
|
383
461
|
}
|
|
384
462
|
};
|
|
463
|
+
|
|
464
|
+
// src/losses.ts
|
|
465
|
+
// Mean squared error between two equally indexed numeric arrays.
// The result is averaged over predicted.length (an empty input yields NaN).
function mse(predicted, actual) {
  let total = 0;
  predicted.forEach((p, i) => {
    total += (actual[i] - p) ** 2;
  });
  return total / predicted.length;
}
|
|
468
|
+
// Binary cross-entropy averaged over predicted.length.
// Each prediction is clamped to [1e-15, 1 - 1e-15] before taking logs so that
// predictions of exactly 0 or 1 do not produce log(0).
function crossEntropy(predicted, actual) {
  const eps = 1e-15;
  let total = 0;
  predicted.forEach((p, i) => {
    const clipped = Math.max(eps, Math.min(1 - eps, p));
    total = total + actual[i] * Math.log(clipped) + (1 - actual[i]) * Math.log(1 - clipped);
  });
  return -total / predicted.length;
}
|
|
475
|
+
// Scalar error term for a squared-error loss: actual minus predicted.
function mseDelta(predicted, actual) {
  const difference = actual - predicted;
  return difference;
}
|
|
478
|
+
// Scalar error term for cross-entropy: actual minus predicted.
// NOTE(review): this simplified form presumably assumes a sigmoid output whose
// derivative cancels against the loss derivative; confirm against callers.
function crossEntropyDelta(predicted, actual) {
  const difference = actual - predicted;
  return difference;
}
|
|
481
|
+
// Cross-entropy error term with respect to the prediction itself:
// actual / p - (1 - actual) / (1 - p), where p is the prediction clamped to
// [1e-15, 1 - 1e-15] so neither denominator can be zero.
function crossEntropyDeltaRaw(predicted, actual) {
  const eps = 1e-15;
  const upper = 1 - eps;
  const p = Math.max(eps, Math.min(upper, predicted));
  const positiveTerm = actual / p;
  const negativeTerm = (1 - actual) / (1 - p);
  return positiveTerm - negativeTerm;
}
|
|
385
486
|
export {
|
|
487
|
+
Adam,
|
|
386
488
|
LSTMLayer,
|
|
387
489
|
Layer,
|
|
490
|
+
Momentum,
|
|
388
491
|
Network,
|
|
389
492
|
NetworkLSTM,
|
|
390
493
|
NetworkN,
|
|
391
494
|
Neuron,
|
|
392
|
-
NeuronN
|
|
495
|
+
NeuronN,
|
|
496
|
+
SGD,
|
|
497
|
+
crossEntropy,
|
|
498
|
+
crossEntropyDelta,
|
|
499
|
+
crossEntropyDeltaRaw,
|
|
500
|
+
linear,
|
|
501
|
+
mse,
|
|
502
|
+
mseDelta,
|
|
503
|
+
relu,
|
|
504
|
+
sigmoid2 as sigmoid,
|
|
505
|
+
tanh
|
|
393
506
|
};
|
package/package.json
CHANGED