@dniskav/neuron 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +80 -10
- package/dist/index.d.mts +61 -5
- package/dist/index.d.ts +61 -5
- package/dist/index.js +177 -30
- package/dist/index.mjs +160 -29
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
# @dniskav/neuron
|
|
2
|
-
|
|
3
1
|
[](https://www.npmjs.com/package/@dniskav/neuron)
|
|
4
2
|
[](LICENSE)
|
|
5
3
|
|
|
@@ -7,15 +5,19 @@ A minimal, dependency-free neural network library built from scratch in TypeScri
|
|
|
7
5
|
|
|
8
6
|
## What's inside
|
|
9
7
|
|
|
10
|
-
|
|
|
11
|
-
|
|
8
|
+
| Export | Description |
|
|
9
|
+
|--------|-------------|
|
|
12
10
|
| `Neuron` | Single-input neuron. The simplest possible unit: one weight, one bias. |
|
|
13
|
-
| `NeuronN` | N-input neuron with Xavier initialization and
|
|
11
|
+
| `NeuronN` | N-input neuron with Xavier initialization and configurable activation. |
|
|
14
12
|
| `Layer` | A group of `NeuronN` neurons that share the same inputs. |
|
|
15
13
|
| `Network` | Two-layer network (hidden + output) with backpropagation. |
|
|
16
14
|
| `NetworkN` | Deep network of arbitrary depth. Define your architecture as `[inputs, ...hidden, outputs]`. |
|
|
17
15
|
| `LSTMLayer` | Recurrent layer with persistent hidden and cell state. Learns sequences via BPTT. |
|
|
18
16
|
| `NetworkLSTM` | Wraps an `LSTMLayer` + dense layers. Maintains memory across steps within an episode. |
|
|
17
|
+
| `sigmoid` `relu` `tanh` `linear` `leakyRelu` `elu` | Built-in activation functions. `makeLeakyRelu(alpha)` and `makeElu(alpha)` build variants with a custom α. |
|
|
18
|
+
| `SGD` `Momentum` `Adam` | Optimizers. Each instance tracks its own state per weight. |
|
|
19
|
+
| `mse` `crossEntropy` | Loss functions for evaluation and logging. |
|
|
20
|
+
| `mseDelta` `crossEntropyDelta` | Output-layer delta functions for use with `trainWithDeltas`. |
|
|
19
21
|
|
|
20
22
|
## Install
|
|
21
23
|
|
|
@@ -92,12 +94,73 @@ net.train([0.5, 0.3, 0.8], [1, 0], 0.05);
|
|
|
92
94
|
const [out1, out2] = net.predict([0.5, 0.3, 0.8]);
|
|
93
95
|
```
|
|
94
96
|
|
|
97
|
+
### Activations — ReLU, tanh, and more
|
|
98
|
+
|
|
99
|
+
Pass an activation per layer. The last layer typically uses `sigmoid` for binary output or `linear` for regression.
|
|
100
|
+
|
|
101
|
+
```ts
|
|
102
|
+
import { NetworkN, relu, sigmoid } from "@dniskav/neuron";
|
|
103
|
+
|
|
104
|
+
const net = new NetworkN([3, 64, 32, 1], {
|
|
105
|
+
activations: [relu, relu, sigmoid],
|
|
106
|
+
});
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Available: `sigmoid`, `relu`, `tanh`, `linear`, `leakyRelu`, `elu`. Use `makeLeakyRelu(alpha)` or `makeElu(alpha)` for a custom α.
|
|
110
|
+
|
|
111
|
+
### Optimizers — Adam, Momentum, SGD
|
|
112
|
+
|
|
113
|
+
Pass an optimizer factory. Each weight gets its own instance with independent state.
|
|
114
|
+
|
|
115
|
+
```ts
|
|
116
|
+
import { NetworkN, relu, sigmoid, Adam } from "@dniskav/neuron";
|
|
117
|
+
|
|
118
|
+
const net = new NetworkN([2, 64, 1], {
|
|
119
|
+
activations: [relu, sigmoid],
|
|
120
|
+
optimizer: () => new Adam(), // default: beta1=0.9, beta2=0.999
|
|
121
|
+
});
|
|
122
|
+
|
|
123
|
+
// Momentum example
|
|
124
|
+
import { Momentum } from "@dniskav/neuron";
|
|
125
|
+
const net2 = new NetworkN([2, 32, 1], {
|
|
126
|
+
optimizer: () => new Momentum(0.9),
|
|
127
|
+
});
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
Optimizers also work in `NetworkLSTM` (applied to the dense layers):
|
|
131
|
+
|
|
132
|
+
```ts
|
|
133
|
+
import { NetworkLSTM, relu, Adam } from "@dniskav/neuron";
|
|
134
|
+
|
|
135
|
+
const net = new NetworkLSTM(1, 8, [4, 1], {
|
|
136
|
+
denseActivation: relu,
|
|
137
|
+
  optimizer: () => new Adam(), // Adam's arguments are (beta1, beta2, epsilon); the learning rate is passed to train()
|
|
138
|
+
});
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
### Loss utilities
|
|
142
|
+
|
|
143
|
+
```ts
|
|
144
|
+
import { mse, crossEntropy } from "@dniskav/neuron";
|
|
145
|
+
|
|
146
|
+
const predicted = net.predict([0.5, 0.3]);
|
|
147
|
+
console.log(mse(predicted, [1, 0]));
|
|
148
|
+
console.log(crossEntropy(predicted, [1, 0]));
|
|
149
|
+
```
|
|
150
|
+
|
|
95
151
|
### trainWithDeltas — custom loss / physics-based gradients
|
|
96
152
|
|
|
97
153
|
`NetworkN` also exposes `trainWithDeltas` for when you compute your own output-layer deltas (e.g., from a physics simulation or a custom loss function):
|
|
98
154
|
|
|
99
155
|
```ts
|
|
100
|
-
|
|
156
|
+
import { NetworkN, mseDelta } from "@dniskav/neuron";
|
|
157
|
+
|
|
158
|
+
const net = new NetworkN([3, 16, 2]);
|
|
159
|
+
const pred = net.predict(inputs);
|
|
160
|
+
|
|
161
|
+
// Compute deltas manually using a helper, or from any external signal
|
|
162
|
+
const deltas = pred.map((p, i) => mseDelta(p, targets[i]));
|
|
163
|
+
net.trainWithDeltas(inputs, deltas, 0.01);
|
|
101
164
|
```
|
|
102
165
|
|
|
103
166
|
### NetworkLSTM — recurrent network with memory
|
|
@@ -143,17 +206,19 @@ The network learns to count steps using its hidden state — no external counter
|
|
|
143
206
|
|
|
144
207
|
## How it works
|
|
145
208
|
|
|
146
|
-
|
|
209
|
+
Each class applies an **activation function** to the weighted sum of inputs and uses **gradient descent** to update weights:
|
|
147
210
|
|
|
148
211
|
```
|
|
149
|
-
weight += lr ×
|
|
150
|
-
bias += lr ×
|
|
212
|
+
weight += lr × delta × input
|
|
213
|
+
bias += lr × delta
|
|
151
214
|
```
|
|
152
215
|
|
|
153
|
-
`NetworkN` implements full **backpropagation** across all layers, propagating deltas from the output back to the first layer using the chain rule.
|
|
216
|
+
`NetworkN` implements full **backpropagation** across all layers, propagating deltas from the output back to the first layer using the chain rule. The derivative of the chosen activation is applied at each layer.
|
|
154
217
|
|
|
155
218
|
`NeuronN` uses simplified **Xavier initialization** — weights start in `[-√(1/n), +√(1/n)]` — so gradients flow well from the start of training.
|
|
156
219
|
|
|
220
|
+
When an **optimizer** is used (e.g., Adam), the raw gradient is passed to the optimizer instead of being applied directly. Each weight maintains its own optimizer state (velocity, moments).
|
|
221
|
+
|
|
157
222
|
## Build
|
|
158
223
|
|
|
159
224
|
```bash
|
|
@@ -165,6 +230,11 @@ npm run dev # watch mode
|
|
|
165
230
|
|
|
166
231
|
If you are an AI agent or LLM working with this codebase, read [AGENTS.md](AGENTS.md) first. It contains the full class hierarchy, design constraints, and what this library does not do.
|
|
167
232
|
|
|
233
|
+
## Possible improvements
|
|
234
|
+
|
|
235
|
+
1. **Support for batches** in training to improve efficiency.
|
|
236
|
+
2. **Improve documentation** with more advanced examples and use cases.
|
|
237
|
+
|
|
168
238
|
## License
|
|
169
239
|
|
|
170
240
|
MIT
|
package/dist/index.d.mts
CHANGED
|
@@ -6,17 +6,59 @@ declare class Neuron {
|
|
|
6
6
|
train(input: number, target: number, lr: number): void;
|
|
7
7
|
}
|
|
8
8
|
|
|
9
|
+
interface Activation {
|
|
10
|
+
fn(x: number): number;
|
|
11
|
+
dfn(out: number): number;
|
|
12
|
+
}
|
|
13
|
+
declare const sigmoid: Activation;
|
|
14
|
+
declare const tanh: Activation;
|
|
15
|
+
declare const relu: Activation;
|
|
16
|
+
declare const linear: Activation;
|
|
17
|
+
declare function makeLeakyRelu(alpha?: number): Activation;
|
|
18
|
+
/** Leaky ReLU with the standard α = 0.01. */
|
|
19
|
+
declare const leakyRelu: Activation;
|
|
20
|
+
declare function makeElu(alpha?: number): Activation;
|
|
21
|
+
/** ELU with the standard α = 1.0. */
|
|
22
|
+
declare const elu: Activation;
|
|
23
|
+
|
|
24
|
+
interface Optimizer {
|
|
25
|
+
step(weight: number, gradient: number, lr: number): number;
|
|
26
|
+
}
|
|
27
|
+
type OptimizerFactory = () => Optimizer;
|
|
28
|
+
declare class SGD implements Optimizer {
|
|
29
|
+
step(weight: number, gradient: number, lr: number): number;
|
|
30
|
+
}
|
|
31
|
+
declare class Momentum implements Optimizer {
|
|
32
|
+
readonly beta: number;
|
|
33
|
+
private v;
|
|
34
|
+
constructor(beta?: number);
|
|
35
|
+
step(weight: number, gradient: number, lr: number): number;
|
|
36
|
+
}
|
|
37
|
+
declare class Adam implements Optimizer {
|
|
38
|
+
readonly beta1: number;
|
|
39
|
+
readonly beta2: number;
|
|
40
|
+
readonly epsilon: number;
|
|
41
|
+
private m;
|
|
42
|
+
private v;
|
|
43
|
+
private t;
|
|
44
|
+
constructor(beta1?: number, beta2?: number, epsilon?: number);
|
|
45
|
+
step(weight: number, gradient: number, lr: number): number;
|
|
46
|
+
}
|
|
47
|
+
|
|
9
48
|
declare class NeuronN {
|
|
10
49
|
weights: number[];
|
|
11
50
|
bias: number;
|
|
12
|
-
|
|
51
|
+
readonly activation: Activation;
|
|
52
|
+
private _opts;
|
|
53
|
+
constructor(nInputs: number, activation?: Activation, optimizerFactory?: OptimizerFactory);
|
|
13
54
|
predict(inputs: number[]): number;
|
|
55
|
+
_update(weightGrads: number[], biasGrad: number, lr: number): void;
|
|
14
56
|
train(inputs: number[], target: number, lr: number): void;
|
|
15
57
|
}
|
|
16
58
|
|
|
17
59
|
declare class Layer {
|
|
18
60
|
neurons: NeuronN[];
|
|
19
|
-
constructor(nNeurons: number, nInputs: number);
|
|
61
|
+
constructor(nNeurons: number, nInputs: number, activation?: Activation, optimizerFactory?: OptimizerFactory);
|
|
20
62
|
predict(inputs: number[]): number[];
|
|
21
63
|
}
|
|
22
64
|
|
|
@@ -28,10 +70,14 @@ declare class Network {
|
|
|
28
70
|
train(inputs: number[], target: number, lr: number): number;
|
|
29
71
|
}
|
|
30
72
|
|
|
73
|
+
interface NetworkNOptions {
|
|
74
|
+
activations?: Activation[];
|
|
75
|
+
optimizer?: OptimizerFactory;
|
|
76
|
+
}
|
|
31
77
|
declare class NetworkN {
|
|
32
78
|
readonly structure: number[];
|
|
33
79
|
layers: Layer[];
|
|
34
|
-
constructor(structure: number[]);
|
|
80
|
+
constructor(structure: number[], options?: NetworkNOptions);
|
|
35
81
|
predict(inputs: number[]): number[];
|
|
36
82
|
train(inputs: number[], targets: number[], lr: number): number;
|
|
37
83
|
trainWithDeltas(inputs: number[], outputDeltas: number[], lr: number): void;
|
|
@@ -78,13 +124,17 @@ declare class LSTMLayer {
|
|
|
78
124
|
setWeights(data: ReturnType<LSTMLayer["getWeights"]>): void;
|
|
79
125
|
}
|
|
80
126
|
|
|
127
|
+
interface NetworkLSTMOptions {
|
|
128
|
+
denseActivation?: Activation;
|
|
129
|
+
optimizer?: OptimizerFactory;
|
|
130
|
+
}
|
|
81
131
|
declare class NetworkLSTM {
|
|
82
132
|
readonly inputSize: number;
|
|
83
133
|
readonly hiddenSize: number;
|
|
84
134
|
lstm: LSTMLayer;
|
|
85
135
|
denseLayers: Layer[];
|
|
86
136
|
private _acts;
|
|
87
|
-
constructor(inputSize: number, hiddenSize: number, denseStructure: number[]);
|
|
137
|
+
constructor(inputSize: number, hiddenSize: number, denseStructure: number[], options?: NetworkLSTMOptions);
|
|
88
138
|
resetState(): void;
|
|
89
139
|
predict(inputs: number[]): number[];
|
|
90
140
|
train(targets: number[][], lr: number): void;
|
|
@@ -115,4 +165,10 @@ declare class NetworkLSTM {
|
|
|
115
165
|
setWeights(data: ReturnType<NetworkLSTM["getWeights"]>): void;
|
|
116
166
|
}
|
|
117
167
|
|
|
118
|
-
|
|
168
|
+
declare function mse(predicted: number[], actual: number[]): number;
|
|
169
|
+
declare function crossEntropy(predicted: number[], actual: number[]): number;
|
|
170
|
+
declare function mseDelta(predicted: number, actual: number): number;
|
|
171
|
+
declare function crossEntropyDelta(predicted: number, actual: number): number;
|
|
172
|
+
declare function crossEntropyDeltaRaw(predicted: number, actual: number): number;
|
|
173
|
+
|
|
174
|
+
export { type Activation, Adam, LSTMLayer, Layer, Momentum, Network, NetworkLSTM, type NetworkLSTMOptions, NetworkN, type NetworkNOptions, Neuron, NeuronN, type Optimizer, type OptimizerFactory, SGD, crossEntropy, crossEntropyDelta, crossEntropyDeltaRaw, elu, leakyRelu, linear, makeElu, makeLeakyRelu, mse, mseDelta, relu, sigmoid, tanh };
|
package/dist/index.d.ts
CHANGED
|
@@ -6,17 +6,59 @@ declare class Neuron {
|
|
|
6
6
|
train(input: number, target: number, lr: number): void;
|
|
7
7
|
}
|
|
8
8
|
|
|
9
|
+
interface Activation {
|
|
10
|
+
fn(x: number): number;
|
|
11
|
+
dfn(out: number): number;
|
|
12
|
+
}
|
|
13
|
+
declare const sigmoid: Activation;
|
|
14
|
+
declare const tanh: Activation;
|
|
15
|
+
declare const relu: Activation;
|
|
16
|
+
declare const linear: Activation;
|
|
17
|
+
declare function makeLeakyRelu(alpha?: number): Activation;
|
|
18
|
+
/** Leaky ReLU with the standard α = 0.01. */
|
|
19
|
+
declare const leakyRelu: Activation;
|
|
20
|
+
declare function makeElu(alpha?: number): Activation;
|
|
21
|
+
/** ELU with the standard α = 1.0. */
|
|
22
|
+
declare const elu: Activation;
|
|
23
|
+
|
|
24
|
+
interface Optimizer {
|
|
25
|
+
step(weight: number, gradient: number, lr: number): number;
|
|
26
|
+
}
|
|
27
|
+
type OptimizerFactory = () => Optimizer;
|
|
28
|
+
declare class SGD implements Optimizer {
|
|
29
|
+
step(weight: number, gradient: number, lr: number): number;
|
|
30
|
+
}
|
|
31
|
+
declare class Momentum implements Optimizer {
|
|
32
|
+
readonly beta: number;
|
|
33
|
+
private v;
|
|
34
|
+
constructor(beta?: number);
|
|
35
|
+
step(weight: number, gradient: number, lr: number): number;
|
|
36
|
+
}
|
|
37
|
+
declare class Adam implements Optimizer {
|
|
38
|
+
readonly beta1: number;
|
|
39
|
+
readonly beta2: number;
|
|
40
|
+
readonly epsilon: number;
|
|
41
|
+
private m;
|
|
42
|
+
private v;
|
|
43
|
+
private t;
|
|
44
|
+
constructor(beta1?: number, beta2?: number, epsilon?: number);
|
|
45
|
+
step(weight: number, gradient: number, lr: number): number;
|
|
46
|
+
}
|
|
47
|
+
|
|
9
48
|
declare class NeuronN {
|
|
10
49
|
weights: number[];
|
|
11
50
|
bias: number;
|
|
12
|
-
|
|
51
|
+
readonly activation: Activation;
|
|
52
|
+
private _opts;
|
|
53
|
+
constructor(nInputs: number, activation?: Activation, optimizerFactory?: OptimizerFactory);
|
|
13
54
|
predict(inputs: number[]): number;
|
|
55
|
+
_update(weightGrads: number[], biasGrad: number, lr: number): void;
|
|
14
56
|
train(inputs: number[], target: number, lr: number): void;
|
|
15
57
|
}
|
|
16
58
|
|
|
17
59
|
declare class Layer {
|
|
18
60
|
neurons: NeuronN[];
|
|
19
|
-
constructor(nNeurons: number, nInputs: number);
|
|
61
|
+
constructor(nNeurons: number, nInputs: number, activation?: Activation, optimizerFactory?: OptimizerFactory);
|
|
20
62
|
predict(inputs: number[]): number[];
|
|
21
63
|
}
|
|
22
64
|
|
|
@@ -28,10 +70,14 @@ declare class Network {
|
|
|
28
70
|
train(inputs: number[], target: number, lr: number): number;
|
|
29
71
|
}
|
|
30
72
|
|
|
73
|
+
interface NetworkNOptions {
|
|
74
|
+
activations?: Activation[];
|
|
75
|
+
optimizer?: OptimizerFactory;
|
|
76
|
+
}
|
|
31
77
|
declare class NetworkN {
|
|
32
78
|
readonly structure: number[];
|
|
33
79
|
layers: Layer[];
|
|
34
|
-
constructor(structure: number[]);
|
|
80
|
+
constructor(structure: number[], options?: NetworkNOptions);
|
|
35
81
|
predict(inputs: number[]): number[];
|
|
36
82
|
train(inputs: number[], targets: number[], lr: number): number;
|
|
37
83
|
trainWithDeltas(inputs: number[], outputDeltas: number[], lr: number): void;
|
|
@@ -78,13 +124,17 @@ declare class LSTMLayer {
|
|
|
78
124
|
setWeights(data: ReturnType<LSTMLayer["getWeights"]>): void;
|
|
79
125
|
}
|
|
80
126
|
|
|
127
|
+
interface NetworkLSTMOptions {
|
|
128
|
+
denseActivation?: Activation;
|
|
129
|
+
optimizer?: OptimizerFactory;
|
|
130
|
+
}
|
|
81
131
|
declare class NetworkLSTM {
|
|
82
132
|
readonly inputSize: number;
|
|
83
133
|
readonly hiddenSize: number;
|
|
84
134
|
lstm: LSTMLayer;
|
|
85
135
|
denseLayers: Layer[];
|
|
86
136
|
private _acts;
|
|
87
|
-
constructor(inputSize: number, hiddenSize: number, denseStructure: number[]);
|
|
137
|
+
constructor(inputSize: number, hiddenSize: number, denseStructure: number[], options?: NetworkLSTMOptions);
|
|
88
138
|
resetState(): void;
|
|
89
139
|
predict(inputs: number[]): number[];
|
|
90
140
|
train(targets: number[][], lr: number): void;
|
|
@@ -115,4 +165,10 @@ declare class NetworkLSTM {
|
|
|
115
165
|
setWeights(data: ReturnType<NetworkLSTM["getWeights"]>): void;
|
|
116
166
|
}
|
|
117
167
|
|
|
118
|
-
|
|
168
|
+
declare function mse(predicted: number[], actual: number[]): number;
|
|
169
|
+
declare function crossEntropy(predicted: number[], actual: number[]): number;
|
|
170
|
+
declare function mseDelta(predicted: number, actual: number): number;
|
|
171
|
+
declare function crossEntropyDelta(predicted: number, actual: number): number;
|
|
172
|
+
declare function crossEntropyDeltaRaw(predicted: number, actual: number): number;
|
|
173
|
+
|
|
174
|
+
export { type Activation, Adam, LSTMLayer, Layer, Momentum, Network, NetworkLSTM, type NetworkLSTMOptions, NetworkN, type NetworkNOptions, Neuron, NeuronN, type Optimizer, type OptimizerFactory, SGD, crossEntropy, crossEntropyDelta, crossEntropyDeltaRaw, elu, leakyRelu, linear, makeElu, makeLeakyRelu, mse, mseDelta, relu, sigmoid, tanh };
|
package/dist/index.js
CHANGED
|
@@ -20,13 +20,29 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
20
20
|
// src/index.ts
|
|
21
21
|
var index_exports = {};
|
|
22
22
|
__export(index_exports, {
|
|
23
|
+
Adam: () => Adam,
|
|
23
24
|
LSTMLayer: () => LSTMLayer,
|
|
24
25
|
Layer: () => Layer,
|
|
26
|
+
Momentum: () => Momentum,
|
|
25
27
|
Network: () => Network,
|
|
26
28
|
NetworkLSTM: () => NetworkLSTM,
|
|
27
29
|
NetworkN: () => NetworkN,
|
|
28
30
|
Neuron: () => Neuron,
|
|
29
|
-
NeuronN: () => NeuronN
|
|
31
|
+
NeuronN: () => NeuronN,
|
|
32
|
+
SGD: () => SGD,
|
|
33
|
+
crossEntropy: () => crossEntropy,
|
|
34
|
+
crossEntropyDelta: () => crossEntropyDelta,
|
|
35
|
+
crossEntropyDeltaRaw: () => crossEntropyDeltaRaw,
|
|
36
|
+
elu: () => elu,
|
|
37
|
+
leakyRelu: () => leakyRelu,
|
|
38
|
+
linear: () => linear,
|
|
39
|
+
makeElu: () => makeElu,
|
|
40
|
+
makeLeakyRelu: () => makeLeakyRelu,
|
|
41
|
+
mse: () => mse,
|
|
42
|
+
mseDelta: () => mseDelta,
|
|
43
|
+
relu: () => relu,
|
|
44
|
+
sigmoid: () => sigmoid2,
|
|
45
|
+
tanh: () => tanh
|
|
30
46
|
});
|
|
31
47
|
module.exports = __toCommonJS(index_exports);
|
|
32
48
|
|
|
@@ -50,32 +66,111 @@ var Neuron = class {
|
|
|
50
66
|
}
|
|
51
67
|
};
|
|
52
68
|
|
|
53
|
-
// src/
|
|
54
|
-
|
|
55
|
-
|
|
69
|
+
// src/activations.ts
//
// Every activation is a plain `{ fn, dfn }` pair:
//   fn(x)    - maps the pre-activation sum to the neuron output.
//   dfn(out) - derivative written in terms of the neuron's OUTPUT (out = fn(x)),
//              not in terms of x.
// `var` is kept on purpose: this is bundler output and other sections of the
// bundle refer to these bindings by name (e.g. `sigmoid2` as a default activation).

/** Logistic sigmoid. */
var sigmoid2 = {
  fn: (x) => 1 / (1 + Math.exp(-x)),
  // sigma'(x) = sigma(x) * (1 - sigma(x)), expressed through the output.
  dfn: (out) => out * (1 - out)
};

/** Hyperbolic tangent. */
var tanh = {
  // Math.tanh saturates to +/-1 for large |x|. The earlier hand-rolled form
  // (e^{2x} - 1) / (e^{2x} + 1) evaluated Infinity / Infinity = NaN as soon as
  // Math.exp(2 * x) overflowed.
  fn: (x) => Math.tanh(x),
  // tanh'(x) = 1 - tanh(x)^2, expressed through the output.
  dfn: (out) => 1 - out * out
};

/** Rectified linear unit. */
var relu = {
  fn: (x) => Math.max(0, x),
  // The output is positive exactly when the input was positive.
  dfn: (out) => out > 0 ? 1 : 0
};

/** Identity activation. */
var linear = {
  fn: (x) => x,
  dfn: () => 1
};

/**
 * Builds a Leaky ReLU activation.
 * @param {number} [alpha=0.01] Slope applied to non-positive inputs.
 * @returns {{fn: (x: number) => number, dfn: (out: number) => number}}
 */
function makeLeakyRelu(alpha = 0.01) {
  return {
    fn: (x) => x > 0 ? x : alpha * x,
    dfn: (out) => out > 0 ? 1 : alpha
  };
}

/** Leaky ReLU with alpha = 0.01. */
var leakyRelu = makeLeakyRelu(0.01);

/**
 * Builds an ELU activation.
 * @param {number} [alpha=1] Scale of the exponential branch used for non-positive inputs.
 * @returns {{fn: (x: number) => number, dfn: (out: number) => number}}
 */
function makeElu(alpha = 1) {
  return {
    fn: (x) => x > 0 ? x : alpha * (Math.exp(x) - 1),
    // On the exponential branch out = alpha * (e^x - 1), so the derivative
    // alpha * e^x equals out + alpha.
    dfn: (out) => out > 0 ? 1 : out + alpha
  };
}

/** ELU with alpha = 1. */
var elu = makeElu(1);
|
|
103
|
+
|
|
104
|
+
// src/optimizers.ts
//
// Every optimizer exposes step(weight, gradient, lr) and returns the new value
// of that single parameter. Updates are ADDED to the weight, so `gradient` is
// the direction the caller wants to move in.

/** Stateless update: weight + lr * gradient. */
var SGD = class {
  step(weight, gradient, lr) {
    const change = lr * gradient;
    return weight + change;
  }
};

/** Update with a velocity term that persists between calls on the same instance. */
var Momentum = class {
  /** @param {number} [beta=0.9] Fraction of the previous velocity carried over. */
  constructor(beta = 0.9) {
    Object.assign(this, { beta, v: 0 });
  }
  step(weight, gradient, lr) {
    const velocity = this.beta * this.v + lr * gradient;
    this.v = velocity;
    return weight + velocity;
  }
};

/** Adam update with bias-corrected first and second moment estimates. */
var Adam = class {
  /**
   * @param {number} [beta1=0.9] Decay rate of the first moment.
   * @param {number} [beta2=0.999] Decay rate of the second moment.
   * @param {number} [epsilon=1e-8] Added to the denominator to avoid division by zero.
   */
  constructor(beta1 = 0.9, beta2 = 0.999, epsilon = 1e-8) {
    Object.assign(this, { beta1, beta2, epsilon, m: 0, v: 0, t: 0 });
  }
  step(weight, gradient, lr) {
    const { beta1, beta2, epsilon } = this;
    this.t += 1;
    const firstMoment = beta1 * this.m + (1 - beta1) * gradient;
    const secondMoment = beta2 * this.v + (1 - beta2) * gradient * gradient;
    this.m = firstMoment;
    this.v = secondMoment;
    // Bias correction uses the step counter incremented above.
    const mHat = firstMoment / (1 - Math.pow(beta1, this.t));
    const vHat = secondMoment / (1 - Math.pow(beta2, this.t));
    return weight + lr * mHat / (Math.sqrt(vHat) + epsilon);
  }
};
|
|
138
|
+
|
|
139
|
+
// src/NeuronN.ts
var defaultOptimizer = () => new SGD();

/** Neuron with `nInputs` weights, one bias, a configurable activation and per-parameter optimizers. */
var NeuronN = class {
  /**
   * @param {number} nInputs Number of inputs, i.e. number of weights.
   * @param {{fn: Function, dfn: Function}} [activation=sigmoid2] Activation applied to the weighted sum.
   * @param {() => {step: Function}} [optimizerFactory] Zero-argument factory; called once per
   *   weight plus once for the bias so every parameter owns an independent optimizer.
   */
  constructor(nInputs, activation = sigmoid2, optimizerFactory = defaultOptimizer) {
    // Weights start uniformly in [-sqrt(1/nInputs), +sqrt(1/nInputs)].
    const limit = Math.sqrt(1 / nInputs);
    this.weights = Array.from({ length: nInputs }, () => (Math.random() * 2 - 1) * limit);
    this.bias = 0;
    this.activation = activation;
    // Wrap the factory: handing it to Array.from directly would call it with
    // (undefined, index), so a factory that forwards its parameters would
    // receive the slot index as a hyperparameter.
    this._opts = Array.from({ length: nInputs + 1 }, () => optimizerFactory());
  }

  /**
   * @param {number[]} inputs One value per weight.
   * @returns {number} activation.fn(bias + sum(inputs[i] * weights[i])).
   */
  predict(inputs) {
    const sum = inputs.reduce((acc, e, i) => acc + e * this.weights[i], this.bias);
    return this.activation.fn(sum);
  }

  /**
   * Applies pre-computed gradients through the per-parameter optimizers.
   * Called internally by Layer / NetworkN / NetworkLSTM during backprop.
   * @param {number[]} weightGrads One gradient per weight.
   * @param {number} biasGrad Gradient for the bias.
   * @param {number} lr Learning rate forwarded to every optimizer step.
   */
  _update(weightGrads, biasGrad, lr) {
    this.weights = this.weights.map((w, i) => this._opts[i].step(w, weightGrads[i], lr));
    // The last optimizer slot belongs to the bias.
    this.bias = this._opts[this.weights.length].step(this.bias, biasGrad, lr);
  }

  /**
   * Single training step on one sample. The raw error (target - prediction)
   * is used as the delta; the activation derivative is not applied here.
   * @param {number[]} inputs
   * @param {number} target
   * @param {number} lr
   */
  train(inputs, target, lr) {
    const prediction = this.predict(inputs);
    const error = target - prediction;
    this._update(inputs.map((inp) => error * inp), error, lr);
  }
};
|
|
74
165
|
|
|
75
166
|
// src/Layer.ts
|
|
167
|
+
var defaultOptimizer2 = () => new SGD();
|
|
76
168
|
var Layer = class {
|
|
77
|
-
constructor(nNeurons, nInputs) {
|
|
78
|
-
this.neurons = Array.from(
|
|
169
|
+
constructor(nNeurons, nInputs, activation = sigmoid2, optimizerFactory = defaultOptimizer2) {
|
|
170
|
+
this.neurons = Array.from(
|
|
171
|
+
{ length: nNeurons },
|
|
172
|
+
() => new NeuronN(nInputs, activation, optimizerFactory)
|
|
173
|
+
);
|
|
79
174
|
}
|
|
80
175
|
predict(inputs) {
|
|
81
176
|
return this.neurons.map((n) => n.predict(inputs));
|
|
@@ -115,12 +210,16 @@ var Network = class {
|
|
|
115
210
|
};
|
|
116
211
|
|
|
117
212
|
// src/NetworkN.ts
|
|
213
|
+
var defaultOptimizer3 = () => new SGD();
|
|
118
214
|
var NetworkN = class {
|
|
119
|
-
constructor(structure) {
|
|
215
|
+
constructor(structure, options = {}) {
|
|
120
216
|
this.structure = structure;
|
|
217
|
+
const nLayers = structure.length - 1;
|
|
218
|
+
const activations = options.activations ?? Array.from({ length: nLayers }, () => sigmoid2);
|
|
219
|
+
const optimizer = options.optimizer ?? defaultOptimizer3;
|
|
121
220
|
this.layers = [];
|
|
122
221
|
for (let i = 1; i < structure.length; i++) {
|
|
123
|
-
this.layers.push(new Layer(structure[i], structure[i - 1]));
|
|
222
|
+
this.layers.push(new Layer(structure[i], structure[i - 1], activations[i - 1], optimizer));
|
|
124
223
|
}
|
|
125
224
|
}
|
|
126
225
|
predict(inputs) {
|
|
@@ -132,17 +231,18 @@ var NetworkN = class {
|
|
|
132
231
|
const act = [inputs];
|
|
133
232
|
for (const layer of this.layers) act.push(layer.predict(act[act.length - 1]));
|
|
134
233
|
const pred = act[act.length - 1];
|
|
135
|
-
|
|
234
|
+
const outAct = this.layers[this.layers.length - 1].neurons[0].activation;
|
|
235
|
+
let deltas = pred.map((p, i) => (targets[i] - p) * outAct.dfn(p));
|
|
136
236
|
for (let l = this.layers.length - 1; l >= 0; l--) {
|
|
137
237
|
const layer = this.layers[l];
|
|
138
238
|
const layerIn = act[l];
|
|
239
|
+
const prevAct = l > 0 ? this.layers[l - 1].neurons[0].activation : null;
|
|
139
240
|
const prevDeltas = layerIn.map((out, j) => {
|
|
140
241
|
const errProp = layer.neurons.reduce((s, n, k) => s + deltas[k] * n.weights[j], 0);
|
|
141
|
-
return
|
|
242
|
+
return prevAct ? errProp * prevAct.dfn(out) : errProp;
|
|
142
243
|
});
|
|
143
244
|
layer.neurons.forEach((n, k) => {
|
|
144
|
-
n.
|
|
145
|
-
n.bias += lr * deltas[k];
|
|
245
|
+
n._update(layerIn.map((inp) => deltas[k] * inp), deltas[k], lr);
|
|
146
246
|
});
|
|
147
247
|
deltas = prevDeltas;
|
|
148
248
|
}
|
|
@@ -157,13 +257,13 @@ var NetworkN = class {
|
|
|
157
257
|
for (let l = this.layers.length - 1; l >= 0; l--) {
|
|
158
258
|
const layer = this.layers[l];
|
|
159
259
|
const layerIn = act[l];
|
|
260
|
+
const prevAct = l > 0 ? this.layers[l - 1].neurons[0].activation : null;
|
|
160
261
|
const prevDeltas = layerIn.map((out, j) => {
|
|
161
262
|
const errProp = layer.neurons.reduce((s, n, k) => s + deltas[k] * n.weights[j], 0);
|
|
162
|
-
return
|
|
263
|
+
return prevAct ? errProp * prevAct.dfn(out) : errProp;
|
|
163
264
|
});
|
|
164
265
|
layer.neurons.forEach((n, k) => {
|
|
165
|
-
n.
|
|
166
|
-
n.bias += lr * deltas[k];
|
|
266
|
+
n._update(layerIn.map((inp) => deltas[k] * inp), deltas[k], lr);
|
|
167
267
|
});
|
|
168
268
|
deltas = prevDeltas;
|
|
169
269
|
}
|
|
@@ -174,7 +274,7 @@ var NetworkN = class {
|
|
|
174
274
|
function sigmoid3(x) {
|
|
175
275
|
return 1 / (1 + Math.exp(-x));
|
|
176
276
|
}
|
|
177
|
-
function
|
|
277
|
+
function tanh2(x) {
|
|
178
278
|
const e = Math.exp(2 * x);
|
|
179
279
|
return (e - 1) / (e + 1);
|
|
180
280
|
}
|
|
@@ -223,10 +323,10 @@ var LSTMLayer = class {
|
|
|
223
323
|
const zo = this.outputGate.linear(combined);
|
|
224
324
|
const zf_a = zf.map(sigmoid3);
|
|
225
325
|
const zi_a = zi.map(sigmoid3);
|
|
226
|
-
const zg_a = zg.map(
|
|
326
|
+
const zg_a = zg.map(tanh2);
|
|
227
327
|
const zo_a = zo.map(sigmoid3);
|
|
228
328
|
const c = c_prev.map((cv, k) => zf_a[k] * cv + zi_a[k] * zg_a[k]);
|
|
229
|
-
const h = zo_a.map((o, k) => o *
|
|
329
|
+
const h = zo_a.map((o, k) => o * tanh2(c[k]));
|
|
230
330
|
this._traj.push({ combined, c_prev, zf, zf_a, zi, zi_a, zg, zg_a, zo, zo_a, c, h });
|
|
231
331
|
this.h = h;
|
|
232
332
|
this.c = c;
|
|
@@ -254,7 +354,7 @@ var LSTMLayer = class {
|
|
|
254
354
|
for (let t = T - 1; t >= 0; t--) {
|
|
255
355
|
const s = this._traj[t];
|
|
256
356
|
const dh = dh_seq[t].map((d, k) => d + dh_next[k]);
|
|
257
|
-
const tanh_c = s.c.map(
|
|
357
|
+
const tanh_c = s.c.map(tanh2);
|
|
258
358
|
const do_a = dh.map((d, k) => d * tanh_c[k]);
|
|
259
359
|
const dc = dh.map(
|
|
260
360
|
(d, k) => d * s.zo_a[k] * (1 - tanh_c[k] ** 2) + dc_next[k]
|
|
@@ -323,16 +423,19 @@ var LSTMLayer = class {
|
|
|
323
423
|
};
|
|
324
424
|
|
|
325
425
|
// src/NetworkLSTM.ts
|
|
426
|
+
var defaultOptimizer4 = () => new SGD();
|
|
326
427
|
var NetworkLSTM = class {
|
|
327
428
|
// [T][layer+1][neuron]
|
|
328
|
-
constructor(inputSize, hiddenSize, denseStructure) {
|
|
429
|
+
constructor(inputSize, hiddenSize, denseStructure, options = {}) {
|
|
329
430
|
this.inputSize = inputSize;
|
|
330
431
|
this.hiddenSize = hiddenSize;
|
|
331
432
|
this.lstm = new LSTMLayer(inputSize, hiddenSize);
|
|
433
|
+
const activation = options.denseActivation ?? sigmoid2;
|
|
434
|
+
const optimizer = options.optimizer ?? defaultOptimizer4;
|
|
332
435
|
this.denseLayers = [];
|
|
333
436
|
const sizes = [hiddenSize, ...denseStructure];
|
|
334
437
|
for (let i = 1; i < sizes.length; i++) {
|
|
335
|
-
this.denseLayers.push(new Layer(sizes[i], sizes[i - 1]));
|
|
438
|
+
this.denseLayers.push(new Layer(sizes[i], sizes[i - 1], activation, optimizer));
|
|
336
439
|
}
|
|
337
440
|
this._acts = [];
|
|
338
441
|
}
|
|
@@ -365,14 +468,16 @@ var NetworkLSTM = class {
|
|
|
365
468
|
for (let t = 0; t < T; t++) {
|
|
366
469
|
const acts = this._acts[t];
|
|
367
470
|
const pred = acts[acts.length - 1];
|
|
368
|
-
|
|
471
|
+
const outAct = this.denseLayers[this.denseLayers.length - 1].neurons[0].activation;
|
|
472
|
+
let deltas = pred.map((p, i) => (targets[t][i] - p) * outAct.dfn(p));
|
|
369
473
|
for (let l = this.denseLayers.length - 1; l >= 0; l--) {
|
|
370
474
|
const layer = this.denseLayers[l];
|
|
371
475
|
const layerIn = acts[l];
|
|
372
476
|
const grad = denseGrads[l];
|
|
477
|
+
const prevAct = l > 0 ? this.denseLayers[l - 1].neurons[0].activation : null;
|
|
373
478
|
const prevDeltas = layerIn.map((out, j) => {
|
|
374
479
|
const errProp = layer.neurons.reduce((s, n, k) => s + deltas[k] * n.weights[j], 0);
|
|
375
|
-
return
|
|
480
|
+
return prevAct ? errProp * prevAct.dfn(out) : errProp;
|
|
376
481
|
});
|
|
377
482
|
layer.neurons.forEach((n, k) => {
|
|
378
483
|
n.weights.forEach((_, j) => {
|
|
@@ -388,8 +493,11 @@ var NetworkLSTM = class {
|
|
|
388
493
|
const layer = this.denseLayers[l];
|
|
389
494
|
const grad = denseGrads[l];
|
|
390
495
|
layer.neurons.forEach((n, k) => {
|
|
391
|
-
n.
|
|
392
|
-
|
|
496
|
+
n._update(
|
|
497
|
+
grad.dW[k].map((g) => g / T),
|
|
498
|
+
grad.db[k] / T,
|
|
499
|
+
lr
|
|
500
|
+
);
|
|
393
501
|
});
|
|
394
502
|
}
|
|
395
503
|
this.lstm.backprop(dh_seq, lr);
|
|
@@ -414,13 +522,52 @@ var NetworkLSTM = class {
|
|
|
414
522
|
});
|
|
415
523
|
}
|
|
416
524
|
};
|
|
525
|
+
|
|
526
|
+
// src/losses.ts

/**
 * Mean squared error over the elements of `predicted`.
 * @param {number[]} predicted
 * @param {number[]} actual Read at the same indices as `predicted`.
 * @returns {number}
 */
function mse(predicted, actual) {
  let total = 0;
  predicted.forEach((p, i) => {
    total = total + (actual[i] - p) ** 2;
  });
  return total / predicted.length;
}

/**
 * Mean binary cross-entropy over the elements of `predicted`.
 * Predictions are clipped to [1e-15, 1 - 1e-15] before taking logarithms.
 * @param {number[]} predicted
 * @param {number[]} actual Read at the same indices as `predicted`.
 * @returns {number}
 */
function crossEntropy(predicted, actual) {
  const eps = 1e-15;
  let logSum = 0;
  predicted.forEach((p, i) => {
    const clipped = Math.max(eps, Math.min(1 - eps, p));
    logSum = logSum + actual[i] * Math.log(clipped) + (1 - actual[i]) * Math.log(1 - clipped);
  });
  return -logSum / predicted.length;
}

/** Output delta for MSE: actual - predicted. */
function mseDelta(predicted, actual) {
  const delta = actual - predicted;
  return delta;
}

/** Output delta for cross-entropy; the same expression as mseDelta: actual - predicted. */
function crossEntropyDelta(predicted, actual) {
  const delta = actual - predicted;
  return delta;
}

/**
 * actual / p - (1 - actual) / (1 - p), with p = predicted clipped to
 * [1e-15, 1 - 1e-15] so neither denominator is zero.
 */
function crossEntropyDeltaRaw(predicted, actual) {
  const eps = 1e-15;
  const p = Math.max(eps, Math.min(1 - eps, predicted));
  const positiveTerm = actual / p;
  const negativeTerm = (1 - actual) / (1 - p);
  return positiveTerm - negativeTerm;
}
|
|
417
548
|
// Annotate the CommonJS export names for ESM import in node:
|
|
418
549
|
0 && (module.exports = {
|
|
550
|
+
Adam,
|
|
419
551
|
LSTMLayer,
|
|
420
552
|
Layer,
|
|
553
|
+
Momentum,
|
|
421
554
|
Network,
|
|
422
555
|
NetworkLSTM,
|
|
423
556
|
NetworkN,
|
|
424
557
|
Neuron,
|
|
425
|
-
NeuronN
|
|
558
|
+
NeuronN,
|
|
559
|
+
SGD,
|
|
560
|
+
crossEntropy,
|
|
561
|
+
crossEntropyDelta,
|
|
562
|
+
crossEntropyDeltaRaw,
|
|
563
|
+
elu,
|
|
564
|
+
leakyRelu,
|
|
565
|
+
linear,
|
|
566
|
+
makeElu,
|
|
567
|
+
makeLeakyRelu,
|
|
568
|
+
mse,
|
|
569
|
+
mseDelta,
|
|
570
|
+
relu,
|
|
571
|
+
sigmoid,
|
|
572
|
+
tanh
|
|
426
573
|
});
|
package/dist/index.mjs
CHANGED
|
@@ -18,32 +18,111 @@ var Neuron = class {
|
|
|
18
18
|
}
|
|
19
19
|
};
|
|
20
20
|
|
|
21
|
-
// src/
|
|
22
|
-
|
|
23
|
-
|
|
21
|
+
// src/activations.ts
|
|
22
|
+
var sigmoid2 = {
|
|
23
|
+
fn: (x) => 1 / (1 + Math.exp(-x)),
|
|
24
|
+
dfn: (out) => out * (1 - out)
|
|
25
|
+
};
|
|
26
|
+
var tanh = {
|
|
27
|
+
fn: (x) => {
|
|
28
|
+
const e = Math.exp(2 * x);
|
|
29
|
+
return (e - 1) / (e + 1);
|
|
30
|
+
},
|
|
31
|
+
dfn: (out) => 1 - out * out
|
|
32
|
+
};
|
|
33
|
+
var relu = {
|
|
34
|
+
fn: (x) => Math.max(0, x),
|
|
35
|
+
dfn: (out) => out > 0 ? 1 : 0
|
|
36
|
+
};
|
|
37
|
+
/**
 * Identity activation.
 *
 * `fn` returns its input unchanged; `dfn` ignores its argument and always
 * returns 1.
 */
var linear = {
  fn(z) {
    return z;
  },
  dfn() {
    return 1;
  }
};
|
|
41
|
+
/**
 * Builds a leaky-ReLU activation with the given negative-side slope.
 *
 * `fn` returns `x` for positive inputs and `alpha * x` otherwise.
 * `dfn` takes the activation OUTPUT and returns 1 when it is strictly
 * positive, otherwise `alpha`.
 *
 * @param {number} [alpha=0.01] - Slope applied to non-positive inputs.
 * @returns {{fn: function(number): number, dfn: function(number): number}}
 */
function makeLeakyRelu(alpha = 0.01) {
  const fn = (x) => {
    if (x > 0) {
      return x;
    }
    return alpha * x;
  };
  const dfn = (out) => {
    if (out > 0) {
      return 1;
    }
    return alpha;
  };
  return { fn, dfn };
}

/** Leaky ReLU with a slope of 0.01 on the negative side. */
var leakyRelu = makeLeakyRelu(0.01);
|
|
48
|
+
/**
 * Builds an ELU activation with the given scale for the negative side.
 *
 * `fn` returns `x` for positive inputs and `alpha * (e^x - 1)` otherwise.
 * `dfn` takes the activation OUTPUT and returns 1 when it is strictly
 * positive, otherwise `out + alpha` (which equals `alpha * e^x` on the
 * negative side).
 *
 * @param {number} [alpha=1] - Scale of the exponential branch.
 * @returns {{fn: function(number): number, dfn: function(number): number}}
 */
function makeElu(alpha = 1) {
  return {
    // Math.expm1 keeps precision for inputs close to zero, where
    // Math.exp(x) - 1 cancels catastrophically and collapses to 0.
    fn: (x) => x > 0 ? x : alpha * Math.expm1(x),
    dfn: (out) => out > 0 ? 1 : out + alpha
  };
}

/** ELU with alpha = 1. */
var elu = makeElu(1);
|
|
55
|
+
|
|
56
|
+
// src/optimizers.ts
|
|
57
|
+
/**
 * Stateless optimizer: moves a weight by `lr * gradient`.
 *
 * Note the sign: the update is ADDED to the weight, so callers pass a value
 * that already points in the direction the weight should move.
 */
var SGD = class {
  /**
   * @param {number} weight - Current parameter value.
   * @param {number} gradient - Update direction for this parameter.
   * @param {number} lr - Learning rate.
   * @returns {number} The updated parameter value.
   */
  step(weight, gradient, lr) {
    const delta = lr * gradient;
    return weight + delta;
  }
};
|
|
62
|
+
/**
 * Momentum optimizer for a single scalar parameter.
 *
 * Each instance keeps one velocity value `v`, so one instance must be used
 * per weight. As with SGD, the velocity is ADDED to the weight.
 */
var Momentum = class {
  /**
   * @param {number} [beta=0.9] - Fraction of the previous velocity retained.
   */
  constructor(beta = 0.9) {
    Object.assign(this, { beta, v: 0 });
  }

  /**
   * Updates the velocity and returns the moved weight.
   *
   * @param {number} weight - Current parameter value.
   * @param {number} gradient - Update direction for this parameter.
   * @param {number} lr - Learning rate.
   * @returns {number} `weight` plus the new velocity.
   */
  step(weight, gradient, lr) {
    const velocity = this.beta * this.v + lr * gradient;
    this.v = velocity;
    return weight + velocity;
  }
};
|
|
72
|
+
/**
 * Adam optimizer for a single scalar parameter.
 *
 * Each instance tracks its own first moment `m`, second moment `v` and step
 * counter `t`, so one instance must be used per weight. The bias-corrected
 * update is ADDED to the weight.
 */
var Adam = class {
  /**
   * @param {number} [beta1=0.9] - Decay rate of the first-moment estimate.
   * @param {number} [beta2=0.999] - Decay rate of the second-moment estimate.
   * @param {number} [epsilon=1e-8] - Added to the denominator to avoid
   *   division by zero.
   */
  constructor(beta1 = 0.9, beta2 = 0.999, epsilon = 1e-8) {
    Object.assign(this, { beta1, beta2, epsilon, m: 0, v: 0, t: 0 });
  }

  /**
   * Advances the moment estimates by one step and returns the moved weight.
   *
   * @param {number} weight - Current parameter value.
   * @param {number} gradient - Update direction for this parameter.
   * @param {number} lr - Learning rate.
   * @returns {number} The updated parameter value.
   */
  step(weight, gradient, lr) {
    const { beta1, beta2, epsilon } = this;
    const stepCount = this.t + 1;
    const firstMoment = beta1 * this.m + (1 - beta1) * gradient;
    const secondMoment = beta2 * this.v + (1 - beta2) * gradient * gradient;

    this.t = stepCount;
    this.m = firstMoment;
    this.v = secondMoment;

    // Bias correction: both moments start at 0 and are biased toward it
    // during the first steps.
    const correctedFirst = firstMoment / (1 - beta1 ** stepCount);
    const correctedSecond = secondMoment / (1 - beta2 ** stepCount);
    return weight + (lr * correctedFirst) / (Math.sqrt(correctedSecond) + epsilon);
  }
};
|
|
90
|
+
|
|
91
|
+
// src/NeuronN.ts
|
|
92
|
+
/** Default optimizer factory: one plain SGD instance per parameter. */
var defaultOptimizer = () => new SGD();

/**
 * Neuron with N inputs, a configurable activation and one optimizer instance
 * per trainable parameter (each weight plus the bias).
 */
var NeuronN = class {
  /**
   * @param {number} nInputs - Number of inputs (and therefore weights).
   * @param {{fn: function(number): number, dfn: function(number): number}} [activation]
   *   Activation object; defaults to sigmoid.
   * @param {function(): {step: function(number, number, number): number}} [optimizerFactory]
   *   Called once per parameter, with no arguments, to create its optimizer.
   */
  constructor(nInputs, activation = sigmoid2, optimizerFactory = defaultOptimizer) {
    // Weights are drawn uniformly from [-limit, limit) with limit = sqrt(1 / nInputs).
    const limit = Math.sqrt(1 / nInputs);
    this.weights = Array.from({ length: nInputs }, () => (Math.random() * 2 - 1) * limit);
    this.bias = 0;
    this.activation = activation;
    // Slots 0..nInputs-1 serve the weights; slot nInputs serves the bias.
    // The factory is wrapped so Array.from's (value, index) arguments are not
    // forwarded: a factory with defaulted positional parameters would
    // otherwise receive the slot index as its second argument.
    this._opts = Array.from({ length: nInputs + 1 }, () => optimizerFactory());
  }

  /**
   * Computes the activation of the weighted input sum plus bias.
   *
   * @param {number[]} inputs - One value per weight.
   * @returns {number} `activation.fn(bias + sum(inputs[i] * weights[i]))`.
   */
  predict(inputs) {
    const sum = inputs.reduce((acc, e, i) => acc + e * this.weights[i], this.bias);
    return this.activation.fn(sum);
  }

  // Apply pre-computed gradients via the optimizer.
  // Called internally by Layer / NetworkN / NetworkLSTM during backprop.
  /**
   * @param {number[]} weightGrads - Update direction for each weight.
   * @param {number} biasGrad - Update direction for the bias.
   * @param {number} lr - Learning rate handed to every optimizer.
   */
  _update(weightGrads, biasGrad, lr) {
    this.weights = this.weights.map((w, i) => this._opts[i].step(w, weightGrads[i], lr));
    this.bias = this._opts[this.weights.length].step(this.bias, biasGrad, lr);
  }

  /**
   * Runs one training step on a single example.
   *
   * The update direction is `(target - prediction) * input` for each weight
   * and `target - prediction` for the bias; the activation derivative is not
   * applied here.
   *
   * @param {number[]} inputs - One value per weight.
   * @param {number} target - Desired output.
   * @param {number} lr - Learning rate.
   */
  train(inputs, target, lr) {
    const prediction = this.predict(inputs);
    const error = target - prediction;
    this._update(inputs.map((inp) => error * inp), error, lr);
  }
};
|
|
42
117
|
|
|
43
118
|
// src/Layer.ts
|
|
119
|
+
var defaultOptimizer2 = () => new SGD();
|
|
44
120
|
var Layer = class {
|
|
45
|
-
constructor(nNeurons, nInputs) {
|
|
46
|
-
this.neurons = Array.from(
|
|
121
|
+
constructor(nNeurons, nInputs, activation = sigmoid2, optimizerFactory = defaultOptimizer2) {
|
|
122
|
+
this.neurons = Array.from(
|
|
123
|
+
{ length: nNeurons },
|
|
124
|
+
() => new NeuronN(nInputs, activation, optimizerFactory)
|
|
125
|
+
);
|
|
47
126
|
}
|
|
48
127
|
predict(inputs) {
|
|
49
128
|
return this.neurons.map((n) => n.predict(inputs));
|
|
@@ -83,12 +162,16 @@ var Network = class {
|
|
|
83
162
|
};
|
|
84
163
|
|
|
85
164
|
// src/NetworkN.ts
|
|
165
|
+
var defaultOptimizer3 = () => new SGD();
|
|
86
166
|
var NetworkN = class {
|
|
87
|
-
constructor(structure) {
|
|
167
|
+
constructor(structure, options = {}) {
|
|
88
168
|
this.structure = structure;
|
|
169
|
+
const nLayers = structure.length - 1;
|
|
170
|
+
const activations = options.activations ?? Array.from({ length: nLayers }, () => sigmoid2);
|
|
171
|
+
const optimizer = options.optimizer ?? defaultOptimizer3;
|
|
89
172
|
this.layers = [];
|
|
90
173
|
for (let i = 1; i < structure.length; i++) {
|
|
91
|
-
this.layers.push(new Layer(structure[i], structure[i - 1]));
|
|
174
|
+
this.layers.push(new Layer(structure[i], structure[i - 1], activations[i - 1], optimizer));
|
|
92
175
|
}
|
|
93
176
|
}
|
|
94
177
|
predict(inputs) {
|
|
@@ -100,17 +183,18 @@ var NetworkN = class {
|
|
|
100
183
|
const act = [inputs];
|
|
101
184
|
for (const layer of this.layers) act.push(layer.predict(act[act.length - 1]));
|
|
102
185
|
const pred = act[act.length - 1];
|
|
103
|
-
|
|
186
|
+
const outAct = this.layers[this.layers.length - 1].neurons[0].activation;
|
|
187
|
+
let deltas = pred.map((p, i) => (targets[i] - p) * outAct.dfn(p));
|
|
104
188
|
for (let l = this.layers.length - 1; l >= 0; l--) {
|
|
105
189
|
const layer = this.layers[l];
|
|
106
190
|
const layerIn = act[l];
|
|
191
|
+
const prevAct = l > 0 ? this.layers[l - 1].neurons[0].activation : null;
|
|
107
192
|
const prevDeltas = layerIn.map((out, j) => {
|
|
108
193
|
const errProp = layer.neurons.reduce((s, n, k) => s + deltas[k] * n.weights[j], 0);
|
|
109
|
-
return
|
|
194
|
+
return prevAct ? errProp * prevAct.dfn(out) : errProp;
|
|
110
195
|
});
|
|
111
196
|
layer.neurons.forEach((n, k) => {
|
|
112
|
-
n.
|
|
113
|
-
n.bias += lr * deltas[k];
|
|
197
|
+
n._update(layerIn.map((inp) => deltas[k] * inp), deltas[k], lr);
|
|
114
198
|
});
|
|
115
199
|
deltas = prevDeltas;
|
|
116
200
|
}
|
|
@@ -125,13 +209,13 @@ var NetworkN = class {
|
|
|
125
209
|
for (let l = this.layers.length - 1; l >= 0; l--) {
|
|
126
210
|
const layer = this.layers[l];
|
|
127
211
|
const layerIn = act[l];
|
|
212
|
+
const prevAct = l > 0 ? this.layers[l - 1].neurons[0].activation : null;
|
|
128
213
|
const prevDeltas = layerIn.map((out, j) => {
|
|
129
214
|
const errProp = layer.neurons.reduce((s, n, k) => s + deltas[k] * n.weights[j], 0);
|
|
130
|
-
return
|
|
215
|
+
return prevAct ? errProp * prevAct.dfn(out) : errProp;
|
|
131
216
|
});
|
|
132
217
|
layer.neurons.forEach((n, k) => {
|
|
133
|
-
n.
|
|
134
|
-
n.bias += lr * deltas[k];
|
|
218
|
+
n._update(layerIn.map((inp) => deltas[k] * inp), deltas[k], lr);
|
|
135
219
|
});
|
|
136
220
|
deltas = prevDeltas;
|
|
137
221
|
}
|
|
@@ -142,7 +226,7 @@ var NetworkN = class {
|
|
|
142
226
|
/**
 * Logistic sigmoid: `1 / (1 + e^-x)`.
 *
 * @param {number} x - Pre-activation value.
 * @returns {number} Value between 0 and 1.
 */
function sigmoid3(x) {
  const denominator = 1 + Math.exp(-x);
  return 1 / denominator;
}
|
|
145
|
-
function
|
|
229
|
+
/**
 * Hyperbolic tangent of `x`.
 *
 * Delegates to Math.tanh, which saturates at +/-1. The previous form
 * (e^2x - 1) / (e^2x + 1) overflowed to Infinity / Infinity = NaN once x
 * exceeded roughly 355.
 *
 * Only the first argument is used, so the function is safe to pass directly
 * to `Array.prototype.map`.
 *
 * @param {number} x - Input value.
 * @returns {number} Value in [-1, 1].
 */
function tanh2(x) {
  return Math.tanh(x);
}
|
|
@@ -191,10 +275,10 @@ var LSTMLayer = class {
|
|
|
191
275
|
const zo = this.outputGate.linear(combined);
|
|
192
276
|
const zf_a = zf.map(sigmoid3);
|
|
193
277
|
const zi_a = zi.map(sigmoid3);
|
|
194
|
-
const zg_a = zg.map(
|
|
278
|
+
const zg_a = zg.map(tanh2);
|
|
195
279
|
const zo_a = zo.map(sigmoid3);
|
|
196
280
|
const c = c_prev.map((cv, k) => zf_a[k] * cv + zi_a[k] * zg_a[k]);
|
|
197
|
-
const h = zo_a.map((o, k) => o *
|
|
281
|
+
const h = zo_a.map((o, k) => o * tanh2(c[k]));
|
|
198
282
|
this._traj.push({ combined, c_prev, zf, zf_a, zi, zi_a, zg, zg_a, zo, zo_a, c, h });
|
|
199
283
|
this.h = h;
|
|
200
284
|
this.c = c;
|
|
@@ -222,7 +306,7 @@ var LSTMLayer = class {
|
|
|
222
306
|
for (let t = T - 1; t >= 0; t--) {
|
|
223
307
|
const s = this._traj[t];
|
|
224
308
|
const dh = dh_seq[t].map((d, k) => d + dh_next[k]);
|
|
225
|
-
const tanh_c = s.c.map(
|
|
309
|
+
const tanh_c = s.c.map(tanh2);
|
|
226
310
|
const do_a = dh.map((d, k) => d * tanh_c[k]);
|
|
227
311
|
const dc = dh.map(
|
|
228
312
|
(d, k) => d * s.zo_a[k] * (1 - tanh_c[k] ** 2) + dc_next[k]
|
|
@@ -291,16 +375,19 @@ var LSTMLayer = class {
|
|
|
291
375
|
};
|
|
292
376
|
|
|
293
377
|
// src/NetworkLSTM.ts
|
|
378
|
+
var defaultOptimizer4 = () => new SGD();
|
|
294
379
|
var NetworkLSTM = class {
|
|
295
380
|
// [T][layer+1][neuron]
|
|
296
|
-
constructor(inputSize, hiddenSize, denseStructure) {
|
|
381
|
+
constructor(inputSize, hiddenSize, denseStructure, options = {}) {
|
|
297
382
|
this.inputSize = inputSize;
|
|
298
383
|
this.hiddenSize = hiddenSize;
|
|
299
384
|
this.lstm = new LSTMLayer(inputSize, hiddenSize);
|
|
385
|
+
const activation = options.denseActivation ?? sigmoid2;
|
|
386
|
+
const optimizer = options.optimizer ?? defaultOptimizer4;
|
|
300
387
|
this.denseLayers = [];
|
|
301
388
|
const sizes = [hiddenSize, ...denseStructure];
|
|
302
389
|
for (let i = 1; i < sizes.length; i++) {
|
|
303
|
-
this.denseLayers.push(new Layer(sizes[i], sizes[i - 1]));
|
|
390
|
+
this.denseLayers.push(new Layer(sizes[i], sizes[i - 1], activation, optimizer));
|
|
304
391
|
}
|
|
305
392
|
this._acts = [];
|
|
306
393
|
}
|
|
@@ -333,14 +420,16 @@ var NetworkLSTM = class {
|
|
|
333
420
|
for (let t = 0; t < T; t++) {
|
|
334
421
|
const acts = this._acts[t];
|
|
335
422
|
const pred = acts[acts.length - 1];
|
|
336
|
-
|
|
423
|
+
const outAct = this.denseLayers[this.denseLayers.length - 1].neurons[0].activation;
|
|
424
|
+
let deltas = pred.map((p, i) => (targets[t][i] - p) * outAct.dfn(p));
|
|
337
425
|
for (let l = this.denseLayers.length - 1; l >= 0; l--) {
|
|
338
426
|
const layer = this.denseLayers[l];
|
|
339
427
|
const layerIn = acts[l];
|
|
340
428
|
const grad = denseGrads[l];
|
|
429
|
+
const prevAct = l > 0 ? this.denseLayers[l - 1].neurons[0].activation : null;
|
|
341
430
|
const prevDeltas = layerIn.map((out, j) => {
|
|
342
431
|
const errProp = layer.neurons.reduce((s, n, k) => s + deltas[k] * n.weights[j], 0);
|
|
343
|
-
return
|
|
432
|
+
return prevAct ? errProp * prevAct.dfn(out) : errProp;
|
|
344
433
|
});
|
|
345
434
|
layer.neurons.forEach((n, k) => {
|
|
346
435
|
n.weights.forEach((_, j) => {
|
|
@@ -356,8 +445,11 @@ var NetworkLSTM = class {
|
|
|
356
445
|
const layer = this.denseLayers[l];
|
|
357
446
|
const grad = denseGrads[l];
|
|
358
447
|
layer.neurons.forEach((n, k) => {
|
|
359
|
-
n.
|
|
360
|
-
|
|
448
|
+
n._update(
|
|
449
|
+
grad.dW[k].map((g) => g / T),
|
|
450
|
+
grad.db[k] / T,
|
|
451
|
+
lr
|
|
452
|
+
);
|
|
361
453
|
});
|
|
362
454
|
}
|
|
363
455
|
this.lstm.backprop(dh_seq, lr);
|
|
@@ -382,12 +474,51 @@ var NetworkLSTM = class {
|
|
|
382
474
|
});
|
|
383
475
|
}
|
|
384
476
|
};
|
|
477
|
+
|
|
478
|
+
// src/losses.ts
|
|
479
|
+
/**
 * Mean squared error between two equally indexed arrays.
 *
 * The sum runs over `predicted` and is divided by `predicted.length`, so an
 * empty `predicted` yields NaN (0 / 0).
 *
 * @param {number[]} predicted - Model outputs.
 * @param {number[]} actual - Target values, read at the same indices.
 * @returns {number} Average of `(actual[i] - predicted[i]) ** 2`.
 */
function mse(predicted, actual) {
  let total = 0;
  predicted.forEach((guess, idx) => {
    total = total + (actual[idx] - guess) ** 2;
  });
  return total / predicted.length;
}
|
|
482
|
+
/**
 * Mean binary cross-entropy between predictions and targets.
 *
 * Every prediction is clamped into `[1e-15, 1 - 1e-15]` before taking
 * logarithms, so predictions of exactly 0 or 1 produce a large finite loss
 * instead of Infinity. The sum is divided by `predicted.length`, so an empty
 * `predicted` yields NaN.
 *
 * @param {number[]} predicted - Predicted probabilities.
 * @param {number[]} actual - Target values, read at the same indices.
 * @returns {number} Negated average of
 *   `actual[i] * ln(p) + (1 - actual[i]) * ln(1 - p)`.
 */
function crossEntropy(predicted, actual) {
  const eps = 1e-15;
  let logLikelihood = 0;
  predicted.forEach((prob, idx) => {
    const target = actual[idx];
    const clipped = Math.max(eps, Math.min(1 - eps, prob));
    logLikelihood = logLikelihood + target * Math.log(clipped) + (1 - target) * Math.log(1 - clipped);
  });
  return -logLikelihood / predicted.length;
}
|
|
489
|
+
/**
 * Signed error between a target value and a prediction.
 *
 * The result is positive when the prediction is too low and negative when it
 * is too high (it is proportional to the negative derivative of
 * `(actual - predicted) ** 2` with respect to `predicted`).
 *
 * @param {number} predicted - Value produced by the model.
 * @param {number} actual - Target value.
 * @returns {number} `actual - predicted`.
 */
function mseDelta(predicted, actual) {
  const residual = actual - predicted;
  return residual;
}
|
|
492
|
+
/**
 * Signed error between a target value and a prediction.
 *
 * NOTE(review): presumably meant for a sigmoid output trained with binary
 * cross-entropy, where the derivative with respect to the pre-activation
 * simplifies to `actual - predicted` — confirm against the callers.
 *
 * @param {number} predicted - Value produced by the model.
 * @param {number} actual - Target value.
 * @returns {number} `actual - predicted`.
 */
function crossEntropyDelta(predicted, actual) {
  const difference = actual - predicted;
  return difference;
}
|
|
495
|
+
/**
 * Evaluates `actual / p - (1 - actual) / (1 - p)` for a single prediction,
 * where `p` is `predicted` clamped into `[1e-15, 1 - 1e-15]`.
 *
 * The clamp keeps both denominators away from zero, so the result stays
 * finite even when the prediction is exactly 0 or 1.
 *
 * @param {number} predicted - Predicted probability.
 * @param {number} actual - Target value.
 * @returns {number} The clamped expression described above.
 */
function crossEntropyDeltaRaw(predicted, actual) {
  const lower = 1e-15;
  const upper = 1 - lower;
  // Cap from above first, then from below (same order as min-then-max).
  const capped = Math.min(upper, predicted);
  const prob = Math.max(lower, capped);
  const positiveTerm = actual / prob;
  const negativeTerm = (1 - actual) / (1 - prob);
  return positiveTerm - negativeTerm;
}
|
|
385
500
|
export {
|
|
501
|
+
Adam,
|
|
386
502
|
LSTMLayer,
|
|
387
503
|
Layer,
|
|
504
|
+
Momentum,
|
|
388
505
|
Network,
|
|
389
506
|
NetworkLSTM,
|
|
390
507
|
NetworkN,
|
|
391
508
|
Neuron,
|
|
392
|
-
NeuronN
|
|
509
|
+
NeuronN,
|
|
510
|
+
SGD,
|
|
511
|
+
crossEntropy,
|
|
512
|
+
crossEntropyDelta,
|
|
513
|
+
crossEntropyDeltaRaw,
|
|
514
|
+
elu,
|
|
515
|
+
leakyRelu,
|
|
516
|
+
linear,
|
|
517
|
+
makeElu,
|
|
518
|
+
makeLeakyRelu,
|
|
519
|
+
mse,
|
|
520
|
+
mseDelta,
|
|
521
|
+
relu,
|
|
522
|
+
sigmoid2 as sigmoid,
|
|
523
|
+
tanh
|
|
393
524
|
};
|
package/package.json
CHANGED