@dniskav/neuron 0.2.5 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +53 -3
- package/dist/index.d.mts +17 -10
- package/dist/index.d.ts +17 -10
- package/dist/index.js +144 -162
- package/dist/index.mjs +142 -162
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -3,6 +3,28 @@
|
|
|
3
3
|
|
|
4
4
|
A minimal, dependency-free neural network library built from scratch in TypeScript. Designed for learning and experimentation — every line of math is readable.
|
|
5
5
|
|
|
6
|
+
Each class is a building block for the next: from a single neuron to a full Transformer with causal attention.
|
|
7
|
+
|
|
8
|
+
```mermaid
|
|
9
|
+
graph TD
|
|
10
|
+
A["Neuron\n1 input · 1 weight · 1 bias"]
|
|
11
|
+
B["NeuronN\nN inputs · Xavier init · configurable activation"]
|
|
12
|
+
C["Layer\ngroup of NeuronN sharing the same inputs"]
|
|
13
|
+
D["Network\nhidden + output · backprop"]
|
|
14
|
+
E["NetworkN\narbitrary depth · define as [inputs, ...hidden, outputs]"]
|
|
15
|
+
F["LSTMLayer\nrecurrent · hidden + cell state · BPTT"]
|
|
16
|
+
G["NetworkLSTM\nLSTM + dense layers · sequence memory"]
|
|
17
|
+
H["AttentionHead\nQ · K · V · scaled dot-product"]
|
|
18
|
+
I["MultiHeadAttention\nN heads in parallel"]
|
|
19
|
+
J["TransformerBlock\nattention + FFN + LayerNorm × 2 + residuals"]
|
|
20
|
+
K["NetworkTransformer\nembeddings → blocks → per-token logits"]
|
|
21
|
+
L["NetworkTransformerRL\ncontinuous projection → causal attention → Q-values"]
|
|
22
|
+
|
|
23
|
+
A --> B --> C --> D --> E
|
|
24
|
+
E --> F --> G
|
|
25
|
+
E --> H --> I --> J --> K --> L
|
|
26
|
+
```
|
|
27
|
+
|
|
6
28
|
## What's inside
|
|
7
29
|
|
|
8
30
|
| Export | Description |
|
|
@@ -21,9 +43,11 @@ A minimal, dependency-free neural network library built from scratch in TypeScri
|
|
|
21
43
|
| `AttentionHead` | Single scaled dot-product self-attention head (Q / K / V projections + backprop). |
|
|
22
44
|
| `LayerNorm` | Layer normalization with learnable γ / β per feature. |
|
|
23
45
|
| `WeightMatrix` | 2D weight matrix with per-scalar Adam optimizers. Optional per-element gradient clipping via `update(dW, lr, clipValue)`. |
|
|
46
|
+
| `BiasVector` | 1D bias vector with per-scalar Adam optimizers. Companion to `WeightMatrix` for bias terms. |
|
|
24
47
|
| `EmbeddingMatrix` | Lookup-table embedding matrix with SGD updates. |
|
|
25
48
|
| `sigmoid` `relu` `tanh` `linear` | Built-in activation functions. |
|
|
26
|
-
| `SGD` `Momentum` `Adam` | Optimizers. Each instance tracks its own state per weight. |
|
|
49
|
+
| `SGD` `Momentum` `Adam` `ClipOptimizer` | Optimizers. Each instance tracks its own state per weight. `ClipOptimizer` wraps any optimizer with gradient clipping. |
|
|
50
|
+
| `defaultOptimizer` | Default `OptimizerFactory` (`() => new SGD()`). Shared across `NeuronN`, `Layer`, `NetworkN`, `NetworkLSTM`. |
|
|
27
51
|
| `mse` `crossEntropy` | Loss functions for evaluation and logging. |
|
|
28
52
|
| `mseDelta` `crossEntropyDelta` | Output-layer delta functions for use with `trainWithDeltas`. |
|
|
29
53
|
|
|
@@ -83,8 +107,8 @@ for (let epoch = 0; epoch < 5000; epoch++) {
|
|
|
83
107
|
}
|
|
84
108
|
}
|
|
85
109
|
|
|
86
|
-
console.log(net.predict([0, 1])); // ~0.97
|
|
87
|
-
console.log(net.predict([1, 1])); // ~0.03
|
|
110
|
+
console.log(net.predict([0, 1])[0]); // ~0.97
|
|
111
|
+
console.log(net.predict([1, 1])[0]); // ~0.03
|
|
88
112
|
```
|
|
89
113
|
|
|
90
114
|
### NetworkN — deep network with custom architecture
|
|
@@ -307,6 +331,32 @@ const attnWeights = net.getAttentionWeights();
|
|
|
307
331
|
// attnWeights[blockIdx][headIdx] → seqLen × seqLen matrix
|
|
308
332
|
```
|
|
309
333
|
|
|
334
|
+
## Changelog
|
|
335
|
+
|
|
336
|
+
### v0.2.7
|
|
337
|
+
- **Docs:** Added architecture diagram to README — visual progression from `Neuron` to `NetworkTransformerRL`
|
|
338
|
+
|
|
339
|
+
### v0.2.6
|
|
340
|
+
- **Fix:** `Network.predict` now returns `number[]` (consistent with all other network classes)
|
|
341
|
+
- **Fix:** `Network.train` now uses the configured optimizer and `activation.dfn()` instead of hardcoded SGD and sigmoid derivative
|
|
342
|
+
- **Fix:** `LayerNorm.backwardOne` now correctly uses pre-update γ when computing the input gradient
|
|
343
|
+
- **Fix:** LSTM and GRU gate initialization corrected from He (`√(2/n)`) to Xavier fan-in+out (`√(2/(fanIn+fanOut))`), matching the sigmoid/tanh activations used in those gates
|
|
344
|
+
- **New:** `BiasVector` — 1D counterpart to `WeightMatrix` with per-scalar Adam optimizers; replaces repeated `number[] + Adam[]` pairs in `TransformerBlock`, `NetworkTransformer`, and `NetworkTransformerRL`
|
|
345
|
+
- **New:** `defaultOptimizer` exported from `optimizers.ts` — single source of truth for the default `() => new SGD()` factory
|
|
346
|
+
- **Refactor:** `NetworkN.train` and `trainWithDeltas` share extracted `_forwardAll()` and `_backpropLayers()` internals — eliminates ~50 lines of duplication
|
|
347
|
+
- **Refactor:** `Transformer` backward methods now throw descriptive errors instead of crashing with a cryptic `TypeError` when called before `predict()`
|
|
348
|
+
- **Refactor:** `NetworkTransformer.setWeights()` and `NetworkTransformerRL.setWeightsFlat()` use each component's own `setWeights()` instead of direct `.W` mutation
|
|
349
|
+
|
|
350
|
+
### v0.2.5
|
|
351
|
+
- Unified optimizer factories for `LSTMLayer`, `GRULayer`, `Conv1D` (per-scalar Adam/Momentum/SGD)
|
|
352
|
+
- `NetworkN`: residual connections (`residual` option) and dropout (`dropoutRate`)
|
|
353
|
+
- `Conv1D`: multi-channel input (`inputChannels`)
|
|
354
|
+
- `NetworkTransformerRL`: configurable pooling (`avg` / `max` / `last` / `weighted`)
|
|
355
|
+
- `Trainer`: weight decay, early stopping, classification metrics, gradient clipping support
|
|
356
|
+
- `DataLoader`: validation split (`validationSplit` + `getValidationData()`)
|
|
357
|
+
- `ModelSaver`: universal serialization via flat `getWeights()`/`setWeights()` for all classes
|
|
358
|
+
- Gradient check test suite (`tests/GradientCheck.test.ts`)
|
|
359
|
+
|
|
310
360
|
## Possible improvements
|
|
311
361
|
|
|
312
362
|
1. **Support for batches** in training to improve efficiency and gradient stability.
|
package/dist/index.d.mts
CHANGED
|
@@ -25,6 +25,7 @@ interface Optimizer {
|
|
|
25
25
|
step(weight: number, gradient: number, lr: number): number;
|
|
26
26
|
}
|
|
27
27
|
type OptimizerFactory = () => Optimizer;
|
|
28
|
+
declare const defaultOptimizer: OptimizerFactory;
|
|
28
29
|
declare class SGD implements Optimizer {
|
|
29
30
|
step(weight: number, gradient: number, lr: number): number;
|
|
30
31
|
}
|
|
@@ -73,7 +74,7 @@ declare class Network {
|
|
|
73
74
|
hiddenLayer: Layer;
|
|
74
75
|
outputLayer: Layer;
|
|
75
76
|
constructor(nInputs: number, nHidden: number, nOutputs: number);
|
|
76
|
-
predict(inputs: number[]): number;
|
|
77
|
+
predict(inputs: number[]): number[];
|
|
77
78
|
train(inputs: number[], target: number, lr: number): number;
|
|
78
79
|
getWeights(): number[];
|
|
79
80
|
setWeights(weights: number[]): void;
|
|
@@ -97,6 +98,8 @@ declare class NetworkN {
|
|
|
97
98
|
getWeights(): number[];
|
|
98
99
|
setWeights(weights: number[]): void;
|
|
99
100
|
private _shouldResidual;
|
|
101
|
+
private _forwardAll;
|
|
102
|
+
private _backpropLayers;
|
|
100
103
|
}
|
|
101
104
|
|
|
102
105
|
declare class Gate$1 {
|
|
@@ -198,6 +201,14 @@ declare class WeightMatrix {
|
|
|
198
201
|
getWeights(): number[];
|
|
199
202
|
setWeights(weights: number[]): void;
|
|
200
203
|
}
|
|
204
|
+
declare class BiasVector {
|
|
205
|
+
values: number[];
|
|
206
|
+
private opts;
|
|
207
|
+
constructor(size: number);
|
|
208
|
+
update(grad: number[], lr: number): void;
|
|
209
|
+
getWeights(): number[];
|
|
210
|
+
setWeights(weights: number[]): void;
|
|
211
|
+
}
|
|
201
212
|
declare class EmbeddingMatrix {
|
|
202
213
|
W: number[][];
|
|
203
214
|
constructor(vocabSize: number, d_model: number);
|
|
@@ -266,10 +277,8 @@ declare class TransformerBlock {
|
|
|
266
277
|
norm2: LayerNorm;
|
|
267
278
|
ff1: WeightMatrix;
|
|
268
279
|
ff2: WeightMatrix;
|
|
269
|
-
b1: number[];
|
|
270
|
-
b2: number[];
|
|
271
|
-
private b1Opts;
|
|
272
|
-
private b2Opts;
|
|
280
|
+
b1: BiasVector;
|
|
281
|
+
b2: BiasVector;
|
|
273
282
|
private _X;
|
|
274
283
|
private _attnOut;
|
|
275
284
|
private _h1;
|
|
@@ -301,8 +310,7 @@ declare class NetworkTransformer {
|
|
|
301
310
|
posEmb: EmbeddingMatrix;
|
|
302
311
|
blocks: TransformerBlock[];
|
|
303
312
|
outputProj: WeightMatrix;
|
|
304
|
-
outputBias: number[];
|
|
305
|
-
private outBiasOpts;
|
|
313
|
+
outputBias: BiasVector;
|
|
306
314
|
constructor(seqLen: number, options?: NetworkTransformerOptions);
|
|
307
315
|
predict(tokens: number[]): number[];
|
|
308
316
|
train(tokens: number[], targets: number[], lr: number, mask?: boolean[]): number;
|
|
@@ -328,8 +336,7 @@ declare class NetworkTransformerRL {
|
|
|
328
336
|
inputProj: WeightMatrix;
|
|
329
337
|
blocks: TransformerBlock[];
|
|
330
338
|
outputProj: WeightMatrix;
|
|
331
|
-
outputBias: number[];
|
|
332
|
-
private outBiasOpts;
|
|
339
|
+
outputBias: BiasVector;
|
|
333
340
|
private _projected;
|
|
334
341
|
private _pooling;
|
|
335
342
|
private _argmax;
|
|
@@ -586,4 +593,4 @@ declare function validateArrayMinLength(arr: unknown, minLength: number, methodN
|
|
|
586
593
|
declare function validate2DArray(arr: unknown, expectedRows: number, expectedCols: number, methodName: string): asserts arr is number[][];
|
|
587
594
|
declare function validateNumber(value: unknown, methodName: string): asserts value is number;
|
|
588
595
|
|
|
589
|
-
export { type Activation, Adam, AttentionHead, BatchNorm, ClipOptimizer, ClippedOptimizerFactory, Conv1D, DataLoader, type DataPair, Dropout, EmbeddingMatrix, GRULayer, LRScheduler, LSTMLayer, Layer, LayerNorm, ModelSaver, Momentum, MultiHeadAttention, Network, NetworkLSTM, type NetworkLSTMOptions, NetworkN, type NetworkNOptions, NetworkTransformer, type NetworkTransformerOptions, NetworkTransformerRL, type NetworkTransformerRLOptions, Neuron, NeuronN, type Optimizer, type OptimizerFactory, SGD, type Serializable, type TrainDataset, type TrainMetrics, type TrainableNetwork, type TrainableNetworkWithWeights, Trainer, type TrainerOptions, TransformerBlock, type TransformerBlockOptions, WeightMatrix, crossEntropy, crossEntropyDelta, crossEntropyDeltaRaw, elu, leakyRelu, linear, makeElu, makeLeakyRelu, matMul, mse, mseDelta, relu, sigmoid, softmax, softmaxBackward, tanh, transpose, validate2DArray, validateArray, validateArrayMinLength, validateNumber };
|
|
596
|
+
export { type Activation, Adam, AttentionHead, BatchNorm, BiasVector, ClipOptimizer, ClippedOptimizerFactory, Conv1D, DataLoader, type DataPair, Dropout, EmbeddingMatrix, GRULayer, LRScheduler, LSTMLayer, Layer, LayerNorm, ModelSaver, Momentum, MultiHeadAttention, Network, NetworkLSTM, type NetworkLSTMOptions, NetworkN, type NetworkNOptions, NetworkTransformer, type NetworkTransformerOptions, NetworkTransformerRL, type NetworkTransformerRLOptions, Neuron, NeuronN, type Optimizer, type OptimizerFactory, SGD, type Serializable, type TrainDataset, type TrainMetrics, type TrainableNetwork, type TrainableNetworkWithWeights, Trainer, type TrainerOptions, TransformerBlock, type TransformerBlockOptions, WeightMatrix, crossEntropy, crossEntropyDelta, crossEntropyDeltaRaw, defaultOptimizer, elu, leakyRelu, linear, makeElu, makeLeakyRelu, matMul, mse, mseDelta, relu, sigmoid, softmax, softmaxBackward, tanh, transpose, validate2DArray, validateArray, validateArrayMinLength, validateNumber };
|
package/dist/index.d.ts
CHANGED
|
@@ -25,6 +25,7 @@ interface Optimizer {
|
|
|
25
25
|
step(weight: number, gradient: number, lr: number): number;
|
|
26
26
|
}
|
|
27
27
|
type OptimizerFactory = () => Optimizer;
|
|
28
|
+
declare const defaultOptimizer: OptimizerFactory;
|
|
28
29
|
declare class SGD implements Optimizer {
|
|
29
30
|
step(weight: number, gradient: number, lr: number): number;
|
|
30
31
|
}
|
|
@@ -73,7 +74,7 @@ declare class Network {
|
|
|
73
74
|
hiddenLayer: Layer;
|
|
74
75
|
outputLayer: Layer;
|
|
75
76
|
constructor(nInputs: number, nHidden: number, nOutputs: number);
|
|
76
|
-
predict(inputs: number[]): number;
|
|
77
|
+
predict(inputs: number[]): number[];
|
|
77
78
|
train(inputs: number[], target: number, lr: number): number;
|
|
78
79
|
getWeights(): number[];
|
|
79
80
|
setWeights(weights: number[]): void;
|
|
@@ -97,6 +98,8 @@ declare class NetworkN {
|
|
|
97
98
|
getWeights(): number[];
|
|
98
99
|
setWeights(weights: number[]): void;
|
|
99
100
|
private _shouldResidual;
|
|
101
|
+
private _forwardAll;
|
|
102
|
+
private _backpropLayers;
|
|
100
103
|
}
|
|
101
104
|
|
|
102
105
|
declare class Gate$1 {
|
|
@@ -198,6 +201,14 @@ declare class WeightMatrix {
|
|
|
198
201
|
getWeights(): number[];
|
|
199
202
|
setWeights(weights: number[]): void;
|
|
200
203
|
}
|
|
204
|
+
declare class BiasVector {
|
|
205
|
+
values: number[];
|
|
206
|
+
private opts;
|
|
207
|
+
constructor(size: number);
|
|
208
|
+
update(grad: number[], lr: number): void;
|
|
209
|
+
getWeights(): number[];
|
|
210
|
+
setWeights(weights: number[]): void;
|
|
211
|
+
}
|
|
201
212
|
declare class EmbeddingMatrix {
|
|
202
213
|
W: number[][];
|
|
203
214
|
constructor(vocabSize: number, d_model: number);
|
|
@@ -266,10 +277,8 @@ declare class TransformerBlock {
|
|
|
266
277
|
norm2: LayerNorm;
|
|
267
278
|
ff1: WeightMatrix;
|
|
268
279
|
ff2: WeightMatrix;
|
|
269
|
-
b1: number[];
|
|
270
|
-
b2: number[];
|
|
271
|
-
private b1Opts;
|
|
272
|
-
private b2Opts;
|
|
280
|
+
b1: BiasVector;
|
|
281
|
+
b2: BiasVector;
|
|
273
282
|
private _X;
|
|
274
283
|
private _attnOut;
|
|
275
284
|
private _h1;
|
|
@@ -301,8 +310,7 @@ declare class NetworkTransformer {
|
|
|
301
310
|
posEmb: EmbeddingMatrix;
|
|
302
311
|
blocks: TransformerBlock[];
|
|
303
312
|
outputProj: WeightMatrix;
|
|
304
|
-
outputBias: number[];
|
|
305
|
-
private outBiasOpts;
|
|
313
|
+
outputBias: BiasVector;
|
|
306
314
|
constructor(seqLen: number, options?: NetworkTransformerOptions);
|
|
307
315
|
predict(tokens: number[]): number[];
|
|
308
316
|
train(tokens: number[], targets: number[], lr: number, mask?: boolean[]): number;
|
|
@@ -328,8 +336,7 @@ declare class NetworkTransformerRL {
|
|
|
328
336
|
inputProj: WeightMatrix;
|
|
329
337
|
blocks: TransformerBlock[];
|
|
330
338
|
outputProj: WeightMatrix;
|
|
331
|
-
outputBias: number[];
|
|
332
|
-
private outBiasOpts;
|
|
339
|
+
outputBias: BiasVector;
|
|
333
340
|
private _projected;
|
|
334
341
|
private _pooling;
|
|
335
342
|
private _argmax;
|
|
@@ -586,4 +593,4 @@ declare function validateArrayMinLength(arr: unknown, minLength: number, methodN
|
|
|
586
593
|
declare function validate2DArray(arr: unknown, expectedRows: number, expectedCols: number, methodName: string): asserts arr is number[][];
|
|
587
594
|
declare function validateNumber(value: unknown, methodName: string): asserts value is number;
|
|
588
595
|
|
|
589
|
-
export { type Activation, Adam, AttentionHead, BatchNorm, ClipOptimizer, ClippedOptimizerFactory, Conv1D, DataLoader, type DataPair, Dropout, EmbeddingMatrix, GRULayer, LRScheduler, LSTMLayer, Layer, LayerNorm, ModelSaver, Momentum, MultiHeadAttention, Network, NetworkLSTM, type NetworkLSTMOptions, NetworkN, type NetworkNOptions, NetworkTransformer, type NetworkTransformerOptions, NetworkTransformerRL, type NetworkTransformerRLOptions, Neuron, NeuronN, type Optimizer, type OptimizerFactory, SGD, type Serializable, type TrainDataset, type TrainMetrics, type TrainableNetwork, type TrainableNetworkWithWeights, Trainer, type TrainerOptions, TransformerBlock, type TransformerBlockOptions, WeightMatrix, crossEntropy, crossEntropyDelta, crossEntropyDeltaRaw, elu, leakyRelu, linear, makeElu, makeLeakyRelu, matMul, mse, mseDelta, relu, sigmoid, softmax, softmaxBackward, tanh, transpose, validate2DArray, validateArray, validateArrayMinLength, validateNumber };
|
|
596
|
+
export { type Activation, Adam, AttentionHead, BatchNorm, BiasVector, ClipOptimizer, ClippedOptimizerFactory, Conv1D, DataLoader, type DataPair, Dropout, EmbeddingMatrix, GRULayer, LRScheduler, LSTMLayer, Layer, LayerNorm, ModelSaver, Momentum, MultiHeadAttention, Network, NetworkLSTM, type NetworkLSTMOptions, NetworkN, type NetworkNOptions, NetworkTransformer, type NetworkTransformerOptions, NetworkTransformerRL, type NetworkTransformerRLOptions, Neuron, NeuronN, type Optimizer, type OptimizerFactory, SGD, type Serializable, type TrainDataset, type TrainMetrics, type TrainableNetwork, type TrainableNetworkWithWeights, Trainer, type TrainerOptions, TransformerBlock, type TransformerBlockOptions, WeightMatrix, crossEntropy, crossEntropyDelta, crossEntropyDeltaRaw, defaultOptimizer, elu, leakyRelu, linear, makeElu, makeLeakyRelu, matMul, mse, mseDelta, relu, sigmoid, softmax, softmaxBackward, tanh, transpose, validate2DArray, validateArray, validateArrayMinLength, validateNumber };
|