@dniskav/neuron 0.2.5 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -3,6 +3,28 @@
3
3
 
4
4
  A minimal, dependency-free neural network library built from scratch in TypeScript. Designed for learning and experimentation — every line of math is readable.
5
5
 
6
+ Each class is a building block for the next: from a single neuron to a full Transformer with causal attention.
7
+
8
+ ```mermaid
9
+ graph TD
10
+ A["Neuron\n1 input · 1 weight · 1 bias"]
11
+ B["NeuronN\nN inputs · Xavier init · configurable activation"]
12
+ C["Layer\ngroup of NeuronN sharing the same inputs"]
13
+ D["Network\nhidden + output · backprop"]
14
+ E["NetworkN\narbitrary depth · define as [inputs, ...hidden, outputs]"]
15
+ F["LSTMLayer\nrecurrent · hidden + cell state · BPTT"]
16
+ G["NetworkLSTM\nLSTM + dense layers · sequence memory"]
17
+ H["AttentionHead\nQ · K · V · scaled dot-product"]
18
+ I["MultiHeadAttention\nN heads in parallel"]
19
+ J["TransformerBlock\nattention + FFN + LayerNorm × 2 + residuals"]
20
+ K["NetworkTransformer\nembeddings → blocks → per-token logits"]
21
+ L["NetworkTransformerRL\ncontinuous projection → causal attention → Q-values"]
22
+
23
+ A --> B --> C --> D --> E
24
+ E --> F --> G
25
+ E --> H --> I --> J --> K --> L
26
+ ```
27
+
6
28
  ## What's inside
7
29
 
8
30
  | Export | Description |
@@ -21,9 +43,11 @@ A minimal, dependency-free neural network library built from scratch in TypeScri
21
43
  | `AttentionHead` | Single scaled dot-product self-attention head (Q / K / V projections + backprop). |
22
44
  | `LayerNorm` | Layer normalization with learnable γ / β per feature. |
23
45
  | `WeightMatrix` | 2D weight matrix with per-scalar Adam optimizers. Optional per-element gradient clipping via `update(dW, lr, clipValue)`. |
46
+ | `BiasVector` | 1D bias vector with per-scalar Adam optimizers. Companion to `WeightMatrix` for bias terms. |
24
47
  | `EmbeddingMatrix` | Lookup-table embedding matrix with SGD updates. |
25
48
  | `sigmoid` `relu` `tanh` `linear` | Built-in activation functions. |
26
- | `SGD` `Momentum` `Adam` | Optimizers. Each instance tracks its own state per weight. |
49
+ | `SGD` `Momentum` `Adam` `ClipOptimizer` | Optimizers. Each instance tracks its own state per weight. `ClipOptimizer` wraps any optimizer with gradient clipping. |
50
+ | `defaultOptimizer` | Default `OptimizerFactory` (`() => new SGD()`). Shared across `NeuronN`, `Layer`, `NetworkN`, `NetworkLSTM`. |
27
51
  | `mse` `crossEntropy` | Loss functions for evaluation and logging. |
28
52
  | `mseDelta` `crossEntropyDelta` | Output-layer delta functions for use with `trainWithDeltas`. |
29
53
 
@@ -83,8 +107,8 @@ for (let epoch = 0; epoch < 5000; epoch++) {
83
107
  }
84
108
  }
85
109
 
86
- console.log(net.predict([0, 1])); // ~0.97
87
- console.log(net.predict([1, 1])); // ~0.03
110
+ console.log(net.predict([0, 1])[0]); // ~0.97
111
+ console.log(net.predict([1, 1])[0]); // ~0.03
88
112
  ```
89
113
 
90
114
  ### NetworkN — deep network with custom architecture
@@ -307,6 +331,32 @@ const attnWeights = net.getAttentionWeights();
307
331
  // attnWeights[blockIdx][headIdx] → seqLen × seqLen matrix
308
332
  ```
309
333
 
334
+ ## Changelog
335
+
336
+ ### v0.2.7
337
+ - **Docs:** Added architecture diagram to README — visual progression from `Neuron` to `NetworkTransformerRL`
338
+
339
+ ### v0.2.6
340
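+ - **Fix:** `Network.predict` now returns `number[]` instead of `number`, consistent with all other network classes. Callers that used the scalar result must now index it, e.g. `net.predict(inputs)[0]`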
341
+ - **Fix:** `Network.train` now uses the configured optimizer and `activation.dfn()` instead of hardcoded SGD and sigmoid derivative
342
+ - **Fix:** `LayerNorm.backwardOne` now correctly uses pre-update γ when computing the input gradient
343
+ - **Fix:** LSTM and GRU gate initialization changed from He (`√(2/n)`) to Xavier/Glorot (`√(2/(fanIn+fanOut))`), the scheme suited to the sigmoid/tanh activations used in those gates
344
+ - **New:** `BiasVector` — 1D counterpart to `WeightMatrix` with per-scalar Adam optimizers; replaces repeated `number[] + Adam[]` pairs in `TransformerBlock`, `NetworkTransformer`, and `NetworkTransformerRL`
345
+ - **New:** `defaultOptimizer` exported from `optimizers.ts` — single source of truth for the default `() => new SGD()` factory
346
+ - **Refactor:** `NetworkN.train` and `trainWithDeltas` now share the extracted private helpers `_forwardAll()` and `_backpropLayers()`, eliminating ~50 lines of duplication
347
+ - **Refactor:** `Transformer` backward methods now throw descriptive errors instead of crashing with a cryptic `TypeError` when called before `predict()`
348
+ - **Refactor:** `NetworkTransformer.setWeights()` and `NetworkTransformerRL.setWeightsFlat()` use each component's own `setWeights()` instead of direct `.W` mutation
349
+
350
+ ### v0.2.5
351
+ - Unified optimizer factories for `LSTMLayer`, `GRULayer`, `Conv1D` (per-scalar Adam/Momentum/SGD)
352
+ - `NetworkN`: residual connections (`residual` option) and dropout (`dropoutRate`)
353
+ - `Conv1D`: multi-channel input (`inputChannels`)
354
+ - `NetworkTransformerRL`: configurable pooling (`avg` / `max` / `last` / `weighted`)
355
+ - `Trainer`: weight decay, early stopping, classification metrics, gradient clipping support
356
+ - `DataLoader`: validation split (`validationSplit` + `getValidationData()`)
357
+ - `ModelSaver`: universal serialization via flat `getWeights()`/`setWeights()` for all classes
358
+ - Gradient check test suite (`tests/GradientCheck.test.ts`)
359
+
310
360
  ## Possible improvements
311
361
 
312
362
  1. **Support for batches** in training to improve efficiency and gradient stability.
package/dist/index.d.mts CHANGED
@@ -25,6 +25,7 @@ interface Optimizer {
25
25
  step(weight: number, gradient: number, lr: number): number;
26
26
  }
27
27
  type OptimizerFactory = () => Optimizer;
28
+ declare const defaultOptimizer: OptimizerFactory;
28
29
  declare class SGD implements Optimizer {
29
30
  step(weight: number, gradient: number, lr: number): number;
30
31
  }
@@ -73,7 +74,7 @@ declare class Network {
73
74
  hiddenLayer: Layer;
74
75
  outputLayer: Layer;
75
76
  constructor(nInputs: number, nHidden: number, nOutputs: number);
76
- predict(inputs: number[]): number;
77
+ predict(inputs: number[]): number[];
77
78
  train(inputs: number[], target: number, lr: number): number;
78
79
  getWeights(): number[];
79
80
  setWeights(weights: number[]): void;
@@ -97,6 +98,8 @@ declare class NetworkN {
97
98
  getWeights(): number[];
98
99
  setWeights(weights: number[]): void;
99
100
  private _shouldResidual;
101
+ private _forwardAll;
102
+ private _backpropLayers;
100
103
  }
101
104
 
102
105
  declare class Gate$1 {
@@ -198,6 +201,14 @@ declare class WeightMatrix {
198
201
  getWeights(): number[];
199
202
  setWeights(weights: number[]): void;
200
203
  }
204
+ declare class BiasVector {
205
+ values: number[];
206
+ private opts;
207
+ constructor(size: number);
208
+ update(grad: number[], lr: number): void;
209
+ getWeights(): number[];
210
+ setWeights(weights: number[]): void;
211
+ }
201
212
  declare class EmbeddingMatrix {
202
213
  W: number[][];
203
214
  constructor(vocabSize: number, d_model: number);
@@ -266,10 +277,8 @@ declare class TransformerBlock {
266
277
  norm2: LayerNorm;
267
278
  ff1: WeightMatrix;
268
279
  ff2: WeightMatrix;
269
- b1: number[];
270
- b2: number[];
271
- private b1Opts;
272
- private b2Opts;
280
+ b1: BiasVector;
281
+ b2: BiasVector;
273
282
  private _X;
274
283
  private _attnOut;
275
284
  private _h1;
@@ -301,8 +310,7 @@ declare class NetworkTransformer {
301
310
  posEmb: EmbeddingMatrix;
302
311
  blocks: TransformerBlock[];
303
312
  outputProj: WeightMatrix;
304
- outputBias: number[];
305
- private outBiasOpts;
313
+ outputBias: BiasVector;
306
314
  constructor(seqLen: number, options?: NetworkTransformerOptions);
307
315
  predict(tokens: number[]): number[];
308
316
  train(tokens: number[], targets: number[], lr: number, mask?: boolean[]): number;
@@ -328,8 +336,7 @@ declare class NetworkTransformerRL {
328
336
  inputProj: WeightMatrix;
329
337
  blocks: TransformerBlock[];
330
338
  outputProj: WeightMatrix;
331
- outputBias: number[];
332
- private outBiasOpts;
339
+ outputBias: BiasVector;
333
340
  private _projected;
334
341
  private _pooling;
335
342
  private _argmax;
@@ -586,4 +593,4 @@ declare function validateArrayMinLength(arr: unknown, minLength: number, methodN
586
593
  declare function validate2DArray(arr: unknown, expectedRows: number, expectedCols: number, methodName: string): asserts arr is number[][];
587
594
  declare function validateNumber(value: unknown, methodName: string): asserts value is number;
588
595
 
589
- export { type Activation, Adam, AttentionHead, BatchNorm, ClipOptimizer, ClippedOptimizerFactory, Conv1D, DataLoader, type DataPair, Dropout, EmbeddingMatrix, GRULayer, LRScheduler, LSTMLayer, Layer, LayerNorm, ModelSaver, Momentum, MultiHeadAttention, Network, NetworkLSTM, type NetworkLSTMOptions, NetworkN, type NetworkNOptions, NetworkTransformer, type NetworkTransformerOptions, NetworkTransformerRL, type NetworkTransformerRLOptions, Neuron, NeuronN, type Optimizer, type OptimizerFactory, SGD, type Serializable, type TrainDataset, type TrainMetrics, type TrainableNetwork, type TrainableNetworkWithWeights, Trainer, type TrainerOptions, TransformerBlock, type TransformerBlockOptions, WeightMatrix, crossEntropy, crossEntropyDelta, crossEntropyDeltaRaw, elu, leakyRelu, linear, makeElu, makeLeakyRelu, matMul, mse, mseDelta, relu, sigmoid, softmax, softmaxBackward, tanh, transpose, validate2DArray, validateArray, validateArrayMinLength, validateNumber };
596
+ export { type Activation, Adam, AttentionHead, BatchNorm, BiasVector, ClipOptimizer, ClippedOptimizerFactory, Conv1D, DataLoader, type DataPair, Dropout, EmbeddingMatrix, GRULayer, LRScheduler, LSTMLayer, Layer, LayerNorm, ModelSaver, Momentum, MultiHeadAttention, Network, NetworkLSTM, type NetworkLSTMOptions, NetworkN, type NetworkNOptions, NetworkTransformer, type NetworkTransformerOptions, NetworkTransformerRL, type NetworkTransformerRLOptions, Neuron, NeuronN, type Optimizer, type OptimizerFactory, SGD, type Serializable, type TrainDataset, type TrainMetrics, type TrainableNetwork, type TrainableNetworkWithWeights, Trainer, type TrainerOptions, TransformerBlock, type TransformerBlockOptions, WeightMatrix, crossEntropy, crossEntropyDelta, crossEntropyDeltaRaw, defaultOptimizer, elu, leakyRelu, linear, makeElu, makeLeakyRelu, matMul, mse, mseDelta, relu, sigmoid, softmax, softmaxBackward, tanh, transpose, validate2DArray, validateArray, validateArrayMinLength, validateNumber };
package/dist/index.d.ts CHANGED
@@ -25,6 +25,7 @@ interface Optimizer {
25
25
  step(weight: number, gradient: number, lr: number): number;
26
26
  }
27
27
  type OptimizerFactory = () => Optimizer;
28
+ declare const defaultOptimizer: OptimizerFactory;
28
29
  declare class SGD implements Optimizer {
29
30
  step(weight: number, gradient: number, lr: number): number;
30
31
  }
@@ -73,7 +74,7 @@ declare class Network {
73
74
  hiddenLayer: Layer;
74
75
  outputLayer: Layer;
75
76
  constructor(nInputs: number, nHidden: number, nOutputs: number);
76
- predict(inputs: number[]): number;
77
+ predict(inputs: number[]): number[];
77
78
  train(inputs: number[], target: number, lr: number): number;
78
79
  getWeights(): number[];
79
80
  setWeights(weights: number[]): void;
@@ -97,6 +98,8 @@ declare class NetworkN {
97
98
  getWeights(): number[];
98
99
  setWeights(weights: number[]): void;
99
100
  private _shouldResidual;
101
+ private _forwardAll;
102
+ private _backpropLayers;
100
103
  }
101
104
 
102
105
  declare class Gate$1 {
@@ -198,6 +201,14 @@ declare class WeightMatrix {
198
201
  getWeights(): number[];
199
202
  setWeights(weights: number[]): void;
200
203
  }
204
+ declare class BiasVector {
205
+ values: number[];
206
+ private opts;
207
+ constructor(size: number);
208
+ update(grad: number[], lr: number): void;
209
+ getWeights(): number[];
210
+ setWeights(weights: number[]): void;
211
+ }
201
212
  declare class EmbeddingMatrix {
202
213
  W: number[][];
203
214
  constructor(vocabSize: number, d_model: number);
@@ -266,10 +277,8 @@ declare class TransformerBlock {
266
277
  norm2: LayerNorm;
267
278
  ff1: WeightMatrix;
268
279
  ff2: WeightMatrix;
269
- b1: number[];
270
- b2: number[];
271
- private b1Opts;
272
- private b2Opts;
280
+ b1: BiasVector;
281
+ b2: BiasVector;
273
282
  private _X;
274
283
  private _attnOut;
275
284
  private _h1;
@@ -301,8 +310,7 @@ declare class NetworkTransformer {
301
310
  posEmb: EmbeddingMatrix;
302
311
  blocks: TransformerBlock[];
303
312
  outputProj: WeightMatrix;
304
- outputBias: number[];
305
- private outBiasOpts;
313
+ outputBias: BiasVector;
306
314
  constructor(seqLen: number, options?: NetworkTransformerOptions);
307
315
  predict(tokens: number[]): number[];
308
316
  train(tokens: number[], targets: number[], lr: number, mask?: boolean[]): number;
@@ -328,8 +336,7 @@ declare class NetworkTransformerRL {
328
336
  inputProj: WeightMatrix;
329
337
  blocks: TransformerBlock[];
330
338
  outputProj: WeightMatrix;
331
- outputBias: number[];
332
- private outBiasOpts;
339
+ outputBias: BiasVector;
333
340
  private _projected;
334
341
  private _pooling;
335
342
  private _argmax;
@@ -586,4 +593,4 @@ declare function validateArrayMinLength(arr: unknown, minLength: number, methodN
586
593
  declare function validate2DArray(arr: unknown, expectedRows: number, expectedCols: number, methodName: string): asserts arr is number[][];
587
594
  declare function validateNumber(value: unknown, methodName: string): asserts value is number;
588
595
 
589
- export { type Activation, Adam, AttentionHead, BatchNorm, ClipOptimizer, ClippedOptimizerFactory, Conv1D, DataLoader, type DataPair, Dropout, EmbeddingMatrix, GRULayer, LRScheduler, LSTMLayer, Layer, LayerNorm, ModelSaver, Momentum, MultiHeadAttention, Network, NetworkLSTM, type NetworkLSTMOptions, NetworkN, type NetworkNOptions, NetworkTransformer, type NetworkTransformerOptions, NetworkTransformerRL, type NetworkTransformerRLOptions, Neuron, NeuronN, type Optimizer, type OptimizerFactory, SGD, type Serializable, type TrainDataset, type TrainMetrics, type TrainableNetwork, type TrainableNetworkWithWeights, Trainer, type TrainerOptions, TransformerBlock, type TransformerBlockOptions, WeightMatrix, crossEntropy, crossEntropyDelta, crossEntropyDeltaRaw, elu, leakyRelu, linear, makeElu, makeLeakyRelu, matMul, mse, mseDelta, relu, sigmoid, softmax, softmaxBackward, tanh, transpose, validate2DArray, validateArray, validateArrayMinLength, validateNumber };
596
+ export { type Activation, Adam, AttentionHead, BatchNorm, BiasVector, ClipOptimizer, ClippedOptimizerFactory, Conv1D, DataLoader, type DataPair, Dropout, EmbeddingMatrix, GRULayer, LRScheduler, LSTMLayer, Layer, LayerNorm, ModelSaver, Momentum, MultiHeadAttention, Network, NetworkLSTM, type NetworkLSTMOptions, NetworkN, type NetworkNOptions, NetworkTransformer, type NetworkTransformerOptions, NetworkTransformerRL, type NetworkTransformerRLOptions, Neuron, NeuronN, type Optimizer, type OptimizerFactory, SGD, type Serializable, type TrainDataset, type TrainMetrics, type TrainableNetwork, type TrainableNetworkWithWeights, Trainer, type TrainerOptions, TransformerBlock, type TransformerBlockOptions, WeightMatrix, crossEntropy, crossEntropyDelta, crossEntropyDeltaRaw, defaultOptimizer, elu, leakyRelu, linear, makeElu, makeLeakyRelu, matMul, mse, mseDelta, relu, sigmoid, softmax, softmaxBackward, tanh, transpose, validate2DArray, validateArray, validateArrayMinLength, validateNumber };