@dniskav/neuron 0.1.6 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +38 -0
- package/dist/index.d.mts +110 -1
- package/dist/index.d.ts +110 -1
- package/dist/index.js +583 -2
- package/dist/index.mjs +571 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -14,6 +14,13 @@ A minimal, dependency-free neural network library built from scratch in TypeScri
|
|
|
14
14
|
| `NetworkN` | Deep network of arbitrary depth. Define your architecture as `[inputs, ...hidden, outputs]`. |
|
|
15
15
|
| `LSTMLayer` | Recurrent layer with persistent hidden and cell state. Learns sequences via BPTT. |
|
|
16
16
|
| `NetworkLSTM` | Wraps an `LSTMLayer` + dense layers. Maintains memory across steps within an episode. |
|
|
17
|
+
| `NetworkTransformer` | Full token-classification Transformer: embeddings → N blocks → per-token logits. |
|
|
18
|
+
| `TransformerBlock` | One Transformer block: multi-head attention + FFN + LayerNorm × 2 with residuals. |
|
|
19
|
+
| `MultiHeadAttention` | N parallel attention heads concatenated and projected to `d_model`. |
|
|
20
|
+
| `AttentionHead` | Single scaled dot-product self-attention head (Q / K / V projections + backprop). |
|
|
21
|
+
| `LayerNorm` | Layer normalization with learnable γ / β per feature. |
|
|
22
|
+
| `WeightMatrix` | 2D weight matrix with per-scalar Adam optimizers. |
|
|
23
|
+
| `EmbeddingMatrix` | Lookup-table embedding matrix with SGD updates. |
|
|
17
24
|
| `sigmoid` `relu` `tanh` `linear` | Built-in activation functions. |
|
|
18
25
|
| `SGD` `Momentum` `Adam` | Optimizers. Each instance tracks its own state per weight. |
|
|
19
26
|
| `mse` `crossEntropy` | Loss functions for evaluation and logging. |
|
|
@@ -230,6 +237,37 @@ npm run dev # watch mode
|
|
|
230
237
|
|
|
231
238
|
If you are an AI agent or LLM working with this codebase, read [AGENTS.md](AGENTS.md) first. It contains the full class hierarchy, design constraints, and what this library does not do.
|
|
232
239
|
|
|
240
|
+
### NetworkTransformer — self-attention over sequences
|
|
241
|
+
|
|
242
|
+
```ts
|
|
243
|
+
import { NetworkTransformer } from "@dniskav/neuron";
|
|
244
|
+
|
|
245
|
+
// Sudoku solver: 81 cells (tokens), values 0–9, predict digit 1–9 per cell
|
|
246
|
+
const net = new NetworkTransformer(81, {
|
|
247
|
+
vocabSize: 10, // digits 0–9
|
|
248
|
+
d_model: 64, // embedding / hidden dimension
|
|
249
|
+
nHeads: 4, // attention heads (d_k = d_model / nHeads = 16)
|
|
250
|
+
d_ff: 128, // FFN hidden size
|
|
251
|
+
nBlocks: 4, // number of transformer blocks
|
|
252
|
+
nClasses: 9, // output classes per token (digits 1–9)
|
|
253
|
+
});
|
|
254
|
+
|
|
255
|
+
// tokens: 81 cell values (0 = empty)
|
|
256
|
+
const puzzle = [5,3,0, 0,7,0, 0,0,0, ...];
|
|
257
|
+
const targets = [...]; // 729 one-hot values (81 × 9)
|
|
258
|
+
const mask = puzzle.map(v => v === 0); // only train on empty cells
|
|
259
|
+
|
|
260
|
+
const loss = net.train(puzzle, targets, 0.001, mask);
|
|
261
|
+
const logits = net.predict(puzzle); // 729 logits (81 × 9)
|
|
262
|
+
|
|
263
|
+
// Attention weights from all blocks for visualization
|
|
264
|
+
const weights = net.getAttentionWeights();
|
|
265
|
+
// weights[blockIdx][headIdx] → seqLen × seqLen matrix (typed as possibly null, so check before use)
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
Each head in each block can learn a different type of relationship (row, column,
|
|
269
|
+
3×3 box). These relationships are not hard-coded; the network discovers them by itself through training.
|
|
270
|
+
|
|
233
271
|
## Possible improvements
|
|
234
272
|
|
|
235
273
|
1. **Support for batches** in training to improve efficiency.
|
package/dist/index.d.mts
CHANGED
|
@@ -165,10 +165,119 @@ declare class NetworkLSTM {
|
|
|
165
165
|
setWeights(data: ReturnType<NetworkLSTM["getWeights"]>): void;
|
|
166
166
|
}
|
|
167
167
|
|
|
168
|
+
declare function matMul(A: number[][], B: number[][]): number[][];
|
|
169
|
+
declare function transpose(A: number[][]): number[][];
|
|
170
|
+
declare function softmax(row: number[]): number[];
|
|
171
|
+
declare function softmaxBackward(dS: number[], s: number[]): number[];
|
|
172
|
+
declare class WeightMatrix {
|
|
173
|
+
W: number[][];
|
|
174
|
+
private opts;
|
|
175
|
+
constructor(rows: number, cols: number);
|
|
176
|
+
update(dW: number[][], lr: number): void;
|
|
177
|
+
}
|
|
178
|
+
declare class EmbeddingMatrix {
|
|
179
|
+
W: number[][];
|
|
180
|
+
constructor(vocabSize: number, d_model: number);
|
|
181
|
+
get(idx: number): number[];
|
|
182
|
+
update(idx: number, grad: number[], lr: number): void;
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
declare class AttentionHead {
|
|
186
|
+
readonly d_k: number;
|
|
187
|
+
readonly d_v: number;
|
|
188
|
+
Wq: WeightMatrix;
|
|
189
|
+
Wk: WeightMatrix;
|
|
190
|
+
Wv: WeightMatrix;
|
|
191
|
+
private cache;
|
|
192
|
+
constructor(d_model: number, d_k: number, d_v: number);
|
|
193
|
+
predict(X: number[][]): number[][];
|
|
194
|
+
backward(dOut: number[][], lr: number): number[][];
|
|
195
|
+
getAttentionWeights(): number[][] | null;
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
declare class MultiHeadAttention {
|
|
199
|
+
readonly nHeads: number;
|
|
200
|
+
readonly d_model: number;
|
|
201
|
+
readonly d_k: number;
|
|
202
|
+
heads: AttentionHead[];
|
|
203
|
+
Wo: WeightMatrix;
|
|
204
|
+
private _concat;
|
|
205
|
+
constructor(d_model: number, nHeads: number);
|
|
206
|
+
predict(X: number[][]): number[][];
|
|
207
|
+
backward(dOut: number[][], lr: number): number[][];
|
|
208
|
+
getAttentionWeights(): (number[][] | null)[];
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
declare class LayerNorm {
|
|
212
|
+
gamma: number[];
|
|
213
|
+
beta: number[];
|
|
214
|
+
private readonly eps;
|
|
215
|
+
private _cache;
|
|
216
|
+
constructor(dim: number);
|
|
217
|
+
resetCache(seqLen: number): void;
|
|
218
|
+
predictOne(x: number[], pos: number): number[];
|
|
219
|
+
backwardOne(dOut: number[], pos: number, lr: number): number[];
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
interface TransformerBlockOptions {
|
|
223
|
+
d_model: number;
|
|
224
|
+
nHeads: number;
|
|
225
|
+
d_ff: number;
|
|
226
|
+
}
|
|
227
|
+
declare class TransformerBlock {
|
|
228
|
+
readonly d_model: number;
|
|
229
|
+
readonly d_ff: number;
|
|
230
|
+
attn: MultiHeadAttention;
|
|
231
|
+
norm1: LayerNorm;
|
|
232
|
+
norm2: LayerNorm;
|
|
233
|
+
ff1: WeightMatrix;
|
|
234
|
+
ff2: WeightMatrix;
|
|
235
|
+
b1: number[];
|
|
236
|
+
b2: number[];
|
|
237
|
+
private b1Opts;
|
|
238
|
+
private b2Opts;
|
|
239
|
+
private _X;
|
|
240
|
+
private _attnOut;
|
|
241
|
+
private _h1;
|
|
242
|
+
private _ff1Pre;
|
|
243
|
+
private _ff1Out;
|
|
244
|
+
private _ff2Out;
|
|
245
|
+
constructor({ d_model, nHeads, d_ff }: TransformerBlockOptions);
|
|
246
|
+
predict(X: number[][]): number[][];
|
|
247
|
+
backward(dOut: number[][], lr: number): number[][];
|
|
248
|
+
getAttentionWeights(): (number[][] | null)[];
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
interface NetworkTransformerOptions {
|
|
252
|
+
vocabSize?: number;
|
|
253
|
+
d_model?: number;
|
|
254
|
+
nHeads?: number;
|
|
255
|
+
d_ff?: number;
|
|
256
|
+
nBlocks?: number;
|
|
257
|
+
nClasses?: number;
|
|
258
|
+
}
|
|
259
|
+
declare class NetworkTransformer {
|
|
260
|
+
readonly seqLen: number;
|
|
261
|
+
readonly vocabSize: number;
|
|
262
|
+
readonly d_model: number;
|
|
263
|
+
readonly nClasses: number;
|
|
264
|
+
tokenEmb: EmbeddingMatrix;
|
|
265
|
+
posEmb: EmbeddingMatrix;
|
|
266
|
+
blocks: TransformerBlock[];
|
|
267
|
+
outputProj: WeightMatrix;
|
|
268
|
+
outputBias: number[];
|
|
269
|
+
private outBiasOpts;
|
|
270
|
+
constructor(seqLen: number, options?: NetworkTransformerOptions);
|
|
271
|
+
predict(tokens: number[]): number[];
|
|
272
|
+
train(tokens: number[], targets: number[], lr: number, mask?: boolean[]): number;
|
|
273
|
+
getAttentionWeights(): (number[][] | null)[][];
|
|
274
|
+
private _forward;
|
|
275
|
+
}
|
|
276
|
+
|
|
168
277
|
declare function mse(predicted: number[], actual: number[]): number;
|
|
169
278
|
declare function crossEntropy(predicted: number[], actual: number[]): number;
|
|
170
279
|
declare function mseDelta(predicted: number, actual: number): number;
|
|
171
280
|
declare function crossEntropyDelta(predicted: number, actual: number): number;
|
|
172
281
|
declare function crossEntropyDeltaRaw(predicted: number, actual: number): number;
|
|
173
282
|
|
|
174
|
-
export { type Activation, Adam, LSTMLayer, Layer, Momentum, Network, NetworkLSTM, type NetworkLSTMOptions, NetworkN, type NetworkNOptions, Neuron, NeuronN, type Optimizer, type OptimizerFactory, SGD, crossEntropy, crossEntropyDelta, crossEntropyDeltaRaw, elu, leakyRelu, linear, makeElu, makeLeakyRelu, mse, mseDelta, relu, sigmoid, tanh };
|
|
283
|
+
export { type Activation, Adam, AttentionHead, EmbeddingMatrix, LSTMLayer, Layer, LayerNorm, Momentum, MultiHeadAttention, Network, NetworkLSTM, type NetworkLSTMOptions, NetworkN, type NetworkNOptions, NetworkTransformer, type NetworkTransformerOptions, Neuron, NeuronN, type Optimizer, type OptimizerFactory, SGD, TransformerBlock, type TransformerBlockOptions, WeightMatrix, crossEntropy, crossEntropyDelta, crossEntropyDeltaRaw, elu, leakyRelu, linear, makeElu, makeLeakyRelu, matMul, mse, mseDelta, relu, sigmoid, softmax, softmaxBackward, tanh, transpose };
|
package/dist/index.d.ts
CHANGED
|
@@ -165,10 +165,119 @@ declare class NetworkLSTM {
|
|
|
165
165
|
setWeights(data: ReturnType<NetworkLSTM["getWeights"]>): void;
|
|
166
166
|
}
|
|
167
167
|
|
|
168
|
+
declare function matMul(A: number[][], B: number[][]): number[][];
|
|
169
|
+
declare function transpose(A: number[][]): number[][];
|
|
170
|
+
declare function softmax(row: number[]): number[];
|
|
171
|
+
declare function softmaxBackward(dS: number[], s: number[]): number[];
|
|
172
|
+
declare class WeightMatrix {
|
|
173
|
+
W: number[][];
|
|
174
|
+
private opts;
|
|
175
|
+
constructor(rows: number, cols: number);
|
|
176
|
+
update(dW: number[][], lr: number): void;
|
|
177
|
+
}
|
|
178
|
+
declare class EmbeddingMatrix {
|
|
179
|
+
W: number[][];
|
|
180
|
+
constructor(vocabSize: number, d_model: number);
|
|
181
|
+
get(idx: number): number[];
|
|
182
|
+
update(idx: number, grad: number[], lr: number): void;
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
declare class AttentionHead {
|
|
186
|
+
readonly d_k: number;
|
|
187
|
+
readonly d_v: number;
|
|
188
|
+
Wq: WeightMatrix;
|
|
189
|
+
Wk: WeightMatrix;
|
|
190
|
+
Wv: WeightMatrix;
|
|
191
|
+
private cache;
|
|
192
|
+
constructor(d_model: number, d_k: number, d_v: number);
|
|
193
|
+
predict(X: number[][]): number[][];
|
|
194
|
+
backward(dOut: number[][], lr: number): number[][];
|
|
195
|
+
getAttentionWeights(): number[][] | null;
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
declare class MultiHeadAttention {
|
|
199
|
+
readonly nHeads: number;
|
|
200
|
+
readonly d_model: number;
|
|
201
|
+
readonly d_k: number;
|
|
202
|
+
heads: AttentionHead[];
|
|
203
|
+
Wo: WeightMatrix;
|
|
204
|
+
private _concat;
|
|
205
|
+
constructor(d_model: number, nHeads: number);
|
|
206
|
+
predict(X: number[][]): number[][];
|
|
207
|
+
backward(dOut: number[][], lr: number): number[][];
|
|
208
|
+
getAttentionWeights(): (number[][] | null)[];
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
declare class LayerNorm {
|
|
212
|
+
gamma: number[];
|
|
213
|
+
beta: number[];
|
|
214
|
+
private readonly eps;
|
|
215
|
+
private _cache;
|
|
216
|
+
constructor(dim: number);
|
|
217
|
+
resetCache(seqLen: number): void;
|
|
218
|
+
predictOne(x: number[], pos: number): number[];
|
|
219
|
+
backwardOne(dOut: number[], pos: number, lr: number): number[];
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
interface TransformerBlockOptions {
|
|
223
|
+
d_model: number;
|
|
224
|
+
nHeads: number;
|
|
225
|
+
d_ff: number;
|
|
226
|
+
}
|
|
227
|
+
declare class TransformerBlock {
|
|
228
|
+
readonly d_model: number;
|
|
229
|
+
readonly d_ff: number;
|
|
230
|
+
attn: MultiHeadAttention;
|
|
231
|
+
norm1: LayerNorm;
|
|
232
|
+
norm2: LayerNorm;
|
|
233
|
+
ff1: WeightMatrix;
|
|
234
|
+
ff2: WeightMatrix;
|
|
235
|
+
b1: number[];
|
|
236
|
+
b2: number[];
|
|
237
|
+
private b1Opts;
|
|
238
|
+
private b2Opts;
|
|
239
|
+
private _X;
|
|
240
|
+
private _attnOut;
|
|
241
|
+
private _h1;
|
|
242
|
+
private _ff1Pre;
|
|
243
|
+
private _ff1Out;
|
|
244
|
+
private _ff2Out;
|
|
245
|
+
constructor({ d_model, nHeads, d_ff }: TransformerBlockOptions);
|
|
246
|
+
predict(X: number[][]): number[][];
|
|
247
|
+
backward(dOut: number[][], lr: number): number[][];
|
|
248
|
+
getAttentionWeights(): (number[][] | null)[];
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
interface NetworkTransformerOptions {
|
|
252
|
+
vocabSize?: number;
|
|
253
|
+
d_model?: number;
|
|
254
|
+
nHeads?: number;
|
|
255
|
+
d_ff?: number;
|
|
256
|
+
nBlocks?: number;
|
|
257
|
+
nClasses?: number;
|
|
258
|
+
}
|
|
259
|
+
declare class NetworkTransformer {
|
|
260
|
+
readonly seqLen: number;
|
|
261
|
+
readonly vocabSize: number;
|
|
262
|
+
readonly d_model: number;
|
|
263
|
+
readonly nClasses: number;
|
|
264
|
+
tokenEmb: EmbeddingMatrix;
|
|
265
|
+
posEmb: EmbeddingMatrix;
|
|
266
|
+
blocks: TransformerBlock[];
|
|
267
|
+
outputProj: WeightMatrix;
|
|
268
|
+
outputBias: number[];
|
|
269
|
+
private outBiasOpts;
|
|
270
|
+
constructor(seqLen: number, options?: NetworkTransformerOptions);
|
|
271
|
+
predict(tokens: number[]): number[];
|
|
272
|
+
train(tokens: number[], targets: number[], lr: number, mask?: boolean[]): number;
|
|
273
|
+
getAttentionWeights(): (number[][] | null)[][];
|
|
274
|
+
private _forward;
|
|
275
|
+
}
|
|
276
|
+
|
|
168
277
|
declare function mse(predicted: number[], actual: number[]): number;
|
|
169
278
|
declare function crossEntropy(predicted: number[], actual: number[]): number;
|
|
170
279
|
declare function mseDelta(predicted: number, actual: number): number;
|
|
171
280
|
declare function crossEntropyDelta(predicted: number, actual: number): number;
|
|
172
281
|
declare function crossEntropyDeltaRaw(predicted: number, actual: number): number;
|
|
173
282
|
|
|
174
|
-
export { type Activation, Adam, LSTMLayer, Layer, Momentum, Network, NetworkLSTM, type NetworkLSTMOptions, NetworkN, type NetworkNOptions, Neuron, NeuronN, type Optimizer, type OptimizerFactory, SGD, crossEntropy, crossEntropyDelta, crossEntropyDeltaRaw, elu, leakyRelu, linear, makeElu, makeLeakyRelu, mse, mseDelta, relu, sigmoid, tanh };
|
|
283
|
+
export { type Activation, Adam, AttentionHead, EmbeddingMatrix, LSTMLayer, Layer, LayerNorm, Momentum, MultiHeadAttention, Network, NetworkLSTM, type NetworkLSTMOptions, NetworkN, type NetworkNOptions, NetworkTransformer, type NetworkTransformerOptions, Neuron, NeuronN, type Optimizer, type OptimizerFactory, SGD, TransformerBlock, type TransformerBlockOptions, WeightMatrix, crossEntropy, crossEntropyDelta, crossEntropyDeltaRaw, elu, leakyRelu, linear, makeElu, makeLeakyRelu, matMul, mse, mseDelta, relu, sigmoid, softmax, softmaxBackward, tanh, transpose };
|