@dniskav/neuron 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +38 -0
- package/dist/index.d.mts +55 -1
- package/dist/index.d.ts +55 -1
- package/dist/index.js +168 -0
- package/dist/index.mjs +167 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -15,6 +15,7 @@ A minimal, dependency-free neural network library built from scratch in TypeScri
|
|
|
15
15
|
| `LSTMLayer` | Recurrent layer with persistent hidden and cell state. Learns sequences via BPTT. |
|
|
16
16
|
| `NetworkLSTM` | Wraps an `LSTMLayer` + dense layers. Maintains memory across steps within an episode. |
|
|
17
17
|
| `NetworkTransformer` | Full token-classification Transformer: embeddings → N blocks → per-token logits. |
|
|
18
|
+
| `NetworkTransformerRL` | Transformer for RL agents: continuous input projection → causal attention → Q-values. Remembers the last N steps. |
|
|
18
19
|
| `TransformerBlock` | One Transformer block: multi-head attention + FFN + LayerNorm × 2 with residuals. |
|
|
19
20
|
| `MultiHeadAttention` | N parallel attention heads concatenated and projected to `d_model`. |
|
|
20
21
|
| `AttentionHead` | Single scaled dot-product self-attention head (Q / K / V projections + backprop). |
|
|
@@ -269,6 +270,43 @@ const weights = net.getAttentionWeights();
|
|
|
269
270
|
Each head in each block learns a different type of relationship (row, column,
|
|
270
271
|
3×3 box). The network figures this out by itself through training.
|
|
271
272
|
|
|
273
|
+
### NetworkTransformerRL — Transformer for reinforcement learning
|
|
274
|
+
|
|
275
|
+
`NetworkTransformerRL` uses causal self-attention over a sliding window of past states to output Q-values. Unlike `NetworkLSTM`, the agent attends to specific past moments rather than compressing them into a single hidden vector.
|
|
276
|
+
|
|
277
|
+
```ts
|
|
278
|
+
import { NetworkTransformerRL } from "@dniskav/neuron";
|
|
279
|
+
|
|
280
|
+
// Agent sees the last 8 steps, each step is a 7-value sensor vector → 4 actions
|
|
281
|
+
const net = new NetworkTransformerRL(8, 7, {
|
|
282
|
+
d_model: 32,
|
|
283
|
+
nHeads: 2,
|
|
284
|
+
d_ff: 64,
|
|
285
|
+
nBlocks: 2,
|
|
286
|
+
nActions: 4,
|
|
287
|
+
});
|
|
288
|
+
|
|
289
|
+
// Each step: feed the last N states as a sequence
|
|
290
|
+
const sequence = getLastNStates(); // number[][] — shape: [8, 7]
|
|
291
|
+
const qValues = net.predict(sequence); // number[4]
|
|
292
|
+
|
|
293
|
+
// Q-learning update: train toward Bellman target
|
|
294
|
+
const action = argmax(qValues);
|
|
295
|
+
const reward = env.step(action);
|
|
296
|
+
const targets = qValues.slice();
|
|
297
|
+
targets[action] = reward + 0.99 * Math.max(...net.predict(nextSequence));
|
|
298
|
+
|
|
299
|
+
const loss = net.train(sequence, targets, 0.001);
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
The last step in the sequence gets 2× pooling weight — the most recent state contributes more to the decision.
|
|
303
|
+
|
|
304
|
+
```ts
|
|
305
|
+
// Inspect what the agent is attending to
|
|
306
|
+
const attnWeights = net.getAttentionWeights();
|
|
307
|
+
// attnWeights[blockIdx][headIdx] → seqLen × seqLen matrix
|
|
308
|
+
```
|
|
309
|
+
|
|
272
310
|
## Possible improvements
|
|
273
311
|
|
|
274
312
|
1. **Support for batches** in training to improve efficiency and gradient stability.
|
package/dist/index.d.mts
CHANGED
|
@@ -274,10 +274,64 @@ declare class NetworkTransformer {
|
|
|
274
274
|
private _forward;
|
|
275
275
|
}
|
|
276
276
|
|
|
277
|
+
/**
 * Optional hyper-parameters accepted by the `NetworkTransformerRL` constructor.
 * Every field may be omitted; the constructor substitutes the default noted below.
 */
interface NetworkTransformerRLOptions {
    /** Width of the internal representation each input step is projected to. Default: 32. */
    d_model?: number;
    /** Number of attention heads handed to every `TransformerBlock`. Default: 2. */
    nHeads?: number;
    /** Feed-forward width handed to every `TransformerBlock`. Default: 64. */
    d_ff?: number;
    /** How many `TransformerBlock`s are stacked. Default: 2. */
    nBlocks?: number;
    /** Number of outputs (one Q-value per action). Default: 2. */
    nActions?: number;
}
|
|
284
|
+
/**
 * Transformer that maps a fixed-length window of continuous state vectors
 * (`seqLen × inputDim`) to `nActions` Q-values.
 *
 * Pipeline: linear input projection → stacked `TransformerBlock`s →
 * weighted mean over positions → linear output head with bias.
 */
declare class NetworkTransformerRL {
    /** Number of steps expected in every input sequence. */
    readonly seqLen: number;
    /** Length of each step vector. */
    readonly inputDim: number;
    /** Width of the internal representation. */
    readonly d_model: number;
    /** Number of Q-values produced. */
    readonly nActions: number;

    /** Projects each step from `inputDim` to `d_model`. */
    inputProj: WeightMatrix;
    /** Transformer blocks, applied in array order. */
    blocks: TransformerBlock[];
    /** Projects the pooled representation from `d_model` to `nActions`. */
    outputProj: WeightMatrix;
    /** One bias per action, added to the output projection. */
    outputBias: number[];

    private outBiasOpts;
    private _projected;

    /**
     * @param seqLen   Number of steps per input sequence.
     * @param inputDim Length of each step vector.
     * @param options  Optional hyper-parameters; see `NetworkTransformerRLOptions`.
     */
    constructor(seqLen: number, inputDim: number, options?: NetworkTransformerRLOptions);

    /**
     * Runs a forward pass.
     * @param sequence `seqLen × inputDim` matrix of states.
     * @returns One Q-value per action.
     */
    predict(sequence: number[][]): number[];

    /**
     * Performs one gradient step toward `target` using a mean-squared-error loss.
     * @param sequence `seqLen × inputDim` matrix of states.
     * @param target   Desired Q-value for each action.
     * @param lr       Learning rate.
     * @returns The MSE loss measured before the update.
     */
    train(sequence: number[][], target: number[], lr: number): number;

    /**
     * Collects the attention weights reported by each block.
     * @returns One entry per block; each entry lists per-head matrices, any of which may be `null`.
     */
    getAttentionWeights(): (number[][] | null)[][];

    /** Returns a deep copy of every trainable parameter as plain arrays. */
    getWeights(): {
        inputProj: number[][];
        blocks: {
            attn: {
                heads: {
                    Wq: number[][];
                    Wk: number[][];
                    Wv: number[][];
                }[];
                Wo: number[][];
            };
            norm1: {
                gamma: number[];
                beta: number[];
            };
            norm2: {
                gamma: number[];
                beta: number[];
            };
            ff1: number[][];
            ff2: number[][];
            b1: number[];
            b2: number[];
        }[];
        outputProj: number[][];
        outputBias: number[];
    };

    /**
     * Overwrites the parameters with a snapshot shaped like the result of `getWeights()`.
     * @param data Snapshot to load.
     */
    setWeights(data: ReturnType<NetworkTransformerRL['getWeights']>): void;

    private _forward;
    private _pool;
}
|
|
330
|
+
|
|
277
331
|
declare function mse(predicted: number[], actual: number[]): number;
|
|
278
332
|
declare function crossEntropy(predicted: number[], actual: number[]): number;
|
|
279
333
|
declare function mseDelta(predicted: number, actual: number): number;
|
|
280
334
|
declare function crossEntropyDelta(predicted: number, actual: number): number;
|
|
281
335
|
declare function crossEntropyDeltaRaw(predicted: number, actual: number): number;
|
|
282
336
|
|
|
283
|
-
export { type Activation, Adam, AttentionHead, EmbeddingMatrix, LSTMLayer, Layer, LayerNorm, Momentum, MultiHeadAttention, Network, NetworkLSTM, type NetworkLSTMOptions, NetworkN, type NetworkNOptions, NetworkTransformer, type NetworkTransformerOptions, Neuron, NeuronN, type Optimizer, type OptimizerFactory, SGD, TransformerBlock, type TransformerBlockOptions, WeightMatrix, crossEntropy, crossEntropyDelta, crossEntropyDeltaRaw, elu, leakyRelu, linear, makeElu, makeLeakyRelu, matMul, mse, mseDelta, relu, sigmoid, softmax, softmaxBackward, tanh, transpose };
|
|
337
|
+
export { type Activation, Adam, AttentionHead, EmbeddingMatrix, LSTMLayer, Layer, LayerNorm, Momentum, MultiHeadAttention, Network, NetworkLSTM, type NetworkLSTMOptions, NetworkN, type NetworkNOptions, NetworkTransformer, type NetworkTransformerOptions, NetworkTransformerRL, type NetworkTransformerRLOptions, Neuron, NeuronN, type Optimizer, type OptimizerFactory, SGD, TransformerBlock, type TransformerBlockOptions, WeightMatrix, crossEntropy, crossEntropyDelta, crossEntropyDeltaRaw, elu, leakyRelu, linear, makeElu, makeLeakyRelu, matMul, mse, mseDelta, relu, sigmoid, softmax, softmaxBackward, tanh, transpose };
|
package/dist/index.d.ts
CHANGED
|
@@ -274,10 +274,64 @@ declare class NetworkTransformer {
|
|
|
274
274
|
private _forward;
|
|
275
275
|
}
|
|
276
276
|
|
|
277
|
+
/**
 * Optional hyper-parameters accepted by the `NetworkTransformerRL` constructor.
 * Every field may be omitted; the constructor substitutes the default noted below.
 */
interface NetworkTransformerRLOptions {
    /** Width of the internal representation each input step is projected to. Default: 32. */
    d_model?: number;
    /** Number of attention heads handed to every `TransformerBlock`. Default: 2. */
    nHeads?: number;
    /** Feed-forward width handed to every `TransformerBlock`. Default: 64. */
    d_ff?: number;
    /** How many `TransformerBlock`s are stacked. Default: 2. */
    nBlocks?: number;
    /** Number of outputs (one Q-value per action). Default: 2. */
    nActions?: number;
}
|
|
284
|
+
/**
 * Transformer that maps a fixed-length window of continuous state vectors
 * (`seqLen × inputDim`) to `nActions` Q-values.
 *
 * Pipeline: linear input projection → stacked `TransformerBlock`s →
 * weighted mean over positions → linear output head with bias.
 */
declare class NetworkTransformerRL {
    /** Number of steps expected in every input sequence. */
    readonly seqLen: number;
    /** Length of each step vector. */
    readonly inputDim: number;
    /** Width of the internal representation. */
    readonly d_model: number;
    /** Number of Q-values produced. */
    readonly nActions: number;

    /** Projects each step from `inputDim` to `d_model`. */
    inputProj: WeightMatrix;
    /** Transformer blocks, applied in array order. */
    blocks: TransformerBlock[];
    /** Projects the pooled representation from `d_model` to `nActions`. */
    outputProj: WeightMatrix;
    /** One bias per action, added to the output projection. */
    outputBias: number[];

    private outBiasOpts;
    private _projected;

    /**
     * @param seqLen   Number of steps per input sequence.
     * @param inputDim Length of each step vector.
     * @param options  Optional hyper-parameters; see `NetworkTransformerRLOptions`.
     */
    constructor(seqLen: number, inputDim: number, options?: NetworkTransformerRLOptions);

    /**
     * Runs a forward pass.
     * @param sequence `seqLen × inputDim` matrix of states.
     * @returns One Q-value per action.
     */
    predict(sequence: number[][]): number[];

    /**
     * Performs one gradient step toward `target` using a mean-squared-error loss.
     * @param sequence `seqLen × inputDim` matrix of states.
     * @param target   Desired Q-value for each action.
     * @param lr       Learning rate.
     * @returns The MSE loss measured before the update.
     */
    train(sequence: number[][], target: number[], lr: number): number;

    /**
     * Collects the attention weights reported by each block.
     * @returns One entry per block; each entry lists per-head matrices, any of which may be `null`.
     */
    getAttentionWeights(): (number[][] | null)[][];

    /** Returns a deep copy of every trainable parameter as plain arrays. */
    getWeights(): {
        inputProj: number[][];
        blocks: {
            attn: {
                heads: {
                    Wq: number[][];
                    Wk: number[][];
                    Wv: number[][];
                }[];
                Wo: number[][];
            };
            norm1: {
                gamma: number[];
                beta: number[];
            };
            norm2: {
                gamma: number[];
                beta: number[];
            };
            ff1: number[][];
            ff2: number[][];
            b1: number[];
            b2: number[];
        }[];
        outputProj: number[][];
        outputBias: number[];
    };

    /**
     * Overwrites the parameters with a snapshot shaped like the result of `getWeights()`.
     * @param data Snapshot to load.
     */
    setWeights(data: ReturnType<NetworkTransformerRL['getWeights']>): void;

    private _forward;
    private _pool;
}
|
|
330
|
+
|
|
277
331
|
declare function mse(predicted: number[], actual: number[]): number;
|
|
278
332
|
declare function crossEntropy(predicted: number[], actual: number[]): number;
|
|
279
333
|
declare function mseDelta(predicted: number, actual: number): number;
|
|
280
334
|
declare function crossEntropyDelta(predicted: number, actual: number): number;
|
|
281
335
|
declare function crossEntropyDeltaRaw(predicted: number, actual: number): number;
|
|
282
336
|
|
|
283
|
-
export { type Activation, Adam, AttentionHead, EmbeddingMatrix, LSTMLayer, Layer, LayerNorm, Momentum, MultiHeadAttention, Network, NetworkLSTM, type NetworkLSTMOptions, NetworkN, type NetworkNOptions, NetworkTransformer, type NetworkTransformerOptions, Neuron, NeuronN, type Optimizer, type OptimizerFactory, SGD, TransformerBlock, type TransformerBlockOptions, WeightMatrix, crossEntropy, crossEntropyDelta, crossEntropyDeltaRaw, elu, leakyRelu, linear, makeElu, makeLeakyRelu, matMul, mse, mseDelta, relu, sigmoid, softmax, softmaxBackward, tanh, transpose };
|
|
337
|
+
export { type Activation, Adam, AttentionHead, EmbeddingMatrix, LSTMLayer, Layer, LayerNorm, Momentum, MultiHeadAttention, Network, NetworkLSTM, type NetworkLSTMOptions, NetworkN, type NetworkNOptions, NetworkTransformer, type NetworkTransformerOptions, NetworkTransformerRL, type NetworkTransformerRLOptions, Neuron, NeuronN, type Optimizer, type OptimizerFactory, SGD, TransformerBlock, type TransformerBlockOptions, WeightMatrix, crossEntropy, crossEntropyDelta, crossEntropyDeltaRaw, elu, leakyRelu, linear, makeElu, makeLeakyRelu, matMul, mse, mseDelta, relu, sigmoid, softmax, softmaxBackward, tanh, transpose };
|
package/dist/index.js
CHANGED
|
@@ -32,6 +32,7 @@ __export(index_exports, {
|
|
|
32
32
|
NetworkLSTM: () => NetworkLSTM,
|
|
33
33
|
NetworkN: () => NetworkN,
|
|
34
34
|
NetworkTransformer: () => NetworkTransformer,
|
|
35
|
+
NetworkTransformerRL: () => NetworkTransformerRL,
|
|
35
36
|
Neuron: () => Neuron,
|
|
36
37
|
NeuronN: () => NeuronN,
|
|
37
38
|
SGD: () => SGD,
|
|
@@ -1098,6 +1099,172 @@ var NetworkTransformer = class {
|
|
|
1098
1099
|
}
|
|
1099
1100
|
};
|
|
1100
1101
|
|
|
1102
|
+
// src/NetworkTransformerRL.ts
|
|
1103
|
+
var NetworkTransformerRL = class {
  /**
   * Transformer that maps a window of continuous state vectors to Q-values.
   *
   * Pipeline: linear input projection → stacked TransformerBlocks →
   * weighted mean over positions → linear output head with bias.
   *
   * @param seqLen   Number of steps in every input sequence.
   * @param inputDim Length of each step vector.
   * @param options  Optional { d_model = 32, nHeads = 2, d_ff = 64, nBlocks = 2, nActions = 2 }.
   */
  constructor(seqLen, inputDim, options = {}) {
    // Hidden states produced by the most recent forward pass (after all blocks).
    this._projected = null;
    const {
      d_model = 32,
      nHeads = 2,
      d_ff = 64,
      nBlocks = 2,
      nActions = 2
    } = options;
    this.seqLen = seqLen;
    this.inputDim = inputDim;
    this.d_model = d_model;
    this.nActions = nActions;
    this.inputProj = new WeightMatrix(d_model, inputDim);
    this.blocks = Array.from(
      { length: nBlocks },
      () => new TransformerBlock({ d_model, nHeads, d_ff })
    );
    this.outputProj = new WeightMatrix(nActions, d_model);
    this.outputBias = new Array(nActions).fill(0);
    // One optimizer per output bias.
    this.outBiasOpts = Array.from({ length: nActions }, () => new Adam());
  }

  // ── Forward ────────────────────────────────────────────────────────────────
  /**
   * Runs a forward pass.
   * @param sequence seqLen × inputDim matrix of states.
   * @returns nActions Q-values.
   */
  predict(sequence) {
    return this._head(this._pool(this._forward(sequence)));
  }

  // ── Training ───────────────────────────────────────────────────────────────
  /**
   * One gradient step toward `target` under a mean-squared-error loss.
   * @param sequence seqLen × inputDim matrix of states.
   * @param target   Desired Q-value for each action.
   * @param lr       Learning rate.
   * @returns The MSE loss measured before the update.
   */
  train(sequence, target, lr) {
    const h = this._forward(sequence);
    const pooled = this._pool(h);
    const pred = this._head(pooled);

    const n = this.nActions;
    let loss = 0;
    for (let c = 0; c < n; c++) {
      const diff = pred[c] - target[c];
      loss += diff * diff;
    }
    loss /= n;

    // dLoss/dPred for the mean-squared error.
    const dPred = pred.map((p, c) => 2 * (p - target[c]) / n);

    // Both gradients are taken against the pre-update output weights.
    const dPooled = Array.from(
      { length: this.d_model },
      (_, m) => dPred.reduce((s, dp, c) => s + dp * this.outputProj.W[c][m], 0)
    );
    const dWout = dPred.map((dp) => pooled.map((p) => dp * p));

    this.outputProj.update(dWout, lr);
    for (let c = 0; c < n; c++)
      this.outputBias[c] = this.outBiasOpts[c].step(this.outputBias[c], dPred[c], lr);

    // Back-propagate through the pooling with the SAME weights the forward
    // pass used: d pooled / d h[i] = weights[i] / totalWeight. A uniform
    // 1 / seqLen here would not match the weighted mean computed by _pool.
    const weights = this._poolWeights();
    const totalWeight = weights.reduce((a, b) => a + b, 0);
    let dH = weights.map((w) => dPooled.map((v) => v * w / totalWeight));

    for (let b = this.blocks.length - 1; b >= 0; b--)
      dH = this.blocks[b].backward(dH, lr);

    // NOTE(review): the input projection is updated once per position rather
    // than once with the summed gradient; kept as in the original.
    for (let i = 0; i < this.seqLen; i++) {
      const dInputProj = dH[i].map((g) =>
        Array.from({ length: this.inputDim }, (_, m) => g * sequence[i][m])
      );
      this.inputProj.update(dInputProj, lr);
    }
    return loss;
  }

  /** Attention weights from every block, for visualization. */
  getAttentionWeights() {
    return this.blocks.map((b) => b.getAttentionWeights());
  }

  // ── Serialization ──────────────────────────────────────────────────────────
  /** Returns a deep copy of every trainable parameter as plain arrays. */
  getWeights() {
    const copyMatrix = (matrix) => matrix.map((row) => [...row]);
    return {
      inputProj: copyMatrix(this.inputProj.W),
      blocks: this.blocks.map((blk) => ({
        attn: {
          heads: blk.attn.heads.map((head) => ({
            Wq: copyMatrix(head.Wq.W),
            Wk: copyMatrix(head.Wk.W),
            Wv: copyMatrix(head.Wv.W)
          })),
          Wo: copyMatrix(blk.attn.Wo.W)
        },
        norm1: { gamma: [...blk.norm1.gamma], beta: [...blk.norm1.beta] },
        norm2: { gamma: [...blk.norm2.gamma], beta: [...blk.norm2.beta] },
        ff1: copyMatrix(blk.ff1.W),
        ff2: copyMatrix(blk.ff2.W),
        b1: [...blk.b1],
        b2: [...blk.b2]
      })),
      outputProj: copyMatrix(this.outputProj.W),
      outputBias: [...this.outputBias]
    };
  }

  /**
   * Overwrites the parameters with a snapshot shaped like getWeights() output.
   * @param data Snapshot to load; its arrays are copied, not aliased.
   */
  setWeights(data) {
    const copyMatrix = (matrix) => matrix.map((row) => [...row]);
    // Rows are replaced in place so the existing W array object is kept.
    data.inputProj.forEach((row, i) => {
      this.inputProj.W[i] = [...row];
    });
    data.blocks.forEach((saved, b) => {
      const blk = this.blocks[b];
      saved.attn.heads.forEach((savedHead, h) => {
        const head = blk.attn.heads[h];
        head.Wq.W = copyMatrix(savedHead.Wq);
        head.Wk.W = copyMatrix(savedHead.Wk);
        head.Wv.W = copyMatrix(savedHead.Wv);
      });
      blk.attn.Wo.W = copyMatrix(saved.attn.Wo);
      blk.norm1.gamma = [...saved.norm1.gamma];
      blk.norm1.beta = [...saved.norm1.beta];
      blk.norm2.gamma = [...saved.norm2.gamma];
      blk.norm2.beta = [...saved.norm2.beta];
      blk.ff1.W = copyMatrix(saved.ff1);
      blk.ff2.W = copyMatrix(saved.ff2);
      blk.b1 = [...saved.b1];
      blk.b2 = [...saved.b2];
    });
    this.outputProj.W = copyMatrix(data.outputProj);
    this.outputBias = [...data.outputBias];
  }

  // ── Internal ───────────────────────────────────────────────────────────────
  /** Projects every step to d_model, then runs the result through all blocks. */
  _forward(sequence) {
    let h = sequence.map(
      (step) => this.inputProj.W.map(
        (row) => row.reduce((s, w, m) => s + w * step[m], 0)
      )
    );
    for (const block of this.blocks)
      h = block.predict(h);
    this._projected = h;
    return h;
  }

  /** Pooling weight per position: 2 for the last step, 1 for every other step. */
  _poolWeights() {
    const last = this.seqLen - 1;
    return Array.from({ length: this.seqLen }, (_, i) => i === last ? 2 : 1);
  }

  /** Weighted mean of the hidden states over positions (length d_model). */
  _pool(h) {
    const weights = this._poolWeights();
    const totalWeight = weights.reduce((a, b) => a + b, 0);
    return Array.from({ length: this.d_model }, (_, m) => {
      let sum = 0;
      for (let i = 0; i < this.seqLen; i++)
        sum += weights[i] * h[i][m];
      return sum / totalWeight;
    });
  }

  /** Linear output head: outputProj · pooled + outputBias. */
  _head(pooled) {
    return this.outputProj.W.map(
      (row, c) => row.reduce((s, w, m) => s + w * pooled[m], this.outputBias[c])
    );
  }
};
|
|
1267
|
+
|
|
1101
1268
|
// src/losses.ts
|
|
1102
1269
|
function mse(predicted, actual) {
|
|
1103
1270
|
return predicted.reduce((sum, p, i) => sum + (actual[i] - p) ** 2, 0) / predicted.length;
|
|
@@ -1134,6 +1301,7 @@ function crossEntropyDeltaRaw(predicted, actual) {
|
|
|
1134
1301
|
NetworkLSTM,
|
|
1135
1302
|
NetworkN,
|
|
1136
1303
|
NetworkTransformer,
|
|
1304
|
+
NetworkTransformerRL,
|
|
1137
1305
|
Neuron,
|
|
1138
1306
|
NeuronN,
|
|
1139
1307
|
SGD,
|
package/dist/index.mjs
CHANGED
|
@@ -1039,6 +1039,172 @@ var NetworkTransformer = class {
|
|
|
1039
1039
|
}
|
|
1040
1040
|
};
|
|
1041
1041
|
|
|
1042
|
+
// src/NetworkTransformerRL.ts
|
|
1043
|
+
var NetworkTransformerRL = class {
  /**
   * Transformer that maps a window of continuous state vectors to Q-values.
   *
   * Pipeline: linear input projection → stacked TransformerBlocks →
   * weighted mean over positions → linear output head with bias.
   *
   * @param seqLen   Number of steps in every input sequence.
   * @param inputDim Length of each step vector.
   * @param options  Optional { d_model = 32, nHeads = 2, d_ff = 64, nBlocks = 2, nActions = 2 }.
   */
  constructor(seqLen, inputDim, options = {}) {
    // Hidden states produced by the most recent forward pass (after all blocks).
    this._projected = null;
    const {
      d_model = 32,
      nHeads = 2,
      d_ff = 64,
      nBlocks = 2,
      nActions = 2
    } = options;
    this.seqLen = seqLen;
    this.inputDim = inputDim;
    this.d_model = d_model;
    this.nActions = nActions;
    this.inputProj = new WeightMatrix(d_model, inputDim);
    this.blocks = Array.from(
      { length: nBlocks },
      () => new TransformerBlock({ d_model, nHeads, d_ff })
    );
    this.outputProj = new WeightMatrix(nActions, d_model);
    this.outputBias = new Array(nActions).fill(0);
    // One optimizer per output bias.
    this.outBiasOpts = Array.from({ length: nActions }, () => new Adam());
  }

  // ── Forward ────────────────────────────────────────────────────────────────
  /**
   * Runs a forward pass.
   * @param sequence seqLen × inputDim matrix of states.
   * @returns nActions Q-values.
   */
  predict(sequence) {
    return this._head(this._pool(this._forward(sequence)));
  }

  // ── Training ───────────────────────────────────────────────────────────────
  /**
   * One gradient step toward `target` under a mean-squared-error loss.
   * @param sequence seqLen × inputDim matrix of states.
   * @param target   Desired Q-value for each action.
   * @param lr       Learning rate.
   * @returns The MSE loss measured before the update.
   */
  train(sequence, target, lr) {
    const h = this._forward(sequence);
    const pooled = this._pool(h);
    const pred = this._head(pooled);

    const n = this.nActions;
    let loss = 0;
    for (let c = 0; c < n; c++) {
      const diff = pred[c] - target[c];
      loss += diff * diff;
    }
    loss /= n;

    // dLoss/dPred for the mean-squared error.
    const dPred = pred.map((p, c) => 2 * (p - target[c]) / n);

    // Both gradients are taken against the pre-update output weights.
    const dPooled = Array.from(
      { length: this.d_model },
      (_, m) => dPred.reduce((s, dp, c) => s + dp * this.outputProj.W[c][m], 0)
    );
    const dWout = dPred.map((dp) => pooled.map((p) => dp * p));

    this.outputProj.update(dWout, lr);
    for (let c = 0; c < n; c++)
      this.outputBias[c] = this.outBiasOpts[c].step(this.outputBias[c], dPred[c], lr);

    // Back-propagate through the pooling with the SAME weights the forward
    // pass used: d pooled / d h[i] = weights[i] / totalWeight. A uniform
    // 1 / seqLen here would not match the weighted mean computed by _pool.
    const weights = this._poolWeights();
    const totalWeight = weights.reduce((a, b) => a + b, 0);
    let dH = weights.map((w) => dPooled.map((v) => v * w / totalWeight));

    for (let b = this.blocks.length - 1; b >= 0; b--)
      dH = this.blocks[b].backward(dH, lr);

    // NOTE(review): the input projection is updated once per position rather
    // than once with the summed gradient; kept as in the original.
    for (let i = 0; i < this.seqLen; i++) {
      const dInputProj = dH[i].map((g) =>
        Array.from({ length: this.inputDim }, (_, m) => g * sequence[i][m])
      );
      this.inputProj.update(dInputProj, lr);
    }
    return loss;
  }

  /** Attention weights from every block, for visualization. */
  getAttentionWeights() {
    return this.blocks.map((b) => b.getAttentionWeights());
  }

  // ── Serialization ──────────────────────────────────────────────────────────
  /** Returns a deep copy of every trainable parameter as plain arrays. */
  getWeights() {
    const copyMatrix = (matrix) => matrix.map((row) => [...row]);
    return {
      inputProj: copyMatrix(this.inputProj.W),
      blocks: this.blocks.map((blk) => ({
        attn: {
          heads: blk.attn.heads.map((head) => ({
            Wq: copyMatrix(head.Wq.W),
            Wk: copyMatrix(head.Wk.W),
            Wv: copyMatrix(head.Wv.W)
          })),
          Wo: copyMatrix(blk.attn.Wo.W)
        },
        norm1: { gamma: [...blk.norm1.gamma], beta: [...blk.norm1.beta] },
        norm2: { gamma: [...blk.norm2.gamma], beta: [...blk.norm2.beta] },
        ff1: copyMatrix(blk.ff1.W),
        ff2: copyMatrix(blk.ff2.W),
        b1: [...blk.b1],
        b2: [...blk.b2]
      })),
      outputProj: copyMatrix(this.outputProj.W),
      outputBias: [...this.outputBias]
    };
  }

  /**
   * Overwrites the parameters with a snapshot shaped like getWeights() output.
   * @param data Snapshot to load; its arrays are copied, not aliased.
   */
  setWeights(data) {
    const copyMatrix = (matrix) => matrix.map((row) => [...row]);
    // Rows are replaced in place so the existing W array object is kept.
    data.inputProj.forEach((row, i) => {
      this.inputProj.W[i] = [...row];
    });
    data.blocks.forEach((saved, b) => {
      const blk = this.blocks[b];
      saved.attn.heads.forEach((savedHead, h) => {
        const head = blk.attn.heads[h];
        head.Wq.W = copyMatrix(savedHead.Wq);
        head.Wk.W = copyMatrix(savedHead.Wk);
        head.Wv.W = copyMatrix(savedHead.Wv);
      });
      blk.attn.Wo.W = copyMatrix(saved.attn.Wo);
      blk.norm1.gamma = [...saved.norm1.gamma];
      blk.norm1.beta = [...saved.norm1.beta];
      blk.norm2.gamma = [...saved.norm2.gamma];
      blk.norm2.beta = [...saved.norm2.beta];
      blk.ff1.W = copyMatrix(saved.ff1);
      blk.ff2.W = copyMatrix(saved.ff2);
      blk.b1 = [...saved.b1];
      blk.b2 = [...saved.b2];
    });
    this.outputProj.W = copyMatrix(data.outputProj);
    this.outputBias = [...data.outputBias];
  }

  // ── Internal ───────────────────────────────────────────────────────────────
  /** Projects every step to d_model, then runs the result through all blocks. */
  _forward(sequence) {
    let h = sequence.map(
      (step) => this.inputProj.W.map(
        (row) => row.reduce((s, w, m) => s + w * step[m], 0)
      )
    );
    for (const block of this.blocks)
      h = block.predict(h);
    this._projected = h;
    return h;
  }

  /** Pooling weight per position: 2 for the last step, 1 for every other step. */
  _poolWeights() {
    const last = this.seqLen - 1;
    return Array.from({ length: this.seqLen }, (_, i) => i === last ? 2 : 1);
  }

  /** Weighted mean of the hidden states over positions (length d_model). */
  _pool(h) {
    const weights = this._poolWeights();
    const totalWeight = weights.reduce((a, b) => a + b, 0);
    return Array.from({ length: this.d_model }, (_, m) => {
      let sum = 0;
      for (let i = 0; i < this.seqLen; i++)
        sum += weights[i] * h[i][m];
      return sum / totalWeight;
    });
  }

  /** Linear output head: outputProj · pooled + outputBias. */
  _head(pooled) {
    return this.outputProj.W.map(
      (row, c) => row.reduce((s, w, m) => s + w * pooled[m], this.outputBias[c])
    );
  }
};
|
|
1207
|
+
|
|
1042
1208
|
// src/losses.ts
|
|
1043
1209
|
function mse(predicted, actual) {
|
|
1044
1210
|
return predicted.reduce((sum, p, i) => sum + (actual[i] - p) ** 2, 0) / predicted.length;
|
|
@@ -1074,6 +1240,7 @@ export {
|
|
|
1074
1240
|
NetworkLSTM,
|
|
1075
1241
|
NetworkN,
|
|
1076
1242
|
NetworkTransformer,
|
|
1243
|
+
NetworkTransformerRL,
|
|
1077
1244
|
Neuron,
|
|
1078
1245
|
NeuronN,
|
|
1079
1246
|
SGD,
|
package/package.json
CHANGED