moflo 4.8.32 → 4.8.34
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/generate-code-map.mjs +955 -955
- package/bin/index-guidance.mjs +905 -905
- package/bin/index-tests.mjs +728 -728
- package/bin/setup-project.mjs +252 -252
- package/package.json +10 -5
- package/src/@claude-flow/cli/dist/src/commands/doctor.js +1339 -1107
- package/src/@claude-flow/cli/dist/src/index.js +2 -18
- package/src/@claude-flow/cli/dist/src/mcp-tools/hooks-tools.js +17 -0
- package/src/@claude-flow/cli/dist/src/memory/memory-initializer.js +4 -7
- package/src/@claude-flow/cli/dist/src/version.js +6 -0
- package/src/@claude-flow/cli/package.json +1 -1
- package/src/@claude-flow/neural/README.md +260 -0
- package/src/@claude-flow/neural/dist/algorithms/a2c.js +361 -0
- package/src/@claude-flow/neural/dist/algorithms/curiosity.js +392 -0
- package/src/@claude-flow/neural/dist/algorithms/decision-transformer.js +415 -0
- package/src/@claude-flow/neural/dist/algorithms/dqn.js +303 -0
- package/src/@claude-flow/neural/dist/algorithms/index.js +74 -0
- package/src/@claude-flow/neural/dist/algorithms/ppo.js +331 -0
- package/src/@claude-flow/neural/dist/algorithms/q-learning.js +259 -0
- package/src/@claude-flow/neural/dist/algorithms/sarsa.js +297 -0
- package/src/@claude-flow/neural/dist/application/index.js +7 -0
- package/src/@claude-flow/neural/dist/application/services/neural-application-service.js +161 -0
- package/src/@claude-flow/neural/dist/domain/entities/pattern.js +134 -0
- package/src/@claude-flow/neural/dist/domain/index.js +8 -0
- package/src/@claude-flow/neural/dist/domain/services/learning-service.js +195 -0
- package/src/@claude-flow/neural/dist/index.js +201 -0
- package/src/@claude-flow/neural/dist/modes/balanced.js +234 -0
- package/src/@claude-flow/neural/dist/modes/base.js +77 -0
- package/src/@claude-flow/neural/dist/modes/batch.js +316 -0
- package/src/@claude-flow/neural/dist/modes/edge.js +310 -0
- package/src/@claude-flow/neural/dist/modes/index.js +13 -0
- package/src/@claude-flow/neural/dist/modes/real-time.js +196 -0
- package/src/@claude-flow/neural/dist/modes/research.js +389 -0
- package/src/@claude-flow/neural/dist/pattern-learner.js +603 -0
- package/src/@claude-flow/neural/dist/reasoning-bank.js +993 -0
- package/src/@claude-flow/neural/dist/reasoningbank-adapter.js +463 -0
- package/src/@claude-flow/neural/dist/sona-integration.js +326 -0
- package/src/@claude-flow/neural/dist/sona-manager.js +695 -0
- package/src/@claude-flow/neural/dist/types.js +11 -0
- package/src/@claude-flow/neural/package.json +26 -0
|
@@ -0,0 +1,415 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Decision Transformer
|
|
3
|
+
*
|
|
4
|
+
* Implements sequence modeling approach for RL:
|
|
5
|
+
* - Trajectory as sequence: (s, a, R, s, a, R, ...)
|
|
6
|
+
* - Return-conditioned generation
|
|
7
|
+
* - Causal transformer attention
|
|
8
|
+
* - Offline RL from trajectories
|
|
9
|
+
*
|
|
10
|
+
* Performance Target: <10ms per forward pass
|
|
11
|
+
*/
|
|
12
|
+
/**
 * Baseline hyperparameters for the Decision Transformer.
 *
 * `DecisionTransformer` spreads this object first and the caller's overrides
 * second, so any key supplied by the caller wins. Of these keys, the class in
 * this file reads `learningRate`, `gamma`, `miniBatchSize`, `contextLength`,
 * `numHeads`, `numLayers`, `hiddenDim` and `embeddingDim`; the remaining keys
 * are carried along but not consulted here.
 */
export const DEFAULT_DT_CONFIG = {
    // --- optimisation settings ---
    algorithm: 'decision-transformer',
    learningRate: 1e-4,
    gamma: 0.99, // discount applied when accumulating returns-to-go
    entropyCoef: 0,
    valueLossCoef: 0,
    maxGradNorm: 1,
    epochs: 1,
    miniBatchSize: 64, // upper bound on trajectories sampled per train() call

    // --- model shape ---
    contextLength: 20, // number of most recent steps fed to the model
    numHeads: 4,
    numLayers: 2,
    hiddenDim: 64,
    embeddingDim: 32,
    dropout: 0.1,
};
|
|
31
|
+
/**
 * Decision Transformer Implementation
 *
 * Treats a trajectory as a sequence of (return-to-go, state, action) steps.
 * Each step is expanded into three tokens of `embeddingDim` values, the tokens
 * are passed through `numLayers` simplified causal transformer layers, and the
 * token of the most recent state is projected onto `numActions` logits that
 * are turned into a probability distribution with a softmax.
 *
 * All weights are flat, row-major `Float32Array`s.
 */
export class DecisionTransformer {
    config;
    // Embeddings
    stateEmbed;
    actionEmbed;
    returnEmbed;
    posEmbed;
    // Transformer layers (simplified)
    attentionWeights;
    ffnWeights;
    // Output head
    actionHead;
    // Training buffer
    trajectoryBuffer = [];
    // Dimensions
    stateDim = 768;
    numActions = 4;
    // Statistics
    updateCount = 0;
    avgLoss = 0;

    /**
     * @param {object} [config] - Overrides merged on top of DEFAULT_DT_CONFIG.
     */
    constructor(config = {}) {
        this.config = { ...DEFAULT_DT_CONFIG, ...config };
        const { embeddingDim, hiddenDim, contextLength, numLayers } = this.config;
        // Initialize embeddings
        this.stateEmbed = this.initEmbedding(this.stateDim, embeddingDim);
        this.actionEmbed = this.initEmbedding(this.numActions, embeddingDim);
        this.returnEmbed = this.initEmbedding(1, embeddingDim);
        // Three tokens (return, state, action) per step of context.
        this.posEmbed = this.initEmbedding(contextLength * 3, embeddingDim);
        // Initialize transformer layers
        this.attentionWeights = [];
        this.ffnWeights = [];
        for (let l = 0; l < numLayers; l++) {
            // Attention: Q, K, V, O projections
            this.attentionWeights.push([
                this.initWeight(embeddingDim, hiddenDim), // Q
                this.initWeight(embeddingDim, hiddenDim), // K
                this.initWeight(embeddingDim, hiddenDim), // V
                this.initWeight(hiddenDim, embeddingDim), // O
            ]);
            // FFN: up and down projections
            this.ffnWeights.push([
                this.initWeight(embeddingDim, hiddenDim * 4),
                this.initWeight(hiddenDim * 4, embeddingDim),
            ]);
        }
        // Action prediction head
        this.actionHead = this.initWeight(embeddingDim, this.numActions);
    }

    /**
     * Add trajectory for training.
     *
     * Only trajectories flagged `isComplete` with at least one step are kept;
     * anything else is silently ignored. The buffer keeps the 1000 most
     * recently added trajectories.
     *
     * @param {{isComplete: boolean, steps: Array}} trajectory
     */
    addTrajectory(trajectory) {
        if (trajectory.isComplete && trajectory.steps.length > 0) {
            this.trajectoryBuffer.push(trajectory);
            // Keep buffer bounded
            if (this.trajectoryBuffer.length > 1000) {
                this.trajectoryBuffer = this.trajectoryBuffer.slice(-1000);
            }
        }
    }

    /**
     * Train on buffered trajectories.
     * Target: <10ms per batch (a warning is logged when exceeded).
     *
     * Samples up to `miniBatchSize` trajectories with replacement. For every
     * position t >= 1 the model predicts the action of step t from the
     * preceding steps, accumulates cross-entropy loss, and applies
     * `updateWeights`.
     *
     * @returns {{loss: number, accuracy: number}} Mean loss and fraction of
     *   correct argmax predictions over all evaluated positions; both are 0
     *   when the buffer is empty or no position could be evaluated.
     */
    train() {
        const startTime = performance.now();
        if (this.trajectoryBuffer.length === 0) {
            return { loss: 0, accuracy: 0 };
        }
        // Sample mini-batch of trajectories
        const batchSize = Math.min(this.config.miniBatchSize, this.trajectoryBuffer.length);
        const batch = [];
        for (let i = 0; i < batchSize; i++) {
            const idx = Math.floor(Math.random() * this.trajectoryBuffer.length);
            batch.push(this.trajectoryBuffer[idx]);
        }
        let totalLoss = 0;
        let correct = 0;
        let total = 0;
        for (const trajectory of batch) {
            // Create sequence from trajectory
            const sequence = this.createSequence(trajectory);
            if (sequence.length < 2) {
                continue;
            }
            // Forward pass and compute loss
            for (let t = 1; t < sequence.length; t++) {
                // Use context up to (but excluding) position t
                const context = sequence.slice(Math.max(0, t - this.config.contextLength), t);
                const target = sequence[t];
                // Predict action
                const predicted = this.forward(context);
                const predictedAction = this.argmax(predicted);
                // Cross-entropy loss; the epsilon guards against log(0)
                totalLoss += -Math.log(predicted[target.action] + 1e-8);
                if (predictedAction === target.action) {
                    correct++;
                }
                total++;
                // Gradient update (simplified)
                this.updateWeights(context, target.action, predicted);
            }
        }
        this.updateCount++;
        this.avgLoss = total > 0 ? totalLoss / total : 0;
        const elapsed = performance.now() - startTime;
        if (elapsed > 10) {
            console.warn(`DT training exceeded target: ${elapsed.toFixed(2)}ms > 10ms`);
        }
        return {
            loss: this.avgLoss,
            accuracy: total > 0 ? correct / total : 0,
        };
    }

    /**
     * Get action conditioned on target return.
     *
     * @param {Array} states - State vectors, oldest first.
     * @param {Array<number>} actions - Action indices aligned with `states`;
     *   missing entries default to 0.
     * @param {number} targetReturn - Return-to-go assigned to the first entry.
     * @returns {number} Index of the most probable action (0 when `states`
     *   is empty, because the distribution is then uniform).
     */
    getAction(states, actions, targetReturn) {
        // Build sequence
        const sequence = [];
        let returnToGo = targetReturn;
        for (let i = 0; i < states.length; i++) {
            sequence.push({
                returnToGo,
                state: states[i],
                action: actions[i] ?? 0,
                timestep: i,
            });
            // Decrease return-to-go by estimated reward.
            // NOTE(review): because the decrement is skipped for i === 0, the
            // first two entries carry the same return-to-go — confirm intent.
            if (i > 0) {
                returnToGo -= 0.1; // Default reward decrement for inference
            }
        }
        // Forward pass
        return this.argmax(this.forward(sequence));
    }

    /**
     * Forward pass through transformer.
     *
     * Only the last `contextLength` entries of `sequence` are used.
     *
     * @param {Array<{returnToGo: number, state: ArrayLike<number>, action: number}>} sequence
     * @returns {Float32Array} Probabilities over `numActions` actions. An
     *   empty sequence yields a uniform distribution.
     */
    forward(sequence) {
        const seqLen = Math.min(sequence.length, this.config.contextLength);
        // With nothing to condition on there is no state token to read; the
        // slice below would be empty and every probability would become NaN.
        if (seqLen <= 0) {
            return new Float32Array(this.numActions).fill(1 / this.numActions);
        }
        const embedDim = this.config.embeddingDim;
        // Initialize hidden states (simplified: stack all modalities)
        const hidden = new Float32Array(seqLen * 3 * embedDim);
        for (let t = 0; t < seqLen; t++) {
            const entry = sequence[sequence.length - seqLen + t];
            const baseIdx = t * 3 * embedDim;
            // Embed return
            for (let d = 0; d < embedDim; d++) {
                hidden[baseIdx + d] = entry.returnToGo * this.returnEmbed[d];
            }
            // Embed state (state vectors longer than stateDim are truncated)
            const usedStateDims = Math.min(entry.state.length, this.stateDim);
            for (let d = 0; d < embedDim; d++) {
                let stateSum = 0;
                for (let s = 0; s < usedStateDims; s++) {
                    stateSum += entry.state[s] * this.stateEmbed[s * embedDim + d];
                }
                hidden[baseIdx + embedDim + d] = stateSum;
            }
            // Embed action. An index outside the embedding table would read
            // `undefined` and spread NaN through attention, so such actions
            // keep the zero vector the buffer was initialised with.
            const action = entry.action;
            if (Number.isInteger(action) && action >= 0 && action < this.numActions) {
                for (let d = 0; d < embedDim; d++) {
                    hidden[baseIdx + 2 * embedDim + d] = this.actionEmbed[action * embedDim + d];
                }
            }
            // Add positional embedding
            for (let d = 0; d < 3 * embedDim; d++) {
                hidden[baseIdx + d] += this.posEmbed[t * 3 * embedDim + d] ?? 0;
            }
        }
        // Apply transformer layers
        for (let l = 0; l < this.config.numLayers; l++) {
            hidden.set(this.transformerLayer(hidden, seqLen * 3, l));
        }
        // Extract last state position embedding for action prediction. Tokens
        // per step are [return, state, action], so the last state token is
        // the second-to-last token overall.
        const lastStateIdx = (seqLen * 3 - 2) * embedDim;
        const lastState = hidden.slice(lastStateIdx, lastStateIdx + embedDim);
        // Action prediction
        const logits = new Float32Array(this.numActions);
        for (let a = 0; a < this.numActions; a++) {
            let sum = 0;
            for (let d = 0; d < embedDim; d++) {
                sum += lastState[d] * this.actionHead[d * this.numActions + a];
            }
            logits[a] = sum;
        }
        return this.softmax(logits);
    }

    /**
     * Get statistics.
     *
     * @returns {{updateCount: number, bufferSize: number, avgLoss: number,
     *   contextLength: number, numLayers: number}}
     */
    getStats() {
        return {
            updateCount: this.updateCount,
            bufferSize: this.trajectoryBuffer.length,
            avgLoss: this.avgLoss,
            contextLength: this.config.contextLength,
            numLayers: this.config.numLayers,
        };
    }

    // ==========================================================================
    // Private Methods
    // ==========================================================================

    /**
     * Create an `inputDim x outputDim` table filled with uniform random values
     * in [-0.5, 0.5) scaled by sqrt(2 / inputDim).
     */
    initEmbedding(inputDim, outputDim) {
        const embed = new Float32Array(inputDim * outputDim);
        const scale = Math.sqrt(2 / inputDim);
        for (let i = 0; i < embed.length; i++) {
            embed[i] = (Math.random() - 0.5) * scale;
        }
        return embed;
    }

    /**
     * Create an `inputDim x outputDim` weight matrix using the same
     * initialisation as `initEmbedding`.
     */
    initWeight(inputDim, outputDim) {
        const weight = new Float32Array(inputDim * outputDim);
        const scale = Math.sqrt(2 / inputDim);
        for (let i = 0; i < weight.length; i++) {
            weight[i] = (Math.random() - 0.5) * scale;
        }
        return weight;
    }

    /**
     * Convert a trajectory into model entries.
     *
     * Reads `reward`, `stateAfter` and `action` from every step. The
     * return-to-go of step t is the `gamma`-discounted sum of rewards from t
     * to the end; the action is mapped to an index with `hashAction`.
     *
     * @returns {Array<{returnToGo: number, state: *, action: number, timestep: number}>}
     */
    createSequence(trajectory) {
        const { steps } = trajectory;
        // Compute returns-to-go by walking backwards from the final step
        const returnsToGo = new Array(steps.length).fill(0);
        let cumReturn = 0;
        for (let t = steps.length - 1; t >= 0; t--) {
            cumReturn = steps[t].reward + this.config.gamma * cumReturn;
            returnsToGo[t] = cumReturn;
        }
        // Create sequence entries
        return steps.map((step, t) => ({
            returnToGo: returnsToGo[t],
            state: step.stateAfter,
            action: this.hashAction(step.action),
            timestep: t,
        }));
    }

    /**
     * Apply one simplified transformer layer: causal self-attention followed
     * by a GELU feed-forward network, each with a residual connection.
     *
     * @param {Float32Array} hidden - `seqLen` tokens of `embeddingDim` values.
     * @param {number} seqLen - Number of tokens (not steps).
     * @param {number} layerIdx - Which layer's weights to use.
     * @returns {Float32Array} New buffer with the same layout as `hidden`.
     */
    transformerLayer(hidden, seqLen, layerIdx) {
        const embedDim = this.config.embeddingDim;
        const hiddenDim = this.config.hiddenDim;
        const numHeads = this.config.numHeads;
        // Only used for score scaling; attention itself is computed over the
        // full hiddenDim rather than per head.
        const headDim = hiddenDim / numHeads;
        const output = new Float32Array(hidden.length);
        // Self-attention (simplified causal)
        const [Wq, Wk, Wv, Wo] = this.attentionWeights[layerIdx];
        // Compute Q, K, V for all positions
        const Q = new Float32Array(seqLen * hiddenDim);
        const K = new Float32Array(seqLen * hiddenDim);
        const V = new Float32Array(seqLen * hiddenDim);
        for (let pos = 0; pos < seqLen; pos++) {
            for (let h = 0; h < hiddenDim; h++) {
                let qSum = 0;
                let kSum = 0;
                let vSum = 0;
                for (let d = 0; d < embedDim; d++) {
                    const hiddenVal = hidden[pos * embedDim + d];
                    qSum += hiddenVal * Wq[d * hiddenDim + h];
                    kSum += hiddenVal * Wk[d * hiddenDim + h];
                    vSum += hiddenVal * Wv[d * hiddenDim + h];
                }
                Q[pos * hiddenDim + h] = qSum;
                K[pos * hiddenDim + h] = kSum;
                V[pos * hiddenDim + h] = vSum;
            }
        }
        // Causal attention: position `pos` only sees keys 0..pos
        const scoreScale = Math.sqrt(headDim);
        for (let pos = 0; pos < seqLen; pos++) {
            // Compute attention scores for current position
            const scores = new Float32Array(pos + 1);
            for (let k = 0; k <= pos; k++) {
                let score = 0;
                for (let h = 0; h < hiddenDim; h++) {
                    score += Q[pos * hiddenDim + h] * K[k * hiddenDim + h];
                }
                scores[k] = score / scoreScale;
            }
            // Softmax
            const weights = this.softmax(scores);
            // Weighted sum of values
            const attnOut = new Float32Array(hiddenDim);
            for (let k = 0; k <= pos; k++) {
                for (let h = 0; h < hiddenDim; h++) {
                    attnOut[h] += weights[k] * V[k * hiddenDim + h];
                }
            }
            // Output projection
            for (let d = 0; d < embedDim; d++) {
                let sum = hidden[pos * embedDim + d]; // Residual
                for (let h = 0; h < hiddenDim; h++) {
                    sum += attnOut[h] * Wo[h * embedDim + d];
                }
                output[pos * embedDim + d] = sum;
            }
        }
        // FFN with residual
        const [Wup, Wdown] = this.ffnWeights[layerIdx];
        const ffnHiddenDim = hiddenDim * 4;
        for (let pos = 0; pos < seqLen; pos++) {
            // Up projection + GELU
            const ffnHidden = new Float32Array(ffnHiddenDim);
            for (let h = 0; h < ffnHiddenDim; h++) {
                let sum = 0;
                for (let d = 0; d < embedDim; d++) {
                    sum += output[pos * embedDim + d] * Wup[d * ffnHiddenDim + h];
                }
                // GELU approximation
                ffnHidden[h] = sum * 0.5 * (1 + Math.tanh(0.7978845608 * (sum + 0.044715 * sum * sum * sum)));
            }
            // Down projection. The whole up projection above has already been
            // read from `output`, so overwriting it in place here is safe.
            for (let d = 0; d < embedDim; d++) {
                let sum = output[pos * embedDim + d]; // Residual
                for (let h = 0; h < ffnHiddenDim; h++) {
                    sum += ffnHidden[h] * Wdown[h * embedDim + d];
                }
                output[pos * embedDim + d] = sum;
            }
        }
        return output;
    }

    /**
     * Simplified gradient update for the action head.
     *
     * NOTE(review): the step applied to each weight is `lr * grad[a] * 0.1`;
     * it does not depend on `context` or on the features fed to the head, so
     * every row of the head receives the same update. Left as-is because it
     * is labelled "simplified" — confirm whether a true gradient is wanted.
     *
     * @param {Array} context - Unused.
     * @param {number} targetAction - Index of the correct action.
     * @param {ArrayLike<number>} predicted - Probabilities from `forward`.
     */
    updateWeights(context, targetAction, predicted) {
        const lr = this.config.learningRate;
        const embedDim = this.config.embeddingDim;
        // Gradient of cross-entropy w.r.t. the logits: p - onehot(target)
        const grad = new Float32Array(this.numActions);
        for (let a = 0; a < this.numActions; a++) {
            grad[a] = predicted[a] - (a === targetAction ? 1 : 0);
        }
        // Update action head (simplified)
        for (let d = 0; d < embedDim; d++) {
            for (let a = 0; a < this.numActions; a++) {
                this.actionHead[d * this.numActions + a] -= lr * grad[a] * 0.1;
            }
        }
    }

    /**
     * Softmax, with the maximum subtracted before exponentiating.
     *
     * @param {Float32Array} logits
     * @returns {Float32Array} New array of the same length.
     */
    softmax(logits) {
        const max = Math.max(...logits);
        const exps = new Float32Array(logits.length);
        let sum = 0;
        for (let i = 0; i < logits.length; i++) {
            exps[i] = Math.exp(logits[i] - max);
            sum += exps[i];
        }
        for (let i = 0; i < exps.length; i++) {
            exps[i] /= sum;
        }
        return exps;
    }

    /**
     * Index of the largest value; the first one wins on ties and index 0 is
     * returned when no element compares greater (including all-NaN input).
     */
    argmax(values) {
        let maxIdx = 0;
        let maxVal = values[0];
        for (let i = 1; i < values.length; i++) {
            if (values[i] > maxVal) {
                maxVal = values[i];
                maxIdx = i;
            }
        }
        return maxIdx;
    }

    /**
     * Map an action string to an index in [0, numActions) with a base-31
     * rolling hash over its UTF-16 code units.
     */
    hashAction(action) {
        let hash = 0;
        for (let i = 0; i < action.length; i++) {
            hash = (hash * 31 + action.charCodeAt(i)) % this.numActions;
        }
        return hash;
    }
}
|
|
409
|
+
/**
 * Factory function.
 *
 * Builds a new DecisionTransformer, forwarding `config` untouched to the
 * constructor (which applies its own defaults when `config` is omitted).
 *
 * @param {object} [config] - Optional configuration overrides.
 * @returns {DecisionTransformer} A freshly constructed instance.
 */
export function createDecisionTransformer(config) {
    const transformer = new DecisionTransformer(config);
    return transformer;
}
|
|
415
|
+
//# sourceMappingURL=decision-transformer.js.map
|