moflo 4.8.32 → 4.8.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/bin/generate-code-map.mjs +955 -955
  2. package/bin/index-guidance.mjs +905 -905
  3. package/bin/index-tests.mjs +728 -728
  4. package/bin/setup-project.mjs +252 -252
  5. package/package.json +10 -5
  6. package/src/@claude-flow/cli/dist/src/commands/doctor.js +1339 -1107
  7. package/src/@claude-flow/cli/dist/src/index.js +2 -18
  8. package/src/@claude-flow/cli/dist/src/mcp-tools/hooks-tools.js +17 -0
  9. package/src/@claude-flow/cli/dist/src/memory/memory-initializer.js +4 -7
  10. package/src/@claude-flow/cli/dist/src/version.js +6 -0
  11. package/src/@claude-flow/cli/package.json +1 -1
  12. package/src/@claude-flow/neural/README.md +260 -0
  13. package/src/@claude-flow/neural/dist/algorithms/a2c.js +361 -0
  14. package/src/@claude-flow/neural/dist/algorithms/curiosity.js +392 -0
  15. package/src/@claude-flow/neural/dist/algorithms/decision-transformer.js +415 -0
  16. package/src/@claude-flow/neural/dist/algorithms/dqn.js +303 -0
  17. package/src/@claude-flow/neural/dist/algorithms/index.js +74 -0
  18. package/src/@claude-flow/neural/dist/algorithms/ppo.js +331 -0
  19. package/src/@claude-flow/neural/dist/algorithms/q-learning.js +259 -0
  20. package/src/@claude-flow/neural/dist/algorithms/sarsa.js +297 -0
  21. package/src/@claude-flow/neural/dist/application/index.js +7 -0
  22. package/src/@claude-flow/neural/dist/application/services/neural-application-service.js +161 -0
  23. package/src/@claude-flow/neural/dist/domain/entities/pattern.js +134 -0
  24. package/src/@claude-flow/neural/dist/domain/index.js +8 -0
  25. package/src/@claude-flow/neural/dist/domain/services/learning-service.js +195 -0
  26. package/src/@claude-flow/neural/dist/index.js +201 -0
  27. package/src/@claude-flow/neural/dist/modes/balanced.js +234 -0
  28. package/src/@claude-flow/neural/dist/modes/base.js +77 -0
  29. package/src/@claude-flow/neural/dist/modes/batch.js +316 -0
  30. package/src/@claude-flow/neural/dist/modes/edge.js +310 -0
  31. package/src/@claude-flow/neural/dist/modes/index.js +13 -0
  32. package/src/@claude-flow/neural/dist/modes/real-time.js +196 -0
  33. package/src/@claude-flow/neural/dist/modes/research.js +389 -0
  34. package/src/@claude-flow/neural/dist/pattern-learner.js +603 -0
  35. package/src/@claude-flow/neural/dist/reasoning-bank.js +993 -0
  36. package/src/@claude-flow/neural/dist/reasoningbank-adapter.js +463 -0
  37. package/src/@claude-flow/neural/dist/sona-integration.js +326 -0
  38. package/src/@claude-flow/neural/dist/sona-manager.js +695 -0
  39. package/src/@claude-flow/neural/dist/types.js +11 -0
  40. package/src/@claude-flow/neural/package.json +26 -0
package/src/@claude-flow/neural/dist/algorithms/a2c.js
@@ -0,0 +1,361 @@
+ /**
+  * Advantage Actor-Critic (A2C)
+  *
+  * Implements synchronous A2C algorithm with:
+  * - Shared actor-critic network
+  * - N-step returns
+  * - Entropy regularization
+  * - Advantage normalization
+  *
+  * Performance Target: <10ms per update step
+  */
+ /**
+  * Default A2C configuration
+  */
+ export const DEFAULT_A2C_CONFIG = {
+     algorithm: 'a2c',
+     learningRate: 0.0007,
+     gamma: 0.99,
+     entropyCoef: 0.01,
+     valueLossCoef: 0.5,
+     maxGradNorm: 0.5,
+     epochs: 1,
+     miniBatchSize: 32,
+     nSteps: 5,
+     useGAE: true,
+     gaeLambda: 0.95,
+ };
+ /**
+  * A2C Algorithm Implementation
+  */
+ export class A2CAlgorithm {
+     config;
+     // Shared network weights
+     sharedWeights;
+     policyHead;
+     valueHead;
+     // Optimizer state
+     sharedMomentum;
+     policyMomentum;
+     valueMomentum;
+     // Experience buffer for n-step
+     buffer = [];
+     // Dimensions
+     inputDim = 768;
+     hiddenDim = 64;
+     numActions = 4;
+     // Statistics
+     updateCount = 0;
+     avgPolicyLoss = 0;
+     avgValueLoss = 0;
+     avgEntropy = 0;
+     constructor(config = {}) {
+         this.config = { ...DEFAULT_A2C_CONFIG, ...config };
+         // Initialize network
+         const scale = Math.sqrt(2 / this.inputDim);
+         this.sharedWeights = new Float32Array(this.inputDim * this.hiddenDim);
+         this.policyHead = new Float32Array(this.hiddenDim * this.numActions);
+         this.valueHead = new Float32Array(this.hiddenDim);
+         for (let i = 0; i < this.sharedWeights.length; i++) {
+             this.sharedWeights[i] = (Math.random() - 0.5) * scale;
+         }
+         for (let i = 0; i < this.policyHead.length; i++) {
+             this.policyHead[i] = (Math.random() - 0.5) * 0.1;
+         }
+         for (let i = 0; i < this.valueHead.length; i++) {
+             this.valueHead[i] = (Math.random() - 0.5) * 0.1;
+         }
+         // Initialize momentum
+         this.sharedMomentum = new Float32Array(this.sharedWeights.length);
+         this.policyMomentum = new Float32Array(this.policyHead.length);
+         this.valueMomentum = new Float32Array(this.valueHead.length);
+     }
+     /**
+      * Add experience from trajectory
+      */
+     addExperience(trajectory) {
+         for (const step of trajectory.steps) {
+             const { probs, value, entropy } = this.evaluate(step.stateAfter);
+             const action = this.hashAction(step.action);
+             this.buffer.push({
+                 state: step.stateAfter,
+                 action,
+                 reward: step.reward,
+                 value,
+                 logProb: Math.log(probs[action] + 1e-8),
+                 entropy,
+             });
+         }
+     }
+     /**
+      * Perform A2C update
+      * Target: <10ms
+      */
+     update() {
+         const startTime = performance.now();
+         if (this.buffer.length < this.config.nSteps) {
+             return { policyLoss: 0, valueLoss: 0, entropy: 0 };
+         }
+         const batchSize = this.buffer.length;
+         // Compute returns and advantages
+         const returns = this.computeReturns();
+         const advantages = this.computeAdvantages(returns);
+         // Initialize gradients
+         const sharedGrad = new Float32Array(this.sharedWeights.length);
+         const policyGrad = new Float32Array(this.policyHead.length);
+         const valueGrad = new Float32Array(this.valueHead.length);
+         let totalPolicyLoss = 0;
+         let totalValueLoss = 0;
+         let totalEntropy = 0;
+         // Process all experiences
+         for (let i = 0; i < batchSize; i++) {
+             const exp = this.buffer[i];
+             const advantage = advantages[i];
+             const return_ = returns[i];
+             // Get current policy and value
+             const { probs, value, hidden } = this.forwardWithHidden(exp.state);
+             const logProb = Math.log(probs[exp.action] + 1e-8);
+             // Policy loss
+             const policyLoss = -logProb * advantage;
+             totalPolicyLoss += policyLoss;
+             // Value loss
+             const valueLoss = (value - return_) ** 2;
+             totalValueLoss += valueLoss;
+             // Entropy
+             let entropy = 0;
+             for (const p of probs) {
+                 if (p > 0)
+                     entropy -= p * Math.log(p);
+             }
+             totalEntropy += entropy;
+             // Accumulate gradients
+             this.accumulateGradients(sharedGrad, policyGrad, valueGrad, exp.state, hidden, exp.action, advantage, value - return_);
+         }
+         // Add entropy bonus to policy gradient
+         for (let i = 0; i < policyGrad.length; i++) {
+             policyGrad[i] -= this.config.entropyCoef * totalEntropy / batchSize;
+         }
+         // Apply gradients
+         this.applyGradients(sharedGrad, policyGrad, valueGrad, batchSize);
+         // Clear the buffer; the averages below use the batch size captured
+         // above so they never divide by the now-empty buffer's length
+         this.buffer = [];
+         this.updateCount++;
+         this.avgPolicyLoss = totalPolicyLoss / batchSize;
+         this.avgValueLoss = totalValueLoss / batchSize;
+         this.avgEntropy = totalEntropy / batchSize;
+         const elapsed = performance.now() - startTime;
+         if (elapsed > 10) {
+             console.warn(`A2C update exceeded target: ${elapsed.toFixed(2)}ms > 10ms`);
+         }
+         return {
+             policyLoss: this.avgPolicyLoss,
+             valueLoss: this.avgValueLoss,
+             entropy: this.avgEntropy,
+         };
+     }
+     /**
+      * Get action from policy
+      */
+     getAction(state) {
+         const { probs, value } = this.evaluate(state);
+         const action = this.sampleAction(probs);
+         return { action, value };
+     }
+     /**
+      * Get statistics
+      */
+     getStats() {
+         return {
+             updateCount: this.updateCount,
+             bufferSize: this.buffer.length,
+             avgPolicyLoss: this.avgPolicyLoss,
+             avgValueLoss: this.avgValueLoss,
+             avgEntropy: this.avgEntropy,
+         };
+     }
+     // ==========================================================================
+     // Private Methods
+     // ==========================================================================
+     evaluate(state) {
+         const { probs, value } = this.forward(state);
+         let entropy = 0;
+         for (const p of probs) {
+             if (p > 0)
+                 entropy -= p * Math.log(p);
+         }
+         return { probs, value, entropy };
+     }
+     forward(state) {
+         // Same pass as forwardWithHidden, discarding the hidden activations
+         const { probs, value } = this.forwardWithHidden(state);
+         return { probs, value };
+     }
+     forwardWithHidden(state) {
+         // Shared hidden layer
+         const hidden = new Float32Array(this.hiddenDim);
+         for (let h = 0; h < this.hiddenDim; h++) {
+             let sum = 0;
+             for (let i = 0; i < Math.min(state.length, this.inputDim); i++) {
+                 sum += state[i] * this.sharedWeights[i * this.hiddenDim + h];
+             }
+             hidden[h] = Math.max(0, sum); // ReLU
+         }
+         // Policy head
+         const logits = new Float32Array(this.numActions);
+         for (let a = 0; a < this.numActions; a++) {
+             let sum = 0;
+             for (let h = 0; h < this.hiddenDim; h++) {
+                 sum += hidden[h] * this.policyHead[h * this.numActions + a];
+             }
+             logits[a] = sum;
+         }
+         const probs = this.softmax(logits);
+         // Value head
+         let value = 0;
+         for (let h = 0; h < this.hiddenDim; h++) {
+             value += hidden[h] * this.valueHead[h];
+         }
+         return { probs, value, hidden };
+     }
+     computeReturns() {
+         const returns = new Array(this.buffer.length).fill(0);
+         let cumReturn = 0;
+         // Bootstrap from last value if not terminal
+         if (this.buffer.length > 0) {
+             cumReturn = this.buffer[this.buffer.length - 1].value;
+         }
+         for (let t = this.buffer.length - 1; t >= 0; t--) {
+             cumReturn = this.buffer[t].reward + this.config.gamma * cumReturn;
+             returns[t] = cumReturn;
+         }
+         return returns;
+     }
+     computeAdvantages(returns) {
+         if (this.config.useGAE) {
+             return this.computeGAE();
+         }
+         // Simple advantage: return - value
+         const advantages = new Array(this.buffer.length).fill(0);
+         for (let i = 0; i < this.buffer.length; i++) {
+             advantages[i] = returns[i] - this.buffer[i].value;
+         }
+         return this.normalizeAdvantages(advantages);
+     }
+     computeGAE() {
+         // GAE recurrence: delta_t = r_t + gamma * V(s_{t+1}) - V(s_t),
+         // A_t = delta_t + gamma * lambda * A_{t+1}
+         const advantages = new Array(this.buffer.length).fill(0);
+         let lastGae = 0;
+         for (let t = this.buffer.length - 1; t >= 0; t--) {
+             const nextValue = t < this.buffer.length - 1
+                 ? this.buffer[t + 1].value
+                 : 0;
+             const delta = this.buffer[t].reward + this.config.gamma * nextValue - this.buffer[t].value;
+             lastGae = delta + this.config.gamma * this.config.gaeLambda * lastGae;
+             advantages[t] = lastGae;
+         }
+         return this.normalizeAdvantages(advantages);
+     }
+     normalizeAdvantages(advantages) {
+         // Normalize to zero mean and unit variance
+         const mean = advantages.reduce((a, b) => a + b, 0) / advantages.length;
+         const std = Math.sqrt(advantages.reduce((a, b) => a + (b - mean) ** 2, 0) / advantages.length) + 1e-8;
+         return advantages.map(a => (a - mean) / std);
+     }
+     accumulateGradients(sharedGrad, policyGrad, valueGrad, state, hidden, action, advantage, valueError) {
+         // Policy gradient
+         for (let h = 0; h < this.hiddenDim; h++) {
+             policyGrad[h * this.numActions + action] += hidden[h] * advantage;
+         }
+         // Value gradient
+         for (let h = 0; h < this.hiddenDim; h++) {
+             valueGrad[h] += hidden[h] * valueError * this.config.valueLossCoef;
+         }
+         // Shared layer gradient (backprop through both heads)
+         for (let h = 0; h < this.hiddenDim; h++) {
+             if (hidden[h] > 0) { // ReLU gradient
+                 const policySignal = advantage * this.policyHead[h * this.numActions + action];
+                 const valueSignal = valueError * this.valueHead[h] * this.config.valueLossCoef;
+                 const totalSignal = policySignal + valueSignal;
+                 for (let i = 0; i < Math.min(state.length, this.inputDim); i++) {
+                     sharedGrad[i * this.hiddenDim + h] += state[i] * totalSignal;
+                 }
+             }
+         }
+     }
+     applyGradients(sharedGrad, policyGrad, valueGrad, batchSize) {
+         const lr = this.config.learningRate / batchSize;
+         const beta = 0.9;
+         // SGD with momentum; note that maxGradNorm is applied as an
+         // element-wise clip here, not a true global-norm clip
+         const clip = (g) => Math.max(Math.min(g, this.config.maxGradNorm), -this.config.maxGradNorm);
+         // Apply to shared weights
+         for (let i = 0; i < this.sharedWeights.length; i++) {
+             this.sharedMomentum[i] = beta * this.sharedMomentum[i] + (1 - beta) * clip(sharedGrad[i]);
+             this.sharedWeights[i] -= lr * this.sharedMomentum[i];
+         }
+         // Apply to policy head
+         for (let i = 0; i < this.policyHead.length; i++) {
+             this.policyMomentum[i] = beta * this.policyMomentum[i] + (1 - beta) * clip(policyGrad[i]);
+             this.policyHead[i] -= lr * this.policyMomentum[i];
+         }
+         // Apply to value head
+         for (let i = 0; i < this.valueHead.length; i++) {
+             this.valueMomentum[i] = beta * this.valueMomentum[i] + (1 - beta) * clip(valueGrad[i]);
+             this.valueHead[i] -= lr * this.valueMomentum[i];
+         }
+     }
+     softmax(logits) {
+         // Numerically stable softmax: subtract the max logit before exponentiating
+         const max = Math.max(...logits);
+         const exps = new Float32Array(logits.length);
+         let sum = 0;
+         for (let i = 0; i < logits.length; i++) {
+             exps[i] = Math.exp(logits[i] - max);
+             sum += exps[i];
+         }
+         for (let i = 0; i < exps.length; i++) {
+             exps[i] /= sum;
+         }
+         return exps;
+     }
+     sampleAction(probs) {
+         // Inverse-CDF sampling from the categorical policy distribution
+         const r = Math.random();
+         let cumSum = 0;
+         for (let i = 0; i < probs.length; i++) {
+             cumSum += probs[i];
+             if (r < cumSum)
+                 return i;
+         }
+         return probs.length - 1;
+     }
+     hashAction(action) {
+         // Map an arbitrary action string to a discrete action index
+         let hash = 0;
+         for (let i = 0; i < action.length; i++) {
+             hash = (hash * 31 + action.charCodeAt(i)) % this.numActions;
+         }
+         return hash;
+     }
+ }
+ /**
+  * Factory function
+  */
+ export function createA2C(config) {
+     return new A2CAlgorithm(config);
+ }
+ //# sourceMappingURL=a2c.js.map
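
Usage note (editor's sketch, not from the package): the snippet below shows one way the exported factory might be driven. The import path, the 768-dimensional Float32Array state, the string action labels, and the { steps: [...] } trajectory shape are assumptions inferred from addExperience(), update(), and getAction() in the code above.

// a2c-usage-sketch.mjs (hypothetical file; the import path is an assumption)
import { createA2C } from './package/src/@claude-flow/neural/dist/algorithms/a2c.js';

const a2c = createA2C({ nSteps: 5, useGAE: true });

// Hypothetical trajectory matching the schema addExperience() reads:
// each step carries stateAfter (an embedding), action (a string), reward (a number).
const state = new Float32Array(768).fill(0.1);
const trajectory = {
    steps: Array.from({ length: 5 }, (_, i) => ({
        stateAfter: state,
        action: `step-${i}`,
        reward: i === 4 ? 1 : 0,
    })),
};

a2c.addExperience(trajectory);
const { policyLoss, valueLoss, entropy } = a2c.update();
const { action, value } = a2c.getAction(state); // action is an index in [0, numActions)
console.log({ policyLoss, valueLoss, entropy, action, value }, a2c.getStats());

update() returns zeroed losses until at least nSteps (default 5) experiences are buffered, which is why the sketch feeds a five-step trajectory before calling it; with useGAE enabled, advantages come from the lambda-weighted recurrence in computeGAE() rather than plain return-minus-value.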