moflo 4.8.32 → 4.8.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/generate-code-map.mjs +1 -1
- package/bin/index-guidance.mjs +1 -1
- package/bin/index-tests.mjs +1 -1
- package/bin/setup-project.mjs +1 -1
- package/package.json +8 -4
- package/src/@claude-flow/cli/dist/src/commands/doctor.js +1298 -1107
- package/src/@claude-flow/cli/dist/src/memory/memory-initializer.js +4 -7
- package/src/@claude-flow/neural/README.md +260 -0
- package/src/@claude-flow/neural/dist/algorithms/a2c.js +361 -0
- package/src/@claude-flow/neural/dist/algorithms/curiosity.js +392 -0
- package/src/@claude-flow/neural/dist/algorithms/decision-transformer.js +415 -0
- package/src/@claude-flow/neural/dist/algorithms/dqn.js +303 -0
- package/src/@claude-flow/neural/dist/algorithms/index.js +74 -0
- package/src/@claude-flow/neural/dist/algorithms/ppo.js +331 -0
- package/src/@claude-flow/neural/dist/algorithms/q-learning.js +259 -0
- package/src/@claude-flow/neural/dist/algorithms/sarsa.js +297 -0
- package/src/@claude-flow/neural/dist/application/index.js +7 -0
- package/src/@claude-flow/neural/dist/application/services/neural-application-service.js +161 -0
- package/src/@claude-flow/neural/dist/domain/entities/pattern.js +134 -0
- package/src/@claude-flow/neural/dist/domain/index.js +8 -0
- package/src/@claude-flow/neural/dist/domain/services/learning-service.js +195 -0
- package/src/@claude-flow/neural/dist/index.js +201 -0
- package/src/@claude-flow/neural/dist/modes/balanced.js +234 -0
- package/src/@claude-flow/neural/dist/modes/base.js +77 -0
- package/src/@claude-flow/neural/dist/modes/batch.js +316 -0
- package/src/@claude-flow/neural/dist/modes/edge.js +310 -0
- package/src/@claude-flow/neural/dist/modes/index.js +13 -0
- package/src/@claude-flow/neural/dist/modes/real-time.js +196 -0
- package/src/@claude-flow/neural/dist/modes/research.js +389 -0
- package/src/@claude-flow/neural/dist/pattern-learner.js +603 -0
- package/src/@claude-flow/neural/dist/reasoning-bank.js +993 -0
- package/src/@claude-flow/neural/dist/reasoningbank-adapter.js +463 -0
- package/src/@claude-flow/neural/dist/sona-integration.js +316 -0
- package/src/@claude-flow/neural/dist/sona-manager.js +695 -0
- package/src/@claude-flow/neural/dist/types.js +11 -0
- package/src/@claude-flow/neural/package.json +26 -0
|
@@ -1950,14 +1950,11 @@ export async function searchEntries(options) {
|
|
|
1950
1950
|
// Invalid embedding, use keyword score
|
|
1951
1951
|
}
|
|
1952
1952
|
}
|
|
1953
|
-
//
|
|
1953
|
+
// Skip entries without valid semantic embeddings — keyword fallback
|
|
1954
|
+
// produces misleading 0.500 scores that degrade search quality.
|
|
1955
|
+
// Entries must have real vector embeddings to participate in semantic search.
|
|
1954
1956
|
if (score < threshold) {
|
|
1955
|
-
|
|
1956
|
-
const lowerQuery = query.toLowerCase();
|
|
1957
|
-
const words = lowerQuery.split(/\s+/);
|
|
1958
|
-
const matchCount = words.filter(w => lowerContent.includes(w)).length;
|
|
1959
|
-
const keywordScore = matchCount / words.length * 0.5;
|
|
1960
|
-
score = Math.max(score, keywordScore);
|
|
1957
|
+
continue;
|
|
1961
1958
|
}
|
|
1962
1959
|
if (score >= threshold) {
|
|
1963
1960
|
results.push({
|
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
# @claude-flow/neural
|
|
2
|
+
|
|
3
|
+
[npm version](https://www.npmjs.com/package/@claude-flow/neural)
|
|
4
|
+
[npm downloads](https://www.npmjs.com/package/@claude-flow/neural)
|
|
5
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
6
|
+
[TypeScript](https://www.typescriptlang.org/)
|
|
7
|
+
[GitHub repository](https://github.com/eric-cielo/moflo)
|
|
8
|
+
|
|
9
|
+
> Self-Optimizing Neural Architecture (SONA) module for Claude Flow V3 - adaptive learning, trajectory tracking, and pattern-based optimization.
|
|
10
|
+
|
|
11
|
+
## Features
|
|
12
|
+
|
|
13
|
+
- **SONA Learning** - Self-Optimizing Neural Architecture with <0.05ms adaptation time
|
|
14
|
+
- **5 Learning Modes** - Real-time, Balanced, Research, Edge, and Batch modes
|
|
15
|
+
- **7 RL Algorithms** - PPO, A2C, DQN, Q-Learning, SARSA, Decision Transformer, and Curiosity-driven exploration
|
|
16
|
+
- **LoRA Integration** - Low-Rank Adaptation for efficient fine-tuning
|
|
17
|
+
- **EWC++ Memory** - Elastic Weight Consolidation for continual learning without forgetting
|
|
18
|
+
- **Trajectory Tracking** - Record and learn from agent execution paths
|
|
19
|
+
- **Pattern Recognition** - Automatic pattern extraction and reuse
|
|
20
|
+
|
|
21
|
+
## Installation
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
npm install @claude-flow/neural
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Quick Start
|
|
28
|
+
|
|
29
|
+
```typescript
|
|
30
|
+
import { SONAManager, createSONAManager } from '@claude-flow/neural';
|
|
31
|
+
|
|
32
|
+
// Create SONA manager
|
|
33
|
+
const sona = createSONAManager('balanced');
|
|
34
|
+
await sona.initialize();
|
|
35
|
+
|
|
36
|
+
// Begin trajectory tracking
|
|
37
|
+
const trajectoryId = sona.beginTrajectory('code-review-task', 'development');
|
|
38
|
+
|
|
39
|
+
// Record steps
|
|
40
|
+
sona.recordStep(trajectoryId, 'analyze-code', 0.8, stateEmbedding, {
|
|
41
|
+
filesAnalyzed: 5,
|
|
42
|
+
issuesFound: 2
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
sona.recordStep(trajectoryId, 'generate-feedback', 0.9, newStateEmbedding);
|
|
46
|
+
|
|
47
|
+
// Complete trajectory
|
|
48
|
+
const trajectory = sona.completeTrajectory(trajectoryId);
|
|
49
|
+
|
|
50
|
+
// Find similar patterns for guidance
|
|
51
|
+
const patterns = await sona.findSimilarPatterns(contextEmbedding, 3);
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Learning Modes
|
|
55
|
+
|
|
56
|
+
| Mode | Adaptation | Quality | Memory | Use Case |
|
|
57
|
+
|------|------------|---------|--------|----------|
|
|
58
|
+
| **real-time** | <0.5ms | 70%+ | 25MB | Production, low-latency |
|
|
59
|
+
| **balanced** | <18ms | 75%+ | 50MB | General purpose |
|
|
60
|
+
| **research** | <100ms | 95%+ | 100MB | Deep exploration |
|
|
61
|
+
| **edge** | <1ms | 80%+ | 5MB | Resource-constrained |
|
|
62
|
+
| **batch** | <50ms | 85%+ | 75MB | High-throughput |
|
|
63
|
+
|
|
64
|
+
```typescript
|
|
65
|
+
// Switch modes dynamically
|
|
66
|
+
await sona.setMode('research');
|
|
67
|
+
|
|
68
|
+
// Get current configuration
|
|
69
|
+
const { mode, config, optimizations } = sona.getConfig();
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## API Reference
|
|
73
|
+
|
|
74
|
+
### SONA Manager
|
|
75
|
+
|
|
76
|
+
```typescript
|
|
77
|
+
import { SONAManager } from '@claude-flow/neural';
|
|
78
|
+
|
|
79
|
+
const sona = new SONAManager('balanced');
|
|
80
|
+
await sona.initialize();
|
|
81
|
+
|
|
82
|
+
// Trajectory Management
|
|
83
|
+
const trajectoryId = sona.beginTrajectory(context, domain);
|
|
84
|
+
sona.recordStep(trajectoryId, action, reward, stateEmbedding, metadata);
|
|
85
|
+
const trajectory = sona.completeTrajectory(trajectoryId, finalQuality);
|
|
86
|
+
|
|
87
|
+
// Pattern Matching
|
|
88
|
+
const patterns = await sona.findSimilarPatterns(embedding, k);
|
|
89
|
+
const pattern = sona.storePattern({ name, strategy, embedding, domain });
|
|
90
|
+
sona.updatePatternUsage(patternId, quality);
|
|
91
|
+
|
|
92
|
+
// Learning
|
|
93
|
+
await sona.triggerLearning('manual');
|
|
94
|
+
const output = await sona.applyAdaptations(input, domain);
|
|
95
|
+
|
|
96
|
+
// Statistics
|
|
97
|
+
const stats = sona.getStats();
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### RL Algorithms
|
|
101
|
+
|
|
102
|
+
```typescript
|
|
103
|
+
import { PPO, A2C, DQN, QLearning, SARSA, DecisionTransformer } from '@claude-flow/neural';
|
|
104
|
+
|
|
105
|
+
// Proximal Policy Optimization
|
|
106
|
+
const ppo = new PPO({
|
|
107
|
+
learningRate: 0.0003,
|
|
108
|
+
epsilon: 0.2,
|
|
109
|
+
valueCoef: 0.5
|
|
110
|
+
});
|
|
111
|
+
|
|
112
|
+
// Advantage Actor-Critic
|
|
113
|
+
const a2c = new A2C({
|
|
114
|
+
learningRate: 0.001,
|
|
115
|
+
gamma: 0.99,
|
|
116
|
+
entropyCoef: 0.01
|
|
117
|
+
});
|
|
118
|
+
|
|
119
|
+
// Deep Q-Network
|
|
120
|
+
const dqn = new DQN({
|
|
121
|
+
learningRate: 0.001,
|
|
122
|
+
gamma: 0.99,
|
|
123
|
+
epsilon: 0.1,
|
|
124
|
+
targetUpdateFreq: 100
|
|
125
|
+
});
|
|
126
|
+
|
|
127
|
+
// Decision Transformer
|
|
128
|
+
const dt = new DecisionTransformer({
|
|
129
|
+
contextLength: 20,
|
|
130
|
+
embeddingDim: 256,
|
|
131
|
+
numHeads: 4
|
|
132
|
+
});
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### LoRA Configuration
|
|
136
|
+
|
|
137
|
+
```typescript
|
|
138
|
+
// Get LoRA config for current mode
|
|
139
|
+
const loraConfig = sona.getLoRAConfig();
|
|
140
|
+
// {
|
|
141
|
+
// rank: 4,
|
|
142
|
+
// alpha: 8,
|
|
143
|
+
// dropout: 0.05,
|
|
144
|
+
// targetModules: ['q_proj', 'v_proj', 'k_proj', 'o_proj'],
|
|
145
|
+
// microLoRA: false
|
|
146
|
+
// }
|
|
147
|
+
|
|
148
|
+
// Initialize LoRA weights for a domain
|
|
149
|
+
const weights = sona.initializeLoRAWeights('code-generation');
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
### EWC++ (Elastic Weight Consolidation)
|
|
153
|
+
|
|
154
|
+
```typescript
|
|
155
|
+
// Get EWC config
|
|
156
|
+
const ewcConfig = sona.getEWCConfig();
|
|
157
|
+
// {
|
|
158
|
+
// lambda: 2000,
|
|
159
|
+
// decay: 0.9,
|
|
160
|
+
// fisherSamples: 100,
|
|
161
|
+
// minFisher: 1e-8,
|
|
162
|
+
// online: true
|
|
163
|
+
// }
|
|
164
|
+
|
|
165
|
+
// Consolidate after learning a new task
|
|
166
|
+
sona.consolidateEWC();
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
### Event System
|
|
170
|
+
|
|
171
|
+
```typescript
|
|
172
|
+
// Subscribe to neural events
|
|
173
|
+
sona.addEventListener((event) => {
|
|
174
|
+
switch (event.type) {
|
|
175
|
+
case 'trajectory_started':
|
|
176
|
+
console.log(`Started: ${event.trajectoryId}`);
|
|
177
|
+
break;
|
|
178
|
+
case 'trajectory_completed':
|
|
179
|
+
console.log(`Completed with quality: ${event.qualityScore}`);
|
|
180
|
+
break;
|
|
181
|
+
case 'pattern_matched':
|
|
182
|
+
console.log(`Pattern ${event.patternId} matched`);
|
|
183
|
+
break;
|
|
184
|
+
case 'learning_triggered':
|
|
185
|
+
console.log(`Learning: ${event.reason}`);
|
|
186
|
+
break;
|
|
187
|
+
case 'mode_changed':
|
|
188
|
+
console.log(`Mode: ${event.fromMode} -> ${event.toMode}`);
|
|
189
|
+
break;
|
|
190
|
+
}
|
|
191
|
+
});
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
## Mode Configurations
|
|
195
|
+
|
|
196
|
+
```typescript
|
|
197
|
+
// Real-time mode (ultra-fast)
|
|
198
|
+
{
|
|
199
|
+
loraRank: 2,
|
|
200
|
+
learningRate: 0.001,
|
|
201
|
+
batchSize: 32,
|
|
202
|
+
trajectoryCapacity: 1000,
|
|
203
|
+
qualityThreshold: 0.7,
|
|
204
|
+
maxLatencyMs: 0.5
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
// Research mode (high quality)
|
|
208
|
+
{
|
|
209
|
+
loraRank: 16,
|
|
210
|
+
learningRate: 0.002,
|
|
211
|
+
batchSize: 64,
|
|
212
|
+
trajectoryCapacity: 10000,
|
|
213
|
+
qualityThreshold: 0.2,
|
|
214
|
+
maxLatencyMs: 100
|
|
215
|
+
}
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
## Performance Targets
|
|
219
|
+
|
|
220
|
+
| Metric | Target | Typical |
|
|
221
|
+
|--------|--------|---------|
|
|
222
|
+
| Adaptation latency | <0.05ms | 0.02ms |
|
|
223
|
+
| Pattern retrieval | <1ms | 0.5ms |
|
|
224
|
+
| Learning step | <10ms | 5ms |
|
|
225
|
+
| Quality improvement | +55% | +40-60% |
|
|
226
|
+
| Memory overhead | <50MB | 25-75MB |
|
|
227
|
+
|
|
228
|
+
## TypeScript Types
|
|
229
|
+
|
|
230
|
+
```typescript
|
|
231
|
+
import type {
|
|
232
|
+
SONAMode,
|
|
233
|
+
SONAModeConfig,
|
|
234
|
+
Trajectory,
|
|
235
|
+
TrajectoryStep,
|
|
236
|
+
Pattern,
|
|
237
|
+
PatternMatch,
|
|
238
|
+
NeuralStats,
|
|
239
|
+
NeuralEvent,
|
|
240
|
+
LoRAConfig,
|
|
241
|
+
LoRAWeights,
|
|
242
|
+
EWCConfig,
|
|
243
|
+
RLAlgorithm
|
|
244
|
+
} from '@claude-flow/neural';
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
## Dependencies
|
|
248
|
+
|
|
249
|
+
- [@claude-flow/memory](../memory) - Memory integration
|
|
250
|
+
- `@ruvector/sona` - SONA learning engine
|
|
251
|
+
|
|
252
|
+
## Related Packages
|
|
253
|
+
|
|
254
|
+
- [@claude-flow/memory](../memory) - Vector memory for patterns
|
|
255
|
+
- [@claude-flow/integration](../integration) - agentic-flow integration
|
|
256
|
+
- [@claude-flow/performance](../performance) - Benchmarking
|
|
257
|
+
|
|
258
|
+
## License
|
|
259
|
+
|
|
260
|
+
MIT
|
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Advantage Actor-Critic (A2C)
|
|
3
|
+
*
|
|
4
|
+
* Implements synchronous A2C algorithm with:
|
|
5
|
+
* - Shared actor-critic network
|
|
6
|
+
* - N-step returns
|
|
7
|
+
* - Entropy regularization
|
|
8
|
+
* - Advantage normalization
|
|
9
|
+
*
|
|
10
|
+
* Performance Target: <10ms per update step
|
|
11
|
+
*/
|
|
12
|
+
/**
 * Baseline hyper-parameters for the A2C learner.
 *
 * The A2CAlgorithm constructor shallow-merges caller overrides on top of this
 * object, so every key below is always present on an instance's `config`.
 */
export const DEFAULT_A2C_CONFIG = {
    // Identifier for this algorithm.
    algorithm: 'a2c',

    // Step size; applyGradients() divides it by the batch size.
    learningRate: 0.0007,
    // Discount factor used by computeReturns() and computeGAE().
    gamma: 0.99,

    // Weight of the entropy term in the policy update.
    entropyCoef: 0.01,
    // Weight of the value-head error in the gradients.
    valueLossCoef: 0.5,
    // Bound applied to each gradient component (element-wise clamp) in applyGradients().
    maxGradNorm: 0.5,

    // Not read by the A2C class in this module — presumably consumed elsewhere; verify.
    epochs: 1,
    miniBatchSize: 32,

    // Minimum number of buffered experiences before update() does any work.
    nSteps: 5,

    // When true, advantages come from computeGAE(); otherwise from return - value.
    useGAE: true,
    // Lambda factor of the GAE recursion.
    gaeLambda: 0.95,
};
|
|
28
|
+
/**
 * Advantage Actor-Critic (A2C) learner.
 *
 * A single ReLU hidden layer is shared by a softmax policy head and a scalar
 * value head. Experience is queued with addExperience() and consumed by
 * update(), which performs one momentum-SGD step over the whole buffer and
 * then empties it.
 */
export class A2CAlgorithm {
    config;
    // Shared network weights, stored row-major as [input][hidden].
    sharedWeights;
    // Policy head weights, stored row-major as [hidden][action].
    policyHead;
    // Value head weights, one per hidden unit.
    valueHead;
    // Optimizer state (one momentum slot per weight)
    sharedMomentum;
    policyMomentum;
    valueMomentum;
    // Experience buffer for n-step
    buffer = [];
    // Dimensions
    inputDim = 768;
    hiddenDim = 64;
    numActions = 4;
    // Statistics of the most recent update()
    updateCount = 0;
    avgPolicyLoss = 0;
    avgValueLoss = 0;
    avgEntropy = 0;

    /**
     * @param {object} [config] - overrides merged on top of DEFAULT_A2C_CONFIG
     */
    constructor(config = {}) {
        this.config = { ...DEFAULT_A2C_CONFIG, ...config };
        // Initialize network
        const scale = Math.sqrt(2 / this.inputDim);
        this.sharedWeights = new Float32Array(this.inputDim * this.hiddenDim);
        this.policyHead = new Float32Array(this.hiddenDim * this.numActions);
        this.valueHead = new Float32Array(this.hiddenDim);
        for (let i = 0; i < this.sharedWeights.length; i++) {
            this.sharedWeights[i] = (Math.random() - 0.5) * scale;
        }
        for (let i = 0; i < this.policyHead.length; i++) {
            this.policyHead[i] = (Math.random() - 0.5) * 0.1;
        }
        for (let i = 0; i < this.valueHead.length; i++) {
            this.valueHead[i] = (Math.random() - 0.5) * 0.1;
        }
        // Initialize momentum
        this.sharedMomentum = new Float32Array(this.sharedWeights.length);
        this.policyMomentum = new Float32Array(this.policyHead.length);
        this.valueMomentum = new Float32Array(this.valueHead.length);
    }

    /**
     * Queue every step of a trajectory for the next update().
     *
     * @param {{steps: Array<{stateAfter: ArrayLike<number>, action: string, reward: number}>}} trajectory
     *   Each step's `action` is reduced to an action index with hashAction(), and
     *   `stateAfter` is the network input for that step.
     */
    addExperience(trajectory) {
        for (const step of trajectory.steps) {
            const { probs, value, entropy } = this.evaluate(step.stateAfter);
            const action = this.hashAction(step.action);
            this.buffer.push({
                state: step.stateAfter,
                action,
                reward: step.reward,
                value,
                logProb: Math.log(probs[action] + 1e-8),
                entropy,
            });
        }
    }

    /**
     * Perform one A2C update over the buffered experiences, then clear the buffer.
     * Target: <10ms
     *
     * @returns {{policyLoss: number, valueLoss: number, entropy: number}}
     *   Per-sample averages for this update; all zeros when fewer than
     *   `config.nSteps` experiences are buffered (no update is performed).
     */
    update() {
        const startTime = performance.now();
        // Capture the sample count up front: the buffer is emptied below, and the
        // averages must divide by the number of samples actually processed.
        const batchSize = this.buffer.length;
        // An empty batch must never reach applyGradients(): it would divide the
        // learning rate by zero and turn every weight into NaN.
        if (batchSize === 0 || batchSize < this.config.nSteps) {
            return { policyLoss: 0, valueLoss: 0, entropy: 0 };
        }
        // Compute returns and advantages
        const returns = this.computeReturns();
        const advantages = this.computeAdvantages(returns);
        // Initialize gradients
        const sharedGrad = new Float32Array(this.sharedWeights.length);
        const policyGrad = new Float32Array(this.policyHead.length);
        const valueGrad = new Float32Array(this.valueHead.length);
        let totalPolicyLoss = 0;
        let totalValueLoss = 0;
        let totalEntropy = 0;
        // Process all experiences
        for (let i = 0; i < batchSize; i++) {
            const exp = this.buffer[i];
            const advantage = advantages[i];
            const return_ = returns[i];
            // Re-evaluate with the current weights
            const { probs, value, hidden } = this.forwardWithHidden(exp.state);
            const logProb = Math.log(probs[exp.action] + 1e-8);
            totalPolicyLoss += -logProb * advantage;
            totalValueLoss += (value - return_) ** 2;
            totalEntropy += this.computeEntropy(probs);
            // Accumulate gradients (policy, entropy and value terms)
            this.accumulateGradients(sharedGrad, policyGrad, valueGrad, exp.state, hidden, exp.action, advantage, value - return_);
        }
        // Apply gradients
        this.applyGradients(sharedGrad, policyGrad, valueGrad, batchSize);
        // Clear buffer
        this.buffer = [];
        this.updateCount++;
        // `|| 0` maps a NaN total to 0.
        this.avgPolicyLoss = totalPolicyLoss / batchSize || 0;
        this.avgValueLoss = totalValueLoss / batchSize || 0;
        this.avgEntropy = totalEntropy / batchSize || 0;
        const elapsed = performance.now() - startTime;
        if (elapsed > 10) {
            console.warn(`A2C update exceeded target: ${elapsed.toFixed(2)}ms > 10ms`);
        }
        return {
            policyLoss: this.avgPolicyLoss,
            valueLoss: this.avgValueLoss,
            entropy: this.avgEntropy,
        };
    }

    /**
     * Sample an action index from the current policy.
     *
     * @param {ArrayLike<number>} state
     * @returns {{action: number, value: number}} sampled action index and the value estimate
     */
    getAction(state) {
        const { probs, value } = this.evaluate(state);
        const action = this.sampleAction(probs);
        return { action, value };
    }

    /**
     * Get statistics
     *
     * @returns {{updateCount: number, bufferSize: number, avgPolicyLoss: number, avgValueLoss: number, avgEntropy: number}}
     */
    getStats() {
        return {
            updateCount: this.updateCount,
            bufferSize: this.buffer.length,
            avgPolicyLoss: this.avgPolicyLoss,
            avgValueLoss: this.avgValueLoss,
            avgEntropy: this.avgEntropy,
        };
    }

    // ==========================================================================
    // Private Methods
    // ==========================================================================

    /** Policy distribution, value estimate and policy entropy for a state. */
    evaluate(state) {
        const { probs, value } = this.forward(state);
        return { probs, value, entropy: this.computeEntropy(probs) };
    }

    /** Forward pass returning only the two head outputs. */
    forward(state) {
        return this.computeHeads(this.computeHidden(state));
    }

    /** Forward pass that also exposes the hidden activations for backprop. */
    forwardWithHidden(state) {
        const hidden = this.computeHidden(state);
        const { probs, value } = this.computeHeads(hidden);
        return { probs, value, hidden };
    }

    /**
     * Shared hidden layer: ReLU(state · sharedWeights).
     * Only the first min(state.length, inputDim) inputs are used.
     */
    computeHidden(state) {
        const hidden = new Float32Array(this.hiddenDim);
        const inputLen = Math.min(state.length, this.inputDim);
        for (let h = 0; h < this.hiddenDim; h++) {
            let sum = 0;
            for (let i = 0; i < inputLen; i++) {
                sum += state[i] * this.sharedWeights[i * this.hiddenDim + h];
            }
            hidden[h] = Math.max(0, sum); // ReLU
        }
        return hidden;
    }

    /** Policy head (softmax over actions) and value head (scalar) for given hidden activations. */
    computeHeads(hidden) {
        const logits = new Float32Array(this.numActions);
        for (let a = 0; a < this.numActions; a++) {
            let sum = 0;
            for (let h = 0; h < this.hiddenDim; h++) {
                sum += hidden[h] * this.policyHead[h * this.numActions + a];
            }
            logits[a] = sum;
        }
        const probs = this.softmax(logits);
        let value = 0;
        for (let h = 0; h < this.hiddenDim; h++) {
            value += hidden[h] * this.valueHead[h];
        }
        return { probs, value };
    }

    /** Shannon entropy (natural log) of a probability vector; zero entries contribute nothing. */
    computeEntropy(probs) {
        let entropy = 0;
        for (const p of probs) {
            if (p > 0) {
                entropy -= p * Math.log(p);
            }
        }
        return entropy;
    }

    /**
     * Discounted returns, computed backwards over the buffer and bootstrapped
     * from the value stored with the last buffered experience.
     */
    computeReturns() {
        const returns = new Array(this.buffer.length).fill(0);
        let cumReturn = 0;
        // Bootstrap from last value if not terminal
        if (this.buffer.length > 0) {
            cumReturn = this.buffer[this.buffer.length - 1].value;
        }
        for (let t = this.buffer.length - 1; t >= 0; t--) {
            cumReturn = this.buffer[t].reward + this.config.gamma * cumReturn;
            returns[t] = cumReturn;
        }
        return returns;
    }

    /** Normalised advantages: GAE when `config.useGAE` is set, otherwise return - value. */
    computeAdvantages(returns) {
        if (this.config.useGAE) {
            return this.computeGAE();
        }
        // Simple advantage: return - value
        const advantages = new Array(this.buffer.length).fill(0);
        for (let i = 0; i < this.buffer.length; i++) {
            advantages[i] = returns[i] - this.buffer[i].value;
        }
        return this.normalizeAdvantages(advantages);
    }

    /** Generalised Advantage Estimation over the buffer (next value after the last step is 0). */
    computeGAE() {
        const advantages = new Array(this.buffer.length).fill(0);
        let lastGae = 0;
        for (let t = this.buffer.length - 1; t >= 0; t--) {
            const nextValue = t < this.buffer.length - 1
                ? this.buffer[t + 1].value
                : 0;
            const delta = this.buffer[t].reward + this.config.gamma * nextValue - this.buffer[t].value;
            lastGae = delta + this.config.gamma * this.config.gaeLambda * lastGae;
            advantages[t] = lastGae;
        }
        return this.normalizeAdvantages(advantages);
    }

    /** Shift to zero mean and scale by (population std + 1e-8). */
    normalizeAdvantages(advantages) {
        const mean = advantages.reduce((a, b) => a + b, 0) / advantages.length;
        const std = Math.sqrt(advantages.reduce((a, b) => a + (b - mean) ** 2, 0) / advantages.length) + 1e-8;
        return advantages.map(a => (a - mean) / std);
    }

    /**
     * Add one sample's gradient of
     *   -advantage * log pi(action) - entropyCoef * H(pi) + valueLossCoef * valueError * value
     * to the three accumulators (the advantage and valueError are treated as constants).
     */
    accumulateGradients(sharedGrad, policyGrad, valueGrad, state, hidden, action, advantage, valueError) {
        const { probs } = this.computeHeads(hidden);
        const entropy = this.computeEntropy(probs);
        // Gradient of the loss with respect to each policy logit.
        //   d(-A * log p_a) / d z_k = A * (p_k - [k == a])
        //   d(-c * H) / d z_k       = c * p_k * (log p_k + H)
        // Descending this raises the logit of actions with positive advantage.
        const dLogits = new Float64Array(this.numActions);
        for (let k = 0; k < this.numActions; k++) {
            const p = probs[k];
            let grad = advantage * (p - (k === action ? 1 : 0));
            if (p > 0) {
                grad += this.config.entropyCoef * p * (Math.log(p) + entropy);
            }
            dLogits[k] = grad;
        }
        // Policy and value head gradients
        for (let h = 0; h < this.hiddenDim; h++) {
            for (let k = 0; k < this.numActions; k++) {
                policyGrad[h * this.numActions + k] += hidden[h] * dLogits[k];
            }
            valueGrad[h] += hidden[h] * valueError * this.config.valueLossCoef;
        }
        // Shared layer gradient (backprop through both heads)
        const inputLen = Math.min(state.length, this.inputDim);
        for (let h = 0; h < this.hiddenDim; h++) {
            if (hidden[h] > 0) { // ReLU gradient
                let policySignal = 0;
                for (let k = 0; k < this.numActions; k++) {
                    policySignal += dLogits[k] * this.policyHead[h * this.numActions + k];
                }
                const valueSignal = valueError * this.valueHead[h] * this.config.valueLossCoef;
                const totalSignal = policySignal + valueSignal;
                for (let i = 0; i < inputLen; i++) {
                    sharedGrad[i * this.hiddenDim + h] += state[i] * totalSignal;
                }
            }
        }
    }

    /**
     * Momentum-SGD step on all three weight groups. Each summed gradient
     * component is clamped to +/- config.maxGradNorm before it enters the momentum.
     */
    applyGradients(sharedGrad, policyGrad, valueGrad, batchSize) {
        const lr = this.config.learningRate / batchSize;
        this.stepWeights(this.sharedWeights, this.sharedMomentum, sharedGrad, lr);
        this.stepWeights(this.policyHead, this.policyMomentum, policyGrad, lr);
        this.stepWeights(this.valueHead, this.valueMomentum, valueGrad, lr);
    }

    /** In-place clamped momentum update of one weight array. */
    stepWeights(weights, momentum, grads, lr) {
        const beta = 0.9;
        const limit = this.config.maxGradNorm;
        for (let i = 0; i < weights.length; i++) {
            const grad = Math.max(Math.min(grads[i], limit), -limit);
            momentum[i] = beta * momentum[i] + (1 - beta) * grad;
            weights[i] -= lr * momentum[i];
        }
    }

    /** Softmax with the maximum logit subtracted before exponentiation. */
    softmax(logits) {
        const max = Math.max(...logits);
        const exps = new Float32Array(logits.length);
        let sum = 0;
        for (let i = 0; i < logits.length; i++) {
            exps[i] = Math.exp(logits[i] - max);
            sum += exps[i];
        }
        for (let i = 0; i < exps.length; i++) {
            exps[i] /= sum;
        }
        return exps;
    }

    /** Draw an index from a categorical distribution; falls back to the last index. */
    sampleAction(probs) {
        const r = Math.random();
        let cumSum = 0;
        for (let i = 0; i < probs.length; i++) {
            cumSum += probs[i];
            if (r < cumSum) {
                return i;
            }
        }
        return probs.length - 1;
    }

    /** Reduce an action string to an index in [0, numActions) with a base-31 rolling hash. */
    hashAction(action) {
        let hash = 0;
        for (let i = 0; i < action.length; i++) {
            hash = (hash * 31 + action.charCodeAt(i)) % this.numActions;
        }
        return hash;
    }
}
|
|
355
|
+
/**
 * Build an A2C learner.
 *
 * @param {object} [config] - passed through unchanged to the A2CAlgorithm constructor
 * @returns {A2CAlgorithm} a newly constructed instance
 */
export function createA2C(config) {
    const algorithm = new A2CAlgorithm(config);
    return algorithm;
}
|
|
361
|
+
//# sourceMappingURL=a2c.js.map
|