agentic-qe 2.1.2 → 2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/skills/agentic-quality-engineering/SKILL.md +4 -4
- package/.claude/skills/cicd-pipeline-qe-orchestrator/README.md +14 -11
- package/.claude/skills/skills-manifest.json +2 -2
- package/CHANGELOG.md +138 -0
- package/README.md +92 -214
- package/dist/agents/BaseAgent.d.ts +5 -1
- package/dist/agents/BaseAgent.d.ts.map +1 -1
- package/dist/agents/BaseAgent.js +32 -17
- package/dist/agents/BaseAgent.js.map +1 -1
- package/dist/agents/index.d.ts.map +1 -1
- package/dist/agents/index.js +5 -1
- package/dist/agents/index.js.map +1 -1
- package/dist/cli/commands/improve/index.d.ts +8 -1
- package/dist/cli/commands/improve/index.d.ts.map +1 -1
- package/dist/cli/commands/improve/index.js +18 -16
- package/dist/cli/commands/improve/index.js.map +1 -1
- package/dist/cli/commands/learn/index.d.ts +10 -2
- package/dist/cli/commands/learn/index.d.ts.map +1 -1
- package/dist/cli/commands/learn/index.js +99 -63
- package/dist/cli/commands/learn/index.js.map +1 -1
- package/dist/cli/commands/patterns/index.d.ts +8 -1
- package/dist/cli/commands/patterns/index.d.ts.map +1 -1
- package/dist/cli/commands/patterns/index.js +79 -45
- package/dist/cli/commands/patterns/index.js.map +1 -1
- package/dist/cli/commands/routing/index.d.ts +5 -0
- package/dist/cli/commands/routing/index.d.ts.map +1 -1
- package/dist/cli/commands/routing/index.js +11 -10
- package/dist/cli/commands/routing/index.js.map +1 -1
- package/dist/cli/init/agents.d.ts +1 -1
- package/dist/cli/init/agents.js +2 -2
- package/dist/cli/init/database-init.d.ts +7 -0
- package/dist/cli/init/database-init.d.ts.map +1 -1
- package/dist/cli/init/database-init.js +29 -48
- package/dist/cli/init/database-init.js.map +1 -1
- package/dist/core/di/AgentDependencies.d.ts +127 -0
- package/dist/core/di/AgentDependencies.d.ts.map +1 -0
- package/dist/core/di/AgentDependencies.js +251 -0
- package/dist/core/di/AgentDependencies.js.map +1 -0
- package/dist/core/di/DIContainer.d.ts +149 -0
- package/dist/core/di/DIContainer.d.ts.map +1 -0
- package/dist/core/di/DIContainer.js +333 -0
- package/dist/core/di/DIContainer.js.map +1 -0
- package/dist/core/di/index.d.ts +11 -0
- package/dist/core/di/index.d.ts.map +1 -0
- package/dist/core/di/index.js +22 -0
- package/dist/core/di/index.js.map +1 -0
- package/dist/core/index.d.ts +1 -0
- package/dist/core/index.d.ts.map +1 -1
- package/dist/core/index.js +11 -1
- package/dist/core/index.js.map +1 -1
- package/dist/core/memory/HNSWVectorMemory.d.ts +261 -0
- package/dist/core/memory/HNSWVectorMemory.d.ts.map +1 -0
- package/dist/core/memory/HNSWVectorMemory.js +647 -0
- package/dist/core/memory/HNSWVectorMemory.js.map +1 -0
- package/dist/core/memory/SwarmMemoryManager.d.ts +7 -0
- package/dist/core/memory/SwarmMemoryManager.d.ts.map +1 -1
- package/dist/core/memory/SwarmMemoryManager.js +9 -0
- package/dist/core/memory/SwarmMemoryManager.js.map +1 -1
- package/dist/core/memory/index.d.ts +2 -0
- package/dist/core/memory/index.d.ts.map +1 -1
- package/dist/core/memory/index.js +11 -1
- package/dist/core/memory/index.js.map +1 -1
- package/dist/learning/ExperienceSharingProtocol.d.ts +243 -0
- package/dist/learning/ExperienceSharingProtocol.d.ts.map +1 -0
- package/dist/learning/ExperienceSharingProtocol.js +538 -0
- package/dist/learning/ExperienceSharingProtocol.js.map +1 -0
- package/dist/learning/ExplainableLearning.d.ts +191 -0
- package/dist/learning/ExplainableLearning.d.ts.map +1 -0
- package/dist/learning/ExplainableLearning.js +441 -0
- package/dist/learning/ExplainableLearning.js.map +1 -0
- package/dist/learning/GossipPatternSharingProtocol.d.ts +228 -0
- package/dist/learning/GossipPatternSharingProtocol.d.ts.map +1 -0
- package/dist/learning/GossipPatternSharingProtocol.js +590 -0
- package/dist/learning/GossipPatternSharingProtocol.js.map +1 -0
- package/dist/learning/LearningEngine.d.ts +104 -4
- package/dist/learning/LearningEngine.d.ts.map +1 -1
- package/dist/learning/LearningEngine.js +350 -16
- package/dist/learning/LearningEngine.js.map +1 -1
- package/dist/learning/PerformanceOptimizer.d.ts +268 -0
- package/dist/learning/PerformanceOptimizer.d.ts.map +1 -0
- package/dist/learning/PerformanceOptimizer.js +552 -0
- package/dist/learning/PerformanceOptimizer.js.map +1 -0
- package/dist/learning/PrivacyManager.d.ts +197 -0
- package/dist/learning/PrivacyManager.d.ts.map +1 -0
- package/dist/learning/PrivacyManager.js +551 -0
- package/dist/learning/PrivacyManager.js.map +1 -0
- package/dist/learning/QLearning.d.ts +38 -125
- package/dist/learning/QLearning.d.ts.map +1 -1
- package/dist/learning/QLearning.js +46 -267
- package/dist/learning/QLearning.js.map +1 -1
- package/dist/learning/QLearningLegacy.d.ts +154 -0
- package/dist/learning/QLearningLegacy.d.ts.map +1 -0
- package/dist/learning/QLearningLegacy.js +337 -0
- package/dist/learning/QLearningLegacy.js.map +1 -0
- package/dist/learning/TransferLearningManager.d.ts +212 -0
- package/dist/learning/TransferLearningManager.d.ts.map +1 -0
- package/dist/learning/TransferLearningManager.js +497 -0
- package/dist/learning/TransferLearningManager.js.map +1 -0
- package/dist/learning/algorithms/AbstractRLLearner.d.ts +162 -0
- package/dist/learning/algorithms/AbstractRLLearner.d.ts.map +1 -0
- package/dist/learning/algorithms/AbstractRLLearner.js +300 -0
- package/dist/learning/algorithms/AbstractRLLearner.js.map +1 -0
- package/dist/learning/algorithms/ActorCriticLearner.d.ts +201 -0
- package/dist/learning/algorithms/ActorCriticLearner.d.ts.map +1 -0
- package/dist/learning/algorithms/ActorCriticLearner.js +447 -0
- package/dist/learning/algorithms/ActorCriticLearner.js.map +1 -0
- package/dist/learning/algorithms/MAMLMetaLearner.d.ts +218 -0
- package/dist/learning/algorithms/MAMLMetaLearner.d.ts.map +1 -0
- package/dist/learning/algorithms/MAMLMetaLearner.js +532 -0
- package/dist/learning/algorithms/MAMLMetaLearner.js.map +1 -0
- package/dist/learning/algorithms/PPOLearner.d.ts +207 -0
- package/dist/learning/algorithms/PPOLearner.d.ts.map +1 -0
- package/dist/learning/algorithms/PPOLearner.js +490 -0
- package/dist/learning/algorithms/PPOLearner.js.map +1 -0
- package/dist/learning/algorithms/QLearning.d.ts +68 -0
- package/dist/learning/algorithms/QLearning.d.ts.map +1 -0
- package/dist/learning/algorithms/QLearning.js +116 -0
- package/dist/learning/algorithms/QLearning.js.map +1 -0
- package/dist/learning/algorithms/SARSALearner.d.ts +107 -0
- package/dist/learning/algorithms/SARSALearner.d.ts.map +1 -0
- package/dist/learning/algorithms/SARSALearner.js +252 -0
- package/dist/learning/algorithms/SARSALearner.js.map +1 -0
- package/dist/learning/algorithms/index.d.ts +32 -0
- package/dist/learning/algorithms/index.d.ts.map +1 -0
- package/dist/learning/algorithms/index.js +50 -0
- package/dist/learning/algorithms/index.js.map +1 -0
- package/dist/learning/index.d.ts +11 -0
- package/dist/learning/index.d.ts.map +1 -1
- package/dist/learning/index.js +31 -1
- package/dist/learning/index.js.map +1 -1
- package/dist/learning/types.d.ts +2 -0
- package/dist/learning/types.d.ts.map +1 -1
- package/dist/mcp/server-instructions.d.ts +1 -1
- package/dist/mcp/server-instructions.js +1 -1
- package/dist/memory/DistributedPatternLibrary.d.ts +159 -0
- package/dist/memory/DistributedPatternLibrary.d.ts.map +1 -0
- package/dist/memory/DistributedPatternLibrary.js +370 -0
- package/dist/memory/DistributedPatternLibrary.js.map +1 -0
- package/dist/memory/PatternQualityScorer.d.ts +169 -0
- package/dist/memory/PatternQualityScorer.d.ts.map +1 -0
- package/dist/memory/PatternQualityScorer.js +327 -0
- package/dist/memory/PatternQualityScorer.js.map +1 -0
- package/dist/memory/PatternReplicationService.d.ts +187 -0
- package/dist/memory/PatternReplicationService.d.ts.map +1 -0
- package/dist/memory/PatternReplicationService.js +392 -0
- package/dist/memory/PatternReplicationService.js.map +1 -0
- package/dist/providers/ClaudeProvider.d.ts +98 -0
- package/dist/providers/ClaudeProvider.d.ts.map +1 -0
- package/dist/providers/ClaudeProvider.js +418 -0
- package/dist/providers/ClaudeProvider.js.map +1 -0
- package/dist/providers/HybridRouter.d.ts +217 -0
- package/dist/providers/HybridRouter.d.ts.map +1 -0
- package/dist/providers/HybridRouter.js +679 -0
- package/dist/providers/HybridRouter.js.map +1 -0
- package/dist/providers/ILLMProvider.d.ts +287 -0
- package/dist/providers/ILLMProvider.d.ts.map +1 -0
- package/dist/providers/ILLMProvider.js +33 -0
- package/dist/providers/ILLMProvider.js.map +1 -0
- package/dist/providers/LLMProviderFactory.d.ts +154 -0
- package/dist/providers/LLMProviderFactory.d.ts.map +1 -0
- package/dist/providers/LLMProviderFactory.js +426 -0
- package/dist/providers/LLMProviderFactory.js.map +1 -0
- package/dist/providers/RuvllmProvider.d.ts +107 -0
- package/dist/providers/RuvllmProvider.d.ts.map +1 -0
- package/dist/providers/RuvllmProvider.js +417 -0
- package/dist/providers/RuvllmProvider.js.map +1 -0
- package/dist/providers/index.d.ts +32 -0
- package/dist/providers/index.d.ts.map +1 -0
- package/dist/providers/index.js +75 -0
- package/dist/providers/index.js.map +1 -0
- package/dist/telemetry/LearningTelemetry.d.ts +190 -0
- package/dist/telemetry/LearningTelemetry.d.ts.map +1 -0
- package/dist/telemetry/LearningTelemetry.js +403 -0
- package/dist/telemetry/LearningTelemetry.js.map +1 -0
- package/dist/telemetry/index.d.ts +1 -0
- package/dist/telemetry/index.d.ts.map +1 -1
- package/dist/telemetry/index.js +20 -2
- package/dist/telemetry/index.js.map +1 -1
- package/dist/telemetry/instrumentation/agent.d.ts +1 -1
- package/dist/telemetry/instrumentation/agent.js +1 -1
- package/dist/telemetry/instrumentation/index.d.ts +1 -1
- package/dist/telemetry/instrumentation/index.js +1 -1
- package/dist/utils/math.d.ts +11 -0
- package/dist/utils/math.d.ts.map +1 -0
- package/dist/utils/math.js +16 -0
- package/dist/utils/math.js.map +1 -0
- package/docs/reference/agents.md +1 -1
- package/docs/reference/skills.md +3 -3
- package/docs/reference/usage.md +4 -4
- package/package.json +1 -1
--- /dev/null
+++ package/dist/learning/algorithms/ActorCriticLearner.js
@@ -0,0 +1,447 @@
+"use strict";
+/**
+ * ActorCriticLearner - Actor-Critic Reinforcement Learning Algorithm
+ *
+ * Implements Advantage Actor-Critic (A2C) combining:
+ * - Actor: Policy network that selects actions using softmax policy
+ * - Critic: Value network that estimates state values for advantage calculation
+ *
+ * Key features:
+ * - Continuous action probabilities via softmax
+ * - Advantage-based updates to reduce variance
+ * - Entropy bonus for exploration
+ * - Policy gradient with baseline
+ *
+ * Update rules:
+ * - Critic (Value): V(s) += α_c * δ where δ = r + γV(s') - V(s)
+ * - Actor (Policy): π(a|s) += α_a * δ * ∇log(π(a|s)) + β * H(π)
+ *
+ * @module learning/algorithms/ActorCriticLearner
+ * @version 1.0.0
+ */
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.ActorCriticLearner = void 0;
+exports.createDefaultActorCriticConfig = createDefaultActorCriticConfig;
+const AbstractRLLearner_1 = require("./AbstractRLLearner");
+/**
+ * ActorCriticLearner - Advantage Actor-Critic implementation
+ *
+ * Combines policy gradient (actor) with value function approximation (critic)
+ * for more stable and efficient learning than pure Q-learning.
+ *
+ * Usage:
+ * ```typescript
+ * const ac = new ActorCriticLearner({
+ *   learningRate: 0.1,
+ *   actorLearningRate: 0.01,
+ *   criticLearningRate: 0.1,
+ *   discountFactor: 0.95,
+ *   explorationRate: 0.3,
+ *   explorationDecay: 0.995,
+ *   minExplorationRate: 0.01,
+ *   entropyCoefficient: 0.01,
+ *   temperature: 1.0,
+ *   normalizeAdvantage: true,
+ *   targetUpdateFrequency: 100,
+ *   useExperienceReplay: true,
+ *   replayBufferSize: 10000,
+ *   batchSize: 32
+ * });
+ *
+ * const action = ac.selectAction(state, availableActions);
+ * ac.update(experience);
+ * ```
+ */
+class ActorCriticLearner extends AbstractRLLearner_1.AbstractRLLearner {
+    constructor(config) {
+        super(config);
+        this.actorConfig = config;
+        this.policyTable = new Map();
+        this.valueTable = new Map();
+        this.targetValueTable = new Map();
+        this.updatesSinceTargetSync = 0;
+        this.advantageHistory = [];
+        this.defaultExploration = config.explorationRate;
+        this.logger.info('ActorCriticLearner initialized', {
+            actorLR: config.actorLearningRate,
+            criticLR: config.criticLearningRate,
+            entropy: config.entropyCoefficient,
+            temperature: config.temperature
+        });
+    }
+    /**
+     * Select action using softmax policy with exploration
+     * π(a|s) = exp(Q(s,a)/τ) / Σ_a' exp(Q(s,a')/τ)
+     */
+    selectAction(state, availableActions) {
+        if (availableActions.length === 0) {
+            throw new Error('No available actions to select from');
+        }
+        // With probability ε, use random action (exploration fallback)
+        if (Math.random() < this.config.explorationRate) {
+            const randomIndex = Math.floor(Math.random() * availableActions.length);
+            return availableActions[randomIndex];
+        }
+        // Use softmax policy
+        return this.sampleFromPolicy(state, availableActions);
+    }
+    /**
+     * Sample action from softmax policy distribution
+     */
+    sampleFromPolicy(state, availableActions) {
+        const stateKey = this.encodeState(state);
+        const probabilities = this.getActionProbabilities(stateKey, availableActions);
+        // Sample from categorical distribution
+        const random = Math.random();
+        let cumulative = 0;
+        for (let i = 0; i < availableActions.length; i++) {
+            cumulative += probabilities[i];
+            if (random <= cumulative) {
+                return availableActions[i];
+            }
+        }
+        // Fallback (shouldn't reach here due to normalization)
+        return availableActions[availableActions.length - 1];
+    }
+    /**
+     * Get softmax action probabilities
+     * π(a|s) = exp(preference(s,a)/τ) / Σ_a' exp(preference(s,a')/τ)
+     */
+    getActionProbabilities(stateKey, availableActions) {
+        const temperature = this.actorConfig.temperature;
+        const preferences = [];
+        // Get preferences (Q-values or policy table values)
+        for (const action of availableActions) {
+            const actionKey = this.encodeAction(action);
+            const preference = this.getPreference(stateKey, actionKey);
+            preferences.push(preference / temperature);
+        }
+        // Softmax with numerical stability
+        const maxPref = Math.max(...preferences);
+        const expPrefs = preferences.map(p => Math.exp(p - maxPref));
+        const sumExp = expPrefs.reduce((sum, e) => sum + e, 0);
+        return expPrefs.map(e => e / sumExp);
+    }
+    /**
+     * Get preference for state-action pair from policy table
+     */
+    getPreference(stateKey, actionKey) {
+        const statePolicy = this.policyTable.get(stateKey);
+        if (!statePolicy) {
+            return 0; // uniform preference initially
+        }
+        const entry = statePolicy.get(actionKey);
+        return entry ? entry.probability : 0;
+    }
+    /**
+     * Update actor and critic using temporal difference
+     *
+     * TD Error (advantage): δ = r + γV(s') - V(s)
+     * Critic update: V(s) += α_c * δ
+     * Actor update: preference(s,a) += α_a * δ * (1 - π(a|s))
+     */
+    update(experience, nextAction) {
+        this.stepCount++;
+        const { state, action, reward, nextState, done } = this.extractExperience(experience);
+        const stateKey = this.encodeState(state);
+        const actionKey = this.encodeAction(action);
+        // Get current and next state values from critic
+        const currentV = this.getStateValue(state);
+        const nextV = done ? 0 : this.getTargetStateValue(nextState);
+        // Calculate TD error (advantage)
+        let advantage = reward + this.config.discountFactor * nextV - currentV;
+        // Normalize advantage if enabled
+        if (this.actorConfig.normalizeAdvantage) {
+            advantage = this.normalizeAdvantage(advantage);
+        }
+        // Update critic (value function)
+        this.updateCritic(stateKey, currentV, advantage);
+        // Update actor (policy)
+        this.updateActor(stateKey, actionKey, advantage);
+        // Store in replay buffer if enabled
+        if (this.replayBuffer) {
+            this.replayBuffer.add(experience);
+        }
+        // Sync target network periodically
+        this.updatesSinceTargetSync++;
+        if (this.updatesSinceTargetSync >= this.actorConfig.targetUpdateFrequency) {
+            this.syncTargetNetwork();
+            this.updatesSinceTargetSync = 0;
+        }
+        this.logger.debug('Actor-Critic update', {
+            state: stateKey,
+            action: actionKey,
+            reward,
+            advantage,
+            valueUpdate: currentV + this.actorConfig.criticLearningRate * advantage
+        });
+    }
+    /**
+     * Update critic (value function)
+     * V(s) += α_c * δ
+     */
+    updateCritic(stateKey, currentV, advantage) {
+        const newValue = currentV + this.actorConfig.criticLearningRate * advantage;
+        const existingEntry = this.valueTable.get(stateKey);
+        this.valueTable.set(stateKey, {
+            state: stateKey,
+            value: newValue,
+            updateCount: (existingEntry?.updateCount ?? 0) + 1,
+            lastUpdated: Date.now()
+        });
+    }
+    /**
+     * Update actor (policy)
+     * For softmax policy: preference(s,a) += α_a * δ * (1 - π(a|s))
+     * This increases preference for actions with positive advantage
+     */
+    updateActor(stateKey, actionKey, advantage) {
+        if (!this.policyTable.has(stateKey)) {
+            this.policyTable.set(stateKey, new Map());
+        }
+        const statePolicy = this.policyTable.get(stateKey);
+        // Get current preference and probability
+        const currentEntry = statePolicy.get(actionKey);
+        const currentPref = currentEntry?.probability ?? 0;
+        // Approximate gradient: increase preference proportional to advantage
+        // Also add entropy bonus to encourage exploration
+        const entropyBonus = this.calculateEntropyBonus(stateKey);
+        const newPref = currentPref + this.actorConfig.actorLearningRate * (advantage + entropyBonus);
+        statePolicy.set(actionKey, {
+            action: actionKey,
+            probability: newPref,
+            logProbability: Math.log(Math.max(0.001, this.softmaxProb(stateKey, actionKey))),
+            updateCount: (currentEntry?.updateCount ?? 0) + 1,
+            lastUpdated: Date.now()
+        });
+        // Also update Q-table for getBestAction compatibility
+        this.setQValue(stateKey, actionKey, newPref);
+    }
+    /**
+     * Calculate entropy bonus for a state
+     * H(π(·|s)) = -Σ_a π(a|s) log(π(a|s))
+     */
+    calculateEntropyBonus(stateKey) {
+        const statePolicy = this.policyTable.get(stateKey);
+        if (!statePolicy || statePolicy.size === 0) {
+            return 0;
+        }
+        // Calculate entropy over stored actions
+        const prefs = Array.from(statePolicy.values()).map(e => e.probability);
+        const maxPref = Math.max(...prefs);
+        const expPrefs = prefs.map(p => Math.exp((p - maxPref) / this.actorConfig.temperature));
+        const sumExp = expPrefs.reduce((sum, e) => sum + e, 0);
+        const probs = expPrefs.map(e => e / sumExp);
+        let entropy = 0;
+        for (const p of probs) {
+            if (p > 0) {
+                entropy -= p * Math.log(p);
+            }
+        }
+        return this.actorConfig.entropyCoefficient * entropy;
+    }
+    /**
+     * Get softmax probability for a specific action
+     */
+    softmaxProb(stateKey, actionKey) {
+        const statePolicy = this.policyTable.get(stateKey);
+        if (!statePolicy || statePolicy.size === 0) {
+            return 1.0 / Math.max(1, statePolicy?.size ?? 1);
+        }
+        const prefs = Array.from(statePolicy.entries());
+        const temp = this.actorConfig.temperature;
+        const maxPref = Math.max(...prefs.map(([, e]) => e.probability));
+        let sumExp = 0;
+        let targetExp = 0;
+        for (const [key, entry] of prefs) {
+            const exp = Math.exp((entry.probability - maxPref) / temp);
+            sumExp += exp;
+            if (key === actionKey) {
+                targetExp = exp;
+            }
+        }
+        return targetExp / sumExp;
+    }
+    /**
+     * Normalize advantage using running statistics
+     */
+    normalizeAdvantage(advantage) {
+        this.advantageHistory.push(advantage);
+        // Keep limited history
+        if (this.advantageHistory.length > 1000) {
+            this.advantageHistory.shift();
+        }
+        if (this.advantageHistory.length < 10) {
+            return advantage;
+        }
+        const mean = this.advantageHistory.reduce((s, a) => s + a, 0) / this.advantageHistory.length;
+        const variance = this.advantageHistory.reduce((s, a) => s + (a - mean) ** 2, 0) / this.advantageHistory.length;
+        const std = Math.sqrt(variance) + 1e-8;
+        return (advantage - mean) / std;
+    }
+    /**
+     * Get state value from value table
+     */
+    getStateValue(state) {
+        const stateKey = this.encodeState(state);
+        const entry = this.valueTable.get(stateKey);
+        return entry?.value ?? 0;
+    }
+    /**
+     * Get state value from target network (for stability)
+     */
+    getTargetStateValue(state) {
+        const stateKey = this.encodeState(state);
+        const entry = this.targetValueTable.get(stateKey);
+        return entry?.value ?? this.getStateValue(state);
+    }
+    /**
+     * Sync target network with main network
+     */
+    syncTargetNetwork() {
+        this.targetValueTable.clear();
+        for (const [key, value] of this.valueTable.entries()) {
+            this.targetValueTable.set(key, { ...value });
+        }
+        this.logger.debug('Target network synchronized');
+    }
+    /**
+     * Extract experience components
+     */
+    extractExperience(experience) {
+        return {
+            state: experience.state,
+            action: experience.action,
+            reward: experience.reward,
+            nextState: experience.nextState,
+            done: experience.done ?? false
+        };
+    }
+    /**
+     * Get default exploration rate for reset
+     */
+    getDefaultExplorationRate() {
+        return this.defaultExploration;
+    }
+    /**
+     * Get actor-critic specific statistics
+     */
+    getActorCriticStatistics() {
+        // Calculate average state value
+        let totalValue = 0;
+        for (const entry of this.valueTable.values()) {
+            totalValue += entry.value;
+        }
+        const avgStateValue = this.valueTable.size > 0 ? totalValue / this.valueTable.size : 0;
+        // Calculate policy table size
+        let policySize = 0;
+        for (const statePolicy of this.policyTable.values()) {
+            policySize += statePolicy.size;
+        }
+        // Calculate average entropy
+        let totalEntropy = 0;
+        let entropyCount = 0;
+        for (const stateKey of this.policyTable.keys()) {
+            const entropy = this.calculateEntropyBonus(stateKey) / this.actorConfig.entropyCoefficient;
+            totalEntropy += entropy;
+            entropyCount++;
+        }
+        const avgEntropy = entropyCount > 0 ? totalEntropy / entropyCount : 0;
+        // Calculate advantage statistics
+        const advMean = this.advantageHistory.length > 0
+            ? this.advantageHistory.reduce((s, a) => s + a, 0) / this.advantageHistory.length
+            : 0;
+        const advVariance = this.advantageHistory.length > 0
+            ? this.advantageHistory.reduce((s, a) => s + (a - advMean) ** 2, 0) / this.advantageHistory.length
+            : 0;
+        return {
+            valueTableSize: this.valueTable.size,
+            policyTableSize: policySize,
+            avgStateValue,
+            avgEntropy,
+            advantageMean: advMean,
+            advantageStd: Math.sqrt(advVariance)
+        };
+    }
+    /**
+     * Reset actor-critic specific state
+     */
+    reset() {
+        super.reset();
+        this.policyTable.clear();
+        this.valueTable.clear();
+        this.targetValueTable.clear();
+        this.advantageHistory = [];
+        this.updatesSinceTargetSync = 0;
+        this.logger.info('ActorCriticLearner reset');
+    }
+    /**
+     * Export complete actor-critic state
+     */
+    exportActorCritic() {
+        const serializedPolicy = {};
+        for (const [state, actions] of this.policyTable.entries()) {
+            serializedPolicy[state] = {};
+            for (const [action, entry] of actions.entries()) {
+                serializedPolicy[state][action] = entry;
+            }
+        }
+        const serializedValue = {};
+        for (const [state, entry] of this.valueTable.entries()) {
+            serializedValue[state] = entry;
+        }
+        return {
+            base: this.export(),
+            valueTable: serializedValue,
+            policyTable: serializedPolicy,
+            actorConfig: { ...this.actorConfig }
+        };
+    }
+    /**
+     * Import complete actor-critic state
+     */
+    importActorCritic(state) {
+        this.import(state.base);
+        this.valueTable.clear();
+        for (const [stateKey, entry] of Object.entries(state.valueTable)) {
+            this.valueTable.set(stateKey, entry);
+        }
+        this.policyTable.clear();
+        for (const [stateKey, actions] of Object.entries(state.policyTable)) {
+            const actionMap = new Map();
+            for (const [actionKey, entry] of Object.entries(actions)) {
+                actionMap.set(actionKey, entry);
+            }
+            this.policyTable.set(stateKey, actionMap);
+        }
+        this.actorConfig = { ...state.actorConfig };
+        this.syncTargetNetwork();
+        this.logger.info('Imported Actor-Critic state', {
+            valueTableSize: this.valueTable.size,
+            policyTableSize: this.policyTable.size
+        });
+    }
+}
+exports.ActorCriticLearner = ActorCriticLearner;
+/**
+ * Create default Actor-Critic configuration
+ */
+function createDefaultActorCriticConfig() {
+    return {
+        learningRate: 0.1,
+        actorLearningRate: 0.01,
+        criticLearningRate: 0.1,
+        discountFactor: 0.95,
+        explorationRate: 0.3,
+        explorationDecay: 0.995,
+        minExplorationRate: 0.01,
+        entropyCoefficient: 0.01,
+        temperature: 1.0,
+        normalizeAdvantage: true,
+        targetUpdateFrequency: 100,
+        useExperienceReplay: true,
+        replayBufferSize: 10000,
+        batchSize: 32
+    };
+}
+//# sourceMappingURL=ActorCriticLearner.js.map
--- /dev/null
+++ package/dist/learning/algorithms/ActorCriticLearner.js.map
@@ -0,0 +1 @@
+{"version":3,"file":"ActorCriticLearner.js","sourceRoot":"","sources":["../../../src/learning/algorithms/ActorCriticLearner.ts"],"names":[],"mappings":"…"}
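The compiled module above is a tabular A2C learner. The following is a minimal, illustrative usage sketch based only on the API visible in this diff (`createDefaultActorCriticConfig`, `selectAction`, `update`, `getActorCriticStatistics`); the deep-import path and the state/action/experience shapes are assumptions rather than documented usage.

```typescript
// Illustrative sketch only. Import path and object shapes are assumptions;
// the real state/action encoding comes from AbstractRLLearner, not shown here.
import {
  ActorCriticLearner,
  createDefaultActorCriticConfig,
} from 'agentic-qe/dist/learning/algorithms/ActorCriticLearner';

const ac = new ActorCriticLearner(createDefaultActorCriticConfig());

// Hypothetical state and action objects for the sketch.
const state = { taskType: 'unit-test', coverage: 0.42 } as any;
const availableActions = [
  { type: 'generate-tests' },
  { type: 'refine-assertions' },
] as any[];

for (let step = 0; step < 100; step++) {
  // Softmax policy with an ε-greedy exploration fallback, as implemented above.
  const action = ac.selectAction(state, availableActions);
  const reward = Math.random(); // stand-in reward signal
  // δ = r + γV(s') − V(s) drives both the critic and actor updates.
  ac.update({ state, action, reward, nextState: state, done: false } as any);
}

// Value/policy table sizes, average entropy, and advantage statistics.
console.log(ac.getActorCriticStatistics());
```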
--- /dev/null
+++ package/dist/learning/algorithms/MAMLMetaLearner.d.ts
@@ -0,0 +1,218 @@
+/**
+ * MAMLMetaLearner - Model-Agnostic Meta-Learning for QE Agents
+ *
+ * Implements MAML-style meta-learning that enables agents to "learn how to learn".
+ * Agents can quickly adapt to new testing domains with just 5-10 examples.
+ *
+ * Key Concepts:
+ * - Inner Loop: Fast adaptation to new task with few gradient steps (5-10 examples)
+ * - Outer Loop: Learn initialization parameters that enable fast adaptation
+ * - Meta-Learning: After seeing few examples of new test pattern, agent performs well
+ *
+ * Algorithm:
+ * 1. Initialize meta-parameters θ (Q-table initialization)
+ * 2. For each task Ti in task distribution:
+ *    a. Sample K examples from Ti (support set)
+ *    b. Adapt: θ'i = θ - α∇Loss(θ, support) [inner loop]
+ *    c. Evaluate on query set from Ti
+ * 3. Update meta-parameters: θ = θ - β∇Loss(θ', query) [outer loop]
+ * 4. Result: θ is optimized for fast adaptation to new tasks
+ *
+ * Use Cases:
+ * - New testing framework adoption (5-10 examples → proficient)
+ * - New project domain (few examples → effective testing strategy)
+ * - API testing → UI testing transfer learning
+ */
+import { AbstractRLLearner, RLConfig, QValue } from './AbstractRLLearner';
+import { TaskExperience, AgentAction } from '../types';
+/**
+ * MAML configuration extends base RL config with meta-learning parameters
+ */
+export interface MAMLConfig extends RLConfig {
+    innerLearningRate: number;
+    innerSteps: number;
+    metaLearningRate: number;
+    minTaskExamples: number;
+    maxTaskExamples: number;
+    taskBatchSize: number;
+    firstOrderApproximation: boolean;
+    baseAlgorithm?: AbstractRLLearner;
+}
+/**
+ * Task for meta-learning (contains support and query sets)
+ */
+interface MetaTask {
+    id: string;
+    taskType: string;
+    supportSet: TaskExperience[];
+    querySet: TaskExperience[];
+    metadata?: Record<string, any>;
+}
+/**
+ * Meta-learning episode tracking
+ */
+interface MetaEpisode {
+    episodeId: string;
+    tasks: MetaTask[];
+    preAdaptationLoss: number;
+    postAdaptationLoss: number;
+    metaLoss: number;
+    improvement: number;
+    timestamp: Date;
+}
+/**
+ * MAMLMetaLearner - Model-Agnostic Meta-Learning for Fast Adaptation
+ *
+ * Learns an initialization of Q-values that enables rapid adaptation to new
+ * testing tasks with minimal examples (5-10 shots).
+ *
+ * Example:
+ * - Agent trained on Jest, Mocha, Jasmine unit testing
+ * - Sees 5-10 examples of Vitest tests
+ * - Immediately generates high-quality Vitest tests
+ *
+ * Meta-Learning Process:
+ * 1. Sample batch of tasks (different testing scenarios)
+ * 2. For each task:
+ *    - Adapt Q-table with support set (inner loop)
+ *    - Evaluate adapted Q-table on query set
+ * 3. Compute meta-gradient from all tasks
+ * 4. Update meta-parameters (Q-table initialization)
+ */
+export declare class MAMLMetaLearner extends AbstractRLLearner {
+    private readonly mamlConfig;
+    private metaQTable;
+    private taskBuffer;
+    private metaEpisodes;
+    private metaStepCount;
+    private baseAlgorithm?;
+    constructor(config?: Partial<MAMLConfig>);
+    /**
+     * Update Q-value using base algorithm (delegates to wrapped algorithm if available)
+     * For MAML, this is called during inner loop adaptation
+     */
+    update(experience: TaskExperience, nextAction?: AgentAction): void;
+    /**
+     * Q-Learning update rule (default inner loop algorithm)
+     */
+    private qLearningUpdate;
+    /**
+     * Buffer experience by task type for meta-learning
+     */
+    private bufferExperience;
+    /**
+     * Perform meta-learning update (outer loop)
+     * Learns Q-table initialization that enables fast adaptation
+     *
+     * This should be called periodically after collecting enough task examples
+     */
+    performMetaUpdate(): Promise<MetaEpisode | null>;
+    /**
+     * Sample batch of meta-tasks from task buffer
+     * Each task contains support set (for adaptation) and query set (for evaluation)
+     */
+    private sampleMetaTasks;
+    /**
+     * Adapt Q-table to a specific task using support set (inner loop)
+     */
+    private adaptToTask;
+    /**
+     * Evaluate loss (TD error) on a set of experiences
+     */
+    private evaluateLoss;
+    /**
+     * Compute first-order meta-gradients (FOMAML)
+     * Faster approximation that ignores second-order derivatives
+     */
+    private computeFirstOrderGradients;
+    /**
+     * Compute second-order meta-gradients (Full MAML)
+     * More accurate but computationally expensive
+     */
+    private computeSecondOrderGradients;
+    /**
+     * Update meta-parameters using accumulated gradients
+     */
+    private updateMetaParameters;
+    /**
+     * Fast adaptation to new task (few-shot learning)
+     * Given 5-10 examples, quickly adapt Q-table for new testing domain
+     *
+     * @param examples Few examples of new task (5-10)
+     * @returns Adapted Q-table
+     */
+    fastAdapt(examples: TaskExperience[]): Promise<Map<string, Map<string, QValue>>>;
+    /**
+     * Get Q-value from specific Q-table (helper)
+     */
+    private getQValueFromTable;
+    /**
+     * Set Q-value in specific Q-table (helper)
+     */
+    private setQValueInTable;
+    /**
+     * Get Q-value (raw, without creating entry)
+     */
+    private getQValueRaw;
+    /**
+     * Clone Q-table
+     */
+    private cloneQTable;
+    /**
+     * Copy Q-table from source to destination
+     */
+    private copyQTable;
+    /**
+     * Get meta-learning statistics
+     */
+    getMetaStatistics(): {
+        metaSteps: number;
+        metaEpisodes: number;
+        avgPreAdaptLoss: number;
+        avgPostAdaptLoss: number;
+        avgImprovement: number;
+        taskTypes: number;
+        bufferedExperiences: number;
+    };
+    /**
+     * Get meta-episodes history
+     */
+    getMetaEpisodes(): MetaEpisode[];
+    /**
+     * Clear task buffer
+     */
+    clearTaskBuffer(): void;
+    /**
+     * Get default exploration rate
+     */
+    protected getDefaultExplorationRate(): number;
+    /**
+     * Get algorithm name
+     */
+    getAlgorithmName(): string;
+    /**
+     * Override getStatistics to include meta-learning metrics
+     */
+    getStatistics(): ReturnType<AbstractRLLearner['getStatistics']> & {
+        maml: ReturnType<MAMLMetaLearner['getMetaStatistics']>;
+    };
+    /**
+     * Export meta-learner state
+     */
+    export(): ReturnType<AbstractRLLearner['export']> & {
+        metaQTable: Record<string, Record<string, QValue>>;
+        taskBuffer: Record<string, TaskExperience[]>;
+        metaEpisodes: MetaEpisode[];
+        metaStepCount: number;
+    };
+    /**
+     * Import meta-learner state
+     */
+    import(state: ReturnType<MAMLMetaLearner['export']>): void;
+}
+/**
+ * Create default MAML configuration
+ */
+export declare function createDefaultMAMLConfig(): MAMLConfig;
+export {};
+//# sourceMappingURL=MAMLMetaLearner.d.ts.map
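The declaration file above exposes `fastAdapt` for few-shot adaptation (the MAML inner loop) and `performMetaUpdate` for the outer loop. Below is a minimal, hedged sketch of few-shot use based only on the declared signatures; the import path, the `TaskExperience` field values, and the reward numbers are assumptions for illustration.

```typescript
// Illustrative sketch only. Import path and TaskExperience shapes are assumptions;
// only the method signatures come from the .d.ts shown above.
import {
  MAMLMetaLearner,
  createDefaultMAMLConfig,
} from 'agentic-qe/dist/learning/algorithms/MAMLMetaLearner';

const maml = new MAMLMetaLearner(createDefaultMAMLConfig());

// A support set of ~8 examples from a new testing domain (few-shot).
const fewShotExamples = Array.from({ length: 8 }, (_, i) => ({
  state: { framework: 'vitest', step: i },
  action: { type: 'generate-tests' },
  reward: 0.8,
  nextState: { framework: 'vitest', step: i + 1 },
  done: i === 7,
})) as any[];

async function adaptToNewDomain(): Promise<void> {
  // Inner loop: returns a Q-table adapted to the new task in a few update steps.
  const adaptedQTable = await maml.fastAdapt(fewShotExamples);
  console.log('adapted states:', adaptedQTable.size);

  // Pre/post-adaptation loss and improvement, per getMetaStatistics().
  console.log(maml.getMetaStatistics());
}

adaptToNewDomain().catch(console.error);
```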