agentic-qe 2.1.2 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +123 -0
- package/README.md +1 -1
- package/dist/agents/index.d.ts.map +1 -1
- package/dist/agents/index.js +5 -1
- package/dist/agents/index.js.map +1 -1
- package/dist/core/di/AgentDependencies.d.ts +127 -0
- package/dist/core/di/AgentDependencies.d.ts.map +1 -0
- package/dist/core/di/AgentDependencies.js +251 -0
- package/dist/core/di/AgentDependencies.js.map +1 -0
- package/dist/core/di/DIContainer.d.ts +149 -0
- package/dist/core/di/DIContainer.d.ts.map +1 -0
- package/dist/core/di/DIContainer.js +333 -0
- package/dist/core/di/DIContainer.js.map +1 -0
- package/dist/core/di/index.d.ts +11 -0
- package/dist/core/di/index.d.ts.map +1 -0
- package/dist/core/di/index.js +22 -0
- package/dist/core/di/index.js.map +1 -0
- package/dist/core/index.d.ts +1 -0
- package/dist/core/index.d.ts.map +1 -1
- package/dist/core/index.js +11 -1
- package/dist/core/index.js.map +1 -1
- package/dist/learning/ExperienceSharingProtocol.d.ts +243 -0
- package/dist/learning/ExperienceSharingProtocol.d.ts.map +1 -0
- package/dist/learning/ExperienceSharingProtocol.js +538 -0
- package/dist/learning/ExperienceSharingProtocol.js.map +1 -0
- package/dist/learning/LearningEngine.d.ts +101 -1
- package/dist/learning/LearningEngine.d.ts.map +1 -1
- package/dist/learning/LearningEngine.js +330 -3
- package/dist/learning/LearningEngine.js.map +1 -1
- package/dist/learning/QLearning.d.ts +38 -125
- package/dist/learning/QLearning.d.ts.map +1 -1
- package/dist/learning/QLearning.js +46 -267
- package/dist/learning/QLearning.js.map +1 -1
- package/dist/learning/QLearningLegacy.d.ts +154 -0
- package/dist/learning/QLearningLegacy.d.ts.map +1 -0
- package/dist/learning/QLearningLegacy.js +337 -0
- package/dist/learning/QLearningLegacy.js.map +1 -0
- package/dist/learning/algorithms/AbstractRLLearner.d.ts +162 -0
- package/dist/learning/algorithms/AbstractRLLearner.d.ts.map +1 -0
- package/dist/learning/algorithms/AbstractRLLearner.js +300 -0
- package/dist/learning/algorithms/AbstractRLLearner.js.map +1 -0
- package/dist/learning/algorithms/ActorCriticLearner.d.ts +201 -0
- package/dist/learning/algorithms/ActorCriticLearner.d.ts.map +1 -0
- package/dist/learning/algorithms/ActorCriticLearner.js +447 -0
- package/dist/learning/algorithms/ActorCriticLearner.js.map +1 -0
- package/dist/learning/algorithms/PPOLearner.d.ts +207 -0
- package/dist/learning/algorithms/PPOLearner.d.ts.map +1 -0
- package/dist/learning/algorithms/PPOLearner.js +490 -0
- package/dist/learning/algorithms/PPOLearner.js.map +1 -0
- package/dist/learning/algorithms/QLearning.d.ts +68 -0
- package/dist/learning/algorithms/QLearning.d.ts.map +1 -0
- package/dist/learning/algorithms/QLearning.js +116 -0
- package/dist/learning/algorithms/QLearning.js.map +1 -0
- package/dist/learning/algorithms/SARSALearner.d.ts +107 -0
- package/dist/learning/algorithms/SARSALearner.d.ts.map +1 -0
- package/dist/learning/algorithms/SARSALearner.js +252 -0
- package/dist/learning/algorithms/SARSALearner.js.map +1 -0
- package/dist/learning/algorithms/index.d.ts +29 -0
- package/dist/learning/algorithms/index.d.ts.map +1 -0
- package/dist/learning/algorithms/index.js +44 -0
- package/dist/learning/algorithms/index.js.map +1 -0
- package/dist/learning/index.d.ts +3 -0
- package/dist/learning/index.d.ts.map +1 -1
- package/dist/learning/index.js +15 -1
- package/dist/learning/index.js.map +1 -1
- package/dist/learning/types.d.ts +2 -0
- package/dist/learning/types.d.ts.map +1 -1
- package/dist/memory/DistributedPatternLibrary.d.ts +159 -0
- package/dist/memory/DistributedPatternLibrary.d.ts.map +1 -0
- package/dist/memory/DistributedPatternLibrary.js +370 -0
- package/dist/memory/DistributedPatternLibrary.js.map +1 -0
- package/dist/memory/PatternQualityScorer.d.ts +169 -0
- package/dist/memory/PatternQualityScorer.d.ts.map +1 -0
- package/dist/memory/PatternQualityScorer.js +327 -0
- package/dist/memory/PatternQualityScorer.js.map +1 -0
- package/dist/memory/PatternReplicationService.d.ts +187 -0
- package/dist/memory/PatternReplicationService.d.ts.map +1 -0
- package/dist/memory/PatternReplicationService.js +392 -0
- package/dist/memory/PatternReplicationService.js.map +1 -0
- package/dist/providers/ClaudeProvider.d.ts +98 -0
- package/dist/providers/ClaudeProvider.d.ts.map +1 -0
- package/dist/providers/ClaudeProvider.js +418 -0
- package/dist/providers/ClaudeProvider.js.map +1 -0
- package/dist/providers/ILLMProvider.d.ts +287 -0
- package/dist/providers/ILLMProvider.d.ts.map +1 -0
- package/dist/providers/ILLMProvider.js +33 -0
- package/dist/providers/ILLMProvider.js.map +1 -0
- package/dist/providers/LLMProviderFactory.d.ts +154 -0
- package/dist/providers/LLMProviderFactory.d.ts.map +1 -0
- package/dist/providers/LLMProviderFactory.js +426 -0
- package/dist/providers/LLMProviderFactory.js.map +1 -0
- package/dist/providers/RuvllmProvider.d.ts +107 -0
- package/dist/providers/RuvllmProvider.d.ts.map +1 -0
- package/dist/providers/RuvllmProvider.js +417 -0
- package/dist/providers/RuvllmProvider.js.map +1 -0
- package/dist/providers/index.d.ts +31 -0
- package/dist/providers/index.d.ts.map +1 -0
- package/dist/providers/index.js +69 -0
- package/dist/providers/index.js.map +1 -0
- package/package.json +1 -1
package/dist/learning/algorithms/PPOLearner.js
@@ -0,0 +1,490 @@
+"use strict";
+/**
+ * PPOLearner - Proximal Policy Optimization Algorithm
+ *
+ * Implements PPO-Clip, the most widely used variant of PPO:
+ * - Clipped surrogate objective to prevent large policy updates
+ * - Generalized Advantage Estimation (GAE) for variance reduction
+ * - Value function clipping for stability
+ * - Multiple epochs over collected trajectories
+ *
+ * Key features:
+ * - Trust region optimization without KL constraint
+ * - Sample efficient with mini-batch updates
+ * - Robust to hyperparameter choices
+ * - Suitable for continuous and discrete action spaces
+ *
+ * PPO-Clip objective:
+ * L^CLIP(θ) = E[min(r(θ)Â, clip(r(θ), 1-ε, 1+ε)Â)]
+ * where r(θ) = π_θ(a|s) / π_θ_old(a|s)
+ *
+ * @module learning/algorithms/PPOLearner
+ * @version 1.0.0
+ */
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.PPOLearner = void 0;
+exports.createDefaultPPOConfig = createDefaultPPOConfig;
+const AbstractRLLearner_1 = require("./AbstractRLLearner");
+/**
+ * PPOLearner - Proximal Policy Optimization implementation
+ *
+ * PPO is a state-of-the-art policy gradient method that achieves
+ * strong performance while being simpler than TRPO.
+ *
+ * Usage:
+ * ```typescript
+ * const ppo = new PPOLearner({
+ *   learningRate: 0.0003,
+ *   discountFactor: 0.99,
+ *   explorationRate: 0.0,
+ *   explorationDecay: 1.0,
+ *   minExplorationRate: 0.0,
+ *   clipEpsilon: 0.2,
+ *   ppoEpochs: 4,
+ *   miniBatchSize: 64,
+ *   valueLossCoefficient: 0.5,
+ *   entropyCoefficient: 0.01,
+ *   gaeLambda: 0.95,
+ *   maxGradNorm: 0.5,
+ *   clipValueLoss: true,
+ *   policyLearningRate: 0.0003,
+ *   valueLearningRate: 0.001,
+ *   useExperienceReplay: false,
+ *   replayBufferSize: 2048,
+ *   batchSize: 64
+ * });
+ *
+ * // Collect trajectory
+ * ppo.collectStep(state, action, reward, nextState, done);
+ *
+ * // Train on collected trajectory
+ * ppo.trainOnTrajectory();
+ * ```
+ */
+class PPOLearner extends AbstractRLLearner_1.AbstractRLLearner {
+    constructor(config) {
+        super(config);
+        this.ppoConfig = config;
+        this.policyTable = new Map();
+        this.valueTable = new Map();
+        this.oldPolicyTable = new Map();
+        this.trajectory = [];
+        this.defaultExploration = config.explorationRate;
+        this.logger.info('PPOLearner initialized', {
+            clipEpsilon: config.clipEpsilon,
+            epochs: config.ppoEpochs,
+            gaeLambda: config.gaeLambda,
+            entropyCoeff: config.entropyCoefficient
+        });
+    }
+    /**
+     * Select action using current policy (softmax)
+     */
+    selectAction(state, availableActions) {
+        if (availableActions.length === 0) {
+            throw new Error('No available actions to select from');
+        }
+        const stateKey = this.encodeState(state);
+        const probs = this.getActionProbabilities(stateKey, availableActions);
+        // Sample from distribution
+        const random = Math.random();
+        let cumulative = 0;
+        for (let i = 0; i < availableActions.length; i++) {
+            cumulative += probs[i];
+            if (random <= cumulative) {
+                return availableActions[i];
+            }
+        }
+        return availableActions[availableActions.length - 1];
+    }
+    /**
+     * Get action probabilities using softmax policy
+     */
+    getActionProbabilities(stateKey, availableActions) {
+        const preferences = [];
+        for (const action of availableActions) {
+            const actionKey = this.encodeAction(action);
+            const params = this.getPolicyParams(stateKey, actionKey);
+            preferences.push(params.preference);
+        }
+        // Softmax with numerical stability
+        const maxPref = Math.max(...preferences);
+        const expPrefs = preferences.map(p => Math.exp(p - maxPref));
+        const sumExp = expPrefs.reduce((sum, e) => sum + e, 0);
+        return expPrefs.map(e => e / sumExp);
+    }
+    /**
+     * Get policy parameters for state-action pair
+     */
+    getPolicyParams(stateKey, actionKey) {
+        const statePolicy = this.policyTable.get(stateKey);
+        if (!statePolicy) {
+            return { preference: 0, logProb: 0, updateCount: 0 };
+        }
+        return statePolicy.get(actionKey) ?? { preference: 0, logProb: 0, updateCount: 0 };
+    }
+    /**
+     * Get log probability of action under current policy
+     */
+    getLogProb(stateKey, actionKey, availableActions) {
+        // Get preference for target action
+        const params = this.getPolicyParams(stateKey, actionKey);
+        // If we don't know the action space, return stored log prob
+        if (!availableActions) {
+            return params.logProb;
+        }
+        // Calculate actual log probability
+        const prefs = [];
+        let targetPref = params.preference;
+        for (const action of availableActions) {
+            const ak = this.encodeAction(action);
+            const p = this.getPolicyParams(stateKey, ak);
+            prefs.push(p.preference);
+            if (ak === actionKey) {
+                targetPref = p.preference;
+            }
+        }
+        const maxPref = Math.max(...prefs, targetPref);
+        const expTarget = Math.exp(targetPref - maxPref);
+        const sumExp = prefs.reduce((sum, p) => sum + Math.exp(p - maxPref), 0);
+        return Math.log(expTarget / sumExp);
+    }
+    /**
+     * Get state value from value network
+     */
+    getStateValue(state) {
+        const stateKey = this.encodeState(state);
+        return this.valueTable.get(stateKey) ?? 0;
+    }
+    /**
+     * Collect a step in the trajectory
+     */
+    collectStep(state, action, reward, nextState, done) {
+        const stateKey = this.encodeState(state);
+        const actionKey = this.encodeAction(action);
+        const nextStateKey = this.encodeState(nextState);
+        const value = this.valueTable.get(stateKey) ?? 0;
+        const logProb = this.getLogProb(stateKey, actionKey);
+        this.trajectory.push({
+            state: stateKey,
+            action: actionKey,
+            reward,
+            nextState: nextStateKey,
+            done,
+            value,
+            logProb,
+            advantage: 0, // Computed later
+            returns: 0 // Computed later
+        });
+    }
+    /**
+     * Standard update interface - collects experience and trains when ready
+     */
+    update(experience, nextAction) {
+        this.stepCount++;
+        const { state, action, reward, nextState } = experience;
+        const done = experience.done ?? false;
+        // Collect step
+        this.collectStep(state, action, reward, nextState, done);
+        // Train when trajectory is large enough
+        if (this.trajectory.length >= this.ppoConfig.replayBufferSize) {
+            this.trainOnTrajectory();
+        }
+    }
+    /**
+     * Train on collected trajectory using PPO
+     */
+    trainOnTrajectory() {
+        if (this.trajectory.length === 0) {
+            return;
+        }
+        // Compute advantages using GAE
+        this.computeGAE();
+        // Save old policy for ratio computation
+        this.saveOldPolicy();
+        // Multiple epochs of training
+        for (let epoch = 0; epoch < this.ppoConfig.ppoEpochs; epoch++) {
+            this.trainEpoch();
+        }
+        // Clear trajectory
+        this.trajectory = [];
+        this.logger.info('PPO training complete', {
+            epochs: this.ppoConfig.ppoEpochs,
+            steps: this.stepCount
+        });
+    }
+    /**
+     * Compute Generalized Advantage Estimation (GAE)
+     *
+     * GAE: Â_t = Σ_{l=0}^∞ (γλ)^l δ_{t+l}
+     * where δ_t = r_t + γV(s_{t+1}) - V(s_t)
+     */
+    computeGAE() {
+        const gamma = this.config.discountFactor;
+        const lambda = this.ppoConfig.gaeLambda;
+        let lastGaeLam = 0;
+        const n = this.trajectory.length;
+        // Compute returns and advantages backwards
+        for (let t = n - 1; t >= 0; t--) {
+            const step = this.trajectory[t];
+            const nextValue = step.done
+                ? 0
+                : (t < n - 1 ? this.trajectory[t + 1].value : this.valueTable.get(step.nextState) ?? 0);
+            // TD error
+            const delta = step.reward + gamma * nextValue - step.value;
+            // GAE advantage
+            lastGaeLam = step.done
+                ? delta
+                : delta + gamma * lambda * lastGaeLam;
+            step.advantage = lastGaeLam;
+            step.returns = step.advantage + step.value;
+        }
+        // Normalize advantages
+        const advantages = this.trajectory.map(s => s.advantage);
+        const mean = advantages.reduce((s, a) => s + a, 0) / advantages.length;
+        const variance = advantages.reduce((s, a) => s + (a - mean) ** 2, 0) / advantages.length;
+        const std = Math.sqrt(variance) + 1e-8;
+        for (const step of this.trajectory) {
+            step.advantage = (step.advantage - mean) / std;
+        }
+    }
+    /**
+     * Save current policy as old policy for ratio computation
+     */
+    saveOldPolicy() {
+        this.oldPolicyTable.clear();
+        for (const [state, actions] of this.policyTable.entries()) {
+            const actionMap = new Map();
+            for (const [action, params] of actions.entries()) {
+                actionMap.set(action, { ...params });
+            }
+            this.oldPolicyTable.set(state, actionMap);
+        }
+    }
+    /**
+     * Get old log probability for ratio computation
+     */
+    getOldLogProb(stateKey, actionKey) {
+        const statePolicy = this.oldPolicyTable.get(stateKey);
+        if (!statePolicy) {
+            return 0;
+        }
+        return statePolicy.get(actionKey)?.logProb ?? 0;
+    }
+    /**
+     * Train one epoch on the trajectory
+     */
+    trainEpoch() {
+        // Shuffle trajectory
+        const shuffled = [...this.trajectory].sort(() => Math.random() - 0.5);
+        // Mini-batch updates
+        for (let i = 0; i < shuffled.length; i += this.ppoConfig.miniBatchSize) {
+            const batch = shuffled.slice(i, i + this.ppoConfig.miniBatchSize);
+            this.trainMiniBatch(batch);
+        }
+    }
+    /**
+     * Train on a mini-batch
+     */
+    trainMiniBatch(batch) {
+        for (const step of batch) {
+            // Compute probability ratio
+            const newLogProb = this.getLogProb(step.state, step.action);
+            const oldLogProb = step.logProb; // Use stored log prob
+            const ratio = Math.exp(newLogProb - oldLogProb);
+            // Compute clipped and unclipped objectives
+            const eps = this.ppoConfig.clipEpsilon;
+            const surr1 = ratio * step.advantage;
+            const surr2 = Math.max(Math.min(ratio, 1 + eps), 1 - eps) * step.advantage;
+            // Policy loss (negative because we want to maximize)
+            const policyLoss = -Math.min(surr1, surr2);
+            // Value loss
+            const valueTarget = step.returns;
+            const currentValue = this.valueTable.get(step.state) ?? 0;
+            let valueLoss = (currentValue - valueTarget) ** 2;
+            // Clip value loss if enabled
+            if (this.ppoConfig.clipValueLoss) {
+                const clippedValue = step.value + Math.max(Math.min(currentValue - step.value, eps), -eps);
+                const clippedValueLoss = (clippedValue - valueTarget) ** 2;
+                valueLoss = Math.max(valueLoss, clippedValueLoss);
+            }
+            // Entropy bonus
+            const entropy = this.computeEntropy(step.state);
+            const entropyLoss = -this.ppoConfig.entropyCoefficient * entropy;
+            // Total loss
+            const totalLoss = policyLoss + this.ppoConfig.valueLossCoefficient * valueLoss + entropyLoss;
+            // Update policy (gradient ascent direction)
+            this.updatePolicy(step.state, step.action, step.advantage, ratio);
+            // Update value function
+            this.updateValue(step.state, valueTarget);
+        }
+    }
+    /**
+     * Update policy parameters
+     */
+    updatePolicy(stateKey, actionKey, advantage, ratio) {
+        if (!this.policyTable.has(stateKey)) {
+            this.policyTable.set(stateKey, new Map());
+        }
+        const statePolicy = this.policyTable.get(stateKey);
+        const current = statePolicy.get(actionKey) ?? { preference: 0, logProb: 0, updateCount: 0 };
+        // Clipped gradient
+        const eps = this.ppoConfig.clipEpsilon;
+        let gradient = advantage;
+        if ((ratio > 1 + eps && advantage > 0) || (ratio < 1 - eps && advantage < 0)) {
+            gradient = 0; // Clipped - no update
+        }
+        // Update preference
+        const newPreference = current.preference + this.ppoConfig.policyLearningRate * gradient;
+        const newLogProb = this.getLogProb(stateKey, actionKey);
+        statePolicy.set(actionKey, {
+            preference: newPreference,
+            logProb: newLogProb,
+            updateCount: current.updateCount + 1
+        });
+        // Update Q-table for compatibility
+        this.setQValue(stateKey, actionKey, newPreference);
+    }
+    /**
+     * Update value function
+     */
+    updateValue(stateKey, target) {
+        const current = this.valueTable.get(stateKey) ?? 0;
+        const newValue = current + this.ppoConfig.valueLearningRate * (target - current);
+        this.valueTable.set(stateKey, newValue);
+    }
+    /**
+     * Compute entropy of policy at state
+     */
+    computeEntropy(stateKey) {
+        const statePolicy = this.policyTable.get(stateKey);
+        if (!statePolicy || statePolicy.size === 0) {
+            return 0;
+        }
+        const prefs = Array.from(statePolicy.values()).map(p => p.preference);
+        const maxPref = Math.max(...prefs);
+        const expPrefs = prefs.map(p => Math.exp(p - maxPref));
+        const sumExp = expPrefs.reduce((s, e) => s + e, 0);
+        const probs = expPrefs.map(e => e / sumExp);
+        let entropy = 0;
+        for (const p of probs) {
+            if (p > 0) {
+                entropy -= p * Math.log(p);
+            }
+        }
+        return entropy;
+    }
+    /**
+     * Get default exploration rate for reset
+     */
+    getDefaultExplorationRate() {
+        return this.defaultExploration;
+    }
+    /**
+     * Get PPO-specific statistics
+     */
+    getPPOStatistics() {
+        let totalValue = 0;
+        for (const v of this.valueTable.values()) {
+            totalValue += v;
+        }
+        let policySize = 0;
+        for (const statePolicy of this.policyTable.values()) {
+            policySize += statePolicy.size;
+        }
+        const avgAdvantage = this.trajectory.length > 0
+            ? this.trajectory.reduce((s, t) => s + t.advantage, 0) / this.trajectory.length
+            : 0;
+        return {
+            trajectoryLength: this.trajectory.length,
+            valueTableSize: this.valueTable.size,
+            policyTableSize: policySize,
+            avgValue: this.valueTable.size > 0 ? totalValue / this.valueTable.size : 0,
+            avgAdvantage,
+            clipFraction: 0 // Would need tracking during training
+        };
+    }
+    /**
+     * Reset PPO-specific state
+     */
+    reset() {
+        super.reset();
+        this.policyTable.clear();
+        this.valueTable.clear();
+        this.oldPolicyTable.clear();
+        this.trajectory = [];
+        this.logger.info('PPOLearner reset');
+    }
+    /**
+     * Export PPO state
+     */
+    exportPPO() {
+        const serializedPolicy = {};
+        for (const [state, actions] of this.policyTable.entries()) {
+            serializedPolicy[state] = {};
+            for (const [action, params] of actions.entries()) {
+                serializedPolicy[state][action] = params;
+            }
+        }
+        const serializedValue = {};
+        for (const [state, value] of this.valueTable.entries()) {
+            serializedValue[state] = value;
+        }
+        return {
+            base: this.export(),
+            policyTable: serializedPolicy,
+            valueTable: serializedValue,
+            ppoConfig: { ...this.ppoConfig }
+        };
+    }
+    /**
+     * Import PPO state
+     */
+    importPPO(state) {
+        this.import(state.base);
+        this.policyTable.clear();
+        for (const [stateKey, actions] of Object.entries(state.policyTable)) {
+            const actionMap = new Map();
+            for (const [actionKey, params] of Object.entries(actions)) {
+                actionMap.set(actionKey, params);
+            }
+            this.policyTable.set(stateKey, actionMap);
+        }
+        this.valueTable.clear();
+        for (const [stateKey, value] of Object.entries(state.valueTable)) {
+            this.valueTable.set(stateKey, value);
+        }
+        this.ppoConfig = { ...state.ppoConfig };
+        this.logger.info('Imported PPO state', {
+            policySize: this.policyTable.size,
+            valueSize: this.valueTable.size
+        });
+    }
+}
+exports.PPOLearner = PPOLearner;
+/**
+ * Create default PPO configuration
+ */
+function createDefaultPPOConfig() {
+    return {
+        learningRate: 0.0003,
+        discountFactor: 0.99,
+        explorationRate: 0.0, // PPO uses entropy for exploration
+        explorationDecay: 1.0,
+        minExplorationRate: 0.0,
+        clipEpsilon: 0.2,
+        ppoEpochs: 4,
+        miniBatchSize: 64,
+        valueLossCoefficient: 0.5,
+        entropyCoefficient: 0.01,
+        gaeLambda: 0.95,
+        maxGradNorm: 0.5,
+        clipValueLoss: true,
+        policyLearningRate: 0.0003,
+        valueLearningRate: 0.001,
+        useExperienceReplay: false, // PPO doesn't use replay buffer
+        replayBufferSize: 2048, // Used as trajectory buffer size
+        batchSize: 64
+    };
+}
+//# sourceMappingURL=PPOLearner.js.map
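The class above is table-based (policy preferences, state values, and the trajectory buffer are keyed by encoded state/action strings) and is driven entirely through `update()`/`collectStep()` plus `trainOnTrajectory()`. A minimal sketch of how the new class might be wired up, based only on the API visible in this hunk; the deep import path, the placeholder state/action literals, and the assumption that `TaskExperience` accepts this minimal field set are not confirmed by the diff:

```typescript
// Illustrative sketch only - not part of the published package content.
import { PPOLearner, createDefaultPPOConfig } from 'agentic-qe/dist/learning/algorithms/PPOLearner';

const ppo = new PPOLearner(createDefaultPPOConfig());

// Opaque placeholders standing in for TaskState / AgentAction values (types not shown in this diff).
const state: any = { taskType: 'test-generation', complexity: 0.4 };
const actions: any[] = [{ type: 'parallel-suite' }, { type: 'sequential-suite' }];

// One interaction step: sample from the softmax policy, then feed the transition back.
// update() buffers steps and runs the clipped PPO epochs automatically once the
// trajectory reaches replayBufferSize (2048 in the default config).
const action = ppo.selectAction(state, actions);
ppo.update({ state, action, reward: 1.0, nextState: state, done: false } as any);

// Training can also be forced explicitly on whatever has been collected so far.
ppo.collectStep(state, action, 0.5, state, true);
ppo.trainOnTrajectory();
console.log(ppo.getPPOStatistics()); // trajectoryLength, avgAdvantage, clipFraction, ...
```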
package/dist/learning/algorithms/PPOLearner.js.map
@@ -0,0 +1 @@
+{"version":3,"file":"PPOLearner.js","sourceRoot":"","sources":["../../../src/learning/algorithms/PPOLearner.ts"],"names":[],"mappings":"…"}
package/dist/learning/algorithms/QLearning.d.ts
@@ -0,0 +1,68 @@
+/**
+ * QLearning - Off-policy TD(0) Reinforcement Learning
+ *
+ * Implements standard Q-learning algorithm for reinforcement learning.
+ * Key differences from SARSA:
+ * - Off-policy: learns optimal Q-values regardless of policy being followed
+ * - Uses max Q-value for next state, not actual next action
+ * - Update rule: Q(s,a) ← Q(s,a) + α[r + γ·max(Q(s',a')) - Q(s,a)]
+ * - More aggressive than SARSA, finds optimal policy faster
+ */
+import { AbstractRLLearner, RLConfig } from './AbstractRLLearner';
+import { TaskExperience, AgentAction } from '../types';
+/**
+ * Q-learning configuration (extends base RL config)
+ */
+export interface QLearningConfig extends RLConfig {
+}
+/**
+ * QLearning - Standard Q-learning implementation
+ *
+ * Implements the classic Q-learning algorithm with:
+ * - Epsilon-greedy exploration policy
+ * - Off-policy temporal difference (TD) learning
+ * - Q-table for state-action values
+ * - Optional experience replay for stability
+ *
+ * Update Rule:
+ * Q(s,a) ← Q(s,a) + α[r + γ·max_a'(Q(s',a')) - Q(s,a)]
+ *
+ * Key characteristics:
+ * - Off-policy: learns about optimal policy while following exploration policy
+ * - Uses max Q-value (greedy) for bootstrapping
+ * - Converges to optimal Q* under certain conditions
+ * - More sample-efficient than on-policy methods
+ */
+export declare class QLearning extends AbstractRLLearner {
+    private readonly defaultConfig;
+    constructor(config?: Partial<QLearningConfig>);
+    /**
+     * Update Q-value using Q-learning update rule
+     * Q(s,a) ← Q(s,a) + α[r + γ·max(Q(s',a')) - Q(s,a)]
+     *
+     * @param experience The transition experience (s, a, r, s')
+     * @param nextAction Ignored in Q-learning (uses max Q-value instead)
+     */
+    update(experience: TaskExperience, nextAction?: AgentAction): void;
+    /**
+     * Get the default exploration rate for this algorithm
+     */
+    protected getDefaultExplorationRate(): number;
+    /**
+     * Get algorithm name
+     */
+    getAlgorithmName(): string;
+    /**
+     * Get algorithm type (off-policy)
+     */
+    getAlgorithmType(): 'on-policy' | 'off-policy';
+    /**
+     * Get detailed statistics including Q-learning-specific metrics
+     */
+    getDetailedStatistics(): {
+        algorithm: string;
+        type: 'on-policy' | 'off-policy';
+        stats: ReturnType<AbstractRLLearner['getStatistics']>;
+    };
+}
+//# sourceMappingURL=QLearning.d.ts.map
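In contrast to the PPO learner's on-policy trajectory training, the declaration above exposes the classic off-policy interface: a constructor taking a `Partial<QLearningConfig>` and an `update()` whose `nextAction` argument is ignored. A minimal usage sketch follows; the import path, the base `RLConfig` field names (assumed to match those shown in the PPO config example earlier in this diff), and the experience literal's shape are assumptions, since `RLConfig`, `TaskExperience`, and `AgentAction` are declared in files not shown here:

```typescript
// Illustrative sketch only - not part of the published package content.
import { QLearning } from 'agentic-qe/dist/learning/algorithms/QLearning';

// Partial config: anything omitted falls back to the learner's defaults.
const q = new QLearning({ learningRate: 0.1, discountFactor: 0.95, explorationRate: 0.2 });

// Off-policy TD(0) update: Q(s,a) ← Q(s,a) + α[r + γ·max_a' Q(s',a') - Q(s,a)].
// nextAction is deliberately omitted - Q-learning bootstraps from the greedy max.
q.update({
  state: { taskType: 'coverage-analysis', complexity: 0.7 },
  action: { type: 'sublinear-sampling' },
  reward: 0.8,
  nextState: { taskType: 'coverage-analysis', complexity: 0.3 },
  done: false
} as any);

console.log(q.getAlgorithmType());      // 'off-policy'
console.log(q.getDetailedStatistics()); // { algorithm, type, stats }
```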
package/dist/learning/algorithms/QLearning.d.ts.map
@@ -0,0 +1 @@
+{"version":3,"file":"QLearning.d.ts","sourceRoot":"","sources":["../../../src/learning/algorithms/QLearning.ts"],"names":[],"mappings":"…"}