agentic-qe 2.1.2 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +123 -0
- package/README.md +1 -1
- package/dist/agents/index.d.ts.map +1 -1
- package/dist/agents/index.js +5 -1
- package/dist/agents/index.js.map +1 -1
- package/dist/core/di/AgentDependencies.d.ts +127 -0
- package/dist/core/di/AgentDependencies.d.ts.map +1 -0
- package/dist/core/di/AgentDependencies.js +251 -0
- package/dist/core/di/AgentDependencies.js.map +1 -0
- package/dist/core/di/DIContainer.d.ts +149 -0
- package/dist/core/di/DIContainer.d.ts.map +1 -0
- package/dist/core/di/DIContainer.js +333 -0
- package/dist/core/di/DIContainer.js.map +1 -0
- package/dist/core/di/index.d.ts +11 -0
- package/dist/core/di/index.d.ts.map +1 -0
- package/dist/core/di/index.js +22 -0
- package/dist/core/di/index.js.map +1 -0
- package/dist/core/index.d.ts +1 -0
- package/dist/core/index.d.ts.map +1 -1
- package/dist/core/index.js +11 -1
- package/dist/core/index.js.map +1 -1
- package/dist/learning/ExperienceSharingProtocol.d.ts +243 -0
- package/dist/learning/ExperienceSharingProtocol.d.ts.map +1 -0
- package/dist/learning/ExperienceSharingProtocol.js +538 -0
- package/dist/learning/ExperienceSharingProtocol.js.map +1 -0
- package/dist/learning/LearningEngine.d.ts +101 -1
- package/dist/learning/LearningEngine.d.ts.map +1 -1
- package/dist/learning/LearningEngine.js +330 -3
- package/dist/learning/LearningEngine.js.map +1 -1
- package/dist/learning/QLearning.d.ts +38 -125
- package/dist/learning/QLearning.d.ts.map +1 -1
- package/dist/learning/QLearning.js +46 -267
- package/dist/learning/QLearning.js.map +1 -1
- package/dist/learning/QLearningLegacy.d.ts +154 -0
- package/dist/learning/QLearningLegacy.d.ts.map +1 -0
- package/dist/learning/QLearningLegacy.js +337 -0
- package/dist/learning/QLearningLegacy.js.map +1 -0
- package/dist/learning/algorithms/AbstractRLLearner.d.ts +162 -0
- package/dist/learning/algorithms/AbstractRLLearner.d.ts.map +1 -0
- package/dist/learning/algorithms/AbstractRLLearner.js +300 -0
- package/dist/learning/algorithms/AbstractRLLearner.js.map +1 -0
- package/dist/learning/algorithms/ActorCriticLearner.d.ts +201 -0
- package/dist/learning/algorithms/ActorCriticLearner.d.ts.map +1 -0
- package/dist/learning/algorithms/ActorCriticLearner.js +447 -0
- package/dist/learning/algorithms/ActorCriticLearner.js.map +1 -0
- package/dist/learning/algorithms/PPOLearner.d.ts +207 -0
- package/dist/learning/algorithms/PPOLearner.d.ts.map +1 -0
- package/dist/learning/algorithms/PPOLearner.js +490 -0
- package/dist/learning/algorithms/PPOLearner.js.map +1 -0
- package/dist/learning/algorithms/QLearning.d.ts +68 -0
- package/dist/learning/algorithms/QLearning.d.ts.map +1 -0
- package/dist/learning/algorithms/QLearning.js +116 -0
- package/dist/learning/algorithms/QLearning.js.map +1 -0
- package/dist/learning/algorithms/SARSALearner.d.ts +107 -0
- package/dist/learning/algorithms/SARSALearner.d.ts.map +1 -0
- package/dist/learning/algorithms/SARSALearner.js +252 -0
- package/dist/learning/algorithms/SARSALearner.js.map +1 -0
- package/dist/learning/algorithms/index.d.ts +29 -0
- package/dist/learning/algorithms/index.d.ts.map +1 -0
- package/dist/learning/algorithms/index.js +44 -0
- package/dist/learning/algorithms/index.js.map +1 -0
- package/dist/learning/index.d.ts +3 -0
- package/dist/learning/index.d.ts.map +1 -1
- package/dist/learning/index.js +15 -1
- package/dist/learning/index.js.map +1 -1
- package/dist/learning/types.d.ts +2 -0
- package/dist/learning/types.d.ts.map +1 -1
- package/dist/memory/DistributedPatternLibrary.d.ts +159 -0
- package/dist/memory/DistributedPatternLibrary.d.ts.map +1 -0
- package/dist/memory/DistributedPatternLibrary.js +370 -0
- package/dist/memory/DistributedPatternLibrary.js.map +1 -0
- package/dist/memory/PatternQualityScorer.d.ts +169 -0
- package/dist/memory/PatternQualityScorer.d.ts.map +1 -0
- package/dist/memory/PatternQualityScorer.js +327 -0
- package/dist/memory/PatternQualityScorer.js.map +1 -0
- package/dist/memory/PatternReplicationService.d.ts +187 -0
- package/dist/memory/PatternReplicationService.d.ts.map +1 -0
- package/dist/memory/PatternReplicationService.js +392 -0
- package/dist/memory/PatternReplicationService.js.map +1 -0
- package/dist/providers/ClaudeProvider.d.ts +98 -0
- package/dist/providers/ClaudeProvider.d.ts.map +1 -0
- package/dist/providers/ClaudeProvider.js +418 -0
- package/dist/providers/ClaudeProvider.js.map +1 -0
- package/dist/providers/ILLMProvider.d.ts +287 -0
- package/dist/providers/ILLMProvider.d.ts.map +1 -0
- package/dist/providers/ILLMProvider.js +33 -0
- package/dist/providers/ILLMProvider.js.map +1 -0
- package/dist/providers/LLMProviderFactory.d.ts +154 -0
- package/dist/providers/LLMProviderFactory.d.ts.map +1 -0
- package/dist/providers/LLMProviderFactory.js +426 -0
- package/dist/providers/LLMProviderFactory.js.map +1 -0
- package/dist/providers/RuvllmProvider.d.ts +107 -0
- package/dist/providers/RuvllmProvider.d.ts.map +1 -0
- package/dist/providers/RuvllmProvider.js +417 -0
- package/dist/providers/RuvllmProvider.js.map +1 -0
- package/dist/providers/index.d.ts +31 -0
- package/dist/providers/index.d.ts.map +1 -0
- package/dist/providers/index.js +69 -0
- package/dist/providers/index.js.map +1 -0
- package/package.json +1 -1
package/dist/learning/algorithms/ActorCriticLearner.js
@@ -0,0 +1,447 @@
"use strict";
/**
 * ActorCriticLearner - Actor-Critic Reinforcement Learning Algorithm
 *
 * Implements Advantage Actor-Critic (A2C) combining:
 * - Actor: Policy network that selects actions using softmax policy
 * - Critic: Value network that estimates state values for advantage calculation
 *
 * Key features:
 * - Continuous action probabilities via softmax
 * - Advantage-based updates to reduce variance
 * - Entropy bonus for exploration
 * - Policy gradient with baseline
 *
 * Update rules:
 * - Critic (Value): V(s) += α_c * δ where δ = r + γV(s') - V(s)
 * - Actor (Policy): π(a|s) += α_a * δ * ∇log(π(a|s)) + β * H(π)
 *
 * @module learning/algorithms/ActorCriticLearner
 * @version 1.0.0
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.ActorCriticLearner = void 0;
exports.createDefaultActorCriticConfig = createDefaultActorCriticConfig;
const AbstractRLLearner_1 = require("./AbstractRLLearner");
/**
 * ActorCriticLearner - Advantage Actor-Critic implementation
 *
 * Combines policy gradient (actor) with value function approximation (critic)
 * for more stable and efficient learning than pure Q-learning.
 *
 * Usage:
 * ```typescript
 * const ac = new ActorCriticLearner({
 *   learningRate: 0.1,
 *   actorLearningRate: 0.01,
 *   criticLearningRate: 0.1,
 *   discountFactor: 0.95,
 *   explorationRate: 0.3,
 *   explorationDecay: 0.995,
 *   minExplorationRate: 0.01,
 *   entropyCoefficient: 0.01,
 *   temperature: 1.0,
 *   normalizeAdvantage: true,
 *   targetUpdateFrequency: 100,
 *   useExperienceReplay: true,
 *   replayBufferSize: 10000,
 *   batchSize: 32
 * });
 *
 * const action = ac.selectAction(state, availableActions);
 * ac.update(experience);
 * ```
 */
class ActorCriticLearner extends AbstractRLLearner_1.AbstractRLLearner {
    constructor(config) {
        super(config);
        this.actorConfig = config;
        this.policyTable = new Map();
        this.valueTable = new Map();
        this.targetValueTable = new Map();
        this.updatesSinceTargetSync = 0;
        this.advantageHistory = [];
        this.defaultExploration = config.explorationRate;
        this.logger.info('ActorCriticLearner initialized', {
            actorLR: config.actorLearningRate,
            criticLR: config.criticLearningRate,
            entropy: config.entropyCoefficient,
            temperature: config.temperature
        });
    }
    /**
     * Select action using softmax policy with exploration
     * π(a|s) = exp(Q(s,a)/τ) / Σ_a' exp(Q(s,a')/τ)
     */
    selectAction(state, availableActions) {
        if (availableActions.length === 0) {
            throw new Error('No available actions to select from');
        }
        // With probability ε, use random action (exploration fallback)
        if (Math.random() < this.config.explorationRate) {
            const randomIndex = Math.floor(Math.random() * availableActions.length);
            return availableActions[randomIndex];
        }
        // Use softmax policy
        return this.sampleFromPolicy(state, availableActions);
    }
    /**
     * Sample action from softmax policy distribution
     */
    sampleFromPolicy(state, availableActions) {
        const stateKey = this.encodeState(state);
        const probabilities = this.getActionProbabilities(stateKey, availableActions);
        // Sample from categorical distribution
        const random = Math.random();
        let cumulative = 0;
        for (let i = 0; i < availableActions.length; i++) {
            cumulative += probabilities[i];
            if (random <= cumulative) {
                return availableActions[i];
            }
        }
        // Fallback (shouldn't reach here due to normalization)
        return availableActions[availableActions.length - 1];
    }
    /**
     * Get softmax action probabilities
     * π(a|s) = exp(preference(s,a)/τ) / Σ_a' exp(preference(s,a')/τ)
     */
    getActionProbabilities(stateKey, availableActions) {
        const temperature = this.actorConfig.temperature;
        const preferences = [];
        // Get preferences (Q-values or policy table values)
        for (const action of availableActions) {
            const actionKey = this.encodeAction(action);
            const preference = this.getPreference(stateKey, actionKey);
            preferences.push(preference / temperature);
        }
        // Softmax with numerical stability
        const maxPref = Math.max(...preferences);
        const expPrefs = preferences.map(p => Math.exp(p - maxPref));
        const sumExp = expPrefs.reduce((sum, e) => sum + e, 0);
        return expPrefs.map(e => e / sumExp);
    }
    /**
     * Get preference for state-action pair from policy table
     */
    getPreference(stateKey, actionKey) {
        const statePolicy = this.policyTable.get(stateKey);
        if (!statePolicy) {
            return 0; // uniform preference initially
        }
        const entry = statePolicy.get(actionKey);
        return entry ? entry.probability : 0;
    }
    /**
     * Update actor and critic using temporal difference
     *
     * TD Error (advantage): δ = r + γV(s') - V(s)
     * Critic update: V(s) += α_c * δ
     * Actor update: preference(s,a) += α_a * δ * (1 - π(a|s))
     */
    update(experience, nextAction) {
        this.stepCount++;
        const { state, action, reward, nextState, done } = this.extractExperience(experience);
        const stateKey = this.encodeState(state);
        const actionKey = this.encodeAction(action);
        // Get current and next state values from critic
        const currentV = this.getStateValue(state);
        const nextV = done ? 0 : this.getTargetStateValue(nextState);
        // Calculate TD error (advantage)
        let advantage = reward + this.config.discountFactor * nextV - currentV;
        // Normalize advantage if enabled
        if (this.actorConfig.normalizeAdvantage) {
            advantage = this.normalizeAdvantage(advantage);
        }
        // Update critic (value function)
        this.updateCritic(stateKey, currentV, advantage);
        // Update actor (policy)
        this.updateActor(stateKey, actionKey, advantage);
        // Store in replay buffer if enabled
        if (this.replayBuffer) {
            this.replayBuffer.add(experience);
        }
        // Sync target network periodically
        this.updatesSinceTargetSync++;
        if (this.updatesSinceTargetSync >= this.actorConfig.targetUpdateFrequency) {
            this.syncTargetNetwork();
            this.updatesSinceTargetSync = 0;
        }
        this.logger.debug('Actor-Critic update', {
            state: stateKey,
            action: actionKey,
            reward,
            advantage,
            valueUpdate: currentV + this.actorConfig.criticLearningRate * advantage
        });
    }
    /**
     * Update critic (value function)
     * V(s) += α_c * δ
     */
    updateCritic(stateKey, currentV, advantage) {
        const newValue = currentV + this.actorConfig.criticLearningRate * advantage;
        const existingEntry = this.valueTable.get(stateKey);
        this.valueTable.set(stateKey, {
            state: stateKey,
            value: newValue,
            updateCount: (existingEntry?.updateCount ?? 0) + 1,
            lastUpdated: Date.now()
        });
    }
    /**
     * Update actor (policy)
     * For softmax policy: preference(s,a) += α_a * δ * (1 - π(a|s))
     * This increases preference for actions with positive advantage
     */
    updateActor(stateKey, actionKey, advantage) {
        if (!this.policyTable.has(stateKey)) {
            this.policyTable.set(stateKey, new Map());
        }
        const statePolicy = this.policyTable.get(stateKey);
        // Get current preference and probability
        const currentEntry = statePolicy.get(actionKey);
        const currentPref = currentEntry?.probability ?? 0;
        // Approximate gradient: increase preference proportional to advantage
        // Also add entropy bonus to encourage exploration
        const entropyBonus = this.calculateEntropyBonus(stateKey);
        const newPref = currentPref + this.actorConfig.actorLearningRate * (advantage + entropyBonus);
        statePolicy.set(actionKey, {
            action: actionKey,
            probability: newPref,
            logProbability: Math.log(Math.max(0.001, this.softmaxProb(stateKey, actionKey))),
            updateCount: (currentEntry?.updateCount ?? 0) + 1,
            lastUpdated: Date.now()
        });
        // Also update Q-table for getBestAction compatibility
        this.setQValue(stateKey, actionKey, newPref);
    }
    /**
     * Calculate entropy bonus for a state
     * H(π(·|s)) = -Σ_a π(a|s) log(π(a|s))
     */
    calculateEntropyBonus(stateKey) {
        const statePolicy = this.policyTable.get(stateKey);
        if (!statePolicy || statePolicy.size === 0) {
            return 0;
        }
        // Calculate entropy over stored actions
        const prefs = Array.from(statePolicy.values()).map(e => e.probability);
        const maxPref = Math.max(...prefs);
        const expPrefs = prefs.map(p => Math.exp((p - maxPref) / this.actorConfig.temperature));
        const sumExp = expPrefs.reduce((sum, e) => sum + e, 0);
        const probs = expPrefs.map(e => e / sumExp);
        let entropy = 0;
        for (const p of probs) {
            if (p > 0) {
                entropy -= p * Math.log(p);
            }
        }
        return this.actorConfig.entropyCoefficient * entropy;
    }
    /**
     * Get softmax probability for a specific action
     */
    softmaxProb(stateKey, actionKey) {
        const statePolicy = this.policyTable.get(stateKey);
        if (!statePolicy || statePolicy.size === 0) {
            return 1.0 / Math.max(1, statePolicy?.size ?? 1);
        }
        const prefs = Array.from(statePolicy.entries());
        const temp = this.actorConfig.temperature;
        const maxPref = Math.max(...prefs.map(([, e]) => e.probability));
        let sumExp = 0;
        let targetExp = 0;
        for (const [key, entry] of prefs) {
            const exp = Math.exp((entry.probability - maxPref) / temp);
            sumExp += exp;
            if (key === actionKey) {
                targetExp = exp;
            }
        }
        return targetExp / sumExp;
    }
    /**
     * Normalize advantage using running statistics
     */
    normalizeAdvantage(advantage) {
        this.advantageHistory.push(advantage);
        // Keep limited history
        if (this.advantageHistory.length > 1000) {
            this.advantageHistory.shift();
        }
        if (this.advantageHistory.length < 10) {
            return advantage;
        }
        const mean = this.advantageHistory.reduce((s, a) => s + a, 0) / this.advantageHistory.length;
        const variance = this.advantageHistory.reduce((s, a) => s + (a - mean) ** 2, 0) / this.advantageHistory.length;
        const std = Math.sqrt(variance) + 1e-8;
        return (advantage - mean) / std;
    }
    /**
     * Get state value from value table
     */
    getStateValue(state) {
        const stateKey = this.encodeState(state);
        const entry = this.valueTable.get(stateKey);
        return entry?.value ?? 0;
    }
    /**
     * Get state value from target network (for stability)
     */
    getTargetStateValue(state) {
        const stateKey = this.encodeState(state);
        const entry = this.targetValueTable.get(stateKey);
        return entry?.value ?? this.getStateValue(state);
    }
    /**
     * Sync target network with main network
     */
    syncTargetNetwork() {
        this.targetValueTable.clear();
        for (const [key, value] of this.valueTable.entries()) {
            this.targetValueTable.set(key, { ...value });
        }
        this.logger.debug('Target network synchronized');
    }
    /**
     * Extract experience components
     */
    extractExperience(experience) {
        return {
            state: experience.state,
            action: experience.action,
            reward: experience.reward,
            nextState: experience.nextState,
            done: experience.done ?? false
        };
    }
    /**
     * Get default exploration rate for reset
     */
    getDefaultExplorationRate() {
        return this.defaultExploration;
    }
    /**
     * Get actor-critic specific statistics
     */
    getActorCriticStatistics() {
        // Calculate average state value
        let totalValue = 0;
        for (const entry of this.valueTable.values()) {
            totalValue += entry.value;
        }
        const avgStateValue = this.valueTable.size > 0 ? totalValue / this.valueTable.size : 0;
        // Calculate policy table size
        let policySize = 0;
        for (const statePolicy of this.policyTable.values()) {
            policySize += statePolicy.size;
        }
        // Calculate average entropy
        let totalEntropy = 0;
        let entropyCount = 0;
        for (const stateKey of this.policyTable.keys()) {
            const entropy = this.calculateEntropyBonus(stateKey) / this.actorConfig.entropyCoefficient;
            totalEntropy += entropy;
            entropyCount++;
        }
        const avgEntropy = entropyCount > 0 ? totalEntropy / entropyCount : 0;
        // Calculate advantage statistics
        const advMean = this.advantageHistory.length > 0
            ? this.advantageHistory.reduce((s, a) => s + a, 0) / this.advantageHistory.length
            : 0;
        const advVariance = this.advantageHistory.length > 0
            ? this.advantageHistory.reduce((s, a) => s + (a - advMean) ** 2, 0) / this.advantageHistory.length
            : 0;
        return {
            valueTableSize: this.valueTable.size,
            policyTableSize: policySize,
            avgStateValue,
            avgEntropy,
            advantageMean: advMean,
            advantageStd: Math.sqrt(advVariance)
        };
    }
    /**
     * Reset actor-critic specific state
     */
    reset() {
        super.reset();
        this.policyTable.clear();
        this.valueTable.clear();
        this.targetValueTable.clear();
        this.advantageHistory = [];
        this.updatesSinceTargetSync = 0;
        this.logger.info('ActorCriticLearner reset');
    }
    /**
     * Export complete actor-critic state
     */
    exportActorCritic() {
        const serializedPolicy = {};
        for (const [state, actions] of this.policyTable.entries()) {
            serializedPolicy[state] = {};
            for (const [action, entry] of actions.entries()) {
                serializedPolicy[state][action] = entry;
            }
        }
        const serializedValue = {};
        for (const [state, entry] of this.valueTable.entries()) {
            serializedValue[state] = entry;
        }
        return {
            base: this.export(),
            valueTable: serializedValue,
            policyTable: serializedPolicy,
            actorConfig: { ...this.actorConfig }
        };
    }
    /**
     * Import complete actor-critic state
     */
    importActorCritic(state) {
        this.import(state.base);
        this.valueTable.clear();
        for (const [stateKey, entry] of Object.entries(state.valueTable)) {
            this.valueTable.set(stateKey, entry);
        }
        this.policyTable.clear();
        for (const [stateKey, actions] of Object.entries(state.policyTable)) {
            const actionMap = new Map();
            for (const [actionKey, entry] of Object.entries(actions)) {
                actionMap.set(actionKey, entry);
            }
            this.policyTable.set(stateKey, actionMap);
        }
        this.actorConfig = { ...state.actorConfig };
        this.syncTargetNetwork();
        this.logger.info('Imported Actor-Critic state', {
            valueTableSize: this.valueTable.size,
            policyTableSize: this.policyTable.size
        });
    }
}
exports.ActorCriticLearner = ActorCriticLearner;
/**
 * Create default Actor-Critic configuration
 */
function createDefaultActorCriticConfig() {
    return {
        learningRate: 0.1,
        actorLearningRate: 0.01,
        criticLearningRate: 0.1,
        discountFactor: 0.95,
        explorationRate: 0.3,
        explorationDecay: 0.995,
        minExplorationRate: 0.01,
        entropyCoefficient: 0.01,
        temperature: 1.0,
        normalizeAdvantage: true,
        targetUpdateFrequency: 100,
        useExperienceReplay: true,
        replayBufferSize: 10000,
        batchSize: 32
    };
}
//# sourceMappingURL=ActorCriticLearner.js.map
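The JSDoc header above spells out the tabular A2C step that update(), updateCritic(), and updateActor() implement. As a rough standalone illustration only (not part of the package: it uses plain string keys and made-up learning rates instead of the package's TaskState/AgentAction types and AbstractRLLearner base class, and omits the entropy bonus and replay buffer), the same step can be written as:

// Minimal tabular Actor-Critic step, mirroring the core of updateCritic/updateActor above.
// Hypothetical standalone sketch: plain string state/action keys, no base class.
const criticLR = 0.1;    // α_c
const actorLR = 0.01;    // α_a
const gamma = 0.95;      // discount factor
const temperature = 1.0; // softmax temperature
const V = new Map<string, number>();    // critic: state -> estimated value
const pref = new Map<string, number>(); // actor: "state|action" -> preference

function softmax(prefs: number[]): number[] {
  // Numerically stable softmax over action preferences, as in getActionProbabilities().
  const max = Math.max(...prefs);
  const exps = prefs.map(p => Math.exp((p - max) / temperature));
  const sum = exps.reduce((s, e) => s + e, 0);
  return exps.map(e => e / sum);
}

function step(state: string, action: string, reward: number, nextState: string, done: boolean): void {
  // TD error doubles as the advantage estimate: δ = r + γV(s') - V(s)
  const v = V.get(state) ?? 0;
  const vNext = done ? 0 : (V.get(nextState) ?? 0);
  const advantage = reward + gamma * vNext - v;
  // Critic update: V(s) += α_c * δ
  V.set(state, v + criticLR * advantage);
  // Actor update: preference(s,a) += α_a * δ (entropy bonus omitted for brevity)
  const key = `${state}|${action}`;
  pref.set(key, (pref.get(key) ?? 0) + actorLR * advantage);
}

// One step with made-up numbers: V starts at 0, so δ = 1 + 0.95·0 - 0 = 1,
// V('s0') becomes 0.1 and the preference for ('s0', 'run-tests') becomes 0.01.
step('s0', 'run-tests', 1, 's1', true);
console.log(V.get('s0'), pref.get('s0|run-tests'));
console.log(softmax([pref.get('s0|run-tests') ?? 0, 0])); // probabilities over two actions

The class above layers advantage normalization, a periodically synced target value table, and the entropy bonus on top of this core step.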
package/dist/learning/algorithms/ActorCriticLearner.js.map
@@ -0,0 +1 @@
{"version":3,"file":"ActorCriticLearner.js","sourceRoot":"","sources":["../../../src/learning/algorithms/ActorCriticLearner.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;GAmBG;;;AAyhBH,wEAiBC;AAxiBD,2DAA0E;AA0C1E;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,MAAa,kBAAmB,SAAQ,qCAAiB;IASvD,YAAY,MAAyB;QACnC,KAAK,CAAC,MAAM,CAAC,CAAC;QACd,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC;QAC1B,IAAI,CAAC,WAAW,GAAG,IAAI,GAAG,EAAE,CAAC;QAC7B,IAAI,CAAC,UAAU,GAAG,IAAI,GAAG,EAAE,CAAC;QAC5B,IAAI,CAAC,gBAAgB,GAAG,IAAI,GAAG,EAAE,CAAC;QAClC,IAAI,CAAC,sBAAsB,GAAG,CAAC,CAAC;QAChC,IAAI,CAAC,gBAAgB,GAAG,EAAE,CAAC;QAC3B,IAAI,CAAC,kBAAkB,GAAG,MAAM,CAAC,eAAe,CAAC;QAEjD,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,gCAAgC,EAAE;YACjD,OAAO,EAAE,MAAM,CAAC,iBAAiB;YACjC,QAAQ,EAAE,MAAM,CAAC,kBAAkB;YACnC,OAAO,EAAE,MAAM,CAAC,kBAAkB;YAClC,WAAW,EAAE,MAAM,CAAC,WAAW;SAChC,CAAC,CAAC;IACL,CAAC;IAED;;;OAGG;IACM,YAAY,CAAC,KAAgB,EAAE,gBAA+B;QACrE,IAAI,gBAAgB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAClC,MAAM,IAAI,KAAK,CAAC,qCAAqC,CAAC,CAAC;QACzD,CAAC;QAED,+DAA+D;QAC/D,IAAI,IAAI,CAAC,MAAM,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,eAAe,EAAE,CAAC;YAChD,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,gBAAgB,CAAC,MAAM,CAAC,CAAC;YACxE,OAAO,gBAAgB,CAAC,WAAW,CAAC,CAAC;QACvC,CAAC;QAED,qBAAqB;QACrB,OAAO,IAAI,CAAC,gBAAgB,CAAC,KAAK,EAAE,gBAAgB,CAAC,CAAC;IACxD,CAAC;IAED;;OAEG;IACK,gBAAgB,CAAC,KAAgB,EAAE,gBAA+B;QACxE,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC;QACzC,MAAM,aAAa,GAAG,IAAI,CAAC,sBAAsB,CAAC,QAAQ,EAAE,gBAAgB,CAAC,CAAC;QAE9E,uCAAuC;QACvC,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QAC7B,IAAI,UAAU,GAAG,CAAC,CAAC;QAEnB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,gBAAgB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACjD,UAAU,IAAI,aAAa,CAAC,CAAC,CAAC,CAAC;YAC/B,IAAI,MAAM,IAAI,UAAU,EAAE,CAAC;gBACzB,OAAO,gBAAgB,CAAC,CAAC,CAAC,CAAC;YAC7B,CAAC;QACH,CAAC;QAED,uDAAuD;QACvD,OAAO,gBAAgB,CAAC,gBAAgB,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACvD,CAAC;IAED;;;OAGG;IACK,sBAAsB,CAAC,QAAgB,EAAE,gBAA+B;QAC9E,MAAM,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC;QACjD,MAAM,WAAW,GAAa,EAAE,CAAC;QAEjC,oDAAoD;QACpD,KAAK,MAAM,MAAM,IAAI,gBAAgB,EAAE,CAAC;YACtC,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;YAC5C,MAAM,UAAU,GAAG,IAAI,CAAC,aAAa,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;YAC3D,WAAW,CAAC,IAAI,CAAC,UAAU,GAAG,WAAW,CAAC,CAAC;QAC7C,CAAC;QAED,mCAAmC;QACnC,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,WAAW,CAAC,CAAC;QACzC,MAAM,QAAQ,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC;QAC7D,MAAM,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;QAEvD,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC;IACvC,CAAC;IAED;;OAEG;IACK,aAAa,CAAC,QAAgB,EAAE,SAAiB;QACvD,MAAM,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACnD,IAAI,CAAC,WAAW,EAAE,CAAC;YACjB,OAAO,CAAC,CAAC,CAAC,+BAA+B;QAC3C,CAAC;QAED,MAAM,KAAK,GAAG,WAAW,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QACzC,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;IACvC,CAAC;IAED;;;;;;OAMG;IACM,MAAM,CAAC,UAA0B,EAAE,UAAwB;QAClE,IAAI,CAAC,SAAS,EAAE,CAAC;QAEjB,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,GAAG,IAAI,CAAC,iBAAiB,CAAC,UAAU,CAAC,CAAC;QACtF,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC;QACzC,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;QAE5C,gDAAgD;QAChD,MAAM,QAAQ,GAAG,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;QAC3C,MAAM,KAAK,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,mBAAmB,CAAC,SAAS,CAAC,CAAC;QAE7D,iCAAiC;QACjC,IAAI,SAAS,GAAG,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,cAAc,GAAG,KAAK,GAAG,QAAQ,CAAC;QAEvE,iCAAiC;QACjC,IAAI,IAAI,CAAC,WAAW,CAAC,kBAAkB,EAAE,CAAC;YACxC,
SAAS,GAAG,IAAI,CAAC,kBAAkB,CAAC,SAAS,CAAC,CAAC;QACjD,CAAC;QAED,iCAAiC;QACjC,IAAI,CAAC,YAAY,CAAC,QAAQ,EAAE,QAAQ,EAAE,SAAS,CAAC,CAAC;QAEjD,wBAAwB;QACxB,IAAI,CAAC,WAAW,CAAC,QAAQ,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;QAEjD,oCAAoC;QACpC,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;YACtB,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QACpC,CAAC;QAED,mCAAmC;QACnC,IAAI,CAAC,sBAAsB,EAAE,CAAC;QAC9B,IAAI,IAAI,CAAC,sBAAsB,IAAI,IAAI,CAAC,WAAW,CAAC,qBAAqB,EAAE,CAAC;YAC1E,IAAI,CAAC,iBAAiB,EAAE,CAAC;YACzB,IAAI,CAAC,sBAAsB,GAAG,CAAC,CAAC;QAClC,CAAC;QAED,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,qBAAqB,EAAE;YACvC,KAAK,EAAE,QAAQ;YACf,MAAM,EAAE,SAAS;YACjB,MAAM;YACN,SAAS;YACT,WAAW,EAAE,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,kBAAkB,GAAG,SAAS;SACxE,CAAC,CAAC;IACL,CAAC;IAED;;;OAGG;IACK,YAAY,CAAC,QAAgB,EAAE,QAAgB,EAAE,SAAiB;QACxE,MAAM,QAAQ,GAAG,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,kBAAkB,GAAG,SAAS,CAAC;QAE5E,MAAM,aAAa,GAAG,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACpD,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,QAAQ,EAAE;YAC5B,KAAK,EAAE,QAAQ;YACf,KAAK,EAAE,QAAQ;YACf,WAAW,EAAE,CAAC,aAAa,EAAE,WAAW,IAAI,CAAC,CAAC,GAAG,CAAC;YAClD,WAAW,EAAE,IAAI,CAAC,GAAG,EAAE;SACxB,CAAC,CAAC;IACL,CAAC;IAED;;;;OAIG;IACK,WAAW,CAAC,QAAgB,EAAE,SAAiB,EAAE,SAAiB;QACxE,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;YACpC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,QAAQ,EAAE,IAAI,GAAG,EAAE,CAAC,CAAC;QAC5C,CAAC;QACD,MAAM,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAE,CAAC;QAEpD,yCAAyC;QACzC,MAAM,YAAY,GAAG,WAAW,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QAChD,MAAM,WAAW,GAAG,YAAY,EAAE,WAAW,IAAI,CAAC,CAAC;QAEnD,sEAAsE;QACtE,kDAAkD;QAClD,MAAM,YAAY,GAAG,IAAI,CAAC,qBAAqB,CAAC,QAAQ,CAAC,CAAC;QAC1D,MAAM,OAAO,GAAG,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC,iBAAiB,GAAG,CAAC,SAAS,GAAG,YAAY,CAAC,CAAC;QAE9F,WAAW,CAAC,GAAG,CAAC,SAAS,EAAE;YACzB,MAAM,EAAE,SAAS;YACjB,WAAW,EAAE,OAAO;YACpB,cAAc,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,IAAI,CAAC,WAAW,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC,CAAC;YAChF,WAAW,EAAE,CAAC,YAAY,EAAE,WAAW,IAAI,CAAC,CAAC,GAAG,CAAC;YACjD,WAAW,EAAE,IAAI,CAAC,GAAG,EAAE;SACxB,CAAC,CAAC;QAEH,sDAAsD;QACtD,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC;IAC/C,CAAC;IAED;;;OAGG;IACK,qBAAqB,CAAC,QAAgB;QAC5C,MAAM,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACnD,IAAI,CAAC,WAAW,IAAI,WAAW,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;YAC3C,OAAO,CAAC,CAAC;QACX,CAAC;QAED,wCAAwC;QACxC,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;QACvE,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,CAAC;QACnC,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,GAAG,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC,CAAC;QACxF,MAAM,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;QACvD,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC;QAE5C,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;YACtB,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBACV,OAAO,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YAC7B,CAAC;QACH,CAAC;QAED,OAAO,IAAI,CAAC,WAAW,CAAC,kBAAkB,GAAG,OAAO,CAAC;IACvD,CAAC;IAED;;OAEG;IACK,WAAW,CAAC,QAAgB,EAAE,SAAiB;QACrD,MAAM,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACnD,IAAI,CAAC,WAAW,IAAI,WAAW,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;YAC3C,OAAO,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,WAAW,EAAE,IAAI,IAAI,CAAC,CAAC,CAAC;QACnD,CAAC;QAED,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,CAAC,CAAC;QAChD,MAAM,IAAI,GAAG,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC;QAE1C,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAA
C,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC;QACjE,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,IAAI,SAAS,GAAG,CAAC,CAAC;QAElB,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,KAAK,EAAE,CAAC;YACjC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,WAAW,GAAG,OAAO,CAAC,GAAG,IAAI,CAAC,CAAC;YAC3D,MAAM,IAAI,GAAG,CAAC;YACd,IAAI,GAAG,KAAK,SAAS,EAAE,CAAC;gBACtB,SAAS,GAAG,GAAG,CAAC;YAClB,CAAC;QACH,CAAC;QAED,OAAO,SAAS,GAAG,MAAM,CAAC;IAC5B,CAAC;IAED;;OAEG;IACK,kBAAkB,CAAC,SAAiB;QAC1C,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAEtC,uBAAuB;QACvB,IAAI,IAAI,CAAC,gBAAgB,CAAC,MAAM,GAAG,IAAI,EAAE,CAAC;YACxC,IAAI,CAAC,gBAAgB,CAAC,KAAK,EAAE,CAAC;QAChC,CAAC;QAED,IAAI,IAAI,CAAC,gBAAgB,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;YACtC,OAAO,SAAS,CAAC;QACnB,CAAC;QAED,MAAM,IAAI,GAAG,IAAI,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,gBAAgB,CAAC,MAAM,CAAC;QAC7F,MAAM,QAAQ,GAAG,IAAI,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,gBAAgB,CAAC,MAAM,CAAC;QAC/G,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,IAAI,CAAC;QAEvC,OAAO,CAAC,SAAS,GAAG,IAAI,CAAC,GAAG,GAAG,CAAC;IAClC,CAAC;IAED;;OAEG;IACM,aAAa,CAAC,KAAgB;QACrC,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC;QACzC,MAAM,KAAK,GAAG,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAC5C,OAAO,KAAK,EAAE,KAAK,IAAI,CAAC,CAAC;IAC3B,CAAC;IAED;;OAEG;IACK,mBAAmB,CAAC,KAAgB;QAC1C,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC;QACzC,MAAM,KAAK,GAAG,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAClD,OAAO,KAAK,EAAE,KAAK,IAAI,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;IACnD,CAAC;IAED;;OAEG;IACK,iBAAiB;QACvB,IAAI,CAAC,gBAAgB,CAAC,KAAK,EAAE,CAAC;QAC9B,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,OAAO,EAAE,EAAE,CAAC;YACrD,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,GAAG,KAAK,EAAE,CAAC,CAAC;QAC/C,CAAC;QACD,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,6BAA6B,CAAC,CAAC;IACnD,CAAC;IAED;;OAEG;IACK,iBAAiB,CAAC,UAA0B;QAOlD,OAAO;YACL,KAAK,EAAE,UAAU,CAAC,KAAK;YACvB,MAAM,EAAE,UAAU,CAAC,MAAM;YACzB,MAAM,EAAE,UAAU,CAAC,MAAM;YACzB,SAAS,EAAE,UAAU,CAAC,SAAS;YAC/B,IAAI,EAAE,UAAU,CAAC,IAAI,IAAI,KAAK;SAC/B,CAAC;IACJ,CAAC;IAED;;OAEG;IACO,yBAAyB;QACjC,OAAO,IAAI,CAAC,kBAAkB,CAAC;IACjC,CAAC;IAED;;OAEG;IACH,wBAAwB;QAQtB,gCAAgC;QAChC,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,EAAE,CAAC;YAC7C,UAAU,IAAI,KAAK,CAAC,KAAK,CAAC;QAC5B,CAAC;QACD,MAAM,aAAa,GAAG,IAAI,CAAC,UAAU,CAAC,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,GAAG,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;QAEvF,8BAA8B;QAC9B,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,KAAK,MAAM,WAAW,IAAI,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,EAAE,CAAC;YACpD,UAAU,IAAI,WAAW,CAAC,IAAI,CAAC;QACjC,CAAC;QAED,4BAA4B;QAC5B,IAAI,YAAY,GAAG,CAAC,CAAC;QACrB,IAAI,YAAY,GAAG,CAAC,CAAC;QACrB,KAAK,MAAM,QAAQ,IAAI,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,EAAE,CAAC;YAC/C,MAAM,OAAO,GAAG,IAAI,CAAC,qBAAqB,CAAC,QAAQ,CAAC,GAAG,IAAI,CAAC,WAAW,CAAC,kBAAkB,CAAC;YAC3F,YAAY,IAAI,OAAO,CAAC;YACxB,YAAY,EAAE,CAAC;QACjB,CAAC;QACD,MAAM,UAAU,GAAG,YAAY,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC;QAEtE,iCAAiC;QACjC,MAAM,OAAO,GAAG,IAAI,CAAC,gBAAgB,CAAC,MAAM,GAAG,CAAC;YAC9C,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,gBAAgB,CAAC,MAAM;YACjF,CAAC,CAAC,CAAC,CAAC;QACN,MAAM,WAAW,GAAG,IAAI,CAAC,gBAAgB,CAAC,MAAM,GAAG,CAAC;YAClD,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,gBAAgB,CAAC,MAAM;YAClG,CAAC,CAAC,CAAC,CAAC;
QAEN,OAAO;YACL,cAAc,EAAE,IAAI,CAAC,UAAU,CAAC,IAAI;YACpC,eAAe,EAAE,UAAU;YAC3B,aAAa;YACb,UAAU;YACV,aAAa,EAAE,OAAO;YACtB,YAAY,EAAE,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC;SACrC,CAAC;IACJ,CAAC;IAED;;OAEG;IACM,KAAK;QACZ,KAAK,CAAC,KAAK,EAAE,CAAC;QACd,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC;QACxB,IAAI,CAAC,gBAAgB,CAAC,KAAK,EAAE,CAAC;QAC9B,IAAI,CAAC,gBAAgB,GAAG,EAAE,CAAC;QAC3B,IAAI,CAAC,sBAAsB,GAAG,CAAC,CAAC;QAChC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC;IAC/C,CAAC;IAED;;OAEG;IACH,iBAAiB;QAMf,MAAM,gBAAgB,GAAgD,EAAE,CAAC;QACzE,KAAK,MAAM,CAAC,KAAK,EAAE,OAAO,CAAC,IAAI,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,EAAE,CAAC;YAC1D,gBAAgB,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC;YAC7B,KAAK,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,IAAI,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC;gBAChD,gBAAgB,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;YAC1C,CAAC;QACH,CAAC;QAED,MAAM,eAAe,GAAoC,EAAE,CAAC;QAC5D,KAAK,MAAM,CAAC,KAAK,EAAE,KAAK,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,OAAO,EAAE,EAAE,CAAC;YACvD,eAAe,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC;QACjC,CAAC;QAED,OAAO;YACL,IAAI,EAAE,IAAI,CAAC,MAAM,EAAE;YACnB,UAAU,EAAE,eAAe;YAC3B,WAAW,EAAE,gBAAgB;YAC7B,WAAW,EAAE,EAAE,GAAG,IAAI,CAAC,WAAW,EAAE;SACrC,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,iBAAiB,CAAC,KAAgD;QAChE,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAExB,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC;QACxB,KAAK,MAAM,CAAC,QAAQ,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,UAAU,CAAC,EAAE,CAAC;YACjE,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QACvC,CAAC;QAED,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE,CAAC;QACzB,KAAK,MAAM,CAAC,QAAQ,EAAE,OAAO,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,EAAE,CAAC;YACpE,MAAM,SAAS,GAAG,IAAI,GAAG,EAAuB,CAAC;YACjD,KAAK,MAAM,CAAC,SAAS,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;gBACzD,SAAS,CAAC,GAAG,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;YAClC,CAAC;YACD,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;QAC5C,CAAC;QAED,IAAI,CAAC,WAAW,GAAG,EAAE,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC;QAC5C,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAEzB,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,6BAA6B,EAAE;YAC9C,cAAc,EAAE,IAAI,CAAC,UAAU,CAAC,IAAI;YACpC,eAAe,EAAE,IAAI,CAAC,WAAW,CAAC,IAAI;SACvC,CAAC,CAAC;IACL,CAAC;CACF;AA3cD,gDA2cC;AAED;;GAEG;AACH,SAAgB,8BAA8B;IAC5C,OAAO;QACL,YAAY,EAAE,GAAG;QACjB,iBAAiB,EAAE,IAAI;QACvB,kBAAkB,EAAE,GAAG;QACvB,cAAc,EAAE,IAAI;QACpB,eAAe,EAAE,GAAG;QACpB,gBAAgB,EAAE,KAAK;QACvB,kBAAkB,EAAE,IAAI;QACxB,kBAAkB,EAAE,IAAI;QACxB,WAAW,EAAE,GAAG;QAChB,kBAAkB,EAAE,IAAI;QACxB,qBAAqB,EAAE,GAAG;QAC1B,mBAAmB,EAAE,IAAI;QACzB,gBAAgB,EAAE,KAAK;QACvB,SAAS,EAAE,EAAE;KACd,CAAC;AACJ,CAAC"}
package/dist/learning/algorithms/PPOLearner.d.ts
@@ -0,0 +1,207 @@
/**
 * PPOLearner - Proximal Policy Optimization Algorithm
 *
 * Implements PPO-Clip, the most widely used variant of PPO:
 * - Clipped surrogate objective to prevent large policy updates
 * - Generalized Advantage Estimation (GAE) for variance reduction
 * - Value function clipping for stability
 * - Multiple epochs over collected trajectories
 *
 * Key features:
 * - Trust region optimization without KL constraint
 * - Sample efficient with mini-batch updates
 * - Robust to hyperparameter choices
 * - Suitable for continuous and discrete action spaces
 *
 * PPO-Clip objective:
 * L^CLIP(θ) = E[min(r(θ)Â, clip(r(θ), 1-ε, 1+ε)Â)]
 * where r(θ) = π_θ(a|s) / π_θ_old(a|s)
 *
 * @module learning/algorithms/PPOLearner
 * @version 1.0.0
 */
import { AbstractRLLearner, RLConfig } from './AbstractRLLearner';
import { TaskState, AgentAction, TaskExperience } from '../types';
/**
 * Configuration specific to PPO algorithm
 */
export interface PPOConfig extends RLConfig {
    /** Clipping parameter (ε) - typically 0.1-0.3 */
    clipEpsilon: number;
    /** Number of epochs to train on collected data */
    ppoEpochs: number;
    /** Mini-batch size for training */
    miniBatchSize: number;
    /** Value function loss coefficient */
    valueLossCoefficient: number;
    /** Entropy loss coefficient for exploration */
    entropyCoefficient: number;
    /** GAE lambda for advantage estimation */
    gaeLambda: number;
    /** Maximum gradient norm for clipping */
    maxGradNorm: number;
    /** Whether to clip value function updates */
    clipValueLoss: boolean;
    /** Learning rate for policy network */
    policyLearningRate: number;
    /** Learning rate for value network */
    valueLearningRate: number;
}
/**
 * Policy parameters for a state-action pair
 */
interface PolicyParams {
    preference: number;
    logProb: number;
    updateCount: number;
}
/**
 * PPOLearner - Proximal Policy Optimization implementation
 *
 * PPO is a state-of-the-art policy gradient method that achieves
 * strong performance while being simpler than TRPO.
 *
 * Usage:
 * ```typescript
 * const ppo = new PPOLearner({
 *   learningRate: 0.0003,
 *   discountFactor: 0.99,
 *   explorationRate: 0.0,
 *   explorationDecay: 1.0,
 *   minExplorationRate: 0.0,
 *   clipEpsilon: 0.2,
 *   ppoEpochs: 4,
 *   miniBatchSize: 64,
 *   valueLossCoefficient: 0.5,
 *   entropyCoefficient: 0.01,
 *   gaeLambda: 0.95,
 *   maxGradNorm: 0.5,
 *   clipValueLoss: true,
 *   policyLearningRate: 0.0003,
 *   valueLearningRate: 0.001,
 *   useExperienceReplay: false,
 *   replayBufferSize: 2048,
 *   batchSize: 64
 * });
 *
 * // Collect trajectory
 * ppo.collectStep(state, action, reward, nextState, done);
 *
 * // Train on collected trajectory
 * ppo.trainOnTrajectory();
 * ```
 */
export declare class PPOLearner extends AbstractRLLearner {
    private ppoConfig;
    private policyTable;
    private valueTable;
    private oldPolicyTable;
    private trajectory;
    private readonly defaultExploration;
    constructor(config: PPOConfig);
    /**
     * Select action using current policy (softmax)
     */
    selectAction(state: TaskState, availableActions: AgentAction[]): AgentAction;
    /**
     * Get action probabilities using softmax policy
     */
    private getActionProbabilities;
    /**
     * Get policy parameters for state-action pair
     */
    private getPolicyParams;
    /**
     * Get log probability of action under current policy
     */
    private getLogProb;
    /**
     * Get state value from value network
     */
    getStateValue(state: TaskState): number;
    /**
     * Collect a step in the trajectory
     */
    collectStep(state: TaskState, action: AgentAction, reward: number, nextState: TaskState, done: boolean): void;
    /**
     * Standard update interface - collects experience and trains when ready
     */
    update(experience: TaskExperience, nextAction?: AgentAction): void;
    /**
     * Train on collected trajectory using PPO
     */
    trainOnTrajectory(): void;
    /**
     * Compute Generalized Advantage Estimation (GAE)
     *
     * GAE: Â_t = Σ_{l=0}^∞ (γλ)^l δ_{t+l}
     * where δ_t = r_t + γV(s_{t+1}) - V(s_t)
     */
    private computeGAE;
    /**
     * Save current policy as old policy for ratio computation
     */
    private saveOldPolicy;
    /**
     * Get old log probability for ratio computation
     */
    private getOldLogProb;
    /**
     * Train one epoch on the trajectory
     */
    private trainEpoch;
    /**
     * Train on a mini-batch
     */
    private trainMiniBatch;
    /**
     * Update policy parameters
     */
    private updatePolicy;
    /**
     * Update value function
     */
    private updateValue;
    /**
     * Compute entropy of policy at state
     */
    private computeEntropy;
    /**
     * Get default exploration rate for reset
     */
    protected getDefaultExplorationRate(): number;
    /**
     * Get PPO-specific statistics
     */
    getPPOStatistics(): {
        trajectoryLength: number;
        valueTableSize: number;
        policyTableSize: number;
        avgValue: number;
        avgAdvantage: number;
        clipFraction: number;
    };
    /**
     * Reset PPO-specific state
     */
    reset(): void;
    /**
     * Export PPO state
     */
    exportPPO(): {
        base: ReturnType<AbstractRLLearner['export']>;
        policyTable: Record<string, Record<string, PolicyParams>>;
        valueTable: Record<string, number>;
        ppoConfig: PPOConfig;
    };
    /**
     * Import PPO state
     */
    importPPO(state: ReturnType<typeof this.exportPPO>): void;
}
/**
 * Create default PPO configuration
 */
export declare function createDefaultPPOConfig(): PPOConfig;
export {};
//# sourceMappingURL=PPOLearner.d.ts.map
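Being a declaration file, the listing above only exposes signatures; the bodies of computeGAE() and the clipped policy update live in the compiled PPOLearner.js, which is not shown here. As a rough orientation only (a standalone sketch over hypothetical numeric arrays, not the package's implementation), the two formulas quoted in the JSDoc work out as follows:

// Illustrative sketch of the GAE recursion and the PPO-Clip surrogate term.
// Hypothetical inputs and helper names; not the package's compiled code.
const gamma = 0.99;      // discount factor
const lambda = 0.95;     // GAE λ
const clipEpsilon = 0.2; // clipping parameter ε

// GAE: Â_t = Σ_l (γλ)^l δ_{t+l}, with δ_t = r_t + γV(s_{t+1}) - V(s_t),
// accumulated backwards over a finished trajectory.
function computeGAE(rewards: number[], values: number[], nextValues: number[], dones: boolean[]): number[] {
  const advantages = new Array<number>(rewards.length).fill(0);
  let gae = 0;
  for (let t = rewards.length - 1; t >= 0; t--) {
    const nextV = dones[t] ? 0 : nextValues[t];
    const delta = rewards[t] + gamma * nextV - values[t];
    gae = delta + (dones[t] ? 0 : gamma * lambda * gae);
    advantages[t] = gae;
  }
  return advantages;
}

// PPO-Clip term for one sample: min(r(θ)Â, clip(r(θ), 1-ε, 1+ε)Â),
// with r(θ) = π_θ(a|s) / π_θ_old(a|s) recovered from stored log-probabilities.
function clippedSurrogate(logProb: number, oldLogProb: number, advantage: number): number {
  const ratio = Math.exp(logProb - oldLogProb);
  const clippedRatio = Math.min(Math.max(ratio, 1 - clipEpsilon), 1 + clipEpsilon);
  return Math.min(ratio * advantage, clippedRatio * advantage);
}

// Made-up numbers: a ratio of 1.35 is clipped to 1.2 when the advantage is positive,
// so the surrogate is 1.2 · 0.5 = 0.6 instead of 0.675, capping the update size.
console.log(computeGAE([1, 0, 2], [0.5, 0.4, 0.1], [0.4, 0.1, 0.0], [false, false, true]));
console.log(clippedSurrogate(Math.log(1.35), Math.log(1.0), 0.5)); // ≈ 0.6

This clipping is what keeps each mini-batch update close to the data-collecting policy without an explicit KL constraint.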
package/dist/learning/algorithms/PPOLearner.d.ts.map
@@ -0,0 +1 @@
{"version":3,"file":"PPOLearner.d.ts","sourceRoot":"","sources":["../../../src/learning/algorithms/PPOLearner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAEH,OAAO,EAAE,iBAAiB,EAAE,QAAQ,EAAU,MAAM,qBAAqB,CAAC;AAC1E,OAAO,EAAE,SAAS,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,UAAU,CAAC;AAElE;;GAEG;AACH,MAAM,WAAW,SAAU,SAAQ,QAAQ;IACzC,iDAAiD;IACjD,WAAW,EAAE,MAAM,CAAC;IACpB,kDAAkD;IAClD,SAAS,EAAE,MAAM,CAAC;IAClB,mCAAmC;IACnC,aAAa,EAAE,MAAM,CAAC;IACtB,sCAAsC;IACtC,oBAAoB,EAAE,MAAM,CAAC;IAC7B,+CAA+C;IAC/C,kBAAkB,EAAE,MAAM,CAAC;IAC3B,0CAA0C;IAC1C,SAAS,EAAE,MAAM,CAAC;IAClB,yCAAyC;IACzC,WAAW,EAAE,MAAM,CAAC;IACpB,6CAA6C;IAC7C,aAAa,EAAE,OAAO,CAAC;IACvB,uCAAuC;IACvC,kBAAkB,EAAE,MAAM,CAAC;IAC3B,sCAAsC;IACtC,iBAAiB,EAAE,MAAM,CAAC;CAC3B;AAiBD;;GAEG;AACH,UAAU,YAAY;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;CACrB;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAmCG;AACH,qBAAa,UAAW,SAAQ,iBAAiB;IAC/C,OAAO,CAAC,SAAS,CAAY;IAC7B,OAAO,CAAC,WAAW,CAAyC;IAC5D,OAAO,CAAC,UAAU,CAAsB;IACxC,OAAO,CAAC,cAAc,CAAyC;IAC/D,OAAO,CAAC,UAAU,CAAmB;IACrC,OAAO,CAAC,QAAQ,CAAC,kBAAkB,CAAS;gBAEhC,MAAM,EAAE,SAAS;IAiB7B;;OAEG;IACM,YAAY,CAAC,KAAK,EAAE,SAAS,EAAE,gBAAgB,EAAE,WAAW,EAAE,GAAG,WAAW;IAsBrF;;OAEG;IACH,OAAO,CAAC,sBAAsB;IAiB9B;;OAEG;IACH,OAAO,CAAC,eAAe;IAQvB;;OAEG;IACH,OAAO,CAAC,UAAU;IA6BlB;;OAEG;IACM,aAAa,CAAC,KAAK,EAAE,SAAS,GAAG,MAAM;IAKhD;;OAEG;IACH,WAAW,CACT,KAAK,EAAE,SAAS,EAChB,MAAM,EAAE,WAAW,EACnB,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,SAAS,EACpB,IAAI,EAAE,OAAO,GACZ,IAAI;IAqBP;;OAEG;IACM,MAAM,CAAC,UAAU,EAAE,cAAc,EAAE,UAAU,CAAC,EAAE,WAAW,GAAG,IAAI;IAe3E;;OAEG;IACH,iBAAiB,IAAI,IAAI;IAyBzB;;;;;OAKG;IACH,OAAO,CAAC,UAAU;IAsClB;;OAEG;IACH,OAAO,CAAC,aAAa;IAWrB;;OAEG;IACH,OAAO,CAAC,aAAa;IAQrB;;OAEG;IACH,OAAO,CAAC,UAAU;IAWlB;;OAEG;IACH,OAAO,CAAC,cAAc;IA0CtB;;OAEG;IACH,OAAO,CAAC,YAAY;IAkCpB;;OAEG;IACH,OAAO,CAAC,WAAW;IAMnB;;OAEG;IACH,OAAO,CAAC,cAAc;IAsBtB;;OAEG;IACH,SAAS,CAAC,yBAAyB,IAAI,MAAM;IAI7C;;OAEG;IACH,gBAAgB,IAAI;QAClB,gBAAgB,EAAE,MAAM,CAAC;QACzB,cAAc,EAAE,MAAM,CAAC;QACvB,eAAe,EAAE,MAAM,CAAC;QACxB,QAAQ,EAAE,MAAM,CAAC;QACjB,YAAY,EAAE,MAAM,CAAC;QACrB,YAAY,EAAE,MAAM,CAAC;KACtB;IAyBD;;OAEG;IACM,KAAK,IAAI,IAAI;IAStB;;OAEG;IACH,SAAS,IAAI;QACX,IAAI,EAAE,UAAU,CAAC,iBAAiB,CAAC,QAAQ,CAAC,CAAC,CAAC;QAC9C,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC,CAAC;QAC1D,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QACnC,SAAS,EAAE,SAAS,CAAC;KACtB;IAsBD;;OAEG;IACH,SAAS,CAAC,KAAK,EAAE,UAAU,CAAC,OAAO,IAAI,CAAC,SAAS,CAAC,GAAG,IAAI;CAwB1D;AAED;;GAEG;AACH,wBAAgB,sBAAsB,IAAI,SAAS,CAqBlD"}