agentic-qe 2.1.2 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190)
  1. package/.claude/skills/agentic-quality-engineering/SKILL.md +4 -4
  2. package/.claude/skills/cicd-pipeline-qe-orchestrator/README.md +14 -11
  3. package/.claude/skills/skills-manifest.json +2 -2
  4. package/CHANGELOG.md +138 -0
  5. package/README.md +92 -214
  6. package/dist/agents/BaseAgent.d.ts +5 -1
  7. package/dist/agents/BaseAgent.d.ts.map +1 -1
  8. package/dist/agents/BaseAgent.js +32 -17
  9. package/dist/agents/BaseAgent.js.map +1 -1
  10. package/dist/agents/index.d.ts.map +1 -1
  11. package/dist/agents/index.js +5 -1
  12. package/dist/agents/index.js.map +1 -1
  13. package/dist/cli/commands/improve/index.d.ts +8 -1
  14. package/dist/cli/commands/improve/index.d.ts.map +1 -1
  15. package/dist/cli/commands/improve/index.js +18 -16
  16. package/dist/cli/commands/improve/index.js.map +1 -1
  17. package/dist/cli/commands/learn/index.d.ts +10 -2
  18. package/dist/cli/commands/learn/index.d.ts.map +1 -1
  19. package/dist/cli/commands/learn/index.js +99 -63
  20. package/dist/cli/commands/learn/index.js.map +1 -1
  21. package/dist/cli/commands/patterns/index.d.ts +8 -1
  22. package/dist/cli/commands/patterns/index.d.ts.map +1 -1
  23. package/dist/cli/commands/patterns/index.js +79 -45
  24. package/dist/cli/commands/patterns/index.js.map +1 -1
  25. package/dist/cli/commands/routing/index.d.ts +5 -0
  26. package/dist/cli/commands/routing/index.d.ts.map +1 -1
  27. package/dist/cli/commands/routing/index.js +11 -10
  28. package/dist/cli/commands/routing/index.js.map +1 -1
  29. package/dist/cli/init/agents.d.ts +1 -1
  30. package/dist/cli/init/agents.js +2 -2
  31. package/dist/cli/init/database-init.d.ts +7 -0
  32. package/dist/cli/init/database-init.d.ts.map +1 -1
  33. package/dist/cli/init/database-init.js +29 -48
  34. package/dist/cli/init/database-init.js.map +1 -1
  35. package/dist/core/di/AgentDependencies.d.ts +127 -0
  36. package/dist/core/di/AgentDependencies.d.ts.map +1 -0
  37. package/dist/core/di/AgentDependencies.js +251 -0
  38. package/dist/core/di/AgentDependencies.js.map +1 -0
  39. package/dist/core/di/DIContainer.d.ts +149 -0
  40. package/dist/core/di/DIContainer.d.ts.map +1 -0
  41. package/dist/core/di/DIContainer.js +333 -0
  42. package/dist/core/di/DIContainer.js.map +1 -0
  43. package/dist/core/di/index.d.ts +11 -0
  44. package/dist/core/di/index.d.ts.map +1 -0
  45. package/dist/core/di/index.js +22 -0
  46. package/dist/core/di/index.js.map +1 -0
  47. package/dist/core/index.d.ts +1 -0
  48. package/dist/core/index.d.ts.map +1 -1
  49. package/dist/core/index.js +11 -1
  50. package/dist/core/index.js.map +1 -1
  51. package/dist/core/memory/HNSWVectorMemory.d.ts +261 -0
  52. package/dist/core/memory/HNSWVectorMemory.d.ts.map +1 -0
  53. package/dist/core/memory/HNSWVectorMemory.js +647 -0
  54. package/dist/core/memory/HNSWVectorMemory.js.map +1 -0
  55. package/dist/core/memory/SwarmMemoryManager.d.ts +7 -0
  56. package/dist/core/memory/SwarmMemoryManager.d.ts.map +1 -1
  57. package/dist/core/memory/SwarmMemoryManager.js +9 -0
  58. package/dist/core/memory/SwarmMemoryManager.js.map +1 -1
  59. package/dist/core/memory/index.d.ts +2 -0
  60. package/dist/core/memory/index.d.ts.map +1 -1
  61. package/dist/core/memory/index.js +11 -1
  62. package/dist/core/memory/index.js.map +1 -1
  63. package/dist/learning/ExperienceSharingProtocol.d.ts +243 -0
  64. package/dist/learning/ExperienceSharingProtocol.d.ts.map +1 -0
  65. package/dist/learning/ExperienceSharingProtocol.js +538 -0
  66. package/dist/learning/ExperienceSharingProtocol.js.map +1 -0
  67. package/dist/learning/ExplainableLearning.d.ts +191 -0
  68. package/dist/learning/ExplainableLearning.d.ts.map +1 -0
  69. package/dist/learning/ExplainableLearning.js +441 -0
  70. package/dist/learning/ExplainableLearning.js.map +1 -0
  71. package/dist/learning/GossipPatternSharingProtocol.d.ts +228 -0
  72. package/dist/learning/GossipPatternSharingProtocol.d.ts.map +1 -0
  73. package/dist/learning/GossipPatternSharingProtocol.js +590 -0
  74. package/dist/learning/GossipPatternSharingProtocol.js.map +1 -0
  75. package/dist/learning/LearningEngine.d.ts +104 -4
  76. package/dist/learning/LearningEngine.d.ts.map +1 -1
  77. package/dist/learning/LearningEngine.js +350 -16
  78. package/dist/learning/LearningEngine.js.map +1 -1
  79. package/dist/learning/PerformanceOptimizer.d.ts +268 -0
  80. package/dist/learning/PerformanceOptimizer.d.ts.map +1 -0
  81. package/dist/learning/PerformanceOptimizer.js +552 -0
  82. package/dist/learning/PerformanceOptimizer.js.map +1 -0
  83. package/dist/learning/PrivacyManager.d.ts +197 -0
  84. package/dist/learning/PrivacyManager.d.ts.map +1 -0
  85. package/dist/learning/PrivacyManager.js +551 -0
  86. package/dist/learning/PrivacyManager.js.map +1 -0
  87. package/dist/learning/QLearning.d.ts +38 -125
  88. package/dist/learning/QLearning.d.ts.map +1 -1
  89. package/dist/learning/QLearning.js +46 -267
  90. package/dist/learning/QLearning.js.map +1 -1
  91. package/dist/learning/QLearningLegacy.d.ts +154 -0
  92. package/dist/learning/QLearningLegacy.d.ts.map +1 -0
  93. package/dist/learning/QLearningLegacy.js +337 -0
  94. package/dist/learning/QLearningLegacy.js.map +1 -0
  95. package/dist/learning/TransferLearningManager.d.ts +212 -0
  96. package/dist/learning/TransferLearningManager.d.ts.map +1 -0
  97. package/dist/learning/TransferLearningManager.js +497 -0
  98. package/dist/learning/TransferLearningManager.js.map +1 -0
  99. package/dist/learning/algorithms/AbstractRLLearner.d.ts +162 -0
  100. package/dist/learning/algorithms/AbstractRLLearner.d.ts.map +1 -0
  101. package/dist/learning/algorithms/AbstractRLLearner.js +300 -0
  102. package/dist/learning/algorithms/AbstractRLLearner.js.map +1 -0
  103. package/dist/learning/algorithms/ActorCriticLearner.d.ts +201 -0
  104. package/dist/learning/algorithms/ActorCriticLearner.d.ts.map +1 -0
  105. package/dist/learning/algorithms/ActorCriticLearner.js +447 -0
  106. package/dist/learning/algorithms/ActorCriticLearner.js.map +1 -0
  107. package/dist/learning/algorithms/MAMLMetaLearner.d.ts +218 -0
  108. package/dist/learning/algorithms/MAMLMetaLearner.d.ts.map +1 -0
  109. package/dist/learning/algorithms/MAMLMetaLearner.js +532 -0
  110. package/dist/learning/algorithms/MAMLMetaLearner.js.map +1 -0
  111. package/dist/learning/algorithms/PPOLearner.d.ts +207 -0
  112. package/dist/learning/algorithms/PPOLearner.d.ts.map +1 -0
  113. package/dist/learning/algorithms/PPOLearner.js +490 -0
  114. package/dist/learning/algorithms/PPOLearner.js.map +1 -0
  115. package/dist/learning/algorithms/QLearning.d.ts +68 -0
  116. package/dist/learning/algorithms/QLearning.d.ts.map +1 -0
  117. package/dist/learning/algorithms/QLearning.js +116 -0
  118. package/dist/learning/algorithms/QLearning.js.map +1 -0
  119. package/dist/learning/algorithms/SARSALearner.d.ts +107 -0
  120. package/dist/learning/algorithms/SARSALearner.d.ts.map +1 -0
  121. package/dist/learning/algorithms/SARSALearner.js +252 -0
  122. package/dist/learning/algorithms/SARSALearner.js.map +1 -0
  123. package/dist/learning/algorithms/index.d.ts +32 -0
  124. package/dist/learning/algorithms/index.d.ts.map +1 -0
  125. package/dist/learning/algorithms/index.js +50 -0
  126. package/dist/learning/algorithms/index.js.map +1 -0
  127. package/dist/learning/index.d.ts +11 -0
  128. package/dist/learning/index.d.ts.map +1 -1
  129. package/dist/learning/index.js +31 -1
  130. package/dist/learning/index.js.map +1 -1
  131. package/dist/learning/types.d.ts +2 -0
  132. package/dist/learning/types.d.ts.map +1 -1
  133. package/dist/mcp/server-instructions.d.ts +1 -1
  134. package/dist/mcp/server-instructions.js +1 -1
  135. package/dist/memory/DistributedPatternLibrary.d.ts +159 -0
  136. package/dist/memory/DistributedPatternLibrary.d.ts.map +1 -0
  137. package/dist/memory/DistributedPatternLibrary.js +370 -0
  138. package/dist/memory/DistributedPatternLibrary.js.map +1 -0
  139. package/dist/memory/PatternQualityScorer.d.ts +169 -0
  140. package/dist/memory/PatternQualityScorer.d.ts.map +1 -0
  141. package/dist/memory/PatternQualityScorer.js +327 -0
  142. package/dist/memory/PatternQualityScorer.js.map +1 -0
  143. package/dist/memory/PatternReplicationService.d.ts +187 -0
  144. package/dist/memory/PatternReplicationService.d.ts.map +1 -0
  145. package/dist/memory/PatternReplicationService.js +392 -0
  146. package/dist/memory/PatternReplicationService.js.map +1 -0
  147. package/dist/providers/ClaudeProvider.d.ts +98 -0
  148. package/dist/providers/ClaudeProvider.d.ts.map +1 -0
  149. package/dist/providers/ClaudeProvider.js +418 -0
  150. package/dist/providers/ClaudeProvider.js.map +1 -0
  151. package/dist/providers/HybridRouter.d.ts +217 -0
  152. package/dist/providers/HybridRouter.d.ts.map +1 -0
  153. package/dist/providers/HybridRouter.js +679 -0
  154. package/dist/providers/HybridRouter.js.map +1 -0
  155. package/dist/providers/ILLMProvider.d.ts +287 -0
  156. package/dist/providers/ILLMProvider.d.ts.map +1 -0
  157. package/dist/providers/ILLMProvider.js +33 -0
  158. package/dist/providers/ILLMProvider.js.map +1 -0
  159. package/dist/providers/LLMProviderFactory.d.ts +154 -0
  160. package/dist/providers/LLMProviderFactory.d.ts.map +1 -0
  161. package/dist/providers/LLMProviderFactory.js +426 -0
  162. package/dist/providers/LLMProviderFactory.js.map +1 -0
  163. package/dist/providers/RuvllmProvider.d.ts +107 -0
  164. package/dist/providers/RuvllmProvider.d.ts.map +1 -0
  165. package/dist/providers/RuvllmProvider.js +417 -0
  166. package/dist/providers/RuvllmProvider.js.map +1 -0
  167. package/dist/providers/index.d.ts +32 -0
  168. package/dist/providers/index.d.ts.map +1 -0
  169. package/dist/providers/index.js +75 -0
  170. package/dist/providers/index.js.map +1 -0
  171. package/dist/telemetry/LearningTelemetry.d.ts +190 -0
  172. package/dist/telemetry/LearningTelemetry.d.ts.map +1 -0
  173. package/dist/telemetry/LearningTelemetry.js +403 -0
  174. package/dist/telemetry/LearningTelemetry.js.map +1 -0
  175. package/dist/telemetry/index.d.ts +1 -0
  176. package/dist/telemetry/index.d.ts.map +1 -1
  177. package/dist/telemetry/index.js +20 -2
  178. package/dist/telemetry/index.js.map +1 -1
  179. package/dist/telemetry/instrumentation/agent.d.ts +1 -1
  180. package/dist/telemetry/instrumentation/agent.js +1 -1
  181. package/dist/telemetry/instrumentation/index.d.ts +1 -1
  182. package/dist/telemetry/instrumentation/index.js +1 -1
  183. package/dist/utils/math.d.ts +11 -0
  184. package/dist/utils/math.d.ts.map +1 -0
  185. package/dist/utils/math.js +16 -0
  186. package/dist/utils/math.js.map +1 -0
  187. package/docs/reference/agents.md +1 -1
  188. package/docs/reference/skills.md +3 -3
  189. package/docs/reference/usage.md +4 -4
  190. package/package.json +1 -1
package/dist/learning/algorithms/AbstractRLLearner.js
@@ -0,0 +1,300 @@
+ "use strict";
+ /**
+  * AbstractRLLearner - Base class for Reinforcement Learning algorithms
+  *
+  * Provides common functionality for all RL algorithms including:
+  * - Epsilon-greedy exploration policy
+  * - State/action encoding
+  * - Q-table management
+  * - Experience replay integration
+  * - Statistics tracking
+  */
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.AbstractRLLearner = void 0;
+ const Logger_1 = require("../../utils/Logger");
+ const ExperienceReplayBuffer_1 = require("../ExperienceReplayBuffer");
+ /**
+  * Abstract base class for RL algorithms
+  */
+ class AbstractRLLearner {
+     constructor(config) {
+         this.logger = Logger_1.Logger.getInstance();
+         this.config = config;
+         this.qTable = new Map();
+         this.stepCount = 0;
+         this.episodeCount = 0;
+         // Initialize experience replay buffer if enabled
+         if (this.config.useExperienceReplay) {
+             this.replayBuffer = new ExperienceReplayBuffer_1.ExperienceReplayBuffer({
+                 maxSize: this.config.replayBufferSize,
+                 minSize: this.config.batchSize,
+                 prioritized: false
+             });
+         }
+         this.logger.info(`${this.constructor.name} initialized`, { config });
+     }
+     /**
+      * Select action using epsilon-greedy policy
+      * With probability ε, select random action (exploration)
+      * Otherwise, select action with highest Q-value (exploitation)
+      */
+     selectAction(state, availableActions) {
+         if (availableActions.length === 0) {
+             throw new Error('No available actions to select from');
+         }
+         // Exploration: random action
+         if (Math.random() < this.config.explorationRate) {
+             const randomIndex = Math.floor(Math.random() * availableActions.length);
+             return availableActions[randomIndex];
+         }
+         // Exploitation: best action based on Q-values
+         return this.getBestAction(state, availableActions);
+     }
+     /**
+      * Get best action based on current Q-values (greedy policy)
+      */
+     getBestAction(state, availableActions) {
+         const stateKey = this.encodeState(state);
+         const stateActions = this.qTable.get(stateKey);
+         if (!stateActions || stateActions.size === 0) {
+             // No Q-values yet, return random action
+             const randomIndex = Math.floor(Math.random() * availableActions.length);
+             return availableActions[randomIndex];
+         }
+         // Find action with highest Q-value
+         let bestAction = availableActions[0];
+         let bestValue = -Infinity;
+         for (const action of availableActions) {
+             const actionKey = this.encodeAction(action);
+             const qValue = stateActions.get(actionKey);
+             if (qValue && qValue.value > bestValue) {
+                 bestValue = qValue.value;
+                 bestAction = action;
+             }
+         }
+         return bestAction;
+     }
+     /**
+      * Get Q-value for a state-action pair
+      */
+     getQValue(state, action) {
+         const stateKey = this.encodeState(state);
+         const actionKey = this.encodeAction(action);
+         const stateActions = this.qTable.get(stateKey);
+         if (!stateActions) {
+             return 0;
+         }
+         const qValue = stateActions.get(actionKey);
+         return qValue?.value ?? 0;
+     }
+     /**
+      * Set Q-value for a state-action pair (protected for subclass use)
+      */
+     setQValue(stateKey, actionKey, value) {
+         if (!this.qTable.has(stateKey)) {
+             this.qTable.set(stateKey, new Map());
+         }
+         const stateActions = this.qTable.get(stateKey);
+         const currentQValue = stateActions.get(actionKey);
+         stateActions.set(actionKey, {
+             state: stateKey,
+             action: actionKey,
+             value,
+             updateCount: (currentQValue?.updateCount ?? 0) + 1,
+             lastUpdated: Date.now()
+         });
+     }
+     /**
+      * Get all Q-values for a state
+      */
+     getStateValues(state) {
+         const stateKey = this.encodeState(state);
+         const stateActions = this.qTable.get(stateKey);
+         if (!stateActions) {
+             return new Map();
+         }
+         const values = new Map();
+         for (const [actionKey, qValue] of stateActions.entries()) {
+             values.set(actionKey, qValue.value);
+         }
+         return values;
+     }
+     /**
+      * Get value of a state (max Q-value over all actions)
+      * V(s) = max_a Q(s,a)
+      */
+     getStateValue(state) {
+         const stateKey = this.encodeState(state);
+         const stateActions = this.qTable.get(stateKey);
+         if (!stateActions || stateActions.size === 0) {
+             return 0;
+         }
+         return Math.max(...Array.from(stateActions.values()).map(qv => qv.value));
+     }
+     /**
+      * Perform batch update using experience replay
+      * Samples random batch from replay buffer and updates Q-values
+      */
+     batchUpdate() {
+         if (!this.replayBuffer || !this.replayBuffer.canSample(this.config.batchSize)) {
+             return;
+         }
+         const batch = this.replayBuffer.sample(this.config.batchSize);
+         for (const experience of batch) {
+             this.update(experience);
+         }
+         this.logger.debug(`Performed batch update with ${batch.length} experiences`);
+     }
+     /**
+      * Decay exploration rate (epsilon)
+      * Called after each episode to gradually reduce exploration
+      */
+     decayExploration() {
+         this.config.explorationRate = Math.max(this.config.minExplorationRate, this.config.explorationRate * this.config.explorationDecay);
+     }
+     /**
+      * Mark end of episode
+      */
+     endEpisode() {
+         this.episodeCount++;
+         this.decayExploration();
+         // Perform batch update if using experience replay
+         if (this.config.useExperienceReplay) {
+             this.batchUpdate();
+         }
+     }
+     /**
+      * Encode state to string key for Q-table
+      * Creates normalized feature vector and discretizes for generalization
+      */
+     encodeState(state) {
+         // Create normalized feature vector
+         const features = [
+             state.taskComplexity,
+             state.requiredCapabilities.length / 10, // normalize
+             state.previousAttempts / 5, // normalize
+             state.availableResources,
+             state.timeConstraint ? Math.min(state.timeConstraint / 300000, 1) : 1 // normalize to 5 min
+         ];
+         // Round to reduce state space (discretization)
+         return features.map(f => Math.round(f * 10) / 10).join(',');
+     }
+     /**
+      * Encode action to string key for Q-table
+      */
+     encodeAction(action) {
+         return `${action.strategy}:${action.parallelization.toFixed(1)}:${action.retryPolicy}`;
+     }
+     /**
+      * Get current exploration rate (epsilon)
+      */
+     getExplorationRate() {
+         return this.config.explorationRate;
+     }
+     /**
+      * Get total number of learning steps
+      */
+     getStepCount() {
+         return this.stepCount;
+     }
+     /**
+      * Get total number of episodes
+      */
+     getEpisodeCount() {
+         return this.episodeCount;
+     }
+     /**
+      * Get Q-table size (number of state-action pairs)
+      */
+     getTableSize() {
+         let size = 0;
+         for (const stateActions of this.qTable.values()) {
+             size += stateActions.size;
+         }
+         return size;
+     }
+     /**
+      * Get statistics about learning progress
+      */
+     getStatistics() {
+         let totalQValue = 0;
+         let count = 0;
+         let maxQ = -Infinity;
+         let minQ = Infinity;
+         for (const stateActions of this.qTable.values()) {
+             for (const qValue of stateActions.values()) {
+                 totalQValue += qValue.value;
+                 maxQ = Math.max(maxQ, qValue.value);
+                 minQ = Math.min(minQ, qValue.value);
+                 count++;
+             }
+         }
+         return {
+             steps: this.stepCount,
+             episodes: this.episodeCount,
+             tableSize: count,
+             explorationRate: this.config.explorationRate,
+             avgQValue: count > 0 ? totalQValue / count : 0,
+             maxQValue: count > 0 ? maxQ : 0,
+             minQValue: count > 0 ? minQ : 0
+         };
+     }
+     /**
+      * Reset Q-table and learning state
+      */
+     reset() {
+         this.qTable.clear();
+         this.stepCount = 0;
+         this.episodeCount = 0;
+         this.config.explorationRate = this.getDefaultExplorationRate();
+         if (this.replayBuffer) {
+             this.replayBuffer.clear();
+         }
+         this.logger.info(`${this.constructor.name} reset to initial state`);
+     }
+     /**
+      * Export Q-table and state for persistence
+      */
+     export() {
+         const serializedQTable = {};
+         for (const [state, actions] of this.qTable.entries()) {
+             serializedQTable[state] = {};
+             for (const [action, qValue] of actions.entries()) {
+                 serializedQTable[state][action] = qValue;
+             }
+         }
+         return {
+             qTable: serializedQTable,
+             config: { ...this.config },
+             stepCount: this.stepCount,
+             episodeCount: this.episodeCount
+         };
+     }
+     /**
+      * Import Q-table and state from persistence
+      */
+     import(state) {
+         this.qTable.clear();
+         for (const [stateKey, actions] of Object.entries(state.qTable)) {
+             const actionMap = new Map();
+             for (const [actionKey, qValue] of Object.entries(actions)) {
+                 actionMap.set(actionKey, qValue);
+             }
+             this.qTable.set(stateKey, actionMap);
+         }
+         this.config = { ...state.config };
+         this.stepCount = state.stepCount;
+         this.episodeCount = state.episodeCount;
+         this.logger.info(`Imported Q-table with ${this.getTableSize()} state-action pairs`);
+     }
+     /**
+      * Get memory usage estimate in bytes
+      */
+     getMemoryUsage() {
+         const qTableSize = JSON.stringify(this.export().qTable).length;
+         const bufferSize = this.replayBuffer?.getMemoryUsage() ?? 0;
+         return qTableSize + bufferSize;
+     }
+ }
+ exports.AbstractRLLearner = AbstractRLLearner;
+ //# sourceMappingURL=AbstractRLLearner.js.map
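The base class above leaves `update(experience)` and `getDefaultExplorationRate()` to subclasses: the former is invoked by `batchUpdate()`, the latter by `reset()`. As a rough illustration of that contract, here is a minimal sketch of a tabular Q-learning subclass. It is not code from this package: the `TaskExperience` field names (`state`, `action`, `reward`, `nextState`), the `learningRate`/`discountFactor` fields on `RLConfig`, and the protected visibility of `config` are assumptions inferred from the compiled output above and the `ActorCriticLearner` usage example further down.

```typescript
import { AbstractRLLearner, RLConfig } from './AbstractRLLearner';
import { TaskState, AgentAction, TaskExperience } from '../types';

// Hypothetical subclass for illustration only; field names on TaskExperience
// and RLConfig are inferred from this diff, not confirmed package API.
export class MinimalQLearner extends AbstractRLLearner {
  // Tabular Q-learning: Q(s,a) += α * (r + γ * max_a' Q(s',a') - Q(s,a))
  update(experience: TaskExperience): void {
    const { state, action, reward, nextState } = experience;
    const current = this.getQValue(state, action);
    // getStateValue(s') is max_a' Q(s',a'), and 0 for states never seen
    const target = reward + this.config.discountFactor * this.getStateValue(nextState);
    this.setQValue(
      this.encodeState(state),
      this.encodeAction(action),
      current + this.config.learningRate * (target - current)
    );
  }

  // Value that reset() restores the exploration rate (ε) to
  protected getDefaultExplorationRate(): number {
    return 0.3;
  }
}
```

Note how exploration cools off: with ε₀ = 0.3, decay 0.995, and floor 0.01 (the values used in the `ActorCriticLearner` usage example below), repeated `endEpisode()` calls leave ε ≈ 0.3 · 0.995^200 ≈ 0.11 after 200 episodes, still above the floor.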
package/dist/learning/algorithms/AbstractRLLearner.js.map
@@ -0,0 +1 @@
+ {"version":3,"file":"AbstractRLLearner.js","sourceRoot":"","sources":["../../../src/learning/algorithms/AbstractRLLearner.ts"],"names":[],"mappings":"..."} (generated source map; base64 VLQ mappings omitted)
package/dist/learning/algorithms/ActorCriticLearner.d.ts
@@ -0,0 +1,201 @@
+ /**
+  * ActorCriticLearner - Actor-Critic Reinforcement Learning Algorithm
+  *
+  * Implements Advantage Actor-Critic (A2C) combining:
+  * - Actor: Policy network that selects actions using softmax policy
+  * - Critic: Value network that estimates state values for advantage calculation
+  *
+  * Key features:
+  * - Continuous action probabilities via softmax
+  * - Advantage-based updates to reduce variance
+  * - Entropy bonus for exploration
+  * - Policy gradient with baseline
+  *
+  * Update rules:
+  * - Critic (Value): V(s) += α_c * δ where δ = r + γV(s') - V(s)
+  * - Actor (Policy): π(a|s) += α_a * δ * ∇log(π(a|s)) + β * H(π)
+  *
+  * @module learning/algorithms/ActorCriticLearner
+  * @version 1.0.0
+  */
+ import { AbstractRLLearner, RLConfig } from './AbstractRLLearner';
+ import { TaskState, AgentAction, TaskExperience } from '../types';
+ /**
+  * Configuration specific to Actor-Critic algorithm
+  */
+ export interface ActorCriticConfig extends RLConfig {
+     /** Actor learning rate (α_a) - typically smaller than critic */
+     actorLearningRate: number;
+     /** Critic learning rate (α_c) */
+     criticLearningRate: number;
+     /** Entropy coefficient (β) for exploration bonus */
+     entropyCoefficient: number;
+     /** Temperature for softmax action selection */
+     temperature: number;
+     /** Whether to use advantage normalization */
+     normalizeAdvantage: boolean;
+     /** Target network update frequency (for stability) */
+     targetUpdateFrequency: number;
+ }
+ /**
+  * Policy entry storing action probabilities
+  */
+ interface PolicyEntry {
+     action: string;
+     probability: number;
+     logProbability: number;
+     updateCount: number;
+     lastUpdated: number;
+ }
+ /**
+  * State value entry for critic
+  */
+ interface StateValueEntry {
+     state: string;
+     value: number;
+     updateCount: number;
+     lastUpdated: number;
+ }
+ /**
+  * ActorCriticLearner - Advantage Actor-Critic implementation
+  *
+  * Combines policy gradient (actor) with value function approximation (critic)
+  * for more stable and efficient learning than pure Q-learning.
+  *
+  * Usage:
+  * ```typescript
+  * const ac = new ActorCriticLearner({
+  *   learningRate: 0.1,
+  *   actorLearningRate: 0.01,
+  *   criticLearningRate: 0.1,
+  *   discountFactor: 0.95,
+  *   explorationRate: 0.3,
+  *   explorationDecay: 0.995,
+  *   minExplorationRate: 0.01,
+  *   entropyCoefficient: 0.01,
+  *   temperature: 1.0,
+  *   normalizeAdvantage: true,
+  *   targetUpdateFrequency: 100,
+  *   useExperienceReplay: true,
+  *   replayBufferSize: 10000,
+  *   batchSize: 32
+  * });
+  *
+  * const action = ac.selectAction(state, availableActions);
+  * ac.update(experience);
+  * ```
+  */
+ export declare class ActorCriticLearner extends AbstractRLLearner {
+     private actorConfig;
+     private policyTable;
+     private valueTable;
+     private targetValueTable;
+     private updatesSinceTargetSync;
+     private advantageHistory;
+     private readonly defaultExploration;
+     constructor(config: ActorCriticConfig);
+     /**
+      * Select action using softmax policy with exploration
+      * π(a|s) = exp(Q(s,a)/τ) / Σ_a' exp(Q(s,a')/τ)
+      */
+     selectAction(state: TaskState, availableActions: AgentAction[]): AgentAction;
+     /**
+      * Sample action from softmax policy distribution
+      */
+     private sampleFromPolicy;
+     /**
+      * Get softmax action probabilities
+      * π(a|s) = exp(preference(s,a)/τ) / Σ_a' exp(preference(s,a')/τ)
+      */
+     private getActionProbabilities;
+     /**
+      * Get preference for state-action pair from policy table
+      */
+     private getPreference;
+     /**
+      * Update actor and critic using temporal difference
+      *
+      * TD Error (advantage): δ = r + γV(s') - V(s)
+      * Critic update: V(s) += α_c * δ
+      * Actor update: preference(s,a) += α_a * δ * (1 - π(a|s))
+      */
+     update(experience: TaskExperience, nextAction?: AgentAction): void;
+     /**
+      * Update critic (value function)
+      * V(s) += α_c * δ
+      */
+     private updateCritic;
+     /**
+      * Update actor (policy)
+      * For softmax policy: preference(s,a) += α_a * δ * (1 - π(a|s))
+      * This increases preference for actions with positive advantage
+      */
+     private updateActor;
+     /**
+      * Calculate entropy bonus for a state
+      * H(π(·|s)) = -Σ_a π(a|s) log(π(a|s))
+      */
+     private calculateEntropyBonus;
+     /**
+      * Get softmax probability for a specific action
+      */
+     private softmaxProb;
+     /**
+      * Normalize advantage using running statistics
+      */
+     private normalizeAdvantage;
+     /**
+      * Get state value from value table
+      */
+     getStateValue(state: TaskState): number;
+     /**
+      * Get state value from target network (for stability)
+      */
+     private getTargetStateValue;
+     /**
+      * Sync target network with main network
+      */
+     private syncTargetNetwork;
+     /**
+      * Extract experience components
+      */
+     private extractExperience;
+     /**
+      * Get default exploration rate for reset
+      */
+     protected getDefaultExplorationRate(): number;
+     /**
+      * Get actor-critic specific statistics
+      */
+     getActorCriticStatistics(): {
+         valueTableSize: number;
+         policyTableSize: number;
+         avgStateValue: number;
+         avgEntropy: number;
+         advantageMean: number;
+         advantageStd: number;
+     };
+     /**
+      * Reset actor-critic specific state
+      */
+     reset(): void;
+     /**
+      * Export complete actor-critic state
+      */
+     exportActorCritic(): {
+         base: ReturnType<AbstractRLLearner['export']>;
+         valueTable: Record<string, StateValueEntry>;
+         policyTable: Record<string, Record<string, PolicyEntry>>;
+         actorConfig: ActorCriticConfig;
+     };
+     /**
+      * Import complete actor-critic state
+      */
+     importActorCritic(state: ReturnType<typeof this.exportActorCritic>): void;
+ }
+ /**
+  * Create default Actor-Critic configuration
+  */
+ export declare function createDefaultActorCriticConfig(): ActorCriticConfig;
+ export {};
+ //# sourceMappingURL=ActorCriticLearner.d.ts.map
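The update rules in the header comment are compact, so here is a self-contained numerical sketch of what they compute: softmax probabilities from action preferences, then one TD/advantage step. The numbers and the plain `Record` containers are invented for illustration; the class itself keeps this data in its private `policyTable`/`valueTable`.

```typescript
// Standalone worked example of the documented update rules; values are made up.
const tau = 1.0;
const prefs: Record<string, number> = { retry: 0.5, parallel: 1.2, sequential: 0.1 };

// Softmax policy over preferences h(s,a) at temperature τ:
//   π(a|s) = exp(h(s,a)/τ) / Σ_a' exp(h(s,a')/τ)
const pi: Record<string, number> = {};
let z = 0;
for (const a of Object.keys(prefs)) z += Math.exp(prefs[a] / tau);
for (const a of Object.keys(prefs)) pi[a] = Math.exp(prefs[a] / tau) / z;
// pi ≈ { retry: 0.271, parallel: 0.547, sequential: 0.182 }

// TD error (the advantage, with the critic as baseline): δ = r + γV(s') - V(s)
const alphaActor = 0.01, alphaCritic = 0.1, gamma = 0.95;
let vState = 0.4;   // V(s), critic's current estimate
const vNext = 0.6;  // V(s'), critic's estimate for the next state
const reward = 1.0;
const delta = reward + gamma * vNext - vState; // δ = 1.17: better than expected

vState += alphaCritic * delta; // critic update: V(s) -> 0.517
// Actor update for the action actually taken ("parallel"):
//   preference(s,a) += α_a * δ * (1 - π(a|s))
prefs["parallel"] += alphaActor * delta * (1 - pi["parallel"]); // 1.2 -> ≈ 1.2053
```

The `(1 - π(a|s))` factor means an already near-certain action (π ≈ 1) is barely reinforced further, which keeps the softmax preferences from saturating.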
package/dist/learning/algorithms/ActorCriticLearner.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"ActorCriticLearner.d.ts","sourceRoot":"","sources":["../../../src/learning/algorithms/ActorCriticLearner.ts"],"names":[],"mappings":"..."} (generated source map; base64 VLQ mappings omitted)