agentic-qe 2.1.2 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190)
  1. package/.claude/skills/agentic-quality-engineering/SKILL.md +4 -4
  2. package/.claude/skills/cicd-pipeline-qe-orchestrator/README.md +14 -11
  3. package/.claude/skills/skills-manifest.json +2 -2
  4. package/CHANGELOG.md +138 -0
  5. package/README.md +92 -214
  6. package/dist/agents/BaseAgent.d.ts +5 -1
  7. package/dist/agents/BaseAgent.d.ts.map +1 -1
  8. package/dist/agents/BaseAgent.js +32 -17
  9. package/dist/agents/BaseAgent.js.map +1 -1
  10. package/dist/agents/index.d.ts.map +1 -1
  11. package/dist/agents/index.js +5 -1
  12. package/dist/agents/index.js.map +1 -1
  13. package/dist/cli/commands/improve/index.d.ts +8 -1
  14. package/dist/cli/commands/improve/index.d.ts.map +1 -1
  15. package/dist/cli/commands/improve/index.js +18 -16
  16. package/dist/cli/commands/improve/index.js.map +1 -1
  17. package/dist/cli/commands/learn/index.d.ts +10 -2
  18. package/dist/cli/commands/learn/index.d.ts.map +1 -1
  19. package/dist/cli/commands/learn/index.js +99 -63
  20. package/dist/cli/commands/learn/index.js.map +1 -1
  21. package/dist/cli/commands/patterns/index.d.ts +8 -1
  22. package/dist/cli/commands/patterns/index.d.ts.map +1 -1
  23. package/dist/cli/commands/patterns/index.js +79 -45
  24. package/dist/cli/commands/patterns/index.js.map +1 -1
  25. package/dist/cli/commands/routing/index.d.ts +5 -0
  26. package/dist/cli/commands/routing/index.d.ts.map +1 -1
  27. package/dist/cli/commands/routing/index.js +11 -10
  28. package/dist/cli/commands/routing/index.js.map +1 -1
  29. package/dist/cli/init/agents.d.ts +1 -1
  30. package/dist/cli/init/agents.js +2 -2
  31. package/dist/cli/init/database-init.d.ts +7 -0
  32. package/dist/cli/init/database-init.d.ts.map +1 -1
  33. package/dist/cli/init/database-init.js +29 -48
  34. package/dist/cli/init/database-init.js.map +1 -1
  35. package/dist/core/di/AgentDependencies.d.ts +127 -0
  36. package/dist/core/di/AgentDependencies.d.ts.map +1 -0
  37. package/dist/core/di/AgentDependencies.js +251 -0
  38. package/dist/core/di/AgentDependencies.js.map +1 -0
  39. package/dist/core/di/DIContainer.d.ts +149 -0
  40. package/dist/core/di/DIContainer.d.ts.map +1 -0
  41. package/dist/core/di/DIContainer.js +333 -0
  42. package/dist/core/di/DIContainer.js.map +1 -0
  43. package/dist/core/di/index.d.ts +11 -0
  44. package/dist/core/di/index.d.ts.map +1 -0
  45. package/dist/core/di/index.js +22 -0
  46. package/dist/core/di/index.js.map +1 -0
  47. package/dist/core/index.d.ts +1 -0
  48. package/dist/core/index.d.ts.map +1 -1
  49. package/dist/core/index.js +11 -1
  50. package/dist/core/index.js.map +1 -1
  51. package/dist/core/memory/HNSWVectorMemory.d.ts +261 -0
  52. package/dist/core/memory/HNSWVectorMemory.d.ts.map +1 -0
  53. package/dist/core/memory/HNSWVectorMemory.js +647 -0
  54. package/dist/core/memory/HNSWVectorMemory.js.map +1 -0
  55. package/dist/core/memory/SwarmMemoryManager.d.ts +7 -0
  56. package/dist/core/memory/SwarmMemoryManager.d.ts.map +1 -1
  57. package/dist/core/memory/SwarmMemoryManager.js +9 -0
  58. package/dist/core/memory/SwarmMemoryManager.js.map +1 -1
  59. package/dist/core/memory/index.d.ts +2 -0
  60. package/dist/core/memory/index.d.ts.map +1 -1
  61. package/dist/core/memory/index.js +11 -1
  62. package/dist/core/memory/index.js.map +1 -1
  63. package/dist/learning/ExperienceSharingProtocol.d.ts +243 -0
  64. package/dist/learning/ExperienceSharingProtocol.d.ts.map +1 -0
  65. package/dist/learning/ExperienceSharingProtocol.js +538 -0
  66. package/dist/learning/ExperienceSharingProtocol.js.map +1 -0
  67. package/dist/learning/ExplainableLearning.d.ts +191 -0
  68. package/dist/learning/ExplainableLearning.d.ts.map +1 -0
  69. package/dist/learning/ExplainableLearning.js +441 -0
  70. package/dist/learning/ExplainableLearning.js.map +1 -0
  71. package/dist/learning/GossipPatternSharingProtocol.d.ts +228 -0
  72. package/dist/learning/GossipPatternSharingProtocol.d.ts.map +1 -0
  73. package/dist/learning/GossipPatternSharingProtocol.js +590 -0
  74. package/dist/learning/GossipPatternSharingProtocol.js.map +1 -0
  75. package/dist/learning/LearningEngine.d.ts +104 -4
  76. package/dist/learning/LearningEngine.d.ts.map +1 -1
  77. package/dist/learning/LearningEngine.js +350 -16
  78. package/dist/learning/LearningEngine.js.map +1 -1
  79. package/dist/learning/PerformanceOptimizer.d.ts +268 -0
  80. package/dist/learning/PerformanceOptimizer.d.ts.map +1 -0
  81. package/dist/learning/PerformanceOptimizer.js +552 -0
  82. package/dist/learning/PerformanceOptimizer.js.map +1 -0
  83. package/dist/learning/PrivacyManager.d.ts +197 -0
  84. package/dist/learning/PrivacyManager.d.ts.map +1 -0
  85. package/dist/learning/PrivacyManager.js +551 -0
  86. package/dist/learning/PrivacyManager.js.map +1 -0
  87. package/dist/learning/QLearning.d.ts +38 -125
  88. package/dist/learning/QLearning.d.ts.map +1 -1
  89. package/dist/learning/QLearning.js +46 -267
  90. package/dist/learning/QLearning.js.map +1 -1
  91. package/dist/learning/QLearningLegacy.d.ts +154 -0
  92. package/dist/learning/QLearningLegacy.d.ts.map +1 -0
  93. package/dist/learning/QLearningLegacy.js +337 -0
  94. package/dist/learning/QLearningLegacy.js.map +1 -0
  95. package/dist/learning/TransferLearningManager.d.ts +212 -0
  96. package/dist/learning/TransferLearningManager.d.ts.map +1 -0
  97. package/dist/learning/TransferLearningManager.js +497 -0
  98. package/dist/learning/TransferLearningManager.js.map +1 -0
  99. package/dist/learning/algorithms/AbstractRLLearner.d.ts +162 -0
  100. package/dist/learning/algorithms/AbstractRLLearner.d.ts.map +1 -0
  101. package/dist/learning/algorithms/AbstractRLLearner.js +300 -0
  102. package/dist/learning/algorithms/AbstractRLLearner.js.map +1 -0
  103. package/dist/learning/algorithms/ActorCriticLearner.d.ts +201 -0
  104. package/dist/learning/algorithms/ActorCriticLearner.d.ts.map +1 -0
  105. package/dist/learning/algorithms/ActorCriticLearner.js +447 -0
  106. package/dist/learning/algorithms/ActorCriticLearner.js.map +1 -0
  107. package/dist/learning/algorithms/MAMLMetaLearner.d.ts +218 -0
  108. package/dist/learning/algorithms/MAMLMetaLearner.d.ts.map +1 -0
  109. package/dist/learning/algorithms/MAMLMetaLearner.js +532 -0
  110. package/dist/learning/algorithms/MAMLMetaLearner.js.map +1 -0
  111. package/dist/learning/algorithms/PPOLearner.d.ts +207 -0
  112. package/dist/learning/algorithms/PPOLearner.d.ts.map +1 -0
  113. package/dist/learning/algorithms/PPOLearner.js +490 -0
  114. package/dist/learning/algorithms/PPOLearner.js.map +1 -0
  115. package/dist/learning/algorithms/QLearning.d.ts +68 -0
  116. package/dist/learning/algorithms/QLearning.d.ts.map +1 -0
  117. package/dist/learning/algorithms/QLearning.js +116 -0
  118. package/dist/learning/algorithms/QLearning.js.map +1 -0
  119. package/dist/learning/algorithms/SARSALearner.d.ts +107 -0
  120. package/dist/learning/algorithms/SARSALearner.d.ts.map +1 -0
  121. package/dist/learning/algorithms/SARSALearner.js +252 -0
  122. package/dist/learning/algorithms/SARSALearner.js.map +1 -0
  123. package/dist/learning/algorithms/index.d.ts +32 -0
  124. package/dist/learning/algorithms/index.d.ts.map +1 -0
  125. package/dist/learning/algorithms/index.js +50 -0
  126. package/dist/learning/algorithms/index.js.map +1 -0
  127. package/dist/learning/index.d.ts +11 -0
  128. package/dist/learning/index.d.ts.map +1 -1
  129. package/dist/learning/index.js +31 -1
  130. package/dist/learning/index.js.map +1 -1
  131. package/dist/learning/types.d.ts +2 -0
  132. package/dist/learning/types.d.ts.map +1 -1
  133. package/dist/mcp/server-instructions.d.ts +1 -1
  134. package/dist/mcp/server-instructions.js +1 -1
  135. package/dist/memory/DistributedPatternLibrary.d.ts +159 -0
  136. package/dist/memory/DistributedPatternLibrary.d.ts.map +1 -0
  137. package/dist/memory/DistributedPatternLibrary.js +370 -0
  138. package/dist/memory/DistributedPatternLibrary.js.map +1 -0
  139. package/dist/memory/PatternQualityScorer.d.ts +169 -0
  140. package/dist/memory/PatternQualityScorer.d.ts.map +1 -0
  141. package/dist/memory/PatternQualityScorer.js +327 -0
  142. package/dist/memory/PatternQualityScorer.js.map +1 -0
  143. package/dist/memory/PatternReplicationService.d.ts +187 -0
  144. package/dist/memory/PatternReplicationService.d.ts.map +1 -0
  145. package/dist/memory/PatternReplicationService.js +392 -0
  146. package/dist/memory/PatternReplicationService.js.map +1 -0
  147. package/dist/providers/ClaudeProvider.d.ts +98 -0
  148. package/dist/providers/ClaudeProvider.d.ts.map +1 -0
  149. package/dist/providers/ClaudeProvider.js +418 -0
  150. package/dist/providers/ClaudeProvider.js.map +1 -0
  151. package/dist/providers/HybridRouter.d.ts +217 -0
  152. package/dist/providers/HybridRouter.d.ts.map +1 -0
  153. package/dist/providers/HybridRouter.js +679 -0
  154. package/dist/providers/HybridRouter.js.map +1 -0
  155. package/dist/providers/ILLMProvider.d.ts +287 -0
  156. package/dist/providers/ILLMProvider.d.ts.map +1 -0
  157. package/dist/providers/ILLMProvider.js +33 -0
  158. package/dist/providers/ILLMProvider.js.map +1 -0
  159. package/dist/providers/LLMProviderFactory.d.ts +154 -0
  160. package/dist/providers/LLMProviderFactory.d.ts.map +1 -0
  161. package/dist/providers/LLMProviderFactory.js +426 -0
  162. package/dist/providers/LLMProviderFactory.js.map +1 -0
  163. package/dist/providers/RuvllmProvider.d.ts +107 -0
  164. package/dist/providers/RuvllmProvider.d.ts.map +1 -0
  165. package/dist/providers/RuvllmProvider.js +417 -0
  166. package/dist/providers/RuvllmProvider.js.map +1 -0
  167. package/dist/providers/index.d.ts +32 -0
  168. package/dist/providers/index.d.ts.map +1 -0
  169. package/dist/providers/index.js +75 -0
  170. package/dist/providers/index.js.map +1 -0
  171. package/dist/telemetry/LearningTelemetry.d.ts +190 -0
  172. package/dist/telemetry/LearningTelemetry.d.ts.map +1 -0
  173. package/dist/telemetry/LearningTelemetry.js +403 -0
  174. package/dist/telemetry/LearningTelemetry.js.map +1 -0
  175. package/dist/telemetry/index.d.ts +1 -0
  176. package/dist/telemetry/index.d.ts.map +1 -1
  177. package/dist/telemetry/index.js +20 -2
  178. package/dist/telemetry/index.js.map +1 -1
  179. package/dist/telemetry/instrumentation/agent.d.ts +1 -1
  180. package/dist/telemetry/instrumentation/agent.js +1 -1
  181. package/dist/telemetry/instrumentation/index.d.ts +1 -1
  182. package/dist/telemetry/instrumentation/index.js +1 -1
  183. package/dist/utils/math.d.ts +11 -0
  184. package/dist/utils/math.d.ts.map +1 -0
  185. package/dist/utils/math.js +16 -0
  186. package/dist/utils/math.js.map +1 -0
  187. package/docs/reference/agents.md +1 -1
  188. package/docs/reference/skills.md +3 -3
  189. package/docs/reference/usage.md +4 -4
  190. package/package.json +1 -1
package/dist/learning/algorithms/ActorCriticLearner.js
@@ -0,0 +1,447 @@
+ "use strict";
+ /**
+  * ActorCriticLearner - Actor-Critic Reinforcement Learning Algorithm
+  *
+  * Implements Advantage Actor-Critic (A2C) combining:
+  * - Actor: Policy network that selects actions using softmax policy
+  * - Critic: Value network that estimates state values for advantage calculation
+  *
+  * Key features:
+  * - Continuous action probabilities via softmax
+  * - Advantage-based updates to reduce variance
+  * - Entropy bonus for exploration
+  * - Policy gradient with baseline
+  *
+  * Update rules:
+  * - Critic (Value): V(s) += α_c * δ where δ = r + γV(s') - V(s)
+  * - Actor (Policy): π(a|s) += α_a * δ * ∇log(π(a|s)) + β * H(π)
+  *
+  * @module learning/algorithms/ActorCriticLearner
+  * @version 1.0.0
+  */
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.ActorCriticLearner = void 0;
+ exports.createDefaultActorCriticConfig = createDefaultActorCriticConfig;
+ const AbstractRLLearner_1 = require("./AbstractRLLearner");
+ /**
+  * ActorCriticLearner - Advantage Actor-Critic implementation
+  *
+  * Combines policy gradient (actor) with value function approximation (critic)
+  * for more stable and efficient learning than pure Q-learning.
+  *
+  * Usage:
+  * ```typescript
+  * const ac = new ActorCriticLearner({
+  *   learningRate: 0.1,
+  *   actorLearningRate: 0.01,
+  *   criticLearningRate: 0.1,
+  *   discountFactor: 0.95,
+  *   explorationRate: 0.3,
+  *   explorationDecay: 0.995,
+  *   minExplorationRate: 0.01,
+  *   entropyCoefficient: 0.01,
+  *   temperature: 1.0,
+  *   normalizeAdvantage: true,
+  *   targetUpdateFrequency: 100,
+  *   useExperienceReplay: true,
+  *   replayBufferSize: 10000,
+  *   batchSize: 32
+  * });
+  *
+  * const action = ac.selectAction(state, availableActions);
+  * ac.update(experience);
+  * ```
+  */
+ class ActorCriticLearner extends AbstractRLLearner_1.AbstractRLLearner {
+     constructor(config) {
+         super(config);
+         this.actorConfig = config;
+         this.policyTable = new Map();
+         this.valueTable = new Map();
+         this.targetValueTable = new Map();
+         this.updatesSinceTargetSync = 0;
+         this.advantageHistory = [];
+         this.defaultExploration = config.explorationRate;
+         this.logger.info('ActorCriticLearner initialized', {
+             actorLR: config.actorLearningRate,
+             criticLR: config.criticLearningRate,
+             entropy: config.entropyCoefficient,
+             temperature: config.temperature
+         });
+     }
+     /**
+      * Select action using softmax policy with exploration
+      * π(a|s) = exp(Q(s,a)/τ) / Σ_a' exp(Q(s,a')/τ)
+      */
+     selectAction(state, availableActions) {
+         if (availableActions.length === 0) {
+             throw new Error('No available actions to select from');
+         }
+         // With probability ε, use random action (exploration fallback)
+         if (Math.random() < this.config.explorationRate) {
+             const randomIndex = Math.floor(Math.random() * availableActions.length);
+             return availableActions[randomIndex];
+         }
+         // Use softmax policy
+         return this.sampleFromPolicy(state, availableActions);
+     }
+     /**
+      * Sample action from softmax policy distribution
+      */
+     sampleFromPolicy(state, availableActions) {
+         const stateKey = this.encodeState(state);
+         const probabilities = this.getActionProbabilities(stateKey, availableActions);
+         // Sample from categorical distribution
+         const random = Math.random();
+         let cumulative = 0;
+         for (let i = 0; i < availableActions.length; i++) {
+             cumulative += probabilities[i];
+             if (random <= cumulative) {
+                 return availableActions[i];
+             }
+         }
+         // Fallback (shouldn't reach here due to normalization)
+         return availableActions[availableActions.length - 1];
+     }
+     /**
+      * Get softmax action probabilities
+      * π(a|s) = exp(preference(s,a)/τ) / Σ_a' exp(preference(s,a')/τ)
+      */
+     getActionProbabilities(stateKey, availableActions) {
+         const temperature = this.actorConfig.temperature;
+         const preferences = [];
+         // Get preferences (Q-values or policy table values)
+         for (const action of availableActions) {
+             const actionKey = this.encodeAction(action);
+             const preference = this.getPreference(stateKey, actionKey);
+             preferences.push(preference / temperature);
+         }
+         // Softmax with numerical stability
+         const maxPref = Math.max(...preferences);
+         const expPrefs = preferences.map(p => Math.exp(p - maxPref));
+         const sumExp = expPrefs.reduce((sum, e) => sum + e, 0);
+         return expPrefs.map(e => e / sumExp);
+     }
+     /**
+      * Get preference for state-action pair from policy table
+      */
+     getPreference(stateKey, actionKey) {
+         const statePolicy = this.policyTable.get(stateKey);
+         if (!statePolicy) {
+             return 0; // uniform preference initially
+         }
+         const entry = statePolicy.get(actionKey);
+         return entry ? entry.probability : 0;
+     }
+     /**
+      * Update actor and critic using temporal difference
+      *
+      * TD Error (advantage): δ = r + γV(s') - V(s)
+      * Critic update: V(s) += α_c * δ
+      * Actor update: preference(s,a) += α_a * δ * (1 - π(a|s))
+      */
+     update(experience, nextAction) {
+         this.stepCount++;
+         const { state, action, reward, nextState, done } = this.extractExperience(experience);
+         const stateKey = this.encodeState(state);
+         const actionKey = this.encodeAction(action);
+         // Get current and next state values from critic
+         const currentV = this.getStateValue(state);
+         const nextV = done ? 0 : this.getTargetStateValue(nextState);
+         // Calculate TD error (advantage)
+         let advantage = reward + this.config.discountFactor * nextV - currentV;
+         // Normalize advantage if enabled
+         if (this.actorConfig.normalizeAdvantage) {
+             advantage = this.normalizeAdvantage(advantage);
+         }
+         // Update critic (value function)
+         this.updateCritic(stateKey, currentV, advantage);
+         // Update actor (policy)
+         this.updateActor(stateKey, actionKey, advantage);
+         // Store in replay buffer if enabled
+         if (this.replayBuffer) {
+             this.replayBuffer.add(experience);
+         }
+         // Sync target network periodically
+         this.updatesSinceTargetSync++;
+         if (this.updatesSinceTargetSync >= this.actorConfig.targetUpdateFrequency) {
+             this.syncTargetNetwork();
+             this.updatesSinceTargetSync = 0;
+         }
+         this.logger.debug('Actor-Critic update', {
+             state: stateKey,
+             action: actionKey,
+             reward,
+             advantage,
+             valueUpdate: currentV + this.actorConfig.criticLearningRate * advantage
+         });
+     }
+     /**
+      * Update critic (value function)
+      * V(s) += α_c * δ
+      */
+     updateCritic(stateKey, currentV, advantage) {
+         const newValue = currentV + this.actorConfig.criticLearningRate * advantage;
+         const existingEntry = this.valueTable.get(stateKey);
+         this.valueTable.set(stateKey, {
+             state: stateKey,
+             value: newValue,
+             updateCount: (existingEntry?.updateCount ?? 0) + 1,
+             lastUpdated: Date.now()
+         });
+     }
+     /**
+      * Update actor (policy)
+      * For softmax policy: preference(s,a) += α_a * δ * (1 - π(a|s))
+      * This increases preference for actions with positive advantage
+      */
+     updateActor(stateKey, actionKey, advantage) {
+         if (!this.policyTable.has(stateKey)) {
+             this.policyTable.set(stateKey, new Map());
+         }
+         const statePolicy = this.policyTable.get(stateKey);
+         // Get current preference and probability
+         const currentEntry = statePolicy.get(actionKey);
+         const currentPref = currentEntry?.probability ?? 0;
+         // Approximate gradient: increase preference proportional to advantage
+         // Also add entropy bonus to encourage exploration
+         const entropyBonus = this.calculateEntropyBonus(stateKey);
+         const newPref = currentPref + this.actorConfig.actorLearningRate * (advantage + entropyBonus);
+         statePolicy.set(actionKey, {
+             action: actionKey,
+             probability: newPref,
+             logProbability: Math.log(Math.max(0.001, this.softmaxProb(stateKey, actionKey))),
+             updateCount: (currentEntry?.updateCount ?? 0) + 1,
+             lastUpdated: Date.now()
+         });
+         // Also update Q-table for getBestAction compatibility
+         this.setQValue(stateKey, actionKey, newPref);
+     }
+     /**
+      * Calculate entropy bonus for a state
+      * H(π(·|s)) = -Σ_a π(a|s) log(π(a|s))
+      */
+     calculateEntropyBonus(stateKey) {
+         const statePolicy = this.policyTable.get(stateKey);
+         if (!statePolicy || statePolicy.size === 0) {
+             return 0;
+         }
+         // Calculate entropy over stored actions
+         const prefs = Array.from(statePolicy.values()).map(e => e.probability);
+         const maxPref = Math.max(...prefs);
+         const expPrefs = prefs.map(p => Math.exp((p - maxPref) / this.actorConfig.temperature));
+         const sumExp = expPrefs.reduce((sum, e) => sum + e, 0);
+         const probs = expPrefs.map(e => e / sumExp);
+         let entropy = 0;
+         for (const p of probs) {
+             if (p > 0) {
+                 entropy -= p * Math.log(p);
+             }
+         }
+         return this.actorConfig.entropyCoefficient * entropy;
+     }
+     /**
+      * Get softmax probability for a specific action
+      */
+     softmaxProb(stateKey, actionKey) {
+         const statePolicy = this.policyTable.get(stateKey);
+         if (!statePolicy || statePolicy.size === 0) {
+             return 1.0 / Math.max(1, statePolicy?.size ?? 1);
+         }
+         const prefs = Array.from(statePolicy.entries());
+         const temp = this.actorConfig.temperature;
+         const maxPref = Math.max(...prefs.map(([, e]) => e.probability));
+         let sumExp = 0;
+         let targetExp = 0;
+         for (const [key, entry] of prefs) {
+             const exp = Math.exp((entry.probability - maxPref) / temp);
+             sumExp += exp;
+             if (key === actionKey) {
+                 targetExp = exp;
+             }
+         }
+         return targetExp / sumExp;
+     }
+     /**
+      * Normalize advantage using running statistics
+      */
+     normalizeAdvantage(advantage) {
+         this.advantageHistory.push(advantage);
+         // Keep limited history
+         if (this.advantageHistory.length > 1000) {
+             this.advantageHistory.shift();
+         }
+         if (this.advantageHistory.length < 10) {
+             return advantage;
+         }
+         const mean = this.advantageHistory.reduce((s, a) => s + a, 0) / this.advantageHistory.length;
+         const variance = this.advantageHistory.reduce((s, a) => s + (a - mean) ** 2, 0) / this.advantageHistory.length;
+         const std = Math.sqrt(variance) + 1e-8;
+         return (advantage - mean) / std;
+     }
+     /**
+      * Get state value from value table
+      */
+     getStateValue(state) {
+         const stateKey = this.encodeState(state);
+         const entry = this.valueTable.get(stateKey);
+         return entry?.value ?? 0;
+     }
+     /**
+      * Get state value from target network (for stability)
+      */
+     getTargetStateValue(state) {
+         const stateKey = this.encodeState(state);
+         const entry = this.targetValueTable.get(stateKey);
+         return entry?.value ?? this.getStateValue(state);
+     }
+     /**
+      * Sync target network with main network
+      */
+     syncTargetNetwork() {
+         this.targetValueTable.clear();
+         for (const [key, value] of this.valueTable.entries()) {
+             this.targetValueTable.set(key, { ...value });
+         }
+         this.logger.debug('Target network synchronized');
+     }
+     /**
+      * Extract experience components
+      */
+     extractExperience(experience) {
+         return {
+             state: experience.state,
+             action: experience.action,
+             reward: experience.reward,
+             nextState: experience.nextState,
+             done: experience.done ?? false
+         };
+     }
+     /**
+      * Get default exploration rate for reset
+      */
+     getDefaultExplorationRate() {
+         return this.defaultExploration;
+     }
+     /**
+      * Get actor-critic specific statistics
+      */
+     getActorCriticStatistics() {
+         // Calculate average state value
+         let totalValue = 0;
+         for (const entry of this.valueTable.values()) {
+             totalValue += entry.value;
+         }
+         const avgStateValue = this.valueTable.size > 0 ? totalValue / this.valueTable.size : 0;
+         // Calculate policy table size
+         let policySize = 0;
+         for (const statePolicy of this.policyTable.values()) {
+             policySize += statePolicy.size;
+         }
+         // Calculate average entropy
+         let totalEntropy = 0;
+         let entropyCount = 0;
+         for (const stateKey of this.policyTable.keys()) {
+             const entropy = this.calculateEntropyBonus(stateKey) / this.actorConfig.entropyCoefficient;
+             totalEntropy += entropy;
+             entropyCount++;
+         }
+         const avgEntropy = entropyCount > 0 ? totalEntropy / entropyCount : 0;
+         // Calculate advantage statistics
+         const advMean = this.advantageHistory.length > 0
+             ? this.advantageHistory.reduce((s, a) => s + a, 0) / this.advantageHistory.length
+             : 0;
+         const advVariance = this.advantageHistory.length > 0
+             ? this.advantageHistory.reduce((s, a) => s + (a - advMean) ** 2, 0) / this.advantageHistory.length
+             : 0;
+         return {
+             valueTableSize: this.valueTable.size,
+             policyTableSize: policySize,
+             avgStateValue,
+             avgEntropy,
+             advantageMean: advMean,
+             advantageStd: Math.sqrt(advVariance)
+         };
+     }
+     /**
+      * Reset actor-critic specific state
+      */
+     reset() {
+         super.reset();
+         this.policyTable.clear();
+         this.valueTable.clear();
+         this.targetValueTable.clear();
+         this.advantageHistory = [];
+         this.updatesSinceTargetSync = 0;
+         this.logger.info('ActorCriticLearner reset');
+     }
+     /**
+      * Export complete actor-critic state
+      */
+     exportActorCritic() {
+         const serializedPolicy = {};
+         for (const [state, actions] of this.policyTable.entries()) {
+             serializedPolicy[state] = {};
+             for (const [action, entry] of actions.entries()) {
+                 serializedPolicy[state][action] = entry;
+             }
+         }
+         const serializedValue = {};
+         for (const [state, entry] of this.valueTable.entries()) {
+             serializedValue[state] = entry;
+         }
+         return {
+             base: this.export(),
+             valueTable: serializedValue,
+             policyTable: serializedPolicy,
+             actorConfig: { ...this.actorConfig }
+         };
+     }
+     /**
+      * Import complete actor-critic state
+      */
+     importActorCritic(state) {
+         this.import(state.base);
+         this.valueTable.clear();
+         for (const [stateKey, entry] of Object.entries(state.valueTable)) {
+             this.valueTable.set(stateKey, entry);
+         }
+         this.policyTable.clear();
+         for (const [stateKey, actions] of Object.entries(state.policyTable)) {
+             const actionMap = new Map();
+             for (const [actionKey, entry] of Object.entries(actions)) {
+                 actionMap.set(actionKey, entry);
+             }
+             this.policyTable.set(stateKey, actionMap);
+         }
+         this.actorConfig = { ...state.actorConfig };
+         this.syncTargetNetwork();
+         this.logger.info('Imported Actor-Critic state', {
+             valueTableSize: this.valueTable.size,
+             policyTableSize: this.policyTable.size
+         });
+     }
+ }
+ exports.ActorCriticLearner = ActorCriticLearner;
+ /**
+  * Create default Actor-Critic configuration
+  */
+ function createDefaultActorCriticConfig() {
+     return {
+         learningRate: 0.1,
+         actorLearningRate: 0.01,
+         criticLearningRate: 0.1,
+         discountFactor: 0.95,
+         explorationRate: 0.3,
+         explorationDecay: 0.995,
+         minExplorationRate: 0.01,
+         entropyCoefficient: 0.01,
+         temperature: 1.0,
+         normalizeAdvantage: true,
+         targetUpdateFrequency: 100,
+         useExperienceReplay: true,
+         replayBufferSize: 10000,
+         batchSize: 32
+     };
+ }
+ //# sourceMappingURL=ActorCriticLearner.js.map
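For orientation, the sketch below exercises the new ActorCriticLearner API the way its own JSDoc describes it (selectAction, update, getActorCriticStatistics, plus the exported createDefaultActorCriticConfig helper). It is illustrative only: the import path and the shapes of the state, action, and experience objects are assumptions, since the corresponding AgentState/AgentAction typings are not part of this diff.

```typescript
// Illustrative sketch; the import path and the state/action shapes are assumed.
import {
  ActorCriticLearner,
  createDefaultActorCriticConfig,
} from 'agentic-qe/dist/learning/algorithms';

const ac = new ActorCriticLearner(createDefaultActorCriticConfig());

// One interaction step: pick an action for the current state, observe a reward,
// and feed the transition back as an experience.
const state = { taskType: 'unit-test', coverage: 0.62 };                 // hypothetical state
const actions = [{ type: 'generate-test' }, { type: 'refactor-test' }];  // hypothetical actions

const action = ac.selectAction(state, actions);

// update() computes the TD error δ = r + γV(s') - V(s), moves the critic by
// criticLearningRate * δ and the actor preference by actorLearningRate * (δ + entropy bonus).
ac.update({
  state,
  action,
  reward: 1.0,
  nextState: { taskType: 'unit-test', coverage: 0.71 },
  done: false,
});

console.log(ac.getActorCriticStatistics());
```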
package/dist/learning/algorithms/MAMLMetaLearner.d.ts
@@ -0,0 +1,218 @@
+ /**
+  * MAMLMetaLearner - Model-Agnostic Meta-Learning for QE Agents
+  *
+  * Implements MAML-style meta-learning that enables agents to "learn how to learn".
+  * Agents can quickly adapt to new testing domains with just 5-10 examples.
+  *
+  * Key Concepts:
+  * - Inner Loop: Fast adaptation to new task with few gradient steps (5-10 examples)
+  * - Outer Loop: Learn initialization parameters that enable fast adaptation
+  * - Meta-Learning: After seeing few examples of new test pattern, agent performs well
+  *
+  * Algorithm:
+  * 1. Initialize meta-parameters θ (Q-table initialization)
+  * 2. For each task Ti in task distribution:
+  *    a. Sample K examples from Ti (support set)
+  *    b. Adapt: θ'i = θ - α∇Loss(θ, support) [inner loop]
+  *    c. Evaluate on query set from Ti
+  * 3. Update meta-parameters: θ = θ - β∇Loss(θ', query) [outer loop]
+  * 4. Result: θ is optimized for fast adaptation to new tasks
+  *
+  * Use Cases:
+  * - New testing framework adoption (5-10 examples → proficient)
+  * - New project domain (few examples → effective testing strategy)
+  * - API testing → UI testing transfer learning
+  */
+ import { AbstractRLLearner, RLConfig, QValue } from './AbstractRLLearner';
+ import { TaskExperience, AgentAction } from '../types';
+ /**
+  * MAML configuration extends base RL config with meta-learning parameters
+  */
+ export interface MAMLConfig extends RLConfig {
+     innerLearningRate: number;
+     innerSteps: number;
+     metaLearningRate: number;
+     minTaskExamples: number;
+     maxTaskExamples: number;
+     taskBatchSize: number;
+     firstOrderApproximation: boolean;
+     baseAlgorithm?: AbstractRLLearner;
+ }
+ /**
+  * Task for meta-learning (contains support and query sets)
+  */
+ interface MetaTask {
+     id: string;
+     taskType: string;
+     supportSet: TaskExperience[];
+     querySet: TaskExperience[];
+     metadata?: Record<string, any>;
+ }
+ /**
+  * Meta-learning episode tracking
+  */
+ interface MetaEpisode {
+     episodeId: string;
+     tasks: MetaTask[];
+     preAdaptationLoss: number;
+     postAdaptationLoss: number;
+     metaLoss: number;
+     improvement: number;
+     timestamp: Date;
+ }
+ /**
+  * MAMLMetaLearner - Model-Agnostic Meta-Learning for Fast Adaptation
+  *
+  * Learns an initialization of Q-values that enables rapid adaptation to new
+  * testing tasks with minimal examples (5-10 shots).
+  *
+  * Example:
+  * - Agent trained on Jest, Mocha, Jasmine unit testing
+  * - Sees 5-10 examples of Vitest tests
+  * - Immediately generates high-quality Vitest tests
+  *
+  * Meta-Learning Process:
+  * 1. Sample batch of tasks (different testing scenarios)
+  * 2. For each task:
+  *    - Adapt Q-table with support set (inner loop)
+  *    - Evaluate adapted Q-table on query set
+  * 3. Compute meta-gradient from all tasks
+  * 4. Update meta-parameters (Q-table initialization)
+  */
+ export declare class MAMLMetaLearner extends AbstractRLLearner {
+     private readonly mamlConfig;
+     private metaQTable;
+     private taskBuffer;
+     private metaEpisodes;
+     private metaStepCount;
+     private baseAlgorithm?;
+     constructor(config?: Partial<MAMLConfig>);
+     /**
+      * Update Q-value using base algorithm (delegates to wrapped algorithm if available)
+      * For MAML, this is called during inner loop adaptation
+      */
+     update(experience: TaskExperience, nextAction?: AgentAction): void;
+     /**
+      * Q-Learning update rule (default inner loop algorithm)
+      */
+     private qLearningUpdate;
+     /**
+      * Buffer experience by task type for meta-learning
+      */
+     private bufferExperience;
+     /**
+      * Perform meta-learning update (outer loop)
+      * Learns Q-table initialization that enables fast adaptation
+      *
+      * This should be called periodically after collecting enough task examples
+      */
+     performMetaUpdate(): Promise<MetaEpisode | null>;
+     /**
+      * Sample batch of meta-tasks from task buffer
+      * Each task contains support set (for adaptation) and query set (for evaluation)
+      */
+     private sampleMetaTasks;
+     /**
+      * Adapt Q-table to a specific task using support set (inner loop)
+      */
+     private adaptToTask;
+     /**
+      * Evaluate loss (TD error) on a set of experiences
+      */
+     private evaluateLoss;
+     /**
+      * Compute first-order meta-gradients (FOMAML)
+      * Faster approximation that ignores second-order derivatives
+      */
+     private computeFirstOrderGradients;
+     /**
+      * Compute second-order meta-gradients (Full MAML)
+      * More accurate but computationally expensive
+      */
+     private computeSecondOrderGradients;
+     /**
+      * Update meta-parameters using accumulated gradients
+      */
+     private updateMetaParameters;
+     /**
+      * Fast adaptation to new task (few-shot learning)
+      * Given 5-10 examples, quickly adapt Q-table for new testing domain
+      *
+      * @param examples Few examples of new task (5-10)
+      * @returns Adapted Q-table
+      */
+     fastAdapt(examples: TaskExperience[]): Promise<Map<string, Map<string, QValue>>>;
+     /**
+      * Get Q-value from specific Q-table (helper)
+      */
+     private getQValueFromTable;
+     /**
+      * Set Q-value in specific Q-table (helper)
+      */
+     private setQValueInTable;
+     /**
+      * Get Q-value (raw, without creating entry)
+      */
+     private getQValueRaw;
+     /**
+      * Clone Q-table
+      */
+     private cloneQTable;
+     /**
+      * Copy Q-table from source to destination
+      */
+     private copyQTable;
+     /**
+      * Get meta-learning statistics
+      */
+     getMetaStatistics(): {
+         metaSteps: number;
+         metaEpisodes: number;
+         avgPreAdaptLoss: number;
+         avgPostAdaptLoss: number;
+         avgImprovement: number;
+         taskTypes: number;
+         bufferedExperiences: number;
+     };
+     /**
+      * Get meta-episodes history
+      */
+     getMetaEpisodes(): MetaEpisode[];
+     /**
+      * Clear task buffer
+      */
+     clearTaskBuffer(): void;
+     /**
+      * Get default exploration rate
+      */
+     protected getDefaultExplorationRate(): number;
+     /**
+      * Get algorithm name
+      */
+     getAlgorithmName(): string;
+     /**
+      * Override getStatistics to include meta-learning metrics
+      */
+     getStatistics(): ReturnType<AbstractRLLearner['getStatistics']> & {
+         maml: ReturnType<MAMLMetaLearner['getMetaStatistics']>;
+     };
+     /**
+      * Export meta-learner state
+      */
+     export(): ReturnType<AbstractRLLearner['export']> & {
+         metaQTable: Record<string, Record<string, QValue>>;
+         taskBuffer: Record<string, TaskExperience[]>;
+         metaEpisodes: MetaEpisode[];
+         metaStepCount: number;
+     };
+     /**
+      * Import meta-learner state
+      */
+     import(state: ReturnType<MAMLMetaLearner['export']>): void;
+ }
+ /**
+  * Create default MAML configuration
+  */
+ export declare function createDefaultMAMLConfig(): MAMLConfig;
+ export {};
+ //# sourceMappingURL=MAMLMetaLearner.d.ts.map
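The declaration file only exposes signatures, so a hedged sketch of how the meta-learning loop appears to be driven may help: buffer experiences with update(), trigger the outer loop with performMetaUpdate(), then call fastAdapt() with a handful of examples from a new domain. Only the method signatures and createDefaultMAMLConfig come from the typings above; the import paths, the TaskExperience contents, and the Vitest scenario are assumptions for illustration.

```typescript
// Illustrative sketch; import paths and the example scenario are assumed.
import { MAMLMetaLearner, createDefaultMAMLConfig } from 'agentic-qe/dist/learning/algorithms';
import type { TaskExperience } from 'agentic-qe/dist/learning/types';

async function adaptToNewFramework(history: TaskExperience[], vitestExamples: TaskExperience[]) {
  const maml = new MAMLMetaLearner(createDefaultMAMLConfig());

  // Outer loop: buffer experiences by task type, then learn a Q-table
  // initialization that adapts quickly (θ ← θ - β∇Loss on the query sets).
  for (const exp of history) {
    maml.update(exp);
  }
  const episode = await maml.performMetaUpdate(); // may resolve to null per the typings

  // Few-shot inner loop: 5-10 examples of the new domain are adapted into a
  // task-specific Q-table without disturbing the learned meta-parameters.
  const adaptedQTable = await maml.fastAdapt(vitestExamples.slice(0, 10));

  return { episode, adaptedQTable, stats: maml.getMetaStatistics() };
}
```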