agentic-qe 1.3.4 → 1.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/CHANGELOG.md +252 -0
  2. package/README.md +28 -23
  3. package/dist/cli/commands/routing/index.d.ts +1 -1
  4. package/dist/cli/commands/routing/index.d.ts.map +1 -1
  5. package/dist/cli/commands/routing/index.js +29 -19
  6. package/dist/cli/commands/routing/index.js.map +1 -1
  7. package/dist/learning/ExperienceReplayBuffer.d.ts +143 -0
  8. package/dist/learning/ExperienceReplayBuffer.d.ts.map +1 -0
  9. package/dist/learning/ExperienceReplayBuffer.js +255 -0
  10. package/dist/learning/ExperienceReplayBuffer.js.map +1 -0
  11. package/dist/learning/LearningEngine.d.ts +50 -1
  12. package/dist/learning/LearningEngine.d.ts.map +1 -1
  13. package/dist/learning/LearningEngine.js +140 -0
  14. package/dist/learning/LearningEngine.js.map +1 -1
  15. package/dist/learning/QLearning.d.ts +154 -0
  16. package/dist/learning/QLearning.d.ts.map +1 -0
  17. package/dist/learning/QLearning.js +337 -0
  18. package/dist/learning/QLearning.js.map +1 -0
  19. package/dist/learning/index.d.ts +2 -0
  20. package/dist/learning/index.d.ts.map +1 -1
  21. package/dist/learning/index.js +2 -0
  22. package/dist/learning/index.js.map +1 -1
  23. package/dist/mcp/streaming/CoverageAnalyzeStreamHandler.d.ts +11 -1
  24. package/dist/mcp/streaming/CoverageAnalyzeStreamHandler.d.ts.map +1 -1
  25. package/dist/mcp/streaming/CoverageAnalyzeStreamHandler.js +12 -0
  26. package/dist/mcp/streaming/CoverageAnalyzeStreamHandler.js.map +1 -1
  27. package/dist/mcp/streaming/TestExecuteStreamHandler.d.ts +10 -1
  28. package/dist/mcp/streaming/TestExecuteStreamHandler.d.ts.map +1 -1
  29. package/dist/mcp/streaming/TestExecuteStreamHandler.js +11 -0
  30. package/dist/mcp/streaming/TestExecuteStreamHandler.js.map +1 -1
  31. package/dist/reasoning/QEReasoningBank.d.ts +89 -2
  32. package/dist/reasoning/QEReasoningBank.d.ts.map +1 -1
  33. package/dist/reasoning/QEReasoningBank.js +396 -10
  34. package/dist/reasoning/QEReasoningBank.js.map +1 -1
  35. package/package.json +2 -2
@@ -0,0 +1,143 @@
1
+ /**
2
+ * ExperienceReplayBuffer - Phase 2 (Milestone 2.2)
3
+ *
4
+ * Implements experience replay buffer for reinforcement learning.
5
+ * Stores transitions and enables batch sampling for training.
6
+ */
7
+ import { TaskExperience } from './types';
8
+ /**
9
+ * Configuration for experience replay buffer
10
+ */
11
+ export interface ReplayBufferConfig {
12
+ maxSize: number;
13
+ minSize: number;
14
+ prioritized: boolean;
15
+ }
16
+ /**
17
+ * Prioritized experience with importance weight
18
+ */
19
+ interface PrioritizedExperience {
20
+ experience: TaskExperience;
21
+ priority: number;
22
+ timestamp: number;
23
+ }
24
+ /**
25
+ * ExperienceReplayBuffer - FIFO buffer with optional prioritization
26
+ *
27
+ * Implements experience replay for more stable and efficient learning.
28
+ * Supports both uniform random sampling and prioritized experience replay.
29
+ */
30
+ export declare class ExperienceReplayBuffer {
31
+ private readonly logger;
32
+ private readonly config;
33
+ private buffer;
34
+ private totalExperiences;
35
+ constructor(config?: Partial<ReplayBufferConfig>);
36
+ /**
37
+ * Add a new experience to the buffer
38
+ * Uses FIFO eviction when buffer is full
39
+ */
40
+ add(experience: TaskExperience, priority?: number): void;
41
+ /**
42
+ * Add multiple experiences in batch
43
+ */
44
+ addBatch(experiences: TaskExperience[]): void;
45
+ /**
46
+ * Sample a random batch of experiences
47
+ * Uses uniform random sampling or prioritized sampling based on config
48
+ */
49
+ sample(batchSize: number): TaskExperience[];
50
+ /**
51
+ * Uniform random sampling (default)
52
+ */
53
+ private uniformSample;
54
+ /**
55
+ * Prioritized experience replay sampling
56
+ * Samples based on priority with probability proportional to priority
57
+ */
58
+ private prioritizedSample;
59
+ /**
60
+ * Update priority for a specific experience
61
+ * Used in prioritized experience replay to adjust importance weights
62
+ */
63
+ updatePriority(experienceId: string, newPriority: number): boolean;
64
+ /**
65
+ * Calculate default priority based on TD-error magnitude
66
+ * Higher absolute rewards get higher priority
67
+ */
68
+ private calculateDefaultPriority;
69
+ /**
70
+ * Check if buffer has enough experiences to sample
71
+ */
72
+ canSample(batchSize: number): boolean;
73
+ /**
74
+ * Get recent experiences (for temporal coherence)
75
+ */
76
+ getRecent(count: number): TaskExperience[];
77
+ /**
78
+ * Get oldest experiences
79
+ */
80
+ getOldest(count: number): TaskExperience[];
81
+ /**
82
+ * Get all experiences matching a filter
83
+ */
84
+ filter(predicate: (exp: TaskExperience) => boolean): TaskExperience[];
85
+ /**
86
+ * Clear the buffer
87
+ */
88
+ clear(): void;
89
+ /**
90
+ * Get current buffer size
91
+ */
92
+ size(): number;
93
+ /**
94
+ * Check if buffer is empty
95
+ */
96
+ isEmpty(): boolean;
97
+ /**
98
+ * Check if buffer is full
99
+ */
100
+ isFull(): boolean;
101
+ /**
102
+ * Get total number of experiences ever added (including evicted)
103
+ */
104
+ getTotalExperiences(): number;
105
+ /**
106
+ * Get buffer statistics
107
+ */
108
+ getStatistics(): {
109
+ size: number;
110
+ maxSize: number;
111
+ utilization: number;
112
+ totalAdded: number;
113
+ avgPriority: number;
114
+ avgReward: number;
115
+ };
116
+ /**
117
+ * Export buffer state for persistence
118
+ */
119
+ export(): {
120
+ buffer: PrioritizedExperience[];
121
+ config: ReplayBufferConfig;
122
+ totalExperiences: number;
123
+ };
124
+ /**
125
+ * Import buffer state from persistence
126
+ */
127
+ import(state: {
128
+ buffer: PrioritizedExperience[];
129
+ config: ReplayBufferConfig;
130
+ totalExperiences: number;
131
+ }): void;
132
+ /**
133
+ * Prune old experiences beyond retention limit
134
+ * Keeps most recent experiences
135
+ */
136
+ prune(retentionCount: number): number;
137
+ /**
138
+ * Get memory usage estimate in bytes
139
+ */
140
+ getMemoryUsage(): number;
141
+ }
142
+ export {};
143
+ //# sourceMappingURL=ExperienceReplayBuffer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ExperienceReplayBuffer.d.ts","sourceRoot":"","sources":["../../src/learning/ExperienceReplayBuffer.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,cAAc,EAAE,MAAM,SAAS,CAAC;AAGzC;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,OAAO,CAAC;CACtB;AAWD;;GAEG;AACH,UAAU,qBAAqB;IAC7B,UAAU,EAAE,cAAc,CAAC;IAC3B,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;;;;GAKG;AACH,qBAAa,sBAAsB;IACjC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqB;IAC5C,OAAO,CAAC,MAAM,CAA0B;IACxC,OAAO,CAAC,gBAAgB,CAAS;gBAErB,MAAM,GAAE,OAAO,CAAC,kBAAkB,CAAM;IAOpD;;;OAGG;IACH,GAAG,CAAC,UAAU,EAAE,cAAc,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI;IAgBxD;;OAEG;IACH,QAAQ,CAAC,WAAW,EAAE,cAAc,EAAE,GAAG,IAAI;IAO7C;;;OAGG;IACH,MAAM,CAAC,SAAS,EAAE,MAAM,GAAG,cAAc,EAAE;IAe3C;;OAEG;IACH,OAAO,CAAC,aAAa;IAgBrB;;;OAGG;IACH,OAAO,CAAC,iBAAiB;IAqBzB;;;OAGG;IACH,cAAc,CAAC,YAAY,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,GAAG,OAAO;IAalE;;;OAGG;IACH,OAAO,CAAC,wBAAwB;IAUhC;;OAEG;IACH,SAAS,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO;IAIrC;;OAEG;IACH,SAAS,CAAC,KAAK,EAAE,MAAM,GAAG,cAAc,EAAE;IAO1C;;OAEG;IACH,SAAS,CAAC,KAAK,EAAE,MAAM,GAAG,cAAc,EAAE;IAO1C;;OAEG;IACH,MAAM,CAAC,SAAS,EAAE,CAAC,GAAG,EAAE,cAAc,KAAK,OAAO,GAAG,cAAc,EAAE;IAMrE;;OAEG;IACH,KAAK,IAAI,IAAI;IAKb;;OAEG;IACH,IAAI,IAAI,MAAM;IAId;;OAEG;IACH,OAAO,IAAI,OAAO;IAIlB;;OAEG;IACH,MAAM,IAAI,OAAO;IAIjB;;OAEG;IACH,mBAAmB,IAAI,MAAM;IAI7B;;OAEG;IACH,aAAa,IAAI;QACf,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,EAAE,MAAM,CAAC;QAChB,WAAW,EAAE,MAAM,CAAC;QACpB,UAAU,EAAE,MAAM,CAAC;QACnB,WAAW,EAAE,MAAM,CAAC;QACpB,SAAS,EAAE,MAAM,CAAC;KACnB;IAmBD;;OAEG;IACH,MAAM,IAAI;QACR,MAAM,EAAE,qBAAqB,EAAE,CAAC;QAChC,MAAM,EAAE,kBAAkB,CAAC;QAC3B,gBAAgB,EAAE,MAAM,CAAC;KAC1B;IAQD;;OAEG;IACH,MAAM,CAAC,KAAK,EAAE;QACZ,MAAM,EAAE,qBAAqB,EAAE,CAAC;QAChC,MAAM,EAAE,kBAAkB,CAAC;QAC3B,gBAAgB,EAAE,MAAM,CAAC;KAC1B,GAAG,IAAI;IAMR;;;OAGG;IACH,KAAK,CAAC,cAAc,EAAE,MAAM,GAAG,MAAM;IAYrC;;OAEG;IACH,cAAc,IAAI,MAAM;CAGzB"}
@@ -0,0 +1,255 @@
1
+ "use strict";
2
+ /**
3
+ * ExperienceReplayBuffer - Phase 2 (Milestone 2.2)
4
+ *
5
+ * Implements experience replay buffer for reinforcement learning.
6
+ * Stores transitions and enables batch sampling for training.
7
+ */
8
+ Object.defineProperty(exports, "__esModule", { value: true });
9
+ exports.ExperienceReplayBuffer = void 0;
10
+ const Logger_1 = require("../utils/Logger");
11
+ /**
12
+ * Default replay buffer configuration
13
+ */
14
+ const DEFAULT_CONFIG = {
15
+ maxSize: 10000,
16
+ minSize: 100,
17
+ prioritized: false
18
+ };
19
+ /**
20
+ * ExperienceReplayBuffer - FIFO buffer with optional prioritization
21
+ *
22
+ * Implements experience replay for more stable and efficient learning.
23
+ * Supports both uniform random sampling and prioritized experience replay.
24
+ */
25
+ class ExperienceReplayBuffer {
26
+ constructor(config = {}) {
27
+ this.logger = Logger_1.Logger.getInstance();
28
+ this.config = { ...DEFAULT_CONFIG, ...config };
29
+ this.buffer = [];
30
+ this.totalExperiences = 0;
31
+ }
32
+ /**
33
+ * Add a new experience to the buffer
34
+ * Uses FIFO eviction when buffer is full
35
+ */
36
+ add(experience, priority) {
37
+ const prioritizedExp = {
38
+ experience,
39
+ priority: priority ?? this.calculateDefaultPriority(experience),
40
+ timestamp: Date.now()
41
+ };
42
+ // FIFO eviction: remove oldest when full
43
+ if (this.buffer.length >= this.config.maxSize) {
44
+ this.buffer.shift(); // Remove oldest (first element)
45
+ }
46
+ this.buffer.push(prioritizedExp);
47
+ this.totalExperiences++;
48
+ }
49
+ /**
50
+ * Add multiple experiences in batch
51
+ */
52
+ addBatch(experiences) {
53
+ for (const experience of experiences) {
54
+ this.add(experience);
55
+ }
56
+ this.logger.debug(`Added batch of ${experiences.length} experiences`);
57
+ }
58
+ /**
59
+ * Sample a random batch of experiences
60
+ * Uses uniform random sampling or prioritized sampling based on config
61
+ */
62
+ sample(batchSize) {
63
+ if (!this.canSample(batchSize)) {
64
+ throw new Error(`Cannot sample: buffer has ${this.buffer.length} experiences, ` +
65
+ `need at least ${Math.max(batchSize, this.config.minSize)}`);
66
+ }
67
+ if (this.config.prioritized) {
68
+ return this.prioritizedSample(batchSize);
69
+ }
70
+ else {
71
+ return this.uniformSample(batchSize);
72
+ }
73
+ }
74
+ /**
75
+ * Uniform random sampling (default)
76
+ */
77
+ uniformSample(batchSize) {
78
+ const sampled = [];
79
+ const indices = new Set();
80
+ // Sample without replacement
81
+ while (indices.size < batchSize) {
82
+ const randomIndex = Math.floor(Math.random() * this.buffer.length);
83
+ if (!indices.has(randomIndex)) {
84
+ indices.add(randomIndex);
85
+ sampled.push(this.buffer[randomIndex].experience);
86
+ }
87
+ }
88
+ return sampled;
89
+ }
90
+ /**
91
+ * Prioritized experience replay sampling
92
+ * Samples based on priority with probability proportional to priority
93
+ */
94
+ prioritizedSample(batchSize) {
95
+ const sampled = [];
96
+ const totalPriority = this.buffer.reduce((sum, exp) => sum + exp.priority, 0);
97
+ // Sample with replacement based on priorities
98
+ for (let i = 0; i < batchSize; i++) {
99
+ let random = Math.random() * totalPriority;
100
+ let cumulativePriority = 0;
101
+ for (const exp of this.buffer) {
102
+ cumulativePriority += exp.priority;
103
+ if (random <= cumulativePriority) {
104
+ sampled.push(exp.experience);
105
+ break;
106
+ }
107
+ }
108
+ }
109
+ return sampled;
110
+ }
111
+ /**
112
+ * Update priority for a specific experience
113
+ * Used in prioritized experience replay to adjust importance weights
114
+ */
115
+ updatePriority(experienceId, newPriority) {
116
+ const index = this.buffer.findIndex(exp => exp.experience.taskId === experienceId);
117
+ if (index === -1) {
118
+ return false;
119
+ }
120
+ this.buffer[index].priority = newPriority;
121
+ return true;
122
+ }
123
+ /**
124
+ * Calculate default priority based on TD-error magnitude
125
+ * Higher absolute rewards get higher priority
126
+ */
127
+ calculateDefaultPriority(experience) {
128
+ // Priority based on absolute reward (experiences with higher impact are prioritized)
129
+ const basePriority = Math.abs(experience.reward) + 0.01; // Add small constant to avoid zero priority
130
+ // Recent experiences get slight boost
131
+ const recencyBoost = 1.0;
132
+ return basePriority * recencyBoost;
133
+ }
134
+ /**
135
+ * Check if buffer has enough experiences to sample
136
+ */
137
+ canSample(batchSize) {
138
+ return this.buffer.length >= Math.max(batchSize, this.config.minSize);
139
+ }
140
+ /**
141
+ * Get recent experiences (for temporal coherence)
142
+ */
143
+ getRecent(count) {
144
+ const actualCount = Math.min(count, this.buffer.length);
145
+ return this.buffer
146
+ .slice(-actualCount)
147
+ .map(exp => exp.experience);
148
+ }
149
+ /**
150
+ * Get oldest experiences
151
+ */
152
+ getOldest(count) {
153
+ const actualCount = Math.min(count, this.buffer.length);
154
+ return this.buffer
155
+ .slice(0, actualCount)
156
+ .map(exp => exp.experience);
157
+ }
158
+ /**
159
+ * Get all experiences matching a filter
160
+ */
161
+ filter(predicate) {
162
+ return this.buffer
163
+ .filter(exp => predicate(exp.experience))
164
+ .map(exp => exp.experience);
165
+ }
166
+ /**
167
+ * Clear the buffer
168
+ */
169
+ clear() {
170
+ this.buffer = [];
171
+ this.logger.info('Experience replay buffer cleared');
172
+ }
173
+ /**
174
+ * Get current buffer size
175
+ */
176
+ size() {
177
+ return this.buffer.length;
178
+ }
179
+ /**
180
+ * Check if buffer is empty
181
+ */
182
+ isEmpty() {
183
+ return this.buffer.length === 0;
184
+ }
185
+ /**
186
+ * Check if buffer is full
187
+ */
188
+ isFull() {
189
+ return this.buffer.length >= this.config.maxSize;
190
+ }
191
+ /**
192
+ * Get total number of experiences ever added (including evicted)
193
+ */
194
+ getTotalExperiences() {
195
+ return this.totalExperiences;
196
+ }
197
+ /**
198
+ * Get buffer statistics
199
+ */
200
+ getStatistics() {
201
+ const avgPriority = this.buffer.length > 0
202
+ ? this.buffer.reduce((sum, exp) => sum + exp.priority, 0) / this.buffer.length
203
+ : 0;
204
+ const avgReward = this.buffer.length > 0
205
+ ? this.buffer.reduce((sum, exp) => sum + exp.experience.reward, 0) / this.buffer.length
206
+ : 0;
207
+ return {
208
+ size: this.buffer.length,
209
+ maxSize: this.config.maxSize,
210
+ utilization: this.buffer.length / this.config.maxSize,
211
+ totalAdded: this.totalExperiences,
212
+ avgPriority,
213
+ avgReward
214
+ };
215
+ }
216
+ /**
217
+ * Export buffer state for persistence
218
+ */
219
+ export() {
220
+ return {
221
+ buffer: [...this.buffer],
222
+ config: { ...this.config },
223
+ totalExperiences: this.totalExperiences
224
+ };
225
+ }
226
+ /**
227
+ * Import buffer state from persistence
228
+ */
229
+ import(state) {
230
+ this.buffer = [...state.buffer];
231
+ this.totalExperiences = state.totalExperiences;
232
+ this.logger.info(`Imported replay buffer with ${this.buffer.length} experiences`);
233
+ }
234
+ /**
235
+ * Prune old experiences beyond retention limit
236
+ * Keeps most recent experiences
237
+ */
238
+ prune(retentionCount) {
239
+ if (this.buffer.length <= retentionCount) {
240
+ return 0;
241
+ }
242
+ const removeCount = this.buffer.length - retentionCount;
243
+ this.buffer = this.buffer.slice(-retentionCount);
244
+ this.logger.info(`Pruned ${removeCount} old experiences from buffer`);
245
+ return removeCount;
246
+ }
247
+ /**
248
+ * Get memory usage estimate in bytes
249
+ */
250
+ getMemoryUsage() {
251
+ return JSON.stringify(this.buffer).length;
252
+ }
253
+ }
254
+ exports.ExperienceReplayBuffer = ExperienceReplayBuffer;
255
+ //# sourceMappingURL=ExperienceReplayBuffer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ExperienceReplayBuffer.js","sourceRoot":"","sources":["../../src/learning/ExperienceReplayBuffer.ts"],"names":[],"mappings":";AAAA;;;;;GAKG;;;AAGH,4CAAyC;AAWzC;;GAEG;AACH,MAAM,cAAc,GAAuB;IACzC,OAAO,EAAE,KAAK;IACd,OAAO,EAAE,GAAG;IACZ,WAAW,EAAE,KAAK;CACnB,CAAC;AAWF;;;;;GAKG;AACH,MAAa,sBAAsB;IAMjC,YAAY,SAAsC,EAAE;QAClD,IAAI,CAAC,MAAM,GAAG,eAAM,CAAC,WAAW,EAAE,CAAC;QACnC,IAAI,CAAC,MAAM,GAAG,EAAE,GAAG,cAAc,EAAE,GAAG,MAAM,EAAE,CAAC;QAC/C,IAAI,CAAC,MAAM,GAAG,EAAE,CAAC;QACjB,IAAI,CAAC,gBAAgB,GAAG,CAAC,CAAC;IAC5B,CAAC;IAED;;;OAGG;IACH,GAAG,CAAC,UAA0B,EAAE,QAAiB;QAC/C,MAAM,cAAc,GAA0B;YAC5C,UAAU;YACV,QAAQ,EAAE,QAAQ,IAAI,IAAI,CAAC,wBAAwB,CAAC,UAAU,CAAC;YAC/D,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;SACtB,CAAC;QAEF,yCAAyC;QACzC,IAAI,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YAC9C,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,gCAAgC;QACvD,CAAC;QAED,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QACjC,IAAI,CAAC,gBAAgB,EAAE,CAAC;IAC1B,CAAC;IAED;;OAEG;IACH,QAAQ,CAAC,WAA6B;QACpC,KAAK,MAAM,UAAU,IAAI,WAAW,EAAE,CAAC;YACrC,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QACvB,CAAC;QACD,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,kBAAkB,WAAW,CAAC,MAAM,cAAc,CAAC,CAAC;IACxE,CAAC;IAED;;;OAGG;IACH,MAAM,CAAC,SAAiB;QACtB,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,EAAE,CAAC;YAC/B,MAAM,IAAI,KAAK,CACb,6BAA6B,IAAI,CAAC,MAAM,CAAC,MAAM,gBAAgB;gBAC/D,iBAAiB,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,CAC5D,CAAC;QACJ,CAAC;QAED,IAAI,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;YAC5B,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,CAAC,CAAC;QAC3C,CAAC;aAAM,CAAC;YACN,OAAO,IAAI,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;QACvC,CAAC;IACH,CAAC;IAED;;OAEG;IACK,aAAa,CAAC,SAAiB;QACrC,MAAM,OAAO,GAAqB,EAAE,CAAC;QACrC,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;QAElC,6BAA6B;QAC7B,OAAO,OAAO,CAAC,IAAI,GAAG,SAAS,EAAE,CAAC;YAChC,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YACnE,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC,EAAE,CAAC;gBAC9B,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;gBACzB,OAAO,CAAC,IAAI,CAAC,IAAI
,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,UAAU,CAAC,CAAC;YACpD,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;;OAGG;IACK,iBAAiB,CAAC,SAAiB;QACzC,MAAM,OAAO,GAAqB,EAAE,CAAC;QACrC,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;QAE9E,8CAA8C;QAC9C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACnC,IAAI,MAAM,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,aAAa,CAAC;YAC3C,IAAI,kBAAkB,GAAG,CAAC,CAAC;YAE3B,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;gBAC9B,kBAAkB,IAAI,GAAG,CAAC,QAAQ,CAAC;gBACnC,IAAI,MAAM,IAAI,kBAAkB,EAAE,CAAC;oBACjC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;oBAC7B,MAAM;gBACR,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;;OAGG;IACH,cAAc,CAAC,YAAoB,EAAE,WAAmB;QACtD,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,CACjC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,UAAU,CAAC,MAAM,KAAK,YAAY,CAC9C,CAAC;QAEF,IAAI,KAAK,KAAK,CAAC,CAAC,EAAE,CAAC;YACjB,OAAO,KAAK,CAAC;QACf,CAAC;QAED,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,QAAQ,GAAG,WAAW,CAAC;QAC1C,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;;OAGG;IACK,wBAAwB,CAAC,UAA0B;QACzD,qFAAqF;QACrF,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC,GAAG,IAAI,CAAC,CAAC,4CAA4C;QAErG,sCAAsC;QACtC,MAAM,YAAY,GAAG,GAAG,CAAC;QAEzB,OAAO,YAAY,GAAG,YAAY,CAAC;IACrC,CAAC;IAED;;OAEG;IACH,SAAS,CAAC,SAAiB;QACzB,OAAO,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IACxE,CAAC;IAED;;OAEG;IACH,SAAS,CAAC,KAAa;QACrB,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QACxD,OAAO,IAAI,CAAC,MAAM;aACf,KAAK,CAAC,CAAC,WAAW,CAAC;aACnB,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;IAChC,CAAC;IAED;;OAEG;IACH,SAAS,CAAC,KAAa;QACrB,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QACxD,OAAO,IAAI,CAAC,MAAM;aACf,KAAK,CAAC,CAAC,EAAE,WAAW,CAAC;aACrB,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;IAChC,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,SAA2C;QAChD,OAAO,IAAI,CAAC,MAAM;aACf,MAAM,CAAC,GAAG,CAA
C,EAAE,CAAC,SAAS,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;aACxC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;IAChC,CAAC;IAED;;OAEG;IACH,KAAK;QACH,IAAI,CAAC,MAAM,GAAG,EAAE,CAAC;QACjB,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,kCAAkC,CAAC,CAAC;IACvD,CAAC;IAED;;OAEG;IACH,IAAI;QACF,OAAO,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC;IAC5B,CAAC;IAED;;OAEG;IACH,OAAO;QACL,OAAO,IAAI,CAAC,MAAM,CAAC,MAAM,KAAK,CAAC,CAAC;IAClC,CAAC;IAED;;OAEG;IACH,MAAM;QACJ,OAAO,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC;IACnD,CAAC;IAED;;OAEG;IACH,mBAAmB;QACjB,OAAO,IAAI,CAAC,gBAAgB,CAAC;IAC/B,CAAC;IAED;;OAEG;IACH,aAAa;QAQX,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC;YACxC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM;YAC9E,CAAC,CAAC,CAAC,CAAC;QAEN,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC;YACtC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM;YACvF,CAAC,CAAC,CAAC,CAAC;QAEN,OAAO;YACL,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM;YACxB,OAAO,EAAE,IAAI,CAAC,MAAM,CAAC,OAAO;YAC5B,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO;YACrD,UAAU,EAAE,IAAI,CAAC,gBAAgB;YACjC,WAAW;YACX,SAAS;SACV,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,MAAM;QAKJ,OAAO;YACL,MAAM,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC;YACxB,MAAM,EAAE,EAAE,GAAG,IAAI,CAAC,MAAM,EAAE;YAC1B,gBAAgB,EAAE,IAAI,CAAC,gBAAgB;SACxC,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,KAIN;QACC,IAAI,CAAC,MAAM,GAAG,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;QAChC,IAAI,CAAC,gBAAgB,GAAG,KAAK,CAAC,gBAAgB,CAAC;QAC/C,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,+BAA+B,IAAI,CAAC,MAAM,CAAC,MAAM,cAAc,CAAC,CAAC;IACpF,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,cAAsB;QAC1B,IAAI,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,cAAc,EAAE,CAAC;YACzC,OAAO,CAAC,CAAC;QACX,CAAC;QAED,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,cAAc,CAAC;QACxD,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,cAAc,CAAC,CAAC;QAEjD,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,UA
AU,WAAW,8BAA8B,CAAC,CAAC;QACtE,OAAO,WAAW,CAAC;IACrB,CAAC;IAED;;OAEG;IACH,cAAc;QACZ,OAAO,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IAC5C,CAAC;CACF;AAhSD,wDAgSC"}
@@ -5,7 +5,8 @@
5
5
  * Uses Q-learning algorithm to optimize task execution strategies.
6
6
  */
7
7
  import { SwarmMemoryManager } from '../core/memory/SwarmMemoryManager';
8
- import { LearningConfig, TaskState, LearningFeedback, LearningOutcome, LearnedPattern, FailurePattern, StrategyRecommendation } from './types';
8
+ import { QLearningConfig } from './QLearning';
9
+ import { LearningConfig, TaskExperience, TaskState, AgentAction, LearningFeedback, LearningOutcome, LearnedPattern, FailurePattern, StrategyRecommendation } from './types';
9
10
  /**
10
11
  * LearningEngine - Reinforcement learning for agents
11
12
  */
@@ -15,6 +16,8 @@ export declare class LearningEngine {
15
16
  private readonly agentId;
16
17
  private config;
17
18
  private qTable;
19
+ private qLearning?;
20
+ private useQLearning;
18
21
  private experiences;
19
22
  private patterns;
20
23
  private failurePatterns;
@@ -112,6 +115,14 @@ export declare class LearningEngine {
112
115
  * Deserialize Q-table from storage
113
116
  */
114
117
  private deserializeQTable;
118
+ /**
119
+ * Serialize Q-table for QLearning import (converts to QValue format)
120
+ */
121
+ private serializeQTableForQLearning;
122
+ /**
123
+ * Deserialize Q-table from QLearning export (extracts values from QValue format)
124
+ */
125
+ private deserializeQTableFromQLearning;
115
126
  /**
116
127
  * Calculate state size in bytes
117
128
  */
@@ -140,5 +151,43 @@ export declare class LearningEngine {
140
151
  * Check if learning is enabled
141
152
  */
142
153
  isEnabled(): boolean;
154
+ /**
155
+ * Enable Q-learning mode (Phase 2 Integration)
156
+ * Switches from basic Q-table to full QLearning algorithm with experience replay
157
+ */
158
+ enableQLearning(config?: Partial<QLearningConfig>): void;
159
+ /**
160
+ * Disable Q-learning mode (revert to basic implementation)
161
+ */
162
+ disableQLearning(): void;
163
+ /**
164
+ * Learn from experience using Q-learning (when enabled)
165
+ * This method integrates with the QLearning algorithm
166
+ */
167
+ learnFromExperience(experience: TaskExperience): Promise<void>;
168
+ /**
169
+ * Select action with policy (Q-learning integration)
170
+ * Uses epsilon-greedy policy when Q-learning is enabled
171
+ */
172
+ selectActionWithPolicy(state: TaskState, availableActions: AgentAction[]): Promise<AgentAction>;
173
+ /**
174
+ * Get Q-learning statistics (when enabled)
175
+ */
176
+ getQLearningStats(): {
177
+ enabled: boolean;
178
+ stats?: {
179
+ steps: number;
180
+ episodes: number;
181
+ tableSize: number;
182
+ explorationRate: number;
183
+ avgQValue: number;
184
+ maxQValue: number;
185
+ minQValue: number;
186
+ };
187
+ };
188
+ /**
189
+ * Check if Q-learning mode is enabled
190
+ */
191
+ isQLearningEnabled(): boolean;
143
192
  }
144
193
  //# sourceMappingURL=LearningEngine.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"LearningEngine.d.ts","sourceRoot":"","sources":["../../src/learning/LearningEngine.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH,OAAO,EAAE,kBAAkB,EAAE,MAAM,mCAAmC,CAAC;AAKvE,OAAO,EACL,cAAc,EAEd,SAAS,EAET,gBAAgB,EAChB,eAAe,EACf,cAAc,EAEd,cAAc,EACd,sBAAsB,EAEvB,MAAM,SAAS,CAAC;AAiBjB;;GAEG;AACH,qBAAa,cAAc;IACzB,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAqB;IACjD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,MAAM,CAAiB;IAC/B,OAAO,CAAC,MAAM,CAAmC;IACjD,OAAO,CAAC,WAAW,CAAmB;IACtC,OAAO,CAAC,QAAQ,CAA8B;IAC9C,OAAO,CAAC,eAAe,CAA8B;IACrD,OAAO,CAAC,SAAS,CAAS;gBAGxB,OAAO,EAAE,MAAM,EACf,WAAW,EAAE,kBAAkB,EAC/B,MAAM,GAAE,OAAO,CAAC,cAAc,CAAM;IAatC;;OAEG;IACG,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAgBjC;;OAEG;IACG,kBAAkB,CACtB,IAAI,EAAE,GAAG,EACT,MAAM,EAAE,GAAG,EACX,QAAQ,CAAC,EAAE,gBAAgB,GAC1B,OAAO,CAAC,eAAe,CAAC;IAqD3B;;OAEG;IACG,iBAAiB,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAAC,sBAAsB,CAAC;IAoD1E;;OAEG;IACH,WAAW,IAAI,cAAc,EAAE;IAK/B;;OAEG;IACH,kBAAkB,IAAI,cAAc,EAAE;IAKtC;;OAEG;IACH,OAAO,CAAC,eAAe;IAUvB;;OAEG;IACH,OAAO,CAAC,WAAW;IAKnB;;OAEG;IACH,OAAO,CAAC,YAAY;IAIpB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAyCzB;;OAEG;IACH,OAAO,CAAC,eAAe;IA+BvB;;OAEG;YACW,YAAY;IA4B1B;;OAEG;YACW,kBAAkB;IAgBhC;;OAEG;YACW,cAAc;IAoC5B;;OAEG;YACW,oBAAoB;IAoBlC;;OAEG;YACW,oBAAoB;IAwBlC;;OAEG;IACH,OAAO,CAAC,aAAa;IAiBrB;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAc1B;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAIzB;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAOxB;;OAEG;YACW,SAAS;IA6BvB;;OAEG;YACW,SAAS;IAmBvB;;OAEG;IACH,OAAO,CAAC,eAAe;IAQvB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAOzB;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAQ1B;;OAEG;YACW,qBAAqB;IAiBnC;;OAEG;YACW,iBAAiB;IAiB/B;;OAEG;IACH,kBAAkB,IAAI,MAAM;IAI5B;;OAEG;IACH,mBAAmB,IAAI,MAAM;IAI7B;;OAEG;IACH,UAAU,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI;IAIlC;;OAEG;IACH,SAAS,IAAI,OAAO;CAGrB"}
1
+ {"version":3,"file":"LearningEngine.d.ts","sourceRoot":"","sources":["../../src/learning/LearningEngine.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH,OAAO,EAAE,kBAAkB,EAAE,MAAM,mCAAmC,CAAC;AACvE,OAAO,EAAa,eAAe,EAAE,MAAM,aAAa,CAAC;AAKzD,OAAO,EACL,cAAc,EACd,cAAc,EACd,SAAS,EACT,WAAW,EACX,gBAAgB,EAChB,eAAe,EACf,cAAc,EAEd,cAAc,EACd,sBAAsB,EAEvB,MAAM,SAAS,CAAC;AAiBjB;;GAEG;AACH,qBAAa,cAAc;IACzB,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAqB;IACjD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,MAAM,CAAiB;IAC/B,OAAO,CAAC,MAAM,CAAmC;IACjD,OAAO,CAAC,SAAS,CAAC,CAAY;IAC9B,OAAO,CAAC,YAAY,CAAU;IAC9B,OAAO,CAAC,WAAW,CAAmB;IACtC,OAAO,CAAC,QAAQ,CAA8B;IAC9C,OAAO,CAAC,eAAe,CAA8B;IACrD,OAAO,CAAC,SAAS,CAAS;gBAGxB,OAAO,EAAE,MAAM,EACf,WAAW,EAAE,kBAAkB,EAC/B,MAAM,GAAE,OAAO,CAAC,cAAc,CAAM;IActC;;OAEG;IACG,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAgBjC;;OAEG;IACG,kBAAkB,CACtB,IAAI,EAAE,GAAG,EACT,MAAM,EAAE,GAAG,EACX,QAAQ,CAAC,EAAE,gBAAgB,GAC1B,OAAO,CAAC,eAAe,CAAC;IAqD3B;;OAEG;IACG,iBAAiB,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAAC,sBAAsB,CAAC;IAoD1E;;OAEG;IACH,WAAW,IAAI,cAAc,EAAE;IAK/B;;OAEG;IACH,kBAAkB,IAAI,cAAc,EAAE;IAKtC;;OAEG;IACH,OAAO,CAAC,eAAe;IAUvB;;OAEG;IACH,OAAO,CAAC,WAAW;IAKnB;;OAEG;IACH,OAAO,CAAC,YAAY;IAIpB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAyCzB;;OAEG;IACH,OAAO,CAAC,eAAe;IA+BvB;;OAEG;YACW,YAAY;IA4B1B;;OAEG;YACW,kBAAkB;IAgBhC;;OAEG;YACW,cAAc;IAoC5B;;OAEG;YACW,oBAAoB;IAoBlC;;OAEG;YACW,oBAAoB;IAwBlC;;OAEG;IACH,OAAO,CAAC,aAAa;IAiBrB;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAc1B;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAIzB;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAOxB;;OAEG;YACW,SAAS;IA6BvB;;OAEG;YACW,SAAS;IAmBvB;;OAEG;IACH,OAAO,CAAC,eAAe;IAQvB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAOzB;;OAEG;IACH,OAAO,CAAC,2BAA2B;IAiBnC;;OAEG;IACH,OAAO,CAAC,8BAA8B;IAWtC;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAQ1B;;OAEG;YACW,qBAAqB;IAiBnC;;OAEG;YACW,iBAAiB;IAiB/B;;OAEG;IACH,kBAAkB,IAAI,MAAM;IAI5B;;OAEG;IACH,mBAAmB,IAAI,MAAM;IAI7B;;OAEG;IACH,UAAU,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI;IAIlC;;OAEG;IACH,SAAS,IAAI,OAAO;IAIpB;;;OAGG;IACH,eAAe,CAAC,MAAM,CAAC,EAAE,OAAO,CAAC
,eAAe,CAAC,GAAG,IAAI;IAgCxD;;OAEG;IACH,gBAAgB,IAAI,IAAI;IAaxB;;;OAGG;IACG,mBAAmB,CAAC,UAAU,EAAE,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC;IA4BpE;;;OAGG;IACG,sBAAsB,CAAC,KAAK,EAAE,SAAS,EAAE,gBAAgB,EAAE,WAAW,EAAE,GAAG,OAAO,CAAC,WAAW,CAAC;IAiBrG;;OAEG;IACH,iBAAiB,IAAI;QACnB,OAAO,EAAE,OAAO,CAAC;QACjB,KAAK,CAAC,EAAE;YACN,KAAK,EAAE,MAAM,CAAC;YACd,QAAQ,EAAE,MAAM,CAAC;YACjB,SAAS,EAAE,MAAM,CAAC;YAClB,eAAe,EAAE,MAAM,CAAC;YACxB,SAAS,EAAE,MAAM,CAAC;YAClB,SAAS,EAAE,MAAM,CAAC;YAClB,SAAS,EAAE,MAAM,CAAC;SACnB,CAAC;KACH;IAWD;;OAEG;IACH,kBAAkB,IAAI,OAAO;CAG9B"}
@@ -9,6 +9,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
9
9
  exports.LearningEngine = void 0;
10
10
  const uuid_1 = require("uuid");
11
11
  const Logger_1 = require("../utils/Logger");
12
+ const QLearning_1 = require("./QLearning");
12
13
  // Import version from package.json to maintain consistency
13
14
  const packageJson = require('../../package.json');
14
15
  const PACKAGE_VERSION = packageJson.version;
@@ -36,6 +37,7 @@ class LearningEngine {
36
37
  this.memoryStore = memoryStore;
37
38
  this.config = { ...DEFAULT_CONFIG, ...config };
38
39
  this.qTable = new Map();
40
+ this.useQLearning = false; // Default to legacy implementation
39
41
  this.experiences = [];
40
42
  this.patterns = new Map();
41
43
  this.failurePatterns = new Map();
@@ -459,6 +461,38 @@ class LearningEngine {
459
461
  this.qTable.set(state, new Map(Object.entries(actions)));
460
462
  }
461
463
  }
464
+ /**
465
+ * Serialize Q-table for QLearning import (converts to QValue format)
466
+ */
467
+ serializeQTableForQLearning() {
468
+ const serialized = {};
469
+ for (const [state, actions] of this.qTable.entries()) {
470
+ serialized[state] = {};
471
+ for (const [action, value] of actions.entries()) {
472
+ serialized[state][action] = {
473
+ state,
474
+ action,
475
+ value,
476
+ updateCount: 1,
477
+ lastUpdated: Date.now()
478
+ };
479
+ }
480
+ }
481
+ return serialized;
482
+ }
483
+ /**
484
+ * Deserialize Q-table from QLearning export (extracts values from QValue format)
485
+ */
486
+ deserializeQTableFromQLearning(data) {
487
+ this.qTable.clear();
488
+ for (const [state, actions] of Object.entries(data)) {
489
+ const actionMap = new Map();
490
+ for (const [action, qValue] of Object.entries(actions)) {
491
+ actionMap.set(action, qValue.value);
492
+ }
493
+ this.qTable.set(state, actionMap);
494
+ }
495
+ }
462
496
  /**
463
497
  * Calculate state size in bytes
464
498
  */
@@ -529,6 +563,112 @@ class LearningEngine {
529
563
  isEnabled() {
530
564
  return this.config.enabled;
531
565
  }
566
+ /**
567
+ * Enable Q-learning mode (Phase 2 Integration)
568
+ * Switches from basic Q-table to full QLearning algorithm with experience replay
569
+ */
570
+ enableQLearning(config) {
571
+ const qLearningConfig = {
572
+ learningRate: this.config.learningRate,
573
+ discountFactor: this.config.discountFactor,
574
+ explorationRate: this.config.explorationRate,
575
+ explorationDecay: this.config.explorationDecay,
576
+ minExplorationRate: this.config.minExplorationRate,
577
+ useExperienceReplay: true,
578
+ replayBufferSize: 10000,
579
+ batchSize: this.config.batchSize,
580
+ ...config
581
+ };
582
+ this.qLearning = new QLearning_1.QLearning(qLearningConfig);
583
+ this.useQLearning = true;
584
+ // Import existing Q-table into QLearning if we have data
585
+ if (this.qTable.size > 0) {
586
+ const serialized = this.serializeQTableForQLearning();
587
+ this.qLearning.import({
588
+ qTable: serialized,
589
+ config: qLearningConfig,
590
+ stepCount: this.taskCount,
591
+ episodeCount: Math.floor(this.taskCount / 10)
592
+ });
593
+ }
594
+ this.logger.info(`Q-learning mode enabled for agent ${this.agentId}`, {
595
+ config: qLearningConfig
596
+ });
597
+ }
598
+ /**
599
+ * Disable Q-learning mode (revert to basic implementation)
600
+ */
601
+ disableQLearning() {
602
+ if (this.qLearning && this.useQLearning) {
603
+ // Export Q-learning state to basic Q-table
604
+ const exported = this.qLearning.export();
605
+ this.deserializeQTableFromQLearning(exported.qTable);
606
+ }
607
+ this.qLearning = undefined;
608
+ this.useQLearning = false;
609
+ this.logger.info(`Q-learning mode disabled for agent ${this.agentId}`);
610
+ }
611
+ /**
612
+ * Learn from experience using Q-learning (when enabled)
613
+ * This method integrates with the QLearning algorithm
614
+ */
615
+ async learnFromExperience(experience) {
616
+ if (!this.config.enabled) {
617
+ return;
618
+ }
619
+ if (this.useQLearning && this.qLearning) {
620
+ // Use QLearning algorithm
621
+ this.qLearning.update(experience);
622
+ this.experiences.push(experience);
623
+ // Perform batch update periodically
624
+ if (this.taskCount % this.config.updateFrequency === 0) {
625
+ this.qLearning.batchUpdate();
626
+ }
627
+ // End episode periodically to trigger exploration decay
628
+ if (this.taskCount % 10 === 0) {
629
+ this.qLearning.endEpisode();
630
+ }
631
+ }
632
+ else {
633
+ // Use legacy Q-table implementation
634
+ await this.updateQTable(experience);
635
+ this.experiences.push(experience);
636
+ }
637
+ this.taskCount++;
638
+ }
639
+ /**
640
+ * Select action with policy (Q-learning integration)
641
+ * Uses epsilon-greedy policy when Q-learning is enabled
642
+ */
643
+ async selectActionWithPolicy(state, availableActions) {
644
+ if (this.useQLearning && this.qLearning) {
645
+ // Use Q-learning's epsilon-greedy policy
646
+ return this.qLearning.selectAction(state, availableActions);
647
+ }
648
+ // Fallback to recommendation-based selection
649
+ const recommendation = await this.recommendStrategy(state);
650
+ // Find the action matching the recommended strategy
651
+ const matchingAction = availableActions.find(action => action.strategy === recommendation.strategy);
652
+ return matchingAction || availableActions[0];
653
+ }
654
+ /**
655
+ * Get Q-learning statistics (when enabled)
656
+ */
657
+ getQLearningStats() {
658
+ if (!this.useQLearning || !this.qLearning) {
659
+ return { enabled: false };
660
+ }
661
+ return {
662
+ enabled: true,
663
+ stats: this.qLearning.getStatistics()
664
+ };
665
+ }
666
+ /**
667
+ * Check if Q-learning mode is enabled
668
+ */
669
+ isQLearningEnabled() {
670
+ return this.useQLearning;
671
+ }
532
672
  }
533
673
  exports.LearningEngine = LearningEngine;
534
674
  //# sourceMappingURL=LearningEngine.js.map