agentic-qe 1.3.4 → 1.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +252 -0
- package/README.md +28 -23
- package/dist/cli/commands/routing/index.d.ts +1 -1
- package/dist/cli/commands/routing/index.d.ts.map +1 -1
- package/dist/cli/commands/routing/index.js +29 -19
- package/dist/cli/commands/routing/index.js.map +1 -1
- package/dist/learning/ExperienceReplayBuffer.d.ts +143 -0
- package/dist/learning/ExperienceReplayBuffer.d.ts.map +1 -0
- package/dist/learning/ExperienceReplayBuffer.js +255 -0
- package/dist/learning/ExperienceReplayBuffer.js.map +1 -0
- package/dist/learning/LearningEngine.d.ts +50 -1
- package/dist/learning/LearningEngine.d.ts.map +1 -1
- package/dist/learning/LearningEngine.js +140 -0
- package/dist/learning/LearningEngine.js.map +1 -1
- package/dist/learning/QLearning.d.ts +154 -0
- package/dist/learning/QLearning.d.ts.map +1 -0
- package/dist/learning/QLearning.js +337 -0
- package/dist/learning/QLearning.js.map +1 -0
- package/dist/learning/index.d.ts +2 -0
- package/dist/learning/index.d.ts.map +1 -1
- package/dist/learning/index.js +2 -0
- package/dist/learning/index.js.map +1 -1
- package/dist/mcp/streaming/CoverageAnalyzeStreamHandler.d.ts +11 -1
- package/dist/mcp/streaming/CoverageAnalyzeStreamHandler.d.ts.map +1 -1
- package/dist/mcp/streaming/CoverageAnalyzeStreamHandler.js +12 -0
- package/dist/mcp/streaming/CoverageAnalyzeStreamHandler.js.map +1 -1
- package/dist/mcp/streaming/TestExecuteStreamHandler.d.ts +10 -1
- package/dist/mcp/streaming/TestExecuteStreamHandler.d.ts.map +1 -1
- package/dist/mcp/streaming/TestExecuteStreamHandler.js +11 -0
- package/dist/mcp/streaming/TestExecuteStreamHandler.js.map +1 -1
- package/dist/reasoning/QEReasoningBank.d.ts +89 -2
- package/dist/reasoning/QEReasoningBank.d.ts.map +1 -1
- package/dist/reasoning/QEReasoningBank.js +396 -10
- package/dist/reasoning/QEReasoningBank.js.map +1 -1
- package/package.json +2 -2
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ExperienceReplayBuffer - Phase 2 (Milestone 2.2)
|
|
3
|
+
*
|
|
4
|
+
* Implements experience replay buffer for reinforcement learning.
|
|
5
|
+
* Stores transitions and enables batch sampling for training.
|
|
6
|
+
*/
|
|
7
|
+
import { TaskExperience } from './types';
|
|
8
|
+
/**
|
|
9
|
+
* Configuration for experience replay buffer
|
|
10
|
+
*/
|
|
11
|
+
export interface ReplayBufferConfig {
|
|
12
|
+
/** Capacity of the buffer; once reached, adding evicts the oldest entry (FIFO). Default: 10000. */
maxSize: number;
|
|
13
|
+
/** Minimum number of stored experiences required before sampling is allowed. Default: 100. */
minSize: number;
|
|
14
|
+
/** When true, sample() draws proportionally to priority; otherwise uniformly. Default: false. */
prioritized: boolean;
|
|
15
|
+
}
|
|
16
|
+
/**
|
|
17
|
+
* Prioritized experience with importance weight
|
|
18
|
+
*/
|
|
19
|
+
interface PrioritizedExperience {
|
|
20
|
+
/** The stored task experience as passed to add(). */
experience: TaskExperience;
|
|
21
|
+
/** Sampling weight: the caller-supplied priority, or a default derived from the reward. */
priority: number;
|
|
22
|
+
/** Date.now() value captured when the experience was added to the buffer. */
timestamp: number;
|
|
23
|
+
}
|
|
24
|
+
/**
 * ExperienceReplayBuffer - FIFO buffer with optional prioritization
 *
 * Implements experience replay for more stable and efficient learning.
 * Supports both uniform random sampling and prioritized experience replay.
 */
export declare class ExperienceReplayBuffer {
    private readonly logger;
    private readonly config;
    private buffer;
    private totalExperiences;
    /**
     * @param config Partial overrides; unspecified fields fall back to the
     *               defaults (maxSize 10000, minSize 100, prioritized false).
     */
    constructor(config?: Partial<ReplayBufferConfig>);
    /**
     * Add a new experience to the buffer.
     * Uses FIFO eviction (drops the oldest entry) when the buffer is full.
     *
     * @param experience Experience to store.
     * @param priority   Optional sampling priority; when omitted a default is
     *                   derived from the experience's reward.
     */
    add(experience: TaskExperience, priority?: number): void;
    /**
     * Add multiple experiences in order, each with its default priority.
     */
    addBatch(experiences: TaskExperience[]): void;
    /**
     * Sample a batch of experiences.
     * Uses uniform sampling without replacement, or priority-weighted sampling
     * with replacement, depending on `config.prioritized`.
     *
     * @throws Error when `canSample(batchSize)` is false.
     */
    sample(batchSize: number): TaskExperience[];
    /**
     * Uniform random sampling without replacement (default mode).
     */
    private uniformSample;
    /**
     * Prioritized experience replay sampling.
     * Samples with replacement, with probability proportional to priority.
     */
    private prioritizedSample;
    /**
     * Update the priority of a stored experience.
     *
     * @param experienceId Matched against the experience's `taskId`; only the
     *                     first matching entry is updated.
     * @param newPriority  Replacement priority.
     * @returns true when an entry was found and updated, false otherwise.
     */
    updatePriority(experienceId: string, newPriority: number): boolean;
    /**
     * Calculate the default priority for an experience.
     * Based on the absolute value of the experience's reward plus a small
     * constant so that no experience gets zero priority. (This is a reward
     * magnitude, not a TD-error.)
     */
    private calculateDefaultPriority;
    /**
     * Check if the buffer has enough experiences to sample: it must hold at
     * least max(batchSize, config.minSize) entries.
     */
    canSample(batchSize: number): boolean;
    /**
     * Get the most recently added experiences, oldest first.
     */
    getRecent(count: number): TaskExperience[];
    /**
     * Get the oldest experiences still in the buffer, oldest first.
     */
    getOldest(count: number): TaskExperience[];
    /**
     * Get all experiences for which the predicate returns true.
     */
    filter(predicate: (exp: TaskExperience) => boolean): TaskExperience[];
    /**
     * Remove all stored experiences. The total-added counter is not reset.
     */
    clear(): void;
    /**
     * Get current buffer size (number of stored experiences).
     */
    size(): number;
    /**
     * Check if buffer is empty
     */
    isEmpty(): boolean;
    /**
     * Check if buffer has reached `config.maxSize`.
     */
    isFull(): boolean;
    /**
     * Get total number of experiences ever added (including evicted)
     */
    getTotalExperiences(): number;
    /**
     * Get buffer statistics.
     *
     * `utilization` is size / maxSize; `avgPriority` and `avgReward` are
     * means over the currently stored entries (0 when the buffer is empty).
     */
    getStatistics(): {
        size: number;
        maxSize: number;
        utilization: number;
        totalAdded: number;
        avgPriority: number;
        avgReward: number;
    };
    /**
     * Export buffer state (entries, config copy, total-added counter) for
     * persistence.
     */
    export(): {
        buffer: PrioritizedExperience[];
        config: ReplayBufferConfig;
        totalExperiences: number;
    };
    /**
     * Import buffer state from persistence.
     * Replaces the stored entries and the total-added counter; the `config`
     * field of the state is not applied to this instance.
     */
    import(state: {
        buffer: PrioritizedExperience[];
        config: ReplayBufferConfig;
        totalExperiences: number;
    }): void;
    /**
     * Prune old experiences beyond a retention limit, keeping the most recent.
     *
     * @param retentionCount Number of most recent experiences to keep.
     * @returns Number of experiences removed.
     */
    prune(retentionCount: number): number;
    /**
     * Get a rough memory usage estimate, computed as the length of the JSON
     * serialization of the stored entries (characters, approximating bytes).
     */
    getMemoryUsage(): number;
}
|
|
142
|
+
export {};
|
|
143
|
+
//# sourceMappingURL=ExperienceReplayBuffer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ExperienceReplayBuffer.d.ts","sourceRoot":"","sources":["../../src/learning/ExperienceReplayBuffer.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,cAAc,EAAE,MAAM,SAAS,CAAC;AAGzC;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,OAAO,CAAC;CACtB;AAWD;;GAEG;AACH,UAAU,qBAAqB;IAC7B,UAAU,EAAE,cAAc,CAAC;IAC3B,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;;;;GAKG;AACH,qBAAa,sBAAsB;IACjC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqB;IAC5C,OAAO,CAAC,MAAM,CAA0B;IACxC,OAAO,CAAC,gBAAgB,CAAS;gBAErB,MAAM,GAAE,OAAO,CAAC,kBAAkB,CAAM;IAOpD;;;OAGG;IACH,GAAG,CAAC,UAAU,EAAE,cAAc,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI;IAgBxD;;OAEG;IACH,QAAQ,CAAC,WAAW,EAAE,cAAc,EAAE,GAAG,IAAI;IAO7C;;;OAGG;IACH,MAAM,CAAC,SAAS,EAAE,MAAM,GAAG,cAAc,EAAE;IAe3C;;OAEG;IACH,OAAO,CAAC,aAAa;IAgBrB;;;OAGG;IACH,OAAO,CAAC,iBAAiB;IAqBzB;;;OAGG;IACH,cAAc,CAAC,YAAY,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,GAAG,OAAO;IAalE;;;OAGG;IACH,OAAO,CAAC,wBAAwB;IAUhC;;OAEG;IACH,SAAS,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO;IAIrC;;OAEG;IACH,SAAS,CAAC,KAAK,EAAE,MAAM,GAAG,cAAc,EAAE;IAO1C;;OAEG;IACH,SAAS,CAAC,KAAK,EAAE,MAAM,GAAG,cAAc,EAAE;IAO1C;;OAEG;IACH,MAAM,CAAC,SAAS,EAAE,CAAC,GAAG,EAAE,cAAc,KAAK,OAAO,GAAG,cAAc,EAAE;IAMrE;;OAEG;IACH,KAAK,IAAI,IAAI;IAKb;;OAEG;IACH,IAAI,IAAI,MAAM;IAId;;OAEG;IACH,OAAO,IAAI,OAAO;IAIlB;;OAEG;IACH,MAAM,IAAI,OAAO;IAIjB;;OAEG;IACH,mBAAmB,IAAI,MAAM;IAI7B;;OAEG;IACH,aAAa,IAAI;QACf,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,EAAE,MAAM,CAAC;QAChB,WAAW,EAAE,MAAM,CAAC;QACpB,UAAU,EAAE,MAAM,CAAC;QACnB,WAAW,EAAE,MAAM,CAAC;QACpB,SAAS,EAAE,MAAM,CAAC;KACnB;IAmBD;;OAEG;IACH,MAAM,IAAI;QACR,MAAM,EAAE,qBAAqB,EAAE,CAAC;QAChC,MAAM,EAAE,kBAAkB,CAAC;QAC3B,gBAAgB,EAAE,MAAM,CAAC;KAC1B;IAQD;;OAEG;IACH,MAAM,CAAC,KAAK,EAAE;QACZ,MAAM,EAAE,qBAAqB,EAAE,CAAC;QAChC,MAAM,EAAE,kBAAkB,CAAC;QAC3B,gBAAgB,EAAE,MAAM,CAAC;KAC1B,GAAG,IAAI;IAMR;;;OAGG;IACH,KAAK,CAAC,cAAc,EAAE,MAAM,GAAG,MAAM;IAYrC;;OAEG;IACH,cAAc,IAAI,MAAM;CAGzB"}
|
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* ExperienceReplayBuffer - Phase 2 (Milestone 2.2)
|
|
4
|
+
*
|
|
5
|
+
* Implements experience replay buffer for reinforcement learning.
|
|
6
|
+
* Stores transitions and enables batch sampling for training.
|
|
7
|
+
*/
|
|
8
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
9
|
+
exports.ExperienceReplayBuffer = void 0;
|
|
10
|
+
const Logger_1 = require("../utils/Logger");
|
|
11
|
+
/**
|
|
12
|
+
* Default replay buffer configuration
|
|
13
|
+
*/
|
|
14
|
+
const DEFAULT_CONFIG = {
    maxSize: 10000,
    minSize: 100,
    prioritized: false
};
/**
 * ExperienceReplayBuffer - FIFO buffer with optional prioritization
 *
 * Stores experiences wrapped as `{ experience, priority, timestamp }` entries,
 * oldest first. Supports uniform random sampling (without replacement) and
 * priority-weighted sampling (with replacement).
 */
class ExperienceReplayBuffer {
    /**
     * @param config Partial overrides merged over DEFAULT_CONFIG.
     */
    constructor(config = {}) {
        this.logger = Logger_1.Logger.getInstance();
        this.config = { ...DEFAULT_CONFIG, ...config };
        this.buffer = [];
        this.totalExperiences = 0;
    }
    /**
     * Add a new experience to the buffer.
     * Uses FIFO eviction when the buffer is full.
     *
     * @param experience Experience to store.
     * @param priority   Optional priority; when null/undefined a default is
     *                   derived from the experience's reward.
     */
    add(experience, priority) {
        const prioritizedExp = {
            experience,
            priority: priority ?? this.calculateDefaultPriority(experience),
            timestamp: Date.now()
        };
        // FIFO eviction: remove oldest when full
        if (this.buffer.length >= this.config.maxSize) {
            this.buffer.shift(); // Remove oldest (first element)
        }
        this.buffer.push(prioritizedExp);
        this.totalExperiences++;
    }
    /**
     * Add multiple experiences in order, each with its default priority.
     */
    addBatch(experiences) {
        for (const experience of experiences) {
            this.add(experience);
        }
        this.logger.debug(`Added batch of ${experiences.length} experiences`);
    }
    /**
     * Sample a batch of experiences using the configured strategy.
     *
     * @throws Error when the buffer holds fewer than
     *         max(batchSize, config.minSize) experiences.
     */
    sample(batchSize) {
        if (!this.canSample(batchSize)) {
            throw new Error(`Cannot sample: buffer has ${this.buffer.length} experiences, ` +
                `need at least ${Math.max(batchSize, this.config.minSize)}`);
        }
        if (this.config.prioritized) {
            return this.prioritizedSample(batchSize);
        }
        return this.uniformSample(batchSize);
    }
    /**
     * Uniform random sampling without replacement (default).
     * Callers must ensure batchSize does not exceed the buffer length
     * (sample() guarantees this), otherwise the loop cannot terminate.
     */
    uniformSample(batchSize) {
        const sampled = [];
        const indices = new Set();
        // Rejection sampling: redraw until enough distinct indices are found
        while (indices.size < batchSize) {
            const randomIndex = Math.floor(Math.random() * this.buffer.length);
            if (!indices.has(randomIndex)) {
                indices.add(randomIndex);
                sampled.push(this.buffer[randomIndex].experience);
            }
        }
        return sampled;
    }
    /**
     * Prioritized experience replay sampling (with replacement).
     * Each draw picks an entry with probability proportional to its priority.
     * Non-finite or non-positive priorities are treated as weight 0; if no
     * entry has a positive weight, draws fall back to uniform selection.
     * Always returns exactly batchSize experiences.
     */
    prioritizedSample(batchSize) {
        const sampled = [];
        // Sanitize once so a single NaN/negative priority cannot poison the total
        const weights = this.buffer.map(entry => Number.isFinite(entry.priority) && entry.priority > 0 ? entry.priority : 0);
        const totalPriority = weights.reduce((sum, weight) => sum + weight, 0);
        for (let i = 0; i < batchSize; i++) {
            if (!(totalPriority > 0)) {
                const fallbackIndex = Math.floor(Math.random() * this.buffer.length);
                sampled.push(this.buffer[fallbackIndex].experience);
                continue;
            }
            const threshold = Math.random() * totalPriority;
            let cumulativePriority = 0;
            // Ends on the selected index, or on the last positively weighted
            // index if floating-point rounding keeps the threshold unreachable.
            let chosen = -1;
            for (let j = 0; j < weights.length; j++) {
                if (weights[j] <= 0) {
                    continue;
                }
                chosen = j;
                cumulativePriority += weights[j];
                if (threshold < cumulativePriority) {
                    break;
                }
            }
            sampled.push(this.buffer[chosen].experience);
        }
        return sampled;
    }
    /**
     * Update the priority of the first stored experience whose `taskId`
     * equals experienceId.
     *
     * @returns true when an entry was updated, false when none matched.
     */
    updatePriority(experienceId, newPriority) {
        const index = this.buffer.findIndex(exp => exp.experience.taskId === experienceId);
        if (index === -1) {
            return false;
        }
        this.buffer[index].priority = newPriority;
        return true;
    }
    /**
     * Calculate the default priority from the reward magnitude.
     * Higher absolute rewards get higher priority; a small constant keeps the
     * priority strictly positive. A missing or non-finite reward counts as 0.
     */
    calculateDefaultPriority(experience) {
        const magnitude = Number.isFinite(experience.reward) ? Math.abs(experience.reward) : 0;
        return magnitude + 0.01; // Add small constant to avoid zero priority
    }
    /**
     * Check if buffer has enough experiences to sample
     */
    canSample(batchSize) {
        return this.buffer.length >= Math.max(batchSize, this.config.minSize);
    }
    /**
     * Get the most recent experiences (oldest first).
     * A count that is zero, negative or NaN yields an empty array.
     */
    getRecent(count) {
        const actualCount = Math.min(Math.floor(count), this.buffer.length);
        if (!(actualCount > 0)) {
            return [];
        }
        // Positive start index: slice(-0) would return the whole buffer
        return this.buffer
            .slice(this.buffer.length - actualCount)
            .map(exp => exp.experience);
    }
    /**
     * Get the oldest experiences (oldest first).
     * A count that is zero, negative or NaN yields an empty array.
     */
    getOldest(count) {
        const actualCount = Math.min(Math.floor(count), this.buffer.length);
        if (!(actualCount > 0)) {
            return [];
        }
        return this.buffer
            .slice(0, actualCount)
            .map(exp => exp.experience);
    }
    /**
     * Get all experiences matching a filter
     */
    filter(predicate) {
        return this.buffer
            .filter(exp => predicate(exp.experience))
            .map(exp => exp.experience);
    }
    /**
     * Clear the buffer. The total-added counter is intentionally kept.
     */
    clear() {
        this.buffer = [];
        this.logger.info('Experience replay buffer cleared');
    }
    /**
     * Get current buffer size
     */
    size() {
        return this.buffer.length;
    }
    /**
     * Check if buffer is empty
     */
    isEmpty() {
        return this.buffer.length === 0;
    }
    /**
     * Check if buffer is full
     */
    isFull() {
        return this.buffer.length >= this.config.maxSize;
    }
    /**
     * Get total number of experiences ever added (including evicted)
     */
    getTotalExperiences() {
        return this.totalExperiences;
    }
    /**
     * Get buffer statistics. Averages are 0 for an empty buffer and
     * utilization is 0 when maxSize is not positive.
     */
    getStatistics() {
        const count = this.buffer.length;
        let prioritySum = 0;
        let rewardSum = 0;
        for (const entry of this.buffer) {
            prioritySum += entry.priority;
            rewardSum += entry.experience.reward;
        }
        return {
            size: count,
            maxSize: this.config.maxSize,
            utilization: this.config.maxSize > 0 ? count / this.config.maxSize : 0,
            totalAdded: this.totalExperiences,
            avgPriority: count > 0 ? prioritySum / count : 0,
            avgReward: count > 0 ? rewardSum / count : 0
        };
    }
    /**
     * Export buffer state for persistence.
     * Entry wrappers are copied so later priority updates on this buffer do
     * not alter the exported snapshot (the experiences themselves are shared).
     */
    export() {
        return {
            buffer: this.buffer.map(entry => ({ ...entry })),
            config: { ...this.config },
            totalExperiences: this.totalExperiences
        };
    }
    /**
     * Import buffer state from persistence.
     * Replaces the entries and the total-added counter; `state.config` is not
     * applied. If the imported entries exceed this instance's maxSize, the
     * oldest ones are dropped so the capacity bound keeps holding.
     */
    import(state) {
        const incoming = state.buffer.map(entry => ({ ...entry }));
        const overflow = incoming.length - this.config.maxSize;
        this.buffer = overflow > 0 ? incoming.slice(overflow) : incoming;
        this.totalExperiences = state.totalExperiences;
        if (overflow > 0) {
            this.logger.info(`Dropped ${overflow} oldest imported experiences to respect maxSize`);
        }
        this.logger.info(`Imported replay buffer with ${this.buffer.length} experiences`);
    }
    /**
     * Prune old experiences beyond retention limit, keeping the most recent.
     * Negative retention counts are treated as 0 and fractions are floored.
     *
     * @returns Number of experiences removed.
     */
    prune(retentionCount) {
        const keep = Math.max(0, Math.floor(retentionCount));
        // Also false for NaN, in which case nothing is pruned
        if (!(keep < this.buffer.length)) {
            return 0;
        }
        const removeCount = this.buffer.length - keep;
        // Positive start index: slice(-0) would keep the whole buffer
        this.buffer = this.buffer.slice(removeCount);
        this.logger.info(`Pruned ${removeCount} old experiences from buffer`);
        return removeCount;
    }
    /**
     * Get a rough memory usage estimate: the length of the JSON serialization
     * of the stored entries (characters, approximating bytes).
     */
    getMemoryUsage() {
        return JSON.stringify(this.buffer).length;
    }
}
|
|
254
|
+
exports.ExperienceReplayBuffer = ExperienceReplayBuffer;
|
|
255
|
+
//# sourceMappingURL=ExperienceReplayBuffer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ExperienceReplayBuffer.js","sourceRoot":"","sources":["../../src/learning/ExperienceReplayBuffer.ts"],"names":[],"mappings":";AAAA;;;;;GAKG;;;AAGH,4CAAyC;AAWzC;;GAEG;AACH,MAAM,cAAc,GAAuB;IACzC,OAAO,EAAE,KAAK;IACd,OAAO,EAAE,GAAG;IACZ,WAAW,EAAE,KAAK;CACnB,CAAC;AAWF;;;;;GAKG;AACH,MAAa,sBAAsB;IAMjC,YAAY,SAAsC,EAAE;QAClD,IAAI,CAAC,MAAM,GAAG,eAAM,CAAC,WAAW,EAAE,CAAC;QACnC,IAAI,CAAC,MAAM,GAAG,EAAE,GAAG,cAAc,EAAE,GAAG,MAAM,EAAE,CAAC;QAC/C,IAAI,CAAC,MAAM,GAAG,EAAE,CAAC;QACjB,IAAI,CAAC,gBAAgB,GAAG,CAAC,CAAC;IAC5B,CAAC;IAED;;;OAGG;IACH,GAAG,CAAC,UAA0B,EAAE,QAAiB;QAC/C,MAAM,cAAc,GAA0B;YAC5C,UAAU;YACV,QAAQ,EAAE,QAAQ,IAAI,IAAI,CAAC,wBAAwB,CAAC,UAAU,CAAC;YAC/D,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;SACtB,CAAC;QAEF,yCAAyC;QACzC,IAAI,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YAC9C,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,gCAAgC;QACvD,CAAC;QAED,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QACjC,IAAI,CAAC,gBAAgB,EAAE,CAAC;IAC1B,CAAC;IAED;;OAEG;IACH,QAAQ,CAAC,WAA6B;QACpC,KAAK,MAAM,UAAU,IAAI,WAAW,EAAE,CAAC;YACrC,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QACvB,CAAC;QACD,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,kBAAkB,WAAW,CAAC,MAAM,cAAc,CAAC,CAAC;IACxE,CAAC;IAED;;;OAGG;IACH,MAAM,CAAC,SAAiB;QACtB,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,EAAE,CAAC;YAC/B,MAAM,IAAI,KAAK,CACb,6BAA6B,IAAI,CAAC,MAAM,CAAC,MAAM,gBAAgB;gBAC/D,iBAAiB,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,CAC5D,CAAC;QACJ,CAAC;QAED,IAAI,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;YAC5B,OAAO,IAAI,CAAC,iBAAiB,CAAC,SAAS,CAAC,CAAC;QAC3C,CAAC;aAAM,CAAC;YACN,OAAO,IAAI,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;QACvC,CAAC;IACH,CAAC;IAED;;OAEG;IACK,aAAa,CAAC,SAAiB;QACrC,MAAM,OAAO,GAAqB,EAAE,CAAC;QACrC,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;QAElC,6BAA6B;QAC7B,OAAO,OAAO,CAAC,IAAI,GAAG,SAAS,EAAE,CAAC;YAChC,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YACnE,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC,EAAE,CAAC;gBAC9B,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;gBACzB,OAAO,CAAC,IAAI,CAAC,IAAI,C
AAC,MAAM,CAAC,WAAW,CAAC,CAAC,UAAU,CAAC,CAAC;YACpD,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;;OAGG;IACK,iBAAiB,CAAC,SAAiB;QACzC,MAAM,OAAO,GAAqB,EAAE,CAAC;QACrC,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;QAE9E,8CAA8C;QAC9C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACnC,IAAI,MAAM,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,aAAa,CAAC;YAC3C,IAAI,kBAAkB,GAAG,CAAC,CAAC;YAE3B,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;gBAC9B,kBAAkB,IAAI,GAAG,CAAC,QAAQ,CAAC;gBACnC,IAAI,MAAM,IAAI,kBAAkB,EAAE,CAAC;oBACjC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;oBAC7B,MAAM;gBACR,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;;OAGG;IACH,cAAc,CAAC,YAAoB,EAAE,WAAmB;QACtD,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,CACjC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,UAAU,CAAC,MAAM,KAAK,YAAY,CAC9C,CAAC;QAEF,IAAI,KAAK,KAAK,CAAC,CAAC,EAAE,CAAC;YACjB,OAAO,KAAK,CAAC;QACf,CAAC;QAED,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,QAAQ,GAAG,WAAW,CAAC;QAC1C,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;;OAGG;IACK,wBAAwB,CAAC,UAA0B;QACzD,qFAAqF;QACrF,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC,GAAG,IAAI,CAAC,CAAC,4CAA4C;QAErG,sCAAsC;QACtC,MAAM,YAAY,GAAG,GAAG,CAAC;QAEzB,OAAO,YAAY,GAAG,YAAY,CAAC;IACrC,CAAC;IAED;;OAEG;IACH,SAAS,CAAC,SAAiB;QACzB,OAAO,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IACxE,CAAC;IAED;;OAEG;IACH,SAAS,CAAC,KAAa;QACrB,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QACxD,OAAO,IAAI,CAAC,MAAM;aACf,KAAK,CAAC,CAAC,WAAW,CAAC;aACnB,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;IAChC,CAAC;IAED;;OAEG;IACH,SAAS,CAAC,KAAa;QACrB,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QACxD,OAAO,IAAI,CAAC,MAAM;aACf,KAAK,CAAC,CAAC,EAAE,WAAW,CAAC;aACrB,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;IAChC,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,SAA2C;QAChD,OAAO,IAAI,CAAC,MAAM;aACf,MAAM,CAAC,GAAG,CAAC,
EAAE,CAAC,SAAS,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;aACxC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;IAChC,CAAC;IAED;;OAEG;IACH,KAAK;QACH,IAAI,CAAC,MAAM,GAAG,EAAE,CAAC;QACjB,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,kCAAkC,CAAC,CAAC;IACvD,CAAC;IAED;;OAEG;IACH,IAAI;QACF,OAAO,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC;IAC5B,CAAC;IAED;;OAEG;IACH,OAAO;QACL,OAAO,IAAI,CAAC,MAAM,CAAC,MAAM,KAAK,CAAC,CAAC;IAClC,CAAC;IAED;;OAEG;IACH,MAAM;QACJ,OAAO,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC;IACnD,CAAC;IAED;;OAEG;IACH,mBAAmB;QACjB,OAAO,IAAI,CAAC,gBAAgB,CAAC;IAC/B,CAAC;IAED;;OAEG;IACH,aAAa;QAQX,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC;YACxC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM;YAC9E,CAAC,CAAC,CAAC,CAAC;QAEN,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC;YACtC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM;YACvF,CAAC,CAAC,CAAC,CAAC;QAEN,OAAO;YACL,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM;YACxB,OAAO,EAAE,IAAI,CAAC,MAAM,CAAC,OAAO;YAC5B,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO;YACrD,UAAU,EAAE,IAAI,CAAC,gBAAgB;YACjC,WAAW;YACX,SAAS;SACV,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,MAAM;QAKJ,OAAO;YACL,MAAM,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC;YACxB,MAAM,EAAE,EAAE,GAAG,IAAI,CAAC,MAAM,EAAE;YAC1B,gBAAgB,EAAE,IAAI,CAAC,gBAAgB;SACxC,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,KAIN;QACC,IAAI,CAAC,MAAM,GAAG,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;QAChC,IAAI,CAAC,gBAAgB,GAAG,KAAK,CAAC,gBAAgB,CAAC;QAC/C,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,+BAA+B,IAAI,CAAC,MAAM,CAAC,MAAM,cAAc,CAAC,CAAC;IACpF,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,cAAsB;QAC1B,IAAI,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,cAAc,EAAE,CAAC;YACzC,OAAO,CAAC,CAAC;QACX,CAAC;QAED,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,cAAc,CAAC;QACxD,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,cAAc,CAAC,CAAC;QAEjD,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,UAAU
,WAAW,8BAA8B,CAAC,CAAC;QACtE,OAAO,WAAW,CAAC;IACrB,CAAC;IAED;;OAEG;IACH,cAAc;QACZ,OAAO,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IAC5C,CAAC;CACF;AAhSD,wDAgSC"}
|
|
@@ -5,7 +5,8 @@
|
|
|
5
5
|
* Uses Q-learning algorithm to optimize task execution strategies.
|
|
6
6
|
*/
|
|
7
7
|
import { SwarmMemoryManager } from '../core/memory/SwarmMemoryManager';
|
|
8
|
-
import {
|
|
8
|
+
import { QLearningConfig } from './QLearning';
|
|
9
|
+
import { LearningConfig, TaskExperience, TaskState, AgentAction, LearningFeedback, LearningOutcome, LearnedPattern, FailurePattern, StrategyRecommendation } from './types';
|
|
9
10
|
/**
|
|
10
11
|
* LearningEngine - Reinforcement learning for agents
|
|
11
12
|
*/
|
|
@@ -15,6 +16,8 @@ export declare class LearningEngine {
|
|
|
15
16
|
private readonly agentId;
|
|
16
17
|
private config;
|
|
17
18
|
private qTable;
|
|
19
|
+
private qLearning?;
|
|
20
|
+
private useQLearning;
|
|
18
21
|
private experiences;
|
|
19
22
|
private patterns;
|
|
20
23
|
private failurePatterns;
|
|
@@ -112,6 +115,14 @@ export declare class LearningEngine {
|
|
|
112
115
|
* Deserialize Q-table from storage
|
|
113
116
|
*/
|
|
114
117
|
private deserializeQTable;
|
|
118
|
+
/**
|
|
119
|
+
* Serialize Q-table for QLearning import (converts to QValue format)
|
|
120
|
+
*/
|
|
121
|
+
private serializeQTableForQLearning;
|
|
122
|
+
/**
|
|
123
|
+
* Deserialize Q-table from QLearning export (extracts values from QValue format)
|
|
124
|
+
*/
|
|
125
|
+
private deserializeQTableFromQLearning;
|
|
115
126
|
/**
|
|
116
127
|
* Calculate state size in bytes
|
|
117
128
|
*/
|
|
@@ -140,5 +151,43 @@ export declare class LearningEngine {
|
|
|
140
151
|
* Check if learning is enabled
|
|
141
152
|
*/
|
|
142
153
|
isEnabled(): boolean;
|
|
154
|
+
/**
|
|
155
|
+
* Enable Q-learning mode (Phase 2 Integration)
|
|
156
|
+
* Switches from basic Q-table to full QLearning algorithm with experience replay
|
|
157
|
+
*/
|
|
158
|
+
enableQLearning(config?: Partial<QLearningConfig>): void;
|
|
159
|
+
/**
|
|
160
|
+
* Disable Q-learning mode (revert to basic implementation)
|
|
161
|
+
*/
|
|
162
|
+
disableQLearning(): void;
|
|
163
|
+
/**
|
|
164
|
+
* Learn from experience using Q-learning (when enabled)
|
|
165
|
+
* This method integrates with the QLearning algorithm
|
|
166
|
+
*/
|
|
167
|
+
learnFromExperience(experience: TaskExperience): Promise<void>;
|
|
168
|
+
/**
|
|
169
|
+
* Select action with policy (Q-learning integration)
|
|
170
|
+
* Uses epsilon-greedy policy when Q-learning is enabled
|
|
171
|
+
*/
|
|
172
|
+
selectActionWithPolicy(state: TaskState, availableActions: AgentAction[]): Promise<AgentAction>;
|
|
173
|
+
/**
|
|
174
|
+
* Get Q-learning statistics (when enabled)
|
|
175
|
+
*/
|
|
176
|
+
getQLearningStats(): {
|
|
177
|
+
enabled: boolean;
|
|
178
|
+
stats?: {
|
|
179
|
+
steps: number;
|
|
180
|
+
episodes: number;
|
|
181
|
+
tableSize: number;
|
|
182
|
+
explorationRate: number;
|
|
183
|
+
avgQValue: number;
|
|
184
|
+
maxQValue: number;
|
|
185
|
+
minQValue: number;
|
|
186
|
+
};
|
|
187
|
+
};
|
|
188
|
+
/**
|
|
189
|
+
* Check if Q-learning mode is enabled
|
|
190
|
+
*/
|
|
191
|
+
isQLearningEnabled(): boolean;
|
|
143
192
|
}
|
|
144
193
|
//# sourceMappingURL=LearningEngine.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"LearningEngine.d.ts","sourceRoot":"","sources":["../../src/learning/LearningEngine.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH,OAAO,EAAE,kBAAkB,EAAE,MAAM,mCAAmC,CAAC;
|
|
1
|
+
{"version":3,"file":"LearningEngine.d.ts","sourceRoot":"","sources":["../../src/learning/LearningEngine.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH,OAAO,EAAE,kBAAkB,EAAE,MAAM,mCAAmC,CAAC;AACvE,OAAO,EAAa,eAAe,EAAE,MAAM,aAAa,CAAC;AAKzD,OAAO,EACL,cAAc,EACd,cAAc,EACd,SAAS,EACT,WAAW,EACX,gBAAgB,EAChB,eAAe,EACf,cAAc,EAEd,cAAc,EACd,sBAAsB,EAEvB,MAAM,SAAS,CAAC;AAiBjB;;GAEG;AACH,qBAAa,cAAc;IACzB,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAqB;IACjD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,MAAM,CAAiB;IAC/B,OAAO,CAAC,MAAM,CAAmC;IACjD,OAAO,CAAC,SAAS,CAAC,CAAY;IAC9B,OAAO,CAAC,YAAY,CAAU;IAC9B,OAAO,CAAC,WAAW,CAAmB;IACtC,OAAO,CAAC,QAAQ,CAA8B;IAC9C,OAAO,CAAC,eAAe,CAA8B;IACrD,OAAO,CAAC,SAAS,CAAS;gBAGxB,OAAO,EAAE,MAAM,EACf,WAAW,EAAE,kBAAkB,EAC/B,MAAM,GAAE,OAAO,CAAC,cAAc,CAAM;IActC;;OAEG;IACG,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAgBjC;;OAEG;IACG,kBAAkB,CACtB,IAAI,EAAE,GAAG,EACT,MAAM,EAAE,GAAG,EACX,QAAQ,CAAC,EAAE,gBAAgB,GAC1B,OAAO,CAAC,eAAe,CAAC;IAqD3B;;OAEG;IACG,iBAAiB,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAAC,sBAAsB,CAAC;IAoD1E;;OAEG;IACH,WAAW,IAAI,cAAc,EAAE;IAK/B;;OAEG;IACH,kBAAkB,IAAI,cAAc,EAAE;IAKtC;;OAEG;IACH,OAAO,CAAC,eAAe;IAUvB;;OAEG;IACH,OAAO,CAAC,WAAW;IAKnB;;OAEG;IACH,OAAO,CAAC,YAAY;IAIpB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAyCzB;;OAEG;IACH,OAAO,CAAC,eAAe;IA+BvB;;OAEG;YACW,YAAY;IA4B1B;;OAEG;YACW,kBAAkB;IAgBhC;;OAEG;YACW,cAAc;IAoC5B;;OAEG;YACW,oBAAoB;IAoBlC;;OAEG;YACW,oBAAoB;IAwBlC;;OAEG;IACH,OAAO,CAAC,aAAa;IAiBrB;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAc1B;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAIzB;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAOxB;;OAEG;YACW,SAAS;IA6BvB;;OAEG;YACW,SAAS;IAmBvB;;OAEG;IACH,OAAO,CAAC,eAAe;IAQvB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAOzB;;OAEG;IACH,OAAO,CAAC,2BAA2B;IAiBnC;;OAEG;IACH,OAAO,CAAC,8BAA8B;IAWtC;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAQ1B;;OAEG;YACW,qBAAqB;IAiBnC;;OAEG;YACW,iBAAiB;IAiB/B;;OAEG;IACH,kBAAkB,IAAI,MAAM;IAI5B;;OAEG;IACH,mBAAmB,IAAI,MAAM;IAI7B;;OAEG;IACH,UAAU,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI;IAIlC;;OAEG;IACH,SAAS,IAAI,OAAO;IAIpB;;;OAGG;IACH,eAAe,CAAC,MAAM,CAAC,EAAE,OAAO,CAAC,e
AAe,CAAC,GAAG,IAAI;IAgCxD;;OAEG;IACH,gBAAgB,IAAI,IAAI;IAaxB;;;OAGG;IACG,mBAAmB,CAAC,UAAU,EAAE,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC;IA4BpE;;;OAGG;IACG,sBAAsB,CAAC,KAAK,EAAE,SAAS,EAAE,gBAAgB,EAAE,WAAW,EAAE,GAAG,OAAO,CAAC,WAAW,CAAC;IAiBrG;;OAEG;IACH,iBAAiB,IAAI;QACnB,OAAO,EAAE,OAAO,CAAC;QACjB,KAAK,CAAC,EAAE;YACN,KAAK,EAAE,MAAM,CAAC;YACd,QAAQ,EAAE,MAAM,CAAC;YACjB,SAAS,EAAE,MAAM,CAAC;YAClB,eAAe,EAAE,MAAM,CAAC;YACxB,SAAS,EAAE,MAAM,CAAC;YAClB,SAAS,EAAE,MAAM,CAAC;YAClB,SAAS,EAAE,MAAM,CAAC;SACnB,CAAC;KACH;IAWD;;OAEG;IACH,kBAAkB,IAAI,OAAO;CAG9B"}
|
|
@@ -9,6 +9,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
9
9
|
exports.LearningEngine = void 0;
|
|
10
10
|
const uuid_1 = require("uuid");
|
|
11
11
|
const Logger_1 = require("../utils/Logger");
|
|
12
|
+
const QLearning_1 = require("./QLearning");
|
|
12
13
|
// Import version from package.json to maintain consistency
|
|
13
14
|
const packageJson = require('../../package.json');
|
|
14
15
|
const PACKAGE_VERSION = packageJson.version;
|
|
@@ -36,6 +37,7 @@ class LearningEngine {
|
|
|
36
37
|
this.memoryStore = memoryStore;
|
|
37
38
|
this.config = { ...DEFAULT_CONFIG, ...config };
|
|
38
39
|
this.qTable = new Map();
|
|
40
|
+
this.useQLearning = false; // Default to legacy implementation
|
|
39
41
|
this.experiences = [];
|
|
40
42
|
this.patterns = new Map();
|
|
41
43
|
this.failurePatterns = new Map();
|
|
@@ -459,6 +461,38 @@ class LearningEngine {
|
|
|
459
461
|
this.qTable.set(state, new Map(Object.entries(actions)));
|
|
460
462
|
}
|
|
461
463
|
}
|
|
464
|
+
/**
|
|
465
|
+
* Serialize Q-table for QLearning import (converts to QValue format)
|
|
466
|
+
*/
|
|
467
|
+
serializeQTableForQLearning() {
|
|
468
|
+
const serialized = {};
|
|
469
|
+
for (const [state, actions] of this.qTable.entries()) {
|
|
470
|
+
serialized[state] = {};
|
|
471
|
+
for (const [action, value] of actions.entries()) {
|
|
472
|
+
serialized[state][action] = {
|
|
473
|
+
state,
|
|
474
|
+
action,
|
|
475
|
+
value,
|
|
476
|
+
updateCount: 1,
|
|
477
|
+
lastUpdated: Date.now()
|
|
478
|
+
};
|
|
479
|
+
}
|
|
480
|
+
}
|
|
481
|
+
return serialized;
|
|
482
|
+
}
|
|
483
|
+
/**
|
|
484
|
+
* Deserialize Q-table from QLearning export (extracts values from QValue format)
|
|
485
|
+
*/
|
|
486
|
+
deserializeQTableFromQLearning(data) {
|
|
487
|
+
this.qTable.clear();
|
|
488
|
+
for (const [state, actions] of Object.entries(data)) {
|
|
489
|
+
const actionMap = new Map();
|
|
490
|
+
for (const [action, qValue] of Object.entries(actions)) {
|
|
491
|
+
actionMap.set(action, qValue.value);
|
|
492
|
+
}
|
|
493
|
+
this.qTable.set(state, actionMap);
|
|
494
|
+
}
|
|
495
|
+
}
|
|
462
496
|
/**
|
|
463
497
|
* Calculate state size in bytes
|
|
464
498
|
*/
|
|
@@ -529,6 +563,112 @@ class LearningEngine {
|
|
|
529
563
|
isEnabled() {
|
|
530
564
|
return this.config.enabled;
|
|
531
565
|
}
|
|
566
|
+
/**
|
|
567
|
+
* Enable Q-learning mode (Phase 2 Integration)
|
|
568
|
+
* Switches from basic Q-table to full QLearning algorithm with experience replay
|
|
569
|
+
*/
|
|
570
|
+
enableQLearning(config) {
|
|
571
|
+
const qLearningConfig = {
|
|
572
|
+
learningRate: this.config.learningRate,
|
|
573
|
+
discountFactor: this.config.discountFactor,
|
|
574
|
+
explorationRate: this.config.explorationRate,
|
|
575
|
+
explorationDecay: this.config.explorationDecay,
|
|
576
|
+
minExplorationRate: this.config.minExplorationRate,
|
|
577
|
+
useExperienceReplay: true,
|
|
578
|
+
replayBufferSize: 10000,
|
|
579
|
+
batchSize: this.config.batchSize,
|
|
580
|
+
...config
|
|
581
|
+
};
|
|
582
|
+
this.qLearning = new QLearning_1.QLearning(qLearningConfig);
|
|
583
|
+
this.useQLearning = true;
|
|
584
|
+
// Import existing Q-table into QLearning if we have data
|
|
585
|
+
if (this.qTable.size > 0) {
|
|
586
|
+
const serialized = this.serializeQTableForQLearning();
|
|
587
|
+
this.qLearning.import({
|
|
588
|
+
qTable: serialized,
|
|
589
|
+
config: qLearningConfig,
|
|
590
|
+
stepCount: this.taskCount,
|
|
591
|
+
episodeCount: Math.floor(this.taskCount / 10)
|
|
592
|
+
});
|
|
593
|
+
}
|
|
594
|
+
this.logger.info(`Q-learning mode enabled for agent ${this.agentId}`, {
|
|
595
|
+
config: qLearningConfig
|
|
596
|
+
});
|
|
597
|
+
}
|
|
598
|
+
/**
|
|
599
|
+
* Disable Q-learning mode (revert to basic implementation)
|
|
600
|
+
*/
|
|
601
|
+
disableQLearning() {
|
|
602
|
+
if (this.qLearning && this.useQLearning) {
|
|
603
|
+
// Export Q-learning state to basic Q-table
|
|
604
|
+
const exported = this.qLearning.export();
|
|
605
|
+
this.deserializeQTableFromQLearning(exported.qTable);
|
|
606
|
+
}
|
|
607
|
+
this.qLearning = undefined;
|
|
608
|
+
this.useQLearning = false;
|
|
609
|
+
this.logger.info(`Q-learning mode disabled for agent ${this.agentId}`);
|
|
610
|
+
}
|
|
611
|
+
/**
|
|
612
|
+
* Learn from experience using Q-learning (when enabled)
|
|
613
|
+
* This method integrates with the QLearning algorithm
|
|
614
|
+
*/
|
|
615
|
+
async learnFromExperience(experience) {
|
|
616
|
+
if (!this.config.enabled) {
|
|
617
|
+
return;
|
|
618
|
+
}
|
|
619
|
+
if (this.useQLearning && this.qLearning) {
|
|
620
|
+
// Use QLearning algorithm
|
|
621
|
+
this.qLearning.update(experience);
|
|
622
|
+
this.experiences.push(experience);
|
|
623
|
+
// Perform batch update periodically
|
|
624
|
+
if (this.taskCount % this.config.updateFrequency === 0) {
|
|
625
|
+
this.qLearning.batchUpdate();
|
|
626
|
+
}
|
|
627
|
+
// End episode periodically to trigger exploration decay
|
|
628
|
+
if (this.taskCount % 10 === 0) {
|
|
629
|
+
this.qLearning.endEpisode();
|
|
630
|
+
}
|
|
631
|
+
}
|
|
632
|
+
else {
|
|
633
|
+
// Use legacy Q-table implementation
|
|
634
|
+
await this.updateQTable(experience);
|
|
635
|
+
this.experiences.push(experience);
|
|
636
|
+
}
|
|
637
|
+
this.taskCount++;
|
|
638
|
+
}
|
|
639
|
+
/**
|
|
640
|
+
* Select action with policy (Q-learning integration)
|
|
641
|
+
* Uses epsilon-greedy policy when Q-learning is enabled
|
|
642
|
+
*/
|
|
643
|
+
async selectActionWithPolicy(state, availableActions) {
|
|
644
|
+
if (this.useQLearning && this.qLearning) {
|
|
645
|
+
// Use Q-learning's epsilon-greedy policy
|
|
646
|
+
return this.qLearning.selectAction(state, availableActions);
|
|
647
|
+
}
|
|
648
|
+
// Fallback to recommendation-based selection
|
|
649
|
+
const recommendation = await this.recommendStrategy(state);
|
|
650
|
+
// Find the action matching the recommended strategy
|
|
651
|
+
const matchingAction = availableActions.find(action => action.strategy === recommendation.strategy);
|
|
652
|
+
return matchingAction || availableActions[0];
|
|
653
|
+
}
|
|
654
|
+
/**
|
|
655
|
+
* Get Q-learning statistics (when enabled)
|
|
656
|
+
*/
|
|
657
|
+
getQLearningStats() {
|
|
658
|
+
if (!this.useQLearning || !this.qLearning) {
|
|
659
|
+
return { enabled: false };
|
|
660
|
+
}
|
|
661
|
+
return {
|
|
662
|
+
enabled: true,
|
|
663
|
+
stats: this.qLearning.getStatistics()
|
|
664
|
+
};
|
|
665
|
+
}
|
|
666
|
+
/**
|
|
667
|
+
* Check if Q-learning mode is enabled
|
|
668
|
+
*/
|
|
669
|
+
isQLearningEnabled() {
|
|
670
|
+
return this.useQLearning;
|
|
671
|
+
}
|
|
532
672
|
}
|
|
533
673
|
exports.LearningEngine = LearningEngine;
|
|
534
674
|
//# sourceMappingURL=LearningEngine.js.map
|