agentdb 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +30 -0
- package/dist/mcp/learning/core/experience-buffer.d.ts +61 -0
- package/dist/mcp/learning/core/experience-buffer.d.ts.map +1 -0
- package/dist/mcp/learning/core/experience-buffer.js +175 -0
- package/dist/mcp/learning/core/experience-buffer.js.map +1 -0
- package/dist/mcp/learning/core/experience-buffer.mjs +170 -0
- package/dist/mcp/learning/core/experience-recorder.d.ts +40 -0
- package/dist/mcp/learning/core/experience-recorder.d.ts.map +1 -0
- package/dist/mcp/learning/core/experience-recorder.js +200 -0
- package/dist/mcp/learning/core/experience-recorder.js.map +1 -0
- package/dist/mcp/learning/core/experience-recorder.mjs +195 -0
- package/dist/mcp/learning/core/learning-manager.d.ts +66 -0
- package/dist/mcp/learning/core/learning-manager.d.ts.map +1 -0
- package/dist/mcp/learning/core/learning-manager.js +252 -0
- package/dist/mcp/learning/core/learning-manager.js.map +1 -0
- package/dist/mcp/learning/core/learning-manager.mjs +247 -0
- package/dist/mcp/learning/core/policy-optimizer.d.ts +53 -0
- package/dist/mcp/learning/core/policy-optimizer.d.ts.map +1 -0
- package/dist/mcp/learning/core/policy-optimizer.js +251 -0
- package/dist/mcp/learning/core/policy-optimizer.js.map +1 -0
- package/dist/mcp/learning/core/policy-optimizer.mjs +246 -0
- package/dist/mcp/learning/core/reward-estimator.d.ts +44 -0
- package/dist/mcp/learning/core/reward-estimator.d.ts.map +1 -0
- package/dist/mcp/learning/core/reward-estimator.js +158 -0
- package/dist/mcp/learning/core/reward-estimator.js.map +1 -0
- package/dist/mcp/learning/core/reward-estimator.mjs +153 -0
- package/dist/mcp/learning/core/session-manager.d.ts +63 -0
- package/dist/mcp/learning/core/session-manager.d.ts.map +1 -0
- package/dist/mcp/learning/core/session-manager.js +202 -0
- package/dist/mcp/learning/core/session-manager.js.map +1 -0
- package/dist/mcp/learning/core/session-manager.mjs +197 -0
- package/dist/mcp/learning/index.d.ts +19 -0
- package/dist/mcp/learning/index.d.ts.map +1 -0
- package/dist/mcp/learning/index.js +30 -0
- package/dist/mcp/learning/index.js.map +1 -0
- package/dist/mcp/learning/index.mjs +19 -0
- package/dist/mcp/learning/tools/mcp-learning-tools.d.ts +369 -0
- package/dist/mcp/learning/tools/mcp-learning-tools.d.ts.map +1 -0
- package/dist/mcp/learning/tools/mcp-learning-tools.js +361 -0
- package/dist/mcp/learning/tools/mcp-learning-tools.js.map +1 -0
- package/dist/mcp/learning/tools/mcp-learning-tools.mjs +356 -0
- package/dist/mcp/learning/types/index.d.ts +138 -0
- package/dist/mcp/learning/types/index.d.ts.map +1 -0
- package/dist/mcp/learning/types/index.js +6 -0
- package/dist/mcp/learning/types/index.js.map +1 -0
- package/dist/mcp/learning/types/index.mjs +4 -0
- package/dist/mcp-server.d.ts +2 -0
- package/dist/mcp-server.d.ts.map +1 -1
- package/dist/mcp-server.js +72 -4
- package/dist/mcp-server.js.map +1 -1
- package/dist/mcp-server.mjs +72 -4
- package/examples/mcp-learning-example.ts +220 -0
- package/package.json +1 -1
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* LearningManager - Main orchestration layer for MCP learning integration
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.LearningManager = void 0;
|
|
7
|
+
const experience_recorder_js_1 = require("./experience-recorder.js");
|
|
8
|
+
const reward_estimator_js_1 = require("./reward-estimator.js");
|
|
9
|
+
const session_manager_js_1 = require("./session-manager.js");
|
|
10
|
+
const policy_optimizer_js_1 = require("./policy-optimizer.js");
|
|
11
|
+
class LearningManager {
|
|
12
|
+
constructor(db) {
|
|
13
|
+
this.policyOptimizers = new Map();
|
|
14
|
+
this.db = db;
|
|
15
|
+
this.experienceRecorder = new experience_recorder_js_1.ExperienceRecorder(db);
|
|
16
|
+
this.rewardEstimator = new reward_estimator_js_1.RewardEstimator();
|
|
17
|
+
this.sessionManager = new session_manager_js_1.SessionManager(db);
|
|
18
|
+
}
|
|
19
|
+
/**
|
|
20
|
+
* Start a new learning session
|
|
21
|
+
*/
|
|
22
|
+
async startSession(userId, sessionType, plugin = 'q-learning', config = {}) {
|
|
23
|
+
const session = await this.sessionManager.createSession(userId, sessionType, plugin, config);
|
|
24
|
+
// Initialize policy optimizer for this session
|
|
25
|
+
const optimizer = new policy_optimizer_js_1.PolicyOptimizer(config.learningRate || 0.1, config.discountFactor || 0.95, config.bufferSize || 10000);
|
|
26
|
+
this.policyOptimizers.set(session.sessionId, optimizer);
|
|
27
|
+
return session;
|
|
28
|
+
}
|
|
29
|
+
/**
|
|
30
|
+
* End a learning session
|
|
31
|
+
*/
|
|
32
|
+
async endSession(sessionId) {
|
|
33
|
+
// Export and save policy BEFORE ending session
|
|
34
|
+
const optimizer = this.policyOptimizers.get(sessionId);
|
|
35
|
+
if (optimizer) {
|
|
36
|
+
const policy = optimizer.exportPolicy();
|
|
37
|
+
await this.sessionManager.updateSessionPolicy(sessionId, policy);
|
|
38
|
+
this.policyOptimizers.delete(sessionId);
|
|
39
|
+
}
|
|
40
|
+
// Now end the session
|
|
41
|
+
const session = await this.sessionManager.endSession(sessionId);
|
|
42
|
+
return session;
|
|
43
|
+
}
|
|
44
|
+
/**
|
|
45
|
+
* Record a tool execution as learning experience
|
|
46
|
+
*/
|
|
47
|
+
async recordExperience(sessionId, toolName, args, result, outcome) {
|
|
48
|
+
const session = this.sessionManager.getSession(sessionId);
|
|
49
|
+
if (!session) {
|
|
50
|
+
throw new Error(`Session ${sessionId} not found`);
|
|
51
|
+
}
|
|
52
|
+
const context = {
|
|
53
|
+
userId: session.userId,
|
|
54
|
+
sessionId: session.sessionId,
|
|
55
|
+
taskType: session.sessionType,
|
|
56
|
+
timestamp: Date.now(),
|
|
57
|
+
isTerminal: outcome.success || !!outcome.error,
|
|
58
|
+
};
|
|
59
|
+
const experience = await this.experienceRecorder.recordToolExecution(toolName, args, result, context, outcome);
|
|
60
|
+
// Update policy with new experience
|
|
61
|
+
const optimizer = this.policyOptimizers.get(sessionId);
|
|
62
|
+
if (optimizer) {
|
|
63
|
+
await optimizer.updatePolicy(experience);
|
|
64
|
+
}
|
|
65
|
+
// Increment session experience count
|
|
66
|
+
this.sessionManager.incrementExperienceCount(sessionId);
|
|
67
|
+
return experience;
|
|
68
|
+
}
|
|
69
|
+
/**
|
|
70
|
+
* Predict next best action
|
|
71
|
+
*/
|
|
72
|
+
async predictAction(sessionId, currentState, availableTools) {
|
|
73
|
+
const optimizer = this.policyOptimizers.get(sessionId);
|
|
74
|
+
if (!optimizer) {
|
|
75
|
+
throw new Error(`No policy optimizer for session ${sessionId}`);
|
|
76
|
+
}
|
|
77
|
+
return await optimizer.predictAction(currentState, availableTools);
|
|
78
|
+
}
|
|
79
|
+
/**
|
|
80
|
+
* Provide user feedback on action
|
|
81
|
+
*/
|
|
82
|
+
async provideFeedback(sessionId, actionId, feedback) {
|
|
83
|
+
// Calculate feedback-adjusted reward
|
|
84
|
+
const normalizedRating = feedback.rating / 5.0; // Assume 0-5 scale
|
|
85
|
+
await this.experienceRecorder.updateExperienceReward(actionId, normalizedRating);
|
|
86
|
+
}
|
|
87
|
+
/**
|
|
88
|
+
* Train policy on collected experiences
|
|
89
|
+
*/
|
|
90
|
+
async train(sessionId, options = {}) {
|
|
91
|
+
const optimizer = this.policyOptimizers.get(sessionId);
|
|
92
|
+
if (!optimizer) {
|
|
93
|
+
throw new Error(`No policy optimizer for session ${sessionId}`);
|
|
94
|
+
}
|
|
95
|
+
return await optimizer.train(options);
|
|
96
|
+
}
|
|
97
|
+
/**
|
|
98
|
+
* Get learning metrics
|
|
99
|
+
*/
|
|
100
|
+
async getMetrics(sessionId, period = 'session') {
|
|
101
|
+
const experiences = await this.experienceRecorder.getSessionExperiences(sessionId);
|
|
102
|
+
if (experiences.length === 0) {
|
|
103
|
+
return {
|
|
104
|
+
period,
|
|
105
|
+
totalExperiences: 0,
|
|
106
|
+
averageReward: 0,
|
|
107
|
+
successRate: 0,
|
|
108
|
+
learningProgress: {
|
|
109
|
+
initial: 0,
|
|
110
|
+
current: 0,
|
|
111
|
+
improvement: '0%',
|
|
112
|
+
},
|
|
113
|
+
topActions: [],
|
|
114
|
+
};
|
|
115
|
+
}
|
|
116
|
+
// Calculate metrics
|
|
117
|
+
const rewards = experiences.map((exp) => exp.reward);
|
|
118
|
+
const avgReward = rewards.reduce((sum, r) => sum + r, 0) / rewards.length;
|
|
119
|
+
const successCount = experiences.filter((exp) => exp.reward > 0.5).length;
|
|
120
|
+
const successRate = successCount / experiences.length;
|
|
121
|
+
// Calculate learning progress (first 10 vs last 10)
|
|
122
|
+
const firstBatch = experiences.slice(0, 10);
|
|
123
|
+
const lastBatch = experiences.slice(-10);
|
|
124
|
+
const initialReward = firstBatch.reduce((sum, exp) => sum + exp.reward, 0) / firstBatch.length;
|
|
125
|
+
const currentReward = lastBatch.reduce((sum, exp) => sum + exp.reward, 0) / lastBatch.length;
|
|
126
|
+
const improvement = initialReward > 0
|
|
127
|
+
? (((currentReward - initialReward) / initialReward) * 100).toFixed(1)
|
|
128
|
+
: '0';
|
|
129
|
+
// Calculate top actions
|
|
130
|
+
const actionStats = new Map();
|
|
131
|
+
for (const exp of experiences) {
|
|
132
|
+
const tool = exp.action.tool;
|
|
133
|
+
const stats = actionStats.get(tool) || {
|
|
134
|
+
count: 0,
|
|
135
|
+
totalReward: 0,
|
|
136
|
+
successCount: 0,
|
|
137
|
+
};
|
|
138
|
+
stats.count++;
|
|
139
|
+
stats.totalReward += exp.reward;
|
|
140
|
+
if (exp.reward > 0.5)
|
|
141
|
+
stats.successCount++;
|
|
142
|
+
actionStats.set(tool, stats);
|
|
143
|
+
}
|
|
144
|
+
const topActions = Array.from(actionStats.entries())
|
|
145
|
+
.map(([tool, stats]) => ({
|
|
146
|
+
tool,
|
|
147
|
+
successRate: stats.successCount / stats.count,
|
|
148
|
+
avgReward: stats.totalReward / stats.count,
|
|
149
|
+
count: stats.count,
|
|
150
|
+
}))
|
|
151
|
+
.sort((a, b) => b.avgReward - a.avgReward)
|
|
152
|
+
.slice(0, 5);
|
|
153
|
+
return {
|
|
154
|
+
period,
|
|
155
|
+
totalExperiences: experiences.length,
|
|
156
|
+
averageReward: avgReward,
|
|
157
|
+
successRate,
|
|
158
|
+
learningProgress: {
|
|
159
|
+
initial: initialReward,
|
|
160
|
+
current: currentReward,
|
|
161
|
+
improvement: `${improvement}%`,
|
|
162
|
+
},
|
|
163
|
+
topActions,
|
|
164
|
+
};
|
|
165
|
+
}
|
|
166
|
+
/**
|
|
167
|
+
* Transfer learning between tasks
|
|
168
|
+
*/
|
|
169
|
+
async transferLearning(sourceSessionId, targetSessionId, similarity = 0.7) {
|
|
170
|
+
const sourceOptimizer = this.policyOptimizers.get(sourceSessionId);
|
|
171
|
+
const targetOptimizer = this.policyOptimizers.get(targetSessionId);
|
|
172
|
+
if (!sourceOptimizer || !targetOptimizer) {
|
|
173
|
+
throw new Error('Source or target session not found');
|
|
174
|
+
}
|
|
175
|
+
// Export source policy
|
|
176
|
+
const sourcePolicy = sourceOptimizer.exportPolicy();
|
|
177
|
+
// Import into target (with similarity-based weighting)
|
|
178
|
+
const targetPolicy = targetOptimizer.exportPolicy();
|
|
179
|
+
// Merge policies (simplified - in production would use more sophisticated transfer)
|
|
180
|
+
const mergedQTable = { ...targetPolicy.qTable };
|
|
181
|
+
for (const [stateKey, actions] of Object.entries(sourcePolicy.qTable)) {
|
|
182
|
+
if (!mergedQTable[stateKey]) {
|
|
183
|
+
mergedQTable[stateKey] = {};
|
|
184
|
+
}
|
|
185
|
+
for (const [action, value] of Object.entries(actions)) {
|
|
186
|
+
const currentValue = mergedQTable[stateKey][action] || 0;
|
|
187
|
+
// Weighted average based on similarity
|
|
188
|
+
mergedQTable[stateKey][action] =
|
|
189
|
+
currentValue * (1 - similarity) + value * similarity;
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
targetOptimizer.importPolicy({ ...targetPolicy, qTable: mergedQTable });
|
|
193
|
+
const sourceSession = this.sessionManager.getSession(sourceSessionId);
|
|
194
|
+
const targetSession = this.sessionManager.getSession(targetSessionId);
|
|
195
|
+
return {
|
|
196
|
+
sourceTask: sourceSession?.sessionType || 'unknown',
|
|
197
|
+
targetTask: targetSession?.sessionType || 'unknown',
|
|
198
|
+
similarity,
|
|
199
|
+
transferSuccess: true,
|
|
200
|
+
performanceGain: similarity * 0.3, // Estimated gain
|
|
201
|
+
experiencesTransferred: Object.keys(sourcePolicy.qTable).length,
|
|
202
|
+
};
|
|
203
|
+
}
|
|
204
|
+
/**
|
|
205
|
+
* Explain a prediction
|
|
206
|
+
*/
|
|
207
|
+
async explainPrediction(sessionId, state) {
|
|
208
|
+
// Get similar experiences
|
|
209
|
+
const similarExperiences = await this.experienceRecorder.retrieveSimilarExperiences(state, 5);
|
|
210
|
+
// Calculate confidence factors
|
|
211
|
+
const confidenceFactors = {
|
|
212
|
+
experienceCount: Math.min(1.0, similarExperiences.length / 10),
|
|
213
|
+
avgReward: similarExperiences.reduce((sum, exp) => sum + exp.reward, 0) /
|
|
214
|
+
(similarExperiences.length || 1),
|
|
215
|
+
consistency: this.calculateConsistency(similarExperiences),
|
|
216
|
+
};
|
|
217
|
+
const reasoning = `Based on ${similarExperiences.length} similar past experiences with average reward ${confidenceFactors.avgReward.toFixed(2)}. Action consistency: ${(confidenceFactors.consistency * 100).toFixed(0)}%.`;
|
|
218
|
+
return {
|
|
219
|
+
reasoning,
|
|
220
|
+
similarExperiences,
|
|
221
|
+
confidenceFactors,
|
|
222
|
+
};
|
|
223
|
+
}
|
|
224
|
+
/**
|
|
225
|
+
* Calculate consistency of actions in similar experiences
|
|
226
|
+
*/
|
|
227
|
+
calculateConsistency(experiences) {
|
|
228
|
+
if (experiences.length === 0)
|
|
229
|
+
return 0;
|
|
230
|
+
const actionCounts = new Map();
|
|
231
|
+
for (const exp of experiences) {
|
|
232
|
+
const tool = exp.action.tool;
|
|
233
|
+
actionCounts.set(tool, (actionCounts.get(tool) || 0) + 1);
|
|
234
|
+
}
|
|
235
|
+
const maxCount = Math.max(...Array.from(actionCounts.values()));
|
|
236
|
+
return maxCount / experiences.length;
|
|
237
|
+
}
|
|
238
|
+
/**
|
|
239
|
+
* Get session info
|
|
240
|
+
*/
|
|
241
|
+
getSessionInfo(sessionId) {
|
|
242
|
+
return this.sessionManager.getSession(sessionId);
|
|
243
|
+
}
|
|
244
|
+
/**
|
|
245
|
+
* Restore sessions from database
|
|
246
|
+
*/
|
|
247
|
+
async restoreSessions(userId) {
|
|
248
|
+
return await this.sessionManager.restoreSessions(userId);
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
exports.LearningManager = LearningManager;
|
|
252
|
+
//# sourceMappingURL=learning-manager.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"learning-manager.js","sourceRoot":"","sources":["../../../../src/mcp/learning/core/learning-manager.ts"],"names":[],"mappings":";AAAA;;GAEG;;;AAgBH,qEAA8D;AAC9D,+DAAwD;AACxD,6DAAsD;AACtD,+DAAwD;AAExD,MAAa,eAAe;IAO1B,YAAY,EAAkB;QAFtB,qBAAgB,GAAiC,IAAI,GAAG,EAAE,CAAC;QAGjE,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC;QACb,IAAI,CAAC,kBAAkB,GAAG,IAAI,2CAAkB,CAAC,EAAE,CAAC,CAAC;QACrD,IAAI,CAAC,eAAe,GAAG,IAAI,qCAAe,EAAE,CAAC;QAC7C,IAAI,CAAC,cAAc,GAAG,IAAI,mCAAc,CAAC,EAAE,CAAC,CAAC;IAC/C,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,YAAY,CAChB,MAAc,EACd,WAA4D,EAC5D,SAAiB,YAAY,EAC7B,SAA8B,EAAE;QAEhC,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,aAAa,CACrD,MAAM,EACN,WAAW,EACX,MAAM,EACN,MAAM,CACP,CAAC;QAEF,+CAA+C;QAC/C,MAAM,SAAS,GAAG,IAAI,qCAAe,CACnC,MAAM,CAAC,YAAY,IAAI,GAAG,EAC1B,MAAM,CAAC,cAAc,IAAI,IAAI,EAC7B,MAAM,CAAC,UAAU,IAAI,KAAK,CAC3B,CAAC;QACF,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,OAAO,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;QAExD,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,UAAU,CAAC,SAAiB;QAChC,+CAA+C;QAC/C,MAAM,SAAS,GAAG,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QACvD,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,MAAM,GAAG,SAAS,CAAC,YAAY,EAAE,CAAC;YACxC,MAAM,IAAI,CAAC,cAAc,CAAC,mBAAmB,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;YACjE,IAAI,CAAC,gBAAgB,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAC1C,CAAC;QAED,sBAAsB;QACtB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC;QAChE,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,gBAAgB,CACpB,SAAiB,EACjB,QAAgB,EAChB,IAAS,EACT,MAAW,EACX,OAAgB;QAEhB,MAAM,OAAO,GAAG,IAAI,CAAC,cAAc,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC;QAC1D,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CAAC,WAAW,SAAS,YAAY,CAAC,CAAC;QACpD,CAAC;QAED,MAAM,OAAO,GAAqB;YAChC,MAAM,EAAE,OAAO,CAAC,MAAM;YACtB,SAAS,EAAE,OAAO,CAAC,SAAS;YAC5B,QAAQ,EAAE,OAAO,CAAC,WAAW;YAC7B,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;YACrB,UAAU,EAAE,OAAO,CAAC,OAAO,IAAI,CAAC,CAAC,OAAO,CAAC,KAAK;SAC/C,CAAC;QAEF,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,kBAAkB,CAAC,mBAAmB,CAClE,QAAQ,EACR,IAAI,EACJ,MAAM,EACN,OAAO,EACP,OAAO,CACR,CAAC;QAEF,oCAAoC;QACpC,MAAM,SAAS,GAAG,IAAI
,CAAC,gBAAgB,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QACvD,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,SAAS,CAAC,YAAY,CAAC,UAAU,CAAC,CAAC;QAC3C,CAAC;QAED,qCAAqC;QACrC,IAAI,CAAC,cAAc,CAAC,wBAAwB,CAAC,SAAS,CAAC,CAAC;QAExD,OAAO,UAAU,CAAC;IACpB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,aAAa,CACjB,SAAiB,EACjB,YAAmB,EACnB,cAAwB;QAExB,MAAM,SAAS,GAAG,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QACvD,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,mCAAmC,SAAS,EAAE,CAAC,CAAC;QAClE,CAAC;QAED,OAAO,MAAM,SAAS,CAAC,aAAa,CAAC,YAAY,EAAE,cAAc,CAAC,CAAC;IACrE,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,eAAe,CACnB,SAAiB,EACjB,QAAgB,EAChB,QAAuB;QAEvB,qCAAqC;QACrC,MAAM,gBAAgB,GAAG,QAAQ,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,mBAAmB;QAEnE,MAAM,IAAI,CAAC,kBAAkB,CAAC,sBAAsB,CAClD,QAAQ,EACR,gBAAgB,CACjB,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,KAAK,CACT,SAAiB,EACjB,UAA2B,EAAE;QAE7B,MAAM,SAAS,GAAG,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QACvD,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,mCAAmC,SAAS,EAAE,CAAC,CAAC;QAClE,CAAC;QAED,OAAO,MAAM,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACxC,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,UAAU,CACd,SAAiB,EACjB,SAAuD,SAAS;QAEhE,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,kBAAkB,CAAC,qBAAqB,CACrE,SAAS,CACV,CAAC;QAEF,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC7B,OAAO;gBACL,MAAM;gBACN,gBAAgB,EAAE,CAAC;gBACnB,aAAa,EAAE,CAAC;gBAChB,WAAW,EAAE,CAAC;gBACd,gBAAgB,EAAE;oBAChB,OAAO,EAAE,CAAC;oBACV,OAAO,EAAE,CAAC;oBACV,WAAW,EAAE,IAAI;iBAClB;gBACD,UAAU,EAAE,EAAE;aACf,CAAC;QACJ,CAAC;QAED,oBAAoB;QACpB,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QACrD,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;QAE1E,MAAM,YAAY,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,MAAM,CAAC;QAC1E,MAAM,WAAW,GAAG,YAAY,GAAG,WAAW,CAAC,MAAM,CAAC;QAEtD,oDAAoD;QACpD,MAAM,UAAU,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAC5C,MAAM,SAAS,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;QACzC,MAAM,aAAa,GACjB,UAAU,CAAC,
MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC,GAAG,UAAU,CAAC,MAAM,CAAC;QAC3E,MAAM,aAAa,GACjB,SAAS,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC,GAAG,SAAS,CAAC,MAAM,CAAC;QACzE,MAAM,WAAW,GACf,aAAa,GAAG,CAAC;YACf,CAAC,CAAC,CAAC,CAAC,CAAC,aAAa,GAAG,aAAa,CAAC,GAAG,aAAa,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;YACtE,CAAC,CAAC,GAAG,CAAC;QAEV,wBAAwB;QACxB,MAAM,WAAW,GAAG,IAAI,GAAG,EAGxB,CAAC;QAEJ,KAAK,MAAM,GAAG,IAAI,WAAW,EAAE,CAAC;YAC9B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC;YAC7B,MAAM,KAAK,GAAG,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI;gBACrC,KAAK,EAAE,CAAC;gBACR,WAAW,EAAE,CAAC;gBACd,YAAY,EAAE,CAAC;aAChB,CAAC;YACF,KAAK,CAAC,KAAK,EAAE,CAAC;YACd,KAAK,CAAC,WAAW,IAAI,GAAG,CAAC,MAAM,CAAC;YAChC,IAAI,GAAG,CAAC,MAAM,GAAG,GAAG;gBAAE,KAAK,CAAC,YAAY,EAAE,CAAC;YAC3C,WAAW,CAAC,GAAG,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;QAC/B,CAAC;QAED,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,CAAC;aACjD,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,CAAC;YACvB,IAAI;YACJ,WAAW,EAAE,KAAK,CAAC,YAAY,GAAG,KAAK,CAAC,KAAK;YAC7C,SAAS,EAAE,KAAK,CAAC,WAAW,GAAG,KAAK,CAAC,KAAK;YAC1C,KAAK,EAAE,KAAK,CAAC,KAAK;SACnB,CAAC,CAAC;aACF,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC;aACzC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QAEf,OAAO;YACL,MAAM;YACN,gBAAgB,EAAE,WAAW,CAAC,MAAM;YACpC,aAAa,EAAE,SAAS;YACxB,WAAW;YACX,gBAAgB,EAAE;gBAChB,OAAO,EAAE,aAAa;gBACtB,OAAO,EAAE,aAAa;gBACtB,WAAW,EAAE,GAAG,WAAW,GAAG;aAC/B;YACD,UAAU;SACX,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,gBAAgB,CACpB,eAAuB,EACvB,eAAuB,EACvB,aAAqB,GAAG;QAExB,MAAM,eAAe,GAAG,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC;QACnE,MAAM,eAAe,GAAG,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC;QAEnE,IAAI,CAAC,eAAe,IAAI,CAAC,eAAe,EAAE,CAAC;YACzC,MAAM,IAAI,KAAK,CAAC,oCAAoC,CAAC,CAAC;QACxD,CAAC;QAED,uBAAuB;QACvB,MAAM,YAAY,GAAG,eAAe,CAAC,YAAY,EAAE,CAAC;QAEpD,uDAAuD;QACvD,MAAM,YAAY,GAAG,eAAe,CAAC,YAAY,EAAE,CAAC;QAEpD,oFAAoF;QACpF,MAAM,YAAY,GAAQ,EAAE,G
AAG,YAAY,CAAC,MAAM,EAAE,CAAC;QAErD,KAAK,MAAM,CAAC,QAAQ,EAAE,OAAO,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,YAAY,CAAC,MAAM,CAAC,EAAE,CAAC;YACtE,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC5B,YAAY,CAAC,QAAQ,CAAC,GAAG,EAAE,CAAC;YAC9B,CAAC;YAED,KAAK,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAc,CAAC,EAAE,CAAC;gBAC7D,MAAM,YAAY,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;gBACzD,uCAAuC;gBACvC,YAAY,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC;oBAC5B,YAAY,GAAG,CAAC,CAAC,GAAG,UAAU,CAAC,GAAI,KAAgB,GAAG,UAAU,CAAC;YACrE,CAAC;QACH,CAAC;QAED,eAAe,CAAC,YAAY,CAAC,EAAE,GAAG,YAAY,EAAE,MAAM,EAAE,YAAY,EAAE,CAAC,CAAC;QAExE,MAAM,aAAa,GAAG,IAAI,CAAC,cAAc,CAAC,UAAU,CAAC,eAAe,CAAC,CAAC;QACtE,MAAM,aAAa,GAAG,IAAI,CAAC,cAAc,CAAC,UAAU,CAAC,eAAe,CAAC,CAAC;QAEtE,OAAO;YACL,UAAU,EAAE,aAAa,EAAE,WAAW,IAAI,SAAS;YACnD,UAAU,EAAE,aAAa,EAAE,WAAW,IAAI,SAAS;YACnD,UAAU;YACV,eAAe,EAAE,IAAI;YACrB,eAAe,EAAE,UAAU,GAAG,GAAG,EAAE,iBAAiB;YACpD,sBAAsB,EAAE,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC,MAAM;SAChE,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,iBAAiB,CACrB,SAAiB,EACjB,KAAY;QAMZ,0BAA0B;QAC1B,MAAM,kBAAkB,GACtB,MAAM,IAAI,CAAC,kBAAkB,CAAC,0BAA0B,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;QAErE,+BAA+B;QAC/B,MAAM,iBAAiB,GAA2B;YAChD,eAAe,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,kBAAkB,CAAC,MAAM,GAAG,EAAE,CAAC;YAC9D,SAAS,EACP,kBAAkB,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC;gBAC5D,CAAC,kBAAkB,CAAC,MAAM,IAAI,CAAC,CAAC;YAClC,WAAW,EAAE,IAAI,CAAC,oBAAoB,CAAC,kBAAkB,CAAC;SAC3D,CAAC;QAEF,MAAM,SAAS,GAAG,YAAY,kBAAkB,CAAC,MAAM,iDAAiD,iBAAiB,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,yBAAyB,CAAC,iBAAiB,CAAC,WAAW,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC;QAE5N,OAAO;YACL,SAAS;YACT,kBAAkB;YAClB,iBAAiB;SAClB,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,oBAAoB,CAAC,WAAyB;QACpD,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,CAAC,CAAC;QAEvC,MAAM,YAAY,GAAG,IAAI,GAAG,EAAkB,CAAC;QAC/C,KAAK,MAAM,GAAG,IAAI,WAAW,EAAE,CAAC;YAC9B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC;YAC7B,YAAY,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,YAAY,CAAC,GAAG,CAAC,I
AAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAC5D,CAAC;QAED,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QAChE,OAAO,QAAQ,GAAG,WAAW,CAAC,MAAM,CAAC;IACvC,CAAC;IAED;;OAEG;IACH,cAAc,CAAC,SAAiB;QAC9B,OAAO,IAAI,CAAC,cAAc,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC;IACnD,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,eAAe,CAAC,MAAe;QACnC,OAAO,MAAM,IAAI,CAAC,cAAc,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC;IAC3D,CAAC;CACF;AA/VD,0CA+VC"}
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LearningManager - Main orchestration layer for MCP learning integration
|
|
3
|
+
*/
|
|
4
|
+
import { ExperienceRecorder } from './experience-recorder.mjs';
|
|
5
|
+
import { RewardEstimator } from './reward-estimator.mjs';
|
|
6
|
+
import { SessionManager } from './session-manager.mjs';
|
|
7
|
+
import { PolicyOptimizer } from './policy-optimizer.mjs';
|
|
8
|
+
export class LearningManager {
|
|
9
|
+
constructor(db) {
|
|
10
|
+
this.policyOptimizers = new Map();
|
|
11
|
+
this.db = db;
|
|
12
|
+
this.experienceRecorder = new ExperienceRecorder(db);
|
|
13
|
+
this.rewardEstimator = new RewardEstimator();
|
|
14
|
+
this.sessionManager = new SessionManager(db);
|
|
15
|
+
}
|
|
16
|
+
/**
|
|
17
|
+
* Start a new learning session
|
|
18
|
+
*/
|
|
19
|
+
async startSession(userId, sessionType, plugin = 'q-learning', config = {}) {
|
|
20
|
+
const session = await this.sessionManager.createSession(userId, sessionType, plugin, config);
|
|
21
|
+
// Initialize policy optimizer for this session
|
|
22
|
+
const optimizer = new PolicyOptimizer(config.learningRate || 0.1, config.discountFactor || 0.95, config.bufferSize || 10000);
|
|
23
|
+
this.policyOptimizers.set(session.sessionId, optimizer);
|
|
24
|
+
return session;
|
|
25
|
+
}
|
|
26
|
+
/**
|
|
27
|
+
* End a learning session
|
|
28
|
+
*/
|
|
29
|
+
async endSession(sessionId) {
|
|
30
|
+
// Export and save policy BEFORE ending session
|
|
31
|
+
const optimizer = this.policyOptimizers.get(sessionId);
|
|
32
|
+
if (optimizer) {
|
|
33
|
+
const policy = optimizer.exportPolicy();
|
|
34
|
+
await this.sessionManager.updateSessionPolicy(sessionId, policy);
|
|
35
|
+
this.policyOptimizers.delete(sessionId);
|
|
36
|
+
}
|
|
37
|
+
// Now end the session
|
|
38
|
+
const session = await this.sessionManager.endSession(sessionId);
|
|
39
|
+
return session;
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* Record a tool execution as learning experience
|
|
43
|
+
*/
|
|
44
|
+
async recordExperience(sessionId, toolName, args, result, outcome) {
|
|
45
|
+
const session = this.sessionManager.getSession(sessionId);
|
|
46
|
+
if (!session) {
|
|
47
|
+
throw new Error(`Session ${sessionId} not found`);
|
|
48
|
+
}
|
|
49
|
+
const context = {
|
|
50
|
+
userId: session.userId,
|
|
51
|
+
sessionId: session.sessionId,
|
|
52
|
+
taskType: session.sessionType,
|
|
53
|
+
timestamp: Date.now(),
|
|
54
|
+
isTerminal: outcome.success || !!outcome.error,
|
|
55
|
+
};
|
|
56
|
+
const experience = await this.experienceRecorder.recordToolExecution(toolName, args, result, context, outcome);
|
|
57
|
+
// Update policy with new experience
|
|
58
|
+
const optimizer = this.policyOptimizers.get(sessionId);
|
|
59
|
+
if (optimizer) {
|
|
60
|
+
await optimizer.updatePolicy(experience);
|
|
61
|
+
}
|
|
62
|
+
// Increment session experience count
|
|
63
|
+
this.sessionManager.incrementExperienceCount(sessionId);
|
|
64
|
+
return experience;
|
|
65
|
+
}
|
|
66
|
+
/**
|
|
67
|
+
* Predict next best action
|
|
68
|
+
*/
|
|
69
|
+
async predictAction(sessionId, currentState, availableTools) {
|
|
70
|
+
const optimizer = this.policyOptimizers.get(sessionId);
|
|
71
|
+
if (!optimizer) {
|
|
72
|
+
throw new Error(`No policy optimizer for session ${sessionId}`);
|
|
73
|
+
}
|
|
74
|
+
return await optimizer.predictAction(currentState, availableTools);
|
|
75
|
+
}
|
|
76
|
+
/**
|
|
77
|
+
* Provide user feedback on action
|
|
78
|
+
*/
|
|
79
|
+
async provideFeedback(sessionId, actionId, feedback) {
|
|
80
|
+
// Calculate feedback-adjusted reward
|
|
81
|
+
const normalizedRating = feedback.rating / 5.0; // Assume 0-5 scale
|
|
82
|
+
await this.experienceRecorder.updateExperienceReward(actionId, normalizedRating);
|
|
83
|
+
}
|
|
84
|
+
/**
|
|
85
|
+
* Train policy on collected experiences
|
|
86
|
+
*/
|
|
87
|
+
async train(sessionId, options = {}) {
|
|
88
|
+
const optimizer = this.policyOptimizers.get(sessionId);
|
|
89
|
+
if (!optimizer) {
|
|
90
|
+
throw new Error(`No policy optimizer for session ${sessionId}`);
|
|
91
|
+
}
|
|
92
|
+
return await optimizer.train(options);
|
|
93
|
+
}
|
|
94
|
+
/**
|
|
95
|
+
* Get learning metrics
|
|
96
|
+
*/
|
|
97
|
+
async getMetrics(sessionId, period = 'session') {
|
|
98
|
+
const experiences = await this.experienceRecorder.getSessionExperiences(sessionId);
|
|
99
|
+
if (experiences.length === 0) {
|
|
100
|
+
return {
|
|
101
|
+
period,
|
|
102
|
+
totalExperiences: 0,
|
|
103
|
+
averageReward: 0,
|
|
104
|
+
successRate: 0,
|
|
105
|
+
learningProgress: {
|
|
106
|
+
initial: 0,
|
|
107
|
+
current: 0,
|
|
108
|
+
improvement: '0%',
|
|
109
|
+
},
|
|
110
|
+
topActions: [],
|
|
111
|
+
};
|
|
112
|
+
}
|
|
113
|
+
// Calculate metrics
|
|
114
|
+
const rewards = experiences.map((exp) => exp.reward);
|
|
115
|
+
const avgReward = rewards.reduce((sum, r) => sum + r, 0) / rewards.length;
|
|
116
|
+
const successCount = experiences.filter((exp) => exp.reward > 0.5).length;
|
|
117
|
+
const successRate = successCount / experiences.length;
|
|
118
|
+
// Calculate learning progress (first 10 vs last 10)
|
|
119
|
+
const firstBatch = experiences.slice(0, 10);
|
|
120
|
+
const lastBatch = experiences.slice(-10);
|
|
121
|
+
const initialReward = firstBatch.reduce((sum, exp) => sum + exp.reward, 0) / firstBatch.length;
|
|
122
|
+
const currentReward = lastBatch.reduce((sum, exp) => sum + exp.reward, 0) / lastBatch.length;
|
|
123
|
+
const improvement = initialReward > 0
|
|
124
|
+
? (((currentReward - initialReward) / initialReward) * 100).toFixed(1)
|
|
125
|
+
: '0';
|
|
126
|
+
// Calculate top actions
|
|
127
|
+
const actionStats = new Map();
|
|
128
|
+
for (const exp of experiences) {
|
|
129
|
+
const tool = exp.action.tool;
|
|
130
|
+
const stats = actionStats.get(tool) || {
|
|
131
|
+
count: 0,
|
|
132
|
+
totalReward: 0,
|
|
133
|
+
successCount: 0,
|
|
134
|
+
};
|
|
135
|
+
stats.count++;
|
|
136
|
+
stats.totalReward += exp.reward;
|
|
137
|
+
if (exp.reward > 0.5)
|
|
138
|
+
stats.successCount++;
|
|
139
|
+
actionStats.set(tool, stats);
|
|
140
|
+
}
|
|
141
|
+
const topActions = Array.from(actionStats.entries())
|
|
142
|
+
.map(([tool, stats]) => ({
|
|
143
|
+
tool,
|
|
144
|
+
successRate: stats.successCount / stats.count,
|
|
145
|
+
avgReward: stats.totalReward / stats.count,
|
|
146
|
+
count: stats.count,
|
|
147
|
+
}))
|
|
148
|
+
.sort((a, b) => b.avgReward - a.avgReward)
|
|
149
|
+
.slice(0, 5);
|
|
150
|
+
return {
|
|
151
|
+
period,
|
|
152
|
+
totalExperiences: experiences.length,
|
|
153
|
+
averageReward: avgReward,
|
|
154
|
+
successRate,
|
|
155
|
+
learningProgress: {
|
|
156
|
+
initial: initialReward,
|
|
157
|
+
current: currentReward,
|
|
158
|
+
improvement: `${improvement}%`,
|
|
159
|
+
},
|
|
160
|
+
topActions,
|
|
161
|
+
};
|
|
162
|
+
}
|
|
163
|
+
/**
|
|
164
|
+
* Transfer learning between tasks
|
|
165
|
+
*/
|
|
166
|
+
async transferLearning(sourceSessionId, targetSessionId, similarity = 0.7) {
|
|
167
|
+
const sourceOptimizer = this.policyOptimizers.get(sourceSessionId);
|
|
168
|
+
const targetOptimizer = this.policyOptimizers.get(targetSessionId);
|
|
169
|
+
if (!sourceOptimizer || !targetOptimizer) {
|
|
170
|
+
throw new Error('Source or target session not found');
|
|
171
|
+
}
|
|
172
|
+
// Export source policy
|
|
173
|
+
const sourcePolicy = sourceOptimizer.exportPolicy();
|
|
174
|
+
// Import into target (with similarity-based weighting)
|
|
175
|
+
const targetPolicy = targetOptimizer.exportPolicy();
|
|
176
|
+
// Merge policies (simplified - in production would use more sophisticated transfer)
|
|
177
|
+
const mergedQTable = { ...targetPolicy.qTable };
|
|
178
|
+
for (const [stateKey, actions] of Object.entries(sourcePolicy.qTable)) {
|
|
179
|
+
if (!mergedQTable[stateKey]) {
|
|
180
|
+
mergedQTable[stateKey] = {};
|
|
181
|
+
}
|
|
182
|
+
for (const [action, value] of Object.entries(actions)) {
|
|
183
|
+
const currentValue = mergedQTable[stateKey][action] || 0;
|
|
184
|
+
// Weighted average based on similarity
|
|
185
|
+
mergedQTable[stateKey][action] =
|
|
186
|
+
currentValue * (1 - similarity) + value * similarity;
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
targetOptimizer.importPolicy({ ...targetPolicy, qTable: mergedQTable });
|
|
190
|
+
const sourceSession = this.sessionManager.getSession(sourceSessionId);
|
|
191
|
+
const targetSession = this.sessionManager.getSession(targetSessionId);
|
|
192
|
+
return {
|
|
193
|
+
sourceTask: sourceSession?.sessionType || 'unknown',
|
|
194
|
+
targetTask: targetSession?.sessionType || 'unknown',
|
|
195
|
+
similarity,
|
|
196
|
+
transferSuccess: true,
|
|
197
|
+
performanceGain: similarity * 0.3, // Estimated gain
|
|
198
|
+
experiencesTransferred: Object.keys(sourcePolicy.qTable).length,
|
|
199
|
+
};
|
|
200
|
+
}
|
|
201
|
+
/**
|
|
202
|
+
* Explain a prediction
|
|
203
|
+
*/
|
|
204
|
+
async explainPrediction(sessionId, state) {
|
|
205
|
+
// Get similar experiences
|
|
206
|
+
const similarExperiences = await this.experienceRecorder.retrieveSimilarExperiences(state, 5);
|
|
207
|
+
// Calculate confidence factors
|
|
208
|
+
const confidenceFactors = {
|
|
209
|
+
experienceCount: Math.min(1.0, similarExperiences.length / 10),
|
|
210
|
+
avgReward: similarExperiences.reduce((sum, exp) => sum + exp.reward, 0) /
|
|
211
|
+
(similarExperiences.length || 1),
|
|
212
|
+
consistency: this.calculateConsistency(similarExperiences),
|
|
213
|
+
};
|
|
214
|
+
const reasoning = `Based on ${similarExperiences.length} similar past experiences with average reward ${confidenceFactors.avgReward.toFixed(2)}. Action consistency: ${(confidenceFactors.consistency * 100).toFixed(0)}%.`;
|
|
215
|
+
return {
|
|
216
|
+
reasoning,
|
|
217
|
+
similarExperiences,
|
|
218
|
+
confidenceFactors,
|
|
219
|
+
};
|
|
220
|
+
}
|
|
221
|
+
/**
|
|
222
|
+
* Calculate consistency of actions in similar experiences
|
|
223
|
+
*/
|
|
224
|
+
calculateConsistency(experiences) {
|
|
225
|
+
if (experiences.length === 0)
|
|
226
|
+
return 0;
|
|
227
|
+
const actionCounts = new Map();
|
|
228
|
+
for (const exp of experiences) {
|
|
229
|
+
const tool = exp.action.tool;
|
|
230
|
+
actionCounts.set(tool, (actionCounts.get(tool) || 0) + 1);
|
|
231
|
+
}
|
|
232
|
+
const maxCount = Math.max(...Array.from(actionCounts.values()));
|
|
233
|
+
return maxCount / experiences.length;
|
|
234
|
+
}
|
|
235
|
+
/**
|
|
236
|
+
* Get session info
|
|
237
|
+
*/
|
|
238
|
+
getSessionInfo(sessionId) {
|
|
239
|
+
return this.sessionManager.getSession(sessionId);
|
|
240
|
+
}
|
|
241
|
+
/**
|
|
242
|
+
* Restore sessions from database
|
|
243
|
+
*/
|
|
244
|
+
async restoreSessions(userId) {
|
|
245
|
+
return await this.sessionManager.restoreSessions(userId);
|
|
246
|
+
}
|
|
247
|
+
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PolicyOptimizer - Optimizes action selection policy using reinforcement learning
|
|
3
|
+
*/
|
|
4
|
+
import type { State, Experience, ActionPrediction, TrainingOptions, TrainingMetrics } from '../types/index.js';
|
|
5
|
+
/**
 * Declaration of the policy optimizer used for action selection.
 *
 * Only the public surface is typed here; private members are listed by name
 * without types.
 */
export declare class PolicyOptimizer {
    private qTable;
    private learningRate;
    private discountFactor;
    private explorationRate;
    private experienceBuffer;
    /**
     * @param learningRate Optional learning rate.
     * @param discountFactor Optional discount factor.
     * @param bufferSize Optional size, presumably of the experience buffer.
     */
    constructor(learningRate?: number, discountFactor?: number, bufferSize?: number);
    /**
     * Predicts the best action for the current state.
     *
     * @param state State to choose an action for.
     * @param availableActions Names of the candidate actions.
     * @returns A promise resolving to the prediction.
     */
    predictAction(state: State, availableActions: string[]): Promise<ActionPrediction>;
    /**
     * Updates the policy from a single experience.
     *
     * @param experience Experience to learn from.
     */
    updatePolicy(experience: Experience): Promise<void>;
    /**
     * Trains the policy on a batch of experiences.
     *
     * @param options Optional training options.
     * @returns A promise resolving to the training metrics.
     */
    train(options?: TrainingOptions): Promise<TrainingMetrics>;
    /**
     * Reports policy statistics.
     *
     * @returns Counts of learned states and total experiences, plus the
     *   average Q-value.
     */
    getPolicyStats(): {
        statesLearned: number;
        totalExperiences: number;
        avgQValue: number;
    };
    /**
     * Exports the policy for persistence.
     *
     * @returns The exported policy data; its shape is untyped (`any`) here.
     */
    exportPolicy(): any;
    /**
     * Imports a policy from persistence.
     *
     * @param policyData Policy data to load; untyped (`any`) here.
     */
    importPolicy(policyData: any): void;
    /**
     * Encodes a state as a string key for the Q-table.
     */
    private encodeState;
    /**
     * Gets the experience count for a state.
     */
    private getExperienceCount;
    /**
     * Decays the exploration rate over time.
     *
     * @param decayRate Optional decay rate.
     */
    decayExploration(decayRate?: number): void;
}
|
|
53
|
+
//# sourceMappingURL=policy-optimizer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"policy-optimizer.d.ts","sourceRoot":"","sources":["../../../../src/mcp/learning/core/policy-optimizer.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EACV,KAAK,EAEL,UAAU,EACV,gBAAgB,EAChB,eAAe,EACf,eAAe,EAChB,MAAM,mBAAmB,CAAC;AAG3B,qBAAa,eAAe;IAC1B,OAAO,CAAC,MAAM,CAA+C;IAC7D,OAAO,CAAC,YAAY,CAAe;IACnC,OAAO,CAAC,cAAc,CAAgB;IACtC,OAAO,CAAC,eAAe,CAAe;IACtC,OAAO,CAAC,gBAAgB,CAAmB;gBAGzC,YAAY,GAAE,MAAY,EAC1B,cAAc,GAAE,MAAa,EAC7B,UAAU,GAAE,MAAc;IAO5B;;OAEG;IACG,aAAa,CACjB,KAAK,EAAE,KAAK,EACZ,gBAAgB,EAAE,MAAM,EAAE,GACzB,OAAO,CAAC,gBAAgB,CAAC;IAyD5B;;OAEG;IACG,YAAY,CAAC,UAAU,EAAE,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC;IAoCzD;;OAEG;IACG,KAAK,CAAC,OAAO,GAAE,eAAoB,GAAG,OAAO,CAAC,eAAe,CAAC;IAoFpE;;OAEG;IACH,cAAc,IAAI;QAChB,aAAa,EAAE,MAAM,CAAC;QACtB,gBAAgB,EAAE,MAAM,CAAC;QACzB,SAAS,EAAE,MAAM,CAAC;KACnB;IAkBD;;OAEG;IACH,YAAY,IAAI,GAAG;IAgBnB;;OAEG;IACH,YAAY,CAAC,UAAU,EAAE,GAAG,GAAG,IAAI;IAoBnC;;OAEG;IACH,OAAO,CAAC,WAAW;IAUnB;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAK1B;;OAEG;IACH,gBAAgB,CAAC,SAAS,GAAE,MAAc,GAAG,IAAI;CAGlD"}
|