agentdb 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/CHANGELOG.md +30 -0
  2. package/dist/mcp/learning/core/experience-buffer.d.ts +61 -0
  3. package/dist/mcp/learning/core/experience-buffer.d.ts.map +1 -0
  4. package/dist/mcp/learning/core/experience-buffer.js +175 -0
  5. package/dist/mcp/learning/core/experience-buffer.js.map +1 -0
  6. package/dist/mcp/learning/core/experience-buffer.mjs +170 -0
  7. package/dist/mcp/learning/core/experience-recorder.d.ts +40 -0
  8. package/dist/mcp/learning/core/experience-recorder.d.ts.map +1 -0
  9. package/dist/mcp/learning/core/experience-recorder.js +200 -0
  10. package/dist/mcp/learning/core/experience-recorder.js.map +1 -0
  11. package/dist/mcp/learning/core/experience-recorder.mjs +195 -0
  12. package/dist/mcp/learning/core/learning-manager.d.ts +66 -0
  13. package/dist/mcp/learning/core/learning-manager.d.ts.map +1 -0
  14. package/dist/mcp/learning/core/learning-manager.js +252 -0
  15. package/dist/mcp/learning/core/learning-manager.js.map +1 -0
  16. package/dist/mcp/learning/core/learning-manager.mjs +247 -0
  17. package/dist/mcp/learning/core/policy-optimizer.d.ts +53 -0
  18. package/dist/mcp/learning/core/policy-optimizer.d.ts.map +1 -0
  19. package/dist/mcp/learning/core/policy-optimizer.js +251 -0
  20. package/dist/mcp/learning/core/policy-optimizer.js.map +1 -0
  21. package/dist/mcp/learning/core/policy-optimizer.mjs +246 -0
  22. package/dist/mcp/learning/core/reward-estimator.d.ts +44 -0
  23. package/dist/mcp/learning/core/reward-estimator.d.ts.map +1 -0
  24. package/dist/mcp/learning/core/reward-estimator.js +158 -0
  25. package/dist/mcp/learning/core/reward-estimator.js.map +1 -0
  26. package/dist/mcp/learning/core/reward-estimator.mjs +153 -0
  27. package/dist/mcp/learning/core/session-manager.d.ts +63 -0
  28. package/dist/mcp/learning/core/session-manager.d.ts.map +1 -0
  29. package/dist/mcp/learning/core/session-manager.js +202 -0
  30. package/dist/mcp/learning/core/session-manager.js.map +1 -0
  31. package/dist/mcp/learning/core/session-manager.mjs +197 -0
  32. package/dist/mcp/learning/index.d.ts +19 -0
  33. package/dist/mcp/learning/index.d.ts.map +1 -0
  34. package/dist/mcp/learning/index.js +30 -0
  35. package/dist/mcp/learning/index.js.map +1 -0
  36. package/dist/mcp/learning/index.mjs +19 -0
  37. package/dist/mcp/learning/tools/mcp-learning-tools.d.ts +369 -0
  38. package/dist/mcp/learning/tools/mcp-learning-tools.d.ts.map +1 -0
  39. package/dist/mcp/learning/tools/mcp-learning-tools.js +361 -0
  40. package/dist/mcp/learning/tools/mcp-learning-tools.js.map +1 -0
  41. package/dist/mcp/learning/tools/mcp-learning-tools.mjs +356 -0
  42. package/dist/mcp/learning/types/index.d.ts +138 -0
  43. package/dist/mcp/learning/types/index.d.ts.map +1 -0
  44. package/dist/mcp/learning/types/index.js +6 -0
  45. package/dist/mcp/learning/types/index.js.map +1 -0
  46. package/dist/mcp/learning/types/index.mjs +4 -0
  47. package/dist/mcp-server.d.ts +2 -0
  48. package/dist/mcp-server.d.ts.map +1 -1
  49. package/dist/mcp-server.js +72 -4
  50. package/dist/mcp-server.js.map +1 -1
  51. package/dist/mcp-server.mjs +72 -4
  52. package/examples/mcp-learning-example.ts +220 -0
  53. package/package.json +1 -1
@@ -0,0 +1,252 @@
1
+ "use strict";
2
+ /**
3
+ * LearningManager - Main orchestration layer for MCP learning integration
4
+ */
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.LearningManager = void 0;
7
+ const experience_recorder_js_1 = require("./experience-recorder.js");
8
+ const reward_estimator_js_1 = require("./reward-estimator.js");
9
+ const session_manager_js_1 = require("./session-manager.js");
10
+ const policy_optimizer_js_1 = require("./policy-optimizer.js");
11
/**
 * LearningManager - orchestration layer for MCP learning integration.
 *
 * Owns one PolicyOptimizer per live session (keyed by session id) and
 * delegates persistence to the ExperienceRecorder and SessionManager it
 * constructs around the supplied database handle.
 */
class LearningManager {
    /**
     * @param db Database handle; stored on the instance and handed to the
     *   ExperienceRecorder and SessionManager constructors.
     */
    constructor(db) {
        // sessionId -> PolicyOptimizer for every session started through this manager.
        this.policyOptimizers = new Map();
        this.db = db;
        this.experienceRecorder = new experience_recorder_js_1.ExperienceRecorder(db);
        this.rewardEstimator = new reward_estimator_js_1.RewardEstimator();
        this.sessionManager = new session_manager_js_1.SessionManager(db);
    }
    /**
     * Start a new learning session and attach a fresh policy optimizer to it.
     *
     * @param userId Owner of the session (forwarded to the session manager).
     * @param sessionType Task type of the session (forwarded to the session manager).
     * @param plugin Learning plugin name; defaults to 'q-learning'.
     * @param config Optional hyperparameters: `learningRate` (default 0.1),
     *   `discountFactor` (default 0.95) and `bufferSize` (default 10000).
     * @returns The session object produced by the session manager.
     */
    async startSession(userId, sessionType, plugin = 'q-learning', config = {}) {
        const session = await this.sessionManager.createSession(userId, sessionType, plugin, config);
        // `??` (not `||`) for the rates: an explicit 0 is a legitimate setting
        // (frozen policy / fully myopic agent) and must not be replaced by the
        // default. A zero-sized buffer is still treated as "use the default".
        const optimizer = new policy_optimizer_js_1.PolicyOptimizer(config.learningRate ?? 0.1, config.discountFactor ?? 0.95, config.bufferSize || 10000);
        this.policyOptimizers.set(session.sessionId, optimizer);
        return session;
    }
    /**
     * End a learning session.
     *
     * If an optimizer is registered for the session, its exported policy is
     * saved through the session manager and the optimizer is dropped before
     * the session itself is ended.
     *
     * @param sessionId Id of the session to end.
     * @returns Whatever the session manager's `endSession` resolves to.
     */
    async endSession(sessionId) {
        const optimizer = this.policyOptimizers.get(sessionId);
        if (optimizer) {
            // Export and save the policy BEFORE the session is ended.
            const policy = optimizer.exportPolicy();
            await this.sessionManager.updateSessionPolicy(sessionId, policy);
            this.policyOptimizers.delete(sessionId);
        }
        return await this.sessionManager.endSession(sessionId);
    }
    /**
     * Record a tool execution as a learning experience and feed it to the
     * session's optimizer (when one is registered).
     *
     * @param sessionId Id of the session the execution belongs to.
     * @param toolName Name of the executed tool.
     * @param args Arguments the tool was called with.
     * @param result Result the tool produced.
     * @param outcome Outcome descriptor; `success` and `error` are read here.
     * @returns The experience created by the experience recorder.
     * @throws {Error} When the session manager does not know `sessionId`.
     */
    async recordExperience(sessionId, toolName, args, result, outcome) {
        const session = this.sessionManager.getSession(sessionId);
        if (!session) {
            throw new Error(`Session ${sessionId} not found`);
        }
        const context = {
            userId: session.userId,
            sessionId: session.sessionId,
            taskType: session.sessionType,
            timestamp: Date.now(),
            // A step is terminal once it either succeeded or reported an error.
            isTerminal: outcome.success || !!outcome.error,
        };
        const experience = await this.experienceRecorder.recordToolExecution(toolName, args, result, context, outcome);
        const optimizer = this.policyOptimizers.get(sessionId);
        if (optimizer) {
            await optimizer.updatePolicy(experience);
        }
        this.sessionManager.incrementExperienceCount(sessionId);
        return experience;
    }
    /**
     * Predict the next best action for a session.
     *
     * @param sessionId Id of a session with a registered optimizer.
     * @param currentState State to predict for.
     * @param availableTools Candidate tool names.
     * @returns The optimizer's prediction.
     * @throws {Error} When no optimizer is registered for `sessionId`.
     */
    async predictAction(sessionId, currentState, availableTools) {
        const optimizer = this.policyOptimizers.get(sessionId);
        if (!optimizer) {
            throw new Error(`No policy optimizer for session ${sessionId}`);
        }
        return await optimizer.predictAction(currentState, availableTools);
    }
    /**
     * Apply user feedback to a recorded action.
     *
     * The rating is interpreted on a 0-5 scale, normalized to [0, 1] and
     * clamped so an off-scale rating cannot push the stored reward outside
     * that range.
     *
     * @param sessionId Session id (currently unused by this method).
     * @param actionId Id of the experience whose reward is replaced.
     * @param feedback Feedback object; only `rating` is read.
     * @throws {TypeError} When `feedback.rating` is not a finite number, so a
     *   NaN reward is never written to the recorder.
     */
    async provideFeedback(sessionId, actionId, feedback) {
        const normalizedRating = feedback.rating / 5.0; // 0-5 scale
        if (!Number.isFinite(normalizedRating)) {
            throw new TypeError(`Invalid feedback rating: ${feedback.rating}`);
        }
        const boundedRating = Math.min(1, Math.max(0, normalizedRating));
        await this.experienceRecorder.updateExperienceReward(actionId, boundedRating);
    }
    /**
     * Train the session's policy on collected experiences.
     *
     * @param sessionId Id of a session with a registered optimizer.
     * @param options Training options forwarded to the optimizer.
     * @returns The optimizer's training result.
     * @throws {Error} When no optimizer is registered for `sessionId`.
     */
    async train(sessionId, options = {}) {
        const optimizer = this.policyOptimizers.get(sessionId);
        if (!optimizer) {
            throw new Error(`No policy optimizer for session ${sessionId}`);
        }
        return await optimizer.train(options);
    }
    /**
     * Compute learning metrics from the experiences recorded for a session.
     *
     * @param sessionId Id of the session to report on.
     * @param period Label echoed back in the result; it does not filter the
     *   experiences. Defaults to 'session'.
     * @returns Totals, average reward, success rate (reward > 0.5), learning
     *   progress (mean reward of the first 10 vs. the last 10 experiences)
     *   and the five tools with the highest average reward.
     */
    async getMetrics(sessionId, period = 'session') {
        const experiences = await this.experienceRecorder.getSessionExperiences(sessionId);
        if (experiences.length === 0) {
            return {
                period,
                totalExperiences: 0,
                averageReward: 0,
                successRate: 0,
                learningProgress: {
                    initial: 0,
                    current: 0,
                    improvement: '0%',
                },
                topActions: [],
            };
        }
        const meanReward = (batch) => batch.reduce((sum, exp) => sum + exp.reward, 0) / batch.length;
        const avgReward = meanReward(experiences);
        const successCount = experiences.filter((exp) => exp.reward > 0.5).length;
        const successRate = successCount / experiences.length;
        // Learning progress: first 10 vs last 10 (the windows overlap for short sessions).
        const initialReward = meanReward(experiences.slice(0, 10));
        const currentReward = meanReward(experiences.slice(-10));
        // Relative change against the magnitude of the baseline, so a negative
        // baseline still yields a correctly signed percentage. A zero (or NaN)
        // baseline has no meaningful relative change and reports '0'.
        const hasBaseline = initialReward > 0 || initialReward < 0;
        const improvement = hasBaseline
            ? (((currentReward - initialReward) / Math.abs(initialReward)) * 100).toFixed(1)
            : '0';
        // Per-tool aggregates.
        const actionStats = new Map();
        for (const exp of experiences) {
            const tool = exp.action.tool;
            const stats = actionStats.get(tool) || {
                count: 0,
                totalReward: 0,
                successCount: 0,
            };
            stats.count++;
            stats.totalReward += exp.reward;
            if (exp.reward > 0.5)
                stats.successCount++;
            actionStats.set(tool, stats);
        }
        const topActions = Array.from(actionStats.entries())
            .map(([tool, stats]) => ({
            tool,
            successRate: stats.successCount / stats.count,
            avgReward: stats.totalReward / stats.count,
            count: stats.count,
        }))
            .sort((a, b) => b.avgReward - a.avgReward)
            .slice(0, 5);
        return {
            period,
            totalExperiences: experiences.length,
            averageReward: avgReward,
            successRate,
            learningProgress: {
                initial: initialReward,
                current: currentReward,
                improvement: `${improvement}%`,
            },
            topActions,
        };
    }
    /**
     * Transfer learning between two live sessions by blending Q-tables.
     *
     * Every (state, action) value present in the source policy is merged into
     * the target as `target * (1 - similarity) + source * similarity`; entries
     * missing from the target count as 0. The merged table is built from fresh
     * objects, so the policies returned by `exportPolicy()` are not mutated.
     *
     * @param sourceSessionId Session to copy knowledge from.
     * @param targetSessionId Session that receives the merged policy.
     * @param similarity Blend weight given to the source values (default 0.7).
     * @returns A transfer summary. `performanceGain` is an estimate
     *   (`similarity * 0.3`) and `experiencesTransferred` is the number of
     *   states in the source Q-table.
     * @throws {Error} When either session has no registered optimizer.
     */
    async transferLearning(sourceSessionId, targetSessionId, similarity = 0.7) {
        const sourceOptimizer = this.policyOptimizers.get(sourceSessionId);
        const targetOptimizer = this.policyOptimizers.get(targetSessionId);
        if (!sourceOptimizer || !targetOptimizer) {
            throw new Error('Source or target session not found');
        }
        const sourcePolicy = sourceOptimizer.exportPolicy();
        const targetPolicy = targetOptimizer.exportPolicy();
        // Merge in Maps: lookups never fall through to Object.prototype (a state
        // key such as "constructor" is just a key) and the exported target rows
        // are copied rather than edited in place.
        const mergedRows = new Map();
        for (const [stateKey, actions] of Object.entries(targetPolicy.qTable || {})) {
            mergedRows.set(stateKey, new Map(Object.entries(actions || {})));
        }
        for (const [stateKey, actions] of Object.entries(sourcePolicy.qTable)) {
            let row = mergedRows.get(stateKey);
            if (!row) {
                row = new Map();
                mergedRows.set(stateKey, row);
            }
            for (const [action, value] of Object.entries(actions)) {
                const currentValue = row.get(action) || 0;
                // Weighted average based on similarity
                row.set(action, currentValue * (1 - similarity) + value * similarity);
            }
        }
        // Object.fromEntries defines own data properties for every key.
        const mergedQTable = Object.fromEntries(Array.from(mergedRows, ([stateKey, row]) => [stateKey, Object.fromEntries(row)]));
        targetOptimizer.importPolicy({ ...targetPolicy, qTable: mergedQTable });
        const sourceSession = this.sessionManager.getSession(sourceSessionId);
        const targetSession = this.sessionManager.getSession(targetSessionId);
        return {
            sourceTask: sourceSession?.sessionType || 'unknown',
            targetTask: targetSession?.sessionType || 'unknown',
            similarity,
            transferSuccess: true,
            performanceGain: similarity * 0.3, // Estimated gain
            experiencesTransferred: Object.keys(sourcePolicy.qTable).length,
        };
    }
    /**
     * Explain a prediction using similar past experiences.
     *
     * @param sessionId Session id (currently unused by this method).
     * @param state State to look up similar experiences for.
     * @returns A human-readable `reasoning` string, the (up to 5) similar
     *   experiences returned by the recorder, and the confidence factors
     *   derived from them.
     */
    async explainPrediction(sessionId, state) {
        const similarExperiences = await this.experienceRecorder.retrieveSimilarExperiences(state, 5);
        const confidenceFactors = {
            experienceCount: Math.min(1.0, similarExperiences.length / 10),
            // `|| 1` keeps the average at 0 (not NaN) when nothing similar was found.
            avgReward: similarExperiences.reduce((sum, exp) => sum + exp.reward, 0) /
                (similarExperiences.length || 1),
            consistency: this.calculateConsistency(similarExperiences),
        };
        const reasoning = `Based on ${similarExperiences.length} similar past experiences with average reward ${confidenceFactors.avgReward.toFixed(2)}. Action consistency: ${(confidenceFactors.consistency * 100).toFixed(0)}%.`;
        return {
            reasoning,
            similarExperiences,
            confidenceFactors,
        };
    }
    /**
     * Calculate how consistently the same tool was used across experiences.
     *
     * @param experiences Experiences exposing `action.tool`.
     * @returns Share of experiences that used the most frequent tool, in
     *   (0, 1]; 0 for an empty list.
     */
    calculateConsistency(experiences) {
        if (experiences.length === 0)
            return 0;
        const actionCounts = new Map();
        // Track the maximum while counting instead of spreading every count
        // into Math.max, which is bounded by the engine's argument limit.
        let maxCount = 0;
        for (const exp of experiences) {
            const tool = exp.action.tool;
            const count = (actionCounts.get(tool) || 0) + 1;
            actionCounts.set(tool, count);
            if (count > maxCount)
                maxCount = count;
        }
        return maxCount / experiences.length;
    }
    /**
     * Get session info.
     *
     * @param sessionId Id of the session to look up.
     * @returns The session manager's record for `sessionId`.
     */
    getSessionInfo(sessionId) {
        return this.sessionManager.getSession(sessionId);
    }
    /**
     * Restore sessions from the database.
     *
     * @param userId User id forwarded to the session manager.
     * @returns Whatever the session manager's `restoreSessions` resolves to.
     */
    async restoreSessions(userId) {
        return await this.sessionManager.restoreSessions(userId);
    }
}
251
+ exports.LearningManager = LearningManager;
252
+ //# sourceMappingURL=learning-manager.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"learning-manager.js","sourceRoot":"","sources":["../../../../src/mcp/learning/core/learning-manager.ts"],"names":[],"mappings":";AAAA;;GAEG;;;AAgBH,qEAA8D;AAC9D,+DAAwD;AACxD,6DAAsD;AACtD,+DAAwD;AAExD,MAAa,eAAe;IAO1B,YAAY,EAAkB;QAFtB,qBAAgB,GAAiC,IAAI,GAAG,EAAE,CAAC;QAGjE,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC;QACb,IAAI,CAAC,kBAAkB,GAAG,IAAI,2CAAkB,CAAC,EAAE,CAAC,CAAC;QACrD,IAAI,CAAC,eAAe,GAAG,IAAI,qCAAe,EAAE,CAAC;QAC7C,IAAI,CAAC,cAAc,GAAG,IAAI,mCAAc,CAAC,EAAE,CAAC,CAAC;IAC/C,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,YAAY,CAChB,MAAc,EACd,WAA4D,EAC5D,SAAiB,YAAY,EAC7B,SAA8B,EAAE;QAEhC,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,aAAa,CACrD,MAAM,EACN,WAAW,EACX,MAAM,EACN,MAAM,CACP,CAAC;QAEF,+CAA+C;QAC/C,MAAM,SAAS,GAAG,IAAI,qCAAe,CACnC,MAAM,CAAC,YAAY,IAAI,GAAG,EAC1B,MAAM,CAAC,cAAc,IAAI,IAAI,EAC7B,MAAM,CAAC,UAAU,IAAI,KAAK,CAC3B,CAAC;QACF,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,OAAO,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;QAExD,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,UAAU,CAAC,SAAiB;QAChC,+CAA+C;QAC/C,MAAM,SAAS,GAAG,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QACvD,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,MAAM,GAAG,SAAS,CAAC,YAAY,EAAE,CAAC;YACxC,MAAM,IAAI,CAAC,cAAc,CAAC,mBAAmB,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;YACjE,IAAI,CAAC,gBAAgB,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAC1C,CAAC;QAED,sBAAsB;QACtB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC;QAChE,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,gBAAgB,CACpB,SAAiB,EACjB,QAAgB,EAChB,IAAS,EACT,MAAW,EACX,OAAgB;QAEhB,MAAM,OAAO,GAAG,IAAI,CAAC,cAAc,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC;QAC1D,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CAAC,WAAW,SAAS,YAAY,CAAC,CAAC;QACpD,CAAC;QAED,MAAM,OAAO,GAAqB;YAChC,MAAM,EAAE,OAAO,CAAC,MAAM;YACtB,SAAS,EAAE,OAAO,CAAC,SAAS;YAC5B,QAAQ,EAAE,OAAO,CAAC,WAAW;YAC7B,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;YACrB,UAAU,EAAE,OAAO,CAAC,OAAO,IAAI,CAAC,CAAC,OAAO,CAAC,KAAK;SAC/C,CAAC;QAEF,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,kBAAkB,CAAC,mBAAmB,CAClE,QAAQ,EACR,IAAI,EACJ,MAAM,EACN,OAAO,EACP,OAAO,CACR,CAAC;QAEF,oCAAoC;QACpC,MAAM,SAAS,GAAG,IA
AI,CAAC,gBAAgB,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QACvD,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,SAAS,CAAC,YAAY,CAAC,UAAU,CAAC,CAAC;QAC3C,CAAC;QAED,qCAAqC;QACrC,IAAI,CAAC,cAAc,CAAC,wBAAwB,CAAC,SAAS,CAAC,CAAC;QAExD,OAAO,UAAU,CAAC;IACpB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,aAAa,CACjB,SAAiB,EACjB,YAAmB,EACnB,cAAwB;QAExB,MAAM,SAAS,GAAG,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QACvD,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,mCAAmC,SAAS,EAAE,CAAC,CAAC;QAClE,CAAC;QAED,OAAO,MAAM,SAAS,CAAC,aAAa,CAAC,YAAY,EAAE,cAAc,CAAC,CAAC;IACrE,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,eAAe,CACnB,SAAiB,EACjB,QAAgB,EAChB,QAAuB;QAEvB,qCAAqC;QACrC,MAAM,gBAAgB,GAAG,QAAQ,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,mBAAmB;QAEnE,MAAM,IAAI,CAAC,kBAAkB,CAAC,sBAAsB,CAClD,QAAQ,EACR,gBAAgB,CACjB,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,KAAK,CACT,SAAiB,EACjB,UAA2B,EAAE;QAE7B,MAAM,SAAS,GAAG,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QACvD,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,mCAAmC,SAAS,EAAE,CAAC,CAAC;QAClE,CAAC;QAED,OAAO,MAAM,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACxC,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,UAAU,CACd,SAAiB,EACjB,SAAuD,SAAS;QAEhE,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,kBAAkB,CAAC,qBAAqB,CACrE,SAAS,CACV,CAAC;QAEF,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC7B,OAAO;gBACL,MAAM;gBACN,gBAAgB,EAAE,CAAC;gBACnB,aAAa,EAAE,CAAC;gBAChB,WAAW,EAAE,CAAC;gBACd,gBAAgB,EAAE;oBAChB,OAAO,EAAE,CAAC;oBACV,OAAO,EAAE,CAAC;oBACV,WAAW,EAAE,IAAI;iBAClB;gBACD,UAAU,EAAE,EAAE;aACf,CAAC;QACJ,CAAC;QAED,oBAAoB;QACpB,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QACrD,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;QAE1E,MAAM,YAAY,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,MAAM,CAAC;QAC1E,MAAM,WAAW,GAAG,YAAY,GAAG,WAAW,CAAC,MAAM,CAAC;QAEtD,oDAAoD;QACpD,MAAM,UAAU,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAC5C,MAAM,SAAS,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;QACzC,MAAM,aAAa,GACjB,UAAU,CAA
C,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC,GAAG,UAAU,CAAC,MAAM,CAAC;QAC3E,MAAM,aAAa,GACjB,SAAS,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC,GAAG,SAAS,CAAC,MAAM,CAAC;QACzE,MAAM,WAAW,GACf,aAAa,GAAG,CAAC;YACf,CAAC,CAAC,CAAC,CAAC,CAAC,aAAa,GAAG,aAAa,CAAC,GAAG,aAAa,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;YACtE,CAAC,CAAC,GAAG,CAAC;QAEV,wBAAwB;QACxB,MAAM,WAAW,GAAG,IAAI,GAAG,EAGxB,CAAC;QAEJ,KAAK,MAAM,GAAG,IAAI,WAAW,EAAE,CAAC;YAC9B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC;YAC7B,MAAM,KAAK,GAAG,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI;gBACrC,KAAK,EAAE,CAAC;gBACR,WAAW,EAAE,CAAC;gBACd,YAAY,EAAE,CAAC;aAChB,CAAC;YACF,KAAK,CAAC,KAAK,EAAE,CAAC;YACd,KAAK,CAAC,WAAW,IAAI,GAAG,CAAC,MAAM,CAAC;YAChC,IAAI,GAAG,CAAC,MAAM,GAAG,GAAG;gBAAE,KAAK,CAAC,YAAY,EAAE,CAAC;YAC3C,WAAW,CAAC,GAAG,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;QAC/B,CAAC;QAED,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,CAAC;aACjD,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,CAAC;YACvB,IAAI;YACJ,WAAW,EAAE,KAAK,CAAC,YAAY,GAAG,KAAK,CAAC,KAAK;YAC7C,SAAS,EAAE,KAAK,CAAC,WAAW,GAAG,KAAK,CAAC,KAAK;YAC1C,KAAK,EAAE,KAAK,CAAC,KAAK;SACnB,CAAC,CAAC;aACF,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC;aACzC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QAEf,OAAO;YACL,MAAM;YACN,gBAAgB,EAAE,WAAW,CAAC,MAAM;YACpC,aAAa,EAAE,SAAS;YACxB,WAAW;YACX,gBAAgB,EAAE;gBAChB,OAAO,EAAE,aAAa;gBACtB,OAAO,EAAE,aAAa;gBACtB,WAAW,EAAE,GAAG,WAAW,GAAG;aAC/B;YACD,UAAU;SACX,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,gBAAgB,CACpB,eAAuB,EACvB,eAAuB,EACvB,aAAqB,GAAG;QAExB,MAAM,eAAe,GAAG,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC;QACnE,MAAM,eAAe,GAAG,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC;QAEnE,IAAI,CAAC,eAAe,IAAI,CAAC,eAAe,EAAE,CAAC;YACzC,MAAM,IAAI,KAAK,CAAC,oCAAoC,CAAC,CAAC;QACxD,CAAC;QAED,uBAAuB;QACvB,MAAM,YAAY,GAAG,eAAe,CAAC,YAAY,EAAE,CAAC;QAEpD,uDAAuD;QACvD,MAAM,YAAY,GAAG,eAAe,CAAC,YAAY,EAAE,CAAC;QAEpD,oFAAoF;QACpF,MAAM,YAAY,GAAQ,EAAE
,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC;QAErD,KAAK,MAAM,CAAC,QAAQ,EAAE,OAAO,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,YAAY,CAAC,MAAM,CAAC,EAAE,CAAC;YACtE,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC5B,YAAY,CAAC,QAAQ,CAAC,GAAG,EAAE,CAAC;YAC9B,CAAC;YAED,KAAK,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAc,CAAC,EAAE,CAAC;gBAC7D,MAAM,YAAY,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;gBACzD,uCAAuC;gBACvC,YAAY,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC;oBAC5B,YAAY,GAAG,CAAC,CAAC,GAAG,UAAU,CAAC,GAAI,KAAgB,GAAG,UAAU,CAAC;YACrE,CAAC;QACH,CAAC;QAED,eAAe,CAAC,YAAY,CAAC,EAAE,GAAG,YAAY,EAAE,MAAM,EAAE,YAAY,EAAE,CAAC,CAAC;QAExE,MAAM,aAAa,GAAG,IAAI,CAAC,cAAc,CAAC,UAAU,CAAC,eAAe,CAAC,CAAC;QACtE,MAAM,aAAa,GAAG,IAAI,CAAC,cAAc,CAAC,UAAU,CAAC,eAAe,CAAC,CAAC;QAEtE,OAAO;YACL,UAAU,EAAE,aAAa,EAAE,WAAW,IAAI,SAAS;YACnD,UAAU,EAAE,aAAa,EAAE,WAAW,IAAI,SAAS;YACnD,UAAU;YACV,eAAe,EAAE,IAAI;YACrB,eAAe,EAAE,UAAU,GAAG,GAAG,EAAE,iBAAiB;YACpD,sBAAsB,EAAE,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC,MAAM;SAChE,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,iBAAiB,CACrB,SAAiB,EACjB,KAAY;QAMZ,0BAA0B;QAC1B,MAAM,kBAAkB,GACtB,MAAM,IAAI,CAAC,kBAAkB,CAAC,0BAA0B,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;QAErE,+BAA+B;QAC/B,MAAM,iBAAiB,GAA2B;YAChD,eAAe,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,kBAAkB,CAAC,MAAM,GAAG,EAAE,CAAC;YAC9D,SAAS,EACP,kBAAkB,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC;gBAC5D,CAAC,kBAAkB,CAAC,MAAM,IAAI,CAAC,CAAC;YAClC,WAAW,EAAE,IAAI,CAAC,oBAAoB,CAAC,kBAAkB,CAAC;SAC3D,CAAC;QAEF,MAAM,SAAS,GAAG,YAAY,kBAAkB,CAAC,MAAM,iDAAiD,iBAAiB,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,yBAAyB,CAAC,iBAAiB,CAAC,WAAW,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC;QAE5N,OAAO;YACL,SAAS;YACT,kBAAkB;YAClB,iBAAiB;SAClB,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,oBAAoB,CAAC,WAAyB;QACpD,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,CAAC,CAAC;QAEvC,MAAM,YAAY,GAAG,IAAI,GAAG,EAAkB,CAAC;QAC/C,KAAK,MAAM,GAAG,IAAI,WAAW,EAAE,CAAC;YAC9B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC;YAC7B,YAAY,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,YAAY,CAAC,GAAG,CAAC
,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAC5D,CAAC;QAED,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QAChE,OAAO,QAAQ,GAAG,WAAW,CAAC,MAAM,CAAC;IACvC,CAAC;IAED;;OAEG;IACH,cAAc,CAAC,SAAiB;QAC9B,OAAO,IAAI,CAAC,cAAc,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC;IACnD,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,eAAe,CAAC,MAAe;QACnC,OAAO,MAAM,IAAI,CAAC,cAAc,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC;IAC3D,CAAC;CACF;AA/VD,0CA+VC"}
@@ -0,0 +1,247 @@
1
+ /**
2
+ * LearningManager - Main orchestration layer for MCP learning integration
3
+ */
4
+ import { ExperienceRecorder } from './experience-recorder.mjs';
5
+ import { RewardEstimator } from './reward-estimator.mjs';
6
+ import { SessionManager } from './session-manager.mjs';
7
+ import { PolicyOptimizer } from './policy-optimizer.mjs';
8
/**
 * LearningManager - orchestration layer for MCP learning integration.
 *
 * Owns one PolicyOptimizer per live session (keyed by session id) and
 * delegates persistence to the ExperienceRecorder and SessionManager it
 * constructs around the supplied database handle.
 */
export class LearningManager {
    /**
     * @param db Database handle; stored on the instance and handed to the
     *   ExperienceRecorder and SessionManager constructors.
     */
    constructor(db) {
        // sessionId -> PolicyOptimizer for every session started through this manager.
        this.policyOptimizers = new Map();
        this.db = db;
        this.experienceRecorder = new ExperienceRecorder(db);
        this.rewardEstimator = new RewardEstimator();
        this.sessionManager = new SessionManager(db);
    }
    /**
     * Start a new learning session and attach a fresh policy optimizer to it.
     *
     * @param userId Owner of the session (forwarded to the session manager).
     * @param sessionType Task type of the session (forwarded to the session manager).
     * @param plugin Learning plugin name; defaults to 'q-learning'.
     * @param config Optional hyperparameters: `learningRate` (default 0.1),
     *   `discountFactor` (default 0.95) and `bufferSize` (default 10000).
     * @returns The session object produced by the session manager.
     */
    async startSession(userId, sessionType, plugin = 'q-learning', config = {}) {
        const session = await this.sessionManager.createSession(userId, sessionType, plugin, config);
        // `??` (not `||`) for the rates: an explicit 0 is a legitimate setting
        // (frozen policy / fully myopic agent) and must not be replaced by the
        // default. A zero-sized buffer is still treated as "use the default".
        const optimizer = new PolicyOptimizer(config.learningRate ?? 0.1, config.discountFactor ?? 0.95, config.bufferSize || 10000);
        this.policyOptimizers.set(session.sessionId, optimizer);
        return session;
    }
    /**
     * End a learning session.
     *
     * If an optimizer is registered for the session, its exported policy is
     * saved through the session manager and the optimizer is dropped before
     * the session itself is ended.
     *
     * @param sessionId Id of the session to end.
     * @returns Whatever the session manager's `endSession` resolves to.
     */
    async endSession(sessionId) {
        const optimizer = this.policyOptimizers.get(sessionId);
        if (optimizer) {
            // Export and save the policy BEFORE the session is ended.
            const policy = optimizer.exportPolicy();
            await this.sessionManager.updateSessionPolicy(sessionId, policy);
            this.policyOptimizers.delete(sessionId);
        }
        return await this.sessionManager.endSession(sessionId);
    }
    /**
     * Record a tool execution as a learning experience and feed it to the
     * session's optimizer (when one is registered).
     *
     * @param sessionId Id of the session the execution belongs to.
     * @param toolName Name of the executed tool.
     * @param args Arguments the tool was called with.
     * @param result Result the tool produced.
     * @param outcome Outcome descriptor; `success` and `error` are read here.
     * @returns The experience created by the experience recorder.
     * @throws {Error} When the session manager does not know `sessionId`.
     */
    async recordExperience(sessionId, toolName, args, result, outcome) {
        const session = this.sessionManager.getSession(sessionId);
        if (!session) {
            throw new Error(`Session ${sessionId} not found`);
        }
        const context = {
            userId: session.userId,
            sessionId: session.sessionId,
            taskType: session.sessionType,
            timestamp: Date.now(),
            // A step is terminal once it either succeeded or reported an error.
            isTerminal: outcome.success || !!outcome.error,
        };
        const experience = await this.experienceRecorder.recordToolExecution(toolName, args, result, context, outcome);
        const optimizer = this.policyOptimizers.get(sessionId);
        if (optimizer) {
            await optimizer.updatePolicy(experience);
        }
        this.sessionManager.incrementExperienceCount(sessionId);
        return experience;
    }
    /**
     * Predict the next best action for a session.
     *
     * @param sessionId Id of a session with a registered optimizer.
     * @param currentState State to predict for.
     * @param availableTools Candidate tool names.
     * @returns The optimizer's prediction.
     * @throws {Error} When no optimizer is registered for `sessionId`.
     */
    async predictAction(sessionId, currentState, availableTools) {
        const optimizer = this.policyOptimizers.get(sessionId);
        if (!optimizer) {
            throw new Error(`No policy optimizer for session ${sessionId}`);
        }
        return await optimizer.predictAction(currentState, availableTools);
    }
    /**
     * Apply user feedback to a recorded action.
     *
     * The rating is interpreted on a 0-5 scale, normalized to [0, 1] and
     * clamped so an off-scale rating cannot push the stored reward outside
     * that range.
     *
     * @param sessionId Session id (currently unused by this method).
     * @param actionId Id of the experience whose reward is replaced.
     * @param feedback Feedback object; only `rating` is read.
     * @throws {TypeError} When `feedback.rating` is not a finite number, so a
     *   NaN reward is never written to the recorder.
     */
    async provideFeedback(sessionId, actionId, feedback) {
        const normalizedRating = feedback.rating / 5.0; // 0-5 scale
        if (!Number.isFinite(normalizedRating)) {
            throw new TypeError(`Invalid feedback rating: ${feedback.rating}`);
        }
        const boundedRating = Math.min(1, Math.max(0, normalizedRating));
        await this.experienceRecorder.updateExperienceReward(actionId, boundedRating);
    }
    /**
     * Train the session's policy on collected experiences.
     *
     * @param sessionId Id of a session with a registered optimizer.
     * @param options Training options forwarded to the optimizer.
     * @returns The optimizer's training result.
     * @throws {Error} When no optimizer is registered for `sessionId`.
     */
    async train(sessionId, options = {}) {
        const optimizer = this.policyOptimizers.get(sessionId);
        if (!optimizer) {
            throw new Error(`No policy optimizer for session ${sessionId}`);
        }
        return await optimizer.train(options);
    }
    /**
     * Compute learning metrics from the experiences recorded for a session.
     *
     * @param sessionId Id of the session to report on.
     * @param period Label echoed back in the result; it does not filter the
     *   experiences. Defaults to 'session'.
     * @returns Totals, average reward, success rate (reward > 0.5), learning
     *   progress (mean reward of the first 10 vs. the last 10 experiences)
     *   and the five tools with the highest average reward.
     */
    async getMetrics(sessionId, period = 'session') {
        const experiences = await this.experienceRecorder.getSessionExperiences(sessionId);
        if (experiences.length === 0) {
            return {
                period,
                totalExperiences: 0,
                averageReward: 0,
                successRate: 0,
                learningProgress: {
                    initial: 0,
                    current: 0,
                    improvement: '0%',
                },
                topActions: [],
            };
        }
        const meanReward = (batch) => batch.reduce((sum, exp) => sum + exp.reward, 0) / batch.length;
        const avgReward = meanReward(experiences);
        const successCount = experiences.filter((exp) => exp.reward > 0.5).length;
        const successRate = successCount / experiences.length;
        // Learning progress: first 10 vs last 10 (the windows overlap for short sessions).
        const initialReward = meanReward(experiences.slice(0, 10));
        const currentReward = meanReward(experiences.slice(-10));
        // Relative change against the magnitude of the baseline, so a negative
        // baseline still yields a correctly signed percentage. A zero (or NaN)
        // baseline has no meaningful relative change and reports '0'.
        const hasBaseline = initialReward > 0 || initialReward < 0;
        const improvement = hasBaseline
            ? (((currentReward - initialReward) / Math.abs(initialReward)) * 100).toFixed(1)
            : '0';
        // Per-tool aggregates.
        const actionStats = new Map();
        for (const exp of experiences) {
            const tool = exp.action.tool;
            const stats = actionStats.get(tool) || {
                count: 0,
                totalReward: 0,
                successCount: 0,
            };
            stats.count++;
            stats.totalReward += exp.reward;
            if (exp.reward > 0.5)
                stats.successCount++;
            actionStats.set(tool, stats);
        }
        const topActions = Array.from(actionStats.entries())
            .map(([tool, stats]) => ({
            tool,
            successRate: stats.successCount / stats.count,
            avgReward: stats.totalReward / stats.count,
            count: stats.count,
        }))
            .sort((a, b) => b.avgReward - a.avgReward)
            .slice(0, 5);
        return {
            period,
            totalExperiences: experiences.length,
            averageReward: avgReward,
            successRate,
            learningProgress: {
                initial: initialReward,
                current: currentReward,
                improvement: `${improvement}%`,
            },
            topActions,
        };
    }
    /**
     * Transfer learning between two live sessions by blending Q-tables.
     *
     * Every (state, action) value present in the source policy is merged into
     * the target as `target * (1 - similarity) + source * similarity`; entries
     * missing from the target count as 0. The merged table is built from fresh
     * objects, so the policies returned by `exportPolicy()` are not mutated.
     *
     * @param sourceSessionId Session to copy knowledge from.
     * @param targetSessionId Session that receives the merged policy.
     * @param similarity Blend weight given to the source values (default 0.7).
     * @returns A transfer summary. `performanceGain` is an estimate
     *   (`similarity * 0.3`) and `experiencesTransferred` is the number of
     *   states in the source Q-table.
     * @throws {Error} When either session has no registered optimizer.
     */
    async transferLearning(sourceSessionId, targetSessionId, similarity = 0.7) {
        const sourceOptimizer = this.policyOptimizers.get(sourceSessionId);
        const targetOptimizer = this.policyOptimizers.get(targetSessionId);
        if (!sourceOptimizer || !targetOptimizer) {
            throw new Error('Source or target session not found');
        }
        const sourcePolicy = sourceOptimizer.exportPolicy();
        const targetPolicy = targetOptimizer.exportPolicy();
        // Merge in Maps: lookups never fall through to Object.prototype (a state
        // key such as "constructor" is just a key) and the exported target rows
        // are copied rather than edited in place.
        const mergedRows = new Map();
        for (const [stateKey, actions] of Object.entries(targetPolicy.qTable || {})) {
            mergedRows.set(stateKey, new Map(Object.entries(actions || {})));
        }
        for (const [stateKey, actions] of Object.entries(sourcePolicy.qTable)) {
            let row = mergedRows.get(stateKey);
            if (!row) {
                row = new Map();
                mergedRows.set(stateKey, row);
            }
            for (const [action, value] of Object.entries(actions)) {
                const currentValue = row.get(action) || 0;
                // Weighted average based on similarity
                row.set(action, currentValue * (1 - similarity) + value * similarity);
            }
        }
        // Object.fromEntries defines own data properties for every key.
        const mergedQTable = Object.fromEntries(Array.from(mergedRows, ([stateKey, row]) => [stateKey, Object.fromEntries(row)]));
        targetOptimizer.importPolicy({ ...targetPolicy, qTable: mergedQTable });
        const sourceSession = this.sessionManager.getSession(sourceSessionId);
        const targetSession = this.sessionManager.getSession(targetSessionId);
        return {
            sourceTask: sourceSession?.sessionType || 'unknown',
            targetTask: targetSession?.sessionType || 'unknown',
            similarity,
            transferSuccess: true,
            performanceGain: similarity * 0.3, // Estimated gain
            experiencesTransferred: Object.keys(sourcePolicy.qTable).length,
        };
    }
    /**
     * Explain a prediction using similar past experiences.
     *
     * @param sessionId Session id (currently unused by this method).
     * @param state State to look up similar experiences for.
     * @returns A human-readable `reasoning` string, the (up to 5) similar
     *   experiences returned by the recorder, and the confidence factors
     *   derived from them.
     */
    async explainPrediction(sessionId, state) {
        const similarExperiences = await this.experienceRecorder.retrieveSimilarExperiences(state, 5);
        const confidenceFactors = {
            experienceCount: Math.min(1.0, similarExperiences.length / 10),
            // `|| 1` keeps the average at 0 (not NaN) when nothing similar was found.
            avgReward: similarExperiences.reduce((sum, exp) => sum + exp.reward, 0) /
                (similarExperiences.length || 1),
            consistency: this.calculateConsistency(similarExperiences),
        };
        const reasoning = `Based on ${similarExperiences.length} similar past experiences with average reward ${confidenceFactors.avgReward.toFixed(2)}. Action consistency: ${(confidenceFactors.consistency * 100).toFixed(0)}%.`;
        return {
            reasoning,
            similarExperiences,
            confidenceFactors,
        };
    }
    /**
     * Calculate how consistently the same tool was used across experiences.
     *
     * @param experiences Experiences exposing `action.tool`.
     * @returns Share of experiences that used the most frequent tool, in
     *   (0, 1]; 0 for an empty list.
     */
    calculateConsistency(experiences) {
        if (experiences.length === 0)
            return 0;
        const actionCounts = new Map();
        // Track the maximum while counting instead of spreading every count
        // into Math.max, which is bounded by the engine's argument limit.
        let maxCount = 0;
        for (const exp of experiences) {
            const tool = exp.action.tool;
            const count = (actionCounts.get(tool) || 0) + 1;
            actionCounts.set(tool, count);
            if (count > maxCount)
                maxCount = count;
        }
        return maxCount / experiences.length;
    }
    /**
     * Get session info.
     *
     * @param sessionId Id of the session to look up.
     * @returns The session manager's record for `sessionId`.
     */
    getSessionInfo(sessionId) {
        return this.sessionManager.getSession(sessionId);
    }
    /**
     * Restore sessions from the database.
     *
     * @param userId User id forwarded to the session manager.
     * @returns Whatever the session manager's `restoreSessions` resolves to.
     */
    async restoreSessions(userId) {
        return await this.sessionManager.restoreSessions(userId);
    }
}
@@ -0,0 +1,53 @@
1
+ /**
2
+ * PolicyOptimizer - Optimizes action selection policy using reinforcement learning
3
+ */
4
+ import type { State, Experience, ActionPrediction, TrainingOptions, TrainingMetrics } from '../types/index.js';
5
+ export declare class PolicyOptimizer {
6
+ private qTable;
7
+ private learningRate;
8
+ private discountFactor;
9
+ private explorationRate;
10
+ private experienceBuffer;
11
+ constructor(learningRate?: number, discountFactor?: number, bufferSize?: number); // all three arguments are optional; their default values are not visible in this declaration
12
+ /**
13
+ * Predict the best action for the current state. `availableActions` lists the candidate action names; the promise resolves to an ActionPrediction.
14
+ */
15
+ predictAction(state: State, availableActions: string[]): Promise<ActionPrediction>;
16
+ /**
17
+ * Update the policy based on a single experience. The promise resolves with no value.
18
+ */
19
+ updatePolicy(experience: Experience): Promise<void>;
20
+ /**
21
+ * Train the policy on a batch of experiences. `options` may be omitted; the promise resolves to the resulting TrainingMetrics.
22
+ */
23
+ train(options?: TrainingOptions): Promise<TrainingMetrics>;
24
+ /**
25
+ * Get policy statistics: number of states learned, total experiences and average Q-value.
26
+ */
27
+ getPolicyStats(): {
28
+ statesLearned: number;
29
+ totalExperiences: number;
30
+ avgQValue: number;
31
+ };
32
+ /**
33
+ * Export the policy for persistence. The result is untyped (`any`); its exact shape is not declared here.
34
+ */
35
+ exportPolicy(): any;
36
+ /**
37
+ * Import a policy from persistence. `policyData` is untyped (`any`) - presumably the shape produced by exportPolicy(); TODO confirm against the implementation.
38
+ */
39
+ importPolicy(policyData: any): void;
40
+ /**
41
+ * Encode a state as the string key used for the Q-table (private helper; signature not exposed in this declaration).
42
+ */
43
+ private encodeState;
44
+ /**
45
+ * Get the experience count for a state (private helper; signature not exposed in this declaration).
46
+ */
47
+ private getExperienceCount;
48
+ /**
49
+ * Decay the exploration rate over time. `decayRate` may be omitted; its default is not visible in this declaration.
50
+ */
51
+ decayExploration(decayRate?: number): void;
52
+ }
53
+ //# sourceMappingURL=policy-optimizer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"policy-optimizer.d.ts","sourceRoot":"","sources":["../../../../src/mcp/learning/core/policy-optimizer.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EACV,KAAK,EAEL,UAAU,EACV,gBAAgB,EAChB,eAAe,EACf,eAAe,EAChB,MAAM,mBAAmB,CAAC;AAG3B,qBAAa,eAAe;IAC1B,OAAO,CAAC,MAAM,CAA+C;IAC7D,OAAO,CAAC,YAAY,CAAe;IACnC,OAAO,CAAC,cAAc,CAAgB;IACtC,OAAO,CAAC,eAAe,CAAe;IACtC,OAAO,CAAC,gBAAgB,CAAmB;gBAGzC,YAAY,GAAE,MAAY,EAC1B,cAAc,GAAE,MAAa,EAC7B,UAAU,GAAE,MAAc;IAO5B;;OAEG;IACG,aAAa,CACjB,KAAK,EAAE,KAAK,EACZ,gBAAgB,EAAE,MAAM,EAAE,GACzB,OAAO,CAAC,gBAAgB,CAAC;IAyD5B;;OAEG;IACG,YAAY,CAAC,UAAU,EAAE,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC;IAoCzD;;OAEG;IACG,KAAK,CAAC,OAAO,GAAE,eAAoB,GAAG,OAAO,CAAC,eAAe,CAAC;IAoFpE;;OAEG;IACH,cAAc,IAAI;QAChB,aAAa,EAAE,MAAM,CAAC;QACtB,gBAAgB,EAAE,MAAM,CAAC;QACzB,SAAS,EAAE,MAAM,CAAC;KACnB;IAkBD;;OAEG;IACH,YAAY,IAAI,GAAG;IAgBnB;;OAEG;IACH,YAAY,CAAC,UAAU,EAAE,GAAG,GAAG,IAAI;IAoBnC;;OAEG;IACH,OAAO,CAAC,WAAW;IAUnB;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAK1B;;OAEG;IACH,gBAAgB,CAAC,SAAS,GAAE,MAAc,GAAG,IAAI;CAGlD"}