@juspay/neurolink 4.0.0 → 4.1.0
This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -5
- package/README.md +150 -92
- package/dist/lib/mcp/dynamic-chain-executor.d.ts +201 -0
- package/dist/lib/mcp/dynamic-chain-executor.js +489 -0
- package/dist/lib/mcp/dynamic-orchestrator.d.ts +109 -0
- package/dist/lib/mcp/dynamic-orchestrator.js +351 -0
- package/dist/lib/mcp/error-manager.d.ts +254 -0
- package/dist/lib/mcp/error-manager.js +501 -0
- package/dist/lib/mcp/error-recovery.d.ts +158 -0
- package/dist/lib/mcp/error-recovery.js +405 -0
- package/dist/lib/mcp/health-monitor.d.ts +256 -0
- package/dist/lib/mcp/health-monitor.js +621 -0
- package/dist/lib/mcp/orchestrator.d.ts +136 -5
- package/dist/lib/mcp/orchestrator.js +316 -9
- package/dist/lib/mcp/registry.d.ts +22 -0
- package/dist/lib/mcp/registry.js +24 -0
- package/dist/lib/mcp/semaphore-manager.d.ts +137 -0
- package/dist/lib/mcp/semaphore-manager.js +329 -0
- package/dist/lib/mcp/servers/ai-providers/ai-workflow-tools.d.ts +2 -2
- package/dist/lib/mcp/session-manager.d.ts +186 -0
- package/dist/lib/mcp/session-manager.js +400 -0
- package/dist/lib/mcp/session-persistence.d.ts +93 -0
- package/dist/lib/mcp/session-persistence.js +298 -0
- package/dist/lib/mcp/transport-manager.d.ts +153 -0
- package/dist/lib/mcp/transport-manager.js +330 -0
- package/dist/lib/mcp/unified-registry.d.ts +42 -1
- package/dist/lib/mcp/unified-registry.js +122 -2
- package/dist/lib/neurolink.d.ts +75 -0
- package/dist/lib/neurolink.js +104 -0
- package/dist/mcp/dynamic-chain-executor.d.ts +201 -0
- package/dist/mcp/dynamic-chain-executor.js +489 -0
- package/dist/mcp/dynamic-orchestrator.d.ts +109 -0
- package/dist/mcp/dynamic-orchestrator.js +351 -0
- package/dist/mcp/error-manager.d.ts +254 -0
- package/dist/mcp/error-manager.js +501 -0
- package/dist/mcp/error-recovery.d.ts +158 -0
- package/dist/mcp/error-recovery.js +405 -0
- package/dist/mcp/health-monitor.d.ts +256 -0
- package/dist/mcp/health-monitor.js +621 -0
- package/dist/mcp/orchestrator.d.ts +136 -5
- package/dist/mcp/orchestrator.js +316 -9
- package/dist/mcp/plugins/core/neurolink-mcp.json +15 -15
- package/dist/mcp/registry.d.ts +22 -0
- package/dist/mcp/registry.js +24 -0
- package/dist/mcp/semaphore-manager.d.ts +137 -0
- package/dist/mcp/semaphore-manager.js +329 -0
- package/dist/mcp/session-manager.d.ts +186 -0
- package/dist/mcp/session-manager.js +400 -0
- package/dist/mcp/session-persistence.d.ts +93 -0
- package/dist/mcp/session-persistence.js +299 -0
- package/dist/mcp/transport-manager.d.ts +153 -0
- package/dist/mcp/transport-manager.js +331 -0
- package/dist/mcp/unified-registry.d.ts +42 -1
- package/dist/mcp/unified-registry.js +122 -2
- package/dist/neurolink.d.ts +75 -0
- package/dist/neurolink.js +104 -0
- package/package.json +245 -244
error-recovery.js (new file; listed above under both package/dist/lib/mcp/ and package/dist/mcp/):

@@ -0,0 +1,405 @@
+/**
+ * NeuroLink Error Recovery System
+ * Implements circuit breaker, retry strategies, and pattern detection
+ * Based on resilience patterns from reference implementations
+ */
+/**
+ * Circuit breaker states
+ */
+export var CircuitState;
+(function (CircuitState) {
+    CircuitState["CLOSED"] = "CLOSED";
+    CircuitState["OPEN"] = "OPEN";
+    CircuitState["HALF_OPEN"] = "HALF_OPEN";
+})(CircuitState || (CircuitState = {}));
+/**
+ * Circuit breaker instance
+ */
+class CircuitBreaker {
+    config;
+    state = CircuitState.CLOSED;
+    failures = 0;
+    successes = 0;
+    lastFailureTime = 0;
+    nextAttemptTime = 0;
+    constructor(config) {
+        this.config = config;
+    }
+    /**
+     * Check if request should be allowed
+     */
+    canExecute() {
+        const now = Date.now();
+        switch (this.state) {
+            case CircuitState.CLOSED:
+                return true;
+            case CircuitState.OPEN:
+                if (now >= this.nextAttemptTime) {
+                    this.state = CircuitState.HALF_OPEN;
+                    this.successes = 0;
+                    return true;
+                }
+                return false;
+            case CircuitState.HALF_OPEN:
+                return true;
+            default:
+                return false;
+        }
+    }
+    /**
+     * Record success
+     */
+    recordSuccess() {
+        this.failures = 0;
+        if (this.state === CircuitState.HALF_OPEN) {
+            this.successes++;
+            if (this.successes >= this.config.successThreshold) {
+                this.state = CircuitState.CLOSED;
+            }
+        }
+    }
+    /**
+     * Record failure
+     */
+    recordFailure() {
+        const now = Date.now();
+        // Reset failure count if outside monitoring period
+        if (now - this.lastFailureTime > this.config.monitoringPeriod) {
+            this.failures = 0;
+        }
+        this.failures++;
+        this.lastFailureTime = now;
+        if (this.state === CircuitState.HALF_OPEN) {
+            this.state = CircuitState.OPEN;
+            this.nextAttemptTime = now + this.config.resetTimeout;
+        }
+        else if (this.failures >= this.config.failureThreshold) {
+            this.state = CircuitState.OPEN;
+            this.nextAttemptTime = now + this.config.resetTimeout;
+        }
+    }
+    /**
+     * Get current state
+     */
+    getState() {
+        return this.state;
+    }
+}
+/**
+ * Error Recovery Manager
+ */
+export class ErrorRecovery {
+    circuitBreakers = new Map();
+    errorPatterns = [];
+    recoveryHistory = new Map();
+    defaultRetryConfig = {
+        maxAttempts: 3,
+        initialDelay: 1000,
+        maxDelay: 30000,
+        backoffMultiplier: 2,
+        jitter: true,
+    };
+    defaultCircuitConfig = {
+        failureThreshold: 5,
+        resetTimeout: 60000,
+        successThreshold: 2,
+        monitoringPeriod: 60000,
+    };
+    constructor() {
+        this.initializeDefaultPatterns();
+    }
+    /**
+     * Initialize default error patterns
+     */
+    initializeDefaultPatterns() {
+        // Network timeout pattern
+        this.addPattern({
+            id: "network-timeout",
+            name: "Network Timeout Pattern",
+            description: "Repeated network timeouts indicating connectivity issues",
+            matcher: (errors) => {
+                const recentErrors = errors.slice(-5);
+                return (recentErrors.length >= 3 &&
+                    recentErrors.every((e) => e.category === "TIMEOUT_ERROR"));
+            },
+            severity: "HIGH",
+            recoveryStrategy: {
+                type: "retry",
+                config: {
+                    maxAttempts: 5,
+                    initialDelay: 2000,
+                    maxDelay: 60000,
+                    backoffMultiplier: 3,
+                    jitter: true,
+                },
+            },
+        });
+        // Rate limit pattern
+        this.addPattern({
+            id: "rate-limit",
+            name: "Rate Limit Pattern",
+            description: "API rate limit errors",
+            matcher: (errors) => {
+                const lastError = errors[errors.length - 1];
+                return (lastError &&
+                    (lastError.error.message.includes("429") ||
+                        lastError.error.message.toLowerCase().includes("rate limit")));
+            },
+            severity: "MEDIUM",
+            recoveryStrategy: {
+                type: "circuit-breaker",
+                config: {
+                    failureThreshold: 3,
+                    resetTimeout: 120000, // 2 minutes
+                    successThreshold: 1,
+                    monitoringPeriod: 60000,
+                },
+            },
+        });
+        // Configuration error pattern
+        this.addPattern({
+            id: "config-error",
+            name: "Configuration Error Pattern",
+            description: "Missing or invalid configuration",
+            matcher: (errors) => {
+                const lastError = errors[errors.length - 1];
+                return lastError?.category === "CONFIGURATION_ERROR";
+            },
+            severity: "CRITICAL",
+            recoveryStrategy: {
+                type: "manual",
+                action: async (context) => ({
+                    success: false,
+                    message: "Configuration error requires manual intervention. Check environment variables and config files.",
+                }),
+            },
+        });
+    }
+    /**
+     * Add error pattern
+     */
+    addPattern(pattern) {
+        this.errorPatterns.push(pattern);
+    }
+    /**
+     * Detect patterns in error history
+     */
+    detectPatterns(errors) {
+        return this.errorPatterns.filter((pattern) => pattern.matcher(errors));
+    }
+    /**
+     * Get circuit breaker for resource
+     */
+    getCircuitBreaker(resourceId, config) {
+        if (!this.circuitBreakers.has(resourceId)) {
+            this.circuitBreakers.set(resourceId, new CircuitBreaker(config || this.defaultCircuitConfig));
+        }
+        return this.circuitBreakers.get(resourceId);
+    }
+    /**
+     * Calculate retry delay with exponential backoff
+     */
+    calculateRetryDelay(attemptNumber, config) {
+        const exponentialDelay = config.initialDelay *
+            Math.pow(config.backoffMultiplier, attemptNumber - 1);
+        const delay = Math.min(exponentialDelay, config.maxDelay);
+        if (config.jitter) {
+            // Add random jitter (±25%)
+            const jitter = delay * 0.25 * (Math.random() * 2 - 1);
+            return Math.floor(delay + jitter);
+        }
+        return delay;
+    }
+    /**
+     * Attempt recovery for an error
+     */
+    async attemptRecovery(error, context) {
+        // Detect patterns
+        const patterns = this.detectPatterns([error]);
+        const pattern = patterns[0]; // Use first matching pattern
+        // Get recovery history
+        const historyKey = `${error.context.toolName || "unknown"}-${error.category}`;
+        const history = this.recoveryHistory.get(historyKey) || [];
+        // Build recovery context
+        const recoveryContext = {
+            error,
+            pattern,
+            attemptNumber: history.length + 1,
+            totalAttempts: 0,
+            previousAttempts: history,
+            executionContext: context,
+        };
+        // Determine recovery strategy
+        const strategy = pattern?.recoveryStrategy || this.getDefaultStrategy(error);
+        // Execute recovery
+        const startTime = Date.now();
+        let result;
+        try {
+            switch (strategy.type) {
+                case "retry":
+                    result = await this.executeRetryStrategy(recoveryContext, strategy.config);
+                    break;
+                case "circuit-breaker":
+                    result = await this.executeCircuitBreakerStrategy(recoveryContext, strategy.config);
+                    break;
+                case "fallback":
+                    result = await this.executeFallbackStrategy(recoveryContext, strategy.config);
+                    break;
+                case "manual":
+                    result = strategy.action
+                        ? await strategy.action(recoveryContext)
+                        : { success: false, message: "Manual intervention required" };
+                    break;
+                default:
+                    result = {
+                        success: false,
+                        message: "No recovery strategy available",
+                    };
+            }
+        }
+        catch (recoveryError) {
+            result = {
+                success: false,
+                message: `Recovery failed: ${recoveryError instanceof Error ? recoveryError.message : "Unknown error"}`,
+            };
+        }
+        // Record attempt
+        const attempt = {
+            timestamp: Date.now(),
+            strategy: strategy.type,
+            successful: result.success,
+            duration: Date.now() - startTime,
+            error: result.success
+                ? undefined
+                : new Error(result.message || "Recovery failed"),
+        };
+        history.push(attempt);
+        this.recoveryHistory.set(historyKey, history.slice(-10)); // Keep last 10 attempts
+        return result;
+    }
+    /**
+     * Execute retry strategy
+     */
+    async executeRetryStrategy(context, config) {
+        const retryConfig = config || this.defaultRetryConfig;
+        if (context.attemptNumber > retryConfig.maxAttempts) {
+            return {
+                success: false,
+                message: `Maximum retry attempts (${retryConfig.maxAttempts}) exceeded`,
+            };
+        }
+        const delay = this.calculateRetryDelay(context.attemptNumber, retryConfig);
+        return {
+            success: true,
+            nextAction: "retry",
+            delay,
+            message: `Retry attempt ${context.attemptNumber}/${retryConfig.maxAttempts} after ${delay}ms`,
+        };
+    }
+    /**
+     * Execute circuit breaker strategy
+     */
+    async executeCircuitBreakerStrategy(context, config) {
+        const resourceId = context.error.context.toolName || "default";
+        const circuitBreaker = this.getCircuitBreaker(resourceId, config);
+        if (!circuitBreaker.canExecute()) {
+            return {
+                success: false,
+                message: `Circuit breaker OPEN for ${resourceId}. Service temporarily unavailable.`,
+            };
+        }
+        // Record the failure for circuit breaker
+        circuitBreaker.recordFailure();
+        return {
+            success: true,
+            nextAction: circuitBreaker.getState() === CircuitState.OPEN ? "fail" : "retry",
+            message: `Circuit breaker state: ${circuitBreaker.getState()}`,
+        };
+    }
+    /**
+     * Execute fallback strategy
+     */
+    async executeFallbackStrategy(context, config) {
+        // In a real implementation, this would execute fallback logic
+        // For now, just indicate fallback should be used
+        return {
+            success: true,
+            fallbackUsed: true,
+            message: "Fallback strategy activated",
+        };
+    }
+    /**
+     * Get default recovery strategy based on error
+     */
+    getDefaultStrategy(error) {
+        switch (error.category) {
+            case "NETWORK_ERROR":
+            case "TIMEOUT_ERROR":
+                return {
+                    type: "retry",
+                    config: this.defaultRetryConfig,
+                };
+            case "CONFIGURATION_ERROR":
+            case "PERMISSION_ERROR":
+                return {
+                    type: "manual",
+                };
+            default:
+                return {
+                    type: "retry",
+                    config: {
+                        ...this.defaultRetryConfig,
+                        maxAttempts: 2,
+                    },
+                };
+        }
+    }
+    /**
+     * Get recovery statistics
+     */
+    getRecoveryStats() {
+        let totalAttempts = 0;
+        let successfulRecoveries = 0;
+        let failedRecoveries = 0;
+        // Calculate from history
+        for (const attempts of this.recoveryHistory.values()) {
+            for (const attempt of attempts) {
+                totalAttempts++;
+                if (attempt.successful) {
+                    successfulRecoveries++;
+                }
+                else {
+                    failedRecoveries++;
+                }
+            }
+        }
+        // Get circuit breaker states
+        const circuitBreakerStates = {};
+        for (const [id, breaker] of this.circuitBreakers) {
+            circuitBreakerStates[id] = breaker.getState();
+        }
+        return {
+            totalAttempts,
+            successfulRecoveries,
+            failedRecoveries,
+            circuitBreakerStates,
+            patternMatches: {}, // TODO: Track pattern matches
+        };
+    }
+    /**
+     * Reset circuit breaker
+     */
+    resetCircuitBreaker(resourceId) {
+        this.circuitBreakers.delete(resourceId);
+    }
+    /**
+     * Clear recovery history
+     */
+    clearHistory() {
+        this.recoveryHistory.clear();
+    }
+}
+/**
+ * Default error recovery instance
+ */
+export const defaultErrorRecovery = new ErrorRecovery();
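Taken together, this hunk adds a self-contained resilience layer: a per-resource CircuitBreaker (CLOSED, OPEN, HALF_OPEN) and an ErrorRecovery manager that matches structured errors against registered patterns, picks a retry, circuit-breaker, fallback, or manual strategy, and keeps the last ten recovery attempts per tool/category key. With the default retry config (initialDelay 1000 ms, backoffMultiplier 2, maxDelay 30000 ms), calculateRetryDelay yields roughly 1 s, 2 s, and 4 s for attempts 1 through 3 before the ±25% jitter. A minimal consumption sketch follows; the deep-import path and the shape of the structured error record are assumptions inferred from the code above, not documented API.

// Hedged usage sketch. The deep-import path is an assumption; the package's real
// entry points are defined in package.json (changed in this release but not shown here).
import { defaultErrorRecovery, CircuitState } from "@juspay/neurolink/dist/lib/mcp/error-recovery.js";

// Structured error record: the fields (category, context.toolName, error) are
// inferred from the matchers and attemptRecovery() above; the values are made up.
const toolError = {
    category: "TIMEOUT_ERROR",
    context: { toolName: "searchDocs" },
    error: new Error("Request timed out after 30000ms"),
};

// The second argument is an arbitrary execution context passed through to strategies.
const result = await defaultErrorRecovery.attemptRecovery(toolError, { requestId: "req-42" });
if (result.success && result.nextAction === "retry") {
    // Wait out the backoff delay computed by calculateRetryDelay(), then re-run the tool.
    await new Promise((resolve) => setTimeout(resolve, result.delay));
}

// Circuit breakers are keyed by tool name and can be inspected or reset explicitly.
if (defaultErrorRecovery.getCircuitBreaker("searchDocs").getState() === CircuitState.OPEN) {
    defaultErrorRecovery.resetCircuitBreaker("searchDocs");
}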
health-monitor.d.ts (new file; listed above under both package/dist/lib/mcp/ and package/dist/mcp/):

@@ -0,0 +1,256 @@
+/**
+ * NeuroLink MCP Health Monitoring System
+ * Provides periodic health checks, connection status tracking, and auto-recovery
+ * Based on health monitoring patterns from Cline
+ */
+import type { MCPRegistry } from "./registry.js";
+import { ErrorManager } from "./error-manager.js";
+/**
+ * Connection status states
+ */
+export declare enum ConnectionStatus {
+    DISCONNECTED = "DISCONNECTED",
+    CONNECTING = "CONNECTING",
+    CONNECTED = "CONNECTED",
+    CHECKING = "CHECKING",
+    ERROR = "ERROR",
+    RECOVERING = "RECOVERING"
+}
+/**
+ * Health check result
+ */
+export interface HealthCheckResult {
+    success: boolean;
+    status: ConnectionStatus;
+    message?: string;
+    latency?: number;
+    error?: Error;
+    timestamp: number;
+}
+/**
+ * Server health information
+ */
+export interface ServerHealth {
+    serverId: string;
+    status: ConnectionStatus;
+    lastCheck?: HealthCheckResult;
+    checkCount: number;
+    errorCount: number;
+    lastSuccessfulCheck?: number;
+    recoveryAttempts: number;
+    nextCheckTime?: number;
+}
+/**
+ * Health monitor configuration
+ */
+export interface HealthMonitorOptions {
+    checkInterval?: number;
+    checkTimeout?: number;
+    maxRecoveryAttempts?: number;
+    recoveryDelay?: number;
+    enableAutoRecovery?: boolean;
+}
+/**
+ * Health check strategy interface
+ */
+export interface HealthCheckStrategy {
+    name: string;
+    check(serverId: string, registry: MCPRegistry): Promise<HealthCheckResult>;
+}
+/**
+ * Ping health check - Simple availability check
+ */
+export declare class PingHealthCheck implements HealthCheckStrategy {
+    name: string;
+    check(serverId: string, registry: MCPRegistry): Promise<HealthCheckResult>;
+}
+/**
+ * Tool list validation check - Ensures tools are accessible
+ */
+export declare class ToolListValidationCheck implements HealthCheckStrategy {
+    name: string;
+    check(serverId: string, registry: MCPRegistry): Promise<HealthCheckResult>;
+}
+/**
+ * Performance baseline check - Monitors response times
+ */
+export declare class PerformanceCheck implements HealthCheckStrategy {
+    name: string;
+    private performanceThreshold;
+    constructor(thresholdMs?: number);
+    check(serverId: string, registry: MCPRegistry): Promise<HealthCheckResult>;
+}
+/**
+ * Health Monitor for MCP connections
+ */
+export declare class HealthMonitor {
+    private registry;
+    private errorManager;
+    private serverHealth;
+    private checkInterval;
+    private checkTimeout;
+    private maxRecoveryAttempts;
+    private recoveryDelay;
+    private enableAutoRecovery;
+    private checkTimers;
+    private strategies;
+    private isMonitoring;
+    private recoveryCallbacks;
+    constructor(registry: MCPRegistry, errorManager: ErrorManager, options?: HealthMonitorOptions);
+    /**
+     * Start monitoring all registered servers
+     */
+    startMonitoring(): void;
+    /**
+     * Stop monitoring all servers
+     */
+    stopMonitoring(): void;
+    /**
+     * Perform health check for a specific server
+     *
+     * @param serverId Server to check
+     * @param strategy Strategy name to use (default: "ping")
+     * @returns Health check result
+     */
+    checkServerHealth(serverId: string, strategy?: string): Promise<HealthCheckResult>;
+    /**
+     * Get health status for all servers
+     *
+     * @returns Map of server health information
+     */
+    getHealthStatus(): Map<string, ServerHealth>;
+    /**
+     * Get health status for a specific server
+     *
+     * @param serverId Server ID
+     * @returns Server health information or null
+     */
+    getServerHealth(serverId: string): ServerHealth | null;
+    /**
+     * Register a recovery callback for a server
+     *
+     * @param serverId Server ID
+     * @param callback Recovery callback function
+     */
+    registerRecoveryCallback(serverId: string, callback: (serverId: string) => Promise<void>): void;
+    /**
+     * Add a custom health check strategy
+     *
+     * @param strategy Health check strategy
+     */
+    addStrategy(strategy: HealthCheckStrategy): void;
+    /**
+     * Schedule periodic health check for a server
+     *
+     * @private
+     */
+    private scheduleHealthCheck;
+    /**
+     * Trigger recovery for a server
+     *
+     * @private
+     */
+    private triggerRecovery;
+    /**
+     * Generate comprehensive health report
+     *
+     * @returns Health report with server statuses and metrics
+     */
+    generateHealthReport(): {
+        summary: {
+            totalServers: number;
+            healthyServers: number;
+            unhealthyServers: number;
+            recoveringServers: number;
+            overallHealth: number;
+        };
+        servers: Array<{
+            serverId: string;
+            status: ConnectionStatus;
+            health: number;
+            uptime: number;
+            avgLatency: number;
+            lastError?: string;
+            metrics: {
+                totalChecks: number;
+                successfulChecks: number;
+                failedChecks: number;
+                recoveryAttempts: number;
+            };
+        }>;
+        trends: {
+            healthHistory: Array<{
+                timestamp: number;
+                health: number;
+            }>;
+            errorRate: number;
+            avgRecoveryTime: number;
+        };
+        recommendations: string[];
+    };
+    /**
+     * Get health metrics for monitoring dashboards
+     *
+     * @returns Simplified metrics for real-time monitoring
+     */
+    getHealthMetrics(): {
+        status: "healthy" | "degraded" | "critical";
+        healthScore: number;
+        activeAlerts: Array<{
+            serverId: string;
+            severity: "low" | "medium" | "high" | "critical";
+            message: string;
+            timestamp: number;
+        }>;
+        serverStatuses: Record<string, ConnectionStatus>;
+        performance: {
+            avgLatency: number;
+            maxLatency: number;
+            successRate: number;
+        };
+    };
+    /**
+     * Subscribe to health events
+     *
+     * @param event Event type to subscribe to
+     * @param callback Callback function
+     */
+    on(event: "health-change" | "recovery-started" | "recovery-failed" | "critical-error", callback: (data: any) => void): void;
+    /**
+     * Get health history for trend analysis
+     *
+     * @private
+     */
+    private getHealthHistory;
+    /**
+     * Calculate error rate
+     *
+     * @private
+     */
+    private calculateErrorRate;
+    /**
+     * Calculate average recovery time
+     *
+     * @private
+     */
+    private calculateAvgRecoveryTime;
+    /**
+     * Generate health recommendations
+     *
+     * @private
+     */
+    private generateRecommendations;
+}
+/**
+ * Default health monitor instance (to be initialized with registry and error manager)
+ */
+export declare let defaultHealthMonitor: HealthMonitor | null;
+/**
+ * Initialize default health monitor
+ *
+ * @param registry Tool registry
+ * @param errorManager Error manager
+ * @param options Health monitor options
+ * @returns Health monitor instance
+ */
+export declare function initializeHealthMonitor(registry: MCPRegistry, errorManager: ErrorManager, options?: HealthMonitorOptions): HealthMonitor;
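The declaration file describes only the monitor's public surface; the runtime implementation ships in the matching health-monitor.js added in the same release. A minimal wiring sketch, assuming a deep import path and taking the MCPRegistry and ErrorManager instances from the caller, since their construction is not part of this hunk:

// Hedged usage sketch; the import path is an assumption, and the runtime exports
// come from the health-monitor.js that accompanies this .d.ts.
import { initializeHealthMonitor, PerformanceCheck, ConnectionStatus } from "@juspay/neurolink/dist/lib/mcp/health-monitor.js";

// registry: MCPRegistry, errorManager: ErrorManager (constructed elsewhere in the MCP layer).
export async function setUpHealthMonitoring(registry, errorManager) {
    const monitor = initializeHealthMonitor(registry, errorManager, {
        checkInterval: 30000,        // HealthMonitorOptions; all fields are optional
        enableAutoRecovery: true,
    });
    monitor.addStrategy(new PerformanceCheck(2000)); // custom latency threshold in ms
    monitor.on("health-change", (data) => console.log("MCP health changed:", data));
    monitor.startMonitoring();

    // One-off check using the default "ping" strategy; "github" is a placeholder server id.
    const result = await monitor.checkServerHealth("github");
    if (result.status === ConnectionStatus.ERROR) {
        console.warn(`Server unhealthy: ${result.message}`);
    }
    return monitor;
}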