@unrdf/self-healing-workflows 26.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +284 -0
- package/examples/basic-usage.mjs +99 -0
- package/examples/recovery-strategies.mjs +142 -0
- package/package.json +46 -0
- package/src/circuit-breaker.mjs +262 -0
- package/src/error-classifier.mjs +203 -0
- package/src/health-monitor.mjs +301 -0
- package/src/index.mjs +46 -0
- package/src/recovery-actions.mjs +272 -0
- package/src/retry-strategy.mjs +241 -0
- package/src/schemas.mjs +185 -0
- package/src/self-healing-engine.mjs +354 -0
- package/test/self-healing.test.mjs +772 -0
- package/vitest.config.mjs +20 -0
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Self-healing workflow engine
|
|
3
|
+
* @module @unrdf/self-healing-workflows/engine
|
|
4
|
+
* @description Main engine coordinating error recovery, circuit breaking, and health monitoring
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { ErrorClassifier } from './error-classifier.mjs';
|
|
8
|
+
import { RetryStrategy } from './retry-strategy.mjs';
|
|
9
|
+
import { CircuitBreaker } from './circuit-breaker.mjs';
|
|
10
|
+
import { RecoveryActionExecutor } from './recovery-actions.mjs';
|
|
11
|
+
import { HealthMonitor } from './health-monitor.mjs';
|
|
12
|
+
import {
|
|
13
|
+
SelfHealingConfigSchema
|
|
14
|
+
} from './schemas.mjs';
|
|
15
|
+
|
|
16
|
+
/**
 * Self-healing workflow engine.
 *
 * Runs operations through a retry strategy wrapped in a circuit breaker and,
 * when that still fails, classifies the error and hands it to a recovery
 * action executor. Keeps aggregate statistics and exposes a health monitor.
 */
export class SelfHealingEngine {
  /**
   * Creates a new self-healing engine
   *
   * The raw config is parsed by `SelfHealingConfigSchema`; any defaults
   * mentioned below are presumably applied by that schema (defined in
   * `./schemas.mjs`, not visible here).
   *
   * @param {Object} [config] - Engine configuration
   * @param {Object} [config.retry] - Retry strategy config
   * @param {Object} [config.circuitBreaker] - Circuit breaker config
   * @param {Object} [config.healthCheck] - Health check config
   * @param {Array<Object>} [config.errorPatterns] - Custom error patterns
   * @param {Array<Object>} [config.recoveryActions] - Custom recovery actions
   * @param {boolean} [config.enableOtel=true] - Enable OTEL instrumentation
   * @param {number} [config.maxConcurrentRecoveries=10] - Max concurrent recoveries
   */
  constructor(config = {}) {
    this.config = SelfHealingConfigSchema.parse(config);

    // Initialize components
    this.classifier = new ErrorClassifier({
      patterns: this.config.errorPatterns
    });

    this.retry = new RetryStrategy(this.config.retry);
    this.circuitBreaker = new CircuitBreaker(this.config.circuitBreaker);
    this.recoveryExecutor = new RecoveryActionExecutor();
    this.healthMonitor = new HealthMonitor(this.config.healthCheck);

    // Register custom recovery actions
    for (const action of this.config.recoveryActions) {
      this.recoveryExecutor.register(action);
    }

    // Statistics
    this.stats = createEmptyStats();

    // Ids of executions currently in flight (used for the capacity limit)
    this.activeRecoveries = new Set();

    // Register health checks
    this.registerDefaultHealthChecks();
  }

  /**
   * Registers the three built-in health checks on the health monitor:
   * `circuit-breaker`, `recovery-capacity` and `recovery-success-rate`.
   * Each check signals a problem by throwing.
   * @returns {void}
   */
  registerDefaultHealthChecks() {
    // Circuit breaker health: fails while the breaker reports the 'open' state
    this.healthMonitor.registerCheck('circuit-breaker', async () => {
      const state = this.circuitBreaker.getState();
      if (state === 'open') {
        throw new Error('Circuit breaker is open');
      }
    });

    // Recovery capacity: fails once the in-flight count hits the configured limit
    this.healthMonitor.registerCheck('recovery-capacity', async () => {
      if (this.activeRecoveries.size >= this.config.maxConcurrentRecoveries) {
        throw new Error('Maximum concurrent recoveries reached');
      }
    });

    // Success rate: only evaluated after more than 10 attempts to avoid
    // flagging the engine on a tiny sample
    this.healthMonitor.registerCheck('recovery-success-rate', async () => {
      const stats = this.getStats();
      if (stats.successRate < 0.5 && stats.totalAttempts > 10) {
        throw new Error(`Low success rate: ${(stats.successRate * 100).toFixed(1)}%`);
      }
    });
  }

  /**
   * Executes an operation with self-healing capabilities
   *
   * The operation is run via the retry strategy inside the circuit breaker.
   * If that rejects, the error is classified and passed to
   * {@link SelfHealingEngine#attemptRecovery}. Every completed call is counted
   * as exactly one successful or failed recovery.
   *
   * @param {Function} operation - Async operation to execute
   * @param {Object} [options] - Execution options
   * @param {Function} [options.onRetry] - Retry callback (forwarded to the retry strategy)
   * @param {Function} [options.fallback] - Fallback function (forwarded to the circuit breaker)
   * @param {Function} [options.compensationFn] - Compensation function (forwarded in the recovery context)
   * @param {Object} [options.workflow] - Workflow context (forwarded in the recovery context)
   * @returns {Promise<any>} Operation result, or the recovery result when recovery succeeded
   * @throws {Error} If the concurrency limit is reached, or if recovery fails
   * @example
   * const engine = new SelfHealingEngine();
   * const result = await engine.execute(async () => {
   *   return await fetch('https://api.example.com');
   * }, {
   *   fallback: () => cachedData
   * });
   */
  async execute(operation, options = {}) {
    // Check capacity (synchronous check-and-add, so no interleaving between the two)
    if (this.activeRecoveries.size >= this.config.maxConcurrentRecoveries) {
      throw new Error('Maximum concurrent recoveries reached');
    }

    const recoveryId = this.generateRecoveryId();
    this.activeRecoveries.add(recoveryId);
    this.stats.totalAttempts++;

    const startTime = Date.now();

    try {
      // Execute through circuit breaker, with retry inside it
      const result = await this.circuitBreaker.execute(async () => {
        return await this.retry.execute(operation, {
          onRetry: options.onRetry
        });
      }, {
        fallback: options.fallback
      });

      this.stats.successfulRecoveries++;
      return result;
    } catch (error) {
      // Classification lives inside this try on purpose: if the classifier
      // itself throws, the attempt must still be recorded as a failure so
      // that successes + failures always equals the completed attempts.
      try {
        const classified = this.classifier.classify(error);

        // Track error category
        const category = classified.category;
        this.stats.errorsByCategory[category] = (this.stats.errorsByCategory[category] || 0) + 1;

        // Attempt recovery
        const recovered = await this.attemptRecovery(classified, {
          operation,
          ...options
        });

        this.stats.successfulRecoveries++;
        return recovered;
      } catch (recoveryError) {
        this.stats.failedRecoveries++;
        throw recoveryError;
      }
    } finally {
      // Runs on every path: account elapsed time and free the capacity slot
      const duration = Date.now() - startTime;
      this.stats.totalRecoveryTime += duration;
      this.activeRecoveries.delete(recoveryId);
    }
  }

  /**
   * Attempts recovery for a classified error
   *
   * Delegates to the recovery executor and, on success, bumps the
   * `actionsByType` counter. The recorded type is derived only from
   * `classifiedError.retryable` ('retry' when truthy, otherwise 'compensate');
   * NOTE(review): this may not match the action the executor actually ran.
   *
   * @param {Object} classifiedError - Classified error object
   * @param {Object} context - Recovery context
   * @returns {Promise<any>} Recovery result
   * @throws {Error} If recovery fails
   */
  async attemptRecovery(classifiedError, context) {
    const result = await this.recoveryExecutor.recover(classifiedError, context);

    // Track action type
    const actionType = classifiedError.retryable ? 'retry' : 'compensate';
    this.stats.actionsByType[actionType] = (this.stats.actionsByType[actionType] || 0) + 1;

    return result;
  }

  /**
   * Wraps a function with self-healing capabilities
   *
   * The returned function forwards its arguments and its `this` receiver to
   * `fn`, so wrapped methods keep working when assigned back onto an object.
   *
   * @param {Function} fn - Function to wrap
   * @param {Object} [options] - Execution options (see {@link SelfHealingEngine#execute})
   * @returns {Function} Wrapped async function
   */
  wrap(fn, options = {}) {
    const engine = this;
    // A regular function (not an arrow) so the caller's `this` can be forwarded
    return async function (...args) {
      return engine.execute(() => fn.apply(this, args), options);
    };
  }

  /**
   * Starts health monitoring
   * @returns {void}
   */
  startHealthMonitoring() {
    this.healthMonitor.start();
  }

  /**
   * Stops health monitoring
   * @returns {void}
   */
  stopHealthMonitoring() {
    this.healthMonitor.stop();
  }

  /**
   * Gets recovery statistics
   *
   * `successRate` and `averageRecoveryTime` are computed over completed
   * executions (successes + failures) rather than `totalAttempts`, because
   * `totalAttempts` is incremented when an execution starts and would
   * otherwise let in-flight work drag both figures down.
   *
   * @returns {Object} Snapshot with counters, derived rates and shallow copies
   *   of the per-category / per-action maps
   */
  getStats() {
    const { totalAttempts, successfulRecoveries, failedRecoveries, totalRecoveryTime } = this.stats;
    const completedAttempts = successfulRecoveries + failedRecoveries;

    return {
      totalAttempts,
      successfulRecoveries,
      failedRecoveries,
      averageRecoveryTime: completedAttempts > 0 ? totalRecoveryTime / completedAttempts : 0,
      successRate: completedAttempts > 0 ? successfulRecoveries / completedAttempts : 0,
      errorsByCategory: { ...this.stats.errorsByCategory },
      actionsByType: { ...this.stats.actionsByType }
    };
  }

  /**
   * Resets the engine statistics and the recovery executor's statistics
   * @returns {void}
   */
  resetStats() {
    this.stats = createEmptyStats();
    this.recoveryExecutor.resetStats();
  }

  /**
   * Gets current health status by running the health monitor's checks
   * @returns {Promise<Object>} Health check result
   */
  async getHealth() {
    return this.healthMonitor.check();
  }

  /**
   * Checks if engine is healthy (as reported by the health monitor)
   * @returns {boolean} True if healthy
   */
  isHealthy() {
    return this.healthMonitor.isHealthy();
  }

  /**
   * Registers a custom error pattern with the classifier
   * @param {Object} pattern - Error pattern
   * @returns {void}
   */
  addErrorPattern(pattern) {
    this.classifier.addPattern(pattern);
  }

  /**
   * Registers a custom recovery action with the recovery executor
   * @param {Object} action - Recovery action
   * @returns {void}
   */
  addRecoveryAction(action) {
    this.recoveryExecutor.register(action);
  }

  /**
   * Registers a custom health check with the health monitor
   * @param {string} name - Check name
   * @param {Function} checkFn - Check function
   * @param {Object} [options] - Check options
   * @returns {void}
   */
  addHealthCheck(name, checkFn, options) {
    this.healthMonitor.registerCheck(name, checkFn, options);
  }

  /**
   * Adds a health status change listener
   * @param {Function} listener - Listener function
   * @returns {Function} Unsubscribe function
   */
  onHealthChange(listener) {
    return this.healthMonitor.onStatusChange(listener);
  }

  /**
   * Gets circuit breaker state
   * @returns {string} Current state
   */
  getCircuitBreakerState() {
    return this.circuitBreaker.getState();
  }

  /**
   * Resets circuit breaker
   * @returns {void}
   */
  resetCircuitBreaker() {
    this.circuitBreaker.reset();
  }

  /**
   * Gets active recovery count
   * @returns {number} Number of executions currently in flight
   */
  getActiveRecoveryCount() {
    return this.activeRecoveries.size;
  }

  /**
   * Generates a recovery ID of the form `recovery-<epoch ms>-<random base36>`.
   * Used only as a key in the in-flight set; not suitable as a secret.
   * @returns {string} Recovery ID
   */
  generateRecoveryId() {
    return `recovery-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`;
  }

  /**
   * Gets comprehensive engine status
   * @returns {Object} Engine stats, circuit breaker stats, health monitor
   *   stats, in-flight count and the configured concurrency limit
   */
  getStatus() {
    return {
      stats: this.getStats(),
      circuitBreaker: this.circuitBreaker.getStats(),
      health: this.healthMonitor.getStats(),
      activeRecoveries: this.activeRecoveries.size,
      maxConcurrentRecoveries: this.config.maxConcurrentRecoveries
    };
  }
}

/**
 * Builds a zeroed statistics record. Shared by the constructor and
 * `resetStats()` so both always start from the same shape.
 * @returns {Object} Fresh statistics object
 */
function createEmptyStats() {
  return {
    totalAttempts: 0,
    successfulRecoveries: 0,
    failedRecoveries: 0,
    totalRecoveryTime: 0,
    errorsByCategory: {},
    actionsByType: {}
  };
}
|
|
346
|
+
|
|
347
|
+
/**
 * Factory helper that builds a {@link SelfHealingEngine} so callers do not
 * need to use `new` themselves.
 * @param {Object} [config] - Engine configuration, forwarded to the constructor unchanged
 * @returns {SelfHealingEngine} Newly constructed engine instance
 */
export function createSelfHealingEngine(config) {
  const engine = new SelfHealingEngine(config);
  return engine;
}
|