@unrdf/self-healing-workflows 26.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,241 @@
1
+ /**
2
+ * @file Retry strategy with exponential backoff
3
+ * @module @unrdf/self-healing-workflows/retry
4
+ * @description Implements retry logic with exponential backoff and jitter
5
+ */
6
+
7
+ import { RetryStrategySchema } from './schemas.mjs';
8
+
9
/**
 * Retry strategy with exponential backoff and optional jitter.
 *
 * The configuration is validated (and defaulted) through RetryStrategySchema
 * both at construction time and on every updateConfig() call.
 */
export class RetryStrategy {
  /**
   * Creates a new retry strategy
   * @param {Object} [config] - Retry configuration
   * @param {number} [config.maxAttempts=3] - Maximum number of attempts (first call included)
   * @param {number} [config.initialDelay=1000] - Initial delay in ms
   * @param {number} [config.maxDelay=30000] - Maximum delay in ms
   * @param {number} [config.backoffMultiplier=2] - Backoff multiplier
   * @param {boolean} [config.jitter=true] - Add random jitter
   * @param {Array<string>} [config.retryableErrors] - Error categories to retry
   * @throws {Error} If the configuration is rejected by RetryStrategySchema
   */
  constructor(config = {}) {
    this.config = RetryStrategySchema.parse(config);
  }

  /**
   * Executes an operation with retry logic
   *
   * The operation is invoked at most `maxAttempts` times. After a failure the
   * optional `shouldRetry` predicate may veto further attempts, but it can
   * never extend the attempt budget: once the budget is spent no `onRetry`
   * callback is fired and no backoff delay is waited.
   *
   * @param {Function} operation - Async operation to execute
   * @param {Object} [options] - Execution options
   * @param {Function} [options.onRetry] - Called as (error, attempt, delay) before each wait
   * @param {Function} [options.shouldRetry] - Called as (error, attempt); a falsy result stops retrying
   * @returns {Promise<any>} Operation result
   * @throws {Error} If all retries exhausted; `cause` holds the last failure
   *   and `attempts` the number of invocations made
   * @example
   * const retry = new RetryStrategy({ maxAttempts: 3 });
   * const result = await retry.execute(async () => {
   *   return await fetch('https://api.example.com');
   * });
   */
  async execute(operation, options = {}) {
    const { onRetry, shouldRetry } = options;
    let lastError;
    let attempt = 0;

    while (attempt < this.config.maxAttempts) {
      try {
        return await operation();
      } catch (error) {
        lastError = error;
        attempt++;

        // The attempt budget always applies; a custom predicate can only
        // veto a retry. Checking the budget first avoids a pointless
        // onRetry callback and backoff sleep after the final attempt.
        const hasAttemptsLeft = attempt < this.config.maxAttempts;
        const canRetry =
          hasAttemptsLeft && (shouldRetry ? shouldRetry(error, attempt) : true);

        if (!canRetry) {
          break;
        }

        const delay = this.calculateDelay(attempt);

        if (onRetry) {
          onRetry(error, attempt, delay);
        }

        await this.sleep(delay);
      }
    }

    // All retries exhausted. The rejection value is not guaranteed to be an
    // Error (it may even be undefined), so read the message defensively.
    const reason = lastError?.message ?? String(lastError);
    const error = new Error(
      `Operation failed after ${attempt} attempts: ${reason}`
    );
    error.cause = lastError;
    error.attempts = attempt;
    throw error;
  }

  /**
   * Calculates delay for a given attempt with exponential backoff
   *
   * delay = initialDelay * backoffMultiplier ^ (attempt - 1), capped at
   * maxDelay. With jitter enabled the value is varied by up to ±20% and then
   * capped again so the result never exceeds maxDelay.
   *
   * @param {number} attempt - Current attempt number (1-based)
   * @returns {number} Delay in milliseconds (integer, >= 0, <= maxDelay)
   */
  calculateDelay(attempt) {
    const { initialDelay, maxDelay, backoffMultiplier, jitter } = this.config;

    // A zero initial delay stays zero; returning early also avoids
    // 0 * Infinity = NaN once the power overflows for very large attempts.
    if (initialDelay === 0) {
      return 0;
    }

    const growth = Math.pow(backoffMultiplier, attempt - 1);
    let delay = Math.min(initialDelay * growth, maxDelay);

    // Add jitter to prevent thundering herd
    if (jitter) {
      const jitterAmount = delay * 0.2; // ±20% jitter
      delay += (Math.random() * 2 - 1) * jitterAmount;
      // Positive jitter must not push the delay past the configured maximum.
      delay = Math.min(delay, maxDelay);
    }

    return Math.max(0, Math.floor(delay));
  }

  /**
   * Sleeps for specified duration
   * @param {number} ms - Milliseconds to sleep
   * @returns {Promise<void>} Resolves once the timer fires
   */
  sleep(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  /**
   * Executes operation with retry and returns detailed result
   *
   * Never rejects because of the operation itself: a final failure is
   * reported through `success: false` and the `error` field.
   *
   * @param {Function} operation - Async operation to execute
   * @param {Object} [options] - Execution options (same as execute())
   * @returns {Promise<Object>} `{ success, result, error, attempts, duration, retryHistory }`
   */
  async executeWithMetadata(operation, options = {}) {
    const startTime = Date.now();
    const retryHistory = [];
    let attemptCount = 0;

    // Count real invocations instead of inferring them from the number of
    // retry callbacks, so the figure is right however the run ends.
    const countedOperation = () => {
      attemptCount++;
      return operation();
    };

    const recordRetry = (error, attempt, delay) => {
      retryHistory.push({
        attempt,
        error: error?.message ?? String(error),
        delay,
        timestamp: Date.now()
      });
      if (options.onRetry) {
        options.onRetry(error, attempt, delay);
      }
    };

    let success = false;
    let result;
    let failure;

    try {
      result = await this.execute(countedOperation, {
        ...options,
        onRetry: recordRetry
      });
      success = true;
    } catch (error) {
      failure = error;
    }

    return {
      success,
      result: success ? result : undefined,
      error: success ? undefined : failure,
      attempts: attemptCount,
      duration: Date.now() - startTime,
      retryHistory
    };
  }

  /**
   * Creates a retryable version of an async function
   * @param {Function} fn - Function to wrap
   * @param {Object} [options] - Retry options passed to execute() on every call
   * @returns {Function} Wrapped function forwarding its arguments to `fn`
   */
  wrap(fn, options = {}) {
    return async (...args) => {
      return this.execute(() => fn(...args), options);
    };
  }

  /**
   * Checks if an error is retryable based on configuration
   * @param {Object} classifiedError - Classified error object
   * @returns {boolean} True only if the error is flagged retryable and its
   *   category is listed in `config.retryableErrors`; false for null/undefined
   */
  isRetryable(classifiedError) {
    if (!classifiedError?.retryable) {
      return false;
    }
    return this.config.retryableErrors.includes(classifiedError.category);
  }

  /**
   * Updates retry configuration
   *
   * The merged configuration is re-validated through RetryStrategySchema.
   *
   * @param {Object} updates - Configuration updates
   * @returns {void}
   * @throws {Error} If the merged configuration is rejected by the schema
   */
  updateConfig(updates) {
    this.config = RetryStrategySchema.parse({
      ...this.config,
      ...updates
    });
  }

  /**
   * Gets current configuration
   * @returns {Object} Copy of the current configuration; the
   *   `retryableErrors` array is copied too, so callers cannot mutate the
   *   strategy's internal state through it
   */
  getConfig() {
    return {
      ...this.config,
      retryableErrors: [...this.config.retryableErrors]
    };
  }
}
204
+
205
/**
 * Factory for RetryStrategy instances.
 * @param {Object} [config] - Retry configuration, forwarded unchanged to the constructor
 * @returns {RetryStrategy} Newly constructed retry strategy
 */
export function createRetryStrategy(config) {
  const strategy = new RetryStrategy(config);
  return strategy;
}
213
+
214
/**
 * Runs an operation with immediate retries: up to 3 attempts, with an
 * initial delay of 0 and a multiplier of 1 so no backoff builds up.
 * @param {Function} operation - Operation to execute
 * @returns {Promise<any>} Operation result
 */
export async function immediateRetry(operation) {
  const immediateConfig = {
    maxAttempts: 3,
    initialDelay: 0,
    backoffMultiplier: 1
  };
  return new RetryStrategy(immediateConfig).execute(operation);
}
227
+
228
/**
 * Runs an operation with exponential backoff: up to 5 attempts, starting at
 * 1000 ms, doubling each time and capped at 30000 ms.
 * @param {Function} operation - Operation to execute
 * @returns {Promise<any>} Operation result
 */
export async function exponentialRetry(operation) {
  const backoffConfig = {
    maxAttempts: 5,
    initialDelay: 1000,
    maxDelay: 30000,
    backoffMultiplier: 2
  };
  return new RetryStrategy(backoffConfig).execute(operation);
}
@@ -0,0 +1,185 @@
1
+ /**
2
+ * @file Zod schemas for self-healing workflows
3
+ * @module @unrdf/self-healing-workflows/schemas
4
+ * @description Validation schemas for error recovery, retry strategies, and health monitoring
5
+ */
6
+
7
+ import { z } from 'zod';
8
+
9
+ /**
10
+ * Error severity levels: a Zod string enum of 'critical', 'high', 'medium' and 'low', listed from most to least severe
11
+ */
12
+ export const ErrorSeveritySchema = z.enum([
13
+ 'critical', // Unrecoverable, requires manual intervention
14
+ 'high', // Severe but potentially recoverable
15
+ 'medium', // Standard errors, retry likely to succeed
16
+ 'low' // Minor errors, safe to retry
17
+ ]);
18
+
19
+ /**
20
+ * Error categories for classification: a Zod string enum, also used as the element type of RetryStrategySchema.retryableErrors
21
+ */
22
+ export const ErrorCategorySchema = z.enum([
23
+ 'network', // Network connectivity issues
24
+ 'timeout', // Operation timeout
25
+ 'validation', // Data validation failure
26
+ 'resource', // Resource unavailable (memory, disk, etc.)
27
+ 'dependency', // External dependency failure
28
+ 'business-logic', // Business rule violation
29
+ 'unknown' // Unclassified error
30
+ ]);
31
+
32
+ /**
33
+ * Recovery action types: a Zod string enum naming the kind of a recovery action or recovery result
34
+ */
35
+ export const RecoveryActionTypeSchema = z.enum([
36
+ 'retry', // Retry the operation
37
+ 'skip', // Skip and continue
38
+ 'compensate', // Execute compensating transaction
39
+ 'restart', // Restart the workflow
40
+ 'fallback', // Use fallback strategy
41
+ 'manual' // Require manual intervention
42
+ ]);
43
+
44
+ /**
45
+ * Retry strategy configuration: every field has a default, so an empty object parses to a complete config; no rule here relates initialDelay to maxDelay
46
+ */
47
+ export const RetryStrategySchema = z.object({
48
+ maxAttempts: z.number().int().positive().default(3),
49
+ initialDelay: z.number().nonnegative().default(1000),
50
+ maxDelay: z.number().positive().default(30000),
51
+ backoffMultiplier: z.number().positive().default(2),
52
+ jitter: z.boolean().default(true),
53
+ retryableErrors: z.array(ErrorCategorySchema).default(['network', 'timeout', 'resource'])
54
+ });
55
+
56
+ /**
57
+ * Circuit breaker configuration: all fields are positive numbers with defaults; the two thresholds must additionally be integers
58
+ */
59
+ export const CircuitBreakerConfigSchema = z.object({
60
+ failureThreshold: z.number().int().positive().default(5),
61
+ successThreshold: z.number().int().positive().default(2),
62
+ timeout: z.number().positive().default(60000),
63
+ resetTimeout: z.number().positive().default(30000),
64
+ monitoringPeriod: z.number().positive().default(10000)
65
+ });
66
+
67
+ /**
68
+ * Circuit breaker states: a Zod string enum of 'closed', 'open' and 'half-open'
69
+ */
70
+ export const CircuitBreakerStateSchema = z.enum([
71
+ 'closed', // Normal operation, requests allowed
72
+ 'open', // Failure threshold exceeded, requests blocked
73
+ 'half-open' // Testing if service recovered
74
+ ]);
75
+
76
+ /**
77
+ * Error pattern for classification: a non-empty name plus a pattern (string or RegExp instance) tied to a category and severity; metadata is optional
78
+ */
79
+ export const ErrorPatternSchema = z.object({
80
+ name: z.string().min(1),
81
+ category: ErrorCategorySchema,
82
+ severity: ErrorSeveritySchema,
83
+ pattern: z.union([
84
+ z.string(),
85
+ z.instanceof(RegExp)
86
+ ]),
87
+ metadata: z.record(z.unknown()).optional()
88
+ });
89
+
90
+ /**
91
+ * Classified error: the original Error instance together with its category, severity, retryable flag and a numeric timestamp; matchedPattern and metadata are optional
92
+ */
93
+ export const ClassifiedErrorSchema = z.object({
94
+ originalError: z.instanceof(Error),
95
+ category: ErrorCategorySchema,
96
+ severity: ErrorSeveritySchema,
97
+ matchedPattern: z.string().optional(),
98
+ retryable: z.boolean(),
99
+ timestamp: z.number(),
100
+ metadata: z.record(z.unknown()).optional()
101
+ });
102
+
103
+ /**
104
+ * Recovery action: a typed, non-empty-named action with a required execute function, an optional condition function and an integer priority in 0-100 (default 50)
105
+ */
106
+ export const RecoveryActionSchema = z.object({
107
+ type: RecoveryActionTypeSchema,
108
+ name: z.string().min(1),
109
+ execute: z.function(),
110
+ condition: z.function().optional(),
111
+ priority: z.number().int().min(0).max(100).default(50),
112
+ metadata: z.record(z.unknown()).optional()
113
+ });
114
+
115
+ /**
116
+ * Recovery result: success flag, the action type used, a non-negative integer attempt count and non-negative duration, plus an optional Error and metadata
117
+ */
118
+ export const RecoveryResultSchema = z.object({
119
+ success: z.boolean(),
120
+ action: RecoveryActionTypeSchema,
121
+ attempts: z.number().int().nonnegative(),
122
+ duration: z.number().nonnegative(),
123
+ error: z.instanceof(Error).optional(),
124
+ metadata: z.record(z.unknown()).optional()
125
+ });
126
+
127
+ /**
128
+ * Health check configuration: all fields are positive numbers with defaults; the two thresholds must additionally be integers
129
+ */
130
+ export const HealthCheckConfigSchema = z.object({
131
+ interval: z.number().positive().default(30000),
132
+ timeout: z.number().positive().default(5000),
133
+ unhealthyThreshold: z.number().int().positive().default(3),
134
+ healthyThreshold: z.number().int().positive().default(2)
135
+ });
136
+
137
+ /**
138
+ * Health status: a Zod string enum of 'healthy', 'degraded' and 'unhealthy', used for both overall and per-check status in HealthCheckResultSchema
139
+ */
140
+ export const HealthStatusSchema = z.enum([
141
+ 'healthy',
142
+ 'degraded',
143
+ 'unhealthy'
144
+ ]);
145
+
146
+ /**
147
+ * Health check result: overall status and numeric timestamp plus a list of individual checks (name, status, optional message, non-negative duration); metadata is optional
148
+ */
149
+ export const HealthCheckResultSchema = z.object({
150
+ status: HealthStatusSchema,
151
+ timestamp: z.number(),
152
+ checks: z.array(z.object({
153
+ name: z.string(),
154
+ status: HealthStatusSchema,
155
+ message: z.string().optional(),
156
+ duration: z.number().nonnegative()
157
+ })),
158
+ metadata: z.record(z.unknown()).optional()
159
+ });
160
+
161
+ /**
162
+ * Self-healing engine configuration: optional retry, circuit-breaker and health-check sub-configs; pattern and action lists default to empty, enableOtel to true and maxConcurrentRecoveries to 10
163
+ */
164
+ export const SelfHealingConfigSchema = z.object({
165
+ retry: RetryStrategySchema.optional(),
166
+ circuitBreaker: CircuitBreakerConfigSchema.optional(),
167
+ healthCheck: HealthCheckConfigSchema.optional(),
168
+ errorPatterns: z.array(ErrorPatternSchema).default([]),
169
+ recoveryActions: z.array(RecoveryActionSchema).default([]),
170
+ enableOtel: z.boolean().default(true),
171
+ maxConcurrentRecoveries: z.number().int().positive().default(10)
172
+ });
173
+
174
+ /**
175
+ * Recovery statistics: non-negative integer counters, a non-negative average recovery time, a successRate constrained to 0..1, and count records keyed by error category and by recovery action type
176
+ */
177
+ export const RecoveryStatsSchema = z.object({
178
+ totalAttempts: z.number().int().nonnegative(),
179
+ successfulRecoveries: z.number().int().nonnegative(),
180
+ failedRecoveries: z.number().int().nonnegative(),
181
+ averageRecoveryTime: z.number().nonnegative(),
182
+ successRate: z.number().min(0).max(1),
183
+ errorsByCategory: z.record(ErrorCategorySchema, z.number().int().nonnegative()),
184
+ actionsByType: z.record(RecoveryActionTypeSchema, z.number().int().nonnegative())
185
+ });