@onlineapps/conn-orch-orchestrator 1.0.43 → 1.0.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/WorkflowOrchestrator.js +114 -36
package/package.json
CHANGED
package/src/WorkflowOrchestrator.js
CHANGED
|
@@ -37,6 +37,14 @@ class WorkflowOrchestrator {
|
|
|
37
37
|
this.logger = config.logger || console;
|
|
38
38
|
this.defaultTimeout = config.defaultTimeout || 30000;
|
|
39
39
|
|
|
40
|
+
// Retry configuration for DLQ mechanism
|
|
41
|
+
this.retryConfig = {
|
|
42
|
+
maxAttempts: config.maxRetryAttempts || 3,
|
|
43
|
+
baseDelay: config.retryBaseDelay || 1000,
|
|
44
|
+
maxDelay: config.retryMaxDelay || 30000,
|
|
45
|
+
backoffMultiplier: config.retryBackoffMultiplier || 2
|
|
46
|
+
};
|
|
47
|
+
|
|
40
48
|
// Create cookbook router - createRouter is an exported function, not a method
|
|
41
49
|
const { createRouter } = this.cookbook;
|
|
42
50
|
if (typeof createRouter === 'function') {
|
|
@@ -230,61 +238,131 @@ class WorkflowOrchestrator {
|
|
|
230
238
|
};
|
|
231
239
|
|
|
232
240
|
} catch (error) {
|
|
233
|
-
//
|
|
241
|
+
// DLQ/Retry mechanism: retry with exponential backoff before sending to DLQ
|
|
242
|
+
const dlqHistory = context._dlq_history || [];
|
|
243
|
+
const stepAttempts = dlqHistory.filter(h => h.step_id === current_step && h.attempt).length;
|
|
244
|
+
const attemptNumber = stepAttempts + 1;
|
|
245
|
+
|
|
246
|
+
// Calculate step_index for failed step
|
|
247
|
+
let failedStepIndex = 0;
|
|
248
|
+
try {
|
|
249
|
+
const stepsArr = this._getStepsArray(cookbookDef);
|
|
250
|
+
const idx = stepsArr.findIndex(s => this._getStepId(s) === current_step);
|
|
251
|
+
if (idx >= 0) failedStepIndex = idx;
|
|
252
|
+
} catch (e) {
|
|
253
|
+
// Ignore - use default 0
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
// Add attempt to DLQ history
|
|
257
|
+
const attemptRecord = {
|
|
258
|
+
step_id: current_step,
|
|
259
|
+
step_index: failedStepIndex,
|
|
260
|
+
attempt: attemptNumber,
|
|
261
|
+
service: serviceName,
|
|
262
|
+
error: error.message,
|
|
263
|
+
at: new Date().toISOString()
|
|
264
|
+
};
|
|
265
|
+
|
|
266
|
+
const updatedDlqHistory = [...dlqHistory, attemptRecord];
|
|
267
|
+
const updatedContext = {
|
|
268
|
+
...context,
|
|
269
|
+
_dlq_history: updatedDlqHistory
|
|
270
|
+
};
|
|
271
|
+
|
|
272
|
+
this.logger.warn(`[WorkflowOrchestrator] Step failed, attempt ${attemptNumber}/${this.retryConfig.maxAttempts}`, {
|
|
273
|
+
workflow_id,
|
|
274
|
+
current_step,
|
|
275
|
+
service: serviceName,
|
|
276
|
+
error: error.message,
|
|
277
|
+
attempt: attemptNumber
|
|
278
|
+
});
|
|
279
|
+
|
|
280
|
+
// Check if we should retry
|
|
281
|
+
if (attemptNumber < this.retryConfig.maxAttempts) {
|
|
282
|
+
// Calculate delay with exponential backoff
|
|
283
|
+
const delay = Math.min(
|
|
284
|
+
this.retryConfig.baseDelay * Math.pow(this.retryConfig.backoffMultiplier, attemptNumber - 1),
|
|
285
|
+
this.retryConfig.maxDelay
|
|
286
|
+
);
|
|
287
|
+
|
|
288
|
+
this.logger.info(`[WorkflowOrchestrator] Scheduling retry ${attemptNumber + 1} in ${delay}ms`, {
|
|
289
|
+
workflow_id,
|
|
290
|
+
current_step,
|
|
291
|
+
delay
|
|
292
|
+
});
|
|
293
|
+
|
|
294
|
+
// Wait for delay
|
|
295
|
+
await new Promise(resolve => setTimeout(resolve, delay));
|
|
296
|
+
|
|
297
|
+
// Republish to same service queue for retry
|
|
298
|
+
const serviceQueue = `${serviceName}.workflow`;
|
|
299
|
+
await this.mqClient.publish(serviceQueue, {
|
|
300
|
+
workflow_id,
|
|
301
|
+
cookbook: cookbookDef,
|
|
302
|
+
current_step,
|
|
303
|
+
context: updatedContext
|
|
304
|
+
});
|
|
305
|
+
|
|
306
|
+
this.logger.info(`[WorkflowOrchestrator] Message republished for retry`, {
|
|
307
|
+
workflow_id,
|
|
308
|
+
current_step,
|
|
309
|
+
queue: serviceQueue,
|
|
310
|
+
nextAttempt: attemptNumber + 1
|
|
311
|
+
});
|
|
312
|
+
|
|
313
|
+
// Don't throw - message will be retried
|
|
314
|
+
return { retrying: true, attempt: attemptNumber, nextAttempt: attemptNumber + 1 };
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
// Max retries exhausted - send to workflow.failed (DLQ entry)
|
|
318
|
+
const dlqEntryRecord = {
|
|
319
|
+
dlq_entered: true,
|
|
320
|
+
step_id: current_step,
|
|
321
|
+
step_index: failedStepIndex,
|
|
322
|
+
service: serviceName,
|
|
323
|
+
reason: 'max_retries_exceeded',
|
|
324
|
+
total_attempts: attemptNumber,
|
|
325
|
+
at: new Date().toISOString()
|
|
326
|
+
};
|
|
327
|
+
|
|
328
|
+
const finalDlqHistory = [...updatedDlqHistory, dlqEntryRecord];
|
|
329
|
+
const finalContext = {
|
|
330
|
+
...context,
|
|
331
|
+
_dlq_history: finalDlqHistory
|
|
332
|
+
};
|
|
333
|
+
|
|
234
334
|
try {
|
|
235
|
-
console.log(`[WorkflowOrchestrator] [
|
|
236
|
-
this.logger.error(`[WorkflowOrchestrator] [
|
|
335
|
+
console.log(`[WorkflowOrchestrator] [DLQ] Max retries exhausted, publishing to workflow.failed for ${workflow_id}`);
|
|
336
|
+
this.logger.error(`[WorkflowOrchestrator] [DLQ] Workflow entering DLQ after ${attemptNumber} attempts: ${error.message}`, {
|
|
237
337
|
workflow_id,
|
|
238
338
|
current_step,
|
|
339
|
+
attempts: attemptNumber,
|
|
239
340
|
error: error.stack
|
|
240
341
|
});
|
|
241
342
|
|
|
242
|
-
// Calculate step_index for failed step
|
|
243
|
-
let failedStepIndex = 0;
|
|
244
|
-
try {
|
|
245
|
-
const stepsArr = this._getStepsArray(cookbookDef);
|
|
246
|
-
const idx = stepsArr.findIndex(s => this._getStepId(s) === current_step);
|
|
247
|
-
if (idx >= 0) failedStepIndex = idx;
|
|
248
|
-
} catch (e) {
|
|
249
|
-
// Ignore - use default 0
|
|
250
|
-
}
|
|
251
|
-
|
|
252
343
|
await this.mqClient.publish('workflow.failed', {
|
|
253
344
|
workflow_id,
|
|
254
345
|
current_step,
|
|
255
|
-
step_id: current_step,
|
|
256
|
-
step_index: failedStepIndex,
|
|
257
|
-
service: serviceName,
|
|
258
|
-
cookbook: cookbookDef,
|
|
259
|
-
context:
|
|
346
|
+
step_id: current_step,
|
|
347
|
+
step_index: failedStepIndex,
|
|
348
|
+
service: serviceName,
|
|
349
|
+
cookbook: cookbookDef,
|
|
350
|
+
context: finalContext,
|
|
260
351
|
error: error.message,
|
|
261
352
|
errorStack: error.stack,
|
|
353
|
+
_dlq_history: finalDlqHistory,
|
|
262
354
|
failed_at: new Date().toISOString()
|
|
263
355
|
});
|
|
264
356
|
|
|
265
|
-
console.log(`[WorkflowOrchestrator] [
|
|
266
|
-
this.logger.info(`[WorkflowOrchestrator] [
|
|
357
|
+
console.log(`[WorkflowOrchestrator] [DLQ] ✓ Successfully published workflow.failed for ${workflow_id}`);
|
|
358
|
+
this.logger.info(`[WorkflowOrchestrator] [DLQ] ✓ Published workflow.failed: ${workflow_id}`);
|
|
267
359
|
} catch (publishError) {
|
|
268
|
-
console.error(`[WorkflowOrchestrator] [
|
|
269
|
-
this.logger.error(`[WorkflowOrchestrator] [
|
|
360
|
+
console.error(`[WorkflowOrchestrator] [DLQ] ✗ Failed to publish workflow.failed for ${workflow_id}:`, publishError.message);
|
|
361
|
+
this.logger.error(`[WorkflowOrchestrator] [DLQ] ✗ Failed to publish workflow.failed`, {
|
|
270
362
|
workflow_id,
|
|
271
363
|
originalError: error.message,
|
|
272
364
|
publishError: publishError.message
|
|
273
365
|
});
|
|
274
|
-
// Don't throw - original error is more important
|
|
275
|
-
}
|
|
276
|
-
|
|
277
|
-
// Handle error
|
|
278
|
-
if (this.errorHandler) {
|
|
279
|
-
await this.errorHandler.handleError(error, {
|
|
280
|
-
workflow_id,
|
|
281
|
-
current_step,
|
|
282
|
-
service: serviceName,
|
|
283
|
-
context
|
|
284
|
-
});
|
|
285
|
-
} else {
|
|
286
|
-
// Fallback to router DLQ
|
|
287
|
-
await this.router.routeToDLQ(cookbookDef, context, error, serviceName);
|
|
288
366
|
}
|
|
289
367
|
|
|
290
368
|
throw error;
|