@onlineapps/conn-orch-orchestrator 1.0.43 → 1.0.45

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@onlineapps/conn-orch-orchestrator",
3
- "version": "1.0.43",
3
+ "version": "1.0.45",
4
4
  "description": "Workflow orchestration connector for OA Drive - handles message routing and workflow execution",
5
5
  "main": "src/index.js",
6
6
  "scripts": {
@@ -37,6 +37,14 @@ class WorkflowOrchestrator {
37
37
  this.logger = config.logger || console;
38
38
  this.defaultTimeout = config.defaultTimeout || 30000;
39
39
 
40
+ // Retry configuration for DLQ mechanism
41
+ this.retryConfig = {
42
+ maxAttempts: config.maxRetryAttempts || 3,
43
+ baseDelay: config.retryBaseDelay || 1000,
44
+ maxDelay: config.retryMaxDelay || 30000,
45
+ backoffMultiplier: config.retryBackoffMultiplier || 2
46
+ };
47
+
40
48
  // Create cookbook router - createRouter is an exported function, not a method
41
49
  const { createRouter } = this.cookbook;
42
50
  if (typeof createRouter === 'function') {
@@ -230,61 +238,159 @@ class WorkflowOrchestrator {
230
238
  };
231
239
 
232
240
  } catch (error) {
233
- // Publish to workflow.failed queue with FULL context for monitoring
241
+ // DLQ/Retry mechanism: retry with exponential backoff before sending to DLQ
242
+ const dlqHistory = context._dlq_history || [];
243
+ const stepAttempts = dlqHistory.filter(h => h.step_id === current_step && h.attempt).length;
244
+ const attemptNumber = stepAttempts + 1;
245
+
246
+ // Calculate step_index for failed step
247
+ let failedStepIndex = 0;
248
+ try {
249
+ const stepsArr = this._getStepsArray(cookbookDef);
250
+ const idx = stepsArr.findIndex(s => this._getStepId(s) === current_step);
251
+ if (idx >= 0) failedStepIndex = idx;
252
+ } catch (e) {
253
+ // Ignore - use default 0
254
+ }
255
+
256
+ // IMPORTANT: Publish progress event for FAILED step (so it appears in trace)
257
+ // This ensures every cookbook step is visible in monitoring, even failed ones
258
+ const { publishToMonitoringWorkflow } = require('@onlineapps/mq-client-core').monitoring;
259
+ try {
260
+ await publishToMonitoringWorkflow(this.mqClient, {
261
+ event_type: 'progress',
262
+ workflow_id: workflow_id,
263
+ service_name: serviceName,
264
+ step_index: failedStepIndex,
265
+ step_id: current_step,
266
+ cookbook: cookbookDef,
267
+ context: context,
268
+ output: null,
269
+ error: {
270
+ message: error.message,
271
+ attempt: attemptNumber
272
+ },
273
+ status: 'failed',
274
+ timestamp: new Date().toISOString()
275
+ }, this.logger, { workflow_id, step_id: current_step });
276
+ } catch (monitoringError) {
277
+ // Don't fail workflow if monitoring publish fails
278
+ this.logger.warn('Failed to publish failed step progress to monitoring', {
279
+ workflow_id,
280
+ error: monitoringError.message
281
+ });
282
+ }
283
+
284
+ // Add attempt to DLQ history
285
+ const attemptRecord = {
286
+ step_id: current_step,
287
+ step_index: failedStepIndex,
288
+ attempt: attemptNumber,
289
+ service: serviceName,
290
+ error: error.message,
291
+ at: new Date().toISOString()
292
+ };
293
+
294
+ const updatedDlqHistory = [...dlqHistory, attemptRecord];
295
+ const updatedContext = {
296
+ ...context,
297
+ _dlq_history: updatedDlqHistory
298
+ };
299
+
300
+ this.logger.warn(`[WorkflowOrchestrator] Step failed, attempt ${attemptNumber}/${this.retryConfig.maxAttempts}`, {
301
+ workflow_id,
302
+ current_step,
303
+ service: serviceName,
304
+ error: error.message,
305
+ attempt: attemptNumber
306
+ });
307
+
308
+ // Check if we should retry
309
+ if (attemptNumber < this.retryConfig.maxAttempts) {
310
+ // Calculate delay with exponential backoff
311
+ const delay = Math.min(
312
+ this.retryConfig.baseDelay * Math.pow(this.retryConfig.backoffMultiplier, attemptNumber - 1),
313
+ this.retryConfig.maxDelay
314
+ );
315
+
316
+ this.logger.info(`[WorkflowOrchestrator] Scheduling retry ${attemptNumber + 1} in ${delay}ms`, {
317
+ workflow_id,
318
+ current_step,
319
+ delay
320
+ });
321
+
322
+ // Wait for delay
323
+ await new Promise(resolve => setTimeout(resolve, delay));
324
+
325
+ // Republish to same service queue for retry
326
+ const serviceQueue = `${serviceName}.workflow`;
327
+ await this.mqClient.publish(serviceQueue, {
328
+ workflow_id,
329
+ cookbook: cookbookDef,
330
+ current_step,
331
+ context: updatedContext
332
+ });
333
+
334
+ this.logger.info(`[WorkflowOrchestrator] Message republished for retry`, {
335
+ workflow_id,
336
+ current_step,
337
+ queue: serviceQueue,
338
+ nextAttempt: attemptNumber + 1
339
+ });
340
+
341
+ // Don't throw - message will be retried
342
+ return { retrying: true, attempt: attemptNumber, nextAttempt: attemptNumber + 1 };
343
+ }
344
+
345
+ // Max retries exhausted - send to workflow.failed (DLQ entry)
346
+ const dlqEntryRecord = {
347
+ dlq_entered: true,
348
+ step_id: current_step,
349
+ step_index: failedStepIndex,
350
+ service: serviceName,
351
+ reason: 'max_retries_exceeded',
352
+ total_attempts: attemptNumber,
353
+ at: new Date().toISOString()
354
+ };
355
+
356
+ const finalDlqHistory = [...updatedDlqHistory, dlqEntryRecord];
357
+ const finalContext = {
358
+ ...context,
359
+ _dlq_history: finalDlqHistory
360
+ };
361
+
234
362
  try {
235
- console.log(`[WorkflowOrchestrator] [PUBLISH] Preparing to publish workflow.failed for ${workflow_id}`);
236
- this.logger.error(`[WorkflowOrchestrator] [PUBLISH] Workflow processing failed: ${error.message}`, {
363
+ console.log(`[WorkflowOrchestrator] [DLQ] Max retries exhausted, publishing to workflow.failed for ${workflow_id}`);
364
+ this.logger.error(`[WorkflowOrchestrator] [DLQ] Workflow entering DLQ after ${attemptNumber} attempts: ${error.message}`, {
237
365
  workflow_id,
238
366
  current_step,
367
+ attempts: attemptNumber,
239
368
  error: error.stack
240
369
  });
241
370
 
242
- // Calculate step_index for failed step
243
- let failedStepIndex = 0;
244
- try {
245
- const stepsArr = this._getStepsArray(cookbookDef);
246
- const idx = stepsArr.findIndex(s => this._getStepId(s) === current_step);
247
- if (idx >= 0) failedStepIndex = idx;
248
- } catch (e) {
249
- // Ignore - use default 0
250
- }
251
-
252
371
  await this.mqClient.publish('workflow.failed', {
253
372
  workflow_id,
254
373
  current_step,
255
- step_id: current_step, // Explicit step_id for monitoring
256
- step_index: failedStepIndex, // Step index for ordering
257
- service: serviceName, // Service that failed
258
- cookbook: cookbookDef, // Full cookbook for context
259
- context: context, // Full context
374
+ step_id: current_step,
375
+ step_index: failedStepIndex,
376
+ service: serviceName,
377
+ cookbook: cookbookDef,
378
+ context: finalContext,
260
379
  error: error.message,
261
380
  errorStack: error.stack,
381
+ _dlq_history: finalDlqHistory,
262
382
  failed_at: new Date().toISOString()
263
383
  });
264
384
 
265
- console.log(`[WorkflowOrchestrator] [PUBLISH] ✓ Successfully published workflow.failed for ${workflow_id}`);
266
- this.logger.info(`[WorkflowOrchestrator] [PUBLISH] ✓ Published workflow.failed: ${workflow_id}`);
385
+ console.log(`[WorkflowOrchestrator] [DLQ] ✓ Successfully published workflow.failed for ${workflow_id}`);
386
+ this.logger.info(`[WorkflowOrchestrator] [DLQ] ✓ Published workflow.failed: ${workflow_id}`);
267
387
  } catch (publishError) {
268
- console.error(`[WorkflowOrchestrator] [PUBLISH] ✗ Failed to publish workflow.failed for ${workflow_id}:`, publishError.message);
269
- this.logger.error(`[WorkflowOrchestrator] [PUBLISH] ✗ Failed to publish workflow.failed`, {
388
+ console.error(`[WorkflowOrchestrator] [DLQ] ✗ Failed to publish workflow.failed for ${workflow_id}:`, publishError.message);
389
+ this.logger.error(`[WorkflowOrchestrator] [DLQ] ✗ Failed to publish workflow.failed`, {
270
390
  workflow_id,
271
391
  originalError: error.message,
272
392
  publishError: publishError.message
273
393
  });
274
- // Don't throw - original error is more important
275
- }
276
-
277
- // Handle error
278
- if (this.errorHandler) {
279
- await this.errorHandler.handleError(error, {
280
- workflow_id,
281
- current_step,
282
- service: serviceName,
283
- context
284
- });
285
- } else {
286
- // Fallback to router DLQ
287
- await this.router.routeToDLQ(cookbookDef, context, error, serviceName);
288
394
  }
289
395
 
290
396
  throw error;