@msm-core/jobs 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/CHANGELOG.md +66 -0
  2. package/LICENSE +21 -0
  3. package/dist/approval/engine.d.ts +0 -1
  4. package/dist/approval/engine.js +0 -1
  5. package/dist/approval/policy.d.ts +0 -1
  6. package/dist/approval/policy.js +0 -1
  7. package/dist/config.d.ts +0 -1
  8. package/dist/config.js +0 -1
  9. package/dist/enums.d.ts +1 -2
  10. package/dist/enums.js +1 -1
  11. package/dist/index.d.ts +1 -2
  12. package/dist/index.js +2 -2
  13. package/dist/mission/cron.d.ts +0 -1
  14. package/dist/mission/cron.js +0 -1
  15. package/dist/mission/scheduler.d.ts +11 -2
  16. package/dist/mission/scheduler.js +22 -1
  17. package/dist/orchestrator.d.ts +0 -1
  18. package/dist/orchestrator.js +312 -222
  19. package/dist/port.d.ts +0 -1
  20. package/dist/port.js +0 -1
  21. package/dist/reconciler.d.ts +0 -1
  22. package/dist/reconciler.js +0 -1
  23. package/dist/resume-token.d.ts +0 -1
  24. package/dist/resume-token.js +0 -1
  25. package/dist/types.d.ts +12 -1
  26. package/dist/types.js +0 -1
  27. package/dist/workflow/registry.d.ts +7 -1
  28. package/dist/workflow/registry.js +21 -5
  29. package/package.json +8 -7
  30. package/dist/approval/engine.d.ts.map +0 -1
  31. package/dist/approval/engine.js.map +0 -1
  32. package/dist/approval/policy.d.ts.map +0 -1
  33. package/dist/approval/policy.js.map +0 -1
  34. package/dist/config.d.ts.map +0 -1
  35. package/dist/config.js.map +0 -1
  36. package/dist/enums.d.ts.map +0 -1
  37. package/dist/enums.js.map +0 -1
  38. package/dist/index.d.ts.map +0 -1
  39. package/dist/index.js.map +0 -1
  40. package/dist/mission/cron.d.ts.map +0 -1
  41. package/dist/mission/cron.js.map +0 -1
  42. package/dist/mission/scheduler.d.ts.map +0 -1
  43. package/dist/mission/scheduler.js.map +0 -1
  44. package/dist/orchestrator.d.ts.map +0 -1
  45. package/dist/orchestrator.js.map +0 -1
  46. package/dist/port.d.ts.map +0 -1
  47. package/dist/port.js.map +0 -1
  48. package/dist/reconciler.d.ts.map +0 -1
  49. package/dist/reconciler.js.map +0 -1
  50. package/dist/resume-token.d.ts.map +0 -1
  51. package/dist/resume-token.js.map +0 -1
  52. package/dist/types.d.ts.map +0 -1
  53. package/dist/types.js.map +0 -1
  54. package/dist/workflow/registry.d.ts.map +0 -1
  55. package/dist/workflow/registry.js.map +0 -1
package/dist/orchestrator.js CHANGED
@@ -19,11 +19,31 @@ function isDuplicateKeyError(error) {
19
19
  export function createOrchestrator(deps) {
20
20
  const { jobStore, queue, ops, clock, registry, stepExecutor, resumeToken, config } = deps;
21
21
  const runConfig = resolveRunConfig(config);
22
- function buildIdempotencyKey(input, version, currentStep) {
22
+ function buildIdempotencyKey(input, currentStep) {
23
23
  if (input.idempotencyKey)
24
24
  return input.idempotencyKey;
25
- const reason = input.reason || "queue";
26
- return `${input.jobId}:${version}:${currentStep}:${reason}`;
25
+ // STABLE across every re-delivery of the same logical step: a given job's
26
+ // step executes exactly once. The key MUST NOT include the version (the
27
+ // claim increments it on every delivery) or the trigger reason (varies by
28
+ // delivery) — either makes a post-crash re-delivery compute a different
29
+ // key, miss the already-recorded step, and re-run the side effect.
30
+ return `${input.jobId}:step:${currentStep}`;
31
+ }
32
+ /** Terminal step-failure path: force the job to `failed` and emit the event. */
33
+ async function failJob(input, claimed, error) {
34
+ await jobStore.markFailed({ jobId: input.jobId, tenantId: input.tenantId });
35
+ ops.record({
36
+ jobId: input.jobId,
37
+ tenantId: input.tenantId,
38
+ missionId: claimed.missionId,
39
+ source: "job_orchestrator",
40
+ eventType: "job.failed",
41
+ status: "failed",
42
+ queueName: QUEUE_NAME,
43
+ employeeId: claimed.employeeId,
44
+ correlationId: claimed.correlationId,
45
+ metadata: { error: error.message },
46
+ });
27
47
  }
28
48
  async function enqueueBurstContinuation(input) {
29
49
  const continuation = await jobStore.getJob(input.jobId, input.tenantId);
@@ -75,9 +95,65 @@ export function createOrchestrator(deps) {
75
95
  });
76
96
  if (!claimed)
77
97
  return; // CAS claim lost — another worker owns this tick.
78
- const idempotencyKey = buildIdempotencyKey(input, claimed.version, claimed.currentStep);
98
+ const idempotencyKey = buildIdempotencyKey(input, claimed.currentStep);
79
99
  const existingStep = await jobStore.findStepByIdempotencyKey(input.jobId, input.tenantId, idempotencyKey);
80
100
  if (existingStep) {
101
+ // The step already executed (its side effect ran exactly once). Two cases:
102
+ //
103
+ // (a) CRASH RECOVERY — the original finalize never landed (process died
104
+ // between insertStep and finalizeStep), so the job is still at the
105
+ // pre-step `currentStep`. Re-apply the transition the step recorded,
106
+ // advancing the job WITHOUT re-running the side effect, then keep it
107
+ // moving. This is what makes the engine crash-safe / exactly-once.
108
+ if (existingStep.finalize && claimed.currentStep < existingStep.stepNumber) {
109
+ const recovered = await jobStore.finalizeStep({
110
+ jobId: input.jobId,
111
+ tenantId: input.tenantId,
112
+ expectedVersion: claimed.version,
113
+ set: existingStep.finalize.set,
114
+ ...(existingStep.finalize.incCounters
115
+ ? { incCounters: existingStep.finalize.incCounters }
116
+ : {}),
117
+ });
118
+ ops.record({
119
+ jobId: input.jobId,
120
+ tenantId: input.tenantId,
121
+ missionId: claimed.missionId,
122
+ source: "job_orchestrator",
123
+ eventType: "job.step.recovered",
124
+ status: recovered?.status ?? claimed.status,
125
+ queueName: QUEUE_NAME,
126
+ employeeId: claimed.employeeId,
127
+ correlationId: claimed.correlationId,
128
+ metadata: { idempotencyKey, stepNumber: existingStep.stepNumber },
129
+ });
130
+ // Keep the recovered job alive: a waiting_time job needs its resume
131
+ // re-enqueued; a runnable status needs another tick. Terminal /
132
+ // waiting_event / awaiting_approval statuses are driven externally.
133
+ if (recovered && config.resumeMode !== "scan_only") {
134
+ if (recovered.status === "waiting_time" && recovered.resumeAt) {
135
+ await queue.enqueue({
136
+ jobId: input.jobId,
137
+ tenantId: input.tenantId,
138
+ reason: "crash_recovery_resume",
139
+ delayMs: Math.max(1, recovered.resumeAt.getTime() - clock.now().getTime()),
140
+ dedupeKey: `recover:${existingStep.stepNumber}`,
141
+ });
142
+ }
143
+ else if (isRunnableJobStatus(recovered.status)) {
144
+ await queue.enqueue({
145
+ jobId: input.jobId,
146
+ tenantId: input.tenantId,
147
+ reason: "crash_recovery_continue",
148
+ dedupeKey: `recover:${existingStep.stepNumber}`,
149
+ });
150
+ }
151
+ }
152
+ return;
153
+ }
154
+ // (b) CONCURRENT DUPLICATE — the finalize already landed (another worker
155
+ // advanced the job past this step). Just restore our `running` claim
156
+ // to the correct resting status and skip.
81
157
  const restoredStatus = claimed.waitEvent
82
158
  ? "waiting_event"
83
159
  : claimed.resumeAt
@@ -121,8 +197,11 @@ export function createOrchestrator(deps) {
121
197
  correlationId: claimed.correlationId,
122
198
  metadata: { stepNumber, reason: input.reason || "queue", idempotencyKey },
123
199
  });
200
+ // ── Resolve the next transition (pure). A throw here is a workflow-definition
201
+ // bug — fail the job. ──
202
+ let resolution;
124
203
  try {
125
- const resolution = registry.resolveStep({
204
+ resolution = registry.resolveStep({
126
205
  workflowType: claimed.workflowType,
127
206
  workflowVersion: claimed.workflowVersion,
128
207
  budget: claimed.budget,
@@ -131,276 +210,288 @@ export function createOrchestrator(deps) {
131
210
  reason: input.reason || "queue",
132
211
  now: stepStartedAt,
133
212
  });
134
- // COST CEILING (P2-8): before a COSTLY step, if the tenant has hit its day-ceiling, DEFER the
135
- // job to the next UTC midnight (waiting_time) instead of running the step. No step is recorded.
136
- if (resolution.stepType === "run_interaction_task" &&
137
- config.costCeilingUsd > 0 &&
138
- deps.costPort) {
139
- const now2 = clock.now();
140
- const spend = await deps.costPort.tenantDaySpendUsd(input.tenantId, startOfUtcDay(now2));
141
- if (spend >= config.costCeilingUsd) {
142
- const resumeAt = nextUtcMidnight(now2);
143
- const deferred = await jobStore.finalizeStep({
144
- jobId: input.jobId,
145
- tenantId: input.tenantId,
146
- expectedVersion: claimed.version,
147
- set: { status: "waiting_time", resumeAt },
148
- });
149
- ops.record({
213
+ }
214
+ catch (error) {
215
+ await failJob(input, claimed, error);
216
+ throw error;
217
+ }
218
+ // COST CEILING (P2-8): before a COSTLY step, if the tenant has hit its day-ceiling, DEFER the
219
+ // job to the next UTC midnight (waiting_time) instead of running the step. No step is recorded.
220
+ if (resolution.stepType === "run_interaction_task" &&
221
+ config.costCeilingUsd > 0 &&
222
+ deps.costPort) {
223
+ const now2 = clock.now();
224
+ const spend = await deps.costPort.tenantDaySpendUsd(input.tenantId, startOfUtcDay(now2));
225
+ if (spend >= config.costCeilingUsd) {
226
+ const resumeAt = nextUtcMidnight(now2);
227
+ const deferred = await jobStore.finalizeStep({
228
+ jobId: input.jobId,
229
+ tenantId: input.tenantId,
230
+ expectedVersion: claimed.version,
231
+ set: { status: "waiting_time", resumeAt },
232
+ });
233
+ ops.record({
234
+ jobId: input.jobId,
235
+ tenantId: input.tenantId,
236
+ missionId: claimed.missionId,
237
+ source: "job_orchestrator",
238
+ eventType: "job.cost_ceiling_deferred",
239
+ status: "waiting_time",
240
+ queueName: QUEUE_NAME,
241
+ employeeId: claimed.employeeId,
242
+ correlationId: claimed.correlationId,
243
+ metadata: { spendUsd: spend, ceilingUsd: config.costCeilingUsd, resumeAt: resumeAt.toISOString() },
244
+ });
245
+ if (deferred && config.resumeMode !== "scan_only") {
246
+ await queue.enqueue({
150
247
  jobId: input.jobId,
151
248
  tenantId: input.tenantId,
152
- missionId: claimed.missionId,
153
- source: "job_orchestrator",
154
- eventType: "job.cost_ceiling_deferred",
155
- status: "waiting_time",
156
- queueName: QUEUE_NAME,
157
- employeeId: claimed.employeeId,
158
- correlationId: claimed.correlationId,
159
- metadata: { spendUsd: spend, ceilingUsd: config.costCeilingUsd, resumeAt: resumeAt.toISOString() },
249
+ reason: "cost_ceiling_resume",
250
+ delayMs: Math.max(1, resumeAt.getTime() - now2.getTime()),
251
+ dedupeKey: `ceiling:${resumeAt.getTime()}`,
160
252
  });
161
- if (deferred && config.resumeMode !== "scan_only") {
162
- await queue.enqueue({
163
- jobId: input.jobId,
164
- tenantId: input.tenantId,
165
- reason: "cost_ceiling_resume",
166
- delayMs: Math.max(1, resumeAt.getTime() - now2.getTime()),
167
- dedupeKey: `ceiling:${resumeAt.getTime()}`,
168
- });
169
- }
170
- return;
171
253
  }
254
+ return;
172
255
  }
173
- // Run the real side-effecting work (agent turn for run_interaction_task; {} for
174
- // pure transitions). Throwing here fails the step via the catch below.
175
- const execResult = await stepExecutor.execute({
256
+ }
257
+ // ── Run the real side-effecting work (agent turn for run_interaction_task; {}
258
+ // for pure transitions). A throw here is a genuine STEP failure — fail the
259
+ // job. (Infra failures further down do NOT fail the job; see below.) ──
260
+ let execResult;
261
+ try {
262
+ execResult = await stepExecutor.execute({
176
263
  stepType: resolution.stepType,
177
264
  triggeredBy: resolution.triggeredBy,
178
265
  job: claimed,
179
266
  resolution,
180
267
  stepNumber,
181
268
  });
182
- const stepEndedAt = clock.now();
183
- const durationMs = stepEndedAt.getTime() - stepStartedAt.getTime();
184
- const mergedOutput = { ...(resolution.output ?? {}), ...(execResult.output ?? {}) };
185
- const mergedStatePatch = {
186
- ...(resolution.statePatch ?? {}),
187
- ...(execResult.statePatch ?? {}),
188
- };
189
- const step = {
190
- stepId: deps.genStepId(),
269
+ }
270
+ catch (error) {
271
+ await failJob(input, claimed, error);
272
+ throw error;
273
+ }
274
+ const stepEndedAt = clock.now();
275
+ const durationMs = stepEndedAt.getTime() - stepStartedAt.getTime();
276
+ const mergedOutput = { ...(resolution.output ?? {}), ...(execResult.output ?? {}) };
277
+ const mergedStatePatch = {
278
+ ...(resolution.statePatch ?? {}),
279
+ ...(execResult.statePatch ?? {}),
280
+ };
281
+ // Compute the FULL transition BEFORE recording the step, so the step carries
282
+ // the durable decision (crash recovery re-applies it WITHOUT re-running).
283
+ const nextStatus = resolution.nextStatus;
284
+ const nextResumeAt = nextStatus === "waiting_time"
285
+ ? new Date(clock.now().getTime() + (resolution.resumeDelayMs ?? config.defaultWaitMs))
286
+ : null;
287
+ const waitEventName = resolution.waitEventName || "job.resume";
288
+ const issuedWaitEventToken = nextStatus === "waiting_event"
289
+ ? resumeToken.issue({
191
290
  jobId: input.jobId,
192
291
  tenantId: input.tenantId,
292
+ eventName: waitEventName,
293
+ })
294
+ : null;
295
+ const nextWaitEvent = nextStatus === "waiting_event"
296
+ ? { eventName: waitEventName, token: issuedWaitEventToken?.token ?? null }
297
+ : null;
298
+ const nextState = { ...(claimed.state ?? {}), ...mergedStatePatch };
299
+ // Accrue this step's USD cost onto the job (powers the tenant/day ceiling, P2-8).
300
+ const stepCostUsd = typeof execResult.costUsd === "number" && execResult.costUsd > 0 ? execResult.costUsd : 0;
301
+ const finalizeArgs = {
302
+ set: {
303
+ currentStep: stepNumber,
304
+ status: nextStatus,
305
+ resumeAt: nextResumeAt,
306
+ waitEvent: nextWaitEvent,
307
+ state: nextState,
308
+ completedAt: isTerminalJobStatus(nextStatus) ? stepEndedAt : null,
309
+ },
310
+ incCounters: { totalStepsExecuted: 1, attempt: 1, ...(stepCostUsd ? { totalCostUsd: stepCostUsd } : {}) },
311
+ };
312
+ const step = {
313
+ stepId: deps.genStepId(),
314
+ jobId: input.jobId,
315
+ tenantId: input.tenantId,
316
+ stepNumber,
317
+ stepType: resolution.stepType,
318
+ status: "completed",
319
+ triggeredBy: resolution.triggeredBy,
320
+ startedAt: stepStartedAt,
321
+ endedAt: stepEndedAt,
322
+ durationMs,
323
+ toolCalls: [],
324
+ output: mergedOutput,
325
+ error: null,
326
+ nextStep: null,
327
+ transitionReason: resolution.transitionReason,
328
+ idempotencyKey,
329
+ finalize: finalizeArgs,
330
+ };
331
+ // ── Record the step (the idempotency marker + durable transition). A concurrent
332
+ // insert of the same key → skip. ANY OTHER throw here is an infra/transport
333
+ // failure (Mongo blip): it must PROPAGATE so the queue retries the tick — it
334
+ // must NOT fail an otherwise-healthy job. The stable idempotency key makes the
335
+ // retry safe (it finds this step and recovers). ──
336
+ try {
337
+ await jobStore.insertStep(step);
338
+ }
339
+ catch (error) {
340
+ if (isDuplicateKeyError(error)) {
341
+ ops.record({
342
+ jobId: input.jobId,
343
+ tenantId: input.tenantId,
344
+ missionId: claimed.missionId,
345
+ source: "job_orchestrator",
346
+ eventType: "job.step.duplicate_skipped",
347
+ status: claimed.status,
348
+ queueName: QUEUE_NAME,
349
+ employeeId: claimed.employeeId,
350
+ correlationId: claimed.correlationId,
351
+ metadata: { idempotencyKey },
352
+ });
353
+ return;
354
+ }
355
+ throw error; // retryable infra error — do NOT markFailed (H4)
356
+ }
357
+ const finalized = await jobStore.finalizeStep({
358
+ jobId: input.jobId,
359
+ tenantId: input.tenantId,
360
+ expectedVersion: claimed.version,
361
+ ...finalizeArgs,
362
+ });
363
+ if (!finalized)
364
+ return; // CAS finalize lost (or crash); a re-delivery recovers it.
365
+ stepsExecuted += 1;
366
+ ops.record({
367
+ jobId: input.jobId,
368
+ tenantId: input.tenantId,
369
+ missionId: finalized.missionId,
370
+ source: "job_orchestrator",
371
+ eventType: "job.step.completed",
372
+ status: finalized.status,
373
+ queueName: QUEUE_NAME,
374
+ employeeId: finalized.employeeId,
375
+ correlationId: finalized.correlationId,
376
+ metadata: {
193
377
  stepNumber,
194
378
  stepType: resolution.stepType,
195
- status: "completed",
196
- triggeredBy: resolution.triggeredBy,
197
- startedAt: stepStartedAt,
198
- endedAt: stepEndedAt,
199
- durationMs,
200
- toolCalls: [],
201
- output: mergedOutput,
202
- error: null,
203
- nextStep: null,
204
379
  transitionReason: resolution.transitionReason,
380
+ durationMs,
205
381
  idempotencyKey,
206
- };
207
- await jobStore.insertStep(step);
208
- const nextStatus = resolution.nextStatus;
209
- const nextResumeAt = nextStatus === "waiting_time"
210
- ? new Date(clock.now().getTime() + (resolution.resumeDelayMs ?? config.defaultWaitMs))
211
- : null;
212
- const waitEventName = resolution.waitEventName || "job.resume";
213
- const issuedWaitEventToken = nextStatus === "waiting_event"
214
- ? resumeToken.issue({
215
- jobId: input.jobId,
216
- tenantId: input.tenantId,
217
- eventName: waitEventName,
218
- })
219
- : null;
220
- const nextWaitEvent = nextStatus === "waiting_event"
221
- ? { eventName: waitEventName, token: issuedWaitEventToken?.token ?? null }
222
- : null;
223
- const nextState = { ...(claimed.state ?? {}), ...mergedStatePatch };
224
- // Accrue this step's USD cost onto the job (powers the tenant/day ceiling, P2-8).
225
- const stepCostUsd = typeof execResult.costUsd === "number" && execResult.costUsd > 0 ? execResult.costUsd : 0;
226
- const finalized = await jobStore.finalizeStep({
382
+ stepsExecutedThisTick: stepsExecuted,
383
+ maxStepsPerTick: runConfig.maxStepsPerTick,
384
+ },
385
+ });
386
+ if (nextStatus === "completed") {
387
+ ops.record({
227
388
  jobId: input.jobId,
228
389
  tenantId: input.tenantId,
229
- expectedVersion: claimed.version,
230
- set: {
231
- currentStep: stepNumber,
232
- status: nextStatus,
233
- resumeAt: nextResumeAt,
234
- waitEvent: nextWaitEvent,
235
- state: nextState,
236
- completedAt: isTerminalJobStatus(nextStatus) ? clock.now() : null,
237
- },
238
- incCounters: { totalStepsExecuted: 1, attempt: 1, ...(stepCostUsd ? { totalCostUsd: stepCostUsd } : {}) },
390
+ missionId: finalized.missionId,
391
+ source: "job_orchestrator",
392
+ eventType: "job.completed",
393
+ status: finalized.status,
394
+ queueName: QUEUE_NAME,
395
+ employeeId: finalized.employeeId,
396
+ correlationId: finalized.correlationId,
239
397
  });
240
- if (!finalized)
241
- return; // CAS finalize lost.
242
- stepsExecuted += 1;
398
+ return;
399
+ }
400
+ if (nextStatus === "failed") {
243
401
  ops.record({
244
402
  jobId: input.jobId,
245
403
  tenantId: input.tenantId,
246
404
  missionId: finalized.missionId,
247
405
  source: "job_orchestrator",
248
- eventType: "job.step.completed",
406
+ eventType: "job.failed",
249
407
  status: finalized.status,
250
408
  queueName: QUEUE_NAME,
251
409
  employeeId: finalized.employeeId,
252
410
  correlationId: finalized.correlationId,
253
- metadata: {
254
- stepNumber,
255
- stepType: resolution.stepType,
256
- transitionReason: resolution.transitionReason,
257
- durationMs,
258
- idempotencyKey,
259
- stepsExecutedThisTick: stepsExecuted,
260
- maxStepsPerTick: runConfig.maxStepsPerTick,
261
- },
262
411
  });
263
- if (nextStatus === "completed") {
264
- ops.record({
265
- jobId: input.jobId,
266
- tenantId: input.tenantId,
267
- missionId: finalized.missionId,
268
- source: "job_orchestrator",
269
- eventType: "job.completed",
270
- status: finalized.status,
271
- queueName: QUEUE_NAME,
272
- employeeId: finalized.employeeId,
273
- correlationId: finalized.correlationId,
274
- });
275
- return;
276
- }
277
- if (nextStatus === "failed") {
278
- ops.record({
279
- jobId: input.jobId,
280
- tenantId: input.tenantId,
281
- missionId: finalized.missionId,
282
- source: "job_orchestrator",
283
- eventType: "job.failed",
284
- status: finalized.status,
285
- queueName: QUEUE_NAME,
286
- employeeId: finalized.employeeId,
287
- correlationId: finalized.correlationId,
288
- });
289
- return;
290
- }
291
- if (nextStatus === "waiting_event") {
292
- ops.record({
293
- jobId: input.jobId,
294
- tenantId: input.tenantId,
295
- missionId: finalized.missionId,
296
- source: "job_orchestrator",
297
- eventType: "job.waiting_event",
298
- status: finalized.status,
299
- queueName: QUEUE_NAME,
300
- employeeId: finalized.employeeId,
301
- correlationId: finalized.correlationId,
302
- metadata: {
303
- stepNumber,
304
- waitEventName: nextWaitEvent?.eventName ?? null,
305
- resumeTokenExpiresAt: issuedWaitEventToken?.expiresAt ?? null,
306
- },
307
- });
308
- return;
309
- }
310
- if (nextStatus === "awaiting_approval") {
311
- await handleAwaitingApproval(finalized, resolution, stepNumber, input);
312
- return;
313
- }
314
- if (nextStatus !== "waiting_time")
315
- return;
412
+ return;
413
+ }
414
+ if (nextStatus === "waiting_event") {
316
415
  ops.record({
317
416
  jobId: input.jobId,
318
417
  tenantId: input.tenantId,
319
418
  missionId: finalized.missionId,
320
419
  source: "job_orchestrator",
321
- eventType: "job.waiting_time",
420
+ eventType: "job.waiting_event",
322
421
  status: finalized.status,
323
422
  queueName: QUEUE_NAME,
324
423
  employeeId: finalized.employeeId,
325
424
  correlationId: finalized.correlationId,
326
425
  metadata: {
327
426
  stepNumber,
328
- transitionReason: resolution.transitionReason,
329
- resumeAt: nextResumeAt?.toISOString(),
330
- resumeMode: config.resumeMode,
427
+ waitEventName: nextWaitEvent?.eventName ?? null,
428
+ resumeTokenExpiresAt: issuedWaitEventToken?.expiresAt ?? null,
331
429
  },
332
430
  });
333
- const resumeDelayMs = nextResumeAt
334
- ? Math.max(1, nextResumeAt.getTime() - clock.now().getTime())
335
- : config.defaultWaitMs;
336
- const canInlineContinue = stepsExecuted < runConfig.maxStepsPerTick &&
337
- resumeDelayMs <= runConfig.inlineResumeMaxDelayMs;
338
- if (canInlineContinue) {
339
- const inlined = await jobStore.claim({
340
- jobId: input.jobId,
341
- tenantId: input.tenantId,
342
- expectedVersion: finalized.version,
343
- statusIn: ["waiting_time"],
344
- set: { status: "queued", resumeAt: null },
345
- });
346
- if (!inlined)
347
- return;
348
- ops.record({
349
- jobId: input.jobId,
350
- tenantId: input.tenantId,
351
- missionId: inlined.missionId,
352
- source: "job_orchestrator",
353
- eventType: "job.waiting_time.inline_continued",
354
- status: inlined.status,
355
- queueName: QUEUE_NAME,
356
- employeeId: inlined.employeeId,
357
- correlationId: inlined.correlationId,
358
- metadata: { stepNumber, resumeDelayMs, inlineResumeMaxDelayMs: runConfig.inlineResumeMaxDelayMs },
359
- });
360
- continue;
361
- }
362
- if (config.resumeMode === "scan_only")
363
- return;
364
- await queue.enqueue({
431
+ return;
432
+ }
433
+ if (nextStatus === "awaiting_approval") {
434
+ await handleAwaitingApproval(finalized, resolution, stepNumber, input);
435
+ return;
436
+ }
437
+ if (nextStatus !== "waiting_time")
438
+ return;
439
+ ops.record({
440
+ jobId: input.jobId,
441
+ tenantId: input.tenantId,
442
+ missionId: finalized.missionId,
443
+ source: "job_orchestrator",
444
+ eventType: "job.waiting_time",
445
+ status: finalized.status,
446
+ queueName: QUEUE_NAME,
447
+ employeeId: finalized.employeeId,
448
+ correlationId: finalized.correlationId,
449
+ metadata: {
450
+ stepNumber,
451
+ transitionReason: resolution.transitionReason,
452
+ resumeAt: nextResumeAt?.toISOString(),
453
+ resumeMode: config.resumeMode,
454
+ },
455
+ });
456
+ const resumeDelayMs = nextResumeAt
457
+ ? Math.max(1, nextResumeAt.getTime() - clock.now().getTime())
458
+ : config.defaultWaitMs;
459
+ const canInlineContinue = stepsExecuted < runConfig.maxStepsPerTick &&
460
+ resumeDelayMs <= runConfig.inlineResumeMaxDelayMs;
461
+ if (canInlineContinue) {
462
+ const inlined = await jobStore.claim({
365
463
  jobId: input.jobId,
366
464
  tenantId: input.tenantId,
367
- reason: "waiting_time_resume",
368
- delayMs: resumeDelayMs,
369
- dedupeKey: `${stepNumber}:${idempotencyKey}`,
465
+ expectedVersion: finalized.version,
466
+ statusIn: ["waiting_time"],
467
+ set: { status: "queued", resumeAt: null },
370
468
  });
371
- return;
372
- }
373
- catch (error) {
374
- if (isDuplicateKeyError(error)) {
375
- ops.record({
376
- jobId: input.jobId,
377
- tenantId: input.tenantId,
378
- missionId: claimed.missionId,
379
- source: "job_orchestrator",
380
- eventType: "job.step.duplicate_skipped",
381
- status: claimed.status,
382
- queueName: QUEUE_NAME,
383
- employeeId: claimed.employeeId,
384
- correlationId: claimed.correlationId,
385
- metadata: { idempotencyKey },
386
- });
469
+ if (!inlined)
387
470
  return;
388
- }
389
- await jobStore.markFailed({ jobId: input.jobId, tenantId: input.tenantId });
390
471
  ops.record({
391
472
  jobId: input.jobId,
392
473
  tenantId: input.tenantId,
393
- missionId: claimed.missionId,
474
+ missionId: inlined.missionId,
394
475
  source: "job_orchestrator",
395
- eventType: "job.failed",
396
- status: "failed",
476
+ eventType: "job.waiting_time.inline_continued",
477
+ status: inlined.status,
397
478
  queueName: QUEUE_NAME,
398
- employeeId: claimed.employeeId,
399
- correlationId: claimed.correlationId,
400
- metadata: { error: error.message },
479
+ employeeId: inlined.employeeId,
480
+ correlationId: inlined.correlationId,
481
+ metadata: { stepNumber, resumeDelayMs, inlineResumeMaxDelayMs: runConfig.inlineResumeMaxDelayMs },
401
482
  });
402
- throw error;
483
+ continue;
403
484
  }
485
+ if (config.resumeMode === "scan_only")
486
+ return;
487
+ await queue.enqueue({
488
+ jobId: input.jobId,
489
+ tenantId: input.tenantId,
490
+ reason: "waiting_time_resume",
491
+ delayMs: resumeDelayMs,
492
+ dedupeKey: `${stepNumber}:${idempotencyKey}`,
493
+ });
494
+ return;
404
495
  }
405
496
  await enqueueBurstContinuation(input);
406
497
  }
@@ -452,4 +543,3 @@ export function createOrchestrator(deps) {
452
543
  }
453
544
  return { orchestrateJobStep };
454
545
  }
455
- //# sourceMappingURL=orchestrator.js.map
package/dist/port.d.ts CHANGED
@@ -178,4 +178,3 @@ export interface ApprovalCreatorPort {
178
178
  export interface IdGeneratorPort {
179
179
  jobStepId(): string;
180
180
  }
181
- //# sourceMappingURL=port.d.ts.map
package/dist/port.js CHANGED
@@ -14,4 +14,3 @@ export class DuplicateStepError extends Error {
14
14
  this.name = "DuplicateStepError";
15
15
  }
16
16
  }
17
- //# sourceMappingURL=port.js.map
package/dist/reconciler.d.ts CHANGED
@@ -22,4 +22,3 @@ export interface Reconciler {
22
22
  reconcileWaitingTimeJobs(asOf?: Date): Promise<WaitingTimeReconcileResult>;
23
23
  }
24
24
  export declare function createReconciler(deps: ReconcilerDeps): Reconciler;
25
- //# sourceMappingURL=reconciler.d.ts.map
package/dist/reconciler.js CHANGED
@@ -72,4 +72,3 @@ export function createReconciler(deps) {
72
72
  }
73
73
  return { reconcileWaitingTimeJobs };
74
74
  }
75
- //# sourceMappingURL=reconciler.js.map
package/dist/resume-token.d.ts CHANGED
@@ -37,4 +37,3 @@ export interface ResumeTokenService {
37
37
  ttlSeconds: number;
38
38
  }
39
39
  export declare function createResumeTokenService(deps: ResumeTokenDeps): ResumeTokenService;
40
- //# sourceMappingURL=resume-token.d.ts.map