@msm-core/jobs 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +66 -0
- package/LICENSE +21 -0
- package/dist/approval/engine.d.ts +0 -1
- package/dist/approval/engine.js +0 -1
- package/dist/approval/policy.d.ts +0 -1
- package/dist/approval/policy.js +0 -1
- package/dist/config.d.ts +0 -1
- package/dist/config.js +0 -1
- package/dist/enums.d.ts +0 -1
- package/dist/enums.js +0 -1
- package/dist/index.d.ts +0 -1
- package/dist/index.js +1 -1
- package/dist/mission/cron.d.ts +0 -1
- package/dist/mission/cron.js +0 -1
- package/dist/mission/scheduler.d.ts +11 -2
- package/dist/mission/scheduler.js +22 -1
- package/dist/orchestrator.d.ts +0 -1
- package/dist/orchestrator.js +312 -222
- package/dist/port.d.ts +0 -1
- package/dist/port.js +0 -1
- package/dist/reconciler.d.ts +0 -1
- package/dist/reconciler.js +0 -1
- package/dist/resume-token.d.ts +0 -1
- package/dist/resume-token.js +0 -1
- package/dist/types.d.ts +12 -1
- package/dist/types.js +0 -1
- package/dist/workflow/registry.d.ts +0 -1
- package/dist/workflow/registry.js +0 -1
- package/package.json +8 -7
- package/dist/approval/engine.d.ts.map +0 -1
- package/dist/approval/engine.js.map +0 -1
- package/dist/approval/policy.d.ts.map +0 -1
- package/dist/approval/policy.js.map +0 -1
- package/dist/config.d.ts.map +0 -1
- package/dist/config.js.map +0 -1
- package/dist/enums.d.ts.map +0 -1
- package/dist/enums.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/mission/cron.d.ts.map +0 -1
- package/dist/mission/cron.js.map +0 -1
- package/dist/mission/scheduler.d.ts.map +0 -1
- package/dist/mission/scheduler.js.map +0 -1
- package/dist/orchestrator.d.ts.map +0 -1
- package/dist/orchestrator.js.map +0 -1
- package/dist/port.d.ts.map +0 -1
- package/dist/port.js.map +0 -1
- package/dist/reconciler.d.ts.map +0 -1
- package/dist/reconciler.js.map +0 -1
- package/dist/resume-token.d.ts.map +0 -1
- package/dist/resume-token.js.map +0 -1
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js.map +0 -1
- package/dist/workflow/registry.d.ts.map +0 -1
- package/dist/workflow/registry.js.map +0 -1
package/dist/orchestrator.js
CHANGED
|
@@ -19,11 +19,31 @@ function isDuplicateKeyError(error) {
|
|
|
19
19
|
export function createOrchestrator(deps) {
|
|
20
20
|
const { jobStore, queue, ops, clock, registry, stepExecutor, resumeToken, config } = deps;
|
|
21
21
|
const runConfig = resolveRunConfig(config);
|
|
22
|
-
function buildIdempotencyKey(input,
|
|
22
|
+
function buildIdempotencyKey(input, currentStep) {
|
|
23
23
|
if (input.idempotencyKey)
|
|
24
24
|
return input.idempotencyKey;
|
|
25
|
-
|
|
26
|
-
|
|
25
|
+
// STABLE across every re-delivery of the same logical step: a given job's
|
|
26
|
+
// step executes exactly once. The key MUST NOT include the version (the
|
|
27
|
+
// claim increments it on every delivery) or the trigger reason (varies by
|
|
28
|
+
// delivery) — either makes a post-crash re-delivery compute a different
|
|
29
|
+
// key, miss the already-recorded step, and re-run the side effect.
|
|
30
|
+
return `${input.jobId}:step:${currentStep}`;
|
|
31
|
+
}
|
|
32
|
+
/** Terminal step-failure path: force the job to `failed` and emit the event. */
|
|
33
|
+
async function failJob(input, claimed, error) {
|
|
34
|
+
await jobStore.markFailed({ jobId: input.jobId, tenantId: input.tenantId });
|
|
35
|
+
ops.record({
|
|
36
|
+
jobId: input.jobId,
|
|
37
|
+
tenantId: input.tenantId,
|
|
38
|
+
missionId: claimed.missionId,
|
|
39
|
+
source: "job_orchestrator",
|
|
40
|
+
eventType: "job.failed",
|
|
41
|
+
status: "failed",
|
|
42
|
+
queueName: QUEUE_NAME,
|
|
43
|
+
employeeId: claimed.employeeId,
|
|
44
|
+
correlationId: claimed.correlationId,
|
|
45
|
+
metadata: { error: error.message },
|
|
46
|
+
});
|
|
27
47
|
}
|
|
28
48
|
async function enqueueBurstContinuation(input) {
|
|
29
49
|
const continuation = await jobStore.getJob(input.jobId, input.tenantId);
|
|
@@ -75,9 +95,65 @@ export function createOrchestrator(deps) {
|
|
|
75
95
|
});
|
|
76
96
|
if (!claimed)
|
|
77
97
|
return; // CAS claim lost — another worker owns this tick.
|
|
78
|
-
const idempotencyKey = buildIdempotencyKey(input, claimed.
|
|
98
|
+
const idempotencyKey = buildIdempotencyKey(input, claimed.currentStep);
|
|
79
99
|
const existingStep = await jobStore.findStepByIdempotencyKey(input.jobId, input.tenantId, idempotencyKey);
|
|
80
100
|
if (existingStep) {
|
|
101
|
+
// The step already executed (its side effect ran exactly once). Two cases:
|
|
102
|
+
//
|
|
103
|
+
// (a) CRASH RECOVERY — the original finalize never landed (process died
|
|
104
|
+
// between insertStep and finalizeStep), so the job is still at the
|
|
105
|
+
// pre-step `currentStep`. Re-apply the transition the step recorded,
|
|
106
|
+
// advancing the job WITHOUT re-running the side effect, then keep it
|
|
107
|
+
// moving. This is what makes the engine crash-safe / exactly-once.
|
|
108
|
+
if (existingStep.finalize && claimed.currentStep < existingStep.stepNumber) {
|
|
109
|
+
const recovered = await jobStore.finalizeStep({
|
|
110
|
+
jobId: input.jobId,
|
|
111
|
+
tenantId: input.tenantId,
|
|
112
|
+
expectedVersion: claimed.version,
|
|
113
|
+
set: existingStep.finalize.set,
|
|
114
|
+
...(existingStep.finalize.incCounters
|
|
115
|
+
? { incCounters: existingStep.finalize.incCounters }
|
|
116
|
+
: {}),
|
|
117
|
+
});
|
|
118
|
+
ops.record({
|
|
119
|
+
jobId: input.jobId,
|
|
120
|
+
tenantId: input.tenantId,
|
|
121
|
+
missionId: claimed.missionId,
|
|
122
|
+
source: "job_orchestrator",
|
|
123
|
+
eventType: "job.step.recovered",
|
|
124
|
+
status: recovered?.status ?? claimed.status,
|
|
125
|
+
queueName: QUEUE_NAME,
|
|
126
|
+
employeeId: claimed.employeeId,
|
|
127
|
+
correlationId: claimed.correlationId,
|
|
128
|
+
metadata: { idempotencyKey, stepNumber: existingStep.stepNumber },
|
|
129
|
+
});
|
|
130
|
+
// Keep the recovered job alive: a waiting_time job needs its resume
|
|
131
|
+
// re-enqueued; a runnable status needs another tick. Terminal /
|
|
132
|
+
// waiting_event / awaiting_approval statuses are driven externally.
|
|
133
|
+
if (recovered && config.resumeMode !== "scan_only") {
|
|
134
|
+
if (recovered.status === "waiting_time" && recovered.resumeAt) {
|
|
135
|
+
await queue.enqueue({
|
|
136
|
+
jobId: input.jobId,
|
|
137
|
+
tenantId: input.tenantId,
|
|
138
|
+
reason: "crash_recovery_resume",
|
|
139
|
+
delayMs: Math.max(1, recovered.resumeAt.getTime() - clock.now().getTime()),
|
|
140
|
+
dedupeKey: `recover:${existingStep.stepNumber}`,
|
|
141
|
+
});
|
|
142
|
+
}
|
|
143
|
+
else if (isRunnableJobStatus(recovered.status)) {
|
|
144
|
+
await queue.enqueue({
|
|
145
|
+
jobId: input.jobId,
|
|
146
|
+
tenantId: input.tenantId,
|
|
147
|
+
reason: "crash_recovery_continue",
|
|
148
|
+
dedupeKey: `recover:${existingStep.stepNumber}`,
|
|
149
|
+
});
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
return;
|
|
153
|
+
}
|
|
154
|
+
// (b) CONCURRENT DUPLICATE — the finalize already landed (another worker
|
|
155
|
+
// advanced the job past this step). Just restore our `running` claim
|
|
156
|
+
// to the correct resting status and skip.
|
|
81
157
|
const restoredStatus = claimed.waitEvent
|
|
82
158
|
? "waiting_event"
|
|
83
159
|
: claimed.resumeAt
|
|
@@ -121,8 +197,11 @@ export function createOrchestrator(deps) {
|
|
|
121
197
|
correlationId: claimed.correlationId,
|
|
122
198
|
metadata: { stepNumber, reason: input.reason || "queue", idempotencyKey },
|
|
123
199
|
});
|
|
200
|
+
// ── Resolve the next transition (pure). A throw here is a workflow-definition
|
|
201
|
+
// bug — fail the job. ──
|
|
202
|
+
let resolution;
|
|
124
203
|
try {
|
|
125
|
-
|
|
204
|
+
resolution = registry.resolveStep({
|
|
126
205
|
workflowType: claimed.workflowType,
|
|
127
206
|
workflowVersion: claimed.workflowVersion,
|
|
128
207
|
budget: claimed.budget,
|
|
@@ -131,276 +210,288 @@ export function createOrchestrator(deps) {
|
|
|
131
210
|
reason: input.reason || "queue",
|
|
132
211
|
now: stepStartedAt,
|
|
133
212
|
});
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
213
|
+
}
|
|
214
|
+
catch (error) {
|
|
215
|
+
await failJob(input, claimed, error);
|
|
216
|
+
throw error;
|
|
217
|
+
}
|
|
218
|
+
// COST CEILING (P2-8): before a COSTLY step, if the tenant has hit its day-ceiling, DEFER the
|
|
219
|
+
// job to the next UTC midnight (waiting_time) instead of running the step. No step is recorded.
|
|
220
|
+
if (resolution.stepType === "run_interaction_task" &&
|
|
221
|
+
config.costCeilingUsd > 0 &&
|
|
222
|
+
deps.costPort) {
|
|
223
|
+
const now2 = clock.now();
|
|
224
|
+
const spend = await deps.costPort.tenantDaySpendUsd(input.tenantId, startOfUtcDay(now2));
|
|
225
|
+
if (spend >= config.costCeilingUsd) {
|
|
226
|
+
const resumeAt = nextUtcMidnight(now2);
|
|
227
|
+
const deferred = await jobStore.finalizeStep({
|
|
228
|
+
jobId: input.jobId,
|
|
229
|
+
tenantId: input.tenantId,
|
|
230
|
+
expectedVersion: claimed.version,
|
|
231
|
+
set: { status: "waiting_time", resumeAt },
|
|
232
|
+
});
|
|
233
|
+
ops.record({
|
|
234
|
+
jobId: input.jobId,
|
|
235
|
+
tenantId: input.tenantId,
|
|
236
|
+
missionId: claimed.missionId,
|
|
237
|
+
source: "job_orchestrator",
|
|
238
|
+
eventType: "job.cost_ceiling_deferred",
|
|
239
|
+
status: "waiting_time",
|
|
240
|
+
queueName: QUEUE_NAME,
|
|
241
|
+
employeeId: claimed.employeeId,
|
|
242
|
+
correlationId: claimed.correlationId,
|
|
243
|
+
metadata: { spendUsd: spend, ceilingUsd: config.costCeilingUsd, resumeAt: resumeAt.toISOString() },
|
|
244
|
+
});
|
|
245
|
+
if (deferred && config.resumeMode !== "scan_only") {
|
|
246
|
+
await queue.enqueue({
|
|
150
247
|
jobId: input.jobId,
|
|
151
248
|
tenantId: input.tenantId,
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
status: "waiting_time",
|
|
156
|
-
queueName: QUEUE_NAME,
|
|
157
|
-
employeeId: claimed.employeeId,
|
|
158
|
-
correlationId: claimed.correlationId,
|
|
159
|
-
metadata: { spendUsd: spend, ceilingUsd: config.costCeilingUsd, resumeAt: resumeAt.toISOString() },
|
|
249
|
+
reason: "cost_ceiling_resume",
|
|
250
|
+
delayMs: Math.max(1, resumeAt.getTime() - now2.getTime()),
|
|
251
|
+
dedupeKey: `ceiling:${resumeAt.getTime()}`,
|
|
160
252
|
});
|
|
161
|
-
if (deferred && config.resumeMode !== "scan_only") {
|
|
162
|
-
await queue.enqueue({
|
|
163
|
-
jobId: input.jobId,
|
|
164
|
-
tenantId: input.tenantId,
|
|
165
|
-
reason: "cost_ceiling_resume",
|
|
166
|
-
delayMs: Math.max(1, resumeAt.getTime() - now2.getTime()),
|
|
167
|
-
dedupeKey: `ceiling:${resumeAt.getTime()}`,
|
|
168
|
-
});
|
|
169
|
-
}
|
|
170
|
-
return;
|
|
171
253
|
}
|
|
254
|
+
return;
|
|
172
255
|
}
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
256
|
+
}
|
|
257
|
+
// ── Run the real side-effecting work (agent turn for run_interaction_task; {}
|
|
258
|
+
// for pure transitions). A throw here is a genuine STEP failure — fail the
|
|
259
|
+
// job. (Infra failures further down do NOT fail the job; see below.) ──
|
|
260
|
+
let execResult;
|
|
261
|
+
try {
|
|
262
|
+
execResult = await stepExecutor.execute({
|
|
176
263
|
stepType: resolution.stepType,
|
|
177
264
|
triggeredBy: resolution.triggeredBy,
|
|
178
265
|
job: claimed,
|
|
179
266
|
resolution,
|
|
180
267
|
stepNumber,
|
|
181
268
|
});
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
269
|
+
}
|
|
270
|
+
catch (error) {
|
|
271
|
+
await failJob(input, claimed, error);
|
|
272
|
+
throw error;
|
|
273
|
+
}
|
|
274
|
+
const stepEndedAt = clock.now();
|
|
275
|
+
const durationMs = stepEndedAt.getTime() - stepStartedAt.getTime();
|
|
276
|
+
const mergedOutput = { ...(resolution.output ?? {}), ...(execResult.output ?? {}) };
|
|
277
|
+
const mergedStatePatch = {
|
|
278
|
+
...(resolution.statePatch ?? {}),
|
|
279
|
+
...(execResult.statePatch ?? {}),
|
|
280
|
+
};
|
|
281
|
+
// Compute the FULL transition BEFORE recording the step, so the step carries
|
|
282
|
+
// the durable decision (crash recovery re-applies it WITHOUT re-running).
|
|
283
|
+
const nextStatus = resolution.nextStatus;
|
|
284
|
+
const nextResumeAt = nextStatus === "waiting_time"
|
|
285
|
+
? new Date(clock.now().getTime() + (resolution.resumeDelayMs ?? config.defaultWaitMs))
|
|
286
|
+
: null;
|
|
287
|
+
const waitEventName = resolution.waitEventName || "job.resume";
|
|
288
|
+
const issuedWaitEventToken = nextStatus === "waiting_event"
|
|
289
|
+
? resumeToken.issue({
|
|
191
290
|
jobId: input.jobId,
|
|
192
291
|
tenantId: input.tenantId,
|
|
292
|
+
eventName: waitEventName,
|
|
293
|
+
})
|
|
294
|
+
: null;
|
|
295
|
+
const nextWaitEvent = nextStatus === "waiting_event"
|
|
296
|
+
? { eventName: waitEventName, token: issuedWaitEventToken?.token ?? null }
|
|
297
|
+
: null;
|
|
298
|
+
const nextState = { ...(claimed.state ?? {}), ...mergedStatePatch };
|
|
299
|
+
// Accrue this step's USD cost onto the job (powers the tenant/day ceiling, P2-8).
|
|
300
|
+
const stepCostUsd = typeof execResult.costUsd === "number" && execResult.costUsd > 0 ? execResult.costUsd : 0;
|
|
301
|
+
const finalizeArgs = {
|
|
302
|
+
set: {
|
|
303
|
+
currentStep: stepNumber,
|
|
304
|
+
status: nextStatus,
|
|
305
|
+
resumeAt: nextResumeAt,
|
|
306
|
+
waitEvent: nextWaitEvent,
|
|
307
|
+
state: nextState,
|
|
308
|
+
completedAt: isTerminalJobStatus(nextStatus) ? stepEndedAt : null,
|
|
309
|
+
},
|
|
310
|
+
incCounters: { totalStepsExecuted: 1, attempt: 1, ...(stepCostUsd ? { totalCostUsd: stepCostUsd } : {}) },
|
|
311
|
+
};
|
|
312
|
+
const step = {
|
|
313
|
+
stepId: deps.genStepId(),
|
|
314
|
+
jobId: input.jobId,
|
|
315
|
+
tenantId: input.tenantId,
|
|
316
|
+
stepNumber,
|
|
317
|
+
stepType: resolution.stepType,
|
|
318
|
+
status: "completed",
|
|
319
|
+
triggeredBy: resolution.triggeredBy,
|
|
320
|
+
startedAt: stepStartedAt,
|
|
321
|
+
endedAt: stepEndedAt,
|
|
322
|
+
durationMs,
|
|
323
|
+
toolCalls: [],
|
|
324
|
+
output: mergedOutput,
|
|
325
|
+
error: null,
|
|
326
|
+
nextStep: null,
|
|
327
|
+
transitionReason: resolution.transitionReason,
|
|
328
|
+
idempotencyKey,
|
|
329
|
+
finalize: finalizeArgs,
|
|
330
|
+
};
|
|
331
|
+
// ── Record the step (the idempotency marker + durable transition). A concurrent
|
|
332
|
+
// insert of the same key → skip. ANY OTHER throw here is an infra/transport
|
|
333
|
+
// failure (Mongo blip): it must PROPAGATE so the queue retries the tick — it
|
|
334
|
+
// must NOT fail an otherwise-healthy job. The stable idempotency key makes the
|
|
335
|
+
// retry safe (it finds this step and recovers). ──
|
|
336
|
+
try {
|
|
337
|
+
await jobStore.insertStep(step);
|
|
338
|
+
}
|
|
339
|
+
catch (error) {
|
|
340
|
+
if (isDuplicateKeyError(error)) {
|
|
341
|
+
ops.record({
|
|
342
|
+
jobId: input.jobId,
|
|
343
|
+
tenantId: input.tenantId,
|
|
344
|
+
missionId: claimed.missionId,
|
|
345
|
+
source: "job_orchestrator",
|
|
346
|
+
eventType: "job.step.duplicate_skipped",
|
|
347
|
+
status: claimed.status,
|
|
348
|
+
queueName: QUEUE_NAME,
|
|
349
|
+
employeeId: claimed.employeeId,
|
|
350
|
+
correlationId: claimed.correlationId,
|
|
351
|
+
metadata: { idempotencyKey },
|
|
352
|
+
});
|
|
353
|
+
return;
|
|
354
|
+
}
|
|
355
|
+
throw error; // retryable infra error — do NOT markFailed (H4)
|
|
356
|
+
}
|
|
357
|
+
const finalized = await jobStore.finalizeStep({
|
|
358
|
+
jobId: input.jobId,
|
|
359
|
+
tenantId: input.tenantId,
|
|
360
|
+
expectedVersion: claimed.version,
|
|
361
|
+
...finalizeArgs,
|
|
362
|
+
});
|
|
363
|
+
if (!finalized)
|
|
364
|
+
return; // CAS finalize lost (or crash); a re-delivery recovers it.
|
|
365
|
+
stepsExecuted += 1;
|
|
366
|
+
ops.record({
|
|
367
|
+
jobId: input.jobId,
|
|
368
|
+
tenantId: input.tenantId,
|
|
369
|
+
missionId: finalized.missionId,
|
|
370
|
+
source: "job_orchestrator",
|
|
371
|
+
eventType: "job.step.completed",
|
|
372
|
+
status: finalized.status,
|
|
373
|
+
queueName: QUEUE_NAME,
|
|
374
|
+
employeeId: finalized.employeeId,
|
|
375
|
+
correlationId: finalized.correlationId,
|
|
376
|
+
metadata: {
|
|
193
377
|
stepNumber,
|
|
194
378
|
stepType: resolution.stepType,
|
|
195
|
-
status: "completed",
|
|
196
|
-
triggeredBy: resolution.triggeredBy,
|
|
197
|
-
startedAt: stepStartedAt,
|
|
198
|
-
endedAt: stepEndedAt,
|
|
199
|
-
durationMs,
|
|
200
|
-
toolCalls: [],
|
|
201
|
-
output: mergedOutput,
|
|
202
|
-
error: null,
|
|
203
|
-
nextStep: null,
|
|
204
379
|
transitionReason: resolution.transitionReason,
|
|
380
|
+
durationMs,
|
|
205
381
|
idempotencyKey,
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
const waitEventName = resolution.waitEventName || "job.resume";
|
|
213
|
-
const issuedWaitEventToken = nextStatus === "waiting_event"
|
|
214
|
-
? resumeToken.issue({
|
|
215
|
-
jobId: input.jobId,
|
|
216
|
-
tenantId: input.tenantId,
|
|
217
|
-
eventName: waitEventName,
|
|
218
|
-
})
|
|
219
|
-
: null;
|
|
220
|
-
const nextWaitEvent = nextStatus === "waiting_event"
|
|
221
|
-
? { eventName: waitEventName, token: issuedWaitEventToken?.token ?? null }
|
|
222
|
-
: null;
|
|
223
|
-
const nextState = { ...(claimed.state ?? {}), ...mergedStatePatch };
|
|
224
|
-
// Accrue this step's USD cost onto the job (powers the tenant/day ceiling, P2-8).
|
|
225
|
-
const stepCostUsd = typeof execResult.costUsd === "number" && execResult.costUsd > 0 ? execResult.costUsd : 0;
|
|
226
|
-
const finalized = await jobStore.finalizeStep({
|
|
382
|
+
stepsExecutedThisTick: stepsExecuted,
|
|
383
|
+
maxStepsPerTick: runConfig.maxStepsPerTick,
|
|
384
|
+
},
|
|
385
|
+
});
|
|
386
|
+
if (nextStatus === "completed") {
|
|
387
|
+
ops.record({
|
|
227
388
|
jobId: input.jobId,
|
|
228
389
|
tenantId: input.tenantId,
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
completedAt: isTerminalJobStatus(nextStatus) ? clock.now() : null,
|
|
237
|
-
},
|
|
238
|
-
incCounters: { totalStepsExecuted: 1, attempt: 1, ...(stepCostUsd ? { totalCostUsd: stepCostUsd } : {}) },
|
|
390
|
+
missionId: finalized.missionId,
|
|
391
|
+
source: "job_orchestrator",
|
|
392
|
+
eventType: "job.completed",
|
|
393
|
+
status: finalized.status,
|
|
394
|
+
queueName: QUEUE_NAME,
|
|
395
|
+
employeeId: finalized.employeeId,
|
|
396
|
+
correlationId: finalized.correlationId,
|
|
239
397
|
});
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
398
|
+
return;
|
|
399
|
+
}
|
|
400
|
+
if (nextStatus === "failed") {
|
|
243
401
|
ops.record({
|
|
244
402
|
jobId: input.jobId,
|
|
245
403
|
tenantId: input.tenantId,
|
|
246
404
|
missionId: finalized.missionId,
|
|
247
405
|
source: "job_orchestrator",
|
|
248
|
-
eventType: "job.
|
|
406
|
+
eventType: "job.failed",
|
|
249
407
|
status: finalized.status,
|
|
250
408
|
queueName: QUEUE_NAME,
|
|
251
409
|
employeeId: finalized.employeeId,
|
|
252
410
|
correlationId: finalized.correlationId,
|
|
253
|
-
metadata: {
|
|
254
|
-
stepNumber,
|
|
255
|
-
stepType: resolution.stepType,
|
|
256
|
-
transitionReason: resolution.transitionReason,
|
|
257
|
-
durationMs,
|
|
258
|
-
idempotencyKey,
|
|
259
|
-
stepsExecutedThisTick: stepsExecuted,
|
|
260
|
-
maxStepsPerTick: runConfig.maxStepsPerTick,
|
|
261
|
-
},
|
|
262
411
|
});
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
tenantId: input.tenantId,
|
|
267
|
-
missionId: finalized.missionId,
|
|
268
|
-
source: "job_orchestrator",
|
|
269
|
-
eventType: "job.completed",
|
|
270
|
-
status: finalized.status,
|
|
271
|
-
queueName: QUEUE_NAME,
|
|
272
|
-
employeeId: finalized.employeeId,
|
|
273
|
-
correlationId: finalized.correlationId,
|
|
274
|
-
});
|
|
275
|
-
return;
|
|
276
|
-
}
|
|
277
|
-
if (nextStatus === "failed") {
|
|
278
|
-
ops.record({
|
|
279
|
-
jobId: input.jobId,
|
|
280
|
-
tenantId: input.tenantId,
|
|
281
|
-
missionId: finalized.missionId,
|
|
282
|
-
source: "job_orchestrator",
|
|
283
|
-
eventType: "job.failed",
|
|
284
|
-
status: finalized.status,
|
|
285
|
-
queueName: QUEUE_NAME,
|
|
286
|
-
employeeId: finalized.employeeId,
|
|
287
|
-
correlationId: finalized.correlationId,
|
|
288
|
-
});
|
|
289
|
-
return;
|
|
290
|
-
}
|
|
291
|
-
if (nextStatus === "waiting_event") {
|
|
292
|
-
ops.record({
|
|
293
|
-
jobId: input.jobId,
|
|
294
|
-
tenantId: input.tenantId,
|
|
295
|
-
missionId: finalized.missionId,
|
|
296
|
-
source: "job_orchestrator",
|
|
297
|
-
eventType: "job.waiting_event",
|
|
298
|
-
status: finalized.status,
|
|
299
|
-
queueName: QUEUE_NAME,
|
|
300
|
-
employeeId: finalized.employeeId,
|
|
301
|
-
correlationId: finalized.correlationId,
|
|
302
|
-
metadata: {
|
|
303
|
-
stepNumber,
|
|
304
|
-
waitEventName: nextWaitEvent?.eventName ?? null,
|
|
305
|
-
resumeTokenExpiresAt: issuedWaitEventToken?.expiresAt ?? null,
|
|
306
|
-
},
|
|
307
|
-
});
|
|
308
|
-
return;
|
|
309
|
-
}
|
|
310
|
-
if (nextStatus === "awaiting_approval") {
|
|
311
|
-
await handleAwaitingApproval(finalized, resolution, stepNumber, input);
|
|
312
|
-
return;
|
|
313
|
-
}
|
|
314
|
-
if (nextStatus !== "waiting_time")
|
|
315
|
-
return;
|
|
412
|
+
return;
|
|
413
|
+
}
|
|
414
|
+
if (nextStatus === "waiting_event") {
|
|
316
415
|
ops.record({
|
|
317
416
|
jobId: input.jobId,
|
|
318
417
|
tenantId: input.tenantId,
|
|
319
418
|
missionId: finalized.missionId,
|
|
320
419
|
source: "job_orchestrator",
|
|
321
|
-
eventType: "job.
|
|
420
|
+
eventType: "job.waiting_event",
|
|
322
421
|
status: finalized.status,
|
|
323
422
|
queueName: QUEUE_NAME,
|
|
324
423
|
employeeId: finalized.employeeId,
|
|
325
424
|
correlationId: finalized.correlationId,
|
|
326
425
|
metadata: {
|
|
327
426
|
stepNumber,
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
resumeMode: config.resumeMode,
|
|
427
|
+
waitEventName: nextWaitEvent?.eventName ?? null,
|
|
428
|
+
resumeTokenExpiresAt: issuedWaitEventToken?.expiresAt ?? null,
|
|
331
429
|
},
|
|
332
430
|
});
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
await
|
|
431
|
+
return;
|
|
432
|
+
}
|
|
433
|
+
if (nextStatus === "awaiting_approval") {
|
|
434
|
+
await handleAwaitingApproval(finalized, resolution, stepNumber, input);
|
|
435
|
+
return;
|
|
436
|
+
}
|
|
437
|
+
if (nextStatus !== "waiting_time")
|
|
438
|
+
return;
|
|
439
|
+
ops.record({
|
|
440
|
+
jobId: input.jobId,
|
|
441
|
+
tenantId: input.tenantId,
|
|
442
|
+
missionId: finalized.missionId,
|
|
443
|
+
source: "job_orchestrator",
|
|
444
|
+
eventType: "job.waiting_time",
|
|
445
|
+
status: finalized.status,
|
|
446
|
+
queueName: QUEUE_NAME,
|
|
447
|
+
employeeId: finalized.employeeId,
|
|
448
|
+
correlationId: finalized.correlationId,
|
|
449
|
+
metadata: {
|
|
450
|
+
stepNumber,
|
|
451
|
+
transitionReason: resolution.transitionReason,
|
|
452
|
+
resumeAt: nextResumeAt?.toISOString(),
|
|
453
|
+
resumeMode: config.resumeMode,
|
|
454
|
+
},
|
|
455
|
+
});
|
|
456
|
+
const resumeDelayMs = nextResumeAt
|
|
457
|
+
? Math.max(1, nextResumeAt.getTime() - clock.now().getTime())
|
|
458
|
+
: config.defaultWaitMs;
|
|
459
|
+
const canInlineContinue = stepsExecuted < runConfig.maxStepsPerTick &&
|
|
460
|
+
resumeDelayMs <= runConfig.inlineResumeMaxDelayMs;
|
|
461
|
+
if (canInlineContinue) {
|
|
462
|
+
const inlined = await jobStore.claim({
|
|
365
463
|
jobId: input.jobId,
|
|
366
464
|
tenantId: input.tenantId,
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
465
|
+
expectedVersion: finalized.version,
|
|
466
|
+
statusIn: ["waiting_time"],
|
|
467
|
+
set: { status: "queued", resumeAt: null },
|
|
370
468
|
});
|
|
371
|
-
|
|
372
|
-
}
|
|
373
|
-
catch (error) {
|
|
374
|
-
if (isDuplicateKeyError(error)) {
|
|
375
|
-
ops.record({
|
|
376
|
-
jobId: input.jobId,
|
|
377
|
-
tenantId: input.tenantId,
|
|
378
|
-
missionId: claimed.missionId,
|
|
379
|
-
source: "job_orchestrator",
|
|
380
|
-
eventType: "job.step.duplicate_skipped",
|
|
381
|
-
status: claimed.status,
|
|
382
|
-
queueName: QUEUE_NAME,
|
|
383
|
-
employeeId: claimed.employeeId,
|
|
384
|
-
correlationId: claimed.correlationId,
|
|
385
|
-
metadata: { idempotencyKey },
|
|
386
|
-
});
|
|
469
|
+
if (!inlined)
|
|
387
470
|
return;
|
|
388
|
-
}
|
|
389
|
-
await jobStore.markFailed({ jobId: input.jobId, tenantId: input.tenantId });
|
|
390
471
|
ops.record({
|
|
391
472
|
jobId: input.jobId,
|
|
392
473
|
tenantId: input.tenantId,
|
|
393
|
-
missionId:
|
|
474
|
+
missionId: inlined.missionId,
|
|
394
475
|
source: "job_orchestrator",
|
|
395
|
-
eventType: "job.
|
|
396
|
-
status:
|
|
476
|
+
eventType: "job.waiting_time.inline_continued",
|
|
477
|
+
status: inlined.status,
|
|
397
478
|
queueName: QUEUE_NAME,
|
|
398
|
-
employeeId:
|
|
399
|
-
correlationId:
|
|
400
|
-
metadata: {
|
|
479
|
+
employeeId: inlined.employeeId,
|
|
480
|
+
correlationId: inlined.correlationId,
|
|
481
|
+
metadata: { stepNumber, resumeDelayMs, inlineResumeMaxDelayMs: runConfig.inlineResumeMaxDelayMs },
|
|
401
482
|
});
|
|
402
|
-
|
|
483
|
+
continue;
|
|
403
484
|
}
|
|
485
|
+
if (config.resumeMode === "scan_only")
|
|
486
|
+
return;
|
|
487
|
+
await queue.enqueue({
|
|
488
|
+
jobId: input.jobId,
|
|
489
|
+
tenantId: input.tenantId,
|
|
490
|
+
reason: "waiting_time_resume",
|
|
491
|
+
delayMs: resumeDelayMs,
|
|
492
|
+
dedupeKey: `${stepNumber}:${idempotencyKey}`,
|
|
493
|
+
});
|
|
494
|
+
return;
|
|
404
495
|
}
|
|
405
496
|
await enqueueBurstContinuation(input);
|
|
406
497
|
}
|
|
@@ -452,4 +543,3 @@ export function createOrchestrator(deps) {
|
|
|
452
543
|
}
|
|
453
544
|
return { orchestrateJobStep };
|
|
454
545
|
}
|
|
455
|
-
//# sourceMappingURL=orchestrator.js.map
|
package/dist/port.d.ts
CHANGED
package/dist/port.js
CHANGED
package/dist/reconciler.d.ts
CHANGED
package/dist/reconciler.js
CHANGED
package/dist/resume-token.d.ts
CHANGED