@smithers-orchestrator/scheduler 0.17.0 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -3
- package/src/ScheduleResult.ts +2 -0
- package/src/index.d.ts +2 -0
- package/src/makeWorkflowSession.js +35 -10
- package/src/scheduleTasks.js +207 -18
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@smithers-orchestrator/scheduler",
|
|
3
|
-
"version": "0.17.0",
|
|
3
|
+
"version": "0.19.0",
|
|
4
4
|
"description": "Pure decision engine: session, scheduler, and task state management for Smithers workflows",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"sideEffects": false,
|
|
@@ -176,8 +176,8 @@
|
|
|
176
176
|
],
|
|
177
177
|
"dependencies": {
|
|
178
178
|
"effect": "^3.21.1",
|
|
179
|
-
"@smithers-orchestrator/errors": "0.
|
|
180
|
-
"@smithers-orchestrator/graph": "0.
|
|
179
|
+
"@smithers-orchestrator/errors": "0.19.0",
|
|
180
|
+
"@smithers-orchestrator/graph": "0.19.0"
|
|
181
181
|
},
|
|
182
182
|
"devDependencies": {
|
|
183
183
|
"@types/bun": "latest",
|
package/src/ScheduleResult.ts
CHANGED
package/src/index.d.ts
CHANGED
|
@@ -74,6 +74,8 @@ type ScheduleResult$3 = {
|
|
|
74
74
|
readonly continuation?: ContinuationRequest$1;
|
|
75
75
|
readonly nextRetryAtMs?: number;
|
|
76
76
|
readonly fatalError?: string;
|
|
77
|
+
readonly failureRecoveryActive?: boolean;
|
|
78
|
+
readonly failureRecoveryKeys?: readonly string[];
|
|
77
79
|
};
|
|
78
80
|
|
|
79
81
|
type ScheduleSnapshot$1 = {
|
package/src/makeWorkflowSession.js
CHANGED
|
@@ -179,8 +179,15 @@ function isRetryableFailure(descriptor, error) {
|
|
|
179
179
|
const payloadCode = error && typeof error === "object" && typeof error.code === "string"
|
|
180
180
|
? error.code
|
|
181
181
|
: undefined;
|
|
182
|
+
const payloadDetails = error && typeof error === "object" && error.details && typeof error.details === "object"
|
|
183
|
+
? error.details
|
|
184
|
+
: undefined;
|
|
182
185
|
const normalized = toSmithersError(error);
|
|
183
186
|
const code = payloadCode ?? normalized.code;
|
|
187
|
+
const failureRetryable = payloadDetails?.failureRetryable ?? normalized.details?.failureRetryable;
|
|
188
|
+
if (failureRetryable === false || code === "AGENT_CONFIG_INVALID") {
|
|
189
|
+
return false;
|
|
190
|
+
}
|
|
184
191
|
const isAgentTask = Boolean(descriptor.agent);
|
|
185
192
|
const nonRetryableComputeCodes = new Set([
|
|
186
193
|
"INVALID_OUTPUT",
|
|
@@ -372,6 +379,25 @@ export function makeWorkflowSession(options = {}) {
|
|
|
372
379
|
state.failures.set(key, error);
|
|
373
380
|
return decide();
|
|
374
381
|
}
|
|
382
|
+
/**
|
|
383
|
+
* @returns {EngineDecision | null}
|
|
384
|
+
*/
|
|
385
|
+
function unhandledFailureDecision(recoveryKeys = new Set()) {
|
|
386
|
+
for (const [key, taskState] of state.states) {
|
|
387
|
+
const parsed = parseStateKey(key);
|
|
388
|
+
const descriptor = findDescriptor(state, parsed.nodeId, parsed.iteration);
|
|
389
|
+
if (taskState === "failed" && !descriptor?.continueOnFail) {
|
|
390
|
+
if (recoveryKeys.has(key)) {
|
|
391
|
+
continue;
|
|
392
|
+
}
|
|
393
|
+
return {
|
|
394
|
+
_tag: "Failed",
|
|
395
|
+
error: new SmithersError("SESSION_ERROR", `Task failed: ${descriptor?.nodeId ?? key}`, { key }, state.failures.get(key)),
|
|
396
|
+
};
|
|
397
|
+
}
|
|
398
|
+
}
|
|
399
|
+
return null;
|
|
400
|
+
}
|
|
375
401
|
function ralphStatePayload() {
|
|
376
402
|
return {
|
|
377
403
|
ralphState: Object.fromEntries([...state.ralphState.entries()].map(([id, value]) => [
|
|
@@ -393,16 +419,6 @@ export function makeWorkflowSession(options = {}) {
|
|
|
393
419
|
if (!state.graph) {
|
|
394
420
|
return { _tag: "Wait", reason: { _tag: "ExternalTrigger" } };
|
|
395
421
|
}
|
|
396
|
-
for (const [key, taskState] of state.states) {
|
|
397
|
-
const parsed = parseStateKey(key);
|
|
398
|
-
const descriptor = findDescriptor(state, parsed.nodeId, parsed.iteration);
|
|
399
|
-
if (taskState === "failed" && !descriptor?.continueOnFail) {
|
|
400
|
-
return {
|
|
401
|
-
_tag: "Failed",
|
|
402
|
-
error: new SmithersError("SESSION_ERROR", `Task failed: ${descriptor?.nodeId ?? key}`, { key }, state.failures.get(key)),
|
|
403
|
-
};
|
|
404
|
-
}
|
|
405
|
-
}
|
|
406
422
|
const schedule = computeSchedule();
|
|
407
423
|
if (schedule.fatalError) {
|
|
408
424
|
return {
|
|
@@ -419,6 +435,11 @@ export function makeWorkflowSession(options = {}) {
|
|
|
419
435
|
},
|
|
420
436
|
};
|
|
421
437
|
}
|
|
438
|
+
const recoveryKeys = new Set(schedule.failureRecoveryKeys ?? []);
|
|
439
|
+
let failure = unhandledFailureDecision(recoveryKeys);
|
|
440
|
+
if (failure) {
|
|
441
|
+
return failure;
|
|
442
|
+
}
|
|
422
443
|
const executable = [];
|
|
423
444
|
let waitReason;
|
|
424
445
|
let changed = false;
|
|
@@ -489,6 +510,10 @@ export function makeWorkflowSession(options = {}) {
|
|
|
489
510
|
if ([...state.states.values()].some((taskState) => taskState === "in-progress")) {
|
|
490
511
|
return { _tag: "Wait", reason: { _tag: "ExternalTrigger" } };
|
|
491
512
|
}
|
|
513
|
+
failure = unhandledFailureDecision(recoveryKeys);
|
|
514
|
+
if (failure) {
|
|
515
|
+
return failure;
|
|
516
|
+
}
|
|
492
517
|
if (schedule.readyRalphs.length > 0) {
|
|
493
518
|
for (const ralph of schedule.readyRalphs) {
|
|
494
519
|
const current = state.ralphState.get(ralph.id) ?? {
|
package/src/scheduleTasks.js
CHANGED
|
@@ -70,6 +70,8 @@ export function scheduleTasks(plan, states, descriptors, ralphState, retryWait,
|
|
|
70
70
|
let continuation;
|
|
71
71
|
let nextRetryAtMs;
|
|
72
72
|
let fatalError;
|
|
73
|
+
let failureRecoveryActive = false;
|
|
74
|
+
const failureRecoveryKeys = new Set();
|
|
73
75
|
const groupUsage = new Map();
|
|
74
76
|
for (const [stateKey, state] of states) {
|
|
75
77
|
if (state !== "in-progress")
|
|
@@ -89,7 +91,7 @@ export function scheduleTasks(plan, states, descriptors, ralphState, retryWait,
|
|
|
89
91
|
* @param {PlanNode} node
|
|
90
92
|
* @returns {{ readonly terminal: boolean; readonly failed: boolean }}
|
|
91
93
|
*/
|
|
92
|
-
function inspect(node) {
|
|
94
|
+
function inspect(node, options = {}) {
|
|
93
95
|
switch (node.kind) {
|
|
94
96
|
case "task": {
|
|
95
97
|
const descriptor = descriptors.get(node.nodeId);
|
|
@@ -102,12 +104,16 @@ export function scheduleTasks(plan, states, descriptors, ralphState, retryWait,
|
|
|
102
104
|
state === "failed" ||
|
|
103
105
|
Boolean(descriptor.waitAsync &&
|
|
104
106
|
(state === "waiting-approval" || state === "waiting-event"));
|
|
105
|
-
return {
|
|
107
|
+
return {
|
|
108
|
+
terminal,
|
|
109
|
+
failed: state === "failed" &&
|
|
110
|
+
(options.includeContinuedFailures || !descriptor.continueOnFail),
|
|
111
|
+
};
|
|
106
112
|
}
|
|
107
113
|
case "sequence":
|
|
108
114
|
case "group": {
|
|
109
115
|
for (const child of node.children) {
|
|
110
|
-
const result = inspect(child);
|
|
116
|
+
const result = inspect(child, options);
|
|
111
117
|
if (!result.terminal)
|
|
112
118
|
return { terminal: false, failed: false };
|
|
113
119
|
if (result.failed)
|
|
@@ -119,7 +125,7 @@ export function scheduleTasks(plan, states, descriptors, ralphState, retryWait,
|
|
|
119
125
|
let terminal = true;
|
|
120
126
|
let failed = false;
|
|
121
127
|
for (const child of node.children) {
|
|
122
|
-
const result = inspect(child);
|
|
128
|
+
const result = inspect(child, options);
|
|
123
129
|
if (!result.terminal)
|
|
124
130
|
terminal = false;
|
|
125
131
|
if (result.failed)
|
|
@@ -128,30 +134,137 @@ export function scheduleTasks(plan, states, descriptors, ralphState, retryWait,
|
|
|
128
134
|
return { terminal, failed: terminal && failed };
|
|
129
135
|
}
|
|
130
136
|
case "saga": {
|
|
137
|
+
let completedActions = 0;
|
|
138
|
+
let failed = false;
|
|
131
139
|
for (const child of node.actionChildren) {
|
|
132
|
-
const result = inspect(child);
|
|
140
|
+
const result = inspect(child, {
|
|
141
|
+
includeContinuedFailures: true,
|
|
142
|
+
});
|
|
143
|
+
if (!result.terminal)
|
|
144
|
+
return { terminal: false, failed: false };
|
|
145
|
+
if (result.failed) {
|
|
146
|
+
failed = true;
|
|
147
|
+
break;
|
|
148
|
+
}
|
|
149
|
+
completedActions += 1;
|
|
150
|
+
}
|
|
151
|
+
if (!failed)
|
|
152
|
+
return { terminal: true, failed: false };
|
|
153
|
+
if (node.onFailure === "fail")
|
|
154
|
+
return { terminal: true, failed: true };
|
|
155
|
+
let compensationFailed = false;
|
|
156
|
+
for (let index = completedActions - 1; index >= 0; index -= 1) {
|
|
157
|
+
const compensation = node.compensationChildren[index];
|
|
158
|
+
if (!compensation)
|
|
159
|
+
continue;
|
|
160
|
+
const result = inspect(compensation, options);
|
|
133
161
|
if (!result.terminal)
|
|
134
162
|
return { terminal: false, failed: false };
|
|
135
163
|
if (result.failed)
|
|
136
|
-
|
|
164
|
+
compensationFailed = true;
|
|
137
165
|
}
|
|
138
|
-
return {
|
|
166
|
+
return {
|
|
167
|
+
terminal: true,
|
|
168
|
+
failed: compensationFailed || node.onFailure === "compensate-and-fail",
|
|
169
|
+
};
|
|
139
170
|
}
|
|
140
171
|
case "try-catch-finally": {
|
|
172
|
+
let tryFailed = false;
|
|
141
173
|
for (const child of node.tryChildren) {
|
|
142
|
-
const result = inspect(child);
|
|
174
|
+
const result = inspect(child, {
|
|
175
|
+
includeContinuedFailures: true,
|
|
176
|
+
});
|
|
143
177
|
if (!result.terminal)
|
|
144
178
|
return { terminal: false, failed: false };
|
|
145
|
-
if (result.failed)
|
|
146
|
-
|
|
179
|
+
if (result.failed) {
|
|
180
|
+
tryFailed = true;
|
|
181
|
+
break;
|
|
182
|
+
}
|
|
147
183
|
}
|
|
148
|
-
|
|
184
|
+
if (!tryFailed) {
|
|
185
|
+
return inspect({
|
|
186
|
+
kind: "sequence",
|
|
187
|
+
children: node.finallyChildren,
|
|
188
|
+
}, options);
|
|
189
|
+
}
|
|
190
|
+
let catchFailed = node.catchChildren.length === 0;
|
|
191
|
+
if (node.catchChildren.length > 0) {
|
|
192
|
+
const catchStatus = inspect({
|
|
193
|
+
kind: "sequence",
|
|
194
|
+
children: node.catchChildren,
|
|
195
|
+
}, options);
|
|
196
|
+
if (!catchStatus.terminal)
|
|
197
|
+
return { terminal: false, failed: false };
|
|
198
|
+
catchFailed = catchStatus.failed;
|
|
199
|
+
}
|
|
200
|
+
const finallyStatus = inspect({
|
|
201
|
+
kind: "sequence",
|
|
202
|
+
children: node.finallyChildren,
|
|
203
|
+
}, options);
|
|
204
|
+
if (!finallyStatus.terminal)
|
|
205
|
+
return { terminal: false, failed: false };
|
|
206
|
+
return {
|
|
207
|
+
terminal: true,
|
|
208
|
+
failed: catchFailed || finallyStatus.failed,
|
|
209
|
+
};
|
|
149
210
|
}
|
|
150
211
|
default:
|
|
151
212
|
return { terminal: true, failed: false };
|
|
152
213
|
}
|
|
153
214
|
}
|
|
154
215
|
/**
|
|
216
|
+
* @param {PlanNode} node
|
|
217
|
+
* @param {{ includeContinuedFailures?: boolean }} options
|
|
218
|
+
*/
|
|
219
|
+
function collectFailureKeys(node, options = {}) {
|
|
220
|
+
switch (node.kind) {
|
|
221
|
+
case "task": {
|
|
222
|
+
const descriptor = descriptors.get(node.nodeId);
|
|
223
|
+
if (!descriptor)
|
|
224
|
+
return;
|
|
225
|
+
const key = buildStateKey(descriptor.nodeId, descriptor.iteration);
|
|
226
|
+
const state = states.get(key) ?? "pending";
|
|
227
|
+
if (state === "failed" &&
|
|
228
|
+
(options.includeContinuedFailures || !descriptor.continueOnFail)) {
|
|
229
|
+
failureRecoveryKeys.add(key);
|
|
230
|
+
}
|
|
231
|
+
return;
|
|
232
|
+
}
|
|
233
|
+
case "sequence":
|
|
234
|
+
case "group":
|
|
235
|
+
case "parallel":
|
|
236
|
+
for (const child of node.children) {
|
|
237
|
+
collectFailureKeys(child, options);
|
|
238
|
+
}
|
|
239
|
+
return;
|
|
240
|
+
case "saga":
|
|
241
|
+
for (const child of node.actionChildren) {
|
|
242
|
+
collectFailureKeys(child, options);
|
|
243
|
+
}
|
|
244
|
+
return;
|
|
245
|
+
case "try-catch-finally":
|
|
246
|
+
for (const child of node.tryChildren) {
|
|
247
|
+
collectFailureKeys(child, options);
|
|
248
|
+
}
|
|
249
|
+
for (const child of node.catchChildren) {
|
|
250
|
+
collectFailureKeys(child, options);
|
|
251
|
+
}
|
|
252
|
+
for (const child of node.finallyChildren) {
|
|
253
|
+
collectFailureKeys(child, options);
|
|
254
|
+
}
|
|
255
|
+
return;
|
|
256
|
+
}
|
|
257
|
+
}
|
|
258
|
+
/**
|
|
259
|
+
* @param {readonly PlanNode[]} children
|
|
260
|
+
* @param {{ includeContinuedFailures?: boolean }} options
|
|
261
|
+
*/
|
|
262
|
+
function collectChildFailureKeys(children, options = {}) {
|
|
263
|
+
for (const child of children) {
|
|
264
|
+
collectFailureKeys(child, options);
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
/**
|
|
155
268
|
* @param {readonly PlanNode[]} children
|
|
156
269
|
*/
|
|
157
270
|
function walkSequence(children) {
|
|
@@ -247,7 +360,9 @@ export function scheduleTasks(plan, states, descriptors, ralphState, retryWait,
|
|
|
247
360
|
let completedActions = 0;
|
|
248
361
|
let failed = false;
|
|
249
362
|
for (const child of node.actionChildren) {
|
|
250
|
-
const status = inspect(child);
|
|
363
|
+
const status = inspect(child, {
|
|
364
|
+
includeContinuedFailures: true,
|
|
365
|
+
});
|
|
251
366
|
if (!status.terminal)
|
|
252
367
|
return walk(child);
|
|
253
368
|
if (status.failed) {
|
|
@@ -262,6 +377,23 @@ export function scheduleTasks(plan, states, descriptors, ralphState, retryWait,
|
|
|
262
377
|
fatalError ??= `Saga ${node.id} failed`;
|
|
263
378
|
return { terminal: true };
|
|
264
379
|
}
|
|
380
|
+
collectChildFailureKeys(node.actionChildren, {
|
|
381
|
+
includeContinuedFailures: true,
|
|
382
|
+
});
|
|
383
|
+
let compensationFailed = false;
|
|
384
|
+
for (let index = completedActions - 1; index >= 0; index -= 1) {
|
|
385
|
+
const compensation = node.compensationChildren[index];
|
|
386
|
+
if (!compensation)
|
|
387
|
+
continue;
|
|
388
|
+
if (inspect(compensation).failed) {
|
|
389
|
+
compensationFailed = true;
|
|
390
|
+
break;
|
|
391
|
+
}
|
|
392
|
+
}
|
|
393
|
+
if (compensationFailed) {
|
|
394
|
+
return { terminal: false };
|
|
395
|
+
}
|
|
396
|
+
failureRecoveryActive = true;
|
|
265
397
|
for (let index = completedActions - 1; index >= 0; index -= 1) {
|
|
266
398
|
const compensation = node.compensationChildren[index];
|
|
267
399
|
if (!compensation)
|
|
@@ -278,7 +410,9 @@ export function scheduleTasks(plan, states, descriptors, ralphState, retryWait,
|
|
|
278
410
|
case "try-catch-finally": {
|
|
279
411
|
let tryFailed = false;
|
|
280
412
|
for (const child of node.tryChildren) {
|
|
281
|
-
const status = inspect(child);
|
|
413
|
+
const status = inspect(child, {
|
|
414
|
+
includeContinuedFailures: true,
|
|
415
|
+
});
|
|
282
416
|
if (!status.terminal)
|
|
283
417
|
return walk(child);
|
|
284
418
|
if (status.failed) {
|
|
@@ -286,19 +420,72 @@ export function scheduleTasks(plan, states, descriptors, ralphState, retryWait,
|
|
|
286
420
|
break;
|
|
287
421
|
}
|
|
288
422
|
}
|
|
289
|
-
if (tryFailed) {
|
|
290
|
-
|
|
423
|
+
if (tryFailed && node.catchChildren.length > 0) {
|
|
424
|
+
const collectTryFailureKeys = () => collectChildFailureKeys(node.tryChildren, {
|
|
425
|
+
includeContinuedFailures: true,
|
|
426
|
+
});
|
|
427
|
+
let catchFailed = false;
|
|
428
|
+
collectTryFailureKeys();
|
|
429
|
+
const catchStatus = inspect({
|
|
430
|
+
kind: "sequence",
|
|
431
|
+
children: node.catchChildren,
|
|
432
|
+
});
|
|
433
|
+
failureRecoveryActive = true;
|
|
434
|
+
catchFailed = catchStatus.failed;
|
|
435
|
+
if (!catchStatus.terminal) {
|
|
291
436
|
const catchResult = walkSequence(node.catchChildren);
|
|
292
437
|
if (!catchResult.terminal)
|
|
293
438
|
return catchResult;
|
|
294
439
|
}
|
|
295
|
-
|
|
296
|
-
|
|
440
|
+
const finallyStatus = inspect({
|
|
441
|
+
kind: "sequence",
|
|
442
|
+
children: node.finallyChildren,
|
|
443
|
+
});
|
|
444
|
+
if (finallyStatus.failed) {
|
|
445
|
+
collectTryFailureKeys();
|
|
446
|
+
failureRecoveryActive = false;
|
|
447
|
+
return { terminal: false };
|
|
297
448
|
}
|
|
449
|
+
const finallyResult = walkSequence(node.finallyChildren);
|
|
450
|
+
if (!finallyResult.terminal) {
|
|
451
|
+
collectTryFailureKeys();
|
|
452
|
+
if (catchFailed) {
|
|
453
|
+
collectChildFailureKeys(node.catchChildren);
|
|
454
|
+
}
|
|
455
|
+
failureRecoveryActive = true;
|
|
456
|
+
return finallyResult;
|
|
457
|
+
}
|
|
458
|
+
if (catchFailed) {
|
|
459
|
+
return { terminal: true };
|
|
460
|
+
}
|
|
461
|
+
return { terminal: true };
|
|
462
|
+
}
|
|
463
|
+
const finallyStatus = inspect({
|
|
464
|
+
kind: "sequence",
|
|
465
|
+
children: node.finallyChildren,
|
|
466
|
+
});
|
|
467
|
+
if (finallyStatus.failed) {
|
|
468
|
+
if (tryFailed) {
|
|
469
|
+
collectChildFailureKeys(node.tryChildren, {
|
|
470
|
+
includeContinuedFailures: true,
|
|
471
|
+
});
|
|
472
|
+
}
|
|
473
|
+
failureRecoveryActive = false;
|
|
474
|
+
return { terminal: false };
|
|
298
475
|
}
|
|
299
476
|
const finallyResult = walkSequence(node.finallyChildren);
|
|
300
|
-
if (!finallyResult.terminal)
|
|
477
|
+
if (!finallyResult.terminal) {
|
|
478
|
+
if (tryFailed && node.catchChildren.length === 0) {
|
|
479
|
+
collectChildFailureKeys(node.tryChildren, {
|
|
480
|
+
includeContinuedFailures: true,
|
|
481
|
+
});
|
|
482
|
+
failureRecoveryActive = true;
|
|
483
|
+
}
|
|
301
484
|
return finallyResult;
|
|
485
|
+
}
|
|
486
|
+
if (tryFailed && node.catchChildren.length === 0) {
|
|
487
|
+
fatalError ??= `TryCatchFinally ${node.id} failed`;
|
|
488
|
+
}
|
|
302
489
|
return { terminal: true };
|
|
303
490
|
}
|
|
304
491
|
case "group": {
|
|
@@ -326,5 +513,7 @@ export function scheduleTasks(plan, states, descriptors, ralphState, retryWait,
|
|
|
326
513
|
continuation,
|
|
327
514
|
nextRetryAtMs,
|
|
328
515
|
fatalError,
|
|
516
|
+
failureRecoveryActive,
|
|
517
|
+
failureRecoveryKeys: [...failureRecoveryKeys],
|
|
329
518
|
};
|
|
330
519
|
}
|