@smithers-orchestrator/scheduler 0.17.0 → 0.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@smithers-orchestrator/scheduler",
3
- "version": "0.17.0",
3
+ "version": "0.19.0",
4
4
  "description": "Pure decision engine: session, scheduler, and task state management for Smithers workflows",
5
5
  "type": "module",
6
6
  "sideEffects": false,
@@ -176,8 +176,8 @@
176
176
  ],
177
177
  "dependencies": {
178
178
  "effect": "^3.21.1",
179
- "@smithers-orchestrator/errors": "0.17.0",
180
- "@smithers-orchestrator/graph": "0.17.0"
179
+ "@smithers-orchestrator/errors": "0.19.0",
180
+ "@smithers-orchestrator/graph": "0.19.0"
181
181
  },
182
182
  "devDependencies": {
183
183
  "@types/bun": "latest",
@@ -12,4 +12,6 @@ export type ScheduleResult = {
12
12
  readonly continuation?: ContinuationRequest;
13
13
  readonly nextRetryAtMs?: number;
14
14
  readonly fatalError?: string;
15
+ readonly failureRecoveryActive?: boolean;
16
+ readonly failureRecoveryKeys?: readonly string[];
15
17
  };
package/src/index.d.ts CHANGED
@@ -74,6 +74,8 @@ type ScheduleResult$3 = {
74
74
  readonly continuation?: ContinuationRequest$1;
75
75
  readonly nextRetryAtMs?: number;
76
76
  readonly fatalError?: string;
77
+ readonly failureRecoveryActive?: boolean;
78
+ readonly failureRecoveryKeys?: readonly string[];
77
79
  };
78
80
 
79
81
  type ScheduleSnapshot$1 = {
@@ -179,8 +179,15 @@ function isRetryableFailure(descriptor, error) {
179
179
  const payloadCode = error && typeof error === "object" && typeof error.code === "string"
180
180
  ? error.code
181
181
  : undefined;
182
+ const payloadDetails = error && typeof error === "object" && error.details && typeof error.details === "object"
183
+ ? error.details
184
+ : undefined;
182
185
  const normalized = toSmithersError(error);
183
186
  const code = payloadCode ?? normalized.code;
187
+ const failureRetryable = payloadDetails?.failureRetryable ?? normalized.details?.failureRetryable;
188
+ if (failureRetryable === false || code === "AGENT_CONFIG_INVALID") {
189
+ return false;
190
+ }
184
191
  const isAgentTask = Boolean(descriptor.agent);
185
192
  const nonRetryableComputeCodes = new Set([
186
193
  "INVALID_OUTPUT",
@@ -372,6 +379,25 @@ export function makeWorkflowSession(options = {}) {
372
379
  state.failures.set(key, error);
373
380
  return decide();
374
381
  }
382
+ /**
383
+ * @returns {EngineDecision | null}
384
+ */
385
+ function unhandledFailureDecision(recoveryKeys = new Set()) {
386
+ for (const [key, taskState] of state.states) {
387
+ const parsed = parseStateKey(key);
388
+ const descriptor = findDescriptor(state, parsed.nodeId, parsed.iteration);
389
+ if (taskState === "failed" && !descriptor?.continueOnFail) {
390
+ if (recoveryKeys.has(key)) {
391
+ continue;
392
+ }
393
+ return {
394
+ _tag: "Failed",
395
+ error: new SmithersError("SESSION_ERROR", `Task failed: ${descriptor?.nodeId ?? key}`, { key }, state.failures.get(key)),
396
+ };
397
+ }
398
+ }
399
+ return null;
400
+ }
375
401
  function ralphStatePayload() {
376
402
  return {
377
403
  ralphState: Object.fromEntries([...state.ralphState.entries()].map(([id, value]) => [
@@ -393,16 +419,6 @@ export function makeWorkflowSession(options = {}) {
393
419
  if (!state.graph) {
394
420
  return { _tag: "Wait", reason: { _tag: "ExternalTrigger" } };
395
421
  }
396
- for (const [key, taskState] of state.states) {
397
- const parsed = parseStateKey(key);
398
- const descriptor = findDescriptor(state, parsed.nodeId, parsed.iteration);
399
- if (taskState === "failed" && !descriptor?.continueOnFail) {
400
- return {
401
- _tag: "Failed",
402
- error: new SmithersError("SESSION_ERROR", `Task failed: ${descriptor?.nodeId ?? key}`, { key }, state.failures.get(key)),
403
- };
404
- }
405
- }
406
422
  const schedule = computeSchedule();
407
423
  if (schedule.fatalError) {
408
424
  return {
@@ -419,6 +435,11 @@ export function makeWorkflowSession(options = {}) {
419
435
  },
420
436
  };
421
437
  }
438
+ const recoveryKeys = new Set(schedule.failureRecoveryKeys ?? []);
439
+ let failure = unhandledFailureDecision(recoveryKeys);
440
+ if (failure) {
441
+ return failure;
442
+ }
422
443
  const executable = [];
423
444
  let waitReason;
424
445
  let changed = false;
@@ -489,6 +510,10 @@ export function makeWorkflowSession(options = {}) {
489
510
  if ([...state.states.values()].some((taskState) => taskState === "in-progress")) {
490
511
  return { _tag: "Wait", reason: { _tag: "ExternalTrigger" } };
491
512
  }
513
+ failure = unhandledFailureDecision(recoveryKeys);
514
+ if (failure) {
515
+ return failure;
516
+ }
492
517
  if (schedule.readyRalphs.length > 0) {
493
518
  for (const ralph of schedule.readyRalphs) {
494
519
  const current = state.ralphState.get(ralph.id) ?? {
@@ -70,6 +70,8 @@ export function scheduleTasks(plan, states, descriptors, ralphState, retryWait,
70
70
  let continuation;
71
71
  let nextRetryAtMs;
72
72
  let fatalError;
73
+ let failureRecoveryActive = false;
74
+ const failureRecoveryKeys = new Set();
73
75
  const groupUsage = new Map();
74
76
  for (const [stateKey, state] of states) {
75
77
  if (state !== "in-progress")
@@ -89,7 +91,7 @@ export function scheduleTasks(plan, states, descriptors, ralphState, retryWait,
89
91
  * @param {PlanNode} node
90
92
  * @returns {{ readonly terminal: boolean; readonly failed: boolean }}
91
93
  */
92
- function inspect(node) {
94
+ function inspect(node, options = {}) {
93
95
  switch (node.kind) {
94
96
  case "task": {
95
97
  const descriptor = descriptors.get(node.nodeId);
@@ -102,12 +104,16 @@ export function scheduleTasks(plan, states, descriptors, ralphState, retryWait,
102
104
  state === "failed" ||
103
105
  Boolean(descriptor.waitAsync &&
104
106
  (state === "waiting-approval" || state === "waiting-event"));
105
- return { terminal, failed: state === "failed" && !descriptor.continueOnFail };
107
+ return {
108
+ terminal,
109
+ failed: state === "failed" &&
110
+ (options.includeContinuedFailures || !descriptor.continueOnFail),
111
+ };
106
112
  }
107
113
  case "sequence":
108
114
  case "group": {
109
115
  for (const child of node.children) {
110
- const result = inspect(child);
116
+ const result = inspect(child, options);
111
117
  if (!result.terminal)
112
118
  return { terminal: false, failed: false };
113
119
  if (result.failed)
@@ -119,7 +125,7 @@ export function scheduleTasks(plan, states, descriptors, ralphState, retryWait,
119
125
  let terminal = true;
120
126
  let failed = false;
121
127
  for (const child of node.children) {
122
- const result = inspect(child);
128
+ const result = inspect(child, options);
123
129
  if (!result.terminal)
124
130
  terminal = false;
125
131
  if (result.failed)
@@ -128,30 +134,137 @@ export function scheduleTasks(plan, states, descriptors, ralphState, retryWait,
128
134
  return { terminal, failed: terminal && failed };
129
135
  }
130
136
  case "saga": {
137
+ let completedActions = 0;
138
+ let failed = false;
131
139
  for (const child of node.actionChildren) {
132
- const result = inspect(child);
140
+ const result = inspect(child, {
141
+ includeContinuedFailures: true,
142
+ });
143
+ if (!result.terminal)
144
+ return { terminal: false, failed: false };
145
+ if (result.failed) {
146
+ failed = true;
147
+ break;
148
+ }
149
+ completedActions += 1;
150
+ }
151
+ if (!failed)
152
+ return { terminal: true, failed: false };
153
+ if (node.onFailure === "fail")
154
+ return { terminal: true, failed: true };
155
+ let compensationFailed = false;
156
+ for (let index = completedActions - 1; index >= 0; index -= 1) {
157
+ const compensation = node.compensationChildren[index];
158
+ if (!compensation)
159
+ continue;
160
+ const result = inspect(compensation, options);
133
161
  if (!result.terminal)
134
162
  return { terminal: false, failed: false };
135
163
  if (result.failed)
136
- return { terminal: true, failed: true };
164
+ compensationFailed = true;
137
165
  }
138
- return { terminal: true, failed: false };
166
+ return {
167
+ terminal: true,
168
+ failed: compensationFailed || node.onFailure === "compensate-and-fail",
169
+ };
139
170
  }
140
171
  case "try-catch-finally": {
172
+ let tryFailed = false;
141
173
  for (const child of node.tryChildren) {
142
- const result = inspect(child);
174
+ const result = inspect(child, {
175
+ includeContinuedFailures: true,
176
+ });
143
177
  if (!result.terminal)
144
178
  return { terminal: false, failed: false };
145
- if (result.failed)
146
- return { terminal: true, failed: true };
179
+ if (result.failed) {
180
+ tryFailed = true;
181
+ break;
182
+ }
147
183
  }
148
- return { terminal: true, failed: false };
184
+ if (!tryFailed) {
185
+ return inspect({
186
+ kind: "sequence",
187
+ children: node.finallyChildren,
188
+ }, options);
189
+ }
190
+ let catchFailed = node.catchChildren.length === 0;
191
+ if (node.catchChildren.length > 0) {
192
+ const catchStatus = inspect({
193
+ kind: "sequence",
194
+ children: node.catchChildren,
195
+ }, options);
196
+ if (!catchStatus.terminal)
197
+ return { terminal: false, failed: false };
198
+ catchFailed = catchStatus.failed;
199
+ }
200
+ const finallyStatus = inspect({
201
+ kind: "sequence",
202
+ children: node.finallyChildren,
203
+ }, options);
204
+ if (!finallyStatus.terminal)
205
+ return { terminal: false, failed: false };
206
+ return {
207
+ terminal: true,
208
+ failed: catchFailed || finallyStatus.failed,
209
+ };
149
210
  }
150
211
  default:
151
212
  return { terminal: true, failed: false };
152
213
  }
153
214
  }
154
215
  /**
216
+ * @param {PlanNode} node
217
+ * @param {{ includeContinuedFailures?: boolean }} options
218
+ */
219
+ function collectFailureKeys(node, options = {}) {
220
+ switch (node.kind) {
221
+ case "task": {
222
+ const descriptor = descriptors.get(node.nodeId);
223
+ if (!descriptor)
224
+ return;
225
+ const key = buildStateKey(descriptor.nodeId, descriptor.iteration);
226
+ const state = states.get(key) ?? "pending";
227
+ if (state === "failed" &&
228
+ (options.includeContinuedFailures || !descriptor.continueOnFail)) {
229
+ failureRecoveryKeys.add(key);
230
+ }
231
+ return;
232
+ }
233
+ case "sequence":
234
+ case "group":
235
+ case "parallel":
236
+ for (const child of node.children) {
237
+ collectFailureKeys(child, options);
238
+ }
239
+ return;
240
+ case "saga":
241
+ for (const child of node.actionChildren) {
242
+ collectFailureKeys(child, options);
243
+ }
244
+ return;
245
+ case "try-catch-finally":
246
+ for (const child of node.tryChildren) {
247
+ collectFailureKeys(child, options);
248
+ }
249
+ for (const child of node.catchChildren) {
250
+ collectFailureKeys(child, options);
251
+ }
252
+ for (const child of node.finallyChildren) {
253
+ collectFailureKeys(child, options);
254
+ }
255
+ return;
256
+ }
257
+ }
258
+ /**
259
+ * @param {readonly PlanNode[]} children
260
+ * @param {{ includeContinuedFailures?: boolean }} options
261
+ */
262
+ function collectChildFailureKeys(children, options = {}) {
263
+ for (const child of children) {
264
+ collectFailureKeys(child, options);
265
+ }
266
+ }
267
+ /**
155
268
  * @param {readonly PlanNode[]} children
156
269
  */
157
270
  function walkSequence(children) {
@@ -247,7 +360,9 @@ export function scheduleTasks(plan, states, descriptors, ralphState, retryWait,
247
360
  let completedActions = 0;
248
361
  let failed = false;
249
362
  for (const child of node.actionChildren) {
250
- const status = inspect(child);
363
+ const status = inspect(child, {
364
+ includeContinuedFailures: true,
365
+ });
251
366
  if (!status.terminal)
252
367
  return walk(child);
253
368
  if (status.failed) {
@@ -262,6 +377,23 @@ export function scheduleTasks(plan, states, descriptors, ralphState, retryWait,
262
377
  fatalError ??= `Saga ${node.id} failed`;
263
378
  return { terminal: true };
264
379
  }
380
+ collectChildFailureKeys(node.actionChildren, {
381
+ includeContinuedFailures: true,
382
+ });
383
+ let compensationFailed = false;
384
+ for (let index = completedActions - 1; index >= 0; index -= 1) {
385
+ const compensation = node.compensationChildren[index];
386
+ if (!compensation)
387
+ continue;
388
+ if (inspect(compensation).failed) {
389
+ compensationFailed = true;
390
+ break;
391
+ }
392
+ }
393
+ if (compensationFailed) {
394
+ return { terminal: false };
395
+ }
396
+ failureRecoveryActive = true;
265
397
  for (let index = completedActions - 1; index >= 0; index -= 1) {
266
398
  const compensation = node.compensationChildren[index];
267
399
  if (!compensation)
@@ -278,7 +410,9 @@ export function scheduleTasks(plan, states, descriptors, ralphState, retryWait,
278
410
  case "try-catch-finally": {
279
411
  let tryFailed = false;
280
412
  for (const child of node.tryChildren) {
281
- const status = inspect(child);
413
+ const status = inspect(child, {
414
+ includeContinuedFailures: true,
415
+ });
282
416
  if (!status.terminal)
283
417
  return walk(child);
284
418
  if (status.failed) {
@@ -286,19 +420,72 @@ export function scheduleTasks(plan, states, descriptors, ralphState, retryWait,
286
420
  break;
287
421
  }
288
422
  }
289
- if (tryFailed) {
290
- if (node.catchChildren.length > 0) {
423
+ if (tryFailed && node.catchChildren.length > 0) {
424
+ const collectTryFailureKeys = () => collectChildFailureKeys(node.tryChildren, {
425
+ includeContinuedFailures: true,
426
+ });
427
+ let catchFailed = false;
428
+ collectTryFailureKeys();
429
+ const catchStatus = inspect({
430
+ kind: "sequence",
431
+ children: node.catchChildren,
432
+ });
433
+ failureRecoveryActive = true;
434
+ catchFailed = catchStatus.failed;
435
+ if (!catchStatus.terminal) {
291
436
  const catchResult = walkSequence(node.catchChildren);
292
437
  if (!catchResult.terminal)
293
438
  return catchResult;
294
439
  }
295
- else {
296
- fatalError ??= `TryCatchFinally ${node.id} failed`;
440
+ const finallyStatus = inspect({
441
+ kind: "sequence",
442
+ children: node.finallyChildren,
443
+ });
444
+ if (finallyStatus.failed) {
445
+ collectTryFailureKeys();
446
+ failureRecoveryActive = false;
447
+ return { terminal: false };
297
448
  }
449
+ const finallyResult = walkSequence(node.finallyChildren);
450
+ if (!finallyResult.terminal) {
451
+ collectTryFailureKeys();
452
+ if (catchFailed) {
453
+ collectChildFailureKeys(node.catchChildren);
454
+ }
455
+ failureRecoveryActive = true;
456
+ return finallyResult;
457
+ }
458
+ if (catchFailed) {
459
+ return { terminal: true };
460
+ }
461
+ return { terminal: true };
462
+ }
463
+ const finallyStatus = inspect({
464
+ kind: "sequence",
465
+ children: node.finallyChildren,
466
+ });
467
+ if (finallyStatus.failed) {
468
+ if (tryFailed) {
469
+ collectChildFailureKeys(node.tryChildren, {
470
+ includeContinuedFailures: true,
471
+ });
472
+ }
473
+ failureRecoveryActive = false;
474
+ return { terminal: false };
298
475
  }
299
476
  const finallyResult = walkSequence(node.finallyChildren);
300
- if (!finallyResult.terminal)
477
+ if (!finallyResult.terminal) {
478
+ if (tryFailed && node.catchChildren.length === 0) {
479
+ collectChildFailureKeys(node.tryChildren, {
480
+ includeContinuedFailures: true,
481
+ });
482
+ failureRecoveryActive = true;
483
+ }
301
484
  return finallyResult;
485
+ }
486
+ if (tryFailed && node.catchChildren.length === 0) {
487
+ fatalError ??= `TryCatchFinally ${node.id} failed`;
488
+ }
302
489
  return { terminal: true };
303
490
  }
304
491
  case "group": {
@@ -326,5 +513,7 @@ export function scheduleTasks(plan, states, descriptors, ralphState, retryWait,
326
513
  continuation,
327
514
  nextRetryAtMs,
328
515
  fatalError,
516
+ failureRecoveryActive,
517
+ failureRecoveryKeys: [...failureRecoveryKeys],
329
518
  };
330
519
  }