@smithers-orchestrator/scheduler 0.25.0 → 0.25.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -3
- package/src/RunResult.ts +16 -0
- package/src/index.d.ts +16 -0
- package/src/makeWorkflowSession.js +42 -10
- package/src/scheduleTasks.js +25 -2
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@smithers-orchestrator/scheduler",
|
|
3
|
-
"version": "0.25.0",
|
|
3
|
+
"version": "0.25.2",
|
|
4
4
|
"description": "Pure decision engine: session, scheduler, and task state management for Smithers workflows",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"sideEffects": false,
|
|
@@ -176,8 +176,8 @@
|
|
|
176
176
|
],
|
|
177
177
|
"dependencies": {
|
|
178
178
|
"effect": "^3.21.1",
|
|
179
|
-
"@smithers-orchestrator/errors": "0.25.0",
|
|
180
|
-
"@smithers-orchestrator/graph": "0.25.0"
|
|
179
|
+
"@smithers-orchestrator/errors": "0.25.2",
|
|
180
|
+
"@smithers-orchestrator/graph": "0.25.2"
|
|
181
181
|
},
|
|
182
182
|
"devDependencies": {
|
|
183
183
|
"@types/bun": "latest",
|
package/src/RunResult.ts
CHANGED
|
@@ -13,4 +13,20 @@ export type RunResult = {
|
|
|
13
13
|
readonly output?: unknown;
|
|
14
14
|
readonly error?: unknown;
|
|
15
15
|
readonly nextRunId?: string;
|
|
16
|
+
/**
|
|
17
|
+
* Number of tasks that ended in a `failed` state yet did not fail the run —
|
|
18
|
+
* "masked" child failures the run-level status cannot express. Present (and
|
|
19
|
+
* `> 0`) only on a `finished` result that tolerated at least one failure
|
|
20
|
+
* (a {@link https://smithers.sh/components/task `continueOnFail`} task, or an
|
|
21
|
+
* agent task that failed transiently: rate limit, timeout, abort). A binary
|
|
22
|
+
* `finished` status would otherwise read as a clean success. See
|
|
23
|
+
* `docs/runtime/run-state.mdx`.
|
|
24
|
+
*/
|
|
25
|
+
readonly failedChildren?: number;
|
|
26
|
+
/**
|
|
27
|
+
* Task state keys (`nodeId::iteration`) of the tasks counted by
|
|
28
|
+
* {@link failedChildren}. The iteration disambiguates the same `nodeId` failing
|
|
29
|
+
* across loop/Ralph iterations.
|
|
30
|
+
*/
|
|
31
|
+
readonly failedChildKeys?: readonly string[];
|
|
16
32
|
};
|
package/src/index.d.ts
CHANGED
|
@@ -138,6 +138,22 @@ type RunResult$1 = {
|
|
|
138
138
|
readonly output?: unknown;
|
|
139
139
|
readonly error?: unknown;
|
|
140
140
|
readonly nextRunId?: string;
|
|
141
|
+
/**
|
|
142
|
+
* Number of tasks that ended in a `failed` state yet did not fail the run —
|
|
143
|
+
* "masked" child failures the run-level status cannot express. Present (and
|
|
144
|
+
* `> 0`) only on a `finished` result that tolerated at least one failure
|
|
145
|
+
* (a {@link https://smithers.sh/components/task `continueOnFail`} task, or an
|
|
146
|
+
* agent task that failed transiently: rate limit, timeout, abort). A binary
|
|
147
|
+
* `finished` status would otherwise read as a clean success. See
|
|
148
|
+
* `docs/runtime/run-state.mdx`.
|
|
149
|
+
*/
|
|
150
|
+
readonly failedChildren?: number;
|
|
151
|
+
/**
|
|
152
|
+
* Task state keys (`nodeId::iteration`) of the tasks counted by
|
|
153
|
+
* {@link failedChildren}. The iteration disambiguates the same `nodeId` failing
|
|
154
|
+
* across loop/Ralph iterations.
|
|
155
|
+
*/
|
|
156
|
+
readonly failedChildKeys?: readonly string[];
|
|
141
157
|
};
|
|
142
158
|
|
|
143
159
|
type WaitReason$1 = {
|
|
package/src/makeWorkflowSession.js
CHANGED
|
@@ -353,14 +353,37 @@ export function makeWorkflowSession(options = {}) {
|
|
|
353
353
|
* @returns {EngineDecision}
|
|
354
354
|
*/
|
|
355
355
|
function finishedResult(status = "finished") {
|
|
356
|
-
return {
|
|
357
|
-
_tag: "Finished",
|
|
358
|
-
result: {
|
|
359
|
-
runId: state.runId,
|
|
360
|
-
status,
|
|
361
|
-
output: [...state.outputs.values()].at(-1)?.output,
|
|
362
|
-
},
|
|
356
|
+
/** @type {RunResult} */
|
|
357
|
+
const result = {
|
|
358
|
+
runId: state.runId,
|
|
359
|
+
status,
|
|
360
|
+
output: [...state.outputs.values()].at(-1)?.output,
|
|
363
361
|
};
|
|
362
|
+
if (status === "finished") {
|
|
363
|
+
// At a `finished` terminal, any task still in `failed` state is a
|
|
364
|
+
// *tolerated* failure — an unhandled one would have produced a `Failed`
|
|
365
|
+
// decision via unhandledFailureDecision() and never reached here. Those
|
|
366
|
+
// are exactly the masked children (continueOnFail tasks, transient agent
|
|
367
|
+
// failures) the binary run status cannot express. Surface them so callers
|
|
368
|
+
// can detect a run that "succeeded" while children failed. See issue #295
|
|
369
|
+
// and docs/runtime/run-state.mdx.
|
|
370
|
+
//
|
|
371
|
+
// Keys are the canonical task state keys (`nodeId::iteration`), not bare
|
|
372
|
+
// node ids: a looped/Ralph workflow can fail the same nodeId across
|
|
373
|
+
// iterations, and the iteration is what disambiguates which child to
|
|
374
|
+
// inspect.
|
|
375
|
+
const failedChildKeys = [];
|
|
376
|
+
for (const [key, taskState] of state.states) {
|
|
377
|
+
if (taskState === "failed") {
|
|
378
|
+
failedChildKeys.push(key);
|
|
379
|
+
}
|
|
380
|
+
}
|
|
381
|
+
if (failedChildKeys.length > 0) {
|
|
382
|
+
result.failedChildren = failedChildKeys.length;
|
|
383
|
+
result.failedChildKeys = failedChildKeys;
|
|
384
|
+
}
|
|
385
|
+
}
|
|
386
|
+
return { _tag: "Finished", result };
|
|
364
387
|
}
|
|
365
388
|
/**
|
|
366
389
|
* @returns {ScheduleResult}
|
|
@@ -574,14 +597,23 @@ export function makeWorkflowSession(options = {}) {
|
|
|
574
597
|
};
|
|
575
598
|
}
|
|
576
599
|
/**
|
|
577
|
-
* @param {number} [depth] recursion depth;
|
|
600
|
+
* @param {number} [depth] recursion depth; a safety net for a true decision
|
|
601
|
+
* cycle (a non-monotonic transition bug)
|
|
578
602
|
* @returns {EngineDecision}
|
|
579
603
|
*/
|
|
580
604
|
function decide(depth = 0) {
|
|
581
|
-
if (depth > 10) {
|
|
605
|
+
// Each recursion below only fires when `changed` is true, i.e. at least
|
|
606
|
+
// one task moved to a terminal/in-progress/waiting state — monotonic
|
|
607
|
+
// forward progress. A legitimate chain can therefore be as long as the
|
|
608
|
+
// number of tasks: e.g. a <Sequence> of N skipIf steps yields exactly one
|
|
609
|
+
// skip per pass (#bug: 11+ such steps tripped a hard constant-10 guard and
|
|
610
|
+
// failed a perfectly valid run). Bound by the task count + slack instead;
|
|
611
|
+
// a genuine cycle keeps recursing past the point where every task settled.
|
|
612
|
+
const maxDecideDepth = state.descriptors.size + 10;
|
|
613
|
+
if (depth > maxDecideDepth) {
|
|
582
614
|
return {
|
|
583
615
|
_tag: "Failed",
|
|
584
|
-
error: new SmithersError("SCHEDULER_ERROR", "Exceeded scheduler decide() depth guard.", { depth }),
|
|
616
|
+
error: new SmithersError("SCHEDULER_ERROR", "Exceeded scheduler decide() depth guard.", { depth, maxDepth: maxDecideDepth }),
|
|
585
617
|
};
|
|
586
618
|
}
|
|
587
619
|
if (state.cancelled) {
|
package/src/scheduleTasks.js
CHANGED
|
@@ -398,8 +398,20 @@ export function scheduleTasks(plan, states, descriptors, ralphState, retryWait,
|
|
|
398
398
|
const status = inspect(child, {
|
|
399
399
|
includeContinuedFailures: true,
|
|
400
400
|
});
|
|
401
|
-
if (!status.terminal)
|
|
401
|
+
if (!status.terminal) {
|
|
402
|
+
// A failure already present in this still-running action
|
|
403
|
+
// subtree (e.g. a failed task in a <Parallel> whose sibling
|
|
404
|
+
// is still in flight) must be recorded as recoverable now.
|
|
405
|
+
// Otherwise decide()'s unhandled-failure check fails the run
|
|
406
|
+
// before the action region settles and the saga's
|
|
407
|
+
// compensation can run — an order-dependent bug that only
|
|
408
|
+
// bites when the failing task settles before its sibling.
|
|
409
|
+
const before = failureRecoveryKeys.size;
|
|
410
|
+
collectFailureKeys(child, { includeContinuedFailures: true });
|
|
411
|
+
if (failureRecoveryKeys.size > before)
|
|
412
|
+
failureRecoveryActive = true;
|
|
402
413
|
return walk(child);
|
|
414
|
+
}
|
|
403
415
|
if (status.failed) {
|
|
404
416
|
failed = true;
|
|
405
417
|
break;
|
|
@@ -448,8 +460,19 @@ export function scheduleTasks(plan, states, descriptors, ralphState, retryWait,
|
|
|
448
460
|
const status = inspect(child, {
|
|
449
461
|
includeContinuedFailures: true,
|
|
450
462
|
});
|
|
451
|
-
if (!status.terminal)
|
|
463
|
+
if (!status.terminal) {
|
|
464
|
+
// A failure already present in this still-running try child
|
|
465
|
+
// (e.g. a failed task in a <Parallel> whose sibling is still
|
|
466
|
+
// in flight) must be recorded as recoverable now, or decide()
|
|
467
|
+
// fails the run before the try region settles — skipping
|
|
468
|
+
// catch AND finally. Deferring here lets the region finish so
|
|
469
|
+
// catch/finally run regardless of which task settles first.
|
|
470
|
+
const before = failureRecoveryKeys.size;
|
|
471
|
+
collectFailureKeys(child, { includeContinuedFailures: true });
|
|
472
|
+
if (failureRecoveryKeys.size > before)
|
|
473
|
+
failureRecoveryActive = true;
|
|
452
474
|
return walk(child);
|
|
475
|
+
}
|
|
453
476
|
if (status.failed) {
|
|
454
477
|
tryFailed = true;
|
|
455
478
|
break;
|