npm - @smithers-orchestrator/scheduler - Versions diffs - 0.23.0 → 0.24.2 - Mend

@smithers-orchestrator/scheduler 0.23.0 → 0.24.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/package.json +3 -3
package/src/WorkflowSessionOptions.ts +28 -0
package/src/index.d.ts +9 -0
package/src/makeWorkflowSession.js +159 -47

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@smithers-orchestrator/scheduler",
-  "version": "0.23.0",
+  "version": "0.24.2",
   "description": "Pure decision engine: session, scheduler, and task state management for Smithers workflows",
   "type": "module",
   "sideEffects": false,
@@ -176,8 +176,8 @@
   ],
   "dependencies": {
     "effect": "^3.21.1",
-    "@smithers-orchestrator/errors": "0.23.0",
-    "@smithers-orchestrator/graph": "0.23.0"
+    "@smithers-orchestrator/errors": "0.24.2",
+    "@smithers-orchestrator/graph": "0.24.2"
   },
   "devDependencies": {
     "@types/bun": "latest",

package/src/WorkflowSessionOptions.ts CHANGED

@@ -1,3 +1,13 @@
+import type { TaskDescriptor } from "@smithers-orchestrator/graph/TaskDescriptor";
+/** A breached Aspects budget for a task that is about to be dispatched. */
+export type AspectBudgetBreach = {
+  readonly kind: "tokens" | "latency";
+  readonly limit: number;
+  readonly current: number;
+  readonly onExceeded: "fail" | "warn" | "skip-remaining";
+};
 export type WorkflowSessionOptions = {
   readonly runId?: string;
   readonly nowMs?: () => number;
@@ -7,4 +17,22 @@ export type WorkflowSessionOptions = {
     readonly iteration: number;
     readonly done: boolean;
   }>;
+  /**
+   * Evaluate a runnable task's Aspects budgets against the run's accumulated
+   * usage. Return the first breach, or `null`/`undefined` when within budget.
+   * Only invoked for tasks that would otherwise execute.
+   */
+  readonly evaluateAspectBudget?: (
+    descriptor: TaskDescriptor,
+  ) => AspectBudgetBreach | null | undefined;
+  /** Called when a task is skipped because its budget was exceeded (`skip-remaining`). */
+  readonly onAspectBudgetSkip?: (
+    descriptor: TaskDescriptor,
+    breach: AspectBudgetBreach,
+  ) => void;
+  /** Called when a task continues despite an exceeded budget (`warn`). */
+  readonly onAspectBudgetWarn?: (
+    descriptor: TaskDescriptor,
+    breach: AspectBudgetBreach,
+  ) => void;
 };

package/src/index.d.ts CHANGED

@@ -199,6 +199,12 @@ type WorkflowSessionService$2 = {
     readonly getCurrentGraph: () => Effect.Effect<WorkflowGraph | null>;
 };
+type AspectBudgetBreach$1 = {
+    readonly kind: "tokens" | "latency";
+    readonly limit: number;
+    readonly current: number;
+    readonly onExceeded: "fail" | "warn" | "skip-remaining";
+};
 type WorkflowSessionOptions$2 = {
     readonly runId?: string;
     readonly nowMs?: () => number;
@@ -208,6 +214,9 @@ type WorkflowSessionOptions$2 = {
         readonly iteration: number;
         readonly done: boolean;
     }>;
+    readonly evaluateAspectBudget?: (descriptor: TaskDescriptor$3) => AspectBudgetBreach$1 | null | undefined;
+    readonly onAspectBudgetSkip?: (descriptor: TaskDescriptor$3, breach: AspectBudgetBreach$1) => void;
+    readonly onAspectBudgetWarn?: (descriptor: TaskDescriptor$3, breach: AspectBudgetBreach$1) => void;
 };
 type TaskRecord$1 = {

package/src/makeWorkflowSession.js CHANGED

@@ -4,6 +4,7 @@ import { toSmithersError } from "@smithers-orchestrator/errors/toSmithersError";
 import { buildPlanTree } from "./buildPlanTree.js";
 import { buildStateKey } from "./buildStateKey.js";
 import { cloneTaskStateMap } from "./cloneTaskStateMap.js";
+import { computeRetryDelayMs } from "./computeRetryDelayMs.js";
 import { parseStateKey } from "./parseStateKey.js";
 import { scheduleTasks } from "./scheduleTasks.js";
 /** @typedef {import("./ApprovalResolution.ts").ApprovalResolution} ApprovalResolution */
@@ -149,27 +150,6 @@ function parseDurationMs(value) {
             return amount;
     }
 }
-/**
- * @param {TaskDescriptor} descriptor
- * @param {number} failureCount
- * @returns {number}
- */
-function retryDelayMs(descriptor, failureCount) {
-    const policy = descriptor.retryPolicy;
-    if (!policy)
-        return 0;
-    const initial = policy.initialDelayMs ?? 0;
-    if (policy.backoff === "exponential") {
-        const multiplier = policy.multiplier ?? 2;
-        const computed = initial * Math.pow(multiplier, Math.max(0, failureCount - 1));
-        return Math.min(policy.maxDelayMs ?? computed, computed);
-    }
-    if (policy.backoff === "linear") {
-        const computed = initial * Math.max(1, failureCount);
-        return Math.min(policy.maxDelayMs ?? computed, computed);
-    }
-    return initial;
-}
 /**
  * @param {TaskDescriptor} descriptor
  * @param {unknown} error
@@ -199,6 +179,67 @@ function isRetryableFailure(descriptor, error) {
     }
     return true;
 }
+/**
+ * @param {unknown} error
+ * @returns {boolean}
+ */
+function isTransientSessionFailure(error) {
+    const normalized = toSmithersError(error);
+    const code = error && typeof error === "object" && typeof error.code === "string"
+        ? error.code
+        : normalized.code;
+    return code === "SESSION_ERROR" ||
+        code === "TASK_TIMEOUT" ||
+        code === "TASK_HEARTBEAT_TIMEOUT" ||
+        code === "TASK_ABORTED" ||
+        normalized.details?.failureRetryable === true;
+}
+/**
+ * Build a human-readable diagnostic for a dependency deadlock: pending tasks
+ * that can never run because their `dependsOn` edges point at tasks missing from
+ * the graph or themselves permanently blocked. The most common cause is a
+ * `deps`/`needs` mismatch — a `deps={{ key: ... }}` whose key is not the upstream
+ * task's id and was not remapped with `needs={{ key: '<id>' }}`, which the Task
+ * component (deriveDepNodeIds) turns into a dependency on a non-existent node id.
+ * @param {SessionState} state
+ * @returns {string}
+ */
+function describeDeadlock(state) {
+    const blocked = [];
+    let sawMissing = false;
+    for (const descriptor of state.descriptors.values()) {
+        const taskState = state.states.get(stateKeyFor(descriptor)) ?? "pending";
+        if (taskState !== "pending" && taskState !== "cancelled")
+            continue;
+        const unmet = [];
+        for (const depId of descriptor.dependsOn ?? []) {
+            const dep = state.descriptors.get(depId);
+            if (!dep) {
+                sawMissing = true;
+                unmet.push(`'${depId}' (no such task)`);
+            }
+            else {
+                const depState = state.states.get(stateKeyFor(dep)) ?? "pending";
+                unmet.push(`'${depId}' (${depState})`);
+            }
+        }
+        if (unmet.length > 0) {
+            blocked.push(`  - '${descriptor.nodeId}' is blocked on ${unmet.join(", ")}`);
+        }
+    }
+    const lines = [
+        "Workflow deadlocked: no task can run, and none is waiting on an approval, event, timer, or retry.",
+    ];
+    if (blocked.length > 0) {
+        lines.push("Pending tasks and their unsatisfied dependencies:", ...blocked);
+    }
+    if (sawMissing) {
+        lines.push("", "A dependency marked '(no such task)' references a node id that is not a mounted task. " +
+            "If it came from deps={{ <key>: ... }}, the key is treated as the upstream task's id unless you remap it: " +
+            "add needs={{ <key>: '<upstream task id>' }} (or rename the upstream task to match the key).");
+    }
+    return lines.join("\n");
+}
 /**
  * @param {unknown} error
  * @param {string} label
@@ -224,6 +265,7 @@ export function makeWorkflowSession(options = {}) {
         states: new Map(),
         outputs: new Map(),
         failures: new Map(),
+        failureDescriptors: new Map(),
         retryCounts: new Map(),
         retryWait: new Map(),
         approvals: new Set(),
@@ -231,6 +273,7 @@ export function makeWorkflowSession(options = {}) {
         schedule: null,
         cancelled: false,
         lastMountedSignature: null,
+        lastDeadlockSignature: null,
     };
     /**
    * @param {Pick<TaskOutput, "nodeId" | "iteration">} output
@@ -288,6 +331,7 @@ export function makeWorkflowSession(options = {}) {
                 state.retryWait.delete(key);
                 state.approvals.delete(key);
                 state.retryCounts.delete(key);
+                state.failureDescriptors.delete(key);
             }
         }
         for (const ralph of ralphs) {
@@ -317,6 +361,7 @@ export function makeWorkflowSession(options = {}) {
         state.states.set(key, "finished");
         state.outputs.set(key, output);
         state.retryWait.delete(key);
+        state.failureDescriptors.delete(key);
     }
     /**
    * @param {number} [iteration]
@@ -367,7 +412,7 @@ export function makeWorkflowSession(options = {}) {
         const canRetry = retryable &&
             (descriptor.retries === Infinity || failureCount <= descriptor.retries);
         if (canRetry) {
-            const delay = retryDelayMs(descriptor, failureCount);
+            const delay = computeRetryDelayMs(descriptor.retryPolicy, failureCount);
             state.states.set(key, "pending");
             if (delay > 0) {
                 state.retryWait.set(key, nowMs() + delay);
@@ -379,6 +424,7 @@ export function makeWorkflowSession(options = {}) {
         }
         state.states.set(key, "failed");
         state.failures.set(key, error);
+        state.failureDescriptors.set(key, descriptor);
         return decide();
     }
     /**
@@ -387,11 +433,15 @@ export function makeWorkflowSession(options = {}) {
     function unhandledFailureDecision(recoveryKeys = new Set()) {
         for (const [key, taskState] of state.states) {
             const parsed = parseStateKey(key);
-            const descriptor = findDescriptor(state, parsed.nodeId, parsed.iteration);
+            const descriptor = findDescriptor(state, parsed.nodeId, parsed.iteration) ??
+                state.failureDescriptors.get(key);
             if (taskState === "failed" && !descriptor?.continueOnFail) {
                 if (recoveryKeys.has(key)) {
                     continue;
                 }
+                if (descriptor?.agent && isTransientSessionFailure(state.failures.get(key))) {
+                    continue;
+                }
                 return {
                     _tag: "Failed",
                     error: new SmithersError("SESSION_ERROR", `Task failed: ${descriptor?.nodeId ?? key}`, { key }, state.failures.get(key)),
@@ -480,6 +530,28 @@ export function makeWorkflowSession(options = {}) {
                 changed = true;
                 continue;
             }
+            const budgetBreach = options.evaluateAspectBudget?.(task);
+            if (budgetBreach) {
+                if (budgetBreach.onExceeded === "skip-remaining") {
+                    options.onAspectBudgetSkip?.(task, budgetBreach);
+                    state.states.set(key, "skipped");
+                    changed = true;
+                    continue;
+                }
+                if (budgetBreach.onExceeded === "warn") {
+                    options.onAspectBudgetWarn?.(task, budgetBreach);
+                }
+                else {
+                    return {
+                        _tag: "Failed",
+                        error: new SmithersError("ASPECT_BUDGET_EXCEEDED", `Aspects ${budgetBreach.kind} budget exceeded for task "${task.nodeId}": ${budgetBreach.current} >= ${budgetBreach.limit}`, {
+                            kind: budgetBreach.kind,
+                            limit: budgetBreach.limit,
+                            current: budgetBreach.current,
+                        }),
+                    };
+                }
+            }
             state.states.set(key, "in-progress");
             executable.push(task);
             changed = true;
@@ -497,26 +569,17 @@ export function makeWorkflowSession(options = {}) {
         if (existingWait) {
             return { _tag: "Wait", reason: existingWait };
         }
-        if (schedule.pendingExists) {
-            if (schedule.nextRetryAtMs != null) {
-                return {
-                    _tag: "Wait",
-                    reason: {
-                        _tag: "RetryBackoff",
-                        waitMs: Math.max(0, schedule.nextRetryAtMs - nowMs()),
-                    },
-                };
-            }
-            return { _tag: "Wait", reason: { _tag: "ExternalTrigger" } };
-        }
-        if ([...state.states.values()].some((taskState) => taskState === "in-progress")) {
-            return { _tag: "Wait", reason: { _tag: "ExternalTrigger" } };
-        }
-        failure = unhandledFailureDecision(recoveryKeys);
-        if (failure) {
-            return failure;
-        }
-        if (schedule.readyRalphs.length > 0) {
+        if (schedule.readyRalphs.length > 0 && !unhandledFailureDecision(recoveryKeys)) {
+            // A ralph is ready only when every task in its own subtree is
+            // terminal, so pending or in-flight work elsewhere in the graph must
+            // not starve its next iteration (#267). Run-level continue-as-new
+            // handoffs stay quiescence-only: tearing down the run while sibling
+            // tasks are mid-flight is not safe, so those ralphs are deferred.
+            // An unhandled task failure keeps its precedence over further loop
+            // iterations (decide() already returns it at the top; this guard
+            // makes the ordering explicit).
+            const hasInProgress = [...state.states.values()].some((taskState) => taskState === "in-progress");
+            let advanced = false;
             for (const ralph of schedule.readyRalphs) {
                 const current = state.ralphState.get(ralph.id) ?? {
                     iteration: 0,
@@ -524,6 +587,7 @@ export function makeWorkflowSession(options = {}) {
                 };
                 if (ralph.until) {
                     state.ralphState.set(ralph.id, { ...current, done: true });
+                    advanced = true;
                     continue;
                 }
                 const nextIteration = current.iteration + 1;
@@ -535,13 +599,18 @@ export function makeWorkflowSession(options = {}) {
                         };
                     }
                     state.ralphState.set(ralph.id, { iteration: current.iteration, done: true });
+                    advanced = true;
                     continue;
                 }
-                state.ralphState.set(ralph.id, { iteration: nextIteration, done: false });
-                if (ralph.continueAsNewEvery != null &&
+                const wantsContinueAsNew = ralph.continueAsNewEvery != null &&
                     ralph.continueAsNewEvery > 0 &&
                     nextIteration > 0 &&
-                    nextIteration % ralph.continueAsNewEvery === 0) {
+                    nextIteration % ralph.continueAsNewEvery === 0;
+                if (wantsContinueAsNew && (hasInProgress || schedule.pendingExists)) {
+                    continue;
+                }
+                state.ralphState.set(ralph.id, { iteration: nextIteration, done: false });
+                if (wantsContinueAsNew) {
                     return {
                         _tag: "ContinueAsNew",
                         transition: {
@@ -551,8 +620,51 @@ export function makeWorkflowSession(options = {}) {
                         },
                     };
                 }
+                advanced = true;
+            }
+            if (advanced) {
+                return { _tag: "ReRender", context: renderContext(state) };
+            }
+        }
+        if (schedule.pendingExists) {
+            if (schedule.nextRetryAtMs != null) {
+                return {
+                    _tag: "Wait",
+                    reason: {
+                        _tag: "RetryBackoff",
+                        waitMs: Math.max(0, schedule.nextRetryAtMs - nowMs()),
+                    },
+                };
+            }
+            // Nothing is runnable, in flight, or waiting on an approval, event, or
+            // timer, yet tasks remain pending. They are blocked on dependencies
+            // nothing will ever satisfy — most often a deps/needs key that maps to
+            // a node id no task produces, which becomes a dependsOn on a missing
+            // node. Returning Wait here suspends the run forever with no error.
+            // Give a reactive re-render one chance to mount a producer (the mounted
+            // signature changes), then fail loudly with a diagnostic.
+            const noInProgress = ![...state.states.values()].some((taskState) => taskState === "in-progress");
+            if (noInProgress) {
+                if (options.requireStableFinish && state.graph) {
+                    const signature = mountedSignature(state.graph);
+                    if (state.lastDeadlockSignature !== signature) {
+                        state.lastDeadlockSignature = signature;
+                        return { _tag: "ReRender", context: renderContext(state) };
+                    }
+                }
+                return {
+                    _tag: "Failed",
+                    error: new SmithersError("DEPENDENCY_DEADLOCK", describeDeadlock(state)),
+                };
             }
-            return { _tag: "ReRender", context: renderContext(state) };
+            return { _tag: "Wait", reason: { _tag: "ExternalTrigger" } };
+        }
+        if ([...state.states.values()].some((taskState) => taskState === "in-progress")) {
+            return { _tag: "Wait", reason: { _tag: "ExternalTrigger" } };
+        }
+        failure = unhandledFailureDecision(recoveryKeys);
+        if (failure) {
+            return failure;
         }
         if (options.requireStableFinish && state.graph) {
             const signature = mountedSignature(state.graph);