npm - @blokjs/runner - Versions diffs - 0.4.0 → 0.6.0 - Mend

@blokjs/runner 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (163)

package/dist/Blok.js +32 -3
package/dist/Blok.js.map +1 -1
package/dist/Configuration.d.ts +41 -5
package/dist/Configuration.js +215 -92
package/dist/Configuration.js.map +1 -1
package/dist/ForEachNode.d.ts +59 -0
package/dist/ForEachNode.js +522 -0
package/dist/ForEachNode.js.map +1 -0
package/dist/LoopMaxIterationsError.d.ts +11 -0
package/dist/LoopMaxIterationsError.js +18 -0
package/dist/LoopMaxIterationsError.js.map +1 -0
package/dist/LoopNode.d.ts +36 -0
package/dist/LoopNode.js +182 -0
package/dist/LoopNode.js.map +1 -0
package/dist/Runner.d.ts +11 -1
package/dist/Runner.js +9 -2
package/dist/Runner.js.map +1 -1
package/dist/RunnerSteps.js +419 -112
package/dist/RunnerSteps.js.map +1 -1
package/dist/RuntimeAdapterNode.d.ts +2 -1
package/dist/RuntimeAdapterNode.js +2 -2
package/dist/RuntimeAdapterNode.js.map +1 -1
package/dist/RuntimeRegistry.d.ts +23 -2
package/dist/RuntimeRegistry.js +31 -2
package/dist/RuntimeRegistry.js.map +1 -1
package/dist/SubworkflowNode.d.ts +106 -0
package/dist/SubworkflowNode.js +261 -3
package/dist/SubworkflowNode.js.map +1 -1
package/dist/SwitchNode.d.ts +37 -0
package/dist/SwitchNode.js +153 -0
package/dist/SwitchNode.js.map +1 -0
package/dist/TriggerBase.d.ts +50 -0
package/dist/TriggerBase.js +262 -4
package/dist/TriggerBase.js.map +1 -1
package/dist/TryCatchNode.d.ts +32 -0
package/dist/TryCatchNode.js +207 -0
package/dist/TryCatchNode.js.map +1 -0
package/dist/adapters/grpc/GrpcCodec.js +2 -2
package/dist/adapters/grpc/GrpcRuntimeAdapter.d.ts +6 -4
package/dist/adapters/grpc/GrpcRuntimeAdapter.js +6 -4
package/dist/adapters/grpc/GrpcRuntimeAdapter.js.map +1 -1
package/dist/adapters/grpc/types.d.ts +7 -5
package/dist/adapters/grpc/types.js.map +1 -1
package/dist/adapters/transport.d.ts +12 -41
package/dist/adapters/transport.js +21 -70
package/dist/adapters/transport.js.map +1 -1
package/dist/cache/NodeResultCache.js +7 -0
package/dist/cache/NodeResultCache.js.map +1 -1
package/dist/concurrency/NatsKvConcurrencyBackend.js +18 -5
package/dist/concurrency/NatsKvConcurrencyBackend.js.map +1 -1
package/dist/concurrency/RedisConcurrencyBackend.d.ts +64 -0
package/dist/concurrency/RedisConcurrencyBackend.js +374 -0
package/dist/concurrency/RedisConcurrencyBackend.js.map +1 -0
package/dist/concurrency/createConcurrencyBackend.d.ts +1 -0
package/dist/concurrency/createConcurrencyBackend.js +5 -1
package/dist/concurrency/createConcurrencyBackend.js.map +1 -1
package/dist/defineNode.d.ts +8 -0
package/dist/defineNode.js +25 -5
package/dist/defineNode.js.map +1 -1
package/dist/graphql/GraphQLSchemaGenerator.js +1 -1
package/dist/graphql/GraphQLSchemaGenerator.js.map +1 -1
package/dist/index.d.ts +10 -6
package/dist/index.js +13 -9
package/dist/index.js.map +1 -1
package/dist/marketplace/RuntimeCatalog.d.ts +6 -0
package/dist/marketplace/RuntimeCatalog.js.map +1 -1
package/dist/marketplace/RuntimeDiscovery.d.ts +2 -2
package/dist/marketplace/RuntimeDiscovery.js +18 -6
package/dist/marketplace/RuntimeDiscovery.js.map +1 -1
package/dist/monitoring/ConcurrencyMetrics.d.ts +26 -0
package/dist/monitoring/ConcurrencyMetrics.js +36 -4
package/dist/monitoring/ConcurrencyMetrics.js.map +1 -1
package/dist/monitoring/ForEachWaitMetrics.d.ts +22 -0
package/dist/monitoring/ForEachWaitMetrics.js +36 -0
package/dist/monitoring/ForEachWaitMetrics.js.map +1 -0
package/dist/openapi/OpenAPIGenerator.js +7 -2
package/dist/openapi/OpenAPIGenerator.js.map +1 -1
package/dist/runtime/PrimitiveStack.d.ts +64 -0
package/dist/runtime/PrimitiveStack.js +92 -0
package/dist/runtime/PrimitiveStack.js.map +1 -0
package/dist/scheduling/DebounceBackend.d.ts +108 -0
package/dist/scheduling/DebounceBackend.js +23 -0
package/dist/scheduling/DebounceBackend.js.map +1 -0
package/dist/scheduling/DebounceCoordinator.d.ts +65 -12
package/dist/scheduling/DebounceCoordinator.js +234 -13
package/dist/scheduling/DebounceCoordinator.js.map +1 -1
package/dist/scheduling/DeferredRunScheduler.d.ts +28 -0
package/dist/scheduling/DeferredRunScheduler.js +105 -3
package/dist/scheduling/DeferredRunScheduler.js.map +1 -1
package/dist/scheduling/NatsKvDebounceBackend.d.ts +53 -0
package/dist/scheduling/NatsKvDebounceBackend.js +334 -0
package/dist/scheduling/NatsKvDebounceBackend.js.map +1 -0
package/dist/scheduling/RedisDebounceBackend.d.ts +49 -0
package/dist/scheduling/RedisDebounceBackend.js +356 -0
package/dist/scheduling/RedisDebounceBackend.js.map +1 -0
package/dist/scheduling/createDebounceBackend.d.ts +25 -0
package/dist/scheduling/createDebounceBackend.js +39 -0
package/dist/scheduling/createDebounceBackend.js.map +1 -0
package/dist/security/AuditLogger.js +1 -1
package/dist/security/AuditLogger.js.map +1 -1
package/dist/security/AuthMiddleware.d.ts +19 -20
package/dist/security/AuthMiddleware.js +35 -20
package/dist/security/AuthMiddleware.js.map +1 -1
package/dist/security/OAuthProvider.js +2 -2
package/dist/security/OAuthProvider.js.map +1 -1
package/dist/security/SecretManager.js +14 -13
package/dist/security/SecretManager.js.map +1 -1
package/dist/security/index.d.ts +3 -1
package/dist/security/index.js +3 -1
package/dist/security/index.js.map +1 -1
package/dist/testing/TestHarness.d.ts +27 -12
package/dist/testing/TestHarness.js +19 -3
package/dist/testing/TestHarness.js.map +1 -1
package/dist/testing/WorkflowTestRunner.js +0 -7
package/dist/testing/WorkflowTestRunner.js.map +1 -1
package/dist/tracing/InMemoryRunStore.d.ts +14 -1
package/dist/tracing/InMemoryRunStore.js +95 -6
package/dist/tracing/InMemoryRunStore.js.map +1 -1
package/dist/tracing/PostgresRunStore.d.ts +28 -2
package/dist/tracing/PostgresRunStore.js +276 -3
package/dist/tracing/PostgresRunStore.js.map +1 -1
package/dist/tracing/RoutingDiagnostics.d.ts +55 -0
package/dist/tracing/RoutingDiagnostics.js +50 -0
package/dist/tracing/RoutingDiagnostics.js.map +1 -0
package/dist/tracing/RunStore.d.ts +82 -1
package/dist/tracing/RunTracker.d.ts +7 -1
package/dist/tracing/RunTracker.js +23 -0
package/dist/tracing/RunTracker.js.map +1 -1
package/dist/tracing/SqliteRunStore.d.ts +57 -2
package/dist/tracing/SqliteRunStore.js +408 -48
package/dist/tracing/SqliteRunStore.js.map +1 -1
package/dist/tracing/TraceRouter.js +380 -18
package/dist/tracing/TraceRouter.js.map +1 -1
package/dist/tracing/createStore.js +14 -3
package/dist/tracing/createStore.js.map +1 -1
package/dist/tracing/metadataFilter.d.ts +63 -0
package/dist/tracing/metadataFilter.js +224 -0
package/dist/tracing/metadataFilter.js.map +1 -0
package/dist/tracing/types.d.ts +331 -7
package/dist/utils/envAllowlist.d.ts +35 -0
package/dist/utils/envAllowlist.js +113 -0
package/dist/utils/envAllowlist.js.map +1 -0
package/dist/version/RuntimeVersionValidator.d.ts +38 -0
package/dist/version/RuntimeVersionValidator.js +121 -0
package/dist/version/RuntimeVersionValidator.js.map +1 -0
package/dist/visualization/WorkflowVisualizer.js +4 -4
package/dist/visualization/WorkflowVisualizer.js.map +1 -1
package/dist/workflow/PersistenceHelper.d.ts +18 -10
package/dist/workflow/PersistenceHelper.js +35 -9
package/dist/workflow/PersistenceHelper.js.map +1 -1
package/dist/workflow/WorkflowNormalizer.d.ts +19 -1
package/dist/workflow/WorkflowNormalizer.js +469 -19
package/dist/workflow/WorkflowNormalizer.js.map +1 -1
package/dist/workflow/WorkflowRegistry.d.ts +122 -0
package/dist/workflow/WorkflowRegistry.js +121 -0
package/dist/workflow/WorkflowRegistry.js.map +1 -1
package/dist/workflow/sampleBody.d.ts +54 -0
package/dist/workflow/sampleBody.js +320 -0
package/dist/workflow/sampleBody.js.map +1 -0
package/package.json +3 -8
package/dist/adapters/HttpRuntimeAdapter.d.ts +0 -79
package/dist/adapters/HttpRuntimeAdapter.js +0 -233
package/dist/adapters/HttpRuntimeAdapter.js.map +0 -1

package/dist/RunnerSteps.js CHANGED

@@ -2,6 +2,7 @@ import { GlobalError } from "@blokjs/shared";
 import { RunCancelledError } from "./RunCancelledError";
 import { WaitDispatchRequest } from "./WaitDispatchRequest";
 import { resolveIdempotencyKey } from "./idempotency/resolveIdempotencyKey";
+import { getPrimitiveStack } from "./runtime/PrimitiveStack";
 import { StepTimeoutError } from "./timeouts/StepTimeoutError";
 import { RunTracker } from "./tracing/RunTracker";
 import { sanitize } from "./tracing/sanitize";
@@ -27,6 +28,59 @@ function computeBackoff(config, attempt) {
     const raw = min * factor ** Math.max(0, attempt - 1);
     return Math.min(max, Math.floor(raw));
 }
/**
 * Default cap, in bytes, on the JSON-serialized `ctx.state` snapshot taken
 * before a `WaitDispatchRequest` throw (1 MiB). Per the package's own notes
 * this matches the `BLOK_DISPATCH_PAYLOAD_MAX_BYTES` cap used for trigger
 * payloads. Override per-deployment via `BLOK_STATE_SNAPSHOT_MAX_BYTES`.
 */
const DEFAULT_STATE_SNAPSHOT_MAX_BYTES = 1_048_576;
/**
 * Serialize `ctx.state` to a JSON string for persistence as the run's state
 * snapshot. Called immediately before the runner throws
 * `WaitDispatchRequest`, so the snapshot reflects the pre-wait state.
 *
 * Every failure mode is non-fatal: the helper logs a warning and returns
 * `undefined`, meaning "no snapshot for this run". The wait still defers;
 * resumption across a process restart is then best-effort.
 *
 * Honors two ops env vars:
 *
 *  - `BLOK_STATE_SNAPSHOT_DISABLED=1` — kill-switch. Returns `undefined`
 *    without attempting to serialize (and without logging). Use this when
 *    state contains values that JSON.stringify can't round-trip safely
 *    (Date, Map, BigInt, circular refs) and the author accepts the
 *    limitation.
 *  - `BLOK_STATE_SNAPSHOT_MAX_BYTES=<n>` — cap on the serialized blob's
 *    UTF-8 byte length. Unset, non-numeric, non-finite, zero or negative
 *    values fall back to the 1 MiB default. Above the cap, the helper logs
 *    a warning and returns `undefined` for that one call.
 *
 * Serialization failures handled:
 *
 *  - `JSON.stringify` throws (circular refs, BigInt, a throwing `toJSON`).
 *  - `JSON.stringify` returns `undefined` WITHOUT throwing — this happens
 *    when the top-level value is a function or symbol, or an object whose
 *    `toJSON()` returns `undefined`. Passing that `undefined` on to
 *    `Buffer.byteLength` would raise a `TypeError` and abort the wait, so
 *    it is treated like any other serialization failure.
 *
 * @param {unknown} state - The `ctx.state` value; `null`/`undefined` is
 *   serialized as `{}`.
 * @param {{ logLevel: (level: string, message: string) => void }} logger -
 *   Logger used for the warning messages.
 * @returns {string | undefined} The JSON text, or `undefined` when the
 *   snapshot is disabled, not serializable, or over the size cap.
 */
function serializeStateSnapshot(state, logger) {
    if (process.env.BLOK_STATE_SNAPSHOT_DISABLED === "1")
        return undefined;
    const capRaw = process.env.BLOK_STATE_SNAPSHOT_MAX_BYTES;
    const cap = capRaw ? Number(capRaw) : DEFAULT_STATE_SNAPSHOT_MAX_BYTES;
    // Guard against NaN / Infinity / non-positive overrides: a bad env value
    // must not silently disable (or uncap) snapshots.
    const effectiveCap = Number.isFinite(cap) && cap > 0 ? cap : DEFAULT_STATE_SNAPSHOT_MAX_BYTES;
    let serialized;
    try {
        serialized = JSON.stringify(state ?? {});
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        logger.logLevel("warn", `[blok][wait] ctx.state snapshot failed to serialize: ${msg}. Wait will still defer; resumption is best-effort across process restart.`);
        return undefined;
    }
    // JSON.stringify signals "not representable" for some inputs by returning
    // undefined instead of throwing; Buffer.byteLength(undefined) would throw.
    if (typeof serialized !== "string") {
        logger.logLevel("warn", "[blok][wait] ctx.state snapshot failed to serialize: value is not JSON-serializable. Wait will still defer; resumption is best-effort across process restart.");
        return undefined;
    }
    const size = Buffer.byteLength(serialized, "utf8");
    if (size > effectiveCap) {
        logger.logLevel("warn", `[blok][wait] ctx.state snapshot exceeds ${effectiveCap} bytes (got ${size}); skipping snapshot. Wait will still defer; resumption is best-effort. Reduce state size or raise BLOK_STATE_SNAPSHOT_MAX_BYTES.`);
        return undefined;
    }
    return serialized;
}
 function sleep(ms) {
     return new Promise((resolve) => {
         setTimeout(resolve, ms);
@@ -84,7 +138,26 @@ export default class RunnerSteps {
             // at runSteps entry. Default `-1` = no resume; runner starts
             // at i = 0.
             const persistedRun = !deep && tracker && traceRunId ? tracker.getStore().getRun(traceRunId) : undefined;
-            const resumeFromIndex = persistedRun?.lastCompletedStepIndex !== undefined ? persistedRun.lastCompletedStepIndex + 1 : 0;
+            // Two cursor sources:
+            //   - Top-level (deep === false): workflow_runs.lastCompletedStepIndex.
+            //   - Nested inside a primitive iterator (deep === true, v0.6
+            //     Phase 2): `_blokInnerResumeIndex` stamped on the child ctx
+            //     by ForEachNode.runIteration when resuming at a specific
+            //     inner step. Undefined = start at 0 (fresh iteration body).
+            const innerResumeIndexRaw = ctx._blokInnerResumeIndex;
+            const innerResumeIndex = typeof innerResumeIndexRaw === "number" ? innerResumeIndexRaw : undefined;
+            const resumeFromIndex = !deep
+                ? persistedRun?.lastCompletedStepIndex !== undefined
+                    ? persistedRun.lastCompletedStepIndex + 1
+                    : 0
+                : (innerResumeIndex ?? 0);
+            // Clear the sentinel so a re-runner started fresh from this
+            // childCtx (e.g. the nested branch flow path) doesn't inherit
+            // a stale resume hint. ForEachNode set it for THIS one re-entry
+            // only; it should not propagate further.
+            if (deep && innerResumeIndex !== undefined) {
+                ctx._blokInnerResumeIndex = undefined;
+            }
             for (let i = 0; i < steps.length; i++) {
                 const step = steps[i];
                 // PR 4 — skip pre-wait steps on resume. State + NodeRuns
@@ -103,6 +176,19 @@ export default class RunnerSteps {
                 if (ctx.signal?.aborted) {
                     throw new RunCancelledError(traceRunId);
                 }
+                // v0.6 Phase 4 — bump the TOP primitive frame's
+                // `innerStepIndex` to the current step. If a wait fires from
+                // inside this step (or anywhere deeper down the call stack),
+                // the wait-throw site walks the stack to persist each frame
+                // and needs the TOP frame's cursor to point at THIS step.
+                // `deep === true` is the only case where this can apply —
+                // the top-level runSteps doesn't have a frame.
+                if (deep) {
+                    const stack = getPrimitiveStack(ctx);
+                    if (stack.length > 0) {
+                        stack[stack.length - 1].cursor.innerStepIndex = i;
+                    }
+                }
                 if (!step.active) {
                     // Track skipped nodes
                     if (tracker && traceRunId) {
@@ -144,6 +230,13 @@ export default class RunnerSteps {
                         // (wait:true / default) in StepRail. Only meaningful
                         // for subworkflow steps; undefined elsewhere.
                         const subworkflowWait = stepType === "subworkflow" ? stepAny.wait : undefined;
+                        // G2 (v0.6) — capture the `dispatch` strategy so the
+                        // rail can mark http-self invocations with a small
+                        // `http` badge alongside the existing `↳ async`/`↳ sub`.
+                        // Normalize: unknown values + the default fall through
+                        // to `undefined` (rendered as in-process by Studio).
+                        const dispatchRaw = stepType === "subworkflow" ? stepAny.dispatch : undefined;
+                        const subworkflowDispatch = dispatchRaw === "http-self" || dispatchRaw === "in-process" ? dispatchRaw : undefined;
                         // PR 5 E3 — surface sub-workflow nesting depth.
                         // `_subworkflowDepth` on ctx is set by SubworkflowNode +
                         // createChildContext; the parent's invocation of a
@@ -152,6 +245,23 @@ export default class RunnerSteps {
                         const subworkflowDepth = stepType === "subworkflow"
                             ? (ctx._subworkflowDepth ?? 0) + 1
                             : undefined;
+                        // v0.5 middleware origin tagging — when the trigger's
+                        // `runMiddlewareChain` is dispatching a middleware
+                        // workflow on this ctx, it sets `_blokMiddlewareName`
+                        // to the middleware's name. Surface that here so
+                        // Studio's StepRail can render a `mw:<name>` origin
+                        // badge on every inner step the middleware produced.
+                        const middleware = ctx._blokMiddlewareName;
+                        // v0.5.3 — read the iteration sentinel set by ForEachNode +
+                        // LoopNode on per-iteration child ctxs. Lets Studio group
+                        // inner steps under "iteration N" headers in StepRail.
+                        // Inherited by nested runners (tryCatch, switch) inside
+                        // the same iteration — which is correct: their inner steps
+                        // belong to that iteration. A nested forEach inside an
+                        // outer iteration overrides the sentinel on its own child
+                        // ctx, so the inner-most iteration wins for its descendants.
+                        const iterationIndexRaw = ctx._blokIterationIndex;
+                        const iterationIndex = typeof iterationIndexRaw === "number" ? iterationIndexRaw : undefined;
                         const nodeRun = tracker.startNode(traceRunId, {
                             nodeName: step.name,
                             nodeType: stepType,
@@ -160,7 +270,10 @@ export default class RunnerSteps {
                             depth: depthLevel,
                             stepIndex: i,
                             wait: subworkflowWait,
+                            dispatch: subworkflowDispatch,
                             subworkflowDepth,
+                            middleware,
+                            iterationIndex,
                         });
                         nodeRunId = nodeRun.id;
                         ctx._traceNodeId = nodeRunId;
@@ -214,9 +327,21 @@ export default class RunnerSteps {
                         // scheduledAt (or it's from trigger-level delay); on
                         // re-entry from a wait dispatch, the run was marked
                         // `delayed` with scheduledAt set to the wait deadline.
+                        //
+                        // v0.6 Phase 4 — for deep (nested) runSteps, a primitive
+                        // (SwitchNode etc.) sets `_blokInnerResumeIndex` to the
+                        // resume target — including `0` when the wait is at the
+                        // first step of its sub-pipeline. The original
+                        // `resumeFromIndex > 0` guard prevented re-entry from
+                        // firing at index 0, but Phase 4 needs the index-0 case
+                        // (e.g., switch arm whose first step is the wait). For
+                        // deep runs we additionally require `innerResumeIndex`
+                        // to be defined — that's how we tell "this primitive
+                        // resumed here" vs "we're at index 0 because of a fresh
+                        // iteration that doesn't have a resume cursor".
                         const isReentry = ctx._blokDispatchReentry === true &&
-                            resumeFromIndex > 0 &&
-                            i === resumeFromIndex;
+                            i === resumeFromIndex &&
+                            (!deep ? resumeFromIndex > 0 : innerResumeIndex !== undefined);
                         const deadline = computeDeadline();
                         const now = Date.now();
                         if (isReentry || deadline <= now) {
@@ -228,21 +353,117 @@ export default class RunnerSteps {
                                 tracker.completeNode(nodeRunId, { __waited__: true, deadline });
                             }
                             ctx.logger.log(`[step ${i + 1}/${steps.length}] ${step.name} (wait) → satisfied`);
-                            // Advance the resume cursor so a subsequent wait at a
-                            // later index can rely on it.
-                            if (tracker && traceRunId) {
+                            // Advance the resume cursor at TOP-LEVEL only.
+                            // Nested satisfies (deep=true, v0.6 Phase 2 — wait
+                            // inside a forEach iteration body) must NOT
+                            // overwrite the workflow's resume cursor with the
+                            // inner step index — that would skip past the
+                            // primitive entirely on the next re-entry. The
+                            // primitive's own NodeRun.iteration_context tracks
+                            // progress for nested resumes.
+                            if (!deep && tracker && traceRunId) {
                                 tracker.getStore().updateRun(traceRunId, { lastCompletedStepIndex: i });
                             }
                             continue;
                         }
                         // First pass: schedule + throw WaitDispatchRequest.
                         // Set resume cursor BEFORE throwing so re-entry knows
-                        // where to pick up. Cursor = i - 1 (the last non-wait
-                        // step that completed).
+                        // where to pick up.
+                        //
+                        // Two cases for cursor placement:
+                        //   - Top-level wait (deep === false). Cursor = i - 1
+                        //     (the last non-wait outer step that completed).
+                        //     On re-entry, runSteps reads
+                        //     workflow_runs.lastCompletedStepIndex + 1 = i and
+                        //     starts the wait step which flips to "satisfied".
+                        //   - Nested wait inside a primitive (deep === true,
+                        //     v0.6 Phase 2). The wait fired from inside an
+                        //     iteration body of a forEach (or analogous future
+                        //     primitive). The OUTER runSteps wrote `i - 1` =
+                        //     forEach-step-index minus 1 *before* invoking
+                        //     forEach.process, so workflow_runs.lastCompleted-
+                        //     StepIndex still points at the OUTER cursor we
+                        //     want — DON'T overwrite it with the inner-i (that
+                        //     would skip the forEach entirely on resume).
+                        //     Instead, persist the iteration cursor on the
+                        //     forEach's NodeRun's `iteration_context` column.
+                        //     ForEachNode reads it on re-entry to resume the
+                        //     right iteration + inner step.
+                        //
+                        // v0.6 prerequisite for wait-inside-primitives Phase 2
+                        // — snapshot `ctx.state` regardless of nesting. Two
+                        // re-entry paths consume this snapshot:
+                        //   1. In-process timer fire (DeferredRunScheduler):
+                        //      same `ctx` is reused, state is already there;
+                        //      rehydrate at TriggerBase.run is a no-op.
+                        //   2. Cross-process recovery (recoverDispatches →
+                        //      restoreDispatch on boot): a fresh `ctx` is
+                        //      built from the persisted scheduled_dispatches
+                        //      row with empty `state`. Without the snapshot,
+                        //      Phase 2's iteration-state-persistence promise
+                        //      breaks across restart.
                         if (tracker && traceRunId) {
-                            tracker.getStore().updateRun(traceRunId, {
-                                lastCompletedStepIndex: i - 1,
-                            });
+                            const updates = {
+                                stateSnapshot: serializeStateSnapshot(ctx.state, ctx.logger),
+                            };
+                            if (!deep) {
+                                updates.lastCompletedStepIndex = i - 1;
+                            }
+                            tracker.getStore().updateRun(traceRunId, updates);
+                            // Phase 2/3 — write iteration_context to the active
+                            // primitive's NodeRun when nested. Reads sentinels
+                            // stamped by the primitive (ForEachNode in Phase 2,
+                            // LoopNode in Phase 3) on the parent ctx:
+                            //   - _blokActivePrimitiveNodeRunId: which NodeRun
+                            //     gets the cursor (set by RunnerSteps' outer
+                            //     iteration around the primitive's process()).
+                            //   - _blokForEachCurrentIteration: iteration index
+                            //     of the in-flight iteration.
+                            //   - _blokForEachPartialResults (Phase 2 only):
+                            //     accumulator for iterations [0..iteration-1]
+                            //     so the post-resume final result array covers
+                            //     all iterations. LoopNode doesn't aggregate
+                            //     results (it returns the last iteration's
+                            //     output), so it doesn't stamp this sentinel —
+                            //     the cursor stores `completedResults: []` and
+                            //     LoopNode ignores the field on resume.
+                            // v0.6 Phase 4 — walk the primitive stack and persist
+                            // each frame's cursor to its NodeRun. The TOP frame's
+                            // `innerStepIndex` is the wait step's position within
+                            // the deepest primitive's sub-pipeline; outer frames'
+                            // `innerStepIndex` values were set by their enclosing
+                            // runSteps' step-boundary write when control passed
+                            // into the deeper primitive. This is what lets
+                            // `forEach > forEach > wait`,
+                            // `switch > forEach > wait`, etc. all resume
+                            // correctly on re-entry.
+                            //
+                            // Each frame's `cursor` is owned by the primitive
+                            // (it stamps `iteration`/`caseIndex`/`completedResults`).
+                            // The runner's only responsibility here is to refresh
+                            // the TOP frame's `innerStepIndex` to `i` and
+                            // persist every frame.
+                            if (deep) {
+                                const stack = getPrimitiveStack(ctx);
+                                if (stack.length > 0) {
+                                    stack[stack.length - 1].cursor.innerStepIndex = i;
+                                    for (const frame of stack) {
+                                        // Skip parallel-forEach frames — the
+                                        // parallel branch in ForEachNode writes
+                                        // its own cursor (with cancelled set +
+                                        // completedResults) post-`Promise.allSettled`.
+                                        // Writing the placeholder here would let
+                                        // "error beats wait" classifications leak
+                                        // a parallel cursor onto the failed
+                                        // run's NodeRun.
+                                        if (frame.cursor.mode === "parallel")
+                                            continue;
+                                        tracker.getStore().updateNodeRun(frame.nodeRunId, {
+                                            iterationContext: frame.cursor,
+                                        });
+                                    }
+                                }
+                            }
                         }
                         ctx.logger.log(`[step ${i + 1}/${steps.length}] ${step.name} (wait) → scheduled (deadline=${new Date(deadline).toISOString()})`);
                         throw new WaitDispatchRequest({
@@ -295,114 +516,153 @@ export default class RunnerSteps {
                     // `30000` via `parseDuration`).
                     const maxDurationMs = step.maxDurationMs;
                     let attempt = 0;
-                    while (true) {
-                        attempt += 1;
-                        try {
-                            const processInvocation = () => step.process(ctx, step);
-                            const model = typeof maxDurationMs === "number" && maxDurationMs > 0
-                                ? await wrapWithTimeout(processInvocation, maxDurationMs, step.name)
-                                : await processInvocation();
-                            ctx.response = model.data;
-                            // Treat soft errors (data carries `.error`) the same as
-                            // thrown errors so retry semantics are uniform.
-                            if (ctx.response?.error) {
-                                throw ctx.response.error;
-                            }
-                            // === Tier 1: idempotency cache write ===
-                            // Cache on the success path only — failed steps are
-                            // re-runnable. Honour `idempotencyKeyTTL` per step;
-                            // default 24h. A TTL of 0 stores an immediately-
-                            // expired entry (useful as a kill-switch).
-                            if (cacheStore && resolvedIdemKey && nodeRunId && traceRunId) {
-                                const ttlField = step.idempotencyKeyTTL;
-                                const ttlMs = typeof ttlField === "number" ? ttlField : DEFAULT_IDEMPOTENCY_TTL_MS;
-                                const now = Date.now();
-                                const expiresAt = ttlMs > 0 ? now + ttlMs : now - 1;
-                                cacheStore.setIdempotencyCache(workflowName, step.name, resolvedIdemKey, {
-                                    data: model.data,
-                                    cachedAt: now,
-                                    expiresAt,
-                                    sourceRunId: traceRunId,
-                                    sourceNodeRunId: nodeRunId,
-                                });
-                            }
-                            const stepDuration = (performance.now() - stepStart).toFixed(1);
-                            // --- Trace: complete node ---
-                            if (tracker && nodeRunId) {
-                                // `_stepMetrics` is stashed on ctx by RuntimeAdapterNode
-                                // when an adapter returns metrics (gRPC wire bytes,
-                                // duration, cpu, memory). Threading it through
-                                // `completeNode` is what gets the metrics into the
-                                // run store + NODE_COMPLETED event payload — Studio's
-                                // inspector reads them from there.
-                                const ctxAny = ctx;
-                                const stepMetrics = ctxAny._stepMetrics;
-                                ctxAny._stepMetrics = undefined;
-                                tracker.completeNode(nodeRunId, sanitize(ctx.response.data), stepMetrics);
-                                // PR 4 — advance the resume cursor after each
-                                // successful non-wait step. A subsequent wait step
-                                // reads this value to set its own cursor before
-                                // throwing WaitDispatchRequest. Only at top-level
-                                // (deep=false); nested branch flow doesn't update.
-                                if (!deep && traceRunId) {
-                                    tracker.getStore().updateRun(traceRunId, { lastCompletedStepIndex: i });
+                    // v0.6 Phase 4 — the primitive stack on ctx is owned by
+                    // ForEachNode/LoopNode/SwitchNode (push on entry, pop in
+                    // finally). The Phase 2/3 single-slot
+                    // `_blokActivePrimitiveNodeRunId` mechanism is gone —
+                    // nested primitives each register their own frame, and
+                    // the wait-throw site walks the full stack. We keep
+                    // `isIteratingPrimitive` only as a hook for legacy
+                    // readers (none in core today) — wait-cursor writes no
+                    // longer depend on it.
+                    const isIteratingPrimitive = step.isPrimitiveIterator === true;
+                    try {
+                        while (true) {
+                            attempt += 1;
+                            try {
+                                const processInvocation = () => step.process(ctx, step);
+                                const model = typeof maxDurationMs === "number" && maxDurationMs > 0
+                                    ? await wrapWithTimeout(processInvocation, maxDurationMs, step.name)
+                                    : await processInvocation();
+                                ctx.response = model.data;
+                                // Treat soft errors (data carries `.error`) the same as
+                                // thrown errors so retry semantics are uniform.
+                                if (ctx.response?.error) {
+                                    throw ctx.response.error;
                                 }
-                            }
-                            const attemptSuffix = attempt > 1 ? ` after ${attempt} attempts` : "";
-                            ctx.logger.log(`${stepPrefix} → completed (${stepDuration}ms${attemptSuffix})`);
-                            break;
-                        }
-                        catch (nodeErr) {
-                            if (attempt < maxAttempts && retryConfig) {
-                                // More attempts remain — record this as a soft
-                                // failure and back off before retrying. The node
-                                // stays in `running` status; failNode is the
-                                // terminal call.
+                                // === Tier 1: idempotency cache write ===
+                                // Cache on the success path only — failed steps are
+                                // re-runnable. Honour `idempotencyKeyTTL` per step;
+                                // default 24h. A TTL of 0 stores an immediately-
+                                // expired entry (useful as a kill-switch).
+                                if (cacheStore && resolvedIdemKey && nodeRunId && traceRunId) {
+                                    const ttlField = step.idempotencyKeyTTL;
+                                    const ttlMs = typeof ttlField === "number" ? ttlField : DEFAULT_IDEMPOTENCY_TTL_MS;
+                                    const now = Date.now();
+                                    const expiresAt = ttlMs > 0 ? now + ttlMs : now - 1;
+                                    cacheStore.setIdempotencyCache(workflowName, step.name, resolvedIdemKey, {
+                                        data: model.data,
+                                        cachedAt: now,
+                                        expiresAt,
+                                        sourceRunId: traceRunId,
+                                        sourceNodeRunId: nodeRunId,
+                                    });
+                                }
+                                const stepDuration = (performance.now() - stepStart).toFixed(1);
+                                // --- Trace: complete node ---
                                 if (tracker && nodeRunId) {
-                                    tracker.recordNodeAttemptFailed(nodeRunId, { attempt, error: nodeErr });
+                                    // `_stepMetrics` is stashed on ctx by RuntimeAdapterNode
+                                    // when an adapter returns metrics (gRPC wire bytes,
+                                    // duration, cpu, memory). Threading it through
+                                    // `completeNode` is what gets the metrics into the
+                                    // run store + NODE_COMPLETED event payload — Studio's
+                                    // inspector reads them from there.
+                                    const ctxAny = ctx;
+                                    const stepMetrics = ctxAny._stepMetrics;
+                                    ctxAny._stepMetrics = undefined;
+                                    tracker.completeNode(nodeRunId, sanitize(ctx.response.data), stepMetrics);
+                                    // PR 4 — advance the resume cursor after each
+                                    // successful non-wait step. A subsequent wait step
+                                    // reads this value to set its own cursor before
+                                    // throwing WaitDispatchRequest. Only at top-level
+                                    // (deep=false); nested branch flow doesn't update.
+                                    if (!deep && traceRunId) {
+                                        tracker.getStore().updateRun(traceRunId, { lastCompletedStepIndex: i });
+                                    }
                                 }
-                                const backoffMs = computeBackoff(retryConfig, attempt);
-                                const errMsg = nodeErr instanceof Error ? nodeErr.message : String(nodeErr);
-                                ctx.logger.log(`${stepPrefix} → attempt ${attempt}/${maxAttempts} failed (${errMsg}), retrying in ${backoffMs}ms`);
-                                await sleep(backoffMs);
-                                continue;
+                                const attemptSuffix = attempt > 1 ? ` after ${attempt} attempts` : "";
+                                ctx.logger.log(`${stepPrefix} → completed (${stepDuration}ms${attemptSuffix})`);
+                                break;
                             }
-                            // Final attempt — fail the node and propagate the
-                            // enriched error so RunnerSteps' outer catch can
-                            // wrap it as a GlobalError.
-                            if (tracker && nodeRunId) {
-                                const existing = tracker.getNodeRun(nodeRunId);
-                                if (existing && existing.status === "running") {
-                                    tracker.failNode(nodeRunId, nodeErr instanceof Error ? nodeErr : new Error(String(nodeErr)));
+                            catch (nodeErr) {
+                                // v0.5.3 — control-flow signals from a step's run()
+                                // must NOT be retried OR wrapped as enriched errors.
+                                // In the production wait path, RunnerSteps throws
+                                // WaitDispatchRequest from outside this retry loop, so
+                                // this branch is normally inert. But if a custom node
+                                // ever throws a wait/cancel signal from inside its
+                                // process()/run(), preserve the type so the outer
+                                // catch + TryCatchNode pass-through still recognise
+                                // it. Same rationale as the instanceof guards in the
+                                // outer catch block further down.
+                                if (nodeErr instanceof WaitDispatchRequest || nodeErr instanceof RunCancelledError) {
+                                    throw nodeErr;
                                 }
+                                if (attempt < maxAttempts && retryConfig) {
+                                    // More attempts remain — record this as a soft
+                                    // failure and back off before retrying. The node
+                                    // stays in `running` status; failNode is the
+                                    // terminal call.
+                                    if (tracker && nodeRunId) {
+                                        tracker.recordNodeAttemptFailed(nodeRunId, { attempt, error: nodeErr });
+                                    }
+                                    const backoffMs = computeBackoff(retryConfig, attempt);
+                                    const errMsg = nodeErr instanceof Error ? nodeErr.message : String(nodeErr);
+                                    ctx.logger.log(`${stepPrefix} → attempt ${attempt}/${maxAttempts} failed (${errMsg}), retrying in ${backoffMs}ms`);
+                                    await sleep(backoffMs);
+                                    continue;
+                                }
+                                // Final attempt — fail the node and propagate the
+                                // enriched error so RunnerSteps' outer catch can
+                                // wrap it as a GlobalError.
+                                if (tracker && nodeRunId) {
+                                    const existing = tracker.getNodeRun(nodeRunId);
+                                    if (existing && existing.status === "running") {
+                                        tracker.failNode(nodeRunId, nodeErr instanceof Error ? nodeErr : new Error(String(nodeErr)));
+                                    }
+                                }
+                                // Tier 2 quick-wins — final-attempt timeout flips
+                                // the run to "timedOut" (distinct from "failed").
+                                // Only when the FINAL error was a StepTimeoutError;
+                                // mixed failures (some retries timed out, final
+                                // retry threw a different error) keep the normal
+                                // "failed" status.
+                                if (tracker &&
+                                    traceRunId &&
+                                    typeof maxDurationMs === "number" &&
+                                    maxDurationMs > 0 &&
+                                    nodeErr instanceof StepTimeoutError) {
+                                    tracker.markRunTimedOut(traceRunId, {
+                                        stepId: step.name,
+                                        maxDurationMs,
+                                        attemptsExhausted: attempt,
+                                    });
+                                }
+                                const stepDuration = (performance.now() - stepStart).toFixed(1);
+                                const attemptSuffix = attempt > 1 ? ` after ${attempt} attempts` : "";
+                                ctx.logger.log(`${stepPrefix} → FAILED (${stepDuration}ms${attemptSuffix})`);
+                                // Enrich error with step context so developers know which step failed.
+                                // Attach `_blokStepId` directly on the wrap so TryCatchNode's
+                                // envelope construction can surface `$.error.stepId` to authors
+                                // without parsing the prefix back out of the message string.
+                                const originalMsg = nodeErr instanceof Error ? nodeErr.message : String(nodeErr);
+                                const enrichedError = new Error(`${stepPrefix} failed: ${originalMsg}`);
+                                const enrichedAny = enrichedError;
+                                enrichedAny.cause = nodeErr;
+                                enrichedAny._blokStepId = step.name;
+                                throw enrichedError;
                             }
-                            // Tier 2 quick-wins — final-attempt timeout flips
-                            // the run to "timedOut" (distinct from "failed").
-                            // Only when the FINAL error was a StepTimeoutError;
-                            // mixed failures (some retries timed out, final
-                            // retry threw a different error) keep the normal
-                            // "failed" status.
-                            if (tracker &&
-                                traceRunId &&
-                                typeof maxDurationMs === "number" &&
-                                maxDurationMs > 0 &&
-                                nodeErr instanceof StepTimeoutError) {
-                                tracker.markRunTimedOut(traceRunId, {
-                                    stepId: step.name,
-                                    maxDurationMs,
-                                    attemptsExhausted: attempt,
-                                });
-                            }
-                            const stepDuration = (performance.now() - stepStart).toFixed(1);
-                            const attemptSuffix = attempt > 1 ? ` after ${attempt} attempts` : "";
-                            ctx.logger.log(`${stepPrefix} → FAILED (${stepDuration}ms${attemptSuffix})`);
-                            // Enrich error with step context so developers know which step failed
-                            const originalMsg = nodeErr instanceof Error ? nodeErr.message : String(nodeErr);
-                            const enrichedError = new Error(`${stepPrefix} failed: ${originalMsg}`);
-                            enrichedError.cause = nodeErr;
-                            throw enrichedError;
                         }
                     }
+                    finally {
+                        // v0.6 Phase 4 — primitives own their stack frame
+                        // lifecycle now (push on entry, pop in finally), so
+                        // there's nothing to restore here. The
+                        // `isIteratingPrimitive` flag stays in the type
+                        // system for documentation but no longer drives
+                        // cursor accounting.
+                        void isIteratingPrimitive;
+                    }
                 }
                 else {
                     stepName = step.name;
@@ -435,12 +695,59 @@ export default class RunnerSteps {
             if (e instanceof WaitDispatchRequest) {
                 throw e;
             }
+            // Capture the step-enrichment wrap's `_blokStepId` BEFORE we
+            // unwrap past it. The wrap is the outermost layer (set inside
+            // the inner-try retry loop above); after unwrapping to the inner
+            // GlobalError this metadata would otherwise be lost. Surfaces to
+            // authors as `$.error.stepId` inside tryCatch.catch arms.
+            const wrapStepId = typeof e === "object" && e !== null && "_blokStepId" in e
+                ? e._blokStepId
+                : undefined;
             let error_context = {};
             if (e instanceof GlobalError) {
                 error_context = e;
             }
             else {
-                error_context = new GlobalError(e.message);
+                // Walk the `.cause` chain looking for a GlobalError. The
+                // step-enrichment wrap in the retry loop above sets `cause = nodeErr`,
+                // and `nodeErr` may itself be a GlobalError thrown from
+                // `defineNode`-built nodes (e.g. `@blokjs/throw` setting
+                // `code: 401` for an auth-check middleware). Without this
+                // walk, the outer wrap below would force the framework's
+                // generic `[step N/M] X failed: ...` message + default 500
+                // code, clobbering the author's structured rejection.
+                let inner = e;
+                let foundGlobal = null;
+                while (typeof inner === "object" &&
+                    inner !== null &&
+                    "cause" in inner &&
+                    inner.cause !== undefined &&
+                    inner.cause !== inner) {
+                    inner = inner.cause;
+                    if (inner instanceof GlobalError) {
+                        foundGlobal = inner;
+                        break;
+                    }
+                }
+                if (foundGlobal) {
+                    error_context = foundGlobal;
+                }
+                else {
+                    error_context = new GlobalError(e.message);
+                    // Preserve the original error chain so outer handlers
+                    // (notably v0.5 TryCatchNode's `$.error.message` resolution)
+                    // can peel back through `.cause` to the author's original
+                    // `throw new Error("...")` text instead of the runner's
+                    // `[step N/M] <name> failed: ...` enriched prefix.
+                    error_context.cause = e;
+                }
+            }
+            // Stamp the wrap's stepId on the unwrapped error so TryCatchNode's
+            // `toErrorEnvelope` walk can surface it as `$.error.stepId`. The
+            // inner-try wrap layer is gone by this point; this is the only
+            // place where the runner can identify which sub-step failed.
+            if (typeof wrapStepId === "string" && wrapStepId.length > 0) {
+                error_context._blokStepId = wrapStepId;
             }
             throw error_context;
         }