@blokjs/runner 0.2.2 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (213):
  1. package/dist/Blok.js +32 -3
  2. package/dist/Blok.js.map +1 -1
  3. package/dist/Configuration.d.ts +59 -5
  4. package/dist/Configuration.js +366 -96
  5. package/dist/Configuration.js.map +1 -1
  6. package/dist/ForEachNode.d.ts +59 -0
  7. package/dist/ForEachNode.js +522 -0
  8. package/dist/ForEachNode.js.map +1 -0
  9. package/dist/LoopMaxIterationsError.d.ts +11 -0
  10. package/dist/LoopMaxIterationsError.js +18 -0
  11. package/dist/LoopMaxIterationsError.js.map +1 -0
  12. package/dist/LoopNode.d.ts +36 -0
  13. package/dist/LoopNode.js +182 -0
  14. package/dist/LoopNode.js.map +1 -0
  15. package/dist/PayloadTooLargeError.d.ts +19 -0
  16. package/dist/PayloadTooLargeError.js +29 -0
  17. package/dist/PayloadTooLargeError.js.map +1 -0
  18. package/dist/RunCancelledError.d.ts +17 -0
  19. package/dist/RunCancelledError.js +25 -0
  20. package/dist/RunCancelledError.js.map +1 -0
  21. package/dist/Runner.d.ts +11 -1
  22. package/dist/Runner.js +9 -2
  23. package/dist/Runner.js.map +1 -1
  24. package/dist/RunnerSteps.js +648 -44
  25. package/dist/RunnerSteps.js.map +1 -1
  26. package/dist/RuntimeAdapterNode.d.ts +2 -1
  27. package/dist/RuntimeAdapterNode.js +2 -2
  28. package/dist/RuntimeAdapterNode.js.map +1 -1
  29. package/dist/RuntimeRegistry.d.ts +23 -2
  30. package/dist/RuntimeRegistry.js +31 -2
  31. package/dist/RuntimeRegistry.js.map +1 -1
  32. package/dist/SubworkflowNode.d.ts +181 -0
  33. package/dist/SubworkflowNode.js +479 -0
  34. package/dist/SubworkflowNode.js.map +1 -0
  35. package/dist/SwitchNode.d.ts +37 -0
  36. package/dist/SwitchNode.js +153 -0
  37. package/dist/SwitchNode.js.map +1 -0
  38. package/dist/TriggerBase.d.ts +178 -0
  39. package/dist/TriggerBase.js +1032 -5
  40. package/dist/TriggerBase.js.map +1 -1
  41. package/dist/TryCatchNode.d.ts +32 -0
  42. package/dist/TryCatchNode.js +207 -0
  43. package/dist/TryCatchNode.js.map +1 -0
  44. package/dist/WaitDispatchRequest.d.ts +38 -0
  45. package/dist/WaitDispatchRequest.js +13 -0
  46. package/dist/WaitDispatchRequest.js.map +1 -0
  47. package/dist/WaitNode.d.ts +23 -0
  48. package/dist/WaitNode.js +26 -0
  49. package/dist/WaitNode.js.map +1 -0
  50. package/dist/adapters/grpc/GrpcCodec.js +2 -2
  51. package/dist/adapters/grpc/GrpcRuntimeAdapter.d.ts +6 -4
  52. package/dist/adapters/grpc/GrpcRuntimeAdapter.js +6 -4
  53. package/dist/adapters/grpc/GrpcRuntimeAdapter.js.map +1 -1
  54. package/dist/adapters/grpc/types.d.ts +7 -5
  55. package/dist/adapters/grpc/types.js.map +1 -1
  56. package/dist/adapters/transport.d.ts +12 -41
  57. package/dist/adapters/transport.js +21 -70
  58. package/dist/adapters/transport.js.map +1 -1
  59. package/dist/cache/NodeResultCache.js +7 -0
  60. package/dist/cache/NodeResultCache.js.map +1 -1
  61. package/dist/concurrency/ConcurrencyBackend.d.ts +61 -0
  62. package/dist/concurrency/ConcurrencyBackend.js +20 -0
  63. package/dist/concurrency/ConcurrencyBackend.js.map +1 -0
  64. package/dist/concurrency/ConcurrencyLimitError.d.ts +37 -0
  65. package/dist/concurrency/ConcurrencyLimitError.js +16 -0
  66. package/dist/concurrency/ConcurrencyLimitError.js.map +1 -0
  67. package/dist/concurrency/NatsKvConcurrencyBackend.d.ts +64 -0
  68. package/dist/concurrency/NatsKvConcurrencyBackend.js +310 -0
  69. package/dist/concurrency/NatsKvConcurrencyBackend.js.map +1 -0
  70. package/dist/concurrency/QueueExpiredError.d.ts +40 -0
  71. package/dist/concurrency/QueueExpiredError.js +15 -0
  72. package/dist/concurrency/QueueExpiredError.js.map +1 -0
  73. package/dist/concurrency/RedisConcurrencyBackend.d.ts +64 -0
  74. package/dist/concurrency/RedisConcurrencyBackend.js +374 -0
  75. package/dist/concurrency/RedisConcurrencyBackend.js.map +1 -0
  76. package/dist/concurrency/createConcurrencyBackend.d.ts +24 -0
  77. package/dist/concurrency/createConcurrencyBackend.js +38 -0
  78. package/dist/concurrency/createConcurrencyBackend.js.map +1 -0
  79. package/dist/concurrency/readConcurrencyConfig.d.ts +60 -0
  80. package/dist/concurrency/readConcurrencyConfig.js +60 -0
  81. package/dist/concurrency/readConcurrencyConfig.js.map +1 -0
  82. package/dist/defineNode.d.ts +8 -0
  83. package/dist/defineNode.js +25 -5
  84. package/dist/defineNode.js.map +1 -1
  85. package/dist/graphql/GraphQLSchemaGenerator.js +1 -1
  86. package/dist/graphql/GraphQLSchemaGenerator.js.map +1 -1
  87. package/dist/idempotency/resolveIdempotencyKey.d.ts +20 -0
  88. package/dist/idempotency/resolveIdempotencyKey.js +37 -0
  89. package/dist/idempotency/resolveIdempotencyKey.js.map +1 -0
  90. package/dist/index.d.ts +30 -6
  91. package/dist/index.js +55 -6
  92. package/dist/index.js.map +1 -1
  93. package/dist/marketplace/RuntimeCatalog.d.ts +6 -0
  94. package/dist/marketplace/RuntimeCatalog.js.map +1 -1
  95. package/dist/marketplace/RuntimeDiscovery.d.ts +2 -2
  96. package/dist/marketplace/RuntimeDiscovery.js +18 -6
  97. package/dist/marketplace/RuntimeDiscovery.js.map +1 -1
  98. package/dist/monitoring/ConcurrencyMetrics.d.ts +82 -0
  99. package/dist/monitoring/ConcurrencyMetrics.js +139 -0
  100. package/dist/monitoring/ConcurrencyMetrics.js.map +1 -0
  101. package/dist/monitoring/ForEachWaitMetrics.d.ts +22 -0
  102. package/dist/monitoring/ForEachWaitMetrics.js +36 -0
  103. package/dist/monitoring/ForEachWaitMetrics.js.map +1 -0
  104. package/dist/monitoring/JanitorMetrics.d.ts +27 -0
  105. package/dist/monitoring/JanitorMetrics.js +48 -0
  106. package/dist/monitoring/JanitorMetrics.js.map +1 -0
  107. package/dist/openapi/OpenAPIGenerator.js +7 -2
  108. package/dist/openapi/OpenAPIGenerator.js.map +1 -1
  109. package/dist/runtime/PrimitiveStack.d.ts +64 -0
  110. package/dist/runtime/PrimitiveStack.js +92 -0
  111. package/dist/runtime/PrimitiveStack.js.map +1 -0
  112. package/dist/scheduling/DebounceBackend.d.ts +108 -0
  113. package/dist/scheduling/DebounceBackend.js +23 -0
  114. package/dist/scheduling/DebounceBackend.js.map +1 -0
  115. package/dist/scheduling/DebounceCoordinator.d.ts +141 -0
  116. package/dist/scheduling/DebounceCoordinator.js +362 -0
  117. package/dist/scheduling/DebounceCoordinator.js.map +1 -0
  118. package/dist/scheduling/DeferredDispatchSignal.d.ts +50 -0
  119. package/dist/scheduling/DeferredDispatchSignal.js +14 -0
  120. package/dist/scheduling/DeferredDispatchSignal.js.map +1 -0
  121. package/dist/scheduling/DeferredRunScheduler.d.ts +96 -0
  122. package/dist/scheduling/DeferredRunScheduler.js +256 -0
  123. package/dist/scheduling/DeferredRunScheduler.js.map +1 -0
  124. package/dist/scheduling/NatsKvDebounceBackend.d.ts +53 -0
  125. package/dist/scheduling/NatsKvDebounceBackend.js +334 -0
  126. package/dist/scheduling/NatsKvDebounceBackend.js.map +1 -0
  127. package/dist/scheduling/RedisDebounceBackend.d.ts +49 -0
  128. package/dist/scheduling/RedisDebounceBackend.js +356 -0
  129. package/dist/scheduling/RedisDebounceBackend.js.map +1 -0
  130. package/dist/scheduling/createDebounceBackend.d.ts +25 -0
  131. package/dist/scheduling/createDebounceBackend.js +39 -0
  132. package/dist/scheduling/createDebounceBackend.js.map +1 -0
  133. package/dist/scheduling/readSchedulingConfig.d.ts +24 -0
  134. package/dist/scheduling/readSchedulingConfig.js +52 -0
  135. package/dist/scheduling/readSchedulingConfig.js.map +1 -0
  136. package/dist/security/AuditLogger.js +1 -1
  137. package/dist/security/AuditLogger.js.map +1 -1
  138. package/dist/security/AuthMiddleware.d.ts +19 -20
  139. package/dist/security/AuthMiddleware.js +35 -20
  140. package/dist/security/AuthMiddleware.js.map +1 -1
  141. package/dist/security/OAuthProvider.js +2 -2
  142. package/dist/security/OAuthProvider.js.map +1 -1
  143. package/dist/security/SecretManager.js +14 -13
  144. package/dist/security/SecretManager.js.map +1 -1
  145. package/dist/security/index.d.ts +3 -1
  146. package/dist/security/index.js +3 -1
  147. package/dist/security/index.js.map +1 -1
  148. package/dist/testing/TestHarness.d.ts +27 -12
  149. package/dist/testing/TestHarness.js +19 -3
  150. package/dist/testing/TestHarness.js.map +1 -1
  151. package/dist/testing/WorkflowTestRunner.js +0 -7
  152. package/dist/testing/WorkflowTestRunner.js.map +1 -1
  153. package/dist/timeouts/StepTimeoutError.d.ts +22 -0
  154. package/dist/timeouts/StepTimeoutError.js +31 -0
  155. package/dist/timeouts/StepTimeoutError.js.map +1 -0
  156. package/dist/tracing/InMemoryRunStore.d.ts +41 -1
  157. package/dist/tracing/InMemoryRunStore.js +239 -0
  158. package/dist/tracing/InMemoryRunStore.js.map +1 -1
  159. package/dist/tracing/Janitor.d.ts +70 -0
  160. package/dist/tracing/Janitor.js +150 -0
  161. package/dist/tracing/Janitor.js.map +1 -0
  162. package/dist/tracing/PostgresRunStore.d.ts +57 -1
  163. package/dist/tracing/PostgresRunStore.js +711 -6
  164. package/dist/tracing/PostgresRunStore.js.map +1 -1
  165. package/dist/tracing/RoutingDiagnostics.d.ts +55 -0
  166. package/dist/tracing/RoutingDiagnostics.js +50 -0
  167. package/dist/tracing/RoutingDiagnostics.js.map +1 -0
  168. package/dist/tracing/RunStore.d.ts +181 -1
  169. package/dist/tracing/RunTracker.d.ts +244 -9
  170. package/dist/tracing/RunTracker.js +594 -1
  171. package/dist/tracing/RunTracker.js.map +1 -1
  172. package/dist/tracing/SqliteRunStore.d.ts +79 -2
  173. package/dist/tracing/SqliteRunStore.js +775 -16
  174. package/dist/tracing/SqliteRunStore.js.map +1 -1
  175. package/dist/tracing/TraceRouter.d.ts +20 -2
  176. package/dist/tracing/TraceRouter.js +612 -6
  177. package/dist/tracing/TraceRouter.js.map +1 -1
  178. package/dist/tracing/createStore.js +14 -3
  179. package/dist/tracing/createStore.js.map +1 -1
  180. package/dist/tracing/metadataFilter.d.ts +63 -0
  181. package/dist/tracing/metadataFilter.js +224 -0
  182. package/dist/tracing/metadataFilter.js.map +1 -0
  183. package/dist/tracing/sanitize.d.ts +11 -0
  184. package/dist/tracing/sanitize.js +29 -0
  185. package/dist/tracing/sanitize.js.map +1 -1
  186. package/dist/tracing/types.d.ts +672 -2
  187. package/dist/utils/createChildContext.d.ts +32 -0
  188. package/dist/utils/createChildContext.js +113 -0
  189. package/dist/utils/createChildContext.js.map +1 -0
  190. package/dist/utils/envAllowlist.d.ts +35 -0
  191. package/dist/utils/envAllowlist.js +113 -0
  192. package/dist/utils/envAllowlist.js.map +1 -0
  193. package/dist/version/RuntimeVersionValidator.d.ts +38 -0
  194. package/dist/version/RuntimeVersionValidator.js +121 -0
  195. package/dist/version/RuntimeVersionValidator.js.map +1 -0
  196. package/dist/visualization/WorkflowVisualizer.js +4 -4
  197. package/dist/visualization/WorkflowVisualizer.js.map +1 -1
  198. package/dist/workflow/PersistenceHelper.d.ts +18 -10
  199. package/dist/workflow/PersistenceHelper.js +35 -9
  200. package/dist/workflow/PersistenceHelper.js.map +1 -1
  201. package/dist/workflow/WorkflowNormalizer.d.ts +48 -42
  202. package/dist/workflow/WorkflowNormalizer.js +650 -18
  203. package/dist/workflow/WorkflowNormalizer.js.map +1 -1
  204. package/dist/workflow/WorkflowRegistry.d.ts +186 -0
  205. package/dist/workflow/WorkflowRegistry.js +202 -0
  206. package/dist/workflow/WorkflowRegistry.js.map +1 -0
  207. package/dist/workflow/sampleBody.d.ts +54 -0
  208. package/dist/workflow/sampleBody.js +320 -0
  209. package/dist/workflow/sampleBody.js.map +1 -0
  210. package/package.json +3 -8
  211. package/dist/adapters/HttpRuntimeAdapter.d.ts +0 -79
  212. package/dist/adapters/HttpRuntimeAdapter.js +0 -233
  213. package/dist/adapters/HttpRuntimeAdapter.js.map +0 -1
@@ -1,6 +1,112 @@
1
1
  import { GlobalError } from "@blokjs/shared";
2
+ import { RunCancelledError } from "./RunCancelledError";
3
+ import { WaitDispatchRequest } from "./WaitDispatchRequest";
4
+ import { resolveIdempotencyKey } from "./idempotency/resolveIdempotencyKey";
5
+ import { getPrimitiveStack } from "./runtime/PrimitiveStack";
6
+ import { StepTimeoutError } from "./timeouts/StepTimeoutError";
2
7
  import { RunTracker } from "./tracing/RunTracker";
3
8
  import { sanitize } from "./tracing/sanitize";
9
+ import { applyStepOutput } from "./workflow/PersistenceHelper";
10
/**
 * Fallback lifetime, in milliseconds, for an idempotency cache entry when
 * the step author does not supply `idempotencyKeyTTL`. One full day
 * (1000 ms × 60 s × 60 min × 24 h), chosen to match Trigger.dev's default.
 */
const DEFAULT_IDEMPOTENCY_TTL_MS = 1000 * 60 * 60 * 24;
16
/**
 * Capped exponential backoff (no jitter) for the wait before the next
 * retry attempt.
 *
 *   delay = min(maxTimeoutInMs, floor(minTimeoutInMs * factor^(attempt - 1)))
 *
 * Any of the three knobs that is `null`/`undefined` falls back to its
 * default: minTimeoutInMs = 1000, maxTimeoutInMs = 30000, factor = 2.
 *
 * @param {{minTimeoutInMs?: number, maxTimeoutInMs?: number, factor?: number}} config
 *   Retry tuning; every field is optional.
 * @param {number} attempt Attempt number. Values below 1 are treated like 1
 *   because the exponent is clamped to be non-negative.
 * @returns {number} Delay in whole milliseconds, never above the cap.
 */
function computeBackoff(config, attempt) {
    const baseDelayMs = config.minTimeoutInMs ?? 1000;
    const ceilingMs = config.maxTimeoutInMs ?? 30000;
    const multiplier = config.factor ?? 2;
    // Clamp so attempt <= 1 yields exactly the base delay.
    const exponent = Math.max(0, attempt - 1);
    const uncappedMs = Math.floor(baseDelayMs * multiplier ** exponent);
    return Math.min(ceilingMs, uncappedMs);
}
31
/**
 * Default cap, in bytes, on the JSON-serialized `ctx.state` snapshot taken
 * before a `WaitDispatchRequest` throw (1 MiB). Can be overridden per
 * deployment with the `BLOK_STATE_SNAPSHOT_MAX_BYTES` environment variable.
 */
const DEFAULT_STATE_SNAPSHOT_MAX_BYTES = 1_048_576;
/**
 * Serialize `ctx.state` to a JSON string so the runner can persist it
 * before deferring on a wait step.
 *
 * The helper never throws for unserializable state. Every failure mode
 * logs a warning (where noted) and returns `undefined`, which the caller
 * passes through as "no snapshot":
 *
 *  - `BLOK_STATE_SNAPSHOT_DISABLED=1`: kill-switch; returns `undefined`
 *    without logging.
 *  - `JSON.stringify` throws (circular references, BigInt, ...): warns,
 *    returns `undefined`.
 *  - `JSON.stringify` returns a non-string WITHOUT throwing (state is a
 *    function/symbol, or its `toJSON()` yields `undefined`): warns,
 *    returns `undefined`. Previously this case reached
 *    `Buffer.byteLength(undefined)` and threw a TypeError.
 *  - Serialized size exceeds the cap (`BLOK_STATE_SNAPSHOT_MAX_BYTES`, or
 *    the 1 MiB default when the variable is unset, non-numeric, or not
 *    positive): warns, returns `undefined`.
 *
 * @param {unknown} state Value to snapshot; `null`/`undefined` serialize as `{}`.
 * @param {{logLevel: (level: string, message: string) => void}} logger
 *   Sink for warnings.
 * @returns {string | undefined} The JSON text, or `undefined` when no
 *   snapshot should be stored.
 */
function serializeStateSnapshot(state, logger) {
    if (process.env.BLOK_STATE_SNAPSHOT_DISABLED === "1")
        return undefined;
    const capRaw = process.env.BLOK_STATE_SNAPSHOT_MAX_BYTES;
    const cap = capRaw ? Number(capRaw) : DEFAULT_STATE_SNAPSHOT_MAX_BYTES;
    // Reject NaN / Infinity / zero / negative overrides rather than disabling the cap.
    const effectiveCap = Number.isFinite(cap) && cap > 0 ? cap : DEFAULT_STATE_SNAPSHOT_MAX_BYTES;
    let serialized;
    try {
        serialized = JSON.stringify(state ?? {});
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        logger.logLevel("warn", `[blok][wait] ctx.state snapshot failed to serialize: ${msg}. Wait will still defer; resumption is best-effort across process restart.`);
        return undefined;
    }
    // JSON.stringify signals "not representable" by returning undefined
    // instead of throwing; measuring that with Buffer.byteLength would throw.
    if (typeof serialized !== "string") {
        logger.logLevel("warn", "[blok][wait] ctx.state snapshot failed to serialize: value has no JSON representation. Wait will still defer; resumption is best-effort across process restart.");
        return undefined;
    }
    const size = Buffer.byteLength(serialized, "utf8");
    if (size > effectiveCap) {
        logger.logLevel("warn", `[blok][wait] ctx.state snapshot exceeds ${effectiveCap} bytes (got ${size}); skipping snapshot. Wait will still defer; resumption is best-effort. Reduce state size or raise BLOK_STATE_SNAPSHOT_MAX_BYTES.`);
        return undefined;
    }
    return serialized;
}
84
/**
 * Promise-based delay built on `setTimeout`.
 *
 * @param {number} ms Delay handed to `setTimeout`, in milliseconds.
 * @returns {Promise<void>} Settles (with `undefined`) once the timer fires.
 */
function sleep(ms) {
    return new Promise((wake) => setTimeout(wake, ms));
}
89
/**
 * Race `fn()` against a `setTimeout`-based deadline.
 *
 * The timer is armed first, then `fn` is invoked. Whichever settles first
 * decides the outcome:
 *  - `fn()` fulfils: the timer is cleared and the value is passed through.
 *  - `fn()` rejects: the timer is cleared and the same error is passed through.
 *  - the timer fires first: rejects with `StepTimeoutError(stepName, ms)`.
 *    The work started by `fn()` is NOT cancelled (no AbortSignal is wired
 *    in here); its eventual settlement is ignored.
 *
 * A synchronous throw from `fn`, or a plain (non-promise) return value, is
 * handled the same way as the async equivalents, and the timer is always
 * cleared. Before this guard, a synchronous throw left the timer pending
 * for the full `ms`, keeping the event loop alive for no reason.
 *
 * @param {() => unknown} fn Work to run; normally returns a promise.
 * @param {number} ms Deadline in milliseconds.
 * @param {string} stepName Step identifier forwarded to `StepTimeoutError`.
 * @returns {Promise<unknown>} Settles with `fn`'s outcome or the timeout error.
 */
function wrapWithTimeout(fn, ms, stepName) {
    return new Promise((resolve, reject) => {
        const timer = setTimeout(() => {
            reject(new StepTimeoutError(stepName, ms));
        }, ms);
        let outcome;
        try {
            outcome = fn();
        }
        catch (err) {
            // fn blew up before producing a promise: release the timer now.
            clearTimeout(timer);
            reject(err);
            return;
        }
        // Promise.resolve is a pass-through for native promises and lifts
        // plain values / thenables so `.then` is always available.
        Promise.resolve(outcome).then((value) => {
            clearTimeout(timer);
            resolve(value);
        }, (err) => {
            clearTimeout(timer);
            reject(err);
        });
    });
}
4
110
  export default class RunnerSteps {
5
111
  /**
6
112
  * Executes a series of steps in the given context.
@@ -23,8 +129,66 @@ export default class RunnerSteps {
23
129
  let flow_steps = [];
24
130
  let flow_step = 0;
25
131
  let stepName = "";
132
+ // PR 4 — wait.for / wait.until resume cursor.
133
+ //
134
+ // On `dispatchDeferred` re-entry from a wait step, the runner
135
+ // must skip past pre-wait steps that already completed in the
136
+ // previous pass. `lastCompletedStepIndex` is set on the run
137
+ // record before each WaitDispatchRequest throw and read here
138
+ // at runSteps entry. Default `-1` = no resume; runner starts
139
+ // at i = 0.
140
+ const persistedRun = !deep && tracker && traceRunId ? tracker.getStore().getRun(traceRunId) : undefined;
141
+ // Two cursor sources:
142
+ // - Top-level (deep === false): workflow_runs.lastCompletedStepIndex.
143
+ // - Nested inside a primitive iterator (deep === true, v0.6
144
+ // Phase 2): `_blokInnerResumeIndex` stamped on the child ctx
145
+ // by ForEachNode.runIteration when resuming at a specific
146
+ // inner step. Undefined = start at 0 (fresh iteration body).
147
+ const innerResumeIndexRaw = ctx._blokInnerResumeIndex;
148
+ const innerResumeIndex = typeof innerResumeIndexRaw === "number" ? innerResumeIndexRaw : undefined;
149
+ const resumeFromIndex = !deep
150
+ ? persistedRun?.lastCompletedStepIndex !== undefined
151
+ ? persistedRun.lastCompletedStepIndex + 1
152
+ : 0
153
+ : (innerResumeIndex ?? 0);
154
+ // Clear the sentinel so a re-runner started fresh from this
155
+ // childCtx (e.g. the nested branch flow path) doesn't inherit
156
+ // a stale resume hint. ForEachNode set it for THIS one re-entry
157
+ // only; it should not propagate further.
158
+ if (deep && innerResumeIndex !== undefined) {
159
+ ctx._blokInnerResumeIndex = undefined;
160
+ }
26
161
  for (let i = 0; i < steps.length; i++) {
27
162
  const step = steps[i];
163
+ // PR 4 — skip pre-wait steps on resume. State + NodeRuns
164
+ // from the first pass are still on `ctx.state` / in the
165
+ // store; the runner just advances past them.
166
+ if (i < resumeFromIndex) {
167
+ ctx.logger.log(`[step ${i + 1}/${steps.length}] ${step.name} → skipped (resumed past wait at lastCompletedStepIndex=${persistedRun?.lastCompletedStepIndex})`);
168
+ continue;
169
+ }
170
+ // Tier 2 follow-up · cooperative cancellation. Operators can
171
+ // abort `running` runs via `POST /__blok/runs/:runId/cancel`,
172
+ // which fires the ctx's AbortController. The check is between
173
+ // steps so a long-running step's `step.process()` doesn't have
174
+ // to consult the signal itself (though nodes that want finer
175
+ // granularity can read `ctx.signal.aborted` themselves).
176
+ if (ctx.signal?.aborted) {
177
+ throw new RunCancelledError(traceRunId);
178
+ }
179
+ // v0.6 Phase 4 — bump the TOP primitive frame's
180
+ // `innerStepIndex` to the current step. If a wait fires from
181
+ // inside this step (or anywhere deeper down the call stack),
182
+ // the wait-throw site walks the stack to persist each frame
183
+ // and needs the TOP frame's cursor to point at THIS step.
184
+ // `deep === true` is the only case where this can apply —
185
+ // the top-level runSteps doesn't have a frame.
186
+ if (deep) {
187
+ const stack = getPrimitiveStack(ctx);
188
+ if (stack.length > 0) {
189
+ stack[stack.length - 1].cursor.innerStepIndex = i;
190
+ }
191
+ }
28
192
  if (!step.active) {
29
193
  // Track skipped nodes
30
194
  if (tracker && traceRunId) {
@@ -61,6 +225,43 @@ export default class RunnerSteps {
61
225
  };
62
226
  if (tracker && traceRunId) {
63
227
  const configAny = ctx.config;
228
+ // Tier 2 #4 sub-workflow: capture the `wait` mode so
229
+ // Studio can render `↳ async` (wait:false) vs `↳ sub`
230
+ // (wait:true / default) in StepRail. Only meaningful
231
+ // for subworkflow steps; undefined elsewhere.
232
+ const subworkflowWait = stepType === "subworkflow" ? stepAny.wait : undefined;
233
+ // G2 (v0.6) — capture the `dispatch` strategy so the
234
+ // rail can mark http-self invocations with a small
235
+ // `http` badge alongside the existing `↳ async`/`↳ sub`.
236
+ // Normalize: unknown values + the default fall through
237
+ // to `undefined` (rendered as in-process by Studio).
238
+ const dispatchRaw = stepType === "subworkflow" ? stepAny.dispatch : undefined;
239
+ const subworkflowDispatch = dispatchRaw === "http-self" || dispatchRaw === "in-process" ? dispatchRaw : undefined;
240
+ // PR 5 E3 — surface sub-workflow nesting depth.
241
+ // `_subworkflowDepth` on ctx is set by SubworkflowNode +
242
+ // createChildContext; the parent's invocation of a
243
+ // child step has depth = parent.depth + 1. Top-level =
244
+ // 1; nested = 2+. Only meaningful for subworkflow steps.
245
+ const subworkflowDepth = stepType === "subworkflow"
246
+ ? (ctx._subworkflowDepth ?? 0) + 1
247
+ : undefined;
248
+ // v0.5 middleware origin tagging — when the trigger's
249
+ // `runMiddlewareChain` is dispatching a middleware
250
+ // workflow on this ctx, it sets `_blokMiddlewareName`
251
+ // to the middleware's name. Surface that here so
252
+ // Studio's StepRail can render a `mw:<name>` origin
253
+ // badge on every inner step the middleware produced.
254
+ const middleware = ctx._blokMiddlewareName;
255
+ // v0.5.3 — read the iteration sentinel set by ForEachNode +
256
+ // LoopNode on per-iteration child ctxs. Lets Studio group
257
+ // inner steps under "iteration N" headers in StepRail.
258
+ // Inherited by nested runners (tryCatch, switch) inside
259
+ // the same iteration — which is correct: their inner steps
260
+ // belong to that iteration. A nested forEach inside an
261
+ // outer iteration overrides the sentinel on its own child
262
+ // ctx, so the inner-most iteration wins for its descendants.
263
+ const iterationIndexRaw = ctx._blokIterationIndex;
264
+ const iterationIndex = typeof iterationIndexRaw === "number" ? iterationIndexRaw : undefined;
64
265
  const nodeRun = tracker.startNode(traceRunId, {
65
266
  nodeName: step.name,
66
267
  nodeType: stepType,
@@ -68,60 +269,399 @@ export default class RunnerSteps {
68
269
  inputs: sanitize(configAny?.[step.name]?.inputs ?? stepAny.config),
69
270
  depth: depthLevel,
70
271
  stepIndex: i,
272
+ wait: subworkflowWait,
273
+ dispatch: subworkflowDispatch,
274
+ subworkflowDepth,
275
+ middleware,
276
+ iterationIndex,
71
277
  });
72
278
  nodeRunId = nodeRun.id;
73
279
  ctx._traceNodeId = nodeRunId;
74
280
  }
75
- ctx.logger.log(`${stepPrefix} started`);
76
- const stepStart = performance.now();
77
- try {
78
- const model = await step.process(ctx, step);
79
- ctx.response = model.data;
80
- const stepDuration = (performance.now() - stepStart).toFixed(1);
81
- // --- Trace: complete or fail node ---
82
- if (tracker && nodeRunId) {
83
- if (ctx.response.error) {
84
- // Pass the error VERBATIM so RunTracker's
85
- // `toRunErrorDetail` can preserve BlokError
86
- // fields (category, retryable, remediation,
87
- // causes, …) when the SDK supplied a typed
88
- // failure. Strings and bare Errors fall
89
- // through to the legacy `{message, stack}`
90
- // shape.
91
- tracker.failNode(nodeRunId, ctx.response.error);
281
+ // === PR 4: wait.for(duration) / wait.until(date) step ===
282
+ // Two paths:
283
+ // 1. First pass: compute deadline, mark NodeRun complete
284
+ // (the wait step has no `process()` body), set the
285
+ // run's resume cursor (lastCompletedStepIndex = i - 1),
286
+ // throw WaitDispatchRequest. TriggerBase translates to
287
+ // DeferredDispatchSignal 202 Accepted.
288
+ // 2. Re-entry (dispatchDeferred): the resume cursor logic
289
+ // at the top of runSteps already skipped indices < i.
290
+ // For the wait step itself at i = lastCompletedStepIndex
291
+ // + 1, treat it as satisfied and advance.
292
+ // Detection: existence of run.scheduledAt + wait step =
293
+ // we're on the second pass.
294
+ if (stepType === "wait") {
295
+ const waitForMs = stepAny.waitForMs;
296
+ const waitUntil = stepAny.waitUntil;
297
+ // Compute the deadline (resolves $-proxy and ISO strings).
298
+ // Review fix-up · BUG-2. A malformed `until` string used to
299
+ // silently fall through to `Date.now()` (immediate no-op).
300
+ // Authors expecting "wait until tomorrow" with a typo got a
301
+ // no-op with no warning — the worst kind of footgun. Throw
302
+ // instead so the failure surfaces immediately, both in the
303
+ // run trace + Studio's error surface.
304
+ const computeDeadline = () => {
305
+ if (typeof waitForMs === "number")
306
+ return Date.now() + waitForMs;
307
+ if (typeof waitUntil === "number")
308
+ return waitUntil;
309
+ if (typeof waitUntil === "string") {
310
+ // Try parsing as a number first (ms-since-epoch as a string).
311
+ const asNum = Number(waitUntil);
312
+ if (!Number.isNaN(asNum))
313
+ return asNum;
314
+ // ISO-date string.
315
+ const t = Date.parse(waitUntil);
316
+ if (!Number.isNaN(t))
317
+ return t;
318
+ // Fail-fast on unparseable strings (the helpful path).
319
+ throw new Error(`wait.until: cannot parse '${waitUntil}' as a number or date. Use ms-since-epoch (number or numeric string) or a valid ISO date string.`);
320
+ }
321
+ // Schema rejects this combination, but defensive: treat
322
+ // unsupported input as immediate so the runner doesn't
323
+ // hang on a never-firing timer.
324
+ return Date.now();
325
+ };
326
+ // Detect re-entry: on first pass the run has no
327
+ // scheduledAt (or it's from trigger-level delay); on
328
+ // re-entry from a wait dispatch, the run was marked
329
+ // `delayed` with scheduledAt set to the wait deadline.
330
+ //
331
+ // v0.6 Phase 4 — for deep (nested) runSteps, a primitive
332
+ // (SwitchNode etc.) sets `_blokInnerResumeIndex` to the
333
+ // resume target — including `0` when the wait is at the
334
+ // first step of its sub-pipeline. The original
335
+ // `resumeFromIndex > 0` guard prevented re-entry from
336
+ // firing at index 0, but Phase 4 needs the index-0 case
337
+ // (e.g., switch arm whose first step is the wait). For
338
+ // deep runs we additionally require `innerResumeIndex`
339
+ // to be defined — that's how we tell "this primitive
340
+ // resumed here" vs "we're at index 0 because of a fresh
341
+ // iteration that doesn't have a resume cursor".
342
+ const isReentry = ctx._blokDispatchReentry === true &&
343
+ i === resumeFromIndex &&
344
+ (!deep ? resumeFromIndex > 0 : innerResumeIndex !== undefined);
345
+ const deadline = computeDeadline();
346
+ const now = Date.now();
347
+ if (isReentry || deadline <= now) {
348
+ // Wait already satisfied (timer fired AND we're on
349
+ // re-entry past the deadline) OR the deadline is
350
+ // in the past (e.g., wait.for(0) or wait.until(<past>)).
351
+ // Mark NodeRun complete and advance.
352
+ if (tracker && nodeRunId) {
353
+ tracker.completeNode(nodeRunId, { __waited__: true, deadline });
354
+ }
355
+ ctx.logger.log(`[step ${i + 1}/${steps.length}] ${step.name} (wait) → satisfied`);
356
+ // Advance the resume cursor at TOP-LEVEL only.
357
+ // Nested satisfies (deep=true, v0.6 Phase 2 — wait
358
+ // inside a forEach iteration body) must NOT
359
+ // overwrite the workflow's resume cursor with the
360
+ // inner step index — that would skip past the
361
+ // primitive entirely on the next re-entry. The
362
+ // primitive's own NodeRun.iteration_context tracks
363
+ // progress for nested resumes.
364
+ if (!deep && tracker && traceRunId) {
365
+ tracker.getStore().updateRun(traceRunId, { lastCompletedStepIndex: i });
92
366
  }
93
- else {
94
- // `_stepMetrics` is stashed on ctx by RuntimeAdapterNode
95
- // when an adapter returns metrics (gRPC wire bytes,
96
- // duration, cpu, memory). Threading it through
97
- // `completeNode` is what gets the metrics into the
98
- // run store + NODE_COMPLETED event payload — Studio's
99
- // inspector reads them from there.
100
- const ctxAny = ctx;
101
- const stepMetrics = ctxAny._stepMetrics;
102
- ctxAny._stepMetrics = undefined;
103
- tracker.completeNode(nodeRunId, sanitize(ctx.response.data), stepMetrics);
367
+ continue;
368
+ }
369
+ // First pass: schedule + throw WaitDispatchRequest.
370
+ // Set resume cursor BEFORE throwing so re-entry knows
371
+ // where to pick up.
372
+ //
373
+ // Two cases for cursor placement:
374
+ // - Top-level wait (deep === false). Cursor = i - 1
375
+ // (the last non-wait outer step that completed).
376
+ // On re-entry, runSteps reads
377
+ // workflow_runs.lastCompletedStepIndex + 1 = i and
378
+ // starts the wait step which flips to "satisfied".
379
+ // - Nested wait inside a primitive (deep === true,
380
+ // v0.6 Phase 2). The wait fired from inside an
381
+ // iteration body of a forEach (or analogous future
382
+ // primitive). The OUTER runSteps wrote `i - 1` =
383
+ // forEach-step-index minus 1 *before* invoking
384
+ // forEach.process, so workflow_runs.lastCompleted-
385
+ // StepIndex still points at the OUTER cursor we
386
+ // want — DON'T overwrite it with the inner-i (that
387
+ // would skip the forEach entirely on resume).
388
+ // Instead, persist the iteration cursor on the
389
+ // forEach's NodeRun's `iteration_context` column.
390
+ // ForEachNode reads it on re-entry to resume the
391
+ // right iteration + inner step.
392
+ //
393
+ // v0.6 prerequisite for wait-inside-primitives Phase 2
394
+ // — snapshot `ctx.state` regardless of nesting. Two
395
+ // re-entry paths consume this snapshot:
396
+ // 1. In-process timer fire (DeferredRunScheduler):
397
+ // same `ctx` is reused, state is already there;
398
+ // rehydrate at TriggerBase.run is a no-op.
399
+ // 2. Cross-process recovery (recoverDispatches →
400
+ // restoreDispatch on boot): a fresh `ctx` is
401
+ // built from the persisted scheduled_dispatches
402
+ // row with empty `state`. Without the snapshot,
403
+ // Phase 2's iteration-state-persistence promise
404
+ // breaks across restart.
405
+ if (tracker && traceRunId) {
406
+ const updates = {
407
+ stateSnapshot: serializeStateSnapshot(ctx.state, ctx.logger),
408
+ };
409
+ if (!deep) {
410
+ updates.lastCompletedStepIndex = i - 1;
411
+ }
412
+ tracker.getStore().updateRun(traceRunId, updates);
413
+ // Phase 2/3 — write iteration_context to the active
414
+ // primitive's NodeRun when nested. Reads sentinels
415
+ // stamped by the primitive (ForEachNode in Phase 2,
416
+ // LoopNode in Phase 3) on the parent ctx:
417
+ // - _blokActivePrimitiveNodeRunId: which NodeRun
418
+ // gets the cursor (set by RunnerSteps' outer
419
+ // iteration around the primitive's process()).
420
+ // - _blokForEachCurrentIteration: iteration index
421
+ // of the in-flight iteration.
422
+ // - _blokForEachPartialResults (Phase 2 only):
423
+ // accumulator for iterations [0..iteration-1]
424
+ // so the post-resume final result array covers
425
+ // all iterations. LoopNode doesn't aggregate
426
+ // results (it returns the last iteration's
427
+ // output), so it doesn't stamp this sentinel —
428
+ // the cursor stores `completedResults: []` and
429
+ // LoopNode ignores the field on resume.
430
+ // v0.6 Phase 4 — walk the primitive stack and persist
431
+ // each frame's cursor to its NodeRun. The TOP frame's
432
+ // `innerStepIndex` is the wait step's position within
433
+ // the deepest primitive's sub-pipeline; outer frames'
434
+ // `innerStepIndex` values were set by their enclosing
435
+ // runSteps' step-boundary write when control passed
436
+ // into the deeper primitive. This is what lets
437
+ // `forEach > forEach > wait`,
438
+ // `switch > forEach > wait`, etc. all resume
439
+ // correctly on re-entry.
440
+ //
441
+ // Each frame's `cursor` is owned by the primitive
442
+ // (it stamps `iteration`/`caseIndex`/`completedResults`).
443
+ // The runner's only responsibility here is to refresh
444
+ // the TOP frame's `innerStepIndex` to `i` and
445
+ // persist every frame.
446
+ if (deep) {
447
+ const stack = getPrimitiveStack(ctx);
448
+ if (stack.length > 0) {
449
+ stack[stack.length - 1].cursor.innerStepIndex = i;
450
+ for (const frame of stack) {
451
+ // Skip parallel-forEach frames — the
452
+ // parallel branch in ForEachNode writes
453
+ // its own cursor (with cancelled set +
454
+ // completedResults) post-`Promise.allSettled`.
455
+ // Writing the placeholder here would let
456
+ // "error beats wait" classifications leak
457
+ // a parallel cursor onto the failed
458
+ // run's NodeRun.
459
+ if (frame.cursor.mode === "parallel")
460
+ continue;
461
+ tracker.getStore().updateNodeRun(frame.nodeRunId, {
462
+ iterationContext: frame.cursor,
463
+ });
464
+ }
465
+ }
104
466
  }
105
467
  }
106
- if (ctx.response.error) {
107
- ctx.logger.log(`${stepPrefix} FAILED (${stepDuration}ms)`);
108
- throw ctx.response.error;
468
+ ctx.logger.log(`[step ${i + 1}/${steps.length}] ${step.name} (wait) → scheduled (deadline=${new Date(deadline).toISOString()})`);
469
+ throw new WaitDispatchRequest({
470
+ scheduledAt: deadline,
471
+ stepIndex: i,
472
+ stepId: step.name,
473
+ lastCompletedStepIndex: i - 1,
474
+ });
475
+ }
476
+ // === Tier 1: idempotency cache lookup ===
477
+ // Resolve the step's idempotency key against the live ctx,
478
+ // then consult the cache. On hit, short-circuit step.process
479
+ // entirely: replay the cached result through the same v2
480
+ // persistence rules (ephemeral / spread / as), mark the
481
+ // node cached for tracing, log "cached", and skip to the
482
+ // next step. Caching layers ABOVE PersistenceHelper —
483
+ // applyStepOutput's rules apply identically to cached and
484
+ // freshly-computed results.
485
+ const workflowName = ctx.workflow_name ?? "";
486
+ const cacheStore = tracker && traceRunId ? tracker.getStore() : null;
487
+ const resolvedIdemKey = cacheStore && workflowName ? resolveIdempotencyKey(step.idempotencyKey, ctx) : null;
488
+ if (cacheStore && resolvedIdemKey && nodeRunId) {
489
+ const hit = cacheStore.getIdempotencyCache(workflowName, step.name, resolvedIdemKey);
490
+ if (hit) {
491
+ applyStepOutput(ctx, step, { data: hit.data });
492
+ ctx.response = hit.data;
493
+ tracker?.markNodeCached(nodeRunId, {
494
+ sourceRunId: hit.sourceRunId,
495
+ sourceNodeRunId: hit.sourceNodeRunId,
496
+ cachedAt: hit.cachedAt,
497
+ }, hit.data);
498
+ ctx.logger.log(`${stepPrefix} → cached (from run ${hit.sourceRunId})`);
499
+ continue;
109
500
  }
110
- ctx.logger.log(`${stepPrefix} → completed (${stepDuration}ms)`);
111
501
  }
112
- catch (nodeErr) {
113
- // --- Trace: fail node on exception ---
114
- if (tracker && nodeRunId) {
115
- const existing = tracker.getNodeRun(nodeRunId);
116
- if (existing && existing.status === "running") {
117
- tracker.failNode(nodeRunId, nodeErr instanceof Error ? nodeErr : new Error(String(nodeErr)));
502
+ ctx.logger.log(`${stepPrefix} → started`);
503
+ const stepStart = performance.now();
504
+ // === Tier 1: retry loop ===
505
+ // Wraps step.process() with capped exponential backoff per
506
+ // `step.retry`. Default `maxAttempts: 1` preserves
507
+ // pre-Phase-4 behaviour exactly (single attempt, no retry).
508
+ // Soft errors (model.data.error returned from the SDK)
509
+ // participate in retry alongside thrown errors — both flow
510
+ // through the catch block below.
511
+ const retryConfig = step.retry;
512
+ const maxAttempts = retryConfig ? Math.max(1, retryConfig.maxAttempts) : 1;
513
+ // Tier 2 quick-wins — per-attempt timeout. When unset, the
514
+ // step runs without a cap. Numeric `maxDurationMs` arrives
515
+ // pre-parsed from `Configuration` (string `"30s"` →
516
+ // `30000` via `parseDuration`).
517
+ const maxDurationMs = step.maxDurationMs;
518
+ let attempt = 0;
519
+ // v0.6 Phase 4 — the primitive stack on ctx is owned by
520
+ // ForEachNode/LoopNode/SwitchNode (push on entry, pop in
521
+ // finally). The Phase 2/3 single-slot
522
+ // `_blokActivePrimitiveNodeRunId` mechanism is gone —
523
+ // nested primitives each register their own frame, and
524
+ // the wait-throw site walks the full stack. We keep
525
+ // `isIteratingPrimitive` only as a hook for legacy
526
+ // readers (none in core today) — wait-cursor writes no
527
+ // longer depend on it.
528
+ const isIteratingPrimitive = step.isPrimitiveIterator === true;
529
+ try {
530
+ while (true) {
531
+ attempt += 1;
532
+ try {
533
+ const processInvocation = () => step.process(ctx, step);
534
+ const model = typeof maxDurationMs === "number" && maxDurationMs > 0
535
+ ? await wrapWithTimeout(processInvocation, maxDurationMs, step.name)
536
+ : await processInvocation();
537
+ ctx.response = model.data;
538
+ // Treat soft errors (data carries `.error`) the same as
539
+ // thrown errors so retry semantics are uniform.
540
+ if (ctx.response?.error) {
541
+ throw ctx.response.error;
542
+ }
543
+ // === Tier 1: idempotency cache write ===
544
+ // Cache on the success path only — failed steps are
545
+ // re-runnable. Honour `idempotencyKeyTTL` per step;
546
+ // default 24h. A TTL of 0 stores an immediately-
547
+ // expired entry (useful as a kill-switch).
548
+ if (cacheStore && resolvedIdemKey && nodeRunId && traceRunId) {
549
+ const ttlField = step.idempotencyKeyTTL;
550
+ const ttlMs = typeof ttlField === "number" ? ttlField : DEFAULT_IDEMPOTENCY_TTL_MS;
551
+ const now = Date.now();
552
+ const expiresAt = ttlMs > 0 ? now + ttlMs : now - 1;
553
+ cacheStore.setIdempotencyCache(workflowName, step.name, resolvedIdemKey, {
554
+ data: model.data,
555
+ cachedAt: now,
556
+ expiresAt,
557
+ sourceRunId: traceRunId,
558
+ sourceNodeRunId: nodeRunId,
559
+ });
560
+ }
561
+ const stepDuration = (performance.now() - stepStart).toFixed(1);
562
+ // --- Trace: complete node ---
563
+ if (tracker && nodeRunId) {
564
+ // `_stepMetrics` is stashed on ctx by RuntimeAdapterNode
565
+ // when an adapter returns metrics (gRPC wire bytes,
566
+ // duration, cpu, memory). Threading it through
567
+ // `completeNode` is what gets the metrics into the
568
+ // run store + NODE_COMPLETED event payload — Studio's
569
+ // inspector reads them from there.
570
+ const ctxAny = ctx;
571
+ const stepMetrics = ctxAny._stepMetrics;
572
+ ctxAny._stepMetrics = undefined;
573
+ tracker.completeNode(nodeRunId, sanitize(ctx.response.data), stepMetrics);
574
+ // PR 4 — advance the resume cursor after each
575
+ // successful non-wait step. A subsequent wait step
576
+ // reads this value to set its own cursor before
577
+ // throwing WaitDispatchRequest. Only at top-level
578
+ // (deep=false); nested branch flow doesn't update.
579
+ if (!deep && traceRunId) {
580
+ tracker.getStore().updateRun(traceRunId, { lastCompletedStepIndex: i });
581
+ }
582
+ }
583
+ const attemptSuffix = attempt > 1 ? ` after ${attempt} attempts` : "";
584
+ ctx.logger.log(`${stepPrefix} → completed (${stepDuration}ms${attemptSuffix})`);
585
+ break;
586
+ }
587
+ catch (nodeErr) {
588
+ // v0.5.3 — control-flow signals from a step's run()
589
+ // must NOT be retried OR wrapped as enriched errors.
590
+ // In the production wait path, RunnerSteps throws
591
+ // WaitDispatchRequest from outside this retry loop, so
592
+ // this branch is normally inert. But if a custom node
593
+ // ever throws a wait/cancel signal from inside its
594
+ // process()/run(), preserve the type so the outer
595
+ // catch + TryCatchNode pass-through still recognise
596
+ // it. Same rationale as the outer-catch instanceof
597
+ // guards in the outer `catch (e)` block below.
598
+ if (nodeErr instanceof WaitDispatchRequest || nodeErr instanceof RunCancelledError) {
599
+ throw nodeErr;
600
+ }
601
+ if (attempt < maxAttempts && retryConfig) {
602
+ // More attempts remain — record this as a soft
603
+ // failure and back off before retrying. The node
604
+ // stays in `running` status; failNode is the
605
+ // terminal call.
606
+ if (tracker && nodeRunId) {
607
+ tracker.recordNodeAttemptFailed(nodeRunId, { attempt, error: nodeErr });
608
+ }
609
+ const backoffMs = computeBackoff(retryConfig, attempt);
610
+ const errMsg = nodeErr instanceof Error ? nodeErr.message : String(nodeErr);
611
+ ctx.logger.log(`${stepPrefix} → attempt ${attempt}/${maxAttempts} failed (${errMsg}), retrying in ${backoffMs}ms`);
612
+ await sleep(backoffMs);
613
+ continue;
614
+ }
615
+ // Final attempt — fail the node and propagate the
616
+ // enriched error so RunnerSteps' outer catch can
617
+ // wrap it as a GlobalError.
618
+ if (tracker && nodeRunId) {
619
+ const existing = tracker.getNodeRun(nodeRunId);
620
+ if (existing && existing.status === "running") {
621
+ tracker.failNode(nodeRunId, nodeErr instanceof Error ? nodeErr : new Error(String(nodeErr)));
622
+ }
623
+ }
624
+ // Tier 2 quick-wins — final-attempt timeout flips
625
+ // the run to "timedOut" (distinct from "failed").
626
+ // Only when the FINAL error was a StepTimeoutError;
627
+ // mixed failures (some retries timed out, final
628
+ // retry threw a different error) keep the normal
629
+ // "failed" status.
630
+ if (tracker &&
631
+ traceRunId &&
632
+ typeof maxDurationMs === "number" &&
633
+ maxDurationMs > 0 &&
634
+ nodeErr instanceof StepTimeoutError) {
635
+ tracker.markRunTimedOut(traceRunId, {
636
+ stepId: step.name,
637
+ maxDurationMs,
638
+ attemptsExhausted: attempt,
639
+ });
640
+ }
641
+ const stepDuration = (performance.now() - stepStart).toFixed(1);
642
+ const attemptSuffix = attempt > 1 ? ` after ${attempt} attempts` : "";
643
+ ctx.logger.log(`${stepPrefix} → FAILED (${stepDuration}ms${attemptSuffix})`);
644
+ // Enrich error with step context so developers know which step failed.
645
+ // Attach `_blokStepId` directly on the wrap so TryCatchNode's
646
+ // envelope construction can surface `$.error.stepId` to authors
647
+ // without parsing the prefix back out of the message string.
648
+ const originalMsg = nodeErr instanceof Error ? nodeErr.message : String(nodeErr);
649
+ const enrichedError = new Error(`${stepPrefix} failed: ${originalMsg}`);
650
+ const enrichedAny = enrichedError;
651
+ enrichedAny.cause = nodeErr;
652
+ enrichedAny._blokStepId = step.name;
653
+ throw enrichedError;
118
654
  }
119
655
  }
120
- // Enrich error with step context so developers know which step failed
121
- const originalMsg = nodeErr instanceof Error ? nodeErr.message : String(nodeErr);
122
- const enrichedError = new Error(`${stepPrefix} failed: ${originalMsg}`);
123
- enrichedError.cause = nodeErr;
124
- throw enrichedError;
656
+ }
657
+ finally {
658
+ // v0.6 Phase 4 primitives own their stack frame
659
+ // lifecycle now (push on entry, pop in finally), so
660
+ // there's nothing to restore here. The
661
+ // `isIteratingPrimitive` flag stays in the type
662
+ // system for documentation but no longer drives
663
+ // cursor accounting.
664
+ void isIteratingPrimitive;
125
665
  }
126
666
  }
127
667
  else {
@@ -138,12 +678,76 @@ export default class RunnerSteps {
138
678
  }
139
679
  }
140
680
  catch (e) {
681
+ // PR 1 follow-up · A2 fix companion. RunCancelledError carries
682
+ // the cancellation contract end-to-end — wrapping it as
683
+ // GlobalError would defeat TriggerBase.run's `instanceof
684
+ // RunCancelledError` discrimination and the run would get
685
+ // failRun'd on top of an already-cancelled status. Pass through
686
+ // untouched so the catch in TriggerBase.run sees the right type.
687
+ if (e instanceof RunCancelledError) {
688
+ throw e;
689
+ }
690
+ // PR 4 — WaitDispatchRequest is the wait.for / wait.until
691
+ // step's signal to TriggerBase that it should schedule a
692
+ // deferred dispatch. Same pass-through rationale as
693
+ // RunCancelledError — the catch in TriggerBase.run translates
694
+ // it to DeferredDispatchSignal + 202.
695
+ if (e instanceof WaitDispatchRequest) {
696
+ throw e;
697
+ }
698
+ // Capture the step-enrichment wrap's `_blokStepId` BEFORE we
699
+ // unwrap past it. The wrap is the outermost layer (set inside
700
+ // the inner-try retry loop above); after unwrapping to the inner
701
+ // GlobalError this metadata would otherwise be lost. Surfaces to
702
+ // authors as `$.error.stepId` inside tryCatch.catch arms.
703
+ const wrapStepId = typeof e === "object" && e !== null && "_blokStepId" in e
704
+ ? e._blokStepId
705
+ : undefined;
141
706
  let error_context = {};
142
707
  if (e instanceof GlobalError) {
143
708
  error_context = e;
144
709
  }
145
710
  else {
146
- error_context = new GlobalError(e.message);
711
+ // Walk the `.cause` chain looking for a GlobalError. The
712
+ // step-enrichment wrap (final-attempt path of the retry loop above) sets `cause = nodeErr`,
713
+ // and `nodeErr` may itself be a GlobalError thrown from
714
+ // `defineNode`-built nodes (e.g. `@blokjs/throw` setting
715
+ // `code: 401` for an auth-check middleware). Without this
716
+ // walk, the outer wrap below would force the framework's
717
+ // generic `[step N/M] X failed: ...` message + default 500
718
+ // code, clobbering the author's structured rejection.
719
+ let inner = e;
720
+ let foundGlobal = null;
721
+ while (typeof inner === "object" &&
722
+ inner !== null &&
723
+ "cause" in inner &&
724
+ inner.cause !== undefined &&
725
+ inner.cause !== inner) {
726
+ inner = inner.cause;
727
+ if (inner instanceof GlobalError) {
728
+ foundGlobal = inner;
729
+ break;
730
+ }
731
+ }
732
+ if (foundGlobal) {
733
+ error_context = foundGlobal;
734
+ }
735
+ else {
736
+ error_context = new GlobalError(e.message);
737
+ // Preserve the original error chain so outer handlers
738
+ // (notably v0.5 TryCatchNode's `$.error.message` resolution)
739
+ // can peel back through `.cause` to the author's original
740
+ // `throw new Error("...")` text instead of the runner's
741
+ // `[step N/M] <name> failed: ...` enriched prefix.
742
+ error_context.cause = e;
743
+ }
744
+ }
745
+ // Stamp the wrap's stepId on the unwrapped error so TryCatchNode's
746
+ // `toErrorEnvelope` walk can surface it as `$.error.stepId`. The
747
+ // inner-try wrap layer is gone by this point; this is the only
748
+ // place where the runner can identify which sub-step failed.
749
+ if (typeof wrapStepId === "string" && wrapStepId.length > 0) {
750
+ error_context._blokStepId = wrapStepId;
147
751
  }
148
752
  throw error_context;
149
753
  }