@blokjs/runner 0.2.2 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (213) hide show
  1. package/dist/Blok.js +32 -3
  2. package/dist/Blok.js.map +1 -1
  3. package/dist/Configuration.d.ts +59 -5
  4. package/dist/Configuration.js +366 -96
  5. package/dist/Configuration.js.map +1 -1
  6. package/dist/ForEachNode.d.ts +59 -0
  7. package/dist/ForEachNode.js +522 -0
  8. package/dist/ForEachNode.js.map +1 -0
  9. package/dist/LoopMaxIterationsError.d.ts +11 -0
  10. package/dist/LoopMaxIterationsError.js +18 -0
  11. package/dist/LoopMaxIterationsError.js.map +1 -0
  12. package/dist/LoopNode.d.ts +36 -0
  13. package/dist/LoopNode.js +182 -0
  14. package/dist/LoopNode.js.map +1 -0
  15. package/dist/PayloadTooLargeError.d.ts +19 -0
  16. package/dist/PayloadTooLargeError.js +29 -0
  17. package/dist/PayloadTooLargeError.js.map +1 -0
  18. package/dist/RunCancelledError.d.ts +17 -0
  19. package/dist/RunCancelledError.js +25 -0
  20. package/dist/RunCancelledError.js.map +1 -0
  21. package/dist/Runner.d.ts +11 -1
  22. package/dist/Runner.js +9 -2
  23. package/dist/Runner.js.map +1 -1
  24. package/dist/RunnerSteps.js +648 -44
  25. package/dist/RunnerSteps.js.map +1 -1
  26. package/dist/RuntimeAdapterNode.d.ts +2 -1
  27. package/dist/RuntimeAdapterNode.js +2 -2
  28. package/dist/RuntimeAdapterNode.js.map +1 -1
  29. package/dist/RuntimeRegistry.d.ts +23 -2
  30. package/dist/RuntimeRegistry.js +31 -2
  31. package/dist/RuntimeRegistry.js.map +1 -1
  32. package/dist/SubworkflowNode.d.ts +181 -0
  33. package/dist/SubworkflowNode.js +479 -0
  34. package/dist/SubworkflowNode.js.map +1 -0
  35. package/dist/SwitchNode.d.ts +37 -0
  36. package/dist/SwitchNode.js +153 -0
  37. package/dist/SwitchNode.js.map +1 -0
  38. package/dist/TriggerBase.d.ts +178 -0
  39. package/dist/TriggerBase.js +1032 -5
  40. package/dist/TriggerBase.js.map +1 -1
  41. package/dist/TryCatchNode.d.ts +32 -0
  42. package/dist/TryCatchNode.js +207 -0
  43. package/dist/TryCatchNode.js.map +1 -0
  44. package/dist/WaitDispatchRequest.d.ts +38 -0
  45. package/dist/WaitDispatchRequest.js +13 -0
  46. package/dist/WaitDispatchRequest.js.map +1 -0
  47. package/dist/WaitNode.d.ts +23 -0
  48. package/dist/WaitNode.js +26 -0
  49. package/dist/WaitNode.js.map +1 -0
  50. package/dist/adapters/grpc/GrpcCodec.js +2 -2
  51. package/dist/adapters/grpc/GrpcRuntimeAdapter.d.ts +6 -4
  52. package/dist/adapters/grpc/GrpcRuntimeAdapter.js +6 -4
  53. package/dist/adapters/grpc/GrpcRuntimeAdapter.js.map +1 -1
  54. package/dist/adapters/grpc/types.d.ts +7 -5
  55. package/dist/adapters/grpc/types.js.map +1 -1
  56. package/dist/adapters/transport.d.ts +12 -41
  57. package/dist/adapters/transport.js +21 -70
  58. package/dist/adapters/transport.js.map +1 -1
  59. package/dist/cache/NodeResultCache.js +7 -0
  60. package/dist/cache/NodeResultCache.js.map +1 -1
  61. package/dist/concurrency/ConcurrencyBackend.d.ts +61 -0
  62. package/dist/concurrency/ConcurrencyBackend.js +20 -0
  63. package/dist/concurrency/ConcurrencyBackend.js.map +1 -0
  64. package/dist/concurrency/ConcurrencyLimitError.d.ts +37 -0
  65. package/dist/concurrency/ConcurrencyLimitError.js +16 -0
  66. package/dist/concurrency/ConcurrencyLimitError.js.map +1 -0
  67. package/dist/concurrency/NatsKvConcurrencyBackend.d.ts +64 -0
  68. package/dist/concurrency/NatsKvConcurrencyBackend.js +310 -0
  69. package/dist/concurrency/NatsKvConcurrencyBackend.js.map +1 -0
  70. package/dist/concurrency/QueueExpiredError.d.ts +40 -0
  71. package/dist/concurrency/QueueExpiredError.js +15 -0
  72. package/dist/concurrency/QueueExpiredError.js.map +1 -0
  73. package/dist/concurrency/RedisConcurrencyBackend.d.ts +64 -0
  74. package/dist/concurrency/RedisConcurrencyBackend.js +374 -0
  75. package/dist/concurrency/RedisConcurrencyBackend.js.map +1 -0
  76. package/dist/concurrency/createConcurrencyBackend.d.ts +24 -0
  77. package/dist/concurrency/createConcurrencyBackend.js +38 -0
  78. package/dist/concurrency/createConcurrencyBackend.js.map +1 -0
  79. package/dist/concurrency/readConcurrencyConfig.d.ts +60 -0
  80. package/dist/concurrency/readConcurrencyConfig.js +60 -0
  81. package/dist/concurrency/readConcurrencyConfig.js.map +1 -0
  82. package/dist/defineNode.d.ts +8 -0
  83. package/dist/defineNode.js +25 -5
  84. package/dist/defineNode.js.map +1 -1
  85. package/dist/graphql/GraphQLSchemaGenerator.js +1 -1
  86. package/dist/graphql/GraphQLSchemaGenerator.js.map +1 -1
  87. package/dist/idempotency/resolveIdempotencyKey.d.ts +20 -0
  88. package/dist/idempotency/resolveIdempotencyKey.js +37 -0
  89. package/dist/idempotency/resolveIdempotencyKey.js.map +1 -0
  90. package/dist/index.d.ts +30 -6
  91. package/dist/index.js +55 -6
  92. package/dist/index.js.map +1 -1
  93. package/dist/marketplace/RuntimeCatalog.d.ts +6 -0
  94. package/dist/marketplace/RuntimeCatalog.js.map +1 -1
  95. package/dist/marketplace/RuntimeDiscovery.d.ts +2 -2
  96. package/dist/marketplace/RuntimeDiscovery.js +18 -6
  97. package/dist/marketplace/RuntimeDiscovery.js.map +1 -1
  98. package/dist/monitoring/ConcurrencyMetrics.d.ts +82 -0
  99. package/dist/monitoring/ConcurrencyMetrics.js +139 -0
  100. package/dist/monitoring/ConcurrencyMetrics.js.map +1 -0
  101. package/dist/monitoring/ForEachWaitMetrics.d.ts +22 -0
  102. package/dist/monitoring/ForEachWaitMetrics.js +36 -0
  103. package/dist/monitoring/ForEachWaitMetrics.js.map +1 -0
  104. package/dist/monitoring/JanitorMetrics.d.ts +27 -0
  105. package/dist/monitoring/JanitorMetrics.js +48 -0
  106. package/dist/monitoring/JanitorMetrics.js.map +1 -0
  107. package/dist/openapi/OpenAPIGenerator.js +7 -2
  108. package/dist/openapi/OpenAPIGenerator.js.map +1 -1
  109. package/dist/runtime/PrimitiveStack.d.ts +64 -0
  110. package/dist/runtime/PrimitiveStack.js +92 -0
  111. package/dist/runtime/PrimitiveStack.js.map +1 -0
  112. package/dist/scheduling/DebounceBackend.d.ts +108 -0
  113. package/dist/scheduling/DebounceBackend.js +23 -0
  114. package/dist/scheduling/DebounceBackend.js.map +1 -0
  115. package/dist/scheduling/DebounceCoordinator.d.ts +141 -0
  116. package/dist/scheduling/DebounceCoordinator.js +362 -0
  117. package/dist/scheduling/DebounceCoordinator.js.map +1 -0
  118. package/dist/scheduling/DeferredDispatchSignal.d.ts +50 -0
  119. package/dist/scheduling/DeferredDispatchSignal.js +14 -0
  120. package/dist/scheduling/DeferredDispatchSignal.js.map +1 -0
  121. package/dist/scheduling/DeferredRunScheduler.d.ts +96 -0
  122. package/dist/scheduling/DeferredRunScheduler.js +256 -0
  123. package/dist/scheduling/DeferredRunScheduler.js.map +1 -0
  124. package/dist/scheduling/NatsKvDebounceBackend.d.ts +53 -0
  125. package/dist/scheduling/NatsKvDebounceBackend.js +334 -0
  126. package/dist/scheduling/NatsKvDebounceBackend.js.map +1 -0
  127. package/dist/scheduling/RedisDebounceBackend.d.ts +49 -0
  128. package/dist/scheduling/RedisDebounceBackend.js +356 -0
  129. package/dist/scheduling/RedisDebounceBackend.js.map +1 -0
  130. package/dist/scheduling/createDebounceBackend.d.ts +25 -0
  131. package/dist/scheduling/createDebounceBackend.js +39 -0
  132. package/dist/scheduling/createDebounceBackend.js.map +1 -0
  133. package/dist/scheduling/readSchedulingConfig.d.ts +24 -0
  134. package/dist/scheduling/readSchedulingConfig.js +52 -0
  135. package/dist/scheduling/readSchedulingConfig.js.map +1 -0
  136. package/dist/security/AuditLogger.js +1 -1
  137. package/dist/security/AuditLogger.js.map +1 -1
  138. package/dist/security/AuthMiddleware.d.ts +19 -20
  139. package/dist/security/AuthMiddleware.js +35 -20
  140. package/dist/security/AuthMiddleware.js.map +1 -1
  141. package/dist/security/OAuthProvider.js +2 -2
  142. package/dist/security/OAuthProvider.js.map +1 -1
  143. package/dist/security/SecretManager.js +14 -13
  144. package/dist/security/SecretManager.js.map +1 -1
  145. package/dist/security/index.d.ts +3 -1
  146. package/dist/security/index.js +3 -1
  147. package/dist/security/index.js.map +1 -1
  148. package/dist/testing/TestHarness.d.ts +27 -12
  149. package/dist/testing/TestHarness.js +19 -3
  150. package/dist/testing/TestHarness.js.map +1 -1
  151. package/dist/testing/WorkflowTestRunner.js +0 -7
  152. package/dist/testing/WorkflowTestRunner.js.map +1 -1
  153. package/dist/timeouts/StepTimeoutError.d.ts +22 -0
  154. package/dist/timeouts/StepTimeoutError.js +31 -0
  155. package/dist/timeouts/StepTimeoutError.js.map +1 -0
  156. package/dist/tracing/InMemoryRunStore.d.ts +41 -1
  157. package/dist/tracing/InMemoryRunStore.js +239 -0
  158. package/dist/tracing/InMemoryRunStore.js.map +1 -1
  159. package/dist/tracing/Janitor.d.ts +70 -0
  160. package/dist/tracing/Janitor.js +150 -0
  161. package/dist/tracing/Janitor.js.map +1 -0
  162. package/dist/tracing/PostgresRunStore.d.ts +57 -1
  163. package/dist/tracing/PostgresRunStore.js +711 -6
  164. package/dist/tracing/PostgresRunStore.js.map +1 -1
  165. package/dist/tracing/RoutingDiagnostics.d.ts +55 -0
  166. package/dist/tracing/RoutingDiagnostics.js +50 -0
  167. package/dist/tracing/RoutingDiagnostics.js.map +1 -0
  168. package/dist/tracing/RunStore.d.ts +181 -1
  169. package/dist/tracing/RunTracker.d.ts +244 -9
  170. package/dist/tracing/RunTracker.js +594 -1
  171. package/dist/tracing/RunTracker.js.map +1 -1
  172. package/dist/tracing/SqliteRunStore.d.ts +79 -2
  173. package/dist/tracing/SqliteRunStore.js +775 -16
  174. package/dist/tracing/SqliteRunStore.js.map +1 -1
  175. package/dist/tracing/TraceRouter.d.ts +20 -2
  176. package/dist/tracing/TraceRouter.js +612 -6
  177. package/dist/tracing/TraceRouter.js.map +1 -1
  178. package/dist/tracing/createStore.js +14 -3
  179. package/dist/tracing/createStore.js.map +1 -1
  180. package/dist/tracing/metadataFilter.d.ts +63 -0
  181. package/dist/tracing/metadataFilter.js +224 -0
  182. package/dist/tracing/metadataFilter.js.map +1 -0
  183. package/dist/tracing/sanitize.d.ts +11 -0
  184. package/dist/tracing/sanitize.js +29 -0
  185. package/dist/tracing/sanitize.js.map +1 -1
  186. package/dist/tracing/types.d.ts +672 -2
  187. package/dist/utils/createChildContext.d.ts +32 -0
  188. package/dist/utils/createChildContext.js +113 -0
  189. package/dist/utils/createChildContext.js.map +1 -0
  190. package/dist/utils/envAllowlist.d.ts +35 -0
  191. package/dist/utils/envAllowlist.js +113 -0
  192. package/dist/utils/envAllowlist.js.map +1 -0
  193. package/dist/version/RuntimeVersionValidator.d.ts +38 -0
  194. package/dist/version/RuntimeVersionValidator.js +121 -0
  195. package/dist/version/RuntimeVersionValidator.js.map +1 -0
  196. package/dist/visualization/WorkflowVisualizer.js +4 -4
  197. package/dist/visualization/WorkflowVisualizer.js.map +1 -1
  198. package/dist/workflow/PersistenceHelper.d.ts +18 -10
  199. package/dist/workflow/PersistenceHelper.js +35 -9
  200. package/dist/workflow/PersistenceHelper.js.map +1 -1
  201. package/dist/workflow/WorkflowNormalizer.d.ts +48 -42
  202. package/dist/workflow/WorkflowNormalizer.js +650 -18
  203. package/dist/workflow/WorkflowNormalizer.js.map +1 -1
  204. package/dist/workflow/WorkflowRegistry.d.ts +186 -0
  205. package/dist/workflow/WorkflowRegistry.js +202 -0
  206. package/dist/workflow/WorkflowRegistry.js.map +1 -0
  207. package/dist/workflow/sampleBody.d.ts +54 -0
  208. package/dist/workflow/sampleBody.js +320 -0
  209. package/dist/workflow/sampleBody.js.map +1 -0
  210. package/package.json +3 -8
  211. package/dist/adapters/HttpRuntimeAdapter.d.ts +0 -79
  212. package/dist/adapters/HttpRuntimeAdapter.js +0 -233
  213. package/dist/adapters/HttpRuntimeAdapter.js.map +0 -1
@@ -3,15 +3,66 @@ import { metrics } from "@opentelemetry/api";
3
3
  import { v4 as uuid } from "uuid";
4
4
  import Configuration from "./Configuration";
5
5
  import DefaultLogger from "./DefaultLogger";
6
+ import { RunCancelledError } from "./RunCancelledError";
6
7
  import Runner from "./Runner";
8
+ import { WaitDispatchRequest } from "./WaitDispatchRequest";
9
+ import { ConcurrencyLimitError } from "./concurrency/ConcurrencyLimitError";
10
+ import { QueueExpiredError } from "./concurrency/QueueExpiredError";
11
+ import { readConcurrencyConfig } from "./concurrency/readConcurrencyConfig";
7
12
  import { HotReloadManager } from "./hmr/HotReloadManager";
13
+ import { resolveIdempotencyKey } from "./idempotency/resolveIdempotencyKey";
8
14
  import { CircuitBreaker } from "./monitoring/CircuitBreaker";
15
+ import { ConcurrencyMetrics } from "./monitoring/ConcurrencyMetrics";
9
16
  import { HealthCheck } from "./monitoring/HealthCheck";
10
17
  import { PrometheusMetricsBridge } from "./monitoring/PrometheusMetricsBridge";
11
18
  import { RateLimiter } from "./monitoring/RateLimiter";
12
19
  import { TriggerMetricsCollector } from "./monitoring/TriggerMetricsCollector";
20
+ import { DebounceCoordinator } from "./scheduling/DebounceCoordinator";
21
+ import { DeferredDispatchSignal } from "./scheduling/DeferredDispatchSignal";
22
+ import { DeferredRunScheduler } from "./scheduling/DeferredRunScheduler";
23
+ import { readSchedulingConfig } from "./scheduling/readSchedulingConfig";
13
24
  import { RunTracker } from "./tracing/RunTracker";
14
25
  import { TracingLogger } from "./tracing/TracingLogger";
26
+ import { getEnvForCtx } from "./utils/envAllowlist";
27
+ import { WorkflowRegistry } from "./workflow/WorkflowRegistry";
28
+ /**
29
+ * `shouldRecordSample` (defined below `pickHeader`) — sample-body recording (option C follow-up to #100). Returns true
30
+ * when ANY of the workflow's triggers has `recordSample: true`.
31
+ * Currently the field only exists on the HTTP trigger schema, but the
32
+ * helper walks every key so a future addition (worker / pubsub /
33
+ * webhook) can opt in without changing this code.
34
+ */
/**
 * G2 — pull a single string value from an incoming request's headers
 * map. Handles the three value shapes Node / Hono / Express can produce:
 * `undefined`, a literal string, or an array (first element wins).
 * The lookup is case-folded (Hono normalises header names; Express/Node
 * sometimes preserve case).
 *
 * A missing headers map (`null` / `undefined`) is treated as "header not
 * present" instead of throwing from the key enumeration.
 *
 * @param {Record<string, unknown> | null | undefined} headers - Header map to search.
 * @param {string} name - Header name to look up (any casing).
 * @returns {string | undefined} The header value, or `undefined` when no
 *   key matches or the matching value is neither a string nor an array.
 */
function pickHeader(headers, name) {
    if (headers == null)
        return undefined;
    const wanted = name.toLowerCase();
    for (const [key, value] of Object.entries(headers)) {
        if (key.toLowerCase() !== wanted)
            continue;
        if (Array.isArray(value))
            return value[0];
        if (typeof value === "string")
            return value;
        // Matching key with an unusable value: keep scanning, another key
        // with different casing may still carry a usable value.
    }
    return undefined;
}
/**
 * Sample-body recording check. Reports whether at least one entry of the
 * workflow's `trigger` map is an object carrying `recordSample: true`.
 * Every key of the map is inspected, so the check is not tied to a
 * particular trigger kind.
 *
 * @param {unknown} trigger - The workflow's trigger map.
 * @returns {boolean} `true` when any entry has `recordSample === true`;
 *   `false` for a missing or non-object map, or when no entry opts in.
 */
function shouldRecordSample(trigger) {
    const isObjectLike = (candidate) => Boolean(candidate) && typeof candidate === "object";
    if (!isObjectLike(trigger))
        return false;
    return Object.values(trigger).some((entry) => isObjectLike(entry) && entry.recordSample === true);
}
15
66
  export default class TriggerBase extends Trigger {
16
67
  configuration;
17
68
  /** Health check instance for this trigger */
@@ -45,6 +96,318 @@ export default class TriggerBase extends Trigger {
45
96
  getRunner() {
46
97
  return new Runner(this.configuration.steps);
47
98
  }
99
+ /**
100
+ * Tier 2 #5+#7 follow-up — durable scheduler hook.
101
+ *
102
+ * When a trigger supports re-firing deferred dispatches across process
103
+ * restarts, it overrides this method to extract a JSON-serializable
104
+ * subset of `ctx` sufficient for `restoreDispatch(payload)` (defined
105
+ * by the trigger) to reconstruct an equivalent ctx and re-enter
106
+ * `dispatchDeferred`.
107
+ *
108
+ * Returns `null` (default) when the trigger does NOT support
109
+ * cross-restart durability — the scheduler then runs purely in-memory
110
+ * for that trigger (existing pre-follow-up behaviour).
111
+ *
112
+ * Override in `HttpTrigger` to return `{method, path, headers, body,
113
+ * params, query, workflowPath}` (with sensitive header keys stripped).
114
+ * Worker triggers don't override — broker handles delay durability.
115
+ */
116
+ extractDispatchPayload(_ctx) {
117
+ return null;
118
+ }
119
+ /**
120
+ * Returns the trigger type string used to tag persisted scheduled
121
+ * dispatch rows (`scheduled_dispatches.trigger_type`). Mirrors the
122
+ * convention from `tracker.startRun({triggerType})`. Override when
123
+ * the class name doesn't naturally produce the right tag.
124
+ */
125
+ getTriggerType() {
126
+ return this.constructor.name.replace("Trigger", "").toLowerCase() || "unknown";
127
+ }
128
+ /**
129
+ * v0.6 — apply the merged middleware chain (process-global → workflow-level
130
+ * → trigger-level) to `ctx` before the main workflow body runs.
131
+ *
132
+ * Trigger-level names are read from
133
+ * `this.configuration.trigger[<this.getTriggerType()>].middleware`, so
134
+ * HttpTrigger reads `trigger.http.middleware`, WorkerTrigger reads
135
+ * `trigger.worker.middleware`, CronTrigger reads `trigger.cron.middleware`.
136
+ *
137
+ * Pre-v0.6 the merge code lived inline in `HttpTrigger.run` and worker
138
+ * + cron triggers silently skipped middleware. Centralising it on
139
+ * TriggerBase gives all three trigger families uniform semantics.
140
+ *
141
+ * Resolution order outer→inner:
142
+ * process-global → workflow-level → trigger-level → main workflow body.
143
+ *
144
+ * State mutations from earlier middleware (e.g. `ctx.state.identity`
145
+ * from auth-check) carry forward to later middleware and the main
146
+ * workflow because they share the same ctx. Middleware authors
147
+ * short-circuit via `@blokjs/throw` — the throw propagates to the
148
+ * caller's outer catch, and the main workflow does NOT run.
149
+ */
150
+ async applyMiddlewareChain(ctx, nodeMap) {
151
+ const triggerType = this.getTriggerType();
152
+ const triggerCfg = this.configuration.trigger?.[triggerType];
153
+ const triggerLevel = Array.isArray(triggerCfg?.middleware)
154
+ ? triggerCfg.middleware.filter((n) => typeof n === "string" && n.length > 0)
155
+ : [];
156
+ const workflowLevel = this.configuration.appliedMiddleware ?? [];
157
+ const globalLevel = WorkflowRegistry.getInstance().getGlobalMiddleware();
158
+ const middlewareNames = [...globalLevel, ...workflowLevel, ...triggerLevel];
159
+ if (middlewareNames.length > 0) {
160
+ await this.runMiddlewareChain(ctx, middlewareNames, nodeMap);
161
+ }
162
+ }
163
+ /**
164
+ * v0.6 — dispatch a chain of middleware workflows on the same parent
165
+ * ctx. Each entry in `names` is the `name:` of a workflow registered
166
+ * with `middleware: true`. For each:
167
+ *
168
+ * - Materialise a fresh `Configuration` for the middleware (resolves
169
+ * its inner steps + nodes against `nodeMap` so `@blokjs/throw`
170
+ * etc. resolve from `@blokjs/helpers`).
171
+ * - Save the parent ctx.config; swap in the middleware's resolved
172
+ * `mwConfig.nodes` so the blueprint mapper finds the middleware's
173
+ * step inputs.
174
+ * - Run via `new Runner(...).run(ctx, { deep: true })` — `deep: true`
175
+ * prevents the inner runSteps from inheriting the outer run's
176
+ * `lastCompletedStepIndex` cursor (PR 4 wait/resume hazard).
177
+ * - Restore parent ctx.config in `finally`.
178
+ *
179
+ * Missing middleware (name not registered) is a configuration error
180
+ * — throws a clear message naming the unknown middleware. Authors
181
+ * typically use `@blokjs/throw` inside middleware with a `code:` to
182
+ * produce structured HTTP responses (e.g. 401) — those throws
183
+ * propagate to the outer catch in the calling trigger.
184
+ *
185
+ * Pre-v0.6 this lived as a private method on `HttpTrigger`. Lifted
186
+ * here so worker + cron triggers can reuse it without duplication.
187
+ */
188
+ async runMiddlewareChain(ctx, names, nodeMap) {
189
+ const registry = WorkflowRegistry.getInstance();
190
+ for (const mwName of names) {
191
+ const entry = registry.getMiddleware(mwName);
192
+ if (!entry) {
193
+ const known = registry
194
+ .list()
195
+ .filter((e) => e.isMiddleware)
196
+ .map((e) => e.name);
197
+ const knownStr = known.length > 0 ? known.join(", ") : "(none registered)";
198
+ throw new Error(`[blok] middleware "${mwName}" not found in WorkflowRegistry. Available middleware: ${knownStr}. Make sure the middleware workflow has \`"middleware": true\` set at the workflow root and is in a scanned WORKFLOWS_PATH directory.`);
199
+ }
200
+ const mwConfig = new Configuration();
201
+ await mwConfig.init(mwName, nodeMap, entry.workflow);
202
+ const parentConfig = ctx.config;
203
+ ctx.config = mwConfig.nodes;
204
+ // Sentinel so RunnerSteps can tag every NodeRun emitted during
205
+ // this middleware's execution with `middleware: mwName`. Studio
206
+ // reads that field to render a `mw:<name>` badge on the inner
207
+ // step rows so operators can see which middleware in the chain
208
+ // produced each nested step.
209
+ ctx._blokMiddlewareName = mwName;
210
+ try {
211
+ const mwRunner = new Runner(mwConfig.steps);
212
+ await mwRunner.run(ctx, { deep: true, stepName: `mw:${mwName}` });
213
+ }
214
+ finally {
215
+ ctx.config = parentConfig;
216
+ ctx._blokMiddlewareName = undefined;
217
+ }
218
+ }
219
+ }
220
+ // --- Crash auto-flip (Tier 2 quick-wins follow-up) ---
221
+ /** Flag — set true after `installCrashHandlers` has run once in this process. */
222
+ static crashHandlersInstalled = false;
223
+ /**
224
+ * Tier 2 quick-wins follow-up — install process-level handlers for
225
+ * `uncaughtException` and `unhandledRejection`. When fired, flip
226
+ * every in-flight `running` run to `"crashed"` (with the captured
227
+ * error) BEFORE re-throwing / letting Node's default behavior take
228
+ * over. Idempotent — safe to call from every trigger's `listen()`;
229
+ * only the first call installs handlers.
230
+ *
231
+ * Kill-switch: `BLOK_CRASH_AUTOFLIP_DISABLED=1`.
232
+ *
233
+ * Why sync: `process.on("uncaughtException")` handlers can't await.
234
+ * `markAllRunningRunsAsCrashed` is sync (sqlite + in-memory writes
235
+ * complete before the handler returns).
236
+ */
237
+ static installCrashHandlers(logger) {
238
+ if (TriggerBase.crashHandlersInstalled)
239
+ return;
240
+ if (process.env.BLOK_CRASH_AUTOFLIP_DISABLED === "1")
241
+ return;
242
+ TriggerBase.crashHandlersInstalled = true;
243
+ const onUncaught = (err) => {
244
+ try {
245
+ const flipped = RunTracker.getInstance().markAllRunningRunsAsCrashed(err);
246
+ logger?.error?.(`[blok][crash-autoflip] uncaughtException — flipped ${flipped} running run(s) to crashed: ${err.stack || err.message}`);
247
+ }
248
+ catch (markErr) {
249
+ // Last-ditch — at least log so the operator knows the autoflip itself failed.
250
+ console.error("[blok][crash-autoflip] markAllRunningRunsAsCrashed failed:", markErr);
251
+ }
252
+ // Re-emit / let the runtime crash as expected — we don't want to
253
+ // silently swallow uncaught errors. Without this, Node would
254
+ // continue running with the handler attached but operators
255
+ // expect the process to die on uncaught exceptions.
256
+ throw err;
257
+ };
258
+ const onRejection = (reason) => {
259
+ const err = reason instanceof Error ? reason : new Error(String(reason));
260
+ try {
261
+ const flipped = RunTracker.getInstance().markAllRunningRunsAsCrashed(err);
262
+ logger?.error?.(`[blok][crash-autoflip] unhandledRejection — flipped ${flipped} running run(s) to crashed: ${err.stack || err.message}`);
263
+ }
264
+ catch (markErr) {
265
+ console.error("[blok][crash-autoflip] markAllRunningRunsAsCrashed failed:", markErr);
266
+ }
267
+ // Don't re-throw — unhandledRejection is a warning, not a crash.
268
+ // Node's default behavior (warn + continue) still applies because
269
+ // our handler is additive, not replacing the default.
270
+ };
271
+ process.on("uncaughtException", onUncaught);
272
+ process.on("unhandledRejection", onRejection);
273
+ }
274
+ /** Test-only — reset the install flag so tests can re-install handlers. */
275
+ static resetCrashHandlersInstalled() {
276
+ TriggerBase.crashHandlersInstalled = false;
277
+ }
278
+ // --- Graceful shutdown (Tier 2 follow-up) ---
279
+ /** Flag — set true after `installShutdownHandlers` has run once in this process. */
280
+ static shutdownHandlersInstalled = false;
281
+ /**
282
+ * Install SIGTERM + SIGINT handlers that drain process resources
283
+ * cleanly before exit. Mirrors the `installCrashHandlers` pattern —
284
+ * idempotent + opt-out via `BLOK_GRACEFUL_SHUTDOWN_DISABLED=1`.
285
+ *
286
+ * Drain order:
287
+ * 1. Stop accepting new work — calls `trigger.stop()` if available
288
+ * (HttpTrigger drains in-flight requests + closes the server).
289
+ * 2. Stop the periodic janitor sweep so it doesn't fire mid-drain.
290
+ * 3. Cancel pending deferred dispatches in the in-memory scheduler.
291
+ * (Persisted rows in `scheduled_dispatches` survive — the next
292
+ * boot recovers them.)
293
+ * 4. Disconnect the cross-process concurrency backend (NATS KV)
294
+ * so locks held by this process release on the broker side.
295
+ * 5. `process.exit(0)`.
296
+ *
297
+ * Errors during drain are caught + logged; the process still exits
298
+ * (cleanup is best-effort; the operator wants a clean exit).
299
+ *
300
+ * Why this is a `static` method: shutdown handlers must be installed
301
+ * once per process, regardless of how many trigger subclasses
302
+ * coexist. Subclasses pass `this` so the handler can call their
303
+ * specific `stop()`.
304
+ */
305
+ static installShutdownHandlers(trigger, logger) {
306
+ if (TriggerBase.shutdownHandlersInstalled)
307
+ return;
308
+ if (process.env.BLOK_GRACEFUL_SHUTDOWN_DISABLED === "1")
309
+ return;
310
+ TriggerBase.shutdownHandlersInstalled = true;
311
+ const onSignal = async (signal) => {
312
+ logger?.log?.(`[blok][shutdown] received ${signal} — draining...`);
313
+ try {
314
+ // 1. Stop the trigger (drain in-flight, close server).
315
+ const stoppable = trigger;
316
+ if (typeof stoppable.stop === "function") {
317
+ await stoppable.stop();
318
+ }
319
+ // 2. Stop the janitor.
320
+ try {
321
+ const { Janitor } = await import("./tracing/Janitor");
322
+ const janitor = Janitor.instance;
323
+ if (janitor)
324
+ janitor.stop();
325
+ }
326
+ catch {
327
+ // Janitor may not have been imported yet.
328
+ }
329
+ // 3. Clear pending deferred dispatches (in-memory only —
330
+ // persisted rows survive for next-boot recovery).
331
+ try {
332
+ DeferredRunScheduler.getInstance().clear();
333
+ }
334
+ catch {
335
+ // Best-effort.
336
+ }
337
+ // 4. Disconnect cross-process concurrency backend.
338
+ //
339
+ // PR 3 D5 — wrap disconnect() in a Promise.race timeout so a
340
+ // slow NATS drain doesn't hang past the SIGTERM-to-SIGKILL
341
+ // window. Default 10s; configurable via
342
+ // BLOK_BACKEND_DISCONNECT_TIMEOUT_MS. Timer is .unref()'d so
343
+ // it doesn't keep the event loop alive after a successful
344
+ // disconnect.
345
+ const backend = RunTracker.getInstance().getConcurrencyBackend();
346
+ if (backend) {
347
+ const disconnectTimeoutMs = (() => {
348
+ const raw = process.env.BLOK_BACKEND_DISCONNECT_TIMEOUT_MS;
349
+ if (!raw || !/^\d+$/.test(raw))
350
+ return 10_000;
351
+ return Number(raw);
352
+ })();
353
+ try {
354
+ await Promise.race([
355
+ backend.disconnect(),
356
+ new Promise((_, reject) => {
357
+ const t = setTimeout(() => reject(new Error(`backend.disconnect() timed out after ${disconnectTimeoutMs}ms`)), disconnectTimeoutMs);
358
+ t.unref?.();
359
+ }),
360
+ ]);
361
+ }
362
+ catch (err) {
363
+ logger?.error?.(`[blok][shutdown] backend disconnect failed (or timed out): ${err instanceof Error ? err.message : String(err)}`);
364
+ }
365
+ }
366
+ logger?.log?.("[blok][shutdown] graceful shutdown complete");
367
+ }
368
+ catch (err) {
369
+ logger?.error?.(`[blok][shutdown] drain error: ${err instanceof Error ? err.message : String(err)}`);
370
+ }
371
+ finally {
372
+ process.exit(0);
373
+ }
374
+ };
375
+ process.on("SIGTERM", onSignal);
376
+ process.on("SIGINT", onSignal);
377
+ }
378
+ /** Test-only — reset the install flag so tests can re-install handlers. */
379
+ static resetShutdownHandlersInstalled() {
380
+ TriggerBase.shutdownHandlersInstalled = false;
381
+ }
382
+ /**
383
+ * Tier 2 quick-wins follow-up — boot recovery for orphaned `running`
384
+ * runs. Scans the store for runs in `running` status whose
385
+ * `startedAt` is older than `thresholdMs` ago (default 2 minutes,
386
+ * override via `BLOK_ORPHAN_THRESHOLD_MS` env var). Flips each to
387
+ * `"crashed"` with `Error("Orphaned — process restarted before run completed")`.
388
+ *
389
+ * Catches the case where the previous process died via SIGKILL or
390
+ * OOM and the `installCrashHandlers` path never ran. Returns the
391
+ * count flipped for observability + tests.
392
+ *
393
+ * Idempotent — safe to call multiple times; runs are flipped to
394
+ * a terminal status so a second pass finds none.
395
+ */
396
+ static recoverOrphanedRuns(thresholdMs, logger) {
397
+ if (process.env.BLOK_CRASH_AUTOFLIP_DISABLED === "1")
398
+ return 0;
399
+ const envThreshold = process.env.BLOK_ORPHAN_THRESHOLD_MS;
400
+ const threshold = thresholdMs ?? (envThreshold && /^\d+$/.test(envThreshold) ? Number(envThreshold) : 2 * 60 * 1000);
401
+ const tracker = RunTracker.getInstance();
402
+ if (!tracker.active)
403
+ return 0;
404
+ const cutoff = Date.now() - threshold;
405
+ const flipped = tracker.markAllRunningRunsAsCrashed(new Error("Orphaned — process restarted before run completed"), { maxStartedAt: cutoff });
406
+ if (flipped > 0) {
407
+ logger?.log?.(`[blok][crash-autoflip] boot recovery — flipped ${flipped} orphaned run(s) older than ${threshold}ms to crashed`);
408
+ }
409
+ return flipped;
410
+ }
48
411
  // --- Hot Module Replacement ---
49
412
  /**
50
413
  * Enable hot reload for this trigger. Only active in development
@@ -161,25 +524,347 @@ export default class TriggerBase extends Trigger {
161
524
  this.inFlightRequests++;
162
525
  const runStart = performance.now();
163
526
  let runSuccess = true;
527
+ // Tier 2 #6 — concurrency lock claim, populated when the gate grants
528
+ // a slot. Released in the `finally` block. Null when the workflow has
529
+ // no concurrency gate or the gate failed open (key resolution).
530
+ let acquiredLock = null;
164
531
  // --- Trace: start run ---
532
+ // Tier 2 #5 + #7 · skip startRun on re-entry from a deferred timer.
533
+ // The deferred dispatcher (DeferredRunScheduler / DebounceCoordinator)
534
+ // re-enters `run(ctx)` with `_blokDispatchReentry = true` after the
535
+ // wait window closes; the existing run record is reused via
536
+ // `ctx._traceRunId`.
165
537
  const tracker = RunTracker.getInstance();
166
538
  let traceRunId;
167
- if (tracker.active) {
539
+ const ctxRecord = ctx;
540
+ const isReentryAtTrace = ctxRecord._blokDispatchReentry === true;
541
+ if (tracker.active && isReentryAtTrace) {
542
+ traceRunId = ctxRecord._traceRunId;
543
+ // Logger wrapping was already applied on the first pass — no
544
+ // need to re-wrap (and re-wrapping would double-route logs).
545
+ // PR 1 follow-up · A2 fix. The first-pass `finally` block
546
+ // unregisters the AbortController via `tracker.unregisterAbortController`.
547
+ // Without re-registering on re-entry, `tracker.abortRunningRun(runId)`
548
+ // can't fire the controller — the controller stays on
549
+ // `ctx._PRIVATE_.abortController` but the tracker's lookup
550
+ // returns undefined. Operator cancel of a `running` run that
551
+ // came from delayed/queued/debounced flips status to "cancelled"
552
+ // but the in-flight step never sees `ctx.signal.aborted`.
553
+ // Re-register here mirroring the first-pass branch below.
554
+ if (traceRunId) {
555
+ const privateSlot = ctx._PRIVATE_;
556
+ if (privateSlot?.abortController) {
557
+ tracker.registerAbortController(traceRunId, privateSlot.abortController);
558
+ }
559
+ // v0.6 prerequisite for wait-inside-primitives Phase 2 —
560
+ // rehydrate `ctx.state` from the persisted snapshot the
561
+ // runner took at the wait throw site. Two re-entry paths
562
+ // converge here:
563
+ // 1. In-process timer fire — same `ctx`, state already
564
+ // populated. Rehydrate is a no-op (the parsed
565
+ // snapshot equals current state); we still apply it
566
+ // for uniformity and to forgive any micro-drift
567
+ // between snapshot and current state if a malicious
568
+ // caller re-enters with a tampered ctx.
569
+ // 2. Cross-process recovery (`recoverDispatches` →
570
+ // `restoreDispatch` → `dispatchDeferred` with a
571
+ // fresh ctx). Without rehydrate, state is empty and
572
+ // forEach iteration index / loop accumulator / saga
573
+ // progress are all lost.
574
+ //
575
+ // Mutates `ctx.state` IN PLACE rather than reassigning so
576
+ // the `vars: state` alias set up in `createContext` keeps
577
+ // pointing at the same object. Authors writing
578
+ // `ctx.vars[k] = v` continue to mutate the canonical
579
+ // store; otherwise we'd silently fork the two views.
580
+ const persistedRun = tracker.getStore().getRun(traceRunId);
581
+ if (persistedRun?.stateSnapshot) {
582
+ try {
583
+ const parsed = JSON.parse(persistedRun.stateSnapshot);
584
+ if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
585
+ const stateObj = ctx.state;
586
+ for (const k of Object.keys(stateObj)) {
587
+ delete stateObj[k];
588
+ }
589
+ Object.assign(stateObj, parsed);
590
+ }
591
+ }
592
+ catch (err) {
593
+ const msg = err instanceof Error ? err.message : String(err);
594
+ ctx.logger.logLevel("warn", `[blok][wait] failed to rehydrate ctx.state from snapshot: ${msg}. Proceeding with the ctx the runner was given.`);
595
+ }
596
+ }
597
+ // v0.6 wait-inside-primitives — rehydrate every NodeRun's
598
+ // `iteration_context` into a Map keyed by step NAME
599
+ // (not NodeRun id) so each primitive's run() can look
600
+ // itself up across defer/resume cycles. NodeRun ids
601
+ // CHANGE on every dispatchDeferred re-entry (tracker
602
+ // creates a fresh NodeRun per pass), but step names are
603
+ // stable across the run. Phase 4 — multiple primitives'
604
+ // cursors coexist (e.g. forEach > forEach > wait), one
605
+ // entry per primitive name. When two NodeRuns share a
606
+ // name (the rare case of two siblings at the same depth
607
+ // with the same step id), the LATEST wins via the
608
+ // startedAt-with-insertion-order tiebreak.
609
+ try {
610
+ const nodeRuns = tracker.getStore().getNodeRuns(traceRunId);
611
+ const sortedDesc = nodeRuns
612
+ .map((n, idx) => ({ n, idx }))
613
+ .filter(({ n }) => n.iterationContext !== undefined)
614
+ .sort((a, b) => {
615
+ const dt = b.n.startedAt - a.n.startedAt;
616
+ return dt !== 0 ? dt : b.idx - a.idx;
617
+ });
618
+ const cursorMap = new Map();
619
+ for (const { n } of sortedDesc) {
620
+ if (n.iterationContext === undefined)
621
+ continue;
622
+ // First write per name wins because sortedDesc is
623
+ // latest-first; this gives each primitive its most
624
+ // recent cursor.
625
+ if (!cursorMap.has(n.nodeName)) {
626
+ cursorMap.set(n.nodeName, n.iterationContext);
627
+ }
628
+ }
629
+ if (cursorMap.size > 0) {
630
+ ctx._blokIterationCursors = cursorMap;
631
+ }
632
+ // Back-compat `_blokIterationResume` (single-slot)
633
+ // keeps legacy callers working. Populated from the
634
+ // most recent cursor across the run.
635
+ const top = sortedDesc[0]?.n.iterationContext;
636
+ if (top) {
637
+ ctx._blokIterationResume = top;
638
+ }
639
+ }
640
+ catch (err) {
641
+ const msg = err instanceof Error ? err.message : String(err);
642
+ ctx.logger.logLevel("warn", `[blok][wait] failed to rehydrate iteration_context: ${msg}. primitives will resume from iteration 0.`);
643
+ }
644
+ }
645
+ }
646
+ else if (tracker.active) {
168
647
  const runner = this.getRunner();
169
648
  const stepCount = runner.getStepCount?.() ?? this.configuration.steps?.length ?? 0;
649
+ // Tier 1 · replay lineage. The replay endpoint
650
+ // (TraceRouter.POST /__blok/runs/:id/replay) sets
651
+ // `X-Blok-Replay-Of: <originalRunId>` on the dispatched HTTP
652
+ // request. Read it here so the new run carries `replayOf` and
653
+ // Studio can render a "Replay of #..." breadcrumb.
654
+ const reqHeaders = (ctx.request?.headers ?? {});
655
+ const replayOfHeader = reqHeaders["x-blok-replay-of"] ?? reqHeaders["X-Blok-Replay-Of"];
656
+ const replayOf = Array.isArray(replayOfHeader)
657
+ ? replayOfHeader[0]
658
+ : typeof replayOfHeader === "string"
659
+ ? replayOfHeader
660
+ : undefined;
661
+ // G2 (v0.6) · sub-workflow lineage across the HTTP boundary.
662
+ // `SubworkflowNode.dispatchHttpSelf` sets these headers on the
663
+ // outbound self-call so the receiver's run record carries the
664
+ // parent ids. Without this, an http-self child would appear
665
+ // as a fresh top-level run with no Studio breadcrumb.
666
+ const parentRunId = pickHeader(reqHeaders, "x-blok-parent-run-id");
667
+ const parentNodeRunId = pickHeader(reqHeaders, "x-blok-parent-node-run-id");
170
668
  const run = tracker.startRun({
171
669
  workflowName: this.configuration.name || ctx.workflow_name || "unknown",
172
670
  workflowPath: ctx.workflow_path || "",
173
671
  triggerType: this.constructor.name.replace("Trigger", "").toLowerCase() || "unknown",
174
672
  triggerSummary: this.buildTraceTriggerSummary(ctx),
175
673
  nodeCount: stepCount,
674
+ replayOf,
675
+ parentRunId,
676
+ parentNodeRunId,
176
677
  });
177
678
  traceRunId = run.id;
178
- ctx._traceRunId = run.id;
679
+ ctxRecord._traceRunId = run.id;
680
+ // Carry the sub-workflow depth across the HTTP hop so the
681
+ // recursion guard in nested children still fires.
682
+ const depthHeader = pickHeader(reqHeaders, "x-blok-subworkflow-depth");
683
+ const parsedDepth = depthHeader ? Number.parseInt(depthHeader, 10) : Number.NaN;
684
+ if (Number.isFinite(parsedDepth) && parsedDepth > 0) {
685
+ ctxRecord._subworkflowDepth = parsedDepth;
686
+ }
687
+ // Tier 2 follow-up · register the ctx's AbortController so the
688
+ // cancel API can fire it for `running` runs. Stashed on
689
+ // _PRIVATE_ by createContext; lookup via the optional shape.
690
+ const privateSlot = ctx._PRIVATE_;
691
+ if (privateSlot?.abortController) {
692
+ tracker.registerAbortController(run.id, privateSlot.abortController);
693
+ }
179
694
  // Wrap logger to forward log entries to RunTracker
180
695
  ctx.logger = new TracingLogger(ctx.logger, run.id, tracker);
181
696
  }
182
697
  try {
698
+ // --- Scheduling gates (Tier 2 #5 + #7) ---
699
+ // Run BEFORE the concurrency gate. Order: debounce → delay.
700
+ // Each gate may throw `DeferredDispatchSignal` to short-circuit
701
+ // the immediate dispatch path; the transport layer (HTTP/Worker)
702
+ // catches it and translates to 202 Accepted / NACK.
703
+ //
704
+ // Skipped on re-entry from a deferred timer (the timer callback
705
+ // sets `_blokDispatchReentry = true` on ctx) so we don't loop.
706
+ // Also skipped when:
707
+ // - tracker inactive (deferred dispatch needs persistence to
708
+ // survive even within the process lifetime)
709
+ // - `BLOK_SCHEDULING_DISABLED=1` (kill-switch).
710
+ const isReentry = ctx._blokDispatchReentry === true;
711
+ if (!isReentry && traceRunId && process.env.BLOK_SCHEDULING_DISABLED !== "1") {
712
+ const schedCfg = readSchedulingConfig(this.configuration.trigger);
713
+ if (schedCfg) {
714
+ const signal = await this.maybeDeferRun(ctx, traceRunId, schedCfg);
715
+ if (signal)
716
+ throw signal;
717
+ }
718
+ }
719
+ // --- Concurrency gate (Tier 2 #6) ---
720
+ // Runs after `tracker.startRun` so denied attempts appear in
721
+ // Studio with status "throttled". Skipped when:
722
+ // - tracker is inactive (lock store IS the run store)
723
+ // - the trigger config has no `concurrencyKey`
724
+ // - the resolved key is null/undefined (fail-open, matches
725
+ // idempotency-cache semantics)
726
+ // - `BLOK_CONCURRENCY_DISABLED=1` (kill-switch).
727
+ if (traceRunId && process.env.BLOK_CONCURRENCY_DISABLED !== "1") {
728
+ const concCfg = readConcurrencyConfig(this.configuration.trigger);
729
+ if (concCfg) {
730
+ const resolvedKey = resolveIdempotencyKey(concCfg.keyExpression, ctx);
731
+ if (resolvedKey !== null) {
732
+ const workflowName = this.configuration.name || ctx.workflow_name || "unknown";
733
+ const now = Date.now();
734
+ const result = await tracker.acquireConcurrencySlot(workflowName, resolvedKey, concCfg.limit, traceRunId, now + concCfg.leaseMs);
735
+ if (!result.acquired) {
736
+ // Tier 2 #6 follow-up — when the trigger is configured with
737
+ // `onLimit: "queue"`, defer the run via the in-process scheduler
738
+ // (Tier 2 #5+#7 plumbing) and re-attempt acquisition after a 1s
739
+ // delay instead of throwing. HTTP gets 202 + Location, Worker
740
+ // ACKs without retry. Re-defer happens transparently when the
741
+ // timer fires and the gate denies again.
742
+ if (concCfg.onLimit === "queue") {
743
+ // PR 5 B2 — TTL on queued runs. Compute on
744
+ // the first queue attempt and persist on the
745
+ // run record so re-defer attempts can check
746
+ // it. The existing `expiresAt` field on
747
+ // WorkflowRun is reused.
748
+ const existingRun = tracker.getStore().getRun(traceRunId);
749
+ const queueExpiresAt = existingRun?.expiresAt !== undefined
750
+ ? existingRun.expiresAt
751
+ : concCfg.queueTimeoutMs !== undefined
752
+ ? now + concCfg.queueTimeoutMs
753
+ : undefined;
754
+ if (queueExpiresAt !== undefined && now > queueExpiresAt) {
755
+ // TTL elapsed — flip to expired, no further re-defer.
756
+ tracker.markRunExpired(traceRunId, {
757
+ expiresAt: queueExpiresAt,
758
+ expiredAt: now,
759
+ });
760
+ ConcurrencyMetrics.getInstance().recordDenied({
761
+ workflow_name: workflowName,
762
+ concurrency_key: resolvedKey,
763
+ mode: "queue",
764
+ });
765
+ // PR 1-5 polish · throw a dedicated error so the HTTP
766
+ // transport returns 410 Gone instead of 429 Retry-After.
767
+ // Conflating queue-expired (permanently dead — the timer
768
+ // won't re-fire) with throttled (transient resource
769
+ // pressure) misleads clients into retrying. Status was
770
+ // already flipped to `expired` above, so the run record
771
+ // reflects reality regardless of the transport choice.
772
+ throw new QueueExpiredError({
773
+ workflowName,
774
+ concurrencyKey: resolvedKey,
775
+ queueExpiredAt: queueExpiresAt,
776
+ runId: traceRunId,
777
+ });
778
+ }
779
+ // PR 5 B3 — capped exponential backoff for re-defer.
780
+ // Track attempt count via existing pingCount field on the run record.
781
+ //
782
+ // Review fix-up · CONCERN-4. Clamp the exponent before
783
+ // `factor ** attempt`. Math.min would clamp the result
784
+ // (saving us from Infinity), but `factor ** 1024` is
785
+ // expensive and wasteful; clamping the exponent at
786
+ // `MAX_BACKOFF_EXPONENT` keeps the math cheap regardless
787
+ // of how many times a queue re-defers.
788
+ const MAX_BACKOFF_EXPONENT = 30;
789
+ const attempt = existingRun?.pingCount ?? 0;
790
+ const minBackoff = concCfg.queueRetry?.minBackoffMs ?? 1000;
791
+ const maxBackoff = concCfg.queueRetry?.maxBackoffMs ?? 30_000;
792
+ const factor = concCfg.queueRetry?.factor ?? 2;
793
+ const safeExponent = Math.min(attempt, MAX_BACKOFF_EXPONENT);
794
+ const retryAfterMs = Math.min(maxBackoff, minBackoff * factor ** safeExponent);
795
+ const scheduledAt = now + retryAfterMs;
796
+ tracker.markRunQueued(traceRunId, {
797
+ concurrencyKey: resolvedKey,
798
+ concurrencyLimit: concCfg.limit,
799
+ currentInFlight: result.currentInFlight,
800
+ scheduledAt,
801
+ });
802
+ // Bump pingCount (= attempt counter for backoff) and
803
+ // persist queueExpiresAt on first queue attempt.
804
+ tracker.getStore().updateRun(traceRunId, {
805
+ pingCount: attempt + 1,
806
+ ...(queueExpiresAt !== undefined && existingRun?.expiresAt === undefined
807
+ ? { expiresAt: queueExpiresAt }
808
+ : {}),
809
+ });
810
+ ConcurrencyMetrics.getInstance().recordDenied({
811
+ workflow_name: workflowName,
812
+ concurrency_key: resolvedKey,
813
+ mode: "queue",
814
+ });
815
+ const expiresAtForDispatch = undefined;
816
+ // Tier 2 #5+#7 follow-up · durable scheduling. Persist the
817
+ // dispatch row only when the subclass provides a payload
818
+ // (HttpTrigger.extractDispatchPayload returns the request
819
+ // subset; default returns null = in-memory only).
820
+ const persistPayload = this.extractDispatchPayload(ctx);
821
+ DeferredRunScheduler.getInstance().schedule(traceRunId, scheduledAt, async () => {
822
+ await this.dispatchDeferred(ctx, traceRunId, expiresAtForDispatch);
823
+ }, persistPayload === null
824
+ ? undefined
825
+ : {
826
+ workflowName,
827
+ triggerType: this.getTriggerType(),
828
+ expiresAt: expiresAtForDispatch,
829
+ dispatchStatus: "queued",
830
+ payload: persistPayload,
831
+ });
832
+ throw new DeferredDispatchSignal({
833
+ runId: traceRunId,
834
+ workflowName,
835
+ status: "queued",
836
+ scheduledAt,
837
+ debounced: false,
838
+ pingCount: 1,
839
+ });
840
+ }
841
+ tracker.markRunThrottled(traceRunId, {
842
+ concurrencyKey: resolvedKey,
843
+ concurrencyLimit: concCfg.limit,
844
+ currentInFlight: result.currentInFlight,
845
+ });
846
+ ConcurrencyMetrics.getInstance().recordDenied({
847
+ workflow_name: workflowName,
848
+ concurrency_key: resolvedKey,
849
+ mode: "throw",
850
+ });
851
+ throw new ConcurrencyLimitError({
852
+ workflowName,
853
+ concurrencyKey: resolvedKey,
854
+ concurrencyLimit: concCfg.limit,
855
+ currentInFlight: result.currentInFlight,
856
+ retryAfterMs: 1000,
857
+ runId: traceRunId,
858
+ });
859
+ }
860
+ acquiredLock = { workflowName, concurrencyKey: resolvedKey, runId: traceRunId };
861
+ ConcurrencyMetrics.getInstance().recordAcquired({
862
+ workflow_name: workflowName,
863
+ concurrency_key: resolvedKey,
864
+ });
865
+ }
866
+ }
867
+ }
183
868
  const start = performance.now();
184
869
  const defaultMeter = metrics.getMeter("default");
185
870
  const workflow_execution = defaultMeter.createCounter("workflow", {
@@ -285,6 +970,31 @@ export default class TriggerBase extends Trigger {
285
970
  // --- Trace: complete run ---
286
971
  if (traceRunId) {
287
972
  tracker.completeRun(traceRunId, context.response?.data);
973
+ // Sample-body recording (option C follow-up to #100).
974
+ // When any of the workflow's triggers opts in via
975
+ // `recordSample: true`, capture the request body of the
976
+ // FIRST successful run. The store enforces first-record-
977
+ // wins so subsequent successes are no-ops; we also early-
978
+ // return when a sample is already on file to avoid an
979
+ // unnecessary roundtrip per run.
980
+ try {
981
+ if (shouldRecordSample(this.configuration.trigger)) {
982
+ const workflowName = this.configuration.name || ctx.workflow_name || "";
983
+ if (workflowName && !tracker.getWorkflowSample(workflowName)) {
984
+ tracker.recordWorkflowSample({
985
+ workflowName,
986
+ body: context.request?.body,
987
+ sourceRunId: traceRunId,
988
+ recordedAt: Date.now(),
989
+ });
990
+ }
991
+ }
992
+ }
993
+ catch (err) {
994
+ // Recording is a nice-to-have for operators; never let
995
+ // it fail the run.
996
+ console.error("[blok] failed to record workflow sample:", err.message);
997
+ }
288
998
  }
289
999
  return {
290
1000
  ctx: context,
@@ -293,13 +1003,96 @@ export default class TriggerBase extends Trigger {
293
1003
  }
294
1004
  catch (err) {
295
1005
  runSuccess = false;
1006
+ // PR 4 — wait.for / wait.until step requesting deferred dispatch.
1007
+ // Translate to the existing scheduling pipeline:
1008
+ // 1. Mark run "delayed" with the wait deadline as scheduledAt.
1009
+ // 2. Persist the dispatch row (durable scheduler) so the wait
1010
+ // survives process restart.
1011
+ // 3. Register a setTimeout via DeferredRunScheduler.
1012
+ // 4. Throw DeferredDispatchSignal — HTTP transport returns 202.
1013
+ // The runner already set lastCompletedStepIndex before throwing
1014
+ // WaitDispatchRequest so the dispatchDeferred re-entry skips
1015
+ // past completed pre-wait steps.
1016
+ if (err instanceof WaitDispatchRequest && traceRunId) {
1017
+ const workflowName = this.configuration.name || ctx.workflow_name || "unknown";
1018
+ const scheduledAt = err.info.scheduledAt;
1019
+ const delayMs = Math.max(0, scheduledAt - Date.now());
1020
+ tracker.markRunDelayed(traceRunId, { scheduledAt, delayMs });
1021
+ const persistPayload = this.extractDispatchPayload(ctx);
1022
+ DeferredRunScheduler.getInstance().schedule(traceRunId, scheduledAt, async () => {
1023
+ await this.dispatchDeferred(ctx, traceRunId, undefined);
1024
+ }, persistPayload === null
1025
+ ? undefined
1026
+ : {
1027
+ workflowName,
1028
+ triggerType: this.getTriggerType(),
1029
+ dispatchStatus: "delayed",
1030
+ payload: persistPayload,
1031
+ });
1032
+ // Throw DeferredDispatchSignal so the transport layer can
1033
+ // translate to 202 Accepted (HTTP) / ACK without retry (Worker).
1034
+ throw new DeferredDispatchSignal({
1035
+ runId: traceRunId,
1036
+ workflowName,
1037
+ status: "delayed",
1038
+ scheduledAt,
1039
+ debounced: false,
1040
+ pingCount: 1,
1041
+ });
1042
+ }
296
1043
  // --- Trace: fail run ---
297
- if (traceRunId) {
1044
+ // Tier 2 #6: ConcurrencyLimitError already flipped the run's
1045
+ // status to "throttled" via markRunThrottled — don't override
1046
+ // it with "failed". The transport layer translates → 429 / NACK.
1047
+ //
1048
+ // Tier 2 #5 + #7: DeferredDispatchSignal already flipped the
1049
+ // run's status to "delayed" or "debounced". Don't override it
1050
+ // with "failed". The transport layer translates → 202 Accepted.
1051
+ //
1052
+ // Tier 2 follow-up: RunCancelledError is thrown by RunnerSteps
1053
+ // when an operator cancels via `abortRunningRun`. The tracker
1054
+ // has already flipped the run to "cancelled"; don't override.
1055
+ //
1056
+ // PR 4: WaitDispatchRequest is handled above (translated to
1057
+ // DeferredDispatchSignal); shouldn't reach here.
1058
+ //
1059
+ // PR 1-5 polish: QueueExpiredError flipped the run's status to
1060
+ // "expired" via markRunExpired — don't override it with
1061
+ // "failed". The HTTP transport translates → 410 Gone.
1062
+ if (traceRunId &&
1063
+ !(err instanceof ConcurrencyLimitError) &&
1064
+ !(err instanceof QueueExpiredError) &&
1065
+ !(err instanceof DeferredDispatchSignal) &&
1066
+ !(err instanceof RunCancelledError) &&
1067
+ !(err instanceof WaitDispatchRequest)) {
298
1068
  tracker.failRun(traceRunId, err instanceof Error ? err : new Error(String(err)));
299
1069
  }
300
1070
  throw err;
301
1071
  }
302
1072
  finally {
1073
+ // Release the concurrency slot if the gate granted one. Idempotent
1074
+ // at the store layer — a double-release (gate granted but then
1075
+ // crash + lazy-purge) is a no-op. `releaseConcurrencySlot` is async
1076
+ // (Tier 2 #6 follow-up cross-process backend); fire-and-forget here
1077
+ // — the finally block can't `await` cleanly across all callers, and
1078
+ // release errors don't change the run outcome. Errors logged via
1079
+ // the backend's own catch handlers.
1080
+ if (acquiredLock) {
1081
+ const lock = acquiredLock;
1082
+ void tracker.releaseConcurrencySlot(lock.workflowName, lock.concurrencyKey, lock.runId).catch((err) => {
1083
+ console.error(`[blok][concurrency] releaseConcurrencySlot failed for ${lock.workflowName}:${lock.concurrencyKey}:${lock.runId}:`, err instanceof Error ? err.stack || err.message : err);
1084
+ });
1085
+ ConcurrencyMetrics.getInstance().recordReleased({
1086
+ workflow_name: lock.workflowName,
1087
+ concurrency_key: lock.concurrencyKey,
1088
+ });
1089
+ }
1090
+ // Tier 2 follow-up · clean up the AbortController registration
1091
+ // once the run is terminal. Idempotent — safe even if the run
1092
+ // was cancelled mid-flight (the tracker already aborted).
1093
+ if (traceRunId) {
1094
+ tracker.unregisterAbortController(traceRunId);
1095
+ }
303
1096
  const durationMs = performance.now() - runStart;
304
1097
  this.metricsBridge.recordExecution(durationMs, runSuccess, {
305
1098
  workflow_name: this.configuration.name || "",
@@ -309,6 +1102,229 @@ export default class TriggerBase extends Trigger {
309
1102
  this.inFlightRequests--;
310
1103
  }
311
1104
  }
1105
+ /**
1106
+ * Tier 2 #5 + #7 — evaluate the scheduling gates and either return a
1107
+ * `DeferredDispatchSignal` (the caller throws it) or null (the caller
1108
+ * proceeds with immediate dispatch).
1109
+ *
1110
+ * Order: debounce → delay. They DON'T compose in a single PR (a
1111
+ * trigger may use one or the other; both at once would be unusual).
1112
+ * If both are configured, debounce takes precedence — the debounce
1113
+ * coordinator handles its own scheduling (the `delay` field is
1114
+ * effectively ignored on debounced triggers).
1115
+ */
1116
+ async maybeDeferRun(ctx, traceRunId, schedCfg) {
1117
+ const tracker = RunTracker.getInstance();
1118
+ const workflowName = this.configuration.name || ctx.workflow_name || "unknown";
1119
+ // === Debounce gate (Tier 2 #7) ===
1120
+ if (schedCfg.debounce) {
1121
+ const resolvedKey = resolveIdempotencyKey(schedCfg.debounce.keyExpression, ctx);
1122
+ if (resolvedKey === null) {
1123
+ // Fail-open — same semantics as concurrency-key resolution.
1124
+ return null;
1125
+ }
1126
+ // Tier 2 follow-up · persist debounce dispatches alongside delay/queue
1127
+ // entries. The DebounceCoordinator timer remains the in-process source
1128
+ // of truth (silence-window semantics + latest-payload coalesce); the
1129
+ // persisted row is for crash-recovery only. On boot, recovered
1130
+ // debounced rows fire via setTimeout (no silence-window re-establishment
1131
+ // — the time has already passed).
1132
+ const persistPayload = this.extractDispatchPayload(ctx);
1133
+ const triggerType = this.getTriggerType();
1134
+ const onFire = async () => {
1135
+ try {
1136
+ await this.dispatchDeferred(ctx, traceRunId, undefined);
1137
+ }
1138
+ catch (err) {
1139
+ console.error(`[blok][scheduling] debounce dispatchDeferred failed for run ${traceRunId}:`, err instanceof Error ? err.stack || err.message : err);
1140
+ }
1141
+ finally {
1142
+ // Best-effort cleanup — the DeferredRunScheduler delete-on-fire
1143
+ // path doesn't apply here (debounce uses its own timer). Use
1144
+ // the scheduler's persistedOnly cancel to delete the row.
1145
+ if (persistPayload !== null) {
1146
+ DeferredRunScheduler.getInstance().cancel(traceRunId, true);
1147
+ }
1148
+ }
1149
+ };
1150
+ const result = await DebounceCoordinator.getInstance().register({
1151
+ workflowName,
1152
+ debounceKey: resolvedKey,
1153
+ mode: schedCfg.debounce.mode,
1154
+ delayMs: schedCfg.debounce.delayMs,
1155
+ maxDelayMs: schedCfg.debounce.maxDelayMs,
1156
+ runId: traceRunId,
1157
+ onFire,
1158
+ });
1159
+ if (result.outcome === "fire-immediate") {
1160
+ // Leading-mode fresh window: caller runs the workflow synchronously.
1161
+ // The coordinator already opened its window so subsequent pings
1162
+ // within `delayMs` will coalesce. Caller continues to the
1163
+ // concurrency gate + runner.run path.
1164
+ return null;
1165
+ }
1166
+ if (result.outcome === "schedule-trailing") {
1167
+ // Trailing-mode fresh window: this run is the active one. Mark
1168
+ // `debounced` (transient) and throw the signal.
1169
+ tracker.markRunDebounced(traceRunId, {
1170
+ debounceKey: resolvedKey,
1171
+ mode: schedCfg.debounce.mode,
1172
+ pingCount: result.pingCount,
1173
+ scheduledAt: result.scheduledAt,
1174
+ });
1175
+ // Tier 2 follow-up · durable debounce. Write a `dispatch_status:
1176
+ // "debounced"` row so a process crash mid-window leaves a recoverable
1177
+ // pointer at the active run + its captured payload.
1178
+ if (persistPayload !== null && tracker.active) {
1179
+ try {
1180
+ tracker.getStore().upsertScheduledDispatch({
1181
+ runId: traceRunId,
1182
+ workflowName,
1183
+ triggerType,
1184
+ scheduledAt: result.scheduledAt ?? Date.now(),
1185
+ dispatchStatus: "debounced",
1186
+ payload: persistPayload,
1187
+ createdAt: Date.now(),
1188
+ });
1189
+ }
1190
+ catch (err) {
1191
+ console.error(`[blok][scheduling] persist debounce dispatch failed for run ${traceRunId}:`, err instanceof Error ? err.stack || err.message : err);
1192
+ }
1193
+ }
1194
+ return new DeferredDispatchSignal({
1195
+ runId: traceRunId,
1196
+ workflowName,
1197
+ status: "debounced",
1198
+ scheduledAt: result.scheduledAt ?? Date.now(),
1199
+ debounced: true,
1200
+ pingCount: result.pingCount,
1201
+ });
1202
+ }
1203
+ // Coalesce — this ping joined an existing window. Mark THIS run
1204
+ // `debounced` terminal pointing at the active run, and bump the
1205
+ // active run's pingCount (best-effort — the active run is in the
1206
+ // store).
1207
+ tracker.markRunDebounced(traceRunId, {
1208
+ debounceKey: resolvedKey,
1209
+ mode: schedCfg.debounce.mode,
1210
+ intoRunId: result.activeRunId,
1211
+ pingCount: result.pingCount,
1212
+ });
1213
+ tracker.recordDebouncePing(result.activeRunId, {
1214
+ pingCount: result.pingCount,
1215
+ scheduledAt: result.scheduledAt ?? Date.now(),
1216
+ });
1217
+ // Tier 2 follow-up · update the active run's persisted dispatch with
1218
+ // the latest payload + new scheduledAt. Trailing mode: each ping
1219
+ // resets the dispatch time, and the coordinator captures the latest
1220
+ // onFire closure — we mirror that into the persisted row so a crash
1221
+ // recovery uses the latest payload.
1222
+ if (result.outcome === "coalesce" &&
1223
+ schedCfg.debounce.mode === "trailing" &&
1224
+ persistPayload !== null &&
1225
+ tracker.active) {
1226
+ try {
1227
+ tracker.getStore().upsertScheduledDispatch({
1228
+ runId: result.activeRunId,
1229
+ workflowName,
1230
+ triggerType,
1231
+ scheduledAt: result.scheduledAt ?? Date.now(),
1232
+ dispatchStatus: "debounced",
1233
+ payload: persistPayload,
1234
+ createdAt: Date.now(),
1235
+ });
1236
+ }
1237
+ catch (err) {
1238
+ console.error(`[blok][scheduling] persist debounce coalesce failed for run ${result.activeRunId}:`, err instanceof Error ? err.stack || err.message : err);
1239
+ }
1240
+ }
1241
+ return new DeferredDispatchSignal({
1242
+ runId: traceRunId,
1243
+ workflowName,
1244
+ status: "debounced",
1245
+ scheduledAt: result.scheduledAt ?? Date.now(),
1246
+ debounced: true,
1247
+ pingCount: result.pingCount,
1248
+ intoRunId: result.activeRunId,
1249
+ });
1250
+ }
1251
+ // === Delay gate (Tier 2 #5) ===
1252
+ if (schedCfg.delayMs !== undefined && schedCfg.delayMs > 0) {
1253
+ const scheduledAt = Date.now() + schedCfg.delayMs;
1254
+ const expiresAt = schedCfg.ttlMs !== undefined ? Date.now() + schedCfg.ttlMs : undefined;
1255
+ tracker.markRunDelayed(traceRunId, {
1256
+ scheduledAt,
1257
+ delayMs: schedCfg.delayMs,
1258
+ expiresAt,
1259
+ });
1260
+ // Tier 2 #5+#7 follow-up · durable scheduling.
1261
+ const persistPayload = this.extractDispatchPayload(ctx);
1262
+ DeferredRunScheduler.getInstance().schedule(traceRunId, scheduledAt, async () => {
1263
+ await this.dispatchDeferred(ctx, traceRunId, expiresAt);
1264
+ }, persistPayload === null
1265
+ ? undefined
1266
+ : {
1267
+ workflowName,
1268
+ triggerType: this.getTriggerType(),
1269
+ expiresAt,
1270
+ dispatchStatus: "delayed",
1271
+ payload: persistPayload,
1272
+ });
1273
+ return new DeferredDispatchSignal({
1274
+ runId: traceRunId,
1275
+ workflowName,
1276
+ status: "delayed",
1277
+ scheduledAt,
1278
+ expiresAt,
1279
+ debounced: false,
1280
+ pingCount: 1,
1281
+ });
1282
+ }
1283
+ return null;
1284
+ }
1285
+ /**
1286
+ * Tier 2 #5 + #7 — re-enter the dispatch pipeline for a deferred run.
1287
+ *
1288
+ * Called by the `DeferredRunScheduler` timer (delay) or
1289
+ * `DebounceCoordinator.onFire` (debounce trailing) when the wait
1290
+ * window closes. Checks TTL, transitions the run to `running`, and
1291
+ * re-enters `run(ctx)` with the `_blokDispatchReentry` flag so the
1292
+ * scheduling gates are skipped on the second pass.
1293
+ *
1294
+ * The re-entered `run(ctx)` reuses the existing `traceRunId` (already
1295
+ * stashed on `ctx._traceRunId` from the first pass).
1296
+ */
1297
+ async dispatchDeferred(ctx, traceRunId, expiresAt) {
1298
+ const tracker = RunTracker.getInstance();
1299
+ // TTL check — fire-once-then-give-up. If the dispatch is past its
1300
+ // TTL, mark the run `expired` and abort.
1301
+ if (expiresAt !== undefined && Date.now() > expiresAt) {
1302
+ tracker.markRunExpired(traceRunId, {
1303
+ expiresAt,
1304
+ expiredAt: Date.now(),
1305
+ });
1306
+ return;
1307
+ }
1308
+ // Flip status delayed/debounced → running.
1309
+ tracker.transitionRunToRunning(traceRunId);
1310
+ // Re-enter the dispatch pipeline. The reentry flag short-circuits
1311
+ // the scheduling gates so we don't loop. The existing traceRunId
1312
+ // is preserved (no second startRun call — see top of run()).
1313
+ const ctxRecord = ctx;
1314
+ ctxRecord._blokDispatchReentry = true;
1315
+ try {
1316
+ await this.run(ctx);
1317
+ }
1318
+ catch (err) {
1319
+ // The re-entered `run()` already handled tracker.failRun /
1320
+ // markRunThrottled internally. Swallow here so timer callbacks
1321
+ // don't crash on uncaught rejections.
1322
+ void err;
1323
+ }
1324
+ finally {
1325
+ ctxRecord._blokDispatchReentry = false;
1326
+ }
1327
+ }
312
1328
  /**
313
1329
  * Build a human-readable trigger summary for trace display.
314
1330
  */
@@ -326,6 +1342,14 @@ export default class TriggerBase extends Trigger {
326
1342
  // Single state object — shared by ctx.state (canonical) and ctx.vars
327
1343
  // (legacy alias). All step outputs land here unless `ephemeral: true`.
328
1344
  const state = {};
1345
+ // Tier 2 follow-up · cooperative cancellation. Each context owns
1346
+ // an AbortController whose signal flips when an operator cancels
1347
+ // the run via `POST /__blok/runs/:runId/cancel` while it's in
1348
+ // `running` status. RunnerSteps' between-step check throws
1349
+ // `RunCancelledError` which TriggerBase catches without flipping
1350
+ // the run to `failed` (the tracker has already flipped it to
1351
+ // `cancelled`).
1352
+ const abortController = new AbortController();
329
1353
  const ctx = {
330
1354
  id: requestId,
331
1355
  workflow_name: this.configuration.name,
@@ -341,7 +1365,10 @@ export default class TriggerBase extends Trigger {
341
1365
  // to either propagate. Authors writing `ctx.vars[k] = v` keep
342
1366
  // working; the runner reads via state.
343
1367
  vars: state,
344
- _PRIVATE_: null,
1368
+ signal: abortController.signal,
1369
+ // Stash the controller on _PRIVATE_ so TriggerBase.run can
1370
+ // hand it to the tracker without exposing it on the public ctx.
1371
+ _PRIVATE_: { abortController },
345
1372
  };
346
1373
  // V2 read-only aliases — same object reference, no copy.
347
1374
  // Reads via ctx.req / ctx.prev work; writes go to the canonical
@@ -371,7 +1398,7 @@ export default class TriggerBase extends Trigger {
371
1398
  enumerable: true,
372
1399
  });
373
1400
  Object.defineProperty(ctx, "env", {
374
- value: process.env,
1401
+ value: getEnvForCtx(),
375
1402
  enumerable: true,
376
1403
  });
377
1404
  return ctx;