@blokjs/runner 0.2.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Blok.js +11 -11
- package/dist/Blok.js.map +1 -1
- package/dist/Configuration.d.ts +39 -2
- package/dist/Configuration.js +337 -28
- package/dist/Configuration.js.map +1 -1
- package/dist/ConfigurationResolver.d.ts +9 -0
- package/dist/ConfigurationResolver.js +17 -1
- package/dist/ConfigurationResolver.js.map +1 -1
- package/dist/PayloadTooLargeError.d.ts +19 -0
- package/dist/PayloadTooLargeError.js +29 -0
- package/dist/PayloadTooLargeError.js.map +1 -0
- package/dist/RunCancelledError.d.ts +17 -0
- package/dist/RunCancelledError.js +25 -0
- package/dist/RunCancelledError.js.map +1 -0
- package/dist/RunnerSteps.js +363 -23
- package/dist/RunnerSteps.js.map +1 -1
- package/dist/RuntimeAdapterNode.d.ts +32 -2
- package/dist/RuntimeAdapterNode.js +122 -27
- package/dist/RuntimeAdapterNode.js.map +1 -1
- package/dist/SubworkflowNode.d.ts +75 -0
- package/dist/SubworkflowNode.js +221 -0
- package/dist/SubworkflowNode.js.map +1 -0
- package/dist/TriggerBase.d.ts +128 -0
- package/dist/TriggerBase.js +808 -6
- package/dist/TriggerBase.js.map +1 -1
- package/dist/WaitDispatchRequest.d.ts +38 -0
- package/dist/WaitDispatchRequest.js +13 -0
- package/dist/WaitDispatchRequest.js.map +1 -0
- package/dist/WaitNode.d.ts +23 -0
- package/dist/WaitNode.js +26 -0
- package/dist/WaitNode.js.map +1 -0
- package/dist/adapters/BunRuntimeAdapter.d.ts +1 -0
- package/dist/adapters/BunRuntimeAdapter.js +1 -0
- package/dist/adapters/BunRuntimeAdapter.js.map +1 -1
- package/dist/adapters/DockerRuntimeAdapter.d.ts +2 -1
- package/dist/adapters/DockerRuntimeAdapter.js +10 -1
- package/dist/adapters/DockerRuntimeAdapter.js.map +1 -1
- package/dist/adapters/HttpRuntimeAdapter.d.ts +26 -5
- package/dist/adapters/HttpRuntimeAdapter.js +97 -16
- package/dist/adapters/HttpRuntimeAdapter.js.map +1 -1
- package/dist/adapters/NodeJsRuntimeAdapter.d.ts +1 -0
- package/dist/adapters/NodeJsRuntimeAdapter.js +1 -0
- package/dist/adapters/NodeJsRuntimeAdapter.js.map +1 -1
- package/dist/adapters/RuntimeAdapter.d.ts +17 -0
- package/dist/adapters/WasmRuntimeAdapter.d.ts +1 -0
- package/dist/adapters/WasmRuntimeAdapter.js +1 -0
- package/dist/adapters/WasmRuntimeAdapter.js.map +1 -1
- package/dist/adapters/grpc/GrpcChannelOptions.d.ts +31 -0
- package/dist/adapters/grpc/GrpcChannelOptions.js +68 -0
- package/dist/adapters/grpc/GrpcChannelOptions.js.map +1 -0
- package/dist/adapters/grpc/GrpcClientPool.d.ts +43 -0
- package/dist/adapters/grpc/GrpcClientPool.js +89 -0
- package/dist/adapters/grpc/GrpcClientPool.js.map +1 -0
- package/dist/adapters/grpc/GrpcCodec.d.ts +226 -0
- package/dist/adapters/grpc/GrpcCodec.js +275 -0
- package/dist/adapters/grpc/GrpcCodec.js.map +1 -0
- package/dist/adapters/grpc/GrpcErrors.d.ts +59 -0
- package/dist/adapters/grpc/GrpcErrors.js +190 -0
- package/dist/adapters/grpc/GrpcErrors.js.map +1 -0
- package/dist/adapters/grpc/GrpcHealthChecker.d.ts +69 -0
- package/dist/adapters/grpc/GrpcHealthChecker.js +96 -0
- package/dist/adapters/grpc/GrpcHealthChecker.js.map +1 -0
- package/dist/adapters/grpc/GrpcRuntimeAdapter.d.ts +98 -0
- package/dist/adapters/grpc/GrpcRuntimeAdapter.js +478 -0
- package/dist/adapters/grpc/GrpcRuntimeAdapter.js.map +1 -0
- package/dist/adapters/grpc/index.d.ts +13 -0
- package/dist/adapters/grpc/index.js +14 -0
- package/dist/adapters/grpc/index.js.map +1 -0
- package/dist/adapters/grpc/proto/blok/runtime/v1/runtime.proto +302 -0
- package/dist/adapters/grpc/types.d.ts +97 -0
- package/dist/adapters/grpc/types.js +41 -0
- package/dist/adapters/grpc/types.js.map +1 -0
- package/dist/adapters/transport.d.ts +108 -0
- package/dist/adapters/transport.js +196 -0
- package/dist/adapters/transport.js.map +1 -0
- package/dist/concurrency/ConcurrencyBackend.d.ts +61 -0
- package/dist/concurrency/ConcurrencyBackend.js +20 -0
- package/dist/concurrency/ConcurrencyBackend.js.map +1 -0
- package/dist/concurrency/ConcurrencyLimitError.d.ts +37 -0
- package/dist/concurrency/ConcurrencyLimitError.js +16 -0
- package/dist/concurrency/ConcurrencyLimitError.js.map +1 -0
- package/dist/concurrency/NatsKvConcurrencyBackend.d.ts +64 -0
- package/dist/concurrency/NatsKvConcurrencyBackend.js +297 -0
- package/dist/concurrency/NatsKvConcurrencyBackend.js.map +1 -0
- package/dist/concurrency/QueueExpiredError.d.ts +40 -0
- package/dist/concurrency/QueueExpiredError.js +15 -0
- package/dist/concurrency/QueueExpiredError.js.map +1 -0
- package/dist/concurrency/createConcurrencyBackend.d.ts +23 -0
- package/dist/concurrency/createConcurrencyBackend.js +34 -0
- package/dist/concurrency/createConcurrencyBackend.js.map +1 -0
- package/dist/concurrency/readConcurrencyConfig.d.ts +60 -0
- package/dist/concurrency/readConcurrencyConfig.js +60 -0
- package/dist/concurrency/readConcurrencyConfig.js.map +1 -0
- package/dist/idempotency/resolveIdempotencyKey.d.ts +20 -0
- package/dist/idempotency/resolveIdempotencyKey.js +37 -0
- package/dist/idempotency/resolveIdempotencyKey.js.map +1 -0
- package/dist/index.d.ts +35 -3
- package/dist/index.js +61 -2
- package/dist/index.js.map +1 -1
- package/dist/monitoring/ConcurrencyMetrics.d.ts +56 -0
- package/dist/monitoring/ConcurrencyMetrics.js +107 -0
- package/dist/monitoring/ConcurrencyMetrics.js.map +1 -0
- package/dist/monitoring/JanitorMetrics.d.ts +27 -0
- package/dist/monitoring/JanitorMetrics.js +48 -0
- package/dist/monitoring/JanitorMetrics.js.map +1 -0
- package/dist/scheduling/DebounceCoordinator.d.ts +88 -0
- package/dist/scheduling/DebounceCoordinator.js +141 -0
- package/dist/scheduling/DebounceCoordinator.js.map +1 -0
- package/dist/scheduling/DeferredDispatchSignal.d.ts +50 -0
- package/dist/scheduling/DeferredDispatchSignal.js +14 -0
- package/dist/scheduling/DeferredDispatchSignal.js.map +1 -0
- package/dist/scheduling/DeferredRunScheduler.d.ts +68 -0
- package/dist/scheduling/DeferredRunScheduler.js +154 -0
- package/dist/scheduling/DeferredRunScheduler.js.map +1 -0
- package/dist/scheduling/readSchedulingConfig.d.ts +24 -0
- package/dist/scheduling/readSchedulingConfig.js +52 -0
- package/dist/scheduling/readSchedulingConfig.js.map +1 -0
- package/dist/testing/WorkflowTestRunner.js +12 -0
- package/dist/testing/WorkflowTestRunner.js.map +1 -1
- package/dist/timeouts/StepTimeoutError.d.ts +22 -0
- package/dist/timeouts/StepTimeoutError.js +31 -0
- package/dist/timeouts/StepTimeoutError.js.map +1 -0
- package/dist/tracing/InMemoryRunStore.d.ts +28 -1
- package/dist/tracing/InMemoryRunStore.js +150 -0
- package/dist/tracing/InMemoryRunStore.js.map +1 -1
- package/dist/tracing/Janitor.d.ts +70 -0
- package/dist/tracing/Janitor.js +150 -0
- package/dist/tracing/Janitor.js.map +1 -0
- package/dist/tracing/PostgresRunStore.d.ts +30 -0
- package/dist/tracing/PostgresRunStore.js +435 -3
- package/dist/tracing/PostgresRunStore.js.map +1 -1
- package/dist/tracing/RunStore.d.ts +100 -1
- package/dist/tracing/RunTracker.d.ts +261 -11
- package/dist/tracing/RunTracker.js +691 -11
- package/dist/tracing/RunTracker.js.map +1 -1
- package/dist/tracing/SqliteRunStore.d.ts +23 -1
- package/dist/tracing/SqliteRunStore.js +421 -6
- package/dist/tracing/SqliteRunStore.js.map +1 -1
- package/dist/tracing/TraceRouter.d.ts +20 -2
- package/dist/tracing/TraceRouter.js +494 -9
- package/dist/tracing/TraceRouter.js.map +1 -1
- package/dist/tracing/sanitize.d.ts +11 -0
- package/dist/tracing/sanitize.js +29 -0
- package/dist/tracing/sanitize.js.map +1 -1
- package/dist/tracing/types.d.ts +429 -11
- package/dist/types/GlobalOptions.d.ts +9 -2
- package/dist/utils/createChildContext.d.ts +32 -0
- package/dist/utils/createChildContext.js +113 -0
- package/dist/utils/createChildContext.js.map +1 -0
- package/dist/workflow/PersistenceHelper.d.ts +46 -0
- package/dist/workflow/PersistenceHelper.js +57 -0
- package/dist/workflow/PersistenceHelper.js.map +1 -0
- package/dist/workflow/WorkflowNormalizer.d.ts +79 -0
- package/dist/workflow/WorkflowNormalizer.js +486 -0
- package/dist/workflow/WorkflowNormalizer.js.map +1 -0
- package/dist/workflow/WorkflowRegistry.d.ts +64 -0
- package/dist/workflow/WorkflowRegistry.js +81 -0
- package/dist/workflow/WorkflowRegistry.js.map +1 -0
- package/package.json +10 -7
package/dist/TriggerBase.js
CHANGED
|
@@ -3,13 +3,24 @@ import { metrics } from "@opentelemetry/api";
|
|
|
3
3
|
import { v4 as uuid } from "uuid";
|
|
4
4
|
import Configuration from "./Configuration";
|
|
5
5
|
import DefaultLogger from "./DefaultLogger";
|
|
6
|
+
import { RunCancelledError } from "./RunCancelledError";
|
|
6
7
|
import Runner from "./Runner";
|
|
8
|
+
import { WaitDispatchRequest } from "./WaitDispatchRequest";
|
|
9
|
+
import { ConcurrencyLimitError } from "./concurrency/ConcurrencyLimitError";
|
|
10
|
+
import { QueueExpiredError } from "./concurrency/QueueExpiredError";
|
|
11
|
+
import { readConcurrencyConfig } from "./concurrency/readConcurrencyConfig";
|
|
7
12
|
import { HotReloadManager } from "./hmr/HotReloadManager";
|
|
13
|
+
import { resolveIdempotencyKey } from "./idempotency/resolveIdempotencyKey";
|
|
8
14
|
import { CircuitBreaker } from "./monitoring/CircuitBreaker";
|
|
15
|
+
import { ConcurrencyMetrics } from "./monitoring/ConcurrencyMetrics";
|
|
9
16
|
import { HealthCheck } from "./monitoring/HealthCheck";
|
|
10
17
|
import { PrometheusMetricsBridge } from "./monitoring/PrometheusMetricsBridge";
|
|
11
18
|
import { RateLimiter } from "./monitoring/RateLimiter";
|
|
12
19
|
import { TriggerMetricsCollector } from "./monitoring/TriggerMetricsCollector";
|
|
20
|
+
import { DebounceCoordinator } from "./scheduling/DebounceCoordinator";
|
|
21
|
+
import { DeferredDispatchSignal } from "./scheduling/DeferredDispatchSignal";
|
|
22
|
+
import { DeferredRunScheduler } from "./scheduling/DeferredRunScheduler";
|
|
23
|
+
import { readSchedulingConfig } from "./scheduling/readSchedulingConfig";
|
|
13
24
|
import { RunTracker } from "./tracing/RunTracker";
|
|
14
25
|
import { TracingLogger } from "./tracing/TracingLogger";
|
|
15
26
|
export default class TriggerBase extends Trigger {
|
|
@@ -45,6 +56,226 @@ export default class TriggerBase extends Trigger {
|
|
|
45
56
|
getRunner() {
|
|
46
57
|
return new Runner(this.configuration.steps);
|
|
47
58
|
}
|
|
59
|
+
/**
|
|
60
|
+
* Tier 2 #5+#7 follow-up — durable scheduler hook.
|
|
61
|
+
*
|
|
62
|
+
* When a trigger supports re-firing deferred dispatches across process
|
|
63
|
+
* restarts, it overrides this method to extract a JSON-serializable
|
|
64
|
+
* subset of `ctx` sufficient for `restoreDispatch(payload)` (defined
|
|
65
|
+
* by the trigger) to reconstruct an equivalent ctx and re-enter
|
|
66
|
+
* `dispatchDeferred`.
|
|
67
|
+
*
|
|
68
|
+
* Returns `null` (default) when the trigger does NOT support
|
|
69
|
+
* cross-restart durability — the scheduler then runs purely in-memory
|
|
70
|
+
* for that trigger (existing pre-follow-up behaviour).
|
|
71
|
+
*
|
|
72
|
+
* Override in `HttpTrigger` to return `{method, path, headers, body,
|
|
73
|
+
* params, query, workflowPath}` (with sensitive header keys stripped).
|
|
74
|
+
* Worker triggers don't override — broker handles delay durability.
|
|
75
|
+
*/
|
|
76
|
+
extractDispatchPayload(_ctx) {
|
|
77
|
+
return null;
|
|
78
|
+
}
|
|
79
|
+
/**
|
|
80
|
+
* Returns the trigger type string used to tag persisted scheduled
|
|
81
|
+
* dispatch rows (`scheduled_dispatches.trigger_type`). Mirrors the
|
|
82
|
+
* convention from `tracker.startRun({triggerType})`. Override when
|
|
83
|
+
* the class name doesn't naturally produce the right tag.
|
|
84
|
+
*/
|
|
85
|
+
getTriggerType() {
|
|
86
|
+
return this.constructor.name.replace("Trigger", "").toLowerCase() || "unknown";
|
|
87
|
+
}
|
|
88
|
+
// --- Crash auto-flip (Tier 2 quick-wins follow-up) ---
|
|
89
|
+
/** Flag — set true after `installCrashHandlers` has run once in this process. */
|
|
90
|
+
static crashHandlersInstalled = false;
|
|
91
|
+
/**
|
|
92
|
+
* Tier 2 quick-wins follow-up — install process-level handlers for
|
|
93
|
+
* `uncaughtException` and `unhandledRejection`. When fired, flip
|
|
94
|
+
* every in-flight `running` run to `"crashed"` (with the captured
|
|
95
|
+
* error) BEFORE re-throwing / letting Node's default behavior take
|
|
96
|
+
* over. Idempotent — safe to call from every trigger's `listen()`;
|
|
97
|
+
* only the first call installs handlers.
|
|
98
|
+
*
|
|
99
|
+
* Kill-switch: `BLOK_CRASH_AUTOFLIP_DISABLED=1`.
|
|
100
|
+
*
|
|
101
|
+
* Why sync: `process.on("uncaughtException")` handlers can't await.
|
|
102
|
+
* `markAllRunningRunsAsCrashed` is sync (sqlite + in-memory writes
|
|
103
|
+
* complete before the handler returns).
|
|
104
|
+
*/
|
|
105
|
+
static installCrashHandlers(logger) {
|
|
106
|
+
if (TriggerBase.crashHandlersInstalled)
|
|
107
|
+
return;
|
|
108
|
+
if (process.env.BLOK_CRASH_AUTOFLIP_DISABLED === "1")
|
|
109
|
+
return;
|
|
110
|
+
TriggerBase.crashHandlersInstalled = true;
|
|
111
|
+
const onUncaught = (err) => {
|
|
112
|
+
try {
|
|
113
|
+
const flipped = RunTracker.getInstance().markAllRunningRunsAsCrashed(err);
|
|
114
|
+
logger?.error?.(`[blok][crash-autoflip] uncaughtException — flipped ${flipped} running run(s) to crashed: ${err.stack || err.message}`);
|
|
115
|
+
}
|
|
116
|
+
catch (markErr) {
|
|
117
|
+
// Last-ditch — at least log so the operator knows the autoflip itself failed.
|
|
118
|
+
console.error("[blok][crash-autoflip] markAllRunningRunsAsCrashed failed:", markErr);
|
|
119
|
+
}
|
|
120
|
+
// Re-emit / let the runtime crash as expected — we don't want to
|
|
121
|
+
// silently swallow uncaught errors. Without this, Node would
|
|
122
|
+
// continue running with the handler attached but operators
|
|
123
|
+
// expect the process to die on uncaught exceptions.
|
|
124
|
+
throw err;
|
|
125
|
+
};
|
|
126
|
+
const onRejection = (reason) => {
|
|
127
|
+
const err = reason instanceof Error ? reason : new Error(String(reason));
|
|
128
|
+
try {
|
|
129
|
+
const flipped = RunTracker.getInstance().markAllRunningRunsAsCrashed(err);
|
|
130
|
+
logger?.error?.(`[blok][crash-autoflip] unhandledRejection — flipped ${flipped} running run(s) to crashed: ${err.stack || err.message}`);
|
|
131
|
+
}
|
|
132
|
+
catch (markErr) {
|
|
133
|
+
console.error("[blok][crash-autoflip] markAllRunningRunsAsCrashed failed:", markErr);
|
|
134
|
+
}
|
|
135
|
+
// Don't re-throw. NOTE(review): attaching an `unhandledRejection` listener
|
|
136
|
+
// suppresses Node's default handling (Node >= 15 would otherwise raise it as
|
|
137
|
+
// an uncaught exception), so the process keeps running after this returns.
|
|
138
|
+
};
|
|
139
|
+
process.on("uncaughtException", onUncaught);
|
|
140
|
+
process.on("unhandledRejection", onRejection);
|
|
141
|
+
}
|
|
142
|
+
/** Test-only — reset the install flag so tests can re-install handlers. */
|
|
143
|
+
static resetCrashHandlersInstalled() {
|
|
144
|
+
TriggerBase.crashHandlersInstalled = false;
|
|
145
|
+
}
|
|
146
|
+
// --- Graceful shutdown (Tier 2 follow-up) ---
|
|
147
|
+
/** Flag — set true after `installShutdownHandlers` has run once in this process. */
|
|
148
|
+
static shutdownHandlersInstalled = false;
|
|
149
|
+
/**
|
|
150
|
+
* Install SIGTERM + SIGINT handlers that drain process resources
|
|
151
|
+
* cleanly before exit. Mirrors the `installCrashHandlers` pattern —
|
|
152
|
+
* idempotent + opt-out via `BLOK_GRACEFUL_SHUTDOWN_DISABLED=1`.
|
|
153
|
+
*
|
|
154
|
+
* Drain order:
|
|
155
|
+
* 1. Stop accepting new work — calls `trigger.stop()` if available
|
|
156
|
+
* (HttpTrigger drains in-flight requests + closes the server).
|
|
157
|
+
* 2. Stop the periodic janitor sweep so it doesn't fire mid-drain.
|
|
158
|
+
* 3. Cancel pending deferred dispatches in the in-memory scheduler.
|
|
159
|
+
* (Persisted rows in `scheduled_dispatches` survive — the next
|
|
160
|
+
* boot recovers them.)
|
|
161
|
+
* 4. Disconnect the cross-process concurrency backend (NATS KV)
|
|
162
|
+
* so locks held by this process release on the broker side.
|
|
163
|
+
* 5. `process.exit(0)`.
|
|
164
|
+
*
|
|
165
|
+
* Errors during drain are caught + logged; the process still exits
|
|
166
|
+
* (cleanup is best-effort; the operator wants a clean exit).
|
|
167
|
+
*
|
|
168
|
+
* Why this is a `static` method: shutdown handlers must be installed
|
|
169
|
+
* once per process, regardless of how many trigger subclasses
|
|
170
|
+
* coexist. Subclasses pass `this` so the handler can call their
|
|
171
|
+
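* specific `stop()`. NOTE(review): only the first call's `trigger` is captured — later calls return early, so their `stop()` is never invoked.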
|
|
172
|
+
*/
|
|
173
|
+
static installShutdownHandlers(trigger, logger) {
|
|
174
|
+
if (TriggerBase.shutdownHandlersInstalled)
|
|
175
|
+
return;
|
|
176
|
+
if (process.env.BLOK_GRACEFUL_SHUTDOWN_DISABLED === "1")
|
|
177
|
+
return;
|
|
178
|
+
TriggerBase.shutdownHandlersInstalled = true;
|
|
179
|
+
const onSignal = async (signal) => {
|
|
180
|
+
logger?.log?.(`[blok][shutdown] received ${signal} — draining...`);
|
|
181
|
+
try {
|
|
182
|
+
// 1. Stop the trigger (drain in-flight, close server).
|
|
183
|
+
const stoppable = trigger;
|
|
184
|
+
if (typeof stoppable.stop === "function") {
|
|
185
|
+
await stoppable.stop();
|
|
186
|
+
}
|
|
187
|
+
// 2. Stop the janitor.
|
|
188
|
+
try {
|
|
189
|
+
const { Janitor } = await import("./tracing/Janitor");
|
|
190
|
+
const janitor = Janitor.instance;
|
|
191
|
+
if (janitor)
|
|
192
|
+
janitor.stop();
|
|
193
|
+
}
|
|
194
|
+
catch {
|
|
195
|
+
// Best-effort — ignore a failed Janitor import or a throwing stop().
|
|
196
|
+
}
|
|
197
|
+
// 3. Clear pending deferred dispatches (in-memory only —
|
|
198
|
+
// persisted rows survive for next-boot recovery).
|
|
199
|
+
try {
|
|
200
|
+
DeferredRunScheduler.getInstance().clear();
|
|
201
|
+
}
|
|
202
|
+
catch {
|
|
203
|
+
// Best-effort.
|
|
204
|
+
}
|
|
205
|
+
// 4. Disconnect cross-process concurrency backend.
|
|
206
|
+
//
|
|
207
|
+
// PR 3 D5 — wrap disconnect() in a Promise.race timeout so a
|
|
208
|
+
// slow NATS drain doesn't hang past the SIGTERM-to-SIGKILL
|
|
209
|
+
// window. Default 10s; configurable via
|
|
210
|
+
// BLOK_BACKEND_DISCONNECT_TIMEOUT_MS. Timer is .unref()'d so
|
|
211
|
+
// it doesn't keep the event loop alive after a successful
|
|
212
|
+
// disconnect.
|
|
213
|
+
const backend = RunTracker.getInstance().getConcurrencyBackend();
|
|
214
|
+
if (backend) {
|
|
215
|
+
const disconnectTimeoutMs = (() => {
|
|
216
|
+
const raw = process.env.BLOK_BACKEND_DISCONNECT_TIMEOUT_MS;
|
|
217
|
+
if (!raw || !/^\d+$/.test(raw))
|
|
218
|
+
return 10_000;
|
|
219
|
+
return Number(raw);
|
|
220
|
+
})();
|
|
221
|
+
try {
|
|
222
|
+
await Promise.race([
|
|
223
|
+
backend.disconnect(),
|
|
224
|
+
new Promise((_, reject) => {
|
|
225
|
+
const t = setTimeout(() => reject(new Error(`backend.disconnect() timed out after ${disconnectTimeoutMs}ms`)), disconnectTimeoutMs);
|
|
226
|
+
t.unref?.();
|
|
227
|
+
}),
|
|
228
|
+
]);
|
|
229
|
+
}
|
|
230
|
+
catch (err) {
|
|
231
|
+
logger?.error?.(`[blok][shutdown] backend disconnect failed (or timed out): ${err instanceof Error ? err.message : String(err)}`);
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
logger?.log?.("[blok][shutdown] graceful shutdown complete");
|
|
235
|
+
}
|
|
236
|
+
catch (err) {
|
|
237
|
+
logger?.error?.(`[blok][shutdown] drain error: ${err instanceof Error ? err.message : String(err)}`);
|
|
238
|
+
}
|
|
239
|
+
finally {
|
|
240
|
+
process.exit(0);
|
|
241
|
+
}
|
|
242
|
+
};
|
|
243
|
+
process.on("SIGTERM", onSignal);
|
|
244
|
+
process.on("SIGINT", onSignal);
|
|
245
|
+
}
|
|
246
|
+
/** Test-only — reset the install flag so tests can re-install handlers. */
|
|
247
|
+
static resetShutdownHandlersInstalled() {
|
|
248
|
+
TriggerBase.shutdownHandlersInstalled = false;
|
|
249
|
+
}
|
|
250
|
+
/**
|
|
251
|
+
* Tier 2 quick-wins follow-up — boot recovery for orphaned `running`
|
|
252
|
+
* runs. Scans the store for runs in `running` status whose
|
|
253
|
+
* `startedAt` is older than `thresholdMs` ago (default 2 minutes,
|
|
254
|
+
* override via `BLOK_ORPHAN_THRESHOLD_MS` env var). Flips each to
|
|
255
|
+
* `"crashed"` with `Error("Orphaned — process restarted before run completed")`.
|
|
256
|
+
*
|
|
257
|
+
* Catches the case where the previous process died via SIGKILL or
|
|
258
|
+
* OOM and the `installCrashHandlers` path never ran. Returns the
|
|
259
|
+
* count flipped for observability + tests.
|
|
260
|
+
*
|
|
261
|
+
* Idempotent — safe to call multiple times; runs are flipped to
|
|
262
|
+
* a terminal status so a second pass finds none.
|
|
263
|
+
*/
|
|
264
|
+
static recoverOrphanedRuns(thresholdMs, logger) {
|
|
265
|
+
if (process.env.BLOK_CRASH_AUTOFLIP_DISABLED === "1")
|
|
266
|
+
return 0;
|
|
267
|
+
const envThreshold = process.env.BLOK_ORPHAN_THRESHOLD_MS;
|
|
268
|
+
const threshold = thresholdMs ?? (envThreshold && /^\d+$/.test(envThreshold) ? Number(envThreshold) : 2 * 60 * 1000);
|
|
269
|
+
const tracker = RunTracker.getInstance();
|
|
270
|
+
if (!tracker.active)
|
|
271
|
+
return 0;
|
|
272
|
+
const cutoff = Date.now() - threshold;
|
|
273
|
+
const flipped = tracker.markAllRunningRunsAsCrashed(new Error("Orphaned — process restarted before run completed"), { maxStartedAt: cutoff });
|
|
274
|
+
if (flipped > 0) {
|
|
275
|
+
logger?.log?.(`[blok][crash-autoflip] boot recovery — flipped ${flipped} orphaned run(s) older than ${threshold}ms to crashed`);
|
|
276
|
+
}
|
|
277
|
+
return flipped;
|
|
278
|
+
}
|
|
48
279
|
// --- Hot Module Replacement ---
|
|
49
280
|
/**
|
|
50
281
|
* Enable hot reload for this trigger. Only active in development
|
|
@@ -161,25 +392,246 @@ export default class TriggerBase extends Trigger {
|
|
|
161
392
|
this.inFlightRequests++;
|
|
162
393
|
const runStart = performance.now();
|
|
163
394
|
let runSuccess = true;
|
|
395
|
+
// Tier 2 #6 — concurrency lock claim, populated when the gate grants
|
|
396
|
+
// a slot. Released in the `finally` block. Null when the workflow has
|
|
397
|
+
// no concurrency gate or the gate failed open (key resolution).
|
|
398
|
+
let acquiredLock = null;
|
|
164
399
|
// --- Trace: start run ---
|
|
400
|
+
// Tier 2 #5 + #7 · skip startRun on re-entry from a deferred timer.
|
|
401
|
+
// The deferred dispatcher (DeferredRunScheduler / DebounceCoordinator)
|
|
402
|
+
// re-enters `run(ctx)` with `_blokDispatchReentry = true` after the
|
|
403
|
+
// wait window closes; the existing run record is reused via
|
|
404
|
+
// `ctx._traceRunId`.
|
|
165
405
|
const tracker = RunTracker.getInstance();
|
|
166
406
|
let traceRunId;
|
|
167
|
-
|
|
407
|
+
const ctxRecord = ctx;
|
|
408
|
+
const isReentryAtTrace = ctxRecord._blokDispatchReentry === true;
|
|
409
|
+
if (tracker.active && isReentryAtTrace) {
|
|
410
|
+
traceRunId = ctxRecord._traceRunId;
|
|
411
|
+
// Logger wrapping was already applied on the first pass — no
|
|
412
|
+
// need to re-wrap (and re-wrapping would double-route logs).
|
|
413
|
+
// PR 1 follow-up · A2 fix. The first-pass `finally` block
|
|
414
|
+
// unregisters the AbortController via `tracker.unregisterAbortController`.
|
|
415
|
+
// Without re-registering on re-entry, `tracker.abortRunningRun(runId)`
|
|
416
|
+
// can't fire the controller — the controller stays on
|
|
417
|
+
// `ctx._PRIVATE_.abortController` but the tracker's lookup
|
|
418
|
+
// returns undefined. Operator cancel of a `running` run that
|
|
419
|
+
// came from delayed/queued/debounced flips status to "cancelled"
|
|
420
|
+
// but the in-flight step never sees `ctx.signal.aborted`.
|
|
421
|
+
// Re-register here mirroring the first-pass branch below.
|
|
422
|
+
if (traceRunId) {
|
|
423
|
+
const privateSlot = ctx._PRIVATE_;
|
|
424
|
+
if (privateSlot?.abortController) {
|
|
425
|
+
tracker.registerAbortController(traceRunId, privateSlot.abortController);
|
|
426
|
+
}
|
|
427
|
+
}
|
|
428
|
+
}
|
|
429
|
+
else if (tracker.active) {
|
|
168
430
|
const runner = this.getRunner();
|
|
169
431
|
const stepCount = runner.getStepCount?.() ?? this.configuration.steps?.length ?? 0;
|
|
432
|
+
// Tier 1 · replay lineage. The replay endpoint
|
|
433
|
+
// (TraceRouter.POST /__blok/runs/:id/replay) sets
|
|
434
|
+
// `X-Blok-Replay-Of: <originalRunId>` on the dispatched HTTP
|
|
435
|
+
// request. Read it here so the new run carries `replayOf` and
|
|
436
|
+
// Studio can render a "Replay of #..." breadcrumb.
|
|
437
|
+
const reqHeaders = (ctx.request?.headers ?? {});
|
|
438
|
+
const replayOfHeader = reqHeaders["x-blok-replay-of"] ?? reqHeaders["X-Blok-Replay-Of"];
|
|
439
|
+
const replayOf = Array.isArray(replayOfHeader)
|
|
440
|
+
? replayOfHeader[0]
|
|
441
|
+
: typeof replayOfHeader === "string"
|
|
442
|
+
? replayOfHeader
|
|
443
|
+
: undefined;
|
|
170
444
|
const run = tracker.startRun({
|
|
171
445
|
workflowName: this.configuration.name || ctx.workflow_name || "unknown",
|
|
172
446
|
workflowPath: ctx.workflow_path || "",
|
|
173
447
|
triggerType: this.constructor.name.replace("Trigger", "").toLowerCase() || "unknown",
|
|
174
448
|
triggerSummary: this.buildTraceTriggerSummary(ctx),
|
|
175
449
|
nodeCount: stepCount,
|
|
450
|
+
replayOf,
|
|
176
451
|
});
|
|
177
452
|
traceRunId = run.id;
|
|
178
|
-
|
|
453
|
+
ctxRecord._traceRunId = run.id;
|
|
454
|
+
// Tier 2 follow-up · register the ctx's AbortController so the
|
|
455
|
+
// cancel API can fire it for `running` runs. Stashed on
|
|
456
|
+
// _PRIVATE_ by createContext; lookup via the optional shape.
|
|
457
|
+
const privateSlot = ctx._PRIVATE_;
|
|
458
|
+
if (privateSlot?.abortController) {
|
|
459
|
+
tracker.registerAbortController(run.id, privateSlot.abortController);
|
|
460
|
+
}
|
|
179
461
|
// Wrap logger to forward log entries to RunTracker
|
|
180
462
|
ctx.logger = new TracingLogger(ctx.logger, run.id, tracker);
|
|
181
463
|
}
|
|
182
464
|
try {
|
|
465
|
+
// --- Scheduling gates (Tier 2 #5 + #7) ---
|
|
466
|
+
// Run BEFORE the concurrency gate. Order: debounce → delay.
|
|
467
|
+
// Each gate may throw `DeferredDispatchSignal` to short-circuit
|
|
468
|
+
// the immediate dispatch path; the transport layer (HTTP/Worker)
|
|
469
|
+
// catches it and translates to 202 Accepted / NACK.
|
|
470
|
+
//
|
|
471
|
+
// Skipped on re-entry from a deferred timer (the timer callback
|
|
472
|
+
// sets `_blokDispatchReentry = true` on ctx) so we don't loop.
|
|
473
|
+
// Also skipped when:
|
|
474
|
+
// - tracker inactive (deferred dispatch needs persistence to
|
|
475
|
+
// survive even within the process lifetime)
|
|
476
|
+
// - `BLOK_SCHEDULING_DISABLED=1` (kill-switch).
|
|
477
|
+
const isReentry = ctx._blokDispatchReentry === true;
|
|
478
|
+
if (!isReentry && traceRunId && process.env.BLOK_SCHEDULING_DISABLED !== "1") {
|
|
479
|
+
const schedCfg = readSchedulingConfig(this.configuration.trigger);
|
|
480
|
+
if (schedCfg) {
|
|
481
|
+
const signal = this.maybeDeferRun(ctx, traceRunId, schedCfg);
|
|
482
|
+
if (signal)
|
|
483
|
+
throw signal;
|
|
484
|
+
}
|
|
485
|
+
}
|
|
486
|
+
// --- Concurrency gate (Tier 2 #6) ---
|
|
487
|
+
// Runs after `tracker.startRun` so denied attempts appear in
|
|
488
|
+
// Studio with status "throttled". Skipped when:
|
|
489
|
+
// - tracker is inactive (lock store IS the run store)
|
|
490
|
+
// - the trigger config has no `concurrencyKey`
|
|
491
|
+
// - the resolved key is null/undefined (fail-open, matches
|
|
492
|
+
// idempotency-cache semantics)
|
|
493
|
+
// - `BLOK_CONCURRENCY_DISABLED=1` (kill-switch).
|
|
494
|
+
if (traceRunId && process.env.BLOK_CONCURRENCY_DISABLED !== "1") {
|
|
495
|
+
const concCfg = readConcurrencyConfig(this.configuration.trigger);
|
|
496
|
+
if (concCfg) {
|
|
497
|
+
const resolvedKey = resolveIdempotencyKey(concCfg.keyExpression, ctx);
|
|
498
|
+
if (resolvedKey !== null) {
|
|
499
|
+
const workflowName = this.configuration.name || ctx.workflow_name || "unknown";
|
|
500
|
+
const now = Date.now();
|
|
501
|
+
const result = await tracker.acquireConcurrencySlot(workflowName, resolvedKey, concCfg.limit, traceRunId, now + concCfg.leaseMs);
|
|
502
|
+
if (!result.acquired) {
|
|
503
|
+
// Tier 2 #6 follow-up — when the trigger is configured with
|
|
504
|
+
// `onLimit: "queue"`, defer the run via the in-process scheduler
|
|
505
|
+
// (Tier 2 #5+#7 plumbing) and re-attempt acquisition after a backoff
|
|
506
|
+
// delay (capped exponential; 1s–30s by default) instead of throwing. HTTP gets 202 + Location, Worker
|
|
507
|
+
// ACKs without retry. Re-defer happens transparently when the
|
|
508
|
+
// timer fires and the gate denies again.
|
|
509
|
+
if (concCfg.onLimit === "queue") {
|
|
510
|
+
// PR 5 B2 — TTL on queued runs. Compute on
|
|
511
|
+
// the first queue attempt and persist on the
|
|
512
|
+
// run record so re-defer attempts can check
|
|
513
|
+
// it. The existing `expiresAt` field on
|
|
514
|
+
// WorkflowRun is reused.
|
|
515
|
+
const existingRun = tracker.getStore().getRun(traceRunId);
|
|
516
|
+
const queueExpiresAt = existingRun?.expiresAt !== undefined
|
|
517
|
+
? existingRun.expiresAt
|
|
518
|
+
: concCfg.queueTimeoutMs !== undefined
|
|
519
|
+
? now + concCfg.queueTimeoutMs
|
|
520
|
+
: undefined;
|
|
521
|
+
if (queueExpiresAt !== undefined && now > queueExpiresAt) {
|
|
522
|
+
// TTL elapsed — flip to expired, no further re-defer.
|
|
523
|
+
tracker.markRunExpired(traceRunId, {
|
|
524
|
+
expiresAt: queueExpiresAt,
|
|
525
|
+
expiredAt: now,
|
|
526
|
+
});
|
|
527
|
+
ConcurrencyMetrics.getInstance().recordDenied({
|
|
528
|
+
workflow_name: workflowName,
|
|
529
|
+
concurrency_key: resolvedKey,
|
|
530
|
+
mode: "queue",
|
|
531
|
+
});
|
|
532
|
+
// PR 1-5 polish · throw a dedicated error so the HTTP
|
|
533
|
+
// transport returns 410 Gone instead of 429 Retry-After.
|
|
534
|
+
// Conflating queue-expired (permanently dead — the timer
|
|
535
|
+
// won't re-fire) with throttled (transient resource
|
|
536
|
+
// pressure) misleads clients into retrying. Status was
|
|
537
|
+
// already flipped to `expired` above, so the run record
|
|
538
|
+
// reflects reality regardless of the transport choice.
|
|
539
|
+
throw new QueueExpiredError({
|
|
540
|
+
workflowName,
|
|
541
|
+
concurrencyKey: resolvedKey,
|
|
542
|
+
queueExpiredAt: queueExpiresAt,
|
|
543
|
+
runId: traceRunId,
|
|
544
|
+
});
|
|
545
|
+
}
|
|
546
|
+
// PR 5 B3 — capped exponential backoff for re-defer.
|
|
547
|
+
// Track attempt count via existing pingCount field on the run record.
|
|
548
|
+
//
|
|
549
|
+
// Review fix-up · CONCERN-4. Clamp the exponent before
|
|
550
|
+
// `factor ** attempt`. Math.min would clamp the result
|
|
551
|
+
// (saving us from Infinity), but `factor ** 1024` is
|
|
552
|
+
// expensive and wasteful; clamping the exponent at
|
|
553
|
+
// `MAX_BACKOFF_EXPONENT` keeps the math cheap regardless
|
|
554
|
+
// of how many times a queue re-defers.
|
|
555
|
+
const MAX_BACKOFF_EXPONENT = 30;
|
|
556
|
+
const attempt = existingRun?.pingCount ?? 0;
|
|
557
|
+
const minBackoff = concCfg.queueRetry?.minBackoffMs ?? 1000;
|
|
558
|
+
const maxBackoff = concCfg.queueRetry?.maxBackoffMs ?? 30_000;
|
|
559
|
+
const factor = concCfg.queueRetry?.factor ?? 2;
|
|
560
|
+
const safeExponent = Math.min(attempt, MAX_BACKOFF_EXPONENT);
|
|
561
|
+
const retryAfterMs = Math.min(maxBackoff, minBackoff * factor ** safeExponent);
|
|
562
|
+
const scheduledAt = now + retryAfterMs;
|
|
563
|
+
tracker.markRunQueued(traceRunId, {
|
|
564
|
+
concurrencyKey: resolvedKey,
|
|
565
|
+
concurrencyLimit: concCfg.limit,
|
|
566
|
+
currentInFlight: result.currentInFlight,
|
|
567
|
+
scheduledAt,
|
|
568
|
+
});
|
|
569
|
+
// Bump pingCount (= attempt counter for backoff) and
|
|
570
|
+
// persist queueExpiresAt on first queue attempt.
|
|
571
|
+
tracker.getStore().updateRun(traceRunId, {
|
|
572
|
+
pingCount: attempt + 1,
|
|
573
|
+
...(queueExpiresAt !== undefined && existingRun?.expiresAt === undefined
|
|
574
|
+
? { expiresAt: queueExpiresAt }
|
|
575
|
+
: {}),
|
|
576
|
+
});
|
|
577
|
+
ConcurrencyMetrics.getInstance().recordDenied({
|
|
578
|
+
workflow_name: workflowName,
|
|
579
|
+
concurrency_key: resolvedKey,
|
|
580
|
+
mode: "queue",
|
|
581
|
+
});
|
|
582
|
+
const expiresAtForDispatch = undefined;
|
|
583
|
+
// Tier 2 #5+#7 follow-up · durable scheduling. Persist the
|
|
584
|
+
// dispatch row only when the subclass provides a payload
|
|
585
|
+
// (HttpTrigger.extractDispatchPayload returns the request
|
|
586
|
+
// subset; default returns null = in-memory only).
|
|
587
|
+
const persistPayload = this.extractDispatchPayload(ctx);
|
|
588
|
+
DeferredRunScheduler.getInstance().schedule(traceRunId, scheduledAt, async () => {
|
|
589
|
+
await this.dispatchDeferred(ctx, traceRunId, expiresAtForDispatch);
|
|
590
|
+
}, persistPayload === null
|
|
591
|
+
? undefined
|
|
592
|
+
: {
|
|
593
|
+
workflowName,
|
|
594
|
+
triggerType: this.getTriggerType(),
|
|
595
|
+
expiresAt: expiresAtForDispatch,
|
|
596
|
+
dispatchStatus: "queued",
|
|
597
|
+
payload: persistPayload,
|
|
598
|
+
});
|
|
599
|
+
throw new DeferredDispatchSignal({
|
|
600
|
+
runId: traceRunId,
|
|
601
|
+
workflowName,
|
|
602
|
+
status: "queued",
|
|
603
|
+
scheduledAt,
|
|
604
|
+
debounced: false,
|
|
605
|
+
pingCount: 1,
|
|
606
|
+
});
|
|
607
|
+
}
|
|
608
|
+
tracker.markRunThrottled(traceRunId, {
|
|
609
|
+
concurrencyKey: resolvedKey,
|
|
610
|
+
concurrencyLimit: concCfg.limit,
|
|
611
|
+
currentInFlight: result.currentInFlight,
|
|
612
|
+
});
|
|
613
|
+
ConcurrencyMetrics.getInstance().recordDenied({
|
|
614
|
+
workflow_name: workflowName,
|
|
615
|
+
concurrency_key: resolvedKey,
|
|
616
|
+
mode: "throw",
|
|
617
|
+
});
|
|
618
|
+
throw new ConcurrencyLimitError({
|
|
619
|
+
workflowName,
|
|
620
|
+
concurrencyKey: resolvedKey,
|
|
621
|
+
concurrencyLimit: concCfg.limit,
|
|
622
|
+
currentInFlight: result.currentInFlight,
|
|
623
|
+
retryAfterMs: 1000,
|
|
624
|
+
runId: traceRunId,
|
|
625
|
+
});
|
|
626
|
+
}
|
|
627
|
+
acquiredLock = { workflowName, concurrencyKey: resolvedKey, runId: traceRunId };
|
|
628
|
+
ConcurrencyMetrics.getInstance().recordAcquired({
|
|
629
|
+
workflow_name: workflowName,
|
|
630
|
+
concurrency_key: resolvedKey,
|
|
631
|
+
});
|
|
632
|
+
}
|
|
633
|
+
}
|
|
634
|
+
}
|
|
183
635
|
const start = performance.now();
|
|
184
636
|
const defaultMeter = metrics.getMeter("default");
|
|
185
637
|
const workflow_execution = defaultMeter.createCounter("workflow", {
|
|
@@ -293,13 +745,96 @@ export default class TriggerBase extends Trigger {
|
|
|
293
745
|
}
|
|
294
746
|
catch (err) {
|
|
295
747
|
runSuccess = false;
|
|
748
|
+
// PR 4 — wait.for / wait.until step requesting deferred dispatch.
|
|
749
|
+
// Translate to the existing scheduling pipeline:
|
|
750
|
+
// 1. Mark run "delayed" with the wait deadline as scheduledAt.
|
|
751
|
+
// 2. Persist the dispatch row (durable scheduler) so the wait
|
|
752
|
+
// survives process restart.
|
|
753
|
+
// 3. Register a setTimeout via DeferredRunScheduler.
|
|
754
|
+
// 4. Throw DeferredDispatchSignal — HTTP transport returns 202.
|
|
755
|
+
// The runner already set lastCompletedStepIndex before throwing
|
|
756
|
+
// WaitDispatchRequest so the dispatchDeferred re-entry skips
|
|
757
|
+
// past completed pre-wait steps.
|
|
758
|
+
if (err instanceof WaitDispatchRequest && traceRunId) {
|
|
759
|
+
const workflowName = this.configuration.name || ctx.workflow_name || "unknown";
|
|
760
|
+
const scheduledAt = err.info.scheduledAt;
|
|
761
|
+
const delayMs = Math.max(0, scheduledAt - Date.now());
|
|
762
|
+
tracker.markRunDelayed(traceRunId, { scheduledAt, delayMs });
|
|
763
|
+
const persistPayload = this.extractDispatchPayload(ctx);
|
|
764
|
+
DeferredRunScheduler.getInstance().schedule(traceRunId, scheduledAt, async () => {
|
|
765
|
+
await this.dispatchDeferred(ctx, traceRunId, undefined);
|
|
766
|
+
}, persistPayload === null
|
|
767
|
+
? undefined
|
|
768
|
+
: {
|
|
769
|
+
workflowName,
|
|
770
|
+
triggerType: this.getTriggerType(),
|
|
771
|
+
dispatchStatus: "delayed",
|
|
772
|
+
payload: persistPayload,
|
|
773
|
+
});
|
|
774
|
+
// Throw DeferredDispatchSignal so the transport layer can
|
|
775
|
+
// translate to 202 Accepted (HTTP) / ACK without retry (Worker).
|
|
776
|
+
throw new DeferredDispatchSignal({
|
|
777
|
+
runId: traceRunId,
|
|
778
|
+
workflowName,
|
|
779
|
+
status: "delayed",
|
|
780
|
+
scheduledAt,
|
|
781
|
+
debounced: false,
|
|
782
|
+
pingCount: 1,
|
|
783
|
+
});
|
|
784
|
+
}
|
|
296
785
|
// --- Trace: fail run ---
|
|
297
|
-
|
|
786
|
+
// Tier 2 #6: ConcurrencyLimitError already flipped the run's
|
|
787
|
+
// status to "throttled" via markRunThrottled — don't override
|
|
788
|
+
// it with "failed". The transport layer translates → 429 / NACK.
|
|
789
|
+
//
|
|
790
|
+
// Tier 2 #5 + #7: DeferredDispatchSignal already flipped the
|
|
791
|
+
// run's status to "delayed" or "debounced". Don't override it
|
|
792
|
+
// with "failed". The transport layer translates → 202 Accepted.
|
|
793
|
+
//
|
|
794
|
+
// Tier 2 follow-up: RunCancelledError is thrown by RunnerSteps
|
|
795
|
+
// when an operator cancels via `abortRunningRun`. The tracker
|
|
796
|
+
// has already flipped the run to "cancelled"; don't override.
|
|
797
|
+
//
|
|
798
|
+
// PR 4: WaitDispatchRequest is handled above (translated to
|
|
799
|
+
// DeferredDispatchSignal); shouldn't reach here.
|
|
800
|
+
//
|
|
801
|
+
// PR 1-5 polish: QueueExpiredError flipped the run's status to
|
|
802
|
+
// "expired" via markRunExpired — don't override it with
|
|
803
|
+
// "failed". The HTTP transport translates → 410 Gone.
|
|
804
|
+
if (traceRunId &&
|
|
805
|
+
!(err instanceof ConcurrencyLimitError) &&
|
|
806
|
+
!(err instanceof QueueExpiredError) &&
|
|
807
|
+
!(err instanceof DeferredDispatchSignal) &&
|
|
808
|
+
!(err instanceof RunCancelledError) &&
|
|
809
|
+
!(err instanceof WaitDispatchRequest)) {
|
|
298
810
|
tracker.failRun(traceRunId, err instanceof Error ? err : new Error(String(err)));
|
|
299
811
|
}
|
|
300
812
|
throw err;
|
|
301
813
|
}
|
|
302
814
|
finally {
|
|
815
|
+
// Release the concurrency slot if the gate granted one. Idempotent
|
|
816
|
+
// at the store layer — a double-release (gate granted but then
|
|
817
|
+
// crash + lazy-purge) is a no-op. `releaseConcurrencySlot` is async
|
|
818
|
+
// (Tier 2 #6 follow-up cross-process backend); fire-and-forget here
|
|
819
|
+
// — the finally block can't `await` cleanly across all callers, and
|
|
820
|
+
// release errors don't change the run outcome. Errors logged via
|
|
821
|
+
// the backend's own catch handlers.
|
|
822
|
+
if (acquiredLock) {
|
|
823
|
+
const lock = acquiredLock;
|
|
824
|
+
void tracker.releaseConcurrencySlot(lock.workflowName, lock.concurrencyKey, lock.runId).catch((err) => {
|
|
825
|
+
console.error(`[blok][concurrency] releaseConcurrencySlot failed for ${lock.workflowName}:${lock.concurrencyKey}:${lock.runId}:`, err instanceof Error ? err.stack || err.message : err);
|
|
826
|
+
});
|
|
827
|
+
ConcurrencyMetrics.getInstance().recordReleased({
|
|
828
|
+
workflow_name: lock.workflowName,
|
|
829
|
+
concurrency_key: lock.concurrencyKey,
|
|
830
|
+
});
|
|
831
|
+
}
|
|
832
|
+
// Tier 2 follow-up · clean up the AbortController registration
|
|
833
|
+
// once the run is terminal. Idempotent — safe even if the run
|
|
834
|
+
// was cancelled mid-flight (the tracker already aborted).
|
|
835
|
+
if (traceRunId) {
|
|
836
|
+
tracker.unregisterAbortController(traceRunId);
|
|
837
|
+
}
|
|
303
838
|
const durationMs = performance.now() - runStart;
|
|
304
839
|
this.metricsBridge.recordExecution(durationMs, runSuccess, {
|
|
305
840
|
workflow_name: this.configuration.name || "",
|
|
@@ -309,6 +844,229 @@ export default class TriggerBase extends Trigger {
|
|
|
309
844
|
this.inFlightRequests--;
|
|
310
845
|
}
|
|
311
846
|
}
|
|
847
|
+
/**
|
|
848
|
+
* Tier 2 #5 + #7 — evaluate the scheduling gates and either return a
|
|
849
|
+
* `DeferredDispatchSignal` (the caller throws it) or null (the caller
|
|
850
|
+
* proceeds with immediate dispatch).
|
|
851
|
+
*
|
|
852
|
+
* Order: debounce → delay. They DON'T compose in a single PR (a
|
|
853
|
+
* trigger may use one or the other; both at once would be unusual).
|
|
854
|
+
* If both are configured, debounce takes precedence — the debounce
|
|
855
|
+
* coordinator handles its own scheduling (the `delay` field is
|
|
856
|
+
* effectively ignored on debounced triggers).
|
|
857
|
+
*/
|
|
858
|
+
maybeDeferRun(ctx, traceRunId, schedCfg) {
|
|
859
|
+
const tracker = RunTracker.getInstance();
|
|
860
|
+
const workflowName = this.configuration.name || ctx.workflow_name || "unknown";
|
|
861
|
+
// === Debounce gate (Tier 2 #7) ===
|
|
862
|
+
if (schedCfg.debounce) {
|
|
863
|
+
const resolvedKey = resolveIdempotencyKey(schedCfg.debounce.keyExpression, ctx);
|
|
864
|
+
if (resolvedKey === null) {
|
|
865
|
+
// Fail-open — same semantics as concurrency-key resolution.
|
|
866
|
+
return null;
|
|
867
|
+
}
|
|
868
|
+
// Tier 2 follow-up · persist debounce dispatches alongside delay/queue
|
|
869
|
+
// entries. The DebounceCoordinator timer remains the in-process source
|
|
870
|
+
// of truth (silence-window semantics + latest-payload coalesce); the
|
|
871
|
+
// persisted row is for crash-recovery only. On boot, recovered
|
|
872
|
+
// debounced rows fire via setTimeout (no silence-window re-establishment
|
|
873
|
+
// — the time has already passed).
|
|
874
|
+
const persistPayload = this.extractDispatchPayload(ctx);
|
|
875
|
+
const triggerType = this.getTriggerType();
|
|
876
|
+
const onFire = async () => {
|
|
877
|
+
try {
|
|
878
|
+
await this.dispatchDeferred(ctx, traceRunId, undefined);
|
|
879
|
+
}
|
|
880
|
+
catch (err) {
|
|
881
|
+
console.error(`[blok][scheduling] debounce dispatchDeferred failed for run ${traceRunId}:`, err instanceof Error ? err.stack || err.message : err);
|
|
882
|
+
}
|
|
883
|
+
finally {
|
|
884
|
+
// Best-effort cleanup — the DeferredRunScheduler delete-on-fire
|
|
885
|
+
// path doesn't apply here (debounce uses its own timer). Use
|
|
886
|
+
// the scheduler's persistedOnly cancel to delete the row.
|
|
887
|
+
if (persistPayload !== null) {
|
|
888
|
+
DeferredRunScheduler.getInstance().cancel(traceRunId, true);
|
|
889
|
+
}
|
|
890
|
+
}
|
|
891
|
+
};
|
|
892
|
+
const result = DebounceCoordinator.getInstance().register({
|
|
893
|
+
workflowName,
|
|
894
|
+
debounceKey: resolvedKey,
|
|
895
|
+
mode: schedCfg.debounce.mode,
|
|
896
|
+
delayMs: schedCfg.debounce.delayMs,
|
|
897
|
+
maxDelayMs: schedCfg.debounce.maxDelayMs,
|
|
898
|
+
runId: traceRunId,
|
|
899
|
+
onFire,
|
|
900
|
+
});
|
|
901
|
+
if (result.outcome === "fire-immediate") {
|
|
902
|
+
// Leading-mode fresh window: caller runs the workflow synchronously.
|
|
903
|
+
// The coordinator already opened its window so subsequent pings
|
|
904
|
+
// within `delayMs` will coalesce. Caller continues to the
|
|
905
|
+
// concurrency gate + runner.run path.
|
|
906
|
+
return null;
|
|
907
|
+
}
|
|
908
|
+
if (result.outcome === "schedule-trailing") {
|
|
909
|
+
// Trailing-mode fresh window: this run is the active one. Mark
|
|
910
|
+
// `debounced` (transient) and throw the signal.
|
|
911
|
+
tracker.markRunDebounced(traceRunId, {
|
|
912
|
+
debounceKey: resolvedKey,
|
|
913
|
+
mode: schedCfg.debounce.mode,
|
|
914
|
+
pingCount: result.pingCount,
|
|
915
|
+
scheduledAt: result.scheduledAt,
|
|
916
|
+
});
|
|
917
|
+
// Tier 2 follow-up · durable debounce. Write a `dispatch_status:
|
|
918
|
+
// "debounced"` row so a process crash mid-window leaves a recoverable
|
|
919
|
+
// pointer at the active run + its captured payload.
|
|
920
|
+
if (persistPayload !== null && tracker.active) {
|
|
921
|
+
try {
|
|
922
|
+
tracker.getStore().upsertScheduledDispatch({
|
|
923
|
+
runId: traceRunId,
|
|
924
|
+
workflowName,
|
|
925
|
+
triggerType,
|
|
926
|
+
scheduledAt: result.scheduledAt ?? Date.now(),
|
|
927
|
+
dispatchStatus: "debounced",
|
|
928
|
+
payload: persistPayload,
|
|
929
|
+
createdAt: Date.now(),
|
|
930
|
+
});
|
|
931
|
+
}
|
|
932
|
+
catch (err) {
|
|
933
|
+
console.error(`[blok][scheduling] persist debounce dispatch failed for run ${traceRunId}:`, err instanceof Error ? err.stack || err.message : err);
|
|
934
|
+
}
|
|
935
|
+
}
|
|
936
|
+
return new DeferredDispatchSignal({
|
|
937
|
+
runId: traceRunId,
|
|
938
|
+
workflowName,
|
|
939
|
+
status: "debounced",
|
|
940
|
+
scheduledAt: result.scheduledAt ?? Date.now(),
|
|
941
|
+
debounced: true,
|
|
942
|
+
pingCount: result.pingCount,
|
|
943
|
+
});
|
|
944
|
+
}
|
|
945
|
+
// Coalesce — this ping joined an existing window. Mark THIS run
|
|
946
|
+
// `debounced` terminal pointing at the active run, and bump the
|
|
947
|
+
// active run's pingCount (best-effort — the active run is in the
|
|
948
|
+
// store).
|
|
949
|
+
tracker.markRunDebounced(traceRunId, {
|
|
950
|
+
debounceKey: resolvedKey,
|
|
951
|
+
mode: schedCfg.debounce.mode,
|
|
952
|
+
intoRunId: result.activeRunId,
|
|
953
|
+
pingCount: result.pingCount,
|
|
954
|
+
});
|
|
955
|
+
tracker.recordDebouncePing(result.activeRunId, {
|
|
956
|
+
pingCount: result.pingCount,
|
|
957
|
+
scheduledAt: result.scheduledAt ?? Date.now(),
|
|
958
|
+
});
|
|
959
|
+
// Tier 2 follow-up · update the active run's persisted dispatch with
|
|
960
|
+
// the latest payload + new scheduledAt. Trailing mode: each ping
|
|
961
|
+
// resets the dispatch time, and the coordinator captures the latest
|
|
962
|
+
// onFire closure — we mirror that into the persisted row so a crash
|
|
963
|
+
// recovery uses the latest payload.
|
|
964
|
+
if (result.outcome === "coalesce" &&
|
|
965
|
+
schedCfg.debounce.mode === "trailing" &&
|
|
966
|
+
persistPayload !== null &&
|
|
967
|
+
tracker.active) {
|
|
968
|
+
try {
|
|
969
|
+
tracker.getStore().upsertScheduledDispatch({
|
|
970
|
+
runId: result.activeRunId,
|
|
971
|
+
workflowName,
|
|
972
|
+
triggerType,
|
|
973
|
+
scheduledAt: result.scheduledAt ?? Date.now(),
|
|
974
|
+
dispatchStatus: "debounced",
|
|
975
|
+
payload: persistPayload,
|
|
976
|
+
createdAt: Date.now(),
|
|
977
|
+
});
|
|
978
|
+
}
|
|
979
|
+
catch (err) {
|
|
980
|
+
console.error(`[blok][scheduling] persist debounce coalesce failed for run ${result.activeRunId}:`, err instanceof Error ? err.stack || err.message : err);
|
|
981
|
+
}
|
|
982
|
+
}
|
|
983
|
+
return new DeferredDispatchSignal({
|
|
984
|
+
runId: traceRunId,
|
|
985
|
+
workflowName,
|
|
986
|
+
status: "debounced",
|
|
987
|
+
scheduledAt: result.scheduledAt ?? Date.now(),
|
|
988
|
+
debounced: true,
|
|
989
|
+
pingCount: result.pingCount,
|
|
990
|
+
intoRunId: result.activeRunId,
|
|
991
|
+
});
|
|
992
|
+
}
|
|
993
|
+
// === Delay gate (Tier 2 #5) ===
|
|
994
|
+
if (schedCfg.delayMs !== undefined && schedCfg.delayMs > 0) {
|
|
995
|
+
const scheduledAt = Date.now() + schedCfg.delayMs;
|
|
996
|
+
const expiresAt = schedCfg.ttlMs !== undefined ? Date.now() + schedCfg.ttlMs : undefined;
|
|
997
|
+
tracker.markRunDelayed(traceRunId, {
|
|
998
|
+
scheduledAt,
|
|
999
|
+
delayMs: schedCfg.delayMs,
|
|
1000
|
+
expiresAt,
|
|
1001
|
+
});
|
|
1002
|
+
// Tier 2 #5+#7 follow-up · durable scheduling.
|
|
1003
|
+
const persistPayload = this.extractDispatchPayload(ctx);
|
|
1004
|
+
DeferredRunScheduler.getInstance().schedule(traceRunId, scheduledAt, async () => {
|
|
1005
|
+
await this.dispatchDeferred(ctx, traceRunId, expiresAt);
|
|
1006
|
+
}, persistPayload === null
|
|
1007
|
+
? undefined
|
|
1008
|
+
: {
|
|
1009
|
+
workflowName,
|
|
1010
|
+
triggerType: this.getTriggerType(),
|
|
1011
|
+
expiresAt,
|
|
1012
|
+
dispatchStatus: "delayed",
|
|
1013
|
+
payload: persistPayload,
|
|
1014
|
+
});
|
|
1015
|
+
return new DeferredDispatchSignal({
|
|
1016
|
+
runId: traceRunId,
|
|
1017
|
+
workflowName,
|
|
1018
|
+
status: "delayed",
|
|
1019
|
+
scheduledAt,
|
|
1020
|
+
expiresAt,
|
|
1021
|
+
debounced: false,
|
|
1022
|
+
pingCount: 1,
|
|
1023
|
+
});
|
|
1024
|
+
}
|
|
1025
|
+
return null;
|
|
1026
|
+
}
|
|
1027
|
+
/**
|
|
1028
|
+
* Tier 2 #5 + #7 — re-enter the dispatch pipeline for a deferred run.
|
|
1029
|
+
*
|
|
1030
|
+
* Called by the `DeferredRunScheduler` timer (delay) or
|
|
1031
|
+
* `DebounceCoordinator.onFire` (debounce trailing) when the wait
|
|
1032
|
+
* window closes. Checks TTL, transitions the run to `running`, and
|
|
1033
|
+
* re-enters `run(ctx)` with the `_blokDispatchReentry` flag so the
|
|
1034
|
+
* scheduling gates are skipped on the second pass.
|
|
1035
|
+
*
|
|
1036
|
+
* The re-entered `run(ctx)` reuses the existing `traceRunId` (already
|
|
1037
|
+
* stashed on `ctx._traceRunId` from the first pass).
|
|
1038
|
+
*/
|
|
1039
|
+
async dispatchDeferred(ctx, traceRunId, expiresAt) {
|
|
1040
|
+
const tracker = RunTracker.getInstance();
|
|
1041
|
+
// TTL check — fire-once-then-give-up. If the dispatch is past its
|
|
1042
|
+
// TTL, mark the run `expired` and abort.
|
|
1043
|
+
if (expiresAt !== undefined && Date.now() > expiresAt) {
|
|
1044
|
+
tracker.markRunExpired(traceRunId, {
|
|
1045
|
+
expiresAt,
|
|
1046
|
+
expiredAt: Date.now(),
|
|
1047
|
+
});
|
|
1048
|
+
return;
|
|
1049
|
+
}
|
|
1050
|
+
// Flip status delayed/debounced → running.
|
|
1051
|
+
tracker.transitionRunToRunning(traceRunId);
|
|
1052
|
+
// Re-enter the dispatch pipeline. The reentry flag short-circuits
|
|
1053
|
+
// the scheduling gates so we don't loop. The existing traceRunId
|
|
1054
|
+
// is preserved (no second startRun call — see top of run()).
|
|
1055
|
+
const ctxRecord = ctx;
|
|
1056
|
+
ctxRecord._blokDispatchReentry = true;
|
|
1057
|
+
try {
|
|
1058
|
+
await this.run(ctx);
|
|
1059
|
+
}
|
|
1060
|
+
catch (err) {
|
|
1061
|
+
// The re-entered `run()` already handled tracker.failRun /
|
|
1062
|
+
// markRunThrottled internally. Swallow here so timer callbacks
|
|
1063
|
+
// don't crash on uncaught rejections.
|
|
1064
|
+
void err;
|
|
1065
|
+
}
|
|
1066
|
+
finally {
|
|
1067
|
+
ctxRecord._blokDispatchReentry = false;
|
|
1068
|
+
}
|
|
1069
|
+
}
|
|
312
1070
|
/**
|
|
313
1071
|
* Build a human-readable trigger summary for trace display.
|
|
314
1072
|
*/
|
|
@@ -321,17 +1079,61 @@ export default class TriggerBase extends Trigger {
|
|
|
321
1079
|
}
|
|
322
1080
|
createContext(logger, blueprintPath, id) {
|
|
323
1081
|
const requestId = id || uuid();
|
|
1082
|
+
const request = { body: {} };
|
|
1083
|
+
const response = { data: "", contentType: "", success: true, error: null };
|
|
1084
|
+
// Single state object — shared by ctx.state (canonical) and ctx.vars
|
|
1085
|
+
// (legacy alias). All step outputs land here unless `ephemeral: true`.
|
|
1086
|
+
const state = {};
|
|
1087
|
+
// Tier 2 follow-up · cooperative cancellation. Each context owns
|
|
1088
|
+
// an AbortController whose signal flips when an operator cancels
|
|
1089
|
+
// the run via `POST /__blok/runs/:runId/cancel` while it's in
|
|
1090
|
+
// `running` status. RunnerSteps' between-step check throws
|
|
1091
|
+
// `RunCancelledError` which TriggerBase catches without flipping
|
|
1092
|
+
// the run to `failed` (the tracker has already flipped it to
|
|
1093
|
+
// `cancelled`).
|
|
1094
|
+
const abortController = new AbortController();
|
|
324
1095
|
const ctx = {
|
|
325
1096
|
id: requestId,
|
|
326
1097
|
workflow_name: this.configuration.name,
|
|
327
1098
|
workflow_path: blueprintPath || "",
|
|
328
1099
|
config: this.configuration.nodes,
|
|
329
|
-
request
|
|
330
|
-
response
|
|
1100
|
+
request,
|
|
1101
|
+
response,
|
|
331
1102
|
error: { message: [] },
|
|
332
1103
|
logger: logger || new DefaultLogger(this.configuration.name, blueprintPath, requestId),
|
|
333
1104
|
eventLogger: null,
|
|
334
|
-
|
|
1105
|
+
state,
|
|
1106
|
+
// vars is a legacy alias of state — same reference, mutations
|
|
1107
|
+
// to either propagate. Authors writing `ctx.vars[k] = v` keep
|
|
1108
|
+
// working; the runner reads via state.
|
|
1109
|
+
vars: state,
|
|
1110
|
+
signal: abortController.signal,
|
|
1111
|
+
// Stash the controller on _PRIVATE_ so TriggerBase.run can
|
|
1112
|
+
// hand it to the tracker without exposing it on the public ctx.
|
|
1113
|
+
_PRIVATE_: { abortController },
|
|
1114
|
+
};
|
|
1115
|
+
// V2 read-only aliases — same object reference, no copy.
|
|
1116
|
+
// Reads via ctx.req / ctx.prev work; writes go to the canonical
|
|
1117
|
+
// field (request / response).
|
|
1118
|
+
Object.defineProperty(ctx, "req", {
|
|
1119
|
+
get() {
|
|
1120
|
+
return ctx.request;
|
|
1121
|
+
},
|
|
1122
|
+
enumerable: true,
|
|
1123
|
+
});
|
|
1124
|
+
Object.defineProperty(ctx, "prev", {
|
|
1125
|
+
get() {
|
|
1126
|
+
return ctx.response;
|
|
1127
|
+
},
|
|
1128
|
+
enumerable: true,
|
|
1129
|
+
});
|
|
1130
|
+
// Explicit side-channel publication. Writes to state under `name`
|
|
1131
|
+
// and emits a Studio trace event. Most nodes don't need this —
|
|
1132
|
+
// returning the value lets the runner persist it via PersistenceHelper.
|
|
1133
|
+
ctx.publish = (name, value) => {
|
|
1134
|
+
ctx.state[name] = value;
|
|
1135
|
+
const evt = ctx.eventLogger;
|
|
1136
|
+
evt?.emit?.("publish", { name, value, runId: requestId });
|
|
335
1137
|
};
|
|
336
1138
|
Object.defineProperty(ctx, "id", {
|
|
337
1139
|
value: requestId,
|