@blokjs/runner 0.6.21 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167) hide show
  1. package/dist/Blok.d.ts +2 -0
  2. package/dist/Blok.js +42 -110
  3. package/dist/Blok.js.map +1 -1
  4. package/dist/DefaultLogger.d.ts +13 -0
  5. package/dist/DefaultLogger.js +25 -0
  6. package/dist/DefaultLogger.js.map +1 -1
  7. package/dist/RunnerSteps.d.ts +23 -0
  8. package/dist/RunnerSteps.js +128 -87
  9. package/dist/RunnerSteps.js.map +1 -1
  10. package/dist/SubworkflowNode.js +19 -0
  11. package/dist/SubworkflowNode.js.map +1 -1
  12. package/dist/TriggerBase.d.ts +12 -0
  13. package/dist/TriggerBase.js +216 -181
  14. package/dist/TriggerBase.js.map +1 -1
  15. package/dist/adapters/grpc/GrpcRuntimeAdapter.d.ts +9 -0
  16. package/dist/adapters/grpc/GrpcRuntimeAdapter.js +76 -6
  17. package/dist/adapters/grpc/GrpcRuntimeAdapter.js.map +1 -1
  18. package/dist/index.d.ts +4 -39
  19. package/dist/index.js +7 -32
  20. package/dist/index.js.map +1 -1
  21. package/dist/monitoring/JanitorMetrics.d.ts +3 -0
  22. package/dist/monitoring/JanitorMetrics.js +11 -0
  23. package/dist/monitoring/JanitorMetrics.js.map +1 -1
  24. package/dist/monitoring/ProcessErrorMetrics.d.ts +32 -0
  25. package/dist/monitoring/ProcessErrorMetrics.js +43 -0
  26. package/dist/monitoring/ProcessErrorMetrics.js.map +1 -0
  27. package/dist/monitoring/PrometheusMetricsBridge.d.ts +7 -0
  28. package/dist/monitoring/PrometheusMetricsBridge.js +8 -2
  29. package/dist/monitoring/PrometheusMetricsBridge.js.map +1 -1
  30. package/dist/monitoring/SubworkflowMetrics.d.ts +25 -0
  31. package/dist/monitoring/SubworkflowMetrics.js +38 -0
  32. package/dist/monitoring/SubworkflowMetrics.js.map +1 -0
  33. package/dist/observability/ErrorSink.d.ts +23 -0
  34. package/dist/observability/ErrorSink.js +32 -0
  35. package/dist/observability/ErrorSink.js.map +1 -0
  36. package/dist/observability/SentryIntegration.d.ts +9 -0
  37. package/dist/observability/SentryIntegration.js +31 -0
  38. package/dist/observability/SentryIntegration.js.map +1 -0
  39. package/dist/scheduling/DebounceCoordinator.d.ts +7 -53
  40. package/dist/scheduling/DebounceCoordinator.js +8 -207
  41. package/dist/scheduling/DebounceCoordinator.js.map +1 -1
  42. package/dist/tracing/InMemoryRunStore.d.ts +5 -1
  43. package/dist/tracing/InMemoryRunStore.js +14 -0
  44. package/dist/tracing/InMemoryRunStore.js.map +1 -1
  45. package/dist/tracing/Janitor.js +3 -0
  46. package/dist/tracing/Janitor.js.map +1 -1
  47. package/dist/tracing/PostgresRunStore.d.ts +4 -1
  48. package/dist/tracing/PostgresRunStore.js +73 -3
  49. package/dist/tracing/PostgresRunStore.js.map +1 -1
  50. package/dist/tracing/RunStore.d.ts +17 -1
  51. package/dist/tracing/RunTracker.d.ts +13 -34
  52. package/dist/tracing/RunTracker.js +62 -32
  53. package/dist/tracing/RunTracker.js.map +1 -1
  54. package/dist/tracing/SqliteRunStore.d.ts +4 -1
  55. package/dist/tracing/SqliteRunStore.js +60 -0
  56. package/dist/tracing/SqliteRunStore.js.map +1 -1
  57. package/dist/tracing/TraceRouter.d.ts +13 -0
  58. package/dist/tracing/TraceRouter.js +43 -11
  59. package/dist/tracing/TraceRouter.js.map +1 -1
  60. package/dist/tracing/TracingLogger.js +22 -0
  61. package/dist/tracing/TracingLogger.js.map +1 -1
  62. package/dist/tracing/createStore.js +51 -22
  63. package/dist/tracing/createStore.js.map +1 -1
  64. package/dist/tracing/types.d.ts +22 -0
  65. package/dist/types/GlobalOptions.d.ts +5 -7
  66. package/dist/workflow/WorkflowNormalizer.js +63 -0
  67. package/dist/workflow/WorkflowNormalizer.js.map +1 -1
  68. package/package.json +7 -4
  69. package/dist/cache/NodeResultCache.d.ts +0 -286
  70. package/dist/cache/NodeResultCache.js +0 -506
  71. package/dist/cache/NodeResultCache.js.map +0 -1
  72. package/dist/cache/index.d.ts +0 -1
  73. package/dist/cache/index.js +0 -2
  74. package/dist/cache/index.js.map +0 -1
  75. package/dist/concurrency/ConcurrencyBackend.d.ts +0 -61
  76. package/dist/concurrency/ConcurrencyBackend.js +0 -20
  77. package/dist/concurrency/ConcurrencyBackend.js.map +0 -1
  78. package/dist/concurrency/NatsKvConcurrencyBackend.d.ts +0 -64
  79. package/dist/concurrency/NatsKvConcurrencyBackend.js +0 -310
  80. package/dist/concurrency/NatsKvConcurrencyBackend.js.map +0 -1
  81. package/dist/concurrency/RedisConcurrencyBackend.d.ts +0 -64
  82. package/dist/concurrency/RedisConcurrencyBackend.js +0 -374
  83. package/dist/concurrency/RedisConcurrencyBackend.js.map +0 -1
  84. package/dist/concurrency/createConcurrencyBackend.d.ts +0 -24
  85. package/dist/concurrency/createConcurrencyBackend.js +0 -38
  86. package/dist/concurrency/createConcurrencyBackend.js.map +0 -1
  87. package/dist/graphql/GraphQLSchemaGenerator.d.ts +0 -129
  88. package/dist/graphql/GraphQLSchemaGenerator.js +0 -425
  89. package/dist/graphql/GraphQLSchemaGenerator.js.map +0 -1
  90. package/dist/integrations/APMIntegration.d.ts +0 -141
  91. package/dist/integrations/APMIntegration.js +0 -212
  92. package/dist/integrations/APMIntegration.js.map +0 -1
  93. package/dist/integrations/AzureMonitorIntegration.d.ts +0 -118
  94. package/dist/integrations/AzureMonitorIntegration.js +0 -254
  95. package/dist/integrations/AzureMonitorIntegration.js.map +0 -1
  96. package/dist/integrations/CloudWatchIntegration.d.ts +0 -135
  97. package/dist/integrations/CloudWatchIntegration.js +0 -293
  98. package/dist/integrations/CloudWatchIntegration.js.map +0 -1
  99. package/dist/integrations/SentryIntegration.d.ts +0 -153
  100. package/dist/integrations/SentryIntegration.js +0 -200
  101. package/dist/integrations/SentryIntegration.js.map +0 -1
  102. package/dist/integrations/index.d.ts +0 -19
  103. package/dist/integrations/index.js +0 -16
  104. package/dist/integrations/index.js.map +0 -1
  105. package/dist/marketplace/RuntimeAutoScaler.d.ts +0 -148
  106. package/dist/marketplace/RuntimeAutoScaler.js +0 -366
  107. package/dist/marketplace/RuntimeAutoScaler.js.map +0 -1
  108. package/dist/marketplace/RuntimeCatalog.d.ts +0 -180
  109. package/dist/marketplace/RuntimeCatalog.js +0 -339
  110. package/dist/marketplace/RuntimeCatalog.js.map +0 -1
  111. package/dist/marketplace/RuntimeDiscovery.d.ts +0 -86
  112. package/dist/marketplace/RuntimeDiscovery.js +0 -231
  113. package/dist/marketplace/RuntimeDiscovery.js.map +0 -1
  114. package/dist/marketplace/RuntimeHealthMonitor.d.ts +0 -100
  115. package/dist/marketplace/RuntimeHealthMonitor.js +0 -241
  116. package/dist/marketplace/RuntimeHealthMonitor.js.map +0 -1
  117. package/dist/marketplace/RuntimeMetricsDashboard.d.ts +0 -113
  118. package/dist/marketplace/RuntimeMetricsDashboard.js +0 -293
  119. package/dist/marketplace/RuntimeMetricsDashboard.js.map +0 -1
  120. package/dist/openapi/OpenAPIGenerator.d.ts +0 -192
  121. package/dist/openapi/OpenAPIGenerator.js +0 -378
  122. package/dist/openapi/OpenAPIGenerator.js.map +0 -1
  123. package/dist/openapi/index.d.ts +0 -20
  124. package/dist/openapi/index.js +0 -20
  125. package/dist/openapi/index.js.map +0 -1
  126. package/dist/scheduling/DebounceBackend.d.ts +0 -108
  127. package/dist/scheduling/DebounceBackend.js +0 -23
  128. package/dist/scheduling/DebounceBackend.js.map +0 -1
  129. package/dist/scheduling/NatsKvDebounceBackend.d.ts +0 -53
  130. package/dist/scheduling/NatsKvDebounceBackend.js +0 -334
  131. package/dist/scheduling/NatsKvDebounceBackend.js.map +0 -1
  132. package/dist/scheduling/RedisDebounceBackend.d.ts +0 -49
  133. package/dist/scheduling/RedisDebounceBackend.js +0 -356
  134. package/dist/scheduling/RedisDebounceBackend.js.map +0 -1
  135. package/dist/scheduling/createDebounceBackend.d.ts +0 -25
  136. package/dist/scheduling/createDebounceBackend.js +0 -39
  137. package/dist/scheduling/createDebounceBackend.js.map +0 -1
  138. package/dist/security/ABAC.d.ts +0 -224
  139. package/dist/security/ABAC.js +0 -380
  140. package/dist/security/ABAC.js.map +0 -1
  141. package/dist/security/AuditLogger.d.ts +0 -242
  142. package/dist/security/AuditLogger.js +0 -317
  143. package/dist/security/AuditLogger.js.map +0 -1
  144. package/dist/security/AuthMiddleware.d.ts +0 -162
  145. package/dist/security/AuthMiddleware.js +0 -289
  146. package/dist/security/AuthMiddleware.js.map +0 -1
  147. package/dist/security/EncryptionAtRest.d.ts +0 -206
  148. package/dist/security/EncryptionAtRest.js +0 -236
  149. package/dist/security/EncryptionAtRest.js.map +0 -1
  150. package/dist/security/OAuthProvider.d.ts +0 -334
  151. package/dist/security/OAuthProvider.js +0 -719
  152. package/dist/security/OAuthProvider.js.map +0 -1
  153. package/dist/security/PIIDetector.d.ts +0 -233
  154. package/dist/security/PIIDetector.js +0 -354
  155. package/dist/security/PIIDetector.js.map +0 -1
  156. package/dist/security/RBAC.d.ts +0 -143
  157. package/dist/security/RBAC.js +0 -285
  158. package/dist/security/RBAC.js.map +0 -1
  159. package/dist/security/SecretManager.d.ts +0 -652
  160. package/dist/security/SecretManager.js +0 -1147
  161. package/dist/security/SecretManager.js.map +0 -1
  162. package/dist/security/TLSConfig.d.ts +0 -305
  163. package/dist/security/TLSConfig.js +0 -550
  164. package/dist/security/TLSConfig.js.map +0 -1
  165. package/dist/security/index.d.ts +0 -81
  166. package/dist/security/index.js +0 -82
  167. package/dist/security/index.js.map +0 -1
@@ -14,9 +14,12 @@ import { resolveIdempotencyKey } from "./idempotency/resolveIdempotencyKey";
14
14
  import { CircuitBreaker } from "./monitoring/CircuitBreaker";
15
15
  import { ConcurrencyMetrics } from "./monitoring/ConcurrencyMetrics";
16
16
  import { HealthCheck } from "./monitoring/HealthCheck";
17
+ import { ProcessErrorMetrics } from "./monitoring/ProcessErrorMetrics";
17
18
  import { PrometheusMetricsBridge } from "./monitoring/PrometheusMetricsBridge";
18
19
  import { RateLimiter } from "./monitoring/RateLimiter";
19
20
  import { TriggerMetricsCollector } from "./monitoring/TriggerMetricsCollector";
21
+ import { captureError, setErrorSink } from "./observability/ErrorSink";
22
+ import { createSentryErrorSink } from "./observability/SentryIntegration";
20
23
  import { DebounceCoordinator } from "./scheduling/DebounceCoordinator";
21
24
  import { DeferredDispatchSignal } from "./scheduling/DeferredDispatchSignal";
22
25
  import { DeferredRunScheduler } from "./scheduling/DeferredRunScheduler";
@@ -257,6 +260,12 @@ export default class TriggerBase extends Trigger {
257
260
  return;
258
261
  TriggerBase.crashHandlersInstalled = true;
259
262
  const onUncaught = (err) => {
263
+ // OBS-06 T8 — count the fatal-error event before the flip/rethrow.
264
+ ProcessErrorMetrics.getInstance().recordUnhandledRejection({
265
+ trigger_type: "process",
266
+ reason_class: err?.constructor?.name ?? "Error",
267
+ });
268
+ captureError(err, { source: "uncaughtException" });
260
269
  try {
261
270
  const flipped = RunTracker.getInstance().markAllRunningRunsAsCrashed(err);
262
271
  logger?.error?.(`[blok][crash-autoflip] uncaughtException — flipped ${flipped} running run(s) to crashed: ${err.stack || err.message}`);
@@ -273,6 +282,12 @@ export default class TriggerBase extends Trigger {
273
282
  };
274
283
  const onRejection = (reason) => {
275
284
  const err = reason instanceof Error ? reason : new Error(String(reason));
285
+ // OBS-06 T8 — count the rejection event.
286
+ ProcessErrorMetrics.getInstance().recordUnhandledRejection({
287
+ trigger_type: "process",
288
+ reason_class: err.constructor?.name ?? "Error",
289
+ });
290
+ captureError(err, { source: "unhandledRejection" });
276
291
  try {
277
292
  const flipped = RunTracker.getInstance().markAllRunningRunsAsCrashed(err);
278
293
  logger?.error?.(`[blok][crash-autoflip] unhandledRejection — flipped ${flipped} running run(s) to crashed: ${err.stack || err.message}`);
@@ -350,35 +365,6 @@ export default class TriggerBase extends Trigger {
350
365
  catch {
351
366
  // Best-effort.
352
367
  }
353
- // 4. Disconnect cross-process concurrency backend.
354
- //
355
- // PR 3 D5 — wrap disconnect() in a Promise.race timeout so a
356
- // slow NATS drain doesn't hang past the SIGTERM-to-SIGKILL
357
- // window. Default 10s; configurable via
358
- // BLOK_BACKEND_DISCONNECT_TIMEOUT_MS. Timer is .unref()'d so
359
- // it doesn't keep the event loop alive after a successful
360
- // disconnect.
361
- const backend = RunTracker.getInstance().getConcurrencyBackend();
362
- if (backend) {
363
- const disconnectTimeoutMs = (() => {
364
- const raw = process.env.BLOK_BACKEND_DISCONNECT_TIMEOUT_MS;
365
- if (!raw || !/^\d+$/.test(raw))
366
- return 10_000;
367
- return Number(raw);
368
- })();
369
- try {
370
- await Promise.race([
371
- backend.disconnect(),
372
- new Promise((_, reject) => {
373
- const t = setTimeout(() => reject(new Error(`backend.disconnect() timed out after ${disconnectTimeoutMs}ms`)), disconnectTimeoutMs);
374
- t.unref?.();
375
- }),
376
- ]);
377
- }
378
- catch (err) {
379
- logger?.error?.(`[blok][shutdown] backend disconnect failed (or timed out): ${err instanceof Error ? err.message : String(err)}`);
380
- }
381
- }
382
368
  logger?.log?.("[blok][shutdown] graceful shutdown complete");
383
369
  }
384
370
  catch (err) {
@@ -466,6 +452,19 @@ export default class TriggerBase extends Trigger {
466
452
  catch (err) {
467
453
  logger?.error?.(`[blok][shutdown] setup failed: ${err instanceof Error ? err.message : String(err)}`);
468
454
  }
455
+ // Error sink (MO-ALERTS) — opt-in via SENTRY_DSN. Forwards uncaught
456
+ // exceptions / unhandled rejections to Sentry. Unset DSN → no sink
457
+ // installed → zero behaviour change. Setup failure is logged, never fatal.
458
+ try {
459
+ const dsn = process.env.SENTRY_DSN;
460
+ if (dsn) {
461
+ setErrorSink(createSentryErrorSink(dsn));
462
+ logger?.log?.("[blok][error-sink] Sentry error sink initialized (SENTRY_DSN set).");
463
+ }
464
+ }
465
+ catch (err) {
466
+ logger?.error?.(`[blok][error-sink] setup failed: ${err instanceof Error ? err.message : String(err)}`);
467
+ }
469
468
  }
470
469
  /**
471
470
  * F6 — populate the `WorkflowRegistry` from `this.nodeMap.workflows`
@@ -652,6 +651,176 @@ export default class TriggerBase extends Trigger {
652
651
  this.hmr = null;
653
652
  }
654
653
  }
654
+ /**
655
+ * Re-entry trace setup (deferred timer fire). Rehydrates ctx.state +
656
+ * iteration cursors from the persisted snapshot and re-registers the
657
+ * AbortController. Extracted verbatim from run(); see the inline notes.
658
+ */
659
+ rehydrateDeferredRun(ctx, tracker) {
660
+ const ctxRecord = ctx;
661
+ const traceRunId = ctxRecord._traceRunId;
662
+ // Logger wrapping was already applied on the first pass — no
663
+ // need to re-wrap (and re-wrapping would double-route logs).
664
+ // PR 1 follow-up · A2 fix. The first-pass `finally` block
665
+ // unregisters the AbortController via `tracker.unregisterAbortController`.
666
+ // Without re-registering on re-entry, `tracker.abortRunningRun(runId)`
667
+ // can't fire the controller — the controller stays on
668
+ // `ctx._PRIVATE_.abortController` but the tracker's lookup
669
+ // returns undefined. Operator cancel of a `running` run that
670
+ // came from delayed/queued/debounced flips status to "cancelled"
671
+ // but the in-flight step never sees `ctx.signal.aborted`.
672
+ // Re-register here mirroring the first-pass branch below.
673
+ if (traceRunId) {
674
+ const privateSlot = ctx._PRIVATE_;
675
+ if (privateSlot?.abortController) {
676
+ tracker.registerAbortController(traceRunId, privateSlot.abortController);
677
+ }
678
+ // v0.6 prerequisite for wait-inside-primitives Phase 2 —
679
+ // rehydrate `ctx.state` from the persisted snapshot the
680
+ // runner took at the wait throw site. Two re-entry paths
681
+ // converge here:
682
+ // 1. In-process timer fire — same `ctx`, state already
683
+ // populated. Rehydrate is a no-op (the parsed
684
+ // snapshot equals current state); we still apply it
685
+ // for uniformity and to forgive any micro-drift
686
+ // between snapshot and current state if a malicious
687
+ // caller re-enters with a tampered ctx.
688
+ // 2. Cross-process recovery (`recoverDispatches` →
689
+ // `restoreDispatch` → `dispatchDeferred` with a
690
+ // fresh ctx). Without rehydrate, state is empty and
691
+ // forEach iteration index / loop accumulator / saga
692
+ // progress are all lost.
693
+ //
694
+ // Mutates `ctx.state` IN PLACE rather than reassigning so
695
+ // the `vars: state` alias set up in `createContext` keeps
696
+ // pointing at the same object. Authors writing
697
+ // `ctx.vars[k] = v` continue to mutate the canonical
698
+ // store; otherwise we'd silently fork the two views.
699
+ const persistedRun = tracker.getStore().getRun(traceRunId);
700
+ if (persistedRun?.stateSnapshot) {
701
+ try {
702
+ const parsed = JSON.parse(persistedRun.stateSnapshot);
703
+ if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
704
+ const stateObj = ctx.state;
705
+ for (const k of Object.keys(stateObj)) {
706
+ delete stateObj[k];
707
+ }
708
+ Object.assign(stateObj, parsed);
709
+ }
710
+ }
711
+ catch (err) {
712
+ const msg = err instanceof Error ? err.message : String(err);
713
+ ctx.logger.logLevel("warn", `[blok][wait] failed to rehydrate ctx.state from snapshot: ${msg}. Proceeding with the ctx the runner was given.`);
714
+ }
715
+ }
716
+ // v0.6 wait-inside-primitives — rehydrate every NodeRun's
717
+ // `iteration_context` into a Map keyed by step NAME
718
+ // (not NodeRun id) so each primitive's run() can look
719
+ // itself up across defer/resume cycles. NodeRun ids
720
+ // CHANGE on every dispatchDeferred re-entry (tracker
721
+ // creates a fresh NodeRun per pass), but step names are
722
+ // stable across the run. Phase 4 — multiple primitives'
723
+ // cursors coexist (e.g. forEach > forEach > wait), one
724
+ // entry per primitive name. When two NodeRuns share a
725
+ // name (the rare case of two siblings at the same depth
726
+ // with the same step id), the LATEST wins via the
727
+ // startedAt-with-insertion-order tiebreak.
728
+ try {
729
+ const nodeRuns = tracker.getStore().getNodeRuns(traceRunId);
730
+ const sortedDesc = nodeRuns
731
+ .map((n, idx) => ({ n, idx }))
732
+ .filter(({ n }) => n.iterationContext !== undefined)
733
+ .sort((a, b) => {
734
+ const dt = b.n.startedAt - a.n.startedAt;
735
+ return dt !== 0 ? dt : b.idx - a.idx;
736
+ });
737
+ const cursorMap = new Map();
738
+ for (const { n } of sortedDesc) {
739
+ if (n.iterationContext === undefined)
740
+ continue;
741
+ // First write per name wins because sortedDesc is
742
+ // latest-first; this gives each primitive its most
743
+ // recent cursor.
744
+ if (!cursorMap.has(n.nodeName)) {
745
+ cursorMap.set(n.nodeName, n.iterationContext);
746
+ }
747
+ }
748
+ if (cursorMap.size > 0) {
749
+ ctx._blokIterationCursors = cursorMap;
750
+ }
751
+ // Back-compat `_blokIterationResume` (single-slot)
752
+ // keeps legacy callers working. Populated from the
753
+ // most recent cursor across the run.
754
+ const top = sortedDesc[0]?.n.iterationContext;
755
+ if (top) {
756
+ ctx._blokIterationResume = top;
757
+ }
758
+ }
759
+ catch (err) {
760
+ const msg = err instanceof Error ? err.message : String(err);
761
+ ctx.logger.logLevel("warn", `[blok][wait] failed to rehydrate iteration_context: ${msg}. primitives will resume from iteration 0.`);
762
+ }
763
+ }
764
+ return traceRunId;
765
+ }
766
+ /**
767
+ * First-pass trace setup. Starts the run record (replay + sub-workflow
768
+ * lineage from headers), registers the AbortController, and wraps the
769
+ * logger. Extracted verbatim from run().
770
+ */
771
+ startRunTrace(ctx, cfg, tracker) {
772
+ const ctxRecord = ctx;
773
+ const runner = this.getRunner(cfg);
774
+ const stepCount = runner.getStepCount?.() ?? cfg.steps?.length ?? 0;
775
+ // Tier 1 · replay lineage. The replay endpoint
776
+ // (TraceRouter.POST /__blok/runs/:id/replay) sets
777
+ // `X-Blok-Replay-Of: <originalRunId>` on the dispatched HTTP
778
+ // request. Read it here so the new run carries `replayOf` and
779
+ // Studio can render a "Replay of #..." breadcrumb.
780
+ const reqHeaders = (ctx.request?.headers ?? {});
781
+ const replayOfHeader = reqHeaders["x-blok-replay-of"] ?? reqHeaders["X-Blok-Replay-Of"];
782
+ const replayOf = Array.isArray(replayOfHeader)
783
+ ? replayOfHeader[0]
784
+ : typeof replayOfHeader === "string"
785
+ ? replayOfHeader
786
+ : undefined;
787
+ // G2 (v0.6) · sub-workflow lineage across the HTTP boundary.
788
+ // `SubworkflowNode.dispatchHttpSelf` sets these headers on the
789
+ // outbound self-call so the receiver's run record carries the
790
+ // parent ids. Without this, an http-self child would appear
791
+ // as a fresh top-level run with no Studio breadcrumb.
792
+ const parentRunId = pickHeader(reqHeaders, "x-blok-parent-run-id");
793
+ const parentNodeRunId = pickHeader(reqHeaders, "x-blok-parent-node-run-id");
794
+ const run = tracker.startRun({
795
+ workflowName: cfg.name || ctx.workflow_name || "unknown",
796
+ workflowPath: ctx.workflow_path || "",
797
+ triggerType: this.constructor.name.replace("Trigger", "").toLowerCase() || "unknown",
798
+ triggerSummary: this.buildTraceTriggerSummary(ctx),
799
+ nodeCount: stepCount,
800
+ replayOf,
801
+ parentRunId,
802
+ parentNodeRunId,
803
+ });
804
+ const traceRunId = run.id;
805
+ ctxRecord._traceRunId = run.id;
806
+ // Carry the sub-workflow depth across the HTTP hop so the
807
+ // recursion guard in nested children still fires.
808
+ const depthHeader = pickHeader(reqHeaders, "x-blok-subworkflow-depth");
809
+ const parsedDepth = depthHeader ? Number.parseInt(depthHeader, 10) : Number.NaN;
810
+ if (Number.isFinite(parsedDepth) && parsedDepth > 0) {
811
+ ctxRecord._subworkflowDepth = parsedDepth;
812
+ }
813
+ // Tier 2 follow-up · register the ctx's AbortController so the
814
+ // cancel API can fire it for `running` runs. Stashed on
815
+ // _PRIVATE_ by createContext; lookup via the optional shape.
816
+ const privateSlot = ctx._PRIVATE_;
817
+ if (privateSlot?.abortController) {
818
+ tracker.registerAbortController(run.id, privateSlot.abortController);
819
+ }
820
+ // Wrap logger to forward log entries to RunTracker
821
+ ctx.logger = new TracingLogger(ctx.logger, run.id, tracker);
822
+ return traceRunId;
823
+ }
655
824
  async run(ctx, configuration = this.configuration) {
656
825
  this.inFlightRequests++;
657
826
  const runStart = performance.now();
@@ -678,160 +847,10 @@ export default class TriggerBase extends Trigger {
678
847
  const ctxRecord = ctx;
679
848
  const isReentryAtTrace = ctxRecord._blokDispatchReentry === true;
680
849
  if (tracker.active && isReentryAtTrace) {
681
- traceRunId = ctxRecord._traceRunId;
682
- // Logger wrapping was already applied on the first pass — no
683
- // need to re-wrap (and re-wrapping would double-route logs).
684
- // PR 1 follow-up · A2 fix. The first-pass `finally` block
685
- // unregisters the AbortController via `tracker.unregisterAbortController`.
686
- // Without re-registering on re-entry, `tracker.abortRunningRun(runId)`
687
- // can't fire the controller — the controller stays on
688
- // `ctx._PRIVATE_.abortController` but the tracker's lookup
689
- // returns undefined. Operator cancel of a `running` run that
690
- // came from delayed/queued/debounced flips status to "cancelled"
691
- // but the in-flight step never sees `ctx.signal.aborted`.
692
- // Re-register here mirroring the first-pass branch below.
693
- if (traceRunId) {
694
- const privateSlot = ctx._PRIVATE_;
695
- if (privateSlot?.abortController) {
696
- tracker.registerAbortController(traceRunId, privateSlot.abortController);
697
- }
698
- // v0.6 prerequisite for wait-inside-primitives Phase 2 —
699
- // rehydrate `ctx.state` from the persisted snapshot the
700
- // runner took at the wait throw site. Two re-entry paths
701
- // converge here:
702
- // 1. In-process timer fire — same `ctx`, state already
703
- // populated. Rehydrate is a no-op (the parsed
704
- // snapshot equals current state); we still apply it
705
- // for uniformity and to forgive any micro-drift
706
- // between snapshot and current state if a malicious
707
- // caller re-enters with a tampered ctx.
708
- // 2. Cross-process recovery (`recoverDispatches` →
709
- // `restoreDispatch` → `dispatchDeferred` with a
710
- // fresh ctx). Without rehydrate, state is empty and
711
- // forEach iteration index / loop accumulator / saga
712
- // progress are all lost.
713
- //
714
- // Mutates `ctx.state` IN PLACE rather than reassigning so
715
- // the `vars: state` alias set up in `createContext` keeps
716
- // pointing at the same object. Authors writing
717
- // `ctx.vars[k] = v` continue to mutate the canonical
718
- // store; otherwise we'd silently fork the two views.
719
- const persistedRun = tracker.getStore().getRun(traceRunId);
720
- if (persistedRun?.stateSnapshot) {
721
- try {
722
- const parsed = JSON.parse(persistedRun.stateSnapshot);
723
- if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
724
- const stateObj = ctx.state;
725
- for (const k of Object.keys(stateObj)) {
726
- delete stateObj[k];
727
- }
728
- Object.assign(stateObj, parsed);
729
- }
730
- }
731
- catch (err) {
732
- const msg = err instanceof Error ? err.message : String(err);
733
- ctx.logger.logLevel("warn", `[blok][wait] failed to rehydrate ctx.state from snapshot: ${msg}. Proceeding with the ctx the runner was given.`);
734
- }
735
- }
736
- // v0.6 wait-inside-primitives — rehydrate every NodeRun's
737
- // `iteration_context` into a Map keyed by step NAME
738
- // (not NodeRun id) so each primitive's run() can look
739
- // itself up across defer/resume cycles. NodeRun ids
740
- // CHANGE on every dispatchDeferred re-entry (tracker
741
- // creates a fresh NodeRun per pass), but step names are
742
- // stable across the run. Phase 4 — multiple primitives'
743
- // cursors coexist (e.g. forEach > forEach > wait), one
744
- // entry per primitive name. When two NodeRuns share a
745
- // name (the rare case of two siblings at the same depth
746
- // with the same step id), the LATEST wins via the
747
- // startedAt-with-insertion-order tiebreak.
748
- try {
749
- const nodeRuns = tracker.getStore().getNodeRuns(traceRunId);
750
- const sortedDesc = nodeRuns
751
- .map((n, idx) => ({ n, idx }))
752
- .filter(({ n }) => n.iterationContext !== undefined)
753
- .sort((a, b) => {
754
- const dt = b.n.startedAt - a.n.startedAt;
755
- return dt !== 0 ? dt : b.idx - a.idx;
756
- });
757
- const cursorMap = new Map();
758
- for (const { n } of sortedDesc) {
759
- if (n.iterationContext === undefined)
760
- continue;
761
- // First write per name wins because sortedDesc is
762
- // latest-first; this gives each primitive its most
763
- // recent cursor.
764
- if (!cursorMap.has(n.nodeName)) {
765
- cursorMap.set(n.nodeName, n.iterationContext);
766
- }
767
- }
768
- if (cursorMap.size > 0) {
769
- ctx._blokIterationCursors = cursorMap;
770
- }
771
- // Back-compat `_blokIterationResume` (single-slot)
772
- // keeps legacy callers working. Populated from the
773
- // most recent cursor across the run.
774
- const top = sortedDesc[0]?.n.iterationContext;
775
- if (top) {
776
- ctx._blokIterationResume = top;
777
- }
778
- }
779
- catch (err) {
780
- const msg = err instanceof Error ? err.message : String(err);
781
- ctx.logger.logLevel("warn", `[blok][wait] failed to rehydrate iteration_context: ${msg}. primitives will resume from iteration 0.`);
782
- }
783
- }
850
+ traceRunId = this.rehydrateDeferredRun(ctx, tracker);
784
851
  }
785
852
  else if (tracker.active) {
786
- const runner = this.getRunner(cfg);
787
- const stepCount = runner.getStepCount?.() ?? cfg.steps?.length ?? 0;
788
- // Tier 1 · replay lineage. The replay endpoint
789
- // (TraceRouter.POST /__blok/runs/:id/replay) sets
790
- // `X-Blok-Replay-Of: <originalRunId>` on the dispatched HTTP
791
- // request. Read it here so the new run carries `replayOf` and
792
- // Studio can render a "Replay of #..." breadcrumb.
793
- const reqHeaders = (ctx.request?.headers ?? {});
794
- const replayOfHeader = reqHeaders["x-blok-replay-of"] ?? reqHeaders["X-Blok-Replay-Of"];
795
- const replayOf = Array.isArray(replayOfHeader)
796
- ? replayOfHeader[0]
797
- : typeof replayOfHeader === "string"
798
- ? replayOfHeader
799
- : undefined;
800
- // G2 (v0.6) · sub-workflow lineage across the HTTP boundary.
801
- // `SubworkflowNode.dispatchHttpSelf` sets these headers on the
802
- // outbound self-call so the receiver's run record carries the
803
- // parent ids. Without this, an http-self child would appear
804
- // as a fresh top-level run with no Studio breadcrumb.
805
- const parentRunId = pickHeader(reqHeaders, "x-blok-parent-run-id");
806
- const parentNodeRunId = pickHeader(reqHeaders, "x-blok-parent-node-run-id");
807
- const run = tracker.startRun({
808
- workflowName: cfg.name || ctx.workflow_name || "unknown",
809
- workflowPath: ctx.workflow_path || "",
810
- triggerType: this.constructor.name.replace("Trigger", "").toLowerCase() || "unknown",
811
- triggerSummary: this.buildTraceTriggerSummary(ctx),
812
- nodeCount: stepCount,
813
- replayOf,
814
- parentRunId,
815
- parentNodeRunId,
816
- });
817
- traceRunId = run.id;
818
- ctxRecord._traceRunId = run.id;
819
- // Carry the sub-workflow depth across the HTTP hop so the
820
- // recursion guard in nested children still fires.
821
- const depthHeader = pickHeader(reqHeaders, "x-blok-subworkflow-depth");
822
- const parsedDepth = depthHeader ? Number.parseInt(depthHeader, 10) : Number.NaN;
823
- if (Number.isFinite(parsedDepth) && parsedDepth > 0) {
824
- ctxRecord._subworkflowDepth = parsedDepth;
825
- }
826
- // Tier 2 follow-up · register the ctx's AbortController so the
827
- // cancel API can fire it for `running` runs. Stashed on
828
- // _PRIVATE_ by createContext; lookup via the optional shape.
829
- const privateSlot = ctx._PRIVATE_;
830
- if (privateSlot?.abortController) {
831
- tracker.registerAbortController(run.id, privateSlot.abortController);
832
- }
833
- // Wrap logger to forward log entries to RunTracker
834
- ctx.logger = new TracingLogger(ctx.logger, run.id, tracker);
853
+ traceRunId = this.startRunTrace(ctx, cfg, tracker);
835
854
  }
836
855
  try {
837
856
  // --- Scheduling gates (Tier 2 #5 + #7) ---
@@ -1206,6 +1225,22 @@ export default class TriggerBase extends Trigger {
1206
1225
  !(err instanceof WaitDispatchRequest)) {
1207
1226
  tracker.failRun(traceRunId, err instanceof Error ? err : new Error(String(err)));
1208
1227
  }
1228
+ // OBS-05 T2 — emit blok_workflow_errors_total with the resolved
1229
+ // terminal status so dashboards separate failed / crashed /
1230
+ // timedOut / throttled / cancelled. DeferredDispatchSignal and
1231
+ // WaitDispatchRequest are deferral control-flow, not errors —
1232
+ // skip them. The status is read back from the run record AFTER
1233
+ // the upstream markers (markRunTimedOut / markRunThrottled /
1234
+ // abortRunningRun / failRun) have flipped it.
1235
+ if (traceRunId && !(err instanceof DeferredDispatchSignal) && !(err instanceof WaitDispatchRequest)) {
1236
+ const resolvedStatus = tracker.getRun(traceRunId)?.status;
1237
+ this.metricsBridge.recordError(err instanceof Error ? err.constructor.name : "Error", {
1238
+ workflow_name: cfg.name || "",
1239
+ workflow_version: `${cfg.version}`,
1240
+ env: process.env.NODE_ENV || "development",
1241
+ status: resolvedStatus,
1242
+ });
1243
+ }
1209
1244
  throw err;
1210
1245
  }
1211
1246
  finally {