@blokjs/runner 0.2.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159) hide show
  1. package/dist/Blok.js +11 -11
  2. package/dist/Blok.js.map +1 -1
  3. package/dist/Configuration.d.ts +39 -2
  4. package/dist/Configuration.js +337 -28
  5. package/dist/Configuration.js.map +1 -1
  6. package/dist/ConfigurationResolver.d.ts +9 -0
  7. package/dist/ConfigurationResolver.js +17 -1
  8. package/dist/ConfigurationResolver.js.map +1 -1
  9. package/dist/PayloadTooLargeError.d.ts +19 -0
  10. package/dist/PayloadTooLargeError.js +29 -0
  11. package/dist/PayloadTooLargeError.js.map +1 -0
  12. package/dist/RunCancelledError.d.ts +17 -0
  13. package/dist/RunCancelledError.js +25 -0
  14. package/dist/RunCancelledError.js.map +1 -0
  15. package/dist/RunnerSteps.js +363 -23
  16. package/dist/RunnerSteps.js.map +1 -1
  17. package/dist/RuntimeAdapterNode.d.ts +32 -2
  18. package/dist/RuntimeAdapterNode.js +122 -27
  19. package/dist/RuntimeAdapterNode.js.map +1 -1
  20. package/dist/SubworkflowNode.d.ts +75 -0
  21. package/dist/SubworkflowNode.js +221 -0
  22. package/dist/SubworkflowNode.js.map +1 -0
  23. package/dist/TriggerBase.d.ts +128 -0
  24. package/dist/TriggerBase.js +808 -6
  25. package/dist/TriggerBase.js.map +1 -1
  26. package/dist/WaitDispatchRequest.d.ts +38 -0
  27. package/dist/WaitDispatchRequest.js +13 -0
  28. package/dist/WaitDispatchRequest.js.map +1 -0
  29. package/dist/WaitNode.d.ts +23 -0
  30. package/dist/WaitNode.js +26 -0
  31. package/dist/WaitNode.js.map +1 -0
  32. package/dist/adapters/BunRuntimeAdapter.d.ts +1 -0
  33. package/dist/adapters/BunRuntimeAdapter.js +1 -0
  34. package/dist/adapters/BunRuntimeAdapter.js.map +1 -1
  35. package/dist/adapters/DockerRuntimeAdapter.d.ts +2 -1
  36. package/dist/adapters/DockerRuntimeAdapter.js +10 -1
  37. package/dist/adapters/DockerRuntimeAdapter.js.map +1 -1
  38. package/dist/adapters/HttpRuntimeAdapter.d.ts +26 -5
  39. package/dist/adapters/HttpRuntimeAdapter.js +97 -16
  40. package/dist/adapters/HttpRuntimeAdapter.js.map +1 -1
  41. package/dist/adapters/NodeJsRuntimeAdapter.d.ts +1 -0
  42. package/dist/adapters/NodeJsRuntimeAdapter.js +1 -0
  43. package/dist/adapters/NodeJsRuntimeAdapter.js.map +1 -1
  44. package/dist/adapters/RuntimeAdapter.d.ts +17 -0
  45. package/dist/adapters/WasmRuntimeAdapter.d.ts +1 -0
  46. package/dist/adapters/WasmRuntimeAdapter.js +1 -0
  47. package/dist/adapters/WasmRuntimeAdapter.js.map +1 -1
  48. package/dist/adapters/grpc/GrpcChannelOptions.d.ts +31 -0
  49. package/dist/adapters/grpc/GrpcChannelOptions.js +68 -0
  50. package/dist/adapters/grpc/GrpcChannelOptions.js.map +1 -0
  51. package/dist/adapters/grpc/GrpcClientPool.d.ts +43 -0
  52. package/dist/adapters/grpc/GrpcClientPool.js +89 -0
  53. package/dist/adapters/grpc/GrpcClientPool.js.map +1 -0
  54. package/dist/adapters/grpc/GrpcCodec.d.ts +226 -0
  55. package/dist/adapters/grpc/GrpcCodec.js +275 -0
  56. package/dist/adapters/grpc/GrpcCodec.js.map +1 -0
  57. package/dist/adapters/grpc/GrpcErrors.d.ts +59 -0
  58. package/dist/adapters/grpc/GrpcErrors.js +190 -0
  59. package/dist/adapters/grpc/GrpcErrors.js.map +1 -0
  60. package/dist/adapters/grpc/GrpcHealthChecker.d.ts +69 -0
  61. package/dist/adapters/grpc/GrpcHealthChecker.js +96 -0
  62. package/dist/adapters/grpc/GrpcHealthChecker.js.map +1 -0
  63. package/dist/adapters/grpc/GrpcRuntimeAdapter.d.ts +98 -0
  64. package/dist/adapters/grpc/GrpcRuntimeAdapter.js +478 -0
  65. package/dist/adapters/grpc/GrpcRuntimeAdapter.js.map +1 -0
  66. package/dist/adapters/grpc/index.d.ts +13 -0
  67. package/dist/adapters/grpc/index.js +14 -0
  68. package/dist/adapters/grpc/index.js.map +1 -0
  69. package/dist/adapters/grpc/proto/blok/runtime/v1/runtime.proto +302 -0
  70. package/dist/adapters/grpc/types.d.ts +97 -0
  71. package/dist/adapters/grpc/types.js +41 -0
  72. package/dist/adapters/grpc/types.js.map +1 -0
  73. package/dist/adapters/transport.d.ts +108 -0
  74. package/dist/adapters/transport.js +196 -0
  75. package/dist/adapters/transport.js.map +1 -0
  76. package/dist/concurrency/ConcurrencyBackend.d.ts +61 -0
  77. package/dist/concurrency/ConcurrencyBackend.js +20 -0
  78. package/dist/concurrency/ConcurrencyBackend.js.map +1 -0
  79. package/dist/concurrency/ConcurrencyLimitError.d.ts +37 -0
  80. package/dist/concurrency/ConcurrencyLimitError.js +16 -0
  81. package/dist/concurrency/ConcurrencyLimitError.js.map +1 -0
  82. package/dist/concurrency/NatsKvConcurrencyBackend.d.ts +64 -0
  83. package/dist/concurrency/NatsKvConcurrencyBackend.js +297 -0
  84. package/dist/concurrency/NatsKvConcurrencyBackend.js.map +1 -0
  85. package/dist/concurrency/QueueExpiredError.d.ts +40 -0
  86. package/dist/concurrency/QueueExpiredError.js +15 -0
  87. package/dist/concurrency/QueueExpiredError.js.map +1 -0
  88. package/dist/concurrency/createConcurrencyBackend.d.ts +23 -0
  89. package/dist/concurrency/createConcurrencyBackend.js +34 -0
  90. package/dist/concurrency/createConcurrencyBackend.js.map +1 -0
  91. package/dist/concurrency/readConcurrencyConfig.d.ts +60 -0
  92. package/dist/concurrency/readConcurrencyConfig.js +60 -0
  93. package/dist/concurrency/readConcurrencyConfig.js.map +1 -0
  94. package/dist/idempotency/resolveIdempotencyKey.d.ts +20 -0
  95. package/dist/idempotency/resolveIdempotencyKey.js +37 -0
  96. package/dist/idempotency/resolveIdempotencyKey.js.map +1 -0
  97. package/dist/index.d.ts +35 -3
  98. package/dist/index.js +61 -2
  99. package/dist/index.js.map +1 -1
  100. package/dist/monitoring/ConcurrencyMetrics.d.ts +56 -0
  101. package/dist/monitoring/ConcurrencyMetrics.js +107 -0
  102. package/dist/monitoring/ConcurrencyMetrics.js.map +1 -0
  103. package/dist/monitoring/JanitorMetrics.d.ts +27 -0
  104. package/dist/monitoring/JanitorMetrics.js +48 -0
  105. package/dist/monitoring/JanitorMetrics.js.map +1 -0
  106. package/dist/scheduling/DebounceCoordinator.d.ts +88 -0
  107. package/dist/scheduling/DebounceCoordinator.js +141 -0
  108. package/dist/scheduling/DebounceCoordinator.js.map +1 -0
  109. package/dist/scheduling/DeferredDispatchSignal.d.ts +50 -0
  110. package/dist/scheduling/DeferredDispatchSignal.js +14 -0
  111. package/dist/scheduling/DeferredDispatchSignal.js.map +1 -0
  112. package/dist/scheduling/DeferredRunScheduler.d.ts +68 -0
  113. package/dist/scheduling/DeferredRunScheduler.js +154 -0
  114. package/dist/scheduling/DeferredRunScheduler.js.map +1 -0
  115. package/dist/scheduling/readSchedulingConfig.d.ts +24 -0
  116. package/dist/scheduling/readSchedulingConfig.js +52 -0
  117. package/dist/scheduling/readSchedulingConfig.js.map +1 -0
  118. package/dist/testing/WorkflowTestRunner.js +12 -0
  119. package/dist/testing/WorkflowTestRunner.js.map +1 -1
  120. package/dist/timeouts/StepTimeoutError.d.ts +22 -0
  121. package/dist/timeouts/StepTimeoutError.js +31 -0
  122. package/dist/timeouts/StepTimeoutError.js.map +1 -0
  123. package/dist/tracing/InMemoryRunStore.d.ts +28 -1
  124. package/dist/tracing/InMemoryRunStore.js +150 -0
  125. package/dist/tracing/InMemoryRunStore.js.map +1 -1
  126. package/dist/tracing/Janitor.d.ts +70 -0
  127. package/dist/tracing/Janitor.js +150 -0
  128. package/dist/tracing/Janitor.js.map +1 -0
  129. package/dist/tracing/PostgresRunStore.d.ts +30 -0
  130. package/dist/tracing/PostgresRunStore.js +435 -3
  131. package/dist/tracing/PostgresRunStore.js.map +1 -1
  132. package/dist/tracing/RunStore.d.ts +100 -1
  133. package/dist/tracing/RunTracker.d.ts +261 -11
  134. package/dist/tracing/RunTracker.js +691 -11
  135. package/dist/tracing/RunTracker.js.map +1 -1
  136. package/dist/tracing/SqliteRunStore.d.ts +23 -1
  137. package/dist/tracing/SqliteRunStore.js +421 -6
  138. package/dist/tracing/SqliteRunStore.js.map +1 -1
  139. package/dist/tracing/TraceRouter.d.ts +20 -2
  140. package/dist/tracing/TraceRouter.js +494 -9
  141. package/dist/tracing/TraceRouter.js.map +1 -1
  142. package/dist/tracing/sanitize.d.ts +11 -0
  143. package/dist/tracing/sanitize.js +29 -0
  144. package/dist/tracing/sanitize.js.map +1 -1
  145. package/dist/tracing/types.d.ts +429 -11
  146. package/dist/types/GlobalOptions.d.ts +9 -2
  147. package/dist/utils/createChildContext.d.ts +32 -0
  148. package/dist/utils/createChildContext.js +113 -0
  149. package/dist/utils/createChildContext.js.map +1 -0
  150. package/dist/workflow/PersistenceHelper.d.ts +46 -0
  151. package/dist/workflow/PersistenceHelper.js +57 -0
  152. package/dist/workflow/PersistenceHelper.js.map +1 -0
  153. package/dist/workflow/WorkflowNormalizer.d.ts +79 -0
  154. package/dist/workflow/WorkflowNormalizer.js +486 -0
  155. package/dist/workflow/WorkflowNormalizer.js.map +1 -0
  156. package/dist/workflow/WorkflowRegistry.d.ts +64 -0
  157. package/dist/workflow/WorkflowRegistry.js +81 -0
  158. package/dist/workflow/WorkflowRegistry.js.map +1 -0
  159. package/package.json +10 -7
@@ -1,5 +1,34 @@
1
1
  import http from "node:http";
2
+ import { DebounceCoordinator } from "../scheduling/DebounceCoordinator";
3
+ import { DeferredRunScheduler } from "../scheduling/DeferredRunScheduler";
2
4
  import { RunTracker } from "./RunTracker";
5
+ /**
6
+ * Security review FW-2 — sensitive headers that are NEVER honored when
7
+ * supplied via the replay endpoint's `overrides.headers`. Combined with
8
+ * the FW-1 trace-auth gate, this blocks the replay-as-auth-bypass attack
9
+ * where an unauthenticated client posts to `/__blok/runs/:id/replay`
10
+ * with an attacker-controlled `Authorization` header that the runner
11
+ * would otherwise dispatch verbatim to the user-authored route.
12
+ */
13
const REPLAY_HEADER_DENYLIST = new Set([
    "authorization",
    "cookie",
    "set-cookie",
    "x-api-key",
    "x-auth-token",
    "proxy-authorization",
]);
/**
 * Build a sanitized copy of caller-supplied replay header overrides.
 *
 * Header names are compared case-insensitively against
 * `REPLAY_HEADER_DENYLIST`; matching entries are dropped. The input object
 * is never mutated.
 *
 * @param {Record<string, unknown> | null | undefined} headers
 *   Header overrides taken from the replay request body.
 * @returns {Record<string, unknown>} A new plain object holding only the
 *   headers that are allowed through. Empty when `headers` is missing or
 *   is not a plain key/value object.
 */
function filterReplayHeaders(headers) {
    // Strings and arrays would otherwise be exploded into index-keyed
    // "headers" by Object.entries(); treat anything that is not a
    // key/value object as "no overrides".
    if (!headers || typeof headers !== "object" || Array.isArray(headers))
        return {};
    const filtered = {};
    for (const [name, value] of Object.entries(headers)) {
        // A JSON body can carry an own "__proto__" key. Assigning it with
        // `filtered[name] = value` would swap the result's prototype and make
        // denylisted names readable through the prototype chain.
        if (name === "__proto__")
            continue;
        if (REPLAY_HEADER_DENYLIST.has(name.toLowerCase()))
            continue;
        filtered[name] = value;
    }
    return filtered;
}
3
32
  /**
4
33
  * Register trace API routes on an Express-compatible router.
5
34
  *
@@ -12,22 +41,64 @@ import { RunTracker } from "./RunTracker";
12
41
  * import { Router } from "express";
13
42
  * import { registerTraceRoutes } from "@blokjs/runner";
14
43
  * const traceRouter = Router();
15
- * registerTraceRoutes(traceRouter);
44
+ * registerTraceRoutes(traceRouter, undefined, { authorize: myAuthFn });
16
45
  * app.use("/__blok", traceRouter);
17
46
  * ```
18
47
  */
19
- export function registerTraceRoutes(router, tracker) {
48
+ export function registerTraceRoutes(router, tracker, options) {
20
49
  const t = tracker || RunTracker.getInstance();
21
50
  // --- CORS for cross-origin Studio UI ---
51
+ // Security review FW-4 — `BLOK_TRACE_CORS_ORIGIN` overrides the
52
+ // permissive `*` default. Set to a single allow-listed origin in
53
+ // production to prevent cross-origin reads of trace data.
54
+ const corsOrigin = process.env.BLOK_TRACE_CORS_ORIGIN || "*";
55
+ // Security review FW-1 — production-default-deny on /__blok/* unless
56
+ // the operator either registers an authorize hook (preferred) or
57
+ // explicitly opts out via BLOK_TRACE_AUTH_DISABLED=1.
58
+ const isProd = process.env.BLOK_ENV === "production" || process.env.NODE_ENV === "production";
59
+ const authDisabled = process.env.BLOK_TRACE_AUTH_DISABLED === "1";
60
+ const authorize = options?.authorize;
22
61
  router.use((req, res, next) => {
23
- res.setHeader("Access-Control-Allow-Origin", "*");
62
+ res.setHeader("Access-Control-Allow-Origin", corsOrigin);
24
63
  res.setHeader("Access-Control-Allow-Methods", "GET, POST, DELETE, OPTIONS");
25
64
  res.setHeader("Access-Control-Allow-Headers", "Content-Type, Last-Event-ID");
26
65
  if (req.method === "OPTIONS") {
27
66
  res.sendStatus(204);
28
67
  return;
29
68
  }
30
- next();
69
+ // Dev OR explicit opt-out → pass through (preserves previous behaviour).
70
+ if (!isProd || authDisabled) {
71
+ next();
72
+ return;
73
+ }
74
+ // Production WITHOUT an authorize hook → 503 with a hint.
75
+ if (!authorize) {
76
+ res.status(503).json({
77
+ error: "Trace endpoints require auth in production",
78
+ hint: "Register an authorize hook before listen() — `trigger.setTraceAuth(req => ...)` — or set BLOK_TRACE_AUTH_DISABLED=1 to opt out (typically because /__blok/* is already firewalled).",
79
+ docs: "https://github.com/deskree-inc/blok/blob/main/docs/d/security/cookbook.mdx#secure-the-trace-api-and-studio",
80
+ });
81
+ return;
82
+ }
83
+ // Production WITH an authorize hook → consult it. Wrap in
84
+ // `Promise.resolve().then(...)` so a SYNC throw inside the
85
+ // authorize function is caught the same as an async rejection.
86
+ Promise.resolve()
87
+ .then(() => authorize(req))
88
+ .then((ok) => {
89
+ if (ok) {
90
+ next();
91
+ }
92
+ else {
93
+ res.status(401).json({ error: "Unauthorized" });
94
+ }
95
+ })
96
+ .catch((err) => {
97
+ // Don't leak the underlying error message — log it once,
98
+ // return a generic 401.
99
+ console.error("[blok][trace-auth] authorize() threw:", err?.message ?? err);
100
+ res.status(401).json({ error: "Unauthorized" });
101
+ });
31
102
  });
32
103
  // === Utility Endpoints ===
33
104
  router.get("/health", (_req, res) => {
@@ -285,25 +356,283 @@ export function registerTraceRoutes(router, tracker) {
285
356
  }
286
357
  res.json({ removed: true });
287
358
  });
359
+ // === Queues (Phase 5) ===
360
+ //
361
+ // Direction A · Phase 5. Honest "what's configured to receive
362
+ // work" page — Blok's HTTP triggers are stateless (no queue depth)
363
+ // so this view is workflow-by-trigger-type with throughput +
364
+ // last-run timing, not a JetStream-style depth dashboard.
365
+ // JetStream-backed worker queues will surface real depth here when
366
+ // the NATS integration grows the capability — for now we mark
367
+ // `depth: null` everywhere so the UI knows to show "—" instead of
368
+ // "0".
369
+ //
370
+ // Query params:
371
+ // ?env=<name> filter by environment scope (Phase 2.1)
372
+ router.get("/queues", (req, res) => {
373
+ const envFilter = typeof req.query.env === "string" && req.query.env.length > 0 && req.query.env !== "all"
374
+ ? req.query.env
375
+ : undefined;
376
+ // Reuse the workflow-summary aggregation; queues are workflows
377
+ // reframed by their trigger type. Pull recent runs to compute
378
+ // per-trigger-type throughput counts.
379
+ const workflows = t.getWorkflowSummaries();
380
+ const recent = t.getRuns({ limit: 500, sort: "desc" }).runs;
381
+ // env post-filter on the recent-run window
382
+ const recentInScope = envFilter ? recent.filter((r) => (r.environment ?? "production") === envFilter) : recent;
383
+ // Group workflows by their first trigger type (HTTP triggers
384
+ // dominate today; future triggers will surface in this list).
385
+ const queues = workflows.map((w) => {
386
+ const wfRecent = recentInScope.filter((r) => r.workflowName === w.name);
387
+ const triggerType = w.triggerTypes[0] ?? "unknown";
388
+ const lastRun = wfRecent[0];
389
+ return {
390
+ id: w.name,
391
+ name: w.name,
392
+ triggerType,
393
+ triggerTypes: w.triggerTypes,
394
+ // Stateless HTTP triggers have no queue depth; depth
395
+ // will populate when NATS JetStream integration lands.
396
+ depth: null,
397
+ runs24h: wfRecent.length,
398
+ totalRuns: w.totalRuns,
399
+ lastRunAt: lastRun?.startedAt ?? w.lastRunAt,
400
+ lastRunStatus: lastRun?.status ?? w.lastRunStatus,
401
+ avgDurationMs: w.avgDurationMs,
402
+ errorRate: w.errorRate,
403
+ };
404
+ });
405
+ res.json({ queues, total: queues.length, env: envFilter ?? null });
406
+ });
407
+ // === Deployments (Phase 5) ===
408
+ //
409
+ // Read-only "what versions are running where" view. Blok workflows
410
+ // declare a `version` string in their definition; we group runs by
411
+ // `workflowName + version + environment` and report counts + success rate per
412
+ // pair. Studio lists these as "what's deployed", and clicking a
413
+ // row drills into the workflow's runs filtered to that version.
414
+ //
415
+ // Source: scan recent run metadata. Workflow versions live in the
416
+ // trigger's workflow registry but the runner doesn't keep that
417
+ // catalog at this layer — recent runs are the source of truth for
418
+ // "what version produced what trace".
419
router.get("/deployments", (req, res) => {
    // Read-only listing of recent runs grouped per deployed version.
    //
    // Query params:
    //   ?env=<name>   keep only runs from this environment ("all" or empty = no filter)
    //   ?limit=<int>  number of recent runs to scan, default 500, cap 2000
    const envFilter = typeof req.query.env === "string" && req.query.env.length > 0 && req.query.env !== "all"
        ? req.query.env
        : undefined;
    // `Math.min(NaN, 2000)` is NaN, so a non-numeric `limit` used to reach
    // the store as NaN. Non-numeric and non-positive values now fall back to
    // the default window.
    const requestedLimit = Number.parseInt(req.query.limit || "500", 10);
    const limit = Number.isFinite(requestedLimit) && requestedLimit > 0 ? Math.min(requestedLimit, 2000) : 500;
    const runs = t.getRuns({ limit, sort: "desc" }).runs;
    const inScope = envFilter ? runs.filter((r) => (r.environment ?? "production") === envFilter) : runs;
    // Group by `workflowName::version::environment`. Version is read from
    // the run's metadata if present, else "unknown" so the row still
    // surfaces. Environment defaults to "production" when the run has none.
    const buckets = new Map();
    for (const run of inScope) {
        const version = run.metadata?.version ?? "unknown";
        const env = run.environment ?? "production";
        const key = `${run.workflowName}::${version}::${env}`;
        let b = buckets.get(key);
        if (!b) {
            b = {
                workflowName: run.workflowName,
                version,
                environment: env,
                runs: 0,
                succeeded: 0,
                failed: 0,
                lastRunAt: 0,
                firstRunAt: run.startedAt,
                avgDurationMs: 0,
                _durationSum: 0,
                _timedRuns: 0,
            };
            buckets.set(key, b);
        }
        b.runs += 1;
        if (run.status === "completed")
            b.succeeded += 1;
        if (run.status === "failed")
            b.failed += 1;
        if (run.startedAt > b.lastRunAt)
            b.lastRunAt = run.startedAt;
        if (run.startedAt < b.firstRunAt)
            b.firstRunAt = run.startedAt;
        // Only runs that actually report a duration take part in the
        // average; runs with no `durationMs` (presumably still pending or
        // in flight — confirm against RunTracker) would otherwise drag
        // the mean toward zero.
        if (typeof run.durationMs === "number" && Number.isFinite(run.durationMs)) {
            b._durationSum += run.durationMs;
            b._timedRuns += 1;
        }
    }
    const deployments = [...buckets.values()].map((b) => {
        // Strip the internal accumulators before serializing.
        const { _durationSum, _timedRuns, ...rest } = b;
        return {
            ...rest,
            avgDurationMs: _timedRuns > 0 ? Math.round(_durationSum / _timedRuns) : 0,
            successRate: b.runs > 0 ? b.succeeded / b.runs : 0,
        };
    });
    // Most recently active deployment first.
    deployments.sort((a, b) => b.lastRunAt - a.lastRunAt);
    res.json({ deployments, total: deployments.length, env: envFilter ?? null });
});
473
+ // === Logs (cross-run aggregator) ===
474
+ //
475
+ // Direction A · Phase 3 · the page that doesn't exist in current
476
+ // Studio. Aggregates `TraceLogEntry`s across recent runs into a
477
+ // flat feed so operators can grep across workflows during an
478
+ // incident without having to know which run-id to open.
479
+ //
480
+ // Pagination is deliberately simple — `limit` + `since` (epoch ms)
481
+ // with `desc` sort. We over-fetch from the store (limit*4 runs ×
482
+ // up-to-N logs each) and apply filters in memory because the
483
+ // underlying log store doesn't have an indexed multi-key query.
484
+ // At ≤1000 rows this stays well under 50ms even on the in-memory
485
+ // backend; SQLite can be similarly fast since each `getLogs(runId)`
486
+ // is a single indexed query. When the cap is reached, the response
487
+ // signals truncation via `truncated: true` so the client can prompt
488
+ // for narrower filters.
489
+ //
490
+ // Query params (all optional):
491
+ // ?workflow=<name> exact match
492
+ // ?level=info,warn,error,debug comma-separated
493
+ // ?q=<text> case-insensitive substring of message
494
+ // ?since=<epoch ms> only logs newer than this
495
+ // ?limit=<int> max rows returned, default 200, cap 1000
496
router.get("/logs", (req, res) => {
    // Cross-run log feed: flattens the logs of recent runs into one list,
    // newest first, after applying the optional query filters.
    //
    // Query params (all optional):
    //   ?workflow=<name>               exact match on the run's workflow name
    //   ?level=info,warn,error,debug   comma-separated, case-insensitive
    //   ?q=<text>                      case-insensitive substring of message
    //   ?since=<epoch ms>              only logs newer than this
    //   ?limit=<int>                   max rows returned, default 200, cap 1000
    //   ?env=<name>                    environment scope ("all" or empty = no filter)
    const workflowFilter = typeof req.query.workflow === "string" && req.query.workflow.length > 0 ? req.query.workflow : undefined;
    const levelFilter = (() => {
        if (typeof req.query.level !== "string" || req.query.level.length === 0)
            return undefined;
        return new Set(req.query.level.split(",").map((s) => s.trim().toLowerCase()));
    })();
    const qRaw = typeof req.query.q === "string" ? req.query.q.trim() : "";
    const q = qRaw.length > 0 ? qRaw.toLowerCase() : undefined;
    // A non-numeric `since` parses to NaN; every `<= NaN` comparison is
    // false, which silently disabled the filter. Treat it as "not set".
    const sinceParsed = req.query.since ? Number.parseInt(req.query.since, 10) : undefined;
    const since = Number.isFinite(sinceParsed) ? sinceParsed : undefined;
    // A NaN limit made `matches.length >= limit` permanently false (no cap
    // at all) and reached the store as NaN. Fall back to the default for
    // non-numeric or non-positive values.
    const requestedLimit = Number.parseInt(req.query.limit || "200", 10);
    const limit = Number.isFinite(requestedLimit) && requestedLimit > 0 ? Math.min(requestedLimit, 1000) : 200;
    // Environment scoping; runs without an environment count as
    // "production".
    const envFilter = typeof req.query.env === "string" && req.query.env.length > 0 && req.query.env !== "all"
        ? req.query.env
        : undefined;
    // Pull recent runs so we can flatten their logs. We over-pull
    // (limit*4 runs cap'd at 200) so a noisy run doesn't crowd out logs
    // from quieter neighbors.
    const runs = t.getRuns({ limit: Math.min(limit * 4, 200), sort: "desc" }).runs;
    const matches = [];
    let truncated = false;
    for (const run of runs) {
        if (workflowFilter && run.workflowName !== workflowFilter)
            continue;
        if (envFilter && (run.environment ?? "production") !== envFilter)
            continue;
        const logs = t.getLogs(run.id);
        for (const log of logs) {
            if (since !== undefined && log.timestamp <= since)
                continue;
            // Compare levels case-insensitively on both sides; the filter
            // set is already lower-cased.
            if (levelFilter && !levelFilter.has(String(log.level).toLowerCase()))
                continue;
            // A log entry without a string message cannot match a text
            // query; skipping it avoids a TypeError on `.toLowerCase()`.
            if (q && (typeof log.message !== "string" || !log.message.toLowerCase().includes(q)))
                continue;
            matches.push({
                id: log.id,
                runId: run.id,
                workflowName: run.workflowName,
                workflowPath: run.workflowPath,
                nodeId: log.nodeId,
                nodeName: log.nodeName,
                level: log.level,
                message: log.message,
                timestamp: log.timestamp,
                data: log.data,
            });
        }
        // Stop scanning further runs once the cap is reached, but only
        // after the current run has been read in full. Cutting off in the
        // middle of a run (before the sort below) could drop that run's
        // newest entries while keeping its older ones.
        if (matches.length >= limit) {
            truncated = true;
            break;
        }
    }
    // Newest first, then trim to the requested page size.
    matches.sort((a, b) => b.timestamp - a.timestamp);
    const page = matches.slice(0, limit);
    res.json({
        logs: page,
        total: page.length,
        truncated,
        query: { workflow: workflowFilter, level: req.query.level, q: qRaw, since, limit },
    });
});
288
558
  // === Run Endpoints ===
289
559
  router.get("/runs", (req, res) => {
290
560
  const workflow = req.query.workflow;
291
561
  const status = req.query.status;
292
562
  const tags = req.query.tags ? req.query.tags.split(",").map((t) => t.trim()) : undefined;
563
+ // Tier 2 quick-wins — `metadata.<key>=<value>` query params parsed
564
+ // into a `Record<string, string>` for the RunQuery filter. Multiple
565
+ // pairs combine with AND semantics. Keys are restricted by the
566
+ // SqliteRunStore implementation (`/^[a-zA-Z0-9_-]+$/`) for JSON
567
+ // path safety; non-matching keys silently drop.
568
+ let metadata;
569
+ for (const [key, value] of Object.entries(req.query)) {
570
+ if (key.startsWith("metadata.") && typeof value === "string" && value.length > 0) {
571
+ const metaKey = key.slice("metadata.".length);
572
+ if (metaKey.length > 0) {
573
+ if (!metadata)
574
+ metadata = {};
575
+ metadata[metaKey] = value;
576
+ }
577
+ }
578
+ }
293
579
  const limit = Number.parseInt(req.query.limit || "50", 10);
294
580
  const offset = Number.parseInt(req.query.offset || "0", 10);
295
581
  const sort = req.query.sort || "desc";
582
+ // Phase 2.1 · environment scoping. Same post-filter pattern as
583
+ // `categoryFilter` below: applied after `getRuns()` returns so it
584
+ // works against any store (SQLite has the column; InMemory just
585
+ // stores the object). Empty string + "all" both bypass the
586
+ // filter (Studio's EnvChip can dispatch a "show all envs"
587
+ // view in a follow-up).
588
+ const envFilter = typeof req.query.env === "string" && req.query.env.length > 0 && req.query.env !== "all"
589
+ ? req.query.env
590
+ : undefined;
591
+ // Master plan §17.10: optional category filter. The filter is
592
+ // applied AFTER `getRuns()` returns so it works against any
593
+ // store backend (in-memory, sqlite, postgres) without a schema
594
+ // change. The trade-off is that pagination math now reflects
595
+ // the post-filter count, not the underlying store count — this
596
+ // is the right behavior for a UI filter (the user sees "12
597
+ // dependency failures" not "12 of 1247 runs that happen to be
598
+ // dependency failures").
599
+ const categoryFilter = typeof req.query.category === "string" && req.query.category.length > 0
600
+ ? req.query.category.toUpperCase()
601
+ : undefined;
602
+ // Combined filter mode — when EITHER category OR env post-filters
603
+ // are active we have to over-fetch + re-paginate after applying
604
+ // them.
605
+ const needsPostFilter = Boolean(categoryFilter || envFilter);
296
606
  const result = t.getRuns({
297
607
  workflow,
298
608
  status: status,
299
609
  tags,
300
- limit,
301
- offset,
610
+ metadata,
611
+ limit: needsPostFilter ? Math.max(limit, 1000) : limit,
612
+ offset: needsPostFilter ? 0 : offset,
302
613
  sort,
303
614
  });
615
+ let runs = result.runs;
616
+ let total = result.total;
617
+ if (categoryFilter) {
618
+ runs = runs.filter((r) => {
619
+ const category = r.error?.category;
620
+ return typeof category === "string" && category.toUpperCase() === categoryFilter;
621
+ });
622
+ total = runs.length;
623
+ }
624
+ if (envFilter) {
625
+ // Default `production` for legacy rows where env is NULL —
626
+ // matches the SqliteRunStore.rowToRun default.
627
+ runs = runs.filter((r) => (r.environment ?? "production") === envFilter);
628
+ total = runs.length;
629
+ }
630
+ if (needsPostFilter) {
631
+ runs = runs.slice(offset, offset + limit);
632
+ }
304
633
  res.json({
305
- runs: result.runs,
306
- total: result.total,
634
+ runs,
635
+ total,
307
636
  page: Math.floor(offset / limit) + 1,
308
637
  });
309
638
  });
@@ -329,6 +658,21 @@ export function registerTraceRoutes(router, tracker) {
329
658
  const events = t.getEvents(runId, since);
330
659
  res.json(events);
331
660
  });
661
+ /**
662
+ * Tier 2 · sub-workflow lineage. Returns the runs that were started
663
+ * by `subworkflow:` steps inside the given parent run. Studio renders
664
+ * these as a "Sub-runs" list on the parent's run detail page.
665
+ */
666
+ router.get("/runs/:runId/subruns", (req, res) => {
667
+ const { runId } = req.params;
668
+ const run = t.getRun(runId);
669
+ if (!run) {
670
+ res.status(404).json({ error: `Run '${runId}' not found` });
671
+ return;
672
+ }
673
+ const subruns = t.getRunsByParent(runId);
674
+ res.json(subruns);
675
+ });
332
676
  router.delete("/runs", (_req, res) => {
333
677
  const deleted = t.clearAll();
334
678
  res.json({ deleted });
@@ -361,9 +705,18 @@ export function registerTraceRoutes(router, tracker) {
361
705
  const overrides = (req.body || {});
362
706
  const finalMethod = (overrides.method || method).toUpperCase();
363
707
  const finalUrl = overrides.path ? `${protocol}://${host}${overrides.path}` : url;
708
+ // Security review FW-2 — strip sensitive headers from overrides
709
+ // BEFORE merging, then layer the framework-controlled headers
710
+ // LAST so an attacker can't replace `X-Blok-Replay-Of`.
711
+ const safeOverrideHeaders = filterReplayHeaders(overrides.headers);
364
712
  const customHeaders = {
365
713
  "Content-Type": "application/json",
366
- ...(overrides.headers || {}),
714
+ ...safeOverrideHeaders,
715
+ // Tier 1 · replay lineage. TriggerBase reads this header and threads
716
+ // it into `tracker.startRun({ replayOf })`, which persists onto the
717
+ // new run's WorkflowRun.replayOf field. Studio renders a
718
+ // "Replay of #..." breadcrumb that links back to the source run.
719
+ "X-Blok-Replay-Of": runId,
367
720
  };
368
721
  const body = overrides.body !== undefined ? JSON.stringify(overrides.body) : undefined;
369
722
  // Listen for the next RUN_STARTED event matching this workflow
@@ -383,6 +736,10 @@ export function registerTraceRoutes(router, tracker) {
383
736
  newRunId: event.runId,
384
737
  originalRunId: runId,
385
738
  workflowName: run.workflowName,
739
+ // Tier 1 · explicit lineage in the API response so Studio
740
+ // doesn't have to fetch the new run separately to confirm
741
+ // the replay relationship.
742
+ replayOf: runId,
386
743
  });
387
744
  };
388
745
  t.on("RUN_STARTED", onRunStarted);
@@ -415,6 +772,134 @@ export function registerTraceRoutes(router, tracker) {
415
772
  // Cleanup if client disconnects
416
773
  req.on("close", cleanup);
417
774
  });
775
+ // === Concurrency observability (Tier 2 follow-up) ===
776
+ /**
777
+ * Concurrency backend health probe. Returns the configured backend
778
+ * (`"in-process"` when none) and basic state. Useful for k8s-style
779
+ * health checks AND Studio's "Backend status" tile.
780
+ *
781
+ * GET /__blok/concurrency/health
782
+ */
783
+ router.get("/concurrency/health", (_req, res) => {
784
+ const backend = t.getConcurrencyBackend();
785
+ res.json({
786
+ backend: backend?.name ?? "in-process",
787
+ disabled: process.env.BLOK_CONCURRENCY_DISABLED === "1",
788
+ leaseMs: process.env.BLOK_CONCURRENCY_LEASE_MS ? Number(process.env.BLOK_CONCURRENCY_LEASE_MS) : 60 * 60 * 1000,
789
+ });
790
+ });
791
+ /**
792
+ * Snapshot of currently in-flight concurrency slots, grouped by
793
+ * (workflowName, concurrencyKey) bucket. Powers Studio's per-key
794
+ * in-flight tile.
795
+ *
796
+ * GET /__blok/concurrency/state
797
+ */
798
+ router.get("/concurrency/state", (_req, res) => {
799
+ const buckets = t.getStore().getConcurrencySnapshot(Date.now());
800
+ const totalLeases = buckets.reduce((sum, b) => sum + b.leases.length, 0);
801
+ res.json({
802
+ totalBuckets: buckets.length,
803
+ totalLeases,
804
+ buckets: buckets.map((b) => ({
805
+ workflowName: b.workflowName,
806
+ concurrencyKey: b.concurrencyKey,
807
+ inFlight: b.leases.length,
808
+ leases: b.leases,
809
+ })),
810
+ });
811
+ });
812
+ // === Cancellation (Tier 2 polish) ===
813
+ /**
814
+ * Cancel a pending (delayed/debounced/queued) run before it executes, or abort an in-flight (running) run.
815
+ *
816
+ * `POST /__blok/runs/:runId/cancel`
817
+ *
818
+ * Returns:
819
+ * - `200 { cancelled: true, runId, previousStatus, newStatus: "cancelled" }` on success
820
+ * - `400 { error }` when the run isn't in a cancellable state
821
+ * (completed/failed/throttled/expired/crashed/timedOut/cancelled)
822
+ * - `404 { error }` when the runId doesn't exist
823
+ *
824
+ * Cancels the underlying scheduler entry (`DeferredRunScheduler` for
825
+ * delayed/queued runs; `DebounceCoordinator` for debounced trailing-mode
826
+ * runs) AND flips the run's status to `"cancelled"` via
827
+ * `tracker.cancelRun(runId)`. Both scheduler `.cancel()` methods are
828
+ * idempotent so calling them on a runId that doesn't have a pending
829
+ * timer is a safe no-op.
830
+ */
831
+ router.post("/runs/:runId/cancel", (req, res) => {
832
+ const { runId } = req.params;
833
+ const run = t.getRun(runId);
834
+ if (!run) {
835
+ res.status(404).json({ error: `Run '${runId}' not found` });
836
+ return;
837
+ }
838
+ // Tier 2 follow-up · "running" added so cooperative AbortSignal
839
+ // cancellation can flip in-flight runs to `cancelled` via
840
+ // `tracker.abortRunningRun(runId)`. Other terminal states
841
+ // (completed/failed/throttled/expired/crashed/timedOut) remain
842
+ // non-cancellable.
843
+ const cancellable = ["delayed", "debounced", "queued", "running"];
844
+ if (!cancellable.includes(run.status)) {
845
+ res.status(400).json({
846
+ error: `Cannot cancel run in '${run.status}' state. Only runs in 'delayed', 'debounced', 'queued', or 'running' state can be cancelled.`,
847
+ runId,
848
+ status: run.status,
849
+ });
850
+ return;
851
+ }
852
+ // Capture previousStatus BEFORE cancelRun mutates the run record.
853
+ const previousStatus = run.status;
854
+ // Tier 2 follow-up · running runs use cooperative AbortSignal.
855
+ // `abortRunningRun` fires the controller AND flips status via
856
+ // cancelRun in one atomic-feeling call. Returns 200 — the
857
+ // in-flight step's between-step check will throw shortly.
858
+ if (run.status === "running") {
859
+ const aborted = t.abortRunningRun(runId);
860
+ if (!aborted) {
861
+ // No registered controller — likely a stale state where
862
+ // the run is mid-finalization. Still return success since
863
+ // the run is on its way to terminal anyway.
864
+ res.json({
865
+ cancelled: true,
866
+ runId,
867
+ previousStatus,
868
+ newStatus: "cancelled",
869
+ note: "No active AbortController; run will reach terminal state naturally.",
870
+ });
871
+ return;
872
+ }
873
+ res.json({
874
+ cancelled: true,
875
+ runId,
876
+ previousStatus,
877
+ newStatus: "cancelled",
878
+ note: "Cancellation initiated via AbortSignal; in-flight step will abort cooperatively.",
879
+ });
880
+ return;
881
+ }
882
+ // Best-effort scheduler cleanup (both methods are idempotent).
883
+ DeferredRunScheduler.getInstance().cancel(runId);
884
+ if (run.debounceKey) {
885
+ DebounceCoordinator.getInstance().cancel(run.workflowName, run.debounceKey);
886
+ }
887
+ const cancelled = t.cancelRun(runId);
888
+ if (!cancelled) {
889
+ // Race: status changed between our check and the call.
890
+ res.status(409).json({
891
+ error: `Could not cancel run '${runId}'. It may have just transitioned to a non-cancellable state.`,
892
+ runId,
893
+ });
894
+ return;
895
+ }
896
+ res.json({
897
+ cancelled: true,
898
+ runId,
899
+ previousStatus,
900
+ newStatus: "cancelled",
901
+ });
902
+ });
418
903
  // === AI Error Explanation ===
419
904
  /**
420
905
  * Explain a run or node error using an LLM.