deepline 0.1.109 → 0.1.111

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/dist/cli/index.js +2634 -1532
  2. package/dist/cli/index.mjs +2547 -1451
  3. package/dist/index.d.mts +21 -14
  4. package/dist/index.d.ts +21 -14
  5. package/dist/index.js +97 -23
  6. package/dist/index.mjs +97 -23
  7. package/dist/repo/apps/play-runner-workers/src/coordinator-entry.ts +192 -121
  8. package/dist/repo/apps/play-runner-workers/src/entry.ts +254 -65
  9. package/dist/repo/apps/play-runner-workers/src/runtime/receipts.ts +18 -27
  10. package/dist/repo/apps/play-runner-workers/src/workflow-instance-create.ts +44 -0
  11. package/dist/repo/apps/play-runner-workers/src/workflow-retry.ts +7 -11
  12. package/dist/repo/sdk/src/client.ts +35 -12
  13. package/dist/repo/sdk/src/errors.ts +2 -2
  14. package/dist/repo/sdk/src/http.ts +87 -7
  15. package/dist/repo/sdk/src/play.ts +1 -1
  16. package/dist/repo/sdk/src/plays/bundle-play-file.ts +5 -1
  17. package/dist/repo/sdk/src/release.ts +13 -10
  18. package/dist/repo/sdk/src/tool-output.ts +2 -2
  19. package/dist/repo/sdk/src/types.ts +9 -6
  20. package/dist/repo/shared_libs/play-runtime/fullenrich-batching.ts +229 -0
  21. package/dist/repo/shared_libs/play-runtime/governor/policy.ts +1 -1
  22. package/dist/repo/shared_libs/play-runtime/play-runtime-batching-registry.ts +20 -0
  23. package/dist/repo/shared_libs/play-runtime/run-failure.ts +20 -12
  24. package/dist/repo/shared_libs/play-runtime/run-ledger.ts +147 -70
  25. package/dist/repo/shared_libs/play-runtime/scheduler-backend.ts +6 -2
  26. package/dist/repo/shared_libs/play-runtime/secret-redaction.ts +15 -0
  27. package/dist/repo/shared_libs/play-runtime/work-receipts.ts +1 -0
  28. package/dist/repo/shared_libs/plays/bundling/index.ts +193 -21
  29. package/dist/repo/shared_libs/plays/static-pipeline.ts +1 -3
  30. package/dist/repo/shared_libs/security/outbound-url-policy.ts +238 -0
  31. package/dist/repo/shared_libs/security/safe-fetch.ts +118 -0
  32. package/dist/viewer/viewer.css +617 -0
  33. package/dist/viewer/viewer.js +1496 -0
  34. package/package.json +5 -1
@@ -42,7 +42,7 @@ import {
42
42
  executeChunkedRequests,
43
43
  type ChunkExecutionResult,
44
44
  } from '../../../shared_libs/play-runtime/batch-runtime';
45
- import { getDefaultPlayRuntimeBatchStrategy } from '../../../shared_libs/play-runtime/default-batch-strategies';
45
+ import { getPlayRuntimeBatchStrategy } from '../../../shared_libs/play-runtime/play-runtime-batching-registry';
46
46
  import { STANDARD_PLAY_RUNTIME_LIMIT_SECONDS } from '../../../shared_libs/temporal/constants';
47
47
  import {
48
48
  createPlayExecutionGovernor,
@@ -161,6 +161,12 @@ import {
161
161
  type SecretAwareRequestInit,
162
162
  type SecretHandle,
163
163
  } from '../../../shared_libs/play-runtime/secret-capability';
164
+ import { safePublicFetch } from '../../../shared_libs/security/safe-fetch';
165
+ import {
166
+ assertPublicHttpUrl,
167
+ isIpAddressLiteral,
168
+ UnsafeOutboundUrlError,
169
+ } from '../../../shared_libs/security/outbound-url-policy';
164
170
  import type {
165
171
  LiveNodeProgressMap,
166
172
  LiveNodeProgressSnapshot,
@@ -395,6 +401,9 @@ function captureRuntimeApiBinding(env: WorkerEnv): void {
395
401
  }
396
402
 
397
403
  let cachedCoordinatorBinding: WorkerEnv['COORDINATOR'] | null = null;
404
+ const TRACE_FLUSH_MS = 1_000;
405
+ const pendingTraceForwardsByRun = new Map<string, Promise<void>>();
406
+
398
407
  function captureCoordinatorBinding(env: WorkerEnv): void {
399
408
  cachedCoordinatorBinding = env.COORDINATOR ?? null;
400
409
  }
@@ -679,32 +688,58 @@ function recordRunnerPerfTrace(input: {
679
688
  ms?: number;
680
689
  extra?: Record<string, unknown>;
681
690
  }): void {
691
+ // Benchmark note: these runner spans decompose the server watch's terminal
692
+ // wait. They are logged locally and forwarded to the coordinator so
693
+ // `/api/v2/plays/run --watch` benchmark exports can join them with
694
+ // `server.stream_scheduler_terminal_event` by runId.
682
695
  if (!input.req.runId || !input.phase) return;
696
+ const phase = input.phase.startsWith('runner.')
697
+ ? input.phase
698
+ : `runner.${input.phase}`;
683
699
  // Tool-level traces can fire once per row/provider step. Forwarding each one
684
700
  // through the coordinator binding can consume Cloudflare's subrequest budget
685
701
  // before large batched maps finish.
686
- if (input.phase.startsWith('runner.tool.')) {
702
+ if (phase.startsWith('runner.tool.')) {
687
703
  return;
688
704
  }
689
705
  const payload = {
690
706
  ts: Date.now(),
691
707
  source: 'dynamic_worker' as const,
692
708
  runId: input.req.runId,
693
- phase: `runner.${input.phase}`,
709
+ phase,
694
710
  ms: input.ms ?? 0,
695
711
  ...(input.extra ?? {}),
696
712
  };
697
713
  console.log(
698
714
  `[deepline-run:${input.req.runId}] [perf-trace] ${JSON.stringify(payload)}`,
699
715
  );
700
- cachedCoordinatorBinding
701
- ?.recordPerfTrace(input.req.runId, payload)
702
- .catch((error: unknown) => {
703
- const message = error instanceof Error ? error.message : String(error);
704
- console.warn(
705
- `[deepline-run:${input.req.runId}] failed to forward runner perf trace: ${message}`,
706
- );
707
- });
716
+ const binding = cachedCoordinatorBinding;
717
+ if (!binding) return;
718
+ const forward = binding
719
+ .recordPerfTrace(input.req.runId, payload)
720
+ .catch(() => undefined);
721
+ const previous = pendingTraceForwardsByRun.get(input.req.runId);
722
+ const pending = previous
723
+ ? previous.then(
724
+ () => forward,
725
+ () => forward,
726
+ )
727
+ : forward;
728
+ pendingTraceForwardsByRun.set(input.req.runId, pending);
729
+ void pending.finally(() => {
730
+ if (pendingTraceForwardsByRun.get(input.req.runId) === pending) {
731
+ pendingTraceForwardsByRun.delete(input.req.runId);
732
+ }
733
+ });
734
+ }
735
+
736
+ async function drainRunnerPerfTraces(req: RunRequest): Promise<void> {
737
+ const pending = pendingTraceForwardsByRun.get(req.runId);
738
+ if (!pending) return;
739
+ await Promise.race([
740
+ pending,
741
+ new Promise((resolve) => setTimeout(resolve, TRACE_FLUSH_MS)),
742
+ ]);
708
743
  }
709
744
 
710
745
  function makeRequestId(): string {
@@ -1031,10 +1066,18 @@ async function executeToolWithLifecycle(
1031
1066
  args: { id: string; toolId: string; input: Record<string, unknown> },
1032
1067
  workflowStep: WorkflowStep | undefined,
1033
1068
  callbacks: WorkerCtxCallbacks | undefined,
1069
+ onProviderBackpressure?: (retryAfterMs: number) => void,
1070
+ onRetryAttempt?: () => void,
1034
1071
  ): Promise<ToolExecuteResult> {
1035
1072
  callbacks?.onToolCalled?.(args.toolId, nowMs());
1036
1073
  try {
1037
- return await executeTool(req, args, workflowStep);
1074
+ return await executeTool(
1075
+ req,
1076
+ args,
1077
+ workflowStep,
1078
+ onProviderBackpressure,
1079
+ onRetryAttempt,
1080
+ );
1038
1081
  } catch (error) {
1039
1082
  callbacks?.onToolFailed?.(args.toolId, nowMs());
1040
1083
  throw error;
@@ -1178,17 +1221,38 @@ async function callToolDirect(
1178
1221
  attempt <= WORKER_TOOL_RATE_LIMIT_MAX_ATTEMPTS;
1179
1222
  attempt += 1
1180
1223
  ) {
1181
- const res = await fetchRuntimeApi(req.baseUrl, path, {
1182
- method: 'POST',
1183
- headers: {
1184
- 'content-type': 'application/json',
1185
- authorization: `Bearer ${req.executorToken}`,
1186
- 'x-deepline-request-id': `${req.runId}:${toolId}:${id}:attempt:${attempt}`,
1187
- [EXECUTE_RESPONSE_CONTRACT_HEADER]: V2_EXECUTE_RESPONSE_CONTRACT,
1188
- [EXECUTE_TOOL_METADATA_HEADER]: 'true',
1189
- },
1190
- body: JSON.stringify({ payload: input }),
1191
- });
1224
+ let res: Response;
1225
+ try {
1226
+ res = await fetchRuntimeApi(req.baseUrl, path, {
1227
+ method: 'POST',
1228
+ headers: {
1229
+ 'content-type': 'application/json',
1230
+ authorization: `Bearer ${req.executorToken}`,
1231
+ 'x-deepline-request-id': `${req.runId}:${toolId}:${id}:attempt:${attempt}`,
1232
+ [EXECUTE_RESPONSE_CONTRACT_HEADER]: V2_EXECUTE_RESPONSE_CONTRACT,
1233
+ [EXECUTE_TOOL_METADATA_HEADER]: 'true',
1234
+ },
1235
+ body: JSON.stringify({ payload: input }),
1236
+ });
1237
+ } catch (error) {
1238
+ const message = error instanceof Error ? error.message : String(error);
1239
+ lastError = new Error(
1240
+ `Tool ${toolId} transport failed calling ${path} for run ${req.runId} on attempt ${attempt}/${WORKER_TOOL_TRANSPORT_MAX_ATTEMPTS}: ${message}`,
1241
+ );
1242
+ if (
1243
+ attempt >= WORKER_TOOL_TRANSPORT_MAX_ATTEMPTS ||
1244
+ !isRetryableRuntimeApiError(error)
1245
+ ) {
1246
+ throw lastError;
1247
+ }
1248
+ onRetryAttempt?.();
1249
+ const delayMs = WORKER_TOOL_TRANSPORT_RETRY_DELAY_MS * attempt;
1250
+ console.warn(
1251
+ `[deepline-run:${req.runId}] tool transport retry tool=${toolId} path=${path} attempt=${attempt}/${WORKER_TOOL_TRANSPORT_MAX_ATTEMPTS} retryAfterMs=${delayMs} error=${redactSecretsFromLogString(message)}`,
1252
+ );
1253
+ await sleepWorkerMs(delayMs);
1254
+ continue;
1255
+ }
1192
1256
  if (res.ok) {
1193
1257
  const body = (await res.json()) as Record<string, unknown>;
1194
1258
  const parsed = parseToolExecuteResponse(toolId, body);
@@ -1423,6 +1487,8 @@ const WORKER_RETRY_SAFE_5XX_TOOLS = new Set(['test_transient_500']);
1423
1487
  * retry budget, so a runaway storm stays bounded and loud.
1424
1488
  */
1425
1489
  const WORKER_TOOL_RATE_LIMIT_MAX_ATTEMPTS = 8;
1490
+ const WORKER_TOOL_TRANSPORT_MAX_ATTEMPTS = 3;
1491
+ const WORKER_TOOL_TRANSPORT_RETRY_DELAY_MS = 1_000;
1426
1492
 
1427
1493
  function sleepWorkerMs(ms: number): Promise<void> {
1428
1494
  return new Promise((resolve) => setTimeout(resolve, ms));
@@ -1442,6 +1508,7 @@ class WorkerToolBatchScheduler {
1442
1508
  private readonly resolvePacing: WorkerPacingResolver,
1443
1509
  private readonly abortSignal?: AbortSignal,
1444
1510
  private readonly onRequestsSettled?: (count: number) => void,
1511
+ private readonly callbacks?: WorkerCtxCallbacks,
1445
1512
  ) {}
1446
1513
 
1447
1514
  /**
@@ -1508,7 +1575,7 @@ class WorkerToolBatchScheduler {
1508
1575
  return this.queue.some(
1509
1576
  (request) =>
1510
1577
  request.toolId !== 'test_wait_for_event' &&
1511
- getDefaultPlayRuntimeBatchStrategy(request.toolId) !== null,
1578
+ getPlayRuntimeBatchStrategy(request.toolId) !== null,
1512
1579
  );
1513
1580
  }
1514
1581
 
@@ -1539,8 +1606,8 @@ class WorkerToolBatchScheduler {
1539
1606
  toolId: string,
1540
1607
  requests: WorkerToolBatchRequest[],
1541
1608
  ): Promise<void> {
1542
- const strategy = getDefaultPlayRuntimeBatchStrategy(toolId);
1543
- if (!strategy || toolId === 'test_wait_for_event') {
1609
+ const strategy = getPlayRuntimeBatchStrategy(toolId);
1610
+ if (!strategy || toolId === 'test_wait_for_event' || requests.length < 2) {
1544
1611
  const groupStartedAt = nowMs();
1545
1612
  await Promise.all(
1546
1613
  requests.map(async (request) => {
@@ -1552,10 +1619,11 @@ class WorkerToolBatchScheduler {
1552
1619
  });
1553
1620
  try {
1554
1621
  request.resolve(
1555
- await executeTool(
1622
+ await executeToolWithLifecycle(
1556
1623
  this.req,
1557
1624
  { id: request.id, toolId, input: request.input },
1558
1625
  request.workflowStep,
1626
+ this.callbacks,
1559
1627
  (retryAfterMs) => this.reportBackpressure(toolId, retryAfterMs),
1560
1628
  () => this.governor.chargeBudget('retry'),
1561
1629
  ),
@@ -1591,6 +1659,7 @@ class WorkerToolBatchScheduler {
1591
1659
  reportBackpressure: (retryAfterMs) =>
1592
1660
  this.reportBackpressure(toolId, retryAfterMs),
1593
1661
  onRequestsSettled: this.onRequestsSettled,
1662
+ callbacks: this.callbacks,
1594
1663
  });
1595
1664
  recordRunnerPerfTrace({
1596
1665
  req: this.req,
@@ -1625,6 +1694,7 @@ async function executeBatchedWorkerToolGroup(input: {
1625
1694
  abortSignal?: AbortSignal;
1626
1695
  reportBackpressure: (retryAfterMs: number) => void;
1627
1696
  onRequestsSettled?: (count: number) => void;
1697
+ callbacks?: WorkerCtxCallbacks;
1628
1698
  }): Promise<void> {
1629
1699
  const compiledBatches = compileRequestsWithStrategy({
1630
1700
  requests: input.requests,
@@ -1659,6 +1729,7 @@ async function executeBatchedWorkerToolGroup(input: {
1659
1729
  signal: input.abortSignal,
1660
1730
  });
1661
1731
  try {
1732
+ input.callbacks?.onToolCalled?.(batch.batchOperation, nowMs());
1662
1733
  return await executeTool(
1663
1734
  input.req,
1664
1735
  {
@@ -1670,6 +1741,9 @@ async function executeBatchedWorkerToolGroup(input: {
1670
1741
  input.reportBackpressure,
1671
1742
  () => input.governor.chargeBudget('retry'),
1672
1743
  );
1744
+ } catch (error) {
1745
+ input.callbacks?.onToolFailed?.(batch.batchOperation, nowMs());
1746
+ throw error;
1673
1747
  } finally {
1674
1748
  slot.release();
1675
1749
  }
@@ -2276,6 +2350,46 @@ function parseFetchJsonOrNull(bodyText: string): unknown | null {
2276
2350
  }
2277
2351
  }
2278
2352
 
2353
+ async function safeWorkerPublicFetch(
2354
+ input: string | URL,
2355
+ init: RequestInit,
2356
+ options: {
2357
+ allowedOrigins: Iterable<string>;
2358
+ sensitiveHeaders: Iterable<string>;
2359
+ },
2360
+ ): Promise<Response> {
2361
+ const allowedOrigins = new Set(options.allowedOrigins);
2362
+ return safePublicFetch(input, init, {
2363
+ sensitiveHeaders: options.sensitiveHeaders,
2364
+ fetchImpl: async (nextInput, nextInit) => {
2365
+ const url = assertPublicHttpUrl(nextInput);
2366
+ if (
2367
+ !isIpAddressLiteral(url.hostname) &&
2368
+ !allowedOrigins.has(url.origin)
2369
+ ) {
2370
+ throw new UnsafeOutboundUrlError(
2371
+ 'workers_edge ctx.fetch requires a public IP literal target or Deepline runtime origin. Use a Deepline integration tool for other hostname URLs.',
2372
+ );
2373
+ }
2374
+ return fetch(url, nextInit);
2375
+ },
2376
+ });
2377
+ }
2378
+
2379
+ function normalizeAllowedWorkerFetchOrigin(rawUrl: string): string | null {
2380
+ try {
2381
+ return assertPublicHttpUrl(rawUrl).origin;
2382
+ } catch {
2383
+ return null;
2384
+ }
2385
+ }
2386
+
2387
+ function getAllowedWorkerFetchOrigins(req: RunRequest): string[] {
2388
+ return [req.baseUrl, req.callbackUrl]
2389
+ .map(normalizeAllowedWorkerFetchOrigin)
2390
+ .filter((origin): origin is string => origin !== null);
2391
+ }
2392
+
2279
2393
  // ---------------------------------------------------------------------------
2280
2394
  // Streaming CSV parser. Pipes a `ReadableStream<Uint8Array>` from R2 through
2281
2395
  // a TextDecoder + line buffer + RFC-4180-ish state machine, yielding chunks
@@ -3248,7 +3362,8 @@ function createMinimalWorkerCtx(
3248
3362
  const executeWithRuntimeReceipt = async <T>(
3249
3363
  key: string,
3250
3364
  execute: () => Promise<T> | T,
3251
- repairRunningReceiptForSameRun = false,
3365
+ repairRunningReceiptForSameRun = true,
3366
+ reclaimRunning = false,
3252
3367
  ): Promise<T> => {
3253
3368
  const serialized = await runWorkerRuntimeReceiptBoundary<unknown>({
3254
3369
  orgId: req.orgId,
@@ -3258,6 +3373,7 @@ function createMinimalWorkerCtx(
3258
3373
  receiptStore,
3259
3374
  execute: async () => serializeDurableStepValue(await execute()),
3260
3375
  repairRunningReceiptForSameRun,
3376
+ reclaimRunning,
3261
3377
  });
3262
3378
  return deserializeDurableStepValue(serialized) as T;
3263
3379
  };
@@ -3279,7 +3395,7 @@ function createMinimalWorkerCtx(
3279
3395
  )(name, async () => serializeDurableStepValue(await execute()));
3280
3396
  return deserializeDurableStepValue(serialized) as T;
3281
3397
  },
3282
- true,
3398
+ false,
3283
3399
  );
3284
3400
  };
3285
3401
  const nextCtxStepReceiptKey = (name: string): string => {
@@ -3300,6 +3416,14 @@ function createMinimalWorkerCtx(
3300
3416
  }
3301
3417
  return `:stale:${staleAfterSeconds}:${Math.floor(nowMs() / (staleAfterSeconds * 1000))}`;
3302
3418
  };
3419
+ const rootToolBatchScheduler = new WorkerToolBatchScheduler(
3420
+ req,
3421
+ governor,
3422
+ resolveToolPacing,
3423
+ abortSignal,
3424
+ undefined,
3425
+ callbacks,
3426
+ );
3303
3427
  // Local ancestry chain that always ENDS with the currently-executing play
3304
3428
  // (req.playName). The /api/v2/plays/run lineage validator requires the
3305
3429
  // submitted ancestry's tail to equal the executor token's play name (i.e.
@@ -3746,10 +3870,8 @@ function createMinimalWorkerCtx(
3746
3870
  reportExecutionHeartbeat(false);
3747
3871
  const entry = uniqueRowsToExecuteEntries[myIndex]!;
3748
3872
  const pendingRow = pendingRowsByKey.get(entry.rowKey);
3749
- const row = runtimeCsvExecutionRow(
3750
- entry.row,
3751
- pendingRow,
3752
- ) as T & Record<string, unknown>;
3873
+ const row = runtimeCsvExecutionRow(entry.row, pendingRow) as T &
3874
+ Record<string, unknown>;
3753
3875
  const absoluteIndex = entry.absoluteIndex;
3754
3876
  const enriched: Record<string, unknown> =
3755
3877
  cloneCsvAliasedRow(row);
@@ -4753,7 +4875,13 @@ function createMinimalWorkerCtx(
4753
4875
  toolId: request.toolId,
4754
4876
  requestInput: request.input,
4755
4877
  })}${staleRuntimeSuffix(request.staleAfterSeconds)}`,
4756
- () => executeToolWithLifecycle(req, request, workflowStep, callbacks),
4878
+ () =>
4879
+ rootToolBatchScheduler.execute(
4880
+ request.id,
4881
+ request.toolId,
4882
+ request.input,
4883
+ workflowStep,
4884
+ ),
4757
4885
  );
4758
4886
  },
4759
4887
  },
@@ -5214,7 +5342,10 @@ function createMinimalWorkerCtx(
5214
5342
  };
5215
5343
  const fetchInit = { ...init, headers };
5216
5344
  delete fetchInit.auth;
5217
- const response = await fetch(url, fetchInit);
5345
+ const response = await safeWorkerPublicFetch(url, fetchInit, {
5346
+ allowedOrigins: getAllowedWorkerFetchOrigins(req),
5347
+ sensitiveHeaders: Object.keys(secretHeaderMarkers),
5348
+ });
5218
5349
  assertNotAborted(abortSignal);
5219
5350
  const bodyText = await response.text();
5220
5351
  const redactedBodyText = secretRedactor.redactString(bodyText);
@@ -5500,12 +5631,10 @@ async function executeRunRequest(
5500
5631
  let runLogBuffer: string[] = [];
5501
5632
  let pendingRunLogLines: string[] = [];
5502
5633
  // Monotonic count of every line ever appended to this run's worker log
5503
- // channel. runLogBuffer/pendingRunLogLines are rotating tails of those
5504
- // lines (RUN_LOG_BUFFER_LIMIT is the coordinator transport cache only), so
5505
- // each log.appended batch can carry the absolute channelOffset of its first
5506
- // line: totalEmittedLogLines - pendingRunLogLines.length. Run Log Stream
5507
- // ingestion skips re-sent prefixes positionally (exactly-once, repeated
5508
- // identical lines preserved) instead of text-deduping.
5634
+ // channel. runLogBuffer is only the rotating live/coordinator transport
5635
+ // cache; pendingRunLogLines is the durable unsent suffix and must not rotate,
5636
+ // otherwise a flush already in flight can let fresh lines fall out before
5637
+ // Run Log Stream ingestion ever sees them.
5509
5638
  let totalEmittedLogLines = 0;
5510
5639
  let stepProgressByNodeId: LiveNodeProgressMap = {};
5511
5640
  let dirtyProgressNodeIds = new Set<string>();
@@ -5531,9 +5660,7 @@ async function executeRunRequest(
5531
5660
  if (!trimmed) return;
5532
5661
  totalEmittedLogLines += 1;
5533
5662
  runLogBuffer = [...runLogBuffer, trimmed].slice(-RUN_LOG_BUFFER_LIMIT);
5534
- pendingRunLogLines = [...pendingRunLogLines, trimmed].slice(
5535
- -RUN_LOG_BUFFER_LIMIT,
5536
- );
5663
+ pendingRunLogLines = [...pendingRunLogLines, trimmed];
5537
5664
  };
5538
5665
 
5539
5666
  const updateStepProgress = (input: {
@@ -5722,9 +5849,7 @@ async function executeRunRequest(
5722
5849
  lines: pendingRunLogLines,
5723
5850
  // Positional cursor: pendingRunLogLines always holds the LAST
5724
5851
  // pending lines emitted on this channel, so the offset of its first
5725
- // line is total-emitted minus pending length. This also covers the
5726
- // terminal full-buffer re-send (pending = runLogBuffer), which
5727
- // ingestion then skips positionally instead of via text dedupe.
5852
+ // line is total-emitted minus pending length.
5728
5853
  channelOffset: totalEmittedLogLines - pendingRunLogLines.length,
5729
5854
  });
5730
5855
  pendingRunLogLines = [];
@@ -5820,18 +5945,13 @@ async function executeRunRequest(
5820
5945
  terminalEvent: PlayRunLedgerEvent,
5821
5946
  ): Promise<void> => {
5822
5947
  if (!options?.persistResultDatasets) return;
5948
+ await ledgerFlushInFlight;
5823
5949
  const now = nowMs();
5824
- // Terminal re-send of the full retained buffer. drainPendingLedgerEvents
5825
- // stamps it with channelOffset = totalEmitted - buffer length, so Run Log
5826
- // Stream ingestion drops the already-ingested prefix positionally.
5827
- pendingRunLogLines = runLogBuffer;
5828
5950
  dirtyProgressNodeIds = new Set([
5829
5951
  ...dirtyProgressNodeIds,
5830
5952
  ...Object.keys(stepProgressByNodeId),
5831
5953
  ]);
5832
- pendingLedgerEvents = [...pendingLedgerEvents, terminalEvent];
5833
- await ledgerFlushInFlight;
5834
- const events = drainPendingLedgerEvents(now);
5954
+ const events = [...drainPendingLedgerEvents(now), terminalEvent];
5835
5955
  if (events.length === 0) return;
5836
5956
  try {
5837
5957
  await postRuntimeApi(req.baseUrl, req.executorToken, {
@@ -5871,7 +5991,17 @@ async function executeRunRequest(
5871
5991
  onToolFailed: (toolId, at) => stepLifecycle?.onToolFailed(toolId, at),
5872
5992
  };
5873
5993
 
5994
+ let hasEmittedRunnerEvent = false;
5874
5995
  const wrappedEmit = (event: RunnerEvent) => {
5996
+ if (!hasEmittedRunnerEvent) {
5997
+ hasEmittedRunnerEvent = true;
5998
+ recordRunnerPerfTrace({
5999
+ req,
6000
+ phase: 'first_event',
6001
+ ms: nowMs() - startedAt,
6002
+ extra: { eventType: event.type },
6003
+ });
6004
+ }
5875
6005
  if (event.type === 'log') {
5876
6006
  appendRunLogLine(event.message);
5877
6007
  flushLedgerEvents(false);
@@ -6042,11 +6172,18 @@ async function executeRunRequest(
6042
6172
  phase: 'runner.execute_total',
6043
6173
  ms: nowMs() - startedAt,
6044
6174
  });
6175
+ // The server-side watch path reads coordinator-buffered perf traces from
6176
+ // the same tail response that carries the terminal event. Runner traces are
6177
+ // forwarded asynchronously during execution so normal play latency is not
6178
+ // gated on observability writes; before returning terminal output, wait a
6179
+ // bounded interval for those writes to land. This keeps benchmark exports
6180
+ // able to decompose "terminal wait" into runner/dataset/ledger phases
6181
+ // without turning trace delivery into a correctness dependency.
6182
+ await drainRunnerPerfTraces(req);
6045
6183
  return {
6046
6184
  playName: req.playName,
6047
6185
  result: serializedResult,
6048
6186
  outputRows: inferOutputRows(serializedResult),
6049
- liveLogs: runLogBuffer,
6050
6187
  liveNodeProgress: stepProgressSnapshot(),
6051
6188
  durationMs: nowMs() - startedAt,
6052
6189
  };
@@ -6070,6 +6207,7 @@ async function executeRunRequest(
6070
6207
  appendRunLogLine(
6071
6208
  `${aborted ? '[cancelled]' : '[error]'} ${redactSecretsFromLogString(message)}`,
6072
6209
  );
6210
+ const terminalUpdateStartedAt = nowMs();
6073
6211
  await flushTerminalLedgerEvents({
6074
6212
  type: aborted ? 'run.cancelled' : 'run.failed',
6075
6213
  runId: req.runId,
@@ -6094,25 +6232,55 @@ async function executeRunRequest(
6094
6232
  ],
6095
6233
  },
6096
6234
  });
6235
+ recordRunnerPerfTrace({
6236
+ req,
6237
+ phase: aborted
6238
+ ? 'runner.terminal_ledger_append_cancelled'
6239
+ : 'runner.terminal_ledger_append_failed',
6240
+ ms: nowMs() - terminalUpdateStartedAt,
6241
+ extra: {
6242
+ errorCode: failure.code,
6243
+ errorPhase: failure.phase,
6244
+ },
6245
+ });
6246
+ const billingStartedAt = nowMs();
6097
6247
  await finalizeWorkerComputeBilling({
6098
6248
  req,
6099
6249
  success: false,
6100
6250
  actionEstimate: 4,
6101
- }).catch((finalizeError) => {
6102
- console.error(
6103
- `[play-harness] non-fatal compute billing finalize failed runId=${req.runId}: ${
6104
- finalizeError instanceof Error
6105
- ? finalizeError.message
6106
- : String(finalizeError)
6107
- }`,
6108
- );
6109
- });
6251
+ })
6252
+ .catch((finalizeError) => {
6253
+ console.error(
6254
+ `[play-harness] non-fatal compute billing finalize failed runId=${req.runId}: ${
6255
+ finalizeError instanceof Error
6256
+ ? finalizeError.message
6257
+ : String(finalizeError)
6258
+ }`,
6259
+ );
6260
+ })
6261
+ .finally(() => {
6262
+ recordRunnerPerfTrace({
6263
+ req,
6264
+ phase: 'runner.compute_billing_finalize_failed',
6265
+ ms: nowMs() - billingStartedAt,
6266
+ });
6267
+ });
6110
6268
  }
6111
6269
  await signalParentPlayTerminal({
6112
6270
  req,
6113
6271
  status: aborted ? 'cancelled' : 'failed',
6114
6272
  error: message,
6115
6273
  }).catch(() => null);
6274
+ recordRunnerPerfTrace({
6275
+ req,
6276
+ phase: aborted ? 'runner.execute_cancelled' : 'runner.execute_failed',
6277
+ ms: nowMs() - startedAt,
6278
+ extra: {
6279
+ errorCode: failure.code,
6280
+ errorPhase: failure.phase,
6281
+ },
6282
+ });
6283
+ await drainRunnerPerfTraces(req);
6116
6284
  throw error;
6117
6285
  } finally {
6118
6286
  clearTimeout(runtimeDeadlineTimer);
@@ -6541,14 +6709,29 @@ export class TenantWorkflow extends WorkflowEntrypoint<
6541
6709
  // Must run BEFORE any SDK call site that would reach into HARNESS,
6542
6710
  // i.e. before user play code is invoked. Idempotent within a run.
6543
6711
  captureHarnessBinding(this.env);
6712
+ recordRunnerPerfTrace({
6713
+ req,
6714
+ phase: 'tenant_workflow_entry',
6715
+ ms: 0,
6716
+ extra: {
6717
+ hasWorkflowStep: true,
6718
+ },
6719
+ });
6544
6720
  // Fire the one-time wiring probe (deduplicated across runs in the
6545
6721
  // same isolate). Awaited so the result is in the log before user code
6546
6722
  // begins. A missing or unhealthy HARNESS fails the run before user code
6547
6723
  // can accidentally take a slower fallback path.
6724
+ const probeStartedAt = nowMs();
6548
6725
  await probeHarnessOnce(this.env, runPrefix);
6726
+ recordRunnerPerfTrace({
6727
+ req,
6728
+ phase: 'tenant_workflow_probe_harness',
6729
+ ms: nowMs() - probeStartedAt,
6730
+ });
6549
6731
  const abortController = new AbortController();
6550
6732
  try {
6551
- return (await executeRunRequest(
6733
+ const executeStartedAt = nowMs();
6734
+ const output = (await executeRunRequest(
6552
6735
  req,
6553
6736
  this.env,
6554
6737
  (runnerEvent) => {
@@ -6579,6 +6762,12 @@ export class TenantWorkflow extends WorkflowEntrypoint<
6579
6762
  waitUntil: (promise) => this.ctx.waitUntil(promise),
6580
6763
  },
6581
6764
  )) as Record<string, unknown>;
6765
+ recordRunnerPerfTrace({
6766
+ req,
6767
+ phase: 'tenant_workflow_execute_request',
6768
+ ms: nowMs() - executeStartedAt,
6769
+ });
6770
+ return output;
6582
6771
  } catch (error) {
6583
6772
  // CF Workflows + the dynamic-workflows framework swallow the error
6584
6773
  // message and surface only "internal error; reference = <id>" via
@@ -41,22 +41,13 @@ function errorMessage(error: unknown): string {
41
41
  return error instanceof Error ? error.message : String(error);
42
42
  }
43
43
 
44
- function runningReceiptError(
45
- key: string,
46
- receipt: WorkerRuntimeReceipt,
47
- ): Error {
48
- return new Error(
49
- `Runtime receipt ${key} is already running for run ${receipt.runId ?? 'unknown'}.`,
50
- );
51
- }
52
-
53
44
  async function executeAndPersistReceipt<T>(input: {
54
45
  key: string;
55
46
  playName: string;
56
47
  runId: string;
57
48
  execute: () => Promise<T> | T;
58
49
  receiptStore: WorkerRuntimeReceiptStore;
59
- ownership: 'claimed' | 'workflow_replay';
50
+ ownership: 'claimed' | 'reconciled';
60
51
  }): Promise<T> {
61
52
  let output: T;
62
53
  try {
@@ -83,9 +74,13 @@ async function executeAndPersistReceipt<T>(input: {
83
74
  output,
84
75
  });
85
76
  if (!completed) {
86
- throw new Error(
87
- `Runtime receipt ${input.key} ${input.ownership} execution completed but completed receipt could not be persisted.`,
88
- );
77
+ return output;
78
+ }
79
+ if (
80
+ (completed.status === 'completed' || completed.status === 'skipped') &&
81
+ completed.output !== undefined
82
+ ) {
83
+ return receiptOutput<T>(completed);
89
84
  }
90
85
  return output;
91
86
  }
@@ -94,6 +89,7 @@ export async function runWorkerRuntimeReceiptBoundary<T>(
94
89
  input: RuntimeReceiptContext & {
95
90
  execute: () => Promise<T> | T;
96
91
  repairRunningReceiptForSameRun?: boolean;
92
+ reclaimRunning?: boolean;
97
93
  },
98
94
  ): Promise<T> {
99
95
  const key = scopedReceiptKey(input);
@@ -102,25 +98,20 @@ export async function runWorkerRuntimeReceiptBoundary<T>(
102
98
  playName: input.playName,
103
99
  runId: input.runId,
104
100
  key,
101
+ ...(input.reclaimRunning === true ? { reclaimRunning: true } : {}),
105
102
  });
106
103
  if (claimed.disposition === 'reused') {
107
104
  return receiptOutput<T>(claimed.receipt);
108
105
  }
109
106
  if (claimed.disposition === 'running') {
110
- if (
111
- input.repairRunningReceiptForSameRun &&
112
- claimed.receipt.runId === input.runId
113
- ) {
114
- return executeAndPersistReceipt({
115
- key,
116
- playName: input.playName,
117
- runId: input.runId,
118
- execute: input.execute,
119
- receiptStore,
120
- ownership: 'workflow_replay',
121
- });
122
- }
123
- throw runningReceiptError(key, claimed.receipt);
107
+ return executeAndPersistReceipt({
108
+ key,
109
+ playName: input.playName,
110
+ runId: input.runId,
111
+ execute: input.execute,
112
+ receiptStore,
113
+ ownership: 'reconciled',
114
+ });
124
115
  }
125
116
  if (claimed.disposition === 'failed') {
126
117
  throw new Error(