deepline 0.1.20 → 0.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -56,6 +56,7 @@ import {
56
56
  type ToolResultMetadataInput,
57
57
  } from '../../../shared_libs/play-runtime/tool-result';
58
58
  import type { PlayCallGovernanceSnapshot } from '../../../shared_libs/play-runtime/scheduler-backend';
59
+ import type { PreloadedRuntimeDbSession } from '../../../shared_libs/play-runtime/db-session';
59
60
  import type { PlayRuntimeManifestMap } from '../../../shared_libs/plays/compiler-manifest';
60
61
  import {
61
62
  derivePlayRowIdentity,
@@ -80,6 +81,8 @@ import {
80
81
  // re-bundle harness internals into per-play. Keep that in mind.
81
82
  import {
82
83
  harnessFetchStagedFile,
84
+ harnessPersistCompletedSheetRows,
85
+ harnessPrewarmPostgresSessions,
83
86
  harnessStartSheetDataset,
84
87
  setHarnessBinding,
85
88
  } from '../../../sdk/src/plays/harness-stub';
@@ -129,6 +132,7 @@ type RunRequest = {
129
132
  childPlayManifests?: PlayRuntimeManifestMap | null;
130
133
  /** Internal ctx.runPlay lineage. Public SDK/users never see this. */
131
134
  playCallGovernance?: PlayCallGovernanceSnapshot | null;
135
+ preloadedDbSessions?: PreloadedRuntimeDbSession[] | null;
132
136
  /** Cloudflare coordinator URL for direct Workflow control-plane signals. */
133
137
  coordinatorUrl?: string | null;
134
138
  /** Request-scoped coordinator auth token for preview/dev direct control calls. */
@@ -191,6 +195,10 @@ type WorkerEnv = {
191
195
  runId: string,
192
196
  payload: Record<string, unknown>,
193
197
  ): Promise<void>;
198
+ recordRunEvent(
199
+ runId: string,
200
+ event: Record<string, unknown>,
201
+ ): Promise<void>;
194
202
  };
195
203
  /**
196
204
  * Service binding to the long-lived Play Harness Worker
@@ -516,10 +524,58 @@ type WorkflowRunOutput = {
516
524
  durationMs: number;
517
525
  };
518
526
 
527
/**
 * Point-in-time progress report for a single node of a running play.
 *
 * Every field is optional, so a report may carry only the values that
 * changed since the previous one.
 */
type LiveNodeProgressSnapshot = {
  /** Human-readable progress text, e.g. "12 / 40 rows processed". */
  message?: string;
  /** Units finished so far (ctx.map reports rows written). */
  completed?: number;
  /** Expected number of units; left unset when the size is not known upfront. */
  total?: number;
  /** Units that failed. */
  failed?: number;
  /** When the node started (ctx.map reports `Date.now()` epoch milliseconds). */
  startedAt?: number;
  /** When this snapshot was produced (epoch milliseconds in ctx.map reports). */
  updatedAt?: number;
  /** When the node finished (epoch milliseconds in ctx.map reports). */
  completedAt?: number;
  /** Table namespace holding the node's rows; ctx.map passes the map name. */
  artifactTableNamespace?: string | null;
};

/**
 * Latest snapshot per node.
 * NOTE(review): presumably keyed by the same `nodeId` that `onNodeProgress`
 * receives — confirm against the consumer.
 */
type LiveNodeProgressMap = {
  [nodeId: string]: LiveNodeProgressSnapshot;
};

/** Optional hooks a caller can hand to the worker ctx to observe a run. */
type WorkerCtxCallbacks = {
  /** Invoked with the node id and its newest progress snapshot. */
  onNodeProgress?: (input: {
    nodeId: string;
    progress: LiveNodeProgressSnapshot;
  }) => void;
};
546
+
519
547
/** Current wall-clock time, in milliseconds since the Unix epoch. */
function nowMs(): number {
  const epochMillis = Date.now();
  return epochMillis;
}
522
550
 
551
/**
 * Emit one runner perf-trace record.
 *
 * The record is written to the console as a `[perf-trace]` JSON line and, when
 * a coordinator binding is cached, forwarded through `recordPerfTrace`.
 * Forwarding is fire-and-forget: a rejected forward is logged as a warning and
 * never surfaces to the caller.
 *
 * @param input.req   Run request; nothing is emitted when `runId` is empty.
 * @param input.phase Phase name; nothing is emitted when it is empty. The
 *                    emitted name is always namespaced under `runner.`.
 * @param input.ms    Optional duration in milliseconds; omitted from the
 *                    payload when undefined.
 * @param input.extra Extra fields merged into the payload last, so they can
 *                    override the base fields.
 */
function recordRunnerPerfTrace(input: {
  req: RunRequest;
  phase: string;
  ms?: number;
  extra?: Record<string, unknown>;
}): void {
  const { req, phase, ms, extra } = input;
  if (!req.runId || !phase) return;

  // Call sites already pass fully-qualified names such as `runner.tool.drain`.
  // Only add the namespace when it is missing, otherwise the emitted phase
  // becomes `runner.runner.tool.drain`.
  const qualifiedPhase = phase.startsWith('runner.')
    ? phase
    : `runner.${phase}`;

  const payload = {
    ts: Date.now(),
    source: 'dynamic_worker' as const,
    runId: req.runId,
    phase: qualifiedPhase,
    ...(ms !== undefined ? { ms } : {}),
    ...(extra ?? {}),
  };
  console.log(
    `[deepline-run:${req.runId}] [perf-trace] ${JSON.stringify(payload)}`,
  );

  // Deliberately not awaited: tracing must not add latency to the run path.
  void cachedCoordinatorBinding
    ?.recordPerfTrace(req.runId, payload)
    .catch((error: unknown) => {
      const message = error instanceof Error ? error.message : String(error);
      console.warn(
        `[deepline-run:${req.runId}] failed to forward runner perf trace: ${message}`,
      );
    });
}
578
+
523
579
  function makeRequestId(): string {
524
580
  // Workers crypto.randomUUID is available without nodejs_compat.
525
581
  return crypto.randomUUID();
@@ -660,7 +716,7 @@ async function submitChildPlayThroughCoordinator(input: {
660
716
  const coordinatorUrl = input.req.coordinatorUrl?.trim();
661
717
  if (coordinatorUrl) {
662
718
  // Keep child plays on the same coordinator/Workflow submit path as
663
- // top-level runs; the RPC binding remains only as a no-URL fallback.
719
+ // top-level runs when the coordinator URL is present.
664
720
  const res = await fetch(
665
721
  `${coordinatorUrl.replace(/\/$/, '')}/workflow/${encodeURIComponent(
666
722
  input.req.runId,
@@ -1056,6 +1112,29 @@ async function callToolDirect(
1056
1112
  let lastError: Error | null = null;
1057
1113
 
1058
1114
  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
1115
+ if (toolId === 'test_transient_500' || toolId === 'test_transient_429') {
1116
+ const syntheticResult = executeSyntheticTransientRetry(
1117
+ toolId,
1118
+ input,
1119
+ attempt,
1120
+ );
1121
+ if (syntheticResult.ok) {
1122
+ return wrapWorkerToolResult(
1123
+ toolId,
1124
+ syntheticResult.result,
1125
+ syntheticToolMetadata(toolId),
1126
+ );
1127
+ }
1128
+ lastError = new Error(
1129
+ `tool ${toolId} ${syntheticResult.status} attempt ${attempt}/${maxAttempts}: ${syntheticResult.message}`,
1130
+ );
1131
+ if (attempt >= maxAttempts) {
1132
+ throw lastError;
1133
+ }
1134
+ await new Promise((resolve) => setTimeout(resolve, 1_000));
1135
+ continue;
1136
+ }
1137
+
1059
1138
  const res = await fetchRuntimeApi(req.baseUrl, path, {
1060
1139
  method: 'POST',
1061
1140
  headers: {
@@ -1241,6 +1320,41 @@ async function executeSyntheticTestRateLimitBatch(
1241
1320
  };
1242
1321
  }
1243
1322
 
1323
/** Outcome of one simulated attempt against a synthetic transient-failure tool. */
type SyntheticTransientRetryResult =
  | { ok: true; result: Record<string, unknown> }
  | { ok: false; status: number; message: string };

/**
 * Simulate a tool that fails a fixed number of times before succeeding, so the
 * retry loop can be exercised without a real provider.
 *
 * @param toolId  `test_transient_429` yields status 429; any other id yields 502.
 * @param input   `failures_before_success` (non-negative integer, default 1)
 *                sets how many leading attempts fail; `key` is echoed back as a
 *                string (default `'transient'`).
 * @param attempt 1-based attempt number.
 * @returns A failure descriptor while `attempt` is within the failure budget,
 *          otherwise a completed result recording the attempt count.
 */
function executeSyntheticTransientRetry(
  toolId: string,
  input: Record<string, unknown>,
  attempt: number,
): SyntheticTransientRetryResult {
  const requestedFailures = input.failures_before_success;
  let failureBudget = 1;
  if (
    typeof requestedFailures === 'number' &&
    Number.isInteger(requestedFailures) &&
    requestedFailures >= 0
  ) {
    failureBudget = requestedFailures;
  }

  const stillFailing = attempt <= failureBudget;
  if (!stillFailing) {
    return {
      ok: true,
      result: {
        status: 'completed',
        provider: 'test',
        key: String(input.key ?? 'transient'),
        attempts: attempt,
        recovered: attempt > 1,
      },
    };
  }

  let status = 502;
  if (toolId === 'test_transient_429') {
    status = 429;
  }
  return {
    ok: false,
    status,
    message: `Synthetic transient ${status} for attempt ${attempt}`,
  };
}
1357
+
1244
1358
  function executeSyntheticTestRateLimit(
1245
1359
  input: Record<string, unknown>,
1246
1360
  ): Record<string, unknown> {
@@ -1417,7 +1531,23 @@ class WorkerToolBatchScheduler {
1417
1531
  workflowStep?: WorkflowStep,
1418
1532
  ): Promise<unknown> {
1419
1533
  return new Promise((resolve, reject) => {
1420
- this.queue.push({ id, toolId, input, workflowStep, resolve, reject });
1534
+ const queuedAt = nowMs();
1535
+ this.queue.push({
1536
+ id,
1537
+ toolId,
1538
+ input,
1539
+ workflowStep,
1540
+ resolve: (value) => {
1541
+ recordRunnerPerfTrace({
1542
+ req: this.req,
1543
+ phase: 'runner.tool.request',
1544
+ ms: nowMs() - queuedAt,
1545
+ extra: { id, toolId },
1546
+ });
1547
+ resolve(value);
1548
+ },
1549
+ reject,
1550
+ });
1421
1551
  this.scheduleDrain();
1422
1552
  });
1423
1553
  }
@@ -1447,6 +1577,7 @@ class WorkerToolBatchScheduler {
1447
1577
  const requests = this.queue;
1448
1578
  this.queue = [];
1449
1579
  this.scheduled = false;
1580
+ const drainStartedAt = nowMs();
1450
1581
  await Promise.all(
1451
1582
  [...groupWorkerToolRequestsByTool(requests).entries()].map(
1452
1583
  async ([toolId, groupedRequests]) => {
@@ -1454,6 +1585,12 @@ class WorkerToolBatchScheduler {
1454
1585
  },
1455
1586
  ),
1456
1587
  );
1588
+ recordRunnerPerfTrace({
1589
+ req: this.req,
1590
+ phase: 'runner.tool.drain',
1591
+ ms: nowMs() - drainStartedAt,
1592
+ extra: { requests: requests.length },
1593
+ });
1457
1594
  if (this.queue.length > 0) {
1458
1595
  this.scheduleDrain();
1459
1596
  }
@@ -1465,6 +1602,7 @@ class WorkerToolBatchScheduler {
1465
1602
  ): Promise<void> {
1466
1603
  const strategy = getDefaultPlayRuntimeBatchStrategy(toolId);
1467
1604
  if (!strategy || toolId === 'test_wait_for_event') {
1605
+ const groupStartedAt = nowMs();
1468
1606
  await Promise.all(
1469
1607
  requests.map(async (request) => {
1470
1608
  try {
@@ -1480,14 +1618,27 @@ class WorkerToolBatchScheduler {
1480
1618
  }
1481
1619
  }),
1482
1620
  );
1621
+ recordRunnerPerfTrace({
1622
+ req: this.req,
1623
+ phase: 'runner.tool.group',
1624
+ ms: nowMs() - groupStartedAt,
1625
+ extra: { toolId, requests: requests.length, batched: false },
1626
+ });
1483
1627
  return;
1484
1628
  }
1485
1629
 
1630
+ const batchStartedAt = nowMs();
1486
1631
  await executeBatchedWorkerToolGroup({
1487
1632
  req: this.req,
1488
1633
  requests,
1489
1634
  strategy,
1490
1635
  });
1636
+ recordRunnerPerfTrace({
1637
+ req: this.req,
1638
+ phase: 'runner.tool.group',
1639
+ ms: nowMs() - batchStartedAt,
1640
+ extra: { toolId, requests: requests.length, batched: true },
1641
+ });
1491
1642
  }
1492
1643
  }
1493
1644
 
@@ -2292,7 +2443,7 @@ function isStreamingDataset<T extends Record<string, unknown>>(
2292
2443
 
2293
2444
  /**
2294
2445
  * Resolve the sheet contract for a (play, namespace) from the request's
2295
- * contractSnapshot. Required by both direct-Neon writes and the HTTP fallback.
2446
+ * contractSnapshot. Required by direct runtime sheet persistence.
2296
2447
  */
2297
2448
  function resolveSheetContractFromReq(
2298
2449
  req: RunRequest,
@@ -2320,13 +2471,13 @@ function resolveSheetContractFromReq(
2320
2471
  }
2321
2472
 
2322
2473
  /**
2323
- * Direct-Neon writes from the Workers harness. Resolves the org runtime
2324
- * Postgres URL via `create_db_session` (cached by runtime-api per
2325
- * runId+namespace), then runs the same mega-CTE SQL Daytona uses but over
2326
- * `@neondatabase/serverless` so it runs inside the V8 isolate.
2474
+ * Direct-Neon writes from the shared harness Worker. Resolves the org runtime
2475
+ * Postgres URL via `create_db_session` inside that long-lived Worker, then
2476
+ * runs the same row persistence path Daytona uses without bundling Neon into
2477
+ * every per-play dynamic Worker.
2327
2478
  *
2328
- * No HTTP fallback: if the contract/session can't be obtained, we want a
2329
- * loud failure rather than silently regressing to per-chunk Vercel hops.
2479
+ * If the contract/session can't be obtained, fail loudly rather than silently
2480
+ * regressing to per-chunk Vercel hops.
2330
2481
  */
2331
2482
  function requireSheetContract(
2332
2483
  req: RunRequest,
@@ -2351,12 +2502,9 @@ async function persistCompletedMapRows(input: {
2351
2502
  extraOutputFields?: string[];
2352
2503
  }): Promise<void> {
2353
2504
  if (input.rows.length === 0) return;
2354
- await postRuntimeApi<{
2355
- ok: true;
2356
- rowsWritten: number;
2357
- tableNamespace: string;
2358
- }>(input.req.baseUrl, input.req.executorToken, {
2359
- action: 'persist_completed_sheet_rows',
2505
+ await harnessPersistCompletedSheetRows({
2506
+ baseUrl: input.req.baseUrl,
2507
+ executorToken: input.req.executorToken,
2360
2508
  playName: input.req.playName,
2361
2509
  tableNamespace: input.tableNamespace,
2362
2510
  sheetContract: requireSheetContract(input.req, input.tableNamespace),
@@ -2368,6 +2516,7 @@ async function persistCompletedMapRows(input: {
2368
2516
  ),
2369
2517
  ],
2370
2518
  runId: input.req.runId,
2519
+ userEmail: input.req.userEmail,
2371
2520
  });
2372
2521
  }
2373
2522
 
@@ -2384,19 +2533,15 @@ async function prepareMapRows(input: {
2384
2533
  if (input.rows.length === 0) {
2385
2534
  return { inserted: 0, skipped: 0, pendingRows: [], completedRows: [] };
2386
2535
  }
2387
- const result = await postRuntimeApi<{
2388
- inserted: number;
2389
- skipped: number;
2390
- pendingRows: Record<string, unknown>[];
2391
- completedRows: Record<string, unknown>[];
2392
- tableNamespace: string;
2393
- }>(input.req.baseUrl, input.req.executorToken, {
2394
- action: 'start_sheet_dataset',
2536
+ const result = await harnessStartSheetDataset({
2537
+ baseUrl: input.req.baseUrl,
2538
+ executorToken: input.req.executorToken,
2395
2539
  playName: input.req.playName,
2396
2540
  tableNamespace: input.tableNamespace,
2397
2541
  sheetContract: requireSheetContract(input.req, input.tableNamespace),
2398
2542
  rows: input.rows.map((row) => ({ ...row })),
2399
2543
  runId: input.req.runId,
2544
+ userEmail: input.req.userEmail,
2400
2545
  });
2401
2546
  return {
2402
2547
  inserted: result.inserted,
@@ -2494,6 +2639,7 @@ function createMinimalWorkerCtx(
2494
2639
  env: WorkerEnv,
2495
2640
  workflowStep?: WorkflowStep,
2496
2641
  abortSignal?: AbortSignal,
2642
+ callbacks?: WorkerCtxCallbacks,
2497
2643
  ): unknown {
2498
2644
  let playCallCount = 0;
2499
2645
  const parentChildCalls: Record<string, number> = {};
@@ -2567,6 +2713,8 @@ function createMinimalWorkerCtx(
2567
2713
  >,
2568
2714
  opts?: WorkerMapOptions,
2569
2715
  ): Promise<unknown> => {
2716
+ const mapStartedAt = nowMs();
2717
+ const mapNodeId = `map:${name}`;
2570
2718
  const sliced = rows;
2571
2719
  const baseOffset = 0;
2572
2720
  const fieldEntries = Object.entries(fieldsDef);
@@ -2587,6 +2735,30 @@ function createMinimalWorkerCtx(
2587
2735
  softWorkflowStepBudget: plan?.chunkPlan.softWorkflowStepBudget,
2588
2736
  });
2589
2737
  const outputFields = fieldEntries.map(([field]) => field);
2738
+ const updateMapProgress = (progress: LiveNodeProgressSnapshot) => {
2739
+ callbacks?.onNodeProgress?.({
2740
+ nodeId: mapNodeId,
2741
+ progress: {
2742
+ artifactTableNamespace: name,
2743
+ failed: 0,
2744
+ ...progress,
2745
+ updatedAt: progress.updatedAt ?? nowMs(),
2746
+ },
2747
+ });
2748
+ };
2749
+ const formatMapProgressMessage = (completed: number, total?: number) =>
2750
+ typeof total === 'number' && Number.isFinite(total) && total > 0
2751
+ ? `${completed.toLocaleString()} / ${total.toLocaleString()} rows processed`
2752
+ : `${completed.toLocaleString()} rows processed`;
2753
+ updateMapProgress({
2754
+ completed: 0,
2755
+ total: streaming ? undefined : sliced.length,
2756
+ startedAt: mapStartedAt,
2757
+ message: formatMapProgressMessage(
2758
+ 0,
2759
+ streaming ? undefined : sliced.length,
2760
+ ),
2761
+ });
2590
2762
  const explicitRowKeysSeen =
2591
2763
  opts?.key === undefined ? null : new Map<string, number>();
2592
2764
  const resolveExplicitKeyValue = (
@@ -2669,12 +2841,21 @@ function createMinimalWorkerCtx(
2669
2841
  chunkStart: number,
2670
2842
  chunkIndex: number,
2671
2843
  ): Promise<WorkerMapChunkSummary<T & Record<string, unknown>>> => {
2844
+ const chunkStartedAt = nowMs();
2672
2845
  assertNotAborted(abortSignal);
2846
+ const keyStartedAt = nowMs();
2673
2847
  const chunkEntries = chunkRows.map((row, localIndex) => {
2674
2848
  const absoluteIndex = baseOffset + chunkStart + localIndex;
2675
2849
  const rowKey = resolveRowKey(row, absoluteIndex);
2676
2850
  return { row, absoluteIndex, rowKey };
2677
2851
  });
2852
+ recordRunnerPerfTrace({
2853
+ req,
2854
+ phase: 'runner.map_chunk.keys',
2855
+ ms: nowMs() - keyStartedAt,
2856
+ extra: { mapName: name, chunkIndex, rows: chunkRows.length },
2857
+ });
2858
+ const prepareStartedAt = nowMs();
2678
2859
  const prepared = await prepareMapRows({
2679
2860
  req,
2680
2861
  tableNamespace: name,
@@ -2683,6 +2864,20 @@ function createMinimalWorkerCtx(
2683
2864
  __deeplineRowKey: rowKey,
2684
2865
  })),
2685
2866
  });
2867
+ recordRunnerPerfTrace({
2868
+ req,
2869
+ phase: 'runner.map_chunk.prepare_rows',
2870
+ ms: nowMs() - prepareStartedAt,
2871
+ extra: {
2872
+ mapName: name,
2873
+ chunkIndex,
2874
+ rows: chunkRows.length,
2875
+ inserted: prepared.inserted,
2876
+ skipped: prepared.skipped,
2877
+ pendingRows: prepared.pendingRows.length,
2878
+ completedRows: prepared.completedRows.length,
2879
+ },
2880
+ });
2686
2881
  const pendingKeys = new Set<string>();
2687
2882
  const completedKeys = new Set<string>();
2688
2883
  const preparedKeys = new Set<string>();
@@ -2901,8 +3096,56 @@ function createMinimalWorkerCtx(
2901
3096
  })),
2902
3097
  });
2903
3098
  };
3099
+ const workersStartedAt = nowMs();
2904
3100
  const workerResults = await Promise.allSettled(workers);
2905
- await persistExecutedRows();
3101
+ recordRunnerPerfTrace({
3102
+ req,
3103
+ phase: 'runner.map_chunk.execute_workers',
3104
+ ms: nowMs() - workersStartedAt,
3105
+ extra: {
3106
+ mapName: name,
3107
+ chunkIndex,
3108
+ rowsToExecute: rowsToExecute.length,
3109
+ concurrency,
3110
+ },
3111
+ });
3112
+ const persistRowsStartedAt = nowMs();
3113
+ recordRunnerPerfTrace({
3114
+ req,
3115
+ phase: 'runner.map_chunk.persist_rows_start',
3116
+ ms: 0,
3117
+ extra: {
3118
+ mapName: name,
3119
+ chunkIndex,
3120
+ rowsToExecute: rowsToExecute.length,
3121
+ },
3122
+ });
3123
+ try {
3124
+ await persistExecutedRows();
3125
+ recordRunnerPerfTrace({
3126
+ req,
3127
+ phase: 'runner.map_chunk.persist_rows',
3128
+ ms: nowMs() - persistRowsStartedAt,
3129
+ extra: {
3130
+ mapName: name,
3131
+ chunkIndex,
3132
+ rowsToExecute: rowsToExecute.length,
3133
+ },
3134
+ });
3135
+ } catch (error) {
3136
+ recordRunnerPerfTrace({
3137
+ req,
3138
+ phase: 'runner.map_chunk.persist_rows_error',
3139
+ ms: nowMs() - persistRowsStartedAt,
3140
+ extra: {
3141
+ mapName: name,
3142
+ chunkIndex,
3143
+ rowsToExecute: rowsToExecute.length,
3144
+ error: error instanceof Error ? error.message : String(error),
3145
+ },
3146
+ });
3147
+ throw error;
3148
+ }
2906
3149
  const rejectedWorker = workerResults.find(
2907
3150
  (result): result is PromiseRejectedResult =>
2908
3151
  result.status === 'rejected',
@@ -2942,6 +3185,27 @@ function createMinimalWorkerCtx(
2942
3185
  return resultByKey.get(key);
2943
3186
  })
2944
3187
  .filter((row): row is T & Record<string, unknown> => Boolean(row));
3188
+ const hashStartedAt = nowMs();
3189
+ const hash = await hashJson(out);
3190
+ recordRunnerPerfTrace({
3191
+ req,
3192
+ phase: 'runner.map_chunk.hash',
3193
+ ms: nowMs() - hashStartedAt,
3194
+ extra: { mapName: name, chunkIndex, rows: out.length },
3195
+ });
3196
+ recordRunnerPerfTrace({
3197
+ req,
3198
+ phase: 'runner.map_chunk.total',
3199
+ ms: nowMs() - chunkStartedAt,
3200
+ extra: {
3201
+ mapName: name,
3202
+ chunkIndex,
3203
+ rowsRead: chunkRows.length,
3204
+ rowsWritten: out.length,
3205
+ rowsExecuted: executedRows.length,
3206
+ rowsCached: Math.max(0, out.length - executedRows.length),
3207
+ },
3208
+ });
2945
3209
  return {
2946
3210
  chunkIndex,
2947
3211
  rangeStart: baseOffset + chunkStart,
@@ -2954,7 +3218,7 @@ function createMinimalWorkerCtx(
2954
3218
  rowsInserted,
2955
3219
  rowsSkipped,
2956
3220
  outputDatasetId: `map:${name}`,
2957
- hash: await hashJson(out),
3221
+ hash,
2958
3222
  preview: toWorkflowSerializableValue(out.slice(0, 5)),
2959
3223
  };
2960
3224
  };
@@ -2978,7 +3242,7 @@ function createMinimalWorkerCtx(
2978
3242
  workflowStep.do as unknown as (
2979
3243
  name: string,
2980
3244
  config: {
2981
- retries: { limit: number; delay: string; backoff: 'exponential' };
3245
+ retries: { limit: number; delay: number; backoff: 'exponential' };
2982
3246
  },
2983
3247
  callback: () => Promise<
2984
3248
  WorkerMapChunkSummary<T & Record<string, unknown>>
@@ -2986,7 +3250,7 @@ function createMinimalWorkerCtx(
2986
3250
  ) => Promise<WorkerMapChunkSummary<T & Record<string, unknown>>>
2987
3251
  )(
2988
3252
  deterministicMapChunkStepName({ mapName: name, chunkIndex }),
2989
- { retries: { limit: 5, delay: '5 seconds', backoff: 'exponential' } },
3253
+ { retries: { limit: 5, delay: 100, backoff: 'exponential' } },
2990
3254
  async () => await processChunk(chunkRows, chunkStart, chunkIndex),
2991
3255
  )) as WorkerMapChunkSummary<T & Record<string, unknown>>;
2992
3256
  };
@@ -2996,6 +3260,14 @@ function createMinimalWorkerCtx(
2996
3260
  `Map completed: ${totalRowsWritten} results ` +
2997
3261
  `(${totalRowsExecuted} executed, ${totalRowsCached} already satisfied) ` +
2998
3262
  `inserted=${totalRowsInserted} skipped=${totalRowsSkipped}`;
3263
+ const completedAt = nowMs();
3264
+ updateMapProgress({
3265
+ completed: totalRowsWritten,
3266
+ total: totalRowsWritten,
3267
+ completedAt,
3268
+ updatedAt: completedAt,
3269
+ message: formatMapProgressMessage(totalRowsWritten, totalRowsWritten),
3270
+ });
2999
3271
  emitEvent({
3000
3272
  type: 'log',
3001
3273
  level: 'info',
@@ -3039,13 +3311,24 @@ function createMinimalWorkerCtx(
3039
3311
  totalRowsDuplicateReused += chunkResult.rowsDuplicateReused;
3040
3312
  totalRowsInserted += chunkResult.rowsInserted;
3041
3313
  totalRowsSkipped += chunkResult.rowsSkipped;
3314
+ updateMapProgress({
3315
+ completed: totalRowsWritten,
3316
+ message: formatMapProgressMessage(totalRowsWritten),
3317
+ });
3042
3318
  if (out.length < 10) {
3043
3319
  out.push(...chunkResult.preview.slice(0, 10 - out.length));
3044
3320
  }
3045
3321
  chunkStart += chunkRows.length;
3046
3322
  chunkIndex += 1;
3047
3323
  }
3048
- return finalize(totalRowsWritten);
3324
+ const dataset = finalize(totalRowsWritten);
3325
+ recordRunnerPerfTrace({
3326
+ req,
3327
+ phase: 'runner.map.total',
3328
+ ms: nowMs() - mapStartedAt,
3329
+ extra: { mapName: name, rowsWritten: totalRowsWritten, streaming: true },
3330
+ });
3331
+ return dataset;
3049
3332
  }
3050
3333
 
3051
3334
  if (workflowStep && sliced.length > rowsPerChunk) {
@@ -3063,11 +3346,23 @@ function createMinimalWorkerCtx(
3063
3346
  totalRowsDuplicateReused += chunkResult.rowsDuplicateReused;
3064
3347
  totalRowsInserted += chunkResult.rowsInserted;
3065
3348
  totalRowsSkipped += chunkResult.rowsSkipped;
3349
+ updateMapProgress({
3350
+ completed: totalRowsWritten,
3351
+ total: sliced.length,
3352
+ message: formatMapProgressMessage(totalRowsWritten, sliced.length),
3353
+ });
3066
3354
  if (out.length < 10) {
3067
3355
  out.push(...chunkResult.preview.slice(0, 10 - out.length));
3068
3356
  }
3069
3357
  }
3070
- return finalize(totalRowsWritten);
3358
+ const dataset = finalize(totalRowsWritten);
3359
+ recordRunnerPerfTrace({
3360
+ req,
3361
+ phase: 'runner.map.total',
3362
+ ms: nowMs() - mapStartedAt,
3363
+ extra: { mapName: name, rowsWritten: totalRowsWritten, streaming: false },
3364
+ });
3365
+ return dataset;
3071
3366
  }
3072
3367
 
3073
3368
  assertUniqueExplicitRowKeys(sliced, 0);
@@ -3078,7 +3373,23 @@ function createMinimalWorkerCtx(
3078
3373
  totalRowsInserted = chunkResult.rowsInserted;
3079
3374
  totalRowsSkipped = chunkResult.rowsSkipped;
3080
3375
  out.push(...chunkResult.preview);
3081
- return finalize(chunkResult.rowsWritten);
3376
+ updateMapProgress({
3377
+ completed: chunkResult.rowsWritten,
3378
+ total: sliced.length,
3379
+ message: formatMapProgressMessage(chunkResult.rowsWritten, sliced.length),
3380
+ });
3381
+ const dataset = finalize(chunkResult.rowsWritten);
3382
+ recordRunnerPerfTrace({
3383
+ req,
3384
+ phase: 'runner.map.total',
3385
+ ms: nowMs() - mapStartedAt,
3386
+ extra: {
3387
+ mapName: name,
3388
+ rowsWritten: chunkResult.rowsWritten,
3389
+ streaming: false,
3390
+ },
3391
+ });
3392
+ return dataset;
3082
3393
  };
3083
3394
 
3084
3395
  class WorkerMapBuilder<T extends Record<string, unknown>> {
@@ -3165,28 +3476,43 @@ function createMinimalWorkerCtx(
3165
3476
  arg: unknown,
3166
3477
  options?: CsvRenameOptions,
3167
3478
  ): Promise<T[]> {
3479
+ const csvStartedAt = nowMs();
3168
3480
  if (Array.isArray(arg)) {
3169
3481
  // Inline rows passed at call site — already in memory, keep the
3170
3482
  // legacy array-backed dataset shape.
3171
- return makeWorkerDataset(
3483
+ const dataset = makeWorkerDataset(
3172
3484
  'csv',
3173
3485
  applyCsvRenameProjection(arg as T[], options),
3174
3486
  {
3175
3487
  datasetKind: 'csv',
3176
3488
  },
3177
3489
  ) as unknown as T[];
3490
+ recordRunnerPerfTrace({
3491
+ req,
3492
+ phase: 'runner.csv',
3493
+ ms: nowMs() - csvStartedAt,
3494
+ extra: { mode: 'inline_array', rows: arg.length },
3495
+ });
3496
+ return dataset;
3178
3497
  }
3179
3498
  const filename = String(arg ?? '');
3180
3499
  if (req.inlineCsv && filename === req.inlineCsv.name) {
3181
3500
  // Inline CSV pre-staged by the dispatcher (small files <1 MiB). Already
3182
3501
  // in memory; no streaming needed.
3183
- return makeWorkerDataset(
3502
+ const dataset = makeWorkerDataset(
3184
3503
  'csv',
3185
3504
  applyCsvRenameProjection(req.inlineCsv.rows as T[], options),
3186
3505
  {
3187
3506
  datasetKind: 'csv',
3188
3507
  },
3189
3508
  ) as unknown as T[];
3509
+ recordRunnerPerfTrace({
3510
+ req,
3511
+ phase: 'runner.csv',
3512
+ ms: nowMs() - csvStartedAt,
3513
+ extra: { mode: 'inline_csv', rows: req.inlineCsv.rows.length },
3514
+ });
3515
+ return dataset;
3190
3516
  }
3191
3517
  // Resolution order: explicit inputR2Keys (runtime input) → packaged
3192
3518
  // files (relative-path imports bundled with the play artifact).
@@ -3211,7 +3537,7 @@ function createMinimalWorkerCtx(
3211
3537
  // and switches its chunked execution loop to consume iterChunks
3212
3538
  // directly, so 2M-row CSVs never get fully materialized in memory.
3213
3539
  const storageKey = r2Key;
3214
- return makeStreamingCsvDataset<T>({
3540
+ const dataset = makeStreamingCsvDataset<T>({
3215
3541
  name: filename,
3216
3542
  logicalPath: filename,
3217
3543
  renameOptions: options,
@@ -3223,6 +3549,13 @@ function createMinimalWorkerCtx(
3223
3549
  storageKey,
3224
3550
  }),
3225
3551
  }) as unknown as T[];
3552
+ recordRunnerPerfTrace({
3553
+ req,
3554
+ phase: 'runner.csv',
3555
+ ms: nowMs() - csvStartedAt,
3556
+ extra: { mode: 'streaming_r2', filename },
3557
+ });
3558
+ return dataset;
3226
3559
  },
3227
3560
  map<T extends Record<string, unknown>>(
3228
3561
  name: string,
@@ -3253,346 +3586,6 @@ function createMinimalWorkerCtx(
3253
3586
  throw new Error(
3254
3587
  'ctx.map(key, rows, fields, options) was removed. Use ctx.map(key, rows).step(...).run(options).',
3255
3588
  );
3256
- /*
3257
- const sliced = rows;
3258
- const baseOffset = 0;
3259
- const fieldEntries = Object.entries(fieldsDef);
3260
- const plan = req.executionPlan;
3261
- const planMap = plan?.maps.find(
3262
- (candidate) =>
3263
- candidate.mapName === name || candidate.tableNamespace === name,
3264
- );
3265
- const streaming = isStreamingDataset<T>(sliced);
3266
- // For streaming inputs we don't know the row count upfront — pass
3267
- // `totalRows: 0` so chooseMapChunkSize falls back to the preferred /
3268
- // default chunk size rather than trying to budget against an unknown.
3269
- const rowsPerChunk = chooseMapChunkSize({
3270
- totalRows: streaming ? 0 : sliced.length,
3271
- mapCount: Math.max(1, plan?.maps.length ?? 1),
3272
- stepsPerChunk: planMap?.stepsPerChunk ?? 1,
3273
- preferredChunkSize: planMap?.defaultChunkSize,
3274
- softWorkflowStepBudget: plan?.chunkPlan.softWorkflowStepBudget,
3275
- });
3276
- const outputFields = fieldEntries.map(([field]) => field);
3277
-
3278
- // Workflow steps have bounded CPU but unbounded wall time, so long
3279
- // network calls are OK here as long as we checkpoint by chunk and do not
3280
- // collapse a giant map into one monolithic step result.
3281
-
3282
- const processChunk = async (
3283
- chunkRows: T[],
3284
- chunkStart: number,
3285
- chunkIndex: number,
3286
- ): Promise<WorkerMapChunkSummary<T & Record<string, unknown>>> => {
3287
- assertNotAborted(abortSignal);
3288
- const prepared = await prepareMapRows({
3289
- req,
3290
- tableNamespace: name,
3291
- rows: chunkRows,
3292
- });
3293
- const pendingKeys = new Set<string>();
3294
- const completedKeys = new Set<string>();
3295
- const preparedKeys = new Set<string>();
3296
- for (const row of prepared.pendingRows) {
3297
- const key = derivePlayRowIdentity(
3298
- publicCsvInputRow(row),
3299
- name,
3300
- mapLogicFingerprint,
3301
- );
3302
- if (key) {
3303
- pendingKeys.add(key);
3304
- preparedKeys.add(key);
3305
- }
3306
- }
3307
- for (const row of prepared.completedRows) {
3308
- const key =
3309
- typeof row.__deeplineRowKey === 'string'
3310
- ? row.__deeplineRowKey
3311
- : derivePlayRowIdentity(
3312
- publicCsvInputRow(row),
3313
- name,
3314
- mapLogicFingerprint,
3315
- );
3316
- if (key) {
3317
- completedKeys.add(key);
3318
- preparedKeys.add(key);
3319
- }
3320
- }
3321
- const missingPreparedRows = chunkRows.filter((row) => {
3322
- const key = derivePlayRowIdentity(
3323
- publicCsvInputRow(row),
3324
- name,
3325
- mapLogicFingerprint,
3326
- );
3327
- return !key || !preparedKeys.has(key);
3328
- });
3329
- const rowsToExecute = chunkRows.filter((row) => {
3330
- const key = derivePlayRowIdentity(
3331
- publicCsvInputRow(row),
3332
- name,
3333
- mapLogicFingerprint,
3334
- );
3335
- return !key || pendingKeys.has(key) || !completedKeys.has(key);
3336
- });
3337
- const rowsInserted = prepared.inserted + missingPreparedRows.length;
3338
- const rowsSkipped = Math.max(
3339
- 0,
3340
- prepared.skipped - missingPreparedRows.length,
3341
- );
3342
- const concurrency = Math.max(1, Math.min(opts?.concurrency ?? 10, 100));
3343
- const executedRows: Array<T & Record<string, unknown>> = new Array(
3344
- rowsToExecute.length,
3345
- );
3346
- const generatedOutputFields = new Set<string>();
3347
- let idx = 0;
3348
- const workers: Array<Promise<void>> = [];
3349
- for (let w = 0; w < concurrency; w += 1) {
3350
- workers.push(
3351
- (async () => {
3352
- while (true) {
3353
- if (abortSignal?.aborted) return;
3354
- const myIndex = idx++;
3355
- if (myIndex >= rowsToExecute.length) return;
3356
- const row = rowsToExecute[myIndex]!;
3357
- const absoluteIndex = baseOffset + chunkStart + myIndex;
3358
- const enriched: Record<string, unknown> = cloneCsvAliasedRow(row);
3359
- const fieldOutputs: Record<string, unknown> = {};
3360
- const waterfallOutputs: RecordedWaterfallOutput[] = [];
3361
- const rowCtx = {
3362
- ...(ctx as Record<string, unknown>),
3363
- waterfall: (
3364
- toolNameOrSpec: string | WorkerInlineWaterfallSpec,
3365
- waterfallInput: Record<string, unknown>,
3366
- waterfallOpts?: WorkerWaterfallOptions,
3367
- ) =>
3368
- executeWorkerWaterfall(
3369
- req,
3370
- waterfallOutputs,
3371
- toolNameOrSpec,
3372
- waterfallInput,
3373
- waterfallOpts,
3374
- ),
3375
- };
3376
- for (const [key, value] of fieldEntries) {
3377
- if (typeof value === 'function') {
3378
- const resolved = await (
3379
- value as (
3380
- r: T,
3381
- c: unknown,
3382
- f: Record<string, unknown>,
3383
- i: number,
3384
- ) => Promise<unknown> | unknown
3385
- )(row, rowCtx, fieldOutputs, absoluteIndex);
3386
- enriched[key] = resolved;
3387
- fieldOutputs[key] = resolved;
3388
- } else {
3389
- enriched[key] = value;
3390
- fieldOutputs[key] = value;
3391
- }
3392
- }
3393
- for (const waterfallOutput of waterfallOutputs) {
3394
- const columnName =
3395
- `${sqlishIdentifierPart(waterfallOutput.waterfallId)}__` +
3396
- sqlishIdentifierPart(waterfallOutput.stepId);
3397
- enriched[columnName] = waterfallOutput.value;
3398
- generatedOutputFields.add(columnName);
3399
- }
3400
- executedRows[myIndex] = enriched as T & Record<string, unknown>;
3401
- }
3402
- })(),
3403
- );
3404
- }
3405
- await Promise.all(workers);
3406
- if (executedRows.length > 0) {
3407
- await persistCompletedMapRows({
3408
- req,
3409
- tableNamespace: name,
3410
- outputFields,
3411
- extraOutputFields: Array.from(generatedOutputFields),
3412
- rows: executedRows.map((row, executedIndex) => ({
3413
- ...row,
3414
- __deeplineRowKey: derivePlayRowIdentity(
3415
- publicCsvInputRow(rowsToExecute[executedIndex]!),
3416
- name,
3417
- mapLogicFingerprint,
3418
- ),
3419
- })),
3420
- });
3421
- }
3422
- const resultByKey = new Map<string, T & Record<string, unknown>>();
3423
- for (const completedRow of prepared.completedRows) {
3424
- const key =
3425
- typeof completedRow.__deeplineRowKey === 'string'
3426
- ? completedRow.__deeplineRowKey
3427
- : derivePlayRowIdentity(
3428
- publicCsvInputRow(completedRow),
3429
- name,
3430
- mapLogicFingerprint,
3431
- );
3432
- if (key) {
3433
- const { __deeplineRowKey: _rowKey, ...cleanedRow } =
3434
- publicCsvInputRow(completedRow);
3435
- void _rowKey;
3436
- resultByKey.set(key, cleanedRow as T & Record<string, unknown>);
3437
- }
3438
- }
3439
- for (
3440
- let executedIndex = 0;
3441
- executedIndex < executedRows.length;
3442
- executedIndex += 1
3443
- ) {
3444
- const executedRow = executedRows[executedIndex]!;
3445
- const key = derivePlayRowIdentity(
3446
- publicCsvInputRow(rowsToExecute[executedIndex]!),
3447
- name,
3448
- mapLogicFingerprint,
3449
- );
3450
- if (key) resultByKey.set(key, executedRow);
3451
- }
3452
- const out = chunkRows
3453
- .map((row) => {
3454
- const key = derivePlayRowIdentity(
3455
- publicCsvInputRow(row),
3456
- name,
3457
- mapLogicFingerprint,
3458
- );
3459
- return key ? resultByKey.get(key) : undefined;
3460
- })
3461
- .filter((row): row is T & Record<string, unknown> => Boolean(row));
3462
- return {
3463
- chunkIndex,
3464
- rangeStart: baseOffset + chunkStart,
3465
- rangeEnd: baseOffset + chunkStart + out.length,
3466
- rowsRead: chunkRows.length,
3467
- rowsWritten: out.length,
3468
- rowsExecuted: executedRows.length,
3469
- rowsCached: prepared.completedRows.length,
3470
- rowsInserted,
3471
- rowsSkipped,
3472
- outputDatasetId: `map:${name}`,
3473
- hash: await hashJson(out),
3474
- preview: toWorkflowSerializableValue(out.slice(0, 5)),
3475
- };
3476
- };
3477
-
3478
- const out: Array<T & Record<string, unknown>> = [];
3479
- let totalRowsExecuted = 0;
3480
- let totalRowsCached = 0;
3481
- let totalRowsInserted = 0;
3482
- let totalRowsSkipped = 0;
3483
-
3484
- const runChunkStep = async (
3485
- chunkRows: T[],
3486
- chunkStart: number,
3487
- chunkIndex: number,
3488
- ): Promise<WorkerMapChunkSummary<T & Record<string, unknown>>> => {
3489
- if (!workflowStep) {
3490
- return await processChunk(chunkRows, chunkStart, chunkIndex);
3491
- }
3492
- return (await (
3493
- workflowStep.do as unknown as (
3494
- name: string,
3495
- config: {
3496
- retries: { limit: number; delay: string; backoff: 'exponential' };
3497
- },
3498
- callback: () => Promise<
3499
- WorkerMapChunkSummary<T & Record<string, unknown>>
3500
- >,
3501
- ) => Promise<WorkerMapChunkSummary<T & Record<string, unknown>>>
3502
- )(
3503
- deterministicMapChunkStepName({ mapName: name, chunkIndex }),
3504
- { retries: { limit: 5, delay: '5 seconds', backoff: 'exponential' } },
3505
- async () => await processChunk(chunkRows, chunkStart, chunkIndex),
3506
- )) as WorkerMapChunkSummary<T & Record<string, unknown>>;
3507
- };
3508
-
3509
- const finalize = (totalRowsWritten: number) => {
3510
- const cacheSummary =
3511
- `Map completed: ${totalRowsWritten} results ` +
3512
- `(${totalRowsExecuted} executed, ${totalRowsCached} already satisfied) ` +
3513
- `inserted=${totalRowsInserted} skipped=${totalRowsSkipped}`;
3514
- emitEvent({
3515
- type: 'log',
3516
- level: 'info',
3517
- message: cacheSummary,
3518
- ts: nowMs(),
3519
- });
3520
- return makeWorkerDataset(name, out, {
3521
- count: totalRowsWritten,
3522
- cacheSummary,
3523
- workProgress: {
3524
- total: totalRowsWritten,
3525
- executed: totalRowsExecuted,
3526
- reused: totalRowsCached,
3527
- skipped: totalRowsCached,
3528
- pending: 0,
3529
- failed: 0,
3530
- },
3531
- });
3532
- };
3533
-
3534
- // Streaming path: input came from `ctx.csv` over R2 and we never
3535
- // materialized the full row array. Pull row chunks from the dataset's
3536
- // iterChunks() and run each through processChunk inside its own
3537
- // workflow step. Memory stays bounded by `rowsPerChunk`.
3538
- if (streaming) {
3539
- let totalRowsWritten = 0;
3540
- let chunkIndex = 0;
3541
- let chunkStart = 0;
3542
- const streamingDataset = sliced as unknown as StreamingCsvDataset<T>;
3543
- for await (const chunkRows of streamingDataset.iterChunks(
3544
- rowsPerChunk,
3545
- )) {
3546
- assertNotAborted(abortSignal);
3547
- if (chunkRows.length === 0) continue;
3548
- const chunkResult = await runChunkStep(
3549
- chunkRows,
3550
- chunkStart,
3551
- chunkIndex,
3552
- );
3553
- totalRowsWritten += chunkResult.rowsWritten;
3554
- totalRowsExecuted += chunkResult.rowsExecuted;
3555
- totalRowsCached += chunkResult.rowsCached;
3556
- totalRowsInserted += chunkResult.rowsInserted;
3557
- totalRowsSkipped += chunkResult.rowsSkipped;
3558
- if (out.length < 10) {
3559
- out.push(...chunkResult.preview.slice(0, 10 - out.length));
3560
- }
3561
- chunkStart += chunkRows.length;
3562
- chunkIndex += 1;
3563
- }
3564
- return finalize(totalRowsWritten);
3565
- }
3566
-
3567
- // Materialized array path (inline rows or small CSV).
3568
- if (workflowStep && sliced.length > rowsPerChunk) {
3569
- let totalRowsWritten = 0;
3570
- for (let start = 0; start < sliced.length; start += rowsPerChunk) {
3571
- assertNotAborted(abortSignal);
3572
- const end = Math.min(sliced.length, start + rowsPerChunk);
3573
- const chunkRows = sliced.slice(start, end);
3574
- const chunkIndex = Math.floor(start / rowsPerChunk);
3575
- const chunkResult = await runChunkStep(chunkRows, start, chunkIndex);
3576
- totalRowsWritten += chunkResult.rowsWritten;
3577
- totalRowsExecuted += chunkResult.rowsExecuted;
3578
- totalRowsCached += chunkResult.rowsCached;
3579
- totalRowsInserted += chunkResult.rowsInserted;
3580
- totalRowsSkipped += chunkResult.rowsSkipped;
3581
- if (out.length < 10) {
3582
- out.push(...chunkResult.preview.slice(0, 10 - out.length));
3583
- }
3584
- }
3585
- return finalize(totalRowsWritten);
3586
- }
3587
-
3588
- const chunkResult = await processChunk(sliced, 0, 0);
3589
- totalRowsExecuted = chunkResult.rowsExecuted;
3590
- totalRowsCached = chunkResult.rowsCached;
3591
- totalRowsInserted = chunkResult.rowsInserted;
3592
- totalRowsSkipped = chunkResult.rowsSkipped;
3593
- out.push(...chunkResult.preview);
3594
- return finalize(chunkResult.rowsWritten);
3595
- */
3596
3589
  },
3597
3590
  tool: async (
3598
3591
  key: string,
@@ -4012,6 +4005,7 @@ async function executeRunRequest(
4012
4005
  workflowStep?: WorkflowStep,
4013
4006
  options?: {
4014
4007
  persistResultDatasets?: boolean;
4008
+ waitUntil?: (promise: Promise<unknown>) => void;
4015
4009
  /**
4016
4010
  * Cooperative cancellation token. CF Workflows surfaces termination as a
4017
4011
  * thrown error from any in-progress step; the harness catches that, flips
@@ -4022,14 +4016,36 @@ async function executeRunRequest(
4022
4016
  ): Promise<WorkflowRunOutput> {
4023
4017
  installProcessExitTrap();
4024
4018
  const startedAt = nowMs();
4019
+ recordRunnerPerfTrace({
4020
+ req,
4021
+ phase: 'runner.execute_start',
4022
+ extra: {
4023
+ persistResultDatasets: Boolean(options?.persistResultDatasets),
4024
+ hasWorkflowStep: Boolean(workflowStep),
4025
+ },
4026
+ });
4025
4027
  const abortController = options?.abortController ?? new AbortController();
4026
4028
  const abortSignal = abortController.signal;
4029
+ const postgresPrewarmStartedAt = nowMs();
4030
+ await harnessPrewarmPostgresSessions({
4031
+ executorToken: req.executorToken,
4032
+ sessions: req.preloadedDbSessions ?? [],
4033
+ });
4034
+ recordRunnerPerfTrace({
4035
+ req,
4036
+ phase: 'runner.prewarm_postgres',
4037
+ ms: nowMs() - postgresPrewarmStartedAt,
4038
+ extra: {
4039
+ sessions: req.preloadedDbSessions?.length ?? 0,
4040
+ },
4041
+ });
4027
4042
  // Maintain a rolling buffer of log lines emitted during the run. This is
4028
4043
  // what the play-page UI consumes via Convex polling + diffPlayRunStreamEvents
4029
4044
  // → play.run.log SSE events. Without periodic flushing, the play page only
4030
4045
  // sees the final terminal status with no intermediate logs/progress.
4031
4046
  let liveLogs: string[] = [];
4032
4047
  let liveLogsDirty = false;
4048
+ let liveNodeProgress: LiveNodeProgressMap = {};
4033
4049
  let lastLiveLogFlushAt =
4034
4050
  nowMs() - LIVE_LOG_FLUSH_INTERVAL_MS + LIVE_LOG_FIRST_FLUSH_DELAY_MS;
4035
4051
  let liveLogFlushInFlight: Promise<void> = Promise.resolve();
@@ -4039,6 +4055,21 @@ async function executeRunRequest(
4039
4055
  liveLogs = [...liveLogs, trimmed].slice(-LIVE_LOG_BUFFER_LIMIT);
4040
4056
  liveLogsDirty = true;
4041
4057
  };
4058
+ const updateLiveNodeProgress = (input: {
4059
+ nodeId: string;
4060
+ progress: LiveNodeProgressSnapshot;
4061
+ }) => {
4062
+ const nodeId = input.nodeId.trim();
4063
+ if (!nodeId) return;
4064
+ liveNodeProgress = {
4065
+ ...liveNodeProgress,
4066
+ [nodeId]: {
4067
+ ...(liveNodeProgress[nodeId] ?? {}),
4068
+ ...input.progress,
4069
+ },
4070
+ };
4071
+ };
4072
+ const liveNodeProgressSnapshot = () => ({ ...liveNodeProgress });
4042
4073
  const flushLiveLogs = (force: boolean): void => {
4043
4074
  if (!options?.persistResultDatasets) return;
4044
4075
  if (!liveLogsDirty && !force) return;
@@ -4057,6 +4088,7 @@ async function executeRunRequest(
4057
4088
  status: 'running',
4058
4089
  runtimeBackend: 'cf_workflows_dynamic_worker',
4059
4090
  liveLogs: snapshot,
4091
+ liveNodeProgress: liveNodeProgressSnapshot(),
4060
4092
  lastCheckpointAt: now,
4061
4093
  });
4062
4094
  } catch {
@@ -4086,37 +4118,77 @@ async function executeRunRequest(
4086
4118
  env,
4087
4119
  workflowStep,
4088
4120
  abortSignal,
4121
+ { onNodeProgress: updateLiveNodeProgress },
4089
4122
  );
4090
4123
  try {
4124
+ const playStartedAt = nowMs();
4091
4125
  const result = await (
4092
4126
  playFn as (
4093
4127
  ctx: unknown,
4094
4128
  input: Record<string, unknown>,
4095
4129
  ) => Promise<unknown>
4096
4130
  )(ctx, req.runtimeInput);
4131
+ recordRunnerPerfTrace({
4132
+ req,
4133
+ phase: 'runner.play_function',
4134
+ ms: nowMs() - playStartedAt,
4135
+ });
4136
+ const serializeStartedAt = nowMs();
4097
4137
  const serializedResult = serializePlayReturnValue(result);
4138
+ recordRunnerPerfTrace({
4139
+ req,
4140
+ phase: 'runner.serialize_result',
4141
+ ms: nowMs() - serializeStartedAt,
4142
+ });
4098
4143
  if (options?.persistResultDatasets) {
4144
+ const persistStartedAt = nowMs();
4099
4145
  await liveLogFlushInFlight.catch(() => undefined);
4146
+ recordRunnerPerfTrace({
4147
+ req,
4148
+ phase: 'runner.live_log_flush_wait',
4149
+ ms: nowMs() - persistStartedAt,
4150
+ });
4151
+ const resultDatasetStartedAt = nowMs();
4100
4152
  await persistResultDatasets(req, serializedResult);
4153
+ recordRunnerPerfTrace({
4154
+ req,
4155
+ phase: 'runner.persist_result_datasets',
4156
+ ms: nowMs() - resultDatasetStartedAt,
4157
+ });
4101
4158
  const terminalResult = trimResultForStatus(serializedResult);
4159
+ const terminalUpdateStartedAt = nowMs();
4102
4160
  await postRuntimeApiBestEffort(req.baseUrl, req.executorToken, {
4103
4161
  action: 'update_run_status',
4104
4162
  playId: req.runId,
4105
4163
  status: 'completed',
4164
+ error: null,
4106
4165
  result: terminalResult,
4107
4166
  runtimeBackend: 'cf_workflows_dynamic_worker',
4108
4167
  waitKind: null,
4109
4168
  waitUntil: null,
4110
4169
  activeBoundaryId: null,
4111
4170
  liveLogs,
4171
+ liveNodeProgress: liveNodeProgressSnapshot(),
4112
4172
  lastCheckpointAt: nowMs(),
4113
4173
  });
4174
+ recordRunnerPerfTrace({
4175
+ req,
4176
+ phase: 'runner.terminal_status_update',
4177
+ ms: nowMs() - terminalUpdateStartedAt,
4178
+ });
4179
+ const billingStartedAt = nowMs();
4114
4180
  await finalizeWorkerComputeBilling({
4115
4181
  req,
4116
4182
  success: true,
4117
4183
  actionEstimate: 4,
4118
4184
  });
4185
+ recordRunnerPerfTrace({
4186
+ req,
4187
+ phase: 'runner.compute_billing_finalize',
4188
+ ms: nowMs() - billingStartedAt,
4189
+ });
4119
4190
  }
4191
+ const parentSignalStartedAt = nowMs();
4120
4192
  await signalParentPlayTerminal({
4121
4193
  req,
4122
4194
  status: 'completed',
@@ -4128,6 +4200,16 @@ async function executeRunRequest(
4128
4200
  }`,
4129
4201
  );
4130
4202
  });
4203
+ recordRunnerPerfTrace({
4204
+ req,
4205
+ phase: 'runner.parent_terminal_signal',
4206
+ ms: nowMs() - parentSignalStartedAt,
4207
+ });
4208
+ recordRunnerPerfTrace({
4209
+ req,
4210
+ phase: 'runner.execute_total',
4211
+ ms: nowMs() - startedAt,
4212
+ });
4131
4213
  return {
4132
4214
  playName: req.playName,
4133
4215
  result: serializedResult,
@@ -4156,6 +4238,7 @@ async function executeRunRequest(
4156
4238
  waitUntil: null,
4157
4239
  activeBoundaryId: null,
4158
4240
  liveLogs,
4241
+ liveNodeProgress: liveNodeProgressSnapshot(),
4159
4242
  lastCheckpointAt: nowMs(),
4160
4243
  });
4161
4244
  await finalizeWorkerComputeBilling({
@@ -4285,6 +4368,9 @@ function runRequestFromWorkflowParams(
4285
4368
  playCallGovernance: isPlayCallGovernanceSnapshot(params.playCallGovernance)
4286
4369
  ? params.playCallGovernance
4287
4370
  : null,
4371
+ preloadedDbSessions: Array.isArray(params.preloadedDbSessions)
4372
+ ? (params.preloadedDbSessions as PreloadedRuntimeDbSession[])
4373
+ : null,
4288
4374
  coordinatorUrl:
4289
4375
  typeof params.coordinatorUrl === 'string' && params.coordinatorUrl.trim()
4290
4376
  ? params.coordinatorUrl.trim()
@@ -4525,6 +4611,7 @@ export class TenantWorkflow extends WorkflowEntrypoint<
4525
4611
  Record<string, unknown>
4526
4612
  > {
4527
4613
  declare readonly env: WorkerEnv;
4614
+ declare readonly ctx: ExecutionContext;
4528
4615
 
4529
4616
  /**
4530
4617
  * Cancellation model:
@@ -4568,14 +4655,30 @@ export class TenantWorkflow extends WorkflowEntrypoint<
4568
4655
  (runnerEvent) => {
4569
4656
  if (runnerEvent.type === 'log') {
4570
4657
  console.log(`${runPrefix} ${runnerEvent.message}`);
4658
+ void this.env.COORDINATOR?.recordRunEvent(req.runId, {
4659
+ runId: req.runId,
4660
+ type: 'log',
4661
+ line: runnerEvent.message,
4662
+ ts: runnerEvent.ts,
4663
+ }).catch(() => undefined);
4571
4664
  } else if (runnerEvent.type === 'error') {
4572
4665
  console.error(
4573
4666
  `${runPrefix} ${runnerEvent.message}${runnerEvent.stack ? `\n${runnerEvent.stack}` : ''}`,
4574
4667
  );
4668
+ void this.env.COORDINATOR?.recordRunEvent(req.runId, {
4669
+ runId: req.runId,
4670
+ type: 'log',
4671
+ line: `[error] ${runnerEvent.message}`,
4672
+ ts: runnerEvent.ts,
4673
+ }).catch(() => undefined);
4575
4674
  }
4576
4675
  },
4577
4676
  step,
4578
- { persistResultDatasets: !req.playCallGovernance, abortController },
4677
+ {
4678
+ persistResultDatasets: !req.playCallGovernance,
4679
+ abortController,
4680
+ waitUntil: (promise) => this.ctx.waitUntil(promise),
4681
+ },
4579
4682
  )) as Record<string, unknown>;
4580
4683
  } catch (error) {
4581
4684
  // CF Workflows + the dynamic-workflows framework swallow the error