deepline 0.1.19 → 0.1.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -56,6 +56,7 @@ import {
56
56
  type ToolResultMetadataInput,
57
57
  } from '../../../shared_libs/play-runtime/tool-result';
58
58
  import type { PlayCallGovernanceSnapshot } from '../../../shared_libs/play-runtime/scheduler-backend';
59
+ import type { PreloadedRuntimeDbSession } from '../../../shared_libs/play-runtime/db-session';
59
60
  import type { PlayRuntimeManifestMap } from '../../../shared_libs/plays/compiler-manifest';
60
61
  import {
61
62
  derivePlayRowIdentity,
@@ -63,6 +64,7 @@ import {
63
64
  } from '../../../shared_libs/plays/row-identity';
64
65
  import {
65
66
  getCompiledPipelineSubsteps,
67
+ flattenStaticPipeline,
66
68
  resolveSheetContractForTableNamespace,
67
69
  sqlSafePlayColumnName,
68
70
  type PlayStaticPipeline,
@@ -79,6 +81,8 @@ import {
79
81
  // re-bundle harness internals into per-play. Keep that in mind.
80
82
  import {
81
83
  harnessFetchStagedFile,
84
+ harnessPersistCompletedSheetRows,
85
+ harnessPrewarmPostgresSessions,
82
86
  harnessStartSheetDataset,
83
87
  setHarnessBinding,
84
88
  } from '../../../sdk/src/plays/harness-stub';
@@ -128,6 +132,7 @@ type RunRequest = {
128
132
  childPlayManifests?: PlayRuntimeManifestMap | null;
129
133
  /** Internal ctx.runPlay lineage. Public SDK/users never see this. */
130
134
  playCallGovernance?: PlayCallGovernanceSnapshot | null;
135
+ preloadedDbSessions?: PreloadedRuntimeDbSession[] | null;
131
136
  /** Cloudflare coordinator URL for direct Workflow control-plane signals. */
132
137
  coordinatorUrl?: string | null;
133
138
  /** Request-scoped coordinator auth token for preview/dev direct control calls. */
@@ -190,6 +195,10 @@ type WorkerEnv = {
190
195
  runId: string,
191
196
  payload: Record<string, unknown>,
192
197
  ): Promise<void>;
198
+ recordRunEvent(
199
+ runId: string,
200
+ event: Record<string, unknown>,
201
+ ): Promise<void>;
193
202
  };
194
203
  /**
195
204
  * Service binding to the long-lived Play Harness Worker
@@ -365,8 +374,9 @@ function cachedVercelProtectionBypassToken(): string | null {
365
374
 
366
375
  const WORKER_PLAY_CALL_LIMITS = {
367
376
  maxPlayCallDepth: 6,
368
- maxPlayCallCount: 32,
369
- maxChildPlayCallsPerParent: 16,
377
+ maxPlayCallCount: 1_000,
378
+ maxChildPlayCallsPerParent: 1_000,
379
+ maxConcurrentPlayCalls: 16,
370
380
  };
371
381
 
372
382
  /**
@@ -518,6 +528,34 @@ function nowMs(): number {
518
528
  return Date.now();
519
529
  }
520
530
 
531
/**
 * Emit a best-effort performance trace for one runner phase.
 *
 * The trace is written to the console as a `[perf-trace]` JSON line and, when
 * a coordinator binding is cached, forwarded through `recordPerfTrace` without
 * being awaited. A rejected forward is logged as a warning and never reaches
 * the caller.
 *
 * @param input.req   Run request; only `runId` is read. Nothing is emitted
 *                    when `runId` is empty.
 * @param input.phase Phase name, emitted under the `runner.` namespace. A name
 *                    that already starts with `runner.` is used as-is so it is
 *                    not prefixed twice. Nothing is emitted when it is empty.
 * @param input.ms    Optional duration in milliseconds; left out of the
 *                    payload when undefined.
 * @param input.extra Optional extra fields. They are spread into the payload
 *                    last, so they override base fields of the same name.
 */
function recordRunnerPerfTrace(input: {
  req: RunRequest;
  phase: string;
  ms?: number;
  extra?: Record<string, unknown>;
}): void {
  const { runId } = input.req;
  if (!runId || !input.phase) return;

  // Call sites in this file pass fully-qualified names such as
  // 'runner.tool.drain'. Prefixing unconditionally turned those into
  // 'runner.runner.tool.drain', so only bare names get the namespace added.
  const phasePrefix = 'runner.';
  const phase = input.phase.startsWith(phasePrefix)
    ? input.phase
    : `${phasePrefix}${input.phase}`;

  const payload = {
    ts: Date.now(),
    source: 'dynamic_worker' as const,
    runId,
    phase,
    ...(input.ms !== undefined ? { ms: input.ms } : {}),
    ...(input.extra ?? {}),
  };
  console.log(
    `[deepline-run:${runId}] [perf-trace] ${JSON.stringify(payload)}`,
  );

  // Fire-and-forget: tracing must not delay the run, so the forward is not
  // awaited and a rejection is downgraded to a warning.
  void cachedCoordinatorBinding
    ?.recordPerfTrace(runId, payload)
    .catch((error: unknown) => {
      const message = error instanceof Error ? error.message : String(error);
      console.warn(
        `[deepline-run:${runId}] failed to forward runner perf trace: ${message}`,
      );
    });
}
558
+
521
559
  function makeRequestId(): string {
522
560
  // Workers crypto.randomUUID is available without nodejs_compat.
523
561
  return crypto.randomUUID();
@@ -637,6 +675,7 @@ async function postRuntimeApiBestEffort(
637
675
  async function submitChildPlayThroughCoordinator(input: {
638
676
  req: RunRequest;
639
677
  body: unknown;
678
+ allowInline?: boolean;
640
679
  }): Promise<{
641
680
  workflowId?: string;
642
681
  runId?: string;
@@ -648,7 +687,7 @@ async function submitChildPlayThroughCoordinator(input: {
648
687
  logs?: string[];
649
688
  timings?: Array<{ phase: string; ms: number }>;
650
689
  }> {
651
- if (cachedCoordinatorBinding) {
690
+ if (cachedCoordinatorBinding && input.allowInline !== false) {
652
691
  if (!isRecord(input.body)) {
653
692
  throw new Error('ctx.runPlay child submit requires an object body.');
654
693
  }
@@ -657,7 +696,7 @@ async function submitChildPlayThroughCoordinator(input: {
657
696
  const coordinatorUrl = input.req.coordinatorUrl?.trim();
658
697
  if (coordinatorUrl) {
659
698
  // Keep child plays on the same coordinator/Workflow submit path as
660
- // top-level runs; the RPC binding remains only as a no-URL fallback.
699
+ // top-level runs when the coordinator URL is present.
661
700
  const res = await fetch(
662
701
  `${coordinatorUrl.replace(/\/$/, '')}/workflow/${encodeURIComponent(
663
702
  input.req.runId,
@@ -1414,7 +1453,23 @@ class WorkerToolBatchScheduler {
1414
1453
  workflowStep?: WorkflowStep,
1415
1454
  ): Promise<unknown> {
1416
1455
  return new Promise((resolve, reject) => {
1417
- this.queue.push({ id, toolId, input, workflowStep, resolve, reject });
1456
+ const queuedAt = nowMs();
1457
+ this.queue.push({
1458
+ id,
1459
+ toolId,
1460
+ input,
1461
+ workflowStep,
1462
+ resolve: (value) => {
1463
+ recordRunnerPerfTrace({
1464
+ req: this.req,
1465
+ phase: 'runner.tool.request',
1466
+ ms: nowMs() - queuedAt,
1467
+ extra: { id, toolId },
1468
+ });
1469
+ resolve(value);
1470
+ },
1471
+ reject,
1472
+ });
1418
1473
  this.scheduleDrain();
1419
1474
  });
1420
1475
  }
@@ -1444,6 +1499,7 @@ class WorkerToolBatchScheduler {
1444
1499
  const requests = this.queue;
1445
1500
  this.queue = [];
1446
1501
  this.scheduled = false;
1502
+ const drainStartedAt = nowMs();
1447
1503
  await Promise.all(
1448
1504
  [...groupWorkerToolRequestsByTool(requests).entries()].map(
1449
1505
  async ([toolId, groupedRequests]) => {
@@ -1451,6 +1507,12 @@ class WorkerToolBatchScheduler {
1451
1507
  },
1452
1508
  ),
1453
1509
  );
1510
+ recordRunnerPerfTrace({
1511
+ req: this.req,
1512
+ phase: 'runner.tool.drain',
1513
+ ms: nowMs() - drainStartedAt,
1514
+ extra: { requests: requests.length },
1515
+ });
1454
1516
  if (this.queue.length > 0) {
1455
1517
  this.scheduleDrain();
1456
1518
  }
@@ -1462,6 +1524,7 @@ class WorkerToolBatchScheduler {
1462
1524
  ): Promise<void> {
1463
1525
  const strategy = getDefaultPlayRuntimeBatchStrategy(toolId);
1464
1526
  if (!strategy || toolId === 'test_wait_for_event') {
1527
+ const groupStartedAt = nowMs();
1465
1528
  await Promise.all(
1466
1529
  requests.map(async (request) => {
1467
1530
  try {
@@ -1477,14 +1540,27 @@ class WorkerToolBatchScheduler {
1477
1540
  }
1478
1541
  }),
1479
1542
  );
1543
+ recordRunnerPerfTrace({
1544
+ req: this.req,
1545
+ phase: 'runner.tool.group',
1546
+ ms: nowMs() - groupStartedAt,
1547
+ extra: { toolId, requests: requests.length, batched: false },
1548
+ });
1480
1549
  return;
1481
1550
  }
1482
1551
 
1552
+ const batchStartedAt = nowMs();
1483
1553
  await executeBatchedWorkerToolGroup({
1484
1554
  req: this.req,
1485
1555
  requests,
1486
1556
  strategy,
1487
1557
  });
1558
+ recordRunnerPerfTrace({
1559
+ req: this.req,
1560
+ phase: 'runner.tool.group',
1561
+ ms: nowMs() - batchStartedAt,
1562
+ extra: { toolId, requests: requests.length, batched: true },
1563
+ });
1488
1564
  }
1489
1565
  }
1490
1566
 
@@ -2289,7 +2365,7 @@ function isStreamingDataset<T extends Record<string, unknown>>(
2289
2365
 
2290
2366
  /**
2291
2367
  * Resolve the sheet contract for a (play, namespace) from the request's
2292
- * contractSnapshot. Required by both direct-Neon writes and the HTTP fallback.
2368
+ * contractSnapshot. Required by direct runtime sheet persistence.
2293
2369
  */
2294
2370
  function resolveSheetContractFromReq(
2295
2371
  req: RunRequest,
@@ -2317,13 +2393,13 @@ function resolveSheetContractFromReq(
2317
2393
  }
2318
2394
 
2319
2395
  /**
2320
- * Direct-Neon writes from the Workers harness. Resolves the org runtime
2321
- * Postgres URL via `create_db_session` (cached by runtime-api per
2322
- * runId+namespace), then runs the same mega-CTE SQL Daytona uses but over
2323
- * `@neondatabase/serverless` so it runs inside the V8 isolate.
2396
+ * Direct-Neon writes from the shared harness Worker. Resolves the org runtime
2397
+ * Postgres URL via `create_db_session` inside that long-lived Worker, then
2398
+ * runs the same row persistence path Daytona uses without bundling Neon into
2399
+ * every per-play dynamic Worker.
2324
2400
  *
2325
- * No HTTP fallback: if the contract/session can't be obtained, we want a
2326
- * loud failure rather than silently regressing to per-chunk Vercel hops.
2401
+ * If the contract/session can't be obtained, fail loudly rather than silently
2402
+ * regressing to per-chunk Vercel hops.
2327
2403
  */
2328
2404
  function requireSheetContract(
2329
2405
  req: RunRequest,
@@ -2348,12 +2424,9 @@ async function persistCompletedMapRows(input: {
2348
2424
  extraOutputFields?: string[];
2349
2425
  }): Promise<void> {
2350
2426
  if (input.rows.length === 0) return;
2351
- await postRuntimeApi<{
2352
- ok: true;
2353
- rowsWritten: number;
2354
- tableNamespace: string;
2355
- }>(input.req.baseUrl, input.req.executorToken, {
2356
- action: 'persist_completed_sheet_rows',
2427
+ await harnessPersistCompletedSheetRows({
2428
+ baseUrl: input.req.baseUrl,
2429
+ executorToken: input.req.executorToken,
2357
2430
  playName: input.req.playName,
2358
2431
  tableNamespace: input.tableNamespace,
2359
2432
  sheetContract: requireSheetContract(input.req, input.tableNamespace),
@@ -2365,6 +2438,7 @@ async function persistCompletedMapRows(input: {
2365
2438
  ),
2366
2439
  ],
2367
2440
  runId: input.req.runId,
2441
+ userEmail: input.req.userEmail,
2368
2442
  });
2369
2443
  }
2370
2444
 
@@ -2381,19 +2455,15 @@ async function prepareMapRows(input: {
2381
2455
  if (input.rows.length === 0) {
2382
2456
  return { inserted: 0, skipped: 0, pendingRows: [], completedRows: [] };
2383
2457
  }
2384
- const result = await postRuntimeApi<{
2385
- inserted: number;
2386
- skipped: number;
2387
- pendingRows: Record<string, unknown>[];
2388
- completedRows: Record<string, unknown>[];
2389
- tableNamespace: string;
2390
- }>(input.req.baseUrl, input.req.executorToken, {
2391
- action: 'start_sheet_dataset',
2458
+ const result = await harnessStartSheetDataset({
2459
+ baseUrl: input.req.baseUrl,
2460
+ executorToken: input.req.executorToken,
2392
2461
  playName: input.req.playName,
2393
2462
  tableNamespace: input.tableNamespace,
2394
2463
  sheetContract: requireSheetContract(input.req, input.tableNamespace),
2395
2464
  rows: input.rows.map((row) => ({ ...row })),
2396
2465
  runId: input.req.runId,
2466
+ userEmail: input.req.userEmail,
2397
2467
  });
2398
2468
  return {
2399
2469
  inserted: result.inserted,
@@ -2461,6 +2531,18 @@ function childPipelineUsesCtxMap(
2461
2531
  );
2462
2532
  }
2463
2533
 
2534
/**
 * Report whether a child play's static pipeline contains an event-wait tool
 * step.
 *
 * A substep counts when its `type` is `'tool'` and it is either flagged with
 * `isEventWait === true` or targets the `test_wait_for_event` tool.
 * NOTE(review): presumably such steps can only run under the Workflow
 * scheduler, hence the name — confirm against the caller.
 *
 * @param pipeline Static pipeline of the child play; `null`/`undefined` is
 *                 treated as "no pipeline".
 * @returns `true` when at least one flattened substep matches, otherwise
 *          `false`.
 */
function childPipelineNeedsWorkflowScheduler(
  pipeline: PlayStaticPipeline | null | undefined,
): boolean {
  if (!pipeline) {
    return false;
  }
  for (const substep of flattenStaticPipeline(pipeline)) {
    if (substep.type !== 'tool') continue;
    const waitsForEvent =
      substep.isEventWait === true ||
      substep.toolId === 'test_wait_for_event';
    if (waitsForEvent) {
      return true;
    }
  }
  return false;
}
2545
+
2464
2546
  function releaseChildPlayConcurrency(
2465
2547
  inFlightByPlayName: Record<string, number>,
2466
2548
  playName: string,
@@ -2483,6 +2565,41 @@ function createMinimalWorkerCtx(
2483
2565
  let playCallCount = 0;
2484
2566
  const parentChildCalls: Record<string, number> = {};
2485
2567
  const inFlightChildCallsByPlayName: Record<string, number> = {};
2568
+ let inFlightChildPlayCalls = 0;
2569
+ const childPlaySlotWaiters: Array<() => void> = [];
2570
+
2571
// Acquire one of the WORKER_PLAY_CALL_LIMITS.maxConcurrentPlayCalls child-play
// slots, waiting while all of them are in use. Resolves to a release callback
// that hands the slot back; calling it more than once is a no-op. Rejects if
// the abort signal fires while waiting (with the signal's reason when that is
// an Error, otherwise a new WorkflowAbortError).
// NOTE(review): the signal is not checked before the first wait, and an
// 'abort' listener added after the signal has already aborted is never
// invoked — confirm callers check for abort before calling this.
+ const acquireChildPlaySlot = async (): Promise<() => void> => {
2572
// The limit is re-evaluated after every wake-up, so a woken waiter only
// proceeds when a slot is actually free at that moment.
+ while (
2573
+ inFlightChildPlayCalls >= WORKER_PLAY_CALL_LIMITS.maxConcurrentPlayCalls
2574
+ ) {
2575
+ await new Promise<void>((resolve, reject) => {
2576
// Invoked by a release callback: detach the abort listener and resume.
+ const waiter = () => {
2577
+ abortSignal?.removeEventListener('abort', onAbort);
2578
+ resolve();
2579
+ };
2580
// Invoked on abort: leave the wait queue so a later release does not wake
// a dead waiter, then reject the pending wait.
+ const onAbort = () => {
2581
+ const index = childPlaySlotWaiters.indexOf(waiter);
2582
+ if (index >= 0) childPlaySlotWaiters.splice(index, 1);
2583
+ reject(
2584
+ abortSignal?.reason instanceof Error
2585
+ ? abortSignal.reason
2586
+ : new WorkflowAbortError(),
2587
+ );
2588
+ };
2589
+ childPlaySlotWaiters.push(waiter);
2590
+ abortSignal?.addEventListener('abort', onAbort, { once: true });
2591
+ });
2592
+ assertNotAborted(abortSignal);
2593
+ }
2594
+ inFlightChildPlayCalls += 1;
2595
+ let released = false;
2596
// Release callback: decrement the in-flight count (never below zero) and
// wake the oldest queued waiter, if any.
+ return () => {
2597
+ if (released) return;
2598
+ released = true;
2599
+ inFlightChildPlayCalls = Math.max(0, inFlightChildPlayCalls - 1);
2600
+ childPlaySlotWaiters.shift()?.();
2601
+ };
2602
+ };
2486
2603
  const rootGovernance = req.playCallGovernance;
2487
2604
  const rootRunId = rootGovernance?.rootRunId ?? req.runId;
2488
2605
  // Local ancestry chain that always ENDS with the currently-executing play
@@ -2517,6 +2634,7 @@ function createMinimalWorkerCtx(
2517
2634
  >,
2518
2635
  opts?: WorkerMapOptions,
2519
2636
  ): Promise<unknown> => {
2637
+ const mapStartedAt = nowMs();
2520
2638
  const sliced = rows;
2521
2639
  const baseOffset = 0;
2522
2640
  const fieldEntries = Object.entries(fieldsDef);
@@ -2579,6 +2697,7 @@ function createMinimalWorkerCtx(
2579
2697
  : JSON.stringify(normalizedParts);
2580
2698
  return keyValue;
2581
2699
  };
2700
+ const mapLogicFingerprint = req.graphHash ?? null;
2582
2701
  const resolveRowKey = (
2583
2702
  row: Record<string, unknown>,
2584
2703
  index: number,
@@ -2586,8 +2705,12 @@ function createMinimalWorkerCtx(
2586
2705
  const inputRow = publicCsvInputRow(row);
2587
2706
  const explicitKeyValue = resolveExplicitKeyValue(row, index);
2588
2707
  return explicitKeyValue == null
2589
- ? derivePlayRowIdentity(inputRow, name)
2590
- : derivePlayRowIdentityFromKey(explicitKeyValue, name);
2708
+ ? derivePlayRowIdentity(inputRow, name, mapLogicFingerprint)
2709
+ : derivePlayRowIdentityFromKey(
2710
+ explicitKeyValue,
2711
+ name,
2712
+ mapLogicFingerprint,
2713
+ );
2591
2714
  };
2592
2715
  const assertUniqueExplicitRowKeys = (
2593
2716
  chunkRows: readonly Record<string, unknown>[],
@@ -2614,12 +2737,21 @@ function createMinimalWorkerCtx(
2614
2737
  chunkStart: number,
2615
2738
  chunkIndex: number,
2616
2739
  ): Promise<WorkerMapChunkSummary<T & Record<string, unknown>>> => {
2740
+ const chunkStartedAt = nowMs();
2617
2741
  assertNotAborted(abortSignal);
2742
+ const keyStartedAt = nowMs();
2618
2743
  const chunkEntries = chunkRows.map((row, localIndex) => {
2619
2744
  const absoluteIndex = baseOffset + chunkStart + localIndex;
2620
2745
  const rowKey = resolveRowKey(row, absoluteIndex);
2621
2746
  return { row, absoluteIndex, rowKey };
2622
2747
  });
2748
+ recordRunnerPerfTrace({
2749
+ req,
2750
+ phase: 'runner.map_chunk.keys',
2751
+ ms: nowMs() - keyStartedAt,
2752
+ extra: { mapName: name, chunkIndex, rows: chunkRows.length },
2753
+ });
2754
+ const prepareStartedAt = nowMs();
2623
2755
  const prepared = await prepareMapRows({
2624
2756
  req,
2625
2757
  tableNamespace: name,
@@ -2628,6 +2760,20 @@ function createMinimalWorkerCtx(
2628
2760
  __deeplineRowKey: rowKey,
2629
2761
  })),
2630
2762
  });
2763
+ recordRunnerPerfTrace({
2764
+ req,
2765
+ phase: 'runner.map_chunk.prepare_rows',
2766
+ ms: nowMs() - prepareStartedAt,
2767
+ extra: {
2768
+ mapName: name,
2769
+ chunkIndex,
2770
+ rows: chunkRows.length,
2771
+ inserted: prepared.inserted,
2772
+ skipped: prepared.skipped,
2773
+ pendingRows: prepared.pendingRows.length,
2774
+ completedRows: prepared.completedRows.length,
2775
+ },
2776
+ });
2631
2777
  const pendingKeys = new Set<string>();
2632
2778
  const completedKeys = new Set<string>();
2633
2779
  const preparedKeys = new Set<string>();
@@ -2635,7 +2781,11 @@ function createMinimalWorkerCtx(
2635
2781
  const key =
2636
2782
  typeof row.__deeplineRowKey === 'string'
2637
2783
  ? row.__deeplineRowKey
2638
- : derivePlayRowIdentity(publicCsvInputRow(row), name);
2784
+ : derivePlayRowIdentity(
2785
+ publicCsvInputRow(row),
2786
+ name,
2787
+ mapLogicFingerprint,
2788
+ );
2639
2789
  if (key) {
2640
2790
  pendingKeys.add(key);
2641
2791
  preparedKeys.add(key);
@@ -2645,7 +2795,11 @@ function createMinimalWorkerCtx(
2645
2795
  const key =
2646
2796
  typeof row.__deeplineRowKey === 'string'
2647
2797
  ? row.__deeplineRowKey
2648
- : derivePlayRowIdentity(publicCsvInputRow(row), name);
2798
+ : derivePlayRowIdentity(
2799
+ publicCsvInputRow(row),
2800
+ name,
2801
+ mapLogicFingerprint,
2802
+ );
2649
2803
  if (key) {
2650
2804
  completedKeys.add(key);
2651
2805
  preparedKeys.add(key);
@@ -2838,8 +2992,56 @@ function createMinimalWorkerCtx(
2838
2992
  })),
2839
2993
  });
2840
2994
  };
2995
+ const workersStartedAt = nowMs();
2841
2996
  const workerResults = await Promise.allSettled(workers);
2842
- await persistExecutedRows();
2997
+ recordRunnerPerfTrace({
2998
+ req,
2999
+ phase: 'runner.map_chunk.execute_workers',
3000
+ ms: nowMs() - workersStartedAt,
3001
+ extra: {
3002
+ mapName: name,
3003
+ chunkIndex,
3004
+ rowsToExecute: rowsToExecute.length,
3005
+ concurrency,
3006
+ },
3007
+ });
3008
+ const persistRowsStartedAt = nowMs();
3009
+ recordRunnerPerfTrace({
3010
+ req,
3011
+ phase: 'runner.map_chunk.persist_rows_start',
3012
+ ms: 0,
3013
+ extra: {
3014
+ mapName: name,
3015
+ chunkIndex,
3016
+ rowsToExecute: rowsToExecute.length,
3017
+ },
3018
+ });
3019
+ try {
3020
+ await persistExecutedRows();
3021
+ recordRunnerPerfTrace({
3022
+ req,
3023
+ phase: 'runner.map_chunk.persist_rows',
3024
+ ms: nowMs() - persistRowsStartedAt,
3025
+ extra: {
3026
+ mapName: name,
3027
+ chunkIndex,
3028
+ rowsToExecute: rowsToExecute.length,
3029
+ },
3030
+ });
3031
+ } catch (error) {
3032
+ recordRunnerPerfTrace({
3033
+ req,
3034
+ phase: 'runner.map_chunk.persist_rows_error',
3035
+ ms: nowMs() - persistRowsStartedAt,
3036
+ extra: {
3037
+ mapName: name,
3038
+ chunkIndex,
3039
+ rowsToExecute: rowsToExecute.length,
3040
+ error: error instanceof Error ? error.message : String(error),
3041
+ },
3042
+ });
3043
+ throw error;
3044
+ }
2843
3045
  const rejectedWorker = workerResults.find(
2844
3046
  (result): result is PromiseRejectedResult =>
2845
3047
  result.status === 'rejected',
@@ -2852,7 +3054,11 @@ function createMinimalWorkerCtx(
2852
3054
  const key =
2853
3055
  typeof completedRow.__deeplineRowKey === 'string'
2854
3056
  ? completedRow.__deeplineRowKey
2855
- : derivePlayRowIdentity(publicCsvInputRow(completedRow), name);
3057
+ : derivePlayRowIdentity(
3058
+ publicCsvInputRow(completedRow),
3059
+ name,
3060
+ mapLogicFingerprint,
3061
+ );
2856
3062
  if (key) {
2857
3063
  const { __deeplineRowKey: _rowKey, ...cleanedRow } =
2858
3064
  publicCsvInputRow(completedRow);
@@ -2875,6 +3081,27 @@ function createMinimalWorkerCtx(
2875
3081
  return resultByKey.get(key);
2876
3082
  })
2877
3083
  .filter((row): row is T & Record<string, unknown> => Boolean(row));
3084
+ const hashStartedAt = nowMs();
3085
+ const hash = await hashJson(out);
3086
+ recordRunnerPerfTrace({
3087
+ req,
3088
+ phase: 'runner.map_chunk.hash',
3089
+ ms: nowMs() - hashStartedAt,
3090
+ extra: { mapName: name, chunkIndex, rows: out.length },
3091
+ });
3092
+ recordRunnerPerfTrace({
3093
+ req,
3094
+ phase: 'runner.map_chunk.total',
3095
+ ms: nowMs() - chunkStartedAt,
3096
+ extra: {
3097
+ mapName: name,
3098
+ chunkIndex,
3099
+ rowsRead: chunkRows.length,
3100
+ rowsWritten: out.length,
3101
+ rowsExecuted: executedRows.length,
3102
+ rowsCached: Math.max(0, out.length - executedRows.length),
3103
+ },
3104
+ });
2878
3105
  return {
2879
3106
  chunkIndex,
2880
3107
  rangeStart: baseOffset + chunkStart,
@@ -2887,7 +3114,7 @@ function createMinimalWorkerCtx(
2887
3114
  rowsInserted,
2888
3115
  rowsSkipped,
2889
3116
  outputDatasetId: `map:${name}`,
2890
- hash: await hashJson(out),
3117
+ hash,
2891
3118
  preview: toWorkflowSerializableValue(out.slice(0, 5)),
2892
3119
  };
2893
3120
  };
@@ -2911,7 +3138,7 @@ function createMinimalWorkerCtx(
2911
3138
  workflowStep.do as unknown as (
2912
3139
  name: string,
2913
3140
  config: {
2914
- retries: { limit: number; delay: string; backoff: 'exponential' };
3141
+ retries: { limit: number; delay: number; backoff: 'exponential' };
2915
3142
  },
2916
3143
  callback: () => Promise<
2917
3144
  WorkerMapChunkSummary<T & Record<string, unknown>>
@@ -2919,7 +3146,7 @@ function createMinimalWorkerCtx(
2919
3146
  ) => Promise<WorkerMapChunkSummary<T & Record<string, unknown>>>
2920
3147
  )(
2921
3148
  deterministicMapChunkStepName({ mapName: name, chunkIndex }),
2922
- { retries: { limit: 5, delay: '5 seconds', backoff: 'exponential' } },
3149
+ { retries: { limit: 5, delay: 100, backoff: 'exponential' } },
2923
3150
  async () => await processChunk(chunkRows, chunkStart, chunkIndex),
2924
3151
  )) as WorkerMapChunkSummary<T & Record<string, unknown>>;
2925
3152
  };
@@ -2978,7 +3205,14 @@ function createMinimalWorkerCtx(
2978
3205
  chunkStart += chunkRows.length;
2979
3206
  chunkIndex += 1;
2980
3207
  }
2981
- return finalize(totalRowsWritten);
3208
+ const dataset = finalize(totalRowsWritten);
3209
+ recordRunnerPerfTrace({
3210
+ req,
3211
+ phase: 'runner.map.total',
3212
+ ms: nowMs() - mapStartedAt,
3213
+ extra: { mapName: name, rowsWritten: totalRowsWritten, streaming: true },
3214
+ });
3215
+ return dataset;
2982
3216
  }
2983
3217
 
2984
3218
  if (workflowStep && sliced.length > rowsPerChunk) {
@@ -3000,7 +3234,14 @@ function createMinimalWorkerCtx(
3000
3234
  out.push(...chunkResult.preview.slice(0, 10 - out.length));
3001
3235
  }
3002
3236
  }
3003
- return finalize(totalRowsWritten);
3237
+ const dataset = finalize(totalRowsWritten);
3238
+ recordRunnerPerfTrace({
3239
+ req,
3240
+ phase: 'runner.map.total',
3241
+ ms: nowMs() - mapStartedAt,
3242
+ extra: { mapName: name, rowsWritten: totalRowsWritten, streaming: false },
3243
+ });
3244
+ return dataset;
3004
3245
  }
3005
3246
 
3006
3247
  assertUniqueExplicitRowKeys(sliced, 0);
@@ -3011,7 +3252,18 @@ function createMinimalWorkerCtx(
3011
3252
  totalRowsInserted = chunkResult.rowsInserted;
3012
3253
  totalRowsSkipped = chunkResult.rowsSkipped;
3013
3254
  out.push(...chunkResult.preview);
3014
- return finalize(chunkResult.rowsWritten);
3255
+ const dataset = finalize(chunkResult.rowsWritten);
3256
+ recordRunnerPerfTrace({
3257
+ req,
3258
+ phase: 'runner.map.total',
3259
+ ms: nowMs() - mapStartedAt,
3260
+ extra: {
3261
+ mapName: name,
3262
+ rowsWritten: chunkResult.rowsWritten,
3263
+ streaming: false,
3264
+ },
3265
+ });
3266
+ return dataset;
3015
3267
  };
3016
3268
 
3017
3269
  class WorkerMapBuilder<T extends Record<string, unknown>> {
@@ -3098,28 +3350,43 @@ function createMinimalWorkerCtx(
3098
3350
  arg: unknown,
3099
3351
  options?: CsvRenameOptions,
3100
3352
  ): Promise<T[]> {
3353
+ const csvStartedAt = nowMs();
3101
3354
  if (Array.isArray(arg)) {
3102
3355
  // Inline rows passed at call site — already in memory, keep the
3103
3356
  // legacy array-backed dataset shape.
3104
- return makeWorkerDataset(
3357
+ const dataset = makeWorkerDataset(
3105
3358
  'csv',
3106
3359
  applyCsvRenameProjection(arg as T[], options),
3107
3360
  {
3108
3361
  datasetKind: 'csv',
3109
3362
  },
3110
3363
  ) as unknown as T[];
3364
+ recordRunnerPerfTrace({
3365
+ req,
3366
+ phase: 'runner.csv',
3367
+ ms: nowMs() - csvStartedAt,
3368
+ extra: { mode: 'inline_array', rows: arg.length },
3369
+ });
3370
+ return dataset;
3111
3371
  }
3112
3372
  const filename = String(arg ?? '');
3113
3373
  if (req.inlineCsv && filename === req.inlineCsv.name) {
3114
3374
  // Inline CSV pre-staged by the dispatcher (small files <1 MiB). Already
3115
3375
  // in memory; no streaming needed.
3116
- return makeWorkerDataset(
3376
+ const dataset = makeWorkerDataset(
3117
3377
  'csv',
3118
3378
  applyCsvRenameProjection(req.inlineCsv.rows as T[], options),
3119
3379
  {
3120
3380
  datasetKind: 'csv',
3121
3381
  },
3122
3382
  ) as unknown as T[];
3383
+ recordRunnerPerfTrace({
3384
+ req,
3385
+ phase: 'runner.csv',
3386
+ ms: nowMs() - csvStartedAt,
3387
+ extra: { mode: 'inline_csv', rows: req.inlineCsv.rows.length },
3388
+ });
3389
+ return dataset;
3123
3390
  }
3124
3391
  // Resolution order: explicit inputR2Keys (runtime input) → packaged
3125
3392
  // files (relative-path imports bundled with the play artifact).
@@ -3144,7 +3411,7 @@ function createMinimalWorkerCtx(
3144
3411
  // and switches its chunked execution loop to consume iterChunks
3145
3412
  // directly, so 2M-row CSVs never get fully materialized in memory.
3146
3413
  const storageKey = r2Key;
3147
- return makeStreamingCsvDataset<T>({
3414
+ const dataset = makeStreamingCsvDataset<T>({
3148
3415
  name: filename,
3149
3416
  logicalPath: filename,
3150
3417
  renameOptions: options,
@@ -3156,6 +3423,13 @@ function createMinimalWorkerCtx(
3156
3423
  storageKey,
3157
3424
  }),
3158
3425
  }) as unknown as T[];
3426
+ recordRunnerPerfTrace({
3427
+ req,
3428
+ phase: 'runner.csv',
3429
+ ms: nowMs() - csvStartedAt,
3430
+ extra: { mode: 'streaming_r2', filename },
3431
+ });
3432
+ return dataset;
3159
3433
  },
3160
3434
  map<T extends Record<string, unknown>>(
3161
3435
  name: string,
@@ -3186,320 +3460,6 @@ function createMinimalWorkerCtx(
3186
3460
  throw new Error(
3187
3461
  'ctx.map(key, rows, fields, options) was removed. Use ctx.map(key, rows).step(...).run(options).',
3188
3462
  );
3189
- /*
3190
- const sliced = rows;
3191
- const baseOffset = 0;
3192
- const fieldEntries = Object.entries(fieldsDef);
3193
- const plan = req.executionPlan;
3194
- const planMap = plan?.maps.find(
3195
- (candidate) =>
3196
- candidate.mapName === name || candidate.tableNamespace === name,
3197
- );
3198
- const streaming = isStreamingDataset<T>(sliced);
3199
- // For streaming inputs we don't know the row count upfront — pass
3200
- // `totalRows: 0` so chooseMapChunkSize falls back to the preferred /
3201
- // default chunk size rather than trying to budget against an unknown.
3202
- const rowsPerChunk = chooseMapChunkSize({
3203
- totalRows: streaming ? 0 : sliced.length,
3204
- mapCount: Math.max(1, plan?.maps.length ?? 1),
3205
- stepsPerChunk: planMap?.stepsPerChunk ?? 1,
3206
- preferredChunkSize: planMap?.defaultChunkSize,
3207
- softWorkflowStepBudget: plan?.chunkPlan.softWorkflowStepBudget,
3208
- });
3209
- const outputFields = fieldEntries.map(([field]) => field);
3210
-
3211
- // Workflow steps have bounded CPU but unbounded wall time, so long
3212
- // network calls are OK here as long as we checkpoint by chunk and do not
3213
- // collapse a giant map into one monolithic step result.
3214
-
3215
- const processChunk = async (
3216
- chunkRows: T[],
3217
- chunkStart: number,
3218
- chunkIndex: number,
3219
- ): Promise<WorkerMapChunkSummary<T & Record<string, unknown>>> => {
3220
- assertNotAborted(abortSignal);
3221
- const prepared = await prepareMapRows({
3222
- req,
3223
- tableNamespace: name,
3224
- rows: chunkRows,
3225
- });
3226
- const pendingKeys = new Set<string>();
3227
- const completedKeys = new Set<string>();
3228
- const preparedKeys = new Set<string>();
3229
- for (const row of prepared.pendingRows) {
3230
- const key = derivePlayRowIdentity(publicCsvInputRow(row), name);
3231
- if (key) {
3232
- pendingKeys.add(key);
3233
- preparedKeys.add(key);
3234
- }
3235
- }
3236
- for (const row of prepared.completedRows) {
3237
- const key =
3238
- typeof row.__deeplineRowKey === 'string'
3239
- ? row.__deeplineRowKey
3240
- : derivePlayRowIdentity(publicCsvInputRow(row), name);
3241
- if (key) {
3242
- completedKeys.add(key);
3243
- preparedKeys.add(key);
3244
- }
3245
- }
3246
- const missingPreparedRows = chunkRows.filter((row) => {
3247
- const key = derivePlayRowIdentity(publicCsvInputRow(row), name);
3248
- return !key || !preparedKeys.has(key);
3249
- });
3250
- const rowsToExecute = chunkRows.filter((row) => {
3251
- const key = derivePlayRowIdentity(publicCsvInputRow(row), name);
3252
- return !key || pendingKeys.has(key) || !completedKeys.has(key);
3253
- });
3254
- const rowsInserted = prepared.inserted + missingPreparedRows.length;
3255
- const rowsSkipped = Math.max(
3256
- 0,
3257
- prepared.skipped - missingPreparedRows.length,
3258
- );
3259
- const concurrency = Math.max(1, Math.min(opts?.concurrency ?? 10, 100));
3260
- const executedRows: Array<T & Record<string, unknown>> = new Array(
3261
- rowsToExecute.length,
3262
- );
3263
- const generatedOutputFields = new Set<string>();
3264
- let idx = 0;
3265
- const workers: Array<Promise<void>> = [];
3266
- for (let w = 0; w < concurrency; w += 1) {
3267
- workers.push(
3268
- (async () => {
3269
- while (true) {
3270
- if (abortSignal?.aborted) return;
3271
- const myIndex = idx++;
3272
- if (myIndex >= rowsToExecute.length) return;
3273
- const row = rowsToExecute[myIndex]!;
3274
- const absoluteIndex = baseOffset + chunkStart + myIndex;
3275
- const enriched: Record<string, unknown> = cloneCsvAliasedRow(row);
3276
- const fieldOutputs: Record<string, unknown> = {};
3277
- const waterfallOutputs: RecordedWaterfallOutput[] = [];
3278
- const rowCtx = {
3279
- ...(ctx as Record<string, unknown>),
3280
- waterfall: (
3281
- toolNameOrSpec: string | WorkerInlineWaterfallSpec,
3282
- waterfallInput: Record<string, unknown>,
3283
- waterfallOpts?: WorkerWaterfallOptions,
3284
- ) =>
3285
- executeWorkerWaterfall(
3286
- req,
3287
- waterfallOutputs,
3288
- toolNameOrSpec,
3289
- waterfallInput,
3290
- waterfallOpts,
3291
- ),
3292
- };
3293
- for (const [key, value] of fieldEntries) {
3294
- if (typeof value === 'function') {
3295
- const resolved = await (
3296
- value as (
3297
- r: T,
3298
- c: unknown,
3299
- f: Record<string, unknown>,
3300
- i: number,
3301
- ) => Promise<unknown> | unknown
3302
- )(row, rowCtx, fieldOutputs, absoluteIndex);
3303
- enriched[key] = resolved;
3304
- fieldOutputs[key] = resolved;
3305
- } else {
3306
- enriched[key] = value;
3307
- fieldOutputs[key] = value;
3308
- }
3309
- }
3310
- for (const waterfallOutput of waterfallOutputs) {
3311
- const columnName =
3312
- `${sqlishIdentifierPart(waterfallOutput.waterfallId)}__` +
3313
- sqlishIdentifierPart(waterfallOutput.stepId);
3314
- enriched[columnName] = waterfallOutput.value;
3315
- generatedOutputFields.add(columnName);
3316
- }
3317
- executedRows[myIndex] = enriched as T & Record<string, unknown>;
3318
- }
3319
- })(),
3320
- );
3321
- }
3322
- await Promise.all(workers);
3323
- if (executedRows.length > 0) {
3324
- await persistCompletedMapRows({
3325
- req,
3326
- tableNamespace: name,
3327
- outputFields,
3328
- extraOutputFields: Array.from(generatedOutputFields),
3329
- rows: executedRows.map((row, executedIndex) => ({
3330
- ...row,
3331
- __deeplineRowKey: derivePlayRowIdentity(
3332
- publicCsvInputRow(rowsToExecute[executedIndex]!),
3333
- name,
3334
- ),
3335
- })),
3336
- });
3337
- }
3338
- const resultByKey = new Map<string, T & Record<string, unknown>>();
3339
- for (const completedRow of prepared.completedRows) {
3340
- const key =
3341
- typeof completedRow.__deeplineRowKey === 'string'
3342
- ? completedRow.__deeplineRowKey
3343
- : derivePlayRowIdentity(publicCsvInputRow(completedRow), name);
3344
- if (key) {
3345
- const { __deeplineRowKey: _rowKey, ...cleanedRow } =
3346
- publicCsvInputRow(completedRow);
3347
- void _rowKey;
3348
- resultByKey.set(key, cleanedRow as T & Record<string, unknown>);
3349
- }
3350
- }
3351
- for (
3352
- let executedIndex = 0;
3353
- executedIndex < executedRows.length;
3354
- executedIndex += 1
3355
- ) {
3356
- const executedRow = executedRows[executedIndex]!;
3357
- const key = derivePlayRowIdentity(
3358
- publicCsvInputRow(rowsToExecute[executedIndex]!),
3359
- name,
3360
- );
3361
- if (key) resultByKey.set(key, executedRow);
3362
- }
3363
- const out = chunkRows
3364
- .map((row) => {
3365
- const key = derivePlayRowIdentity(publicCsvInputRow(row), name);
3366
- return key ? resultByKey.get(key) : undefined;
3367
- })
3368
- .filter((row): row is T & Record<string, unknown> => Boolean(row));
3369
- return {
3370
- chunkIndex,
3371
- rangeStart: baseOffset + chunkStart,
3372
- rangeEnd: baseOffset + chunkStart + out.length,
3373
- rowsRead: chunkRows.length,
3374
- rowsWritten: out.length,
3375
- rowsExecuted: executedRows.length,
3376
- rowsCached: prepared.completedRows.length,
3377
- rowsInserted,
3378
- rowsSkipped,
3379
- outputDatasetId: `map:${name}`,
3380
- hash: await hashJson(out),
3381
- preview: toWorkflowSerializableValue(out.slice(0, 5)),
3382
- };
3383
- };
3384
-
3385
- const out: Array<T & Record<string, unknown>> = [];
3386
- let totalRowsExecuted = 0;
3387
- let totalRowsCached = 0;
3388
- let totalRowsInserted = 0;
3389
- let totalRowsSkipped = 0;
3390
-
3391
- const runChunkStep = async (
3392
- chunkRows: T[],
3393
- chunkStart: number,
3394
- chunkIndex: number,
3395
- ): Promise<WorkerMapChunkSummary<T & Record<string, unknown>>> => {
3396
- if (!workflowStep) {
3397
- return await processChunk(chunkRows, chunkStart, chunkIndex);
3398
- }
3399
- return (await (
3400
- workflowStep.do as unknown as (
3401
- name: string,
3402
- config: {
3403
- retries: { limit: number; delay: string; backoff: 'exponential' };
3404
- },
3405
- callback: () => Promise<
3406
- WorkerMapChunkSummary<T & Record<string, unknown>>
3407
- >,
3408
- ) => Promise<WorkerMapChunkSummary<T & Record<string, unknown>>>
3409
- )(
3410
- deterministicMapChunkStepName({ mapName: name, chunkIndex }),
3411
- { retries: { limit: 5, delay: '5 seconds', backoff: 'exponential' } },
3412
- async () => await processChunk(chunkRows, chunkStart, chunkIndex),
3413
- )) as WorkerMapChunkSummary<T & Record<string, unknown>>;
3414
- };
3415
-
3416
- const finalize = (totalRowsWritten: number) => {
3417
- const cacheSummary =
3418
- `Map completed: ${totalRowsWritten} results ` +
3419
- `(${totalRowsExecuted} executed, ${totalRowsCached} already satisfied) ` +
3420
- `inserted=${totalRowsInserted} skipped=${totalRowsSkipped}`;
3421
- emitEvent({
3422
- type: 'log',
3423
- level: 'info',
3424
- message: cacheSummary,
3425
- ts: nowMs(),
3426
- });
3427
- return makeWorkerDataset(name, out, {
3428
- count: totalRowsWritten,
3429
- cacheSummary,
3430
- workProgress: {
3431
- total: totalRowsWritten,
3432
- executed: totalRowsExecuted,
3433
- reused: totalRowsCached,
3434
- skipped: totalRowsCached,
3435
- pending: 0,
3436
- failed: 0,
3437
- },
3438
- });
3439
- };
3440
-
3441
- // Streaming path: input came from `ctx.csv` over R2 and we never
3442
- // materialized the full row array. Pull row chunks from the dataset's
3443
- // iterChunks() and run each through processChunk inside its own
3444
- // workflow step. Memory stays bounded by `rowsPerChunk`.
3445
- if (streaming) {
3446
- let totalRowsWritten = 0;
3447
- let chunkIndex = 0;
3448
- let chunkStart = 0;
3449
- const streamingDataset = sliced as unknown as StreamingCsvDataset<T>;
3450
- for await (const chunkRows of streamingDataset.iterChunks(
3451
- rowsPerChunk,
3452
- )) {
3453
- assertNotAborted(abortSignal);
3454
- if (chunkRows.length === 0) continue;
3455
- const chunkResult = await runChunkStep(
3456
- chunkRows,
3457
- chunkStart,
3458
- chunkIndex,
3459
- );
3460
- totalRowsWritten += chunkResult.rowsWritten;
3461
- totalRowsExecuted += chunkResult.rowsExecuted;
3462
- totalRowsCached += chunkResult.rowsCached;
3463
- totalRowsInserted += chunkResult.rowsInserted;
3464
- totalRowsSkipped += chunkResult.rowsSkipped;
3465
- if (out.length < 10) {
3466
- out.push(...chunkResult.preview.slice(0, 10 - out.length));
3467
- }
3468
- chunkStart += chunkRows.length;
3469
- chunkIndex += 1;
3470
- }
3471
- return finalize(totalRowsWritten);
3472
- }
3473
-
3474
- // Materialized array path (inline rows or small CSV).
3475
- if (workflowStep && sliced.length > rowsPerChunk) {
3476
- let totalRowsWritten = 0;
3477
- for (let start = 0; start < sliced.length; start += rowsPerChunk) {
3478
- assertNotAborted(abortSignal);
3479
- const end = Math.min(sliced.length, start + rowsPerChunk);
3480
- const chunkRows = sliced.slice(start, end);
3481
- const chunkIndex = Math.floor(start / rowsPerChunk);
3482
- const chunkResult = await runChunkStep(chunkRows, start, chunkIndex);
3483
- totalRowsWritten += chunkResult.rowsWritten;
3484
- totalRowsExecuted += chunkResult.rowsExecuted;
3485
- totalRowsCached += chunkResult.rowsCached;
3486
- totalRowsInserted += chunkResult.rowsInserted;
3487
- totalRowsSkipped += chunkResult.rowsSkipped;
3488
- if (out.length < 10) {
3489
- out.push(...chunkResult.preview.slice(0, 10 - out.length));
3490
- }
3491
- }
3492
- return finalize(totalRowsWritten);
3493
- }
3494
-
3495
- const chunkResult = await processChunk(sliced, 0, 0);
3496
- totalRowsExecuted = chunkResult.rowsExecuted;
3497
- totalRowsCached = chunkResult.rowsCached;
3498
- totalRowsInserted = chunkResult.rowsInserted;
3499
- totalRowsSkipped = chunkResult.rowsSkipped;
3500
- out.push(...chunkResult.preview);
3501
- return finalize(chunkResult.rowsWritten);
3502
- */
3503
3463
  },
3504
3464
  tool: async (
3505
3465
  key: string,
@@ -3620,7 +3580,11 @@ function createMinimalWorkerCtx(
3620
3580
  const childIsMapBacked = childPipelineUsesCtxMap(
3621
3581
  childManifest.staticPipeline,
3622
3582
  );
3583
+ const childNeedsWorkflowScheduler = childPipelineNeedsWorkflowScheduler(
3584
+ childManifest.staticPipeline,
3585
+ );
3623
3586
  let childConcurrencyAcquired = false;
3587
+ let releaseChildPlaySlot: (() => void) | null = null;
3624
3588
  if (childIsMapBacked) {
3625
3589
  const nextInFlight =
3626
3590
  (inFlightChildCallsByPlayName[resolvedName] ?? 0) + 1;
@@ -3635,11 +3599,21 @@ function createMinimalWorkerCtx(
3635
3599
  childConcurrencyAcquired = true;
3636
3600
  }
3637
3601
  try {
3602
+ releaseChildPlaySlot = await acquireChildPlaySlot();
3638
3603
  const childSubmitStartedAt = nowMs();
3639
- let started: { workflowId?: string; runId?: string; error?: unknown };
3604
+ let started: {
3605
+ workflowId?: string;
3606
+ runId?: string;
3607
+ status?: string;
3608
+ output?: unknown;
3609
+ result?: unknown;
3610
+ error?: unknown;
3611
+ };
3640
3612
  try {
3641
3613
  started = await submitChildPlayThroughCoordinator({
3642
3614
  req,
3615
+ allowInline:
3616
+ options?.timeoutMs == null && !childNeedsWorkflowScheduler,
3643
3617
  body: {
3644
3618
  name: resolvedName,
3645
3619
  input: isRecord(input) ? input : {},
@@ -3709,6 +3683,27 @@ function createMinimalWorkerCtx(
3709
3683
  ms: nowMs() - childSubmitStartedAt,
3710
3684
  status: 'ok',
3711
3685
  });
3686
+ const startedStatus = String(started.status ?? '').toLowerCase();
3687
+ if (startedStatus === 'completed') {
3688
+ emitEvent({
3689
+ type: 'log',
3690
+ level: 'info',
3691
+ message: `Completed child play ${resolvedName} (${normalizedKey})`,
3692
+ ts: nowMs(),
3693
+ });
3694
+ return started.output ?? extractChildPlayOutput(started);
3695
+ }
3696
+ if (startedStatus === 'failed') {
3697
+ const startedError = isRecord(started.error)
3698
+ ? started.error
3699
+ : { message: started.error };
3700
+ const startedErrorMessage =
3701
+ typeof startedError.message === 'string' &&
3702
+ startedError.message.trim()
3703
+ ? startedError.message.trim()
3704
+ : `Child play ${resolvedName} (${workflowId}) failed.`;
3705
+ throw new Error(startedErrorMessage);
3706
+ }
3712
3707
  const childWaitStartedAt = nowMs();
3713
3708
  let result: unknown;
3714
3709
  try {
@@ -3761,6 +3756,7 @@ function createMinimalWorkerCtx(
3761
3756
  });
3762
3757
  return result;
3763
3758
  } finally {
3759
+ releaseChildPlaySlot?.();
3764
3760
  if (childConcurrencyAcquired) {
3765
3761
  releaseChildPlayConcurrency(
3766
3762
  inFlightChildCallsByPlayName,
@@ -3883,6 +3879,7 @@ async function executeRunRequest(
3883
3879
  workflowStep?: WorkflowStep,
3884
3880
  options?: {
3885
3881
  persistResultDatasets?: boolean;
3882
+ waitUntil?: (promise: Promise<unknown>) => void;
3886
3883
  /**
3887
3884
  * Cooperative cancellation token. CF Workflows surfaces termination as a
3888
3885
  * thrown error from any in-progress step; the harness catches that, flips
@@ -3893,8 +3890,29 @@ async function executeRunRequest(
3893
3890
  ): Promise<WorkflowRunOutput> {
3894
3891
  installProcessExitTrap();
3895
3892
  const startedAt = nowMs();
3893
+ recordRunnerPerfTrace({
3894
+ req,
3895
+ phase: 'runner.execute_start',
3896
+ extra: {
3897
+ persistResultDatasets: Boolean(options?.persistResultDatasets),
3898
+ hasWorkflowStep: Boolean(workflowStep),
3899
+ },
3900
+ });
3896
3901
  const abortController = options?.abortController ?? new AbortController();
3897
3902
  const abortSignal = abortController.signal;
3903
+ const postgresPrewarmStartedAt = nowMs();
3904
+ await harnessPrewarmPostgresSessions({
3905
+ executorToken: req.executorToken,
3906
+ sessions: req.preloadedDbSessions ?? [],
3907
+ });
3908
+ recordRunnerPerfTrace({
3909
+ req,
3910
+ phase: 'runner.prewarm_postgres',
3911
+ ms: nowMs() - postgresPrewarmStartedAt,
3912
+ extra: {
3913
+ sessions: req.preloadedDbSessions?.length ?? 0,
3914
+ },
3915
+ });
3898
3916
  // Maintain a rolling buffer of log lines emitted during the run. This is
3899
3917
  // what the play-page UI consumes via Convex polling + diffPlayRunStreamEvents
3900
3918
  // → play.run.log SSE events. Without periodic flushing, the play page only
@@ -3959,17 +3977,42 @@ async function executeRunRequest(
3959
3977
  abortSignal,
3960
3978
  );
3961
3979
  try {
3980
+ const playStartedAt = nowMs();
3962
3981
  const result = await (
3963
3982
  playFn as (
3964
3983
  ctx: unknown,
3965
3984
  input: Record<string, unknown>,
3966
3985
  ) => Promise<unknown>
3967
3986
  )(ctx, req.runtimeInput);
3987
+ recordRunnerPerfTrace({
3988
+ req,
3989
+ phase: 'runner.play_function',
3990
+ ms: nowMs() - playStartedAt,
3991
+ });
3992
+ const serializeStartedAt = nowMs();
3968
3993
  const serializedResult = serializePlayReturnValue(result);
3994
+ recordRunnerPerfTrace({
3995
+ req,
3996
+ phase: 'runner.serialize_result',
3997
+ ms: nowMs() - serializeStartedAt,
3998
+ });
3969
3999
  if (options?.persistResultDatasets) {
4000
+ const persistStartedAt = nowMs();
3970
4001
  await liveLogFlushInFlight.catch(() => undefined);
4002
+ recordRunnerPerfTrace({
4003
+ req,
4004
+ phase: 'runner.live_log_flush_wait',
4005
+ ms: nowMs() - persistStartedAt,
4006
+ });
4007
+ const resultDatasetStartedAt = nowMs();
3971
4008
  await persistResultDatasets(req, serializedResult);
4009
+ recordRunnerPerfTrace({
4010
+ req,
4011
+ phase: 'runner.persist_result_datasets',
4012
+ ms: nowMs() - resultDatasetStartedAt,
4013
+ });
3972
4014
  const terminalResult = trimResultForStatus(serializedResult);
4015
+ const terminalUpdateStartedAt = nowMs();
3973
4016
  await postRuntimeApiBestEffort(req.baseUrl, req.executorToken, {
3974
4017
  action: 'update_run_status',
3975
4018
  playId: req.runId,
@@ -3982,12 +4025,24 @@ async function executeRunRequest(
3982
4025
  liveLogs,
3983
4026
  lastCheckpointAt: nowMs(),
3984
4027
  });
4028
+ recordRunnerPerfTrace({
4029
+ req,
4030
+ phase: 'runner.terminal_status_update',
4031
+ ms: nowMs() - terminalUpdateStartedAt,
4032
+ });
4033
+ const billingStartedAt = nowMs();
3985
4034
  await finalizeWorkerComputeBilling({
3986
4035
  req,
3987
4036
  success: true,
3988
4037
  actionEstimate: 4,
3989
4038
  });
4039
+ recordRunnerPerfTrace({
4040
+ req,
4041
+ phase: 'runner.compute_billing_finalize',
4042
+ ms: nowMs() - billingStartedAt,
4043
+ });
3990
4044
  }
4045
+ const parentSignalStartedAt = nowMs();
3991
4046
  await signalParentPlayTerminal({
3992
4047
  req,
3993
4048
  status: 'completed',
@@ -3999,6 +4054,16 @@ async function executeRunRequest(
3999
4054
  }`,
4000
4055
  );
4001
4056
  });
4057
+ recordRunnerPerfTrace({
4058
+ req,
4059
+ phase: 'runner.parent_terminal_signal',
4060
+ ms: nowMs() - parentSignalStartedAt,
4061
+ });
4062
+ recordRunnerPerfTrace({
4063
+ req,
4064
+ phase: 'runner.execute_total',
4065
+ ms: nowMs() - startedAt,
4066
+ });
4002
4067
  return {
4003
4068
  playName: req.playName,
4004
4069
  result: serializedResult,
@@ -4156,6 +4221,9 @@ function runRequestFromWorkflowParams(
4156
4221
  playCallGovernance: isPlayCallGovernanceSnapshot(params.playCallGovernance)
4157
4222
  ? params.playCallGovernance
4158
4223
  : null,
4224
+ preloadedDbSessions: Array.isArray(params.preloadedDbSessions)
4225
+ ? (params.preloadedDbSessions as PreloadedRuntimeDbSession[])
4226
+ : null,
4159
4227
  coordinatorUrl:
4160
4228
  typeof params.coordinatorUrl === 'string' && params.coordinatorUrl.trim()
4161
4229
  ? params.coordinatorUrl.trim()
@@ -4396,6 +4464,7 @@ export class TenantWorkflow extends WorkflowEntrypoint<
4396
4464
  Record<string, unknown>
4397
4465
  > {
4398
4466
  declare readonly env: WorkerEnv;
4467
+ declare readonly ctx: ExecutionContext;
4399
4468
 
4400
4469
  /**
4401
4470
  * Cancellation model:
@@ -4439,14 +4508,30 @@ export class TenantWorkflow extends WorkflowEntrypoint<
4439
4508
  (runnerEvent) => {
4440
4509
  if (runnerEvent.type === 'log') {
4441
4510
  console.log(`${runPrefix} ${runnerEvent.message}`);
4511
+ void this.env.COORDINATOR?.recordRunEvent(req.runId, {
4512
+ runId: req.runId,
4513
+ type: 'log',
4514
+ line: runnerEvent.message,
4515
+ ts: runnerEvent.ts,
4516
+ }).catch(() => undefined);
4442
4517
  } else if (runnerEvent.type === 'error') {
4443
4518
  console.error(
4444
4519
  `${runPrefix} ${runnerEvent.message}${runnerEvent.stack ? `\n${runnerEvent.stack}` : ''}`,
4445
4520
  );
4521
+ void this.env.COORDINATOR?.recordRunEvent(req.runId, {
4522
+ runId: req.runId,
4523
+ type: 'log',
4524
+ line: `[error] ${runnerEvent.message}`,
4525
+ ts: runnerEvent.ts,
4526
+ }).catch(() => undefined);
4446
4527
  }
4447
4528
  },
4448
4529
  step,
4449
- { persistResultDatasets: !req.playCallGovernance, abortController },
4530
+ {
4531
+ persistResultDatasets: !req.playCallGovernance,
4532
+ abortController,
4533
+ waitUntil: (promise) => this.ctx.waitUntil(promise),
4534
+ },
4450
4535
  )) as Record<string, unknown>;
4451
4536
  } catch (error) {
4452
4537
  // CF Workflows + the dynamic-workflows framework swallow the error