deepline 0.1.152 → 0.1.154

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/dist/bundling-sources/apps/play-runner-workers/src/coordinator-entry.ts +46 -6
  2. package/dist/bundling-sources/apps/play-runner-workers/src/entry.ts +1180 -825
  3. package/dist/bundling-sources/apps/play-runner-workers/src/runtime/batching.ts +34 -18
  4. package/dist/bundling-sources/apps/play-runner-workers/src/runtime/harness-receipt-store.ts +41 -0
  5. package/dist/bundling-sources/apps/play-runner-workers/src/runtime/receipts.ts +143 -8
  6. package/dist/bundling-sources/apps/play-runner-workers/src/runtime/tool-receipts.ts +104 -0
  7. package/dist/bundling-sources/sdk/src/index.ts +0 -1
  8. package/dist/bundling-sources/sdk/src/play.ts +3 -48
  9. package/dist/bundling-sources/sdk/src/plays/harness-stub.ts +27 -2
  10. package/dist/bundling-sources/sdk/src/release.ts +2 -2
  11. package/dist/bundling-sources/sdk/src/worker-play-entry.ts +0 -10
  12. package/dist/bundling-sources/shared_libs/play-data-plane/index.ts +0 -1
  13. package/dist/bundling-sources/shared_libs/play-runtime/app-runtime-api.ts +87 -0
  14. package/dist/bundling-sources/shared_libs/play-runtime/batch-runtime.ts +0 -59
  15. package/dist/bundling-sources/shared_libs/play-runtime/cell-staleness.ts +0 -253
  16. package/dist/bundling-sources/shared_libs/play-runtime/context.ts +805 -1570
  17. package/dist/bundling-sources/shared_libs/play-runtime/ctx-types.ts +47 -74
  18. package/dist/bundling-sources/shared_libs/play-runtime/default-batch-strategies.ts +36 -14
  19. package/dist/bundling-sources/shared_libs/play-runtime/durable-call-cache.ts +145 -0
  20. package/dist/bundling-sources/shared_libs/play-runtime/durable-receipt-execution.ts +284 -0
  21. package/dist/bundling-sources/shared_libs/play-runtime/postgres-json.ts +12 -5
  22. package/dist/bundling-sources/shared_libs/play-runtime/run-lifecycle-policy.ts +78 -0
  23. package/dist/bundling-sources/shared_libs/play-runtime/run-snapshot-stream.ts +10 -45
  24. package/dist/bundling-sources/shared_libs/play-runtime/runtime-actions.ts +1 -0
  25. package/dist/bundling-sources/shared_libs/play-runtime/runtime-api.ts +923 -535
  26. package/dist/bundling-sources/shared_libs/play-runtime/runtime-pg-driver-neon-serverless.ts +58 -78
  27. package/dist/bundling-sources/shared_libs/play-runtime/runtime-pg-driver.ts +12 -1
  28. package/dist/bundling-sources/shared_libs/play-runtime/step-program-dataset-builder.ts +1 -14
  29. package/dist/bundling-sources/shared_libs/play-runtime/tool-execution-outcome.ts +159 -0
  30. package/dist/bundling-sources/shared_libs/play-runtime/tool-result-types.ts +4 -1
  31. package/dist/bundling-sources/shared_libs/play-runtime/work-receipts.ts +32 -0
  32. package/dist/bundling-sources/shared_libs/plays/definition.ts +4 -2
  33. package/dist/bundling-sources/shared_libs/plays/runtime-validation.ts +3 -14
  34. package/dist/bundling-sources/shared_libs/plays/static-pipeline.ts +1 -43
  35. package/dist/cli/index.js +1301 -399
  36. package/dist/cli/index.mjs +1269 -361
  37. package/dist/{compiler-manifest-BjoRENv9.d.ts → compiler-manifest-DW1flrHk.d.mts} +0 -9
  38. package/dist/{compiler-manifest-BjoRENv9.d.mts → compiler-manifest-DW1flrHk.d.ts} +0 -9
  39. package/dist/index.d.mts +9 -38
  40. package/dist/index.d.ts +9 -38
  41. package/dist/index.js +22 -11
  42. package/dist/index.mjs +22 -11
  43. package/dist/plays/bundle-play-file.d.mts +2 -2
  44. package/dist/plays/bundle-play-file.d.ts +2 -2
  45. package/package.json +1 -1
  46. package/dist/bundling-sources/shared_libs/play-data-plane/cell-policy.ts +0 -76
  47. package/dist/bundling-sources/shared_libs/play-runtime/progress-emitter.ts +0 -197
  48. package/dist/bundling-sources/shared_libs/play-runtime/waterfall-replay.ts +0 -79
@@ -23,9 +23,8 @@
23
23
  * compatible with the V8 isolate runtime.
24
24
  *
25
25
  * Status: experimental. First cut targets tool-basic (ctx.csv + ctx.dataset +
26
- * ctx.tools.execute). Plays that depend on the full ctx surface (durable sleep,
27
- * checkpoints, batched waterfalls, etc.) will fall back to "not implemented"
28
- * rather than producing wrong results — opt-in via DEEPLINE_PLAY_RUNNER_BACKEND.
26
+ * ctx.tools.execute). Plays that depend on unsupported ctx surface area fall
27
+ * back to "not implemented" rather than producing wrong results.
29
28
  */
30
29
 
31
30
  import {
@@ -83,6 +82,7 @@ import {
83
82
  type ToolExecuteResult,
84
83
  type ToolResultMetadataInput,
85
84
  } from '../../../shared_libs/play-runtime/tool-result';
85
+ import { toolExecutionMetadataForOutcome } from '../../../shared_libs/play-runtime/tool-execution-outcome';
86
86
  import {
87
87
  TOOL_EXECUTE_RATE_LIMIT_MAX_ATTEMPTS,
88
88
  TOOL_EXECUTE_TRANSPORT_MAX_ATTEMPTS,
@@ -94,10 +94,16 @@ import type { PlayCallGovernanceSnapshot } from '../../../shared_libs/play-runti
94
94
  import type { PreloadedRuntimeDbSession } from '../../../shared_libs/play-runtime/db-session';
95
95
  import type { PlayRuntimeManifestMap } from '../../../shared_libs/plays/compiler-manifest';
96
96
  import {
97
+ deriveToolRequestIdentity,
97
98
  derivePlayRowIdentity,
98
99
  derivePlayRowIdentityFromKey,
99
- deriveToolRequestIdentity,
100
100
  } from '../../../shared_libs/plays/row-identity';
101
+ import {
102
+ buildDurableCtxCallCacheKey,
103
+ buildDurableToolCallAuthScopeDigest,
104
+ buildDurableToolCallCacheKey,
105
+ } from '../../../shared_libs/play-runtime/durable-call-cache';
106
+ import { buildScopedWorkReceiptKey } from '../../../shared_libs/play-runtime/work-receipts';
101
107
  import { DEDUPE_DUPLICATE_KEY_SAMPLE_CAP } from '../../../shared_libs/play-runtime/map-row-identity';
102
108
  import {
103
109
  getTopLevelPipelineSubsteps,
@@ -134,7 +140,23 @@ import {
134
140
  type WorkerDatasetHandle,
135
141
  type WorkerDatasetInput,
136
142
  } from './runtime/dataset-handles';
137
- import { runWorkerRuntimeReceiptBoundary } from './runtime/receipts';
143
+ import {
144
+ runWorkerRuntimeReceiptBoundary,
145
+ type WorkerRuntimeReceipt,
146
+ type WorkerRuntimeReceiptClaim,
147
+ type WorkerRuntimeReceiptStore,
148
+ } from './runtime/receipts';
149
+ import {
150
+ RuntimeReceiptWaitTimeoutError,
151
+ waitForCompletedRuntimeReceipt,
152
+ } from '../../../shared_libs/play-runtime/durable-receipt-execution';
153
+ import type { RuntimeStepReceipt } from '../../../shared_libs/play-runtime/ctx-types';
154
+ import {
155
+ canReclaimTimedOutWorkerToolReceipt,
156
+ markWorkerToolReceiptResultCached,
157
+ markWorkerToolReceiptResultExecution,
158
+ planWorkerToolReceiptGroups,
159
+ } from './runtime/tool-receipts';
138
160
  // The harness stub forwards leaf calls (validation, runtime-api HTTP) into
139
161
  // the long-lived Play Harness Worker via env.HARNESS. We import the
140
162
  // `setHarnessBinding` setter eagerly so it's available the moment
@@ -222,17 +244,8 @@ import {
222
244
  import {
223
245
  DEEPLINE_CELL_META_FIELD,
224
246
  previousCellFromValue,
225
- resolveCompletedCellStalenessMeta,
226
- shouldRecomputeCell,
227
- type AuthoredCellStalenessPolicyByField,
228
- type AuthoredStaleAfterSeconds,
229
- type CellStalenessPolicyByField,
230
247
  type PreviousCell,
231
248
  } from '../../../shared_libs/play-runtime/cell-staleness';
232
- import {
233
- authoredCellPoliciesFromDatasetSteps,
234
- cellPoliciesFromDatasetSteps,
235
- } from '../../../shared_libs/play-data-plane/cell-policy';
236
249
 
237
250
  // The play's default export. The bundler injects this — see bundle-play-file.ts.
238
251
  // eslint-disable-next-line @typescript-eslint/ban-ts-comment
@@ -335,6 +348,7 @@ type WorkerEnv = {
335
348
  RUNTIME_API?: {
336
349
  runtimeApiCall(input: {
337
350
  executorToken: string;
351
+ baseUrl?: string;
338
352
  path: string;
339
353
  method?: string;
340
354
  body: unknown;
@@ -553,6 +567,7 @@ async function fetchRuntimeApi(
553
567
  cachedRuntimeApiBinding,
554
568
  mergedInit,
555
569
  {
570
+ baseUrl,
556
571
  path,
557
572
  timeoutMs,
558
573
  },
@@ -579,7 +594,7 @@ async function fetchRuntimeApi(
579
594
  async function callRuntimeApiRpcBinding(
580
595
  binding: NonNullable<WorkerEnv['RUNTIME_API']>,
581
596
  init: RequestInit,
582
- input: { path: string; timeoutMs: number },
597
+ input: { baseUrl: string; path: string; timeoutMs: number },
583
598
  ): Promise<Response> {
584
599
  const h = new Headers(init.headers);
585
600
  const authorization = h.get('authorization') ?? '';
@@ -591,8 +606,9 @@ async function callRuntimeApiRpcBinding(
591
606
  const rawBody = typeof init.body === 'string' ? init.body : '';
592
607
  const result = await binding.runtimeApiCall({
593
608
  executorToken: authorization.replace(/^Bearer\s+/i, '').trim(),
609
+ baseUrl: input.baseUrl,
594
610
  path: input.path,
595
- method: init.method ?? 'POST',
611
+ method: init.method,
596
612
  body: rawBody ? JSON.parse(rawBody) : {},
597
613
  headers,
598
614
  timeoutMs: input.timeoutMs,
@@ -1225,6 +1241,7 @@ function normalizeToolExecuteArgs(request: unknown): {
1225
1241
  id: string;
1226
1242
  toolId: string;
1227
1243
  input: Record<string, unknown>;
1244
+ force?: boolean;
1228
1245
  staleAfterSeconds?: number;
1229
1246
  } {
1230
1247
  if (!isToolExecuteRecord(request)) {
@@ -1247,6 +1264,7 @@ function normalizeToolExecuteArgs(request: unknown): {
1247
1264
  id: request.id.trim(),
1248
1265
  toolId: request.tool,
1249
1266
  input: request.input,
1267
+ ...(request.force === true ? { force: true } : {}),
1250
1268
  ...(typeof request.staleAfterSeconds === 'number'
1251
1269
  ? { staleAfterSeconds: request.staleAfterSeconds }
1252
1270
  : {}),
@@ -1487,85 +1505,10 @@ function wrapWorkerToolResult(
1487
1505
  status,
1488
1506
  result,
1489
1507
  metadata: metadata ?? { toolId },
1490
- execution: {
1491
- idempotent: true,
1492
- cached: false,
1493
- source: 'live',
1494
- },
1508
+ execution: toolExecutionMetadataForOutcome({ kind: 'live' }),
1495
1509
  });
1496
1510
  }
1497
1511
 
1498
- function isRecordLike(value: unknown): value is Record<string, unknown> {
1499
- return value != null && typeof value === 'object' && !Array.isArray(value);
1500
- }
1501
-
1502
- /**
1503
- * True when a normalized tool result carries something the play would treat
1504
- * as a "hit". Conservative: empty objects/arrays/strings count as misses, but
1505
- * any non-empty primitive or any record with at least one non-nullish value
1506
- * counts as content. Used by ctx.waterfall to decide whether to short-circuit.
1507
- */
1508
- function resultHasContent(value: unknown): boolean {
1509
- if (value == null) return false;
1510
- if (typeof value === 'string') return value.length > 0;
1511
- if (typeof value === 'number' || typeof value === 'boolean') return true;
1512
- if (Array.isArray(value)) return value.length > 0;
1513
- if (typeof value === 'object') {
1514
- for (const v of Object.values(value as Record<string, unknown>)) {
1515
- if (v == null) continue;
1516
- if (typeof v === 'string' && v.length === 0) continue;
1517
- if (Array.isArray(v) && v.length === 0) continue;
1518
- return true;
1519
- }
1520
- return false;
1521
- }
1522
- return true;
1523
- }
1524
-
1525
- type WorkerInlineWaterfallSpec = {
1526
- id: string;
1527
- output: string;
1528
- minResults: number;
1529
- steps: Array<
1530
- | {
1531
- id: string;
1532
- kind?: 'tool';
1533
- toolId: string;
1534
- mapInput: (input: Record<string, unknown>) => Record<string, unknown>;
1535
- }
1536
- | {
1537
- id: string;
1538
- kind: 'code';
1539
- run: (
1540
- input: Record<string, unknown>,
1541
- ctx: {
1542
- tools: {
1543
- execute(request: {
1544
- id: string;
1545
- tool: string;
1546
- input: Record<string, unknown>;
1547
- description?: string;
1548
- staleAfterSeconds?: number;
1549
- }): Promise<unknown>;
1550
- };
1551
- },
1552
- ) => unknown | Promise<unknown>;
1553
- }
1554
- >;
1555
- };
1556
-
1557
- type WorkerWaterfallOptions = {
1558
- providers?: string[];
1559
- min_results?: number;
1560
- description?: string;
1561
- };
1562
-
1563
- type RecordedWaterfallOutput = {
1564
- waterfallId: string;
1565
- stepId: string;
1566
- value: unknown;
1567
- };
1568
-
1569
1512
  type RecordedStepProgramOutput = {
1570
1513
  field: string;
1571
1514
  columnName: string;
@@ -1581,6 +1524,9 @@ type WorkerStepResolution = {
1581
1524
 
1582
1525
  type WorkerToolBatchRequest = {
1583
1526
  id: string;
1527
+ cacheKey: string;
1528
+ receiptKey: string | null;
1529
+ force: boolean;
1584
1530
  toolId: string;
1585
1531
  input: Record<string, unknown>;
1586
1532
  workflowStep?: WorkflowStep;
@@ -1588,6 +1534,17 @@ type WorkerToolBatchRequest = {
1588
1534
  reject: (error: unknown) => void;
1589
1535
  };
1590
1536
 
1537
+ type ClaimedWorkerToolBatchRequest = {
1538
+ request: WorkerToolBatchRequest;
1539
+ receiptKey: string | null;
1540
+ followers: WorkerToolBatchRequest[];
1541
+ };
1542
+
1543
+ type PreparedWorkerToolBatchRequests = {
1544
+ claimedRequests: ClaimedWorkerToolBatchRequest[];
1545
+ deferredClaimedRequests: Promise<ClaimedWorkerToolBatchRequest[]>[];
1546
+ };
1547
+
1591
1548
  const WORKER_TOOL_BATCH_GRACE_MS = 250;
1592
1549
  const MAP_EXECUTION_HEARTBEAT_INTERVAL_MS = 5_000;
1593
1550
  const MAP_INCREMENTAL_PERSIST_CHUNK_ROWS = 100;
@@ -1604,6 +1561,7 @@ const MAP_ROW_FAILURE_SAMPLE_LIMIT = 3;
1604
1561
  // their previous batching behavior; declared providers tighten via the
1605
1562
  // Governor's suggestedParallelism.
1606
1563
  const WORKER_TOOL_BATCH_DEFAULT_PARALLELISM = 4;
1564
+
1607
1565
  /**
1608
1566
  * In-process retry budget for HTTP 429 tool responses. Rate-limit pushback is
1609
1567
  * throughput pacing (provider or Deepline limiter), not a tool defect, so it
@@ -1617,6 +1575,43 @@ function sleepWorkerMs(ms: number): Promise<void> {
1617
1575
  return new Promise((resolve) => setTimeout(resolve, ms));
1618
1576
  }
1619
1577
 
1578
+ function workerDurableToolCallCacheKey(input: {
1579
+ req: RunRequest;
1580
+ toolId: string;
1581
+ requestInput: Record<string, unknown>;
1582
+ providerActionVersion: string;
1583
+ staleAfterSeconds?: number | null;
1584
+ }): string {
1585
+ return buildDurableToolCallCacheKey({
1586
+ orgId: input.req.orgId,
1587
+ playId: input.req.playName,
1588
+ toolId: input.toolId,
1589
+ requestInput: input.requestInput,
1590
+ authScopeDigest: buildDurableToolCallAuthScopeDigest({
1591
+ orgId: input.req.orgId,
1592
+ userEmail: input.req.userEmail,
1593
+ toolId: input.toolId,
1594
+ }),
1595
+ providerActionVersion: input.providerActionVersion,
1596
+ staleAfterSeconds: input.staleAfterSeconds,
1597
+ });
1598
+ }
1599
+
1600
+ function workerRuntimeReceiptKey(input: {
1601
+ req: RunRequest;
1602
+ key: string;
1603
+ }): string {
1604
+ const orgId = input.req.orgId?.trim() || 'org';
1605
+ if (input.key.startsWith(`ctx:${orgId}:`)) {
1606
+ return input.key;
1607
+ }
1608
+ return buildScopedWorkReceiptKey({
1609
+ orgId: input.req.orgId,
1610
+ playName: input.req.playName,
1611
+ key: input.key,
1612
+ });
1613
+ }
1614
+
1620
1615
  function stepProgramColumnName(parentField: string, stepId: string): string {
1621
1616
  return sqlSafePlayColumnName(`${parentField}.${stepId}`);
1622
1617
  }
@@ -1629,9 +1624,12 @@ class WorkerToolBatchScheduler {
1629
1624
  private readonly req: RunRequest,
1630
1625
  private readonly governor: PlayExecutionGovernor,
1631
1626
  private readonly resolvePacing: WorkerPacingResolver,
1627
+ private readonly resolveToolActionCacheVersion: WorkerToolActionCacheVersionResolver,
1632
1628
  private readonly abortSignal?: AbortSignal,
1633
1629
  private readonly onRequestsSettled?: (count: number) => void,
1634
1630
  private readonly callbacks?: WorkerCtxCallbacks,
1631
+ private readonly receiptStore?: WorkerRuntimeReceiptStore,
1632
+ private readonly allowLocalRetryReceipts = false,
1635
1633
  ) {}
1636
1634
 
1637
1635
  /**
@@ -1653,16 +1651,29 @@ class WorkerToolBatchScheduler {
1653
1651
  })();
1654
1652
  }
1655
1653
 
1656
- execute(
1654
+ async execute(
1657
1655
  id: string,
1658
1656
  toolId: string,
1659
1657
  input: Record<string, unknown>,
1660
1658
  workflowStep?: WorkflowStep,
1659
+ options?: { force?: boolean; staleAfterSeconds?: number | null },
1661
1660
  ): Promise<unknown> {
1662
- return new Promise((resolve, reject) => {
1661
+ const providerActionVersion =
1662
+ await this.resolveToolActionCacheVersion(toolId);
1663
+ return await new Promise((resolve, reject) => {
1663
1664
  const queuedAt = nowMs();
1665
+ const receiptKey = workerDurableToolCallCacheKey({
1666
+ req: this.req,
1667
+ toolId,
1668
+ requestInput: input,
1669
+ providerActionVersion,
1670
+ staleAfterSeconds: options?.staleAfterSeconds,
1671
+ });
1664
1672
  this.queue.push({
1665
1673
  id,
1674
+ cacheKey: receiptKey,
1675
+ receiptKey,
1676
+ force: options?.force === true,
1666
1677
  toolId,
1667
1678
  input,
1668
1679
  workflowStep,
@@ -1725,15 +1736,529 @@ class WorkerToolBatchScheduler {
1725
1736
  }
1726
1737
  }
1727
1738
 
1739
+ private async waitForDurableToolReceipt(
1740
+ receiptKey: string,
1741
+ ): Promise<unknown> {
1742
+ if (!this.receiptStore) {
1743
+ throw new Error('Worker durable tool receipt store is not configured.');
1744
+ }
1745
+ if (!this.receiptStore.getReceipt) {
1746
+ throw new Error(
1747
+ 'Worker durable tool receipt wait requires read-only receipt lookup.',
1748
+ );
1749
+ }
1750
+ const receipt = await waitForCompletedRuntimeReceipt({
1751
+ receiptKey,
1752
+ store: {
1753
+ getMany: async (receiptKeys) => {
1754
+ const receipts = new Map<string, RuntimeStepReceipt>();
1755
+ await Promise.all(
1756
+ receiptKeys.map(async (key) => {
1757
+ const receipt = await this.receiptStore!.getReceipt!({
1758
+ playName: this.req.playName,
1759
+ key,
1760
+ });
1761
+ if (!receipt) return;
1762
+ receipts.set(key, {
1763
+ key: receipt.key,
1764
+ status: receipt.status,
1765
+ output: receipt.output,
1766
+ error: receipt.error ?? undefined,
1767
+ runId: receipt.runId ?? null,
1768
+ });
1769
+ }),
1770
+ );
1771
+ return receipts;
1772
+ },
1773
+ },
1774
+ });
1775
+ return markWorkerToolReceiptResultCached(
1776
+ deserializeDurableStepValue(receipt.output),
1777
+ receiptKey,
1778
+ receiptKey,
1779
+ );
1780
+ }
1781
+
1782
+ private settleRequests(
1783
+ claimed: ClaimedWorkerToolBatchRequest,
1784
+ result: unknown,
1785
+ ): void {
1786
+ claimed.request.resolve(result);
1787
+ for (const follower of claimed.followers) {
1788
+ follower.resolve(
1789
+ claimed.receiptKey
1790
+ ? markWorkerToolReceiptResultExecution(result, {
1791
+ kind: 'in_flight',
1792
+ receiptKey: claimed.receiptKey,
1793
+ attachedToReceiptKey: claimed.receiptKey,
1794
+ })
1795
+ : result,
1796
+ );
1797
+ }
1798
+ this.onRequestsSettled?.(1 + claimed.followers.length);
1799
+ }
1800
+
1801
+ private rejectRequests(
1802
+ claimed: ClaimedWorkerToolBatchRequest,
1803
+ error: unknown,
1804
+ ): void {
1805
+ claimed.request.reject(error);
1806
+ for (const follower of claimed.followers) {
1807
+ follower.reject(error);
1808
+ }
1809
+ this.onRequestsSettled?.(1 + claimed.followers.length);
1810
+ }
1811
+
1812
+ private rejectRawRequests(
1813
+ requests: WorkerToolBatchRequest[],
1814
+ error: unknown,
1815
+ ): void {
1816
+ for (const request of requests) {
1817
+ request.reject(error);
1818
+ }
1819
+ this.onRequestsSettled?.(requests.length);
1820
+ }
1821
+
1822
+ private async reclaimTimedOutDurableToolReceiptGroup(input: {
1823
+ group: WorkerToolBatchRequest[];
1824
+ receiptKey: string;
1825
+ runningReceipt: WorkerRuntimeReceipt;
1826
+ waitError: unknown;
1827
+ }): Promise<ClaimedWorkerToolBatchRequest[]> {
1828
+ const [request, ...followers] = input.group;
1829
+ if (!request || !this.receiptStore) {
1830
+ this.rejectRawRequests(input.group, input.waitError);
1831
+ return [];
1832
+ }
1833
+ if (
1834
+ !canReclaimTimedOutWorkerToolReceipt({
1835
+ ownerRunId: input.runningReceipt.runId,
1836
+ currentRunId: this.req.runId,
1837
+ })
1838
+ ) {
1839
+ this.rejectRawRequests(input.group, input.waitError);
1840
+ return [];
1841
+ }
1842
+ let claim: WorkerRuntimeReceiptClaim;
1843
+ try {
1844
+ claim = await this.receiptStore.claimReceipt({
1845
+ playName: this.req.playName,
1846
+ runId: this.req.runId,
1847
+ key: input.receiptKey,
1848
+ reclaimRunning: true,
1849
+ });
1850
+ } catch (error) {
1851
+ this.rejectRawRequests(input.group, error);
1852
+ return [];
1853
+ }
1854
+ if (claim.disposition === 'claimed') {
1855
+ return [{ request, receiptKey: input.receiptKey, followers }];
1856
+ }
1857
+ if (claim.disposition === 'reused') {
1858
+ const result = markWorkerToolReceiptResultCached(
1859
+ deserializeDurableStepValue(claim.receipt.output),
1860
+ request.cacheKey,
1861
+ input.receiptKey,
1862
+ );
1863
+ for (const pending of input.group) {
1864
+ pending.resolve(result);
1865
+ }
1866
+ this.onRequestsSettled?.(input.group.length);
1867
+ return [];
1868
+ }
1869
+ if (claim.disposition === 'failed') {
1870
+ this.rejectRawRequests(
1871
+ input.group,
1872
+ new Error(
1873
+ `Durable tool call ${input.receiptKey} failed: ${claim.receipt.error ?? 'unknown error'}`,
1874
+ ),
1875
+ );
1876
+ return [];
1877
+ }
1878
+ this.rejectRawRequests(input.group, input.waitError);
1879
+ return [];
1880
+ }
1881
+
1882
+ private async failureForRejectedToolRequest(
1883
+ claimed: ClaimedWorkerToolBatchRequest,
1884
+ error: unknown,
1885
+ ): Promise<unknown> {
1886
+ try {
1887
+ await this.failDurableToolRequest(claimed, error);
1888
+ return error;
1889
+ } catch (receiptError) {
1890
+ return new AggregateError(
1891
+ [error, receiptError],
1892
+ 'Tool call failed and durable receipt could not be marked failed',
1893
+ );
1894
+ }
1895
+ }
1896
+
1897
+ private async claimDurableToolReceiptGroups(
1898
+ groups: ReturnType<
1899
+ typeof planWorkerToolReceiptGroups<WorkerToolBatchRequest>
1900
+ >['durableGroups'],
1901
+ ): Promise<
1902
+ Array<{
1903
+ group: ReturnType<
1904
+ typeof planWorkerToolReceiptGroups<WorkerToolBatchRequest>
1905
+ >['durableGroups'][number];
1906
+ receiptKey: string;
1907
+ claim: WorkerRuntimeReceiptClaim;
1908
+ }>
1909
+ > {
1910
+ if (!this.receiptStore) return [];
1911
+ const planned = groups.map((group) => ({
1912
+ group,
1913
+ receiptKey: workerRuntimeReceiptKey({
1914
+ req: this.req,
1915
+ key: group.claimableReceiptKey,
1916
+ }),
1917
+ }));
1918
+ const claimOne = async (entry: (typeof planned)[number]) => ({
1919
+ ...entry,
1920
+ claim: await this.receiptStore!.claimReceipt({
1921
+ playName: this.req.playName,
1922
+ runId: this.req.runId,
1923
+ key: entry.receiptKey,
1924
+ ...(entry.group.forceDurableRefresh
1925
+ ? { forceRefresh: true, reclaimRunning: true }
1926
+ : {}),
1927
+ }),
1928
+ });
1929
+ if (!this.receiptStore.claimReceipts) {
1930
+ return await Promise.all(planned.map(claimOne));
1931
+ }
1932
+ const claimed: Array<{
1933
+ group: (typeof planned)[number]['group'];
1934
+ receiptKey: string;
1935
+ claim: WorkerRuntimeReceiptClaim;
1936
+ }> = [];
1937
+ for (const forceDurableRefresh of [false, true]) {
1938
+ const entries = planned.filter(
1939
+ (entry) => entry.group.forceDurableRefresh === forceDurableRefresh,
1940
+ );
1941
+ if (entries.length === 0) continue;
1942
+ const claims = await this.receiptStore.claimReceipts({
1943
+ playName: this.req.playName,
1944
+ runId: this.req.runId,
1945
+ keys: entries.map((entry) => entry.receiptKey),
1946
+ ...(forceDurableRefresh
1947
+ ? { forceRefresh: true, reclaimRunning: true }
1948
+ : {}),
1949
+ });
1950
+ entries.forEach((entry, index) => {
1951
+ const claim = claims[index];
1952
+ if (!claim) {
1953
+ throw new Error(
1954
+ `Runtime receipt batch claim did not return receipt ${entry.receiptKey}.`,
1955
+ );
1956
+ }
1957
+ claimed.push({ ...entry, claim });
1958
+ });
1959
+ }
1960
+ return claimed;
1961
+ }
1962
+
1963
+ private async prepareDurableToolRequests(
1964
+ requests: WorkerToolBatchRequest[],
1965
+ ): Promise<PreparedWorkerToolBatchRequests> {
1966
+ if (!this.receiptStore) {
1967
+ return {
1968
+ claimedRequests: requests.map((request) => ({
1969
+ request,
1970
+ receiptKey: null,
1971
+ followers: [],
1972
+ })),
1973
+ deferredClaimedRequests: [],
1974
+ };
1975
+ }
1976
+
1977
+ const claimedRequests: ClaimedWorkerToolBatchRequest[] = [];
1978
+ const deferredClaimedRequests: Promise<ClaimedWorkerToolBatchRequest[]>[] =
1979
+ [];
1980
+ const receiptGroupPlan = planWorkerToolReceiptGroups(requests, {
1981
+ allowLocalRetryReceipts: this.allowLocalRetryReceipts,
1982
+ getReceiptInput: (request) => ({
1983
+ durableReceiptKey: request.receiptKey,
1984
+ localRetryCacheKey: request.cacheKey,
1985
+ force: request.force,
1986
+ }),
1987
+ });
1988
+ for (const request of receiptGroupPlan.localRequests) {
1989
+ claimedRequests.push({
1990
+ request,
1991
+ receiptKey: null,
1992
+ followers: [],
1993
+ });
1994
+ }
1995
+
1996
+ try {
1997
+ const claimEntries = await this.claimDurableToolReceiptGroups(
1998
+ receiptGroupPlan.durableGroups,
1999
+ );
2000
+ for (const { group: groupState, receiptKey, claim } of claimEntries) {
2001
+ const group = groupState.requests;
2002
+ const [first] = group;
2003
+ if (!first) continue;
2004
+ if (claim.disposition === 'reused') {
2005
+ const result = markWorkerToolReceiptResultCached(
2006
+ deserializeDurableStepValue(claim.receipt.output),
2007
+ first.cacheKey,
2008
+ receiptKey,
2009
+ );
2010
+ for (const request of group) {
2011
+ request.resolve(result);
2012
+ }
2013
+ this.onRequestsSettled?.(group.length);
2014
+ continue;
2015
+ }
2016
+ if (claim.disposition === 'failed') {
2017
+ const error = new Error(
2018
+ `Durable tool call ${receiptKey} failed: ${claim.receipt.error ?? 'unknown error'}`,
2019
+ );
2020
+ this.rejectRawRequests(group, error);
2021
+ continue;
2022
+ }
2023
+ if (claim.disposition === 'running') {
2024
+ deferredClaimedRequests.push(
2025
+ (async (): Promise<ClaimedWorkerToolBatchRequest[]> => {
2026
+ let waitError: unknown = new RuntimeReceiptWaitTimeoutError(
2027
+ receiptKey,
2028
+ );
2029
+ try {
2030
+ const result = await this.waitForDurableToolReceipt(receiptKey);
2031
+ for (const request of group) {
2032
+ request.resolve(
2033
+ markWorkerToolReceiptResultExecution(result, {
2034
+ kind: 'in_flight',
2035
+ receiptKey,
2036
+ attachedToReceiptKey: receiptKey,
2037
+ }),
2038
+ );
2039
+ }
2040
+ this.onRequestsSettled?.(group.length);
2041
+ return [];
2042
+ } catch (error) {
2043
+ waitError = error;
2044
+ if (!(error instanceof RuntimeReceiptWaitTimeoutError)) {
2045
+ this.rejectRawRequests(group, error);
2046
+ return [];
2047
+ }
2048
+ }
2049
+ return await this.reclaimTimedOutDurableToolReceiptGroup({
2050
+ group,
2051
+ receiptKey,
2052
+ runningReceipt: claim.receipt,
2053
+ waitError,
2054
+ });
2055
+ })(),
2056
+ );
2057
+ continue;
2058
+ }
2059
+ const [request, ...followers] = group;
2060
+ if (!request) continue;
2061
+ claimedRequests.push({ request, receiptKey, followers });
2062
+ }
2063
+ } catch (error) {
2064
+ for (const group of receiptGroupPlan.durableGroups) {
2065
+ this.rejectRawRequests(group.requests, error);
2066
+ }
2067
+ }
2068
+ return { claimedRequests, deferredClaimedRequests };
2069
+ }
2070
+
2071
+ private async completeDurableToolRequest(
2072
+ claimed: ClaimedWorkerToolBatchRequest,
2073
+ result: unknown,
2074
+ ): Promise<unknown> {
2075
+ if (!this.receiptStore || !claimed.receiptKey) {
2076
+ return result;
2077
+ }
2078
+ const ownerResult = markWorkerToolReceiptResultExecution(result, {
2079
+ kind: 'live',
2080
+ receiptKey: claimed.receiptKey,
2081
+ });
2082
+ const completed = await this.receiptStore.completeReceipt({
2083
+ playName: this.req.playName,
2084
+ runId: this.req.runId,
2085
+ key: claimed.receiptKey,
2086
+ output: serializeDurableStepValue(ownerResult),
2087
+ });
2088
+ if (
2089
+ completed &&
2090
+ (completed.status === 'completed' || completed.status === 'skipped') &&
2091
+ completed.output !== undefined
2092
+ ) {
2093
+ const recovered = deserializeDurableStepValue(completed.output);
2094
+ return completed.runId && completed.runId !== this.req.runId
2095
+ ? markWorkerToolReceiptResultCached(
2096
+ recovered,
2097
+ claimed.request.cacheKey,
2098
+ claimed.receiptKey,
2099
+ )
2100
+ : recovered;
2101
+ }
2102
+ return ownerResult;
2103
+ }
2104
+
2105
+ private completedDurableToolRequestResult(input: {
2106
+ claimed: ClaimedWorkerToolBatchRequest;
2107
+ ownerResult: unknown;
2108
+ completed: WorkerRuntimeReceipt | null | undefined;
2109
+ }): unknown {
2110
+ const { claimed, completed, ownerResult } = input;
2111
+ if (
2112
+ completed &&
2113
+ (completed.status === 'completed' || completed.status === 'skipped') &&
2114
+ completed.output !== undefined
2115
+ ) {
2116
+ const recovered = deserializeDurableStepValue(completed.output);
2117
+ return completed.runId && completed.runId !== this.req.runId
2118
+ ? markWorkerToolReceiptResultCached(
2119
+ recovered,
2120
+ claimed.request.cacheKey,
2121
+ claimed.receiptKey ?? claimed.request.cacheKey,
2122
+ )
2123
+ : recovered;
2124
+ }
2125
+ return ownerResult;
2126
+ }
2127
+
2128
+ private async completeDurableToolRequests(
2129
+ entries: Array<{ claimed: ClaimedWorkerToolBatchRequest; result: unknown }>,
2130
+ ): Promise<unknown[]> {
2131
+ const results: unknown[] = new Array(entries.length);
2132
+ const durableEntries: Array<{
2133
+ index: number;
2134
+ claimed: ClaimedWorkerToolBatchRequest;
2135
+ ownerResult: unknown;
2136
+ }> = [];
2137
+ for (let index = 0; index < entries.length; index += 1) {
2138
+ const entry = entries[index]!;
2139
+ if (!this.receiptStore || !entry.claimed.receiptKey) {
2140
+ results[index] = entry.result;
2141
+ continue;
2142
+ }
2143
+ const ownerResult = markWorkerToolReceiptResultExecution(entry.result, {
2144
+ kind: 'live',
2145
+ receiptKey: entry.claimed.receiptKey,
2146
+ });
2147
+ durableEntries.push({ index, claimed: entry.claimed, ownerResult });
2148
+ }
2149
+ if (durableEntries.length === 0) {
2150
+ return results;
2151
+ }
2152
+ if (!this.receiptStore?.completeReceipts) {
2153
+ await Promise.all(
2154
+ durableEntries.map(async (entry) => {
2155
+ results[entry.index] = await this.completeDurableToolRequest(
2156
+ entry.claimed,
2157
+ entries[entry.index]!.result,
2158
+ );
2159
+ }),
2160
+ );
2161
+ return results;
2162
+ }
2163
+ const completed = await this.receiptStore.completeReceipts({
2164
+ playName: this.req.playName,
2165
+ receipts: durableEntries.map((entry) => ({
2166
+ runId: this.req.runId,
2167
+ key: entry.claimed.receiptKey!,
2168
+ output: serializeDurableStepValue(entry.ownerResult),
2169
+ })),
2170
+ });
2171
+ durableEntries.forEach((entry, resultIndex) => {
2172
+ results[entry.index] = this.completedDurableToolRequestResult({
2173
+ claimed: entry.claimed,
2174
+ ownerResult: entry.ownerResult,
2175
+ completed: completed[resultIndex],
2176
+ });
2177
+ });
2178
+ return results;
2179
+ }
2180
+
2181
+ private async failDurableToolRequest(
2182
+ claimed: ClaimedWorkerToolBatchRequest,
2183
+ error: unknown,
2184
+ ): Promise<void> {
2185
+ if (!this.receiptStore || !claimed.receiptKey) {
2186
+ return;
2187
+ }
2188
+ await this.receiptStore.failReceipt({
2189
+ playName: this.req.playName,
2190
+ runId: this.req.runId,
2191
+ key: claimed.receiptKey,
2192
+ error: error instanceof Error ? error.message : String(error),
2193
+ });
2194
+ }
2195
+
2196
+ private async failDurableToolRequests(
2197
+ claimedRequests: ClaimedWorkerToolBatchRequest[],
2198
+ error: unknown,
2199
+ ): Promise<void> {
2200
+ const durable = claimedRequests.filter(
2201
+ (claimed) => this.receiptStore && claimed.receiptKey,
2202
+ );
2203
+ if (durable.length === 0) return;
2204
+ if (!this.receiptStore?.failReceipts) {
2205
+ await Promise.all(
2206
+ durable.map((claimed) => this.failDurableToolRequest(claimed, error)),
2207
+ );
2208
+ return;
2209
+ }
2210
+ await this.receiptStore.failReceipts({
2211
+ playName: this.req.playName,
2212
+ receipts: durable.map((claimed) => ({
2213
+ runId: this.req.runId,
2214
+ key: claimed.receiptKey!,
2215
+ error: error instanceof Error ? error.message : String(error),
2216
+ })),
2217
+ });
2218
+ }
2219
+
1728
2220
  private async executeToolGroup(
1729
2221
  toolId: string,
1730
2222
  requests: WorkerToolBatchRequest[],
1731
2223
  ): Promise<void> {
2224
+ const { claimedRequests, deferredClaimedRequests } =
2225
+ await this.prepareDurableToolRequests(requests);
2226
+ await this.executeClaimedToolRequests(
2227
+ toolId,
2228
+ requests.length,
2229
+ claimedRequests,
2230
+ );
2231
+ if (deferredClaimedRequests.length === 0) {
2232
+ return;
2233
+ }
2234
+ const reclaimedRequests = (
2235
+ await Promise.all(deferredClaimedRequests)
2236
+ ).flat();
2237
+ await this.executeClaimedToolRequests(
2238
+ toolId,
2239
+ requests.length,
2240
+ reclaimedRequests,
2241
+ );
2242
+ }
2243
+
2244
+ private async executeClaimedToolRequests(
2245
+ toolId: string,
2246
+ requestCount: number,
2247
+ claimedRequests: ClaimedWorkerToolBatchRequest[],
2248
+ ): Promise<void> {
2249
+ if (claimedRequests.length === 0) {
2250
+ return;
2251
+ }
1732
2252
  const strategy = getPlayRuntimeBatchStrategy(toolId);
1733
- if (!strategy || toolId === 'test_wait_for_event' || requests.length < 2) {
2253
+ if (
2254
+ !strategy ||
2255
+ toolId === 'test_wait_for_event' ||
2256
+ claimedRequests.length < 2
2257
+ ) {
1734
2258
  const groupStartedAt = nowMs();
1735
2259
  await Promise.all(
1736
- requests.map(async (request) => {
2260
+ claimedRequests.map(async (claimed) => {
2261
+ const { request } = claimed;
1737
2262
  const toolContract = await this.resolvePacing(toolId).catch(
1738
2263
  () => null,
1739
2264
  );
@@ -1744,21 +2269,25 @@ class WorkerToolBatchScheduler {
1744
2269
  signal: this.abortSignal,
1745
2270
  });
1746
2271
  try {
1747
- request.resolve(
1748
- await executeToolWithLifecycle(
1749
- this.req,
1750
- { id: request.id, toolId, input: request.input },
1751
- request.workflowStep,
1752
- this.callbacks,
1753
- (retryAfterMs) => this.reportBackpressure(toolId, retryAfterMs),
1754
- () => this.governor.chargeBudget('retry'),
1755
- toolContract?.retrySafeTransientHttp === true,
1756
- ),
2272
+ const result = await executeToolWithLifecycle(
2273
+ this.req,
2274
+ { id: request.id, toolId, input: request.input },
2275
+ request.workflowStep,
2276
+ this.callbacks,
2277
+ (retryAfterMs) => this.reportBackpressure(toolId, retryAfterMs),
2278
+ () => this.governor.chargeBudget('retry'),
2279
+ toolContract?.retrySafeTransientHttp === true,
2280
+ );
2281
+ this.settleRequests(
2282
+ claimed,
2283
+ await this.completeDurableToolRequest(claimed, result),
1757
2284
  );
1758
2285
  } catch (error) {
1759
- request.reject(error);
2286
+ this.rejectRequests(
2287
+ claimed,
2288
+ await this.failureForRejectedToolRequest(claimed, error),
2289
+ );
1760
2290
  } finally {
1761
- this.onRequestsSettled?.(1);
1762
2291
  slot.release();
1763
2292
  }
1764
2293
  }),
@@ -1767,7 +2296,12 @@ class WorkerToolBatchScheduler {
1767
2296
  req: this.req,
1768
2297
  phase: 'runner.tool.group',
1769
2298
  ms: nowMs() - groupStartedAt,
1770
- extra: { toolId, requests: requests.length, batched: false },
2299
+ extra: {
2300
+ toolId,
2301
+ requests: requestCount,
2302
+ executed: claimedRequests.length,
2303
+ batched: false,
2304
+ },
1771
2305
  });
1772
2306
  return;
1773
2307
  }
@@ -1775,7 +2309,7 @@ class WorkerToolBatchScheduler {
1775
2309
  const batchStartedAt = nowMs();
1776
2310
  await executeBatchedWorkerToolGroup({
1777
2311
  req: this.req,
1778
- requests,
2312
+ requests: claimedRequests,
1779
2313
  strategy,
1780
2314
  governor: this.governor,
1781
2315
  suggestedParallelism: await this.governor.suggestedParallelism(
@@ -1788,12 +2322,23 @@ class WorkerToolBatchScheduler {
1788
2322
  resolveToolContract: this.resolvePacing,
1789
2323
  onRequestsSettled: this.onRequestsSettled,
1790
2324
  callbacks: this.callbacks,
2325
+ completeRequests: async (entries) =>
2326
+ await this.completeDurableToolRequests(entries),
2327
+ failRequests: async (claimedRequests, error) =>
2328
+ await this.failDurableToolRequests(claimedRequests, error),
2329
+ settleRequest: (claimed, result) => this.settleRequests(claimed, result),
2330
+ rejectRequest: (claimed, error) => this.rejectRequests(claimed, error),
1791
2331
  });
1792
2332
  recordRunnerPerfTrace({
1793
2333
  req: this.req,
1794
2334
  phase: 'runner.tool.group',
1795
2335
  ms: nowMs() - batchStartedAt,
1796
- extra: { toolId, requests: requests.length, batched: true },
2336
+ extra: {
2337
+ toolId,
2338
+ requests: requestCount,
2339
+ executed: claimedRequests.length,
2340
+ batched: true,
2341
+ },
1797
2342
  });
1798
2343
  }
1799
2344
  }
@@ -1815,7 +2360,7 @@ function groupWorkerToolRequestsByTool(
1815
2360
 
1816
2361
  async function executeBatchedWorkerToolGroup(input: {
1817
2362
  req: RunRequest;
1818
- requests: WorkerToolBatchRequest[];
2363
+ requests: ClaimedWorkerToolBatchRequest[];
1819
2364
  strategy: AnyBatchOperationStrategy;
1820
2365
  governor: PlayExecutionGovernor;
1821
2366
  suggestedParallelism: number;
@@ -1824,11 +2369,26 @@ async function executeBatchedWorkerToolGroup(input: {
1824
2369
  resolveToolContract: WorkerPacingResolver;
1825
2370
  onRequestsSettled?: (count: number) => void;
1826
2371
  callbacks?: WorkerCtxCallbacks;
2372
+ completeRequests: (
2373
+ entries: Array<{ claimed: ClaimedWorkerToolBatchRequest; result: unknown }>,
2374
+ ) => Promise<unknown[]>;
2375
+ failRequests: (
2376
+ requests: ClaimedWorkerToolBatchRequest[],
2377
+ error: unknown,
2378
+ ) => Promise<void>;
2379
+ settleRequest: (
2380
+ request: ClaimedWorkerToolBatchRequest,
2381
+ result: unknown,
2382
+ ) => void;
2383
+ rejectRequest: (
2384
+ request: ClaimedWorkerToolBatchRequest,
2385
+ error: unknown,
2386
+ ) => void;
1827
2387
  }): Promise<void> {
1828
2388
  const compiledBatches = compileRequestsWithStrategy({
1829
2389
  requests: input.requests,
1830
2390
  strategy: input.strategy,
1831
- getPayload: (request) => request.input,
2391
+ getPayload: (request) => request.request.input,
1832
2392
  });
1833
2393
  recordRunnerPerfTrace({
1834
2394
  req: input.req,
@@ -1865,7 +2425,7 @@ async function executeBatchedWorkerToolGroup(input: {
1865
2425
  return await executeTool(
1866
2426
  input.req,
1867
2427
  {
1868
- id: `batch:${batch.memberRequests.map((request) => request.id).join('|')}`,
2428
+ id: `batch:${batch.memberRequests.map((request) => request.request.id).join('|')}`,
1869
2429
  toolId: batch.batchOperation,
1870
2430
  input: batch.batchPayload,
1871
2431
  },
@@ -1891,8 +2451,17 @@ async function executeBatchedWorkerToolGroup(input: {
1891
2451
  // One batch's provider error stays scoped to that batch's member
1892
2452
  // requests. Sibling batches in this chunk keep their results so a
1893
2453
  // single provider hiccup cannot cascade into a whole-map failure.
1894
- for (const request of entry.request.memberRequests) {
1895
- request.reject(entry.error);
2454
+ let rejection: unknown = entry.error;
2455
+ try {
2456
+ await input.failRequests(entry.request.memberRequests, entry.error);
2457
+ } catch (receiptError) {
2458
+ rejection = new AggregateError(
2459
+ [entry.error, receiptError],
2460
+ 'Tool call failed and durable receipts could not be marked failed',
2461
+ );
2462
+ }
2463
+ for (const claimed of entry.request.memberRequests) {
2464
+ input.rejectRequest(claimed, rejection);
1896
2465
  }
1897
2466
  continue;
1898
2467
  }
@@ -1903,60 +2472,39 @@ async function executeBatchedWorkerToolGroup(input: {
1903
2472
  batchResult != null
1904
2473
  ? entry.request.splitResults(batchResult)
1905
2474
  : entry.request.memberRequests.map(() => null);
1906
- for (
1907
- let index = 0;
1908
- index < entry.request.memberRequests.length;
1909
- index += 1
1910
- ) {
1911
- const request = entry.request.memberRequests[index]!;
1912
- request.resolve(
1913
- wrapWorkerToolResult(
1914
- request.toolId,
2475
+ const completedResults = await input.completeRequests(
2476
+ entry.request.memberRequests.map((claimed, index) => ({
2477
+ claimed,
2478
+ result: wrapWorkerToolResult(
2479
+ claimed.request.toolId,
1915
2480
  splitResults[index] ?? null,
1916
- toolMetadataFallback(request.toolId),
2481
+ toolMetadataFallback(claimed.request.toolId),
1917
2482
  ),
1918
- );
2483
+ })),
2484
+ );
2485
+ for (let index = 0; index < completedResults.length; index += 1) {
2486
+ const claimed = entry.request.memberRequests[index]!;
2487
+ const request = claimed.request;
2488
+ input.settleRequest(claimed, completedResults[index]);
1919
2489
  }
1920
2490
  }
1921
- const settledMembers = chunkResults.reduce(
1922
- (total, entry) => total + entry.request.memberRequests.length,
1923
- 0,
1924
- );
1925
- if (settledMembers > 0) {
1926
- input.onRequestsSettled?.(settledMembers);
1927
- }
1928
2491
  },
1929
- }).catch((error) => {
1930
- for (const request of input.requests) {
1931
- request.reject(error);
2492
+ }).catch(async (error) => {
2493
+ let rejection: unknown = error;
2494
+ try {
2495
+ await input.failRequests(input.requests, error);
2496
+ } catch (receiptError) {
2497
+ rejection = new AggregateError(
2498
+ [error, receiptError],
2499
+ 'Tool call failed and durable receipts could not be marked failed',
2500
+ );
2501
+ }
2502
+ for (const claimed of input.requests) {
2503
+ input.rejectRequest(claimed, rejection);
1932
2504
  }
1933
2505
  });
1934
2506
  }
1935
2507
 
1936
- function isWorkerInlineCodeStep(
1937
- step: WorkerInlineWaterfallSpec['steps'][number],
1938
- ): step is Extract<
1939
- WorkerInlineWaterfallSpec['steps'][number],
1940
- { kind: 'code' }
1941
- > {
1942
- return step.kind === 'code';
1943
- }
1944
-
1945
- function extractWorkerInlineCodeStepValue(
1946
- output: string,
1947
- result: unknown,
1948
- ): unknown {
1949
- if (
1950
- result &&
1951
- typeof result === 'object' &&
1952
- !Array.isArray(result) &&
1953
- output in result
1954
- ) {
1955
- return (result as Record<string, unknown>)[output] ?? null;
1956
- }
1957
- return result ?? null;
1958
- }
1959
-
1960
2508
  function isCompletedWorkerFieldValue(value: unknown): boolean {
1961
2509
  return (
1962
2510
  value !== null &&
@@ -2042,9 +2590,6 @@ type WorkerConditionalStepResolver = {
2042
2590
 
2043
2591
  type WorkerStepProgramStep = {
2044
2592
  name: string;
2045
- recompute?: boolean;
2046
- recomputeOnError?: boolean;
2047
- staleAfterSeconds?: AuthoredStaleAfterSeconds;
2048
2593
  resolver:
2049
2594
  | WorkerStepResolver
2050
2595
  | WorkerConditionalStepResolver
@@ -2078,22 +2623,10 @@ type WorkerMapOptions = {
2078
2623
  onRowError?: 'isolate' | 'fail';
2079
2624
  };
2080
2625
 
2081
- function workerCellPoliciesFromSteps(
2082
- steps: readonly WorkerStepProgramStep[],
2083
- ): CellStalenessPolicyByField {
2084
- return cellPoliciesFromDatasetSteps(steps);
2085
- }
2086
-
2087
- function authoredWorkerCellPoliciesFromSteps(
2088
- steps: readonly WorkerStepProgramStep[],
2089
- ): AuthoredCellStalenessPolicyByField {
2090
- return authoredCellPoliciesFromDatasetSteps(steps);
2091
- }
2092
-
2093
2626
  /**
2094
2627
  * Per-cell terminal state recorded by map row execution and merged into the
2095
2628
  * Runtime Sheet row's `_cell_meta`. 'failed' carries the cell's error message;
2096
- * `shouldRecomputeCell` treats it as recompute on the next run.
2629
+ * call receipts, not cell meta, decide whether provider work is reused.
2097
2630
  */
2098
2631
  type WorkerCellMetaPatchEntry = {
2099
2632
  status: 'cached' | 'skipped' | 'completed' | 'failed';
@@ -2198,6 +2731,10 @@ async function executeWorkerStepProgram(
2198
2731
  let currentRow: Record<string, unknown> = cloneCsvAliasedRow(inputRow);
2199
2732
  for (const step of program.steps) {
2200
2733
  const stepPath = [...(recorder?.path ?? []), step.name];
2734
+ const stepId = stepPath.join('.');
2735
+ const stepOutputColumn = recorder
2736
+ ? stepProgramColumnName(recorder.parentField, stepId)
2737
+ : null;
2201
2738
  const runStep = async (): Promise<WorkerStepResolution> => {
2202
2739
  const resolution = await executeWorkerStepResolver(
2203
2740
  step.resolver,
@@ -2217,18 +2754,19 @@ async function executeWorkerStepProgram(
2217
2754
  ...(resolution.status ? { status: resolution.status } : {}),
2218
2755
  };
2219
2756
  };
2220
- const resolution = workflowStep
2221
- ? await (
2222
- workflowStep.do as unknown as (
2223
- name: string,
2224
- callback: () => Promise<WorkerStepResolution>,
2225
- ) => Promise<WorkerStepResolution>
2226
- )(stepPath.join('.'), runStep)
2227
- : await runStep();
2757
+ const executeStep = async (): Promise<WorkerStepResolution> =>
2758
+ workflowStep
2759
+ ? await (
2760
+ workflowStep.do as unknown as (
2761
+ name: string,
2762
+ callback: () => Promise<WorkerStepResolution>,
2763
+ ) => Promise<WorkerStepResolution>
2764
+ )(stepId, runStep)
2765
+ : await runStep();
2766
+ const resolution = await executeStep();
2228
2767
  const value = deserializeDurableStepValue(resolution.value);
2229
2768
  currentRow = cloneCsvAliasedRow(currentRow, { [step.name]: value });
2230
2769
  if (recorder) {
2231
- const stepId = stepPath.join('.');
2232
2770
  recorder.outputs.push({
2233
2771
  field: `${recorder.parentField}.${stepId}`,
2234
2772
  columnName: stepProgramColumnName(recorder.parentField, stepId),
@@ -2254,187 +2792,6 @@ function sqlishIdentifierPart(value: string): string {
2254
2792
  );
2255
2793
  }
2256
2794
 
2257
- async function executeWorkerWaterfall(
2258
- req: RunRequest,
2259
- recorder: RecordedWaterfallOutput[],
2260
- toolNameOrSpec: string | WorkerInlineWaterfallSpec,
2261
- input: Record<string, unknown>,
2262
- opts?: WorkerWaterfallOptions,
2263
- callbacks?: WorkerCtxCallbacks,
2264
- workflowStep?: WorkflowStep,
2265
- resolveToolContract?: WorkerPacingResolver,
2266
- ): Promise<unknown | null> {
2267
- const retrySafeTransientHttp = async (toolId: string): Promise<boolean> =>
2268
- (await resolveToolContract?.(toolId).catch(() => null))
2269
- ?.retrySafeTransientHttp === true;
2270
- // Inline-spec form
2271
- if (typeof toolNameOrSpec === 'object' && toolNameOrSpec) {
2272
- const spec = toolNameOrSpec;
2273
- const collected: Array<{ stepId: string; value: unknown }> = [];
2274
- for (const step of spec.steps) {
2275
- let result: unknown;
2276
- try {
2277
- if (isWorkerInlineCodeStep(step)) {
2278
- result = await step.run(input, {
2279
- tools: {
2280
- execute: async (request: unknown) => {
2281
- const args = normalizeToolExecuteArgs(request);
2282
- return await executeToolWithLifecycle(
2283
- req,
2284
- args,
2285
- workflowStep,
2286
- callbacks,
2287
- undefined,
2288
- undefined,
2289
- await retrySafeTransientHttp(args.toolId),
2290
- );
2291
- },
2292
- },
2293
- });
2294
- } else {
2295
- result = await executeToolWithLifecycle(
2296
- req,
2297
- {
2298
- id: step.id,
2299
- toolId: step.toolId,
2300
- input: step.mapInput(input),
2301
- },
2302
- workflowStep,
2303
- callbacks,
2304
- undefined,
2305
- undefined,
2306
- await retrySafeTransientHttp(step.toolId),
2307
- );
2308
- }
2309
- } catch {
2310
- continue;
2311
- }
2312
- if (isWorkerInlineCodeStep(step)) {
2313
- const direct = extractWorkerInlineCodeStepValue(spec.output, result);
2314
- if (direct != null && direct !== '' && resultHasContent(direct)) {
2315
- if (Array.isArray(direct)) {
2316
- for (const entry of direct)
2317
- collected.push({ stepId: step.id, value: entry });
2318
- } else {
2319
- collected.push({ stepId: step.id, value: direct });
2320
- }
2321
- if (collected.length >= spec.minResults) {
2322
- for (const hit of collected.slice(0, spec.minResults)) {
2323
- recorder.push({
2324
- waterfallId: spec.id,
2325
- stepId: hit.stepId,
2326
- value: hit.value,
2327
- });
2328
- }
2329
- const values = collected
2330
- .slice(0, spec.minResults)
2331
- .map((hit) => hit.value);
2332
- return spec.minResults === 1 ? (values[0] ?? null) : values;
2333
- }
2334
- }
2335
- continue;
2336
- }
2337
- // Short-circuit when the step produced a hit. We accept both:
2338
- // - top-level `output` field (most tools)
2339
- // - full non-empty result (fallback for older hand-authored specs)
2340
- if (isRecordLike(result)) {
2341
- const direct = result[spec.output];
2342
- if (direct != null && direct !== '' && resultHasContent(direct)) {
2343
- if (Array.isArray(direct)) {
2344
- for (const entry of direct)
2345
- collected.push({ stepId: step.id, value: entry });
2346
- } else {
2347
- collected.push({ stepId: step.id, value: direct });
2348
- }
2349
- if (collected.length >= spec.minResults) {
2350
- for (const hit of collected.slice(0, spec.minResults)) {
2351
- recorder.push({
2352
- waterfallId: spec.id,
2353
- stepId: hit.stepId,
2354
- value: hit.value,
2355
- });
2356
- }
2357
- const values = collected
2358
- .slice(0, spec.minResults)
2359
- .map((hit) => hit.value);
2360
- return spec.minResults === 1 ? (values[0] ?? null) : values;
2361
- }
2362
- continue;
2363
- }
2364
- if (spec.output in result) {
2365
- continue;
2366
- }
2367
- }
2368
- if (resultHasContent(result)) {
2369
- collected.push({ stepId: step.id, value: result });
2370
- if (collected.length >= spec.minResults) {
2371
- for (const hit of collected.slice(0, spec.minResults)) {
2372
- recorder.push({
2373
- waterfallId: spec.id,
2374
- stepId: hit.stepId,
2375
- value: hit.value,
2376
- });
2377
- }
2378
- const values = collected
2379
- .slice(0, spec.minResults)
2380
- .map((hit) => hit.value);
2381
- return spec.minResults === 1 ? (values[0] ?? null) : values;
2382
- }
2383
- }
2384
- }
2385
- return null;
2386
- }
2387
-
2388
- // Provider-list form
2389
- const toolName = String(toolNameOrSpec);
2390
- const providers = opts?.providers ?? [];
2391
- if (providers.length === 0) {
2392
- try {
2393
- return await executeToolWithLifecycle(
2394
- req,
2395
- { id: toolName, toolId: toolName, input },
2396
- workflowStep,
2397
- callbacks,
2398
- undefined,
2399
- undefined,
2400
- await retrySafeTransientHttp(toolName),
2401
- );
2402
- } catch {
2403
- return null;
2404
- }
2405
- }
2406
- let lastError: Error | null = null;
2407
- for (const provider of providers) {
2408
- try {
2409
- const result = await executeToolWithLifecycle(
2410
- req,
2411
- {
2412
- id: `${toolName}:${provider}`,
2413
- toolId: toolName,
2414
- input: { ...input, provider },
2415
- },
2416
- workflowStep,
2417
- callbacks,
2418
- undefined,
2419
- undefined,
2420
- await retrySafeTransientHttp(toolName),
2421
- );
2422
- if (resultHasContent(result)) {
2423
- recorder.push({
2424
- waterfallId: toolName,
2425
- stepId: provider,
2426
- value: result,
2427
- });
2428
- return result;
2429
- }
2430
- } catch (err) {
2431
- lastError = err as Error;
2432
- }
2433
- }
2434
- if (lastError && providers.length === 1) throw lastError;
2435
- return null;
2436
- }
2437
-
2438
2795
  async function hashJson(value: unknown): Promise<string> {
2439
2796
  const bytes = new TextEncoder().encode(canonicalizeJson(value));
2440
2797
  const digest = await crypto.subtle.digest('SHA-256', bytes);
@@ -3251,7 +3608,6 @@ async function prepareMapRows(input: {
3251
3608
  rows: Record<string, unknown>[];
3252
3609
  inputOffset: number;
3253
3610
  outputFields: string[];
3254
- cellPolicies?: CellStalenessPolicyByField;
3255
3611
  }): Promise<{
3256
3612
  inserted: number;
3257
3613
  skipped: number;
@@ -3273,7 +3629,6 @@ async function prepareMapRows(input: {
3273
3629
  }),
3274
3630
  rows,
3275
3631
  inputOffset: input.inputOffset,
3276
- cellPolicies: input.cellPolicies,
3277
3632
  });
3278
3633
  for (const timing of result.timings ?? []) {
3279
3634
  const phase =
@@ -3448,6 +3803,54 @@ type WorkerPacingResolver = (
3448
3803
  (ResolvedPacingPolicy & { retrySafeTransientHttp: boolean }) | null
3449
3804
  >;
3450
3805
 
3806
+ type WorkerToolActionCacheVersionResolver = (toolId: string) => Promise<string>;
3807
+
3808
+ function createWorkerToolActionCacheVersionResolver(
3809
+ req: RunRequest,
3810
+ ): WorkerToolActionCacheVersionResolver {
3811
+ const cache = new Map<string, Promise<string>>();
3812
+ return (toolId: string) => {
3813
+ const normalized = String(toolId || '').trim();
3814
+ if (!normalized) {
3815
+ return Promise.reject(
3816
+ new Error('Runtime tool metadata lookup requires a non-empty tool id.'),
3817
+ );
3818
+ }
3819
+ const cached = cache.get(normalized);
3820
+ if (cached) return cached;
3821
+ const promise = (async () => {
3822
+ const res = await fetchRuntimeApi(
3823
+ req.baseUrl,
3824
+ `/api/v2/plays/runtime-tools/${encodeURIComponent(normalized)}`,
3825
+ {
3826
+ method: 'GET',
3827
+ headers: { authorization: `Bearer ${req.executorToken}` },
3828
+ },
3829
+ );
3830
+ if (!res.ok) {
3831
+ throw new Error(
3832
+ `Runtime tool metadata lookup for ${normalized} failed (${res.status}): ${await res.text()}`,
3833
+ );
3834
+ }
3835
+ const body = (await res.json().catch(() => null)) as {
3836
+ actionCacheVersion?: unknown;
3837
+ } | null;
3838
+ const version =
3839
+ typeof body?.actionCacheVersion === 'string'
3840
+ ? body.actionCacheVersion.trim()
3841
+ : '';
3842
+ if (!version) {
3843
+ throw new Error(
3844
+ `Runtime tool metadata for ${normalized} is missing actionCacheVersion.`,
3845
+ );
3846
+ }
3847
+ return version;
3848
+ })();
3849
+ cache.set(normalized, promise);
3850
+ return promise;
3851
+ };
3852
+ }
3853
+
3451
3854
  function createWorkerPacingResolver(req: RunRequest): WorkerPacingResolver {
3452
3855
  const cache = new Map<
3453
3856
  string,
@@ -3592,6 +3995,8 @@ function createMinimalWorkerCtx(
3592
3995
  ): unknown {
3593
3996
  const { governor, resolvePacing: resolveToolPacing } =
3594
3997
  createGovernorForRun(req);
3998
+ const resolveToolActionCacheVersion =
3999
+ createWorkerToolActionCacheVersionResolver(req);
3595
4000
  // Play-call depth/count/per-parent budgets, child-play concurrency, and the
3596
4001
  // lineage snapshot are owned by the Governor (createGovernorForRun above).
3597
4002
  // The worker keeps only substrate mechanism here.
@@ -3645,8 +4050,9 @@ function createMinimalWorkerCtx(
3645
4050
  const executeWithRuntimeReceipt = async <T>(
3646
4051
  key: string,
3647
4052
  execute: () => Promise<T> | T,
3648
- repairRunningReceiptForSameRun = true,
4053
+ repairRunningReceiptForSameRun = false,
3649
4054
  reclaimRunning = false,
4055
+ repairRunningReceiptForSameRunAfterWaitTimeout = false,
3650
4056
  ): Promise<T> => {
3651
4057
  const serialized = await runWorkerRuntimeReceiptBoundary<unknown>({
3652
4058
  orgId: req.orgId,
@@ -3657,6 +4063,7 @@ function createMinimalWorkerCtx(
3657
4063
  execute: async () => serializeDurableStepValue(await execute()),
3658
4064
  repairRunningReceiptForSameRun,
3659
4065
  reclaimRunning,
4066
+ repairRunningReceiptForSameRunAfterWaitTimeout,
3660
4067
  });
3661
4068
  return deserializeDurableStepValue(serialized) as T;
3662
4069
  };
@@ -3681,31 +4088,31 @@ function createMinimalWorkerCtx(
3681
4088
  false,
3682
4089
  );
3683
4090
  };
3684
- const nextCtxStepReceiptKey = (name: string): string => {
4091
+ const nextCtxStepReceiptKey = (
4092
+ name: string,
4093
+ staleAfterSeconds?: number | null,
4094
+ ): string => {
3685
4095
  const count = stepCallCounts[name] ?? 0;
3686
4096
  stepCallCounts[name] = count + 1;
3687
- return count === 0 ? `step:${name}` : `step:${name}:${count}`;
3688
- };
3689
- const staleRuntimeSuffix = (staleAfterSeconds?: number): string => {
3690
- if (staleAfterSeconds === undefined) return '';
3691
- if (
3692
- !Number.isFinite(staleAfterSeconds) ||
3693
- !Number.isInteger(staleAfterSeconds) ||
3694
- staleAfterSeconds <= 0
3695
- ) {
3696
- throw new Error(
3697
- 'staleAfterSeconds must be a positive whole number of seconds.',
3698
- );
3699
- }
3700
- return `:stale:${staleAfterSeconds}:${Math.floor(nowMs() / (staleAfterSeconds * 1000))}`;
4097
+ return buildDurableCtxCallCacheKey({
4098
+ orgId: req.orgId,
4099
+ playId: req.playName,
4100
+ kind: 'step',
4101
+ id: name,
4102
+ semanticKey: count === 0 ? null : String(count),
4103
+ staleAfterSeconds,
4104
+ });
3701
4105
  };
3702
4106
  const rootToolBatchScheduler = new WorkerToolBatchScheduler(
3703
4107
  req,
3704
4108
  governor,
3705
4109
  resolveToolPacing,
4110
+ resolveToolActionCacheVersion,
3706
4111
  abortSignal,
3707
4112
  undefined,
3708
4113
  callbacks,
4114
+ receiptStore,
4115
+ true,
3709
4116
  );
3710
4117
  // Local ancestry chain that always ENDS with the currently-executing play
3711
4118
  // (req.playName). The /api/v2/plays/run lineage validator requires the
@@ -3737,8 +4144,6 @@ function createMinimalWorkerCtx(
3737
4144
  index: number,
3738
4145
  ) => Promise<unknown> | unknown)
3739
4146
  >,
3740
- cellPolicies?: CellStalenessPolicyByField,
3741
- authoredCellPolicies?: AuthoredCellStalenessPolicyByField,
3742
4147
  opts?: WorkerMapOptions,
3743
4148
  ): Promise<unknown> => {
3744
4149
  const mapStartedAt = nowMs();
@@ -3925,6 +4330,11 @@ function createMinimalWorkerCtx(
3925
4330
 
3926
4331
  let totalRowsWritten = 0;
3927
4332
 
4333
+ const volatileWorkflowChunkRows = new Map<
4334
+ number,
4335
+ Array<T & Record<string, unknown>>
4336
+ >();
4337
+
3928
4338
  const processChunk = async (
3929
4339
  chunkRows: T[],
3930
4340
  chunkStart: number,
@@ -3949,7 +4359,6 @@ function createMinimalWorkerCtx(
3949
4359
  req,
3950
4360
  tableNamespace: name,
3951
4361
  outputFields,
3952
- cellPolicies,
3953
4362
  rows: chunkEntries.map(({ row, rowKey }) => ({
3954
4363
  ...row,
3955
4364
  ...mapRowOutcomeRuntimeFields({ key: rowKey }),
@@ -4134,8 +4543,12 @@ function createMinimalWorkerCtx(
4134
4543
  req,
4135
4544
  governor,
4136
4545
  resolveToolPacing,
4546
+ resolveToolActionCacheVersion,
4137
4547
  abortSignal,
4138
4548
  reportSettledToolRequests,
4549
+ callbacks,
4550
+ receiptStore,
4551
+ false,
4139
4552
  );
4140
4553
  const generatedOutputFields = new Set<string>();
4141
4554
  const persistedExecutedIndexes = new Set<number>();
@@ -4296,8 +4709,8 @@ function createMinimalWorkerCtx(
4296
4709
  const fieldOutputs: Record<string, unknown> = {};
4297
4710
  const cellMetaPatch: Record<string, WorkerCellMetaPatchEntry> =
4298
4711
  {};
4299
- const waterfallOutputs: RecordedWaterfallOutput[] = [];
4300
4712
  const stepProgramOutputs: RecordedStepProgramOutput[] = [];
4713
+ let activeField: string | null = null;
4301
4714
  const rowCtx = {
4302
4715
  ...(ctx as Record<string, unknown>),
4303
4716
  tools: {
@@ -4311,26 +4724,14 @@ function createMinimalWorkerCtx(
4311
4724
  request.toolId,
4312
4725
  request.input,
4313
4726
  workflowStep,
4727
+ {
4728
+ force: request.force === true,
4729
+ staleAfterSeconds: request.staleAfterSeconds,
4730
+ },
4314
4731
  );
4315
4732
  },
4316
4733
  },
4317
- waterfall: (
4318
- toolNameOrSpec: string | WorkerInlineWaterfallSpec,
4319
- waterfallInput: Record<string, unknown>,
4320
- waterfallOpts?: WorkerWaterfallOptions,
4321
- ) =>
4322
- executeWorkerWaterfall(
4323
- req,
4324
- waterfallOutputs,
4325
- toolNameOrSpec,
4326
- waterfallInput,
4327
- waterfallOpts,
4328
- callbacks,
4329
- workflowStep,
4330
- resolveToolPacing,
4331
- ),
4332
4734
  };
4333
- let activeField: string | null = null;
4334
4735
  try {
4335
4736
  for (const [key, value] of fieldEntries) {
4336
4737
  activeField = key;
@@ -4344,19 +4745,6 @@ function createMinimalWorkerCtx(
4344
4745
  >
4345
4746
  )[key]
4346
4747
  : null;
4347
- const reuseDecision = shouldRecomputeCell({
4348
- hasValue: isCompletedWorkerFieldValue(enriched[key]),
4349
- meta:
4350
- rawCellMeta && typeof rawCellMeta === 'object'
4351
- ? (rawCellMeta as {
4352
- status?: string;
4353
- completedAt?: number;
4354
- staleAt?: number | null;
4355
- staleAfterSeconds?: number | null;
4356
- })
4357
- : null,
4358
- policy: cellPolicies?.[key],
4359
- });
4360
4748
  const previousCell = previousCellFromValue({
4361
4749
  hasValue: isCompletedWorkerFieldValue(enriched[key]),
4362
4750
  value: enriched[key],
@@ -4370,15 +4758,6 @@ function createMinimalWorkerCtx(
4370
4758
  })
4371
4759
  : null,
4372
4760
  });
4373
- if (reuseDecision.action === 'reuse') {
4374
- cellMetaPatch[key] = {
4375
- status: 'cached',
4376
- stage: key,
4377
- reused: true,
4378
- runId: req.runId,
4379
- };
4380
- continue;
4381
- }
4382
4761
  const resolved = await executeWorkerStepResolver(
4383
4762
  value,
4384
4763
  enriched,
@@ -4402,18 +4781,11 @@ function createMinimalWorkerCtx(
4402
4781
  runId: req.runId,
4403
4782
  };
4404
4783
  } else {
4405
- const completedAt = nowMs();
4406
- const stalenessMeta = resolveCompletedCellStalenessMeta({
4407
- policy: authoredCellPolicies?.[key],
4408
- value: resolved.value,
4409
- completedAt,
4410
- });
4411
4784
  cellMetaPatch[key] = {
4412
4785
  status: 'completed',
4413
4786
  stage: key,
4414
4787
  runId: req.runId,
4415
- completedAt,
4416
- ...stalenessMeta,
4788
+ completedAt: nowMs(),
4417
4789
  };
4418
4790
  }
4419
4791
  activeField = null;
@@ -4430,13 +4802,6 @@ function createMinimalWorkerCtx(
4430
4802
  };
4431
4803
  }
4432
4804
  }
4433
- for (const waterfallOutput of waterfallOutputs) {
4434
- const columnName =
4435
- `${sqlishIdentifierPart(waterfallOutput.waterfallId)}__` +
4436
- sqlishIdentifierPart(waterfallOutput.stepId);
4437
- enriched[columnName] = waterfallOutput.value;
4438
- generatedOutputFields.add(columnName);
4439
- }
4440
4805
  executedCellMetaPatches[myIndex] =
4441
4806
  Object.keys(cellMetaPatch).length > 0
4442
4807
  ? cellMetaPatch
@@ -4645,6 +5010,17 @@ function createMinimalWorkerCtx(
4645
5010
  const publicOut = out.map((row) => publicCsvOutputRow(row));
4646
5011
  const hashStartedAt = nowMs();
4647
5012
  const hash = await hashJson(publicOut);
5013
+ const includeCachedRowsInChunkResult = !workflowStep;
5014
+ if (
5015
+ workflowStep &&
5016
+ canCacheRows &&
5017
+ out.length <= WORKER_DATASET_IN_MEMORY_ROWS
5018
+ ) {
5019
+ volatileWorkflowChunkRows.set(
5020
+ chunkIndex,
5021
+ serializeDurableStepValue(out),
5022
+ );
5023
+ }
4648
5024
  recordRunnerPerfTrace({
4649
5025
  req,
4650
5026
  phase: 'runner.map_chunk.hash',
@@ -4680,8 +5056,13 @@ function createMinimalWorkerCtx(
4680
5056
  rowFailureSamples,
4681
5057
  outputDatasetId: `map:${name}`,
4682
5058
  hash,
5059
+ // Runtime Sheet owns the full row payloads. Native Workflow step
5060
+ // storage may keep only the bounded preview sample; same-run play code
5061
+ // that needs more rows uses volatileWorkflowChunkRows, which is not
5062
+ // part of the persisted step result.
4683
5063
  preview: toWorkflowSerializableValue(publicOut.slice(0, 5)),
4684
5064
  cachedRows:
5065
+ includeCachedRowsInChunkResult &&
4685
5066
  out.length <= WORKER_DATASET_IN_MEMORY_ROWS
4686
5067
  ? serializeDurableStepValue(out)
4687
5068
  : undefined,
@@ -4724,6 +5105,25 @@ function createMinimalWorkerCtx(
4724
5105
  )) as WorkerMapChunkSummary<T & Record<string, unknown>>;
4725
5106
  };
4726
5107
 
5108
+ const readPersistedRows = async (input: {
5109
+ limit: number;
5110
+ offset: number;
5111
+ }) => {
5112
+ const result = await harnessReadSheetDatasetRows({
5113
+ baseUrl: req.baseUrl,
5114
+ executorToken: req.executorToken,
5115
+ orgId: req.orgId,
5116
+ playName: req.playName,
5117
+ tableNamespace: name,
5118
+ runId: req.runId,
5119
+ limit: input.limit,
5120
+ offset: input.offset,
5121
+ userEmail: req.userEmail,
5122
+ preloadedDbSessions: req.preloadedDbSessions ?? null,
5123
+ });
5124
+ return result.rows as Array<T & Record<string, unknown>>;
5125
+ };
5126
+
4727
5127
  const finalize = (totalRowsWritten: number) => {
4728
5128
  const failureSampleSummary =
4729
5129
  totalRowFailureSamples.length > 0
@@ -4761,23 +5161,15 @@ function createMinimalWorkerCtx(
4761
5161
  playName: req.playName,
4762
5162
  name,
4763
5163
  count: totalRowsWritten,
5164
+ // In native Workflows, chunk summaries intentionally omit full row
5165
+ // payloads, so this preview only contains the bounded chunk samples.
5166
+ // Do not synchronously page rows back here: service-binding reads have
5167
+ // no abort handle and can keep an otherwise completed Workflow request
5168
+ // alive until Cloudflare cancels it as hung. The dataset handle remains
5169
+ // page-backed through readRows/export.
4764
5170
  previewRows,
4765
5171
  cachedRows: canCacheRows ? cachedRows : null,
4766
- readRows: async ({ limit, offset }) => {
4767
- const result = await harnessReadSheetDatasetRows({
4768
- baseUrl: req.baseUrl,
4769
- executorToken: req.executorToken,
4770
- orgId: req.orgId,
4771
- playName: req.playName,
4772
- tableNamespace: name,
4773
- runId: req.runId,
4774
- limit,
4775
- offset,
4776
- userEmail: req.userEmail,
4777
- preloadedDbSessions: req.preloadedDbSessions ?? null,
4778
- });
4779
- return result.rows as Array<T & Record<string, unknown>>;
4780
- },
5172
+ readRows: readPersistedRows,
4781
5173
  trace: (phase, ms, extra) =>
4782
5174
  recordRunnerPerfTrace({ req, phase, ms, extra }),
4783
5175
  nowMs,
@@ -4845,7 +5237,9 @@ function createMinimalWorkerCtx(
4845
5237
  );
4846
5238
  }
4847
5239
  if (canCacheRows) {
4848
- const nextRows = chunkResult.cachedRows ?? [];
5240
+ const volatileRows = volatileWorkflowChunkRows.get(chunkIndex);
5241
+ volatileWorkflowChunkRows.delete(chunkIndex);
5242
+ const nextRows = chunkResult.cachedRows ?? volatileRows ?? [];
4849
5243
  if (
4850
5244
  nextRows.length === chunkResult.rowsWritten &&
4851
5245
  cachedRows.length + nextRows.length <= WORKER_DATASET_IN_MEMORY_ROWS
@@ -4853,6 +5247,7 @@ function createMinimalWorkerCtx(
4853
5247
  cachedRows.push(...nextRows);
4854
5248
  } else {
4855
5249
  cachedRows.length = 0;
5250
+ volatileWorkflowChunkRows.clear();
4856
5251
  canCacheRows = false;
4857
5252
  }
4858
5253
  }
@@ -4904,7 +5299,7 @@ function createMinimalWorkerCtx(
4904
5299
  // step when no row otherwise succeeded) are summarized and registered as
4905
5300
  // a recovered dataset — the failed run then advertises a WORKING export
4906
5301
  // instead of a dead end (#15/#27). The run still fails (the throw below).
4907
- finalize(totalRowsWritten);
5302
+ await finalize(totalRowsWritten);
4908
5303
  const firstError = totalRowFailureSamples[0]?.error ?? 'unknown error';
4909
5304
  throw new Error(
4910
5305
  `ctx.dataset("${name}") failed for all ${totalRowsFailed} executed rows. ` +
@@ -4912,7 +5307,7 @@ function createMinimalWorkerCtx(
4912
5307
  `(rows are persisted with per-row errors; fix the cause and re-run to resume)`,
4913
5308
  );
4914
5309
  }
4915
- const dataset = finalize(totalRowsWritten);
5310
+ const dataset = await finalize(totalRowsWritten);
4916
5311
  recordRunnerPerfTrace({
4917
5312
  req,
4918
5313
  phase: 'runner.map.total',
@@ -4944,18 +5339,7 @@ function createMinimalWorkerCtx(
4944
5339
  const fields = Object.fromEntries(
4945
5340
  program.steps.map((step) => [step.name, step.resolver]),
4946
5341
  );
4947
- const cellPolicies = workerCellPoliciesFromSteps(program.steps);
4948
- const authoredCellPolicies = authoredWorkerCellPoliciesFromSteps(
4949
- program.steps,
4950
- );
4951
- return runMap(
4952
- this.name,
4953
- this.rows,
4954
- fields,
4955
- cellPolicies,
4956
- authoredCellPolicies,
4957
- opts,
4958
- );
5342
+ return runMap(this.name, this.rows, fields, opts);
4959
5343
  },
4960
5344
  {
4961
5345
  emptyColumnName:
@@ -5022,7 +5406,7 @@ function createMinimalWorkerCtx(
5022
5406
  throw new Error('ctx.step(name, callback) requires a name.');
5023
5407
  }
5024
5408
  return await executeWithWorkflowStep(
5025
- `${nextCtxStepReceiptKey(normalizedName)}${staleRuntimeSuffix(options?.staleAfterSeconds)}`,
5409
+ nextCtxStepReceiptKey(normalizedName, options?.staleAfterSeconds),
5026
5410
  callback,
5027
5411
  );
5028
5412
  },
@@ -5044,8 +5428,8 @@ function createMinimalWorkerCtx(
5044
5428
  });
5045
5429
  }
5046
5430
  // Static pipeline JS blocks already execute inside a Workflow step.
5047
- // Wrapping each generated waterfall step in another step.do can leave
5048
- // Workers preview runs parked after the last provider callback.
5431
+ // Wrapping each generated step in another step.do can leave Workers
5432
+ // preview runs parked after the last provider callback.
5049
5433
  return (await executeWorkerStepProgram(
5050
5434
  program,
5051
5435
  input,
@@ -5175,18 +5559,7 @@ function createMinimalWorkerCtx(
5175
5559
  const fields = Object.fromEntries(
5176
5560
  fieldsDef.steps.map((step) => [step.name, step.resolver]),
5177
5561
  );
5178
- const cellPolicies = workerCellPoliciesFromSteps(fieldsDef.steps);
5179
- const authoredCellPolicies = authoredWorkerCellPoliciesFromSteps(
5180
- fieldsDef.steps,
5181
- );
5182
- return runMap(
5183
- name,
5184
- rows,
5185
- fields,
5186
- cellPolicies,
5187
- authoredCellPolicies,
5188
- opts,
5189
- );
5562
+ return runMap(name, rows, fields, opts);
5190
5563
  }
5191
5564
  throw new Error(
5192
5565
  'ctx.dataset(key, rows, fields, options) is not supported. Use ctx.dataset(key, rows).withColumn(...).run(options).',
@@ -5202,55 +5575,18 @@ function createMinimalWorkerCtx(
5202
5575
  assertNotAborted(abortSignal);
5203
5576
  const request = normalizeToolExecuteArgs(requestArg);
5204
5577
  assertNoSecretTaint(request.input, 'ctx.tools.execute input');
5205
- return await executeWithRuntimeReceipt(
5206
- `tool:${request.id}:${deriveToolRequestIdentity({
5207
- toolId: request.toolId,
5208
- requestInput: request.input,
5209
- })}${staleRuntimeSuffix(request.staleAfterSeconds)}`,
5210
- () =>
5211
- rootToolBatchScheduler.execute(
5212
- request.id,
5213
- request.toolId,
5214
- request.input,
5215
- workflowStep,
5216
- ),
5578
+ return await rootToolBatchScheduler.execute(
5579
+ request.id,
5580
+ request.toolId,
5581
+ request.input,
5582
+ workflowStep,
5583
+ {
5584
+ force: request.force === true,
5585
+ staleAfterSeconds: request.staleAfterSeconds,
5586
+ },
5217
5587
  );
5218
5588
  },
5219
5589
  },
5220
- /**
5221
- * Two shapes:
5222
- *
5223
- * 1. ctx.waterfall(toolName, input, { providers }) — provider-fallback
5224
- * tool call. Iterates providers in order; returns first non-empty
5225
- * result, null on cumulative miss.
5226
- *
5227
- * 2. ctx.waterfall(spec, input) — inline waterfall
5228
- * with a structured spec ({ id, output, minResults, steps[] }). For
5229
- * each step, calls step.toolId with step.mapInput(input); if the
5230
- * result carries a non-null value at spec.output (or `step.<id>.<output>`),
5231
- * short-circuits. Returns the first hit's full response, or null.
5232
- *
5233
- * Min-results gating across rows / extract_js / run_if_js are intentionally
5234
- * NOT replicated here — the harness gives correctness for the common path
5235
- * first; richer semantics come when we port the shared_libs batch runtime
5236
- * onto the dedup DO.
5237
- */
5238
- async waterfall(
5239
- toolNameOrSpec: string | WorkerInlineWaterfallSpec,
5240
- input: Record<string, unknown>,
5241
- opts?: WorkerWaterfallOptions,
5242
- ): Promise<unknown | null> {
5243
- return executeWorkerWaterfall(
5244
- req,
5245
- [],
5246
- toolNameOrSpec,
5247
- input,
5248
- opts,
5249
- callbacks,
5250
- workflowStep,
5251
- resolveToolPacing,
5252
- );
5253
- },
5254
5590
  async sleep(ms: number): Promise<void> {
5255
5591
  assertNotAborted(abortSignal);
5256
5592
  const durationMs = Math.max(0, Math.round(Number(ms) || 0));
@@ -5285,152 +5621,210 @@ function createMinimalWorkerCtx(
5285
5621
  if (!resolvedName) {
5286
5622
  throw new Error('ctx.runPlay(...) requires a resolvable play name.');
5287
5623
  }
5288
- const receiptKey = `runPlay:${normalizedKey}:${await hashJson({
5289
- childPlayName: resolvedName,
5290
- input,
5291
- })}${staleRuntimeSuffix(options?.staleAfterSeconds)}`;
5292
- return await executeWithRuntimeReceipt(receiptKey, async () => {
5293
- // The Governor owns the play-call lineage: forkChild does the cycle
5294
- // guard, depth/per-parent/playCall/descendant budget charges, and
5295
- // returns the snapshot to thread into the child so budgets accumulate
5296
- // across isolates. Charged inside the receipt boundary so a replay
5297
- // (cache hit) never double-charges.
5298
- const childRunId = `${req.runId}:child:${normalizedKey}`;
5299
- const childGovernance = governor.forkChild({
5624
+ const receiptKey = buildDurableCtxCallCacheKey({
5625
+ orgId: req.orgId,
5626
+ playId: req.playName,
5627
+ kind: 'runPlay',
5628
+ id: normalizedKey,
5629
+ semanticKey: await hashJson({
5300
5630
  childPlayName: resolvedName,
5301
- childRunId,
5302
- });
5303
- const nextDepth = childGovernance.callDepth;
5304
- const nextParentCalls =
5305
- governor.snapshot().parentChildCalls[req.playName] ?? 0;
5631
+ input,
5632
+ }),
5633
+ staleAfterSeconds: options?.staleAfterSeconds,
5634
+ });
5635
+ return await executeWithRuntimeReceipt(
5636
+ receiptKey,
5637
+ async () => {
5638
+ // The Governor owns the play-call lineage: forkChild does the cycle
5639
+ // guard, depth/per-parent/playCall/descendant budget charges, and
5640
+ // returns the snapshot to thread into the child so budgets accumulate
5641
+ // across isolates. Charged inside the receipt boundary so a replay
5642
+ // (cache hit) never double-charges.
5643
+ const childRunId = `${req.runId}:child:${normalizedKey}`;
5644
+ const childGovernance = governor.forkChild({
5645
+ childPlayName: resolvedName,
5646
+ childRunId,
5647
+ });
5648
+ const nextDepth = childGovernance.callDepth;
5649
+ const nextParentCalls =
5650
+ governor.snapshot().parentChildCalls[req.playName] ?? 0;
5306
5651
 
5307
- emitEvent({
5308
- type: 'log',
5309
- level: 'info',
5310
- message: `Starting child play ${resolvedName} (${normalizedKey})`,
5311
- ts: nowMs(),
5312
- });
5313
- const childManifest = req.childPlayManifests?.[resolvedName];
5314
- if (!childManifest) {
5315
- throw new Error(
5316
- `ctx.runPlay(${normalizedKey}) cannot start ${resolvedName}: missing trusted Cloudflare child manifest from top-level submit.`,
5652
+ emitEvent({
5653
+ type: 'log',
5654
+ level: 'info',
5655
+ message: `Starting child play ${resolvedName} (${normalizedKey})`,
5656
+ ts: nowMs(),
5657
+ });
5658
+ const childManifest = req.childPlayManifests?.[resolvedName];
5659
+ if (!childManifest) {
5660
+ throw new Error(
5661
+ `ctx.runPlay(${normalizedKey}) cannot start ${resolvedName}: missing trusted Cloudflare child manifest from top-level submit.`,
5662
+ );
5663
+ }
5664
+ const childIsDatasetBacked = childPipelineUsesCtxDataset(
5665
+ childManifest.staticPipeline,
5317
5666
  );
5318
- }
5319
- const childIsDatasetBacked = childPipelineUsesCtxDataset(
5320
- childManifest.staticPipeline,
5321
- );
5322
- const childNeedsWorkflowScheduler = childPipelineNeedsWorkflowScheduler(
5323
- childManifest.staticPipeline,
5324
- );
5325
- console.info('[play.runtime.span]', {
5326
- event: 'play.runtime.span',
5327
- phase: 'child_route',
5328
- runId: req.runId,
5329
- parentRunId: req.runId,
5330
- playName: resolvedName,
5331
- graphHash: req.graphHash ?? null,
5332
- depth: nextDepth,
5333
- fanoutIndex: nextParentCalls - 1,
5334
- childIsDatasetBacked,
5335
- childNeedsWorkflowScheduler,
5336
- hasStaticPipeline: Boolean(childManifest.staticPipeline),
5337
- childTableNamespace:
5338
- typeof childManifest.staticPipeline?.tableNamespace === 'string'
5339
- ? childManifest.staticPipeline.tableNamespace
5667
+ const childNeedsWorkflowScheduler =
5668
+ childPipelineNeedsWorkflowScheduler(childManifest.staticPipeline);
5669
+ console.info('[play.runtime.span]', {
5670
+ event: 'play.runtime.span',
5671
+ phase: 'child_route',
5672
+ runId: req.runId,
5673
+ parentRunId: req.runId,
5674
+ playName: resolvedName,
5675
+ graphHash: req.graphHash ?? null,
5676
+ depth: nextDepth,
5677
+ fanoutIndex: nextParentCalls - 1,
5678
+ childIsDatasetBacked,
5679
+ childNeedsWorkflowScheduler,
5680
+ hasStaticPipeline: Boolean(childManifest.staticPipeline),
5681
+ childTableNamespace:
5682
+ typeof childManifest.staticPipeline?.tableNamespace === 'string'
5683
+ ? childManifest.staticPipeline.tableNamespace
5684
+ : null,
5685
+ childStageCount: Array.isArray(childManifest.staticPipeline?.stages)
5686
+ ? childManifest.staticPipeline.stages.length
5687
+ : null,
5688
+ childSubstepCount: Array.isArray(
5689
+ childManifest.staticPipeline?.substeps,
5690
+ )
5691
+ ? childManifest.staticPipeline.substeps.length
5340
5692
  : null,
5341
- childStageCount: Array.isArray(childManifest.staticPipeline?.stages)
5342
- ? childManifest.staticPipeline.stages.length
5343
- : null,
5344
- childSubstepCount: Array.isArray(
5345
- childManifest.staticPipeline?.substeps,
5346
- )
5347
- ? childManifest.staticPipeline.substeps.length
5348
- : null,
5349
- });
5350
- let childPlaySlot: { release(): void } | null = null;
5351
- try {
5352
- childPlaySlot = await governor.acquireChildPlaySlot({
5353
- signal: abortSignal,
5354
5693
  });
5355
- const childSubmitStartedAt = nowMs();
5356
- let started: {
5357
- workflowId?: string;
5358
- runId?: string;
5359
- status?: string;
5360
- mode?: string;
5361
- output?: unknown;
5362
- result?: unknown;
5363
- error?: unknown;
5364
- logs?: string[];
5365
- timings?: Array<{ phase: string; ms: number }>;
5366
- };
5694
+ let childPlaySlot: { release(): void } | null = null;
5367
5695
  try {
5368
- started = await submitChildPlayThroughCoordinator({
5369
- req,
5370
- coordinatorBinding: cachedCoordinatorBinding,
5371
- makeRequestId,
5372
- coordinatorRequestHeaders,
5373
- allowInline:
5374
- options?.timeoutMs == null && !childNeedsWorkflowScheduler,
5375
- body: {
5376
- name: resolvedName,
5377
- input: isRecord(input) ? input : {},
5378
- orgId: req.orgId,
5379
- callbackBaseUrl: req.callbackUrl,
5380
- baseUrl: req.baseUrl,
5381
- parentExecutorToken: req.executorToken,
5382
- userEmail: req.userEmail ?? '',
5383
- profile: 'workers_edge',
5384
- manifest: childManifest,
5385
- childPlayManifests: req.childPlayManifests ?? null,
5386
- internalRunPlay: {
5387
- rootRunId,
5388
- parentRunId: req.runId,
5389
- parentPlayName: req.playName,
5696
+ childPlaySlot = await governor.acquireChildPlaySlot({
5697
+ signal: abortSignal,
5698
+ });
5699
+ const childSubmitStartedAt = nowMs();
5700
+ let started: {
5701
+ workflowId?: string;
5702
+ runId?: string;
5703
+ status?: string;
5704
+ mode?: string;
5705
+ output?: unknown;
5706
+ result?: unknown;
5707
+ error?: unknown;
5708
+ logs?: string[];
5709
+ timings?: Array<{ phase: string; ms: number }>;
5710
+ };
5711
+ try {
5712
+ started = await submitChildPlayThroughCoordinator({
5713
+ req,
5714
+ coordinatorBinding: cachedCoordinatorBinding,
5715
+ makeRequestId,
5716
+ coordinatorRequestHeaders,
5717
+ allowInline:
5718
+ options?.timeoutMs == null && !childNeedsWorkflowScheduler,
5719
+ body: {
5720
+ name: resolvedName,
5721
+ input: isRecord(input) ? input : {},
5722
+ orgId: req.orgId,
5723
+ callbackBaseUrl: req.callbackUrl,
5724
+ baseUrl: req.baseUrl,
5725
+ parentExecutorToken: req.executorToken,
5726
+ userEmail: req.userEmail ?? '',
5727
+ profile: 'workers_edge',
5728
+ manifest: childManifest,
5729
+ childPlayManifests: req.childPlayManifests ?? null,
5730
+ internalRunPlay: {
5731
+ rootRunId,
5732
+ parentRunId: req.runId,
5733
+ parentPlayName: req.playName,
5734
+ key: normalizedKey,
5735
+ // Per the lineage validator: ancestry tail must equal the
5736
+ // executor token's play name (the parent making this call).
5737
+ ancestryPlayIds,
5738
+ callDepth: nextDepth,
5739
+ // Cumulative lineage-global budget counters (incl. this
5740
+ // launch's play/descendant charges) so the child seeds its
5741
+ // budgets from the lineage total instead of resetting to 0 in
5742
+ // its isolate. Threading descendantCount in particular keeps
5743
+ // fan-out descendant accounting lineage-global, matching cjs.
5744
+ playCallCount: childGovernance.playCallCount,
5745
+ toolCallCount: childGovernance.toolCallCount,
5746
+ retryCount: childGovernance.retryCount,
5747
+ descendantCount: childGovernance.descendantCount,
5748
+ waterfallStepExecutions:
5749
+ childGovernance.waterfallStepExecutions,
5750
+ description:
5751
+ typeof options?.description === 'string'
5752
+ ? options.description
5753
+ : null,
5754
+ },
5755
+ },
5756
+ });
5757
+ } catch (error) {
5758
+ console.info('[play.runtime.span]', {
5759
+ event: 'play.runtime.span',
5760
+ phase: 'child_submit',
5761
+ runId: req.runId,
5762
+ parentRunId: req.runId,
5763
+ playName: resolvedName,
5764
+ graphHash: req.graphHash ?? null,
5765
+ depth: nextDepth,
5766
+ fanoutIndex: nextParentCalls - 1,
5767
+ ms: nowMs() - childSubmitStartedAt,
5768
+ status: 'failed',
5769
+ errorCode: 'CHILD_SUBMIT_FAILED',
5770
+ });
5771
+ recordRunnerPerfTrace({
5772
+ req,
5773
+ phase: 'ctx_run_play.child_submit',
5774
+ ms: nowMs() - childSubmitStartedAt,
5775
+ extra: {
5776
+ status: 'failed',
5777
+ errorCode: 'CHILD_SUBMIT_FAILED',
5778
+ playName: resolvedName,
5390
5779
  key: normalizedKey,
5391
- // Per the lineage validator: ancestry tail must equal the
5392
- // executor token's play name (the parent making this call).
5393
- ancestryPlayIds,
5394
- callDepth: nextDepth,
5395
- // Cumulative lineage-global budget counters (incl. this
5396
- // launch's play/descendant charges) so the child seeds its
5397
- // budgets from the lineage total instead of resetting to 0 in
5398
- // its isolate. Threading descendantCount in particular keeps
5399
- // fan-out descendant accounting lineage-global, matching cjs.
5400
- playCallCount: childGovernance.playCallCount,
5401
- toolCallCount: childGovernance.toolCallCount,
5402
- retryCount: childGovernance.retryCount,
5403
- descendantCount: childGovernance.descendantCount,
5404
- waterfallStepExecutions:
5405
- childGovernance.waterfallStepExecutions,
5406
- description:
5407
- typeof options?.description === 'string'
5408
- ? options.description
5409
- : null,
5780
+ depth: nextDepth,
5781
+ fanoutIndex: nextParentCalls - 1,
5782
+ childIsDatasetBacked,
5783
+ childNeedsWorkflowScheduler,
5410
5784
  },
5411
- },
5412
- });
5413
- } catch (error) {
5785
+ });
5786
+ throw error;
5787
+ }
5788
+ const workflowId = started.workflowId ?? started.runId;
5789
+ if (!workflowId) {
5790
+ const startedError = isRecord(started.error)
5791
+ ? started.error
5792
+ : { message: started.error };
5793
+ const startedErrorMessage =
5794
+ typeof startedError.message === 'string' &&
5795
+ startedError.message.trim()
5796
+ ? startedError.message.trim()
5797
+ : null;
5798
+ throw new Error(
5799
+ startedErrorMessage ??
5800
+ `ctx.runPlay(${normalizedKey}) did not receive a child workflow id.`,
5801
+ );
5802
+ }
5414
5803
  console.info('[play.runtime.span]', {
5415
5804
  event: 'play.runtime.span',
5416
5805
  phase: 'child_submit',
5417
5806
  runId: req.runId,
5418
5807
  parentRunId: req.runId,
5808
+ childRunId: workflowId,
5419
5809
  playName: resolvedName,
5420
5810
  graphHash: req.graphHash ?? null,
5421
5811
  depth: nextDepth,
5422
5812
  fanoutIndex: nextParentCalls - 1,
5423
5813
  ms: nowMs() - childSubmitStartedAt,
5424
- status: 'failed',
5425
- errorCode: 'CHILD_SUBMIT_FAILED',
5814
+ status: 'ok',
5426
5815
  });
5427
5816
  recordRunnerPerfTrace({
5428
5817
  req,
5429
5818
  phase: 'ctx_run_play.child_submit',
5430
5819
  ms: nowMs() - childSubmitStartedAt,
5431
5820
  extra: {
5432
- status: 'failed',
5433
- errorCode: 'CHILD_SUBMIT_FAILED',
5821
+ status: 'ok',
5822
+ childRunId: workflowId,
5823
+ startedStatus: started.status ?? null,
5824
+ mode: started.mode ?? null,
5825
+ coordinatorTimings: Array.isArray(started.timings)
5826
+ ? started.timings
5827
+ : null,
5434
5828
  playName: resolvedName,
5435
5829
  key: normalizedKey,
5436
5830
  depth: nextDepth,
@@ -5439,118 +5833,100 @@ function createMinimalWorkerCtx(
5439
5833
  childNeedsWorkflowScheduler,
5440
5834
  },
5441
5835
  });
5442
- throw error;
5443
- }
5444
- const workflowId = started.workflowId ?? started.runId;
5445
- if (!workflowId) {
5446
- const startedError = isRecord(started.error)
5447
- ? started.error
5448
- : { message: started.error };
5449
- const startedErrorMessage =
5450
- typeof startedError.message === 'string' &&
5451
- startedError.message.trim()
5452
- ? startedError.message.trim()
5453
- : null;
5454
- throw new Error(
5455
- startedErrorMessage ??
5456
- `ctx.runPlay(${normalizedKey}) did not receive a child workflow id.`,
5457
- );
5458
- }
5459
- console.info('[play.runtime.span]', {
5460
- event: 'play.runtime.span',
5461
- phase: 'child_submit',
5462
- runId: req.runId,
5463
- parentRunId: req.runId,
5464
- childRunId: workflowId,
5465
- playName: resolvedName,
5466
- graphHash: req.graphHash ?? null,
5467
- depth: nextDepth,
5468
- fanoutIndex: nextParentCalls - 1,
5469
- ms: nowMs() - childSubmitStartedAt,
5470
- status: 'ok',
5471
- });
5472
- recordRunnerPerfTrace({
5473
- req,
5474
- phase: 'ctx_run_play.child_submit',
5475
- ms: nowMs() - childSubmitStartedAt,
5476
- extra: {
5477
- status: 'ok',
5478
- childRunId: workflowId,
5479
- startedStatus: started.status ?? null,
5480
- mode: started.mode ?? null,
5481
- coordinatorTimings: Array.isArray(started.timings)
5482
- ? started.timings
5483
- : null,
5484
- playName: resolvedName,
5485
- key: normalizedKey,
5486
- depth: nextDepth,
5487
- fanoutIndex: nextParentCalls - 1,
5488
- childIsDatasetBacked,
5489
- childNeedsWorkflowScheduler,
5490
- },
5491
- });
5492
- const startedStatus = String(started.status ?? '').toLowerCase();
5493
- if (startedStatus === 'completed') {
5494
- emitEvent({
5495
- type: 'log',
5496
- level: 'info',
5497
- message: `Completed child play ${resolvedName} (${normalizedKey})`,
5498
- ts: nowMs(),
5499
- });
5500
- return hydrateChildPlayOutputDatasets({
5501
- req,
5502
- childRunId: workflowId,
5503
- childPlayName: resolvedName,
5504
- value: started.output ?? extractChildPlayOutput(started),
5505
- });
5506
- }
5507
- if (startedStatus === 'failed') {
5508
- const startedError = isRecord(started.error)
5509
- ? started.error
5510
- : { message: started.error };
5511
- const startedErrorMessage =
5512
- typeof startedError.message === 'string' &&
5513
- startedError.message.trim()
5514
- ? startedError.message.trim()
5515
- : `Child play ${resolvedName} (${workflowId}) failed.`;
5516
- throw new Error(startedErrorMessage);
5517
- }
5518
- const childWaitStartedAt = nowMs();
5519
- let waitResult: ChildPlayTerminalWaitResult;
5520
- try {
5521
- waitResult = await awaitChildTerminal({
5522
- parentRunId: req.runId,
5523
- // CF's WorkflowStep.waitForEvent generic signature is wider than
5524
- // the small structural shape ChildPlayAwait needs; bridge it the
5525
- // same way the inline implementation did.
5526
- workflowStep: workflowStep as unknown as
5527
- | WorkflowStepLike
5528
- | undefined,
5529
- workflowId,
5530
- playName: resolvedName,
5531
- key: normalizedKey,
5532
- timeoutMs: Math.max(
5533
- 1_000,
5534
- Math.min(options?.timeoutMs ?? 5 * 60_000, 30 * 60_000),
5535
- ),
5536
- coordinator: cachedCoordinatorBinding?.readChildTerminalState
5537
- ? {
5538
- readChildTerminalState: (
5539
- parentRunId,
5540
- eventKey,
5541
- timeoutMs,
5542
- ) =>
5543
- cachedCoordinatorBinding!.readChildTerminalState!(
5836
+ const startedStatus = String(started.status ?? '').toLowerCase();
5837
+ if (startedStatus === 'completed') {
5838
+ emitEvent({
5839
+ type: 'log',
5840
+ level: 'info',
5841
+ message: `Completed child play ${resolvedName} (${normalizedKey})`,
5842
+ ts: nowMs(),
5843
+ });
5844
+ return hydrateChildPlayOutputDatasets({
5845
+ req,
5846
+ childRunId: workflowId,
5847
+ childPlayName: resolvedName,
5848
+ value: started.output ?? extractChildPlayOutput(started),
5849
+ });
5850
+ }
5851
+ if (startedStatus === 'failed') {
5852
+ const startedError = isRecord(started.error)
5853
+ ? started.error
5854
+ : { message: started.error };
5855
+ const startedErrorMessage =
5856
+ typeof startedError.message === 'string' &&
5857
+ startedError.message.trim()
5858
+ ? startedError.message.trim()
5859
+ : `Child play ${resolvedName} (${workflowId}) failed.`;
5860
+ throw new Error(startedErrorMessage);
5861
+ }
5862
+ const childWaitStartedAt = nowMs();
5863
+ let waitResult: ChildPlayTerminalWaitResult;
5864
+ try {
5865
+ waitResult = await awaitChildTerminal({
5866
+ parentRunId: req.runId,
5867
+ // CF's WorkflowStep.waitForEvent generic signature is wider than
5868
+ // the small structural shape ChildPlayAwait needs; bridge it the
5869
+ // same way the inline implementation did.
5870
+ workflowStep: workflowStep as unknown as
5871
+ | WorkflowStepLike
5872
+ | undefined,
5873
+ workflowId,
5874
+ playName: resolvedName,
5875
+ key: normalizedKey,
5876
+ timeoutMs: Math.max(
5877
+ 1_000,
5878
+ Math.min(options?.timeoutMs ?? 5 * 60_000, 30 * 60_000),
5879
+ ),
5880
+ coordinator: cachedCoordinatorBinding?.readChildTerminalState
5881
+ ? {
5882
+ readChildTerminalState: (
5544
5883
  parentRunId,
5545
5884
  eventKey,
5546
5885
  timeoutMs,
5547
- ),
5548
- }
5549
- : null,
5550
- now: nowMs,
5551
- hashJson,
5552
- });
5553
- } catch (error) {
5886
+ ) =>
5887
+ cachedCoordinatorBinding!.readChildTerminalState!(
5888
+ parentRunId,
5889
+ eventKey,
5890
+ timeoutMs,
5891
+ ),
5892
+ }
5893
+ : null,
5894
+ now: nowMs,
5895
+ hashJson,
5896
+ });
5897
+ } catch (error) {
5898
+ console.info('[play.runtime.span]', {
5899
+ event: 'play.runtime.span',
5900
+ phase: 'child_wait',
5901
+ runId: req.runId,
5902
+ parentRunId: req.runId,
5903
+ childRunId: workflowId,
5904
+ playName: resolvedName,
5905
+ graphHash: req.graphHash ?? null,
5906
+ depth: nextDepth,
5907
+ fanoutIndex: nextParentCalls - 1,
5908
+ ms: nowMs() - childWaitStartedAt,
5909
+ status: 'failed',
5910
+ errorCode: 'CHILD_WAIT_FAILED',
5911
+ });
5912
+ recordRunnerPerfTrace({
5913
+ req,
5914
+ phase: 'ctx_run_play.child_wait',
5915
+ ms: nowMs() - childWaitStartedAt,
5916
+ extra: {
5917
+ status: 'failed',
5918
+ errorCode: 'CHILD_WAIT_FAILED',
5919
+ childRunId: workflowId,
5920
+ playName: resolvedName,
5921
+ key: normalizedKey,
5922
+ depth: nextDepth,
5923
+ fanoutIndex: nextParentCalls - 1,
5924
+ childIsDatasetBacked,
5925
+ childNeedsWorkflowScheduler,
5926
+ },
5927
+ });
5928
+ throw error;
5929
+ }
5554
5930
  console.info('[play.runtime.span]', {
5555
5931
  event: 'play.runtime.span',
5556
5932
  phase: 'child_wait',
@@ -5562,16 +5938,17 @@ function createMinimalWorkerCtx(
5562
5938
  depth: nextDepth,
5563
5939
  fanoutIndex: nextParentCalls - 1,
5564
5940
  ms: nowMs() - childWaitStartedAt,
5565
- status: 'failed',
5566
- errorCode: 'CHILD_WAIT_FAILED',
5941
+ status: 'ok',
5942
+ waitSource: waitResult.source,
5943
+ waitAttempts: waitResult.attempts ?? null,
5944
+ reportedWaitMs: waitResult.waitMs,
5567
5945
  });
5568
5946
  recordRunnerPerfTrace({
5569
5947
  req,
5570
5948
  phase: 'ctx_run_play.child_wait',
5571
5949
  ms: nowMs() - childWaitStartedAt,
5572
5950
  extra: {
5573
- status: 'failed',
5574
- errorCode: 'CHILD_WAIT_FAILED',
5951
+ status: 'ok',
5575
5952
  childRunId: workflowId,
5576
5953
  playName: resolvedName,
5577
5954
  key: normalizedKey,
@@ -5579,60 +5956,31 @@ function createMinimalWorkerCtx(
5579
5956
  fanoutIndex: nextParentCalls - 1,
5580
5957
  childIsDatasetBacked,
5581
5958
  childNeedsWorkflowScheduler,
5959
+ waitSource: waitResult.source,
5960
+ waitAttempts: waitResult.attempts ?? null,
5961
+ reportedWaitMs: waitResult.waitMs,
5582
5962
  },
5583
5963
  });
5584
- throw error;
5585
- }
5586
- console.info('[play.runtime.span]', {
5587
- event: 'play.runtime.span',
5588
- phase: 'child_wait',
5589
- runId: req.runId,
5590
- parentRunId: req.runId,
5591
- childRunId: workflowId,
5592
- playName: resolvedName,
5593
- graphHash: req.graphHash ?? null,
5594
- depth: nextDepth,
5595
- fanoutIndex: nextParentCalls - 1,
5596
- ms: nowMs() - childWaitStartedAt,
5597
- status: 'ok',
5598
- waitSource: waitResult.source,
5599
- waitAttempts: waitResult.attempts ?? null,
5600
- reportedWaitMs: waitResult.waitMs,
5601
- });
5602
- recordRunnerPerfTrace({
5603
- req,
5604
- phase: 'ctx_run_play.child_wait',
5605
- ms: nowMs() - childWaitStartedAt,
5606
- extra: {
5607
- status: 'ok',
5964
+ emitEvent({
5965
+ type: 'log',
5966
+ level: 'info',
5967
+ message: `Completed child play ${resolvedName} (${normalizedKey})`,
5968
+ ts: nowMs(),
5969
+ });
5970
+ return hydrateChildPlayOutputDatasets({
5971
+ req,
5608
5972
  childRunId: workflowId,
5609
- playName: resolvedName,
5610
- key: normalizedKey,
5611
- depth: nextDepth,
5612
- fanoutIndex: nextParentCalls - 1,
5613
- childIsDatasetBacked,
5614
- childNeedsWorkflowScheduler,
5615
- waitSource: waitResult.source,
5616
- waitAttempts: waitResult.attempts ?? null,
5617
- reportedWaitMs: waitResult.waitMs,
5618
- },
5619
- });
5620
- emitEvent({
5621
- type: 'log',
5622
- level: 'info',
5623
- message: `Completed child play ${resolvedName} (${normalizedKey})`,
5624
- ts: nowMs(),
5625
- });
5626
- return hydrateChildPlayOutputDatasets({
5627
- req,
5628
- childRunId: workflowId,
5629
- childPlayName: resolvedName,
5630
- value: waitResult.output,
5631
- });
5632
- } finally {
5633
- childPlaySlot?.release();
5634
- }
5635
- });
5973
+ childPlayName: resolvedName,
5974
+ value: waitResult.output,
5975
+ });
5976
+ } finally {
5977
+ childPlaySlot?.release();
5978
+ }
5979
+ },
5980
+ false,
5981
+ false,
5982
+ true,
5983
+ );
5636
5984
  },
5637
5985
  async fetch(
5638
5986
  key: string,
@@ -5676,12 +6024,19 @@ function createMinimalWorkerCtx(
5676
6024
  `ctx.fetch(${method} ${url}) needs an Idempotency-Key header. Durable plays can replay after waits/retries; add an idempotency key or wrap the side effect in a Deepline integration tool.`,
5677
6025
  );
5678
6026
  }
5679
- const receiptKey = `fetch:${normalizedKey}:${await hashJson({
5680
- body,
5681
- method,
5682
- safeHeaders,
5683
- url,
5684
- })}${staleRuntimeSuffix(options?.staleAfterSeconds)}`;
6027
+ const receiptKey = buildDurableCtxCallCacheKey({
6028
+ orgId: req.orgId,
6029
+ playId: req.playName,
6030
+ kind: 'fetch',
6031
+ id: normalizedKey,
6032
+ semanticKey: await hashJson({
6033
+ body,
6034
+ method,
6035
+ safeHeaders,
6036
+ url,
6037
+ }),
6038
+ staleAfterSeconds: options?.staleAfterSeconds,
6039
+ });
5685
6040
  return await executeWithRuntimeReceipt(receiptKey, async () => {
5686
6041
  const secretHeaders = await resolveSecretAuth(init.auth);
5687
6042
  const headers = {