deepline 0.1.108 → 0.1.110

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/dist/cli/index.js +2849 -1407
  2. package/dist/cli/index.mjs +2587 -1152
  3. package/dist/index.d.mts +81 -17
  4. package/dist/index.d.ts +81 -17
  5. package/dist/index.js +179 -51
  6. package/dist/index.mjs +179 -51
  7. package/dist/repo/apps/play-runner-workers/src/child-play-submit.ts +196 -0
  8. package/dist/repo/apps/play-runner-workers/src/coordinator-entry.ts +42 -21
  9. package/dist/repo/apps/play-runner-workers/src/entry.ts +162 -320
  10. package/dist/repo/apps/play-runner-workers/src/runtime/csv-rows.ts +102 -0
  11. package/dist/repo/apps/play-runner-workers/src/runtime/dataset-handles.ts +8 -3
  12. package/dist/repo/apps/play-runner-workers/src/runtime/receipts.ts +18 -27
  13. package/dist/repo/apps/play-runner-workers/src/workflow-instance-create.ts +44 -0
  14. package/dist/repo/apps/play-runner-workers/src/workflow-retry.ts +7 -11
  15. package/dist/repo/sdk/src/client.ts +35 -12
  16. package/dist/repo/sdk/src/errors.ts +2 -2
  17. package/dist/repo/sdk/src/http.ts +109 -9
  18. package/dist/repo/sdk/src/index.ts +4 -0
  19. package/dist/repo/sdk/src/play.ts +77 -7
  20. package/dist/repo/sdk/src/plays/bundle-play-file.ts +5 -1
  21. package/dist/repo/sdk/src/release.ts +14 -11
  22. package/dist/repo/sdk/src/tool-output.ts +2 -2
  23. package/dist/repo/sdk/src/types.ts +9 -6
  24. package/dist/repo/shared_libs/play-data-plane/cell-policy.ts +76 -0
  25. package/dist/repo/shared_libs/play-data-plane/column-names.ts +17 -0
  26. package/dist/repo/shared_libs/play-data-plane/sheet-contract.ts +190 -0
  27. package/dist/repo/shared_libs/play-runtime/coordinator-headers.ts +2 -0
  28. package/dist/repo/shared_libs/play-runtime/db-session.ts +4 -0
  29. package/dist/repo/shared_libs/play-runtime/fullenrich-batching.ts +229 -0
  30. package/dist/repo/shared_libs/play-runtime/governor/policy.ts +1 -1
  31. package/dist/repo/shared_libs/play-runtime/play-runtime-batching-registry.ts +20 -0
  32. package/dist/repo/shared_libs/play-runtime/providers.ts +5 -24
  33. package/dist/repo/shared_libs/play-runtime/run-failure.ts +20 -12
  34. package/dist/repo/shared_libs/play-runtime/run-ledger.ts +115 -25
  35. package/dist/repo/shared_libs/play-runtime/run-snapshot-stream.ts +49 -0
  36. package/dist/repo/shared_libs/play-runtime/scheduler-backend.ts +22 -9
  37. package/dist/repo/shared_libs/play-runtime/secret-redaction.ts +15 -0
  38. package/dist/repo/shared_libs/play-runtime/work-receipts.ts +1 -0
  39. package/dist/repo/shared_libs/plays/bundling/index.ts +69 -11
  40. package/dist/repo/shared_libs/plays/static-pipeline.ts +4 -14
  41. package/dist/repo/shared_libs/security/outbound-url-policy.ts +238 -0
  42. package/dist/repo/shared_libs/security/safe-fetch.ts +118 -0
  43. package/dist/viewer/viewer.css +617 -0
  44. package/dist/viewer/viewer.js +1496 -0
  45. package/package.json +5 -1
@@ -42,7 +42,7 @@ import {
42
42
  executeChunkedRequests,
43
43
  type ChunkExecutionResult,
44
44
  } from '../../../shared_libs/play-runtime/batch-runtime';
45
- import { getDefaultPlayRuntimeBatchStrategy } from '../../../shared_libs/play-runtime/default-batch-strategies';
45
+ import { getPlayRuntimeBatchStrategy } from '../../../shared_libs/play-runtime/play-runtime-batching-registry';
46
46
  import { STANDARD_PLAY_RUNTIME_LIMIT_SECONDS } from '../../../shared_libs/temporal/constants';
47
47
  import {
48
48
  createPlayExecutionGovernor,
@@ -59,6 +59,7 @@ import {
59
59
  type ChildPlayTerminalWaitResult,
60
60
  type WorkflowStepLike,
61
61
  } from './child-play-await';
62
+ import { submitChildPlayThroughCoordinator } from './child-play-submit';
62
63
  import type { AnyBatchOperationStrategy } from '../../../shared_libs/play-runtime/batching-types';
63
64
  import {
64
65
  parseToolExecuteResponse,
@@ -84,11 +85,12 @@ import {
84
85
  getCompiledPipelineSubsteps,
85
86
  flattenStaticPipeline,
86
87
  resolveSheetContractForTableNamespace,
87
- sqlSafePlayColumnName,
88
88
  type PlayStaticSubstep,
89
89
  type PlayStaticPipeline,
90
90
  type PlaySheetContract,
91
91
  } from '../../../shared_libs/plays/static-pipeline';
92
+ import { sqlSafePlayColumnName } from '../../../shared_libs/play-data-plane/column-names';
93
+ import { augmentSheetContractWithDatasetFields } from '../../../shared_libs/play-data-plane/sheet-contract';
92
94
  import {
93
95
  PlayStepLifecycleTracker,
94
96
  type PlayStepLifecycleEvent,
@@ -131,10 +133,16 @@ import {
131
133
  setHarnessBinding,
132
134
  } from '../../../sdk/src/plays/harness-stub';
133
135
  import { createHarnessWorkerReceiptStore } from './runtime/harness-receipt-store';
136
+ import {
137
+ runtimeCsvExecutionRow,
138
+ publicCsvInputRow,
139
+ publicCsvOutputRow,
140
+ publicCsvStorageRow,
141
+ runtimeCsvStorageRow,
142
+ } from './runtime/csv-rows';
134
143
  import { chooseWorkerMapRowsPerChunk } from './runtime/map-chunk-plan';
135
144
  import {
136
145
  applyCsvRenameProjection,
137
- stripCsvProjectionMetadata,
138
146
  cloneCsvAliasedRow,
139
147
  type CsvRenameOptions,
140
148
  } from '../../../shared_libs/play-runtime/csv-rename';
@@ -153,6 +161,12 @@ import {
153
161
  type SecretAwareRequestInit,
154
162
  type SecretHandle,
155
163
  } from '../../../shared_libs/play-runtime/secret-capability';
164
+ import { safePublicFetch } from '../../../shared_libs/security/safe-fetch';
165
+ import {
166
+ assertPublicHttpUrl,
167
+ isIpAddressLiteral,
168
+ UnsafeOutboundUrlError,
169
+ } from '../../../shared_libs/security/outbound-url-policy';
156
170
  import type {
157
171
  LiveNodeProgressMap,
158
172
  LiveNodeProgressSnapshot,
@@ -174,7 +188,6 @@ import {
174
188
  } from '../../../shared_libs/play-runtime/step-program-dataset-builder';
175
189
  import {
176
190
  DEEPLINE_CELL_META_FIELD,
177
- normalizeCellStalenessPolicy,
178
191
  previousCellFromValue,
179
192
  resolveCompletedCellStalenessMeta,
180
193
  shouldRecomputeCell,
@@ -183,6 +196,10 @@ import {
183
196
  type CellStalenessPolicyByField,
184
197
  type PreviousCell,
185
198
  } from '../../../shared_libs/play-runtime/cell-staleness';
199
+ import {
200
+ authoredCellPoliciesFromDatasetSteps,
201
+ cellPoliciesFromDatasetSteps,
202
+ } from '../../../shared_libs/play-data-plane/cell-policy';
186
203
 
187
204
  // The play's default export. The bundler injects this — see bundle-play-file.ts.
188
205
  // eslint-disable-next-line @typescript-eslint/ban-ts-comment
@@ -701,57 +718,6 @@ function makeRequestId(): string {
701
718
  return crypto.randomUUID();
702
719
  }
703
720
 
704
- function publicCsvInputRow<T extends Record<string, unknown>>(row: T): T {
705
- const restored = stripCsvProjectionMetadata(row) as Record<string, unknown>;
706
- const publicRow: Record<string, unknown> = {};
707
- for (const fieldName of Reflect.ownKeys(restored)) {
708
- if (typeof fieldName === 'string' && fieldName.startsWith('__deepline')) {
709
- continue;
710
- }
711
- const descriptor = Object.getOwnPropertyDescriptor(restored, fieldName);
712
- if (!descriptor) continue;
713
- Object.defineProperty(publicRow, fieldName, descriptor);
714
- }
715
- return publicRow as T;
716
- }
717
-
718
- function publicCsvOutputRow<T extends Record<string, unknown>>(row: T): T {
719
- const stripped = stripCsvProjectionMetadata(row) as Record<string, unknown>;
720
- const publicRow: Record<string, unknown> = {};
721
- for (const fieldName of Reflect.ownKeys(stripped)) {
722
- if (typeof fieldName === 'string' && fieldName.startsWith('__deepline')) {
723
- continue;
724
- }
725
- const descriptor = Object.getOwnPropertyDescriptor(stripped, fieldName);
726
- if (!descriptor) continue;
727
- Object.defineProperty(publicRow, fieldName, descriptor);
728
- }
729
- return publicRow as T;
730
- }
731
-
732
- function publicCsvStorageRow<T extends Record<string, unknown>>(row: T): T {
733
- const publicRow = publicCsvInputRow(row) as Record<string, unknown>;
734
- const storageRow: Record<string, unknown> = {};
735
- for (const fieldName of Reflect.ownKeys(publicRow)) {
736
- if (typeof fieldName !== 'string') continue;
737
- const descriptor = Object.getOwnPropertyDescriptor(publicRow, fieldName);
738
- if (!descriptor) continue;
739
- storageRow[fieldName] =
740
- 'value' in descriptor ? descriptor.value : publicRow[fieldName];
741
- }
742
- for (const runtimeField of [
743
- '__deeplineRowKey',
744
- '__deeplineCellMetaPatch',
745
- '__deeplineRowStatus',
746
- '__deeplineRowError',
747
- ]) {
748
- if (Object.prototype.hasOwnProperty.call(row, runtimeField)) {
749
- storageRow[runtimeField] = row[runtimeField];
750
- }
751
- }
752
- return storageRow as T;
753
- }
754
-
755
721
  /**
756
722
  * Strip credentials and JWT-shaped tokens from any string before it lands in
757
723
  * a log buffer or upstream error message. The harness routinely echoes
@@ -893,90 +859,6 @@ async function postRuntimeApiBestEffort(
893
859
  }
894
860
  }
895
861
 
896
- async function submitChildPlayThroughCoordinator(input: {
897
- req: RunRequest;
898
- body: unknown;
899
- allowInline?: boolean;
900
- }): Promise<{
901
- workflowId?: string;
902
- runId?: string;
903
- status?: string;
904
- mode?: string;
905
- output?: unknown;
906
- result?: unknown;
907
- error?: unknown;
908
- logs?: string[];
909
- timings?: Array<{ phase: string; ms: number }>;
910
- }> {
911
- if (cachedCoordinatorBinding && input.allowInline !== false) {
912
- if (!isRecord(input.body)) {
913
- throw new Error('ctx.runPlay child submit requires an object body.');
914
- }
915
- return cachedCoordinatorBinding.submitChild(input.req.runId, input.body);
916
- }
917
- if (cachedCoordinatorBinding?.submitWorkflowChild) {
918
- if (!isRecord(input.body)) {
919
- throw new Error('ctx.runPlay child submit requires an object body.');
920
- }
921
- return cachedCoordinatorBinding.submitWorkflowChild(
922
- input.req.runId,
923
- input.body,
924
- );
925
- }
926
- const coordinatorUrl = input.req.coordinatorUrl?.trim();
927
- if (coordinatorUrl) {
928
- // Keep child plays on the same coordinator/Workflow submit path as
929
- // top-level runs when the coordinator URL is present.
930
- const res = await fetch(
931
- `${coordinatorUrl.replace(/\/$/, '')}/workflow/${encodeURIComponent(
932
- input.req.runId,
933
- )}/submit-child`,
934
- {
935
- method: 'POST',
936
- headers: {
937
- 'x-deepline-request-id': makeRequestId(),
938
- ...coordinatorRequestHeaders({
939
- runId: input.req.runId,
940
- contentType: 'application/json',
941
- internalToken: input.req.coordinatorInternalToken,
942
- }),
943
- },
944
- body: JSON.stringify(input.body),
945
- },
946
- );
947
- const text = await res.text().catch(() => '');
948
- let parsed: {
949
- workflowId?: string;
950
- runId?: string;
951
- status?: string;
952
- mode?: string;
953
- output?: unknown;
954
- result?: unknown;
955
- error?: unknown;
956
- logs?: string[];
957
- timings?: Array<{ phase: string; ms: number }>;
958
- } = {};
959
- try {
960
- parsed = text ? JSON.parse(text) : {};
961
- } catch {
962
- parsed = { error: text };
963
- }
964
- if (!res.ok) {
965
- const error = isRecord(parsed.error) ? parsed.error : null;
966
- const message =
967
- (typeof error?.message === 'string' && error.message.trim()) ||
968
- (typeof parsed.error === 'string' && parsed.error.trim()) ||
969
- text.slice(0, 800) ||
970
- `Coordinator child submit failed with ${res.status}.`;
971
- throw new Error(message);
972
- }
973
- return parsed;
974
- }
975
- throw new Error(
976
- 'ctx.runPlay child submit requires a coordinator binding in the cf-workflows runtime.',
977
- );
978
- }
979
-
980
862
  function workflowEventType(name: string): string {
981
863
  const normalized = name
982
864
  .trim()
@@ -1155,10 +1037,18 @@ async function executeToolWithLifecycle(
1155
1037
  args: { id: string; toolId: string; input: Record<string, unknown> },
1156
1038
  workflowStep: WorkflowStep | undefined,
1157
1039
  callbacks: WorkerCtxCallbacks | undefined,
1040
+ onProviderBackpressure?: (retryAfterMs: number) => void,
1041
+ onRetryAttempt?: () => void,
1158
1042
  ): Promise<ToolExecuteResult> {
1159
1043
  callbacks?.onToolCalled?.(args.toolId, nowMs());
1160
1044
  try {
1161
- return await executeTool(req, args, workflowStep);
1045
+ return await executeTool(
1046
+ req,
1047
+ args,
1048
+ workflowStep,
1049
+ onProviderBackpressure,
1050
+ onRetryAttempt,
1051
+ );
1162
1052
  } catch (error) {
1163
1053
  callbacks?.onToolFailed?.(args.toolId, nowMs());
1164
1054
  throw error;
@@ -1302,17 +1192,38 @@ async function callToolDirect(
1302
1192
  attempt <= WORKER_TOOL_RATE_LIMIT_MAX_ATTEMPTS;
1303
1193
  attempt += 1
1304
1194
  ) {
1305
- const res = await fetchRuntimeApi(req.baseUrl, path, {
1306
- method: 'POST',
1307
- headers: {
1308
- 'content-type': 'application/json',
1309
- authorization: `Bearer ${req.executorToken}`,
1310
- 'x-deepline-request-id': `${req.runId}:${toolId}:${id}:attempt:${attempt}`,
1311
- [EXECUTE_RESPONSE_CONTRACT_HEADER]: V2_EXECUTE_RESPONSE_CONTRACT,
1312
- [EXECUTE_TOOL_METADATA_HEADER]: 'true',
1313
- },
1314
- body: JSON.stringify({ payload: input }),
1315
- });
1195
+ let res: Response;
1196
+ try {
1197
+ res = await fetchRuntimeApi(req.baseUrl, path, {
1198
+ method: 'POST',
1199
+ headers: {
1200
+ 'content-type': 'application/json',
1201
+ authorization: `Bearer ${req.executorToken}`,
1202
+ 'x-deepline-request-id': `${req.runId}:${toolId}:${id}:attempt:${attempt}`,
1203
+ [EXECUTE_RESPONSE_CONTRACT_HEADER]: V2_EXECUTE_RESPONSE_CONTRACT,
1204
+ [EXECUTE_TOOL_METADATA_HEADER]: 'true',
1205
+ },
1206
+ body: JSON.stringify({ payload: input }),
1207
+ });
1208
+ } catch (error) {
1209
+ const message = error instanceof Error ? error.message : String(error);
1210
+ lastError = new Error(
1211
+ `Tool ${toolId} transport failed calling ${path} for run ${req.runId} on attempt ${attempt}/${WORKER_TOOL_TRANSPORT_MAX_ATTEMPTS}: ${message}`,
1212
+ );
1213
+ if (
1214
+ attempt >= WORKER_TOOL_TRANSPORT_MAX_ATTEMPTS ||
1215
+ !isRetryableRuntimeApiError(error)
1216
+ ) {
1217
+ throw lastError;
1218
+ }
1219
+ onRetryAttempt?.();
1220
+ const delayMs = WORKER_TOOL_TRANSPORT_RETRY_DELAY_MS * attempt;
1221
+ console.warn(
1222
+ `[deepline-run:${req.runId}] tool transport retry tool=${toolId} path=${path} attempt=${attempt}/${WORKER_TOOL_TRANSPORT_MAX_ATTEMPTS} retryAfterMs=${delayMs} error=${redactSecretsFromLogString(message)}`,
1223
+ );
1224
+ await sleepWorkerMs(delayMs);
1225
+ continue;
1226
+ }
1316
1227
  if (res.ok) {
1317
1228
  const body = (await res.json()) as Record<string, unknown>;
1318
1229
  const parsed = parseToolExecuteResponse(toolId, body);
@@ -1547,6 +1458,8 @@ const WORKER_RETRY_SAFE_5XX_TOOLS = new Set(['test_transient_500']);
1547
1458
  * retry budget, so a runaway storm stays bounded and loud.
1548
1459
  */
1549
1460
  const WORKER_TOOL_RATE_LIMIT_MAX_ATTEMPTS = 8;
1461
+ const WORKER_TOOL_TRANSPORT_MAX_ATTEMPTS = 3;
1462
+ const WORKER_TOOL_TRANSPORT_RETRY_DELAY_MS = 1_000;
1550
1463
 
1551
1464
  function sleepWorkerMs(ms: number): Promise<void> {
1552
1465
  return new Promise((resolve) => setTimeout(resolve, ms));
@@ -1566,6 +1479,7 @@ class WorkerToolBatchScheduler {
1566
1479
  private readonly resolvePacing: WorkerPacingResolver,
1567
1480
  private readonly abortSignal?: AbortSignal,
1568
1481
  private readonly onRequestsSettled?: (count: number) => void,
1482
+ private readonly callbacks?: WorkerCtxCallbacks,
1569
1483
  ) {}
1570
1484
 
1571
1485
  /**
@@ -1632,7 +1546,7 @@ class WorkerToolBatchScheduler {
1632
1546
  return this.queue.some(
1633
1547
  (request) =>
1634
1548
  request.toolId !== 'test_wait_for_event' &&
1635
- getDefaultPlayRuntimeBatchStrategy(request.toolId) !== null,
1549
+ getPlayRuntimeBatchStrategy(request.toolId) !== null,
1636
1550
  );
1637
1551
  }
1638
1552
 
@@ -1663,8 +1577,8 @@ class WorkerToolBatchScheduler {
1663
1577
  toolId: string,
1664
1578
  requests: WorkerToolBatchRequest[],
1665
1579
  ): Promise<void> {
1666
- const strategy = getDefaultPlayRuntimeBatchStrategy(toolId);
1667
- if (!strategy || toolId === 'test_wait_for_event') {
1580
+ const strategy = getPlayRuntimeBatchStrategy(toolId);
1581
+ if (!strategy || toolId === 'test_wait_for_event' || requests.length < 2) {
1668
1582
  const groupStartedAt = nowMs();
1669
1583
  await Promise.all(
1670
1584
  requests.map(async (request) => {
@@ -1676,10 +1590,11 @@ class WorkerToolBatchScheduler {
1676
1590
  });
1677
1591
  try {
1678
1592
  request.resolve(
1679
- await executeTool(
1593
+ await executeToolWithLifecycle(
1680
1594
  this.req,
1681
1595
  { id: request.id, toolId, input: request.input },
1682
1596
  request.workflowStep,
1597
+ this.callbacks,
1683
1598
  (retryAfterMs) => this.reportBackpressure(toolId, retryAfterMs),
1684
1599
  () => this.governor.chargeBudget('retry'),
1685
1600
  ),
@@ -1715,6 +1630,7 @@ class WorkerToolBatchScheduler {
1715
1630
  reportBackpressure: (retryAfterMs) =>
1716
1631
  this.reportBackpressure(toolId, retryAfterMs),
1717
1632
  onRequestsSettled: this.onRequestsSettled,
1633
+ callbacks: this.callbacks,
1718
1634
  });
1719
1635
  recordRunnerPerfTrace({
1720
1636
  req: this.req,
@@ -1749,6 +1665,7 @@ async function executeBatchedWorkerToolGroup(input: {
1749
1665
  abortSignal?: AbortSignal;
1750
1666
  reportBackpressure: (retryAfterMs: number) => void;
1751
1667
  onRequestsSettled?: (count: number) => void;
1668
+ callbacks?: WorkerCtxCallbacks;
1752
1669
  }): Promise<void> {
1753
1670
  const compiledBatches = compileRequestsWithStrategy({
1754
1671
  requests: input.requests,
@@ -1783,6 +1700,7 @@ async function executeBatchedWorkerToolGroup(input: {
1783
1700
  signal: input.abortSignal,
1784
1701
  });
1785
1702
  try {
1703
+ input.callbacks?.onToolCalled?.(batch.batchOperation, nowMs());
1786
1704
  return await executeTool(
1787
1705
  input.req,
1788
1706
  {
@@ -1794,6 +1712,9 @@ async function executeBatchedWorkerToolGroup(input: {
1794
1712
  input.reportBackpressure,
1795
1713
  () => input.governor.chargeBudget('retry'),
1796
1714
  );
1715
+ } catch (error) {
1716
+ input.callbacks?.onToolFailed?.(batch.batchOperation, nowMs());
1717
+ throw error;
1797
1718
  } finally {
1798
1719
  slot.release();
1799
1720
  }
@@ -1995,47 +1916,16 @@ type WorkerMapOptions = {
1995
1916
  onRowError?: 'isolate' | 'fail';
1996
1917
  };
1997
1918
 
1998
- function authoredCellPolicyForWorkerStep(
1999
- step: WorkerStepProgramStep,
2000
- ): {
2001
- recompute?: true;
2002
- recomputeOnError?: true;
2003
- staleAfterSeconds?: AuthoredStaleAfterSeconds;
2004
- } | null {
2005
- const policy = {
2006
- ...(step.recompute === true ? { recompute: true as const } : {}),
2007
- ...(step.recomputeOnError === true
2008
- ? { recomputeOnError: true as const }
2009
- : {}),
2010
- ...(step.staleAfterSeconds !== undefined
2011
- ? { staleAfterSeconds: step.staleAfterSeconds }
2012
- : {}),
2013
- };
2014
- return Object.keys(policy).length > 0 ? policy : null;
2015
- }
2016
-
2017
1919
  function workerCellPoliciesFromSteps(
2018
1920
  steps: readonly WorkerStepProgramStep[],
2019
1921
  ): CellStalenessPolicyByField {
2020
- return Object.fromEntries(
2021
- steps.flatMap((step) => {
2022
- const policy = authoredCellPolicyForWorkerStep(step);
2023
- return policy
2024
- ? [[step.name, normalizeCellStalenessPolicy(policy)]]
2025
- : [];
2026
- }),
2027
- ) as CellStalenessPolicyByField;
1922
+ return cellPoliciesFromDatasetSteps(steps);
2028
1923
  }
2029
1924
 
2030
1925
  function authoredWorkerCellPoliciesFromSteps(
2031
1926
  steps: readonly WorkerStepProgramStep[],
2032
1927
  ): AuthoredCellStalenessPolicyByField {
2033
- return Object.fromEntries(
2034
- steps.flatMap((step) => {
2035
- const policy = authoredCellPolicyForWorkerStep(step);
2036
- return policy ? [[step.name, policy]] : [];
2037
- }),
2038
- ) as AuthoredCellStalenessPolicyByField;
1928
+ return authoredCellPoliciesFromDatasetSteps(steps);
2039
1929
  }
2040
1930
 
2041
1931
  /**
@@ -2431,6 +2321,46 @@ function parseFetchJsonOrNull(bodyText: string): unknown | null {
2431
2321
  }
2432
2322
  }
2433
2323
 
2324
+ async function safeWorkerPublicFetch(
2325
+ input: string | URL,
2326
+ init: RequestInit,
2327
+ options: {
2328
+ allowedOrigins: Iterable<string>;
2329
+ sensitiveHeaders: Iterable<string>;
2330
+ },
2331
+ ): Promise<Response> {
2332
+ const allowedOrigins = new Set(options.allowedOrigins);
2333
+ return safePublicFetch(input, init, {
2334
+ sensitiveHeaders: options.sensitiveHeaders,
2335
+ fetchImpl: async (nextInput, nextInit) => {
2336
+ const url = assertPublicHttpUrl(nextInput);
2337
+ if (
2338
+ !isIpAddressLiteral(url.hostname) &&
2339
+ !allowedOrigins.has(url.origin)
2340
+ ) {
2341
+ throw new UnsafeOutboundUrlError(
2342
+ 'workers_edge ctx.fetch requires a public IP literal target or Deepline runtime origin. Use a Deepline integration tool for other hostname URLs.',
2343
+ );
2344
+ }
2345
+ return fetch(url, nextInit);
2346
+ },
2347
+ });
2348
+ }
2349
+
2350
+ function normalizeAllowedWorkerFetchOrigin(rawUrl: string): string | null {
2351
+ try {
2352
+ return assertPublicHttpUrl(rawUrl).origin;
2353
+ } catch {
2354
+ return null;
2355
+ }
2356
+ }
2357
+
2358
+ function getAllowedWorkerFetchOrigins(req: RunRequest): string[] {
2359
+ return [req.baseUrl, req.callbackUrl]
2360
+ .map(normalizeAllowedWorkerFetchOrigin)
2361
+ .filter((origin): origin is string => origin !== null);
2362
+ }
2363
+
2434
2364
  // ---------------------------------------------------------------------------
2435
2365
  // Streaming CSV parser. Pipes a `ReadableStream<Uint8Array>` from R2 through
2436
2366
  // a TextDecoder + line buffer + RFC-4180-ish state machine, yielding chunks
@@ -2964,96 +2894,6 @@ function requireSheetContract(
2964
2894
  return contract;
2965
2895
  }
2966
2896
 
2967
- function isDatasetPayloadField(field: string): boolean {
2968
- return (
2969
- field.length > 0 &&
2970
- !field.startsWith('__deepline') &&
2971
- field !== '_key' &&
2972
- field !== '_status' &&
2973
- field !== '_run_id' &&
2974
- field !== '_error' &&
2975
- field !== '_stage' &&
2976
- field !== '_provider' &&
2977
- field !== '_input_index' &&
2978
- field !== '_created_at' &&
2979
- field !== '_updated_at' &&
2980
- field !== '_cell_meta'
2981
- );
2982
- }
2983
-
2984
- function augmentSheetContractWithDatasetFields(input: {
2985
- contract: PlaySheetContract;
2986
- rows: readonly Record<string, unknown>[];
2987
- outputFields?: readonly string[];
2988
- }): PlaySheetContract {
2989
- const outputFields = new Set(input.outputFields ?? []);
2990
- const candidateFields = new Set<string>();
2991
- for (const row of input.rows) {
2992
- for (const field of Object.keys(row)) {
2993
- if (isDatasetPayloadField(field)) {
2994
- candidateFields.add(field);
2995
- }
2996
- }
2997
- }
2998
- for (const field of outputFields) {
2999
- if (isDatasetPayloadField(field)) {
3000
- candidateFields.add(field);
3001
- }
3002
- }
3003
-
3004
- const existingFields = new Set<string>();
3005
- const existingSqlNames = new Set<string>();
3006
- const inputColumns: PlaySheetContract['columns'] = [];
3007
- const outputColumns: PlaySheetContract['columns'] = [];
3008
- const appendColumn = (
3009
- target: PlaySheetContract['columns'],
3010
- column: PlaySheetContract['columns'][number],
3011
- ) => {
3012
- const field = typeof column.field === 'string' ? column.field : column.id;
3013
- const sqlName = column.sqlName.trim();
3014
- if (
3015
- !field ||
3016
- !sqlName ||
3017
- existingFields.has(field) ||
3018
- existingSqlNames.has(sqlName)
3019
- ) {
3020
- return;
3021
- }
3022
- existingFields.add(field);
3023
- existingSqlNames.add(sqlName);
3024
- target.push(column);
3025
- };
3026
-
3027
- for (const column of input.contract.columns) {
3028
- const field = typeof column.field === 'string' ? column.field : column.id;
3029
- if (
3030
- column.source === 'input' &&
3031
- ((field === input.contract.tableNamespace && !candidateFields.has(field)) ||
3032
- outputFields.has(field))
3033
- ) {
3034
- continue;
3035
- }
3036
- appendColumn(
3037
- column.source === 'input' ? inputColumns : outputColumns,
3038
- column,
3039
- );
3040
- }
3041
-
3042
- for (const field of candidateFields) {
3043
- if (existingFields.has(field)) continue;
3044
- const sqlName = sqlSafePlayColumnName(field);
3045
- if (existingSqlNames.has(sqlName)) continue;
3046
- appendColumn(outputFields.has(field) ? outputColumns : inputColumns, {
3047
- id: `runtime:${input.contract.tableNamespace}:${field}`,
3048
- sqlName,
3049
- source: outputFields.has(field) ? 'datasetColumn' : 'input',
3050
- field,
3051
- });
3052
- }
3053
-
3054
- return { ...input.contract, columns: [...inputColumns, ...outputColumns] };
3055
- }
3056
-
3057
2897
  async function persistCompletedMapRows(input: {
3058
2898
  req: RunRequest;
3059
2899
  tableNamespace: string;
@@ -3112,10 +2952,10 @@ async function prepareMapRows(input: {
3112
2952
  tableNamespace: input.tableNamespace,
3113
2953
  sheetContract: augmentSheetContractWithDatasetFields({
3114
2954
  contract: requireSheetContract(input.req, input.tableNamespace),
3115
- rows: input.rows.map((row) => publicCsvStorageRow(row)),
2955
+ rows: input.rows.map((row) => runtimeCsvStorageRow(row)),
3116
2956
  outputFields: input.outputFields,
3117
2957
  }),
3118
- rows: input.rows.map((row) => publicCsvStorageRow(row)),
2958
+ rows: input.rows.map((row) => runtimeCsvStorageRow(row)),
3119
2959
  runId: input.req.runId,
3120
2960
  inputOffset: input.inputOffset,
3121
2961
  userEmail: input.req.userEmail,
@@ -3493,7 +3333,8 @@ function createMinimalWorkerCtx(
3493
3333
  const executeWithRuntimeReceipt = async <T>(
3494
3334
  key: string,
3495
3335
  execute: () => Promise<T> | T,
3496
- repairRunningReceiptForSameRun = false,
3336
+ repairRunningReceiptForSameRun = true,
3337
+ reclaimRunning = false,
3497
3338
  ): Promise<T> => {
3498
3339
  const serialized = await runWorkerRuntimeReceiptBoundary<unknown>({
3499
3340
  orgId: req.orgId,
@@ -3503,6 +3344,7 @@ function createMinimalWorkerCtx(
3503
3344
  receiptStore,
3504
3345
  execute: async () => serializeDurableStepValue(await execute()),
3505
3346
  repairRunningReceiptForSameRun,
3347
+ reclaimRunning,
3506
3348
  });
3507
3349
  return deserializeDurableStepValue(serialized) as T;
3508
3350
  };
@@ -3524,7 +3366,7 @@ function createMinimalWorkerCtx(
3524
3366
  )(name, async () => serializeDurableStepValue(await execute()));
3525
3367
  return deserializeDurableStepValue(serialized) as T;
3526
3368
  },
3527
- true,
3369
+ false,
3528
3370
  );
3529
3371
  };
3530
3372
  const nextCtxStepReceiptKey = (name: string): string => {
@@ -3545,6 +3387,14 @@ function createMinimalWorkerCtx(
3545
3387
  }
3546
3388
  return `:stale:${staleAfterSeconds}:${Math.floor(nowMs() / (staleAfterSeconds * 1000))}`;
3547
3389
  };
3390
+ const rootToolBatchScheduler = new WorkerToolBatchScheduler(
3391
+ req,
3392
+ governor,
3393
+ resolveToolPacing,
3394
+ abortSignal,
3395
+ undefined,
3396
+ callbacks,
3397
+ );
3548
3398
  // Local ancestry chain that always ENDS with the currently-executing play
3549
3399
  // (req.playName). The /api/v2/plays/run lineage validator requires the
3550
3400
  // submitted ancestry's tail to equal the executor token's play name (i.e.
@@ -3682,7 +3532,7 @@ function createMinimalWorkerCtx(
3682
3532
  if (parts.some((part) => part === null || part === undefined)) {
3683
3533
  throw new Error(
3684
3534
  `ctx.dataset("${name}") key returned null or undefined for row ${index}. ` +
3685
- 'Return a non-empty string or number derived from a stable input column.',
3535
+ 'Use a stable non-empty key.',
3686
3536
  );
3687
3537
  }
3688
3538
  const normalizedParts = parts.map((part) => {
@@ -3694,7 +3544,7 @@ function createMinimalWorkerCtx(
3694
3544
  if (normalizedParts.some((part) => !part)) {
3695
3545
  throw new Error(
3696
3546
  `ctx.dataset("${name}") key returned an empty value for row ${index}. ` +
3697
- 'Return a non-empty string or finite number derived from a stable input column.',
3547
+ 'Use a stable non-empty finite key.',
3698
3548
  );
3699
3549
  }
3700
3550
  const keyValue =
@@ -3991,19 +3841,8 @@ function createMinimalWorkerCtx(
3991
3841
  reportExecutionHeartbeat(false);
3992
3842
  const entry = uniqueRowsToExecuteEntries[myIndex]!;
3993
3843
  const pendingRow = pendingRowsByKey.get(entry.rowKey);
3994
- const row = pendingRow
3995
- ? ({
3996
- ...entry.row,
3997
- ...publicCsvInputRow(pendingRow),
3998
- ...(pendingRow[DEEPLINE_CELL_META_FIELD] &&
3999
- typeof pendingRow[DEEPLINE_CELL_META_FIELD] === 'object'
4000
- ? {
4001
- [DEEPLINE_CELL_META_FIELD]:
4002
- pendingRow[DEEPLINE_CELL_META_FIELD],
4003
- }
4004
- : {}),
4005
- } as T & Record<string, unknown>)
4006
- : entry.row;
3844
+ const row = runtimeCsvExecutionRow(entry.row, pendingRow) as T &
3845
+ Record<string, unknown>;
4007
3846
  const absoluteIndex = entry.absoluteIndex;
4008
3847
  const enriched: Record<string, unknown> =
4009
3848
  cloneCsvAliasedRow(row);
@@ -4597,7 +4436,10 @@ function createMinimalWorkerCtx(
4597
4436
  const failFastRowErrors = opts?.onRowError === 'fail';
4598
4437
  let chunkIndex = 0;
4599
4438
  let chunkStart = 0;
4600
- for await (const rawChunkRows of iterDatasetChunks(inputRows, rowsPerChunk)) {
4439
+ for await (const rawChunkRows of iterDatasetChunks(
4440
+ inputRows,
4441
+ rowsPerChunk,
4442
+ )) {
4601
4443
  assertNotAborted(abortSignal);
4602
4444
  if (rawChunkRows.length === 0) continue;
4603
4445
  // Drop duplicate explicit-key rows before anything downstream observes
@@ -5004,7 +4846,13 @@ function createMinimalWorkerCtx(
5004
4846
  toolId: request.toolId,
5005
4847
  requestInput: request.input,
5006
4848
  })}${staleRuntimeSuffix(request.staleAfterSeconds)}`,
5007
- () => executeToolWithLifecycle(req, request, workflowStep, callbacks),
4849
+ () =>
4850
+ rootToolBatchScheduler.execute(
4851
+ request.id,
4852
+ request.toolId,
4853
+ request.input,
4854
+ workflowStep,
4855
+ ),
5008
4856
  );
5009
4857
  },
5010
4858
  },
@@ -5157,6 +5005,9 @@ function createMinimalWorkerCtx(
5157
5005
  try {
5158
5006
  started = await submitChildPlayThroughCoordinator({
5159
5007
  req,
5008
+ coordinatorBinding: cachedCoordinatorBinding,
5009
+ makeRequestId,
5010
+ coordinatorRequestHeaders,
5160
5011
  allowInline:
5161
5012
  options?.timeoutMs == null && !childNeedsWorkflowScheduler,
5162
5013
  body: {
@@ -5462,7 +5313,10 @@ function createMinimalWorkerCtx(
5462
5313
  };
5463
5314
  const fetchInit = { ...init, headers };
5464
5315
  delete fetchInit.auth;
5465
- const response = await fetch(url, fetchInit);
5316
+ const response = await safeWorkerPublicFetch(url, fetchInit, {
5317
+ allowedOrigins: getAllowedWorkerFetchOrigins(req),
5318
+ sensitiveHeaders: Object.keys(secretHeaderMarkers),
5319
+ });
5466
5320
  assertNotAborted(abortSignal);
5467
5321
  const bodyText = await response.text();
5468
5322
  const redactedBodyText = secretRedactor.redactString(bodyText);
@@ -5748,12 +5602,10 @@ async function executeRunRequest(
5748
5602
  let runLogBuffer: string[] = [];
5749
5603
  let pendingRunLogLines: string[] = [];
5750
5604
  // Monotonic count of every line ever appended to this run's worker log
5751
- // channel. runLogBuffer/pendingRunLogLines are rotating tails of those
5752
- // lines (RUN_LOG_BUFFER_LIMIT is the coordinator transport cache only), so
5753
- // each log.appended batch can carry the absolute channelOffset of its first
5754
- // line: totalEmittedLogLines - pendingRunLogLines.length. Run Log Stream
5755
- // ingestion skips re-sent prefixes positionally (exactly-once, repeated
5756
- // identical lines preserved) instead of text-deduping.
5605
+ // channel. runLogBuffer is only the rotating live/coordinator transport
5606
+ // cache; pendingRunLogLines is the durable unsent suffix and must not rotate,
5607
+ // otherwise a flush already in flight can let fresh lines fall out before
5608
+ // Run Log Stream ingestion ever sees them.
5757
5609
  let totalEmittedLogLines = 0;
5758
5610
  let stepProgressByNodeId: LiveNodeProgressMap = {};
5759
5611
  let dirtyProgressNodeIds = new Set<string>();
@@ -5779,9 +5631,7 @@ async function executeRunRequest(
5779
5631
  if (!trimmed) return;
5780
5632
  totalEmittedLogLines += 1;
5781
5633
  runLogBuffer = [...runLogBuffer, trimmed].slice(-RUN_LOG_BUFFER_LIMIT);
5782
- pendingRunLogLines = [...pendingRunLogLines, trimmed].slice(
5783
- -RUN_LOG_BUFFER_LIMIT,
5784
- );
5634
+ pendingRunLogLines = [...pendingRunLogLines, trimmed];
5785
5635
  };
5786
5636
 
5787
5637
  const updateStepProgress = (input: {
@@ -5970,9 +5820,7 @@ async function executeRunRequest(
5970
5820
  lines: pendingRunLogLines,
5971
5821
  // Positional cursor: pendingRunLogLines always holds the LAST
5972
5822
  // pending lines emitted on this channel, so the offset of its first
5973
- // line is total-emitted minus pending length. This also covers the
5974
- // terminal full-buffer re-send (pending = runLogBuffer), which
5975
- // ingestion then skips positionally instead of via text dedupe.
5823
+ // line is total-emitted minus pending length.
5976
5824
  channelOffset: totalEmittedLogLines - pendingRunLogLines.length,
5977
5825
  });
5978
5826
  pendingRunLogLines = [];
@@ -6068,18 +5916,13 @@ async function executeRunRequest(
6068
5916
  terminalEvent: PlayRunLedgerEvent,
6069
5917
  ): Promise<void> => {
6070
5918
  if (!options?.persistResultDatasets) return;
5919
+ await ledgerFlushInFlight;
6071
5920
  const now = nowMs();
6072
- // Terminal re-send of the full retained buffer. drainPendingLedgerEvents
6073
- // stamps it with channelOffset = totalEmitted - buffer length, so Run Log
6074
- // Stream ingestion drops the already-ingested prefix positionally.
6075
- pendingRunLogLines = runLogBuffer;
6076
5921
  dirtyProgressNodeIds = new Set([
6077
5922
  ...dirtyProgressNodeIds,
6078
5923
  ...Object.keys(stepProgressByNodeId),
6079
5924
  ]);
6080
- pendingLedgerEvents = [...pendingLedgerEvents, terminalEvent];
6081
- await ledgerFlushInFlight;
6082
- const events = drainPendingLedgerEvents(now);
5925
+ const events = [...drainPendingLedgerEvents(now), terminalEvent];
6083
5926
  if (events.length === 0) return;
6084
5927
  try {
6085
5928
  await postRuntimeApi(req.baseUrl, req.executorToken, {
@@ -6294,7 +6137,6 @@ async function executeRunRequest(
6294
6137
  playName: req.playName,
6295
6138
  result: serializedResult,
6296
6139
  outputRows: inferOutputRows(serializedResult),
6297
- liveLogs: runLogBuffer,
6298
6140
  liveNodeProgress: stepProgressSnapshot(),
6299
6141
  durationMs: nowMs() - startedAt,
6300
6142
  };