deepline 0.1.150 → 0.1.152

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. package/dist/bundling-sources/apps/play-runner-workers/src/entry.ts +170 -168
  2. package/dist/bundling-sources/apps/play-runner-workers/src/runtime/csv-rows.ts +2 -19
  3. package/dist/bundling-sources/apps/play-runner-workers/src/runtime/row-isolation.ts +5 -53
  4. package/dist/bundling-sources/sdk/src/config.ts +2 -2
  5. package/dist/bundling-sources/sdk/src/release.ts +2 -2
  6. package/dist/bundling-sources/shared_libs/play-runtime/context.ts +101 -162
  7. package/dist/bundling-sources/shared_libs/play-runtime/ctx-types.ts +3 -0
  8. package/dist/bundling-sources/shared_libs/play-runtime/durability-store.ts +54 -0
  9. package/dist/bundling-sources/shared_libs/play-runtime/map-row-outcome.ts +167 -0
  10. package/dist/bundling-sources/shared_libs/play-runtime/pacing.ts +79 -0
  11. package/dist/bundling-sources/shared_libs/play-runtime/row-isolation.ts +39 -0
  12. package/dist/bundling-sources/shared_libs/play-runtime/runtime-api.ts +36 -86
  13. package/dist/bundling-sources/shared_libs/play-runtime/runtime-sheet-row-transition.ts +90 -0
  14. package/dist/bundling-sources/shared_libs/play-runtime/runtime-sheet-session.ts +43 -0
  15. package/dist/bundling-sources/shared_libs/play-runtime/tool-execute-retry-policy.ts +142 -11
  16. package/dist/bundling-sources/shared_libs/play-runtime/tool-http-errors.ts +3 -2
  17. package/dist/bundling-sources/shared_libs/plays/bundling/index.ts +20 -23
  18. package/dist/cli/index.js +35 -3
  19. package/dist/cli/index.mjs +35 -3
  20. package/dist/index.js +3 -3
  21. package/dist/index.mjs +3 -3
  22. package/dist/plays/bundle-play-file.mjs +22 -19
  23. package/package.json +1 -1
@@ -55,13 +55,17 @@ import {
55
55
  import {
56
56
  CTX_FETCH_EGRESS_PROVIDER,
57
57
  CTX_FETCH_EGRESS_TOOL_ID,
58
- resolveBuiltinPacing,
59
58
  } from '../../../shared_libs/play-runtime/builtin-pacing';
60
59
  import {
61
60
  CoordinatorRateStateBackend,
62
61
  type CoordinatorRatePort,
63
62
  } from '../../../shared_libs/play-runtime/governor/coordinator-rate-state-backend';
64
63
  import type { PacingRule } from '../../../shared_libs/play-runtime/governor/rate-state-backend';
64
+ import {
65
+ pacingPolicyForTool,
66
+ pacingPolicyFromUnknownQueueHints,
67
+ type ResolvedPacingPolicy,
68
+ } from '../../../shared_libs/play-runtime/pacing';
65
69
  import {
66
70
  awaitChildTerminal,
67
71
  type ChildPlayTerminalWaitResult,
@@ -83,7 +87,8 @@ import {
83
87
  TOOL_EXECUTE_RATE_LIMIT_MAX_ATTEMPTS,
84
88
  TOOL_EXECUTE_TRANSPORT_MAX_ATTEMPTS,
85
89
  TOOL_EXECUTE_TRANSPORT_RETRY_DELAY_MS,
86
- decideToolExecuteHttpRetry,
90
+ classifyToolExecuteHttpFailure,
91
+ createToolExecuteHttpFailureAttemptTracker,
87
92
  } from '../../../shared_libs/play-runtime/tool-execute-retry-policy';
88
93
  import type { PlayCallGovernanceSnapshot } from '../../../shared_libs/play-runtime/scheduler-backend';
89
94
  import type { PreloadedRuntimeDbSession } from '../../../shared_libs/play-runtime/db-session';
@@ -161,6 +166,15 @@ import {
161
166
  publicCsvStorageRow,
162
167
  runtimeCsvStorageRow,
163
168
  } from './runtime/csv-rows';
169
+ import {
170
+ completedMapRowOutcome,
171
+ failedMapRowOutcome,
172
+ mapRowOutcomeRuntimeRow,
173
+ mapRowOutcomeRuntimeFields,
174
+ resolveMapRowOutcomeKey,
175
+ stripMapRowOutcomeRuntimeFields,
176
+ } from '../../../shared_libs/play-runtime/map-row-outcome';
177
+ import { runtimeSheetSessionScope } from '../../../shared_libs/play-runtime/runtime-sheet-session';
164
178
  import { chooseWorkerMapRowsPerChunk } from './runtime/map-chunk-plan';
165
179
  import {
166
180
  applyCsvRenameProjection,
@@ -194,11 +208,7 @@ import type {
194
208
  LiveNodeProgressMap,
195
209
  LiveNodeProgressSnapshot,
196
210
  } from './runtime/live-progress';
197
- import {
198
- extractErrorBilling,
199
- isHardBillingToolHttpError,
200
- normalizeToolHttpErrorMessage,
201
- } from './runtime/tool-http-errors';
211
+ import { extractErrorBilling } from './runtime/tool-http-errors';
202
212
  import {
203
213
  WorkflowAbortError,
204
214
  isAbortLikeError,
@@ -326,6 +336,7 @@ type WorkerEnv = {
326
336
  runtimeApiCall(input: {
327
337
  executorToken: string;
328
338
  path: string;
339
+ method?: string;
329
340
  body: unknown;
330
341
  headers?: Record<string, string>;
331
342
  timeoutMs?: number;
@@ -581,6 +592,7 @@ async function callRuntimeApiRpcBinding(
581
592
  const result = await binding.runtimeApiCall({
582
593
  executorToken: authorization.replace(/^Bearer\s+/i, '').trim(),
583
594
  path: input.path,
595
+ method: init.method ?? 'POST',
584
596
  body: rawBody ? JSON.parse(rawBody) : {},
585
597
  headers,
586
598
  timeoutMs: input.timeoutMs,
@@ -1156,6 +1168,7 @@ async function executeTool(
1156
1168
  workflowStep?: WorkflowStep,
1157
1169
  onProviderBackpressure?: (retryAfterMs: number) => void,
1158
1170
  onRetryAttempt?: () => void,
1171
+ transientHttpRetrySafe = false,
1159
1172
  ): Promise<ToolExecuteResult> {
1160
1173
  if (args.toolId === 'test_wait_for_event' && workflowStep) {
1161
1174
  const result = await waitForSyntheticIntegrationEvent(
@@ -1170,7 +1183,13 @@ async function executeTool(
1170
1183
  // service bindings, NOT through HTTP from this worker. Removing the
1171
1184
  // dispatcher-side coordinatorUrl plumbing intentionally turns the old
1172
1185
  // HTTP-based dedup helpers into dead code.
1173
- return callToolDirect(req, args, onProviderBackpressure, onRetryAttempt);
1186
+ return callToolDirect(
1187
+ req,
1188
+ args,
1189
+ onProviderBackpressure,
1190
+ onRetryAttempt,
1191
+ transientHttpRetrySafe,
1192
+ );
1174
1193
  }
1175
1194
 
1176
1195
  async function executeToolWithLifecycle(
@@ -1180,6 +1199,7 @@ async function executeToolWithLifecycle(
1180
1199
  callbacks: WorkerCtxCallbacks | undefined,
1181
1200
  onProviderBackpressure?: (retryAfterMs: number) => void,
1182
1201
  onRetryAttempt?: () => void,
1202
+ transientHttpRetrySafe = false,
1183
1203
  ): Promise<ToolExecuteResult> {
1184
1204
  callbacks?.onToolCalled?.(args.toolId, nowMs());
1185
1205
  try {
@@ -1189,6 +1209,7 @@ async function executeToolWithLifecycle(
1189
1209
  workflowStep,
1190
1210
  onProviderBackpressure,
1191
1211
  onRetryAttempt,
1212
+ transientHttpRetrySafe,
1192
1213
  );
1193
1214
  } catch (error) {
1194
1215
  callbacks?.onToolFailed?.(args.toolId, nowMs());
@@ -1322,16 +1343,17 @@ async function callToolDirect(
1322
1343
  // 429 / transient-5xx retry). Without this the worker substrate would leave
1323
1344
  // policy.budgets.maxRetryCount effectively unenforced.
1324
1345
  onRetryAttempt?: () => void,
1346
+ transientHttpRetrySafe = false,
1325
1347
  ): Promise<ToolExecuteResult> {
1326
1348
  const { id, toolId, input } = args;
1327
1349
  const path = `/api/v2/integrations/${encodeURIComponent(toolId)}/execute`;
1328
1350
  let lastError: Error | null = null;
1351
+ const httpFailureAttempts = createToolExecuteHttpFailureAttemptTracker();
1352
+ let requestAttempt = 0;
1353
+ let transportAttempt = 0;
1329
1354
 
1330
- for (
1331
- let attempt = 1;
1332
- attempt <= TOOL_EXECUTE_RATE_LIMIT_MAX_ATTEMPTS;
1333
- attempt += 1
1334
- ) {
1355
+ while (true) {
1356
+ requestAttempt += 1;
1335
1357
  let res: Response;
1336
1358
  try {
1337
1359
  res = await fetchRuntimeApi(req.baseUrl, path, {
@@ -1339,7 +1361,7 @@ async function callToolDirect(
1339
1361
  headers: {
1340
1362
  'content-type': 'application/json',
1341
1363
  authorization: `Bearer ${req.executorToken}`,
1342
- 'x-deepline-request-id': `${req.runId}:${toolId}:${id}:attempt:${attempt}`,
1364
+ 'x-deepline-request-id': `${req.runId}:${toolId}:${id}:attempt:${requestAttempt}`,
1343
1365
  [EXECUTE_RESPONSE_CONTRACT_HEADER]: V2_EXECUTE_RESPONSE_CONTRACT,
1344
1366
  [EXECUTE_TOOL_METADATA_HEADER]: 'true',
1345
1367
  },
@@ -1349,20 +1371,21 @@ async function callToolDirect(
1349
1371
  }),
1350
1372
  });
1351
1373
  } catch (error) {
1374
+ transportAttempt += 1;
1352
1375
  const message = error instanceof Error ? error.message : String(error);
1353
1376
  lastError = new Error(
1354
- `Tool ${toolId} transport failed calling ${path} for run ${req.runId} on attempt ${attempt}/${TOOL_EXECUTE_TRANSPORT_MAX_ATTEMPTS}: ${message}`,
1377
+ `Tool ${toolId} transport failed calling ${path} for run ${req.runId} on attempt ${transportAttempt}/${TOOL_EXECUTE_TRANSPORT_MAX_ATTEMPTS}: ${message}`,
1355
1378
  );
1356
1379
  if (
1357
- attempt >= TOOL_EXECUTE_TRANSPORT_MAX_ATTEMPTS ||
1380
+ transportAttempt >= TOOL_EXECUTE_TRANSPORT_MAX_ATTEMPTS ||
1358
1381
  !isRetryableRuntimeApiError(error)
1359
1382
  ) {
1360
1383
  throw lastError;
1361
1384
  }
1362
1385
  onRetryAttempt?.();
1363
- const delayMs = TOOL_EXECUTE_TRANSPORT_RETRY_DELAY_MS * attempt;
1386
+ const delayMs = TOOL_EXECUTE_TRANSPORT_RETRY_DELAY_MS * transportAttempt;
1364
1387
  console.warn(
1365
- `[deepline-run:${req.runId}] tool transport retry tool=${toolId} path=${path} attempt=${attempt}/${TOOL_EXECUTE_TRANSPORT_MAX_ATTEMPTS} retryAfterMs=${delayMs} error=${redactSecretsFromLogString(message)}`,
1388
+ `[deepline-run:${req.runId}] tool transport retry tool=${toolId} path=${path} attempt=${transportAttempt}/${TOOL_EXECUTE_TRANSPORT_MAX_ATTEMPTS} retryAfterMs=${delayMs} error=${redactSecretsFromLogString(message)}`,
1366
1389
  );
1367
1390
  await sleepWorkerMs(delayMs);
1368
1391
  continue;
@@ -1379,51 +1402,34 @@ async function callToolDirect(
1379
1402
  }
1380
1403
 
1381
1404
  const text = await res.text().catch(() => '');
1382
- const isRateLimited = res.status === 429;
1383
- const initialRetryDecision = decideToolExecuteHttpRetry({
1405
+ const httpFailureAttempt = httpFailureAttempts.next({
1384
1406
  toolId,
1385
1407
  status: res.status,
1408
+ transientHttpRetrySafe,
1386
1409
  });
1387
- lastError = normalizeToolHttpErrorMessage({
1410
+ const failure = classifyToolExecuteHttpFailure({
1388
1411
  toolId,
1389
1412
  status: res.status,
1390
- attempt,
1391
- maxAttempts: initialRetryDecision.attemptCap,
1413
+ attempt: httpFailureAttempt,
1392
1414
  bodyText: text,
1415
+ retryAfterHeader: res.headers.get('retry-after'),
1416
+ transientHttpRetrySafe,
1393
1417
  });
1394
- // Rate-limit pushback gets the larger 429-specific retry budget, unless the
1395
- // current response body is a hard Deepline billing denial.
1396
- const retryDecision = decideToolExecuteHttpRetry({
1397
- toolId,
1398
- status: res.status,
1399
- hardBillingFailure: isHardBillingToolHttpError(lastError),
1400
- });
1401
- const attemptCap = retryDecision.attemptCap;
1402
- const retryAfterSeconds = Number(res.headers.get('retry-after'));
1403
- const retryAfterMs =
1404
- Number.isFinite(retryAfterSeconds) && retryAfterSeconds > 0
1405
- ? Math.ceil(retryAfterSeconds * 1000)
1406
- : 0;
1407
- if (isRateLimited) {
1418
+ lastError = failure.error;
1419
+ if (failure.backpressureDelayMs !== null) {
1408
1420
  // Feed the provider's backpressure into the shared pacer even on the
1409
1421
  // final attempt so the (org, provider) bucket backs off across isolates.
1410
- onProviderBackpressure?.(retryAfterMs > 0 ? retryAfterMs : 1_000);
1422
+ onProviderBackpressure?.(failure.backpressureDelayMs);
1411
1423
  }
1412
- if (!retryDecision.retryable || attempt >= attemptCap) {
1424
+ if (!failure.shouldRetry) {
1413
1425
  throw lastError;
1414
1426
  }
1415
1427
  // Charge the retry budget per attempt, matching the cjs runner's
1416
1428
  // chargeBudget('retry') on every 429 / retryable-5xx retry.
1417
- onRetryAttempt?.();
1418
- // 429 delays escalate per attempt (still honoring a larger retry-after)
1419
- // so sustained throttling spaces calls out instead of hammering the
1420
- // limiter with fixed 1s retries.
1421
- const delayMs = isRateLimited
1422
- ? Math.min(5_000, Math.max(retryAfterMs, 1_000 * attempt))
1423
- : retryAfterMs > 0
1424
- ? Math.min(5_000, retryAfterMs)
1425
- : 1_000;
1426
- await new Promise((resolve) => setTimeout(resolve, delayMs));
1429
+ if (failure.chargeRetryBudget) {
1430
+ onRetryAttempt?.();
1431
+ }
1432
+ await sleepWorkerMs(failure.retryDelayMs);
1427
1433
  }
1428
1434
 
1429
1435
  throw lastError ?? new Error(`tool ${toolId} failed before execution.`);
@@ -1728,6 +1734,9 @@ class WorkerToolBatchScheduler {
1728
1734
  const groupStartedAt = nowMs();
1729
1735
  await Promise.all(
1730
1736
  requests.map(async (request) => {
1737
+ const toolContract = await this.resolvePacing(toolId).catch(
1738
+ () => null,
1739
+ );
1731
1740
  // Each unbatched provider call takes its own tool slot: the Governor
1732
1741
  // charges tool budget, holds a global tool-concurrency slot, and
1733
1742
  // applies per-(org,provider) pacing before the call runs.
@@ -1743,6 +1752,7 @@ class WorkerToolBatchScheduler {
1743
1752
  this.callbacks,
1744
1753
  (retryAfterMs) => this.reportBackpressure(toolId, retryAfterMs),
1745
1754
  () => this.governor.chargeBudget('retry'),
1755
+ toolContract?.retrySafeTransientHttp === true,
1746
1756
  ),
1747
1757
  );
1748
1758
  } catch (error) {
@@ -1775,6 +1785,7 @@ class WorkerToolBatchScheduler {
1775
1785
  abortSignal: this.abortSignal,
1776
1786
  reportBackpressure: (retryAfterMs) =>
1777
1787
  this.reportBackpressure(toolId, retryAfterMs),
1788
+ resolveToolContract: this.resolvePacing,
1778
1789
  onRequestsSettled: this.onRequestsSettled,
1779
1790
  callbacks: this.callbacks,
1780
1791
  });
@@ -1810,6 +1821,7 @@ async function executeBatchedWorkerToolGroup(input: {
1810
1821
  suggestedParallelism: number;
1811
1822
  abortSignal?: AbortSignal;
1812
1823
  reportBackpressure: (retryAfterMs: number) => void;
1824
+ resolveToolContract: WorkerPacingResolver;
1813
1825
  onRequestsSettled?: (count: number) => void;
1814
1826
  callbacks?: WorkerCtxCallbacks;
1815
1827
  }): Promise<void> {
@@ -1840,6 +1852,9 @@ async function executeBatchedWorkerToolGroup(input: {
1840
1852
  Math.min(input.suggestedParallelism, compiledBatches.length || 1),
1841
1853
  ),
1842
1854
  execute: async (batch) => {
1855
+ const toolContract = await input
1856
+ .resolveToolContract(batch.batchOperation)
1857
+ .catch(() => null);
1843
1858
  // One provider call per batch → one tool slot (budget + global
1844
1859
  // concurrency + per-(org,provider) pacing) around the whole batch.
1845
1860
  const slot = await input.governor.acquireToolSlot(batch.batchOperation, {
@@ -1857,6 +1872,7 @@ async function executeBatchedWorkerToolGroup(input: {
1857
1872
  undefined,
1858
1873
  input.reportBackpressure,
1859
1874
  () => input.governor.chargeBudget('retry'),
1875
+ toolContract?.retrySafeTransientHttp === true,
1860
1876
  );
1861
1877
  } catch (error) {
1862
1878
  input.callbacks?.onToolFailed?.(batch.batchOperation, nowMs());
@@ -2246,7 +2262,11 @@ async function executeWorkerWaterfall(
2246
2262
  opts?: WorkerWaterfallOptions,
2247
2263
  callbacks?: WorkerCtxCallbacks,
2248
2264
  workflowStep?: WorkflowStep,
2265
+ resolveToolContract?: WorkerPacingResolver,
2249
2266
  ): Promise<unknown | null> {
2267
+ const retrySafeTransientHttp = async (toolId: string): Promise<boolean> =>
2268
+ (await resolveToolContract?.(toolId).catch(() => null))
2269
+ ?.retrySafeTransientHttp === true;
2250
2270
  // Inline-spec form
2251
2271
  if (typeof toolNameOrSpec === 'object' && toolNameOrSpec) {
2252
2272
  const spec = toolNameOrSpec;
@@ -2257,13 +2277,18 @@ async function executeWorkerWaterfall(
2257
2277
  if (isWorkerInlineCodeStep(step)) {
2258
2278
  result = await step.run(input, {
2259
2279
  tools: {
2260
- execute: async (request: unknown) =>
2261
- await executeToolWithLifecycle(
2280
+ execute: async (request: unknown) => {
2281
+ const args = normalizeToolExecuteArgs(request);
2282
+ return await executeToolWithLifecycle(
2262
2283
  req,
2263
- normalizeToolExecuteArgs(request),
2284
+ args,
2264
2285
  workflowStep,
2265
2286
  callbacks,
2266
- ),
2287
+ undefined,
2288
+ undefined,
2289
+ await retrySafeTransientHttp(args.toolId),
2290
+ );
2291
+ },
2267
2292
  },
2268
2293
  });
2269
2294
  } else {
@@ -2276,6 +2301,9 @@ async function executeWorkerWaterfall(
2276
2301
  },
2277
2302
  workflowStep,
2278
2303
  callbacks,
2304
+ undefined,
2305
+ undefined,
2306
+ await retrySafeTransientHttp(step.toolId),
2279
2307
  );
2280
2308
  }
2281
2309
  } catch {
@@ -2367,6 +2395,9 @@ async function executeWorkerWaterfall(
2367
2395
  { id: toolName, toolId: toolName, input },
2368
2396
  workflowStep,
2369
2397
  callbacks,
2398
+ undefined,
2399
+ undefined,
2400
+ await retrySafeTransientHttp(toolName),
2370
2401
  );
2371
2402
  } catch {
2372
2403
  return null;
@@ -2384,6 +2415,9 @@ async function executeWorkerWaterfall(
2384
2415
  },
2385
2416
  workflowStep,
2386
2417
  callbacks,
2418
+ undefined,
2419
+ undefined,
2420
+ await retrySafeTransientHttp(toolName),
2387
2421
  );
2388
2422
  if (resultHasContent(result)) {
2389
2423
  recorder.push({
@@ -3196,22 +3230,18 @@ async function persistCompletedMapRows(input: {
3196
3230
  (field) => !input.outputFields.includes(field),
3197
3231
  ),
3198
3232
  ];
3233
+ const sessionScope = runtimeSheetSessionScope(input.req);
3234
+ const rows = input.rows.map((row) => publicCsvStorageRow(row));
3199
3235
  await harnessPersistCompletedSheetRows({
3200
- baseUrl: input.req.baseUrl,
3201
- executorToken: input.req.executorToken,
3202
- orgId: input.req.orgId,
3203
- preloadedDbSessions: input.req.preloadedDbSessions ?? null,
3204
- playName: input.req.playName,
3236
+ ...sessionScope,
3205
3237
  tableNamespace: input.tableNamespace,
3206
3238
  sheetContract: augmentSheetContractWithDatasetFields({
3207
3239
  contract: requireSheetContract(input.req, input.tableNamespace),
3208
- rows: input.rows.map((row) => publicCsvStorageRow(row)),
3240
+ rows,
3209
3241
  outputFields,
3210
3242
  }),
3211
- rows: input.rows.map((row) => publicCsvStorageRow(row)),
3243
+ rows,
3212
3244
  outputFields,
3213
- runId: input.req.runId,
3214
- userEmail: input.req.userEmail,
3215
3245
  });
3216
3246
  }
3217
3247
 
@@ -3231,22 +3261,18 @@ async function prepareMapRows(input: {
3231
3261
  if (input.rows.length === 0) {
3232
3262
  return { inserted: 0, skipped: 0, pendingRows: [], completedRows: [] };
3233
3263
  }
3264
+ const sessionScope = runtimeSheetSessionScope(input.req);
3265
+ const rows = input.rows.map((row) => runtimeCsvStorageRow(row));
3234
3266
  const result = await harnessStartSheetDataset({
3235
- baseUrl: input.req.baseUrl,
3236
- executorToken: input.req.executorToken,
3237
- orgId: input.req.orgId,
3238
- preloadedDbSessions: input.req.preloadedDbSessions ?? null,
3239
- playName: input.req.playName,
3267
+ ...sessionScope,
3240
3268
  tableNamespace: input.tableNamespace,
3241
3269
  sheetContract: augmentSheetContractWithDatasetFields({
3242
3270
  contract: requireSheetContract(input.req, input.tableNamespace),
3243
- rows: input.rows.map((row) => runtimeCsvStorageRow(row)),
3271
+ rows,
3244
3272
  outputFields: input.outputFields,
3245
3273
  }),
3246
- rows: input.rows.map((row) => runtimeCsvStorageRow(row)),
3247
- runId: input.req.runId,
3274
+ rows,
3248
3275
  inputOffset: input.inputOffset,
3249
- userEmail: input.req.userEmail,
3250
3276
  cellPolicies: input.cellPolicies,
3251
3277
  });
3252
3278
  for (const timing of result.timings ?? []) {
@@ -3418,18 +3444,25 @@ function createCoordinatorRatePort(req: RunRequest): CoordinatorRatePort {
3418
3444
  */
3419
3445
  type WorkerPacingResolver = (
3420
3446
  toolId: string,
3421
- ) => Promise<{ provider: string; rules: PacingRule[] } | null>;
3447
+ ) => Promise<
3448
+ (ResolvedPacingPolicy & { retrySafeTransientHttp: boolean }) | null
3449
+ >;
3422
3450
 
3423
3451
  function createWorkerPacingResolver(req: RunRequest): WorkerPacingResolver {
3424
3452
  const cache = new Map<
3425
3453
  string,
3426
- Promise<{ provider: string; rules: PacingRule[] } | null>
3454
+ Promise<(ResolvedPacingPolicy & { retrySafeTransientHttp: boolean }) | null>
3427
3455
  >();
3428
3456
  return (toolId: string) => {
3429
3457
  const normalized = String(toolId || '').trim();
3430
3458
  if (!normalized) return Promise.resolve(null);
3431
- const builtin = resolveBuiltinPacing(normalized);
3432
- if (builtin) return Promise.resolve(builtin);
3459
+ const builtin = pacingPolicyForTool(normalized, []);
3460
+ if (builtin) {
3461
+ return Promise.resolve({
3462
+ ...builtin,
3463
+ retrySafeTransientHttp: false,
3464
+ });
3465
+ }
3433
3466
  const cached = cache.get(normalized);
3434
3467
  if (cached) return cached;
3435
3468
  const promise = (async () => {
@@ -3445,37 +3478,26 @@ function createWorkerPacingResolver(req: RunRequest): WorkerPacingResolver {
3445
3478
  const body = (await res.json().catch(() => null)) as {
3446
3479
  provider?: unknown;
3447
3480
  queueHints?: unknown;
3481
+ retry?: unknown;
3448
3482
  } | null;
3449
3483
  if (!body) return null;
3450
- const provider =
3451
- typeof body.provider === 'string' && body.provider.trim()
3452
- ? body.provider.trim()
3453
- : null;
3454
- if (!provider || !Array.isArray(body.queueHints)) return null;
3455
- const rules: PacingRule[] = body.queueHints.flatMap((hint) => {
3456
- if (!hint || typeof hint !== 'object') return [];
3457
- const record = hint as Record<string, unknown>;
3458
- if (
3459
- typeof record.ruleId !== 'string' ||
3460
- typeof record.requestsPerWindow !== 'number' ||
3461
- typeof record.windowMs !== 'number'
3462
- ) {
3463
- return [];
3464
- }
3465
- return [
3466
- {
3467
- ruleId: record.ruleId,
3468
- requestsPerWindow: record.requestsPerWindow,
3469
- windowMs: record.windowMs,
3470
- maxConcurrency:
3471
- typeof record.maxConcurrency === 'number'
3472
- ? record.maxConcurrency
3473
- : null,
3474
- } satisfies PacingRule,
3475
- ];
3476
- });
3477
- if (rules.length === 0) return null;
3478
- return { provider, rules };
3484
+ const pacing = pacingPolicyFromUnknownQueueHints(body.queueHints);
3485
+ const retry =
3486
+ body.retry &&
3487
+ typeof body.retry === 'object' &&
3488
+ !Array.isArray(body.retry)
3489
+ ? (body.retry as Record<string, unknown>)
3490
+ : {};
3491
+ return {
3492
+ ...(pacing ?? {
3493
+ provider:
3494
+ typeof body.provider === 'string' && body.provider.trim()
3495
+ ? body.provider.trim()
3496
+ : '',
3497
+ rules: [],
3498
+ }),
3499
+ retrySafeTransientHttp: retry.retrySafeTransientHttp === true,
3500
+ };
3479
3501
  })();
3480
3502
  cache.set(normalized, promise);
3481
3503
  return promise;
@@ -3930,7 +3952,7 @@ function createMinimalWorkerCtx(
3930
3952
  cellPolicies,
3931
3953
  rows: chunkEntries.map(({ row, rowKey }) => ({
3932
3954
  ...row,
3933
- __deeplineRowKey: rowKey,
3955
+ ...mapRowOutcomeRuntimeFields({ key: rowKey }),
3934
3956
  })),
3935
3957
  inputOffset: baseOffset + chunkStart,
3936
3958
  });
@@ -3969,9 +3991,8 @@ function createMinimalWorkerCtx(
3969
3991
  const preparedKeys = new Set<string>();
3970
3992
  for (const row of prepared.pendingRows) {
3971
3993
  const key =
3972
- typeof row.__deeplineRowKey === 'string'
3973
- ? row.__deeplineRowKey
3974
- : derivePlayRowIdentity(publicCsvInputRow(row), name);
3994
+ resolveMapRowOutcomeKey(row) ??
3995
+ derivePlayRowIdentity(publicCsvInputRow(row), name);
3975
3996
  if (key) {
3976
3997
  pendingKeys.add(key);
3977
3998
  pendingRowsByKey.set(key, row);
@@ -3980,9 +4001,8 @@ function createMinimalWorkerCtx(
3980
4001
  }
3981
4002
  for (const row of prepared.completedRows) {
3982
4003
  const key =
3983
- typeof row.__deeplineRowKey === 'string'
3984
- ? row.__deeplineRowKey
3985
- : derivePlayRowIdentity(publicCsvInputRow(row), name);
4004
+ resolveMapRowOutcomeKey(row) ??
4005
+ derivePlayRowIdentity(publicCsvInputRow(row), name);
3986
4006
  if (key) {
3987
4007
  completedKeys.add(key);
3988
4008
  preparedKeys.add(key);
@@ -4168,22 +4188,10 @@ function createMinimalWorkerCtx(
4168
4188
  executedIndex: number;
4169
4189
  } => entry !== null,
4170
4190
  );
4171
- // Under the default isolation, every failed row persists as a
4172
- // recoverable `_status='failed'` row (it re-executes free next run).
4173
- // Under `onRowError: 'fail'` the run dies, so a failed row's partial
4174
- // data is persisted ONLY as a last-resort recovery: when this chunk has
4175
- // no other recoverable rows (no successful executed rows and no
4176
- // already-completed rows). That keeps a partial fail-fast run's export
4177
- // to the rows that fully committed before the failure, while an
4178
- // all-rows-failed fail-fast run still exposes the persisted partial
4179
- // cells instead of advertising an empty, unrecoverable dataset.
4180
- const failedRowsToPersist =
4181
- failFastRowErrors &&
4182
- (rowsToPersist.length > 0 ||
4183
- persistedExecutedIndexes.size > 0 ||
4184
- prepared.completedRows.length > 0)
4185
- ? []
4186
- : allFailedRowsToPersist;
4191
+ // Failed rows persist as recoverable `_status='failed'` rows in both
4192
+ // default row isolation and fail-fast mode. A fail-fast run still dies,
4193
+ // but export/retry keeps cells completed before the failing column.
4194
+ const failedRowsToPersist = allFailedRowsToPersist;
4187
4195
  if (rowsToPersist.length === 0 && failedRowsToPersist.length === 0) {
4188
4196
  return;
4189
4197
  }
@@ -4193,32 +4201,27 @@ function createMinimalWorkerCtx(
4193
4201
  outputFields,
4194
4202
  extraOutputFields: Array.from(generatedOutputFields),
4195
4203
  rows: [
4196
- ...rowsToPersist.map(({ row, executedIndex }) => ({
4197
- ...row,
4198
- ...(executedCellMetaPatches[executedIndex]
4199
- ? {
4200
- __deeplineCellMetaPatch:
4201
- executedCellMetaPatches[executedIndex],
4202
- }
4203
- : {}),
4204
- __deeplineRowKey:
4205
- uniqueRowsToExecuteEntries[executedIndex]!.rowKey,
4206
- })),
4204
+ ...rowsToPersist.map(({ row, executedIndex }) =>
4205
+ mapRowOutcomeRuntimeRow(
4206
+ completedMapRowOutcome({
4207
+ key: uniqueRowsToExecuteEntries[executedIndex]!.rowKey,
4208
+ data: row,
4209
+ cellMetaPatch: executedCellMetaPatches[executedIndex],
4210
+ }),
4211
+ ),
4212
+ ),
4207
4213
  // Failed rows persist as recoverable `_status='failed'` sheet
4208
4214
  // rows: partial data + per-cell failure meta + the row error.
4209
- ...failedRowsToPersist.map(({ failure, executedIndex }) => ({
4210
- ...failure.row,
4211
- ...(executedCellMetaPatches[executedIndex]
4212
- ? {
4213
- __deeplineCellMetaPatch:
4214
- executedCellMetaPatches[executedIndex],
4215
- }
4216
- : {}),
4217
- __deeplineRowKey:
4218
- uniqueRowsToExecuteEntries[executedIndex]!.rowKey,
4219
- __deeplineRowStatus: 'failed',
4220
- __deeplineRowError: failure.error,
4221
- })),
4215
+ ...failedRowsToPersist.map(({ failure, executedIndex }) =>
4216
+ mapRowOutcomeRuntimeRow(
4217
+ failedMapRowOutcome({
4218
+ key: uniqueRowsToExecuteEntries[executedIndex]!.rowKey,
4219
+ data: failure.row,
4220
+ cellMetaPatch: executedCellMetaPatches[executedIndex],
4221
+ error: failure.error,
4222
+ }),
4223
+ ),
4224
+ ),
4222
4225
  ],
4223
4226
  });
4224
4227
  for (const { executedIndex } of rowsToPersist) {
@@ -4324,6 +4327,7 @@ function createMinimalWorkerCtx(
4324
4327
  waterfallOpts,
4325
4328
  callbacks,
4326
4329
  workflowStep,
4330
+ resolveToolPacing,
4327
4331
  ),
4328
4332
  };
4329
4333
  let activeField: string | null = null;
@@ -4461,11 +4465,9 @@ function createMinimalWorkerCtx(
4461
4465
  Object.keys(cellMetaPatch).length > 0
4462
4466
  ? cellMetaPatch
4463
4467
  : undefined;
4464
- // Keep the partially-enriched row. Default isolation persists
4465
- // it as `_status='failed'` so the row can re-execute free on
4466
- // the next run. Fail-fast persists failed rows only after the
4467
- // chunk settles and only when every row failed; otherwise only
4468
- // fully committed successful rows are recoverable.
4468
+ // Keep the partially-enriched row. It persists as
4469
+ // `_status='failed'` so export/retry can recover cells that
4470
+ // completed before the row error.
4469
4471
  failedRowEntries[myIndex] = {
4470
4472
  row: enriched as T & Record<string, unknown>,
4471
4473
  error: message,
@@ -4485,7 +4487,7 @@ function createMinimalWorkerCtx(
4485
4487
  `Row ${absoluteIndex} of ctx.dataset("${name}") failed` +
4486
4488
  `${activeField ? ` at column "${activeField}"` : ''}: ${message} ` +
4487
4489
  (failFastRowErrors
4488
- ? '(row recorded as failed; onRowError:"fail" persists it only if every row fails)'
4490
+ ? '(row recorded as failed; onRowError:"fail" fails the run after recoverable cells persist)'
4489
4491
  : '(row recorded as failed; sibling rows continue and the row re-executes on the next run)'),
4490
4492
  ts: nowMs(),
4491
4493
  });
@@ -4596,12 +4598,12 @@ function createMinimalWorkerCtx(
4596
4598
  const resultByKey = new Map<string, T & Record<string, unknown>>();
4597
4599
  for (const completedRow of prepared.completedRows) {
4598
4600
  const key =
4599
- typeof completedRow.__deeplineRowKey === 'string'
4600
- ? completedRow.__deeplineRowKey
4601
- : derivePlayRowIdentity(publicCsvInputRow(completedRow), name);
4601
+ resolveMapRowOutcomeKey(completedRow) ??
4602
+ derivePlayRowIdentity(publicCsvInputRow(completedRow), name);
4602
4603
  if (key) {
4603
- const cleanedRow = publicCsvOutputRow(completedRow);
4604
- delete cleanedRow.__deeplineRowKey;
4604
+ const cleanedRow = stripMapRowOutcomeRuntimeFields(
4605
+ publicCsvOutputRow(completedRow),
4606
+ );
4605
4607
  resultByKey.set(key, cleanedRow as T & Record<string, unknown>);
4606
4608
  }
4607
4609
  }
@@ -4880,12 +4882,11 @@ function createMinimalWorkerCtx(
4880
4882
  if (failFastRowErrors && totalRowsFailed > 0 && totalRowsWritten > 0) {
4881
4883
  // onRowError:'fail', PARTIAL failure (some rows committed): fail the run
4882
4884
  // without finalizing the dataset. The committed rows already persisted
4883
- // per chunk and are surfaced as a recovered dataset (the failed rows'
4884
- // partial data was intentionally NOT persisted here only the rows that
4885
- // fully committed before the failure are recoverable). We reach this
4886
- // AFTER the failing chunk completed normally (no per-row throw inside
4887
- // the durable chunk step, so no chunk-step retry storm); later chunks
4888
- // were skipped by the fail-fast short-circuit in the chunk loop.
4885
+ // per chunk and are surfaced as a recovered dataset alongside failed
4886
+ // rows' partial cells. We reach this AFTER the failing chunk completed
4887
+ // normally (no per-row throw inside the durable chunk step, so no
4888
+ // chunk-step retry storm); later chunks were skipped by the fail-fast
4889
+ // short-circuit in the chunk loop.
4889
4890
  const firstError = totalRowFailureSamples[0]?.error ?? 'unknown error';
4890
4891
  throw new Error(
4891
4892
  `ctx.dataset("${name}") failed for ${totalRowsFailed} executed row(s) under onRowError:'fail'. ` +
@@ -5247,6 +5248,7 @@ function createMinimalWorkerCtx(
5247
5248
  opts,
5248
5249
  callbacks,
5249
5250
  workflowStep,
5251
+ resolveToolPacing,
5250
5252
  );
5251
5253
  },
5252
5254
  async sleep(ms: number): Promise<void> {