deepline 0.1.11 → 0.1.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. package/README.md +18 -10
  2. package/dist/cli/index.js +1795 -1052
  3. package/dist/cli/index.mjs +1795 -1053
  4. package/dist/index.d.mts +427 -308
  5. package/dist/index.d.ts +427 -308
  6. package/dist/index.js +391 -326
  7. package/dist/index.mjs +391 -325
  8. package/dist/repo/apps/play-runner-workers/src/coordinator-entry.ts +88 -22
  9. package/dist/repo/apps/play-runner-workers/src/entry.ts +804 -1253
  10. package/dist/repo/sdk/src/client.ts +287 -47
  11. package/dist/repo/sdk/src/config.ts +125 -8
  12. package/dist/repo/sdk/src/http.ts +10 -2
  13. package/dist/repo/sdk/src/index.ts +7 -16
  14. package/dist/repo/sdk/src/play.ts +105 -140
  15. package/dist/repo/sdk/src/plays/bundle-play-file.ts +23 -6
  16. package/dist/repo/sdk/src/plays/local-file-discovery.ts +207 -160
  17. package/dist/repo/sdk/src/tool-output.ts +0 -146
  18. package/dist/repo/sdk/src/types.ts +27 -0
  19. package/dist/repo/sdk/src/version.ts +2 -2
  20. package/dist/repo/sdk/src/worker-play-entry.ts +3 -0
  21. package/dist/repo/shared_libs/play-runtime/csv-rename.ts +180 -0
  22. package/dist/repo/shared_libs/play-runtime/tool-result.ts +250 -133
  23. package/dist/repo/shared_libs/plays/bundling/index.ts +274 -234
  24. package/dist/repo/shared_libs/plays/dataset.ts +29 -1
  25. package/package.json +5 -4
  26. package/dist/cli/index.js.map +0 -1
  27. package/dist/cli/index.mjs.map +0 -1
  28. package/dist/index.js.map +0 -1
  29. package/dist/index.mjs.map +0 -1
  30. package/dist/repo/apps/play-runner-workers/src/runtime/README.md +0 -21
  31. package/dist/repo/apps/play-runner-workers/src/runtime/batching.ts +0 -177
  32. package/dist/repo/apps/play-runner-workers/src/runtime/execution-plan.ts +0 -52
  33. package/dist/repo/apps/play-runner-workers/src/runtime/tool-batch.ts +0 -100
  34. package/dist/repo/apps/play-runner-workers/src/runtime/tool-result.ts +0 -184
  35. package/dist/repo/sdk/src/cli/commands/auth.ts +0 -500
  36. package/dist/repo/sdk/src/cli/commands/billing.ts +0 -188
  37. package/dist/repo/sdk/src/cli/commands/csv.ts +0 -123
  38. package/dist/repo/sdk/src/cli/commands/db.ts +0 -119
  39. package/dist/repo/sdk/src/cli/commands/feedback.ts +0 -40
  40. package/dist/repo/sdk/src/cli/commands/org.ts +0 -117
  41. package/dist/repo/sdk/src/cli/commands/play.ts +0 -3307
  42. package/dist/repo/sdk/src/cli/commands/tools.ts +0 -687
  43. package/dist/repo/sdk/src/cli/dataset-stats.ts +0 -341
  44. package/dist/repo/sdk/src/cli/index.ts +0 -148
  45. package/dist/repo/sdk/src/cli/progress.ts +0 -149
  46. package/dist/repo/sdk/src/cli/skills-sync.ts +0 -141
  47. package/dist/repo/sdk/src/cli/trace.ts +0 -61
  48. package/dist/repo/sdk/src/cli/utils.ts +0 -145
  49. package/dist/repo/sdk/src/compat.ts +0 -77
  50. package/dist/repo/shared_libs/observability/node-tracing.ts +0 -129
  51. package/dist/repo/shared_libs/observability/tracing.ts +0 -98
  52. package/dist/repo/shared_libs/play-runtime/context.ts +0 -3999
  53. package/dist/repo/shared_libs/play-runtime/ctx-contract.ts +0 -250
  54. package/dist/repo/shared_libs/play-runtime/ctx-types.ts +0 -713
  55. package/dist/repo/shared_libs/play-runtime/dataset-id.ts +0 -10
  56. package/dist/repo/shared_libs/play-runtime/db-session-crypto.ts +0 -304
  57. package/dist/repo/shared_libs/play-runtime/db-session.ts +0 -462
  58. package/dist/repo/shared_libs/play-runtime/live-events.ts +0 -214
  59. package/dist/repo/shared_libs/play-runtime/live-state-contract.ts +0 -50
  60. package/dist/repo/shared_libs/play-runtime/map-execution-frame.ts +0 -114
  61. package/dist/repo/shared_libs/play-runtime/map-row-identity.ts +0 -158
  62. package/dist/repo/shared_libs/play-runtime/progress-emitter.ts +0 -172
  63. package/dist/repo/shared_libs/play-runtime/protocol.ts +0 -121
  64. package/dist/repo/shared_libs/play-runtime/public-play-contract.ts +0 -42
  65. package/dist/repo/shared_libs/play-runtime/result-normalization.ts +0 -33
  66. package/dist/repo/shared_libs/play-runtime/runtime-api.ts +0 -1873
  67. package/dist/repo/shared_libs/play-runtime/runtime-constraints.ts +0 -2
  68. package/dist/repo/shared_libs/play-runtime/runtime-pg-driver-neon-serverless.ts +0 -201
  69. package/dist/repo/shared_libs/play-runtime/runtime-pg-driver-pg.ts +0 -48
  70. package/dist/repo/shared_libs/play-runtime/runtime-pg-driver.ts +0 -84
  71. package/dist/repo/shared_libs/play-runtime/static-pipeline-types.ts +0 -147
  72. package/dist/repo/shared_libs/play-runtime/suspension.ts +0 -68
  73. package/dist/repo/shared_libs/play-runtime/tracing.ts +0 -31
  74. package/dist/repo/shared_libs/play-runtime/waterfall-replay.ts +0 -75
  75. package/dist/repo/shared_libs/play-runtime/worker-api-types.ts +0 -140
  76. package/dist/repo/shared_libs/plays/artifact-transport.ts +0 -14
  77. package/dist/repo/shared_libs/plays/artifact-types.ts +0 -49
  78. package/dist/repo/shared_libs/plays/compiler-manifest.ts +0 -186
  79. package/dist/repo/shared_libs/plays/definition.ts +0 -264
  80. package/dist/repo/shared_libs/plays/file-refs.ts +0 -11
  81. package/dist/repo/shared_libs/plays/rate-limit-scheduler.ts +0 -206
  82. package/dist/repo/shared_libs/plays/resolve-static-pipeline.ts +0 -164
  83. package/dist/repo/shared_libs/plays/runtime-validation.ts +0 -415
  84. package/dist/repo/shared_libs/temporal/constants.ts +0 -39
  85. package/dist/repo/shared_libs/temporal/preview-config.ts +0 -153
@@ -17,10 +17,10 @@
17
17
  * to load CJS at runtime). The play here is statically imported and
18
18
  * bundled into the Worker script.
19
19
  * - Workers don't have node:fs / source-map-support. Stack traces are raw.
20
- * - Direct postgres (`pg` library) won't bundle for per-play Workers. This
21
- * harness keeps the per-play bundle isolate-safe and sends heavy sheet IO
22
- * to the long-lived harness Worker, which owns the Workers-compatible
23
- * Postgres driver and chunked write path.
20
+ * - Direct postgres (`pg` library) won't bundle for Workers. This harness
21
+ * uses HTTP-only ctx — every ctx.csv / ctx.tool / row write goes through
22
+ * the runtime API endpoint, not direct DB. That keeps the Worker bundle
23
+ * compatible with the V8 isolate runtime.
24
24
  *
25
25
  * Status: experimental. First cut targets tool-basic (ctx.csv + ctx.map +
26
26
  * ctx.tool). Plays that depend on the full ctx surface (durable sleep,
@@ -33,13 +33,28 @@ import {
33
33
  type WorkflowEvent,
34
34
  type WorkflowStep,
35
35
  } from 'cloudflare:workers';
36
- import { type ExecutionPlan } from '../../../shared_libs/play-runtime/execution-plan';
36
+ import {
37
+ chooseMapChunkSize,
38
+ deterministicMapChunkStepName,
39
+ type ExecutionPlan,
40
+ } from '../../../shared_libs/play-runtime/execution-plan';
41
+ import {
42
+ compileRequestsWithStrategy,
43
+ executeChunkedRequests,
44
+ type ChunkExecutionResult,
45
+ } from '../../../shared_libs/play-runtime/batch-runtime';
46
+ import { getDefaultPlayRuntimeBatchStrategy } from '../../../shared_libs/play-runtime/default-batch-strategies';
37
47
  import type { AnyBatchOperationStrategy } from '../../../shared_libs/play-runtime/batching-types';
38
48
  import {
39
- runtimeBillingActions,
40
- runtimeRunActions,
41
- type RuntimeApiAction,
42
- } from '../../../shared_libs/play-runtime/runtime-actions';
49
+ createToolBatchExecutor,
50
+ type ToolBatchRequest,
51
+ } from '../../../shared_libs/play-runtime/tool-batch-executor';
52
+ import {
53
+ createToolExecuteResult,
54
+ isToolExecuteResult,
55
+ type ToolExecuteResult,
56
+ type ToolResultMetadataInput,
57
+ } from '../../../shared_libs/play-runtime/tool-result';
43
58
  import type { PlayCallGovernanceSnapshot } from '../../../shared_libs/play-runtime/scheduler-backend';
44
59
  import type { PlayRuntimeManifestMap } from '../../../shared_libs/plays/compiler-manifest';
45
60
  import {
@@ -64,31 +79,15 @@ import {
64
79
  // re-bundle harness internals into per-play. Keep that in mind.
65
80
  import {
66
81
  harnessFetchStagedFile,
67
- harnessPersistCompletedSheetRows,
68
- harnessRuntimeApiCall,
69
82
  harnessStartSheetDataset,
70
83
  setHarnessBinding,
71
84
  } from '../../../sdk/src/plays/harness-stub';
72
85
  import {
73
- chooseMapChunkSize,
74
- deterministicMapChunkStepName,
75
- } from './runtime/execution-plan';
76
- import {
77
- compileRequestsWithStrategy,
78
- executeChunkedRequests,
79
- getDefaultPlayRuntimeBatchStrategy,
80
- type ChunkExecutionResult,
81
- } from './runtime/batching';
82
- import {
83
- createToolBatchExecutor,
84
- type ToolBatchRequest,
85
- } from './runtime/tool-batch';
86
- import {
87
- createToolExecuteResult,
88
- isToolExecuteResult,
89
- type ToolExecuteResult,
90
- type ToolResultMetadataInput,
91
- } from './runtime/tool-result';
86
+ applyCsvRenameProjection,
87
+ stripCsvProjectedFields,
88
+ cloneCsvAliasedRow,
89
+ type CsvRenameOptions,
90
+ } from '../../../shared_libs/play-runtime/csv-rename';
92
91
  import { coordinatorRequestHeaders } from '../../../shared_libs/play-runtime/coordinator-headers';
93
92
 
94
93
  // The play's default export. The bundler injects this — see bundle-play-file.ts.
@@ -149,6 +148,19 @@ type WorkerEnv = {
149
148
  PLAY_ASSETS?: {
150
149
  readText(logicalPath: string): Promise<string>;
151
150
  };
151
+ /**
152
+ * In-process Fetcher constructed by the coordinator and handed to the
153
+ * per-graphHash play Worker. When present, runtime callbacks
154
+ * (`/api/v2/plays/internal/runtime`, `/api/v2/plays/internal/*`,
155
+ * `/api/v2/plays/runtime-tools/*`) skip the public callback URL and route
156
+ * directly through the coordinator's process to the configured app — saves
157
+ * the *.workers.dev → CF edge → cloudflared → localhost chain on every
158
+ * runtime callback. Absent on legacy coordinator deploys; the fetch
159
+ * helpers fall back to `globalThis.fetch(req.baseUrl + path)`.
160
+ */
161
+ RUNTIME_API?: {
162
+ fetch(input: Request): Promise<Response>;
163
+ };
152
164
  /**
153
165
  * Loopback RPC binding into the coordinator Worker. Used for CF-to-CF
154
166
  * child orchestration so nested plays do not bounce through a public
@@ -176,7 +188,7 @@ type WorkerEnv = {
176
188
  ): Promise<Record<string, unknown>>;
177
189
  recordPerfTrace(
178
190
  runId: string,
179
- payload: DynamicWorkerPerfTracePayload,
191
+ payload: Record<string, unknown>,
180
192
  ): Promise<void>;
181
193
  };
182
194
  /**
@@ -192,8 +204,20 @@ type WorkerEnv = {
192
204
  * loud error. Loud failures > silent fallbacks.
193
205
  */
194
206
  HARNESS?: import('../../play-harness-worker/src/rpc-types').PlayHarnessRpc;
207
+ VERCEL_PROTECTION_BYPASS_TOKEN?: string;
195
208
  };
196
209
 
210
+ let cachedRuntimeApiBinding: WorkerEnv['RUNTIME_API'] | null = null;
211
+ let cachedRuntimeApiVercelBypassToken: string | null = null;
212
+ function captureRuntimeApiBinding(env: WorkerEnv): void {
213
+ cachedRuntimeApiBinding = env.RUNTIME_API ?? null;
214
+ cachedRuntimeApiVercelBypassToken =
215
+ typeof env.VERCEL_PROTECTION_BYPASS_TOKEN === 'string' &&
216
+ env.VERCEL_PROTECTION_BYPASS_TOKEN.trim()
217
+ ? env.VERCEL_PROTECTION_BYPASS_TOKEN.trim()
218
+ : null;
219
+ }
220
+
197
221
  let cachedCoordinatorBinding: WorkerEnv['COORDINATOR'] | null = null;
198
222
  function captureCoordinatorBinding(env: WorkerEnv): void {
199
223
  cachedCoordinatorBinding = env.COORDINATOR ?? null;
@@ -215,8 +239,10 @@ function captureHarnessBinding(env: WorkerEnv): void {
215
239
 
216
240
  /**
217
241
  * One-shot per-isolate harness wiring probe. Logs a single line that
218
- * reports broken wiring on this isolate's first run. Successful probes stay
219
- * silent so fresh-graph benchmark runs do not pay for diagnostic tail traffic.
242
+ * confirms whether `env.HARNESS` resolved to a live Worker on this
243
+ * isolate's first run. We use this as a low-noise diagnostic operators
244
+ * can grep for in any env — local dev, preview, prod — without having
245
+ * to plumb a separate health route or instrument SDK call sites.
220
246
  *
221
247
  * Behavior on fail:
222
248
  * - Binding missing entirely → log clearly that HARNESS is unwired so
@@ -239,9 +265,8 @@ async function probeHarnessOnce(
239
265
  if (harnessProbeFiredForIsolate) return;
240
266
  harnessProbeFiredForIsolate = true;
241
267
  if (!env.HARNESS) {
242
- void runPrefix;
243
- console.warn(
244
- `[harness-probe] env.HARNESS unwired — coordinator did not pass the binding. ` +
268
+ console.log(
269
+ `${runPrefix} [harness-probe] env.HARNESS unwired — coordinator did not pass the binding. ` +
245
270
  `Per-play SDK call sites that reach into the harness will throw clearly. ` +
246
271
  `See apps/play-harness-worker/README.md.`,
247
272
  );
@@ -251,104 +276,33 @@ async function probeHarnessOnce(
251
276
  typeof (env.HARNESS as { ping?: () => Promise<{ ok: true; ts: number }> })
252
277
  .ping !== 'function'
253
278
  ) {
254
- void runPrefix;
255
- console.warn(
256
- `[harness-probe] env.HARNESS is present but does not expose ping(); ` +
279
+ console.log(
280
+ `${runPrefix} [harness-probe] env.HARNESS is present but does not expose ping(); ` +
257
281
  `continuing and relying on the first real call to fail if the contract changed.`,
258
282
  );
259
283
  return;
260
284
  }
261
285
  try {
262
- await env.HARNESS.ping();
286
+ const result = await env.HARNESS.ping();
287
+ console.log(
288
+ `${runPrefix} [harness-probe] env.HARNESS connected ts=${result.ts}`,
289
+ );
263
290
  } catch (error) {
264
291
  const message = error instanceof Error ? error.message : String(error);
265
- void runPrefix;
266
- console.warn(
267
- `[harness-probe] env.HARNESS resolved but ping failed: ${message}`,
292
+ console.log(
293
+ `${runPrefix} [harness-probe] env.HARNESS resolved but ping failed: ${message}`,
268
294
  );
269
295
  }
270
296
  }
271
297
  /**
272
- * Routes Deepline API requests through the long-lived harness Worker. Passing
273
- * explicit JSON over typed RPC avoids Request/Authorization header cloning
274
- * quirks across WorkerEntrypoint boundaries.
298
+ * Routes runtime API requests through the in-process RUNTIME_API binding when
299
+ * Cloudflare exposes the coordinator WorkerEntrypoint export. Some workflow
300
+ * execution paths do not expose those exports; there we keep the older public
301
+ * fetch transport so the play still reaches the same authenticated handler.
275
302
  */
276
303
  const RUNTIME_API_TIMEOUT_MS = 30_000;
277
304
  const RUNTIME_API_PLAY_RUN_TIMEOUT_MS = 75_000;
278
- const HARNESS_RUNTIME_FORWARD_HEADERS = [
279
- 'x-deepline-request-id',
280
- EXECUTE_TOOL_METADATA_HEADER,
281
- ] as const;
282
- const TOOL_HTTP_MAX_ATTEMPTS = 3;
283
- const TOOL_RETRY_AFTER_DEFAULT_MS = 1_000;
284
- const TOOL_RETRY_AFTER_MAX_MS = 30 * 60 * 1_000;
285
- const TOOL_IN_MEMORY_RETRY_SLEEP_MAX_MS = 30_000;
286
- const TRANSIENT_HTTP_RETRY_SAFE_TOOL_IDS = new Set(['test_transient_500']);
287
-
288
- function parseRetryAfterMs(header: string | null): number {
289
- if (!header) return TOOL_RETRY_AFTER_DEFAULT_MS;
290
- const numeric = Number(header);
291
- if (Number.isFinite(numeric) && numeric >= 0) {
292
- return Math.max(1, Math.ceil(numeric * 1_000));
293
- }
294
- const retryAt = Date.parse(header);
295
- if (Number.isFinite(retryAt)) {
296
- return Math.max(1, retryAt - Date.now());
297
- }
298
- return TOOL_RETRY_AFTER_DEFAULT_MS;
299
- }
300
-
301
- function isRetryableToolHttpStatus(toolId: string, status: number): boolean {
302
- if (status === 429) return true;
303
- return (
304
- status >= 500 &&
305
- status < 600 &&
306
- TRANSIENT_HTTP_RETRY_SAFE_TOOL_IDS.has(toolId)
307
- );
308
- }
309
-
310
- async function sleepForToolRetry(input: {
311
- workflowStep?: WorkflowStep;
312
- toolId: string;
313
- status: number;
314
- attempt: number;
315
- retryAfterMs: number;
316
- retryKey?: string | null;
317
- toolInput: Record<string, unknown>;
318
- }): Promise<void> {
319
- const retryAfterMs = Math.max(1, Math.ceil(input.retryAfterMs));
320
- if (retryAfterMs > TOOL_RETRY_AFTER_MAX_MS) {
321
- throw new Error(
322
- `tool ${input.toolId} returned ${input.status} with retry-after ${retryAfterMs}ms, above max supported retry wait ${TOOL_RETRY_AFTER_MAX_MS}ms.`,
323
- );
324
- }
325
-
326
- if (input.workflowStep) {
327
- const inputHash = (
328
- await hashJson({
329
- key: input.retryKey ?? '',
330
- input: input.toolInput,
331
- })
332
- ).slice(0, 16);
333
- await (
334
- input.workflowStep.sleep as unknown as (
335
- name: string,
336
- duration: number,
337
- ) => Promise<void>
338
- )(
339
- `tool-retry:${input.toolId}:${input.status}:attempt-${input.attempt}:${inputHash}`,
340
- retryAfterMs,
341
- );
342
- return;
343
- }
344
-
345
- if (retryAfterMs > TOOL_IN_MEMORY_RETRY_SLEEP_MAX_MS) {
346
- throw new Error(
347
- `tool ${input.toolId} returned ${input.status} with retry-after ${retryAfterMs}ms, but no durable workflow step was available.`,
348
- );
349
- }
350
- await new Promise((resolve) => setTimeout(resolve, retryAfterMs));
351
- }
305
+ let loggedMissingRuntimeApiBinding = false;
352
306
 
353
307
  async function fetchRuntimeApi(
354
308
  baseUrl: string,
@@ -359,50 +313,54 @@ async function fetchRuntimeApi(
359
313
  path === '/api/v2/plays/run'
360
314
  ? RUNTIME_API_PLAY_RUN_TIMEOUT_MS
361
315
  : RUNTIME_API_TIMEOUT_MS;
362
- const headers = new Headers(init.headers);
363
- const authorization = headers.get('authorization') ?? '';
364
- const bearerPrefix = 'Bearer ';
365
- if (!authorization.startsWith(bearerPrefix)) {
366
- throw new Error(
367
- `[play-harness] Deepline API call requires a Bearer executor token. path=${path}`,
316
+ const controller = new AbortController();
317
+ const timer = setTimeout(() => controller.abort(), timeoutMs);
318
+ try {
319
+ const mergedInit: RequestInit = {
320
+ ...init,
321
+ headers: runtimeApiHeaders(init.headers, cachedRuntimeApiBinding == null),
322
+ signal: controller.signal,
323
+ };
324
+ if (!cachedRuntimeApiBinding) {
325
+ if (!loggedMissingRuntimeApiBinding) {
326
+ loggedMissingRuntimeApiBinding = true;
327
+ console.warn(
328
+ `[play-harness] RUNTIME_API binding missing; using public runtime API transport. path=${path}`,
329
+ );
330
+ }
331
+ return await fetch(`${baseUrl.replace(/\/$/, '')}${path}`, mergedInit);
332
+ }
333
+ return await cachedRuntimeApiBinding.fetch(
334
+ new Request(`${baseUrl.replace(/\/$/, '')}${path}`, mergedInit),
368
335
  );
369
- }
370
-
371
- const forwardedHeaders: Record<string, string> = {};
372
- for (const name of HARNESS_RUNTIME_FORWARD_HEADERS) {
373
- const value = headers.get(name);
374
- if (value) forwardedHeaders[name] = value;
375
- }
376
-
377
- let body: unknown = {};
378
- if (typeof init.body === 'string' && init.body.trim()) {
379
- try {
380
- body = JSON.parse(init.body) as unknown;
381
- } catch (error) {
336
+ } catch (err) {
337
+ if (err instanceof Error && err.name === 'AbortError') {
382
338
  throw new Error(
383
- `[play-harness] Deepline API call requires a JSON string body. path=${path} error=${
384
- error instanceof Error ? error.message : String(error)
385
- }`,
339
+ `[play-harness] runtime API call timed out after ${timeoutMs}ms. path=${path} baseUrl=${baseUrl}`,
386
340
  );
387
341
  }
388
- } else if (init.body != null) {
389
- throw new Error(
390
- `[play-harness] Deepline API call requires a JSON string body. path=${path}`,
391
- );
342
+ throw err;
343
+ } finally {
344
+ clearTimeout(timer);
392
345
  }
346
+ }
393
347
 
394
- void baseUrl;
395
- const result = await harnessRuntimeApiCall({
396
- executorToken: authorization.slice(bearerPrefix.length).trim(),
397
- path,
398
- body,
399
- headers: Object.keys(forwardedHeaders).length ? forwardedHeaders : undefined,
400
- timeoutMs,
401
- });
402
- return new Response(result.body, {
403
- status: result.status,
404
- headers: result.headers,
405
- });
348
+ function runtimeApiHeaders(
349
+ headers: HeadersInit | undefined,
350
+ includeVercelBypass: boolean,
351
+ ): Headers {
352
+ const next = new Headers(headers);
353
+ if (includeVercelBypass) {
354
+ const bypassToken = cachedVercelProtectionBypassToken();
355
+ if (bypassToken) {
356
+ next.set('x-vercel-protection-bypass', bypassToken);
357
+ }
358
+ }
359
+ return next;
360
+ }
361
+
362
+ function cachedVercelProtectionBypassToken(): string | null {
363
+ return cachedRuntimeApiVercelBypassToken;
406
364
  }
407
365
 
408
366
  const WORKER_PLAY_CALL_LIMITS = {
@@ -424,6 +382,20 @@ function makeWorkerDataset<T extends Record<string, unknown>>(
424
382
  count?: number;
425
383
  datasetKind?: 'csv' | 'map';
426
384
  cacheSummary?: string | null;
385
+ workProgress?: {
386
+ total: number;
387
+ executed: number;
388
+ reused: number;
389
+ skipped: number;
390
+ pending: number;
391
+ failed: number;
392
+ degraded?: boolean;
393
+ duplicates?: {
394
+ exact?: number;
395
+ semantic?: number;
396
+ rejected?: number;
397
+ };
398
+ };
427
399
  },
428
400
  ): T[] & {
429
401
  count(): Promise<number>;
@@ -437,6 +409,7 @@ function makeWorkerDataset<T extends Record<string, unknown>>(
437
409
  const count = Math.max(0, Math.floor(options?.count ?? rows.length));
438
410
  const datasetKind = options?.datasetKind ?? 'map';
439
411
  const cacheSummary = options?.cacheSummary ?? null;
412
+ const workProgress = options?.workProgress;
440
413
  // Build the array result. JSON.stringify on arrays calls toJSON only if
441
414
  // present on the array itself — we attach below. The dataset metadata is
442
415
  // also exposed via own properties so plays can `enriched.count()` etc.
@@ -448,7 +421,7 @@ function makeWorkerDataset<T extends Record<string, unknown>>(
448
421
  datasetId: string;
449
422
  tableNamespace: string;
450
423
  };
451
- const previewLimit = 10;
424
+ const previewLimit = 5;
452
425
  const inferredColumns = (() => {
453
426
  const cols = new Set<string>();
454
427
  for (const r of rows) {
@@ -489,6 +462,10 @@ function makeWorkerDataset<T extends Record<string, unknown>>(
489
462
  value: cacheSummary,
490
463
  enumerable: false,
491
464
  });
465
+ Object.defineProperty(arr, '__deeplineWorkProgress', {
466
+ value: workProgress,
467
+ enumerable: false,
468
+ });
492
469
  // Plays often `return { rows: dataset, count: N }`. JSON.stringify on the
493
470
  // array would normally produce `[row, row, ...]` — we want the dataset
494
471
  // envelope shape instead so assertions seeing `result.rows.columns` pass.
@@ -509,6 +486,9 @@ function makeWorkerDataset<T extends Record<string, unknown>>(
509
486
  preview: plainRows,
510
487
  tableNamespace: name,
511
488
  ...(cacheSummary ? { cacheSummary } : {}),
489
+ ...(workProgress
490
+ ? { _metadata: { workProgress } }
491
+ : {}),
512
492
  };
513
493
  },
514
494
  enumerable: false,
@@ -528,65 +508,30 @@ type RunnerEvent =
528
508
  | { type: 'error'; message: string; stack?: string; ts: number };
529
509
 
530
510
  type WorkflowRunOutput = {
511
+ playName: string;
531
512
  result: unknown;
532
513
  outputRows: number;
533
514
  durationMs: number;
534
515
  };
535
516
 
536
- type DynamicWorkerPerfTracePayload = {
537
- ts: number;
538
- source: 'dynamic_worker';
539
- runId: string;
540
- phase: string;
541
- ms: number;
542
- graphHash?: string | null;
543
- [key: string]: unknown;
544
- };
545
-
546
517
  function nowMs(): number {
547
518
  return Date.now();
548
519
  }
549
520
 
550
- function recordDynamicWorkerPerfTrace(input: {
551
- env: WorkerEnv;
552
- runId: string;
553
- phase: string;
554
- ms: number;
555
- graphHash?: string | null;
556
- extra?: Record<string, unknown>;
557
- waitUntil?: (promise: Promise<unknown>) => void;
558
- }): void {
559
- if (!input.runId || !input.phase) return;
560
- const payload: DynamicWorkerPerfTracePayload = {
561
- ts: Date.now(),
562
- source: 'dynamic_worker',
563
- runId: input.runId,
564
- phase: input.phase,
565
- ms: input.ms,
566
- ...(input.graphHash ? { graphHash: input.graphHash } : {}),
567
- ...(input.extra ?? {}),
568
- };
569
- console.log(`[perf-trace] ${JSON.stringify(payload)}`);
570
- const send = input.env.COORDINATOR?.recordPerfTrace(
571
- input.runId,
572
- payload,
573
- ).catch((error) => {
574
- console.error(
575
- `[play-harness] non-fatal dynamic trace append failed runId=${input.runId}: ${
576
- error instanceof Error ? error.message : String(error)
577
- }`,
578
- );
579
- });
580
- if (send && input.waitUntil) {
581
- input.waitUntil(send);
582
- }
583
- }
584
-
585
521
  function makeRequestId(): string {
586
522
  // Workers crypto.randomUUID is available without nodejs_compat.
587
523
  return crypto.randomUUID();
588
524
  }
589
525
 
526
+ function publicCsvInputRow<T extends Record<string, unknown>>(row: T): T {
527
+ const stripped = stripCsvProjectedFields(row) as Record<string, unknown>;
528
+ return Object.fromEntries(
529
+ Object.entries(stripped).filter(
530
+ ([fieldName]) => !fieldName.startsWith('__deepline'),
531
+ ),
532
+ ) as T;
533
+ }
534
+
590
535
  /**
591
536
  * Strip credentials and JWT-shaped tokens from any string before it lands in
592
537
  * a log buffer or upstream error message. The harness routinely echoes
@@ -609,11 +554,11 @@ function redactSecretsFromLogString(value: string): string {
609
554
  async function postRuntimeApi<T>(
610
555
  baseUrl: string,
611
556
  executorToken: string,
612
- body: RuntimeApiAction,
557
+ body: unknown,
613
558
  ): Promise<T> {
614
- // Routes through the long-lived harness Worker so the real HTTPS request is
615
- // assembled after the RPC boundary, preserving the executor Authorization
616
- // header while keeping HTTP client code out of per-play bundles.
559
+ // Routes through the in-process RUNTIME_API binding when present; otherwise
560
+ // falls back to a public fetch against `${baseUrl}${path}`. Either path
561
+ // hits the same handler with the same auth — only the transport changes.
617
562
  const res = await fetchRuntimeApi(baseUrl, '/api/v2/plays/internal/runtime', {
618
563
  method: 'POST',
619
564
  headers: {
@@ -674,7 +619,7 @@ function describeRuntimeApiBody(body: unknown): string {
674
619
  async function postRuntimeApiBestEffort(
675
620
  baseUrl: string,
676
621
  executorToken: string,
677
- body: RuntimeApiAction,
622
+ body: unknown,
678
623
  ): Promise<boolean> {
679
624
  try {
680
625
  await postRuntimeApi(baseUrl, executorToken, body);
@@ -689,67 +634,6 @@ async function postRuntimeApiBestEffort(
689
634
  }
690
635
  }
691
636
 
692
- type WorkerRuntimeTransport = {
693
- postRuntimeApi<T>(body: RuntimeApiAction): Promise<T>;
694
- postRuntimeApiBestEffort(body: RuntimeApiAction): Promise<boolean>;
695
- submitChild(body: Record<string, unknown>): Promise<{
696
- workflowId?: string;
697
- runId?: string;
698
- status?: string;
699
- mode?: string;
700
- output?: unknown;
701
- result?: unknown;
702
- error?: unknown;
703
- logs?: string[];
704
- timings?: Array<{ phase: string; ms: number }>;
705
- }>;
706
- signalParentTerminal(input: {
707
- status: 'completed' | 'failed' | 'cancelled';
708
- result?: Record<string, unknown> | null;
709
- error?: string | null;
710
- }): Promise<void>;
711
- };
712
-
713
- function createWorkerRuntimeTransport(req: RunRequest): WorkerRuntimeTransport {
714
- return {
715
- postRuntimeApi: <T>(body: RuntimeApiAction) =>
716
- postRuntimeApi<T>(req.baseUrl, req.executorToken, body),
717
- postRuntimeApiBestEffort: (body: RuntimeApiAction) =>
718
- postRuntimeApiBestEffort(req.baseUrl, req.executorToken, body),
719
- submitChild: (body: Record<string, unknown>) =>
720
- submitChildPlayThroughCoordinator({ req, body }),
721
- signalParentTerminal: (input) =>
722
- signalParentPlayTerminal({ req, ...input }),
723
- };
724
- }
725
-
726
- type WorkerChildPlayExecutor = {
727
- submit(
728
- body: Record<string, unknown>,
729
- ): ReturnType<WorkerRuntimeTransport['submitChild']>;
730
- waitTerminal(input: {
731
- workflowStep?: WorkflowStep;
732
- workflowId: string;
733
- playName: string;
734
- key: string;
735
- timeoutMs: number;
736
- }): Promise<unknown>;
737
- };
738
-
739
- function createWorkerChildPlayExecutor(input: {
740
- req: RunRequest;
741
- transport: WorkerRuntimeTransport;
742
- }): WorkerChildPlayExecutor {
743
- return {
744
- submit: (body) => input.transport.submitChild(body),
745
- waitTerminal: (waitInput) =>
746
- waitForChildPlayTerminalEvent({
747
- req: input.req,
748
- ...waitInput,
749
- }),
750
- };
751
- }
752
-
753
637
  async function submitChildPlayThroughCoordinator(input: {
754
638
  req: RunRequest;
755
639
  body: unknown;
@@ -792,7 +676,17 @@ async function submitChildPlayThroughCoordinator(input: {
792
676
  },
793
677
  );
794
678
  const text = await res.text().catch(() => '');
795
- let parsed: { workflowId?: string; runId?: string; error?: unknown } = {};
679
+ let parsed: {
680
+ workflowId?: string;
681
+ runId?: string;
682
+ status?: string;
683
+ mode?: string;
684
+ output?: unknown;
685
+ result?: unknown;
686
+ error?: unknown;
687
+ logs?: string[];
688
+ timings?: Array<{ phase: string; ms: number }>;
689
+ } = {};
796
690
  try {
797
691
  parsed = text ? JSON.parse(text) : {};
798
692
  } catch {
@@ -880,6 +774,11 @@ function childPlayEventKey(input: { key: string; workflowId: string }): string {
880
774
  return `child_play_${hashChildPlayEventKey(`${input.key}:${input.workflowId}`)}_${readableKey}`;
881
775
  }
882
776
 
777
+ function workflowTimeoutFromMs(timeoutMs: number): string {
778
+ const seconds = Math.max(1, Math.ceil(timeoutMs / 1000));
779
+ return `${seconds} second${seconds === 1 ? '' : 's'}`;
780
+ }
781
+
883
782
  async function waitForChildPlayTerminalEvent(input: {
884
783
  req: RunRequest;
885
784
  workflowStep?: WorkflowStep;
@@ -900,11 +799,11 @@ async function waitForChildPlayTerminalEvent(input: {
900
799
  const event = (await (
901
800
  input.workflowStep.waitForEvent as unknown as (
902
801
  name: string,
903
- options: { type: string; timeout: number },
802
+ options: { type: string; timeout: string },
904
803
  ) => Promise<{ payload: unknown }>
905
804
  )(`child_play_terminal:${eventKey}`, {
906
805
  type: integrationEventType(eventKey),
907
- timeout: input.timeoutMs,
806
+ timeout: workflowTimeoutFromMs(input.timeoutMs),
908
807
  })) as { payload: unknown };
909
808
  const rawPayload = isRecord(event.payload) ? event.payload : {};
910
809
  const payload = isRecord(rawPayload.data) ? rawPayload.data : rawPayload;
@@ -998,11 +897,7 @@ async function signalParentPlayTerminal(input: {
998
897
 
999
898
  async function executeTool(
1000
899
  req: RunRequest,
1001
- args: {
1002
- toolId: string;
1003
- input: Record<string, unknown>;
1004
- retryKey?: string | null;
1005
- },
900
+ args: { id: string; toolId: string; input: Record<string, unknown> },
1006
901
  workflowStep?: WorkflowStep,
1007
902
  ): Promise<ToolExecuteResult> {
1008
903
  if (args.toolId === 'test_wait_for_event' && workflowStep) {
@@ -1018,76 +913,52 @@ async function executeTool(
1018
913
  // service bindings, NOT through HTTP from this worker. Removing the
1019
914
  // dispatcher-side coordinatorUrl plumbing intentionally turns the old
1020
915
  // HTTP-based dedup helpers into dead code.
1021
- return callToolDirect(
1022
- req,
1023
- args.toolId,
1024
- args.input,
1025
- workflowStep,
1026
- args.retryKey,
1027
- );
916
+ return callToolDirect(req, args);
1028
917
  }
1029
918
 
1030
- function integrationEventType(eventKey: string): string {
1031
- return workflowEventType(`integration_event_${eventKey}`);
919
+ function isToolExecuteRecord(value: unknown): value is Record<string, unknown> {
920
+ return typeof value === 'object' && value !== null && !Array.isArray(value);
1032
921
  }
1033
922
 
1034
- const activeSyntheticIntegrationWaitsByRun = new Map<
1035
- string,
1036
- Map<string, number>
1037
- >();
1038
-
1039
- function updateActiveSyntheticIntegrationWait(input: {
1040
- runId: string;
1041
- eventKey: string;
1042
- timeoutMs: number;
1043
- waiting: boolean;
1044
- }): { eventKey: string; waitUntil: number } | null {
1045
- const waits =
1046
- activeSyntheticIntegrationWaitsByRun.get(input.runId) ??
1047
- new Map<string, number>();
1048
- if (input.waiting) {
1049
- waits.set(input.eventKey, Date.now() + input.timeoutMs);
1050
- } else {
1051
- waits.delete(input.eventKey);
923
+ function normalizeToolExecuteArgs(
924
+ requestOrKey: unknown,
925
+ toolId?: unknown,
926
+ input?: unknown,
927
+ ): { id: string; toolId: string; input: Record<string, unknown> } {
928
+ if (isToolExecuteRecord(requestOrKey)) {
929
+ const id = requestOrKey.id;
930
+ const tool = requestOrKey.tool;
931
+ const requestInput = requestOrKey.input;
932
+ if (
933
+ typeof id !== 'string' ||
934
+ !id.trim() ||
935
+ typeof tool !== 'string' ||
936
+ !tool ||
937
+ !isToolExecuteRecord(requestInput)
938
+ ) {
939
+ throw new Error(
940
+ 'ctx.tools.execute({ id, tool, input }) requires a non-empty id, tool string, and input object.',
941
+ );
942
+ }
943
+ return { id: id.trim(), toolId: tool, input: requestInput };
1052
944
  }
1053
- if (waits.size === 0) {
1054
- activeSyntheticIntegrationWaitsByRun.delete(input.runId);
1055
- return null;
945
+
946
+ if (
947
+ typeof requestOrKey !== 'string' ||
948
+ !requestOrKey.trim() ||
949
+ typeof toolId !== 'string' ||
950
+ !toolId ||
951
+ !isToolExecuteRecord(input)
952
+ ) {
953
+ throw new Error(
954
+ 'ctx.tools.execute(key, toolId, input) requires a tool ID and input object.',
955
+ );
1056
956
  }
1057
- activeSyntheticIntegrationWaitsByRun.set(input.runId, waits);
1058
- const firstEntry = waits.entries().next().value;
1059
- if (!firstEntry) return null;
1060
- const [eventKey, waitUntil] = firstEntry;
1061
- return { eventKey, waitUntil };
957
+ return { id: requestOrKey.trim(), toolId, input };
1062
958
  }
1063
959
 
1064
- async function updateSyntheticIntegrationWaitStatus(input: {
1065
- req: RunRequest;
1066
- eventKey: string;
1067
- timeoutMs: number;
1068
- waiting: boolean;
1069
- }): Promise<void> {
1070
- const activeWait = updateActiveSyntheticIntegrationWait({
1071
- runId: input.req.runId,
1072
- eventKey: input.eventKey,
1073
- timeoutMs: input.timeoutMs,
1074
- waiting: input.waiting,
1075
- });
1076
- await postRuntimeApiBestEffort(
1077
- input.req.baseUrl,
1078
- input.req.executorToken,
1079
- runtimeRunActions.updateStatus({
1080
- playId: input.req.runId,
1081
- status: 'running',
1082
- runtimeBackend: 'cf_workflows_dynamic_worker',
1083
- waitKind: activeWait ? 'integration_event_batch' : null,
1084
- waitUntil: activeWait?.waitUntil ?? null,
1085
- activeBoundaryId: activeWait
1086
- ? `integration_event:${activeWait.eventKey}`
1087
- : null,
1088
- lastCheckpointAt: Date.now(),
1089
- }),
1090
- );
960
+ function integrationEventType(eventKey: string): string {
961
+ return workflowEventType(`integration_event_${eventKey}`);
1091
962
  }
1092
963
 
1093
964
  async function waitForSyntheticIntegrationEvent(
@@ -1103,23 +974,26 @@ async function waitForSyntheticIntegrationEvent(
1103
974
  typeof input.timeout_ms === 'number' && Number.isFinite(input.timeout_ms)
1104
975
  ? Math.max(1, Math.round(input.timeout_ms))
1105
976
  : 30_000;
977
+ await postRuntimeApiBestEffort(req.baseUrl, req.executorToken, {
978
+ action: 'update_run_status',
979
+ playId: req.runId,
980
+ status: 'running',
981
+ runtimeBackend: 'cf_workflows_dynamic_worker',
982
+ waitKind: 'integration_event_batch',
983
+ waitUntil: nowMs() + timeoutMs,
984
+ activeBoundaryId: `integration_event:${eventKey}`,
985
+ lastCheckpointAt: nowMs(),
986
+ });
1106
987
  try {
1107
- const eventPromise = (
988
+ const event = (await (
1108
989
  workflowStep.waitForEvent as unknown as (
1109
990
  name: string,
1110
- options: { type: string; timeout: number },
991
+ options: { type: string; timeout: string },
1111
992
  ) => Promise<{ payload: unknown }>
1112
993
  )(`integration_event:${eventKey}`, {
1113
994
  type: integrationEventType(eventKey),
1114
- timeout: timeoutMs,
1115
- });
1116
- await updateSyntheticIntegrationWaitStatus({
1117
- req,
1118
- eventKey,
1119
- timeoutMs,
1120
- waiting: true,
1121
- });
1122
- const event = (await eventPromise) as { payload: unknown };
995
+ timeout: workflowTimeoutFromMs(timeoutMs),
996
+ })) as { payload: unknown };
1123
997
  const payload =
1124
998
  event.payload &&
1125
999
  typeof event.payload === 'object' &&
@@ -1152,23 +1026,14 @@ async function waitForSyntheticIntegrationEvent(
1152
1026
  resumed: false,
1153
1027
  timed_out: true,
1154
1028
  };
1155
- } finally {
1156
- await updateSyntheticIntegrationWaitStatus({
1157
- req,
1158
- eventKey,
1159
- timeoutMs,
1160
- waiting: false,
1161
- });
1162
1029
  }
1163
1030
  }
1164
1031
 
1165
1032
  async function callToolDirect(
1166
1033
  req: RunRequest,
1167
- toolId: string,
1168
- input: Record<string, unknown>,
1169
- workflowStep?: WorkflowStep,
1170
- retryKey?: string | null,
1034
+ args: { id: string; toolId: string; input: Record<string, unknown> },
1171
1035
  ): Promise<ToolExecuteResult> {
1036
+ const { id, toolId, input } = args;
1172
1037
  if (toolId === 'test_rate_limit') {
1173
1038
  return wrapWorkerToolResult(
1174
1039
  toolId,
@@ -1184,60 +1049,56 @@ async function callToolDirect(
1184
1049
  );
1185
1050
  }
1186
1051
  const path = `/api/v2/integrations/${encodeURIComponent(toolId)}/execute`;
1187
- let res!: Response;
1188
- for (let attempt = 1; ; attempt += 1) {
1189
- res = await fetchRuntimeApi(req.baseUrl, path, {
1052
+ const maxAttempts = 3;
1053
+ let lastError: Error | null = null;
1054
+
1055
+ for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
1056
+ const res = await fetchRuntimeApi(req.baseUrl, path, {
1190
1057
  method: 'POST',
1191
1058
  headers: {
1192
1059
  'content-type': 'application/json',
1193
1060
  authorization: `Bearer ${req.executorToken}`,
1194
- 'x-deepline-request-id': makeRequestId(),
1061
+ 'x-deepline-request-id': `${req.runId}:${toolId}:${id}:attempt:${attempt}`,
1195
1062
  [EXECUTE_TOOL_METADATA_HEADER]: 'true',
1196
1063
  },
1197
1064
  body: JSON.stringify({ payload: input }),
1198
1065
  });
1199
- const retryable = isRetryableToolHttpStatus(toolId, res.status);
1200
- if (!retryable || attempt >= TOOL_HTTP_MAX_ATTEMPTS) {
1201
- break;
1066
+ if (res.ok) {
1067
+ const body = (await res.json()) as Record<string, unknown>;
1068
+ const result = (body.result ?? body) as unknown;
1069
+ const status =
1070
+ typeof body.status === 'string'
1071
+ ? body.status
1072
+ : result == null
1073
+ ? 'no_result'
1074
+ : 'completed';
1075
+ return wrapWorkerToolResult(
1076
+ toolId,
1077
+ result,
1078
+ parseExecuteToolMetadata(toolId, body),
1079
+ status,
1080
+ );
1202
1081
  }
1203
- const retryAfterMs = parseRetryAfterMs(res.headers.get('retry-after'));
1204
- console.log(
1205
- `[play-harness] tool ${toolId} returned ${res.status}; retrying after ${retryAfterMs}ms attempt=${attempt}/${TOOL_HTTP_MAX_ATTEMPTS}`,
1206
- );
1207
- await res.body?.cancel().catch(() => undefined);
1208
- await sleepForToolRetry({
1209
- workflowStep,
1210
- toolId,
1211
- status: res.status,
1212
- attempt,
1213
- retryAfterMs,
1214
- retryKey,
1215
- toolInput: input,
1216
- });
1217
- }
1218
- if (!res.ok) {
1082
+
1219
1083
  const text = await res.text().catch(() => '');
1220
- throw new Error(`tool ${toolId} ${res.status}: ${text.slice(0, 500)}`);
1084
+ lastError = new Error(
1085
+ `tool ${toolId} ${res.status} attempt ${attempt}/${maxAttempts}: ${text.slice(0, 500)}`,
1086
+ );
1087
+ const retryable =
1088
+ res.status === 429 ||
1089
+ (res.status >= 500 && WORKER_RETRY_SAFE_5XX_TOOLS.has(toolId));
1090
+ if (!retryable || attempt >= maxAttempts) {
1091
+ throw lastError;
1092
+ }
1093
+ const retryAfterSeconds = Number(res.headers.get('retry-after'));
1094
+ const delayMs =
1095
+ Number.isFinite(retryAfterSeconds) && retryAfterSeconds > 0
1096
+ ? Math.min(5_000, Math.ceil(retryAfterSeconds * 1000))
1097
+ : 1_000;
1098
+ await new Promise((resolve) => setTimeout(resolve, delayMs));
1221
1099
  }
1222
- // Match shared_libs/play-runtime/context.ts:callToolAPI:
1223
- // data = body
1224
- // normalized = normalizePlayToolResult(data.result ?? data)
1225
- // where normalizePlayToolResult recursively unwraps `{data: X}` → `X`.
1226
- const body = (await res.json()) as Record<string, unknown>;
1227
- const picked = (body.result ?? body) as unknown;
1228
- const result = normalizePlayToolResult(picked);
1229
- const status =
1230
- typeof body.status === 'string'
1231
- ? body.status
1232
- : result == null
1233
- ? 'no_result'
1234
- : 'completed';
1235
- return wrapWorkerToolResult(
1236
- toolId,
1237
- result,
1238
- parseExecuteToolMetadata(toolId, body),
1239
- status,
1240
- );
1100
+
1101
+ throw lastError ?? new Error(`tool ${toolId} failed before execution.`);
1241
1102
  }
1242
1103
 
1243
1104
  function parseExecuteToolMetadata(
@@ -1482,22 +1343,23 @@ type WorkerInlineWaterfallSpec = {
1482
1343
  input: Record<string, unknown>,
1483
1344
  ctx: {
1484
1345
  tools: {
1485
- execute(request: WorkerToolExecutionRequest): Promise<unknown>;
1346
+ execute(
1347
+ key: string,
1348
+ toolId: string,
1349
+ input: Record<string, unknown>,
1350
+ ): Promise<unknown>;
1486
1351
  };
1487
- tool(request: WorkerToolExecutionRequest): Promise<unknown>;
1352
+ tool(
1353
+ key: string,
1354
+ toolId: string,
1355
+ input: Record<string, unknown>,
1356
+ ): Promise<unknown>;
1488
1357
  },
1489
1358
  ) => unknown | Promise<unknown>;
1490
1359
  }
1491
1360
  >;
1492
1361
  };
1493
1362
 
1494
- type WorkerToolExecutionRequest = {
1495
- id: string;
1496
- tool: string;
1497
- input: Record<string, unknown>;
1498
- description?: string;
1499
- };
1500
-
1501
1363
  type WorkerWaterfallOptions = {
1502
1364
  providers?: string[];
1503
1365
  min_results?: number;
@@ -1524,8 +1386,8 @@ type WorkerStepResolution = {
1524
1386
  };
1525
1387
 
1526
1388
  type WorkerToolBatchRequest = {
1389
+ id: string;
1527
1390
  toolId: string;
1528
- retryKey?: string | null;
1529
1391
  input: Record<string, unknown>;
1530
1392
  workflowStep?: WorkflowStep;
1531
1393
  resolve: (value: unknown) => void;
@@ -1533,6 +1395,7 @@ type WorkerToolBatchRequest = {
1533
1395
  };
1534
1396
 
1535
1397
  const WORKER_TOOL_BATCH_GRACE_MS = 15;
1398
+ const WORKER_RETRY_SAFE_5XX_TOOLS = new Set(['test_transient_500']);
1536
1399
 
1537
1400
  function stepProgramColumnName(parentField: string, stepId: string): string {
1538
1401
  return sqlSafePlayColumnName(`${parentField}.${stepId}`);
@@ -1545,20 +1408,13 @@ class WorkerToolBatchScheduler {
1545
1408
  constructor(private readonly req: RunRequest) {}
1546
1409
 
1547
1410
  execute(
1548
- retryKey: string | null,
1411
+ id: string,
1549
1412
  toolId: string,
1550
1413
  input: Record<string, unknown>,
1551
1414
  workflowStep?: WorkflowStep,
1552
1415
  ): Promise<unknown> {
1553
1416
  return new Promise((resolve, reject) => {
1554
- this.queue.push({
1555
- toolId,
1556
- retryKey,
1557
- input,
1558
- workflowStep,
1559
- resolve,
1560
- reject,
1561
- });
1417
+ this.queue.push({ id, toolId, input, workflowStep, resolve, reject });
1562
1418
  this.scheduleDrain();
1563
1419
  });
1564
1420
  }
@@ -1612,7 +1468,7 @@ class WorkerToolBatchScheduler {
1612
1468
  request.resolve(
1613
1469
  await executeTool(
1614
1470
  this.req,
1615
- { toolId, input: request.input, retryKey: request.retryKey },
1471
+ { id: request.id, toolId, input: request.input },
1616
1472
  request.workflowStep,
1617
1473
  ),
1618
1474
  );
@@ -1662,17 +1518,11 @@ async function executeBatchedWorkerToolGroup(input: {
1662
1518
  requests: compiledBatches,
1663
1519
  batchSize: Math.max(1, Math.min(4, compiledBatches.length || 1)),
1664
1520
  execute: async (batch) =>
1665
- await executeTool(
1666
- input.req,
1667
- {
1668
- toolId: batch.batchOperation,
1669
- input: batch.batchPayload,
1670
- retryKey: batch.memberRequests
1671
- .map((request) => request.retryKey ?? '')
1672
- .join('|'),
1673
- },
1674
- batch.memberRequests[0]?.workflowStep,
1675
- ),
1521
+ await executeTool(input.req, {
1522
+ id: `batch:${batch.memberRequests.map((request) => request.id).join('|')}`,
1523
+ toolId: batch.batchOperation,
1524
+ input: batch.batchPayload,
1525
+ }),
1676
1526
  onChunkComplete: async (
1677
1527
  chunkResults: Array<
1678
1528
  ChunkExecutionResult<(typeof compiledBatches)[number], unknown>
@@ -1680,7 +1530,9 @@ async function executeBatchedWorkerToolGroup(input: {
1680
1530
  ) => {
1681
1531
  for (const entry of chunkResults) {
1682
1532
  const batchResult = isToolExecuteResult(entry.result)
1683
- ? entry.result.result
1533
+ ? isRecordLike(entry.result.result)
1534
+ ? entry.result.result.data
1535
+ : undefined
1684
1536
  : entry.result;
1685
1537
  const splitResults =
1686
1538
  batchResult != null
@@ -1741,6 +1593,7 @@ type WorkerMapChunkSummary<T extends Record<string, unknown>> = {
1741
1593
  rowsWritten: number;
1742
1594
  rowsExecuted: number;
1743
1595
  rowsCached: number;
1596
+ rowsDuplicateReused: number;
1744
1597
  rowsInserted: number;
1745
1598
  rowsSkipped: number;
1746
1599
  outputDatasetId: string;
@@ -1785,7 +1638,6 @@ type WorkerStepProgram = {
1785
1638
  type WorkerMapOptions = {
1786
1639
  description?: string;
1787
1640
  concurrency?: number;
1788
- staleAfterSeconds?: number;
1789
1641
  key?:
1790
1642
  | string
1791
1643
  | readonly string[]
@@ -1795,46 +1647,6 @@ type WorkerMapOptions = {
1795
1647
  ) => string | number | readonly unknown[]);
1796
1648
  };
1797
1649
 
1798
- function workerMapRowIdentity(
1799
- row: Record<string, unknown>,
1800
- tableNamespace: string,
1801
- opts: WorkerMapOptions | undefined,
1802
- index = 0,
1803
- ): string {
1804
- const key = opts?.key;
1805
- if (!key) return derivePlayRowIdentity(row, tableNamespace);
1806
- const raw =
1807
- typeof key === 'function'
1808
- ? key(row, index)
1809
- : typeof key === 'string'
1810
- ? row[key]
1811
- : key.map((fieldName) => row[fieldName]);
1812
- const normalized = normalizeWorkerExplicitMapKey(raw);
1813
- if (!normalized) {
1814
- throw new Error(
1815
- `ctx.map("${tableNamespace}") key produced an empty value for row ${index}. ` +
1816
- 'Use non-empty stable input columns or return a non-empty string, number, or tuple.',
1817
- );
1818
- }
1819
- return derivePlayRowIdentityFromKey(normalized, tableNamespace);
1820
- }
1821
-
1822
- function normalizeWorkerExplicitMapKey(value: unknown): string {
1823
- if (Array.isArray(value)) {
1824
- const parts = value.map((entry) =>
1825
- normalizeWorkerExplicitMapKeyPart(entry),
1826
- );
1827
- return parts.every(Boolean) ? JSON.stringify(parts) : '';
1828
- }
1829
- return normalizeWorkerExplicitMapKeyPart(value);
1830
- }
1831
-
1832
- function normalizeWorkerExplicitMapKeyPart(value: unknown): string {
1833
- if (typeof value === 'number')
1834
- return Number.isFinite(value) ? String(value) : '';
1835
- return String(value ?? '').trim();
1836
- }
1837
-
1838
1650
  function isWorkerStepProgram(value: unknown): value is WorkerStepProgram {
1839
1651
  return (
1840
1652
  !!value &&
@@ -1844,16 +1656,6 @@ function isWorkerStepProgram(value: unknown): value is WorkerStepProgram {
1844
1656
  );
1845
1657
  }
1846
1658
 
1847
- function isWorkerMapDefinitionOptions(
1848
- value: unknown,
1849
- ): value is Omit<WorkerMapOptions, 'description' | 'concurrency'> {
1850
- if (!value || typeof value !== 'object' || Array.isArray(value)) {
1851
- return false;
1852
- }
1853
- const keys = Object.keys(value);
1854
- return keys.every((key) => key === 'key' || key === 'staleAfterSeconds');
1855
- }
1856
-
1857
1659
  function isWorkerConditionalStepResolver(
1858
1660
  value: unknown,
1859
1661
  ): value is WorkerConditionalStepResolver {
@@ -1917,24 +1719,34 @@ async function executeWorkerStepProgram(
1917
1719
  path: string[];
1918
1720
  outputs: RecordedStepProgramOutput[];
1919
1721
  },
1722
+ workflowStep?: WorkflowStep,
1920
1723
  ): Promise<unknown> {
1921
- const currentRow: Record<string, unknown> = { ...inputRow };
1724
+ let currentRow: Record<string, unknown> = cloneCsvAliasedRow(inputRow);
1922
1725
  for (const step of program.steps) {
1923
1726
  const stepPath = [...(recorder?.path ?? []), step.name];
1924
- const resolution = await executeWorkerStepResolver(
1925
- step.resolver,
1926
- currentRow,
1927
- ctx,
1928
- index,
1929
- recorder
1930
- ? {
1931
- ...recorder,
1932
- path: stepPath,
1933
- }
1934
- : undefined,
1935
- );
1727
+ const runStep = async () =>
1728
+ await executeWorkerStepResolver(
1729
+ step.resolver,
1730
+ currentRow,
1731
+ ctx,
1732
+ index,
1733
+ recorder
1734
+ ? {
1735
+ ...recorder,
1736
+ path: stepPath,
1737
+ }
1738
+ : undefined,
1739
+ );
1740
+ const resolution = workflowStep
1741
+ ? await (
1742
+ workflowStep.do as unknown as (
1743
+ name: string,
1744
+ callback: () => Promise<WorkerStepResolution>,
1745
+ ) => Promise<WorkerStepResolution>
1746
+ )(stepPath.join('.'), runStep)
1747
+ : await runStep();
1936
1748
  const value = resolution.value;
1937
- currentRow[step.name] = value;
1749
+ currentRow = cloneCsvAliasedRow(currentRow, { [step.name]: value });
1938
1750
  if (recorder) {
1939
1751
  const stepId = stepPath.join('.');
1940
1752
  recorder.outputs.push({
@@ -1968,7 +1780,6 @@ async function executeWorkerWaterfall(
1968
1780
  toolNameOrSpec: string | WorkerInlineWaterfallSpec,
1969
1781
  input: Record<string, unknown>,
1970
1782
  opts?: WorkerWaterfallOptions,
1971
- workflowStep?: WorkflowStep,
1972
1783
  ): Promise<unknown | null> {
1973
1784
  // Inline-spec form
1974
1785
  if (typeof toolNameOrSpec === 'object' && toolNameOrSpec) {
@@ -1980,38 +1791,25 @@ async function executeWorkerWaterfall(
1980
1791
  if (isWorkerInlineCodeStep(step)) {
1981
1792
  result = await step.run(input, {
1982
1793
  tools: {
1983
- execute: async (request) =>
1794
+ execute: async (
1795
+ requestOrKey: unknown,
1796
+ toolId?: unknown,
1797
+ toolInput?: unknown,
1798
+ ) =>
1984
1799
  await executeTool(
1985
1800
  req,
1986
- {
1987
- toolId: request.tool,
1988
- input: request.input,
1989
- retryKey: request.id,
1990
- },
1991
- workflowStep,
1801
+ normalizeToolExecuteArgs(requestOrKey, toolId, toolInput),
1992
1802
  ),
1993
1803
  },
1994
- tool: async (request) =>
1995
- await executeTool(
1996
- req,
1997
- {
1998
- toolId: request.tool,
1999
- input: request.input,
2000
- retryKey: request.id,
2001
- },
2002
- workflowStep,
2003
- ),
1804
+ tool: async (key, toolId, toolInput) =>
1805
+ await executeTool(req, { id: key, toolId, input: toolInput }),
2004
1806
  });
2005
1807
  } else {
2006
- result = await executeTool(
2007
- req,
2008
- {
2009
- toolId: step.toolId,
2010
- input: step.mapInput(input),
2011
- retryKey: step.id,
2012
- },
2013
- workflowStep,
2014
- );
1808
+ result = await executeTool(req, {
1809
+ id: step.id,
1810
+ toolId: step.toolId,
1811
+ input: step.mapInput(input),
1812
+ });
2015
1813
  }
2016
1814
  } catch {
2017
1815
  continue;
@@ -2097,11 +1895,7 @@ async function executeWorkerWaterfall(
2097
1895
  const providers = opts?.providers ?? [];
2098
1896
  if (providers.length === 0) {
2099
1897
  try {
2100
- return await executeTool(
2101
- req,
2102
- { toolId: toolName, input, retryKey: toolName },
2103
- workflowStep,
2104
- );
1898
+ return await executeTool(req, { id: toolName, toolId: toolName, input });
2105
1899
  } catch {
2106
1900
  return null;
2107
1901
  }
@@ -2109,15 +1903,11 @@ async function executeWorkerWaterfall(
2109
1903
  let lastError: Error | null = null;
2110
1904
  for (const provider of providers) {
2111
1905
  try {
2112
- const result = await executeTool(
2113
- req,
2114
- {
2115
- toolId: toolName,
2116
- input: { ...input, provider },
2117
- retryKey: `${toolName}:${provider}`,
2118
- },
2119
- workflowStep,
2120
- );
1906
+ const result = await executeTool(req, {
1907
+ id: `${toolName}:${provider}`,
1908
+ toolId: toolName,
1909
+ input: { ...input, provider },
1910
+ });
2121
1911
  if (resultHasContent(result)) {
2122
1912
  recorder.push({
2123
1913
  waterfallId: toolName,
@@ -2134,18 +1924,6 @@ async function executeWorkerWaterfall(
2134
1924
  return null;
2135
1925
  }
2136
1926
 
2137
- function normalizePlayToolResult(value: unknown): unknown {
2138
- if (!isRecordLike(value)) return value;
2139
- if ('data' in value) return normalizePlayToolResult(value.data);
2140
- if ('result' in value) {
2141
- const normalizedResult = normalizePlayToolResult(value.result);
2142
- if (normalizedResult !== value.result) {
2143
- return { ...value, result: normalizedResult };
2144
- }
2145
- }
2146
- return value;
2147
- }
2148
-
2149
1927
  async function hashJson(value: unknown): Promise<string> {
2150
1928
  const bytes = new TextEncoder().encode(canonicalizeJson(value));
2151
1929
  const digest = await crypto.subtle.digest('SHA-256', bytes);
@@ -2266,18 +2044,12 @@ async function* streamCsvRowsFromBody<T extends Record<string, unknown>>(
2266
2044
  const flushPhysicalRowsAsObjects = (terminal: boolean): T[][] => {
2267
2045
  const yielded: T[][] = [];
2268
2046
  if (physicalRowBuffer.length === 0) return yielded;
2269
- let startIndex = 0;
2270
2047
  if (!headers) {
2271
- headers = physicalRowBuffer[0] ?? null;
2048
+ headers = physicalRowBuffer.shift() ?? null;
2272
2049
  if (!headers) return yielded;
2273
- startIndex = 1;
2274
2050
  }
2275
- for (
2276
- let rowIndex = startIndex;
2277
- rowIndex < physicalRowBuffer.length;
2278
- rowIndex += 1
2279
- ) {
2280
- const cells = physicalRowBuffer[rowIndex]!;
2051
+ while (physicalRowBuffer.length > 0) {
2052
+ const cells = physicalRowBuffer.shift()!;
2281
2053
  const obj: Record<string, unknown> = {};
2282
2054
  for (let c = 0; c < headers.length; c += 1) {
2283
2055
  obj[headers[c]!] = cells[c] ?? '';
@@ -2288,7 +2060,6 @@ async function* streamCsvRowsFromBody<T extends Record<string, unknown>>(
2288
2060
  pendingChunk = [];
2289
2061
  }
2290
2062
  }
2291
- physicalRowBuffer.length = 0;
2292
2063
  if (terminal && pendingChunk.length > 0) {
2293
2064
  yielded.push(pendingChunk);
2294
2065
  pendingChunk = [];
@@ -2333,12 +2104,7 @@ async function openR2BodyStream(input: {
2333
2104
  return object.body;
2334
2105
  }
2335
2106
  }
2336
- const packagedAsset = input.req.packagedFiles?.some(
2337
- (file) =>
2338
- file.playPath === input.logicalPath ||
2339
- file.playPath === input.logicalPath.replace(/^\.\//, ''),
2340
- );
2341
- if (input.env.PLAY_ASSETS && packagedAsset) {
2107
+ if (input.env.PLAY_ASSETS) {
2342
2108
  try {
2343
2109
  const text = await input.env.PLAY_ASSETS.readText(input.logicalPath);
2344
2110
  const bytes = new TextEncoder().encode(text);
@@ -2355,80 +2121,28 @@ async function openR2BodyStream(input: {
2355
2121
  }
2356
2122
  }
2357
2123
 
2358
- return openHarnessRangedBodyStream(input);
2359
- }
2360
-
2361
- async function openHarnessRangedBodyStream(input: {
2362
- req: RunRequest;
2363
- logicalPath: string;
2364
- storageKey: string;
2365
- }): Promise<ReadableStream<Uint8Array>> {
2366
- const headResponse = await harnessFetchStagedFile({
2124
+ // The harness fetch path returns a real Response body backed by R2.
2125
+ // Errors are loud: we want CI / regression failures to surface the real
2126
+ // cause (auth, missing object, network) rather than getting squashed into a
2127
+ // generic "R2 asset is not reachable".
2128
+ const response = await harnessFetchStagedFile({
2367
2129
  executorToken: input.req.executorToken,
2368
2130
  storageKey: input.storageKey,
2369
- method: 'HEAD',
2370
2131
  });
2371
- if (headResponse.status === 404) {
2132
+ if (response.status === 404) {
2372
2133
  throw new Error(
2373
2134
  `ctx.csv("${input.logicalPath}"): harness R2 fetch returned 404 for storageKey=${input.storageKey}. ` +
2374
2135
  `The staged file is missing from R2; the upload either failed silently before the run started, ` +
2375
- `or the storageKey threaded through the workflow params no longer matches what the harness resolves.`,
2376
- );
2377
- }
2378
- if (!headResponse.ok) {
2379
- const body = await headResponse.text().catch(() => '');
2380
- throw new Error(
2381
- `ctx.csv("${input.logicalPath}"): harness R2 metadata fetch failed ${headResponse.status}: ${body.slice(0, 400)}`,
2136
+ `or the storageKey threaded through the workflow params no longer matches what the harness resolves.`,
2382
2137
  );
2383
2138
  }
2384
- const rawSize =
2385
- headResponse.headers.get('x-deepline-object-size') ??
2386
- headResponse.headers.get('content-length') ??
2387
- '';
2388
- const objectSize = Number(rawSize);
2389
- if (!Number.isSafeInteger(objectSize) || objectSize < 0) {
2139
+ if (!response.ok || !response.body) {
2140
+ const body = await response.text().catch(() => '');
2390
2141
  throw new Error(
2391
- `ctx.csv("${input.logicalPath}"): harness R2 metadata missing a valid object size for storageKey=${input.storageKey}.`,
2142
+ `ctx.csv("${input.logicalPath}"): harness R2 fetch failed ${response.status}: ${body.slice(0, 400)}`,
2392
2143
  );
2393
2144
  }
2394
-
2395
- let offset = 0;
2396
- return new ReadableStream<Uint8Array>({
2397
- async pull(controller) {
2398
- if (offset >= objectSize) {
2399
- controller.close();
2400
- return;
2401
- }
2402
- const length = Math.min(
2403
- TARGET_CSV_DECODE_CHUNK_BYTES,
2404
- objectSize - offset,
2405
- );
2406
- const response = await harnessFetchStagedFile({
2407
- executorToken: input.req.executorToken,
2408
- storageKey: input.storageKey,
2409
- range: { offset, length },
2410
- });
2411
- if (response.status === 404) {
2412
- throw new Error(
2413
- `ctx.csv("${input.logicalPath}"): harness R2 range fetch returned 404 for storageKey=${input.storageKey}.`,
2414
- );
2415
- }
2416
- if (!response.ok || response.status !== 206) {
2417
- const body = await response.text().catch(() => '');
2418
- throw new Error(
2419
- `ctx.csv("${input.logicalPath}"): harness R2 range fetch failed ${response.status}: ${body.slice(0, 400)}`,
2420
- );
2421
- }
2422
- const bytes = new Uint8Array(await response.arrayBuffer());
2423
- if (bytes.length === 0 && length > 0) {
2424
- throw new Error(
2425
- `ctx.csv("${input.logicalPath}"): harness R2 range fetch returned an empty body before EOF at offset=${offset}.`,
2426
- );
2427
- }
2428
- offset += bytes.length;
2429
- controller.enqueue(bytes);
2430
- },
2431
- });
2145
+ return response.body;
2432
2146
  }
2433
2147
 
2434
2148
  /**
@@ -2454,11 +2168,11 @@ type StreamingCsvDataset<T extends Record<string, unknown>> = T[] & {
2454
2168
  };
2455
2169
 
2456
2170
  const MAX_MATERIALIZE_ROWS_DEFAULT = 50_000;
2457
- const STREAMING_MAP_DEFAULT_CHUNK_SIZE = 5_000;
2458
2171
 
2459
2172
  function makeStreamingCsvDataset<T extends Record<string, unknown>>(input: {
2460
2173
  name: string;
2461
2174
  logicalPath: string;
2175
+ renameOptions?: CsvRenameOptions;
2462
2176
  open: () => Promise<ReadableStream<Uint8Array> | null>;
2463
2177
  }): StreamingCsvDataset<T> {
2464
2178
  const datasetId = `csv:${input.name}`;
@@ -2472,7 +2186,12 @@ function makeStreamingCsvDataset<T extends Record<string, unknown>>(input: {
2472
2186
  `ctx.csv("${input.logicalPath}"): R2 asset is not reachable (no PLAYS_BUCKET binding and signed URL unavailable).`,
2473
2187
  );
2474
2188
  }
2475
- yield* streamCsvRowsFromBody<T>(body, Math.max(1, Math.floor(chunkSize)));
2189
+ for await (const chunk of streamCsvRowsFromBody<T>(
2190
+ body,
2191
+ Math.max(1, Math.floor(chunkSize)),
2192
+ )) {
2193
+ yield applyCsvRenameProjection(chunk, input.renameOptions) as T[];
2194
+ }
2476
2195
  }
2477
2196
 
2478
2197
  Object.defineProperty(arr, 'iterChunks', {
@@ -2629,9 +2348,12 @@ async function persistCompletedMapRows(input: {
2629
2348
  extraOutputFields?: string[];
2630
2349
  }): Promise<void> {
2631
2350
  if (input.rows.length === 0) return;
2632
- await harnessPersistCompletedSheetRows({
2633
- baseUrl: input.req.baseUrl,
2634
- executorToken: input.req.executorToken,
2351
+ await postRuntimeApi<{
2352
+ ok: true;
2353
+ rowsWritten: number;
2354
+ tableNamespace: string;
2355
+ }>(input.req.baseUrl, input.req.executorToken, {
2356
+ action: 'persist_completed_sheet_rows',
2635
2357
  playName: input.req.playName,
2636
2358
  tableNamespace: input.tableNamespace,
2637
2359
  sheetContract: requireSheetContract(input.req, input.tableNamespace),
@@ -2643,7 +2365,6 @@ async function persistCompletedMapRows(input: {
2643
2365
  ),
2644
2366
  ],
2645
2367
  runId: input.req.runId,
2646
- userEmail: input.req.userEmail,
2647
2368
  });
2648
2369
  }
2649
2370
 
@@ -2660,15 +2381,19 @@ async function prepareMapRows(input: {
2660
2381
  if (input.rows.length === 0) {
2661
2382
  return { inserted: 0, skipped: 0, pendingRows: [], completedRows: [] };
2662
2383
  }
2663
- const result = await harnessStartSheetDataset({
2664
- baseUrl: input.req.baseUrl,
2665
- executorToken: input.req.executorToken,
2384
+ const result = await postRuntimeApi<{
2385
+ inserted: number;
2386
+ skipped: number;
2387
+ pendingRows: Record<string, unknown>[];
2388
+ completedRows: Record<string, unknown>[];
2389
+ tableNamespace: string;
2390
+ }>(input.req.baseUrl, input.req.executorToken, {
2391
+ action: 'start_sheet_dataset',
2666
2392
  playName: input.req.playName,
2667
2393
  tableNamespace: input.tableNamespace,
2668
2394
  sheetContract: requireSheetContract(input.req, input.tableNamespace),
2669
2395
  rows: input.rows.map((row) => ({ ...row })),
2670
2396
  runId: input.req.runId,
2671
- userEmail: input.req.userEmail,
2672
2397
  });
2673
2398
  return {
2674
2399
  inserted: result.inserted,
@@ -2678,26 +2403,6 @@ async function prepareMapRows(input: {
2678
2403
  };
2679
2404
  }
2680
2405
 
2681
- type WorkerMapExecutor = {
2682
- prepareRows(input: {
2683
- tableNamespace: string;
2684
- rows: Record<string, unknown>[];
2685
- }): ReturnType<typeof prepareMapRows>;
2686
- persistCompletedRows(input: {
2687
- tableNamespace: string;
2688
- rows: Record<string, unknown>[];
2689
- outputFields: string[];
2690
- extraOutputFields?: string[];
2691
- }): ReturnType<typeof persistCompletedMapRows>;
2692
- };
2693
-
2694
- function createWorkerMapExecutor(req: RunRequest): WorkerMapExecutor {
2695
- return {
2696
- prepareRows: (input) => prepareMapRows({ req, ...input }),
2697
- persistCompletedRows: (input) => persistCompletedMapRows({ req, ...input }),
2698
- };
2699
- }
2700
-
2701
2406
  /**
2702
2407
  * Builds the minimal HTTP-backed ctx surface needed to run tool-basic-shaped
2703
2408
  * plays. NOT a full implementation of shared_libs/play-runtime/context.ts.
@@ -2705,13 +2410,8 @@ function createWorkerMapExecutor(req: RunRequest): WorkerMapExecutor {
2705
2410
  * Supported:
2706
2411
  * - ctx.log(msg)
2707
2412
  * - ctx.csv(filename | inline rows) (calls runtime API for file resolve)
2708
- * - ctx.map(name, rows, opts).step(...).run(...)
2709
- * - ctx.tools.execute({
2710
- id: namespace,
2711
- tool: op,
2712
- input: input,
2713
- ...(opts),
2714
- })
2413
+ * - ctx.map(name, rows, fields, opts)
2414
+ * - ctx.tools.execute(namespace, op, input, opts)
2715
2415
  * - ctx.runPlay(key, playRef, input, opts)
2716
2416
  *
2717
2417
  * Not supported (will throw):
@@ -2773,7 +2473,7 @@ function releaseChildPlayConcurrency(
2773
2473
  inFlightByPlayName[playName] = next;
2774
2474
  }
2775
2475
 
2776
- function createWorkerCtxFactory(
2476
+ function createMinimalWorkerCtx(
2777
2477
  req: RunRequest,
2778
2478
  emitEvent: (event: RunnerEvent) => void,
2779
2479
  env: WorkerEnv,
@@ -2783,9 +2483,6 @@ function createWorkerCtxFactory(
2783
2483
  let playCallCount = 0;
2784
2484
  const parentChildCalls: Record<string, number> = {};
2785
2485
  const inFlightChildCallsByPlayName: Record<string, number> = {};
2786
- const mapExecutor = createWorkerMapExecutor(req);
2787
- const transport = createWorkerRuntimeTransport(req);
2788
- const childPlayExecutor = createWorkerChildPlayExecutor({ req, transport });
2789
2486
  const rootGovernance = req.playCallGovernance;
2790
2487
  const rootRunId = rootGovernance?.rootRunId ?? req.runId;
2791
2488
  // Local ancestry chain that always ENDS with the currently-executing play
@@ -2829,16 +2526,88 @@ function createWorkerCtxFactory(
2829
2526
  candidate.mapName === name || candidate.tableNamespace === name,
2830
2527
  );
2831
2528
  const streaming = isStreamingDataset<T>(sliced);
2832
- const preferredChunkSize =
2833
- planMap?.defaultChunkSize ?? STREAMING_MAP_DEFAULT_CHUNK_SIZE;
2529
+ // For streaming inputs we don't know the row count upfront — pass
2530
+ // `totalRows: 0` so chooseMapChunkSize falls back to the preferred /
2531
+ // default chunk size rather than trying to budget against an unknown.
2834
2532
  const rowsPerChunk = chooseMapChunkSize({
2835
- totalRows: streaming ? preferredChunkSize + 1 : sliced.length,
2533
+ totalRows: streaming ? 0 : sliced.length,
2836
2534
  mapCount: Math.max(1, plan?.maps.length ?? 1),
2837
2535
  stepsPerChunk: planMap?.stepsPerChunk ?? 1,
2838
- preferredChunkSize,
2536
+ preferredChunkSize: planMap?.defaultChunkSize,
2839
2537
  softWorkflowStepBudget: plan?.chunkPlan.softWorkflowStepBudget,
2840
2538
  });
2841
2539
  const outputFields = fieldEntries.map(([field]) => field);
2540
+ const explicitRowKeysSeen =
2541
+ opts?.key === undefined ? null : new Map<string, number>();
2542
+ const resolveExplicitKeyValue = (
2543
+ row: Record<string, unknown>,
2544
+ index: number,
2545
+ ): string | null => {
2546
+ const inputRow = publicCsvInputRow(row);
2547
+ const keyOption = opts?.key;
2548
+ if (keyOption === undefined) {
2549
+ return null;
2550
+ }
2551
+ const raw =
2552
+ typeof keyOption === 'function'
2553
+ ? keyOption(inputRow, index)
2554
+ : typeof keyOption === 'string'
2555
+ ? inputRow[keyOption]
2556
+ : keyOption.map((fieldName) => inputRow[fieldName]);
2557
+ const parts = Array.isArray(raw) ? raw : [raw];
2558
+ if (parts.some((part) => part === null || part === undefined)) {
2559
+ throw new Error(
2560
+ `ctx.map("${name}") key returned null or undefined for row ${index}. ` +
2561
+ 'Return a non-empty string or number derived from a stable input column.',
2562
+ );
2563
+ }
2564
+ const normalizedParts = parts.map((part) => {
2565
+ if (typeof part === 'number') {
2566
+ return Number.isFinite(part) ? String(part) : '';
2567
+ }
2568
+ return String(part).trim();
2569
+ });
2570
+ if (normalizedParts.some((part) => !part)) {
2571
+ throw new Error(
2572
+ `ctx.map("${name}") key returned an empty value for row ${index}. ` +
2573
+ 'Return a non-empty string or finite number derived from a stable input column.',
2574
+ );
2575
+ }
2576
+ const keyValue =
2577
+ normalizedParts.length === 1
2578
+ ? normalizedParts[0]!
2579
+ : JSON.stringify(normalizedParts);
2580
+ return keyValue;
2581
+ };
2582
+ const resolveRowKey = (
2583
+ row: Record<string, unknown>,
2584
+ index: number,
2585
+ ): string => {
2586
+ const inputRow = publicCsvInputRow(row);
2587
+ const explicitKeyValue = resolveExplicitKeyValue(row, index);
2588
+ return explicitKeyValue == null
2589
+ ? derivePlayRowIdentity(inputRow, name)
2590
+ : derivePlayRowIdentityFromKey(explicitKeyValue, name);
2591
+ };
2592
+ const assertUniqueExplicitRowKeys = (
2593
+ chunkRows: readonly Record<string, unknown>[],
2594
+ chunkStart: number,
2595
+ ) => {
2596
+ if (!explicitRowKeysSeen) return;
2597
+ for (let localIndex = 0; localIndex < chunkRows.length; localIndex += 1) {
2598
+ const index = chunkStart + localIndex;
2599
+ const keyValue = resolveExplicitKeyValue(chunkRows[localIndex]!, index);
2600
+ if (keyValue == null) continue;
2601
+ const previousIndex = explicitRowKeysSeen?.get(keyValue);
2602
+ if (previousIndex !== undefined) {
2603
+ throw new Error(
2604
+ `ctx.map("${name}") key function produced duplicate value "${keyValue}" for rows ${previousIndex} and ${index}. ` +
2605
+ 'Each row must produce a unique key. Combine columns (e.g. `${row.email}|${row.company}`) or pick a column that is unique per row.',
2606
+ );
2607
+ }
2608
+ explicitRowKeysSeen?.set(keyValue, index);
2609
+ }
2610
+ };
2842
2611
 
2843
2612
  const processChunk = async (
2844
2613
  chunkRows: T[],
@@ -2846,16 +2615,17 @@ function createWorkerCtxFactory(
2846
2615
  chunkIndex: number,
2847
2616
  ): Promise<WorkerMapChunkSummary<T & Record<string, unknown>>> => {
2848
2617
  assertNotAborted(abortSignal);
2849
- const prepared = await mapExecutor.prepareRows({
2618
+ const chunkEntries = chunkRows.map((row, localIndex) => {
2619
+ const absoluteIndex = baseOffset + chunkStart + localIndex;
2620
+ const rowKey = resolveRowKey(row, absoluteIndex);
2621
+ return { row, absoluteIndex, rowKey };
2622
+ });
2623
+ const prepared = await prepareMapRows({
2624
+ req,
2850
2625
  tableNamespace: name,
2851
- rows: chunkRows.map((row, index) => ({
2626
+ rows: chunkEntries.map(({ row, rowKey }) => ({
2852
2627
  ...row,
2853
- __deeplineRowKey: workerMapRowIdentity(
2854
- row,
2855
- name,
2856
- opts,
2857
- baseOffset + chunkStart + index,
2858
- ),
2628
+ __deeplineRowKey: rowKey,
2859
2629
  })),
2860
2630
  });
2861
2631
  const pendingKeys = new Set<string>();
@@ -2865,7 +2635,7 @@ function createWorkerCtxFactory(
2865
2635
  const key =
2866
2636
  typeof row.__deeplineRowKey === 'string'
2867
2637
  ? row.__deeplineRowKey
2868
- : workerMapRowIdentity(row, name, opts);
2638
+ : derivePlayRowIdentity(publicCsvInputRow(row), name);
2869
2639
  if (key) {
2870
2640
  pendingKeys.add(key);
2871
2641
  preparedKeys.add(key);
@@ -2875,20 +2645,29 @@ function createWorkerCtxFactory(
2875
2645
  const key =
2876
2646
  typeof row.__deeplineRowKey === 'string'
2877
2647
  ? row.__deeplineRowKey
2878
- : workerMapRowIdentity(row, name, opts);
2648
+ : derivePlayRowIdentity(publicCsvInputRow(row), name);
2879
2649
  if (key) {
2880
2650
  completedKeys.add(key);
2881
2651
  preparedKeys.add(key);
2882
2652
  }
2883
2653
  }
2884
- const missingPreparedRows = chunkRows.filter((row) => {
2885
- const key = workerMapRowIdentity(row, name, opts);
2886
- return !key || !preparedKeys.has(key);
2887
- });
2888
- const rowsToExecute = chunkRows.filter((row) => {
2889
- const key = workerMapRowIdentity(row, name, opts);
2890
- return !key || pendingKeys.has(key) || !completedKeys.has(key);
2891
- });
2654
+ const missingPreparedRows = chunkEntries.filter(
2655
+ ({ rowKey }) => !preparedKeys.has(rowKey),
2656
+ );
2657
+ const rowsToExecuteEntries = chunkEntries.filter(
2658
+ ({ rowKey }) => pendingKeys.has(rowKey) || !completedKeys.has(rowKey),
2659
+ );
2660
+ const uniqueRowsToExecuteEntries = [
2661
+ ...new Map(
2662
+ rowsToExecuteEntries.map((entry) => [entry.rowKey, entry]),
2663
+ ).values(),
2664
+ ];
2665
+ const duplicateInputReuseCount = Math.max(
2666
+ 0,
2667
+ chunkEntries.length -
2668
+ new Set(chunkEntries.map((entry) => entry.rowKey)).size,
2669
+ );
2670
+ const rowsToExecute = uniqueRowsToExecuteEntries.map(({ row }) => row);
2892
2671
  const rowsInserted = prepared.inserted + missingPreparedRows.length;
2893
2672
  const rowsSkipped = Math.max(
2894
2673
  0,
@@ -2912,9 +2691,10 @@ function createWorkerCtxFactory(
2912
2691
  if (abortSignal?.aborted) return;
2913
2692
  const myIndex = idx++;
2914
2693
  if (myIndex >= rowsToExecute.length) return;
2915
- const row = rowsToExecute[myIndex]!;
2916
- const absoluteIndex = baseOffset + chunkStart + myIndex;
2917
- const enriched: Record<string, unknown> = { ...row };
2694
+ const entry = uniqueRowsToExecuteEntries[myIndex]!;
2695
+ const row = entry.row;
2696
+ const absoluteIndex = entry.absoluteIndex;
2697
+ const enriched: Record<string, unknown> = cloneCsvAliasedRow(row);
2918
2698
  const fieldOutputs: Record<string, unknown> = {};
2919
2699
  const cellMetaPatch: Record<
2920
2700
  string,
@@ -2925,25 +2705,35 @@ function createWorkerCtxFactory(
2925
2705
  const rowCtx = {
2926
2706
  ...(ctx as Record<string, unknown>),
2927
2707
  tool: async (
2928
- request: WorkerToolExecutionRequest,
2708
+ key: string,
2709
+ toolId: string,
2710
+ input: Record<string, unknown>,
2929
2711
  ): Promise<unknown> => {
2930
2712
  assertNotAborted(abortSignal);
2931
2713
  return await toolBatchScheduler.execute(
2932
- request.id,
2933
- request.tool,
2934
- request.input,
2714
+ key,
2715
+ toolId,
2716
+ input,
2935
2717
  workflowStep,
2936
2718
  );
2937
2719
  },
2938
2720
  tools: {
2939
2721
  ...((ctx as { tools?: Record<string, unknown> }).tools ?? {}),
2940
2722
  execute: async (
2941
- request: WorkerToolExecutionRequest,
2723
+ requestOrKey: unknown,
2724
+ toolId?: unknown,
2725
+ input?: unknown,
2726
+ _opts?: { description?: string },
2942
2727
  ): Promise<unknown> => {
2943
2728
  assertNotAborted(abortSignal);
2729
+ const request = normalizeToolExecuteArgs(
2730
+ requestOrKey,
2731
+ toolId,
2732
+ input,
2733
+ );
2944
2734
  return await toolBatchScheduler.execute(
2945
2735
  request.id,
2946
- request.tool,
2736
+ request.toolId,
2947
2737
  request.input,
2948
2738
  workflowStep,
2949
2739
  );
@@ -2960,7 +2750,6 @@ function createWorkerCtxFactory(
2960
2750
  toolNameOrSpec,
2961
2751
  waterfallInput,
2962
2752
  waterfallOpts,
2963
- workflowStep,
2964
2753
  ),
2965
2754
  };
2966
2755
  for (const [key, value] of fieldEntries) {
@@ -3010,13 +2799,33 @@ function createWorkerCtxFactory(
3010
2799
  })(),
3011
2800
  );
3012
2801
  }
3013
- await Promise.all(workers);
3014
- if (executedRows.length > 0) {
3015
- await mapExecutor.persistCompletedRows({
2802
+ const persistExecutedRows = async () => {
2803
+ const rowsToPersist = executedRows
2804
+ .map((row, executedIndex) =>
2805
+ row
2806
+ ? {
2807
+ row,
2808
+ executedIndex,
2809
+ }
2810
+ : null,
2811
+ )
2812
+ .filter(
2813
+ (
2814
+ entry,
2815
+ ): entry is {
2816
+ row: T & Record<string, unknown>;
2817
+ executedIndex: number;
2818
+ } => entry !== null,
2819
+ );
2820
+ if (rowsToPersist.length === 0) {
2821
+ return;
2822
+ }
2823
+ await persistCompletedMapRows({
2824
+ req,
3016
2825
  tableNamespace: name,
3017
2826
  outputFields,
3018
2827
  extraOutputFields: Array.from(generatedOutputFields),
3019
- rows: executedRows.map((row, executedIndex) => ({
2828
+ rows: rowsToPersist.map(({ row, executedIndex }) => ({
3020
2829
  ...row,
3021
2830
  ...(executedCellMetaPatches[executedIndex]
3022
2831
  ? {
@@ -3024,22 +2833,29 @@ function createWorkerCtxFactory(
3024
2833
  executedCellMetaPatches[executedIndex],
3025
2834
  }
3026
2835
  : {}),
3027
- __deeplineRowKey: workerMapRowIdentity(
3028
- rowsToExecute[executedIndex]!,
3029
- name,
3030
- opts,
3031
- ),
2836
+ __deeplineRowKey:
2837
+ uniqueRowsToExecuteEntries[executedIndex]!.rowKey,
3032
2838
  })),
3033
2839
  });
2840
+ };
2841
+ const workerResults = await Promise.allSettled(workers);
2842
+ await persistExecutedRows();
2843
+ const rejectedWorker = workerResults.find(
2844
+ (result): result is PromiseRejectedResult =>
2845
+ result.status === 'rejected',
2846
+ );
2847
+ if (rejectedWorker) {
2848
+ throw rejectedWorker.reason;
3034
2849
  }
3035
2850
  const resultByKey = new Map<string, T & Record<string, unknown>>();
3036
2851
  for (const completedRow of prepared.completedRows) {
3037
2852
  const key =
3038
2853
  typeof completedRow.__deeplineRowKey === 'string'
3039
2854
  ? completedRow.__deeplineRowKey
3040
- : workerMapRowIdentity(completedRow, name, opts);
2855
+ : derivePlayRowIdentity(publicCsvInputRow(completedRow), name);
3041
2856
  if (key) {
3042
- const { __deeplineRowKey: _rowKey, ...cleanedRow } = completedRow;
2857
+ const { __deeplineRowKey: _rowKey, ...cleanedRow } =
2858
+ publicCsvInputRow(completedRow);
3043
2859
  void _rowKey;
3044
2860
  resultByKey.set(key, cleanedRow as T & Record<string, unknown>);
3045
2861
  }
@@ -3050,17 +2866,13 @@ function createWorkerCtxFactory(
3050
2866
  executedIndex += 1
3051
2867
  ) {
3052
2868
  const executedRow = executedRows[executedIndex]!;
3053
- const key = workerMapRowIdentity(
3054
- rowsToExecute[executedIndex]!,
3055
- name,
3056
- opts,
3057
- );
2869
+ const key = uniqueRowsToExecuteEntries[executedIndex]!.rowKey;
3058
2870
  if (key) resultByKey.set(key, executedRow);
3059
2871
  }
3060
2872
  const out = chunkRows
3061
- .map((row) => {
3062
- const key = workerMapRowIdentity(row, name, opts);
3063
- return key ? resultByKey.get(key) : undefined;
2873
+ .map((_row, index) => {
2874
+ const key = chunkEntries[index]!.rowKey;
2875
+ return resultByKey.get(key);
3064
2876
  })
3065
2877
  .filter((row): row is T & Record<string, unknown> => Boolean(row));
3066
2878
  return {
@@ -3070,18 +2882,20 @@ function createWorkerCtxFactory(
3070
2882
  rowsRead: chunkRows.length,
3071
2883
  rowsWritten: out.length,
3072
2884
  rowsExecuted: executedRows.length,
3073
- rowsCached: prepared.completedRows.length,
2885
+ rowsCached: Math.max(0, out.length - executedRows.length),
2886
+ rowsDuplicateReused: duplicateInputReuseCount,
3074
2887
  rowsInserted,
3075
2888
  rowsSkipped,
3076
2889
  outputDatasetId: `map:${name}`,
3077
2890
  hash: await hashJson(out),
3078
- preview: toWorkflowSerializableValue(out.slice(0, 10)),
2891
+ preview: toWorkflowSerializableValue(out.slice(0, 5)),
3079
2892
  };
3080
2893
  };
3081
2894
 
3082
2895
  const out: Array<T & Record<string, unknown>> = [];
3083
2896
  let totalRowsExecuted = 0;
3084
2897
  let totalRowsCached = 0;
2898
+ let totalRowsDuplicateReused = 0;
3085
2899
  let totalRowsInserted = 0;
3086
2900
  let totalRowsSkipped = 0;
3087
2901
 
@@ -3124,6 +2938,17 @@ function createWorkerCtxFactory(
3124
2938
  return makeWorkerDataset(name, out, {
3125
2939
  count: totalRowsWritten,
3126
2940
  cacheSummary,
2941
+ workProgress: {
2942
+ total: totalRowsWritten,
2943
+ executed: totalRowsExecuted,
2944
+ reused: totalRowsCached,
2945
+ skipped: totalRowsCached,
2946
+ pending: 0,
2947
+ failed: 0,
2948
+ ...(totalRowsDuplicateReused > 0
2949
+ ? { duplicates: { exact: totalRowsDuplicateReused } }
2950
+ : {}),
2951
+ },
3127
2952
  });
3128
2953
  };
3129
2954
 
@@ -3135,6 +2960,7 @@ function createWorkerCtxFactory(
3135
2960
  for await (const chunkRows of streamingDataset.iterChunks(rowsPerChunk)) {
3136
2961
  assertNotAborted(abortSignal);
3137
2962
  if (chunkRows.length === 0) continue;
2963
+ assertUniqueExplicitRowKeys(chunkRows, chunkStart);
3138
2964
  const chunkResult = await runChunkStep(
3139
2965
  chunkRows,
3140
2966
  chunkStart,
@@ -3143,6 +2969,7 @@ function createWorkerCtxFactory(
3143
2969
  totalRowsWritten += chunkResult.rowsWritten;
3144
2970
  totalRowsExecuted += chunkResult.rowsExecuted;
3145
2971
  totalRowsCached += chunkResult.rowsCached;
2972
+ totalRowsDuplicateReused += chunkResult.rowsDuplicateReused;
3146
2973
  totalRowsInserted += chunkResult.rowsInserted;
3147
2974
  totalRowsSkipped += chunkResult.rowsSkipped;
3148
2975
  if (out.length < 10) {
@@ -3161,10 +2988,12 @@ function createWorkerCtxFactory(
3161
2988
  const end = Math.min(sliced.length, start + rowsPerChunk);
3162
2989
  const chunkRows = sliced.slice(start, end);
3163
2990
  const chunkIndex = Math.floor(start / rowsPerChunk);
2991
+ assertUniqueExplicitRowKeys(chunkRows, start);
3164
2992
  const chunkResult = await runChunkStep(chunkRows, start, chunkIndex);
3165
2993
  totalRowsWritten += chunkResult.rowsWritten;
3166
2994
  totalRowsExecuted += chunkResult.rowsExecuted;
3167
2995
  totalRowsCached += chunkResult.rowsCached;
2996
+ totalRowsDuplicateReused += chunkResult.rowsDuplicateReused;
3168
2997
  totalRowsInserted += chunkResult.rowsInserted;
3169
2998
  totalRowsSkipped += chunkResult.rowsSkipped;
3170
2999
  if (out.length < 10) {
@@ -3174,9 +3003,11 @@ function createWorkerCtxFactory(
3174
3003
  return finalize(totalRowsWritten);
3175
3004
  }
3176
3005
 
3006
+ assertUniqueExplicitRowKeys(sliced, 0);
3177
3007
  const chunkResult = await runChunkStep(sliced, 0, 0);
3178
3008
  totalRowsExecuted = chunkResult.rowsExecuted;
3179
3009
  totalRowsCached = chunkResult.rowsCached;
3010
+ totalRowsDuplicateReused = chunkResult.rowsDuplicateReused;
3180
3011
  totalRowsInserted = chunkResult.rowsInserted;
3181
3012
  totalRowsSkipped = chunkResult.rowsSkipped;
3182
3013
  out.push(...chunkResult.preview);
@@ -3192,10 +3023,6 @@ function createWorkerCtxFactory(
3192
3023
  constructor(
3193
3024
  private readonly name: string,
3194
3025
  private readonly rows: T[],
3195
- private readonly mapOptions?: Omit<
3196
- WorkerMapOptions,
3197
- 'description' | 'concurrency'
3198
- >,
3199
3026
  ) {}
3200
3027
 
3201
3028
  step(name: string, resolver: WorkerStepProgramStep['resolver']): this {
@@ -3207,23 +3034,10 @@ function createWorkerCtxFactory(
3207
3034
  }
3208
3035
 
3209
3036
  run(opts?: WorkerMapOptions): Promise<unknown> {
3210
- if (
3211
- opts &&
3212
- Object.keys(opts).some(
3213
- (optionKey) => optionKey !== 'description' && optionKey !== 'concurrency',
3214
- )
3215
- ) {
3216
- throw new Error(
3217
- 'ctx.map(...).run() only accepts description and concurrency.',
3218
- );
3219
- }
3220
3037
  const fields = Object.fromEntries(
3221
3038
  this.program.steps.map((step) => [step.name, step.resolver]),
3222
3039
  );
3223
- return runMap(this.name, this.rows, fields, {
3224
- ...this.mapOptions,
3225
- ...opts,
3226
- });
3040
+ return runMap(this.name, this.rows, fields, opts);
3227
3041
  }
3228
3042
  }
3229
3043
 
@@ -3246,36 +3060,66 @@ function createWorkerCtxFactory(
3246
3060
  },
3247
3061
  async step<T>(name: string, callback: () => Promise<T> | T): Promise<T> {
3248
3062
  assertNotAborted(abortSignal);
3249
- if (!workflowStep) {
3250
- return await callback();
3063
+ if (!name.trim()) {
3064
+ throw new Error('ctx.step(name, callback) requires a name.');
3251
3065
  }
3252
- return await (
3253
- workflowStep.do as unknown as (
3254
- name: string,
3255
- callback: () => Promise<T>,
3256
- ) => Promise<T>
3257
- )(name, async () => {
3258
- assertNotAborted(abortSignal);
3259
- return await callback();
3260
- });
3066
+ // Static pipeline JS blocks are already Workflow steps in the Workers
3067
+ // backend. Nesting another `step.do` here can leave preview runs parked
3068
+ // inside the JS stage before they reach subsequent event waits.
3069
+ return await callback();
3070
+ },
3071
+ async runSteps<T>(
3072
+ program: WorkerStepProgram,
3073
+ input: Record<string, unknown>,
3074
+ opts?: { description?: string },
3075
+ ): Promise<T> {
3076
+ assertNotAborted(abortSignal);
3077
+ if (!isWorkerStepProgram(program)) {
3078
+ throw new Error('ctx.runSteps(program, input) requires steps().');
3079
+ }
3080
+ if (opts?.description) {
3081
+ emitEvent({
3082
+ type: 'log',
3083
+ level: 'info',
3084
+ message: String(opts.description),
3085
+ ts: nowMs(),
3086
+ });
3087
+ }
3088
+ return (await executeWorkerStepProgram(
3089
+ program,
3090
+ input,
3091
+ ctx,
3092
+ 0,
3093
+ undefined,
3094
+ workflowStep,
3095
+ )) as T;
3261
3096
  },
3262
3097
  async csv<T extends Record<string, unknown> = Record<string, unknown>>(
3263
3098
  arg: unknown,
3099
+ options?: CsvRenameOptions,
3264
3100
  ): Promise<T[]> {
3265
3101
  if (Array.isArray(arg)) {
3266
3102
  // Inline rows passed at call site — already in memory, keep the
3267
3103
  // legacy array-backed dataset shape.
3268
- return makeWorkerDataset('csv', arg as T[], {
3269
- datasetKind: 'csv',
3270
- }) as unknown as T[];
3104
+ return makeWorkerDataset(
3105
+ 'csv',
3106
+ applyCsvRenameProjection(arg as T[], options),
3107
+ {
3108
+ datasetKind: 'csv',
3109
+ },
3110
+ ) as unknown as T[];
3271
3111
  }
3272
3112
  const filename = String(arg ?? '');
3273
3113
  if (req.inlineCsv && filename === req.inlineCsv.name) {
3274
3114
  // Inline CSV pre-staged by the dispatcher (small files <1 MiB). Already
3275
3115
  // in memory; no streaming needed.
3276
- return makeWorkerDataset('csv', req.inlineCsv.rows as T[], {
3277
- datasetKind: 'csv',
3278
- }) as unknown as T[];
3116
+ return makeWorkerDataset(
3117
+ 'csv',
3118
+ applyCsvRenameProjection(req.inlineCsv.rows as T[], options),
3119
+ {
3120
+ datasetKind: 'csv',
3121
+ },
3122
+ ) as unknown as T[];
3279
3123
  }
3280
3124
  // Resolution order: explicit inputR2Keys (runtime input) → packaged
3281
3125
  // files (relative-path imports bundled with the play artifact).
@@ -3303,6 +3147,7 @@ function createWorkerCtxFactory(
3303
3147
  return makeStreamingCsvDataset<T>({
3304
3148
  name: filename,
3305
3149
  logicalPath: filename,
3150
+ renameOptions: options,
3306
3151
  open: () =>
3307
3152
  openR2BodyStream({
3308
3153
  req,
@@ -3327,18 +3172,10 @@ function createWorkerCtxFactory(
3327
3172
  ) => Promise<unknown> | unknown)
3328
3173
  >
3329
3174
  | WorkerStepProgram,
3330
- opts?: { description?: string; concurrency?: number },
3175
+ opts?: WorkerMapOptions,
3331
3176
  ): unknown {
3332
- if (
3333
- arguments.length <= 2 ||
3334
- fieldsDef === undefined ||
3335
- isWorkerMapDefinitionOptions(fieldsDef)
3336
- ) {
3337
- return new WorkerMapBuilder(
3338
- name,
3339
- rows,
3340
- fieldsDef as Omit<WorkerMapOptions, 'description' | 'concurrency'>,
3341
- );
3177
+ if (arguments.length <= 2 || fieldsDef === undefined) {
3178
+ return new WorkerMapBuilder(name, rows);
3342
3179
  }
3343
3180
  if (isWorkerStepProgram(fieldsDef)) {
3344
3181
  const fields = Object.fromEntries(
@@ -3346,7 +3183,9 @@ function createWorkerCtxFactory(
3346
3183
  );
3347
3184
  return runMap(name, rows, fields, opts);
3348
3185
  }
3349
- throw new Error('ctx.map() accepts key, rows, and map options only.');
3186
+ throw new Error(
3187
+ 'ctx.map(key, rows, fields, options) was removed. Use ctx.map(key, rows).step(...).run(options).',
3188
+ );
3350
3189
  /*
3351
3190
  const sliced = rows;
3352
3191
  const baseOffset = 0;
@@ -3357,13 +3196,14 @@ function createWorkerCtxFactory(
3357
3196
  candidate.mapName === name || candidate.tableNamespace === name,
3358
3197
  );
3359
3198
  const streaming = isStreamingDataset<T>(sliced);
3360
- const preferredChunkSize =
3361
- planMap?.defaultChunkSize ?? STREAMING_MAP_DEFAULT_CHUNK_SIZE;
3199
+ // For streaming inputs we don't know the row count upfront — pass
3200
+ // `totalRows: 0` so chooseMapChunkSize falls back to the preferred /
3201
+ // default chunk size rather than trying to budget against an unknown.
3362
3202
  const rowsPerChunk = chooseMapChunkSize({
3363
- totalRows: streaming ? preferredChunkSize + 1 : sliced.length,
3203
+ totalRows: streaming ? 0 : sliced.length,
3364
3204
  mapCount: Math.max(1, plan?.maps.length ?? 1),
3365
3205
  stepsPerChunk: planMap?.stepsPerChunk ?? 1,
3366
- preferredChunkSize,
3206
+ preferredChunkSize: planMap?.defaultChunkSize,
3367
3207
  softWorkflowStepBudget: plan?.chunkPlan.softWorkflowStepBudget,
3368
3208
  });
3369
3209
  const outputFields = fieldEntries.map(([field]) => field);
@@ -3387,7 +3227,7 @@ function createWorkerCtxFactory(
3387
3227
  const completedKeys = new Set<string>();
3388
3228
  const preparedKeys = new Set<string>();
3389
3229
  for (const row of prepared.pendingRows) {
3390
- const key = derivePlayRowIdentity(row, name);
3230
+ const key = derivePlayRowIdentity(publicCsvInputRow(row), name);
3391
3231
  if (key) {
3392
3232
  pendingKeys.add(key);
3393
3233
  preparedKeys.add(key);
@@ -3397,18 +3237,18 @@ function createWorkerCtxFactory(
3397
3237
  const key =
3398
3238
  typeof row.__deeplineRowKey === 'string'
3399
3239
  ? row.__deeplineRowKey
3400
- : derivePlayRowIdentity(row, name);
3240
+ : derivePlayRowIdentity(publicCsvInputRow(row), name);
3401
3241
  if (key) {
3402
3242
  completedKeys.add(key);
3403
3243
  preparedKeys.add(key);
3404
3244
  }
3405
3245
  }
3406
3246
  const missingPreparedRows = chunkRows.filter((row) => {
3407
- const key = derivePlayRowIdentity(row, name);
3247
+ const key = derivePlayRowIdentity(publicCsvInputRow(row), name);
3408
3248
  return !key || !preparedKeys.has(key);
3409
3249
  });
3410
3250
  const rowsToExecute = chunkRows.filter((row) => {
3411
- const key = derivePlayRowIdentity(row, name);
3251
+ const key = derivePlayRowIdentity(publicCsvInputRow(row), name);
3412
3252
  return !key || pendingKeys.has(key) || !completedKeys.has(key);
3413
3253
  });
3414
3254
  const rowsInserted = prepared.inserted + missingPreparedRows.length;
@@ -3432,7 +3272,7 @@ function createWorkerCtxFactory(
3432
3272
  if (myIndex >= rowsToExecute.length) return;
3433
3273
  const row = rowsToExecute[myIndex]!;
3434
3274
  const absoluteIndex = baseOffset + chunkStart + myIndex;
3435
- const enriched: Record<string, unknown> = { ...row };
3275
+ const enriched: Record<string, unknown> = cloneCsvAliasedRow(row);
3436
3276
  const fieldOutputs: Record<string, unknown> = {};
3437
3277
  const waterfallOutputs: RecordedWaterfallOutput[] = [];
3438
3278
  const rowCtx = {
@@ -3448,7 +3288,6 @@ function createWorkerCtxFactory(
3448
3288
  toolNameOrSpec,
3449
3289
  waterfallInput,
3450
3290
  waterfallOpts,
3451
- workflowStep,
3452
3291
  ),
3453
3292
  };
3454
3293
  for (const [key, value] of fieldEntries) {
@@ -3490,7 +3329,7 @@ function createWorkerCtxFactory(
3490
3329
  rows: executedRows.map((row, executedIndex) => ({
3491
3330
  ...row,
3492
3331
  __deeplineRowKey: derivePlayRowIdentity(
3493
- rowsToExecute[executedIndex]!,
3332
+ publicCsvInputRow(rowsToExecute[executedIndex]!),
3494
3333
  name,
3495
3334
  ),
3496
3335
  })),
@@ -3501,9 +3340,10 @@ function createWorkerCtxFactory(
3501
3340
  const key =
3502
3341
  typeof completedRow.__deeplineRowKey === 'string'
3503
3342
  ? completedRow.__deeplineRowKey
3504
- : derivePlayRowIdentity(completedRow, name);
3343
+ : derivePlayRowIdentity(publicCsvInputRow(completedRow), name);
3505
3344
  if (key) {
3506
- const { __deeplineRowKey: _rowKey, ...cleanedRow } = completedRow;
3345
+ const { __deeplineRowKey: _rowKey, ...cleanedRow } =
3346
+ publicCsvInputRow(completedRow);
3507
3347
  void _rowKey;
3508
3348
  resultByKey.set(key, cleanedRow as T & Record<string, unknown>);
3509
3349
  }
@@ -3515,14 +3355,14 @@ function createWorkerCtxFactory(
3515
3355
  ) {
3516
3356
  const executedRow = executedRows[executedIndex]!;
3517
3357
  const key = derivePlayRowIdentity(
3518
- rowsToExecute[executedIndex]!,
3358
+ publicCsvInputRow(rowsToExecute[executedIndex]!),
3519
3359
  name,
3520
3360
  );
3521
3361
  if (key) resultByKey.set(key, executedRow);
3522
3362
  }
3523
3363
  const out = chunkRows
3524
3364
  .map((row) => {
3525
- const key = derivePlayRowIdentity(row, name);
3365
+ const key = derivePlayRowIdentity(publicCsvInputRow(row), name);
3526
3366
  return key ? resultByKey.get(key) : undefined;
3527
3367
  })
3528
3368
  .filter((row): row is T & Record<string, unknown> => Boolean(row));
@@ -3538,7 +3378,7 @@ function createWorkerCtxFactory(
3538
3378
  rowsSkipped,
3539
3379
  outputDatasetId: `map:${name}`,
3540
3380
  hash: await hashJson(out),
3541
- preview: toWorkflowSerializableValue(out.slice(0, 10)),
3381
+ preview: toWorkflowSerializableValue(out.slice(0, 5)),
3542
3382
  };
3543
3383
  };
3544
3384
 
@@ -3587,6 +3427,14 @@ function createWorkerCtxFactory(
3587
3427
  return makeWorkerDataset(name, out, {
3588
3428
  count: totalRowsWritten,
3589
3429
  cacheSummary,
3430
+ workProgress: {
3431
+ total: totalRowsWritten,
3432
+ executed: totalRowsExecuted,
3433
+ reused: totalRowsCached,
3434
+ skipped: totalRowsCached,
3435
+ pending: 0,
3436
+ failed: 0,
3437
+ },
3590
3438
  });
3591
3439
  };
3592
3440
 
@@ -3653,28 +3501,25 @@ function createWorkerCtxFactory(
3653
3501
  return finalize(chunkResult.rowsWritten);
3654
3502
  */
3655
3503
  },
3656
- tool: async (request: WorkerToolExecutionRequest): Promise<unknown> => {
3504
+ tool: async (
3505
+ key: string,
3506
+ toolId: string,
3507
+ input: Record<string, unknown>,
3508
+ ): Promise<unknown> => {
3657
3509
  assertNotAborted(abortSignal);
3658
- return executeTool(
3659
- req,
3660
- {
3661
- toolId: request.tool,
3662
- input: request.input,
3663
- retryKey: request.id,
3664
- },
3665
- workflowStep,
3666
- );
3510
+ return executeTool(req, { id: key, toolId, input }, workflowStep);
3667
3511
  },
3668
3512
  tools: {
3669
- async execute(request: WorkerToolExecutionRequest): Promise<unknown> {
3513
+ async execute(
3514
+ requestOrKey: unknown,
3515
+ toolId?: unknown,
3516
+ input?: unknown,
3517
+ _opts?: { description?: string },
3518
+ ): Promise<unknown> {
3670
3519
  assertNotAborted(abortSignal);
3671
3520
  return executeTool(
3672
3521
  req,
3673
- {
3674
- toolId: request.tool,
3675
- input: request.input,
3676
- retryKey: request.id,
3677
- },
3522
+ normalizeToolExecuteArgs(requestOrKey, toolId, input),
3678
3523
  workflowStep,
3679
3524
  );
3680
3525
  },
@@ -3702,14 +3547,7 @@ function createWorkerCtxFactory(
3702
3547
  input: Record<string, unknown>,
3703
3548
  opts?: WorkerWaterfallOptions,
3704
3549
  ): Promise<unknown | null> {
3705
- return executeWorkerWaterfall(
3706
- req,
3707
- [],
3708
- toolNameOrSpec,
3709
- input,
3710
- opts,
3711
- workflowStep,
3712
- );
3550
+ return executeWorkerWaterfall(req, [], toolNameOrSpec, input, opts);
3713
3551
  },
3714
3552
  async sleep(ms: number): Promise<void> {
3715
3553
  assertNotAborted(abortSignal);
@@ -3798,33 +3636,33 @@ function createWorkerCtxFactory(
3798
3636
  }
3799
3637
  try {
3800
3638
  const childSubmitStartedAt = nowMs();
3801
- let started: Awaited<
3802
- ReturnType<typeof submitChildPlayThroughCoordinator>
3803
- >;
3639
+ let started: { workflowId?: string; runId?: string; error?: unknown };
3804
3640
  try {
3805
- started = await childPlayExecutor.submit({
3806
- name: resolvedName,
3807
- input: isRecord(input) ? input : {},
3808
- orgId: req.orgId,
3809
- callbackBaseUrl: req.baseUrl,
3810
- parentExecutorToken: req.executorToken,
3811
- userEmail: req.userEmail ?? '',
3812
- profile: 'workers_edge',
3813
- manifest: childManifest,
3814
- childPlayManifests: req.childPlayManifests ?? null,
3815
- internalRunPlay: {
3816
- rootRunId,
3817
- parentRunId: req.runId,
3818
- parentPlayName: req.playName,
3819
- key: normalizedKey,
3820
- // Per the lineage validator: ancestry tail must equal the
3821
- // executor token's play name (the parent making this call).
3822
- ancestryPlayIds,
3823
- callDepth: nextDepth,
3824
- description:
3825
- typeof options?.description === 'string'
3826
- ? options.description
3827
- : null,
3641
+ started = await submitChildPlayThroughCoordinator({
3642
+ req,
3643
+ body: {
3644
+ name: resolvedName,
3645
+ input: isRecord(input) ? input : {},
3646
+ orgId: req.orgId,
3647
+ parentExecutorToken: req.executorToken,
3648
+ userEmail: req.userEmail ?? '',
3649
+ profile: 'workers_edge',
3650
+ manifest: childManifest,
3651
+ childPlayManifests: req.childPlayManifests ?? null,
3652
+ internalRunPlay: {
3653
+ rootRunId,
3654
+ parentRunId: req.runId,
3655
+ parentPlayName: req.playName,
3656
+ key: normalizedKey,
3657
+ // Per the lineage validator: ancestry tail must equal the
3658
+ // executor token's play name (the parent making this call).
3659
+ ancestryPlayIds,
3660
+ callDepth: nextDepth,
3661
+ description:
3662
+ typeof options?.description === 'string'
3663
+ ? options.description
3664
+ : null,
3665
+ },
3828
3666
  },
3829
3667
  });
3830
3668
  } catch (error) {
@@ -3870,112 +3708,12 @@ function createWorkerCtxFactory(
3870
3708
  fanoutIndex: nextParentCalls - 1,
3871
3709
  ms: nowMs() - childSubmitStartedAt,
3872
3710
  status: 'ok',
3873
- mode:
3874
- typeof started.mode === 'string' ? started.mode : 'workflow_child',
3875
3711
  });
3876
- const terminalStatus =
3877
- typeof started.status === 'string'
3878
- ? started.status.toLowerCase()
3879
- : '';
3880
- if (started.mode === 'inline_dynamic_worker') {
3881
- const timingSummary =
3882
- Array.isArray(started.timings) && started.timings.length > 0
3883
- ? started.timings
3884
- .map(
3885
- (timing) =>
3886
- `${timing.phase.replace('coordinator.inline_child_', '')}:${timing.ms}ms`,
3887
- )
3888
- .join(' ')
3889
- : 'timings:none';
3890
- emitEvent({
3891
- type: 'log',
3892
- level: terminalStatus === 'failed' ? 'error' : 'info',
3893
- message: `Inline child ${resolvedName} (${workflowId}) boundary=${terminalStatus || 'submitted'} ${timingSummary}`,
3894
- ts: nowMs(),
3895
- });
3896
- const childLogs = Array.isArray(started.logs)
3897
- ? started.logs.filter((line) => typeof line === 'string')
3898
- : [];
3899
- for (const line of childLogs.slice(0, 24)) {
3900
- emitEvent({
3901
- type: 'log',
3902
- level: 'info',
3903
- message: ` ${resolvedName}> ${line}`,
3904
- ts: nowMs(),
3905
- });
3906
- }
3907
- if (childLogs.length > 24) {
3908
- emitEvent({
3909
- type: 'log',
3910
- level: 'info',
3911
- message: ` ${resolvedName}> ... ${childLogs.length - 24} more inline child log lines omitted`,
3912
- ts: nowMs(),
3913
- });
3914
- }
3915
- }
3916
- if (terminalStatus === 'completed') {
3917
- console.info('[play.runtime.span]', {
3918
- event: 'play.runtime.span',
3919
- phase: 'child_wait',
3920
- runId: req.runId,
3921
- parentRunId: req.runId,
3922
- childRunId: workflowId,
3923
- playName: resolvedName,
3924
- graphHash: req.graphHash ?? null,
3925
- depth: nextDepth,
3926
- fanoutIndex: nextParentCalls - 1,
3927
- ms: 0,
3928
- status: 'ok',
3929
- mode:
3930
- typeof started.mode === 'string'
3931
- ? started.mode
3932
- : 'inline_terminal',
3933
- });
3934
- emitEvent({
3935
- type: 'log',
3936
- level: 'info',
3937
- message: `Completed child play ${resolvedName} (${normalizedKey})`,
3938
- ts: nowMs(),
3939
- });
3940
- return 'output' in started ? started.output : started.result;
3941
- }
3942
- if (terminalStatus === 'failed') {
3943
- const inlineError = isRecord(started.error) ? started.error : null;
3944
- const message =
3945
- (typeof inlineError?.message === 'string' &&
3946
- inlineError.message.trim()) ||
3947
- (typeof started.error === 'string' && started.error.trim()) ||
3948
- `Child play ${resolvedName} (${workflowId}) failed.`;
3949
- console.info('[play.runtime.span]', {
3950
- event: 'play.runtime.span',
3951
- phase: 'child_wait',
3952
- runId: req.runId,
3953
- parentRunId: req.runId,
3954
- childRunId: workflowId,
3955
- playName: resolvedName,
3956
- graphHash: req.graphHash ?? null,
3957
- depth: nextDepth,
3958
- fanoutIndex: nextParentCalls - 1,
3959
- ms: 0,
3960
- status: 'failed',
3961
- mode:
3962
- typeof started.mode === 'string'
3963
- ? started.mode
3964
- : 'inline_terminal',
3965
- errorCode: 'CHILD_INLINE_FAILED',
3966
- });
3967
- emitEvent({
3968
- type: 'log',
3969
- level: 'error',
3970
- message: `Inline child ${resolvedName} (${workflowId}) failed: ${message}`,
3971
- ts: nowMs(),
3972
- });
3973
- throw new Error(message);
3974
- }
3975
3712
  const childWaitStartedAt = nowMs();
3976
3713
  let result: unknown;
3977
3714
  try {
3978
- result = await childPlayExecutor.waitTerminal({
3715
+ result = await waitForChildPlayTerminalEvent({
3716
+ req,
3979
3717
  workflowStep,
3980
3718
  workflowId,
3981
3719
  playName: resolvedName,
@@ -4053,11 +3791,11 @@ function createWorkerCtxFactory(
4053
3791
  const event = (await (
4054
3792
  workflowStep.waitForEvent as unknown as (
4055
3793
  name: string,
4056
- options: { type: string; timeout: number },
3794
+ options: { type: string; timeout: string },
4057
3795
  ) => Promise<{ payload: unknown }>
4058
3796
  )(`wait_for_event:${workflowEventType(eventType)}`, {
4059
3797
  type: workflowEventType(eventType),
4060
- timeout: timeoutMs,
3798
+ timeout: workflowTimeoutFromMs(timeoutMs),
4061
3799
  })) as { payload: unknown };
4062
3800
  return event.payload ?? null;
4063
3801
  },
@@ -4086,9 +3824,12 @@ async function handleRun(request: Request, env: WorkerEnv): Promise<Response> {
4086
3824
  (async () => {
4087
3825
  try {
4088
3826
  installProcessExitTrap();
3827
+ const runPrefix = `[deepline-run:${req.runId}]`;
4089
3828
  captureCoordinatorBinding(env);
3829
+ captureRuntimeApiBinding(env);
4090
3830
  captureHarnessBinding(env);
4091
- const ctx = createWorkerCtxFactory(req, emit, env);
3831
+ await probeHarnessOnce(env, runPrefix);
3832
+ const ctx = createMinimalWorkerCtx(req, emit, env);
4092
3833
  const result = await (
4093
3834
  playFn as (
4094
3835
  ctx: unknown,
@@ -4142,8 +3883,6 @@ async function executeRunRequest(
4142
3883
  workflowStep?: WorkflowStep,
4143
3884
  options?: {
4144
3885
  persistResultDatasets?: boolean;
4145
- signalParentTerminal?: boolean;
4146
- waitUntil?: (promise: Promise<unknown>) => void;
4147
3886
  /**
4148
3887
  * Cooperative cancellation token. CF Workflows surfaces termination as a
4149
3888
  * thrown error from any in-progress step; the harness catches that, flips
@@ -4156,7 +3895,6 @@ async function executeRunRequest(
4156
3895
  const startedAt = nowMs();
4157
3896
  const abortController = options?.abortController ?? new AbortController();
4158
3897
  const abortSignal = abortController.signal;
4159
- const transport = createWorkerRuntimeTransport(req);
4160
3898
  // Maintain a rolling buffer of log lines emitted during the run. This is
4161
3899
  // what the play-page UI consumes via Convex polling + diffPlayRunStreamEvents
4162
3900
  // → play.run.log SSE events. Without periodic flushing, the play page only
@@ -4184,15 +3922,14 @@ async function executeRunRequest(
4184
3922
  .catch(() => undefined)
4185
3923
  .then(async () => {
4186
3924
  try {
4187
- await transport.postRuntimeApi(
4188
- runtimeRunActions.updateStatus({
4189
- playId: req.runId,
4190
- status: 'running',
4191
- runtimeBackend: 'cf_workflows_dynamic_worker',
4192
- liveLogs: snapshot,
4193
- lastCheckpointAt: now,
4194
- }),
4195
- );
3925
+ await postRuntimeApi(req.baseUrl, req.executorToken, {
3926
+ action: 'update_run_status',
3927
+ playId: req.runId,
3928
+ status: 'running',
3929
+ runtimeBackend: 'cf_workflows_dynamic_worker',
3930
+ liveLogs: snapshot,
3931
+ lastCheckpointAt: now,
3932
+ });
4196
3933
  } catch {
4197
3934
  // Best-effort; the terminal update still carries the final logs.
4198
3935
  }
@@ -4214,7 +3951,7 @@ async function executeRunRequest(
4214
3951
  emit(event);
4215
3952
  };
4216
3953
 
4217
- const ctx = createWorkerCtxFactory(
3954
+ const ctx = createMinimalWorkerCtx(
4218
3955
  req,
4219
3956
  wrappedEmit,
4220
3957
  env,
@@ -4222,105 +3959,48 @@ async function executeRunRequest(
4222
3959
  abortSignal,
4223
3960
  );
4224
3961
  try {
4225
- const playStartedAt = nowMs();
4226
3962
  const result = await (
4227
3963
  playFn as (
4228
3964
  ctx: unknown,
4229
3965
  input: Record<string, unknown>,
4230
3966
  ) => Promise<unknown>
4231
3967
  )(ctx, req.runtimeInput);
4232
- recordDynamicWorkerPerfTrace({
4233
- env,
4234
- runId: req.runId,
4235
- phase: 'dynamic_worker.play_fn',
4236
- ms: nowMs() - playStartedAt,
4237
- graphHash: req.graphHash ?? null,
4238
- waitUntil: options?.waitUntil,
4239
- });
4240
3968
  const serializedResult = serializePlayReturnValue(result);
4241
3969
  if (options?.persistResultDatasets) {
4242
3970
  await liveLogFlushInFlight.catch(() => undefined);
4243
- const persistStartedAt = nowMs();
4244
3971
  await persistResultDatasets(req, serializedResult);
4245
- recordDynamicWorkerPerfTrace({
4246
- env,
4247
- runId: req.runId,
4248
- phase: 'dynamic_worker.persist_result_datasets',
4249
- ms: nowMs() - persistStartedAt,
4250
- graphHash: req.graphHash ?? null,
4251
- waitUntil: options?.waitUntil,
3972
+ const terminalResult = trimResultForStatus(serializedResult);
3973
+ await postRuntimeApiBestEffort(req.baseUrl, req.executorToken, {
3974
+ action: 'update_run_status',
3975
+ playId: req.runId,
3976
+ status: 'completed',
3977
+ result: terminalResult,
3978
+ runtimeBackend: 'cf_workflows_dynamic_worker',
3979
+ waitKind: null,
3980
+ waitUntil: null,
3981
+ activeBoundaryId: null,
3982
+ liveLogs,
3983
+ lastCheckpointAt: nowMs(),
4252
3984
  });
4253
- const billingCapMustBlockTerminal =
4254
- extractMaxCreditsPerRun(req.contractSnapshot) !== null;
4255
- const finalizeBilling = () =>
4256
- finalizeWorkerComputeBilling({
4257
- req,
4258
- transport,
4259
- success: true,
4260
- actionEstimate: 4,
4261
- });
4262
- if (billingCapMustBlockTerminal) {
4263
- await finalizeBilling();
4264
- }
4265
- const terminalResult = trimResultForStatus(serializedResult) as
4266
- | Record<string, unknown>
4267
- | null;
4268
- const terminalStartedAt = nowMs();
4269
- await transport.postRuntimeApiBestEffort(
4270
- runtimeRunActions.updateStatus({
4271
- playId: req.runId,
4272
- status: 'completed',
4273
- result: terminalResult,
4274
- runtimeBackend: 'cf_workflows_dynamic_worker',
4275
- liveLogs,
4276
- lastCheckpointAt: nowMs(),
4277
- }),
4278
- );
4279
- recordDynamicWorkerPerfTrace({
4280
- env,
4281
- runId: req.runId,
4282
- phase: 'dynamic_worker.terminal_status_update',
4283
- ms: nowMs() - terminalStartedAt,
4284
- graphHash: req.graphHash ?? null,
4285
- waitUntil: options?.waitUntil,
3985
+ await finalizeWorkerComputeBilling({
3986
+ req,
3987
+ success: true,
3988
+ actionEstimate: 4,
4286
3989
  });
4287
- if (!billingCapMustBlockTerminal) {
4288
- const finalizeBillingPromise = finalizeBilling();
4289
- if (options?.waitUntil) {
4290
- options.waitUntil(
4291
- finalizeBillingPromise.catch((finalizeError) => {
4292
- console.error(
4293
- `[play-harness] non-fatal compute billing finalize failed runId=${req.runId}: ${
4294
- finalizeError instanceof Error
4295
- ? finalizeError.message
4296
- : String(finalizeError)
4297
- }`,
4298
- );
4299
- }),
4300
- );
4301
- } else {
4302
- await finalizeBillingPromise;
4303
- }
4304
- }
4305
- }
4306
- if (options?.signalParentTerminal !== false) {
4307
- await transport
4308
- .signalParentTerminal({
4309
- status: 'completed',
4310
- result: trimResultForStatus(serializedResult) as Record<
4311
- string,
4312
- unknown
4313
- >,
4314
- })
4315
- .catch((error) => {
4316
- console.error(
4317
- `[play-harness] non-fatal parent completion signal failed runId=${req.runId}: ${
4318
- error instanceof Error ? error.message : String(error)
4319
- }`,
4320
- );
4321
- });
4322
3990
  }
3991
+ await signalParentPlayTerminal({
3992
+ req,
3993
+ status: 'completed',
3994
+ result: trimResultForStatus(serializedResult) as Record<string, unknown>,
3995
+ }).catch((error) => {
3996
+ console.error(
3997
+ `[play-harness] non-fatal parent completion signal failed runId=${req.runId}: ${
3998
+ error instanceof Error ? error.message : String(error)
3999
+ }`,
4000
+ );
4001
+ });
4323
4002
  return {
4003
+ playName: req.playName,
4324
4004
  result: serializedResult,
4325
4005
  outputRows: inferOutputRows(serializedResult),
4326
4006
  durationMs: nowMs() - startedAt,
@@ -4337,23 +4017,23 @@ async function executeRunRequest(
4337
4017
  const message = error instanceof Error ? error.message : String(error);
4338
4018
  if (options?.persistResultDatasets) {
4339
4019
  await liveLogFlushInFlight.catch(() => undefined);
4340
- await transport.postRuntimeApiBestEffort(
4341
- runtimeRunActions.updateStatus({
4342
- playId: req.runId,
4343
- status: aborted ? 'cancelled' : 'failed',
4344
- error: message,
4345
- runtimeBackend: 'cf_workflows_dynamic_worker',
4346
- liveLogs,
4347
- lastCheckpointAt: nowMs(),
4348
- }),
4349
- );
4350
- const finalizeBillingPromise = finalizeWorkerComputeBilling({
4020
+ await postRuntimeApiBestEffort(req.baseUrl, req.executorToken, {
4021
+ action: 'update_run_status',
4022
+ playId: req.runId,
4023
+ status: aborted ? 'cancelled' : 'failed',
4024
+ error: message,
4025
+ runtimeBackend: 'cf_workflows_dynamic_worker',
4026
+ waitKind: null,
4027
+ waitUntil: null,
4028
+ activeBoundaryId: null,
4029
+ liveLogs,
4030
+ lastCheckpointAt: nowMs(),
4031
+ });
4032
+ await finalizeWorkerComputeBilling({
4351
4033
  req,
4352
- transport,
4353
4034
  success: false,
4354
4035
  actionEstimate: 4,
4355
- });
4356
- const logFinalizeError = (finalizeError: unknown) => {
4036
+ }).catch((finalizeError) => {
4357
4037
  console.error(
4358
4038
  `[play-harness] non-fatal compute billing finalize failed runId=${req.runId}: ${
4359
4039
  finalizeError instanceof Error
@@ -4361,21 +4041,13 @@ async function executeRunRequest(
4361
4041
  : String(finalizeError)
4362
4042
  }`,
4363
4043
  );
4364
- };
4365
- if (options?.waitUntil) {
4366
- options.waitUntil(finalizeBillingPromise.catch(logFinalizeError));
4367
- } else {
4368
- await finalizeBillingPromise.catch(logFinalizeError);
4369
- }
4370
- }
4371
- if (options?.signalParentTerminal !== false) {
4372
- await transport
4373
- .signalParentTerminal({
4374
- status: aborted ? 'cancelled' : 'failed',
4375
- error: message,
4376
- })
4377
- .catch(() => null);
4044
+ });
4378
4045
  }
4046
+ await signalParentPlayTerminal({
4047
+ req,
4048
+ status: aborted ? 'cancelled' : 'failed',
4049
+ error: message,
4050
+ }).catch(() => null);
4379
4051
  throw error;
4380
4052
  }
4381
4053
  }
@@ -4397,39 +4069,36 @@ function extractMaxCreditsPerRun(contractSnapshot: unknown): number | null {
4397
4069
 
4398
4070
  async function finalizeWorkerComputeBilling(input: {
4399
4071
  req: RunRequest;
4400
- transport?: WorkerRuntimeTransport;
4401
4072
  success: boolean;
4402
4073
  actionEstimate: number;
4403
4074
  }): Promise<void> {
4404
4075
  const maxCreditsPerRun = extractMaxCreditsPerRun(input.req.contractSnapshot);
4405
- const transport = input.transport ?? createWorkerRuntimeTransport(input.req);
4406
- await transport.postRuntimeApi(
4407
- runtimeBillingActions.finalize({
4408
- sessionId: input.req.runId,
4409
- orgId: input.req.orgId,
4410
- operation: 'workflow_run',
4411
- status: input.success ? 'completed' : 'error',
4412
- workflowId: input.req.runId,
4413
- runId: input.req.runId,
4414
- ...(maxCreditsPerRun !== null ? { maxCreditsPerRun } : {}),
4415
- finalItem: {
4416
- itemId: `cloudflare-workflows:${input.req.runId}`,
4417
- source: 'cloudflare_workflows',
4418
- unit: 'action',
4419
- units: Math.max(1, Math.ceil(input.actionEstimate)),
4420
- providerCostUsd: Number(
4421
- (
4422
- Math.max(1, Math.ceil(input.actionEstimate)) *
4423
- (50 / 1_000_000)
4424
- ).toFixed(12),
4425
- ),
4426
- metadata: {
4427
- workflowId: input.req.runId,
4428
- actionEstimate: input.actionEstimate,
4429
- },
4076
+ await postRuntimeApi(input.req.baseUrl, input.req.executorToken, {
4077
+ action: 'compute_billing_finalize',
4078
+ sessionId: input.req.runId,
4079
+ orgId: input.req.orgId,
4080
+ operation: 'workflow_run',
4081
+ status: input.success ? 'completed' : 'error',
4082
+ workflowId: input.req.runId,
4083
+ runId: input.req.runId,
4084
+ ...(maxCreditsPerRun !== null ? { maxCreditsPerRun } : {}),
4085
+ finalItem: {
4086
+ itemId: `cloudflare-workflows:${input.req.runId}`,
4087
+ source: 'cloudflare_workflows',
4088
+ unit: 'action',
4089
+ units: Math.max(1, Math.ceil(input.actionEstimate)),
4090
+ providerCostUsd: Number(
4091
+ (
4092
+ Math.max(1, Math.ceil(input.actionEstimate)) *
4093
+ (50 / 1_000_000)
4094
+ ).toFixed(12),
4095
+ ),
4096
+ metadata: {
4097
+ workflowId: input.req.runId,
4098
+ actionEstimate: input.actionEstimate,
4430
4099
  },
4431
- }),
4432
- );
4100
+ },
4101
+ });
4433
4102
  }
4434
4103
 
4435
4104
  function isInlineCsv(
@@ -4593,7 +4262,7 @@ function trimResultShape(value: unknown): unknown {
4593
4262
  const out: Record<string, unknown> = {};
4594
4263
  for (const [key, child] of Object.entries(value)) {
4595
4264
  if (key === 'preview' && Array.isArray(child) && value.kind === 'dataset') {
4596
- out[key] = child.slice(0, 10).map(trimResultShape);
4265
+ out[key] = child.slice(0, 5).map(trimResultShape);
4597
4266
  } else {
4598
4267
  out[key] = trimResultShape(child);
4599
4268
  }
@@ -4631,20 +4300,34 @@ function serializeValue(value: unknown, depth: number): unknown {
4631
4300
  ? (value as unknown as { __deeplineCacheSummary: string })
4632
4301
  .__deeplineCacheSummary
4633
4302
  : null;
4634
- const rows = value
4303
+ const workProgress =
4304
+ isRecord(
4305
+ (value as unknown as { __deeplineWorkProgress?: unknown })
4306
+ .__deeplineWorkProgress,
4307
+ )
4308
+ ? (value as unknown as { __deeplineWorkProgress: Record<string, unknown> })
4309
+ .__deeplineWorkProgress
4310
+ : null;
4311
+ const previewRows = value
4312
+ .slice(0, 5)
4635
4313
  .map((row) => serializeValue(row, depth + 1))
4636
4314
  .filter(isRecord);
4637
4315
  if (tableNamespace && datasetId) {
4638
- const columns = inferColumns(rows);
4316
+ const columns = inferColumns(
4317
+ value.map((row) => serializeValue(row, depth + 1)).filter(isRecord),
4318
+ );
4639
4319
  return {
4640
4320
  kind: 'dataset' as const,
4641
4321
  datasetKind,
4642
4322
  datasetId,
4643
4323
  count: datasetCount,
4644
4324
  columns,
4645
- preview: rows,
4325
+ preview: previewRows,
4646
4326
  tableNamespace,
4647
4327
  ...(cacheSummary ? { cacheSummary } : {}),
4328
+ ...(workProgress
4329
+ ? { _metadata: { workProgress } }
4330
+ : {}),
4648
4331
  };
4649
4332
  }
4650
4333
  return value.map((entry) => serializeValue(entry, depth + 1));
@@ -4729,18 +4412,16 @@ export class TenantWorkflow extends WorkflowEntrypoint<
4729
4412
  ): Promise<unknown> {
4730
4413
  const req = runRequestFromWorkflowParams(event.payload);
4731
4414
  const runPrefix = `[deepline-run:${req.runId}]`;
4732
- recordDynamicWorkerPerfTrace({
4733
- env: this.env,
4734
- runId: req.runId,
4735
- phase: 'dynamic_worker.tenant_workflow_run_start',
4736
- ms: 0,
4737
- graphHash: req.graphHash ?? null,
4738
- waitUntil:
4739
- typeof this.ctx.waitUntil === 'function'
4740
- ? (promise) => this.ctx.waitUntil(promise)
4741
- : undefined,
4742
- });
4415
+ // DEBUG: confirm TenantWorkflow.run was invoked at all. If this log
4416
+ // appears in tail (parent's tail consumer captures harness logs by
4417
+ // the deepline-run prefix in flushTailRunLogs), the throw is
4418
+ // somewhere inside executeRunRequest. If it doesn't appear, the
4419
+ // throw is in the framework wrapper between the loader and run().
4420
+ console.log(
4421
+ `${runPrefix} TenantWorkflow.run entered baseUrl=${req.baseUrl}`,
4422
+ );
4743
4423
  captureCoordinatorBinding(this.env);
4424
+ captureRuntimeApiBinding(this.env);
4744
4425
  // Hand the harness service binding (if wired) to the SDK-side stub.
4745
4426
  // Must run BEFORE any SDK call site that would reach into HARNESS,
4746
4427
  // i.e. before user play code is invoked. Idempotent within a run.
@@ -4749,23 +4430,10 @@ export class TenantWorkflow extends WorkflowEntrypoint<
4749
4430
  // same isolate). Awaited so the result is in the log before user code
4750
4431
  // begins, but never throws — broken HARNESS at probe time doesn't
4751
4432
  // block the play; real call-site errors do.
4752
- const harnessProbeStartedAt = nowMs();
4753
4433
  await probeHarnessOnce(this.env, runPrefix);
4754
- recordDynamicWorkerPerfTrace({
4755
- env: this.env,
4756
- runId: req.runId,
4757
- phase: 'dynamic_worker.harness_probe',
4758
- ms: nowMs() - harnessProbeStartedAt,
4759
- graphHash: req.graphHash ?? null,
4760
- waitUntil:
4761
- typeof this.ctx.waitUntil === 'function'
4762
- ? (promise) => this.ctx.waitUntil(promise)
4763
- : undefined,
4764
- });
4765
4434
  const abortController = new AbortController();
4766
- const executeStartedAt = nowMs();
4767
4435
  try {
4768
- const output = (await executeRunRequest(
4436
+ return (await executeRunRequest(
4769
4437
  req,
4770
4438
  this.env,
4771
4439
  (runnerEvent) => {
@@ -4778,47 +4446,9 @@ export class TenantWorkflow extends WorkflowEntrypoint<
4778
4446
  }
4779
4447
  },
4780
4448
  step,
4781
- {
4782
- persistResultDatasets: !req.playCallGovernance,
4783
- abortController,
4784
- waitUntil:
4785
- typeof this.ctx.waitUntil === 'function'
4786
- ? (promise) => this.ctx.waitUntil(promise)
4787
- : undefined,
4788
- },
4449
+ { persistResultDatasets: !req.playCallGovernance, abortController },
4789
4450
  )) as Record<string, unknown>;
4790
- recordDynamicWorkerPerfTrace({
4791
- env: this.env,
4792
- runId: req.runId,
4793
- phase: 'dynamic_worker.execute_run_request',
4794
- ms: nowMs() - executeStartedAt,
4795
- graphHash: req.graphHash ?? null,
4796
- extra: { ok: true },
4797
- waitUntil:
4798
- typeof this.ctx.waitUntil === 'function'
4799
- ? (promise) => this.ctx.waitUntil(promise)
4800
- : undefined,
4801
- });
4802
- return output;
4803
4451
  } catch (error) {
4804
- recordDynamicWorkerPerfTrace({
4805
- env: this.env,
4806
- runId: req.runId,
4807
- phase: 'dynamic_worker.execute_run_request',
4808
- ms: nowMs() - executeStartedAt,
4809
- graphHash: req.graphHash ?? null,
4810
- extra: {
4811
- ok: false,
4812
- error:
4813
- error instanceof Error
4814
- ? error.message.slice(0, 300)
4815
- : String(error),
4816
- },
4817
- waitUntil:
4818
- typeof this.ctx.waitUntil === 'function'
4819
- ? (promise) => this.ctx.waitUntil(promise)
4820
- : undefined,
4821
- });
4822
4452
  // CF Workflows + the dynamic-workflows framework swallow the error
4823
4453
  // message and surface only "internal error; reference = <id>" via
4824
4454
  // instance.status(). The per-play Worker's console.error doesn't
@@ -4840,13 +4470,12 @@ export class TenantWorkflow extends WorkflowEntrypoint<
4840
4470
  // so this callback is the ONLY way the real error message reaches the
4841
4471
  // user via tail/SSE. Retry with backoff before giving up; if we drop
4842
4472
  // it, the user is stuck staring at the opaque CF reference id.
4843
- const errorPayload = JSON.stringify(
4844
- runtimeRunActions.updateStatus({
4845
- playId: req.runId,
4846
- status: 'failed',
4847
- error: `TenantWorkflow.run threw: ${detail.name ?? 'Error'}: ${detail.message}\n${detail.stack ?? ''}`,
4848
- }),
4849
- );
4473
+ const errorPayload = JSON.stringify({
4474
+ action: 'update_run_status',
4475
+ playId: req.runId,
4476
+ status: 'failed',
4477
+ error: `TenantWorkflow.run threw: ${detail.name ?? 'Error'}: ${detail.message}\n${detail.stack ?? ''}`,
4478
+ });
4850
4479
  const backoffMs = [200, 500, 1500];
4851
4480
  let lastCallbackError: unknown = null;
4852
4481
  let delivered = false;
@@ -4903,80 +4532,17 @@ export class TenantWorkflow extends WorkflowEntrypoint<
4903
4532
  }
4904
4533
  }
4905
4534
 
4906
- async function handleInlineRun(
4907
- request: Request,
4908
- env: WorkerEnv,
4909
- ): Promise<Response> {
4910
- const parseStartedAt = nowMs();
4911
- const req = (await request.json()) as RunRequest;
4912
- const runPrefix = `[deepline-run:${req.runId}]`;
4913
- const events: RunnerEvent[] = [];
4914
- captureCoordinatorBinding(env);
4915
- captureHarnessBinding(env);
4916
- await probeHarnessOnce(env, runPrefix);
4917
- try {
4918
- const output = await executeRunRequest(
4919
- req,
4920
- env,
4921
- (runnerEvent) => {
4922
- events.push(runnerEvent);
4923
- if (runnerEvent.type === 'log') {
4924
- console.log(`${runPrefix} ${runnerEvent.message}`);
4925
- } else if (runnerEvent.type === 'error') {
4926
- console.error(
4927
- `${runPrefix} ${runnerEvent.message}${runnerEvent.stack ? `\n${runnerEvent.stack}` : ''}`,
4928
- );
4929
- }
4930
- },
4931
- undefined,
4932
- {
4933
- persistResultDatasets: false,
4934
- signalParentTerminal: false,
4935
- },
4936
- );
4937
- return Response.json({
4938
- status: 'completed',
4939
- result: output.result,
4940
- outputRows: output.outputRows,
4941
- durationMs: output.durationMs,
4942
- parseMs: nowMs() - parseStartedAt,
4943
- events,
4944
- });
4945
- } catch (error) {
4946
- const message = error instanceof Error ? error.message : String(error);
4947
- const stack = error instanceof Error ? error.stack : undefined;
4948
- events.push({
4949
- type: 'error',
4950
- message,
4951
- ...(stack ? { stack } : {}),
4952
- ts: nowMs(),
4953
- });
4954
- return Response.json(
4955
- {
4956
- status: 'failed',
4957
- error: { message, ...(stack ? { stack } : {}) },
4958
- parseMs: nowMs() - parseStartedAt,
4959
- events,
4960
- },
4961
- { status: 200 },
4962
- );
4963
- }
4964
- }
4965
-
4966
4535
  const workerEntrypoint = {
4967
4536
  async fetch(request: Request, env: WorkerEnv): Promise<Response> {
4968
4537
  const url = new URL(request.url);
4969
4538
  if (request.method === 'POST' && url.pathname === '/start') {
4970
- const startTotalStartedAt = Date.now();
4971
4539
  if (!env.WORKFLOWS) {
4972
4540
  return new Response('missing WORKFLOWS binding', { status: 500 });
4973
4541
  }
4974
- const parseStartedAt = Date.now();
4975
4542
  const body = (await request.json().catch(() => null)) as {
4976
4543
  id?: string;
4977
4544
  payload?: Record<string, unknown>;
4978
4545
  } | null;
4979
- const parseMs = Date.now() - parseStartedAt;
4980
4546
  if (!body?.id || !isRecord(body.payload)) {
4981
4547
  return new Response('invalid workflow start body', { status: 400 });
4982
4548
  }
@@ -4992,16 +4558,6 @@ const workerEntrypoint = {
4992
4558
  typeof body.payload.graphHash === 'string'
4993
4559
  ? body.payload.graphHash
4994
4560
  : null;
4995
- console.log(
4996
- `[perf-trace] ${JSON.stringify({
4997
- ts: Date.now(),
4998
- source: 'dynamic_worker',
4999
- runId,
5000
- phase: 'dynamic_worker.start_request_parse',
5001
- ms: parseMs,
5002
- ...(graphHash ? { graphHash } : {}),
5003
- })}`,
5004
- );
5005
4561
  console.log(
5006
4562
  `[perf-trace] ${JSON.stringify({
5007
4563
  ts: Date.now(),
@@ -5016,18 +4572,13 @@ const workerEntrypoint = {
5016
4572
  id: instance.id,
5017
4573
  status: 'submitted',
5018
4574
  timingsMs: {
5019
- startRequestParse: parseMs,
5020
4575
  workflowCreate: workflowCreateMs,
5021
- startTotal: Date.now() - startTotalStartedAt,
5022
4576
  },
5023
4577
  });
5024
4578
  }
5025
4579
  if (request.method === 'POST' && url.pathname === '/run') {
5026
4580
  return handleRun(request, env);
5027
4581
  }
5028
- if (request.method === 'POST' && url.pathname === '/run-inline') {
5029
- return handleInlineRun(request, env);
5030
- }
5031
4582
  if (request.method === 'GET' && url.pathname === '/health') {
5032
4583
  return new Response('ok', { status: 200 });
5033
4584
  }