deepline 0.1.24 → 0.1.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +344 -179
- package/dist/cli/index.mjs +296 -130
- package/dist/index.d.mts +3 -1
- package/dist/index.d.ts +3 -1
- package/dist/index.js +2 -2
- package/dist/index.mjs +2 -2
- package/dist/repo/apps/play-runner-workers/src/coordinator-entry.ts +75 -62
- package/dist/repo/apps/play-runner-workers/src/dedup-do.ts +6 -1
- package/dist/repo/apps/play-runner-workers/src/entry.ts +1102 -711
- package/dist/repo/apps/play-runner-workers/src/runtime/dataset-handles.ts +418 -0
- package/dist/repo/sdk/src/client.ts +5 -1
- package/dist/repo/sdk/src/plays/bundle-play-file.ts +1 -1
- package/dist/repo/sdk/src/plays/harness-stub.ts +23 -35
- package/dist/repo/sdk/src/version.ts +1 -1
- package/dist/repo/shared_libs/play-runtime/execution-plan.ts +18 -8
- package/dist/repo/shared_libs/play-runtime/scheduler-backend.ts +5 -4
- package/dist/repo/shared_libs/play-runtime/step-lifecycle-tracker.ts +228 -0
- package/dist/repo/shared_libs/plays/bundling/index.ts +90 -51
- package/package.json +1 -1
- package/dist/repo/shared_libs/play-runtime/runtime-actions.ts +0 -208
|
@@ -63,12 +63,37 @@ import {
|
|
|
63
63
|
derivePlayRowIdentityFromKey,
|
|
64
64
|
} from '../../../shared_libs/plays/row-identity';
|
|
65
65
|
import {
|
|
66
|
+
getTopLevelPipelineSubsteps,
|
|
66
67
|
getCompiledPipelineSubsteps,
|
|
67
68
|
flattenStaticPipeline,
|
|
68
69
|
resolveSheetContractForTableNamespace,
|
|
69
70
|
sqlSafePlayColumnName,
|
|
71
|
+
type PlayStaticSubstep,
|
|
70
72
|
type PlayStaticPipeline,
|
|
73
|
+
type PlaySheetContract,
|
|
71
74
|
} from '../../../shared_libs/plays/static-pipeline';
|
|
75
|
+
import {
|
|
76
|
+
PlayStepLifecycleTracker,
|
|
77
|
+
type PlayStepLifecycleEvent,
|
|
78
|
+
} from '../../../shared_libs/play-runtime/step-lifecycle-tracker';
|
|
79
|
+
import type {
|
|
80
|
+
PlayRunLedgerEvent,
|
|
81
|
+
PlayRunLedgerStepProgress,
|
|
82
|
+
PlayRunLedgerStepStatus,
|
|
83
|
+
} from '../../../shared_libs/play-runtime/run-ledger';
|
|
84
|
+
import {
|
|
85
|
+
createCsvDatasetHandle,
|
|
86
|
+
createInlineDatasetHandle,
|
|
87
|
+
createMaterializedDatasetHandle,
|
|
88
|
+
createPersistedDatasetHandle,
|
|
89
|
+
datasetRowCountHint,
|
|
90
|
+
isDatasetHandle,
|
|
91
|
+
iterDatasetChunks,
|
|
92
|
+
WORKER_DATASET_IN_MEMORY_ROWS,
|
|
93
|
+
WORKER_DATASET_PREVIEW_ROWS,
|
|
94
|
+
type WorkerDatasetHandle,
|
|
95
|
+
type WorkerDatasetInput,
|
|
96
|
+
} from './runtime/dataset-handles';
|
|
72
97
|
// The harness stub forwards leaf calls (validation, runtime-api HTTP) into
|
|
73
98
|
// the long-lived Play Harness Worker via env.HARNESS. We import the
|
|
74
99
|
// `setHarnessBinding` setter eagerly so it's available the moment
|
|
@@ -80,9 +105,10 @@ import {
|
|
|
80
105
|
// modules without going through this stub is how we'd accidentally
|
|
81
106
|
// re-bundle harness internals into per-play. Keep that in mind.
|
|
82
107
|
import {
|
|
83
|
-
harnessFetchStagedFile,
|
|
84
108
|
harnessPersistCompletedSheetRows,
|
|
85
109
|
harnessPrewarmPostgresSessions,
|
|
110
|
+
harnessReadSheetDatasetRows,
|
|
111
|
+
harnessReadStagedFileChunk,
|
|
86
112
|
harnessStartSheetDataset,
|
|
87
113
|
setHarnessBinding,
|
|
88
114
|
} from '../../../sdk/src/plays/harness-stub';
|
|
@@ -115,12 +141,14 @@ type RunRequest = {
|
|
|
115
141
|
runtimeInput: Record<string, unknown>;
|
|
116
142
|
/** Optional inline CSV rows (for plays where ctx.csv was passed inline data). */
|
|
117
143
|
inlineCsv?: { name: string; rows: Record<string, unknown>[] } | null;
|
|
118
|
-
/**
|
|
119
|
-
|
|
144
|
+
/** Staged input files keyed by logical filename (used by ctx.csv). */
|
|
145
|
+
inputFiles?: Record<string, WorkerFileRef> | null;
|
|
120
146
|
/** Files packaged with the play artifact (relative-path imports). */
|
|
121
147
|
packagedFiles?: Array<{
|
|
122
148
|
playPath: string;
|
|
123
149
|
storageKey: string;
|
|
150
|
+
contentType?: string | null;
|
|
151
|
+
bytes?: number | null;
|
|
124
152
|
}> | null;
|
|
125
153
|
/** Partition fan-out: only process rows[start..end) of a sliced dataset. */
|
|
126
154
|
partitionRange?: { start: number; end: number } | null;
|
|
@@ -148,6 +176,14 @@ type RunRequest = {
|
|
|
148
176
|
totalRows?: number;
|
|
149
177
|
};
|
|
150
178
|
|
|
179
|
+
/**
 * Reference to a staged file handed to the runner. `RunRequest.inputFiles`
 * maps logical filenames to values of this shape.
 */
type WorkerFileRef = {
  /** Logical path of the file. NOTE(review): presumably the path the play passes to ctx.csv — confirm. */
  logicalPath: string;
  /** File name. NOTE(review): relationship to `logicalPath` is not visible here — confirm. */
  fileName: string;
  /** Key identifying the stored object backing this file. */
  storageKey: string;
  /** Content type of the file, when known. */
  contentType?: string | null;
  /** Size in bytes, when known. NOTE(review): presumably the expected object size — confirm. */
  bytes?: number | null;
};
|
|
186
|
+
|
|
151
187
|
const EXECUTE_TOOL_METADATA_HEADER = 'x-deepline-include-tool-metadata';
|
|
152
188
|
|
|
153
189
|
/** R2 binding injected by the Worker runtime (when present in deploy metadata). */
|
|
@@ -315,6 +351,7 @@ async function probeHarnessOnce(
|
|
|
315
351
|
*/
|
|
316
352
|
const RUNTIME_API_TIMEOUT_MS = 30_000;
|
|
317
353
|
const RUNTIME_API_PLAY_RUN_TIMEOUT_MS = 75_000;
|
|
354
|
+
const RUNTIME_API_RETRY_DELAYS_MS = [250, 750, 1500] as const;
|
|
318
355
|
let loggedMissingRuntimeApiBinding = false;
|
|
319
356
|
|
|
320
357
|
async function fetchRuntimeApi(
|
|
@@ -383,132 +420,6 @@ const WORKER_PLAY_CALL_LIMITS = {
|
|
|
383
420
|
maxConcurrentPlayCalls: 16,
|
|
384
421
|
};
|
|
385
422
|
|
|
386
|
-
/**
|
|
387
|
-
* Produces a dataset-envelope-shaped object compatible with the legacy
|
|
388
|
-
* SerializedPlayDataset shape (kind/datasetKind/count/columns/preview) so
|
|
389
|
-
* tests + assertions that probe `result.rows.columns` etc. work without the
|
|
390
|
-
* ctx changing semantics. Plays still iterate rows via array semantics.
|
|
391
|
-
*/
|
|
392
|
-
function makeWorkerDataset<T extends Record<string, unknown>>(
|
|
393
|
-
name: string,
|
|
394
|
-
rows: T[],
|
|
395
|
-
options?: {
|
|
396
|
-
count?: number;
|
|
397
|
-
datasetKind?: 'csv' | 'map';
|
|
398
|
-
cacheSummary?: string | null;
|
|
399
|
-
workProgress?: {
|
|
400
|
-
total: number;
|
|
401
|
-
executed: number;
|
|
402
|
-
reused: number;
|
|
403
|
-
skipped: number;
|
|
404
|
-
pending: number;
|
|
405
|
-
failed: number;
|
|
406
|
-
degraded?: boolean;
|
|
407
|
-
duplicates?: {
|
|
408
|
-
exact?: number;
|
|
409
|
-
semantic?: number;
|
|
410
|
-
rejected?: number;
|
|
411
|
-
};
|
|
412
|
-
};
|
|
413
|
-
},
|
|
414
|
-
): T[] & {
|
|
415
|
-
count(): Promise<number>;
|
|
416
|
-
peek(limit?: number): Promise<T[]>;
|
|
417
|
-
materialize(limit?: number): Promise<T[]>;
|
|
418
|
-
toJSON(): unknown;
|
|
419
|
-
datasetId: string;
|
|
420
|
-
tableNamespace: string;
|
|
421
|
-
} {
|
|
422
|
-
const datasetId = `map:${name}`;
|
|
423
|
-
const count = Math.max(0, Math.floor(options?.count ?? rows.length));
|
|
424
|
-
const datasetKind = options?.datasetKind ?? 'map';
|
|
425
|
-
const cacheSummary = options?.cacheSummary ?? null;
|
|
426
|
-
const workProgress = options?.workProgress;
|
|
427
|
-
// Build the array result. JSON.stringify on arrays calls toJSON only if
|
|
428
|
-
// present on the array itself — we attach below. The dataset metadata is
|
|
429
|
-
// also exposed via own properties so plays can `enriched.count()` etc.
|
|
430
|
-
const arr = rows as T[] & {
|
|
431
|
-
count(): Promise<number>;
|
|
432
|
-
peek(limit?: number): Promise<T[]>;
|
|
433
|
-
materialize(limit?: number): Promise<T[]>;
|
|
434
|
-
toJSON(): unknown;
|
|
435
|
-
datasetId: string;
|
|
436
|
-
tableNamespace: string;
|
|
437
|
-
};
|
|
438
|
-
const previewLimit = 5;
|
|
439
|
-
const inferredColumns = (() => {
|
|
440
|
-
const cols = new Set<string>();
|
|
441
|
-
for (const r of rows) {
|
|
442
|
-
for (const k of Object.keys(r)) cols.add(k);
|
|
443
|
-
}
|
|
444
|
-
return [...cols];
|
|
445
|
-
})();
|
|
446
|
-
Object.defineProperty(arr, 'count', {
|
|
447
|
-
value: async () => count,
|
|
448
|
-
enumerable: false,
|
|
449
|
-
});
|
|
450
|
-
Object.defineProperty(arr, 'peek', {
|
|
451
|
-
value: async (limit = previewLimit) => rows.slice(0, Math.max(0, limit)),
|
|
452
|
-
enumerable: false,
|
|
453
|
-
});
|
|
454
|
-
Object.defineProperty(arr, 'materialize', {
|
|
455
|
-
value: async (limit?: number) =>
|
|
456
|
-
limit === undefined ? [...rows] : rows.slice(0, Math.max(0, limit)),
|
|
457
|
-
enumerable: false,
|
|
458
|
-
});
|
|
459
|
-
Object.defineProperty(arr, 'datasetId', {
|
|
460
|
-
value: datasetId,
|
|
461
|
-
enumerable: true,
|
|
462
|
-
});
|
|
463
|
-
Object.defineProperty(arr, 'tableNamespace', {
|
|
464
|
-
value: name,
|
|
465
|
-
enumerable: true,
|
|
466
|
-
});
|
|
467
|
-
Object.defineProperty(arr, '__deeplineDatasetCount', {
|
|
468
|
-
value: count,
|
|
469
|
-
enumerable: false,
|
|
470
|
-
});
|
|
471
|
-
Object.defineProperty(arr, '__deeplineDatasetKind', {
|
|
472
|
-
value: datasetKind,
|
|
473
|
-
enumerable: false,
|
|
474
|
-
});
|
|
475
|
-
Object.defineProperty(arr, '__deeplineCacheSummary', {
|
|
476
|
-
value: cacheSummary,
|
|
477
|
-
enumerable: false,
|
|
478
|
-
});
|
|
479
|
-
Object.defineProperty(arr, '__deeplineWorkProgress', {
|
|
480
|
-
value: workProgress,
|
|
481
|
-
enumerable: false,
|
|
482
|
-
});
|
|
483
|
-
// Plays often `return { rows: dataset, count: N }`. JSON.stringify on the
|
|
484
|
-
// array would normally produce `[row, row, ...]` — we want the dataset
|
|
485
|
-
// envelope shape instead so assertions seeing `result.rows.columns` pass.
|
|
486
|
-
// toJSON on an array is honored by JSON.stringify per ES spec.
|
|
487
|
-
// toJSON includes ALL rows so the workflow DO can persist the full
|
|
488
|
-
// dataset to the sheet table. We clone via plain-object copy to avoid
|
|
489
|
-
// re-entrant toJSON resolution (the dataset IS an array; passing it back
|
|
490
|
-
// via `preview: arr` would recurse forever through this same toJSON).
|
|
491
|
-
Object.defineProperty(arr, 'toJSON', {
|
|
492
|
-
value: () => {
|
|
493
|
-
const plainRows = rows.map((r) => ({ ...r }));
|
|
494
|
-
return {
|
|
495
|
-
kind: 'dataset' as const,
|
|
496
|
-
datasetKind,
|
|
497
|
-
datasetId,
|
|
498
|
-
count,
|
|
499
|
-
columns: inferredColumns,
|
|
500
|
-
preview: plainRows,
|
|
501
|
-
tableNamespace: name,
|
|
502
|
-
...(cacheSummary ? { cacheSummary } : {}),
|
|
503
|
-
...(workProgress ? { _metadata: { workProgress } } : {}),
|
|
504
|
-
};
|
|
505
|
-
},
|
|
506
|
-
enumerable: false,
|
|
507
|
-
});
|
|
508
|
-
void previewLimit;
|
|
509
|
-
return arr;
|
|
510
|
-
}
|
|
511
|
-
|
|
512
423
|
type RunnerEvent =
|
|
513
424
|
| {
|
|
514
425
|
type: 'log';
|
|
@@ -533,12 +444,53 @@ type WorkerCtxCallbacks = {
|
|
|
533
444
|
nodeId: string;
|
|
534
445
|
progress: LiveNodeProgressSnapshot;
|
|
535
446
|
}) => void;
|
|
447
|
+
onMapStarted?: (nodeId: string, at?: number) => void;
|
|
448
|
+
onMapCompleted?: (nodeId: string, at?: number) => void;
|
|
449
|
+
onToolCalled?: (toolId: string, at?: number) => void;
|
|
450
|
+
onToolFailed?: (toolId: string, at?: number) => void;
|
|
536
451
|
};
|
|
537
452
|
|
|
538
453
|
/** Returns the current time as milliseconds since the Unix epoch. */
function nowMs(): number {
  const epochMillis = Date.now();
  return epochMillis;
}
|
|
541
456
|
|
|
457
|
+
/**
 * Builds the node id string for one static pipeline substep.
 *
 * The id is prefixed with the substep type and followed by the field(s) that
 * identify it. Substeps whose type is not recognised get `node:<index>`.
 *
 * @param substep - The static substep to identify.
 * @param index - Position of the substep in its list; used as a fallback
 *   for `csv`/`code` substeps with a falsy `field`, and for unknown types.
 * @returns The derived node id.
 */
function getStaticSubstepNodeId(
  substep: PlayStaticSubstep,
  index: number,
): string {
  if (substep.type === 'csv') {
    // `||` (not `??`): an empty-string field also falls back to the index.
    return `csv:${substep.field || index}`;
  }
  if (substep.type === 'map') {
    return `map:${substep.tableNamespace ?? substep.field}`;
  }
  if (substep.type === 'tool') {
    return `tool:${substep.field}:${substep.toolId}`;
  }
  if (substep.type === 'waterfall') {
    return `waterfall:${substep.id ?? substep.field}`;
  }
  if (substep.type === 'play_call') {
    return `play_call:${substep.field}:${substep.playId}`;
  }
  if (substep.type === 'run_javascript') {
    return `run_javascript:${substep.alias}`;
  }
  if (substep.type === 'code') {
    return `code:${substep.field || index}`;
  }
  return `node:${index}`;
}
|
|
480
|
+
|
|
481
|
+
/**
 * Lists the top-level pipeline nodes of a contract snapshot, in the order
 * `getTopLevelPipelineSubsteps` returns them.
 *
 * @param contractSnapshot - Untyped snapshot; its `staticPipeline` property is
 *   read only when the snapshot passes `isRecord`. Otherwise `null` is handed
 *   to `getTopLevelPipelineSubsteps`.
 * @returns One `{ nodeId, type }` entry per top-level substep.
 */
function buildOrderedNodeList(
  contractSnapshot: unknown,
): Array<{ nodeId: string; type: string }> {
  const pipeline = isRecord(contractSnapshot)
    ? (contractSnapshot.staticPipeline as PlayStaticPipeline | null | undefined)
    : undefined;
  const topLevelSubsteps = getTopLevelPipelineSubsteps(pipeline ?? null);

  const orderedNodes: Array<{ nodeId: string; type: string }> = [];
  topLevelSubsteps.forEach((substep, position) => {
    orderedNodes.push({
      nodeId: getStaticSubstepNodeId(substep, position),
      type: substep.type,
    });
  });
  return orderedNodes;
}
|
|
493
|
+
|
|
542
494
|
function recordRunnerPerfTrace(input: {
|
|
543
495
|
req: RunRequest;
|
|
544
496
|
phase: string;
|
|
@@ -557,7 +509,7 @@ function recordRunnerPerfTrace(input: {
|
|
|
557
509
|
source: 'dynamic_worker' as const,
|
|
558
510
|
runId: input.req.runId,
|
|
559
511
|
phase: `runner.${input.phase}`,
|
|
560
|
-
|
|
512
|
+
ms: input.ms ?? 0,
|
|
561
513
|
...(input.extra ?? {}),
|
|
562
514
|
};
|
|
563
515
|
console.log(
|
|
@@ -614,44 +566,73 @@ async function postRuntimeApi<T>(
|
|
|
614
566
|
// Routes through the in-process RUNTIME_API binding when present; otherwise
|
|
615
567
|
// falls back to a public fetch against `${baseUrl}${path}`. Either path
|
|
616
568
|
// hits the same handler with the same auth — only the transport changes.
|
|
617
|
-
const
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
'
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
569
|
+
const serializedBody = JSON.stringify(body);
|
|
570
|
+
let lastError: unknown = null;
|
|
571
|
+
for (let attempt = 0; attempt <= RUNTIME_API_RETRY_DELAYS_MS.length; attempt += 1) {
|
|
572
|
+
let res: Response;
|
|
573
|
+
try {
|
|
574
|
+
res = await fetchRuntimeApi(baseUrl, '/api/v2/plays/internal/runtime', {
|
|
575
|
+
method: 'POST',
|
|
576
|
+
headers: {
|
|
577
|
+
'content-type': 'application/json',
|
|
578
|
+
authorization: `Bearer ${executorToken}`,
|
|
579
|
+
'x-deepline-request-id': makeRequestId(),
|
|
580
|
+
},
|
|
581
|
+
body: serializedBody,
|
|
582
|
+
});
|
|
583
|
+
} catch (error) {
|
|
584
|
+
lastError = error;
|
|
585
|
+
if (
|
|
586
|
+
attempt >= RUNTIME_API_RETRY_DELAYS_MS.length ||
|
|
587
|
+
!isRetryableRuntimeApiError(error)
|
|
588
|
+
) {
|
|
589
|
+
throw error;
|
|
590
|
+
}
|
|
591
|
+
await sleepRuntimeApiRetry(attempt);
|
|
592
|
+
continue;
|
|
593
|
+
}
|
|
594
|
+
|
|
595
|
+
if (res.ok) {
|
|
596
|
+
return (await res.json()) as T;
|
|
597
|
+
}
|
|
598
|
+
|
|
627
599
|
const text = await res.text().catch(() => '');
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
600
|
+
const redacted = redactSecretsFromLogString(text.slice(0, 500));
|
|
601
|
+
lastError = new Error(`runtime API ${res.status}: ${redacted}`);
|
|
602
|
+
if (
|
|
603
|
+
attempt >= RUNTIME_API_RETRY_DELAYS_MS.length ||
|
|
604
|
+
!isRetryableRuntimeApiResponse(res.status, text)
|
|
605
|
+
) {
|
|
606
|
+
throw lastError;
|
|
607
|
+
}
|
|
608
|
+
await sleepRuntimeApiRetry(attempt);
|
|
631
609
|
}
|
|
632
|
-
|
|
610
|
+
throw lastError instanceof Error ? lastError : new Error(String(lastError));
|
|
633
611
|
}
|
|
634
612
|
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
)
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
'x-deepline-request-id': makeRequestId(),
|
|
646
|
-
},
|
|
647
|
-
body: JSON.stringify(body),
|
|
648
|
-
});
|
|
649
|
-
if (!res.ok) {
|
|
650
|
-
const text = await res.text().catch(() => '');
|
|
651
|
-
throw new Error(
|
|
652
|
-
`Deepline API ${path} ${res.status}: ${redactSecretsFromLogString(text.slice(0, 500))}`,
|
|
653
|
-
);
|
|
613
|
+
/**
 * Decides whether a thrown runtime-API error is worth retrying, based on
 * whether its message matches a known timeout / connection-failure pattern.
 *
 * @param error - The caught value; non-`Error` values are stringified.
 * @returns `true` when the message matches a transient-failure pattern.
 */
function isRetryableRuntimeApiError(error: unknown): boolean {
  const transientFailurePattern =
    /timed out|timeout|fetch failed|ECONNRESET|ECONNREFUSED|UND_ERR_CONNECT_TIMEOUT/i;
  let description: string;
  if (error instanceof Error) {
    description = error.message;
  } else {
    description = String(error);
  }
  return transientFailurePattern.test(description);
}
|
|
619
|
+
|
|
620
|
+
/**
 * Decides whether a non-OK runtime-API response is worth retrying.
 *
 * 408, 429, 502, 503 and 504 are always retryable. A 500 is retryable only
 * when its body matches a known timeout / connection-failure pattern. Every
 * other status is not retryable.
 *
 * @param status - HTTP status code of the response.
 * @param body - Response body text, consulted only for status 500.
 */
function isRetryableRuntimeApiResponse(status: number, body: string): boolean {
  switch (status) {
    case 408:
    case 429:
    case 502:
    case 503:
    case 504:
      return true;
    case 500:
      return /timeout exceeded when trying to connect|timed out|fetch failed|ECONNRESET|UND_ERR_CONNECT_TIMEOUT/i.test(
        body,
      );
    default:
      return false;
  }
}
|
|
631
|
+
|
|
632
|
+
/**
 * Waits for the back-off delay configured for a retry attempt.
 *
 * @param attempt - Zero-based index into `RUNTIME_API_RETRY_DELAYS_MS`; an
 *   index with no entry waits 0 ms.
 */
async function sleepRuntimeApiRetry(attempt: number): Promise<void> {
  const delayMs = RUNTIME_API_RETRY_DELAYS_MS[attempt] ?? 0;
  await new Promise((wake) => {
    setTimeout(wake, delayMs);
  });
}
|
|
656
637
|
|
|
657
638
|
function describeRuntimeApiBody(body: unknown): string {
|
|
@@ -973,6 +954,21 @@ async function executeTool(
|
|
|
973
954
|
return callToolDirect(req, args);
|
|
974
955
|
}
|
|
975
956
|
|
|
957
|
+
/**
 * Runs `executeTool` and reports the call to the optional lifecycle callbacks.
 *
 * `onToolCalled` is invoked before the tool runs. `onToolFailed` is invoked
 * if `executeTool` rejects, and the original error is then rethrown.
 *
 * @param req - The run request forwarded to `executeTool`.
 * @param args - Tool call id, tool id and input forwarded to `executeTool`.
 * @param workflowStep - Optional workflow step forwarded to `executeTool`.
 * @param callbacks - Optional lifecycle callbacks; missing hooks are skipped.
 * @returns The result of `executeTool`.
 * @throws Whatever `executeTool` throws.
 */
async function executeToolWithLifecycle(
  req: RunRequest,
  args: { id: string; toolId: string; input: Record<string, unknown> },
  workflowStep: WorkflowStep | undefined,
  callbacks: WorkerCtxCallbacks | undefined,
): Promise<ToolExecuteResult> {
  callbacks?.onToolCalled?.(args.toolId, nowMs());

  let outcome: ToolExecuteResult;
  try {
    outcome = await executeTool(req, args, workflowStep);
  } catch (failure) {
    // Report the failure, then let the caller see the original error.
    callbacks?.onToolFailed?.(args.toolId, nowMs());
    throw failure;
  }
  return outcome;
}
|
|
971
|
+
|
|
976
972
|
/**
 * Type guard: true for any non-null, non-array value whose `typeof` is
 * `'object'`.
 */
function isToolExecuteRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === 'object';
}
|
|
@@ -1032,14 +1028,19 @@ async function waitForSyntheticIntegrationEvent(
|
|
|
1032
1028
|
? Math.max(1, Math.round(input.timeout_ms))
|
|
1033
1029
|
: 30_000;
|
|
1034
1030
|
await postRuntimeApiBestEffort(req.baseUrl, req.executorToken, {
|
|
1035
|
-
action: '
|
|
1031
|
+
action: 'append_run_events',
|
|
1036
1032
|
playId: req.runId,
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1033
|
+
events: [
|
|
1034
|
+
{
|
|
1035
|
+
type: 'log.appended',
|
|
1036
|
+
runId: req.runId,
|
|
1037
|
+
source: 'worker',
|
|
1038
|
+
occurredAt: nowMs(),
|
|
1039
|
+
lines: [
|
|
1040
|
+
`Waiting for integration_event:${eventKey} for up to ${timeoutMs}ms.`,
|
|
1041
|
+
],
|
|
1042
|
+
} satisfies PlayRunLedgerEvent,
|
|
1043
|
+
],
|
|
1043
1044
|
});
|
|
1044
1045
|
try {
|
|
1045
1046
|
const event = (await (
|
|
@@ -1751,6 +1752,7 @@ type WorkerMapChunkSummary<T extends Record<string, unknown>> = {
|
|
|
1751
1752
|
outputDatasetId: string;
|
|
1752
1753
|
hash: string;
|
|
1753
1754
|
preview: T[];
|
|
1755
|
+
cachedRows?: T[];
|
|
1754
1756
|
};
|
|
1755
1757
|
|
|
1756
1758
|
function toWorkflowSerializableValue<T>(value: T): T {
|
|
@@ -1932,6 +1934,8 @@ async function executeWorkerWaterfall(
|
|
|
1932
1934
|
toolNameOrSpec: string | WorkerInlineWaterfallSpec,
|
|
1933
1935
|
input: Record<string, unknown>,
|
|
1934
1936
|
opts?: WorkerWaterfallOptions,
|
|
1937
|
+
callbacks?: WorkerCtxCallbacks,
|
|
1938
|
+
workflowStep?: WorkflowStep,
|
|
1935
1939
|
): Promise<unknown | null> {
|
|
1936
1940
|
// Inline-spec form
|
|
1937
1941
|
if (typeof toolNameOrSpec === 'object' && toolNameOrSpec) {
|
|
@@ -1948,20 +1952,32 @@ async function executeWorkerWaterfall(
|
|
|
1948
1952
|
toolId?: unknown,
|
|
1949
1953
|
toolInput?: unknown,
|
|
1950
1954
|
) =>
|
|
1951
|
-
await
|
|
1955
|
+
await executeToolWithLifecycle(
|
|
1952
1956
|
req,
|
|
1953
1957
|
normalizeToolExecuteArgs(requestOrKey, toolId, toolInput),
|
|
1958
|
+
workflowStep,
|
|
1959
|
+
callbacks,
|
|
1954
1960
|
),
|
|
1955
1961
|
},
|
|
1956
1962
|
tool: async (key, toolId, toolInput) =>
|
|
1957
|
-
await
|
|
1963
|
+
await executeToolWithLifecycle(
|
|
1964
|
+
req,
|
|
1965
|
+
{ id: key, toolId, input: toolInput },
|
|
1966
|
+
workflowStep,
|
|
1967
|
+
callbacks,
|
|
1968
|
+
),
|
|
1958
1969
|
});
|
|
1959
1970
|
} else {
|
|
1960
|
-
result = await
|
|
1961
|
-
|
|
1962
|
-
|
|
1963
|
-
|
|
1964
|
-
|
|
1971
|
+
result = await executeToolWithLifecycle(
|
|
1972
|
+
req,
|
|
1973
|
+
{
|
|
1974
|
+
id: step.id,
|
|
1975
|
+
toolId: step.toolId,
|
|
1976
|
+
input: step.mapInput(input),
|
|
1977
|
+
},
|
|
1978
|
+
workflowStep,
|
|
1979
|
+
callbacks,
|
|
1980
|
+
);
|
|
1965
1981
|
}
|
|
1966
1982
|
} catch {
|
|
1967
1983
|
continue;
|
|
@@ -2047,7 +2063,12 @@ async function executeWorkerWaterfall(
|
|
|
2047
2063
|
const providers = opts?.providers ?? [];
|
|
2048
2064
|
if (providers.length === 0) {
|
|
2049
2065
|
try {
|
|
2050
|
-
return await
|
|
2066
|
+
return await executeToolWithLifecycle(
|
|
2067
|
+
req,
|
|
2068
|
+
{ id: toolName, toolId: toolName, input },
|
|
2069
|
+
workflowStep,
|
|
2070
|
+
callbacks,
|
|
2071
|
+
);
|
|
2051
2072
|
} catch {
|
|
2052
2073
|
return null;
|
|
2053
2074
|
}
|
|
@@ -2055,11 +2076,16 @@ async function executeWorkerWaterfall(
|
|
|
2055
2076
|
let lastError: Error | null = null;
|
|
2056
2077
|
for (const provider of providers) {
|
|
2057
2078
|
try {
|
|
2058
|
-
const result = await
|
|
2059
|
-
|
|
2060
|
-
|
|
2061
|
-
|
|
2062
|
-
|
|
2079
|
+
const result = await executeToolWithLifecycle(
|
|
2080
|
+
req,
|
|
2081
|
+
{
|
|
2082
|
+
id: `${toolName}:${provider}`,
|
|
2083
|
+
toolId: toolName,
|
|
2084
|
+
input: { ...input, provider },
|
|
2085
|
+
},
|
|
2086
|
+
workflowStep,
|
|
2087
|
+
callbacks,
|
|
2088
|
+
);
|
|
2063
2089
|
if (resultHasContent(result)) {
|
|
2064
2090
|
recorder.push({
|
|
2065
2091
|
waterfallId: toolName,
|
|
@@ -2114,6 +2140,118 @@ function makeCsvParserState(): CsvParserState {
|
|
|
2114
2140
|
return { field: '', row: [], inQuotes: false, pendingCr: false };
|
|
2115
2141
|
}
|
|
2116
2142
|
|
|
2143
|
+
/**
 * Validates a byte count of unknown origin.
 *
 * @param value - Candidate byte count.
 * @returns `value` when it is a non-negative safe integer, otherwise `null`.
 */
function normalizeExpectedBytes(value: unknown): number | null {
  if (typeof value !== 'number') {
    return null;
  }
  if (!Number.isSafeInteger(value) || value < 0) {
    return null;
  }
  return value;
}
|
|
2148
|
+
|
|
2149
|
+
/**
 * Reports whether two byte counts disagree.
 *
 * A mismatch is only reported when both counts are numbers; if either is
 * `null` or `undefined` there is nothing to compare and the result is `false`.
 *
 * @param expectedBytes - The size the caller expects, if known.
 * @param actualBytes - The size actually observed, if known.
 */
function hasByteLengthMismatch(
  expectedBytes: number | null | undefined,
  actualBytes: number | null | undefined,
): boolean {
  if (typeof expectedBytes !== 'number' || typeof actualBytes !== 'number') {
    return false;
  }
  return actualBytes !== expectedBytes;
}
|
|
2159
|
+
|
|
2160
|
+
/**
 * Adapts a `ReadableStream` of bytes into an async generator of non-empty
 * chunks.
 *
 * Empty or missing chunks are dropped. When iteration stops before the stream
 * reports `done` (the consumer breaks out of its loop, or a read fails), the
 * stream is cancelled before the reader lock is released so the underlying
 * source is not left open and unconsumed. A stream that ends naturally is
 * only unlocked.
 *
 * @param body - The byte stream to read; it is locked for the duration of the
 *   iteration.
 * @returns An async generator yielding each non-empty chunk in order.
 * @throws Whatever `reader.read()` rejects with.
 */
async function* iterReadableStreamChunks(
  body: ReadableStream<Uint8Array>,
): AsyncGenerator<Uint8Array, void, void> {
  const reader = body.getReader();
  let exhausted = false;
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) {
        exhausted = true;
        return;
      }
      if (value && value.byteLength > 0) yield value;
    }
  } finally {
    if (!exhausted) {
      try {
        // Early exit: tell the source to stop producing data.
        await reader.cancel();
      } catch {
        // The stream is already errored; the read error (if any) is what
        // propagates, so a failed cancel is deliberately ignored.
      }
    }
    reader.releaseLock();
  }
}
|
|
2174
|
+
|
|
2175
|
+
/**
 * Wraps an in-memory byte array as an async iterable that yields it once.
 *
 * An empty array yields nothing. Each iteration starts a fresh generator, so
 * the returned iterable can be consumed more than once.
 *
 * @param bytes - The bytes to expose as a single chunk.
 */
function singleByteChunk(bytes: Uint8Array): AsyncIterable<Uint8Array> {
  async function* emitOnce(): AsyncGenerator<Uint8Array, void, void> {
    if (bytes.byteLength > 0) {
      yield bytes;
    }
  }
  return { [Symbol.asyncIterator]: () => emitOnce() };
}
|
|
2182
|
+
|
|
2183
|
+
/**
 * Passes byte chunks through while checking the total against an expected
 * size.
 *
 * The expected size is `input.expectedBytes` when it normalizes to a valid
 * byte count, otherwise `input.reportedBytes`. Empty chunks are skipped and
 * counted. Non-empty chunks are yielded as they arrive, so a size mismatch is
 * only detected after every chunk has already been handed to the consumer.
 *
 * After the source is drained:
 * - If nothing was yielded and a positive size was expected, a
 *   `csv.open_empty_body` trace is recorded; then `input.fallback` is streamed
 *   instead when provided, otherwise an error is thrown.
 * - If nothing was yielded and no positive size was expected, the generator
 *   ends quietly.
 * - If chunks were yielded but their total differs from the expected size, a
 *   `csv.read_mismatch` trace is recorded and an error is thrown.
 *
 * @param input - Run request (for tracing), file identity, the chunk source,
 *   optional expected/reported sizes and an optional fallback source.
 * @throws Error on an unexpected empty body without a fallback, or on a byte
 *   count mismatch.
 */
async function* guardExpectedByteChunks(input: {
  req: RunRequest;
  logicalPath: string;
  storageKey: string;
  source: string;
  chunks: AsyncIterable<Uint8Array>;
  expectedBytes?: number | null;
  reportedBytes?: number | null;
  fallback?: () => AsyncIterable<Uint8Array>;
}): AsyncGenerator<Uint8Array, void, void> {
  const declaredBytes = normalizeExpectedBytes(input.expectedBytes);
  const expectedBytes =
    declaredBytes ?? normalizeExpectedBytes(input.reportedBytes);

  let bytesRead = 0;
  let deliveredChunks = 0;
  let skippedEmptyChunks = 0;

  for await (const value of input.chunks) {
    if (value && value.byteLength !== 0) {
      deliveredChunks += 1;
      bytesRead += value.byteLength;
      yield value;
    } else {
      skippedEmptyChunks += 1;
    }
  }

  if (deliveredChunks > 0) {
    // Data was streamed: the only remaining check is the total size.
    if (expectedBytes === null || bytesRead === expectedBytes) {
      return;
    }
    recordRunnerPerfTrace({
      req: input.req,
      phase: 'csv.read_mismatch',
      extra: {
        source: input.source,
        logicalPath: input.logicalPath,
        expectedBytes,
        actualBytes: bytesRead,
        storageKey: input.storageKey,
      },
    });
    throw new Error(
      `ctx.csv("${input.logicalPath}"): ${input.source} streamed ${bytesRead} bytes for ${expectedBytes} byte staged file ${input.storageKey}.`,
    );
  }

  // Nothing was streamed. That is only a problem when bytes were expected.
  if (typeof expectedBytes !== 'number' || !(expectedBytes > 0)) {
    return;
  }

  recordRunnerPerfTrace({
    req: input.req,
    phase: 'csv.open_empty_body',
    extra: {
      source: input.source,
      logicalPath: input.logicalPath,
      expectedBytes,
      reportedBytes: normalizeExpectedBytes(input.reportedBytes),
      skippedEmptyChunks,
      storageKey: input.storageKey,
    },
  });

  if (!input.fallback) {
    throw new Error(
      `ctx.csv("${input.logicalPath}"): ${input.source} returned an empty body for ${expectedBytes} byte staged file ${input.storageKey}.`,
    );
  }
  yield* input.fallback();
}
|
|
2254
|
+
|
|
2117
2255
|
/**
|
|
2118
2256
|
* Push one buffered text chunk through the CSV state machine. Accumulates
|
|
2119
2257
|
* fully-terminated rows into `out`; partial trailing field/row stays in
|
|
@@ -2182,11 +2320,10 @@ function flushCsvParser(state: CsvParserState, out: string[][]): void {
|
|
|
2182
2320
|
* to every subsequent row. Stops cleanly on stream end and flushes any
|
|
2183
2321
|
* trailing row.
|
|
2184
2322
|
*/
|
|
2185
|
-
async function*
|
|
2186
|
-
|
|
2323
|
+
async function* streamCsvRowsFromByteChunks<T extends Record<string, unknown>>(
|
|
2324
|
+
byteChunks: AsyncIterable<Uint8Array>,
|
|
2187
2325
|
chunkSize: number,
|
|
2188
2326
|
): AsyncGenerator<T[], void, void> {
|
|
2189
|
-
const reader = body.getReader();
|
|
2190
2327
|
const decoder = new TextDecoder('utf-8');
|
|
2191
2328
|
const state = makeCsvParserState();
|
|
2192
2329
|
const physicalRowBuffer: string[][] = [];
|
|
@@ -2195,7 +2332,13 @@ async function* streamCsvRowsFromBody<T extends Record<string, unknown>>(
|
|
|
2195
2332
|
|
|
2196
2333
|
const flushPhysicalRowsAsObjects = (terminal: boolean): T[][] => {
|
|
2197
2334
|
const yielded: T[][] = [];
|
|
2198
|
-
if (physicalRowBuffer.length === 0)
|
|
2335
|
+
if (physicalRowBuffer.length === 0) {
|
|
2336
|
+
if (terminal && pendingChunk.length > 0) {
|
|
2337
|
+
yielded.push(pendingChunk);
|
|
2338
|
+
pendingChunk = [];
|
|
2339
|
+
}
|
|
2340
|
+
return yielded;
|
|
2341
|
+
}
|
|
2199
2342
|
if (!headers) {
|
|
2200
2343
|
headers = physicalRowBuffer.shift() ?? null;
|
|
2201
2344
|
if (!headers) return yielded;
|
|
@@ -2219,224 +2362,237 @@ async function* streamCsvRowsFromBody<T extends Record<string, unknown>>(
|
|
|
2219
2362
|
return yielded;
|
|
2220
2363
|
};
|
|
2221
2364
|
|
|
2222
|
-
|
|
2223
|
-
|
|
2224
|
-
|
|
2225
|
-
|
|
2226
|
-
|
|
2227
|
-
|
|
2228
|
-
|
|
2229
|
-
|
|
2230
|
-
|
|
2231
|
-
pushCsvTextIntoParser(state, text, physicalRowBuffer);
|
|
2232
|
-
for (const chunk of flushPhysicalRowsAsObjects(false)) yield chunk;
|
|
2233
|
-
}
|
|
2234
|
-
} finally {
|
|
2235
|
-
reader.releaseLock();
|
|
2365
|
+
for await (const value of byteChunks) {
|
|
2366
|
+
if (value.byteLength === 0) continue;
|
|
2367
|
+
const text = decoder.decode(value, { stream: true });
|
|
2368
|
+
pushCsvTextIntoParser(state, text, physicalRowBuffer);
|
|
2369
|
+
for (const chunk of flushPhysicalRowsAsObjects(false)) yield chunk;
|
|
2370
|
+
}
|
|
2371
|
+
const trailingText = decoder.decode();
|
|
2372
|
+
if (trailingText) {
|
|
2373
|
+
pushCsvTextIntoParser(state, trailingText, physicalRowBuffer);
|
|
2236
2374
|
}
|
|
2375
|
+
flushCsvParser(state, physicalRowBuffer);
|
|
2376
|
+
for (const chunk of flushPhysicalRowsAsObjects(true)) yield chunk;
|
|
2237
2377
|
void TARGET_CSV_DECODE_CHUNK_BYTES; // referenced for future tuning
|
|
2238
2378
|
}
|
|
2239
2379
|
|
|
2380
|
+
function readHarnessStagedFileChunks(input: {
|
|
2381
|
+
req: RunRequest;
|
|
2382
|
+
logicalPath: string;
|
|
2383
|
+
storageKey: string;
|
|
2384
|
+
expectedBytes?: number | null;
|
|
2385
|
+
}): AsyncIterable<Uint8Array> {
|
|
2386
|
+
const expectedBytes = normalizeExpectedBytes(input.expectedBytes);
|
|
2387
|
+
return {
|
|
2388
|
+
async *[Symbol.asyncIterator]() {
|
|
2389
|
+
let offset = 0;
|
|
2390
|
+
let observedBytes = 0;
|
|
2391
|
+
let objectSize: number | null = null;
|
|
2392
|
+
let loggedOpen = false;
|
|
2393
|
+
let done = false;
|
|
2394
|
+
|
|
2395
|
+
while (!done) {
|
|
2396
|
+
const result = await harnessReadStagedFileChunk({
|
|
2397
|
+
storageKey: input.storageKey,
|
|
2398
|
+
offset,
|
|
2399
|
+
length: TARGET_CSV_DECODE_CHUNK_BYTES,
|
|
2400
|
+
});
|
|
2401
|
+
const actualObjectSize = normalizeExpectedBytes(result.objectSize);
|
|
2402
|
+
if (actualObjectSize === null) {
|
|
2403
|
+
throw new Error(
|
|
2404
|
+
`ctx.csv("${input.logicalPath}"): harness returned an invalid object size for ${input.storageKey}.`,
|
|
2405
|
+
);
|
|
2406
|
+
}
|
|
2407
|
+
if (objectSize !== null && objectSize !== actualObjectSize) {
|
|
2408
|
+
throw new Error(
|
|
2409
|
+
`ctx.csv("${input.logicalPath}"): staged file size changed while streaming ` +
|
|
2410
|
+
`${input.storageKey}; started at ${objectSize} bytes, now ${actualObjectSize}.`,
|
|
2411
|
+
);
|
|
2412
|
+
}
|
|
2413
|
+
objectSize = actualObjectSize;
|
|
2414
|
+
if (hasByteLengthMismatch(expectedBytes, actualObjectSize)) {
|
|
2415
|
+
throw new Error(
|
|
2416
|
+
`ctx.csv("${input.logicalPath}"): harness staged file size mismatch for ` +
|
|
2417
|
+
`storageKey=${input.storageKey}; expected ${expectedBytes} bytes, got ${actualObjectSize}.`,
|
|
2418
|
+
);
|
|
2419
|
+
}
|
|
2420
|
+
if (result.offset !== offset) {
|
|
2421
|
+
throw new Error(
|
|
2422
|
+
`ctx.csv("${input.logicalPath}"): harness returned offset ${result.offset} while ` +
|
|
2423
|
+
`reading offset ${offset} from ${input.storageKey}.`,
|
|
2424
|
+
);
|
|
2425
|
+
}
|
|
2426
|
+
|
|
2427
|
+
const chunk = result.chunk;
|
|
2428
|
+
if (!(chunk instanceof Uint8Array)) {
|
|
2429
|
+
throw new Error(
|
|
2430
|
+
`ctx.csv("${input.logicalPath}"): harness returned a non-byte chunk for ${input.storageKey}.`,
|
|
2431
|
+
);
|
|
2432
|
+
}
|
|
2433
|
+
if (chunk.byteLength !== result.bytesRead) {
|
|
2434
|
+
throw new Error(
|
|
2435
|
+
`ctx.csv("${input.logicalPath}"): harness chunk metadata mismatch for ` +
|
|
2436
|
+
`${input.storageKey}; bytesRead=${result.bytesRead}, chunk=${chunk.byteLength}.`,
|
|
2437
|
+
);
|
|
2438
|
+
}
|
|
2439
|
+
if (chunk.byteLength === 0 && !result.done) {
|
|
2440
|
+
throw new Error(
|
|
2441
|
+
`ctx.csv("${input.logicalPath}"): harness returned an empty non-terminal chunk for ${input.storageKey}.`,
|
|
2442
|
+
);
|
|
2443
|
+
}
|
|
2444
|
+
|
|
2445
|
+
if (!loggedOpen) {
|
|
2446
|
+
loggedOpen = true;
|
|
2447
|
+
recordRunnerPerfTrace({
|
|
2448
|
+
req: input.req,
|
|
2449
|
+
phase: 'csv.open',
|
|
2450
|
+
extra: {
|
|
2451
|
+
source: 'harness_rpc_range',
|
|
2452
|
+
logicalPath: input.logicalPath,
|
|
2453
|
+
expectedBytes,
|
|
2454
|
+
actualBytes: actualObjectSize,
|
|
2455
|
+
chunkBytes: TARGET_CSV_DECODE_CHUNK_BYTES,
|
|
2456
|
+
storageKey: input.storageKey,
|
|
2457
|
+
},
|
|
2458
|
+
});
|
|
2459
|
+
}
|
|
2460
|
+
|
|
2461
|
+
offset += chunk.byteLength;
|
|
2462
|
+
observedBytes += chunk.byteLength;
|
|
2463
|
+
done = result.done;
|
|
2464
|
+
if (chunk.byteLength > 0) yield chunk;
|
|
2465
|
+
}
|
|
2466
|
+
|
|
2467
|
+
const requiredBytes = expectedBytes ?? objectSize;
|
|
2468
|
+
if (typeof requiredBytes === 'number' && observedBytes !== requiredBytes) {
|
|
2469
|
+
recordRunnerPerfTrace({
|
|
2470
|
+
req: input.req,
|
|
2471
|
+
phase: 'csv.read_mismatch',
|
|
2472
|
+
extra: {
|
|
2473
|
+
source: 'harness_rpc_range',
|
|
2474
|
+
logicalPath: input.logicalPath,
|
|
2475
|
+
expectedBytes: requiredBytes,
|
|
2476
|
+
actualBytes: observedBytes,
|
|
2477
|
+
storageKey: input.storageKey,
|
|
2478
|
+
},
|
|
2479
|
+
});
|
|
2480
|
+
throw new Error(
|
|
2481
|
+
`ctx.csv("${input.logicalPath}"): harness streamed ${observedBytes} bytes ` +
|
|
2482
|
+
`for ${requiredBytes} byte staged file ${input.storageKey}.`,
|
|
2483
|
+
);
|
|
2484
|
+
}
|
|
2485
|
+
},
|
|
2486
|
+
};
|
|
2487
|
+
}
|
|
2488
|
+
|
|
2240
2489
|
/**
|
|
2241
|
-
*
|
|
2490
|
+
* Dataset source adapter that returns byte chunks. Per-play Workers loaded via
|
|
2242
2491
|
* WorkerLoader cannot accept a raw R2Bucket binding (CF Workflows refuses to
|
|
2243
2492
|
* serialize R2Bucket through its workflow-state path), so per-play Workers
|
|
2244
2493
|
* stream staged files through the long-lived harness Worker service binding.
|
|
2245
2494
|
* Returns null only if the asset is genuinely missing (404).
|
|
2246
2495
|
*/
|
|
2247
|
-
async function
|
|
2496
|
+
async function openFileByteChunks(input: {
|
|
2248
2497
|
req: RunRequest;
|
|
2249
2498
|
env: WorkerEnv;
|
|
2250
2499
|
logicalPath: string;
|
|
2251
|
-
|
|
2252
|
-
}): Promise<
|
|
2253
|
-
|
|
2254
|
-
|
|
2255
|
-
|
|
2256
|
-
return object.body;
|
|
2257
|
-
}
|
|
2258
|
-
}
|
|
2259
|
-
if (input.env.PLAY_ASSETS) {
|
|
2260
|
-
try {
|
|
2261
|
-
const text = await input.env.PLAY_ASSETS.readText(input.logicalPath);
|
|
2262
|
-
const bytes = new TextEncoder().encode(text);
|
|
2263
|
-
return new ReadableStream<Uint8Array>({
|
|
2264
|
-
start(controller) {
|
|
2265
|
-
controller.enqueue(bytes);
|
|
2266
|
-
controller.close();
|
|
2267
|
-
},
|
|
2268
|
-
});
|
|
2269
|
-
} catch (error) {
|
|
2270
|
-
if (!/missing from R2|not found|No such object/i.test(String(error))) {
|
|
2271
|
-
throw error;
|
|
2272
|
-
}
|
|
2273
|
-
}
|
|
2274
|
-
}
|
|
2275
|
-
|
|
2276
|
-
// The harness fetch path returns a real Response body backed by R2.
|
|
2277
|
-
// Errors are loud: we want CI / regression failures to surface the real
|
|
2278
|
-
// cause (auth, missing object, network) rather than getting squashed into a
|
|
2279
|
-
// generic "R2 asset is not reachable".
|
|
2280
|
-
const response = await harnessFetchStagedFile({
|
|
2281
|
-
executorToken: input.req.executorToken,
|
|
2282
|
-
storageKey: input.storageKey,
|
|
2283
|
-
});
|
|
2284
|
-
if (response.status === 404) {
|
|
2500
|
+
file: WorkerFileRef;
|
|
2501
|
+
}): Promise<AsyncIterable<Uint8Array> | null> {
|
|
2502
|
+
const storageKey = input.file.storageKey;
|
|
2503
|
+
const expectedBytes = normalizeExpectedBytes(input.file.bytes);
|
|
2504
|
+
if (expectedBytes === null) {
|
|
2285
2505
|
throw new Error(
|
|
2286
|
-
`ctx.csv("${input.logicalPath}"):
|
|
2287
|
-
`The staged file is missing from R2; the upload either failed silently before the run started, ` +
|
|
2288
|
-
`or the storageKey threaded through the workflow params no longer matches what the harness resolves.`,
|
|
2506
|
+
`ctx.csv("${input.logicalPath}"): staged dataset handle is missing a byte length for ${storageKey}.`,
|
|
2289
2507
|
);
|
|
2290
2508
|
}
|
|
2291
|
-
if (
|
|
2292
|
-
const
|
|
2293
|
-
|
|
2294
|
-
|
|
2295
|
-
|
|
2296
|
-
|
|
2297
|
-
|
|
2298
|
-
|
|
2299
|
-
|
|
2300
|
-
|
|
2301
|
-
|
|
2302
|
-
|
|
2303
|
-
|
|
2304
|
-
|
|
2305
|
-
|
|
2306
|
-
|
|
2307
|
-
|
|
2308
|
-
|
|
2309
|
-
|
|
2310
|
-
|
|
2311
|
-
|
|
2312
|
-
|
|
2313
|
-
|
|
2314
|
-
|
|
2315
|
-
|
|
2316
|
-
|
|
2317
|
-
|
|
2318
|
-
|
|
2319
|
-
|
|
2320
|
-
|
|
2321
|
-
|
|
2322
|
-
|
|
2323
|
-
|
|
2324
|
-
|
|
2325
|
-
|
|
2326
|
-
|
|
2327
|
-
|
|
2328
|
-
|
|
2329
|
-
|
|
2330
|
-
|
|
2331
|
-
|
|
2332
|
-
|
|
2333
|
-
|
|
2334
|
-
|
|
2335
|
-
|
|
2336
|
-
if (!body) {
|
|
2337
|
-
throw new Error(
|
|
2338
|
-
`ctx.csv("${input.logicalPath}"): R2 asset is not reachable (no PLAYS_BUCKET binding and signed URL unavailable).`,
|
|
2339
|
-
);
|
|
2340
|
-
}
|
|
2341
|
-
for await (const chunk of streamCsvRowsFromBody<T>(
|
|
2342
|
-
body,
|
|
2343
|
-
Math.max(1, Math.floor(chunkSize)),
|
|
2344
|
-
)) {
|
|
2345
|
-
yield applyCsvRenameProjection(chunk, input.renameOptions) as T[];
|
|
2509
|
+
if (input.env.PLAYS_BUCKET) {
|
|
2510
|
+
const object = await input.env.PLAYS_BUCKET.get(storageKey);
|
|
2511
|
+
if (object) {
|
|
2512
|
+
if (hasByteLengthMismatch(expectedBytes, object.size)) {
|
|
2513
|
+
recordRunnerPerfTrace({
|
|
2514
|
+
req: input.req,
|
|
2515
|
+
phase: 'csv.open_mismatch',
|
|
2516
|
+
extra: {
|
|
2517
|
+
source: 'direct_r2',
|
|
2518
|
+
logicalPath: input.logicalPath,
|
|
2519
|
+
expectedBytes,
|
|
2520
|
+
actualBytes: object.size,
|
|
2521
|
+
storageKey,
|
|
2522
|
+
},
|
|
2523
|
+
});
|
|
2524
|
+
await object.body.cancel().catch(() => undefined);
|
|
2525
|
+
} else {
|
|
2526
|
+
recordRunnerPerfTrace({
|
|
2527
|
+
req: input.req,
|
|
2528
|
+
phase: 'csv.open',
|
|
2529
|
+
extra: {
|
|
2530
|
+
source: 'direct_r2',
|
|
2531
|
+
logicalPath: input.logicalPath,
|
|
2532
|
+
expectedBytes,
|
|
2533
|
+
actualBytes: object.size,
|
|
2534
|
+
storageKey,
|
|
2535
|
+
},
|
|
2536
|
+
});
|
|
2537
|
+
return guardExpectedByteChunks({
|
|
2538
|
+
req: input.req,
|
|
2539
|
+
logicalPath: input.logicalPath,
|
|
2540
|
+
storageKey,
|
|
2541
|
+
source: 'direct_r2',
|
|
2542
|
+
chunks: iterReadableStreamChunks(object.body),
|
|
2543
|
+
expectedBytes,
|
|
2544
|
+
reportedBytes: object.size,
|
|
2545
|
+
fallback: () =>
|
|
2546
|
+
readHarnessStagedFileChunks({
|
|
2547
|
+
req: input.req,
|
|
2548
|
+
logicalPath: input.logicalPath,
|
|
2549
|
+
storageKey,
|
|
2550
|
+
expectedBytes,
|
|
2551
|
+
}),
|
|
2552
|
+
});
|
|
2553
|
+
}
|
|
2346
2554
|
}
|
|
2347
2555
|
}
|
|
2348
|
-
|
|
2349
|
-
|
|
2350
|
-
|
|
2351
|
-
|
|
2352
|
-
|
|
2353
|
-
|
|
2354
|
-
|
|
2355
|
-
|
|
2356
|
-
|
|
2357
|
-
for await (const chunk of doStream(1_000)) {
|
|
2358
|
-
for (const row of chunk) yield row;
|
|
2359
|
-
}
|
|
2360
|
-
},
|
|
2361
|
-
enumerable: false,
|
|
2362
|
-
});
|
|
2363
|
-
Object.defineProperty(arr, 'count', {
|
|
2364
|
-
value: async () => {
|
|
2365
|
-
if (cachedCount !== null) return cachedCount;
|
|
2366
|
-
let total = 0;
|
|
2367
|
-
for await (const chunk of doStream(5_000)) total += chunk.length;
|
|
2368
|
-
cachedCount = total;
|
|
2369
|
-
return total;
|
|
2370
|
-
},
|
|
2371
|
-
enumerable: false,
|
|
2372
|
-
});
|
|
2373
|
-
Object.defineProperty(arr, 'peek', {
|
|
2374
|
-
value: async (limit = 10) => {
|
|
2375
|
-
const out: T[] = [];
|
|
2376
|
-
for await (const chunk of doStream(Math.max(1, limit))) {
|
|
2377
|
-
for (const row of chunk) {
|
|
2378
|
-
out.push(row);
|
|
2379
|
-
if (out.length >= limit) return out;
|
|
2380
|
-
}
|
|
2556
|
+
if (input.env.PLAY_ASSETS) {
|
|
2557
|
+
try {
|
|
2558
|
+
const text = await input.env.PLAY_ASSETS.readText(input.logicalPath);
|
|
2559
|
+
const bytes = new TextEncoder().encode(text);
|
|
2560
|
+
if (hasByteLengthMismatch(expectedBytes, bytes.byteLength)) {
|
|
2561
|
+
throw new Error(
|
|
2562
|
+
`ctx.csv("${input.logicalPath}"): packaged asset size mismatch for ` +
|
|
2563
|
+
`storageKey=${storageKey}; expected ${expectedBytes} bytes, got ${bytes.byteLength}.`,
|
|
2564
|
+
);
|
|
2381
2565
|
}
|
|
2382
|
-
|
|
2383
|
-
|
|
2384
|
-
|
|
2385
|
-
|
|
2386
|
-
|
|
2387
|
-
|
|
2388
|
-
|
|
2389
|
-
|
|
2390
|
-
|
|
2391
|
-
|
|
2392
|
-
|
|
2393
|
-
|
|
2394
|
-
|
|
2395
|
-
|
|
2396
|
-
|
|
2566
|
+
recordRunnerPerfTrace({
|
|
2567
|
+
req: input.req,
|
|
2568
|
+
phase: 'csv.open',
|
|
2569
|
+
extra: {
|
|
2570
|
+
source: 'play_assets',
|
|
2571
|
+
logicalPath: input.logicalPath,
|
|
2572
|
+
expectedBytes,
|
|
2573
|
+
actualBytes: bytes.byteLength,
|
|
2574
|
+
storageKey,
|
|
2575
|
+
},
|
|
2576
|
+
});
|
|
2577
|
+
return singleByteChunk(bytes);
|
|
2578
|
+
} catch (error) {
|
|
2579
|
+
if (!/missing from R2|not found|No such object/i.test(String(error))) {
|
|
2580
|
+
throw error;
|
|
2397
2581
|
}
|
|
2398
|
-
|
|
2399
|
-
|
|
2400
|
-
enumerable: false,
|
|
2401
|
-
});
|
|
2402
|
-
Object.defineProperty(arr, 'datasetId', {
|
|
2403
|
-
value: datasetId,
|
|
2404
|
-
enumerable: true,
|
|
2405
|
-
});
|
|
2406
|
-
Object.defineProperty(arr, 'tableNamespace', {
|
|
2407
|
-
value: input.name,
|
|
2408
|
-
enumerable: true,
|
|
2409
|
-
});
|
|
2410
|
-
Object.defineProperty(arr, '__deeplineStreamingDataset', {
|
|
2411
|
-
value: true,
|
|
2412
|
-
enumerable: false,
|
|
2413
|
-
});
|
|
2414
|
-
Object.defineProperty(arr, '__deeplineDatasetKind', {
|
|
2415
|
-
value: 'csv',
|
|
2416
|
-
enumerable: false,
|
|
2417
|
-
});
|
|
2418
|
-
Object.defineProperty(arr, 'toJSON', {
|
|
2419
|
-
value: () => ({
|
|
2420
|
-
kind: 'dataset' as const,
|
|
2421
|
-
datasetKind: 'csv',
|
|
2422
|
-
datasetId,
|
|
2423
|
-
count: cachedCount,
|
|
2424
|
-
streaming: true,
|
|
2425
|
-
tableNamespace: input.name,
|
|
2426
|
-
}),
|
|
2427
|
-
enumerable: false,
|
|
2428
|
-
});
|
|
2429
|
-
return arr;
|
|
2430
|
-
}
|
|
2582
|
+
}
|
|
2583
|
+
}
|
|
2431
2584
|
|
|
2432
|
-
|
|
2433
|
-
|
|
2434
|
-
|
|
2435
|
-
|
|
2436
|
-
|
|
2437
|
-
|
|
2438
|
-
|
|
2439
|
-
|
|
2585
|
+
// Dynamic Workers cannot receive a raw R2Bucket binding, and both previous
|
|
2586
|
+
// fallbacks were different data planes: service-binding fetch bodies could
|
|
2587
|
+
// arrive empty across WorkerLoader isolates, while app-signed URLs pointed at
|
|
2588
|
+
// the app namespace instead of the preview harness namespace. The harness owns
|
|
2589
|
+
// staged R2 now, so the only fallback is typed bounded range RPC.
|
|
2590
|
+
return readHarnessStagedFileChunks({
|
|
2591
|
+
req: input.req,
|
|
2592
|
+
logicalPath: input.logicalPath,
|
|
2593
|
+
storageKey,
|
|
2594
|
+
expectedBytes,
|
|
2595
|
+
});
|
|
2440
2596
|
}
|
|
2441
2597
|
|
|
2442
2598
|
/**
|
|
@@ -2492,6 +2648,60 @@ function requireSheetContract(
|
|
|
2492
2648
|
return contract;
|
|
2493
2649
|
}
|
|
2494
2650
|
|
|
2651
|
+
/**
 * Exact field names that `isDatasetPayloadField` rejects.
 *
 * NOTE(review): these look like the runtime's row bookkeeping columns (key,
 * status, run id, timestamps, ...) — confirm against the sheet schema before
 * adding or removing entries.
 */
const NON_PAYLOAD_DATASET_FIELDS: ReadonlySet<string> = new Set([
  '_key',
  '_status',
  '_run_id',
  '_error',
  '_stage',
  '_provider',
  '_input_index',
  '_created_at',
  '_updated_at',
  '_cell_meta',
]);

/**
 * Reports whether a row key should be treated as a dataset payload field.
 *
 * A key is rejected when it is the empty string, when it starts with the
 * `__deepline` prefix, or when it exactly matches one of the names in
 * `NON_PAYLOAD_DATASET_FIELDS`. Every other key is accepted, including keys
 * that merely resemble a reserved name (for example `_keys`).
 *
 * @param field - Property name taken from a dataset row.
 * @returns `true` when the key is a payload field, `false` otherwise.
 */
function isDatasetPayloadField(field: string): boolean {
  if (field.length === 0 || field.startsWith('__deepline')) {
    return false;
  }
  // Exact-match lookup; replaces the former chain of `!==` comparisons.
  return !NON_PAYLOAD_DATASET_FIELDS.has(field);
}
|
|
2667
|
+
|
|
2668
|
+
/**
 * Returns a copy of `input.contract` whose column list is extended with one
 * runtime column for every payload field found on `input.rows` that the
 * contract does not already describe.
 *
 * A row key is added only when all of the following hold:
 *   - `isDatasetPayloadField` accepts it;
 *   - no existing column already has that `field`;
 *   - its `sqlSafePlayColumnName` does not collide with an existing column's
 *     `sqlName` (colliding fields are skipped, not renamed).
 *
 * Added columns get the id `runtime:<tableNamespace>:<field>` and a `source`
 * of `'mapField'` when the field is listed in `input.outputFields`, otherwise
 * `'input'`. The original contract object and its `columns` array are never
 * mutated; a fresh object is returned even when no column is added.
 *
 * @param input.contract - Sheet contract to extend.
 * @param input.rows - Rows whose own enumerable keys are inspected.
 * @param input.outputFields - Field names to tag as `'mapField'` columns.
 * @returns A new contract carrying the original columns followed by any
 *   runtime-discovered columns, in first-seen order.
 */
function augmentSheetContractWithDatasetFields(input: {
  contract: PlaySheetContract;
  rows: readonly Record<string, unknown>[];
  outputFields?: readonly string[];
}): PlaySheetContract {
  const outputFields = new Set(input.outputFields ?? []);
  const existingFields = new Set(
    input.contract.columns.flatMap((column) =>
      typeof column.field === 'string' ? [column.field] : [],
    ),
  );
  const existingSqlNames = new Set(
    input.contract.columns.map((column) => column.sqlName),
  );
  const columns = [...input.contract.columns];

  // The accept/reject decision for a field name cannot change during this
  // call (the "existing" sets only grow), so each distinct name is decided
  // once instead of once per row. This matters for fields that are rejected
  // on an SQL-name collision, which would otherwise be re-normalized for
  // every row. Assumes sqlSafePlayColumnName is deterministic — TODO confirm.
  const examinedFields = new Set<string>();

  for (const row of input.rows) {
    for (const field of Object.keys(row)) {
      if (examinedFields.has(field)) {
        continue;
      }
      examinedFields.add(field);

      if (!isDatasetPayloadField(field) || existingFields.has(field)) {
        continue;
      }
      const sqlName = sqlSafePlayColumnName(field);
      if (existingSqlNames.has(sqlName)) {
        // Another column already owns this SQL name; leave the field out.
        continue;
      }
      existingFields.add(field);
      existingSqlNames.add(sqlName);
      columns.push({
        id: `runtime:${input.contract.tableNamespace}:${field}`,
        sqlName,
        source: outputFields.has(field) ? 'mapField' : 'input',
        field,
      });
    }
  }
  return { ...input.contract, columns };
}
|
|
2704
|
+
|
|
2495
2705
|
async function persistCompletedMapRows(input: {
|
|
2496
2706
|
req: RunRequest;
|
|
2497
2707
|
tableNamespace: string;
|
|
@@ -2500,19 +2710,24 @@ async function persistCompletedMapRows(input: {
|
|
|
2500
2710
|
extraOutputFields?: string[];
|
|
2501
2711
|
}): Promise<void> {
|
|
2502
2712
|
if (input.rows.length === 0) return;
|
|
2713
|
+
const outputFields = [
|
|
2714
|
+
...input.outputFields,
|
|
2715
|
+
...(input.extraOutputFields ?? []).filter(
|
|
2716
|
+
(field) => !input.outputFields.includes(field),
|
|
2717
|
+
),
|
|
2718
|
+
];
|
|
2503
2719
|
await harnessPersistCompletedSheetRows({
|
|
2504
2720
|
baseUrl: input.req.baseUrl,
|
|
2505
2721
|
executorToken: input.req.executorToken,
|
|
2506
2722
|
playName: input.req.playName,
|
|
2507
2723
|
tableNamespace: input.tableNamespace,
|
|
2508
|
-
sheetContract:
|
|
2724
|
+
sheetContract: augmentSheetContractWithDatasetFields({
|
|
2725
|
+
contract: requireSheetContract(input.req, input.tableNamespace),
|
|
2726
|
+
rows: input.rows,
|
|
2727
|
+
outputFields,
|
|
2728
|
+
}),
|
|
2509
2729
|
rows: input.rows,
|
|
2510
|
-
outputFields
|
|
2511
|
-
...input.outputFields,
|
|
2512
|
-
...(input.extraOutputFields ?? []).filter(
|
|
2513
|
-
(field) => !input.outputFields.includes(field),
|
|
2514
|
-
),
|
|
2515
|
-
],
|
|
2730
|
+
outputFields,
|
|
2516
2731
|
runId: input.req.runId,
|
|
2517
2732
|
userEmail: input.req.userEmail,
|
|
2518
2733
|
preloadedDbSessions: input.req.preloadedDbSessions ?? null,
|
|
@@ -2537,7 +2752,10 @@ async function prepareMapRows(input: {
|
|
|
2537
2752
|
executorToken: input.req.executorToken,
|
|
2538
2753
|
playName: input.req.playName,
|
|
2539
2754
|
tableNamespace: input.tableNamespace,
|
|
2540
|
-
sheetContract:
|
|
2755
|
+
sheetContract: augmentSheetContractWithDatasetFields({
|
|
2756
|
+
contract: requireSheetContract(input.req, input.tableNamespace),
|
|
2757
|
+
rows: input.rows,
|
|
2758
|
+
}),
|
|
2541
2759
|
rows: input.rows.map((row) => ({ ...row })),
|
|
2542
2760
|
runId: input.req.runId,
|
|
2543
2761
|
userEmail: input.req.userEmail,
|
|
@@ -2700,7 +2918,7 @@ function createMinimalWorkerCtx(
|
|
|
2700
2918
|
const callDepth = rootGovernance?.callDepth ?? 0;
|
|
2701
2919
|
const runMap = async <T extends Record<string, unknown>>(
|
|
2702
2920
|
name: string,
|
|
2703
|
-
rows: T
|
|
2921
|
+
rows: WorkerDatasetInput<T>,
|
|
2704
2922
|
fieldsDef: Record<
|
|
2705
2923
|
string,
|
|
2706
2924
|
| unknown
|
|
@@ -2715,7 +2933,8 @@ function createMinimalWorkerCtx(
|
|
|
2715
2933
|
): Promise<unknown> => {
|
|
2716
2934
|
const mapStartedAt = nowMs();
|
|
2717
2935
|
const mapNodeId = `map:${name}`;
|
|
2718
|
-
const
|
|
2936
|
+
const inputRows = rows;
|
|
2937
|
+
const rowCountHint = datasetRowCountHint(inputRows);
|
|
2719
2938
|
const baseOffset = 0;
|
|
2720
2939
|
const fieldEntries = Object.entries(fieldsDef);
|
|
2721
2940
|
const plan = req.executionPlan;
|
|
@@ -2723,12 +2942,8 @@ function createMinimalWorkerCtx(
|
|
|
2723
2942
|
(candidate) =>
|
|
2724
2943
|
candidate.mapName === name || candidate.tableNamespace === name,
|
|
2725
2944
|
);
|
|
2726
|
-
const streaming = isStreamingDataset<T>(sliced);
|
|
2727
|
-
// For streaming inputs we don't know the row count upfront — pass
|
|
2728
|
-
// `totalRows: 0` so chooseMapChunkSize falls back to the preferred /
|
|
2729
|
-
// default chunk size rather than trying to budget against an unknown.
|
|
2730
2945
|
const rowsPerChunk = chooseMapChunkSize({
|
|
2731
|
-
totalRows:
|
|
2946
|
+
totalRows: rowCountHint,
|
|
2732
2947
|
mapCount: Math.max(1, plan?.maps.length ?? 1),
|
|
2733
2948
|
stepsPerChunk: planMap?.stepsPerChunk ?? 1,
|
|
2734
2949
|
preferredChunkSize: planMap?.defaultChunkSize,
|
|
@@ -2750,14 +2965,12 @@ function createMinimalWorkerCtx(
|
|
|
2750
2965
|
typeof total === 'number' && Number.isFinite(total) && total > 0
|
|
2751
2966
|
? `${completed.toLocaleString()} / ${total.toLocaleString()} rows processed`
|
|
2752
2967
|
: `${completed.toLocaleString()} rows processed`;
|
|
2968
|
+
callbacks?.onMapStarted?.(mapNodeId, mapStartedAt);
|
|
2753
2969
|
updateMapProgress({
|
|
2754
2970
|
completed: 0,
|
|
2755
|
-
total:
|
|
2971
|
+
total: rowCountHint ?? undefined,
|
|
2756
2972
|
startedAt: mapStartedAt,
|
|
2757
|
-
message: formatMapProgressMessage(
|
|
2758
|
-
0,
|
|
2759
|
-
streaming ? undefined : sliced.length,
|
|
2760
|
-
),
|
|
2973
|
+
message: formatMapProgressMessage(0, rowCountHint ?? undefined),
|
|
2761
2974
|
});
|
|
2762
2975
|
const explicitRowKeysSeen =
|
|
2763
2976
|
opts?.key === undefined ? null : new Map<string, number>();
|
|
@@ -2983,6 +3196,7 @@ function createMinimalWorkerCtx(
|
|
|
2983
3196
|
input?: unknown,
|
|
2984
3197
|
_opts?: { description?: string },
|
|
2985
3198
|
): Promise<unknown> => {
|
|
3199
|
+
void _opts;
|
|
2986
3200
|
assertNotAborted(abortSignal);
|
|
2987
3201
|
const request = normalizeToolExecuteArgs(
|
|
2988
3202
|
requestOrKey,
|
|
@@ -3008,6 +3222,8 @@ function createMinimalWorkerCtx(
|
|
|
3008
3222
|
toolNameOrSpec,
|
|
3009
3223
|
waterfallInput,
|
|
3010
3224
|
waterfallOpts,
|
|
3225
|
+
callbacks,
|
|
3226
|
+
workflowStep,
|
|
3011
3227
|
),
|
|
3012
3228
|
};
|
|
3013
3229
|
for (const [key, value] of fieldEntries) {
|
|
@@ -3219,10 +3435,16 @@ function createMinimalWorkerCtx(
|
|
|
3219
3435
|
outputDatasetId: `map:${name}`,
|
|
3220
3436
|
hash,
|
|
3221
3437
|
preview: toWorkflowSerializableValue(out.slice(0, 5)),
|
|
3438
|
+
cachedRows:
|
|
3439
|
+
out.length <= WORKER_DATASET_IN_MEMORY_ROWS
|
|
3440
|
+
? toWorkflowSerializableValue(out)
|
|
3441
|
+
: undefined,
|
|
3222
3442
|
};
|
|
3223
3443
|
};
|
|
3224
3444
|
|
|
3225
|
-
const
|
|
3445
|
+
const previewRows: Array<T & Record<string, unknown>> = [];
|
|
3446
|
+
const cachedRows: Array<T & Record<string, unknown>> = [];
|
|
3447
|
+
let canCacheRows = true;
|
|
3226
3448
|
let totalRowsExecuted = 0;
|
|
3227
3449
|
let totalRowsCached = 0;
|
|
3228
3450
|
let totalRowsDuplicateReused = 0;
|
|
@@ -3260,6 +3482,7 @@ function createMinimalWorkerCtx(
|
|
|
3260
3482
|
`(${totalRowsExecuted} executed, ${totalRowsCached} already satisfied) ` +
|
|
3261
3483
|
`inserted=${totalRowsInserted} skipped=${totalRowsSkipped}`;
|
|
3262
3484
|
const completedAt = nowMs();
|
|
3485
|
+
callbacks?.onMapCompleted?.(mapNodeId, completedAt);
|
|
3263
3486
|
updateMapProgress({
|
|
3264
3487
|
completed: totalRowsWritten,
|
|
3265
3488
|
total: totalRowsWritten,
|
|
@@ -3273,9 +3496,29 @@ function createMinimalWorkerCtx(
|
|
|
3273
3496
|
message: cacheSummary,
|
|
3274
3497
|
ts: nowMs(),
|
|
3275
3498
|
});
|
|
3276
|
-
return
|
|
3499
|
+
return createPersistedDatasetHandle({
|
|
3500
|
+
playName: req.playName,
|
|
3501
|
+
name,
|
|
3277
3502
|
count: totalRowsWritten,
|
|
3278
|
-
|
|
3503
|
+
previewRows,
|
|
3504
|
+
cachedRows: canCacheRows ? cachedRows : null,
|
|
3505
|
+
readRows: async ({ limit, offset }) => {
|
|
3506
|
+
const result = await harnessReadSheetDatasetRows({
|
|
3507
|
+
baseUrl: req.baseUrl,
|
|
3508
|
+
executorToken: req.executorToken,
|
|
3509
|
+
playName: req.playName,
|
|
3510
|
+
tableNamespace: name,
|
|
3511
|
+
runId: req.runId,
|
|
3512
|
+
limit,
|
|
3513
|
+
offset,
|
|
3514
|
+
userEmail: req.userEmail,
|
|
3515
|
+
preloadedDbSessions: req.preloadedDbSessions ?? null,
|
|
3516
|
+
});
|
|
3517
|
+
return result.rows as Array<T & Record<string, unknown>>;
|
|
3518
|
+
},
|
|
3519
|
+
trace: (phase, ms, extra) =>
|
|
3520
|
+
recordRunnerPerfTrace({ req, phase, ms, extra }),
|
|
3521
|
+
nowMs,
|
|
3279
3522
|
workProgress: {
|
|
3280
3523
|
total: totalRowsWritten,
|
|
3281
3524
|
executed: totalRowsExecuted,
|
|
@@ -3290,110 +3533,61 @@ function createMinimalWorkerCtx(
|
|
|
3290
3533
|
});
|
|
3291
3534
|
};
|
|
3292
3535
|
|
|
3293
|
-
|
|
3294
|
-
|
|
3295
|
-
|
|
3296
|
-
|
|
3297
|
-
|
|
3298
|
-
|
|
3299
|
-
|
|
3300
|
-
|
|
3301
|
-
|
|
3302
|
-
|
|
3303
|
-
|
|
3304
|
-
|
|
3305
|
-
|
|
3536
|
+
let totalRowsWritten = 0;
|
|
3537
|
+
let chunkIndex = 0;
|
|
3538
|
+
let chunkStart = 0;
|
|
3539
|
+
for await (const chunkRows of iterDatasetChunks(inputRows, rowsPerChunk)) {
|
|
3540
|
+
assertNotAborted(abortSignal);
|
|
3541
|
+
if (chunkRows.length === 0) continue;
|
|
3542
|
+
assertUniqueExplicitRowKeys(chunkRows, chunkStart);
|
|
3543
|
+
const chunkResult = await runChunkStep(chunkRows, chunkStart, chunkIndex);
|
|
3544
|
+
totalRowsWritten += chunkResult.rowsWritten;
|
|
3545
|
+
totalRowsExecuted += chunkResult.rowsExecuted;
|
|
3546
|
+
totalRowsCached += chunkResult.rowsCached;
|
|
3547
|
+
totalRowsDuplicateReused += chunkResult.rowsDuplicateReused;
|
|
3548
|
+
totalRowsInserted += chunkResult.rowsInserted;
|
|
3549
|
+
totalRowsSkipped += chunkResult.rowsSkipped;
|
|
3550
|
+
updateMapProgress({
|
|
3551
|
+
completed: totalRowsWritten,
|
|
3552
|
+
total: rowCountHint ?? undefined,
|
|
3553
|
+
message: formatMapProgressMessage(
|
|
3554
|
+
totalRowsWritten,
|
|
3555
|
+
rowCountHint ?? undefined,
|
|
3556
|
+
),
|
|
3557
|
+
});
|
|
3558
|
+
if (previewRows.length < WORKER_DATASET_PREVIEW_ROWS) {
|
|
3559
|
+
previewRows.push(
|
|
3560
|
+
...chunkResult.preview.slice(
|
|
3561
|
+
0,
|
|
3562
|
+
WORKER_DATASET_PREVIEW_ROWS - previewRows.length,
|
|
3563
|
+
),
|
|
3306
3564
|
);
|
|
3307
|
-
totalRowsWritten += chunkResult.rowsWritten;
|
|
3308
|
-
totalRowsExecuted += chunkResult.rowsExecuted;
|
|
3309
|
-
totalRowsCached += chunkResult.rowsCached;
|
|
3310
|
-
totalRowsDuplicateReused += chunkResult.rowsDuplicateReused;
|
|
3311
|
-
totalRowsInserted += chunkResult.rowsInserted;
|
|
3312
|
-
totalRowsSkipped += chunkResult.rowsSkipped;
|
|
3313
|
-
updateMapProgress({
|
|
3314
|
-
completed: totalRowsWritten,
|
|
3315
|
-
message: formatMapProgressMessage(totalRowsWritten),
|
|
3316
|
-
});
|
|
3317
|
-
if (out.length < 10) {
|
|
3318
|
-
out.push(...chunkResult.preview.slice(0, 10 - out.length));
|
|
3319
|
-
}
|
|
3320
|
-
chunkStart += chunkRows.length;
|
|
3321
|
-
chunkIndex += 1;
|
|
3322
3565
|
}
|
|
3323
|
-
|
|
3324
|
-
|
|
3325
|
-
|
|
3326
|
-
|
|
3327
|
-
|
|
3328
|
-
|
|
3329
|
-
|
|
3330
|
-
|
|
3331
|
-
|
|
3332
|
-
|
|
3333
|
-
});
|
|
3334
|
-
return dataset;
|
|
3335
|
-
}
|
|
3336
|
-
|
|
3337
|
-
if (workflowStep && sliced.length > rowsPerChunk) {
|
|
3338
|
-
let totalRowsWritten = 0;
|
|
3339
|
-
for (let start = 0; start < sliced.length; start += rowsPerChunk) {
|
|
3340
|
-
assertNotAborted(abortSignal);
|
|
3341
|
-
const end = Math.min(sliced.length, start + rowsPerChunk);
|
|
3342
|
-
const chunkRows = sliced.slice(start, end);
|
|
3343
|
-
const chunkIndex = Math.floor(start / rowsPerChunk);
|
|
3344
|
-
assertUniqueExplicitRowKeys(chunkRows, start);
|
|
3345
|
-
const chunkResult = await runChunkStep(chunkRows, start, chunkIndex);
|
|
3346
|
-
totalRowsWritten += chunkResult.rowsWritten;
|
|
3347
|
-
totalRowsExecuted += chunkResult.rowsExecuted;
|
|
3348
|
-
totalRowsCached += chunkResult.rowsCached;
|
|
3349
|
-
totalRowsDuplicateReused += chunkResult.rowsDuplicateReused;
|
|
3350
|
-
totalRowsInserted += chunkResult.rowsInserted;
|
|
3351
|
-
totalRowsSkipped += chunkResult.rowsSkipped;
|
|
3352
|
-
updateMapProgress({
|
|
3353
|
-
completed: totalRowsWritten,
|
|
3354
|
-
total: sliced.length,
|
|
3355
|
-
message: formatMapProgressMessage(totalRowsWritten, sliced.length),
|
|
3356
|
-
});
|
|
3357
|
-
if (out.length < 10) {
|
|
3358
|
-
out.push(...chunkResult.preview.slice(0, 10 - out.length));
|
|
3566
|
+
if (canCacheRows) {
|
|
3567
|
+
const nextRows = chunkResult.cachedRows ?? [];
|
|
3568
|
+
if (
|
|
3569
|
+
nextRows.length === chunkResult.rowsWritten &&
|
|
3570
|
+
cachedRows.length + nextRows.length <= WORKER_DATASET_IN_MEMORY_ROWS
|
|
3571
|
+
) {
|
|
3572
|
+
cachedRows.push(...nextRows);
|
|
3573
|
+
} else {
|
|
3574
|
+
cachedRows.length = 0;
|
|
3575
|
+
canCacheRows = false;
|
|
3359
3576
|
}
|
|
3360
3577
|
}
|
|
3361
|
-
|
|
3362
|
-
|
|
3363
|
-
req,
|
|
3364
|
-
phase: 'runner.map.total',
|
|
3365
|
-
ms: nowMs() - mapStartedAt,
|
|
3366
|
-
extra: {
|
|
3367
|
-
mapName: name,
|
|
3368
|
-
rowsWritten: totalRowsWritten,
|
|
3369
|
-
streaming: false,
|
|
3370
|
-
},
|
|
3371
|
-
});
|
|
3372
|
-
return dataset;
|
|
3578
|
+
chunkStart += chunkRows.length;
|
|
3579
|
+
chunkIndex += 1;
|
|
3373
3580
|
}
|
|
3374
|
-
|
|
3375
|
-
assertUniqueExplicitRowKeys(sliced, 0);
|
|
3376
|
-
const chunkResult = await runChunkStep(sliced, 0, 0);
|
|
3377
|
-
totalRowsExecuted = chunkResult.rowsExecuted;
|
|
3378
|
-
totalRowsCached = chunkResult.rowsCached;
|
|
3379
|
-
totalRowsDuplicateReused = chunkResult.rowsDuplicateReused;
|
|
3380
|
-
totalRowsInserted = chunkResult.rowsInserted;
|
|
3381
|
-
totalRowsSkipped = chunkResult.rowsSkipped;
|
|
3382
|
-
out.push(...chunkResult.preview);
|
|
3383
|
-
updateMapProgress({
|
|
3384
|
-
completed: chunkResult.rowsWritten,
|
|
3385
|
-
total: sliced.length,
|
|
3386
|
-
message: formatMapProgressMessage(chunkResult.rowsWritten, sliced.length),
|
|
3387
|
-
});
|
|
3388
|
-
const dataset = finalize(chunkResult.rowsWritten);
|
|
3581
|
+
const dataset = finalize(totalRowsWritten);
|
|
3389
3582
|
recordRunnerPerfTrace({
|
|
3390
3583
|
req,
|
|
3391
3584
|
phase: 'runner.map.total',
|
|
3392
3585
|
ms: nowMs() - mapStartedAt,
|
|
3393
3586
|
extra: {
|
|
3394
3587
|
mapName: name,
|
|
3395
|
-
rowsWritten:
|
|
3396
|
-
streaming:
|
|
3588
|
+
rowsWritten: totalRowsWritten,
|
|
3589
|
+
inputKind: rowCountHint === null ? 'streaming' : 'known_count',
|
|
3590
|
+
chunks: chunkIndex,
|
|
3397
3591
|
},
|
|
3398
3592
|
});
|
|
3399
3593
|
return dataset;
|
|
@@ -3407,7 +3601,7 @@ function createMinimalWorkerCtx(
|
|
|
3407
3601
|
|
|
3408
3602
|
constructor(
|
|
3409
3603
|
private readonly name: string,
|
|
3410
|
-
private readonly rows: T
|
|
3604
|
+
private readonly rows: WorkerDatasetInput<T>,
|
|
3411
3605
|
) {}
|
|
3412
3606
|
|
|
3413
3607
|
step(name: string, resolver: WorkerStepProgramStep['resolver']): this {
|
|
@@ -3482,18 +3676,13 @@ function createMinimalWorkerCtx(
|
|
|
3482
3676
|
async csv<T extends Record<string, unknown> = Record<string, unknown>>(
|
|
3483
3677
|
arg: unknown,
|
|
3484
3678
|
options?: CsvRenameOptions,
|
|
3485
|
-
): Promise<T
|
|
3679
|
+
): Promise<WorkerDatasetHandle<T>> {
|
|
3486
3680
|
const csvStartedAt = nowMs();
|
|
3487
3681
|
if (Array.isArray(arg)) {
|
|
3488
|
-
|
|
3489
|
-
|
|
3490
|
-
|
|
3491
|
-
|
|
3492
|
-
applyCsvRenameProjection(arg as T[], options),
|
|
3493
|
-
{
|
|
3494
|
-
datasetKind: 'csv',
|
|
3495
|
-
},
|
|
3496
|
-
) as unknown as T[];
|
|
3682
|
+
const dataset = createInlineDatasetHandle(
|
|
3683
|
+
applyCsvRenameProjection(arg as T[], options) as T[],
|
|
3684
|
+
{ name: 'csv', kind: 'csv' },
|
|
3685
|
+
);
|
|
3497
3686
|
recordRunnerPerfTrace({
|
|
3498
3687
|
req,
|
|
3499
3688
|
phase: 'runner.csv',
|
|
@@ -3504,15 +3693,10 @@ function createMinimalWorkerCtx(
|
|
|
3504
3693
|
}
|
|
3505
3694
|
const filename = String(arg ?? '');
|
|
3506
3695
|
if (req.inlineCsv && filename === req.inlineCsv.name) {
|
|
3507
|
-
|
|
3508
|
-
|
|
3509
|
-
|
|
3510
|
-
|
|
3511
|
-
applyCsvRenameProjection(req.inlineCsv.rows as T[], options),
|
|
3512
|
-
{
|
|
3513
|
-
datasetKind: 'csv',
|
|
3514
|
-
},
|
|
3515
|
-
) as unknown as T[];
|
|
3696
|
+
const dataset = createInlineDatasetHandle(
|
|
3697
|
+
applyCsvRenameProjection(req.inlineCsv.rows as T[], options) as T[],
|
|
3698
|
+
{ name: filename, kind: 'csv' },
|
|
3699
|
+
);
|
|
3516
3700
|
recordRunnerPerfTrace({
|
|
3517
3701
|
req,
|
|
3518
3702
|
phase: 'runner.csv',
|
|
@@ -3521,52 +3705,72 @@ function createMinimalWorkerCtx(
|
|
|
3521
3705
|
});
|
|
3522
3706
|
return dataset;
|
|
3523
3707
|
}
|
|
3524
|
-
// Resolution order: explicit
|
|
3708
|
+
// Resolution order: explicit inputFiles (runtime input) → packaged
|
|
3525
3709
|
// files (relative-path imports bundled with the play artifact).
|
|
3526
|
-
let
|
|
3527
|
-
if (!
|
|
3710
|
+
let file = req.inputFiles?.[filename] ?? null;
|
|
3711
|
+
if (!file && req.packagedFiles) {
|
|
3528
3712
|
const matchByPath = req.packagedFiles.find(
|
|
3529
3713
|
(f) =>
|
|
3530
3714
|
f.playPath === filename ||
|
|
3531
3715
|
f.playPath === filename.replace(/^\.\//, ''),
|
|
3532
3716
|
);
|
|
3533
|
-
if (matchByPath)
|
|
3717
|
+
if (matchByPath) {
|
|
3718
|
+
file = {
|
|
3719
|
+
logicalPath: matchByPath.playPath,
|
|
3720
|
+
fileName: matchByPath.playPath.split('/').pop() ?? matchByPath.playPath,
|
|
3721
|
+
storageKey: matchByPath.storageKey,
|
|
3722
|
+
contentType: matchByPath.contentType,
|
|
3723
|
+
bytes: matchByPath.bytes,
|
|
3724
|
+
};
|
|
3725
|
+
}
|
|
3534
3726
|
}
|
|
3535
|
-
if (!
|
|
3727
|
+
if (!file?.storageKey) {
|
|
3536
3728
|
throw new Error(
|
|
3537
3729
|
`ctx.csv("${filename}"): no inline rows or R2 asset binding registered. ` +
|
|
3538
|
-
'Pass inline rows, or upload to R2 and register packagedFiles/
|
|
3730
|
+
'Pass inline rows, or upload to R2 and register packagedFiles/inputFiles in the run config.',
|
|
3731
|
+
);
|
|
3732
|
+
}
|
|
3733
|
+
const selectedFile = file;
|
|
3734
|
+
const expectedBytes = normalizeExpectedBytes(selectedFile.bytes);
|
|
3735
|
+
if (expectedBytes === null) {
|
|
3736
|
+
throw new Error(
|
|
3737
|
+
`ctx.csv("${filename}"): staged dataset handle is missing a byte length for ` +
|
|
3738
|
+
`${selectedFile.storageKey}. Re-stage the file with bytes metadata.`,
|
|
3539
3739
|
);
|
|
3540
3740
|
}
|
|
3541
|
-
|
|
3542
|
-
// pulls 1 MiB-ish text chunks from R2 and yields parsed row chunks.
|
|
3543
|
-
// ctx.map detects the streaming surface via __deeplineStreamingDataset
|
|
3544
|
-
// and switches its chunked execution loop to consume iterChunks
|
|
3545
|
-
// directly, so 2M-row CSVs never get fully materialized in memory.
|
|
3546
|
-
const storageKey = r2Key;
|
|
3547
|
-
const dataset = makeStreamingCsvDataset<T>({
|
|
3741
|
+
const dataset = createCsvDatasetHandle<T>({
|
|
3548
3742
|
name: filename,
|
|
3549
3743
|
logicalPath: filename,
|
|
3744
|
+
expectedBytes,
|
|
3550
3745
|
renameOptions: options,
|
|
3746
|
+
nowMs,
|
|
3747
|
+
streamRows: streamCsvRowsFromByteChunks,
|
|
3748
|
+
trace: (phase, ms, extra) =>
|
|
3749
|
+
recordRunnerPerfTrace({ req, phase, ms, extra }),
|
|
3551
3750
|
open: () =>
|
|
3552
|
-
|
|
3751
|
+
openFileByteChunks({
|
|
3553
3752
|
req,
|
|
3554
3753
|
env,
|
|
3555
3754
|
logicalPath: filename,
|
|
3556
|
-
|
|
3755
|
+
file: selectedFile,
|
|
3557
3756
|
}),
|
|
3558
|
-
})
|
|
3757
|
+
});
|
|
3559
3758
|
recordRunnerPerfTrace({
|
|
3560
3759
|
req,
|
|
3561
3760
|
phase: 'runner.csv',
|
|
3562
3761
|
ms: nowMs() - csvStartedAt,
|
|
3563
|
-
extra: {
|
|
3762
|
+
extra: {
|
|
3763
|
+
mode: 'streaming_file',
|
|
3764
|
+
filename,
|
|
3765
|
+
expectedBytes,
|
|
3766
|
+
storageKey: selectedFile.storageKey,
|
|
3767
|
+
},
|
|
3564
3768
|
});
|
|
3565
3769
|
return dataset;
|
|
3566
3770
|
},
|
|
3567
3771
|
map<T extends Record<string, unknown>>(
|
|
3568
3772
|
name: string,
|
|
3569
|
-
rows: T
|
|
3773
|
+
rows: WorkerDatasetInput<T>,
|
|
3570
3774
|
fieldsDef?:
|
|
3571
3775
|
| Record<
|
|
3572
3776
|
string,
|
|
@@ -3600,7 +3804,12 @@ function createMinimalWorkerCtx(
|
|
|
3600
3804
|
input: Record<string, unknown>,
|
|
3601
3805
|
): Promise<unknown> => {
|
|
3602
3806
|
assertNotAborted(abortSignal);
|
|
3603
|
-
return
|
|
3807
|
+
return executeToolWithLifecycle(
|
|
3808
|
+
req,
|
|
3809
|
+
{ id: key, toolId, input },
|
|
3810
|
+
workflowStep,
|
|
3811
|
+
callbacks,
|
|
3812
|
+
);
|
|
3604
3813
|
},
|
|
3605
3814
|
tools: {
|
|
3606
3815
|
async execute(
|
|
@@ -3609,11 +3818,13 @@ function createMinimalWorkerCtx(
|
|
|
3609
3818
|
input?: unknown,
|
|
3610
3819
|
_opts?: { description?: string },
|
|
3611
3820
|
): Promise<unknown> {
|
|
3821
|
+
void _opts;
|
|
3612
3822
|
assertNotAborted(abortSignal);
|
|
3613
|
-
return
|
|
3823
|
+
return executeToolWithLifecycle(
|
|
3614
3824
|
req,
|
|
3615
3825
|
normalizeToolExecuteArgs(requestOrKey, toolId, input),
|
|
3616
3826
|
workflowStep,
|
|
3827
|
+
callbacks,
|
|
3617
3828
|
);
|
|
3618
3829
|
},
|
|
3619
3830
|
},
|
|
@@ -3640,7 +3851,15 @@ function createMinimalWorkerCtx(
|
|
|
3640
3851
|
input: Record<string, unknown>,
|
|
3641
3852
|
opts?: WorkerWaterfallOptions,
|
|
3642
3853
|
): Promise<unknown | null> {
|
|
3643
|
-
return executeWorkerWaterfall(
|
|
3854
|
+
return executeWorkerWaterfall(
|
|
3855
|
+
req,
|
|
3856
|
+
[],
|
|
3857
|
+
toolNameOrSpec,
|
|
3858
|
+
input,
|
|
3859
|
+
opts,
|
|
3860
|
+
callbacks,
|
|
3861
|
+
workflowStep,
|
|
3862
|
+
);
|
|
3644
3863
|
},
|
|
3645
3864
|
async sleep(ms: number): Promise<void> {
|
|
3646
3865
|
assertNotAborted(abortSignal);
|
|
@@ -3993,17 +4212,10 @@ async function handleRun(request: Request, env: WorkerEnv): Promise<Response> {
|
|
|
3993
4212
|
});
|
|
3994
4213
|
}
|
|
3995
4214
|
|
|
3996
|
-
/** Cap on
|
|
3997
|
-
const
|
|
3998
|
-
/** Min wall-clock interval between live-
|
|
3999
|
-
const
|
|
4000
|
-
/**
|
|
4001
|
-
* Initial flush delay for live logs. Short plays should not pay an extra
|
|
4002
|
-
* non-terminal Convex write just to show a transient "running" log state; the
|
|
4003
|
-
* terminal status carries the full log buffer. Longer plays still flush early
|
|
4004
|
-
* enough for the dashboard to feel alive.
|
|
4005
|
-
*/
|
|
4006
|
-
const LIVE_LOG_FIRST_FLUSH_DELAY_MS = 30_000;
|
|
4215
|
+
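/** Cap on run log lines kept in memory: applies both to the buffer retained for the terminal output compatibility shape and to the batch of lines awaiting the next ledger flush. */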
|
|
4216
|
+
const RUN_LOG_BUFFER_LIMIT = 500;
|
|
4217
|
+
/** Min wall-clock interval between live run-ledger flushes during a run. */
|
|
4218
|
+
const RUN_LEDGER_FLUSH_INTERVAL_MS = 500;
|
|
4007
4219
|
|
|
4008
4220
|
async function executeRunRequest(
|
|
4009
4221
|
req: RunRequest,
|
|
@@ -4046,86 +4258,235 @@ async function executeRunRequest(
|
|
|
4046
4258
|
sessions: req.preloadedDbSessions?.length ?? 0,
|
|
4047
4259
|
},
|
|
4048
4260
|
});
|
|
4049
|
-
|
|
4050
|
-
|
|
4051
|
-
|
|
4052
|
-
|
|
4053
|
-
let
|
|
4054
|
-
|
|
4055
|
-
|
|
4056
|
-
|
|
4057
|
-
|
|
4058
|
-
|
|
4059
|
-
|
|
4261
|
+
let runLogBuffer: string[] = [];
|
|
4262
|
+
let pendingRunLogLines: string[] = [];
|
|
4263
|
+
let stepProgressByNodeId: LiveNodeProgressMap = {};
|
|
4264
|
+
let dirtyProgressNodeIds = new Set<string>();
|
|
4265
|
+
let pendingLedgerEvents: PlayRunLedgerEvent[] = [
|
|
4266
|
+
{
|
|
4267
|
+
type: 'run.started',
|
|
4268
|
+
runId: req.runId,
|
|
4269
|
+
playName: req.playName,
|
|
4270
|
+
source: 'worker',
|
|
4271
|
+
occurredAt: startedAt,
|
|
4272
|
+
runtimeBackend: 'cf_workflows_dynamic_worker',
|
|
4273
|
+
},
|
|
4274
|
+
];
|
|
4275
|
+
let lastLedgerFlushAt = 0;
|
|
4276
|
+
let ledgerFlushInFlight: Promise<void> = Promise.resolve();
|
|
4277
|
+
|
|
4278
|
+
/**
 * Records a single run log line.
 *
 * The line is trimmed and passed through `redactSecretsFromLogString`; if
 * nothing is left it is dropped. Otherwise it is appended to both the retained
 * run-log buffer and the batch of lines waiting for the next ledger flush,
 * each of which keeps only the newest `RUN_LOG_BUFFER_LIMIT` entries.
 */
const appendRunLogLine = (line: string) => {
  const redactedLine = redactSecretsFromLogString(line.trim());
  if (!redactedLine) return;

  // Append, then keep only the most recent RUN_LOG_BUFFER_LIMIT lines.
  const withNewestKept = (lines: string[]): string[] =>
    [...lines, redactedLine].slice(-RUN_LOG_BUFFER_LIMIT);

  runLogBuffer = withNewestKept(runLogBuffer);
  pendingRunLogLines = withNewestKept(pendingRunLogLines);
};
|
|
4065
|
-
|
|
4286
|
+
|
|
4287
|
+
/**
 * Merges a progress update into the in-memory snapshot for one node.
 *
 * The node id is trimmed first; an empty id is ignored. Fields in
 * `input.progress` override the node's previously stored fields, the map is
 * replaced with a new object rather than mutated, and the node is marked dirty
 * so the next ledger drain emits a progress event for it.
 */
const updateStepProgress = (input: {
  nodeId: string;
  progress: LiveNodeProgressSnapshot;
}) => {
  const key = input.nodeId.trim();
  if (key === '') return;

  const previous = stepProgressByNodeId[key] ?? {};
  const merged = { ...previous, ...input.progress };
  stepProgressByNodeId = { ...stepProgressByNodeId, [key]: merged };
  dirtyProgressNodeIds.add(key);
};
|
|
4302
|
+
|
|
4303
|
+
// Returns a shallow copy of the current per-node progress map, so the caller
// gets a map object that later `stepProgressByNodeId` reassignments do not touch.
const stepProgressSnapshot = () => ({ ...stepProgressByNodeId });
|
|
4304
|
+
|
|
4305
|
+
/**
 * Handles one step lifecycle transition reported by the lifecycle tracker.
 *
 * It (1) stamps the node's progress snapshot: `startedAt` for a `started`
 * transition, `completedAt` for every other transition, plus `updatedAt`;
 * (2) queues the matching `step.started` / `step.failed` / `step.completed`
 * ledger event; and (3) requests a throttled (non-forced) ledger flush.
 */
const appendStepLifecycleEvent = (event: PlayStepLifecycleEvent) => {
  const isStart = event.transition === 'started';

  // Any non-"started" transition (including "failed") records completedAt.
  const timing = isStart ? { startedAt: event.at } : { completedAt: event.at };
  updateStepProgress({
    nodeId: event.nodeId,
    progress: { ...timing, updatedAt: event.at },
  });

  const ledgerEvent: PlayRunLedgerEvent = {
    type: isStart
      ? 'step.started'
      : event.transition === 'failed'
        ? 'step.failed'
        : 'step.completed',
    runId: req.runId,
    source: 'worker',
    occurredAt: event.at,
    stepId: event.nodeId,
    kind: event.type,
  };
  pendingLedgerEvents = [...pendingLedgerEvents, ledgerEvent];

  flushLedgerEvents(false);
};
|
|
4333
|
+
|
|
4334
|
+
/**
 * Empties every pending ledger source into a single ordered batch.
 *
 * The batch contains, in order: the already-queued ledger events, one
 * `log.appended` event carrying all pending log lines (if any), and one
 * `step.progress` event per dirty node. All three pending sources are reset.
 *
 * @param occurredAt Timestamp used for the log event and as the fallback for
 *   progress snapshots that carry no numeric `updatedAt`.
 * @returns The drained events; the caller owns the array.
 */
const drainPendingLedgerEvents = (
  occurredAt: number,
): PlayRunLedgerEvent[] => {
  // Take ownership of the queued array and start a fresh queue.
  const drained = pendingLedgerEvents;
  pendingLedgerEvents = [];

  if (pendingRunLogLines.length > 0) {
    const logEvent: PlayRunLedgerEvent = {
      type: 'log.appended',
      runId: req.runId,
      source: 'worker',
      occurredAt,
      lines: pendingRunLogLines,
    };
    drained.push(logEvent);
    pendingRunLogLines = [];
  }

  if (dirtyProgressNodeIds.size > 0) {
    dirtyProgressNodeIds.forEach((stepId) => {
      const snapshot = stepProgressByNodeId[stepId];
      if (!snapshot) return;

      const stamp =
        typeof snapshot.updatedAt === 'number' ? snapshot.updatedAt : occurredAt;
      const { completed, total, failed, message, artifactTableNamespace } =
        snapshot;

      // Copy only well-typed fields; absent or mistyped ones are omitted.
      const progress: PlayRunLedgerStepProgress = {
        ...(typeof completed === 'number' ? { completed } : {}),
        ...(typeof total === 'number' ? { total } : {}),
        ...(typeof failed === 'number' ? { failed } : {}),
        ...(typeof message === 'string' && message ? { message } : {}),
        ...(typeof artifactTableNamespace === 'string' ||
        artifactTableNamespace === null
          ? { artifactTableNamespace }
          : {}),
        updatedAt: stamp,
      };

      // NOTE(review): a failed step also has `completedAt` set by the lifecycle
      // handler, so it is reported here as 'completed' — confirm ledger
      // consumers treat the separate `step.failed` event as authoritative.
      const status: PlayRunLedgerStepStatus =
        typeof snapshot.completedAt === 'number' ? 'completed' : 'running';

      const progressEvent: PlayRunLedgerEvent = {
        type: 'step.progress',
        runId: req.runId,
        source: 'worker',
        occurredAt: stamp,
        stepId,
        status,
        progress,
      };
      drained.push(progressEvent);
    });
    dirtyProgressNodeIds = new Set<string>();
  }

  return drained;
};
|
|
4079
|
-
|
|
4080
|
-
const
|
|
4397
|
+
|
|
4398
|
+
/**
 * Schedules a fire-and-forget append of all pending ledger events.
 *
 * Does nothing unless `options.persistResultDatasets` is set. Unforced calls
 * are throttled to one per `RUN_LEDGER_FLUSH_INTERVAL_MS`. Appends are chained
 * on `ledgerFlushInFlight` so they run one after another. If an append fails,
 * its batch is put back at the front of the pending queue and the failure is
 * swallowed so the chain never rejects.
 *
 * @param force When true, bypasses the throttle interval.
 */
const flushLedgerEvents = (force: boolean): void => {
  if (!options?.persistResultDatasets) return;

  const now = nowMs();
  const withinThrottleWindow =
    now - lastLedgerFlushAt < RUN_LEDGER_FLUSH_INTERVAL_MS;
  if (withinThrottleWindow && !force) return;

  const batch = drainPendingLedgerEvents(now);
  if (batch.length === 0) return;
  lastLedgerFlushAt = now;

  const appendBatch = async (): Promise<void> => {
    try {
      await postRuntimeApi(req.baseUrl, req.executorToken, {
        action: 'append_run_events',
        playId: req.runId,
        events: batch,
      });
    } catch {
      // Re-queue ahead of anything recorded since, so a later flush retries it.
      pendingLedgerEvents = [...batch, ...pendingLedgerEvents];
      throw new Error('runtime run-ledger append failed');
    }
  };

  ledgerFlushInFlight = ledgerFlushInFlight
    .catch(() => undefined)
    .then(appendBatch)
    .catch(() => undefined);
};
|
|
4423
|
+
|
|
4424
|
+
/**
 * Appends the run's terminal event together with everything still pending.
 *
 * Does nothing unless `options.persistResultDatasets` is set. It first waits
 * for any in-flight background flush to settle, then queues `terminalEvent`
 * last, drains all pending sources, and posts the batch. Unlike the
 * background flush, a failed append here is rethrown to the caller, after the
 * batch has been put back at the front of the pending queue.
 *
 * @param terminalEvent The final ledger event for this run.
 * @throws Whatever `postRuntimeApi` throws when the append fails.
 */
const flushTerminalLedgerEvents = async (
  terminalEvent: PlayRunLedgerEvent,
): Promise<void> => {
  if (!options?.persistResultDatasets) return;

  // Let any background flush finish so its re-queued events are included.
  await ledgerFlushInFlight.catch(() => undefined);

  const now = nowMs();
  pendingLedgerEvents = [...pendingLedgerEvents, terminalEvent];
  const batch = drainPendingLedgerEvents(now);
  if (batch.length === 0) return;

  const payload = {
    action: 'append_run_events',
    playId: req.runId,
    events: batch,
  };
  try {
    await postRuntimeApi(req.baseUrl, req.executorToken, payload);
  } catch (error) {
    pendingLedgerEvents = [...batch, ...pendingLedgerEvents];
    throw error;
  }
};
|
|
4444
|
+
|
|
4445
|
+
const orderedNodes = buildOrderedNodeList(req.contractSnapshot);
|
|
4446
|
+
const stepLifecycle =
|
|
4447
|
+
orderedNodes.length > 0
|
|
4448
|
+
? new PlayStepLifecycleTracker(
|
|
4449
|
+
orderedNodes,
|
|
4450
|
+
() => stepProgressByNodeId,
|
|
4451
|
+
appendStepLifecycleEvent,
|
|
4452
|
+
nowMs,
|
|
4453
|
+
)
|
|
4454
|
+
: null;
|
|
4455
|
+
const workerCallbacks: WorkerCtxCallbacks = {
|
|
4456
|
+
onNodeProgress: (input) => {
|
|
4457
|
+
updateStepProgress(input);
|
|
4458
|
+
flushLedgerEvents(false);
|
|
4459
|
+
},
|
|
4460
|
+
onMapStarted: (nodeId, at) => stepLifecycle?.onMapStarted(nodeId, at),
|
|
4461
|
+
onMapCompleted: (nodeId, at) => stepLifecycle?.onMapCompleted(nodeId, at),
|
|
4462
|
+
onToolCalled: (toolId, at) => stepLifecycle?.onToolCalled(toolId, at),
|
|
4463
|
+
onToolFailed: (toolId, at) => stepLifecycle?.onToolFailed(toolId, at),
|
|
4105
4464
|
};
|
|
4106
4465
|
|
|
4107
4466
|
/**
 * Emit wrapper that mirrors log and error events into the run ledger before
 * forwarding every event, unchanged, to the underlying `emit`.
 *
 * `log` events are recorded and flushed on the normal throttle; `error` events
 * are recorded with an `[error]` prefix and flushed immediately.
 */
const wrappedEmit = (event: RunnerEvent) => {
  switch (event.type) {
    case 'log': {
      appendRunLogLine(event.message);
      flushLedgerEvents(false);
      break;
    }
    case 'error': {
      // Redact the message before it is stored in the run-log buffer. The
      // forwarded event below still carries the raw message, so the console /
      // NDJSON stream keeps full debugging detail.
      const sanitizedMessage = redactSecretsFromLogString(event.message);
      appendRunLogLine(`[error] ${sanitizedMessage}`);
      flushLedgerEvents(true);
      break;
    }
    default:
      break;
  }
  emit(event);
};
|
|
4121
4480
|
|
|
4481
|
+
stepLifecycle?.markPreMapStepsStarted(startedAt);
|
|
4482
|
+
flushLedgerEvents(false);
|
|
4122
4483
|
const ctx = createMinimalWorkerCtx(
|
|
4123
4484
|
req,
|
|
4124
4485
|
wrappedEmit,
|
|
4125
4486
|
env,
|
|
4126
4487
|
workflowStep,
|
|
4127
4488
|
abortSignal,
|
|
4128
|
-
|
|
4489
|
+
workerCallbacks,
|
|
4129
4490
|
);
|
|
4130
4491
|
try {
|
|
4131
4492
|
const playStartedAt = nowMs();
|
|
@@ -4140,6 +4501,7 @@ async function executeRunRequest(
|
|
|
4140
4501
|
phase: 'runner.play_function',
|
|
4141
4502
|
ms: nowMs() - playStartedAt,
|
|
4142
4503
|
});
|
|
4504
|
+
stepLifecycle?.markAllTerminal(nowMs());
|
|
4143
4505
|
const serializeStartedAt = nowMs();
|
|
4144
4506
|
const serializedResult = serializePlayReturnValue(result);
|
|
4145
4507
|
recordRunnerPerfTrace({
|
|
@@ -4149,41 +4511,21 @@ async function executeRunRequest(
|
|
|
4149
4511
|
});
|
|
4150
4512
|
if (options?.persistResultDatasets) {
|
|
4151
4513
|
const persistStartedAt = nowMs();
|
|
4152
|
-
await
|
|
4514
|
+
await ledgerFlushInFlight.catch(() => undefined);
|
|
4153
4515
|
recordRunnerPerfTrace({
|
|
4154
4516
|
req,
|
|
4155
|
-
phase: 'runner.
|
|
4517
|
+
phase: 'runner.run_ledger_flush_wait',
|
|
4156
4518
|
ms: nowMs() - persistStartedAt,
|
|
4157
4519
|
});
|
|
4158
4520
|
const resultDatasetStartedAt = nowMs();
|
|
4159
|
-
await persistResultDatasets(req, serializedResult);
|
|
4521
|
+
await persistResultDatasets(req, result, serializedResult);
|
|
4160
4522
|
recordRunnerPerfTrace({
|
|
4161
4523
|
req,
|
|
4162
4524
|
phase: 'runner.persist_result_datasets',
|
|
4163
4525
|
ms: nowMs() - resultDatasetStartedAt,
|
|
4164
4526
|
});
|
|
4165
4527
|
const terminalResult = trimResultForStatus(serializedResult);
|
|
4166
|
-
const
|
|
4167
|
-
await postRuntimeApiBestEffort(req.baseUrl, req.executorToken, {
|
|
4168
|
-
action: 'update_run_status',
|
|
4169
|
-
playId: req.runId,
|
|
4170
|
-
status: 'completed',
|
|
4171
|
-
error: null,
|
|
4172
|
-
result: terminalResult,
|
|
4173
|
-
runtimeBackend: 'cf_workflows_dynamic_worker',
|
|
4174
|
-
waitKind: null,
|
|
4175
|
-
waitUntil: null,
|
|
4176
|
-
activeBoundaryId: null,
|
|
4177
|
-
liveLogs,
|
|
4178
|
-
liveNodeProgress: liveNodeProgressSnapshot(),
|
|
4179
|
-
lastCheckpointAt: nowMs(),
|
|
4180
|
-
});
|
|
4181
|
-
recordRunnerPerfTrace({
|
|
4182
|
-
req,
|
|
4183
|
-
phase: 'runner.terminal_status_update',
|
|
4184
|
-
ms: nowMs() - terminalUpdateStartedAt,
|
|
4185
|
-
});
|
|
4186
|
-
|
|
4528
|
+
const terminalOccurredAt = nowMs();
|
|
4187
4529
|
const billingStartedAt = nowMs();
|
|
4188
4530
|
await finalizeWorkerComputeBilling({
|
|
4189
4531
|
req,
|
|
@@ -4195,6 +4537,20 @@ async function executeRunRequest(
|
|
|
4195
4537
|
phase: 'runner.compute_billing_finalize',
|
|
4196
4538
|
ms: nowMs() - billingStartedAt,
|
|
4197
4539
|
});
|
|
4540
|
+
|
|
4541
|
+
const terminalUpdateStartedAt = nowMs();
|
|
4542
|
+
await flushTerminalLedgerEvents({
|
|
4543
|
+
type: 'run.completed',
|
|
4544
|
+
runId: req.runId,
|
|
4545
|
+
source: 'worker',
|
|
4546
|
+
occurredAt: terminalOccurredAt,
|
|
4547
|
+
result: terminalResult,
|
|
4548
|
+
});
|
|
4549
|
+
recordRunnerPerfTrace({
|
|
4550
|
+
req,
|
|
4551
|
+
phase: 'runner.terminal_ledger_append',
|
|
4552
|
+
ms: nowMs() - terminalUpdateStartedAt,
|
|
4553
|
+
});
|
|
4198
4554
|
}
|
|
4199
4555
|
const parentSignalStartedAt = nowMs();
|
|
4200
4556
|
await signalParentPlayTerminal({
|
|
@@ -4222,11 +4578,12 @@ async function executeRunRequest(
|
|
|
4222
4578
|
playName: req.playName,
|
|
4223
4579
|
result: serializedResult,
|
|
4224
4580
|
outputRows: inferOutputRows(serializedResult),
|
|
4225
|
-
liveLogs,
|
|
4226
|
-
liveNodeProgress:
|
|
4581
|
+
liveLogs: runLogBuffer,
|
|
4582
|
+
liveNodeProgress: stepProgressSnapshot(),
|
|
4227
4583
|
durationMs: nowMs() - startedAt,
|
|
4228
4584
|
};
|
|
4229
4585
|
} catch (error) {
|
|
4586
|
+
stepLifecycle?.markStartedFailed(nowMs());
|
|
4230
4587
|
const aborted = isAbortLikeError(error);
|
|
4231
4588
|
if (aborted) {
|
|
4232
4589
|
// Flip the controller so any concurrent user code observes the abort
|
|
@@ -4237,19 +4594,15 @@ async function executeRunRequest(
|
|
|
4237
4594
|
}
|
|
4238
4595
|
const message = error instanceof Error ? error.message : String(error);
|
|
4239
4596
|
if (options?.persistResultDatasets) {
|
|
4240
|
-
|
|
4241
|
-
|
|
4242
|
-
|
|
4243
|
-
|
|
4244
|
-
|
|
4597
|
+
appendRunLogLine(
|
|
4598
|
+
`${aborted ? '[cancelled]' : '[error]'} ${redactSecretsFromLogString(message)}`,
|
|
4599
|
+
);
|
|
4600
|
+
await flushTerminalLedgerEvents({
|
|
4601
|
+
type: aborted ? 'run.cancelled' : 'run.failed',
|
|
4602
|
+
runId: req.runId,
|
|
4603
|
+
source: 'worker',
|
|
4604
|
+
occurredAt: nowMs(),
|
|
4245
4605
|
error: message,
|
|
4246
|
-
runtimeBackend: 'cf_workflows_dynamic_worker',
|
|
4247
|
-
waitKind: null,
|
|
4248
|
-
waitUntil: null,
|
|
4249
|
-
activeBoundaryId: null,
|
|
4250
|
-
liveLogs,
|
|
4251
|
-
liveNodeProgress: liveNodeProgressSnapshot(),
|
|
4252
|
-
lastCheckpointAt: nowMs(),
|
|
4253
4606
|
});
|
|
4254
4607
|
await finalizeWorkerComputeBilling({
|
|
4255
4608
|
req,
|
|
@@ -4338,6 +4691,12 @@ function runRequestFromWorkflowParams(
|
|
|
4338
4691
|
): RunRequest {
|
|
4339
4692
|
const inputFile = isRecord(params.inputFile) ? params.inputFile : null;
|
|
4340
4693
|
const fileName = String(inputFile?.name ?? inputFile?.path ?? 'input.csv');
|
|
4694
|
+
const inputStorageKey =
|
|
4695
|
+
typeof inputFile?.r2Key === 'string'
|
|
4696
|
+
? inputFile.r2Key
|
|
4697
|
+
: typeof inputFile?.storageKey === 'string'
|
|
4698
|
+
? inputFile.storageKey
|
|
4699
|
+
: null;
|
|
4341
4700
|
return {
|
|
4342
4701
|
runId: String(params.runId ?? ''),
|
|
4343
4702
|
callbackUrl: String(params.baseUrl ?? ''),
|
|
@@ -4350,14 +4709,28 @@ function runRequestFromWorkflowParams(
|
|
|
4350
4709
|
? (params.input as Record<string, unknown>)
|
|
4351
4710
|
: {},
|
|
4352
4711
|
inlineCsv: isInlineCsv(params.inlineCsv) ? params.inlineCsv : null,
|
|
4353
|
-
|
|
4354
|
-
inputFile &&
|
|
4355
|
-
? {
|
|
4712
|
+
inputFiles:
|
|
4713
|
+
inputFile && inputStorageKey
|
|
4714
|
+
? {
|
|
4715
|
+
[fileName]: {
|
|
4716
|
+
logicalPath: String(inputFile.logicalPath ?? inputFile.path ?? fileName),
|
|
4717
|
+
fileName,
|
|
4718
|
+
storageKey: inputStorageKey,
|
|
4719
|
+
contentType:
|
|
4720
|
+
typeof inputFile.contentType === 'string'
|
|
4721
|
+
? inputFile.contentType
|
|
4722
|
+
: null,
|
|
4723
|
+
bytes: normalizeExpectedBytes(inputFile.bytes),
|
|
4724
|
+
},
|
|
4725
|
+
}
|
|
4356
4726
|
: null,
|
|
4357
4727
|
packagedFiles: Array.isArray(params.packagedFiles)
|
|
4358
4728
|
? params.packagedFiles.filter(isRecord).map((file) => ({
|
|
4359
4729
|
playPath: String(file.playPath ?? ''),
|
|
4360
4730
|
storageKey: String(file.storageKey ?? ''),
|
|
4731
|
+
contentType:
|
|
4732
|
+
typeof file.contentType === 'string' ? file.contentType : null,
|
|
4733
|
+
bytes: normalizeExpectedBytes(file.bytes),
|
|
4361
4734
|
}))
|
|
4362
4735
|
: null,
|
|
4363
4736
|
partitionRange: null,
|
|
@@ -4425,11 +4798,39 @@ function isPlayCallGovernanceSnapshot(
|
|
|
4425
4798
|
async function persistResultDatasets(
|
|
4426
4799
|
req: RunRequest,
|
|
4427
4800
|
result: unknown,
|
|
4801
|
+
serializedResult: unknown,
|
|
4428
4802
|
): Promise<void> {
|
|
4429
|
-
const
|
|
4803
|
+
const persistedNamespaces = new Set<string>();
|
|
4804
|
+
for (const dataset of collectDatasetHandles(result)) {
|
|
4805
|
+
if (dataset.datasetKind === 'map') continue;
|
|
4806
|
+
let inputOffset = 0;
|
|
4807
|
+
for await (const chunk of iterDatasetChunks(
|
|
4808
|
+
dataset.handle,
|
|
4809
|
+
RESULT_DATASET_PERSIST_CHUNK_ROWS,
|
|
4810
|
+
)) {
|
|
4811
|
+
if (chunk.length === 0) continue;
|
|
4812
|
+
await harnessStartSheetDataset({
|
|
4813
|
+
baseUrl: req.baseUrl,
|
|
4814
|
+
executorToken: req.executorToken,
|
|
4815
|
+
playName: req.playName,
|
|
4816
|
+
tableNamespace: dataset.tableNamespace,
|
|
4817
|
+
sheetContract: requireSheetContract(req, dataset.tableNamespace),
|
|
4818
|
+
rows: chunk.map((row) => ({ ...row })),
|
|
4819
|
+
runId: req.runId,
|
|
4820
|
+
inputOffset,
|
|
4821
|
+
userEmail: req.userEmail,
|
|
4822
|
+
preloadedDbSessions: req.preloadedDbSessions ?? null,
|
|
4823
|
+
});
|
|
4824
|
+
inputOffset += chunk.length;
|
|
4825
|
+
}
|
|
4826
|
+
persistedNamespaces.add(dataset.tableNamespace);
|
|
4827
|
+
}
|
|
4828
|
+
|
|
4829
|
+
const datasets = collectDatasetEnvelopes(serializedResult);
|
|
4430
4830
|
for (const dataset of datasets) {
|
|
4431
4831
|
if (dataset.datasetKind === 'map') continue;
|
|
4432
4832
|
if (dataset.rows.length === 0) continue;
|
|
4833
|
+
if (persistedNamespaces.has(dataset.tableNamespace)) continue;
|
|
4433
4834
|
await harnessStartSheetDataset({
|
|
4434
4835
|
baseUrl: req.baseUrl,
|
|
4435
4836
|
executorToken: req.executorToken,
|
|
@@ -4438,12 +4839,63 @@ async function persistResultDatasets(
|
|
|
4438
4839
|
sheetContract: requireSheetContract(req, dataset.tableNamespace),
|
|
4439
4840
|
rows: dataset.rows,
|
|
4440
4841
|
runId: req.runId,
|
|
4842
|
+
inputOffset: 0,
|
|
4441
4843
|
userEmail: req.userEmail,
|
|
4442
4844
|
preloadedDbSessions: req.preloadedDbSessions ?? null,
|
|
4443
4845
|
});
|
|
4444
4846
|
}
|
|
4445
4847
|
}
|
|
4446
4848
|
|
|
4849
|
+
/** Chunk-size argument passed to `iterDatasetChunks` when `persistResultDatasets` streams a returned dataset handle into sheet storage. */
const RESULT_DATASET_PERSIST_CHUNK_ROWS = 5_000;
|
|
4850
|
+
|
|
4851
|
+
/**
 * Walks a play's raw return value and collects every dataset handle in it.
 *
 * Arrays and plain objects are traversed recursively, up to 12 levels deep.
 * A handle is reported only when its `toJSON()` metadata carries a string
 * `tableNamespace`. Each distinct handle object is reported at most once, even
 * if the result references it from several places, so a caller that streams
 * each returned handle does not stream and persist the same dataset twice.
 *
 * @param value The unserialized value returned by the play function.
 * @returns One entry per distinct handle, in traversal order. `datasetKind` is
 *   `null` when the metadata does not say `'csv'` or `'map'`.
 */
function collectDatasetHandles(value: unknown): Array<{
  tableNamespace: string;
  datasetKind: 'csv' | 'map' | null;
  handle: WorkerDatasetHandle<Record<string, unknown>>;
}> {
  const datasets: Array<{
    tableNamespace: string;
    datasetKind: 'csv' | 'map' | null;
    handle: WorkerDatasetHandle<Record<string, unknown>>;
  }> = [];
  // Identity set shared by handles and plain objects: breaks object cycles and
  // stops the same handle from being collected more than once.
  const seen = new WeakSet<object>();

  const walk = (candidate: unknown, depth: number): void => {
    if (depth > 12 || candidate == null) return;

    if (isDatasetHandle(candidate)) {
      if (seen.has(candidate)) return;
      seen.add(candidate);

      const metadata = candidate.toJSON() as Record<string, unknown>;
      const tableNamespace =
        typeof metadata.tableNamespace === 'string'
          ? metadata.tableNamespace
          : null;
      const datasetKind =
        metadata.datasetKind === 'csv' || metadata.datasetKind === 'map'
          ? metadata.datasetKind
          : null;
      if (tableNamespace) {
        datasets.push({
          tableNamespace,
          datasetKind,
          handle: candidate as WorkerDatasetHandle<Record<string, unknown>>,
        });
      }
      // Handles are leaves: never descend into their internals.
      return;
    }

    if (Array.isArray(candidate)) {
      for (const item of candidate) walk(item, depth + 1);
      return;
    }

    if (typeof candidate !== 'object') return;
    const object = candidate as Record<string, unknown>;
    if (seen.has(object)) return;
    seen.add(object);
    for (const child of Object.values(object)) {
      walk(child, depth + 1);
    }
  };

  walk(value, 0);
  return datasets;
}
|
|
4898
|
+
|
|
4447
4899
|
function serializePlayReturnValue(value: unknown): unknown {
|
|
4448
4900
|
return serializeValue(value, 0);
|
|
4449
4901
|
}
|
|
@@ -4498,64 +4950,10 @@ function trimResultShape(value: unknown): unknown {
|
|
|
4498
4950
|
|
|
4499
4951
|
function serializeValue(value: unknown, depth: number): unknown {
|
|
4500
4952
|
if (depth > 20 || value == null) return value;
|
|
4953
|
+
if (isDatasetHandle(value)) {
|
|
4954
|
+
return serializeValue(value.toJSON(), depth + 1);
|
|
4955
|
+
}
|
|
4501
4956
|
if (Array.isArray(value)) {
|
|
4502
|
-
const tableNamespace =
|
|
4503
|
-
typeof (value as unknown as { tableNamespace?: unknown })
|
|
4504
|
-
.tableNamespace === 'string'
|
|
4505
|
-
? (value as unknown as { tableNamespace: string }).tableNamespace
|
|
4506
|
-
: null;
|
|
4507
|
-
const datasetId =
|
|
4508
|
-
typeof (value as unknown as { datasetId?: unknown }).datasetId ===
|
|
4509
|
-
'string'
|
|
4510
|
-
? (value as unknown as { datasetId: string }).datasetId
|
|
4511
|
-
: null;
|
|
4512
|
-
const datasetCount =
|
|
4513
|
-
typeof (value as unknown as { __deeplineDatasetCount?: unknown })
|
|
4514
|
-
.__deeplineDatasetCount === 'number'
|
|
4515
|
-
? (value as unknown as { __deeplineDatasetCount: number })
|
|
4516
|
-
.__deeplineDatasetCount
|
|
4517
|
-
: value.length;
|
|
4518
|
-
const datasetKind =
|
|
4519
|
-
(value as unknown as { __deeplineDatasetKind?: unknown })
|
|
4520
|
-
.__deeplineDatasetKind === 'csv'
|
|
4521
|
-
? 'csv'
|
|
4522
|
-
: 'map';
|
|
4523
|
-
const cacheSummary =
|
|
4524
|
-
typeof (value as unknown as { __deeplineCacheSummary?: unknown })
|
|
4525
|
-
.__deeplineCacheSummary === 'string'
|
|
4526
|
-
? (value as unknown as { __deeplineCacheSummary: string })
|
|
4527
|
-
.__deeplineCacheSummary
|
|
4528
|
-
: null;
|
|
4529
|
-
const workProgress = isRecord(
|
|
4530
|
-
(value as unknown as { __deeplineWorkProgress?: unknown })
|
|
4531
|
-
.__deeplineWorkProgress,
|
|
4532
|
-
)
|
|
4533
|
-
? (
|
|
4534
|
-
value as unknown as {
|
|
4535
|
-
__deeplineWorkProgress: Record<string, unknown>;
|
|
4536
|
-
}
|
|
4537
|
-
).__deeplineWorkProgress
|
|
4538
|
-
: null;
|
|
4539
|
-
const previewRows = value
|
|
4540
|
-
.slice(0, 5)
|
|
4541
|
-
.map((row) => serializeValue(row, depth + 1))
|
|
4542
|
-
.filter(isRecord);
|
|
4543
|
-
if (tableNamespace && datasetId) {
|
|
4544
|
-
const columns = inferColumns(
|
|
4545
|
-
value.map((row) => serializeValue(row, depth + 1)).filter(isRecord),
|
|
4546
|
-
);
|
|
4547
|
-
return {
|
|
4548
|
-
kind: 'dataset' as const,
|
|
4549
|
-
datasetKind,
|
|
4550
|
-
datasetId,
|
|
4551
|
-
count: datasetCount,
|
|
4552
|
-
columns,
|
|
4553
|
-
preview: previewRows,
|
|
4554
|
-
tableNamespace,
|
|
4555
|
-
...(cacheSummary ? { cacheSummary } : {}),
|
|
4556
|
-
...(workProgress ? { _metadata: { workProgress } } : {}),
|
|
4557
|
-
};
|
|
4558
|
-
}
|
|
4559
4957
|
return value.map((entry) => serializeValue(entry, depth + 1));
|
|
4560
4958
|
}
|
|
4561
4959
|
if (typeof value !== 'object') return value;
|
|
@@ -4566,16 +4964,6 @@ function serializeValue(value: unknown, depth: number): unknown {
|
|
|
4566
4964
|
return out;
|
|
4567
4965
|
}
|
|
4568
4966
|
|
|
4569
|
-
function inferColumns(rows: ReadonlyArray<Record<string, unknown>>): string[] {
|
|
4570
|
-
const columns = new Set<string>();
|
|
4571
|
-
for (const row of rows) {
|
|
4572
|
-
for (const key of Object.keys(row)) {
|
|
4573
|
-
columns.add(key);
|
|
4574
|
-
}
|
|
4575
|
-
}
|
|
4576
|
-
return [...columns];
|
|
4577
|
-
}
|
|
4578
|
-
|
|
4579
4967
|
function collectDatasetEnvelopes(value: unknown): Array<{
|
|
4580
4968
|
tableNamespace: string;
|
|
4581
4969
|
datasetKind: 'csv' | 'map' | null;
|
|
@@ -4714,10 +5102,17 @@ export class TenantWorkflow extends WorkflowEntrypoint<
|
|
|
4714
5102
|
// user via tail/SSE. Retry with backoff before giving up; if we drop
|
|
4715
5103
|
// it, the user is stuck staring at the opaque CF reference id.
|
|
4716
5104
|
const errorPayload = JSON.stringify({
|
|
4717
|
-
action: '
|
|
5105
|
+
action: 'append_run_events',
|
|
4718
5106
|
playId: req.runId,
|
|
4719
|
-
|
|
4720
|
-
|
|
5107
|
+
events: [
|
|
5108
|
+
{
|
|
5109
|
+
type: 'run.failed',
|
|
5110
|
+
runId: req.runId,
|
|
5111
|
+
source: 'worker',
|
|
5112
|
+
occurredAt: nowMs(),
|
|
5113
|
+
error: `TenantWorkflow.run threw: ${detail.name ?? 'Error'}: ${detail.message}\n${detail.stack ?? ''}`,
|
|
5114
|
+
} satisfies PlayRunLedgerEvent,
|
|
5115
|
+
],
|
|
4721
5116
|
});
|
|
4722
5117
|
const backoffMs = [200, 500, 1500];
|
|
4723
5118
|
let lastCallbackError: unknown = null;
|
|
@@ -4850,22 +5245,18 @@ function inferOutputRows(result: unknown): number {
|
|
|
4850
5245
|
const datasets: number[] = [];
|
|
4851
5246
|
const walk = (value: unknown, depth: number) => {
|
|
4852
5247
|
if (depth > 6 || value == null) return;
|
|
5248
|
+
if (isDatasetHandle(value)) {
|
|
5249
|
+
datasets.push(value.toJSON().count);
|
|
5250
|
+
return;
|
|
5251
|
+
}
|
|
4853
5252
|
if (Array.isArray(value)) {
|
|
4854
5253
|
for (const item of value) walk(item, depth + 1);
|
|
4855
5254
|
return;
|
|
4856
5255
|
}
|
|
4857
5256
|
if (typeof value !== 'object') return;
|
|
4858
5257
|
const record = value as Record<string, unknown>;
|
|
4859
|
-
if (
|
|
4860
|
-
|
|
4861
|
-
(typeof record.count === 'number' ||
|
|
4862
|
-
typeof record.__deeplineDatasetCount === 'number')
|
|
4863
|
-
) {
|
|
4864
|
-
datasets.push(
|
|
4865
|
-
typeof record.count === 'number'
|
|
4866
|
-
? record.count
|
|
4867
|
-
: Number(record.__deeplineDatasetCount),
|
|
4868
|
-
);
|
|
5258
|
+
if (typeof record.tableNamespace === 'string' && typeof record.count === 'number') {
|
|
5259
|
+
datasets.push(record.count);
|
|
4869
5260
|
}
|
|
4870
5261
|
for (const [key, child] of Object.entries(record)) {
|
|
4871
5262
|
if (key === 'preview') continue;
|