deepline 0.1.25 → 0.1.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +118 -85
- package/dist/cli/index.mjs +103 -69
- package/dist/index.d.mts +3 -1
- package/dist/index.d.ts +3 -1
- package/dist/index.js +2 -2
- package/dist/index.mjs +2 -2
- package/dist/repo/apps/play-runner-workers/src/coordinator-entry.ts +434 -102
- package/dist/repo/apps/play-runner-workers/src/dedup-do.ts +6 -1
- package/dist/repo/apps/play-runner-workers/src/entry.ts +1169 -719
- package/dist/repo/apps/play-runner-workers/src/runtime/dataset-handles.ts +418 -0
- package/dist/repo/sdk/src/client.ts +5 -1
- package/dist/repo/sdk/src/plays/bundle-play-file.ts +1 -1
- package/dist/repo/sdk/src/plays/harness-stub.ts +25 -55
- package/dist/repo/sdk/src/version.ts +1 -1
- package/dist/repo/shared_libs/play-runtime/execution-plan.ts +18 -8
- package/dist/repo/shared_libs/play-runtime/scheduler-backend.ts +5 -4
- package/dist/repo/shared_libs/play-runtime/step-lifecycle-tracker.ts +228 -0
- package/dist/repo/shared_libs/plays/bundling/index.ts +90 -51
- package/package.json +1 -1
- package/dist/repo/shared_libs/play-runtime/runtime-actions.ts +0 -208
|
@@ -63,12 +63,37 @@ import {
|
|
|
63
63
|
derivePlayRowIdentityFromKey,
|
|
64
64
|
} from '../../../shared_libs/plays/row-identity';
|
|
65
65
|
import {
|
|
66
|
+
getTopLevelPipelineSubsteps,
|
|
66
67
|
getCompiledPipelineSubsteps,
|
|
67
68
|
flattenStaticPipeline,
|
|
68
69
|
resolveSheetContractForTableNamespace,
|
|
69
70
|
sqlSafePlayColumnName,
|
|
71
|
+
type PlayStaticSubstep,
|
|
70
72
|
type PlayStaticPipeline,
|
|
73
|
+
type PlaySheetContract,
|
|
71
74
|
} from '../../../shared_libs/plays/static-pipeline';
|
|
75
|
+
import {
|
|
76
|
+
PlayStepLifecycleTracker,
|
|
77
|
+
type PlayStepLifecycleEvent,
|
|
78
|
+
} from '../../../shared_libs/play-runtime/step-lifecycle-tracker';
|
|
79
|
+
import type {
|
|
80
|
+
PlayRunLedgerEvent,
|
|
81
|
+
PlayRunLedgerStepProgress,
|
|
82
|
+
PlayRunLedgerStepStatus,
|
|
83
|
+
} from '../../../shared_libs/play-runtime/run-ledger';
|
|
84
|
+
import {
|
|
85
|
+
createCsvDatasetHandle,
|
|
86
|
+
createInlineDatasetHandle,
|
|
87
|
+
createMaterializedDatasetHandle,
|
|
88
|
+
createPersistedDatasetHandle,
|
|
89
|
+
datasetRowCountHint,
|
|
90
|
+
isDatasetHandle,
|
|
91
|
+
iterDatasetChunks,
|
|
92
|
+
WORKER_DATASET_IN_MEMORY_ROWS,
|
|
93
|
+
WORKER_DATASET_PREVIEW_ROWS,
|
|
94
|
+
type WorkerDatasetHandle,
|
|
95
|
+
type WorkerDatasetInput,
|
|
96
|
+
} from './runtime/dataset-handles';
|
|
72
97
|
// The harness stub forwards leaf calls (validation, runtime-api HTTP) into
|
|
73
98
|
// the long-lived Play Harness Worker via env.HARNESS. We import the
|
|
74
99
|
// `setHarnessBinding` setter eagerly so it's available the moment
|
|
@@ -80,9 +105,9 @@ import {
|
|
|
80
105
|
// modules without going through this stub is how we'd accidentally
|
|
81
106
|
// re-bundle harness internals into per-play. Keep that in mind.
|
|
82
107
|
import {
|
|
83
|
-
harnessFetchStagedFile,
|
|
84
108
|
harnessPersistCompletedSheetRows,
|
|
85
|
-
|
|
109
|
+
harnessReadSheetDatasetRows,
|
|
110
|
+
harnessReadStagedFileChunk,
|
|
86
111
|
harnessStartSheetDataset,
|
|
87
112
|
setHarnessBinding,
|
|
88
113
|
} from '../../../sdk/src/plays/harness-stub';
|
|
@@ -115,12 +140,14 @@ type RunRequest = {
|
|
|
115
140
|
runtimeInput: Record<string, unknown>;
|
|
116
141
|
/** Optional inline CSV rows (for plays where ctx.csv was passed inline data). */
|
|
117
142
|
inlineCsv?: { name: string; rows: Record<string, unknown>[] } | null;
|
|
118
|
-
/**
|
|
119
|
-
|
|
143
|
+
/** Staged input files keyed by logical filename (used by ctx.csv). */
|
|
144
|
+
inputFiles?: Record<string, WorkerFileRef> | null;
|
|
120
145
|
/** Files packaged with the play artifact (relative-path imports). */
|
|
121
146
|
packagedFiles?: Array<{
|
|
122
147
|
playPath: string;
|
|
123
148
|
storageKey: string;
|
|
149
|
+
contentType?: string | null;
|
|
150
|
+
bytes?: number | null;
|
|
124
151
|
}> | null;
|
|
125
152
|
/** Partition fan-out: only process rows[start..end) of a sliced dataset. */
|
|
126
153
|
partitionRange?: { start: number; end: number } | null;
|
|
@@ -148,6 +175,14 @@ type RunRequest = {
|
|
|
148
175
|
totalRows?: number;
|
|
149
176
|
};
|
|
150
177
|
|
|
178
|
+
type WorkerFileRef = {
|
|
179
|
+
logicalPath: string;
|
|
180
|
+
fileName: string;
|
|
181
|
+
storageKey: string;
|
|
182
|
+
contentType?: string | null;
|
|
183
|
+
bytes?: number | null;
|
|
184
|
+
};
|
|
185
|
+
|
|
151
186
|
const EXECUTE_TOOL_METADATA_HEADER = 'x-deepline-include-tool-metadata';
|
|
152
187
|
|
|
153
188
|
/** R2 binding injected by the Worker runtime (when present in deploy metadata). */
|
|
@@ -315,6 +350,7 @@ async function probeHarnessOnce(
|
|
|
315
350
|
*/
|
|
316
351
|
const RUNTIME_API_TIMEOUT_MS = 30_000;
|
|
317
352
|
const RUNTIME_API_PLAY_RUN_TIMEOUT_MS = 75_000;
|
|
353
|
+
const RUNTIME_API_RETRY_DELAYS_MS = [250, 750, 1500] as const;
|
|
318
354
|
let loggedMissingRuntimeApiBinding = false;
|
|
319
355
|
|
|
320
356
|
async function fetchRuntimeApi(
|
|
@@ -383,132 +419,6 @@ const WORKER_PLAY_CALL_LIMITS = {
|
|
|
383
419
|
maxConcurrentPlayCalls: 16,
|
|
384
420
|
};
|
|
385
421
|
|
|
386
|
-
/**
|
|
387
|
-
* Produces a dataset-envelope-shaped object compatible with the legacy
|
|
388
|
-
* SerializedPlayDataset shape (kind/datasetKind/count/columns/preview) so
|
|
389
|
-
* tests + assertions that probe `result.rows.columns` etc. work without the
|
|
390
|
-
* ctx changing semantics. Plays still iterate rows via array semantics.
|
|
391
|
-
*/
|
|
392
|
-
function makeWorkerDataset<T extends Record<string, unknown>>(
|
|
393
|
-
name: string,
|
|
394
|
-
rows: T[],
|
|
395
|
-
options?: {
|
|
396
|
-
count?: number;
|
|
397
|
-
datasetKind?: 'csv' | 'map';
|
|
398
|
-
cacheSummary?: string | null;
|
|
399
|
-
workProgress?: {
|
|
400
|
-
total: number;
|
|
401
|
-
executed: number;
|
|
402
|
-
reused: number;
|
|
403
|
-
skipped: number;
|
|
404
|
-
pending: number;
|
|
405
|
-
failed: number;
|
|
406
|
-
degraded?: boolean;
|
|
407
|
-
duplicates?: {
|
|
408
|
-
exact?: number;
|
|
409
|
-
semantic?: number;
|
|
410
|
-
rejected?: number;
|
|
411
|
-
};
|
|
412
|
-
};
|
|
413
|
-
},
|
|
414
|
-
): T[] & {
|
|
415
|
-
count(): Promise<number>;
|
|
416
|
-
peek(limit?: number): Promise<T[]>;
|
|
417
|
-
materialize(limit?: number): Promise<T[]>;
|
|
418
|
-
toJSON(): unknown;
|
|
419
|
-
datasetId: string;
|
|
420
|
-
tableNamespace: string;
|
|
421
|
-
} {
|
|
422
|
-
const datasetId = `map:${name}`;
|
|
423
|
-
const count = Math.max(0, Math.floor(options?.count ?? rows.length));
|
|
424
|
-
const datasetKind = options?.datasetKind ?? 'map';
|
|
425
|
-
const cacheSummary = options?.cacheSummary ?? null;
|
|
426
|
-
const workProgress = options?.workProgress;
|
|
427
|
-
// Build the array result. JSON.stringify on arrays calls toJSON only if
|
|
428
|
-
// present on the array itself — we attach below. The dataset metadata is
|
|
429
|
-
// also exposed via own properties so plays can `enriched.count()` etc.
|
|
430
|
-
const arr = rows as T[] & {
|
|
431
|
-
count(): Promise<number>;
|
|
432
|
-
peek(limit?: number): Promise<T[]>;
|
|
433
|
-
materialize(limit?: number): Promise<T[]>;
|
|
434
|
-
toJSON(): unknown;
|
|
435
|
-
datasetId: string;
|
|
436
|
-
tableNamespace: string;
|
|
437
|
-
};
|
|
438
|
-
const previewLimit = 5;
|
|
439
|
-
const inferredColumns = (() => {
|
|
440
|
-
const cols = new Set<string>();
|
|
441
|
-
for (const r of rows) {
|
|
442
|
-
for (const k of Object.keys(r)) cols.add(k);
|
|
443
|
-
}
|
|
444
|
-
return [...cols];
|
|
445
|
-
})();
|
|
446
|
-
Object.defineProperty(arr, 'count', {
|
|
447
|
-
value: async () => count,
|
|
448
|
-
enumerable: false,
|
|
449
|
-
});
|
|
450
|
-
Object.defineProperty(arr, 'peek', {
|
|
451
|
-
value: async (limit = previewLimit) => rows.slice(0, Math.max(0, limit)),
|
|
452
|
-
enumerable: false,
|
|
453
|
-
});
|
|
454
|
-
Object.defineProperty(arr, 'materialize', {
|
|
455
|
-
value: async (limit?: number) =>
|
|
456
|
-
limit === undefined ? [...rows] : rows.slice(0, Math.max(0, limit)),
|
|
457
|
-
enumerable: false,
|
|
458
|
-
});
|
|
459
|
-
Object.defineProperty(arr, 'datasetId', {
|
|
460
|
-
value: datasetId,
|
|
461
|
-
enumerable: true,
|
|
462
|
-
});
|
|
463
|
-
Object.defineProperty(arr, 'tableNamespace', {
|
|
464
|
-
value: name,
|
|
465
|
-
enumerable: true,
|
|
466
|
-
});
|
|
467
|
-
Object.defineProperty(arr, '__deeplineDatasetCount', {
|
|
468
|
-
value: count,
|
|
469
|
-
enumerable: false,
|
|
470
|
-
});
|
|
471
|
-
Object.defineProperty(arr, '__deeplineDatasetKind', {
|
|
472
|
-
value: datasetKind,
|
|
473
|
-
enumerable: false,
|
|
474
|
-
});
|
|
475
|
-
Object.defineProperty(arr, '__deeplineCacheSummary', {
|
|
476
|
-
value: cacheSummary,
|
|
477
|
-
enumerable: false,
|
|
478
|
-
});
|
|
479
|
-
Object.defineProperty(arr, '__deeplineWorkProgress', {
|
|
480
|
-
value: workProgress,
|
|
481
|
-
enumerable: false,
|
|
482
|
-
});
|
|
483
|
-
// Plays often `return { rows: dataset, count: N }`. JSON.stringify on the
|
|
484
|
-
// array would normally produce `[row, row, ...]` — we want the dataset
|
|
485
|
-
// envelope shape instead so assertions seeing `result.rows.columns` pass.
|
|
486
|
-
// toJSON on an array is honored by JSON.stringify per ES spec.
|
|
487
|
-
// toJSON includes ALL rows so the workflow DO can persist the full
|
|
488
|
-
// dataset to the sheet table. We clone via plain-object copy to avoid
|
|
489
|
-
// re-entrant toJSON resolution (the dataset IS an array; passing it back
|
|
490
|
-
// via `preview: arr` would recurse forever through this same toJSON).
|
|
491
|
-
Object.defineProperty(arr, 'toJSON', {
|
|
492
|
-
value: () => {
|
|
493
|
-
const plainRows = rows.map((r) => ({ ...r }));
|
|
494
|
-
return {
|
|
495
|
-
kind: 'dataset' as const,
|
|
496
|
-
datasetKind,
|
|
497
|
-
datasetId,
|
|
498
|
-
count,
|
|
499
|
-
columns: inferredColumns,
|
|
500
|
-
preview: plainRows,
|
|
501
|
-
tableNamespace: name,
|
|
502
|
-
...(cacheSummary ? { cacheSummary } : {}),
|
|
503
|
-
...(workProgress ? { _metadata: { workProgress } } : {}),
|
|
504
|
-
};
|
|
505
|
-
},
|
|
506
|
-
enumerable: false,
|
|
507
|
-
});
|
|
508
|
-
void previewLimit;
|
|
509
|
-
return arr;
|
|
510
|
-
}
|
|
511
|
-
|
|
512
422
|
type RunnerEvent =
|
|
513
423
|
| {
|
|
514
424
|
type: 'log';
|
|
@@ -533,12 +443,53 @@ type WorkerCtxCallbacks = {
|
|
|
533
443
|
nodeId: string;
|
|
534
444
|
progress: LiveNodeProgressSnapshot;
|
|
535
445
|
}) => void;
|
|
446
|
+
onMapStarted?: (nodeId: string, at?: number) => void;
|
|
447
|
+
onMapCompleted?: (nodeId: string, at?: number) => void;
|
|
448
|
+
onToolCalled?: (toolId: string, at?: number) => void;
|
|
449
|
+
onToolFailed?: (toolId: string, at?: number) => void;
|
|
536
450
|
};
|
|
537
451
|
|
|
538
452
|
function nowMs(): number {
|
|
539
453
|
return Date.now();
|
|
540
454
|
}
|
|
541
455
|
|
|
456
|
+
function getStaticSubstepNodeId(
|
|
457
|
+
substep: PlayStaticSubstep,
|
|
458
|
+
index: number,
|
|
459
|
+
): string {
|
|
460
|
+
switch (substep.type) {
|
|
461
|
+
case 'csv':
|
|
462
|
+
return `csv:${substep.field || index}`;
|
|
463
|
+
case 'map':
|
|
464
|
+
return `map:${substep.tableNamespace ?? substep.field}`;
|
|
465
|
+
case 'tool':
|
|
466
|
+
return `tool:${substep.field}:${substep.toolId}`;
|
|
467
|
+
case 'waterfall':
|
|
468
|
+
return `waterfall:${substep.id ?? substep.field}`;
|
|
469
|
+
case 'play_call':
|
|
470
|
+
return `play_call:${substep.field}:${substep.playId}`;
|
|
471
|
+
case 'run_javascript':
|
|
472
|
+
return `run_javascript:${substep.alias}`;
|
|
473
|
+
case 'code':
|
|
474
|
+
return `code:${substep.field || index}`;
|
|
475
|
+
default:
|
|
476
|
+
return `node:${index}`;
|
|
477
|
+
}
|
|
478
|
+
}
|
|
479
|
+
|
|
480
|
+
function buildOrderedNodeList(
|
|
481
|
+
contractSnapshot: unknown,
|
|
482
|
+
): Array<{ nodeId: string; type: string }> {
|
|
483
|
+
const snapshot = isRecord(contractSnapshot) ? contractSnapshot : null;
|
|
484
|
+
const substeps = getTopLevelPipelineSubsteps(
|
|
485
|
+
(snapshot?.staticPipeline as PlayStaticPipeline | null | undefined) ?? null,
|
|
486
|
+
);
|
|
487
|
+
return substeps.map((substep, index) => ({
|
|
488
|
+
nodeId: getStaticSubstepNodeId(substep, index),
|
|
489
|
+
type: substep.type,
|
|
490
|
+
}));
|
|
491
|
+
}
|
|
492
|
+
|
|
542
493
|
function recordRunnerPerfTrace(input: {
|
|
543
494
|
req: RunRequest;
|
|
544
495
|
phase: string;
|
|
@@ -557,7 +508,7 @@ function recordRunnerPerfTrace(input: {
|
|
|
557
508
|
source: 'dynamic_worker' as const,
|
|
558
509
|
runId: input.req.runId,
|
|
559
510
|
phase: `runner.${input.phase}`,
|
|
560
|
-
|
|
511
|
+
ms: input.ms ?? 0,
|
|
561
512
|
...(input.extra ?? {}),
|
|
562
513
|
};
|
|
563
514
|
console.log(
|
|
@@ -614,44 +565,83 @@ async function postRuntimeApi<T>(
|
|
|
614
565
|
// Routes through the in-process RUNTIME_API binding when present; otherwise
|
|
615
566
|
// falls back to a public fetch against `${baseUrl}${path}`. Either path
|
|
616
567
|
// hits the same handler with the same auth — only the transport changes.
|
|
617
|
-
const
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
568
|
+
const serializedBody = JSON.stringify(body);
|
|
569
|
+
let lastError: unknown = null;
|
|
570
|
+
for (
|
|
571
|
+
let attempt = 0;
|
|
572
|
+
attempt <= RUNTIME_API_RETRY_DELAYS_MS.length;
|
|
573
|
+
attempt += 1
|
|
574
|
+
) {
|
|
575
|
+
let res: Response;
|
|
576
|
+
try {
|
|
577
|
+
res = await fetchRuntimeApi(baseUrl, '/api/v2/plays/internal/runtime', {
|
|
578
|
+
method: 'POST',
|
|
579
|
+
headers: {
|
|
580
|
+
'content-type': 'application/json',
|
|
581
|
+
authorization: `Bearer ${executorToken}`,
|
|
582
|
+
'x-deepline-request-id': makeRequestId(),
|
|
583
|
+
},
|
|
584
|
+
body: serializedBody,
|
|
585
|
+
});
|
|
586
|
+
} catch (error) {
|
|
587
|
+
lastError = error;
|
|
588
|
+
if (
|
|
589
|
+
attempt >= RUNTIME_API_RETRY_DELAYS_MS.length ||
|
|
590
|
+
!isRetryableRuntimeApiError(error)
|
|
591
|
+
) {
|
|
592
|
+
throw error;
|
|
593
|
+
}
|
|
594
|
+
await sleepRuntimeApiRetry(attempt);
|
|
595
|
+
continue;
|
|
596
|
+
}
|
|
597
|
+
|
|
598
|
+
if (res.ok) {
|
|
599
|
+
return (await res.json()) as T;
|
|
600
|
+
}
|
|
601
|
+
|
|
627
602
|
const text = await res.text().catch(() => '');
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
603
|
+
const redacted = redactSecretsFromLogString(text.slice(0, 500));
|
|
604
|
+
lastError = new Error(`runtime API ${res.status}: ${redacted}`);
|
|
605
|
+
if (
|
|
606
|
+
attempt >= RUNTIME_API_RETRY_DELAYS_MS.length ||
|
|
607
|
+
!isRetryableRuntimeApiResponse(res.status, text)
|
|
608
|
+
) {
|
|
609
|
+
throw lastError;
|
|
610
|
+
}
|
|
611
|
+
await sleepRuntimeApiRetry(attempt);
|
|
631
612
|
}
|
|
632
|
-
|
|
613
|
+
throw lastError instanceof Error ? lastError : new Error(String(lastError));
|
|
633
614
|
}
|
|
634
615
|
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
)
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
throw new Error(
|
|
652
|
-
`Deepline API ${path} ${res.status}: ${redactSecretsFromLogString(text.slice(0, 500))}`,
|
|
653
|
-
);
|
|
616
|
+
function isRetryableRuntimeApiError(error: unknown): boolean {
|
|
617
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
618
|
+
return /timed out|timeout|fetch failed|ECONNRESET|ECONNREFUSED|UND_ERR_CONNECT_TIMEOUT/i.test(
|
|
619
|
+
message,
|
|
620
|
+
);
|
|
621
|
+
}
|
|
622
|
+
|
|
623
|
+
function isRetryableRuntimeApiResponse(status: number, body: string): boolean {
|
|
624
|
+
if (
|
|
625
|
+
status === 408 ||
|
|
626
|
+
status === 429 ||
|
|
627
|
+
status === 502 ||
|
|
628
|
+
status === 503 ||
|
|
629
|
+
status === 504
|
|
630
|
+
) {
|
|
631
|
+
return true;
|
|
654
632
|
}
|
|
633
|
+
return (
|
|
634
|
+
status === 500 &&
|
|
635
|
+
/timeout exceeded when trying to connect|timed out|fetch failed|ECONNRESET|UND_ERR_CONNECT_TIMEOUT/i.test(
|
|
636
|
+
body,
|
|
637
|
+
)
|
|
638
|
+
);
|
|
639
|
+
}
|
|
640
|
+
|
|
641
|
+
async function sleepRuntimeApiRetry(attempt: number): Promise<void> {
|
|
642
|
+
await new Promise((resolve) =>
|
|
643
|
+
setTimeout(resolve, RUNTIME_API_RETRY_DELAYS_MS[attempt] ?? 0),
|
|
644
|
+
);
|
|
655
645
|
}
|
|
656
646
|
|
|
657
647
|
function describeRuntimeApiBody(body: unknown): string {
|
|
@@ -973,6 +963,21 @@ async function executeTool(
|
|
|
973
963
|
return callToolDirect(req, args);
|
|
974
964
|
}
|
|
975
965
|
|
|
966
|
+
async function executeToolWithLifecycle(
|
|
967
|
+
req: RunRequest,
|
|
968
|
+
args: { id: string; toolId: string; input: Record<string, unknown> },
|
|
969
|
+
workflowStep: WorkflowStep | undefined,
|
|
970
|
+
callbacks: WorkerCtxCallbacks | undefined,
|
|
971
|
+
): Promise<ToolExecuteResult> {
|
|
972
|
+
callbacks?.onToolCalled?.(args.toolId, nowMs());
|
|
973
|
+
try {
|
|
974
|
+
return await executeTool(req, args, workflowStep);
|
|
975
|
+
} catch (error) {
|
|
976
|
+
callbacks?.onToolFailed?.(args.toolId, nowMs());
|
|
977
|
+
throw error;
|
|
978
|
+
}
|
|
979
|
+
}
|
|
980
|
+
|
|
976
981
|
function isToolExecuteRecord(value: unknown): value is Record<string, unknown> {
|
|
977
982
|
return typeof value === 'object' && value !== null && !Array.isArray(value);
|
|
978
983
|
}
|
|
@@ -1032,14 +1037,19 @@ async function waitForSyntheticIntegrationEvent(
|
|
|
1032
1037
|
? Math.max(1, Math.round(input.timeout_ms))
|
|
1033
1038
|
: 30_000;
|
|
1034
1039
|
await postRuntimeApiBestEffort(req.baseUrl, req.executorToken, {
|
|
1035
|
-
action: '
|
|
1040
|
+
action: 'append_run_events',
|
|
1036
1041
|
playId: req.runId,
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1042
|
+
events: [
|
|
1043
|
+
{
|
|
1044
|
+
type: 'log.appended',
|
|
1045
|
+
runId: req.runId,
|
|
1046
|
+
source: 'worker',
|
|
1047
|
+
occurredAt: nowMs(),
|
|
1048
|
+
lines: [
|
|
1049
|
+
`Waiting for integration_event:${eventKey} for up to ${timeoutMs}ms.`,
|
|
1050
|
+
],
|
|
1051
|
+
} satisfies PlayRunLedgerEvent,
|
|
1052
|
+
],
|
|
1043
1053
|
});
|
|
1044
1054
|
try {
|
|
1045
1055
|
const event = (await (
|
|
@@ -1751,6 +1761,7 @@ type WorkerMapChunkSummary<T extends Record<string, unknown>> = {
|
|
|
1751
1761
|
outputDatasetId: string;
|
|
1752
1762
|
hash: string;
|
|
1753
1763
|
preview: T[];
|
|
1764
|
+
cachedRows?: T[];
|
|
1754
1765
|
};
|
|
1755
1766
|
|
|
1756
1767
|
function toWorkflowSerializableValue<T>(value: T): T {
|
|
@@ -1932,6 +1943,8 @@ async function executeWorkerWaterfall(
|
|
|
1932
1943
|
toolNameOrSpec: string | WorkerInlineWaterfallSpec,
|
|
1933
1944
|
input: Record<string, unknown>,
|
|
1934
1945
|
opts?: WorkerWaterfallOptions,
|
|
1946
|
+
callbacks?: WorkerCtxCallbacks,
|
|
1947
|
+
workflowStep?: WorkflowStep,
|
|
1935
1948
|
): Promise<unknown | null> {
|
|
1936
1949
|
// Inline-spec form
|
|
1937
1950
|
if (typeof toolNameOrSpec === 'object' && toolNameOrSpec) {
|
|
@@ -1948,20 +1961,32 @@ async function executeWorkerWaterfall(
|
|
|
1948
1961
|
toolId?: unknown,
|
|
1949
1962
|
toolInput?: unknown,
|
|
1950
1963
|
) =>
|
|
1951
|
-
await
|
|
1964
|
+
await executeToolWithLifecycle(
|
|
1952
1965
|
req,
|
|
1953
1966
|
normalizeToolExecuteArgs(requestOrKey, toolId, toolInput),
|
|
1967
|
+
workflowStep,
|
|
1968
|
+
callbacks,
|
|
1954
1969
|
),
|
|
1955
1970
|
},
|
|
1956
1971
|
tool: async (key, toolId, toolInput) =>
|
|
1957
|
-
await
|
|
1972
|
+
await executeToolWithLifecycle(
|
|
1973
|
+
req,
|
|
1974
|
+
{ id: key, toolId, input: toolInput },
|
|
1975
|
+
workflowStep,
|
|
1976
|
+
callbacks,
|
|
1977
|
+
),
|
|
1958
1978
|
});
|
|
1959
1979
|
} else {
|
|
1960
|
-
result = await
|
|
1961
|
-
|
|
1962
|
-
|
|
1963
|
-
|
|
1964
|
-
|
|
1980
|
+
result = await executeToolWithLifecycle(
|
|
1981
|
+
req,
|
|
1982
|
+
{
|
|
1983
|
+
id: step.id,
|
|
1984
|
+
toolId: step.toolId,
|
|
1985
|
+
input: step.mapInput(input),
|
|
1986
|
+
},
|
|
1987
|
+
workflowStep,
|
|
1988
|
+
callbacks,
|
|
1989
|
+
);
|
|
1965
1990
|
}
|
|
1966
1991
|
} catch {
|
|
1967
1992
|
continue;
|
|
@@ -2047,7 +2072,12 @@ async function executeWorkerWaterfall(
|
|
|
2047
2072
|
const providers = opts?.providers ?? [];
|
|
2048
2073
|
if (providers.length === 0) {
|
|
2049
2074
|
try {
|
|
2050
|
-
return await
|
|
2075
|
+
return await executeToolWithLifecycle(
|
|
2076
|
+
req,
|
|
2077
|
+
{ id: toolName, toolId: toolName, input },
|
|
2078
|
+
workflowStep,
|
|
2079
|
+
callbacks,
|
|
2080
|
+
);
|
|
2051
2081
|
} catch {
|
|
2052
2082
|
return null;
|
|
2053
2083
|
}
|
|
@@ -2055,11 +2085,16 @@ async function executeWorkerWaterfall(
|
|
|
2055
2085
|
let lastError: Error | null = null;
|
|
2056
2086
|
for (const provider of providers) {
|
|
2057
2087
|
try {
|
|
2058
|
-
const result = await
|
|
2059
|
-
|
|
2060
|
-
|
|
2061
|
-
|
|
2062
|
-
|
|
2088
|
+
const result = await executeToolWithLifecycle(
|
|
2089
|
+
req,
|
|
2090
|
+
{
|
|
2091
|
+
id: `${toolName}:${provider}`,
|
|
2092
|
+
toolId: toolName,
|
|
2093
|
+
input: { ...input, provider },
|
|
2094
|
+
},
|
|
2095
|
+
workflowStep,
|
|
2096
|
+
callbacks,
|
|
2097
|
+
);
|
|
2063
2098
|
if (resultHasContent(result)) {
|
|
2064
2099
|
recorder.push({
|
|
2065
2100
|
waterfallId: toolName,
|
|
@@ -2114,6 +2149,118 @@ function makeCsvParserState(): CsvParserState {
|
|
|
2114
2149
|
return { field: '', row: [], inQuotes: false, pendingCr: false };
|
|
2115
2150
|
}
|
|
2116
2151
|
|
|
2152
|
+
function normalizeExpectedBytes(value: unknown): number | null {
|
|
2153
|
+
return typeof value === 'number' && Number.isSafeInteger(value) && value >= 0
|
|
2154
|
+
? value
|
|
2155
|
+
: null;
|
|
2156
|
+
}
|
|
2157
|
+
|
|
2158
|
+
function hasByteLengthMismatch(
|
|
2159
|
+
expectedBytes: number | null | undefined,
|
|
2160
|
+
actualBytes: number | null | undefined,
|
|
2161
|
+
): boolean {
|
|
2162
|
+
return (
|
|
2163
|
+
typeof expectedBytes === 'number' &&
|
|
2164
|
+
typeof actualBytes === 'number' &&
|
|
2165
|
+
actualBytes !== expectedBytes
|
|
2166
|
+
);
|
|
2167
|
+
}
|
|
2168
|
+
|
|
2169
|
+
async function* iterReadableStreamChunks(
|
|
2170
|
+
body: ReadableStream<Uint8Array>,
|
|
2171
|
+
): AsyncGenerator<Uint8Array, void, void> {
|
|
2172
|
+
const reader = body.getReader();
|
|
2173
|
+
try {
|
|
2174
|
+
while (true) {
|
|
2175
|
+
const { done, value } = await reader.read();
|
|
2176
|
+
if (done) return;
|
|
2177
|
+
if (value && value.byteLength > 0) yield value;
|
|
2178
|
+
}
|
|
2179
|
+
} finally {
|
|
2180
|
+
reader.releaseLock();
|
|
2181
|
+
}
|
|
2182
|
+
}
|
|
2183
|
+
|
|
2184
|
+
function singleByteChunk(bytes: Uint8Array): AsyncIterable<Uint8Array> {
|
|
2185
|
+
return {
|
|
2186
|
+
async *[Symbol.asyncIterator]() {
|
|
2187
|
+
if (bytes.byteLength > 0) yield bytes;
|
|
2188
|
+
},
|
|
2189
|
+
};
|
|
2190
|
+
}
|
|
2191
|
+
|
|
2192
|
+
async function* guardExpectedByteChunks(input: {
|
|
2193
|
+
req: RunRequest;
|
|
2194
|
+
logicalPath: string;
|
|
2195
|
+
storageKey: string;
|
|
2196
|
+
source: string;
|
|
2197
|
+
chunks: AsyncIterable<Uint8Array>;
|
|
2198
|
+
expectedBytes?: number | null;
|
|
2199
|
+
reportedBytes?: number | null;
|
|
2200
|
+
fallback?: () => AsyncIterable<Uint8Array>;
|
|
2201
|
+
}): AsyncGenerator<Uint8Array, void, void> {
|
|
2202
|
+
const expectedBytes =
|
|
2203
|
+
normalizeExpectedBytes(input.expectedBytes) ??
|
|
2204
|
+
normalizeExpectedBytes(input.reportedBytes);
|
|
2205
|
+
let bytesRead = 0;
|
|
2206
|
+
let sawChunk = false;
|
|
2207
|
+
let skippedEmptyChunks = 0;
|
|
2208
|
+
|
|
2209
|
+
for await (const value of input.chunks) {
|
|
2210
|
+
if (!value || value.byteLength === 0) {
|
|
2211
|
+
skippedEmptyChunks += 1;
|
|
2212
|
+
continue;
|
|
2213
|
+
}
|
|
2214
|
+
sawChunk = true;
|
|
2215
|
+
bytesRead += value.byteLength;
|
|
2216
|
+
yield value;
|
|
2217
|
+
}
|
|
2218
|
+
|
|
2219
|
+
if (!sawChunk) {
|
|
2220
|
+
if (typeof expectedBytes === 'number' && expectedBytes > 0) {
|
|
2221
|
+
recordRunnerPerfTrace({
|
|
2222
|
+
req: input.req,
|
|
2223
|
+
phase: 'csv.open_empty_body',
|
|
2224
|
+
extra: {
|
|
2225
|
+
source: input.source,
|
|
2226
|
+
logicalPath: input.logicalPath,
|
|
2227
|
+
expectedBytes,
|
|
2228
|
+
reportedBytes: normalizeExpectedBytes(input.reportedBytes),
|
|
2229
|
+
skippedEmptyChunks,
|
|
2230
|
+
storageKey: input.storageKey,
|
|
2231
|
+
},
|
|
2232
|
+
});
|
|
2233
|
+
if (input.fallback) {
|
|
2234
|
+
yield* input.fallback();
|
|
2235
|
+
return;
|
|
2236
|
+
}
|
|
2237
|
+
throw new Error(
|
|
2238
|
+
`ctx.csv("${input.logicalPath}"): ${input.source} returned an empty body for ` +
|
|
2239
|
+
`${expectedBytes} byte staged file ${input.storageKey}.`,
|
|
2240
|
+
);
|
|
2241
|
+
}
|
|
2242
|
+
return;
|
|
2243
|
+
}
|
|
2244
|
+
|
|
2245
|
+
if (expectedBytes !== null && bytesRead !== expectedBytes) {
|
|
2246
|
+
recordRunnerPerfTrace({
|
|
2247
|
+
req: input.req,
|
|
2248
|
+
phase: 'csv.read_mismatch',
|
|
2249
|
+
extra: {
|
|
2250
|
+
source: input.source,
|
|
2251
|
+
logicalPath: input.logicalPath,
|
|
2252
|
+
expectedBytes,
|
|
2253
|
+
actualBytes: bytesRead,
|
|
2254
|
+
storageKey: input.storageKey,
|
|
2255
|
+
},
|
|
2256
|
+
});
|
|
2257
|
+
throw new Error(
|
|
2258
|
+
`ctx.csv("${input.logicalPath}"): ${input.source} streamed ${bytesRead} bytes ` +
|
|
2259
|
+
`for ${expectedBytes} byte staged file ${input.storageKey}.`,
|
|
2260
|
+
);
|
|
2261
|
+
}
|
|
2262
|
+
}
|
|
2263
|
+
|
|
2117
2264
|
/**
|
|
2118
2265
|
* Push one buffered text chunk through the CSV state machine. Accumulates
|
|
2119
2266
|
* fully-terminated rows into `out`; partial trailing field/row stays in
|
|
@@ -2182,11 +2329,10 @@ function flushCsvParser(state: CsvParserState, out: string[][]): void {
|
|
|
2182
2329
|
* to every subsequent row. Stops cleanly on stream end and flushes any
|
|
2183
2330
|
* trailing row.
|
|
2184
2331
|
*/
|
|
2185
|
-
async function*
|
|
2186
|
-
|
|
2332
|
+
async function* streamCsvRowsFromByteChunks<T extends Record<string, unknown>>(
|
|
2333
|
+
byteChunks: AsyncIterable<Uint8Array>,
|
|
2187
2334
|
chunkSize: number,
|
|
2188
2335
|
): AsyncGenerator<T[], void, void> {
|
|
2189
|
-
const reader = body.getReader();
|
|
2190
2336
|
const decoder = new TextDecoder('utf-8');
|
|
2191
2337
|
const state = makeCsvParserState();
|
|
2192
2338
|
const physicalRowBuffer: string[][] = [];
|
|
@@ -2195,7 +2341,13 @@ async function* streamCsvRowsFromBody<T extends Record<string, unknown>>(
|
|
|
2195
2341
|
|
|
2196
2342
|
const flushPhysicalRowsAsObjects = (terminal: boolean): T[][] => {
|
|
2197
2343
|
const yielded: T[][] = [];
|
|
2198
|
-
if (physicalRowBuffer.length === 0)
|
|
2344
|
+
if (physicalRowBuffer.length === 0) {
|
|
2345
|
+
if (terminal && pendingChunk.length > 0) {
|
|
2346
|
+
yielded.push(pendingChunk);
|
|
2347
|
+
pendingChunk = [];
|
|
2348
|
+
}
|
|
2349
|
+
return yielded;
|
|
2350
|
+
}
|
|
2199
2351
|
if (!headers) {
|
|
2200
2352
|
headers = physicalRowBuffer.shift() ?? null;
|
|
2201
2353
|
if (!headers) return yielded;
|
|
@@ -2219,224 +2371,240 @@ async function* streamCsvRowsFromBody<T extends Record<string, unknown>>(
|
|
|
2219
2371
|
return yielded;
|
|
2220
2372
|
};
|
|
2221
2373
|
|
|
2222
|
-
|
|
2223
|
-
|
|
2224
|
-
|
|
2225
|
-
|
|
2226
|
-
|
|
2227
|
-
|
|
2228
|
-
|
|
2229
|
-
|
|
2230
|
-
|
|
2231
|
-
pushCsvTextIntoParser(state, text, physicalRowBuffer);
|
|
2232
|
-
for (const chunk of flushPhysicalRowsAsObjects(false)) yield chunk;
|
|
2233
|
-
}
|
|
2234
|
-
} finally {
|
|
2235
|
-
reader.releaseLock();
|
|
2374
|
+
for await (const value of byteChunks) {
|
|
2375
|
+
if (value.byteLength === 0) continue;
|
|
2376
|
+
const text = decoder.decode(value, { stream: true });
|
|
2377
|
+
pushCsvTextIntoParser(state, text, physicalRowBuffer);
|
|
2378
|
+
for (const chunk of flushPhysicalRowsAsObjects(false)) yield chunk;
|
|
2379
|
+
}
|
|
2380
|
+
const trailingText = decoder.decode();
|
|
2381
|
+
if (trailingText) {
|
|
2382
|
+
pushCsvTextIntoParser(state, trailingText, physicalRowBuffer);
|
|
2236
2383
|
}
|
|
2384
|
+
flushCsvParser(state, physicalRowBuffer);
|
|
2385
|
+
for (const chunk of flushPhysicalRowsAsObjects(true)) yield chunk;
|
|
2237
2386
|
void TARGET_CSV_DECODE_CHUNK_BYTES; // referenced for future tuning
|
|
2238
2387
|
}
|
|
2239
2388
|
|
|
2389
|
+
function readHarnessStagedFileChunks(input: {
|
|
2390
|
+
req: RunRequest;
|
|
2391
|
+
logicalPath: string;
|
|
2392
|
+
storageKey: string;
|
|
2393
|
+
expectedBytes?: number | null;
|
|
2394
|
+
}): AsyncIterable<Uint8Array> {
|
|
2395
|
+
const expectedBytes = normalizeExpectedBytes(input.expectedBytes);
|
|
2396
|
+
return {
|
|
2397
|
+
async *[Symbol.asyncIterator]() {
|
|
2398
|
+
let offset = 0;
|
|
2399
|
+
let observedBytes = 0;
|
|
2400
|
+
let objectSize: number | null = null;
|
|
2401
|
+
let loggedOpen = false;
|
|
2402
|
+
let done = false;
|
|
2403
|
+
|
|
2404
|
+
while (!done) {
|
|
2405
|
+
const result = await harnessReadStagedFileChunk({
|
|
2406
|
+
storageKey: input.storageKey,
|
|
2407
|
+
offset,
|
|
2408
|
+
length: TARGET_CSV_DECODE_CHUNK_BYTES,
|
|
2409
|
+
});
|
|
2410
|
+
const actualObjectSize = normalizeExpectedBytes(result.objectSize);
|
|
2411
|
+
if (actualObjectSize === null) {
|
|
2412
|
+
throw new Error(
|
|
2413
|
+
`ctx.csv("${input.logicalPath}"): harness returned an invalid object size for ${input.storageKey}.`,
|
|
2414
|
+
);
|
|
2415
|
+
}
|
|
2416
|
+
if (objectSize !== null && objectSize !== actualObjectSize) {
|
|
2417
|
+
throw new Error(
|
|
2418
|
+
`ctx.csv("${input.logicalPath}"): staged file size changed while streaming ` +
|
|
2419
|
+
`${input.storageKey}; started at ${objectSize} bytes, now ${actualObjectSize}.`,
|
|
2420
|
+
);
|
|
2421
|
+
}
|
|
2422
|
+
objectSize = actualObjectSize;
|
|
2423
|
+
if (hasByteLengthMismatch(expectedBytes, actualObjectSize)) {
|
|
2424
|
+
throw new Error(
|
|
2425
|
+
`ctx.csv("${input.logicalPath}"): harness staged file size mismatch for ` +
|
|
2426
|
+
`storageKey=${input.storageKey}; expected ${expectedBytes} bytes, got ${actualObjectSize}.`,
|
|
2427
|
+
);
|
|
2428
|
+
}
|
|
2429
|
+
if (result.offset !== offset) {
|
|
2430
|
+
throw new Error(
|
|
2431
|
+
`ctx.csv("${input.logicalPath}"): harness returned offset ${result.offset} while ` +
|
|
2432
|
+
`reading offset ${offset} from ${input.storageKey}.`,
|
|
2433
|
+
);
|
|
2434
|
+
}
|
|
2435
|
+
|
|
2436
|
+
const chunk = result.chunk;
|
|
2437
|
+
if (!(chunk instanceof Uint8Array)) {
|
|
2438
|
+
throw new Error(
|
|
2439
|
+
`ctx.csv("${input.logicalPath}"): harness returned a non-byte chunk for ${input.storageKey}.`,
|
|
2440
|
+
);
|
|
2441
|
+
}
|
|
2442
|
+
if (chunk.byteLength !== result.bytesRead) {
|
|
2443
|
+
throw new Error(
|
|
2444
|
+
`ctx.csv("${input.logicalPath}"): harness chunk metadata mismatch for ` +
|
|
2445
|
+
`${input.storageKey}; bytesRead=${result.bytesRead}, chunk=${chunk.byteLength}.`,
|
|
2446
|
+
);
|
|
2447
|
+
}
|
|
2448
|
+
if (chunk.byteLength === 0 && !result.done) {
|
|
2449
|
+
throw new Error(
|
|
2450
|
+
`ctx.csv("${input.logicalPath}"): harness returned an empty non-terminal chunk for ${input.storageKey}.`,
|
|
2451
|
+
);
|
|
2452
|
+
}
|
|
2453
|
+
|
|
2454
|
+
if (!loggedOpen) {
|
|
2455
|
+
loggedOpen = true;
|
|
2456
|
+
recordRunnerPerfTrace({
|
|
2457
|
+
req: input.req,
|
|
2458
|
+
phase: 'csv.open',
|
|
2459
|
+
extra: {
|
|
2460
|
+
source: 'harness_rpc_range',
|
|
2461
|
+
logicalPath: input.logicalPath,
|
|
2462
|
+
expectedBytes,
|
|
2463
|
+
actualBytes: actualObjectSize,
|
|
2464
|
+
chunkBytes: TARGET_CSV_DECODE_CHUNK_BYTES,
|
|
2465
|
+
storageKey: input.storageKey,
|
|
2466
|
+
},
|
|
2467
|
+
});
|
|
2468
|
+
}
|
|
2469
|
+
|
|
2470
|
+
offset += chunk.byteLength;
|
|
2471
|
+
observedBytes += chunk.byteLength;
|
|
2472
|
+
done = result.done;
|
|
2473
|
+
if (chunk.byteLength > 0) yield chunk;
|
|
2474
|
+
}
|
|
2475
|
+
|
|
2476
|
+
const requiredBytes = expectedBytes ?? objectSize;
|
|
2477
|
+
if (
|
|
2478
|
+
typeof requiredBytes === 'number' &&
|
|
2479
|
+
observedBytes !== requiredBytes
|
|
2480
|
+
) {
|
|
2481
|
+
recordRunnerPerfTrace({
|
|
2482
|
+
req: input.req,
|
|
2483
|
+
phase: 'csv.read_mismatch',
|
|
2484
|
+
extra: {
|
|
2485
|
+
source: 'harness_rpc_range',
|
|
2486
|
+
logicalPath: input.logicalPath,
|
|
2487
|
+
expectedBytes: requiredBytes,
|
|
2488
|
+
actualBytes: observedBytes,
|
|
2489
|
+
storageKey: input.storageKey,
|
|
2490
|
+
},
|
|
2491
|
+
});
|
|
2492
|
+
throw new Error(
|
|
2493
|
+
`ctx.csv("${input.logicalPath}"): harness streamed ${observedBytes} bytes ` +
|
|
2494
|
+
`for ${requiredBytes} byte staged file ${input.storageKey}.`,
|
|
2495
|
+
);
|
|
2496
|
+
}
|
|
2497
|
+
},
|
|
2498
|
+
};
|
|
2499
|
+
}
|
|
2500
|
+
|
|
2240
2501
|
/**
|
|
2241
|
-
*
|
|
2502
|
+
* Dataset source adapter that returns byte chunks. Per-play Workers loaded via
|
|
2242
2503
|
* WorkerLoader cannot accept a raw R2Bucket binding (CF Workflows refuses to
|
|
2243
2504
|
* serialize R2Bucket through its workflow-state path), so per-play Workers
|
|
2244
2505
|
* stream staged files through the long-lived harness Worker service binding.
|
|
2245
2506
|
* Returns null only if the asset is genuinely missing (404).
|
|
2246
2507
|
*/
|
|
2247
|
-
async function
|
|
2508
|
+
async function openFileByteChunks(input: {
|
|
2248
2509
|
req: RunRequest;
|
|
2249
2510
|
env: WorkerEnv;
|
|
2250
2511
|
logicalPath: string;
|
|
2251
|
-
|
|
2252
|
-
}): Promise<
|
|
2512
|
+
file: WorkerFileRef;
|
|
2513
|
+
}): Promise<AsyncIterable<Uint8Array> | null> {
|
|
2514
|
+
const storageKey = input.file.storageKey;
|
|
2515
|
+
const expectedBytes = normalizeExpectedBytes(input.file.bytes);
|
|
2516
|
+
if (expectedBytes === null) {
|
|
2517
|
+
throw new Error(
|
|
2518
|
+
`ctx.csv("${input.logicalPath}"): staged dataset handle is missing a byte length for ${storageKey}.`,
|
|
2519
|
+
);
|
|
2520
|
+
}
|
|
2253
2521
|
if (input.env.PLAYS_BUCKET) {
|
|
2254
|
-
const object = await input.env.PLAYS_BUCKET.get(
|
|
2522
|
+
const object = await input.env.PLAYS_BUCKET.get(storageKey);
|
|
2255
2523
|
if (object) {
|
|
2256
|
-
|
|
2524
|
+
if (hasByteLengthMismatch(expectedBytes, object.size)) {
|
|
2525
|
+
recordRunnerPerfTrace({
|
|
2526
|
+
req: input.req,
|
|
2527
|
+
phase: 'csv.open_mismatch',
|
|
2528
|
+
extra: {
|
|
2529
|
+
source: 'direct_r2',
|
|
2530
|
+
logicalPath: input.logicalPath,
|
|
2531
|
+
expectedBytes,
|
|
2532
|
+
actualBytes: object.size,
|
|
2533
|
+
storageKey,
|
|
2534
|
+
},
|
|
2535
|
+
});
|
|
2536
|
+
await object.body.cancel().catch(() => undefined);
|
|
2537
|
+
} else {
|
|
2538
|
+
recordRunnerPerfTrace({
|
|
2539
|
+
req: input.req,
|
|
2540
|
+
phase: 'csv.open',
|
|
2541
|
+
extra: {
|
|
2542
|
+
source: 'direct_r2',
|
|
2543
|
+
logicalPath: input.logicalPath,
|
|
2544
|
+
expectedBytes,
|
|
2545
|
+
actualBytes: object.size,
|
|
2546
|
+
storageKey,
|
|
2547
|
+
},
|
|
2548
|
+
});
|
|
2549
|
+
return guardExpectedByteChunks({
|
|
2550
|
+
req: input.req,
|
|
2551
|
+
logicalPath: input.logicalPath,
|
|
2552
|
+
storageKey,
|
|
2553
|
+
source: 'direct_r2',
|
|
2554
|
+
chunks: iterReadableStreamChunks(object.body),
|
|
2555
|
+
expectedBytes,
|
|
2556
|
+
reportedBytes: object.size,
|
|
2557
|
+
fallback: () =>
|
|
2558
|
+
readHarnessStagedFileChunks({
|
|
2559
|
+
req: input.req,
|
|
2560
|
+
logicalPath: input.logicalPath,
|
|
2561
|
+
storageKey,
|
|
2562
|
+
expectedBytes,
|
|
2563
|
+
}),
|
|
2564
|
+
});
|
|
2565
|
+
}
|
|
2257
2566
|
}
|
|
2258
2567
|
}
|
|
2259
2568
|
if (input.env.PLAY_ASSETS) {
|
|
2260
2569
|
try {
|
|
2261
2570
|
const text = await input.env.PLAY_ASSETS.readText(input.logicalPath);
|
|
2262
2571
|
const bytes = new TextEncoder().encode(text);
|
|
2263
|
-
|
|
2264
|
-
|
|
2265
|
-
|
|
2266
|
-
|
|
2572
|
+
if (hasByteLengthMismatch(expectedBytes, bytes.byteLength)) {
|
|
2573
|
+
throw new Error(
|
|
2574
|
+
`ctx.csv("${input.logicalPath}"): packaged asset size mismatch for ` +
|
|
2575
|
+
`storageKey=${storageKey}; expected ${expectedBytes} bytes, got ${bytes.byteLength}.`,
|
|
2576
|
+
);
|
|
2577
|
+
}
|
|
2578
|
+
recordRunnerPerfTrace({
|
|
2579
|
+
req: input.req,
|
|
2580
|
+
phase: 'csv.open',
|
|
2581
|
+
extra: {
|
|
2582
|
+
source: 'play_assets',
|
|
2583
|
+
logicalPath: input.logicalPath,
|
|
2584
|
+
expectedBytes,
|
|
2585
|
+
actualBytes: bytes.byteLength,
|
|
2586
|
+
storageKey,
|
|
2267
2587
|
},
|
|
2268
2588
|
});
|
|
2589
|
+
return singleByteChunk(bytes);
|
|
2269
2590
|
} catch (error) {
|
|
2270
2591
|
if (!/missing from R2|not found|No such object/i.test(String(error))) {
|
|
2271
|
-
throw error;
|
|
2272
|
-
}
|
|
2273
|
-
}
|
|
2274
|
-
}
|
|
2275
|
-
|
|
2276
|
-
// The harness fetch path returns a real Response body backed by R2.
|
|
2277
|
-
// Errors are loud: we want CI / regression failures to surface the real
|
|
2278
|
-
// cause (auth, missing object, network) rather than getting squashed into a
|
|
2279
|
-
// generic "R2 asset is not reachable".
|
|
2280
|
-
const response = await harnessFetchStagedFile({
|
|
2281
|
-
executorToken: input.req.executorToken,
|
|
2282
|
-
storageKey: input.storageKey,
|
|
2283
|
-
});
|
|
2284
|
-
if (response.status === 404) {
|
|
2285
|
-
throw new Error(
|
|
2286
|
-
`ctx.csv("${input.logicalPath}"): harness R2 fetch returned 404 for storageKey=${input.storageKey}. ` +
|
|
2287
|
-
`The staged file is missing from R2; the upload either failed silently before the run started, ` +
|
|
2288
|
-
`or the storageKey threaded through the workflow params no longer matches what the harness resolves.`,
|
|
2289
|
-
);
|
|
2290
|
-
}
|
|
2291
|
-
if (!response.ok || !response.body) {
|
|
2292
|
-
const body = await response.text().catch(() => '');
|
|
2293
|
-
throw new Error(
|
|
2294
|
-
`ctx.csv("${input.logicalPath}"): harness R2 fetch failed ${response.status}: ${body.slice(0, 400)}`,
|
|
2295
|
-
);
|
|
2296
|
-
}
|
|
2297
|
-
return response.body;
|
|
2298
|
-
}
|
|
2299
|
-
|
|
2300
|
-
/**
|
|
2301
|
-
* Streaming CSV dataset. Backed by R2 (or a signed URL when PLAYS_BUCKET
|
|
2302
|
-
* isn't bound). Looks like a length-0 array to plays that pass it straight
|
|
2303
|
-
* to `ctx.map`; ctx.map detects the streaming surface via `iterChunks` and
|
|
2304
|
-
* uses it instead of `slice()`. Plays that try to access rows synchronously
|
|
2305
|
-
* (`csv[0]`, `csv.length`) are intentionally given an empty array — they
|
|
2306
|
-
* must use ctx.map (the supported surface), call `materialize()` (bounded),
|
|
2307
|
-
* or iterate via `for await (const row of csv)`.
|
|
2308
|
-
*/
|
|
2309
|
-
type StreamingCsvDataset<T extends Record<string, unknown>> = T[] & {
|
|
2310
|
-
count(): Promise<number>;
|
|
2311
|
-
peek(limit?: number): Promise<T[]>;
|
|
2312
|
-
materialize(limit?: number): Promise<T[]>;
|
|
2313
|
-
iterChunks(chunkSize: number): AsyncIterable<T[]>;
|
|
2314
|
-
toJSON(): unknown;
|
|
2315
|
-
datasetId: string;
|
|
2316
|
-
tableNamespace: string;
|
|
2317
|
-
__deeplineDatasetKind: 'csv';
|
|
2318
|
-
/** Marker so `ctx.map` can detect this is streaming-only and switch path. */
|
|
2319
|
-
__deeplineStreamingDataset: true;
|
|
2320
|
-
};
|
|
2321
|
-
|
|
2322
|
-
const MAX_MATERIALIZE_ROWS_DEFAULT = 50_000;
|
|
2323
|
-
|
|
2324
|
-
function makeStreamingCsvDataset<T extends Record<string, unknown>>(input: {
|
|
2325
|
-
name: string;
|
|
2326
|
-
logicalPath: string;
|
|
2327
|
-
renameOptions?: CsvRenameOptions;
|
|
2328
|
-
open: () => Promise<ReadableStream<Uint8Array> | null>;
|
|
2329
|
-
}): StreamingCsvDataset<T> {
|
|
2330
|
-
const datasetId = `csv:${input.name}`;
|
|
2331
|
-
const arr = [] as T[] as StreamingCsvDataset<T>;
|
|
2332
|
-
let cachedCount: number | null = null;
|
|
2333
|
-
|
|
2334
|
-
async function* doStream(chunkSize: number): AsyncGenerator<T[], void, void> {
|
|
2335
|
-
const body = await input.open();
|
|
2336
|
-
if (!body) {
|
|
2337
|
-
throw new Error(
|
|
2338
|
-
`ctx.csv("${input.logicalPath}"): R2 asset is not reachable (no PLAYS_BUCKET binding and signed URL unavailable).`,
|
|
2339
|
-
);
|
|
2340
|
-
}
|
|
2341
|
-
for await (const chunk of streamCsvRowsFromBody<T>(
|
|
2342
|
-
body,
|
|
2343
|
-
Math.max(1, Math.floor(chunkSize)),
|
|
2344
|
-
)) {
|
|
2345
|
-
yield applyCsvRenameProjection(chunk, input.renameOptions) as T[];
|
|
2592
|
+
throw error;
|
|
2593
|
+
}
|
|
2346
2594
|
}
|
|
2347
2595
|
}
|
|
2348
2596
|
|
|
2349
|
-
|
|
2350
|
-
|
|
2351
|
-
|
|
2352
|
-
|
|
2353
|
-
|
|
2354
|
-
|
|
2355
|
-
|
|
2356
|
-
|
|
2357
|
-
|
|
2358
|
-
|
|
2359
|
-
}
|
|
2360
|
-
},
|
|
2361
|
-
enumerable: false,
|
|
2362
|
-
});
|
|
2363
|
-
Object.defineProperty(arr, 'count', {
|
|
2364
|
-
value: async () => {
|
|
2365
|
-
if (cachedCount !== null) return cachedCount;
|
|
2366
|
-
let total = 0;
|
|
2367
|
-
for await (const chunk of doStream(5_000)) total += chunk.length;
|
|
2368
|
-
cachedCount = total;
|
|
2369
|
-
return total;
|
|
2370
|
-
},
|
|
2371
|
-
enumerable: false,
|
|
2372
|
-
});
|
|
2373
|
-
Object.defineProperty(arr, 'peek', {
|
|
2374
|
-
value: async (limit = 10) => {
|
|
2375
|
-
const out: T[] = [];
|
|
2376
|
-
for await (const chunk of doStream(Math.max(1, limit))) {
|
|
2377
|
-
for (const row of chunk) {
|
|
2378
|
-
out.push(row);
|
|
2379
|
-
if (out.length >= limit) return out;
|
|
2380
|
-
}
|
|
2381
|
-
}
|
|
2382
|
-
return out;
|
|
2383
|
-
},
|
|
2384
|
-
enumerable: false,
|
|
2385
|
-
});
|
|
2386
|
-
Object.defineProperty(arr, 'materialize', {
|
|
2387
|
-
value: async (limit?: number) => {
|
|
2388
|
-
const cap = limit ?? MAX_MATERIALIZE_ROWS_DEFAULT;
|
|
2389
|
-
const out: T[] = [];
|
|
2390
|
-
for await (const chunk of doStream(5_000)) {
|
|
2391
|
-
for (const row of chunk) {
|
|
2392
|
-
if (out.length >= cap) {
|
|
2393
|
-
return out;
|
|
2394
|
-
}
|
|
2395
|
-
out.push(row);
|
|
2396
|
-
}
|
|
2397
|
-
}
|
|
2398
|
-
return out;
|
|
2399
|
-
},
|
|
2400
|
-
enumerable: false,
|
|
2401
|
-
});
|
|
2402
|
-
Object.defineProperty(arr, 'datasetId', {
|
|
2403
|
-
value: datasetId,
|
|
2404
|
-
enumerable: true,
|
|
2405
|
-
});
|
|
2406
|
-
Object.defineProperty(arr, 'tableNamespace', {
|
|
2407
|
-
value: input.name,
|
|
2408
|
-
enumerable: true,
|
|
2409
|
-
});
|
|
2410
|
-
Object.defineProperty(arr, '__deeplineStreamingDataset', {
|
|
2411
|
-
value: true,
|
|
2412
|
-
enumerable: false,
|
|
2597
|
+
// Dynamic Workers cannot receive a raw R2Bucket binding, and both previous
|
|
2598
|
+
// fallbacks were different data planes: service-binding fetch bodies could
|
|
2599
|
+
// arrive empty across WorkerLoader isolates, while app-signed URLs pointed at
|
|
2600
|
+
// the app namespace instead of the preview harness namespace. The harness owns
|
|
2601
|
+
// staged R2 now, so the only fallback is typed bounded range RPC.
|
|
2602
|
+
return readHarnessStagedFileChunks({
|
|
2603
|
+
req: input.req,
|
|
2604
|
+
logicalPath: input.logicalPath,
|
|
2605
|
+
storageKey,
|
|
2606
|
+
expectedBytes,
|
|
2413
2607
|
});
|
|
2414
|
-
Object.defineProperty(arr, '__deeplineDatasetKind', {
|
|
2415
|
-
value: 'csv',
|
|
2416
|
-
enumerable: false,
|
|
2417
|
-
});
|
|
2418
|
-
Object.defineProperty(arr, 'toJSON', {
|
|
2419
|
-
value: () => ({
|
|
2420
|
-
kind: 'dataset' as const,
|
|
2421
|
-
datasetKind: 'csv',
|
|
2422
|
-
datasetId,
|
|
2423
|
-
count: cachedCount,
|
|
2424
|
-
streaming: true,
|
|
2425
|
-
tableNamespace: input.name,
|
|
2426
|
-
}),
|
|
2427
|
-
enumerable: false,
|
|
2428
|
-
});
|
|
2429
|
-
return arr;
|
|
2430
|
-
}
|
|
2431
|
-
|
|
2432
|
-
function isStreamingDataset<T extends Record<string, unknown>>(
|
|
2433
|
-
value: unknown,
|
|
2434
|
-
): value is StreamingCsvDataset<T> {
|
|
2435
|
-
return (
|
|
2436
|
-
Array.isArray(value) &&
|
|
2437
|
-
(value as { __deeplineStreamingDataset?: unknown })
|
|
2438
|
-
.__deeplineStreamingDataset === true
|
|
2439
|
-
);
|
|
2440
2608
|
}
|
|
2441
2609
|
|
|
2442
2610
|
/**
|
|
@@ -2492,6 +2660,60 @@ function requireSheetContract(
|
|
|
2492
2660
|
return contract;
|
|
2493
2661
|
}
|
|
2494
2662
|
|
|
2663
|
+
function isDatasetPayloadField(field: string): boolean {
|
|
2664
|
+
return (
|
|
2665
|
+
field.length > 0 &&
|
|
2666
|
+
!field.startsWith('__deepline') &&
|
|
2667
|
+
field !== '_key' &&
|
|
2668
|
+
field !== '_status' &&
|
|
2669
|
+
field !== '_run_id' &&
|
|
2670
|
+
field !== '_error' &&
|
|
2671
|
+
field !== '_stage' &&
|
|
2672
|
+
field !== '_provider' &&
|
|
2673
|
+
field !== '_input_index' &&
|
|
2674
|
+
field !== '_created_at' &&
|
|
2675
|
+
field !== '_updated_at' &&
|
|
2676
|
+
field !== '_cell_meta'
|
|
2677
|
+
);
|
|
2678
|
+
}
|
|
2679
|
+
|
|
2680
|
+
function augmentSheetContractWithDatasetFields(input: {
|
|
2681
|
+
contract: PlaySheetContract;
|
|
2682
|
+
rows: readonly Record<string, unknown>[];
|
|
2683
|
+
outputFields?: readonly string[];
|
|
2684
|
+
}): PlaySheetContract {
|
|
2685
|
+
const outputFields = new Set(input.outputFields ?? []);
|
|
2686
|
+
const existingFields = new Set(
|
|
2687
|
+
input.contract.columns.flatMap((column) =>
|
|
2688
|
+
typeof column.field === 'string' ? [column.field] : [],
|
|
2689
|
+
),
|
|
2690
|
+
);
|
|
2691
|
+
const existingSqlNames = new Set(
|
|
2692
|
+
input.contract.columns.map((column) => column.sqlName),
|
|
2693
|
+
);
|
|
2694
|
+
const columns = [...input.contract.columns];
|
|
2695
|
+
for (const row of input.rows) {
|
|
2696
|
+
for (const field of Object.keys(row)) {
|
|
2697
|
+
if (!isDatasetPayloadField(field) || existingFields.has(field)) {
|
|
2698
|
+
continue;
|
|
2699
|
+
}
|
|
2700
|
+
const sqlName = sqlSafePlayColumnName(field);
|
|
2701
|
+
if (existingSqlNames.has(sqlName)) {
|
|
2702
|
+
continue;
|
|
2703
|
+
}
|
|
2704
|
+
existingFields.add(field);
|
|
2705
|
+
existingSqlNames.add(sqlName);
|
|
2706
|
+
columns.push({
|
|
2707
|
+
id: `runtime:${input.contract.tableNamespace}:${field}`,
|
|
2708
|
+
sqlName,
|
|
2709
|
+
source: outputFields.has(field) ? 'mapField' : 'input',
|
|
2710
|
+
field,
|
|
2711
|
+
});
|
|
2712
|
+
}
|
|
2713
|
+
}
|
|
2714
|
+
return { ...input.contract, columns };
|
|
2715
|
+
}
|
|
2716
|
+
|
|
2495
2717
|
async function persistCompletedMapRows(input: {
|
|
2496
2718
|
req: RunRequest;
|
|
2497
2719
|
tableNamespace: string;
|
|
@@ -2500,19 +2722,24 @@ async function persistCompletedMapRows(input: {
|
|
|
2500
2722
|
extraOutputFields?: string[];
|
|
2501
2723
|
}): Promise<void> {
|
|
2502
2724
|
if (input.rows.length === 0) return;
|
|
2725
|
+
const outputFields = [
|
|
2726
|
+
...input.outputFields,
|
|
2727
|
+
...(input.extraOutputFields ?? []).filter(
|
|
2728
|
+
(field) => !input.outputFields.includes(field),
|
|
2729
|
+
),
|
|
2730
|
+
];
|
|
2503
2731
|
await harnessPersistCompletedSheetRows({
|
|
2504
2732
|
baseUrl: input.req.baseUrl,
|
|
2505
2733
|
executorToken: input.req.executorToken,
|
|
2506
2734
|
playName: input.req.playName,
|
|
2507
2735
|
tableNamespace: input.tableNamespace,
|
|
2508
|
-
sheetContract:
|
|
2736
|
+
sheetContract: augmentSheetContractWithDatasetFields({
|
|
2737
|
+
contract: requireSheetContract(input.req, input.tableNamespace),
|
|
2738
|
+
rows: input.rows,
|
|
2739
|
+
outputFields,
|
|
2740
|
+
}),
|
|
2509
2741
|
rows: input.rows,
|
|
2510
|
-
outputFields
|
|
2511
|
-
...input.outputFields,
|
|
2512
|
-
...(input.extraOutputFields ?? []).filter(
|
|
2513
|
-
(field) => !input.outputFields.includes(field),
|
|
2514
|
-
),
|
|
2515
|
-
],
|
|
2742
|
+
outputFields,
|
|
2516
2743
|
runId: input.req.runId,
|
|
2517
2744
|
userEmail: input.req.userEmail,
|
|
2518
2745
|
preloadedDbSessions: input.req.preloadedDbSessions ?? null,
|
|
@@ -2537,12 +2764,37 @@ async function prepareMapRows(input: {
|
|
|
2537
2764
|
executorToken: input.req.executorToken,
|
|
2538
2765
|
playName: input.req.playName,
|
|
2539
2766
|
tableNamespace: input.tableNamespace,
|
|
2540
|
-
sheetContract:
|
|
2767
|
+
sheetContract: augmentSheetContractWithDatasetFields({
|
|
2768
|
+
contract: requireSheetContract(input.req, input.tableNamespace),
|
|
2769
|
+
rows: input.rows,
|
|
2770
|
+
}),
|
|
2541
2771
|
rows: input.rows.map((row) => ({ ...row })),
|
|
2542
2772
|
runId: input.req.runId,
|
|
2543
2773
|
userEmail: input.req.userEmail,
|
|
2544
2774
|
preloadedDbSessions: input.req.preloadedDbSessions ?? null,
|
|
2545
2775
|
});
|
|
2776
|
+
for (const timing of result.timings ?? []) {
|
|
2777
|
+
const phase =
|
|
2778
|
+
typeof timing.phase === 'string' && timing.phase.trim()
|
|
2779
|
+
? timing.phase.trim()
|
|
2780
|
+
: 'unknown';
|
|
2781
|
+
const ms =
|
|
2782
|
+
typeof timing.ms === 'number' && Number.isFinite(timing.ms)
|
|
2783
|
+
? timing.ms
|
|
2784
|
+
: 0;
|
|
2785
|
+
const { phase: _phase, ms: _ms, ...extra } = timing;
|
|
2786
|
+
void _phase;
|
|
2787
|
+
void _ms;
|
|
2788
|
+
recordRunnerPerfTrace({
|
|
2789
|
+
req: input.req,
|
|
2790
|
+
phase: `sheet_start.${phase}`,
|
|
2791
|
+
ms,
|
|
2792
|
+
extra: {
|
|
2793
|
+
tableNamespace: input.tableNamespace,
|
|
2794
|
+
...extra,
|
|
2795
|
+
},
|
|
2796
|
+
});
|
|
2797
|
+
}
|
|
2546
2798
|
return {
|
|
2547
2799
|
inserted: result.inserted,
|
|
2548
2800
|
skipped: result.skipped,
|
|
@@ -2700,7 +2952,7 @@ function createMinimalWorkerCtx(
|
|
|
2700
2952
|
const callDepth = rootGovernance?.callDepth ?? 0;
|
|
2701
2953
|
const runMap = async <T extends Record<string, unknown>>(
|
|
2702
2954
|
name: string,
|
|
2703
|
-
rows: T
|
|
2955
|
+
rows: WorkerDatasetInput<T>,
|
|
2704
2956
|
fieldsDef: Record<
|
|
2705
2957
|
string,
|
|
2706
2958
|
| unknown
|
|
@@ -2715,7 +2967,8 @@ function createMinimalWorkerCtx(
|
|
|
2715
2967
|
): Promise<unknown> => {
|
|
2716
2968
|
const mapStartedAt = nowMs();
|
|
2717
2969
|
const mapNodeId = `map:${name}`;
|
|
2718
|
-
const
|
|
2970
|
+
const inputRows = rows;
|
|
2971
|
+
const rowCountHint = datasetRowCountHint(inputRows);
|
|
2719
2972
|
const baseOffset = 0;
|
|
2720
2973
|
const fieldEntries = Object.entries(fieldsDef);
|
|
2721
2974
|
const plan = req.executionPlan;
|
|
@@ -2723,12 +2976,8 @@ function createMinimalWorkerCtx(
|
|
|
2723
2976
|
(candidate) =>
|
|
2724
2977
|
candidate.mapName === name || candidate.tableNamespace === name,
|
|
2725
2978
|
);
|
|
2726
|
-
const streaming = isStreamingDataset<T>(sliced);
|
|
2727
|
-
// For streaming inputs we don't know the row count upfront — pass
|
|
2728
|
-
// `totalRows: 0` so chooseMapChunkSize falls back to the preferred /
|
|
2729
|
-
// default chunk size rather than trying to budget against an unknown.
|
|
2730
2979
|
const rowsPerChunk = chooseMapChunkSize({
|
|
2731
|
-
totalRows:
|
|
2980
|
+
totalRows: rowCountHint,
|
|
2732
2981
|
mapCount: Math.max(1, plan?.maps.length ?? 1),
|
|
2733
2982
|
stepsPerChunk: planMap?.stepsPerChunk ?? 1,
|
|
2734
2983
|
preferredChunkSize: planMap?.defaultChunkSize,
|
|
@@ -2750,14 +2999,12 @@ function createMinimalWorkerCtx(
|
|
|
2750
2999
|
typeof total === 'number' && Number.isFinite(total) && total > 0
|
|
2751
3000
|
? `${completed.toLocaleString()} / ${total.toLocaleString()} rows processed`
|
|
2752
3001
|
: `${completed.toLocaleString()} rows processed`;
|
|
3002
|
+
callbacks?.onMapStarted?.(mapNodeId, mapStartedAt);
|
|
2753
3003
|
updateMapProgress({
|
|
2754
3004
|
completed: 0,
|
|
2755
|
-
total:
|
|
3005
|
+
total: rowCountHint ?? undefined,
|
|
2756
3006
|
startedAt: mapStartedAt,
|
|
2757
|
-
message: formatMapProgressMessage(
|
|
2758
|
-
0,
|
|
2759
|
-
streaming ? undefined : sliced.length,
|
|
2760
|
-
),
|
|
3007
|
+
message: formatMapProgressMessage(0, rowCountHint ?? undefined),
|
|
2761
3008
|
});
|
|
2762
3009
|
const explicitRowKeysSeen =
|
|
2763
3010
|
opts?.key === undefined ? null : new Map<string, number>();
|
|
@@ -2983,6 +3230,7 @@ function createMinimalWorkerCtx(
|
|
|
2983
3230
|
input?: unknown,
|
|
2984
3231
|
_opts?: { description?: string },
|
|
2985
3232
|
): Promise<unknown> => {
|
|
3233
|
+
void _opts;
|
|
2986
3234
|
assertNotAborted(abortSignal);
|
|
2987
3235
|
const request = normalizeToolExecuteArgs(
|
|
2988
3236
|
requestOrKey,
|
|
@@ -3008,6 +3256,8 @@ function createMinimalWorkerCtx(
|
|
|
3008
3256
|
toolNameOrSpec,
|
|
3009
3257
|
waterfallInput,
|
|
3010
3258
|
waterfallOpts,
|
|
3259
|
+
callbacks,
|
|
3260
|
+
workflowStep,
|
|
3011
3261
|
),
|
|
3012
3262
|
};
|
|
3013
3263
|
for (const [key, value] of fieldEntries) {
|
|
@@ -3219,10 +3469,16 @@ function createMinimalWorkerCtx(
|
|
|
3219
3469
|
outputDatasetId: `map:${name}`,
|
|
3220
3470
|
hash,
|
|
3221
3471
|
preview: toWorkflowSerializableValue(out.slice(0, 5)),
|
|
3472
|
+
cachedRows:
|
|
3473
|
+
out.length <= WORKER_DATASET_IN_MEMORY_ROWS
|
|
3474
|
+
? toWorkflowSerializableValue(out)
|
|
3475
|
+
: undefined,
|
|
3222
3476
|
};
|
|
3223
3477
|
};
|
|
3224
3478
|
|
|
3225
|
-
const
|
|
3479
|
+
const previewRows: Array<T & Record<string, unknown>> = [];
|
|
3480
|
+
const cachedRows: Array<T & Record<string, unknown>> = [];
|
|
3481
|
+
let canCacheRows = true;
|
|
3226
3482
|
let totalRowsExecuted = 0;
|
|
3227
3483
|
let totalRowsCached = 0;
|
|
3228
3484
|
let totalRowsDuplicateReused = 0;
|
|
@@ -3260,6 +3516,7 @@ function createMinimalWorkerCtx(
|
|
|
3260
3516
|
`(${totalRowsExecuted} executed, ${totalRowsCached} already satisfied) ` +
|
|
3261
3517
|
`inserted=${totalRowsInserted} skipped=${totalRowsSkipped}`;
|
|
3262
3518
|
const completedAt = nowMs();
|
|
3519
|
+
callbacks?.onMapCompleted?.(mapNodeId, completedAt);
|
|
3263
3520
|
updateMapProgress({
|
|
3264
3521
|
completed: totalRowsWritten,
|
|
3265
3522
|
total: totalRowsWritten,
|
|
@@ -3273,9 +3530,29 @@ function createMinimalWorkerCtx(
|
|
|
3273
3530
|
message: cacheSummary,
|
|
3274
3531
|
ts: nowMs(),
|
|
3275
3532
|
});
|
|
3276
|
-
return
|
|
3533
|
+
return createPersistedDatasetHandle({
|
|
3534
|
+
playName: req.playName,
|
|
3535
|
+
name,
|
|
3277
3536
|
count: totalRowsWritten,
|
|
3278
|
-
|
|
3537
|
+
previewRows,
|
|
3538
|
+
cachedRows: canCacheRows ? cachedRows : null,
|
|
3539
|
+
readRows: async ({ limit, offset }) => {
|
|
3540
|
+
const result = await harnessReadSheetDatasetRows({
|
|
3541
|
+
baseUrl: req.baseUrl,
|
|
3542
|
+
executorToken: req.executorToken,
|
|
3543
|
+
playName: req.playName,
|
|
3544
|
+
tableNamespace: name,
|
|
3545
|
+
runId: req.runId,
|
|
3546
|
+
limit,
|
|
3547
|
+
offset,
|
|
3548
|
+
userEmail: req.userEmail,
|
|
3549
|
+
preloadedDbSessions: req.preloadedDbSessions ?? null,
|
|
3550
|
+
});
|
|
3551
|
+
return result.rows as Array<T & Record<string, unknown>>;
|
|
3552
|
+
},
|
|
3553
|
+
trace: (phase, ms, extra) =>
|
|
3554
|
+
recordRunnerPerfTrace({ req, phase, ms, extra }),
|
|
3555
|
+
nowMs,
|
|
3279
3556
|
workProgress: {
|
|
3280
3557
|
total: totalRowsWritten,
|
|
3281
3558
|
executed: totalRowsExecuted,
|
|
@@ -3290,110 +3567,61 @@ function createMinimalWorkerCtx(
|
|
|
3290
3567
|
});
|
|
3291
3568
|
};
|
|
3292
3569
|
|
|
3293
|
-
|
|
3294
|
-
|
|
3295
|
-
|
|
3296
|
-
|
|
3297
|
-
|
|
3298
|
-
|
|
3299
|
-
|
|
3300
|
-
|
|
3301
|
-
|
|
3302
|
-
|
|
3303
|
-
|
|
3304
|
-
|
|
3305
|
-
|
|
3570
|
+
let totalRowsWritten = 0;
|
|
3571
|
+
let chunkIndex = 0;
|
|
3572
|
+
let chunkStart = 0;
|
|
3573
|
+
for await (const chunkRows of iterDatasetChunks(inputRows, rowsPerChunk)) {
|
|
3574
|
+
assertNotAborted(abortSignal);
|
|
3575
|
+
if (chunkRows.length === 0) continue;
|
|
3576
|
+
assertUniqueExplicitRowKeys(chunkRows, chunkStart);
|
|
3577
|
+
const chunkResult = await runChunkStep(chunkRows, chunkStart, chunkIndex);
|
|
3578
|
+
totalRowsWritten += chunkResult.rowsWritten;
|
|
3579
|
+
totalRowsExecuted += chunkResult.rowsExecuted;
|
|
3580
|
+
totalRowsCached += chunkResult.rowsCached;
|
|
3581
|
+
totalRowsDuplicateReused += chunkResult.rowsDuplicateReused;
|
|
3582
|
+
totalRowsInserted += chunkResult.rowsInserted;
|
|
3583
|
+
totalRowsSkipped += chunkResult.rowsSkipped;
|
|
3584
|
+
updateMapProgress({
|
|
3585
|
+
completed: totalRowsWritten,
|
|
3586
|
+
total: rowCountHint ?? undefined,
|
|
3587
|
+
message: formatMapProgressMessage(
|
|
3588
|
+
totalRowsWritten,
|
|
3589
|
+
rowCountHint ?? undefined,
|
|
3590
|
+
),
|
|
3591
|
+
});
|
|
3592
|
+
if (previewRows.length < WORKER_DATASET_PREVIEW_ROWS) {
|
|
3593
|
+
previewRows.push(
|
|
3594
|
+
...chunkResult.preview.slice(
|
|
3595
|
+
0,
|
|
3596
|
+
WORKER_DATASET_PREVIEW_ROWS - previewRows.length,
|
|
3597
|
+
),
|
|
3306
3598
|
);
|
|
3307
|
-
totalRowsWritten += chunkResult.rowsWritten;
|
|
3308
|
-
totalRowsExecuted += chunkResult.rowsExecuted;
|
|
3309
|
-
totalRowsCached += chunkResult.rowsCached;
|
|
3310
|
-
totalRowsDuplicateReused += chunkResult.rowsDuplicateReused;
|
|
3311
|
-
totalRowsInserted += chunkResult.rowsInserted;
|
|
3312
|
-
totalRowsSkipped += chunkResult.rowsSkipped;
|
|
3313
|
-
updateMapProgress({
|
|
3314
|
-
completed: totalRowsWritten,
|
|
3315
|
-
message: formatMapProgressMessage(totalRowsWritten),
|
|
3316
|
-
});
|
|
3317
|
-
if (out.length < 10) {
|
|
3318
|
-
out.push(...chunkResult.preview.slice(0, 10 - out.length));
|
|
3319
|
-
}
|
|
3320
|
-
chunkStart += chunkRows.length;
|
|
3321
|
-
chunkIndex += 1;
|
|
3322
3599
|
}
|
|
3323
|
-
|
|
3324
|
-
|
|
3325
|
-
|
|
3326
|
-
|
|
3327
|
-
|
|
3328
|
-
|
|
3329
|
-
|
|
3330
|
-
|
|
3331
|
-
|
|
3332
|
-
|
|
3333
|
-
});
|
|
3334
|
-
return dataset;
|
|
3335
|
-
}
|
|
3336
|
-
|
|
3337
|
-
if (workflowStep && sliced.length > rowsPerChunk) {
|
|
3338
|
-
let totalRowsWritten = 0;
|
|
3339
|
-
for (let start = 0; start < sliced.length; start += rowsPerChunk) {
|
|
3340
|
-
assertNotAborted(abortSignal);
|
|
3341
|
-
const end = Math.min(sliced.length, start + rowsPerChunk);
|
|
3342
|
-
const chunkRows = sliced.slice(start, end);
|
|
3343
|
-
const chunkIndex = Math.floor(start / rowsPerChunk);
|
|
3344
|
-
assertUniqueExplicitRowKeys(chunkRows, start);
|
|
3345
|
-
const chunkResult = await runChunkStep(chunkRows, start, chunkIndex);
|
|
3346
|
-
totalRowsWritten += chunkResult.rowsWritten;
|
|
3347
|
-
totalRowsExecuted += chunkResult.rowsExecuted;
|
|
3348
|
-
totalRowsCached += chunkResult.rowsCached;
|
|
3349
|
-
totalRowsDuplicateReused += chunkResult.rowsDuplicateReused;
|
|
3350
|
-
totalRowsInserted += chunkResult.rowsInserted;
|
|
3351
|
-
totalRowsSkipped += chunkResult.rowsSkipped;
|
|
3352
|
-
updateMapProgress({
|
|
3353
|
-
completed: totalRowsWritten,
|
|
3354
|
-
total: sliced.length,
|
|
3355
|
-
message: formatMapProgressMessage(totalRowsWritten, sliced.length),
|
|
3356
|
-
});
|
|
3357
|
-
if (out.length < 10) {
|
|
3358
|
-
out.push(...chunkResult.preview.slice(0, 10 - out.length));
|
|
3600
|
+
if (canCacheRows) {
|
|
3601
|
+
const nextRows = chunkResult.cachedRows ?? [];
|
|
3602
|
+
if (
|
|
3603
|
+
nextRows.length === chunkResult.rowsWritten &&
|
|
3604
|
+
cachedRows.length + nextRows.length <= WORKER_DATASET_IN_MEMORY_ROWS
|
|
3605
|
+
) {
|
|
3606
|
+
cachedRows.push(...nextRows);
|
|
3607
|
+
} else {
|
|
3608
|
+
cachedRows.length = 0;
|
|
3609
|
+
canCacheRows = false;
|
|
3359
3610
|
}
|
|
3360
3611
|
}
|
|
3361
|
-
|
|
3362
|
-
|
|
3363
|
-
req,
|
|
3364
|
-
phase: 'runner.map.total',
|
|
3365
|
-
ms: nowMs() - mapStartedAt,
|
|
3366
|
-
extra: {
|
|
3367
|
-
mapName: name,
|
|
3368
|
-
rowsWritten: totalRowsWritten,
|
|
3369
|
-
streaming: false,
|
|
3370
|
-
},
|
|
3371
|
-
});
|
|
3372
|
-
return dataset;
|
|
3612
|
+
chunkStart += chunkRows.length;
|
|
3613
|
+
chunkIndex += 1;
|
|
3373
3614
|
}
|
|
3374
|
-
|
|
3375
|
-
assertUniqueExplicitRowKeys(sliced, 0);
|
|
3376
|
-
const chunkResult = await runChunkStep(sliced, 0, 0);
|
|
3377
|
-
totalRowsExecuted = chunkResult.rowsExecuted;
|
|
3378
|
-
totalRowsCached = chunkResult.rowsCached;
|
|
3379
|
-
totalRowsDuplicateReused = chunkResult.rowsDuplicateReused;
|
|
3380
|
-
totalRowsInserted = chunkResult.rowsInserted;
|
|
3381
|
-
totalRowsSkipped = chunkResult.rowsSkipped;
|
|
3382
|
-
out.push(...chunkResult.preview);
|
|
3383
|
-
updateMapProgress({
|
|
3384
|
-
completed: chunkResult.rowsWritten,
|
|
3385
|
-
total: sliced.length,
|
|
3386
|
-
message: formatMapProgressMessage(chunkResult.rowsWritten, sliced.length),
|
|
3387
|
-
});
|
|
3388
|
-
const dataset = finalize(chunkResult.rowsWritten);
|
|
3615
|
+
const dataset = finalize(totalRowsWritten);
|
|
3389
3616
|
recordRunnerPerfTrace({
|
|
3390
3617
|
req,
|
|
3391
3618
|
phase: 'runner.map.total',
|
|
3392
3619
|
ms: nowMs() - mapStartedAt,
|
|
3393
3620
|
extra: {
|
|
3394
3621
|
mapName: name,
|
|
3395
|
-
rowsWritten:
|
|
3396
|
-
streaming:
|
|
3622
|
+
rowsWritten: totalRowsWritten,
|
|
3623
|
+
inputKind: rowCountHint === null ? 'streaming' : 'known_count',
|
|
3624
|
+
chunks: chunkIndex,
|
|
3397
3625
|
},
|
|
3398
3626
|
});
|
|
3399
3627
|
return dataset;
|
|
@@ -3407,7 +3635,7 @@ function createMinimalWorkerCtx(
|
|
|
3407
3635
|
|
|
3408
3636
|
constructor(
|
|
3409
3637
|
private readonly name: string,
|
|
3410
|
-
private readonly rows: T
|
|
3638
|
+
private readonly rows: WorkerDatasetInput<T>,
|
|
3411
3639
|
) {}
|
|
3412
3640
|
|
|
3413
3641
|
step(name: string, resolver: WorkerStepProgramStep['resolver']): this {
|
|
@@ -3482,18 +3710,13 @@ function createMinimalWorkerCtx(
|
|
|
3482
3710
|
async csv<T extends Record<string, unknown> = Record<string, unknown>>(
|
|
3483
3711
|
arg: unknown,
|
|
3484
3712
|
options?: CsvRenameOptions,
|
|
3485
|
-
): Promise<T
|
|
3713
|
+
): Promise<WorkerDatasetHandle<T>> {
|
|
3486
3714
|
const csvStartedAt = nowMs();
|
|
3487
3715
|
if (Array.isArray(arg)) {
|
|
3488
|
-
|
|
3489
|
-
|
|
3490
|
-
|
|
3491
|
-
|
|
3492
|
-
applyCsvRenameProjection(arg as T[], options),
|
|
3493
|
-
{
|
|
3494
|
-
datasetKind: 'csv',
|
|
3495
|
-
},
|
|
3496
|
-
) as unknown as T[];
|
|
3716
|
+
const dataset = createInlineDatasetHandle(
|
|
3717
|
+
applyCsvRenameProjection(arg as T[], options) as T[],
|
|
3718
|
+
{ name: 'csv', kind: 'csv' },
|
|
3719
|
+
);
|
|
3497
3720
|
recordRunnerPerfTrace({
|
|
3498
3721
|
req,
|
|
3499
3722
|
phase: 'runner.csv',
|
|
@@ -3504,15 +3727,10 @@ function createMinimalWorkerCtx(
|
|
|
3504
3727
|
}
|
|
3505
3728
|
const filename = String(arg ?? '');
|
|
3506
3729
|
if (req.inlineCsv && filename === req.inlineCsv.name) {
|
|
3507
|
-
|
|
3508
|
-
|
|
3509
|
-
|
|
3510
|
-
|
|
3511
|
-
applyCsvRenameProjection(req.inlineCsv.rows as T[], options),
|
|
3512
|
-
{
|
|
3513
|
-
datasetKind: 'csv',
|
|
3514
|
-
},
|
|
3515
|
-
) as unknown as T[];
|
|
3730
|
+
const dataset = createInlineDatasetHandle(
|
|
3731
|
+
applyCsvRenameProjection(req.inlineCsv.rows as T[], options) as T[],
|
|
3732
|
+
{ name: filename, kind: 'csv' },
|
|
3733
|
+
);
|
|
3516
3734
|
recordRunnerPerfTrace({
|
|
3517
3735
|
req,
|
|
3518
3736
|
phase: 'runner.csv',
|
|
@@ -3521,52 +3739,73 @@ function createMinimalWorkerCtx(
|
|
|
3521
3739
|
});
|
|
3522
3740
|
return dataset;
|
|
3523
3741
|
}
|
|
3524
|
-
// Resolution order: explicit
|
|
3742
|
+
// Resolution order: explicit inputFiles (runtime input) → packaged
|
|
3525
3743
|
// files (relative-path imports bundled with the play artifact).
|
|
3526
|
-
let
|
|
3527
|
-
if (!
|
|
3744
|
+
let file = req.inputFiles?.[filename] ?? null;
|
|
3745
|
+
if (!file && req.packagedFiles) {
|
|
3528
3746
|
const matchByPath = req.packagedFiles.find(
|
|
3529
3747
|
(f) =>
|
|
3530
3748
|
f.playPath === filename ||
|
|
3531
3749
|
f.playPath === filename.replace(/^\.\//, ''),
|
|
3532
3750
|
);
|
|
3533
|
-
if (matchByPath)
|
|
3751
|
+
if (matchByPath) {
|
|
3752
|
+
file = {
|
|
3753
|
+
logicalPath: matchByPath.playPath,
|
|
3754
|
+
fileName:
|
|
3755
|
+
matchByPath.playPath.split('/').pop() ?? matchByPath.playPath,
|
|
3756
|
+
storageKey: matchByPath.storageKey,
|
|
3757
|
+
contentType: matchByPath.contentType,
|
|
3758
|
+
bytes: matchByPath.bytes,
|
|
3759
|
+
};
|
|
3760
|
+
}
|
|
3534
3761
|
}
|
|
3535
|
-
if (!
|
|
3762
|
+
if (!file?.storageKey) {
|
|
3536
3763
|
throw new Error(
|
|
3537
3764
|
`ctx.csv("${filename}"): no inline rows or R2 asset binding registered. ` +
|
|
3538
|
-
'Pass inline rows, or upload to R2 and register packagedFiles/
|
|
3765
|
+
'Pass inline rows, or upload to R2 and register packagedFiles/inputFiles in the run config.',
|
|
3766
|
+
);
|
|
3767
|
+
}
|
|
3768
|
+
const selectedFile = file;
|
|
3769
|
+
const expectedBytes = normalizeExpectedBytes(selectedFile.bytes);
|
|
3770
|
+
if (expectedBytes === null) {
|
|
3771
|
+
throw new Error(
|
|
3772
|
+
`ctx.csv("${filename}"): staged dataset handle is missing a byte length for ` +
|
|
3773
|
+
`${selectedFile.storageKey}. Re-stage the file with bytes metadata.`,
|
|
3539
3774
|
);
|
|
3540
3775
|
}
|
|
3541
|
-
|
|
3542
|
-
// pulls 1 MiB-ish text chunks from R2 and yields parsed row chunks.
|
|
3543
|
-
// ctx.map detects the streaming surface via __deeplineStreamingDataset
|
|
3544
|
-
// and switches its chunked execution loop to consume iterChunks
|
|
3545
|
-
// directly, so 2M-row CSVs never get fully materialized in memory.
|
|
3546
|
-
const storageKey = r2Key;
|
|
3547
|
-
const dataset = makeStreamingCsvDataset<T>({
|
|
3776
|
+
const dataset = createCsvDatasetHandle<T>({
|
|
3548
3777
|
name: filename,
|
|
3549
3778
|
logicalPath: filename,
|
|
3779
|
+
expectedBytes,
|
|
3550
3780
|
renameOptions: options,
|
|
3781
|
+
nowMs,
|
|
3782
|
+
streamRows: streamCsvRowsFromByteChunks,
|
|
3783
|
+
trace: (phase, ms, extra) =>
|
|
3784
|
+
recordRunnerPerfTrace({ req, phase, ms, extra }),
|
|
3551
3785
|
open: () =>
|
|
3552
|
-
|
|
3786
|
+
openFileByteChunks({
|
|
3553
3787
|
req,
|
|
3554
3788
|
env,
|
|
3555
3789
|
logicalPath: filename,
|
|
3556
|
-
|
|
3790
|
+
file: selectedFile,
|
|
3557
3791
|
}),
|
|
3558
|
-
})
|
|
3792
|
+
});
|
|
3559
3793
|
recordRunnerPerfTrace({
|
|
3560
3794
|
req,
|
|
3561
3795
|
phase: 'runner.csv',
|
|
3562
3796
|
ms: nowMs() - csvStartedAt,
|
|
3563
|
-
extra: {
|
|
3797
|
+
extra: {
|
|
3798
|
+
mode: 'streaming_file',
|
|
3799
|
+
filename,
|
|
3800
|
+
expectedBytes,
|
|
3801
|
+
storageKey: selectedFile.storageKey,
|
|
3802
|
+
},
|
|
3564
3803
|
});
|
|
3565
3804
|
return dataset;
|
|
3566
3805
|
},
|
|
3567
3806
|
map<T extends Record<string, unknown>>(
|
|
3568
3807
|
name: string,
|
|
3569
|
-
rows: T
|
|
3808
|
+
rows: WorkerDatasetInput<T>,
|
|
3570
3809
|
fieldsDef?:
|
|
3571
3810
|
| Record<
|
|
3572
3811
|
string,
|
|
@@ -3600,7 +3839,12 @@ function createMinimalWorkerCtx(
|
|
|
3600
3839
|
input: Record<string, unknown>,
|
|
3601
3840
|
): Promise<unknown> => {
|
|
3602
3841
|
assertNotAborted(abortSignal);
|
|
3603
|
-
return
|
|
3842
|
+
return executeToolWithLifecycle(
|
|
3843
|
+
req,
|
|
3844
|
+
{ id: key, toolId, input },
|
|
3845
|
+
workflowStep,
|
|
3846
|
+
callbacks,
|
|
3847
|
+
);
|
|
3604
3848
|
},
|
|
3605
3849
|
tools: {
|
|
3606
3850
|
async execute(
|
|
@@ -3609,11 +3853,13 @@ function createMinimalWorkerCtx(
|
|
|
3609
3853
|
input?: unknown,
|
|
3610
3854
|
_opts?: { description?: string },
|
|
3611
3855
|
): Promise<unknown> {
|
|
3856
|
+
void _opts;
|
|
3612
3857
|
assertNotAborted(abortSignal);
|
|
3613
|
-
return
|
|
3858
|
+
return executeToolWithLifecycle(
|
|
3614
3859
|
req,
|
|
3615
3860
|
normalizeToolExecuteArgs(requestOrKey, toolId, input),
|
|
3616
3861
|
workflowStep,
|
|
3862
|
+
callbacks,
|
|
3617
3863
|
);
|
|
3618
3864
|
},
|
|
3619
3865
|
},
|
|
@@ -3640,7 +3886,15 @@ function createMinimalWorkerCtx(
|
|
|
3640
3886
|
input: Record<string, unknown>,
|
|
3641
3887
|
opts?: WorkerWaterfallOptions,
|
|
3642
3888
|
): Promise<unknown | null> {
|
|
3643
|
-
return executeWorkerWaterfall(
|
|
3889
|
+
return executeWorkerWaterfall(
|
|
3890
|
+
req,
|
|
3891
|
+
[],
|
|
3892
|
+
toolNameOrSpec,
|
|
3893
|
+
input,
|
|
3894
|
+
opts,
|
|
3895
|
+
callbacks,
|
|
3896
|
+
workflowStep,
|
|
3897
|
+
);
|
|
3644
3898
|
},
|
|
3645
3899
|
async sleep(ms: number): Promise<void> {
|
|
3646
3900
|
assertNotAborted(abortSignal);
|
|
@@ -3993,17 +4247,10 @@ async function handleRun(request: Request, env: WorkerEnv): Promise<Response> {
|
|
|
3993
4247
|
});
|
|
3994
4248
|
}
|
|
3995
4249
|
|
|
3996
|
-
/** Cap on
|
|
3997
|
-
const
|
|
3998
|
-
/** Min wall-clock interval between live-
|
|
3999
|
-
const
|
|
4000
|
-
/**
|
|
4001
|
-
* Initial flush delay for live logs. Short plays should not pay an extra
|
|
4002
|
-
* non-terminal Convex write just to show a transient "running" log state; the
|
|
4003
|
-
* terminal status carries the full log buffer. Longer plays still flush early
|
|
4004
|
-
* enough for the dashboard to feel alive.
|
|
4005
|
-
*/
|
|
4006
|
-
const LIVE_LOG_FIRST_FLUSH_DELAY_MS = 30_000;
|
|
4250
|
+
/** Cap on run log lines retained in the terminal output compatibility shape. */
|
|
4251
|
+
const RUN_LOG_BUFFER_LIMIT = 500;
|
|
4252
|
+
/** Min wall-clock interval between live run-ledger flushes during a run. */
|
|
4253
|
+
const RUN_LEDGER_FLUSH_INTERVAL_MS = 500;
|
|
4007
4254
|
|
|
4008
4255
|
async function executeRunRequest(
|
|
4009
4256
|
req: RunRequest,
|
|
@@ -4033,99 +4280,240 @@ async function executeRunRequest(
|
|
|
4033
4280
|
});
|
|
4034
4281
|
const abortController = options?.abortController ?? new AbortController();
|
|
4035
4282
|
const abortSignal = abortController.signal;
|
|
4036
|
-
|
|
4037
|
-
|
|
4038
|
-
|
|
4039
|
-
|
|
4040
|
-
|
|
4041
|
-
|
|
4042
|
-
|
|
4043
|
-
|
|
4044
|
-
|
|
4045
|
-
|
|
4046
|
-
|
|
4283
|
+
let runLogBuffer: string[] = [];
|
|
4284
|
+
let pendingRunLogLines: string[] = [];
|
|
4285
|
+
let stepProgressByNodeId: LiveNodeProgressMap = {};
|
|
4286
|
+
let dirtyProgressNodeIds = new Set<string>();
|
|
4287
|
+
let pendingLedgerEvents: PlayRunLedgerEvent[] = [
|
|
4288
|
+
{
|
|
4289
|
+
type: 'run.started',
|
|
4290
|
+
runId: req.runId,
|
|
4291
|
+
playName: req.playName,
|
|
4292
|
+
source: 'worker',
|
|
4293
|
+
occurredAt: startedAt,
|
|
4294
|
+
runtimeBackend: 'cf_workflows_dynamic_worker',
|
|
4047
4295
|
},
|
|
4048
|
-
|
|
4049
|
-
|
|
4050
|
-
|
|
4051
|
-
|
|
4052
|
-
|
|
4053
|
-
let liveLogs: string[] = [];
|
|
4054
|
-
let liveLogsDirty = false;
|
|
4055
|
-
let liveNodeProgress: LiveNodeProgressMap = {};
|
|
4056
|
-
let lastLiveLogFlushAt =
|
|
4057
|
-
nowMs() - LIVE_LOG_FLUSH_INTERVAL_MS + LIVE_LOG_FIRST_FLUSH_DELAY_MS;
|
|
4058
|
-
let liveLogFlushInFlight: Promise<void> = Promise.resolve();
|
|
4059
|
-
const appendLiveLog = (line: string) => {
|
|
4296
|
+
];
|
|
4297
|
+
let lastLedgerFlushAt = 0;
|
|
4298
|
+
let ledgerFlushInFlight: Promise<void> = Promise.resolve();
|
|
4299
|
+
|
|
4300
|
+
const appendRunLogLine = (line: string) => {
|
|
4060
4301
|
const trimmed = redactSecretsFromLogString(line.trim());
|
|
4061
4302
|
if (!trimmed) return;
|
|
4062
|
-
|
|
4063
|
-
|
|
4303
|
+
runLogBuffer = [...runLogBuffer, trimmed].slice(-RUN_LOG_BUFFER_LIMIT);
|
|
4304
|
+
pendingRunLogLines = [...pendingRunLogLines, trimmed].slice(
|
|
4305
|
+
-RUN_LOG_BUFFER_LIMIT,
|
|
4306
|
+
);
|
|
4064
4307
|
};
|
|
4065
|
-
|
|
4308
|
+
|
|
4309
|
+
const updateStepProgress = (input: {
|
|
4066
4310
|
nodeId: string;
|
|
4067
4311
|
progress: LiveNodeProgressSnapshot;
|
|
4068
4312
|
}) => {
|
|
4069
4313
|
const nodeId = input.nodeId.trim();
|
|
4070
4314
|
if (!nodeId) return;
|
|
4071
|
-
|
|
4072
|
-
...
|
|
4315
|
+
stepProgressByNodeId = {
|
|
4316
|
+
...stepProgressByNodeId,
|
|
4073
4317
|
[nodeId]: {
|
|
4074
|
-
...(
|
|
4318
|
+
...(stepProgressByNodeId[nodeId] ?? {}),
|
|
4075
4319
|
...input.progress,
|
|
4076
4320
|
},
|
|
4077
4321
|
};
|
|
4322
|
+
dirtyProgressNodeIds.add(nodeId);
|
|
4323
|
+
};
|
|
4324
|
+
|
|
4325
|
+
const stepProgressSnapshot = () => ({ ...stepProgressByNodeId });
|
|
4326
|
+
|
|
4327
|
+
const appendStepLifecycleEvent = (event: PlayStepLifecycleEvent) => {
|
|
4328
|
+
updateStepProgress({
|
|
4329
|
+
nodeId: event.nodeId,
|
|
4330
|
+
progress: {
|
|
4331
|
+
...(event.transition === 'started'
|
|
4332
|
+
? { startedAt: event.at }
|
|
4333
|
+
: { completedAt: event.at }),
|
|
4334
|
+
updatedAt: event.at,
|
|
4335
|
+
},
|
|
4336
|
+
});
|
|
4337
|
+
pendingLedgerEvents = [
|
|
4338
|
+
...pendingLedgerEvents,
|
|
4339
|
+
{
|
|
4340
|
+
type:
|
|
4341
|
+
event.transition === 'started'
|
|
4342
|
+
? 'step.started'
|
|
4343
|
+
: event.transition === 'failed'
|
|
4344
|
+
? 'step.failed'
|
|
4345
|
+
: 'step.completed',
|
|
4346
|
+
runId: req.runId,
|
|
4347
|
+
source: 'worker',
|
|
4348
|
+
occurredAt: event.at,
|
|
4349
|
+
stepId: event.nodeId,
|
|
4350
|
+
kind: event.type,
|
|
4351
|
+
},
|
|
4352
|
+
];
|
|
4353
|
+
flushLedgerEvents(false);
|
|
4354
|
+
};
|
|
4355
|
+
|
|
4356
|
+
const drainPendingLedgerEvents = (
|
|
4357
|
+
occurredAt: number,
|
|
4358
|
+
): PlayRunLedgerEvent[] => {
|
|
4359
|
+
const events = pendingLedgerEvents;
|
|
4360
|
+
pendingLedgerEvents = [];
|
|
4361
|
+
|
|
4362
|
+
if (pendingRunLogLines.length > 0) {
|
|
4363
|
+
events.push({
|
|
4364
|
+
type: 'log.appended',
|
|
4365
|
+
runId: req.runId,
|
|
4366
|
+
source: 'worker',
|
|
4367
|
+
occurredAt,
|
|
4368
|
+
lines: pendingRunLogLines,
|
|
4369
|
+
});
|
|
4370
|
+
pendingRunLogLines = [];
|
|
4371
|
+
}
|
|
4372
|
+
|
|
4373
|
+
if (dirtyProgressNodeIds.size > 0) {
|
|
4374
|
+
for (const nodeId of dirtyProgressNodeIds) {
|
|
4375
|
+
const progress = stepProgressByNodeId[nodeId];
|
|
4376
|
+
if (!progress) continue;
|
|
4377
|
+
const normalizedProgress: PlayRunLedgerStepProgress = {
|
|
4378
|
+
...(typeof progress.completed === 'number'
|
|
4379
|
+
? { completed: progress.completed }
|
|
4380
|
+
: {}),
|
|
4381
|
+
...(typeof progress.total === 'number'
|
|
4382
|
+
? { total: progress.total }
|
|
4383
|
+
: {}),
|
|
4384
|
+
...(typeof progress.failed === 'number'
|
|
4385
|
+
? { failed: progress.failed }
|
|
4386
|
+
: {}),
|
|
4387
|
+
...(typeof progress.message === 'string' && progress.message
|
|
4388
|
+
? { message: progress.message }
|
|
4389
|
+
: {}),
|
|
4390
|
+
...(typeof progress.artifactTableNamespace === 'string' ||
|
|
4391
|
+
progress.artifactTableNamespace === null
|
|
4392
|
+
? { artifactTableNamespace: progress.artifactTableNamespace }
|
|
4393
|
+
: {}),
|
|
4394
|
+
updatedAt:
|
|
4395
|
+
typeof progress.updatedAt === 'number'
|
|
4396
|
+
? progress.updatedAt
|
|
4397
|
+
: occurredAt,
|
|
4398
|
+
};
|
|
4399
|
+
const status: PlayRunLedgerStepStatus =
|
|
4400
|
+
typeof progress.completedAt === 'number' ? 'completed' : 'running';
|
|
4401
|
+
events.push({
|
|
4402
|
+
type: 'step.progress',
|
|
4403
|
+
runId: req.runId,
|
|
4404
|
+
source: 'worker',
|
|
4405
|
+
occurredAt:
|
|
4406
|
+
typeof progress.updatedAt === 'number'
|
|
4407
|
+
? progress.updatedAt
|
|
4408
|
+
: occurredAt,
|
|
4409
|
+
stepId: nodeId,
|
|
4410
|
+
status,
|
|
4411
|
+
progress: normalizedProgress,
|
|
4412
|
+
});
|
|
4413
|
+
}
|
|
4414
|
+
dirtyProgressNodeIds = new Set<string>();
|
|
4415
|
+
}
|
|
4416
|
+
|
|
4417
|
+
return events;
|
|
4078
4418
|
};
|
|
4079
|
-
|
|
4080
|
-
const
|
|
4419
|
+
|
|
4420
|
+
const flushLedgerEvents = (force: boolean): void => {
|
|
4081
4421
|
if (!options?.persistResultDatasets) return;
|
|
4082
|
-
if (!liveLogsDirty && !force) return;
|
|
4083
4422
|
const now = nowMs();
|
|
4084
|
-
if (!force && now -
|
|
4085
|
-
|
|
4086
|
-
|
|
4087
|
-
const
|
|
4088
|
-
|
|
4423
|
+
if (!force && now - lastLedgerFlushAt < RUN_LEDGER_FLUSH_INTERVAL_MS) {
|
|
4424
|
+
return;
|
|
4425
|
+
}
|
|
4426
|
+
const events = drainPendingLedgerEvents(now);
|
|
4427
|
+
if (events.length === 0) return;
|
|
4428
|
+
lastLedgerFlushAt = now;
|
|
4429
|
+
ledgerFlushInFlight = ledgerFlushInFlight
|
|
4089
4430
|
.catch(() => undefined)
|
|
4090
4431
|
.then(async () => {
|
|
4091
4432
|
try {
|
|
4092
4433
|
await postRuntimeApi(req.baseUrl, req.executorToken, {
|
|
4093
|
-
action: '
|
|
4434
|
+
action: 'append_run_events',
|
|
4094
4435
|
playId: req.runId,
|
|
4095
|
-
|
|
4096
|
-
runtimeBackend: 'cf_workflows_dynamic_worker',
|
|
4097
|
-
liveLogs: snapshot,
|
|
4098
|
-
liveNodeProgress: liveNodeProgressSnapshot(),
|
|
4099
|
-
lastCheckpointAt: now,
|
|
4436
|
+
events,
|
|
4100
4437
|
});
|
|
4101
4438
|
} catch {
|
|
4102
|
-
|
|
4439
|
+
pendingLedgerEvents = [...events, ...pendingLedgerEvents];
|
|
4440
|
+
throw new Error('runtime run-ledger append failed');
|
|
4103
4441
|
}
|
|
4442
|
+
})
|
|
4443
|
+
.catch(() => undefined);
|
|
4444
|
+
};
|
|
4445
|
+
|
|
4446
|
+
const flushTerminalLedgerEvents = async (
|
|
4447
|
+
terminalEvent: PlayRunLedgerEvent,
|
|
4448
|
+
): Promise<void> => {
|
|
4449
|
+
if (!options?.persistResultDatasets) return;
|
|
4450
|
+
await ledgerFlushInFlight.catch(() => undefined);
|
|
4451
|
+
const now = nowMs();
|
|
4452
|
+
pendingRunLogLines = runLogBuffer;
|
|
4453
|
+
dirtyProgressNodeIds = new Set([
|
|
4454
|
+
...dirtyProgressNodeIds,
|
|
4455
|
+
...Object.keys(stepProgressByNodeId),
|
|
4456
|
+
]);
|
|
4457
|
+
pendingLedgerEvents = [...pendingLedgerEvents, terminalEvent];
|
|
4458
|
+
const events = drainPendingLedgerEvents(now);
|
|
4459
|
+
if (events.length === 0) return;
|
|
4460
|
+
try {
|
|
4461
|
+
await postRuntimeApi(req.baseUrl, req.executorToken, {
|
|
4462
|
+
action: 'append_run_events',
|
|
4463
|
+
playId: req.runId,
|
|
4464
|
+
events,
|
|
4104
4465
|
});
|
|
4466
|
+
} catch (error) {
|
|
4467
|
+
pendingLedgerEvents = [...events, ...pendingLedgerEvents];
|
|
4468
|
+
throw error;
|
|
4469
|
+
}
|
|
4470
|
+
};
|
|
4471
|
+
|
|
4472
|
+
const orderedNodes = buildOrderedNodeList(req.contractSnapshot);
|
|
4473
|
+
const stepLifecycle =
|
|
4474
|
+
orderedNodes.length > 0
|
|
4475
|
+
? new PlayStepLifecycleTracker(
|
|
4476
|
+
orderedNodes,
|
|
4477
|
+
() => stepProgressByNodeId,
|
|
4478
|
+
appendStepLifecycleEvent,
|
|
4479
|
+
nowMs,
|
|
4480
|
+
)
|
|
4481
|
+
: null;
|
|
4482
|
+
const workerCallbacks: WorkerCtxCallbacks = {
|
|
4483
|
+
onNodeProgress: (input) => {
|
|
4484
|
+
updateStepProgress(input);
|
|
4485
|
+
flushLedgerEvents(false);
|
|
4486
|
+
},
|
|
4487
|
+
onMapStarted: (nodeId, at) => stepLifecycle?.onMapStarted(nodeId, at),
|
|
4488
|
+
onMapCompleted: (nodeId, at) => stepLifecycle?.onMapCompleted(nodeId, at),
|
|
4489
|
+
onToolCalled: (toolId, at) => stepLifecycle?.onToolCalled(toolId, at),
|
|
4490
|
+
onToolFailed: (toolId, at) => stepLifecycle?.onToolFailed(toolId, at),
|
|
4105
4491
|
};
|
|
4106
4492
|
|
|
4107
4493
|
const wrappedEmit = (event: RunnerEvent) => {
|
|
4108
4494
|
if (event.type === 'log') {
|
|
4109
|
-
|
|
4110
|
-
|
|
4495
|
+
appendRunLogLine(event.message);
|
|
4496
|
+
flushLedgerEvents(false);
|
|
4111
4497
|
} else if (event.type === 'error') {
|
|
4112
4498
|
// Sanitize the inbound message before it enters the run-log buffer.
|
|
4113
4499
|
// The downstream `emit` still receives the raw event so the console /
|
|
4114
4500
|
// NDJSON stream can keep its full debugging fidelity.
|
|
4115
4501
|
const sanitizedMessage = redactSecretsFromLogString(event.message);
|
|
4116
|
-
|
|
4117
|
-
|
|
4502
|
+
appendRunLogLine(`[error] ${sanitizedMessage}`);
|
|
4503
|
+
flushLedgerEvents(true);
|
|
4118
4504
|
}
|
|
4119
4505
|
emit(event);
|
|
4120
4506
|
};
|
|
4121
4507
|
|
|
4508
|
+
stepLifecycle?.markPreMapStepsStarted(startedAt);
|
|
4509
|
+
flushLedgerEvents(false);
|
|
4122
4510
|
const ctx = createMinimalWorkerCtx(
|
|
4123
4511
|
req,
|
|
4124
4512
|
wrappedEmit,
|
|
4125
4513
|
env,
|
|
4126
4514
|
workflowStep,
|
|
4127
4515
|
abortSignal,
|
|
4128
|
-
|
|
4516
|
+
workerCallbacks,
|
|
4129
4517
|
);
|
|
4130
4518
|
try {
|
|
4131
4519
|
const playStartedAt = nowMs();
|
|
@@ -4140,6 +4528,7 @@ async function executeRunRequest(
|
|
|
4140
4528
|
phase: 'runner.play_function',
|
|
4141
4529
|
ms: nowMs() - playStartedAt,
|
|
4142
4530
|
});
|
|
4531
|
+
stepLifecycle?.markAllTerminal(nowMs());
|
|
4143
4532
|
const serializeStartedAt = nowMs();
|
|
4144
4533
|
const serializedResult = serializePlayReturnValue(result);
|
|
4145
4534
|
recordRunnerPerfTrace({
|
|
@@ -4148,53 +4537,74 @@ async function executeRunRequest(
|
|
|
4148
4537
|
ms: nowMs() - serializeStartedAt,
|
|
4149
4538
|
});
|
|
4150
4539
|
if (options?.persistResultDatasets) {
|
|
4151
|
-
const
|
|
4152
|
-
await
|
|
4540
|
+
const ledgerFlushWaitStartedAt = nowMs();
|
|
4541
|
+
await ledgerFlushInFlight.catch(() => undefined);
|
|
4153
4542
|
recordRunnerPerfTrace({
|
|
4154
4543
|
req,
|
|
4155
|
-
phase: 'runner.
|
|
4156
|
-
ms: nowMs() -
|
|
4544
|
+
phase: 'runner.run_ledger_flush_wait',
|
|
4545
|
+
ms: nowMs() - ledgerFlushWaitStartedAt,
|
|
4157
4546
|
});
|
|
4158
4547
|
const resultDatasetStartedAt = nowMs();
|
|
4159
|
-
await persistResultDatasets(req, serializedResult);
|
|
4548
|
+
await persistResultDatasets(req, result, serializedResult);
|
|
4160
4549
|
recordRunnerPerfTrace({
|
|
4161
4550
|
req,
|
|
4162
4551
|
phase: 'runner.persist_result_datasets',
|
|
4163
4552
|
ms: nowMs() - resultDatasetStartedAt,
|
|
4164
4553
|
});
|
|
4165
4554
|
const terminalResult = trimResultForStatus(serializedResult);
|
|
4166
|
-
const
|
|
4167
|
-
|
|
4168
|
-
|
|
4169
|
-
|
|
4170
|
-
|
|
4171
|
-
|
|
4172
|
-
|
|
4173
|
-
|
|
4174
|
-
|
|
4175
|
-
|
|
4176
|
-
|
|
4177
|
-
|
|
4178
|
-
|
|
4179
|
-
|
|
4180
|
-
|
|
4181
|
-
|
|
4182
|
-
|
|
4183
|
-
|
|
4184
|
-
|
|
4555
|
+
const terminalOccurredAt = nowMs();
|
|
4556
|
+
const terminalLedgerPromise = (async () => {
|
|
4557
|
+
const terminalUpdateStartedAt = nowMs();
|
|
4558
|
+
await flushTerminalLedgerEvents({
|
|
4559
|
+
type: 'run.completed',
|
|
4560
|
+
runId: req.runId,
|
|
4561
|
+
source: 'worker',
|
|
4562
|
+
occurredAt: terminalOccurredAt,
|
|
4563
|
+
result: terminalResult,
|
|
4564
|
+
});
|
|
4565
|
+
recordRunnerPerfTrace({
|
|
4566
|
+
req,
|
|
4567
|
+
phase: 'runner.terminal_ledger_append',
|
|
4568
|
+
ms: nowMs() - terminalUpdateStartedAt,
|
|
4569
|
+
});
|
|
4570
|
+
})().catch((error) => {
|
|
4571
|
+
console.error(
|
|
4572
|
+
`[play-harness] non-fatal terminal ledger append failed runId=${req.runId}: ${
|
|
4573
|
+
error instanceof Error ? error.message : String(error)
|
|
4574
|
+
}`,
|
|
4575
|
+
);
|
|
4185
4576
|
});
|
|
4186
4577
|
|
|
4578
|
+
await terminalLedgerPromise;
|
|
4579
|
+
|
|
4187
4580
|
const billingStartedAt = nowMs();
|
|
4188
|
-
|
|
4581
|
+
const billingPromise = finalizeWorkerComputeBilling({
|
|
4189
4582
|
req,
|
|
4190
4583
|
success: true,
|
|
4191
4584
|
actionEstimate: 4,
|
|
4585
|
+
}).then(() => {
|
|
4586
|
+
recordRunnerPerfTrace({
|
|
4587
|
+
req,
|
|
4588
|
+
phase: 'runner.compute_billing_finalize',
|
|
4589
|
+
ms: nowMs() - billingStartedAt,
|
|
4590
|
+
});
|
|
4192
4591
|
});
|
|
4193
|
-
|
|
4194
|
-
|
|
4195
|
-
|
|
4196
|
-
|
|
4197
|
-
|
|
4592
|
+
if (extractMaxCreditsPerRun(req.contractSnapshot) !== null) {
|
|
4593
|
+
await billingPromise;
|
|
4594
|
+
} else {
|
|
4595
|
+
const nonBlockingBillingPromise = billingPromise.catch((error) => {
|
|
4596
|
+
console.error(
|
|
4597
|
+
`[play-harness] non-fatal compute billing finalize failed runId=${req.runId}: ${
|
|
4598
|
+
error instanceof Error ? error.message : String(error)
|
|
4599
|
+
}`,
|
|
4600
|
+
);
|
|
4601
|
+
});
|
|
4602
|
+
if (options?.waitUntil) {
|
|
4603
|
+
options.waitUntil(nonBlockingBillingPromise);
|
|
4604
|
+
} else {
|
|
4605
|
+
await nonBlockingBillingPromise;
|
|
4606
|
+
}
|
|
4607
|
+
}
|
|
4198
4608
|
}
|
|
4199
4609
|
const parentSignalStartedAt = nowMs();
|
|
4200
4610
|
await signalParentPlayTerminal({
|
|
@@ -4222,11 +4632,12 @@ async function executeRunRequest(
|
|
|
4222
4632
|
playName: req.playName,
|
|
4223
4633
|
result: serializedResult,
|
|
4224
4634
|
outputRows: inferOutputRows(serializedResult),
|
|
4225
|
-
liveLogs,
|
|
4226
|
-
liveNodeProgress:
|
|
4635
|
+
liveLogs: runLogBuffer,
|
|
4636
|
+
liveNodeProgress: stepProgressSnapshot(),
|
|
4227
4637
|
durationMs: nowMs() - startedAt,
|
|
4228
4638
|
};
|
|
4229
4639
|
} catch (error) {
|
|
4640
|
+
stepLifecycle?.markStartedFailed(nowMs());
|
|
4230
4641
|
const aborted = isAbortLikeError(error);
|
|
4231
4642
|
if (aborted) {
|
|
4232
4643
|
// Flip the controller so any concurrent user code observes the abort
|
|
@@ -4237,19 +4648,15 @@ async function executeRunRequest(
|
|
|
4237
4648
|
}
|
|
4238
4649
|
const message = error instanceof Error ? error.message : String(error);
|
|
4239
4650
|
if (options?.persistResultDatasets) {
|
|
4240
|
-
|
|
4241
|
-
|
|
4242
|
-
|
|
4243
|
-
|
|
4244
|
-
|
|
4651
|
+
appendRunLogLine(
|
|
4652
|
+
`${aborted ? '[cancelled]' : '[error]'} ${redactSecretsFromLogString(message)}`,
|
|
4653
|
+
);
|
|
4654
|
+
await flushTerminalLedgerEvents({
|
|
4655
|
+
type: aborted ? 'run.cancelled' : 'run.failed',
|
|
4656
|
+
runId: req.runId,
|
|
4657
|
+
source: 'worker',
|
|
4658
|
+
occurredAt: nowMs(),
|
|
4245
4659
|
error: message,
|
|
4246
|
-
runtimeBackend: 'cf_workflows_dynamic_worker',
|
|
4247
|
-
waitKind: null,
|
|
4248
|
-
waitUntil: null,
|
|
4249
|
-
activeBoundaryId: null,
|
|
4250
|
-
liveLogs,
|
|
4251
|
-
liveNodeProgress: liveNodeProgressSnapshot(),
|
|
4252
|
-
lastCheckpointAt: nowMs(),
|
|
4253
4660
|
});
|
|
4254
4661
|
await finalizeWorkerComputeBilling({
|
|
4255
4662
|
req,
|
|
@@ -4338,6 +4745,12 @@ function runRequestFromWorkflowParams(
|
|
|
4338
4745
|
): RunRequest {
|
|
4339
4746
|
const inputFile = isRecord(params.inputFile) ? params.inputFile : null;
|
|
4340
4747
|
const fileName = String(inputFile?.name ?? inputFile?.path ?? 'input.csv');
|
|
4748
|
+
const inputStorageKey =
|
|
4749
|
+
typeof inputFile?.r2Key === 'string'
|
|
4750
|
+
? inputFile.r2Key
|
|
4751
|
+
: typeof inputFile?.storageKey === 'string'
|
|
4752
|
+
? inputFile.storageKey
|
|
4753
|
+
: null;
|
|
4341
4754
|
return {
|
|
4342
4755
|
runId: String(params.runId ?? ''),
|
|
4343
4756
|
callbackUrl: String(params.baseUrl ?? ''),
|
|
@@ -4350,14 +4763,30 @@ function runRequestFromWorkflowParams(
|
|
|
4350
4763
|
? (params.input as Record<string, unknown>)
|
|
4351
4764
|
: {},
|
|
4352
4765
|
inlineCsv: isInlineCsv(params.inlineCsv) ? params.inlineCsv : null,
|
|
4353
|
-
|
|
4354
|
-
inputFile &&
|
|
4355
|
-
? {
|
|
4766
|
+
inputFiles:
|
|
4767
|
+
inputFile && inputStorageKey
|
|
4768
|
+
? {
|
|
4769
|
+
[fileName]: {
|
|
4770
|
+
logicalPath: String(
|
|
4771
|
+
inputFile.logicalPath ?? inputFile.path ?? fileName,
|
|
4772
|
+
),
|
|
4773
|
+
fileName,
|
|
4774
|
+
storageKey: inputStorageKey,
|
|
4775
|
+
contentType:
|
|
4776
|
+
typeof inputFile.contentType === 'string'
|
|
4777
|
+
? inputFile.contentType
|
|
4778
|
+
: null,
|
|
4779
|
+
bytes: normalizeExpectedBytes(inputFile.bytes),
|
|
4780
|
+
},
|
|
4781
|
+
}
|
|
4356
4782
|
: null,
|
|
4357
4783
|
packagedFiles: Array.isArray(params.packagedFiles)
|
|
4358
4784
|
? params.packagedFiles.filter(isRecord).map((file) => ({
|
|
4359
4785
|
playPath: String(file.playPath ?? ''),
|
|
4360
4786
|
storageKey: String(file.storageKey ?? ''),
|
|
4787
|
+
contentType:
|
|
4788
|
+
typeof file.contentType === 'string' ? file.contentType : null,
|
|
4789
|
+
bytes: normalizeExpectedBytes(file.bytes),
|
|
4361
4790
|
}))
|
|
4362
4791
|
: null,
|
|
4363
4792
|
partitionRange: null,
|
|
@@ -4425,11 +4854,39 @@ function isPlayCallGovernanceSnapshot(
|
|
|
4425
4854
|
async function persistResultDatasets(
|
|
4426
4855
|
req: RunRequest,
|
|
4427
4856
|
result: unknown,
|
|
4857
|
+
serializedResult: unknown,
|
|
4428
4858
|
): Promise<void> {
|
|
4429
|
-
const
|
|
4859
|
+
const persistedNamespaces = new Set<string>();
|
|
4860
|
+
for (const dataset of collectDatasetHandles(result)) {
|
|
4861
|
+
if (dataset.datasetKind === 'map') continue;
|
|
4862
|
+
let inputOffset = 0;
|
|
4863
|
+
for await (const chunk of iterDatasetChunks(
|
|
4864
|
+
dataset.handle,
|
|
4865
|
+
RESULT_DATASET_PERSIST_CHUNK_ROWS,
|
|
4866
|
+
)) {
|
|
4867
|
+
if (chunk.length === 0) continue;
|
|
4868
|
+
await harnessStartSheetDataset({
|
|
4869
|
+
baseUrl: req.baseUrl,
|
|
4870
|
+
executorToken: req.executorToken,
|
|
4871
|
+
playName: req.playName,
|
|
4872
|
+
tableNamespace: dataset.tableNamespace,
|
|
4873
|
+
sheetContract: requireSheetContract(req, dataset.tableNamespace),
|
|
4874
|
+
rows: chunk.map((row) => ({ ...row })),
|
|
4875
|
+
runId: req.runId,
|
|
4876
|
+
inputOffset,
|
|
4877
|
+
userEmail: req.userEmail,
|
|
4878
|
+
preloadedDbSessions: req.preloadedDbSessions ?? null,
|
|
4879
|
+
});
|
|
4880
|
+
inputOffset += chunk.length;
|
|
4881
|
+
}
|
|
4882
|
+
persistedNamespaces.add(dataset.tableNamespace);
|
|
4883
|
+
}
|
|
4884
|
+
|
|
4885
|
+
const datasets = collectDatasetEnvelopes(serializedResult);
|
|
4430
4886
|
for (const dataset of datasets) {
|
|
4431
4887
|
if (dataset.datasetKind === 'map') continue;
|
|
4432
4888
|
if (dataset.rows.length === 0) continue;
|
|
4889
|
+
if (persistedNamespaces.has(dataset.tableNamespace)) continue;
|
|
4433
4890
|
await harnessStartSheetDataset({
|
|
4434
4891
|
baseUrl: req.baseUrl,
|
|
4435
4892
|
executorToken: req.executorToken,
|
|
@@ -4438,12 +4895,63 @@ async function persistResultDatasets(
|
|
|
4438
4895
|
sheetContract: requireSheetContract(req, dataset.tableNamespace),
|
|
4439
4896
|
rows: dataset.rows,
|
|
4440
4897
|
runId: req.runId,
|
|
4898
|
+
inputOffset: 0,
|
|
4441
4899
|
userEmail: req.userEmail,
|
|
4442
4900
|
preloadedDbSessions: req.preloadedDbSessions ?? null,
|
|
4443
4901
|
});
|
|
4444
4902
|
}
|
|
4445
4903
|
}
|
|
4446
4904
|
|
|
4905
|
+
/**
 * Chunk size handed to `iterDatasetChunks` when `persistResultDatasets`
 * streams a returned dataset handle; every non-empty chunk it yields is
 * written with its own `harnessStartSheetDataset` call.
 * NOTE(review): presumably the maximum number of rows per chunk — confirm
 * against `iterDatasetChunks`.
 */
const RESULT_DATASET_PERSIST_CHUNK_ROWS = 5_000;
|
|
4906
|
+
|
|
4907
|
+
/**
 * Walks a play's raw (un-serialized) return value and collects every dataset
 * handle whose `toJSON()` metadata carries a string `tableNamespace`.
 *
 * Traversal rules:
 * - recursion stops past depth 12 and at `null`/`undefined`;
 * - a dataset handle is a leaf: it is recorded and never descended into;
 * - arrays are walked element by element, other objects value by value;
 * - each non-array object is visited once, so cyclic object graphs terminate;
 * - a handle reachable through several paths is reported only once, so a
 *   caller that streams every returned handle does not process the same rows
 *   twice.
 *
 * @param value The value returned by the play function.
 * @returns One entry per distinct handle, in first-encounter order.
 *   `datasetKind` is `'csv'` or `'map'` when the metadata says so, otherwise
 *   `null`.
 */
function collectDatasetHandles(value: unknown): Array<{
  tableNamespace: string;
  datasetKind: 'csv' | 'map' | null;
  handle: WorkerDatasetHandle<Record<string, unknown>>;
}> {
  const datasets: ReturnType<typeof collectDatasetHandles> = [];
  // Identity set shared by handles and plain objects.
  const seen = new WeakSet<object>();

  const walk = (candidate: unknown, depth: number): void => {
    if (depth > 12 || candidate == null) return;

    if (isDatasetHandle(candidate)) {
      // The same handle may be referenced from several places in the result
      // (e.g. `{ rows, summary: { rows } }`); collect it only once.
      if (seen.has(candidate)) return;
      seen.add(candidate);

      const metadata = candidate.toJSON() as Record<string, unknown>;
      const tableNamespace =
        typeof metadata.tableNamespace === 'string'
          ? metadata.tableNamespace
          : null;
      const datasetKind =
        metadata.datasetKind === 'csv' || metadata.datasetKind === 'map'
          ? metadata.datasetKind
          : null;
      // Handles without a namespace cannot be persisted, so they are dropped.
      if (tableNamespace) {
        datasets.push({
          tableNamespace,
          datasetKind,
          handle: candidate as WorkerDatasetHandle<Record<string, unknown>>,
        });
      }
      return;
    }

    if (Array.isArray(candidate)) {
      for (const item of candidate) walk(item, depth + 1);
      return;
    }

    if (typeof candidate !== 'object') return;
    const object = candidate as Record<string, unknown>;
    if (seen.has(object)) return;
    seen.add(object);
    for (const child of Object.values(object)) {
      walk(child, depth + 1);
    }
  };

  walk(value, 0);
  return datasets;
}
|
|
4954
|
+
|
|
4447
4955
|
/**
 * Serializes a play's return value by delegating to `serializeValue`,
 * starting the recursion at depth 0.
 *
 * @param value The value returned by the play function.
 * @returns Whatever `serializeValue` produces for `value`.
 */
function serializePlayReturnValue(value: unknown): unknown {
  const rootDepth = 0;
  const serialized = serializeValue(value, rootDepth);
  return serialized;
}
|
|
@@ -4498,64 +5006,10 @@ function trimResultShape(value: unknown): unknown {
|
|
|
4498
5006
|
|
|
4499
5007
|
function serializeValue(value: unknown, depth: number): unknown {
|
|
4500
5008
|
if (depth > 20 || value == null) return value;
|
|
5009
|
+
if (isDatasetHandle(value)) {
|
|
5010
|
+
return serializeValue(value.toJSON(), depth + 1);
|
|
5011
|
+
}
|
|
4501
5012
|
if (Array.isArray(value)) {
|
|
4502
|
-
const tableNamespace =
|
|
4503
|
-
typeof (value as unknown as { tableNamespace?: unknown })
|
|
4504
|
-
.tableNamespace === 'string'
|
|
4505
|
-
? (value as unknown as { tableNamespace: string }).tableNamespace
|
|
4506
|
-
: null;
|
|
4507
|
-
const datasetId =
|
|
4508
|
-
typeof (value as unknown as { datasetId?: unknown }).datasetId ===
|
|
4509
|
-
'string'
|
|
4510
|
-
? (value as unknown as { datasetId: string }).datasetId
|
|
4511
|
-
: null;
|
|
4512
|
-
const datasetCount =
|
|
4513
|
-
typeof (value as unknown as { __deeplineDatasetCount?: unknown })
|
|
4514
|
-
.__deeplineDatasetCount === 'number'
|
|
4515
|
-
? (value as unknown as { __deeplineDatasetCount: number })
|
|
4516
|
-
.__deeplineDatasetCount
|
|
4517
|
-
: value.length;
|
|
4518
|
-
const datasetKind =
|
|
4519
|
-
(value as unknown as { __deeplineDatasetKind?: unknown })
|
|
4520
|
-
.__deeplineDatasetKind === 'csv'
|
|
4521
|
-
? 'csv'
|
|
4522
|
-
: 'map';
|
|
4523
|
-
const cacheSummary =
|
|
4524
|
-
typeof (value as unknown as { __deeplineCacheSummary?: unknown })
|
|
4525
|
-
.__deeplineCacheSummary === 'string'
|
|
4526
|
-
? (value as unknown as { __deeplineCacheSummary: string })
|
|
4527
|
-
.__deeplineCacheSummary
|
|
4528
|
-
: null;
|
|
4529
|
-
const workProgress = isRecord(
|
|
4530
|
-
(value as unknown as { __deeplineWorkProgress?: unknown })
|
|
4531
|
-
.__deeplineWorkProgress,
|
|
4532
|
-
)
|
|
4533
|
-
? (
|
|
4534
|
-
value as unknown as {
|
|
4535
|
-
__deeplineWorkProgress: Record<string, unknown>;
|
|
4536
|
-
}
|
|
4537
|
-
).__deeplineWorkProgress
|
|
4538
|
-
: null;
|
|
4539
|
-
const previewRows = value
|
|
4540
|
-
.slice(0, 5)
|
|
4541
|
-
.map((row) => serializeValue(row, depth + 1))
|
|
4542
|
-
.filter(isRecord);
|
|
4543
|
-
if (tableNamespace && datasetId) {
|
|
4544
|
-
const columns = inferColumns(
|
|
4545
|
-
value.map((row) => serializeValue(row, depth + 1)).filter(isRecord),
|
|
4546
|
-
);
|
|
4547
|
-
return {
|
|
4548
|
-
kind: 'dataset' as const,
|
|
4549
|
-
datasetKind,
|
|
4550
|
-
datasetId,
|
|
4551
|
-
count: datasetCount,
|
|
4552
|
-
columns,
|
|
4553
|
-
preview: previewRows,
|
|
4554
|
-
tableNamespace,
|
|
4555
|
-
...(cacheSummary ? { cacheSummary } : {}),
|
|
4556
|
-
...(workProgress ? { _metadata: { workProgress } } : {}),
|
|
4557
|
-
};
|
|
4558
|
-
}
|
|
4559
5013
|
return value.map((entry) => serializeValue(entry, depth + 1));
|
|
4560
5014
|
}
|
|
4561
5015
|
if (typeof value !== 'object') return value;
|
|
@@ -4566,16 +5020,6 @@ function serializeValue(value: unknown, depth: number): unknown {
|
|
|
4566
5020
|
return out;
|
|
4567
5021
|
}
|
|
4568
5022
|
|
|
4569
|
-
function inferColumns(rows: ReadonlyArray<Record<string, unknown>>): string[] {
|
|
4570
|
-
const columns = new Set<string>();
|
|
4571
|
-
for (const row of rows) {
|
|
4572
|
-
for (const key of Object.keys(row)) {
|
|
4573
|
-
columns.add(key);
|
|
4574
|
-
}
|
|
4575
|
-
}
|
|
4576
|
-
return [...columns];
|
|
4577
|
-
}
|
|
4578
|
-
|
|
4579
5023
|
function collectDatasetEnvelopes(value: unknown): Array<{
|
|
4580
5024
|
tableNamespace: string;
|
|
4581
5025
|
datasetKind: 'csv' | 'map' | null;
|
|
@@ -4714,10 +5158,17 @@ export class TenantWorkflow extends WorkflowEntrypoint<
|
|
|
4714
5158
|
// user via tail/SSE. Retry with backoff before giving up; if we drop
|
|
4715
5159
|
// it, the user is stuck staring at the opaque CF reference id.
|
|
4716
5160
|
const errorPayload = JSON.stringify({
|
|
4717
|
-
action: '
|
|
5161
|
+
action: 'append_run_events',
|
|
4718
5162
|
playId: req.runId,
|
|
4719
|
-
|
|
4720
|
-
|
|
5163
|
+
events: [
|
|
5164
|
+
{
|
|
5165
|
+
type: 'run.failed',
|
|
5166
|
+
runId: req.runId,
|
|
5167
|
+
source: 'worker',
|
|
5168
|
+
occurredAt: nowMs(),
|
|
5169
|
+
error: `TenantWorkflow.run threw: ${detail.name ?? 'Error'}: ${detail.message}\n${detail.stack ?? ''}`,
|
|
5170
|
+
} satisfies PlayRunLedgerEvent,
|
|
5171
|
+
],
|
|
4721
5172
|
});
|
|
4722
5173
|
const backoffMs = [200, 500, 1500];
|
|
4723
5174
|
let lastCallbackError: unknown = null;
|
|
@@ -4850,6 +5301,10 @@ function inferOutputRows(result: unknown): number {
|
|
|
4850
5301
|
const datasets: number[] = [];
|
|
4851
5302
|
const walk = (value: unknown, depth: number) => {
|
|
4852
5303
|
if (depth > 6 || value == null) return;
|
|
5304
|
+
if (isDatasetHandle(value)) {
|
|
5305
|
+
datasets.push(value.toJSON().count);
|
|
5306
|
+
return;
|
|
5307
|
+
}
|
|
4853
5308
|
if (Array.isArray(value)) {
|
|
4854
5309
|
for (const item of value) walk(item, depth + 1);
|
|
4855
5310
|
return;
|
|
@@ -4858,14 +5313,9 @@ function inferOutputRows(result: unknown): number {
|
|
|
4858
5313
|
const record = value as Record<string, unknown>;
|
|
4859
5314
|
if (
|
|
4860
5315
|
typeof record.tableNamespace === 'string' &&
|
|
4861
|
-
|
|
4862
|
-
typeof record.__deeplineDatasetCount === 'number')
|
|
5316
|
+
typeof record.count === 'number'
|
|
4863
5317
|
) {
|
|
4864
|
-
datasets.push(
|
|
4865
|
-
typeof record.count === 'number'
|
|
4866
|
-
? record.count
|
|
4867
|
-
: Number(record.__deeplineDatasetCount),
|
|
4868
|
-
);
|
|
5318
|
+
datasets.push(record.count);
|
|
4869
5319
|
}
|
|
4870
5320
|
for (const [key, child] of Object.entries(record)) {
|
|
4871
5321
|
if (key === 'preview') continue;
|