deepline 0.1.20 → 0.1.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +338 -118
- package/dist/cli/index.mjs +338 -118
- package/dist/index.d.mts +4 -2
- package/dist/index.d.ts +4 -2
- package/dist/index.js +51 -6
- package/dist/index.mjs +51 -6
- package/dist/repo/apps/play-runner-workers/src/coordinator-entry.ts +888 -227
- package/dist/repo/apps/play-runner-workers/src/dedup-do.ts +540 -36
- package/dist/repo/apps/play-runner-workers/src/entry.ts +330 -374
- package/dist/repo/sdk/src/client.ts +46 -4
- package/dist/repo/sdk/src/http.ts +19 -1
- package/dist/repo/sdk/src/plays/harness-stub.ts +12 -0
- package/dist/repo/sdk/src/version.ts +1 -1
- package/dist/repo/shared_libs/play-runtime/scheduler-backend.ts +3 -6
- package/package.json +1 -1
|
@@ -56,6 +56,7 @@ import {
|
|
|
56
56
|
type ToolResultMetadataInput,
|
|
57
57
|
} from '../../../shared_libs/play-runtime/tool-result';
|
|
58
58
|
import type { PlayCallGovernanceSnapshot } from '../../../shared_libs/play-runtime/scheduler-backend';
|
|
59
|
+
import type { PreloadedRuntimeDbSession } from '../../../shared_libs/play-runtime/db-session';
|
|
59
60
|
import type { PlayRuntimeManifestMap } from '../../../shared_libs/plays/compiler-manifest';
|
|
60
61
|
import {
|
|
61
62
|
derivePlayRowIdentity,
|
|
@@ -80,6 +81,8 @@ import {
|
|
|
80
81
|
// re-bundle harness internals into per-play. Keep that in mind.
|
|
81
82
|
import {
|
|
82
83
|
harnessFetchStagedFile,
|
|
84
|
+
harnessPersistCompletedSheetRows,
|
|
85
|
+
harnessPrewarmPostgresSessions,
|
|
83
86
|
harnessStartSheetDataset,
|
|
84
87
|
setHarnessBinding,
|
|
85
88
|
} from '../../../sdk/src/plays/harness-stub';
|
|
@@ -129,6 +132,7 @@ type RunRequest = {
|
|
|
129
132
|
childPlayManifests?: PlayRuntimeManifestMap | null;
|
|
130
133
|
/** Internal ctx.runPlay lineage. Public SDK/users never see this. */
|
|
131
134
|
playCallGovernance?: PlayCallGovernanceSnapshot | null;
|
|
135
|
+
preloadedDbSessions?: PreloadedRuntimeDbSession[] | null;
|
|
132
136
|
/** Cloudflare coordinator URL for direct Workflow control-plane signals. */
|
|
133
137
|
coordinatorUrl?: string | null;
|
|
134
138
|
/** Request-scoped coordinator auth token for preview/dev direct control calls. */
|
|
@@ -191,6 +195,10 @@ type WorkerEnv = {
|
|
|
191
195
|
runId: string,
|
|
192
196
|
payload: Record<string, unknown>,
|
|
193
197
|
): Promise<void>;
|
|
198
|
+
recordRunEvent(
|
|
199
|
+
runId: string,
|
|
200
|
+
event: Record<string, unknown>,
|
|
201
|
+
): Promise<void>;
|
|
194
202
|
};
|
|
195
203
|
/**
|
|
196
204
|
* Service binding to the long-lived Play Harness Worker
|
|
@@ -520,6 +528,34 @@ function nowMs(): number {
|
|
|
520
528
|
return Date.now();
|
|
521
529
|
}
|
|
522
530
|
|
|
531
|
+
/**
 * Emit one runner-side performance trace for a run.
 *
 * The trace is written to the console as
 * `[deepline-run:<runId>] [perf-trace] <json>` and, when a coordinator binding
 * has been cached, forwarded to it via `recordPerfTrace`. Forwarding is
 * fire-and-forget: a rejected forward is logged as a warning and never
 * propagates to the caller.
 *
 * @param input.req   Run request; only `runId` is read here.
 * @param input.phase Phase name. It is namespaced under `runner.`; a phase that
 *                    already starts with `runner.` is used as-is so call sites
 *                    that pass fully-qualified names (e.g. `runner.tool.drain`)
 *                    do not end up as `runner.runner.tool.drain`.
 * @param input.ms    Optional duration in milliseconds; omitted from the
 *                    payload when undefined.
 * @param input.extra Optional extra fields merged into the payload. They are
 *                    spread last, so a colliding key overrides a base field.
 */
function recordRunnerPerfTrace(input: {
  req: RunRequest;
  phase: string;
  ms?: number;
  extra?: Record<string, unknown>;
}): void {
  // Nothing useful can be recorded without a run id and a phase name.
  if (!input.req.runId || !input.phase) return;

  // Apply the `runner.` namespace exactly once.
  const phase = input.phase.startsWith('runner.')
    ? input.phase
    : `runner.${input.phase}`;

  const payload = {
    ts: Date.now(),
    source: 'dynamic_worker' as const,
    runId: input.req.runId,
    phase,
    ...(input.ms !== undefined ? { ms: input.ms } : {}),
    ...(input.extra ?? {}),
  };
  console.log(
    `[deepline-run:${input.req.runId}] [perf-trace] ${JSON.stringify(payload)}`,
  );

  // Fire-and-forget: tracing must not block or fail the run.
  void cachedCoordinatorBinding
    ?.recordPerfTrace(input.req.runId, payload)
    .catch((error: unknown) => {
      const message = error instanceof Error ? error.message : String(error);
      console.warn(
        `[deepline-run:${input.req.runId}] failed to forward runner perf trace: ${message}`,
      );
    });
}
|
|
558
|
+
|
|
523
559
|
function makeRequestId(): string {
|
|
524
560
|
// Workers crypto.randomUUID is available without nodejs_compat.
|
|
525
561
|
return crypto.randomUUID();
|
|
@@ -660,7 +696,7 @@ async function submitChildPlayThroughCoordinator(input: {
|
|
|
660
696
|
const coordinatorUrl = input.req.coordinatorUrl?.trim();
|
|
661
697
|
if (coordinatorUrl) {
|
|
662
698
|
// Keep child plays on the same coordinator/Workflow submit path as
|
|
663
|
-
// top-level runs
|
|
699
|
+
// top-level runs when the coordinator URL is present.
|
|
664
700
|
const res = await fetch(
|
|
665
701
|
`${coordinatorUrl.replace(/\/$/, '')}/workflow/${encodeURIComponent(
|
|
666
702
|
input.req.runId,
|
|
@@ -1417,7 +1453,23 @@ class WorkerToolBatchScheduler {
|
|
|
1417
1453
|
workflowStep?: WorkflowStep,
|
|
1418
1454
|
): Promise<unknown> {
|
|
1419
1455
|
return new Promise((resolve, reject) => {
|
|
1420
|
-
|
|
1456
|
+
const queuedAt = nowMs();
|
|
1457
|
+
this.queue.push({
|
|
1458
|
+
id,
|
|
1459
|
+
toolId,
|
|
1460
|
+
input,
|
|
1461
|
+
workflowStep,
|
|
1462
|
+
resolve: (value) => {
|
|
1463
|
+
recordRunnerPerfTrace({
|
|
1464
|
+
req: this.req,
|
|
1465
|
+
phase: 'runner.tool.request',
|
|
1466
|
+
ms: nowMs() - queuedAt,
|
|
1467
|
+
extra: { id, toolId },
|
|
1468
|
+
});
|
|
1469
|
+
resolve(value);
|
|
1470
|
+
},
|
|
1471
|
+
reject,
|
|
1472
|
+
});
|
|
1421
1473
|
this.scheduleDrain();
|
|
1422
1474
|
});
|
|
1423
1475
|
}
|
|
@@ -1447,6 +1499,7 @@ class WorkerToolBatchScheduler {
|
|
|
1447
1499
|
const requests = this.queue;
|
|
1448
1500
|
this.queue = [];
|
|
1449
1501
|
this.scheduled = false;
|
|
1502
|
+
const drainStartedAt = nowMs();
|
|
1450
1503
|
await Promise.all(
|
|
1451
1504
|
[...groupWorkerToolRequestsByTool(requests).entries()].map(
|
|
1452
1505
|
async ([toolId, groupedRequests]) => {
|
|
@@ -1454,6 +1507,12 @@ class WorkerToolBatchScheduler {
|
|
|
1454
1507
|
},
|
|
1455
1508
|
),
|
|
1456
1509
|
);
|
|
1510
|
+
recordRunnerPerfTrace({
|
|
1511
|
+
req: this.req,
|
|
1512
|
+
phase: 'runner.tool.drain',
|
|
1513
|
+
ms: nowMs() - drainStartedAt,
|
|
1514
|
+
extra: { requests: requests.length },
|
|
1515
|
+
});
|
|
1457
1516
|
if (this.queue.length > 0) {
|
|
1458
1517
|
this.scheduleDrain();
|
|
1459
1518
|
}
|
|
@@ -1465,6 +1524,7 @@ class WorkerToolBatchScheduler {
|
|
|
1465
1524
|
): Promise<void> {
|
|
1466
1525
|
const strategy = getDefaultPlayRuntimeBatchStrategy(toolId);
|
|
1467
1526
|
if (!strategy || toolId === 'test_wait_for_event') {
|
|
1527
|
+
const groupStartedAt = nowMs();
|
|
1468
1528
|
await Promise.all(
|
|
1469
1529
|
requests.map(async (request) => {
|
|
1470
1530
|
try {
|
|
@@ -1480,14 +1540,27 @@ class WorkerToolBatchScheduler {
|
|
|
1480
1540
|
}
|
|
1481
1541
|
}),
|
|
1482
1542
|
);
|
|
1543
|
+
recordRunnerPerfTrace({
|
|
1544
|
+
req: this.req,
|
|
1545
|
+
phase: 'runner.tool.group',
|
|
1546
|
+
ms: nowMs() - groupStartedAt,
|
|
1547
|
+
extra: { toolId, requests: requests.length, batched: false },
|
|
1548
|
+
});
|
|
1483
1549
|
return;
|
|
1484
1550
|
}
|
|
1485
1551
|
|
|
1552
|
+
const batchStartedAt = nowMs();
|
|
1486
1553
|
await executeBatchedWorkerToolGroup({
|
|
1487
1554
|
req: this.req,
|
|
1488
1555
|
requests,
|
|
1489
1556
|
strategy,
|
|
1490
1557
|
});
|
|
1558
|
+
recordRunnerPerfTrace({
|
|
1559
|
+
req: this.req,
|
|
1560
|
+
phase: 'runner.tool.group',
|
|
1561
|
+
ms: nowMs() - batchStartedAt,
|
|
1562
|
+
extra: { toolId, requests: requests.length, batched: true },
|
|
1563
|
+
});
|
|
1491
1564
|
}
|
|
1492
1565
|
}
|
|
1493
1566
|
|
|
@@ -2292,7 +2365,7 @@ function isStreamingDataset<T extends Record<string, unknown>>(
|
|
|
2292
2365
|
|
|
2293
2366
|
/**
|
|
2294
2367
|
* Resolve the sheet contract for a (play, namespace) from the request's
|
|
2295
|
-
* contractSnapshot. Required by
|
|
2368
|
+
* contractSnapshot. Required by direct runtime sheet persistence.
|
|
2296
2369
|
*/
|
|
2297
2370
|
function resolveSheetContractFromReq(
|
|
2298
2371
|
req: RunRequest,
|
|
@@ -2320,13 +2393,13 @@ function resolveSheetContractFromReq(
|
|
|
2320
2393
|
}
|
|
2321
2394
|
|
|
2322
2395
|
/**
|
|
2323
|
-
* Direct-Neon writes from the
|
|
2324
|
-
* Postgres URL via `create_db_session`
|
|
2325
|
-
*
|
|
2326
|
-
*
|
|
2396
|
+
* Direct-Neon writes from the shared harness Worker. Resolves the org runtime
|
|
2397
|
+
* Postgres URL via `create_db_session` inside that long-lived Worker, then
|
|
2398
|
+
* runs the same row persistence path Daytona uses without bundling Neon into
|
|
2399
|
+
* every per-play dynamic Worker.
|
|
2327
2400
|
*
|
|
2328
|
-
*
|
|
2329
|
-
*
|
|
2401
|
+
* If the contract/session can't be obtained, fail loudly rather than silently
|
|
2402
|
+
* regressing to per-chunk Vercel hops.
|
|
2330
2403
|
*/
|
|
2331
2404
|
function requireSheetContract(
|
|
2332
2405
|
req: RunRequest,
|
|
@@ -2351,12 +2424,9 @@ async function persistCompletedMapRows(input: {
|
|
|
2351
2424
|
extraOutputFields?: string[];
|
|
2352
2425
|
}): Promise<void> {
|
|
2353
2426
|
if (input.rows.length === 0) return;
|
|
2354
|
-
await
|
|
2355
|
-
|
|
2356
|
-
|
|
2357
|
-
tableNamespace: string;
|
|
2358
|
-
}>(input.req.baseUrl, input.req.executorToken, {
|
|
2359
|
-
action: 'persist_completed_sheet_rows',
|
|
2427
|
+
await harnessPersistCompletedSheetRows({
|
|
2428
|
+
baseUrl: input.req.baseUrl,
|
|
2429
|
+
executorToken: input.req.executorToken,
|
|
2360
2430
|
playName: input.req.playName,
|
|
2361
2431
|
tableNamespace: input.tableNamespace,
|
|
2362
2432
|
sheetContract: requireSheetContract(input.req, input.tableNamespace),
|
|
@@ -2368,6 +2438,7 @@ async function persistCompletedMapRows(input: {
|
|
|
2368
2438
|
),
|
|
2369
2439
|
],
|
|
2370
2440
|
runId: input.req.runId,
|
|
2441
|
+
userEmail: input.req.userEmail,
|
|
2371
2442
|
});
|
|
2372
2443
|
}
|
|
2373
2444
|
|
|
@@ -2384,19 +2455,15 @@ async function prepareMapRows(input: {
|
|
|
2384
2455
|
if (input.rows.length === 0) {
|
|
2385
2456
|
return { inserted: 0, skipped: 0, pendingRows: [], completedRows: [] };
|
|
2386
2457
|
}
|
|
2387
|
-
const result = await
|
|
2388
|
-
|
|
2389
|
-
|
|
2390
|
-
pendingRows: Record<string, unknown>[];
|
|
2391
|
-
completedRows: Record<string, unknown>[];
|
|
2392
|
-
tableNamespace: string;
|
|
2393
|
-
}>(input.req.baseUrl, input.req.executorToken, {
|
|
2394
|
-
action: 'start_sheet_dataset',
|
|
2458
|
+
const result = await harnessStartSheetDataset({
|
|
2459
|
+
baseUrl: input.req.baseUrl,
|
|
2460
|
+
executorToken: input.req.executorToken,
|
|
2395
2461
|
playName: input.req.playName,
|
|
2396
2462
|
tableNamespace: input.tableNamespace,
|
|
2397
2463
|
sheetContract: requireSheetContract(input.req, input.tableNamespace),
|
|
2398
2464
|
rows: input.rows.map((row) => ({ ...row })),
|
|
2399
2465
|
runId: input.req.runId,
|
|
2466
|
+
userEmail: input.req.userEmail,
|
|
2400
2467
|
});
|
|
2401
2468
|
return {
|
|
2402
2469
|
inserted: result.inserted,
|
|
@@ -2567,6 +2634,7 @@ function createMinimalWorkerCtx(
|
|
|
2567
2634
|
>,
|
|
2568
2635
|
opts?: WorkerMapOptions,
|
|
2569
2636
|
): Promise<unknown> => {
|
|
2637
|
+
const mapStartedAt = nowMs();
|
|
2570
2638
|
const sliced = rows;
|
|
2571
2639
|
const baseOffset = 0;
|
|
2572
2640
|
const fieldEntries = Object.entries(fieldsDef);
|
|
@@ -2669,12 +2737,21 @@ function createMinimalWorkerCtx(
|
|
|
2669
2737
|
chunkStart: number,
|
|
2670
2738
|
chunkIndex: number,
|
|
2671
2739
|
): Promise<WorkerMapChunkSummary<T & Record<string, unknown>>> => {
|
|
2740
|
+
const chunkStartedAt = nowMs();
|
|
2672
2741
|
assertNotAborted(abortSignal);
|
|
2742
|
+
const keyStartedAt = nowMs();
|
|
2673
2743
|
const chunkEntries = chunkRows.map((row, localIndex) => {
|
|
2674
2744
|
const absoluteIndex = baseOffset + chunkStart + localIndex;
|
|
2675
2745
|
const rowKey = resolveRowKey(row, absoluteIndex);
|
|
2676
2746
|
return { row, absoluteIndex, rowKey };
|
|
2677
2747
|
});
|
|
2748
|
+
recordRunnerPerfTrace({
|
|
2749
|
+
req,
|
|
2750
|
+
phase: 'runner.map_chunk.keys',
|
|
2751
|
+
ms: nowMs() - keyStartedAt,
|
|
2752
|
+
extra: { mapName: name, chunkIndex, rows: chunkRows.length },
|
|
2753
|
+
});
|
|
2754
|
+
const prepareStartedAt = nowMs();
|
|
2678
2755
|
const prepared = await prepareMapRows({
|
|
2679
2756
|
req,
|
|
2680
2757
|
tableNamespace: name,
|
|
@@ -2683,6 +2760,20 @@ function createMinimalWorkerCtx(
|
|
|
2683
2760
|
__deeplineRowKey: rowKey,
|
|
2684
2761
|
})),
|
|
2685
2762
|
});
|
|
2763
|
+
recordRunnerPerfTrace({
|
|
2764
|
+
req,
|
|
2765
|
+
phase: 'runner.map_chunk.prepare_rows',
|
|
2766
|
+
ms: nowMs() - prepareStartedAt,
|
|
2767
|
+
extra: {
|
|
2768
|
+
mapName: name,
|
|
2769
|
+
chunkIndex,
|
|
2770
|
+
rows: chunkRows.length,
|
|
2771
|
+
inserted: prepared.inserted,
|
|
2772
|
+
skipped: prepared.skipped,
|
|
2773
|
+
pendingRows: prepared.pendingRows.length,
|
|
2774
|
+
completedRows: prepared.completedRows.length,
|
|
2775
|
+
},
|
|
2776
|
+
});
|
|
2686
2777
|
const pendingKeys = new Set<string>();
|
|
2687
2778
|
const completedKeys = new Set<string>();
|
|
2688
2779
|
const preparedKeys = new Set<string>();
|
|
@@ -2901,8 +2992,56 @@ function createMinimalWorkerCtx(
|
|
|
2901
2992
|
})),
|
|
2902
2993
|
});
|
|
2903
2994
|
};
|
|
2995
|
+
const workersStartedAt = nowMs();
|
|
2904
2996
|
const workerResults = await Promise.allSettled(workers);
|
|
2905
|
-
|
|
2997
|
+
recordRunnerPerfTrace({
|
|
2998
|
+
req,
|
|
2999
|
+
phase: 'runner.map_chunk.execute_workers',
|
|
3000
|
+
ms: nowMs() - workersStartedAt,
|
|
3001
|
+
extra: {
|
|
3002
|
+
mapName: name,
|
|
3003
|
+
chunkIndex,
|
|
3004
|
+
rowsToExecute: rowsToExecute.length,
|
|
3005
|
+
concurrency,
|
|
3006
|
+
},
|
|
3007
|
+
});
|
|
3008
|
+
const persistRowsStartedAt = nowMs();
|
|
3009
|
+
recordRunnerPerfTrace({
|
|
3010
|
+
req,
|
|
3011
|
+
phase: 'runner.map_chunk.persist_rows_start',
|
|
3012
|
+
ms: 0,
|
|
3013
|
+
extra: {
|
|
3014
|
+
mapName: name,
|
|
3015
|
+
chunkIndex,
|
|
3016
|
+
rowsToExecute: rowsToExecute.length,
|
|
3017
|
+
},
|
|
3018
|
+
});
|
|
3019
|
+
try {
|
|
3020
|
+
await persistExecutedRows();
|
|
3021
|
+
recordRunnerPerfTrace({
|
|
3022
|
+
req,
|
|
3023
|
+
phase: 'runner.map_chunk.persist_rows',
|
|
3024
|
+
ms: nowMs() - persistRowsStartedAt,
|
|
3025
|
+
extra: {
|
|
3026
|
+
mapName: name,
|
|
3027
|
+
chunkIndex,
|
|
3028
|
+
rowsToExecute: rowsToExecute.length,
|
|
3029
|
+
},
|
|
3030
|
+
});
|
|
3031
|
+
} catch (error) {
|
|
3032
|
+
recordRunnerPerfTrace({
|
|
3033
|
+
req,
|
|
3034
|
+
phase: 'runner.map_chunk.persist_rows_error',
|
|
3035
|
+
ms: nowMs() - persistRowsStartedAt,
|
|
3036
|
+
extra: {
|
|
3037
|
+
mapName: name,
|
|
3038
|
+
chunkIndex,
|
|
3039
|
+
rowsToExecute: rowsToExecute.length,
|
|
3040
|
+
error: error instanceof Error ? error.message : String(error),
|
|
3041
|
+
},
|
|
3042
|
+
});
|
|
3043
|
+
throw error;
|
|
3044
|
+
}
|
|
2906
3045
|
const rejectedWorker = workerResults.find(
|
|
2907
3046
|
(result): result is PromiseRejectedResult =>
|
|
2908
3047
|
result.status === 'rejected',
|
|
@@ -2942,6 +3081,27 @@ function createMinimalWorkerCtx(
|
|
|
2942
3081
|
return resultByKey.get(key);
|
|
2943
3082
|
})
|
|
2944
3083
|
.filter((row): row is T & Record<string, unknown> => Boolean(row));
|
|
3084
|
+
const hashStartedAt = nowMs();
|
|
3085
|
+
const hash = await hashJson(out);
|
|
3086
|
+
recordRunnerPerfTrace({
|
|
3087
|
+
req,
|
|
3088
|
+
phase: 'runner.map_chunk.hash',
|
|
3089
|
+
ms: nowMs() - hashStartedAt,
|
|
3090
|
+
extra: { mapName: name, chunkIndex, rows: out.length },
|
|
3091
|
+
});
|
|
3092
|
+
recordRunnerPerfTrace({
|
|
3093
|
+
req,
|
|
3094
|
+
phase: 'runner.map_chunk.total',
|
|
3095
|
+
ms: nowMs() - chunkStartedAt,
|
|
3096
|
+
extra: {
|
|
3097
|
+
mapName: name,
|
|
3098
|
+
chunkIndex,
|
|
3099
|
+
rowsRead: chunkRows.length,
|
|
3100
|
+
rowsWritten: out.length,
|
|
3101
|
+
rowsExecuted: executedRows.length,
|
|
3102
|
+
rowsCached: Math.max(0, out.length - executedRows.length),
|
|
3103
|
+
},
|
|
3104
|
+
});
|
|
2945
3105
|
return {
|
|
2946
3106
|
chunkIndex,
|
|
2947
3107
|
rangeStart: baseOffset + chunkStart,
|
|
@@ -2954,7 +3114,7 @@ function createMinimalWorkerCtx(
|
|
|
2954
3114
|
rowsInserted,
|
|
2955
3115
|
rowsSkipped,
|
|
2956
3116
|
outputDatasetId: `map:${name}`,
|
|
2957
|
-
hash
|
|
3117
|
+
hash,
|
|
2958
3118
|
preview: toWorkflowSerializableValue(out.slice(0, 5)),
|
|
2959
3119
|
};
|
|
2960
3120
|
};
|
|
@@ -2978,7 +3138,7 @@ function createMinimalWorkerCtx(
|
|
|
2978
3138
|
workflowStep.do as unknown as (
|
|
2979
3139
|
name: string,
|
|
2980
3140
|
config: {
|
|
2981
|
-
retries: { limit: number; delay:
|
|
3141
|
+
retries: { limit: number; delay: number; backoff: 'exponential' };
|
|
2982
3142
|
},
|
|
2983
3143
|
callback: () => Promise<
|
|
2984
3144
|
WorkerMapChunkSummary<T & Record<string, unknown>>
|
|
@@ -2986,7 +3146,7 @@ function createMinimalWorkerCtx(
|
|
|
2986
3146
|
) => Promise<WorkerMapChunkSummary<T & Record<string, unknown>>>
|
|
2987
3147
|
)(
|
|
2988
3148
|
deterministicMapChunkStepName({ mapName: name, chunkIndex }),
|
|
2989
|
-
{ retries: { limit: 5, delay:
|
|
3149
|
+
{ retries: { limit: 5, delay: 100, backoff: 'exponential' } },
|
|
2990
3150
|
async () => await processChunk(chunkRows, chunkStart, chunkIndex),
|
|
2991
3151
|
)) as WorkerMapChunkSummary<T & Record<string, unknown>>;
|
|
2992
3152
|
};
|
|
@@ -3045,7 +3205,14 @@ function createMinimalWorkerCtx(
|
|
|
3045
3205
|
chunkStart += chunkRows.length;
|
|
3046
3206
|
chunkIndex += 1;
|
|
3047
3207
|
}
|
|
3048
|
-
|
|
3208
|
+
const dataset = finalize(totalRowsWritten);
|
|
3209
|
+
recordRunnerPerfTrace({
|
|
3210
|
+
req,
|
|
3211
|
+
phase: 'runner.map.total',
|
|
3212
|
+
ms: nowMs() - mapStartedAt,
|
|
3213
|
+
extra: { mapName: name, rowsWritten: totalRowsWritten, streaming: true },
|
|
3214
|
+
});
|
|
3215
|
+
return dataset;
|
|
3049
3216
|
}
|
|
3050
3217
|
|
|
3051
3218
|
if (workflowStep && sliced.length > rowsPerChunk) {
|
|
@@ -3067,7 +3234,14 @@ function createMinimalWorkerCtx(
|
|
|
3067
3234
|
out.push(...chunkResult.preview.slice(0, 10 - out.length));
|
|
3068
3235
|
}
|
|
3069
3236
|
}
|
|
3070
|
-
|
|
3237
|
+
const dataset = finalize(totalRowsWritten);
|
|
3238
|
+
recordRunnerPerfTrace({
|
|
3239
|
+
req,
|
|
3240
|
+
phase: 'runner.map.total',
|
|
3241
|
+
ms: nowMs() - mapStartedAt,
|
|
3242
|
+
extra: { mapName: name, rowsWritten: totalRowsWritten, streaming: false },
|
|
3243
|
+
});
|
|
3244
|
+
return dataset;
|
|
3071
3245
|
}
|
|
3072
3246
|
|
|
3073
3247
|
assertUniqueExplicitRowKeys(sliced, 0);
|
|
@@ -3078,7 +3252,18 @@ function createMinimalWorkerCtx(
|
|
|
3078
3252
|
totalRowsInserted = chunkResult.rowsInserted;
|
|
3079
3253
|
totalRowsSkipped = chunkResult.rowsSkipped;
|
|
3080
3254
|
out.push(...chunkResult.preview);
|
|
3081
|
-
|
|
3255
|
+
const dataset = finalize(chunkResult.rowsWritten);
|
|
3256
|
+
recordRunnerPerfTrace({
|
|
3257
|
+
req,
|
|
3258
|
+
phase: 'runner.map.total',
|
|
3259
|
+
ms: nowMs() - mapStartedAt,
|
|
3260
|
+
extra: {
|
|
3261
|
+
mapName: name,
|
|
3262
|
+
rowsWritten: chunkResult.rowsWritten,
|
|
3263
|
+
streaming: false,
|
|
3264
|
+
},
|
|
3265
|
+
});
|
|
3266
|
+
return dataset;
|
|
3082
3267
|
};
|
|
3083
3268
|
|
|
3084
3269
|
class WorkerMapBuilder<T extends Record<string, unknown>> {
|
|
@@ -3165,28 +3350,43 @@ function createMinimalWorkerCtx(
|
|
|
3165
3350
|
arg: unknown,
|
|
3166
3351
|
options?: CsvRenameOptions,
|
|
3167
3352
|
): Promise<T[]> {
|
|
3353
|
+
const csvStartedAt = nowMs();
|
|
3168
3354
|
if (Array.isArray(arg)) {
|
|
3169
3355
|
// Inline rows passed at call site — already in memory, keep the
|
|
3170
3356
|
// legacy array-backed dataset shape.
|
|
3171
|
-
|
|
3357
|
+
const dataset = makeWorkerDataset(
|
|
3172
3358
|
'csv',
|
|
3173
3359
|
applyCsvRenameProjection(arg as T[], options),
|
|
3174
3360
|
{
|
|
3175
3361
|
datasetKind: 'csv',
|
|
3176
3362
|
},
|
|
3177
3363
|
) as unknown as T[];
|
|
3364
|
+
recordRunnerPerfTrace({
|
|
3365
|
+
req,
|
|
3366
|
+
phase: 'runner.csv',
|
|
3367
|
+
ms: nowMs() - csvStartedAt,
|
|
3368
|
+
extra: { mode: 'inline_array', rows: arg.length },
|
|
3369
|
+
});
|
|
3370
|
+
return dataset;
|
|
3178
3371
|
}
|
|
3179
3372
|
const filename = String(arg ?? '');
|
|
3180
3373
|
if (req.inlineCsv && filename === req.inlineCsv.name) {
|
|
3181
3374
|
// Inline CSV pre-staged by the dispatcher (small files <1 MiB). Already
|
|
3182
3375
|
// in memory; no streaming needed.
|
|
3183
|
-
|
|
3376
|
+
const dataset = makeWorkerDataset(
|
|
3184
3377
|
'csv',
|
|
3185
3378
|
applyCsvRenameProjection(req.inlineCsv.rows as T[], options),
|
|
3186
3379
|
{
|
|
3187
3380
|
datasetKind: 'csv',
|
|
3188
3381
|
},
|
|
3189
3382
|
) as unknown as T[];
|
|
3383
|
+
recordRunnerPerfTrace({
|
|
3384
|
+
req,
|
|
3385
|
+
phase: 'runner.csv',
|
|
3386
|
+
ms: nowMs() - csvStartedAt,
|
|
3387
|
+
extra: { mode: 'inline_csv', rows: req.inlineCsv.rows.length },
|
|
3388
|
+
});
|
|
3389
|
+
return dataset;
|
|
3190
3390
|
}
|
|
3191
3391
|
// Resolution order: explicit inputR2Keys (runtime input) → packaged
|
|
3192
3392
|
// files (relative-path imports bundled with the play artifact).
|
|
@@ -3211,7 +3411,7 @@ function createMinimalWorkerCtx(
|
|
|
3211
3411
|
// and switches its chunked execution loop to consume iterChunks
|
|
3212
3412
|
// directly, so 2M-row CSVs never get fully materialized in memory.
|
|
3213
3413
|
const storageKey = r2Key;
|
|
3214
|
-
|
|
3414
|
+
const dataset = makeStreamingCsvDataset<T>({
|
|
3215
3415
|
name: filename,
|
|
3216
3416
|
logicalPath: filename,
|
|
3217
3417
|
renameOptions: options,
|
|
@@ -3223,6 +3423,13 @@ function createMinimalWorkerCtx(
|
|
|
3223
3423
|
storageKey,
|
|
3224
3424
|
}),
|
|
3225
3425
|
}) as unknown as T[];
|
|
3426
|
+
recordRunnerPerfTrace({
|
|
3427
|
+
req,
|
|
3428
|
+
phase: 'runner.csv',
|
|
3429
|
+
ms: nowMs() - csvStartedAt,
|
|
3430
|
+
extra: { mode: 'streaming_r2', filename },
|
|
3431
|
+
});
|
|
3432
|
+
return dataset;
|
|
3226
3433
|
},
|
|
3227
3434
|
map<T extends Record<string, unknown>>(
|
|
3228
3435
|
name: string,
|
|
@@ -3253,346 +3460,6 @@ function createMinimalWorkerCtx(
|
|
|
3253
3460
|
throw new Error(
|
|
3254
3461
|
'ctx.map(key, rows, fields, options) was removed. Use ctx.map(key, rows).step(...).run(options).',
|
|
3255
3462
|
);
|
|
3256
|
-
/*
|
|
3257
|
-
const sliced = rows;
|
|
3258
|
-
const baseOffset = 0;
|
|
3259
|
-
const fieldEntries = Object.entries(fieldsDef);
|
|
3260
|
-
const plan = req.executionPlan;
|
|
3261
|
-
const planMap = plan?.maps.find(
|
|
3262
|
-
(candidate) =>
|
|
3263
|
-
candidate.mapName === name || candidate.tableNamespace === name,
|
|
3264
|
-
);
|
|
3265
|
-
const streaming = isStreamingDataset<T>(sliced);
|
|
3266
|
-
// For streaming inputs we don't know the row count upfront — pass
|
|
3267
|
-
// `totalRows: 0` so chooseMapChunkSize falls back to the preferred /
|
|
3268
|
-
// default chunk size rather than trying to budget against an unknown.
|
|
3269
|
-
const rowsPerChunk = chooseMapChunkSize({
|
|
3270
|
-
totalRows: streaming ? 0 : sliced.length,
|
|
3271
|
-
mapCount: Math.max(1, plan?.maps.length ?? 1),
|
|
3272
|
-
stepsPerChunk: planMap?.stepsPerChunk ?? 1,
|
|
3273
|
-
preferredChunkSize: planMap?.defaultChunkSize,
|
|
3274
|
-
softWorkflowStepBudget: plan?.chunkPlan.softWorkflowStepBudget,
|
|
3275
|
-
});
|
|
3276
|
-
const outputFields = fieldEntries.map(([field]) => field);
|
|
3277
|
-
|
|
3278
|
-
// Workflow steps have bounded CPU but unbounded wall time, so long
|
|
3279
|
-
// network calls are OK here as long as we checkpoint by chunk and do not
|
|
3280
|
-
// collapse a giant map into one monolithic step result.
|
|
3281
|
-
|
|
3282
|
-
const processChunk = async (
|
|
3283
|
-
chunkRows: T[],
|
|
3284
|
-
chunkStart: number,
|
|
3285
|
-
chunkIndex: number,
|
|
3286
|
-
): Promise<WorkerMapChunkSummary<T & Record<string, unknown>>> => {
|
|
3287
|
-
assertNotAborted(abortSignal);
|
|
3288
|
-
const prepared = await prepareMapRows({
|
|
3289
|
-
req,
|
|
3290
|
-
tableNamespace: name,
|
|
3291
|
-
rows: chunkRows,
|
|
3292
|
-
});
|
|
3293
|
-
const pendingKeys = new Set<string>();
|
|
3294
|
-
const completedKeys = new Set<string>();
|
|
3295
|
-
const preparedKeys = new Set<string>();
|
|
3296
|
-
for (const row of prepared.pendingRows) {
|
|
3297
|
-
const key = derivePlayRowIdentity(
|
|
3298
|
-
publicCsvInputRow(row),
|
|
3299
|
-
name,
|
|
3300
|
-
mapLogicFingerprint,
|
|
3301
|
-
);
|
|
3302
|
-
if (key) {
|
|
3303
|
-
pendingKeys.add(key);
|
|
3304
|
-
preparedKeys.add(key);
|
|
3305
|
-
}
|
|
3306
|
-
}
|
|
3307
|
-
for (const row of prepared.completedRows) {
|
|
3308
|
-
const key =
|
|
3309
|
-
typeof row.__deeplineRowKey === 'string'
|
|
3310
|
-
? row.__deeplineRowKey
|
|
3311
|
-
: derivePlayRowIdentity(
|
|
3312
|
-
publicCsvInputRow(row),
|
|
3313
|
-
name,
|
|
3314
|
-
mapLogicFingerprint,
|
|
3315
|
-
);
|
|
3316
|
-
if (key) {
|
|
3317
|
-
completedKeys.add(key);
|
|
3318
|
-
preparedKeys.add(key);
|
|
3319
|
-
}
|
|
3320
|
-
}
|
|
3321
|
-
const missingPreparedRows = chunkRows.filter((row) => {
|
|
3322
|
-
const key = derivePlayRowIdentity(
|
|
3323
|
-
publicCsvInputRow(row),
|
|
3324
|
-
name,
|
|
3325
|
-
mapLogicFingerprint,
|
|
3326
|
-
);
|
|
3327
|
-
return !key || !preparedKeys.has(key);
|
|
3328
|
-
});
|
|
3329
|
-
const rowsToExecute = chunkRows.filter((row) => {
|
|
3330
|
-
const key = derivePlayRowIdentity(
|
|
3331
|
-
publicCsvInputRow(row),
|
|
3332
|
-
name,
|
|
3333
|
-
mapLogicFingerprint,
|
|
3334
|
-
);
|
|
3335
|
-
return !key || pendingKeys.has(key) || !completedKeys.has(key);
|
|
3336
|
-
});
|
|
3337
|
-
const rowsInserted = prepared.inserted + missingPreparedRows.length;
|
|
3338
|
-
const rowsSkipped = Math.max(
|
|
3339
|
-
0,
|
|
3340
|
-
prepared.skipped - missingPreparedRows.length,
|
|
3341
|
-
);
|
|
3342
|
-
const concurrency = Math.max(1, Math.min(opts?.concurrency ?? 10, 100));
|
|
3343
|
-
const executedRows: Array<T & Record<string, unknown>> = new Array(
|
|
3344
|
-
rowsToExecute.length,
|
|
3345
|
-
);
|
|
3346
|
-
const generatedOutputFields = new Set<string>();
|
|
3347
|
-
let idx = 0;
|
|
3348
|
-
const workers: Array<Promise<void>> = [];
|
|
3349
|
-
for (let w = 0; w < concurrency; w += 1) {
|
|
3350
|
-
workers.push(
|
|
3351
|
-
(async () => {
|
|
3352
|
-
while (true) {
|
|
3353
|
-
if (abortSignal?.aborted) return;
|
|
3354
|
-
const myIndex = idx++;
|
|
3355
|
-
if (myIndex >= rowsToExecute.length) return;
|
|
3356
|
-
const row = rowsToExecute[myIndex]!;
|
|
3357
|
-
const absoluteIndex = baseOffset + chunkStart + myIndex;
|
|
3358
|
-
const enriched: Record<string, unknown> = cloneCsvAliasedRow(row);
|
|
3359
|
-
const fieldOutputs: Record<string, unknown> = {};
|
|
3360
|
-
const waterfallOutputs: RecordedWaterfallOutput[] = [];
|
|
3361
|
-
const rowCtx = {
|
|
3362
|
-
...(ctx as Record<string, unknown>),
|
|
3363
|
-
waterfall: (
|
|
3364
|
-
toolNameOrSpec: string | WorkerInlineWaterfallSpec,
|
|
3365
|
-
waterfallInput: Record<string, unknown>,
|
|
3366
|
-
waterfallOpts?: WorkerWaterfallOptions,
|
|
3367
|
-
) =>
|
|
3368
|
-
executeWorkerWaterfall(
|
|
3369
|
-
req,
|
|
3370
|
-
waterfallOutputs,
|
|
3371
|
-
toolNameOrSpec,
|
|
3372
|
-
waterfallInput,
|
|
3373
|
-
waterfallOpts,
|
|
3374
|
-
),
|
|
3375
|
-
};
|
|
3376
|
-
for (const [key, value] of fieldEntries) {
|
|
3377
|
-
if (typeof value === 'function') {
|
|
3378
|
-
const resolved = await (
|
|
3379
|
-
value as (
|
|
3380
|
-
r: T,
|
|
3381
|
-
c: unknown,
|
|
3382
|
-
f: Record<string, unknown>,
|
|
3383
|
-
i: number,
|
|
3384
|
-
) => Promise<unknown> | unknown
|
|
3385
|
-
)(row, rowCtx, fieldOutputs, absoluteIndex);
|
|
3386
|
-
enriched[key] = resolved;
|
|
3387
|
-
fieldOutputs[key] = resolved;
|
|
3388
|
-
} else {
|
|
3389
|
-
enriched[key] = value;
|
|
3390
|
-
fieldOutputs[key] = value;
|
|
3391
|
-
}
|
|
3392
|
-
}
|
|
3393
|
-
for (const waterfallOutput of waterfallOutputs) {
|
|
3394
|
-
const columnName =
|
|
3395
|
-
`${sqlishIdentifierPart(waterfallOutput.waterfallId)}__` +
|
|
3396
|
-
sqlishIdentifierPart(waterfallOutput.stepId);
|
|
3397
|
-
enriched[columnName] = waterfallOutput.value;
|
|
3398
|
-
generatedOutputFields.add(columnName);
|
|
3399
|
-
}
|
|
3400
|
-
executedRows[myIndex] = enriched as T & Record<string, unknown>;
|
|
3401
|
-
}
|
|
3402
|
-
})(),
|
|
3403
|
-
);
|
|
3404
|
-
}
|
|
3405
|
-
await Promise.all(workers);
|
|
3406
|
-
if (executedRows.length > 0) {
|
|
3407
|
-
await persistCompletedMapRows({
|
|
3408
|
-
req,
|
|
3409
|
-
tableNamespace: name,
|
|
3410
|
-
outputFields,
|
|
3411
|
-
extraOutputFields: Array.from(generatedOutputFields),
|
|
3412
|
-
rows: executedRows.map((row, executedIndex) => ({
|
|
3413
|
-
...row,
|
|
3414
|
-
__deeplineRowKey: derivePlayRowIdentity(
|
|
3415
|
-
publicCsvInputRow(rowsToExecute[executedIndex]!),
|
|
3416
|
-
name,
|
|
3417
|
-
mapLogicFingerprint,
|
|
3418
|
-
),
|
|
3419
|
-
})),
|
|
3420
|
-
});
|
|
3421
|
-
}
|
|
3422
|
-
const resultByKey = new Map<string, T & Record<string, unknown>>();
|
|
3423
|
-
for (const completedRow of prepared.completedRows) {
|
|
3424
|
-
const key =
|
|
3425
|
-
typeof completedRow.__deeplineRowKey === 'string'
|
|
3426
|
-
? completedRow.__deeplineRowKey
|
|
3427
|
-
: derivePlayRowIdentity(
|
|
3428
|
-
publicCsvInputRow(completedRow),
|
|
3429
|
-
name,
|
|
3430
|
-
mapLogicFingerprint,
|
|
3431
|
-
);
|
|
3432
|
-
if (key) {
|
|
3433
|
-
const { __deeplineRowKey: _rowKey, ...cleanedRow } =
|
|
3434
|
-
publicCsvInputRow(completedRow);
|
|
3435
|
-
void _rowKey;
|
|
3436
|
-
resultByKey.set(key, cleanedRow as T & Record<string, unknown>);
|
|
3437
|
-
}
|
|
3438
|
-
}
|
|
3439
|
-
for (
|
|
3440
|
-
let executedIndex = 0;
|
|
3441
|
-
executedIndex < executedRows.length;
|
|
3442
|
-
executedIndex += 1
|
|
3443
|
-
) {
|
|
3444
|
-
const executedRow = executedRows[executedIndex]!;
|
|
3445
|
-
const key = derivePlayRowIdentity(
|
|
3446
|
-
publicCsvInputRow(rowsToExecute[executedIndex]!),
|
|
3447
|
-
name,
|
|
3448
|
-
mapLogicFingerprint,
|
|
3449
|
-
);
|
|
3450
|
-
if (key) resultByKey.set(key, executedRow);
|
|
3451
|
-
}
|
|
3452
|
-
const out = chunkRows
|
|
3453
|
-
.map((row) => {
|
|
3454
|
-
const key = derivePlayRowIdentity(
|
|
3455
|
-
publicCsvInputRow(row),
|
|
3456
|
-
name,
|
|
3457
|
-
mapLogicFingerprint,
|
|
3458
|
-
);
|
|
3459
|
-
return key ? resultByKey.get(key) : undefined;
|
|
3460
|
-
})
|
|
3461
|
-
.filter((row): row is T & Record<string, unknown> => Boolean(row));
|
|
3462
|
-
return {
|
|
3463
|
-
chunkIndex,
|
|
3464
|
-
rangeStart: baseOffset + chunkStart,
|
|
3465
|
-
rangeEnd: baseOffset + chunkStart + out.length,
|
|
3466
|
-
rowsRead: chunkRows.length,
|
|
3467
|
-
rowsWritten: out.length,
|
|
3468
|
-
rowsExecuted: executedRows.length,
|
|
3469
|
-
rowsCached: prepared.completedRows.length,
|
|
3470
|
-
rowsInserted,
|
|
3471
|
-
rowsSkipped,
|
|
3472
|
-
outputDatasetId: `map:${name}`,
|
|
3473
|
-
hash: await hashJson(out),
|
|
3474
|
-
preview: toWorkflowSerializableValue(out.slice(0, 5)),
|
|
3475
|
-
};
|
|
3476
|
-
};
|
|
3477
|
-
|
|
3478
|
-
const out: Array<T & Record<string, unknown>> = [];
|
|
3479
|
-
let totalRowsExecuted = 0;
|
|
3480
|
-
let totalRowsCached = 0;
|
|
3481
|
-
let totalRowsInserted = 0;
|
|
3482
|
-
let totalRowsSkipped = 0;
|
|
3483
|
-
|
|
3484
|
-
const runChunkStep = async (
|
|
3485
|
-
chunkRows: T[],
|
|
3486
|
-
chunkStart: number,
|
|
3487
|
-
chunkIndex: number,
|
|
3488
|
-
): Promise<WorkerMapChunkSummary<T & Record<string, unknown>>> => {
|
|
3489
|
-
if (!workflowStep) {
|
|
3490
|
-
return await processChunk(chunkRows, chunkStart, chunkIndex);
|
|
3491
|
-
}
|
|
3492
|
-
return (await (
|
|
3493
|
-
workflowStep.do as unknown as (
|
|
3494
|
-
name: string,
|
|
3495
|
-
config: {
|
|
3496
|
-
retries: { limit: number; delay: string; backoff: 'exponential' };
|
|
3497
|
-
},
|
|
3498
|
-
callback: () => Promise<
|
|
3499
|
-
WorkerMapChunkSummary<T & Record<string, unknown>>
|
|
3500
|
-
>,
|
|
3501
|
-
) => Promise<WorkerMapChunkSummary<T & Record<string, unknown>>>
|
|
3502
|
-
)(
|
|
3503
|
-
deterministicMapChunkStepName({ mapName: name, chunkIndex }),
|
|
3504
|
-
{ retries: { limit: 5, delay: '5 seconds', backoff: 'exponential' } },
|
|
3505
|
-
async () => await processChunk(chunkRows, chunkStart, chunkIndex),
|
|
3506
|
-
)) as WorkerMapChunkSummary<T & Record<string, unknown>>;
|
|
3507
|
-
};
|
|
3508
|
-
|
|
3509
|
-
const finalize = (totalRowsWritten: number) => {
|
|
3510
|
-
const cacheSummary =
|
|
3511
|
-
`Map completed: ${totalRowsWritten} results ` +
|
|
3512
|
-
`(${totalRowsExecuted} executed, ${totalRowsCached} already satisfied) ` +
|
|
3513
|
-
`inserted=${totalRowsInserted} skipped=${totalRowsSkipped}`;
|
|
3514
|
-
emitEvent({
|
|
3515
|
-
type: 'log',
|
|
3516
|
-
level: 'info',
|
|
3517
|
-
message: cacheSummary,
|
|
3518
|
-
ts: nowMs(),
|
|
3519
|
-
});
|
|
3520
|
-
return makeWorkerDataset(name, out, {
|
|
3521
|
-
count: totalRowsWritten,
|
|
3522
|
-
cacheSummary,
|
|
3523
|
-
workProgress: {
|
|
3524
|
-
total: totalRowsWritten,
|
|
3525
|
-
executed: totalRowsExecuted,
|
|
3526
|
-
reused: totalRowsCached,
|
|
3527
|
-
skipped: totalRowsCached,
|
|
3528
|
-
pending: 0,
|
|
3529
|
-
failed: 0,
|
|
3530
|
-
},
|
|
3531
|
-
});
|
|
3532
|
-
};
|
|
3533
|
-
|
|
3534
|
-
// Streaming path: input came from `ctx.csv` over R2 and we never
|
|
3535
|
-
// materialized the full row array. Pull row chunks from the dataset's
|
|
3536
|
-
// iterChunks() and run each through processChunk inside its own
|
|
3537
|
-
// workflow step. Memory stays bounded by `rowsPerChunk`.
|
|
3538
|
-
if (streaming) {
|
|
3539
|
-
let totalRowsWritten = 0;
|
|
3540
|
-
let chunkIndex = 0;
|
|
3541
|
-
let chunkStart = 0;
|
|
3542
|
-
const streamingDataset = sliced as unknown as StreamingCsvDataset<T>;
|
|
3543
|
-
for await (const chunkRows of streamingDataset.iterChunks(
|
|
3544
|
-
rowsPerChunk,
|
|
3545
|
-
)) {
|
|
3546
|
-
assertNotAborted(abortSignal);
|
|
3547
|
-
if (chunkRows.length === 0) continue;
|
|
3548
|
-
const chunkResult = await runChunkStep(
|
|
3549
|
-
chunkRows,
|
|
3550
|
-
chunkStart,
|
|
3551
|
-
chunkIndex,
|
|
3552
|
-
);
|
|
3553
|
-
totalRowsWritten += chunkResult.rowsWritten;
|
|
3554
|
-
totalRowsExecuted += chunkResult.rowsExecuted;
|
|
3555
|
-
totalRowsCached += chunkResult.rowsCached;
|
|
3556
|
-
totalRowsInserted += chunkResult.rowsInserted;
|
|
3557
|
-
totalRowsSkipped += chunkResult.rowsSkipped;
|
|
3558
|
-
if (out.length < 10) {
|
|
3559
|
-
out.push(...chunkResult.preview.slice(0, 10 - out.length));
|
|
3560
|
-
}
|
|
3561
|
-
chunkStart += chunkRows.length;
|
|
3562
|
-
chunkIndex += 1;
|
|
3563
|
-
}
|
|
3564
|
-
return finalize(totalRowsWritten);
|
|
3565
|
-
}
|
|
3566
|
-
|
|
3567
|
-
// Materialized array path (inline rows or small CSV).
|
|
3568
|
-
if (workflowStep && sliced.length > rowsPerChunk) {
|
|
3569
|
-
let totalRowsWritten = 0;
|
|
3570
|
-
for (let start = 0; start < sliced.length; start += rowsPerChunk) {
|
|
3571
|
-
assertNotAborted(abortSignal);
|
|
3572
|
-
const end = Math.min(sliced.length, start + rowsPerChunk);
|
|
3573
|
-
const chunkRows = sliced.slice(start, end);
|
|
3574
|
-
const chunkIndex = Math.floor(start / rowsPerChunk);
|
|
3575
|
-
const chunkResult = await runChunkStep(chunkRows, start, chunkIndex);
|
|
3576
|
-
totalRowsWritten += chunkResult.rowsWritten;
|
|
3577
|
-
totalRowsExecuted += chunkResult.rowsExecuted;
|
|
3578
|
-
totalRowsCached += chunkResult.rowsCached;
|
|
3579
|
-
totalRowsInserted += chunkResult.rowsInserted;
|
|
3580
|
-
totalRowsSkipped += chunkResult.rowsSkipped;
|
|
3581
|
-
if (out.length < 10) {
|
|
3582
|
-
out.push(...chunkResult.preview.slice(0, 10 - out.length));
|
|
3583
|
-
}
|
|
3584
|
-
}
|
|
3585
|
-
return finalize(totalRowsWritten);
|
|
3586
|
-
}
|
|
3587
|
-
|
|
3588
|
-
const chunkResult = await processChunk(sliced, 0, 0);
|
|
3589
|
-
totalRowsExecuted = chunkResult.rowsExecuted;
|
|
3590
|
-
totalRowsCached = chunkResult.rowsCached;
|
|
3591
|
-
totalRowsInserted = chunkResult.rowsInserted;
|
|
3592
|
-
totalRowsSkipped = chunkResult.rowsSkipped;
|
|
3593
|
-
out.push(...chunkResult.preview);
|
|
3594
|
-
return finalize(chunkResult.rowsWritten);
|
|
3595
|
-
*/
|
|
3596
3463
|
},
|
|
3597
3464
|
tool: async (
|
|
3598
3465
|
key: string,
|
|
@@ -4012,6 +3879,7 @@ async function executeRunRequest(
|
|
|
4012
3879
|
workflowStep?: WorkflowStep,
|
|
4013
3880
|
options?: {
|
|
4014
3881
|
persistResultDatasets?: boolean;
|
|
3882
|
+
waitUntil?: (promise: Promise<unknown>) => void;
|
|
4015
3883
|
/**
|
|
4016
3884
|
* Cooperative cancellation token. CF Workflows surfaces termination as a
|
|
4017
3885
|
* thrown error from any in-progress step; the harness catches that, flips
|
|
@@ -4022,8 +3890,29 @@ async function executeRunRequest(
|
|
|
4022
3890
|
): Promise<WorkflowRunOutput> {
|
|
4023
3891
|
installProcessExitTrap();
|
|
4024
3892
|
const startedAt = nowMs();
|
|
3893
|
+
recordRunnerPerfTrace({
|
|
3894
|
+
req,
|
|
3895
|
+
phase: 'runner.execute_start',
|
|
3896
|
+
extra: {
|
|
3897
|
+
persistResultDatasets: Boolean(options?.persistResultDatasets),
|
|
3898
|
+
hasWorkflowStep: Boolean(workflowStep),
|
|
3899
|
+
},
|
|
3900
|
+
});
|
|
4025
3901
|
const abortController = options?.abortController ?? new AbortController();
|
|
4026
3902
|
const abortSignal = abortController.signal;
|
|
3903
|
+
const postgresPrewarmStartedAt = nowMs();
|
|
3904
|
+
await harnessPrewarmPostgresSessions({
|
|
3905
|
+
executorToken: req.executorToken,
|
|
3906
|
+
sessions: req.preloadedDbSessions ?? [],
|
|
3907
|
+
});
|
|
3908
|
+
recordRunnerPerfTrace({
|
|
3909
|
+
req,
|
|
3910
|
+
phase: 'runner.prewarm_postgres',
|
|
3911
|
+
ms: nowMs() - postgresPrewarmStartedAt,
|
|
3912
|
+
extra: {
|
|
3913
|
+
sessions: req.preloadedDbSessions?.length ?? 0,
|
|
3914
|
+
},
|
|
3915
|
+
});
|
|
4027
3916
|
// Maintain a rolling buffer of log lines emitted during the run. This is
|
|
4028
3917
|
// what the play-page UI consumes via Convex polling + diffPlayRunStreamEvents
|
|
4029
3918
|
// → play.run.log SSE events. Without periodic flushing, the play page only
|
|
@@ -4088,17 +3977,42 @@ async function executeRunRequest(
|
|
|
4088
3977
|
abortSignal,
|
|
4089
3978
|
);
|
|
4090
3979
|
try {
|
|
3980
|
+
const playStartedAt = nowMs();
|
|
4091
3981
|
const result = await (
|
|
4092
3982
|
playFn as (
|
|
4093
3983
|
ctx: unknown,
|
|
4094
3984
|
input: Record<string, unknown>,
|
|
4095
3985
|
) => Promise<unknown>
|
|
4096
3986
|
)(ctx, req.runtimeInput);
|
|
3987
|
+
recordRunnerPerfTrace({
|
|
3988
|
+
req,
|
|
3989
|
+
phase: 'runner.play_function',
|
|
3990
|
+
ms: nowMs() - playStartedAt,
|
|
3991
|
+
});
|
|
3992
|
+
const serializeStartedAt = nowMs();
|
|
4097
3993
|
const serializedResult = serializePlayReturnValue(result);
|
|
3994
|
+
recordRunnerPerfTrace({
|
|
3995
|
+
req,
|
|
3996
|
+
phase: 'runner.serialize_result',
|
|
3997
|
+
ms: nowMs() - serializeStartedAt,
|
|
3998
|
+
});
|
|
4098
3999
|
if (options?.persistResultDatasets) {
|
|
4000
|
+
const persistStartedAt = nowMs();
|
|
4099
4001
|
await liveLogFlushInFlight.catch(() => undefined);
|
|
4002
|
+
recordRunnerPerfTrace({
|
|
4003
|
+
req,
|
|
4004
|
+
phase: 'runner.live_log_flush_wait',
|
|
4005
|
+
ms: nowMs() - persistStartedAt,
|
|
4006
|
+
});
|
|
4007
|
+
const resultDatasetStartedAt = nowMs();
|
|
4100
4008
|
await persistResultDatasets(req, serializedResult);
|
|
4009
|
+
recordRunnerPerfTrace({
|
|
4010
|
+
req,
|
|
4011
|
+
phase: 'runner.persist_result_datasets',
|
|
4012
|
+
ms: nowMs() - resultDatasetStartedAt,
|
|
4013
|
+
});
|
|
4101
4014
|
const terminalResult = trimResultForStatus(serializedResult);
|
|
4015
|
+
const terminalUpdateStartedAt = nowMs();
|
|
4102
4016
|
await postRuntimeApiBestEffort(req.baseUrl, req.executorToken, {
|
|
4103
4017
|
action: 'update_run_status',
|
|
4104
4018
|
playId: req.runId,
|
|
@@ -4111,12 +4025,24 @@ async function executeRunRequest(
|
|
|
4111
4025
|
liveLogs,
|
|
4112
4026
|
lastCheckpointAt: nowMs(),
|
|
4113
4027
|
});
|
|
4028
|
+
recordRunnerPerfTrace({
|
|
4029
|
+
req,
|
|
4030
|
+
phase: 'runner.terminal_status_update',
|
|
4031
|
+
ms: nowMs() - terminalUpdateStartedAt,
|
|
4032
|
+
});
|
|
4033
|
+
const billingStartedAt = nowMs();
|
|
4114
4034
|
await finalizeWorkerComputeBilling({
|
|
4115
4035
|
req,
|
|
4116
4036
|
success: true,
|
|
4117
4037
|
actionEstimate: 4,
|
|
4118
4038
|
});
|
|
4039
|
+
recordRunnerPerfTrace({
|
|
4040
|
+
req,
|
|
4041
|
+
phase: 'runner.compute_billing_finalize',
|
|
4042
|
+
ms: nowMs() - billingStartedAt,
|
|
4043
|
+
});
|
|
4119
4044
|
}
|
|
4045
|
+
const parentSignalStartedAt = nowMs();
|
|
4120
4046
|
await signalParentPlayTerminal({
|
|
4121
4047
|
req,
|
|
4122
4048
|
status: 'completed',
|
|
@@ -4128,6 +4054,16 @@ async function executeRunRequest(
|
|
|
4128
4054
|
}`,
|
|
4129
4055
|
);
|
|
4130
4056
|
});
|
|
4057
|
+
recordRunnerPerfTrace({
|
|
4058
|
+
req,
|
|
4059
|
+
phase: 'runner.parent_terminal_signal',
|
|
4060
|
+
ms: nowMs() - parentSignalStartedAt,
|
|
4061
|
+
});
|
|
4062
|
+
recordRunnerPerfTrace({
|
|
4063
|
+
req,
|
|
4064
|
+
phase: 'runner.execute_total',
|
|
4065
|
+
ms: nowMs() - startedAt,
|
|
4066
|
+
});
|
|
4131
4067
|
return {
|
|
4132
4068
|
playName: req.playName,
|
|
4133
4069
|
result: serializedResult,
|
|
@@ -4285,6 +4221,9 @@ function runRequestFromWorkflowParams(
|
|
|
4285
4221
|
playCallGovernance: isPlayCallGovernanceSnapshot(params.playCallGovernance)
|
|
4286
4222
|
? params.playCallGovernance
|
|
4287
4223
|
: null,
|
|
4224
|
+
preloadedDbSessions: Array.isArray(params.preloadedDbSessions)
|
|
4225
|
+
? (params.preloadedDbSessions as PreloadedRuntimeDbSession[])
|
|
4226
|
+
: null,
|
|
4288
4227
|
coordinatorUrl:
|
|
4289
4228
|
typeof params.coordinatorUrl === 'string' && params.coordinatorUrl.trim()
|
|
4290
4229
|
? params.coordinatorUrl.trim()
|
|
@@ -4525,6 +4464,7 @@ export class TenantWorkflow extends WorkflowEntrypoint<
|
|
|
4525
4464
|
Record<string, unknown>
|
|
4526
4465
|
> {
|
|
4527
4466
|
declare readonly env: WorkerEnv;
|
|
4467
|
+
declare readonly ctx: ExecutionContext;
|
|
4528
4468
|
|
|
4529
4469
|
/**
|
|
4530
4470
|
* Cancellation model:
|
|
@@ -4568,14 +4508,30 @@ export class TenantWorkflow extends WorkflowEntrypoint<
|
|
|
4568
4508
|
(runnerEvent) => {
|
|
4569
4509
|
if (runnerEvent.type === 'log') {
|
|
4570
4510
|
console.log(`${runPrefix} ${runnerEvent.message}`);
|
|
4511
|
+
void this.env.COORDINATOR?.recordRunEvent(req.runId, {
|
|
4512
|
+
runId: req.runId,
|
|
4513
|
+
type: 'log',
|
|
4514
|
+
line: runnerEvent.message,
|
|
4515
|
+
ts: runnerEvent.ts,
|
|
4516
|
+
}).catch(() => undefined);
|
|
4571
4517
|
} else if (runnerEvent.type === 'error') {
|
|
4572
4518
|
console.error(
|
|
4573
4519
|
`${runPrefix} ${runnerEvent.message}${runnerEvent.stack ? `\n${runnerEvent.stack}` : ''}`,
|
|
4574
4520
|
);
|
|
4521
|
+
void this.env.COORDINATOR?.recordRunEvent(req.runId, {
|
|
4522
|
+
runId: req.runId,
|
|
4523
|
+
type: 'log',
|
|
4524
|
+
line: `[error] ${runnerEvent.message}`,
|
|
4525
|
+
ts: runnerEvent.ts,
|
|
4526
|
+
}).catch(() => undefined);
|
|
4575
4527
|
}
|
|
4576
4528
|
},
|
|
4577
4529
|
step,
|
|
4578
|
-
{
|
|
4530
|
+
{
|
|
4531
|
+
persistResultDatasets: !req.playCallGovernance,
|
|
4532
|
+
abortController,
|
|
4533
|
+
waitUntil: (promise) => this.ctx.waitUntil(promise),
|
|
4534
|
+
},
|
|
4579
4535
|
)) as Record<string, unknown>;
|
|
4580
4536
|
} catch (error) {
|
|
4581
4537
|
// CF Workflows + the dynamic-workflows framework swallow the error
|