deepline 0.0.1 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +324 -0
- package/dist/cli/index.js +6750 -503
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/index.mjs +6735 -512
- package/dist/cli/index.mjs.map +1 -1
- package/dist/index.d.mts +2349 -32
- package/dist/index.d.ts +2349 -32
- package/dist/index.js +1631 -82
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +1617 -83
- package/dist/index.mjs.map +1 -1
- package/dist/repo/apps/play-runner-workers/src/coordinator-entry.ts +3256 -0
- package/dist/repo/apps/play-runner-workers/src/dedup-do.ts +710 -0
- package/dist/repo/apps/play-runner-workers/src/entry.ts +5070 -0
- package/dist/repo/apps/play-runner-workers/src/runtime/README.md +21 -0
- package/dist/repo/apps/play-runner-workers/src/runtime/batching.ts +177 -0
- package/dist/repo/apps/play-runner-workers/src/runtime/execution-plan.ts +52 -0
- package/dist/repo/apps/play-runner-workers/src/runtime/tool-batch.ts +100 -0
- package/dist/repo/apps/play-runner-workers/src/runtime/tool-result.ts +184 -0
- package/dist/repo/sdk/src/cli/commands/auth.ts +482 -0
- package/dist/repo/sdk/src/cli/commands/billing.ts +188 -0
- package/dist/repo/sdk/src/cli/commands/csv.ts +123 -0
- package/dist/repo/sdk/src/cli/commands/db.ts +119 -0
- package/dist/repo/sdk/src/cli/commands/feedback.ts +40 -0
- package/dist/repo/sdk/src/cli/commands/org.ts +117 -0
- package/dist/repo/sdk/src/cli/commands/play.ts +3200 -0
- package/dist/repo/sdk/src/cli/commands/tools.ts +687 -0
- package/dist/repo/sdk/src/cli/dataset-stats.ts +341 -0
- package/dist/repo/sdk/src/cli/index.ts +138 -0
- package/dist/repo/sdk/src/cli/progress.ts +135 -0
- package/dist/repo/sdk/src/cli/trace.ts +61 -0
- package/dist/repo/sdk/src/cli/utils.ts +145 -0
- package/dist/repo/sdk/src/client.ts +1188 -0
- package/dist/repo/sdk/src/compat.ts +77 -0
- package/dist/repo/sdk/src/config.ts +285 -0
- package/dist/repo/sdk/src/errors.ts +125 -0
- package/dist/repo/sdk/src/http.ts +391 -0
- package/dist/repo/sdk/src/index.ts +139 -0
- package/dist/repo/sdk/src/play.ts +1330 -0
- package/dist/repo/sdk/src/plays/bundle-play-file.ts +133 -0
- package/dist/repo/sdk/src/plays/harness-stub.ts +210 -0
- package/dist/repo/sdk/src/plays/local-file-discovery.ts +326 -0
- package/dist/repo/sdk/src/tool-output.ts +489 -0
- package/dist/repo/sdk/src/types.ts +669 -0
- package/dist/repo/sdk/src/version.ts +2 -0
- package/dist/repo/sdk/src/worker-play-entry.ts +286 -0
- package/dist/repo/shared_libs/observability/node-tracing.ts +129 -0
- package/dist/repo/shared_libs/observability/tracing.ts +98 -0
- package/dist/repo/shared_libs/play-runtime/backend.ts +139 -0
- package/dist/repo/shared_libs/play-runtime/batch-runtime.ts +182 -0
- package/dist/repo/shared_libs/play-runtime/batching-types.ts +91 -0
- package/dist/repo/shared_libs/play-runtime/context.ts +3999 -0
- package/dist/repo/shared_libs/play-runtime/coordinator-headers.ts +78 -0
- package/dist/repo/shared_libs/play-runtime/ctx-contract.ts +250 -0
- package/dist/repo/shared_libs/play-runtime/ctx-types.ts +713 -0
- package/dist/repo/shared_libs/play-runtime/dataset-id.ts +10 -0
- package/dist/repo/shared_libs/play-runtime/db-session-crypto.ts +304 -0
- package/dist/repo/shared_libs/play-runtime/db-session.ts +462 -0
- package/dist/repo/shared_libs/play-runtime/dedup-backend.ts +0 -0
- package/dist/repo/shared_libs/play-runtime/default-batch-strategies.ts +124 -0
- package/dist/repo/shared_libs/play-runtime/execution-plan.ts +262 -0
- package/dist/repo/shared_libs/play-runtime/live-events.ts +214 -0
- package/dist/repo/shared_libs/play-runtime/live-state-contract.ts +50 -0
- package/dist/repo/shared_libs/play-runtime/map-execution-frame.ts +114 -0
- package/dist/repo/shared_libs/play-runtime/map-row-identity.ts +158 -0
- package/dist/repo/shared_libs/play-runtime/profiles.ts +90 -0
- package/dist/repo/shared_libs/play-runtime/progress-emitter.ts +172 -0
- package/dist/repo/shared_libs/play-runtime/protocol.ts +121 -0
- package/dist/repo/shared_libs/play-runtime/public-play-contract.ts +42 -0
- package/dist/repo/shared_libs/play-runtime/result-normalization.ts +33 -0
- package/dist/repo/shared_libs/play-runtime/runtime-actions.ts +208 -0
- package/dist/repo/shared_libs/play-runtime/runtime-api.ts +1873 -0
- package/dist/repo/shared_libs/play-runtime/runtime-constraints.ts +2 -0
- package/dist/repo/shared_libs/play-runtime/runtime-pg-driver-neon-serverless.ts +201 -0
- package/dist/repo/shared_libs/play-runtime/runtime-pg-driver-pg.ts +48 -0
- package/dist/repo/shared_libs/play-runtime/runtime-pg-driver.ts +84 -0
- package/dist/repo/shared_libs/play-runtime/scheduler-backend.ts +174 -0
- package/dist/repo/shared_libs/play-runtime/static-pipeline-types.ts +147 -0
- package/dist/repo/shared_libs/play-runtime/suspension.ts +68 -0
- package/dist/repo/shared_libs/play-runtime/tool-batch-executor.ts +146 -0
- package/dist/repo/shared_libs/play-runtime/tool-result.ts +387 -0
- package/dist/repo/shared_libs/play-runtime/tracing.ts +31 -0
- package/dist/repo/shared_libs/play-runtime/waterfall-replay.ts +75 -0
- package/dist/repo/shared_libs/play-runtime/worker-api-types.ts +140 -0
- package/dist/repo/shared_libs/plays/artifact-transport.ts +14 -0
- package/dist/repo/shared_libs/plays/artifact-types.ts +49 -0
- package/dist/repo/shared_libs/plays/bundling/index.ts +1346 -0
- package/dist/repo/shared_libs/plays/compiler-manifest.ts +186 -0
- package/dist/repo/shared_libs/plays/contracts.ts +51 -0
- package/dist/repo/shared_libs/plays/dataset.ts +308 -0
- package/dist/repo/shared_libs/plays/definition.ts +264 -0
- package/dist/repo/shared_libs/plays/file-refs.ts +11 -0
- package/dist/repo/shared_libs/plays/rate-limit-scheduler.ts +206 -0
- package/dist/repo/shared_libs/plays/resolve-static-pipeline.ts +164 -0
- package/dist/repo/shared_libs/plays/row-identity.ts +302 -0
- package/dist/repo/shared_libs/plays/runtime-validation.ts +415 -0
- package/dist/repo/shared_libs/plays/static-pipeline.ts +560 -0
- package/dist/repo/shared_libs/temporal/constants.ts +39 -0
- package/dist/repo/shared_libs/temporal/preview-config.ts +153 -0
- package/package.json +14 -12
|
@@ -0,0 +1,3256 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Coordinator Worker entry — main module exported by wrangler.toml.
|
|
3
|
+
*
|
|
4
|
+
* Hosts:
|
|
5
|
+
* - DynamicWorkflow (native Cloudflare Dynamic Workflows)
|
|
6
|
+
* - PlayDedup DO (default export class)
|
|
7
|
+
* - HTTP fetch handler for /api/v2/plays/run dispatch (Vercel forwards to here)
|
|
8
|
+
* - WorkerLoader-backed dynamic play Workers
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
export { PlayDedup } from './dedup-do';
|
|
12
|
+
|
|
13
|
+
import {
|
|
14
|
+
WorkerEntrypoint,
|
|
15
|
+
WorkflowEntrypoint,
|
|
16
|
+
exports as workersExports,
|
|
17
|
+
} from 'cloudflare:workers';
|
|
18
|
+
import {
|
|
19
|
+
_dispatcherBindingImpl as createDispatcherWorkflowBinding,
|
|
20
|
+
dispatchWorkflow,
|
|
21
|
+
DynamicWorkflowBinding,
|
|
22
|
+
type WorkflowRunner,
|
|
23
|
+
} from '@cloudflare/dynamic-workflows';
|
|
24
|
+
import type { ExecutionPlan } from '../../../shared_libs/play-runtime/execution-plan';
|
|
25
|
+
import type { PlayCallGovernanceSnapshot } from '../../../shared_libs/play-runtime/scheduler-backend';
|
|
26
|
+
import type {
|
|
27
|
+
PlayRuntimeManifest,
|
|
28
|
+
PlayRuntimeManifestMap,
|
|
29
|
+
} from '../../../shared_libs/plays/compiler-manifest';
|
|
30
|
+
import { runtimeRunActions } from '../../../shared_libs/play-runtime/runtime-actions';
|
|
31
|
+
|
|
32
|
+
export { DynamicWorkflowBinding };
|
|
33
|
+
|
|
34
|
+
/**
 * Parameters carried by a play Workflow run.
 *
 * In this file they are passed as `params` to `workflow.create(...)` (see
 * `createDynamicWorkflowInstance`) and wrapped by `buildDispatcherEnvelope`.
 * `buildDynamicWorkflowMetadata` copies `runId`, `graphHash`,
 * `artifactStorageKey`, `artifactHash`, `dynamicWorkerCode` and
 * `packagedFiles` into the dispatcher metadata.
 */
export type PlayWorkflowParams = {
  runId: string;
  playId: string;
  playName: string;
  artifactStorageKey: string;
  artifactHash: string;
  graphHash: string;
  input: Record<string, unknown>;
  inputFile?: { name?: string; r2Key?: string; path?: string } | null;
  inlineCsv?: { name: string; rows: Record<string, unknown>[] } | null;
  // Run through `normalizePackagedFiles` before being placed in the metadata.
  packagedFiles?: Array<{
    playPath: string;
    storageKey: string;
    inlineText?: string;
  }> | null;
  contractSnapshot?: unknown;
  executionPlan?: ExecutionPlan | null;
  childPlayManifests?: PlayRuntimeManifestMap | null;
  playCallGovernance?: PlayCallGovernanceSnapshot | null;
  dynamicWorkerCode?: string | null;
  executorToken: string;
  baseUrl: string;
  orgId: string;
  userEmail: string;
  userId?: string | null;
  runtimeBackend: string;
  dedupBackend: string;
  totalRows?: number;
  coordinatorUrl?: string | null;
};
|
|
64
|
+
|
|
65
|
+
/**
 * Result of an inline child submission, discriminated on `status`.
 *
 * Both variants carry `mode: 'inline_dynamic_worker'`, the collected `logs`
 * and the coordinator `timings`. The `completed` variant adds `output` and
 * `result`; the `failed` variant adds an `error` description.
 */
type InlineChildSubmitResult =
  | {
      workflowId: string;
      runId: string;
      status: 'completed';
      mode: 'inline_dynamic_worker';
      output: unknown;
      result: unknown;
      logs: string[];
      timings: CoordinatorTiming[];
    }
  | {
      workflowId: string;
      runId: string;
      status: 'failed';
      mode: 'inline_dynamic_worker';
      error: { message: string; stack?: string | null };
      logs: string[];
      timings: CoordinatorTiming[];
    };
|
|
85
|
+
|
|
86
|
+
/**
 * One timing sample: a named `phase` and its duration `ms`, with an optional
 * `graphHash` and free-form `extra` fields. Same shape as
 * `CoordinatorPerfTraceInput` minus `runId`.
 */
type CoordinatorTiming = {
  phase: string;
  ms: number;
  graphHash?: string | null;
  extra?: Record<string, unknown>;
};
|
|
92
|
+
|
|
93
|
+
/**
 * Serialized perf-trace record. `buildCoordinatorPerfTracePayload` produces
 * it with `ts` set from `Date.now()` and `source: 'coordinator'`; it is then
 * logged as a `[perf-trace]` JSON line and may be POSTed to the per-run
 * `/trace-add` endpoint.
 */
type CoordinatorPerfTracePayload = {
  ts: number;
  source: 'coordinator' | 'dynamic_worker';
  runId: string;
  phase: string;
  ms: number;
  graphHash?: string | null;
  // Admits the additional keys spread in from `CoordinatorPerfTraceInput.extra`.
  [key: string]: unknown;
};
|
|
102
|
+
|
|
103
|
+
/**
 * Caller-supplied description of a perf-trace event. An event whose `runId`
 * or `phase` is empty is dropped by `buildCoordinatorPerfTracePayload`;
 * `extra` keys are spread onto the top level of the resulting payload.
 */
type CoordinatorPerfTraceInput = {
  runId: string;
  phase: string;
  ms: number;
  graphHash?: string | null;
  extra?: Record<string, unknown>;
};
|
|
110
|
+
|
|
111
|
+
/** Callback that accepts one perf-trace event and returns nothing. */
type CoordinatorPerfTraceSink = (event: CoordinatorPerfTraceInput) => void;
|
|
112
|
+
|
|
113
|
+
/**
 * Loosely typed run response; every field is optional.
 * NOTE(review): presumably the JSON body returned by an inline dynamic
 * Worker run — confirm at the parsing site, which is outside this chunk.
 */
type InlineWorkerRunResponse = {
  status?: 'completed' | 'failed';
  result?: unknown;
  outputRows?: number;
  durationMs?: number;
  parseMs?: number;
  // Known event shapes are tagged by `type`; any other record is also accepted.
  events?: Array<
    | { type: 'log'; message?: string; level?: string; ts?: number }
    | { type: 'result'; result?: unknown; outputRows?: number; ts?: number }
    | { type: 'error'; message?: string; stack?: string; ts?: number }
    | Record<string, unknown>
  >;
  error?: { message?: string; stack?: string | null };
};
|
|
127
|
+
|
|
128
|
+
/**
 * Type guard for object-like values: true when `value` is a non-null object
 * that is not an array.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || typeof value !== 'object') return false;
  return !Array.isArray(value);
}
|
|
131
|
+
|
|
132
|
+
/** Bindings and configuration variables available to the coordinator Worker. */
interface CoordinatorEnv {
  // Workflow binding; instances are created and looked up through it.
  PLAY_WORKFLOW: Workflow<PlayWorkflowParams | PooledWorkflowBootstrapPayload>;
  // Durable Object namespace. In this file it is addressed by run id (perf
  // trace buffering) and by WORKFLOW_POOL_DO_NAME (workflow pool state).
  PLAY_DEDUP: DurableObjectNamespace;
  // Optional loader binding: `get(id, load)` returns a handle whose
  // `getEntrypoint` yields a `Fetcher & WorkflowRunner`.
  // NOTE(review): presumably the WorkerLoader binding named in the file
  // header — confirm against wrangler.toml.
  LOADER?: {
    get(
      id: string,
      load: () => Promise<{
        compatibilityDate: string;
        compatibilityFlags?: string[];
        mainModule: string;
        modules: Record<string, string>;
        env?: Record<string, unknown>;
        globalOutbound?: Fetcher | null;
      }>,
    ): {
      getEntrypoint(name?: string): Fetcher & WorkflowRunner;
    };
  };
  PLAYS_BUCKET: R2Bucket;
  DEEPLINE_API_BASE_URL: string;
  DEEPLINE_INTERNAL_TOKEN?: string;
  DEEPLINE_TAIL_LOG_TOKEN?: string;
  VERCEL_PROTECTION_BYPASS_TOKEN?: string;
  // Enables the prewarmed workflow pool when it is "1", "true" or "yes"
  // (trimmed, case-insensitive); see workflowPoolEnabled.
  DEEPLINE_WORKFLOW_PREWARM_POOL?: string;
  // Requested pool size; workflowPoolTargetSize clamps it to 1..5 and falls
  // back to WORKFLOW_POOL_DEFAULT_SIZE when it is not a positive number.
  DEEPLINE_WORKFLOW_PREWARM_POOL_SIZE?: string;
  DEEPLINE_PLAY_PREVIEW_SLUG?: string;
  /**
   * Service binding to the long-lived Play Harness Worker
   * (apps/play-harness-worker). Provides typed RPC access to leaf-level
   * helpers (zod validation, runtime-API HTTP forwarder, …) that we
   * deliberately keep OUT of every per-graphHash play bundle.
   *
   * Optional: when missing (e.g. an older deploy that hasn't been wired
   * yet, or a local dev environment running without the harness), the
   * coordinator falls back to passing a `null` HARNESS into per-play
   * Workers and the per-play stub throws a clear error if anyone tries
   * to use it. Loud failures > silent fallbacks.
   *
   * Wired in wrangler.toml as `[[services]] binding = "HARNESS"`.
   */
  HARNESS?: import('../../play-harness-worker/src/rpc-types').PlayHarnessRpc;
}
|
|
174
|
+
|
|
175
|
+
/**
 * Releases an RPC stub on a best-effort basis.
 *
 * Stubs that are garbage-collected instead of explicitly released make the
 * Workers RPC machinery emit "stub not disposed" warnings (and, under
 * stress, contribute to V8 deopt failures in `wrangler dev` local mode).
 * Values without a `Symbol.dispose` method are ignored, and any error thrown
 * while disposing is swallowed.
 *
 * @param value - The stub to release; `null`/`undefined` is a no-op.
 */
function disposeRpcStub(value: unknown): void {
  if (value === null || value === undefined) {
    return;
  }
  const target = value as { [Symbol.dispose]?: () => void };
  try {
    target[Symbol.dispose]?.();
  } catch {
    // Deliberately ignored: disposal must never fail the caller.
  }
}
|
|
191
|
+
|
|
192
|
+
/**
 * Builds the structured perf-trace payload for a coordinator event.
 *
 * The payload is what gets printed as a `[perf-trace]` JSON line, which
 * downstream tooling (tests/v2-plays/runner.ts --perf-summary) scrapes from
 * logs/coordinator.log / logs/cf-coordinator-tail.log. Workerd has no
 * filesystem, so the trace is stdout-only and additive next to the existing
 * `[coordinator] ...` logs.
 *
 * @param event - Trace input; `extra` keys are merged last, on top of the
 *   standard fields.
 * @returns The payload, or `null` when `runId` or `phase` is empty.
 */
function buildCoordinatorPerfTracePayload(
  event: CoordinatorPerfTraceInput,
): CoordinatorPerfTracePayload | null {
  const { runId, phase, ms, graphHash, extra } = event;
  if (!(runId && phase)) {
    return null;
  }
  const payload: CoordinatorPerfTracePayload = {
    ts: Date.now(),
    source: 'coordinator',
    runId,
    phase,
    ms,
  };
  if (graphHash) {
    payload.graphHash = graphHash;
  }
  return Object.assign(payload, extra ?? {});
}
|
|
214
|
+
|
|
215
|
+
/** Prints `payload` to stdout as a single `[perf-trace] <json>` line. */
function logCoordinatorPerfTrace(payload: CoordinatorPerfTracePayload): void {
  const serialized = JSON.stringify(payload);
  console.log('[perf-trace] ' + serialized);
}
|
|
218
|
+
|
|
219
|
+
/**
 * Logs a perf-trace line for `event`. Events rejected by
 * `buildCoordinatorPerfTracePayload` (empty `runId` or `phase`) are skipped.
 */
function recordCoordinatorPerfTrace(event: CoordinatorPerfTraceInput): void {
  const payload = buildCoordinatorPerfTracePayload(event);
  if (payload !== null) {
    logCoordinatorPerfTrace(payload);
  }
}
|
|
224
|
+
|
|
225
|
+
/**
 * POSTs a perf-trace payload to the `/trace-add` endpoint of the
 * `PLAY_DEDUP` Durable Object named after `payload.runId`.
 *
 * @throws Error when the response is not OK; the message carries the status
 *   and at most the first 200 characters of the response body.
 */
async function appendCoordinatorPerfTrace(
  env: CoordinatorEnv,
  payload: CoordinatorPerfTracePayload,
): Promise<void> {
  const namespace = env.PLAY_DEDUP;
  const traceStore = namespace.get(namespace.idFromName(payload.runId));
  const request = {
    method: 'POST',
    headers: { 'content-type': 'application/json' },
    body: JSON.stringify(payload),
  };
  const response = await traceStore.fetch(
    'https://deepline.dedup.internal/trace-add',
    request,
  );
  if (response.ok) {
    return;
  }
  // A body that cannot be read is reported as an empty string.
  const detail = await response.text().catch(() => '');
  throw new Error(
    `coordinator trace append failed ${response.status}: ${detail.slice(0, 200)}`,
  );
}
|
|
246
|
+
|
|
247
|
+
/**
 * Logs a perf-trace line and also starts buffering it in the per-run Durable
 * Object, without awaiting the result.
 *
 * A failed append is reported with `console.warn` and never surfaces to the
 * caller. When `ctx` exposes `waitUntil`, the pending append is registered
 * with it.
 *
 * @param env - Coordinator bindings.
 * @param ctx - Execution context, if one is available.
 * @param event - Trace input; skipped when its `runId` or `phase` is empty.
 */
function recordCoordinatorPerfTraceBuffered(
  env: CoordinatorEnv,
  ctx: ExecutionContext | null | undefined,
  event: CoordinatorPerfTraceInput,
): void {
  const payload = buildCoordinatorPerfTracePayload(event);
  if (payload === null) {
    return;
  }
  logCoordinatorPerfTrace(payload);

  const reportFailure = (error: unknown): void => {
    const message = error instanceof Error ? error.message : String(error);
    console.warn('[coordinator] failed to buffer perf trace', {
      runId: payload.runId,
      phase: payload.phase,
      error: message,
    });
  };
  const pending = appendCoordinatorPerfTrace(env, payload).catch(reportFailure);

  if (ctx && typeof ctx.waitUntil === 'function') {
    ctx.waitUntil(pending);
  }
}
|
|
266
|
+
|
|
267
|
+
/**
 * Reads the buffered perf-trace entries for `runId` from the `/trace-list`
 * endpoint of the per-run `PLAY_DEDUP` Durable Object.
 *
 * Entries are kept only when they are records with a known `source`, string
 * `runId`/`phase` and numeric `ts`/`ms`.
 *
 * @returns The valid entries; `[]` when the body has no `entries` array.
 * @throws Error when the response is not OK.
 */
async function listCoordinatorPerfTrace(
  env: CoordinatorEnv,
  runId: string,
): Promise<CoordinatorPerfTracePayload[]> {
  const namespace = env.PLAY_DEDUP;
  const traceStore = namespace.get(namespace.idFromName(runId));
  const response = await traceStore.fetch(
    'https://deepline.dedup.internal/trace-list',
  );
  if (!response.ok) {
    throw new Error(`coordinator trace list failed ${response.status}`);
  }
  // A body that is not valid JSON is treated like an empty object.
  const body = (await response.json().catch(() => ({}))) as {
    entries?: unknown;
  };
  if (!Array.isArray(body.entries)) {
    return [];
  }
  const isTraceEntry = (
    entry: unknown,
  ): entry is CoordinatorPerfTracePayload => {
    if (!isRecord(entry)) return false;
    const knownSource =
      entry.source === 'coordinator' || entry.source === 'dynamic_worker';
    return (
      knownSource &&
      typeof entry.runId === 'string' &&
      typeof entry.phase === 'string' &&
      typeof entry.ts === 'number' &&
      typeof entry.ms === 'number'
    );
  };
  return body.entries.filter(isTraceEntry);
}
|
|
292
|
+
|
|
293
|
+
/**
 * Normalizes an arbitrary name into a workflow event type.
 *
 * Trims the name, replaces each run of characters outside `[A-Za-z0-9_-]`
 * with one underscore, strips leading and trailing underscores, then keeps
 * at most the first 100 characters.
 *
 * @returns The normalized name, or `'deepline_event'` when nothing is left.
 */
function workflowEventType(name: string): string {
  const collapsed = name.trim().replace(/[^A-Za-z0-9_-]+/g, '_');
  const stripped = collapsed.replace(/^_+|_+$/g, '');
  const bounded = stripped.slice(0, 100);
  return bounded.length > 0 ? bounded : 'deepline_event';
}
|
|
301
|
+
|
|
302
|
+
/**
 * Metadata attached to a dispatched dynamic workflow. Produced by
 * `buildDynamicWorkflowMetadata`, which fills the optional fields with
 * `null` when the source params do not provide them.
 */
type DynamicWorkflowMetadata = {
  runId?: string | null;
  graphHash: string;
  artifactStorageKey: string;
  artifactHash?: string | null;
  dynamicWorkerCode?: string | null;
  packagedFiles?: Array<{
    playPath: string;
    storageKey: string;
    inlineText?: string;
  }> | null;
};
|
|
314
|
+
|
|
315
|
+
/**
 * Dispatcher payload that pairs the original workflow `params` with the
 * metadata derived from them (see `buildDispatcherEnvelope`).
 */
type DispatcherEnvelope = {
  __dispatcherMetadata: DynamicWorkflowMetadata;
  params: PlayWorkflowParams;
};
|
|
319
|
+
|
|
320
|
+
/**
 * Payload identifying a pooled workflow bootstrap. It is recognised at
 * runtime by `isPooledWorkflowBootstrapPayload`, which checks the
 * `__deeplinePooledWorkflow` marker and that `poolId` is a string
 * (`createdAt` is not checked there).
 */
type PooledWorkflowBootstrapPayload = {
  __deeplinePooledWorkflow: true;
  poolId: string;
  createdAt: number;
};
|
|
325
|
+
|
|
326
|
+
// Version tag sent with the workflow-pool requests in this file, either as a
// `version` query parameter or as a `version` JSON field.
const WORKFLOW_POOL_PROTOCOL_VERSION = 'pooled-workflow-wait-v3-dynamic-binding';
// Name passed to `PLAY_DEDUP.idFromName` to address the pool Durable Object.
const WORKFLOW_POOL_DO_NAME = 'workflow-pool:v2';
// NOTE(review): not referenced in the visible part of this file; presumably
// the event type a pooled instance waits for — confirm at the usage site.
const WORKFLOW_POOL_START_EVENT_TYPE = 'play_start';
// 8 minutes, in milliseconds; sent as `ttlMs` when ids are added to the pool.
const WORKFLOW_POOL_TTL_MS = 8 * 60 * 1000;
// Pool size used when DEEPLINE_WORKFLOW_PREWARM_POOL_SIZE is missing or is not
// a positive finite number.
const WORKFLOW_POOL_DEFAULT_SIZE = 2;
// Upper bound (ms) on how long waitForWorkflowPoolReady keeps polling.
const WORKFLOW_POOL_READY_TIMEOUT_MS = 1_500;
// Delay (ms) between status polls in waitForWorkflowPoolReady.
const WORKFLOW_POOL_READY_POLL_MS = 250;
|
|
333
|
+
|
|
334
|
+
/**
 * Derives the dispatcher metadata for a workflow from its params.
 *
 * Missing `runId`, `artifactHash` and `dynamicWorkerCode` become `null`, and
 * `packagedFiles` is passed through `normalizePackagedFiles`.
 */
function buildDynamicWorkflowMetadata(
  params: PlayWorkflowParams,
): DynamicWorkflowMetadata {
  const {
    runId,
    graphHash,
    artifactStorageKey,
    artifactHash,
    dynamicWorkerCode,
    packagedFiles,
  } = params;
  return {
    runId: runId ?? null,
    graphHash,
    artifactStorageKey,
    artifactHash: artifactHash ?? null,
    dynamicWorkerCode: dynamicWorkerCode ?? null,
    packagedFiles: normalizePackagedFiles(packagedFiles),
  };
}
|
|
346
|
+
|
|
347
|
+
/**
 * Wraps workflow params in the dispatcher envelope.
 *
 * This mirrors the envelope of @cloudflare/dynamic-workflows. The public
 * wrapper only applies it to create() params, while prewarmed instances
 * receive the dispatcher payload through Workflow sendEvent, so it has to be
 * built by hand here.
 */
function buildDispatcherEnvelope(params: PlayWorkflowParams): DispatcherEnvelope {
  const metadata = buildDynamicWorkflowMetadata(params);
  return { __dispatcherMetadata: metadata, params };
}
|
|
356
|
+
|
|
357
|
+
/**
 * Type guard for pooled-workflow bootstrap payloads: a non-array object whose
 * `__deeplinePooledWorkflow` is exactly `true` and whose `poolId` is a string.
 */
function isPooledWorkflowBootstrapPayload(
  value: unknown,
): value is PooledWorkflowBootstrapPayload {
  if (value === null || typeof value !== 'object' || Array.isArray(value)) {
    return false;
  }
  const candidate = value as Record<string, unknown>;
  if (candidate.__deeplinePooledWorkflow !== true) {
    return false;
  }
  return typeof candidate.poolId === 'string';
}
|
|
368
|
+
|
|
369
|
+
/**
 * Extracts tracing identifiers from a workflow event of unknown shape.
 *
 * `runId` is the first non-empty string among `payload.params.runId`,
 * `payload.__dispatcherMetadata.runId`, the pool id of a pooled bootstrap
 * payload and the event's `instanceId`; otherwise it is
 * `'unknown-workflow-run'`. `graphHash` is taken from the params, then the
 * metadata, and is `'workflow-pool'` for a pooled bootstrap payload or
 * `null` otherwise.
 *
 * @param event - The raw event; anything that is not a record is treated as
 *   an empty one.
 */
function readWorkflowTraceContext(event: unknown): {
  runId: string;
  graphHash: string | null;
  instanceId: string | null;
  pooledBootstrap: boolean;
} {
  // Non-empty strings pass through; every other value becomes null.
  const nonEmpty = (candidate: unknown): string | null =>
    typeof candidate === 'string' && candidate ? candidate : null;

  const record = isRecord(event) ? event : {};
  const payload = isRecord(record.payload) ? record.payload : {};
  const params = isRecord(payload.params) ? payload.params : null;
  const metadata = isRecord(payload.__dispatcherMetadata)
    ? payload.__dispatcherMetadata
    : null;
  const pooledBootstrap = isPooledWorkflowBootstrapPayload(payload);
  const instanceId = nonEmpty(record.instanceId);

  const runId =
    nonEmpty(params?.runId) ??
    nonEmpty(metadata?.runId) ??
    (pooledBootstrap ? nonEmpty(payload.poolId) : null) ??
    instanceId ??
    'unknown-workflow-run';

  const graphHash =
    nonEmpty(params?.graphHash) ??
    nonEmpty(metadata?.graphHash) ??
    (pooledBootstrap ? 'workflow-pool' : null);

  return { runId, graphHash, instanceId, pooledBootstrap };
}
|
|
402
|
+
|
|
403
|
+
/**
 * Reports whether the prewarmed workflow pool is switched on.
 *
 * The pool is enabled only when `DEEPLINE_WORKFLOW_PREWARM_POOL`, trimmed and
 * lower-cased, is `"1"`, `"true"` or `"yes"`. Unset or any other value
 * disables it.
 */
function workflowPoolEnabled(env: CoordinatorEnv): boolean {
  const flag = (env.DEEPLINE_WORKFLOW_PREWARM_POOL ?? '').trim().toLowerCase();
  return ['1', 'true', 'yes'].includes(flag);
}
|
|
410
|
+
|
|
411
|
+
/**
 * Resolves the target size of the prewarmed workflow pool.
 *
 * Reads `DEEPLINE_WORKFLOW_PREWARM_POOL_SIZE` as a number. Values that are
 * not finite and positive fall back to `WORKFLOW_POOL_DEFAULT_SIZE`; anything
 * else is floored and clamped to the range 1..5.
 */
function workflowPoolTargetSize(env: CoordinatorEnv): number {
  const requested = Number(env.DEEPLINE_WORKFLOW_PREWARM_POOL_SIZE ?? '');
  const usable = Number.isFinite(requested) && requested > 0;
  if (!usable) {
    return WORKFLOW_POOL_DEFAULT_SIZE;
  }
  const whole = Math.floor(requested);
  if (whole < 1) {
    return 1;
  }
  return whole > 5 ? 5 : whole;
}
|
|
416
|
+
|
|
417
|
+
/**
 * Creates a Workflow instance with the given id through the dynamic-workflows
 * dispatcher binding.
 *
 * The binding is built around `env.PLAY_WORKFLOW` and the metadata derived
 * from `params`; the same `params` are passed to `create`.
 *
 * @param input - Coordinator bindings, the instance id and the play params.
 * @returns The instance returned by the binding's `create` call.
 */
async function createDynamicWorkflowInstance(input: {
  env: CoordinatorEnv;
  id: string;
  params: PlayWorkflowParams;
}): Promise<WorkflowInstance> {
  const { env, id, params } = input;
  const metadata = buildDynamicWorkflowMetadata(params);
  const binding = createDispatcherWorkflowBinding(
    () => env.PLAY_WORKFLOW as unknown as Workflow,
    metadata,
  );
  return binding.create({ id, params });
}
|
|
431
|
+
|
|
432
|
+
/**
 * Returns the stub of the Durable Object that holds workflow-pool state,
 * addressed by the fixed name `WORKFLOW_POOL_DO_NAME`.
 */
function workflowPoolDurableObject(env: CoordinatorEnv): DurableObjectStub {
  const namespace = env.PLAY_DEDUP;
  const poolId = namespace.idFromName(WORKFLOW_POOL_DO_NAME);
  return namespace.get(poolId);
}
|
|
435
|
+
|
|
436
|
+
/**
 * Sends a request to the workflow-pool Durable Object and parses the JSON
 * response.
 *
 * `content-type: application/json` is applied as a default; a caller-supplied
 * content type in `init.headers` takes precedence.
 *
 * @param env - Coordinator bindings.
 * @param path - Path, including any query string, appended to the internal
 *   pool origin.
 * @param init - Optional request init (method, body, headers, …).
 * @returns The parsed JSON body, cast to `T` without validation.
 * @throws Error when the response is not OK; the message carries the path,
 *   the status and at most the first 400 characters of the response body.
 */
async function callWorkflowPool<T>(
  env: CoordinatorEnv,
  path: string,
  init?: RequestInit,
): Promise<T> {
  // `init.headers` is a HeadersInit, so it may be a Headers instance or an
  // array of [name, value] tuples. Object-spreading those drops or mangles
  // the caller's headers; the Headers constructor accepts every form.
  const headers = new Headers(init?.headers);
  if (!headers.has('content-type')) {
    headers.set('content-type', 'application/json');
  }
  const response = await workflowPoolDurableObject(env).fetch(
    `https://deepline.workflow-pool.internal${path}`,
    { ...init, headers },
  );
  if (!response.ok) {
    // A body that cannot be read is reported as an empty string.
    const detail = await response.text().catch(() => '');
    throw new Error(
      `workflow pool ${path} failed ${response.status}: ${detail.slice(0, 400)}`,
    );
  }
  return (await response.json()) as T;
}
|
|
460
|
+
|
|
461
|
+
/**
 * Pool occupancy as reported by the `/pool-count` endpoint;
 * `workflowPoolCount` substitutes 0 for any field that is not a number.
 */
type WorkflowPoolCounts = {
  available: number;
  warming: number;
};
|
|
465
|
+
|
|
466
|
+
/**
 * Pool counts plus counters describing a refill.
 * NOTE(review): the code that fills `target`, `created`, `promoted`,
 * `removed`, `waitedMs` and `waitIterations` lies beyond the visible part of
 * this file; confirm their exact meaning there.
 */
type WorkflowPoolRefillResult = WorkflowPoolCounts & {
  target: number;
  created: number;
  promoted: number;
  removed: number;
  waitedMs: number;
  waitIterations: number;
};
|
|
474
|
+
|
|
475
|
+
/**
 * One pool entry as normalized by `listWorkflowPoolEntries`: `id` is always a
 * non-empty string, `createdAt`/`expiresAt` default to 0 and `readyAt`
 * defaults to `null` when the stored value is not a finite number.
 */
type WorkflowPoolListEntry = {
  id: string;
  createdAt: number;
  readyAt: number | null;
  expiresAt: number;
};
|
|
481
|
+
|
|
482
|
+
/**
 * Fetches the pool's `available` and `warming` counts from `/pool-count`.
 * A field that is missing or not a number is reported as 0.
 */
async function workflowPoolCount(env: CoordinatorEnv): Promise<WorkflowPoolCounts> {
  const query = `/pool-count?version=${encodeURIComponent(WORKFLOW_POOL_PROTOCOL_VERSION)}`;
  const body = await callWorkflowPool<{
    available?: unknown;
    warming?: unknown;
  }>(env, query);
  const asCount = (raw: unknown): number => (typeof raw === 'number' ? raw : 0);
  return {
    available: asCount(body.available),
    warming: asCount(body.warming),
  };
}
|
|
495
|
+
|
|
496
|
+
/** Returns a promise that resolves, with no value, after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
499
|
+
|
|
500
|
+
/**
 * Lists the pool entries reported by `/pool-list`, normalized and validated.
 *
 * Non-object items and items without a non-empty string `id` are dropped.
 * `createdAt`/`expiresAt` fall back to 0 and `readyAt` falls back to `null`
 * when the stored value is not a finite number.
 *
 * @returns The normalized entries; `[]` when the body has no `entries` array.
 */
async function listWorkflowPoolEntries(
  env: CoordinatorEnv,
): Promise<WorkflowPoolListEntry[]> {
  const body = await callWorkflowPool<{ entries?: unknown }>(
    env,
    `/pool-list?version=${encodeURIComponent(WORKFLOW_POOL_PROTOCOL_VERSION)}`,
  );
  if (!Array.isArray(body.entries)) {
    return [];
  }
  const finiteOr = <F>(raw: unknown, fallback: F): number | F =>
    typeof raw === 'number' && Number.isFinite(raw) ? raw : fallback;

  const normalized: WorkflowPoolListEntry[] = [];
  for (const raw of body.entries) {
    if (!raw || typeof raw !== 'object' || Array.isArray(raw)) continue;
    const entry = raw as Record<string, unknown>;
    const id = typeof entry.id === 'string' ? entry.id : '';
    if (!id) continue;
    normalized.push({
      id,
      createdAt: finiteOr(entry.createdAt, 0),
      readyAt: finiteOr(entry.readyAt, null),
      expiresAt: finiteOr(entry.expiresAt, 0),
    });
  }
  return normalized;
}
|
|
529
|
+
|
|
530
|
+
/**
 * Registers workflow instance ids with the pool via `/pool-add`.
 *
 * Each request carries the pool TTL and protocol version. When
 * `options.ready` is exactly `true`, the ids are flagged ready and stamped
 * with the current time as `readyAt`. An empty `ids` array sends nothing.
 */
async function addWorkflowPoolIds(
  env: CoordinatorEnv,
  ids: string[],
  options?: { ready?: boolean },
): Promise<void> {
  if (ids.length === 0) {
    return;
  }
  const ready = options?.ready === true;
  const request: Record<string, unknown> = {
    ids,
    ttlMs: WORKFLOW_POOL_TTL_MS,
    version: WORKFLOW_POOL_PROTOCOL_VERSION,
    ready,
  };
  if (ready) {
    request.readyAt = Date.now();
  }
  await callWorkflowPool(env, '/pool-add', {
    method: 'POST',
    body: JSON.stringify(request),
  });
}
|
|
547
|
+
|
|
548
|
+
/**
 * Posts the given ids to the pool's `/pool-promote` endpoint together with
 * the protocol version. An empty `ids` array sends nothing.
 */
async function promoteWorkflowPoolIds(
  env: CoordinatorEnv,
  ids: string[],
): Promise<void> {
  if (!ids.length) {
    return;
  }
  const payload = JSON.stringify({
    ids,
    version: WORKFLOW_POOL_PROTOCOL_VERSION,
  });
  await callWorkflowPool(env, '/pool-promote', { method: 'POST', body: payload });
}
|
|
561
|
+
|
|
562
|
+
/**
 * Posts the given ids to the pool's `/pool-delete` endpoint together with
 * the protocol version. An empty `ids` array sends nothing.
 */
async function deleteWorkflowPoolIds(
  env: CoordinatorEnv,
  ids: string[],
): Promise<void> {
  if (!ids.length) {
    return;
  }
  const payload = JSON.stringify({
    ids,
    version: WORKFLOW_POOL_PROTOCOL_VERSION,
  });
  await callWorkflowPool(env, '/pool-delete', { method: 'POST', body: payload });
}
|
|
575
|
+
|
|
576
|
+
/**
 * Requests an instance id from the pool via `/pool-lease`.
 *
 * @returns The id from the response, or `null` when the response carries no
 *   non-empty string `id`.
 */
async function leaseWorkflowPoolId(
  env: CoordinatorEnv,
): Promise<string | null> {
  const query = `/pool-lease?version=${encodeURIComponent(WORKFLOW_POOL_PROTOCOL_VERSION)}`;
  const leased = await callWorkflowPool<{ id?: unknown }>(env, query, {
    method: 'POST',
    body: '{}',
  });
  if (typeof leased.id !== 'string' || leased.id === '') {
    return null;
  }
  return leased.id;
}
|
|
589
|
+
|
|
590
|
+
/**
 * Posts a `runId` / `instanceId` pair to the pool's `/pool-map-run` endpoint
 * together with the protocol version. `resolveWorkflowInstanceIdForRun`
 * reads the pair back through `/pool-resolve-run`.
 */
async function mapRunToWorkflowInstance(input: {
  env: CoordinatorEnv;
  runId: string;
  instanceId: string;
}): Promise<void> {
  const { env, runId, instanceId } = input;
  const payload = JSON.stringify({
    runId,
    instanceId,
    version: WORKFLOW_POOL_PROTOCOL_VERSION,
  });
  await callWorkflowPool(env, '/pool-map-run', { method: 'POST', body: payload });
}
|
|
604
|
+
|
|
605
|
+
/**
 * Resolves the Workflow instance id that serves `runId`.
 *
 * With the pool disabled this is `workflowInstanceId(runId)`. With the pool
 * enabled, the pool's `/pool-resolve-run` mapping is consulted first, and
 * `workflowInstanceId(runId)` is used when no mapping is recorded or the
 * lookup fails.
 *
 * The lookup stays best-effort, but a failure is logged with `console.warn`
 * instead of being dropped without a trace. A malformed response body (for
 * example JSON `null`) now also takes the fallback instead of throwing.
 *
 * @param env - Coordinator bindings.
 * @param runId - The play run id.
 * @returns The mapped instance id, or `workflowInstanceId(runId)`.
 */
async function resolveWorkflowInstanceIdForRun(
  env: CoordinatorEnv,
  runId: string,
): Promise<string> {
  if (!workflowPoolEnabled(env)) {
    return workflowInstanceId(runId);
  }
  try {
    const body = await callWorkflowPool<{ instanceId?: unknown }>(
      env,
      `/pool-resolve-run?runId=${encodeURIComponent(runId)}&version=${encodeURIComponent(
        WORKFLOW_POOL_PROTOCOL_VERSION,
      )}`,
    );
    if (typeof body.instanceId === 'string' && body.instanceId) {
      return body.instanceId;
    }
  } catch (error: unknown) {
    console.warn('[coordinator] failed to resolve pooled workflow instance', {
      runId,
      error: error instanceof Error ? error.message : String(error),
    });
  }
  return workflowInstanceId(runId);
}
|
|
622
|
+
|
|
623
|
+
/**
 * Empties the workflow pool and terminates the instances it was tracking.
 *
 * Steps: snapshot the current entries (best-effort), clear the pool through
 * `/pool-clear`, then terminate every snapshotted instance.
 *
 * Termination is best-effort per instance. Both the `PLAY_WORKFLOW.get`
 * lookup and the `terminate` call are guarded, so an instance that no longer
 * exists cannot reject the whole batch after the pool has already been
 * cleared, and the deleted count is still returned.
 *
 * @param env - Coordinator bindings.
 * @returns The `deleted` count reported by `/pool-clear`, or 0 when it is
 *   not a number.
 * @throws Error when the `/pool-clear` request itself fails.
 */
async function clearWorkflowPool(env: CoordinatorEnv): Promise<number> {
  // If listing fails there is nothing to terminate, but the pool is still cleared.
  const entries = await listWorkflowPoolEntries(env).catch(() => []);
  const body = await callWorkflowPool<{ deleted?: unknown }>(
    env,
    `/pool-clear?version=${encodeURIComponent(WORKFLOW_POOL_PROTOCOL_VERSION)}`,
    { method: 'POST', body: '{}' },
  );
  await Promise.all(
    entries.map(async (entry) => {
      let instance: WorkflowInstance | null = null;
      try {
        instance = await env.PLAY_WORKFLOW.get(entry.id);
        await instance.terminate();
      } catch {
        // Best-effort: the instance may be gone or already finished.
      } finally {
        disposeRpcStub(instance);
      }
    }),
  );
  return typeof body.deleted === 'number' ? body.deleted : 0;
}
|
|
642
|
+
|
|
643
|
+
/**
 * Returns the `status` string of an instance status, or `'unknown'` when the
 * status object is `null` or its `status` field is not a string.
 */
function workflowStatusName(status: InstanceStatus | null): string {
  const name = status?.status;
  return typeof name === 'string' ? name : 'unknown';
}
|
|
646
|
+
|
|
647
|
+
/**
 * Reports whether an instance with this status name can be used as a pool
 * candidate.
 *
 * Cloudflare Workflows may report a run as "running" even while its body is
 * blocked inside step.waitForEvent(), so "running" is accepted alongside
 * "waiting". submitViaPooledWorkflow still falls back to a fresh Workflow if
 * sendEvent fails.
 */
function workflowPoolStatusIsReady(statusName: string): boolean {
  switch (statusName) {
    case 'waiting':
    case 'running':
      return true;
    default:
      return false;
  }
}
|
|
654
|
+
|
|
655
|
+
/**
 * Polls a Workflow instance until it is pool-ready, reaches a terminal state,
 * or `WORKFLOW_POOL_READY_TIMEOUT_MS` elapses.
 *
 * A failed `status()` call is treated as a transient miss: the poll is
 * counted, the last successfully read status is kept, and polling continues.
 * This matches the other status reads in this file (which use
 * `.catch(() => null)`) and keeps one RPC hiccup from aborting the caller
 * before it has recorded or terminated the instance. If no status was ever
 * read, the reported status is `'unknown'`.
 *
 * @param instance Workflow instance to poll.
 * @returns `ready` (whether a pool-ready status was observed), the last known
 *   status name, the elapsed milliseconds, and the number of polls attempted.
 */
async function waitForWorkflowPoolReady(instance: WorkflowInstance): Promise<{
  ready: boolean;
  status: string;
  ms: number;
  polls: number;
}> {
  const startedAt = Date.now();
  let lastStatus: InstanceStatus | null = null;
  let polls = 0;
  const report = (ready: boolean) => ({
    ready,
    status: workflowStatusName(lastStatus),
    ms: Date.now() - startedAt,
    polls,
  });

  while (Date.now() - startedAt < WORKFLOW_POOL_READY_TIMEOUT_MS) {
    const polled = await instance.status().catch(() => null);
    polls += 1;
    if (polled) {
      lastStatus = polled;
    }
    const statusName = workflowStatusName(lastStatus);
    if (workflowPoolStatusIsReady(statusName)) {
      return report(true);
    }
    if (
      statusName === 'complete' ||
      statusName === 'errored' ||
      statusName === 'terminated'
    ) {
      // Terminal states never become ready; stop polling immediately.
      return report(false);
    }
    await new Promise((resolve) =>
      setTimeout(resolve, WORKFLOW_POOL_READY_POLL_MS),
    );
  }
  return report(false);
}
|
|
699
|
+
|
|
700
|
+
/**
 * Runs one maintenance pass over the workflow pool.
 *
 * 1. Re-checks every tracked entry that is still warming (`readyAt === null`):
 *    ready ones are promoted, unusable ones are removed from the pool.
 * 2. Creates as many new pooled Workflow instances as are needed to reach the
 *    target size, waiting for each to become ready, and registers them as
 *    ready or warming.
 *
 * @param env Coordinator bindings/configuration.
 * @returns Pool counts after the pass plus how many entries were created,
 *   promoted and removed. All zeros when the pool is disabled.
 */
async function refillWorkflowPoolOnce(
  env: CoordinatorEnv,
): Promise<Omit<WorkflowPoolRefillResult, 'waitedMs' | 'waitIterations'>> {
  if (!workflowPoolEnabled(env)) {
    return {
      available: 0,
      warming: 0,
      target: 0,
      created: 0,
      promoted: 0,
      removed: 0,
    };
  }

  // Statuses that will never become pool-ready. 'unknown' is what
  // workflowStatusName yields when the status read failed or was malformed.
  const isUnusable = (name: string): boolean =>
    name === 'complete' ||
    name === 'errored' ||
    name === 'terminated' ||
    name === 'unknown';

  const target = workflowPoolTargetSize(env);
  const tracked = await listWorkflowPoolEntries(env);
  const promotedIds: string[] = [];
  const removedIds: string[] = [];

  for (const entry of tracked) {
    if (entry.readyAt !== null) {
      continue; // already promoted; only warming entries are re-checked
    }
    const instance = await env.PLAY_WORKFLOW.get(entry.id);
    try {
      const polled = await instance.status().catch(() => null);
      const statusName = workflowStatusName(polled);
      if (workflowPoolStatusIsReady(statusName)) {
        promotedIds.push(entry.id);
      } else if (isUnusable(statusName)) {
        removedIds.push(entry.id);
      }
    } finally {
      disposeRpcStub(instance);
    }
  }
  await Promise.all([
    promoteWorkflowPoolIds(env, promotedIds),
    deleteWorkflowPoolIds(env, removedIds),
  ]);

  const counts = await workflowPoolCount(env);
  const needed = Math.max(0, target - (counts.available + counts.warming));
  if (needed === 0) {
    return {
      available: counts.available,
      warming: counts.warming,
      target,
      created: 0,
      promoted: promotedIds.length,
      removed: removedIds.length,
    };
  }

  const readyCreatedIds: string[] = [];
  const warmingCreatedIds: string[] = [];
  for (let createdSoFar = 0; createdSoFar < needed; createdSoFar += 1) {
    const poolId = `pool-v2-${Date.now().toString(36)}-${crypto.randomUUID().slice(0, 12)}`;
    const instance = await env.PLAY_WORKFLOW.create({
      id: poolId,
      params: {
        __deeplinePooledWorkflow: true,
        poolId,
        createdAt: Date.now(),
      } satisfies PooledWorkflowBootstrapPayload,
    });
    try {
      const readiness = await waitForWorkflowPoolReady(instance);
      recordCoordinatorPerfTrace({
        runId: poolId,
        phase: 'coordinator.workflow_pool_ready',
        ms: readiness.ms,
        graphHash: 'workflow-pool',
        extra: {
          ready: readiness.ready,
          status: readiness.status,
          polls: readiness.polls,
        },
      });
      if (readiness.ready) {
        readyCreatedIds.push(poolId);
      } else if (isUnusable(readiness.status)) {
        // Never registered with the pool; only counted and terminated.
        removedIds.push(poolId);
        await instance.terminate().catch(() => undefined);
      } else {
        warmingCreatedIds.push(poolId);
      }
    } finally {
      disposeRpcStub(instance);
    }
  }
  await Promise.all([
    addWorkflowPoolIds(env, readyCreatedIds, { ready: true }),
    addWorkflowPoolIds(env, warmingCreatedIds, { ready: false }),
  ]);

  const finalCounts = await workflowPoolCount(env);
  return {
    available: finalCounts.available,
    warming: finalCounts.warming,
    target,
    created: readyCreatedIds.length + warmingCreatedIds.length,
    promoted: promotedIds.length,
    removed: removedIds.length,
  };
}
|
|
810
|
+
|
|
811
|
+
/**
 * Refills the workflow pool and optionally waits until enough instances are
 * available.
 *
 * One refill pass always runs. When `waitReady` is exactly `true`, further
 * passes run every `WORKFLOW_POOL_READY_POLL_MS` until at least
 * `minAvailable` instances are available, the wait budget is spent, or the
 * pool is disabled. The result is also recorded as a
 * `coordinator.workflow_pool_refill` perf trace.
 *
 * @param env Coordinator bindings/configuration.
 * @param options.minAvailable Desired available count; floored, minimum 1 (default 1).
 * @param options.waitReady Whether to keep polling until the pool is ready.
 * @param options.waitTimeoutMs Wait budget in ms; floored and capped at 15000.
 *   Non-finite or non-positive values use the 4000 ms default.
 * @returns Latest pool counts, with created/promoted/removed summed over all
 *   passes, plus total elapsed time and the number of extra passes.
 */
async function refillWorkflowPool(
  env: CoordinatorEnv,
  options?: {
    minAvailable?: number;
    waitReady?: boolean;
    waitTimeoutMs?: number;
  },
): Promise<WorkflowPoolRefillResult> {
  const startedAt = Date.now();
  const minAvailable = Math.max(1, Math.floor(options?.minAvailable ?? 1));
  const waitReady = options?.waitReady === true;

  const requestedTimeoutMs = options?.waitTimeoutMs;
  let waitTimeoutMs = 4_000;
  if (
    typeof requestedTimeoutMs === 'number' &&
    Number.isFinite(requestedTimeoutMs) &&
    requestedTimeoutMs > 0
  ) {
    waitTimeoutMs = Math.min(Math.floor(requestedTimeoutMs), 15_000);
  }

  let totals = await refillWorkflowPoolOnce(env);
  let iterations = 0;
  const readyWaitStartedAt = Date.now();
  const mustKeepWaiting = (): boolean =>
    workflowPoolEnabled(env) &&
    waitReady &&
    totals.available < minAvailable &&
    Date.now() - readyWaitStartedAt < waitTimeoutMs;

  while (mustKeepWaiting()) {
    iterations += 1;
    await sleep(WORKFLOW_POOL_READY_POLL_MS);
    const pass = await refillWorkflowPoolOnce(env);
    // Counts come from the latest pass; the activity counters accumulate.
    totals = {
      ...pass,
      created: totals.created + pass.created,
      promoted: totals.promoted + pass.promoted,
      removed: totals.removed + pass.removed,
    };
  }

  const result: WorkflowPoolRefillResult = {
    ...totals,
    waitedMs: Date.now() - startedAt,
    waitIterations: iterations,
  };
  recordCoordinatorPerfTrace({
    runId: 'workflow-pool',
    phase: 'coordinator.workflow_pool_refill',
    ms: result.waitedMs,
    graphHash: 'workflow-pool',
    extra: result,
  });
  return result;
}
|
|
863
|
+
|
|
864
|
+
/**
 * Tries to start a run on a pre-warmed Workflow instance leased from the pool.
 *
 * Returns the pooled instance when the start event was delivered and the
 * run-to-instance mapping was stored. Returns `null` in every other case
 * (pool disabled, nothing to lease, instance not ready, or any failure).
 *
 * Whenever a leased instance is not handed back to the caller it is
 * terminated (best-effort). A leased instance is no longer tracked by the
 * pool, so leaving it alive would orphan it. More importantly, if the start
 * event was already delivered but a later step failed, a fresh Workflow
 * started for the same run would run alongside the pooled instance and the
 * run would execute twice.
 *
 * @param input.env Coordinator bindings/configuration.
 * @param input.params Parameters of the run to start.
 * @param input.recordSubmitTiming Sink for per-phase timing records.
 * @returns The pooled instance now running the run, or `null` when the pool
 *   was not used.
 */
async function submitViaPooledWorkflow(input: {
  env: CoordinatorEnv;
  params: PlayWorkflowParams;
  recordSubmitTiming: (timing: CoordinatorTiming) => void;
}): Promise<WorkflowInstance | null> {
  if (!workflowPoolEnabled(input.env)) {
    return null;
  }
  const leaseStartedAt = Date.now();
  const pooledInstanceId = await leaseWorkflowPoolId(input.env);
  // On a miss, capture the pool counts purely for diagnostics.
  const missCounts = pooledInstanceId
    ? null
    : await workflowPoolCount(input.env).catch(() => null);
  input.recordSubmitTiming({
    phase: 'coordinator.workflow_pool_lease',
    ms: Date.now() - leaseStartedAt,
    graphHash: input.params.graphHash ?? null,
    extra: {
      pooled: Boolean(pooledInstanceId),
      ...(missCounts
        ? {
            availableAfterMiss: missCounts.available,
            warmingAfterMiss: missCounts.warming,
          }
        : {}),
    },
  });
  if (!pooledInstanceId) {
    return null;
  }

  const warnFallback = (stage: string, error: unknown): void => {
    console.warn('[coordinator.workflow_pool] pooled submit failed; falling back', {
      runId: input.params.runId,
      pooledInstanceId,
      stage,
      error: error instanceof Error ? error.message : String(error),
    });
  };

  let instance: WorkflowInstance;
  try {
    instance = await input.env.PLAY_WORKFLOW.get(pooledInstanceId);
  } catch (error) {
    // The leased id cannot be looked up; fall back instead of failing the submit.
    warnFallback('get_instance', error);
    return null;
  }

  let stage = 'ready_check';
  try {
    const readyCheckStartedAt = Date.now();
    const status = await instance.status().catch(() => null);
    const statusName = workflowStatusName(status);
    input.recordSubmitTiming({
      phase: 'coordinator.workflow_pool_ready_check',
      ms: Date.now() - readyCheckStartedAt,
      graphHash: input.params.graphHash ?? null,
      extra: { instanceId: pooledInstanceId, status: statusName },
    });
    if (!workflowPoolStatusIsReady(statusName)) {
      await instance.terminate().catch(() => undefined);
      disposeRpcStub(instance);
      return null;
    }

    stage = 'send_event';
    const sendStartedAt = Date.now();
    await instance.sendEvent({
      type: WORKFLOW_POOL_START_EVENT_TYPE,
      payload: buildDispatcherEnvelope(input.params),
    });

    stage = 'map_run';
    await mapRunToWorkflowInstance({
      env: input.env,
      runId: input.params.runId,
      instanceId: pooledInstanceId,
    });
    input.recordSubmitTiming({
      phase: 'coordinator.workflow_pool_send_event',
      ms: Date.now() - sendStartedAt,
      graphHash: input.params.graphHash ?? null,
      extra: { instanceId: pooledInstanceId },
    });
    return instance;
  } catch (error) {
    // Stop the pooled instance before reporting the fallback so it cannot
    // keep running (or start running) the same run as the replacement.
    await instance.terminate().catch(() => undefined);
    disposeRpcStub(instance);
    warnFallback(stage, error);
    return null;
  }
}
|
|
938
|
+
|
|
939
|
+
/**
 * Reads the `payload` of a workflow event.
 *
 * @param event Workflow event of unknown shape.
 * @returns `event.payload` when both the event and its payload pass
 *   `isRecord`, otherwise `null`.
 */
function readWorkflowPayload(event: unknown): Record<string, unknown> | null {
  if (!isRecord(event)) {
    return null;
  }
  const { payload } = event;
  if (isRecord(payload)) {
    return payload;
  }
  return null;
}
|
|
944
|
+
|
|
945
|
+
/**
 * Reports a failed workflow run to the runtime API so the run is marked
 * `failed`.
 *
 * `runId`, `baseUrl` and `executorToken` are read from the event payload; if
 * the payload or any of them is missing the function returns without doing
 * anything. The status update is POSTed to
 * `<baseUrl>/api/v2/plays/internal/runtime`, with up to four attempts
 * separated by 200/500/1500 ms. A 4xx response other than 408 or 429 ends the
 * retries immediately. If no attempt succeeds the last error is logged; the
 * function never throws for a failed report.
 *
 * @param input.env Coordinator bindings (supplies the optional Vercel bypass token).
 * @param input.event Workflow event whose payload identifies the run.
 * @param input.error The error that caused the run to fail.
 */
async function markWorkflowRuntimeFailure(input: {
  env: CoordinatorEnv;
  event: unknown;
  error: unknown;
}): Promise<void> {
  const payload = readWorkflowPayload(input.event);
  if (!payload) return;

  const runId = typeof payload.runId === 'string' ? payload.runId : null;
  const baseUrl = typeof payload.baseUrl === 'string' ? payload.baseUrl : null;
  const executorToken =
    typeof payload.executorToken === 'string' ? payload.executorToken : null;
  if (!runId || !baseUrl || !executorToken) return;

  const failure = input.error;
  let errorName = 'Error';
  let errorMessage = String(failure);
  let errorStack: string | null = null;
  if (failure instanceof Error) {
    if (failure.name) errorName = failure.name;
    errorMessage = failure.message;
    if (typeof failure.stack === 'string') {
      // Keep the report compact: only the first 12 stack lines are sent.
      errorStack = failure.stack.split('\n').slice(0, 12).join('\n');
    }
  }

  const headers = new Headers({
    authorization: `Bearer ${executorToken}`,
    'content-type': 'application/json',
  });
  const bypass = input.env.VERCEL_PROTECTION_BYPASS_TOKEN?.trim();
  if (bypass) headers.set('x-vercel-protection-bypass', bypass);

  const body = JSON.stringify(
    runtimeRunActions.updateStatus({
      playId: runId,
      status: 'failed',
      error: `DynamicWorkflow runner failed: ${errorName}: ${errorMessage}${
        errorStack ? `\n${errorStack}` : ''
      }`,
      runtimeBackend: 'cf_workflows_dynamic_worker',
    }),
  );
  const url = `${baseUrl.replace(/\/$/, '')}/api/v2/plays/internal/runtime`;

  const retryDelaysMs = [200, 500, 1500];
  const maxAttempts = retryDelaysMs.length + 1;
  let lastError: unknown = null;
  let attempt = 0;
  while (attempt < maxAttempts) {
    let retriable = true;
    try {
      const response = await fetch(url, { method: 'POST', headers, body });
      if (response.ok) return;
      const { status } = response;
      const detail = (await response.text().catch(() => '')).slice(0, 400);
      lastError = new Error(`runtime API responded ${status}: ${detail}`);
      // Client errors will not improve on retry, except timeouts and rate limits.
      retriable =
        status < 400 || status >= 500 || status === 408 || status === 429;
    } catch (error) {
      lastError = error;
    }
    if (!retriable) break;
    const delayMs = retryDelaysMs[attempt];
    attempt += 1;
    if (attempt < maxAttempts) {
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
  }
  console.error('[coordinator] failed to mark workflow runtime failure', {
    runId,
    message: lastError instanceof Error ? lastError.message : String(lastError),
  });
}
|
|
1013
|
+
|
|
1014
|
+
/**
 * Shape of a stored play artifact payload. Every field is optional, so
 * consumers must check for presence before use.
 *
 * NOTE(review): the producer and readers of this payload are outside this
 * section; the field meanings below are inferred from their names — confirm.
 */
type StoredPlayArtifactPayload = {
  artifact?: {
    /** Presumably a discriminator for the kind of artifact stored. */
    artifactKind?: string;
    /** Presumably the bundled source text of the play. */
    bundledCode?: string;
  };
};
|
|
1020
|
+
|
|
1021
|
+
// Compatibility date (YYYY-MM-DD) for dynamically loaded Workers.
// NOTE(review): the consumer of this constant is outside this section —
// presumably the Worker loader configuration; confirm before changing it.
const DYNAMIC_WORKER_COMPATIBILITY_DATE = '2026-05-01';
|
|
1022
|
+
|
|
1023
|
+
/**
 * Asks the origin API to mint an executor token for a child run.
 *
 * POSTs the parent/child lineage to
 * `<baseUrl>/api/v2/plays/internal/child-executor-token`, authenticated with
 * the parent's executor token. Each call sends a fresh `x-deepline-request-id`,
 * plus the Vercel protection-bypass header when one is configured.
 *
 * @param input.env Coordinator bindings (supplies the optional bypass token).
 * @param input.baseUrl Origin base URL; one trailing slash is tolerated.
 * @param input.parentExecutorToken Bearer token of the parent run.
 * @param input.maxCreditsPerRun Optional credit cap; sent as `null` when absent.
 * @returns The `executorToken` string from the response.
 * @throws Error when the response is not OK (message taken from the response
 *   body when available) or when the response carries no non-blank
 *   `executorToken`.
 */
async function mintChildWorkflowExecutorToken(input: {
  env: CoordinatorEnv;
  baseUrl: string;
  parentExecutorToken: string;
  parentRunId: string;
  parentPlayName: string;
  childRunId: string;
  childPlayName: string;
  maxCreditsPerRun?: number | null;
}): Promise<string> {
  const endpoint = `${input.baseUrl.replace(/\/$/, '')}/api/v2/plays/internal/child-executor-token`;
  const headers = new Headers({
    authorization: `Bearer ${input.parentExecutorToken}`,
    'content-type': 'application/json',
    'x-deepline-request-id': crypto.randomUUID(),
  });
  const bypassToken = input.env.VERCEL_PROTECTION_BYPASS_TOKEN?.trim();
  if (bypassToken) {
    headers.set('x-vercel-protection-bypass', bypassToken);
  }

  const response = await fetch(endpoint, {
    method: 'POST',
    headers,
    body: JSON.stringify({
      parentRunId: input.parentRunId,
      parentPlayName: input.parentPlayName,
      childRunId: input.childRunId,
      childPlayName: input.childPlayName,
      maxCreditsPerRun: input.maxCreditsPerRun ?? null,
    }),
  });

  // An unreadable or non-JSON body is treated as an empty object.
  const text = await response.text().catch(() => '');
  let parsed: Record<string, unknown> = {};
  if (text) {
    try {
      parsed = JSON.parse(text) as Record<string, unknown>;
    } catch {
      parsed = {};
    }
  }

  if (!response.ok) {
    const detail = isRecord(parsed.error) ? parsed.error : null;
    // First non-empty candidate wins: structured message, string error, raw body.
    const candidates = [
      typeof detail?.message === 'string' ? detail.message.trim() : '',
      typeof parsed.error === 'string' ? parsed.error.trim() : '',
      text.slice(0, 800),
    ];
    const message =
      candidates.find((candidate) => candidate.length > 0) ??
      `Origin child executor token mint failed with ${response.status}.`;
    throw new Error(message);
  }

  const executorToken = parsed.executorToken;
  if (typeof executorToken === 'string' && executorToken.trim()) {
    return executorToken;
  }
  throw new Error('Origin child executor token response was missing executorToken.');
}
|
|
1078
|
+
|
|
1079
|
+
/**
 * Builds a fresh run id for a child play.
 *
 * The id has the form `play/<slug>/run/<timestamp>-<random>`, where:
 * - `<slug>` is the lower-cased play name with every character outside
 *   `[a-z0-9-]` replaced by `-`, hyphen runs collapsed, edge hyphens removed,
 *   then cut to 64 characters (`play` when nothing is left);
 * - `<timestamp>` is the current UTC ISO timestamp without `-`, `:`, `.` and
 *   `Z`, with the `T` separator lower-cased;
 * - `<random>` is the first 8 characters of a random UUID.
 *
 * @param playName Human-readable play name.
 * @returns A new run id; two calls never intentionally return the same value.
 */
function buildChildRunId(playName: string): string {
  const cleaned = playName
    .toLowerCase()
    .replace(/[^a-z0-9-]/g, '-')
    .replace(/-+/g, '-')
    .replace(/^-+|-+$/g, '');
  const slug = cleaned.slice(0, 64) || 'play';
  const stamp = new Date()
    .toISOString()
    .replace(/[-:.]/g, '')
    .replace('T', 't')
    .replace('Z', '');
  const suffix = crypto.randomUUID().slice(0, 8);
  return `play/${slug}/run/${stamp}-${suffix}`;
}
|
|
1093
|
+
|
|
1094
|
+
/**
 * Validates a caller-supplied runtime base URL and reduces it to its origin.
 *
 * Accepted values are http(s) URLs that carry nothing beyond the origin: no
 * credentials, no path other than `/`, no query string and no fragment.
 *
 * The result is `URL.origin` (scheme + host + non-default port) rather than
 * the serialized URL. A bare trailing `?` or `#` parses to an empty
 * `search`/`hash` and so passes the checks below, but it would survive
 * serialization and corrupt every `${baseUrl}/api/...` concatenation.
 *
 * @param value Candidate base URL of unknown type.
 * @returns The normalized origin without a trailing slash, or `null` when the
 *   value is not a string, is blank, cannot be parsed, or is not a bare
 *   http(s) origin.
 */
function normalizeRuntimeBaseUrl(value: unknown): string | null {
  if (typeof value !== 'string') return null;
  const trimmed = value.trim();
  if (!trimmed) return null;
  let parsed: URL;
  try {
    parsed = new URL(trimmed);
  } catch {
    return null;
  }
  if (
    (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') ||
    parsed.username ||
    parsed.password ||
    parsed.search ||
    parsed.hash ||
    (parsed.pathname !== '/' && parsed.pathname !== '')
  ) {
    return null;
  }
  return parsed.origin;
}
|
|
1116
|
+
|
|
1117
|
+
/**
 * Picks the base URL the runtime should call back to.
 *
 * Preference order: a valid `body.callbackBaseUrl`, then a valid
 * `body.baseUrl` (both checked with `normalizeRuntimeBaseUrl`), then
 * `env.DEEPLINE_API_BASE_URL` with one trailing slash removed.
 *
 * @param env Coordinator bindings supplying the default base URL.
 * @param body Request body that may carry base URL overrides.
 * @returns The selected base URL.
 */
function resolveRuntimeBaseUrl(env: CoordinatorEnv, body: Record<string, unknown>): string {
  const fromCallback = normalizeRuntimeBaseUrl(body.callbackBaseUrl);
  if (fromCallback !== null) {
    return fromCallback;
  }
  const fromBody = normalizeRuntimeBaseUrl(body.baseUrl);
  if (fromBody !== null) {
    return fromBody;
  }
  return env.DEEPLINE_API_BASE_URL.replace(/\/$/, '');
}
|
|
1122
|
+
|
|
1123
|
+
/**
 * Validates the body of a submit-child request.
 *
 * The body is untrusted JSON, so `manifest` and `internalRunPlay` are only
 * accepted when they are actual records, and the play name is only read from
 * string values. Malformed input therefore produces a 400 result instead of a
 * `TypeError` or a non-object governance snapshot slipping through.
 *
 * @param input.parentRunId Id of the parent run, echoed in error details.
 * @param input.body Parsed request body.
 * @returns `{ ok: true, ... }` with the validated pieces, or
 *   `{ ok: false, status: 400, error }` with one of the codes
 *   `CHILD_MANIFEST_REQUIRED`, `CHILD_ORG_REQUIRED` or
 *   `PARENT_EXECUTOR_TOKEN_REQUIRED`.
 */
function validateChildSubmitBody(input: {
  parentRunId: string;
  body: Record<string, unknown>;
}):
  | {
      ok: true;
      manifest: PlayRuntimeManifest;
      governance: PlayCallGovernanceSnapshot;
      childPlayName: string;
      orgId: string;
      parentExecutorToken: string;
    }
  | { ok: false; status: number; error: Record<string, unknown> } {
  const { parentRunId, body } = input;

  const reject = (
    code: string,
    message: string,
  ): { ok: false; status: number; error: Record<string, unknown> } => ({
    ok: false,
    status: 400,
    error: {
      code,
      message,
      phase: 'coordinator_child_submit',
      parentRunId,
    },
  });

  // Guard on separate `unknown` locals so the casts below start from `unknown`.
  const rawManifest: unknown = body.manifest;
  const rawGovernance: unknown = body.internalRunPlay;
  const manifest = isRecord(rawManifest)
    ? (body.manifest as PlayRuntimeManifest)
    : undefined;
  const governance = isRecord(rawGovernance)
    ? (body.internalRunPlay as PlayCallGovernanceSnapshot)
    : undefined;

  // An explicit body name wins; otherwise use the manifest's play name.
  const requestedName = typeof body.name === 'string' ? body.name.trim() : '';
  const manifestName =
    typeof manifest?.playName === 'string' ? manifest.playName.trim() : '';
  const childPlayName = requestedName || manifestName;

  if (
    !manifest ||
    !childPlayName ||
    !manifest.artifactStorageKey ||
    !manifest.artifactHash ||
    !manifest.graphHash ||
    !governance
  ) {
    return reject(
      'CHILD_MANIFEST_REQUIRED',
      'submit-child requires a trusted child manifest and lineage.',
    );
  }

  const orgId = typeof body.orgId === 'string' ? body.orgId : '';
  if (!orgId) {
    return reject(
      'CHILD_ORG_REQUIRED',
      'submit-child requires orgId from the parent runtime.',
    );
  }

  const parentExecutorToken =
    typeof body.parentExecutorToken === 'string'
      ? body.parentExecutorToken.trim()
      : '';
  if (!parentExecutorToken) {
    return reject(
      'PARENT_EXECUTOR_TOKEN_REQUIRED',
      'submit-child requires the parent executor token for origin-scoped child token minting.',
    );
  }

  return {
    ok: true,
    manifest,
    governance,
    childPlayName,
    orgId,
    parentExecutorToken,
  };
}
|
|
1203
|
+
|
|
1204
|
+
/**
 * Assembles the workflow parameters for an inline child run.
 *
 * Artifact identity (storage key, artifact hash, graph hash), the static
 * pipeline, source code and credit limit come from the child manifest; user
 * and input data come from the request body; the base URL is chosen by
 * `resolveRuntimeBaseUrl`. The child run id is used as both `runId` and
 * `playId`.
 *
 * @param input.body Request body; `input`, `childPlayManifests`, `userEmail`
 *   and `userId` are read from it with defaults for missing or mistyped values.
 * @param input.childToken Executor token minted for the child run.
 * @param input.coordinatorUrl Coordinator URL to forward, or `null`.
 * @returns Parameters for the child run.
 */
function buildChildWorkflowParams(input: {
  env: CoordinatorEnv;
  body: Record<string, unknown>;
  manifest: PlayRuntimeManifest;
  governance: PlayCallGovernanceSnapshot;
  childRunId: string;
  childPlayName: string;
  childToken: string;
  orgId: string;
  coordinatorUrl: string | null;
}): PlayWorkflowParams {
  const { body, manifest } = input;
  const baseUrl = resolveRuntimeBaseUrl(input.env, body);

  const runtimeInput = isRecord(body.input) ? body.input : {};
  const childPlayManifests = isRecord(body.childPlayManifests)
    ? (body.childPlayManifests as PlayRuntimeManifestMap)
    : null;
  const billingLimit =
    typeof manifest.maxCreditsPerRun === 'number'
      ? { maxCreditsPerRun: manifest.maxCreditsPerRun }
      : null;
  const artifactMetadata = {
    storageKey: manifest.artifactStorageKey,
    artifactHash: manifest.artifactHash,
    graphHash: manifest.graphHash,
  };

  return {
    runId: input.childRunId,
    playId: input.childRunId,
    playName: input.childPlayName,
    artifactStorageKey: manifest.artifactStorageKey,
    artifactHash: manifest.artifactHash,
    graphHash: manifest.graphHash,
    input: runtimeInput,
    contractSnapshot: {
      source: 'published',
      revisionVersion: null,
      staticPipeline: manifest.staticPipeline,
      billingLimit,
      sourceCode: manifest.sourceCode ?? '',
      artifactMetadata,
      codeFormat: 'cjs_module',
      compatibility: {
        apiVersion: 2,
        runtimeBackend: 'workers_edge',
      },
    },
    executionPlan: null,
    childPlayManifests,
    playCallGovernance: input.governance,
    dynamicWorkerCode: null,
    executorToken: input.childToken,
    baseUrl,
    orgId: input.orgId,
    userEmail: typeof body.userEmail === 'string' ? body.userEmail : '',
    userId: typeof body.userId === 'string' ? body.userId : null,
    runtimeBackend: 'cf_workflows_dynamic_worker_inline_child',
    dedupBackend: 'in_memory',
    coordinatorUrl: input.coordinatorUrl,
  };
}
|
|
1271
|
+
|
|
1272
|
+
/**
 * Converts workflow parameters into the JSON body sent to a play Worker's run
 * endpoint.
 *
 * Optional parameters that are absent are sent as explicit `null`s.
 * `callbackUrl` and `baseUrl` both carry `params.baseUrl`. `partitionRange`
 * and `resumeFromCheckpointR2Key` are always `null`.
 *
 * `inputR2Keys` is a single-entry map from the input file's label to its R2
 * key, present only when the file has an R2 key and a non-empty name or path.
 * The label uses the same "first non-empty of name, path" rule as that
 * presence check, so an empty `name` falls through to `path` instead of
 * producing an empty-string key.
 *
 * @param params Workflow parameters of the run.
 * @returns A plain JSON-serializable request object.
 */
function runRequestFromPlayWorkflowParams(params: PlayWorkflowParams): Record<string, unknown> {
  const inputFile = params.inputFile;
  const inputFileLabel = inputFile?.name || inputFile?.path;
  const inputR2Keys =
    inputFile?.r2Key && inputFileLabel
      ? { [String(inputFileLabel)]: inputFile.r2Key }
      : null;

  return {
    runId: params.runId,
    callbackUrl: params.baseUrl,
    executorToken: params.executorToken,
    baseUrl: params.baseUrl,
    orgId: params.orgId,
    playName: params.playName,
    graphHash: params.graphHash,
    userEmail: params.userEmail,
    runtimeInput: params.input,
    inlineCsv: params.inlineCsv ?? null,
    inputR2Keys,
    packagedFiles: params.packagedFiles ?? null,
    partitionRange: null,
    dedupBackend: params.dedupBackend,
    resumeFromCheckpointR2Key: null,
    contractSnapshot: params.contractSnapshot ?? null,
    executionPlan: params.executionPlan ?? null,
    childPlayManifests: params.childPlayManifests ?? null,
    playCallGovernance: params.playCallGovernance ?? null,
    coordinatorUrl: params.coordinatorUrl ?? null,
    totalRows: params.totalRows,
  };
}
|
|
1303
|
+
|
|
1304
|
+
/**
 * Reads the newline-delimited event stream returned by a legacy child Worker.
 *
 * Each non-blank line is handled as follows:
 * - not valid JSON: appended to `logs` verbatim (trimmed);
 * - `{ type: 'log', message: string }`: message appended to `logs`;
 * - `{ type: 'result' }`: sets `result` and `outputRows` (`null` unless numeric);
 * - `{ type: 'error' }`: sets `error`, with a default message when none is given;
 * - any other JSON value: ignored.
 * Later `result`/`error` events overwrite earlier ones.
 *
 * The reader lock is always released before returning or throwing, so the
 * caller can still cancel or dispose of the response body after a failed read.
 *
 * @param response Response whose body is the event stream.
 * @returns The collected result, output row count, log lines and error.
 * @throws Error when the response has no body; read errors are propagated.
 */
async function readLegacyRunStream(response: Response): Promise<{
  result: unknown;
  outputRows: number | null;
  logs: string[];
  error: { message: string; stack: string | null } | null;
}> {
  if (!response.body) {
    throw new Error('Legacy child Worker returned an empty stream.');
  }
  const decoder = new TextDecoder();
  const reader = response.body.getReader();
  const logs: string[] = [];
  let result: unknown = null;
  let outputRows: number | null = null;
  let error: { message: string; stack: string | null } | null = null;
  let buffered = '';

  const consumeLine = (line: string): void => {
    const trimmed = line.trim();
    if (!trimmed) {
      return;
    }
    let event: unknown;
    try {
      event = JSON.parse(trimmed);
    } catch {
      // Plain-text output is preserved as a log line.
      logs.push(trimmed);
      return;
    }
    if (!event || typeof event !== 'object') {
      return;
    }
    const record = event as Record<string, unknown>;
    if (record.type === 'log' && typeof record.message === 'string') {
      logs.push(record.message);
      return;
    }
    if (record.type === 'result') {
      result = record.result;
      outputRows =
        typeof record.outputRows === 'number' ? record.outputRows : null;
      return;
    }
    if (record.type === 'error') {
      error = {
        message:
          typeof record.message === 'string'
            ? record.message
            : 'Legacy child Worker failed.',
        stack: typeof record.stack === 'string' ? record.stack : null,
      };
    }
  };

  try {
    for (;;) {
      const { value, done } = await reader.read();
      if (value) {
        buffered += decoder.decode(value, { stream: !done });
        // Keep the trailing partial line buffered until its newline arrives.
        const lines = buffered.split('\n');
        buffered = lines.pop() ?? '';
        for (const line of lines) {
          consumeLine(line);
        }
      }
      if (done) {
        // Flush bytes held by the decoder and the final unterminated line.
        buffered += decoder.decode();
        consumeLine(buffered);
        break;
      }
    }
  } finally {
    reader.releaseLock();
  }

  return { result, outputRows, logs, error };
}
|
|
1377
|
+
|
|
1378
|
+
async function executeChildInline(input: {
|
|
1379
|
+
env: CoordinatorEnv;
|
|
1380
|
+
parentRunId: string;
|
|
1381
|
+
body: Record<string, unknown>;
|
|
1382
|
+
}): Promise<InlineChildSubmitResult> {
|
|
1383
|
+
const startedAt = Date.now();
|
|
1384
|
+
const validated = validateChildSubmitBody(input);
|
|
1385
|
+
if (!validated.ok) {
|
|
1386
|
+
throw new Error(String(validated.error.message ?? 'Invalid child submit.'));
|
|
1387
|
+
}
|
|
1388
|
+
const {
|
|
1389
|
+
manifest,
|
|
1390
|
+
governance,
|
|
1391
|
+
childPlayName,
|
|
1392
|
+
orgId,
|
|
1393
|
+
parentExecutorToken,
|
|
1394
|
+
} = validated;
|
|
1395
|
+
const childRunId = buildChildRunId(childPlayName);
|
|
1396
|
+
const timings: CoordinatorTiming[] = [];
|
|
1397
|
+
const trace = (
|
|
1398
|
+
phase: string,
|
|
1399
|
+
phaseStartedAt: number,
|
|
1400
|
+
extra?: Record<string, unknown>,
|
|
1401
|
+
): void => {
|
|
1402
|
+
const timing: CoordinatorTiming = {
|
|
1403
|
+
phase,
|
|
1404
|
+
ms: Date.now() - phaseStartedAt,
|
|
1405
|
+
graphHash: manifest.graphHash,
|
|
1406
|
+
...(extra ? { extra } : {}),
|
|
1407
|
+
};
|
|
1408
|
+
timings.push(timing);
|
|
1409
|
+
recordCoordinatorPerfTrace({
|
|
1410
|
+
runId: childRunId,
|
|
1411
|
+
phase,
|
|
1412
|
+
ms: timing.ms,
|
|
1413
|
+
graphHash: manifest.graphHash,
|
|
1414
|
+
extra: {
|
|
1415
|
+
parentRunId: input.parentRunId,
|
|
1416
|
+
mode: 'inline_dynamic_worker',
|
|
1417
|
+
...(extra ?? {}),
|
|
1418
|
+
},
|
|
1419
|
+
});
|
|
1420
|
+
};
|
|
1421
|
+
recordCoordinatorPerfTrace({
|
|
1422
|
+
runId: childRunId,
|
|
1423
|
+
phase: 'coordinator.inline_child_boundary',
|
|
1424
|
+
ms: 0,
|
|
1425
|
+
graphHash: manifest.graphHash,
|
|
1426
|
+
extra: {
|
|
1427
|
+
parentRunId: input.parentRunId,
|
|
1428
|
+
childPlayName,
|
|
1429
|
+
action: 'start',
|
|
1430
|
+
mode: 'inline_dynamic_worker',
|
|
1431
|
+
},
|
|
1432
|
+
});
|
|
1433
|
+
|
|
1434
|
+
const tokenStartedAt = Date.now();
|
|
1435
|
+
const childToken = await mintChildWorkflowExecutorToken({
|
|
1436
|
+
env: input.env,
|
|
1437
|
+
baseUrl: resolveRuntimeBaseUrl(input.env, input.body),
|
|
1438
|
+
parentExecutorToken,
|
|
1439
|
+
parentRunId: input.parentRunId,
|
|
1440
|
+
parentPlayName:
|
|
1441
|
+
typeof input.body.parentPlayName === 'string' &&
|
|
1442
|
+
input.body.parentPlayName.trim()
|
|
1443
|
+
? input.body.parentPlayName.trim()
|
|
1444
|
+
: governance.parentPlayName,
|
|
1445
|
+
childRunId,
|
|
1446
|
+
childPlayName,
|
|
1447
|
+
maxCreditsPerRun: manifest.maxCreditsPerRun ?? null,
|
|
1448
|
+
});
|
|
1449
|
+
trace('coordinator.inline_child_token', tokenStartedAt);
|
|
1450
|
+
|
|
1451
|
+
const params = buildChildWorkflowParams({
|
|
1452
|
+
env: input.env,
|
|
1453
|
+
body: input.body,
|
|
1454
|
+
manifest,
|
|
1455
|
+
governance,
|
|
1456
|
+
childRunId,
|
|
1457
|
+
childPlayName,
|
|
1458
|
+
childToken,
|
|
1459
|
+
orgId,
|
|
1460
|
+
coordinatorUrl: null,
|
|
1461
|
+
});
|
|
1462
|
+
const loaderStartedAt = Date.now();
|
|
1463
|
+
const stub = loadDynamicPlayWorker(input.env, {
|
|
1464
|
+
runId: childRunId,
|
|
1465
|
+
graphHash: manifest.graphHash,
|
|
1466
|
+
artifactStorageKey: manifest.artifactStorageKey,
|
|
1467
|
+
artifactHash: manifest.artifactHash,
|
|
1468
|
+
dynamicWorkerCode:
|
|
1469
|
+
typeof manifest.dynamicWorkerCode === 'string'
|
|
1470
|
+
? manifest.dynamicWorkerCode
|
|
1471
|
+
: null,
|
|
1472
|
+
packagedFiles: null,
|
|
1473
|
+
});
|
|
1474
|
+
trace('coordinator.inline_child_loader_get', loaderStartedAt);
|
|
1475
|
+
|
|
1476
|
+
let entrypoint: ReturnType<Awaited<typeof stub>['getEntrypoint']> | null =
|
|
1477
|
+
null;
|
|
1478
|
+
let response: Response | null = null;
|
|
1479
|
+
try {
|
|
1480
|
+
const awaitedStub = await stub;
|
|
1481
|
+
const entrypointStartedAt = Date.now();
|
|
1482
|
+
entrypoint = awaitedStub.getEntrypoint();
|
|
1483
|
+
trace('coordinator.inline_child_get_entrypoint', entrypointStartedAt);
|
|
1484
|
+
const fetchStartedAt = Date.now();
|
|
1485
|
+
response = await entrypoint.fetch(
|
|
1486
|
+
new Request('https://deepline.dynamic.internal/run-inline', {
|
|
1487
|
+
method: 'POST',
|
|
1488
|
+
headers: { 'content-type': 'application/json' },
|
|
1489
|
+
body: JSON.stringify(runRequestFromPlayWorkflowParams(params)),
|
|
1490
|
+
}),
|
|
1491
|
+
);
|
|
1492
|
+
trace('coordinator.inline_child_worker_fetch', fetchStartedAt, {
|
|
1493
|
+
status: response.status,
|
|
1494
|
+
endpoint: '/run-inline',
|
|
1495
|
+
});
|
|
1496
|
+
let usedLegacyRunStream = false;
|
|
1497
|
+
if (response.status === 404) {
|
|
1498
|
+
disposeRpcStub(response);
|
|
1499
|
+
const legacyFetchStartedAt = Date.now();
|
|
1500
|
+
response = await entrypoint.fetch(
|
|
1501
|
+
new Request('https://deepline.dynamic.internal/run', {
|
|
1502
|
+
method: 'POST',
|
|
1503
|
+
headers: { 'content-type': 'application/json' },
|
|
1504
|
+
body: JSON.stringify(runRequestFromPlayWorkflowParams(params)),
|
|
1505
|
+
}),
|
|
1506
|
+
);
|
|
1507
|
+
usedLegacyRunStream = true;
|
|
1508
|
+
trace('coordinator.inline_child_worker_fetch', legacyFetchStartedAt, {
|
|
1509
|
+
status: response.status,
|
|
1510
|
+
endpoint: '/run',
|
|
1511
|
+
compatibility: 'legacy_stream',
|
|
1512
|
+
});
|
|
1513
|
+
}
|
|
1514
|
+
if (!response.ok) {
|
|
1515
|
+
const text = await response.text().catch(() => '');
|
|
1516
|
+
throw new Error(
|
|
1517
|
+
`Inline child Worker failed ${response.status}: ${text.slice(0, 800)}`,
|
|
1518
|
+
);
|
|
1519
|
+
}
|
|
1520
|
+
const responseStartedAt = Date.now();
|
|
1521
|
+
const parsed: InlineWorkerRunResponse = usedLegacyRunStream
|
|
1522
|
+
? await readLegacyRunStream(response).then((legacy) => ({
|
|
1523
|
+
status: legacy.error ? 'failed' : 'completed',
|
|
1524
|
+
result: legacy.result,
|
|
1525
|
+
outputRows: legacy.outputRows ?? undefined,
|
|
1526
|
+
events: legacy.logs.map((message) => ({
|
|
1527
|
+
type: 'log',
|
|
1528
|
+
message,
|
|
1529
|
+
})),
|
|
1530
|
+
error: legacy.error ?? undefined,
|
|
1531
|
+
}))
|
|
1532
|
+
: ((await response.json()) as InlineWorkerRunResponse);
|
|
1533
|
+
const logs = (parsed.events ?? []).flatMap((event) => {
|
|
1534
|
+
if (
|
|
1535
|
+
event &&
|
|
1536
|
+
typeof event === 'object' &&
|
|
1537
|
+
event.type === 'log' &&
|
|
1538
|
+
typeof event.message === 'string'
|
|
1539
|
+
) {
|
|
1540
|
+
return [event.message];
|
|
1541
|
+
}
|
|
1542
|
+
return [];
|
|
1543
|
+
});
|
|
1544
|
+
trace('coordinator.inline_child_response', responseStartedAt, {
|
|
1545
|
+
status: parsed.status ?? null,
|
|
1546
|
+
endpoint: usedLegacyRunStream ? '/run' : '/run-inline',
|
|
1547
|
+
logCount: logs.length,
|
|
1548
|
+
outputRows:
|
|
1549
|
+
typeof parsed.outputRows === 'number' ? parsed.outputRows : null,
|
|
1550
|
+
durationMs:
|
|
1551
|
+
typeof parsed.durationMs === 'number' ? parsed.durationMs : null,
|
|
1552
|
+
});
|
|
1553
|
+
trace('coordinator.inline_child_total', startedAt);
|
|
1554
|
+
if (parsed.status === 'failed' || parsed.error) {
|
|
1555
|
+
const error = {
|
|
1556
|
+
message:
|
|
1557
|
+
typeof parsed.error?.message === 'string'
|
|
1558
|
+
? parsed.error.message
|
|
1559
|
+
: 'Inline child Worker failed.',
|
|
1560
|
+
stack:
|
|
1561
|
+
typeof parsed.error?.stack === 'string' ? parsed.error.stack : null,
|
|
1562
|
+
};
|
|
1563
|
+
recordCoordinatorPerfTrace({
|
|
1564
|
+
runId: childRunId,
|
|
1565
|
+
phase: 'coordinator.inline_child_boundary',
|
|
1566
|
+
ms: Date.now() - startedAt,
|
|
1567
|
+
graphHash: manifest.graphHash,
|
|
1568
|
+
extra: {
|
|
1569
|
+
parentRunId: input.parentRunId,
|
|
1570
|
+
childPlayName,
|
|
1571
|
+
action: 'failed',
|
|
1572
|
+
mode: 'inline_dynamic_worker',
|
|
1573
|
+
error: error.message,
|
|
1574
|
+
},
|
|
1575
|
+
});
|
|
1576
|
+
return {
|
|
1577
|
+
workflowId: childRunId,
|
|
1578
|
+
runId: childRunId,
|
|
1579
|
+
status: 'failed',
|
|
1580
|
+
mode: 'inline_dynamic_worker',
|
|
1581
|
+
error,
|
|
1582
|
+
logs,
|
|
1583
|
+
timings,
|
|
1584
|
+
};
|
|
1585
|
+
}
|
|
1586
|
+
recordCoordinatorPerfTrace({
|
|
1587
|
+
runId: childRunId,
|
|
1588
|
+
phase: 'coordinator.inline_child_boundary',
|
|
1589
|
+
ms: Date.now() - startedAt,
|
|
1590
|
+
graphHash: manifest.graphHash,
|
|
1591
|
+
extra: {
|
|
1592
|
+
parentRunId: input.parentRunId,
|
|
1593
|
+
childPlayName,
|
|
1594
|
+
action: 'completed',
|
|
1595
|
+
mode: 'inline_dynamic_worker',
|
|
1596
|
+
},
|
|
1597
|
+
});
|
|
1598
|
+
return {
|
|
1599
|
+
workflowId: childRunId,
|
|
1600
|
+
runId: childRunId,
|
|
1601
|
+
status: 'completed',
|
|
1602
|
+
mode: 'inline_dynamic_worker',
|
|
1603
|
+
result: parsed.result,
|
|
1604
|
+
output: parsed.result,
|
|
1605
|
+
logs,
|
|
1606
|
+
timings,
|
|
1607
|
+
};
|
|
1608
|
+
} finally {
|
|
1609
|
+
disposeRpcStub(response);
|
|
1610
|
+
disposeRpcStub(entrypoint);
|
|
1611
|
+
disposeRpcStub(await stub.catch(() => null));
|
|
1612
|
+
}
|
|
1613
|
+
}
|
|
1614
|
+
|
|
1615
|
+
/**
 * In-process Fetcher handed to each per-graphHash play Worker as
 * `env.RUNTIME_API`. It runs in the coordinator's isolate (not the play's),
 * so `fetch(target)` here can reach `http://localhost:3000` directly in dev
 * without the public *.workers.dev → CF edge → cloudflared → localhost chain.
 *
 * It has to be a `WorkerEntrypoint` (not a plain closure) because closures
 * containing captured state aren't structured-cloneable, and Cloudflare
 * Workflows serializes the dynamic Worker's `env` map when it persists
 * workflow state. WorkerEntrypoint stubs ARE cloneable.
 *
 * Path allowlist: only `/api/v2/plays/*` and `/api/v2/integrations/*`.
 * Anything else is treated as a sandbox-escape attempt and gets a loud 403:
 * the coordinator must NOT proxy a play's request to internal admin routes
 * even if the play tries to construct such a URL.
 */
export class RuntimeApi extends WorkerEntrypoint<CoordinatorEnv, undefined> {
  /**
   * Forwards an allowlisted request to the Deepline API and returns the
   * upstream response unchanged.
   *
   * @param request Request issued by the play Worker; only its path and query
   *   are kept, the origin is replaced by the configured API base URL.
   * @returns The upstream response, or a JSON 403 when the path is not
   *   allowlisted.
   */
  async fetch(request: Request): Promise<Response> {
    const { pathname, search } = new URL(request.url);

    const forwardablePrefixes = ['/api/v2/plays/', '/api/v2/integrations/'];
    if (!forwardablePrefixes.some((prefix) => pathname.startsWith(prefix))) {
      const denial = {
        error:
          'RUNTIME_API binding only forwards /api/v2/plays/* and /api/v2/integrations/* paths',
        attemptedPath: pathname,
      };
      return new Response(JSON.stringify(denial), {
        status: 403,
        headers: { 'content-type': 'application/json' },
      });
    }

    // Use the configured API origin when it is a non-blank string; otherwise
    // fall back to the hosted default.
    const configuredBase = this.env.DEEPLINE_API_BASE_URL;
    let apiBaseUrl = 'https://code.deepline.com';
    if (typeof configuredBase === 'string' && configuredBase.trim()) {
      apiBaseUrl = configuredBase.trim();
    }

    const targetHref = new URL(pathname + search, apiBaseUrl).toString();
    // Passing the original request as init carries over method, headers and body.
    const forwarded = new Request(targetHref, request);

    const bypassToken = this.env.VERCEL_PROTECTION_BYPASS_TOKEN;
    if (typeof bypassToken === 'string' && bypassToken) {
      forwarded.headers.set('x-vercel-protection-bypass', bypassToken);
    }

    const upstream = await fetch(forwarded);
    if (upstream.ok) {
      return upstream;
    }

    // Read the body from a clone so the caller still receives an unread body.
    const snippet = await upstream
      .clone()
      .text()
      .catch(() => '');
    console.error(
      `[RUNTIME_API] ${pathname} failed: status=${upstream.status} target=${targetHref} body=${snippet.slice(0, 500)}`,
    );
    return upstream;
  }
}
|
|
1672
|
+
|
|
1673
|
+
/**
 * Coordinator control surface exposed as a `WorkerEntrypoint`. Each method is
 * a thin adapter over a coordinator function defined elsewhere in this file.
 */
export class CoordinatorControl extends WorkerEntrypoint<
  CoordinatorEnv,
  undefined
> {
  /**
   * Runs a child play inline by delegating to `executeChildInline`.
   *
   * @param parentRunId Run id of the play requesting the child.
   * @param body Child submit payload, passed through untouched.
   * @returns Whatever `executeChildInline` resolves to.
   */
  async submitChild(
    parentRunId: string,
    body: Record<string, unknown>,
  ): Promise<InlineChildSubmitResult> {
    const outcome = await executeChildInline({
      env: this.env,
      parentRunId,
      body,
    });
    return outcome;
  }

  /**
   * Delivers a signal to a run by building a synthetic POST request and
   * passing it to `handleWorkflowRoute` with action `signal`.
   *
   * @param runId Target run id (URL-encoded into the synthetic request path).
   * @param body JSON-serializable signal payload.
   * @returns The parsed JSON response body, `{}` for an empty body, or
   *   `{ error: <raw text> }` when the body is not valid JSON.
   * @throws Error when the route responds with a non-2xx status. The message
   *   is the first non-empty of: `error.message`, a string `error`, the first
   *   800 characters of the raw body, or a generic status message.
   */
  async signal(
    runId: string,
    body: Record<string, unknown>,
  ): Promise<Record<string, unknown>> {
    const signalRequest = new Request(
      `https://deepline.coordinator.internal/workflow/${encodeURIComponent(
        runId,
      )}/signal`,
      {
        method: 'POST',
        headers: {
          'content-type': 'application/json',
          'x-deepline-request-id': crypto.randomUUID(),
        },
        body: JSON.stringify(body),
      },
    );
    const response = await handleWorkflowRoute({
      runId,
      action: 'signal',
      request: signalRequest,
      env: this.env,
    });

    const text = await response.text().catch(() => '');
    let parsed: Record<string, unknown>;
    if (!text) {
      parsed = {};
    } else {
      try {
        parsed = JSON.parse(text) as Record<string, unknown>;
      } catch {
        // Keep a non-JSON body visible to the caller instead of dropping it.
        parsed = { error: text };
      }
    }

    if (response.ok) {
      return parsed;
    }

    const structuredError = isRecord(parsed.error) ? parsed.error : null;
    const candidates: string[] = [
      typeof structuredError?.message === 'string'
        ? structuredError.message.trim()
        : '',
      typeof parsed.error === 'string' ? parsed.error.trim() : '',
      text.slice(0, 800),
    ];
    const message =
      candidates.find((candidate) => candidate.length > 0) ??
      `Coordinator signal failed with ${response.status}.`;
    throw new Error(message);
  }

  /**
   * Appends a perf-trace payload for a run.
   *
   * @param runId Run id the caller claims to be tracing.
   * @param payload Trace payload; its `runId` must equal `runId`.
   * @throws Error when `runId` is empty or does not match `payload.runId`.
   */
  async recordPerfTrace(
    runId: string,
    payload: CoordinatorPerfTracePayload,
  ): Promise<void> {
    const runIdMatches = Boolean(runId) && payload.runId === runId;
    if (!runIdMatches) {
      throw new Error('Trace runId mismatch.');
    }
    await appendCoordinatorPerfTrace(this.env, payload);
  }
}
|
|
1739
|
+
|
|
1740
|
+
/**
 * Custom DynamicWorkflow class that replaces createDynamicWorkflowEntrypoint
 * so the framework's runner.run(innerEvent, step) RPC can be wrapped in a
 * try/catch that surfaces the underlying error. The library's helper hides
 * that call inside dispatchWorkflow, which is invoked directly here with
 * extra instrumentation around the inner runner call.
 */
export class DynamicWorkflow extends WorkflowEntrypoint<
  CoordinatorEnv,
  Record<string, unknown>
> {
  /**
   * Workflow entry point.
   *
   * Emits an entry trace; when the payload is a pooled-bootstrap payload,
   * waits (up to 10 minutes) for the pool start event and dispatches that
   * event instead; then calls `dispatchWorkflow` with a loader callback that
   * loads the play Worker and wraps its `run` with tracing and failure
   * reporting.
   *
   * @param event Workflow event as delivered by the runtime.
   * @param step Workflow step handle, forwarded to `dispatchWorkflow`.
   * @returns Whatever `dispatchWorkflow` resolves to.
   */
  async run(event: unknown, step: unknown): Promise<unknown> {
    const emitTrace: CoordinatorPerfTraceSink = (traceEvent) =>
      recordCoordinatorPerfTraceBuffered(this.env, this.ctx, traceEvent);
    const thrownMessage = (thrown: unknown): string =>
      thrown instanceof Error ? thrown.message : String(thrown);

    const incoming = event as {
      payload?: unknown;
      timestamp?: Date;
      instanceId?: string;
    };

    const atEntry = readWorkflowTraceContext(event);
    emitTrace({
      runId: atEntry.runId,
      phase: 'coordinator.workflow_run_entry',
      ms: 0,
      graphHash: atEntry.graphHash,
      extra: {
        instanceId: atEntry.instanceId,
        pooledBootstrap: atEntry.pooledBootstrap,
      },
    });

    let eventToDispatch = event;
    if (isPooledWorkflowBootstrapPayload(incoming.payload)) {
      const pooledStep = step as {
        waitForEvent<T>(
          name: string,
          options: { type: string; timeout?: string | number },
        ): Promise<{ payload: Readonly<T>; timestamp: Date; type: string }>;
      };
      const waitBeganAt = Date.now();
      const startSignal = await pooledStep.waitForEvent<DispatcherEnvelope>(
        'wait for pooled play start',
        { type: WORKFLOW_POOL_START_EVENT_TYPE, timeout: '10 minutes' },
      );
      // The start event supplies payload and timestamp; the instance id stays
      // that of the pooled instance that was already running.
      eventToDispatch = {
        payload: startSignal.payload,
        timestamp: startSignal.timestamp,
        instanceId: incoming.instanceId,
      };
      const afterStart = readWorkflowTraceContext(eventToDispatch);
      const deliveryLagMs = Math.max(
        0,
        Date.now() - startSignal.timestamp.getTime(),
      );
      emitTrace({
        runId: afterStart.runId,
        phase: 'coordinator.workflow_pool_start_event',
        ms: deliveryLagMs,
        graphHash: afterStart.graphHash,
        extra: {
          instanceId: afterStart.instanceId,
          eventType: startSignal.type,
          poolWaitAgeMs: Date.now() - waitBeganAt,
        },
      });
    }

    const atDispatch = readWorkflowTraceContext(eventToDispatch);
    emitTrace({
      runId: atDispatch.runId,
      phase: 'coordinator.dispatch_workflow_entry',
      ms: 0,
      graphHash: atDispatch.graphHash,
      extra: {
        instanceId: atDispatch.instanceId,
        pooledBootstrap: atDispatch.pooledBootstrap,
      },
    });

    return dispatchWorkflow(
      { env: this.env, ctx: this.ctx },
      eventToDispatch as Parameters<typeof dispatchWorkflow>[1],
      step as Parameters<typeof dispatchWorkflow>[2],
      async ({ metadata, env: loaderEnv }) => {
        const graphHash = readMetadataString(metadata, 'graphHash');
        const artifactStorageKey = readMetadataString(
          metadata,
          'artifactStorageKey',
        );
        // Trace under the run id when metadata carries one; otherwise fall
        // back to the graph hash.
        const declaredRunId = (metadata as Record<string, unknown>).runId;
        const traceRunId =
          typeof declaredRunId === 'string' ? declaredRunId : graphHash;

        const loadBeganAt = Date.now();
        emitTrace({
          runId: traceRunId,
          phase: 'coordinator.loader_callback_entry',
          ms: 0,
          graphHash,
          extra: { artifactStorageKey },
        });

        const workerStub = loadDynamicPlayWorkerSync(
          loaderEnv,
          {
            runId: traceRunId,
            graphHash,
            artifactStorageKey,
            artifactHash:
              typeof metadata.artifactHash === 'string'
                ? metadata.artifactHash
                : null,
            dynamicWorkerCode:
              typeof metadata.dynamicWorkerCode === 'string'
                ? metadata.dynamicWorkerCode
                : null,
            packagedFiles: normalizePackagedFiles(metadata.packagedFiles),
          },
          emitTrace,
        );
        const tenantEntrypoint = workerStub.getEntrypoint(
          'TenantWorkflow',
        ) as unknown as WorkflowRunner;
        emitTrace({
          runId: traceRunId,
          phase: 'coordinator.loader_compile',
          ms: Date.now() - loadBeganAt,
          graphHash,
        });

        const tenantRunner = tenantEntrypoint as unknown as {
          run(e: unknown, s: unknown): Promise<unknown>;
        };

        // Wrap the entrypoint so a run() failure surfaces here rather than
        // disappearing into the framework's silent rpcMethod=run exception
        // path.
        return {
          run: async (innerEvent: unknown, innerStep: unknown) => {
            const runBeganAt = Date.now();
            emitTrace({
              runId: traceRunId,
              phase: 'coordinator.runner_run_start',
              ms: 0,
              graphHash,
            });
            try {
              const result = await tenantRunner.run(innerEvent, innerStep);
              emitTrace({
                runId: traceRunId,
                phase: 'coordinator.runner_run',
                ms: Date.now() - runBeganAt,
                graphHash,
              });
              return result;
            } catch (innerError) {
              let name: string | null = null;
              let stack: string | null = null;
              if (innerError instanceof Error) {
                name = innerError.name;
                if (typeof innerError.stack === 'string') {
                  // Log only the first 12 stack lines.
                  stack = innerError.stack.split('\n').slice(0, 12).join('\n');
                }
              }
              console.error('[coordinator] DynamicWorkflow runner.run threw', {
                graphHash,
                message: thrownMessage(innerError),
                name,
                stack,
              });
              // Best effort: a failure to record the failure is logged but
              // never replaces the original error.
              await markWorkflowRuntimeFailure({
                env: loaderEnv,
                event: innerEvent,
                error: innerError,
              }).catch((markError) => {
                console.error(
                  '[coordinator] failed to forward DynamicWorkflow runner error',
                  {
                    graphHash,
                    message: thrownMessage(markError),
                  },
                );
              });
              throw innerError;
            }
          },
        };
      },
    );
  }
}
|
|
1929
|
+
|
|
1930
|
+
const coordinatorEntrypoint = {
  /**
   * HTTP entrypoint for the Vercel app to dispatch into. Routes:
   *   POST /workflow/{runId}/submit       → PLAY_WORKFLOW.create({ id, params })
   *   GET  /workflow/{runId}/observe      → polling-compatible status snapshot
   *   POST /workflow/{runId}/cancel       → Workflow instance terminate
   *   POST /workflow/{runId}/signal       → integration_event
   *   GET  /workflow/{runId}/result       → terminal envelope
   *   ANY  /dedup/{runId}/{action}        → forwarded to the run's PLAY_DEDUP object
   *   ANY  /warmup/submit                 → handleCoordinatorWarmup
   *   ANY  /workflow-pool/refill          → refill the workflow pool (token-guarded)
   *   ANY  /workflow-pool/clear           → clear the workflow pool (token-guarded)
   *   GET  /health                        → liveness
   *
   * A `{runId}` segment with malformed percent-encoding yields a 400 rather
   * than letting `decodeURIComponent` throw out of the handler.
   *
   * @param request Incoming HTTP request.
   * @param env Coordinator bindings and configuration.
   * @param ctx Optional execution context, used for background pool refills.
   * @returns The route's response, or a plain-text 404 for unknown paths.
   */
  async fetch(
    request: Request,
    env: CoordinatorEnv,
    ctx?: ExecutionContext,
  ): Promise<Response> {
    const url = new URL(request.url);
    const { pathname, searchParams } = url;

    // Shared guard for the pool admin routes. The check is only enforced when
    // a bypass token is configured; with no token the routes are open.
    // NOTE(review): confirm that open-by-default is intended outside dev.
    const rejectUnauthorizedPoolAdmin = (): Response | null => {
      const expectedToken = env.VERCEL_PROTECTION_BYPASS_TOKEN?.trim();
      if (
        expectedToken &&
        request.headers.get('x-vercel-protection-bypass') !== expectedToken
      ) {
        return new Response('unauthorized', { status: 401 });
      }
      return null;
    };

    // Query parameter as a positive finite number, or undefined when it is
    // missing, non-numeric, or not greater than zero.
    const positiveNumberParam = (name: string): number | undefined => {
      const raw = Number(searchParams.get(name) ?? '');
      return Number.isFinite(raw) && raw > 0 ? raw : undefined;
    };

    // decodeURIComponent throws URIError on sequences such as "%zz"; report
    // that as null so the caller can answer 400.
    const decodeRunId = (segment: string): string | null => {
      try {
        return decodeURIComponent(segment);
      } catch {
        return null;
      }
    };
    const malformedRunId = (): Response =>
      new Response('malformed run id', { status: 400 });

    if (pathname === '/health') {
      if (workflowPoolEnabled(env)) {
        // Opportunistic top-up; a refill failure must not affect liveness.
        ctx?.waitUntil(refillWorkflowPool(env).catch(() => undefined));
      }
      return new Response('ok', { status: 200 });
    }

    if (pathname === '/warmup/submit') {
      return await handleCoordinatorWarmup(request, env, ctx);
    }

    if (pathname === '/workflow-pool/refill') {
      const denied = rejectUnauthorizedPoolAdmin();
      if (denied) {
        return denied;
      }
      const startedAt = Date.now();
      const result = await refillWorkflowPool(env, {
        waitReady: searchParams.get('waitReady') === '1',
        minAvailable: positiveNumberParam('minAvailable'),
        waitTimeoutMs: positiveNumberParam('waitTimeoutMs'),
      });
      return Response.json({
        ok: true,
        ...result,
        ms: Date.now() - startedAt,
      });
    }

    if (pathname === '/workflow-pool/clear') {
      const denied = rejectUnauthorizedPoolAdmin();
      if (denied) {
        return denied;
      }
      const startedAt = Date.now();
      const deleted = await clearWorkflowPool(env);
      return Response.json({
        ok: true,
        deleted,
        ms: Date.now() - startedAt,
      });
    }

    // Workflow routes: /workflow/{runId}/{action}
    const wfMatch = pathname.match(/^\/workflow\/([^/]+)(?:\/(.+))?$/);
    if (wfMatch) {
      const runId = decodeRunId(wfMatch[1] ?? '');
      if (runId === null) {
        return malformedRunId();
      }
      const action = wfMatch[2] ?? '';
      return await handleWorkflowRoute({ runId, action, request, env, ctx });
    }

    // Dedup routes: /dedup/{runId}/{action}
    const dedupMatch = pathname.match(/^\/dedup\/([^/]+)(?:\/(.+))?$/);
    if (dedupMatch) {
      const runId = decodeRunId(dedupMatch[1] ?? '');
      if (runId === null) {
        return malformedRunId();
      }
      const action = dedupMatch[2] ?? '';
      const doId = env.PLAY_DEDUP.idFromName(`dedup:${runId}`);
      const stub = env.PLAY_DEDUP.get(doId);
      const internalUrl = `https://internal/${action}`;
      // GET/HEAD requests cannot carry a body, so only forward one otherwise.
      const hasNoBody = request.method === 'GET' || request.method === 'HEAD';
      return stub.fetch(internalUrl, {
        method: request.method,
        headers: request.headers,
        body: hasNoBody ? undefined : request.body,
      });
    }

    return new Response('not found', { status: 404 });
  },

  /**
   * Tail handler: forwards the received tail events to `flushTailRunLogs`.
   *
   * @param events Tail events as delivered by the runtime.
   * @param env Coordinator bindings and configuration.
   */
  async tail(events: unknown[], env: CoordinatorEnv): Promise<void> {
    await flushTailRunLogs(events, env);
  },
};
|
|
2032
|
+
|
|
2033
|
+
export default coordinatorEntrypoint;
|
|
2034
|
+
|
|
2035
|
+
// Matches an explicit `[deepline-run:<runId>]` tag anywhere in a log line.
// Capture 1 is the run id; capture 2 is the rest of the line after the tag
// and any following whitespace.
const RUN_LOG_PREFIX_RE = /\[deepline-run:([^\]]+)\]\s*(.*)/;
|
|
2036
|
+
// Matches a bare run id of the form `play/<name>/run/<suffix>` inside a log
// line, where <suffix> consists of digits, lowercase letters, `T` and `-`.
const RUN_ID_RE = /\bplay\/[^/\s]+\/run\/[0-9a-zTt-]+/;
|
|
2037
|
+
|
|
2038
|
+
/**
 * Groups run-attributed log lines from tail events by run id and posts each
 * group to the Deepline API's internal tail-log endpoint.
 *
 * Does nothing when `DEEPLINE_TAIL_LOG_TOKEN` is unset or blank. Delivery is
 * best effort: a failed POST for one run is swallowed so it cannot fail the
 * tail handler or block the other runs.
 *
 * The API origin comes from `DEEPLINE_API_BASE_URL` when it is a non-blank
 * string and otherwise defaults to `https://code.deepline.com`, the same
 * default the `RuntimeApi` proxy uses. Trailing slashes are stripped so the
 * endpoint path never starts with `//`.
 *
 * @param events Tail events as delivered to the Worker's `tail` handler.
 * @param env Coordinator bindings and configuration.
 */
async function flushTailRunLogs(
  events: unknown[],
  env: CoordinatorEnv,
): Promise<void> {
  const token = env.DEEPLINE_TAIL_LOG_TOKEN?.trim();
  if (!token) {
    return;
  }

  const configuredBase =
    typeof env.DEEPLINE_API_BASE_URL === 'string'
      ? env.DEEPLINE_API_BASE_URL.trim()
      : '';
  const apiBaseUrl = (configuredBase || 'https://code.deepline.com').replace(
    /\/+$/,
    '',
  );
  const endpoint = `${apiBaseUrl}/api/v2/plays/internal/tail-log`;

  const linesByRun = new Map<string, string[]>();
  for (const event of events) {
    for (const line of extractTailLogLines(event)) {
      const parsed = parseRunLogLine(line);
      if (!parsed) {
        continue;
      }
      const bucket = linesByRun.get(parsed.runId);
      if (bucket) {
        bucket.push(parsed.line);
      } else {
        linesByRun.set(parsed.runId, [parsed.line]);
      }
    }
  }

  await Promise.all(
    [...linesByRun].map(async ([runId, lines]) => {
      await fetch(endpoint, {
        method: 'POST',
        headers: {
          'content-type': 'application/json',
          'x-deepline-tail-log-token': token,
        },
        body: JSON.stringify({
          runId,
          // Send at most the 100 most recent lines per run and flush.
          lines: lines.slice(-100),
        }),
      }).catch(() => null);
    }),
  );
}
|
|
2080
|
+
|
|
2081
|
+
/**
 * Flattens one tail event into trimmed, non-empty text lines.
 *
 * Reads two optional array fields from the event:
 * - `logs`: string entries are taken as-is; object entries contribute their
 *   `message`, either a string or an array of parts that are formatted with
 *   `formatTailLogPart` and joined by single spaces. Other entries are ignored.
 * - `exceptions`: every entry is formatted with `formatTailLogPart`.
 *
 * @param event A tail event; anything that is not a non-array object yields
 *   no lines.
 * @returns Log lines first, then exception lines, each trimmed, with empty
 *   lines removed.
 */
function extractTailLogLines(event: unknown): string[] {
  const isPlainRecord = (value: unknown): value is Record<string, unknown> =>
    Boolean(value) && typeof value === 'object' && !Array.isArray(value);

  const record: Record<string, unknown> = isPlainRecord(event) ? event : {};
  const rawLogs: unknown[] = Array.isArray(record.logs) ? record.logs : [];
  const rawExceptions: unknown[] = Array.isArray(record.exceptions)
    ? record.exceptions
    : [];

  const logLines = rawLogs.flatMap((entry): string[] => {
    if (typeof entry === 'string') {
      return [entry];
    }
    if (!isPlainRecord(entry)) {
      return [];
    }
    const { message } = entry;
    if (Array.isArray(message)) {
      return [message.map((part) => formatTailLogPart(part)).join(' ')];
    }
    return typeof message === 'string' ? [message] : [];
  });
  const exceptionLines = rawExceptions.map((exception) =>
    formatTailLogPart(exception),
  );

  return [...logLines, ...exceptionLines]
    .map((line) => line.trim())
    .filter((line) => line.length > 0);
}
|
|
2110
|
+
|
|
2111
|
+
/**
 * Renders one tail-log value (a console argument or an exception record) as
 * text.
 *
 * - Strings are returned unchanged.
 * - `Error` instances yield their stack, or their message when no stack exists.
 * - Everything else is JSON-encoded, falling back to `String(value)` when
 *   encoding throws (cycles, BigInt) or produces no output. `JSON.stringify`
 *   returns `undefined` for `undefined`, functions and symbols, and without
 *   the fallback that `undefined` would leak out of a function typed as
 *   returning `string`.
 *
 * @param value Any value found in a tail event.
 * @returns A string representation; never `undefined`.
 */
function formatTailLogPart(value: unknown): string {
  if (typeof value === 'string') {
    return value;
  }
  if (value instanceof Error) {
    return value.stack ?? value.message;
  }
  try {
    // Typed as possibly undefined on purpose: the lib typings claim `string`,
    // but the runtime result is undefined for non-serializable top-level values.
    const encoded: string | undefined = JSON.stringify(value);
    return encoded ?? String(value);
  } catch {
    return String(value);
  }
}
|
|
2124
|
+
|
|
2125
|
+
/**
 * Attributes a log line to a run.
 *
 * Resolution order:
 * 1. Lines containing `[perf-trace]` are never attributed.
 * 2. A line matching `RUN_LOG_PREFIX_RE` is attributed to the captured run id;
 *    the returned text is the trimmed remainder after the tag, or the whole
 *    line when nothing follows the tag.
 * 3. Otherwise the first `RUN_ID_RE` match in the line is used as the run id
 *    and the line is returned unchanged.
 *
 * @param line A single log line.
 * @returns The run id and the text to record for it, or `null` when the line
 *   cannot be attributed to a run.
 */
function parseRunLogLine(line: string): { runId: string; line: string } | null {
  if (line.includes('[perf-trace]')) {
    return null;
  }

  const tagged = RUN_LOG_PREFIX_RE.exec(line);
  const taggedRunId = tagged?.[1];
  if (taggedRunId) {
    const remainder = (tagged?.[2] ?? '').trim();
    return {
      runId: taggedRunId,
      line: remainder === '' ? line : remainder,
    };
  }

  const embeddedRunId = RUN_ID_RE.exec(line)?.[0];
  return embeddedRunId ? { runId: embeddedRunId, line } : null;
}
|
|
2143
|
+
|
|
2144
|
+
async function handleWorkflowRoute(input: {
|
|
2145
|
+
runId: string;
|
|
2146
|
+
action: string;
|
|
2147
|
+
request: Request;
|
|
2148
|
+
env: CoordinatorEnv;
|
|
2149
|
+
ctx?: ExecutionContext;
|
|
2150
|
+
}): Promise<Response> {
|
|
2151
|
+
const { runId, action, request, env } = input;
|
|
2152
|
+
const defaultInstanceId = workflowInstanceId(runId);
|
|
2153
|
+
if (action === 'submit') {
|
|
2154
|
+
const submitStartedAt = Date.now();
|
|
2155
|
+
const coordinatorTimings: CoordinatorTiming[] = [];
|
|
2156
|
+
let submittedRunId = runId;
|
|
2157
|
+
const recordSubmitTiming = (timing: CoordinatorTiming): void => {
|
|
2158
|
+
coordinatorTimings.push(timing);
|
|
2159
|
+
recordCoordinatorPerfTraceBuffered(env, input.ctx, {
|
|
2160
|
+
runId: submittedRunId,
|
|
2161
|
+
phase: timing.phase,
|
|
2162
|
+
ms: timing.ms,
|
|
2163
|
+
graphHash: timing.graphHash ?? null,
|
|
2164
|
+
extra: timing.extra,
|
|
2165
|
+
});
|
|
2166
|
+
};
|
|
2167
|
+
const parseStartedAt = Date.now();
|
|
2168
|
+
const params = (await request.json()) as PlayWorkflowParams;
|
|
2169
|
+
submittedRunId = params.runId ?? runId;
|
|
2170
|
+
recordSubmitTiming({
|
|
2171
|
+
phase: 'coordinator.submit_parse_body',
|
|
2172
|
+
ms: Date.now() - parseStartedAt,
|
|
2173
|
+
graphHash: params.graphHash ?? null,
|
|
2174
|
+
extra: {
|
|
2175
|
+
hasDynamicWorkerCode: Boolean(params.dynamicWorkerCode),
|
|
2176
|
+
dynamicWorkerBytes:
|
|
2177
|
+
typeof params.dynamicWorkerCode === 'string'
|
|
2178
|
+
? params.dynamicWorkerCode.length
|
|
2179
|
+
: 0,
|
|
2180
|
+
},
|
|
2181
|
+
});
|
|
2182
|
+
if (!env.LOADER) {
|
|
2183
|
+
throw new Error(
|
|
2184
|
+
'Cloudflare Dynamic Workflows require a Worker Loader binding named LOADER.',
|
|
2185
|
+
);
|
|
2186
|
+
}
|
|
2187
|
+
if (!env.HARNESS) {
|
|
2188
|
+
throw new Error(
|
|
2189
|
+
'Cloudflare Dynamic Workflows require a service binding named HARNESS. ' +
|
|
2190
|
+
'Start apps/play-harness-worker before the coordinator or fix wrangler.toml services.',
|
|
2191
|
+
);
|
|
2192
|
+
}
|
|
2193
|
+
let instance: WorkflowInstance | null = null;
|
|
2194
|
+
try {
|
|
2195
|
+
const dispatchStartedAt = Date.now();
|
|
2196
|
+
const poolStartedAt = Date.now();
|
|
2197
|
+
instance = await submitViaPooledWorkflow({
|
|
2198
|
+
env,
|
|
2199
|
+
params,
|
|
2200
|
+
recordSubmitTiming,
|
|
2201
|
+
});
|
|
2202
|
+
const usedWorkflowPool = Boolean(instance);
|
|
2203
|
+
recordSubmitTiming({
|
|
2204
|
+
phase: 'coordinator.workflow_pool_attempt',
|
|
2205
|
+
ms: Date.now() - poolStartedAt,
|
|
2206
|
+
graphHash: params.graphHash ?? null,
|
|
2207
|
+
extra: { usedPool: usedWorkflowPool },
|
|
2208
|
+
});
|
|
2209
|
+
if (!instance) {
|
|
2210
|
+
const createStartedAt = Date.now();
|
|
2211
|
+
instance = await createDynamicWorkflowInstance({
|
|
2212
|
+
env,
|
|
2213
|
+
id: defaultInstanceId,
|
|
2214
|
+
params,
|
|
2215
|
+
});
|
|
2216
|
+
recordSubmitTiming({
|
|
2217
|
+
phase: 'coordinator.workflow_create',
|
|
2218
|
+
ms: Date.now() - createStartedAt,
|
|
2219
|
+
graphHash: params.graphHash ?? null,
|
|
2220
|
+
extra: { instanceId: instance.id, pooled: false },
|
|
2221
|
+
});
|
|
2222
|
+
} else {
|
|
2223
|
+
recordSubmitTiming({
|
|
2224
|
+
phase: 'coordinator.workflow_create',
|
|
2225
|
+
ms: 0,
|
|
2226
|
+
graphHash: params.graphHash ?? null,
|
|
2227
|
+
extra: { instanceId: instance.id, pooled: true },
|
|
2228
|
+
});
|
|
2229
|
+
}
|
|
2230
|
+
recordSubmitTiming({
|
|
2231
|
+
phase: 'coordinator.dispatch_workflow',
|
|
2232
|
+
ms: Date.now() - dispatchStartedAt,
|
|
2233
|
+
graphHash: params.graphHash ?? null,
|
|
2234
|
+
extra: { pooled: usedWorkflowPool },
|
|
2235
|
+
});
|
|
2236
|
+
const totalMs = Date.now() - submitStartedAt;
|
|
2237
|
+
recordSubmitTiming({
|
|
2238
|
+
phase: 'coordinator.submit_total',
|
|
2239
|
+
ms: totalMs,
|
|
2240
|
+
graphHash: params.graphHash ?? null,
|
|
2241
|
+
});
|
|
2242
|
+
recordSubmitTiming({
|
|
2243
|
+
phase: 'coordinator.submit_accepted',
|
|
2244
|
+
ms: totalMs,
|
|
2245
|
+
graphHash: params.graphHash ?? null,
|
|
2246
|
+
});
|
|
2247
|
+
return Response.json({
|
|
2248
|
+
runId,
|
|
2249
|
+
status: 'submitted',
|
|
2250
|
+
instanceState: null,
|
|
2251
|
+
coordinatorTimings,
|
|
2252
|
+
});
|
|
2253
|
+
} finally {
|
|
2254
|
+
input.ctx?.waitUntil(refillWorkflowPool(env).catch(() => undefined));
|
|
2255
|
+
disposeRpcStub(instance);
|
|
2256
|
+
}
|
|
2257
|
+
}
|
|
2258
|
+
|
|
2259
|
+
if (action === 'submit-child') {
|
|
2260
|
+
const startedAt = Date.now();
|
|
2261
|
+
try {
|
|
2262
|
+
const body = (await request.json().catch(() => null)) as Record<
|
|
2263
|
+
string,
|
|
2264
|
+
unknown
|
|
2265
|
+
> | null;
|
|
2266
|
+
if (!body || typeof body !== 'object') {
|
|
2267
|
+
return Response.json(
|
|
2268
|
+
{
|
|
2269
|
+
error: {
|
|
2270
|
+
code: 'CHILD_SUBMIT_BODY_REQUIRED',
|
|
2271
|
+
message: 'submit-child requires a JSON body.',
|
|
2272
|
+
phase: 'coordinator_child_submit',
|
|
2273
|
+
parentRunId: runId,
|
|
2274
|
+
},
|
|
2275
|
+
},
|
|
2276
|
+
{ status: 400 },
|
|
2277
|
+
);
|
|
2278
|
+
}
|
|
2279
|
+
const manifest = body.manifest as PlayRuntimeManifest | undefined;
|
|
2280
|
+
const governance = body.internalRunPlay as
|
|
2281
|
+
| PlayCallGovernanceSnapshot
|
|
2282
|
+
| undefined;
|
|
2283
|
+
const childPlayName =
|
|
2284
|
+
typeof body.name === 'string' && body.name.trim()
|
|
2285
|
+
? body.name.trim()
|
|
2286
|
+
: manifest?.playName?.trim();
|
|
2287
|
+
if (
|
|
2288
|
+
!manifest ||
|
|
2289
|
+
!childPlayName ||
|
|
2290
|
+
!manifest.artifactStorageKey ||
|
|
2291
|
+
!manifest.artifactHash ||
|
|
2292
|
+
!manifest.graphHash ||
|
|
2293
|
+
!governance
|
|
2294
|
+
) {
|
|
2295
|
+
return Response.json(
|
|
2296
|
+
{
|
|
2297
|
+
error: {
|
|
2298
|
+
code: 'CHILD_MANIFEST_REQUIRED',
|
|
2299
|
+
message:
|
|
2300
|
+
'submit-child requires a trusted child manifest and lineage.',
|
|
2301
|
+
phase: 'coordinator_child_submit',
|
|
2302
|
+
parentRunId: runId,
|
|
2303
|
+
},
|
|
2304
|
+
},
|
|
2305
|
+
{ status: 400 },
|
|
2306
|
+
);
|
|
2307
|
+
}
|
|
2308
|
+
const childRunId = buildChildRunId(childPlayName);
|
|
2309
|
+
const orgId = typeof body.orgId === 'string' ? body.orgId : '';
|
|
2310
|
+
if (!orgId) {
|
|
2311
|
+
return Response.json(
|
|
2312
|
+
{
|
|
2313
|
+
error: {
|
|
2314
|
+
code: 'CHILD_ORG_REQUIRED',
|
|
2315
|
+
message: 'submit-child requires orgId from the parent runtime.',
|
|
2316
|
+
phase: 'coordinator_child_submit',
|
|
2317
|
+
parentRunId: runId,
|
|
2318
|
+
},
|
|
2319
|
+
},
|
|
2320
|
+
{ status: 400 },
|
|
2321
|
+
);
|
|
2322
|
+
}
|
|
2323
|
+
const parentExecutorToken =
|
|
2324
|
+
typeof body.parentExecutorToken === 'string'
|
|
2325
|
+
? body.parentExecutorToken.trim()
|
|
2326
|
+
: '';
|
|
2327
|
+
if (!parentExecutorToken) {
|
|
2328
|
+
return Response.json(
|
|
2329
|
+
{
|
|
2330
|
+
error: {
|
|
2331
|
+
code: 'PARENT_EXECUTOR_TOKEN_REQUIRED',
|
|
2332
|
+
message:
|
|
2333
|
+
'submit-child requires the parent executor token for origin-scoped child token minting.',
|
|
2334
|
+
phase: 'coordinator_child_submit',
|
|
2335
|
+
parentRunId: runId,
|
|
2336
|
+
},
|
|
2337
|
+
},
|
|
2338
|
+
{ status: 400 },
|
|
2339
|
+
);
|
|
2340
|
+
}
|
|
2341
|
+
const childToken = await mintChildWorkflowExecutorToken({
|
|
2342
|
+
env,
|
|
2343
|
+
baseUrl: resolveRuntimeBaseUrl(env, body),
|
|
2344
|
+
parentExecutorToken,
|
|
2345
|
+
parentRunId: runId,
|
|
2346
|
+
parentPlayName:
|
|
2347
|
+
typeof body.parentPlayName === 'string' && body.parentPlayName.trim()
|
|
2348
|
+
? body.parentPlayName.trim()
|
|
2349
|
+
: governance.parentPlayName,
|
|
2350
|
+
childRunId,
|
|
2351
|
+
childPlayName,
|
|
2352
|
+
maxCreditsPerRun: manifest.maxCreditsPerRun ?? null,
|
|
2353
|
+
});
|
|
2354
|
+
const submitResponse = await handleWorkflowRoute({
|
|
2355
|
+
runId: childRunId,
|
|
2356
|
+
action: 'submit',
|
|
2357
|
+
request: new Request(
|
|
2358
|
+
`https://deepline.coordinator.internal/workflow/${encodeURIComponent(
|
|
2359
|
+
childRunId,
|
|
2360
|
+
)}/submit`,
|
|
2361
|
+
{
|
|
2362
|
+
method: 'POST',
|
|
2363
|
+
headers: { 'content-type': 'application/json' },
|
|
2364
|
+
body: JSON.stringify({
|
|
2365
|
+
runId: childRunId,
|
|
2366
|
+
playId: childRunId,
|
|
2367
|
+
playName: childPlayName,
|
|
2368
|
+
artifactStorageKey: manifest.artifactStorageKey,
|
|
2369
|
+
artifactHash: manifest.artifactHash,
|
|
2370
|
+
graphHash: manifest.graphHash,
|
|
2371
|
+
input: isRecord(body.input) ? body.input : {},
|
|
2372
|
+
contractSnapshot: {
|
|
2373
|
+
source: 'published',
|
|
2374
|
+
revisionVersion: null,
|
|
2375
|
+
staticPipeline: manifest.staticPipeline,
|
|
2376
|
+
billingLimit:
|
|
2377
|
+
typeof manifest.maxCreditsPerRun === 'number'
|
|
2378
|
+
? { maxCreditsPerRun: manifest.maxCreditsPerRun }
|
|
2379
|
+
: null,
|
|
2380
|
+
sourceCode: manifest.sourceCode ?? '',
|
|
2381
|
+
artifactMetadata: {
|
|
2382
|
+
storageKey: manifest.artifactStorageKey,
|
|
2383
|
+
artifactHash: manifest.artifactHash,
|
|
2384
|
+
graphHash: manifest.graphHash,
|
|
2385
|
+
},
|
|
2386
|
+
codeFormat: 'cjs_module',
|
|
2387
|
+
compatibility: {
|
|
2388
|
+
apiVersion: 2,
|
|
2389
|
+
runtimeBackend: 'workers_edge',
|
|
2390
|
+
},
|
|
2391
|
+
},
|
|
2392
|
+
executionPlan: null,
|
|
2393
|
+
childPlayManifests: isRecord(body.childPlayManifests)
|
|
2394
|
+
? (body.childPlayManifests as PlayRuntimeManifestMap)
|
|
2395
|
+
: null,
|
|
2396
|
+
playCallGovernance: governance,
|
|
2397
|
+
dynamicWorkerCode: null,
|
|
2398
|
+
executorToken: childToken,
|
|
2399
|
+
baseUrl: resolveRuntimeBaseUrl(env, body),
|
|
2400
|
+
orgId,
|
|
2401
|
+
userEmail:
|
|
2402
|
+
typeof body.userEmail === 'string' ? body.userEmail : '',
|
|
2403
|
+
userId: typeof body.userId === 'string' ? body.userId : null,
|
|
2404
|
+
runtimeBackend: 'cf_workflows_dynamic_worker',
|
|
2405
|
+
dedupBackend: 'in_memory',
|
|
2406
|
+
coordinatorUrl: new URL(request.url).origin,
|
|
2407
|
+
} satisfies PlayWorkflowParams),
|
|
2408
|
+
},
|
|
2409
|
+
),
|
|
2410
|
+
env,
|
|
2411
|
+
});
|
|
2412
|
+
const responseText = await submitResponse.text().catch(() => '');
|
|
2413
|
+
recordCoordinatorPerfTrace({
|
|
2414
|
+
runId,
|
|
2415
|
+
phase: 'coordinator.child_submit',
|
|
2416
|
+
ms: Date.now() - startedAt,
|
|
2417
|
+
extra: { status: submitResponse.status, childRunId },
|
|
2418
|
+
});
|
|
2419
|
+
console.info('[play.runtime.span]', {
|
|
2420
|
+
event: 'play.runtime.span',
|
|
2421
|
+
phase: 'coordinator_child_submit',
|
|
2422
|
+
runId,
|
|
2423
|
+
parentRunId: runId,
|
|
2424
|
+
childRunId,
|
|
2425
|
+
playName: childPlayName,
|
|
2426
|
+
ms: Date.now() - startedAt,
|
|
2427
|
+
status: submitResponse.ok ? 'ok' : 'failed',
|
|
2428
|
+
...(submitResponse.ok
|
|
2429
|
+
? {}
|
|
2430
|
+
: { errorCode: 'COORDINATOR_CHILD_SUBMIT_FAILED' }),
|
|
2431
|
+
});
|
|
2432
|
+
if (!submitResponse.ok) {
|
|
2433
|
+
return new Response(responseText, {
|
|
2434
|
+
status: submitResponse.status,
|
|
2435
|
+
headers: {
|
|
2436
|
+
'content-type':
|
|
2437
|
+
submitResponse.headers.get('content-type') ?? 'application/json',
|
|
2438
|
+
'cache-control': 'no-store',
|
|
2439
|
+
},
|
|
2440
|
+
});
|
|
2441
|
+
}
|
|
2442
|
+
return Response.json(
|
|
2443
|
+
{
|
|
2444
|
+
workflowId: childRunId,
|
|
2445
|
+
runId: childRunId,
|
|
2446
|
+
status: 'started',
|
|
2447
|
+
coordinator: JSON.parse(responseText || '{}'),
|
|
2448
|
+
},
|
|
2449
|
+
{
|
|
2450
|
+
headers: {
|
|
2451
|
+
'cache-control': 'no-store',
|
|
2452
|
+
},
|
|
2453
|
+
},
|
|
2454
|
+
);
|
|
2455
|
+
} catch (error) {
|
|
2456
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
2457
|
+
console.error('[coordinator.child_submit.error]', {
|
|
2458
|
+
runId,
|
|
2459
|
+
ms: Date.now() - startedAt,
|
|
2460
|
+
error: message,
|
|
2461
|
+
});
|
|
2462
|
+
return Response.json(
|
|
2463
|
+
{
|
|
2464
|
+
error: {
|
|
2465
|
+
code: 'COORDINATOR_CHILD_SUBMIT_FAILED',
|
|
2466
|
+
message,
|
|
2467
|
+
phase: 'coordinator_child_submit',
|
|
2468
|
+
parentRunId: runId,
|
|
2469
|
+
},
|
|
2470
|
+
},
|
|
2471
|
+
{
|
|
2472
|
+
status: 500,
|
|
2473
|
+
headers: { 'cache-control': 'no-store' },
|
|
2474
|
+
},
|
|
2475
|
+
);
|
|
2476
|
+
}
|
|
2477
|
+
}
|
|
2478
|
+
|
|
2479
|
+
// get() throws if the instance doesn't exist (Workflows local-mode wipes
|
|
2480
|
+
// state on wrangler dev reload, and superseded `--force` runs may target
|
|
2481
|
+
// an instance that was never created). Treat that as a no-op cancel.
|
|
2482
|
+
let instance: WorkflowInstance | null = null;
|
|
2483
|
+
try {
|
|
2484
|
+
const instanceId = await resolveWorkflowInstanceIdForRun(env, runId);
|
|
2485
|
+
instance = await env.PLAY_WORKFLOW.get(instanceId);
|
|
2486
|
+
} catch (error) {
|
|
2487
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
2488
|
+
if (
|
|
2489
|
+
!/not[ _]found|not_found|does not exist|no such instance|404/i.test(
|
|
2490
|
+
message,
|
|
2491
|
+
)
|
|
2492
|
+
) {
|
|
2493
|
+
throw error;
|
|
2494
|
+
}
|
|
2495
|
+
}
|
|
2496
|
+
try {
|
|
2497
|
+
if (action === 'cancel') {
|
|
2498
|
+
if (!instance) {
|
|
2499
|
+
return Response.json({ runId, status: 'cancelled' });
|
|
2500
|
+
}
|
|
2501
|
+
try {
|
|
2502
|
+
await instance.terminate();
|
|
2503
|
+
} catch (error) {
|
|
2504
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
2505
|
+
// Tolerate four classes of error here:
|
|
2506
|
+
// - already-terminal (complete / errored / terminated)
|
|
2507
|
+
// - "Cannot terminate instance since its on a finite state"
|
|
2508
|
+
// (the runtime's wording for "already finished")
|
|
2509
|
+
// - "not implemented" (wrangler dev local mode doesn't support
|
|
2510
|
+
// instance.terminate() yet — silently no-op there)
|
|
2511
|
+
// - "not found" (instance never existed)
|
|
2512
|
+
if (
|
|
2513
|
+
!/complete|terminated|errored|finite state|cannot[ _]terminate|not[ _]implemented|not[ _]found|404/i.test(
|
|
2514
|
+
message,
|
|
2515
|
+
)
|
|
2516
|
+
) {
|
|
2517
|
+
throw error;
|
|
2518
|
+
}
|
|
2519
|
+
}
|
|
2520
|
+
return Response.json({ runId, status: 'cancelled' });
|
|
2521
|
+
}
|
|
2522
|
+
if (!instance) {
|
|
2523
|
+
return new Response('not found', { status: 404 });
|
|
2524
|
+
}
|
|
2525
|
+
if (action === 'signal') {
|
|
2526
|
+
const body = (await request.json().catch(() => ({}))) as Record<
|
|
2527
|
+
string,
|
|
2528
|
+
unknown
|
|
2529
|
+
>;
|
|
2530
|
+
const eventKey =
|
|
2531
|
+
typeof body.eventKey === 'string' && body.eventKey.trim()
|
|
2532
|
+
? body.eventKey.trim()
|
|
2533
|
+
: typeof body.event_key === 'string' && body.event_key.trim()
|
|
2534
|
+
? body.event_key.trim()
|
|
2535
|
+
: '';
|
|
2536
|
+
const eventType =
|
|
2537
|
+
typeof body.type === 'string' && body.type.trim()
|
|
2538
|
+
? body.type.trim()
|
|
2539
|
+
: eventKey
|
|
2540
|
+
? `integration_event_${eventKey}`
|
|
2541
|
+
: 'integration_event';
|
|
2542
|
+
await instance.sendEvent({
|
|
2543
|
+
type: workflowEventType(eventType),
|
|
2544
|
+
payload: body,
|
|
2545
|
+
});
|
|
2546
|
+
return Response.json({
|
|
2547
|
+
runId,
|
|
2548
|
+
status: mapWorkflowStatus(await instance.status()),
|
|
2549
|
+
});
|
|
2550
|
+
}
|
|
2551
|
+
if (
|
|
2552
|
+
action === 'result' ||
|
|
2553
|
+
action === 'status' ||
|
|
2554
|
+
action === 'observe' ||
|
|
2555
|
+
action === ''
|
|
2556
|
+
) {
|
|
2557
|
+
const observeWaitMs =
|
|
2558
|
+
action === 'observe'
|
|
2559
|
+
? Math.min(
|
|
2560
|
+
Math.max(
|
|
2561
|
+
Number(new URL(request.url).searchParams.get('waitMs') ?? '0'),
|
|
2562
|
+
0,
|
|
2563
|
+
),
|
|
2564
|
+
2_000,
|
|
2565
|
+
)
|
|
2566
|
+
: 0;
|
|
2567
|
+
const includeTrace =
|
|
2568
|
+
new URL(request.url).searchParams.get('trace') === '1';
|
|
2569
|
+
const statusStartedAt = Date.now();
|
|
2570
|
+
let status = await instance.status();
|
|
2571
|
+
let statusPolls = 1;
|
|
2572
|
+
while (
|
|
2573
|
+
observeWaitMs > 0 &&
|
|
2574
|
+
Date.now() - statusStartedAt < observeWaitMs
|
|
2575
|
+
) {
|
|
2576
|
+
const result = mapWorkflowResult(runId, status);
|
|
2577
|
+
if (
|
|
2578
|
+
result.status === 'completed' ||
|
|
2579
|
+
result.status === 'failed' ||
|
|
2580
|
+
result.status === 'cancelled'
|
|
2581
|
+
) {
|
|
2582
|
+
break;
|
|
2583
|
+
}
|
|
2584
|
+
await new Promise((resolve) => setTimeout(resolve, 75));
|
|
2585
|
+
status = await instance.status();
|
|
2586
|
+
statusPolls += 1;
|
|
2587
|
+
}
|
|
2588
|
+
const result = mapWorkflowResult(runId, status);
|
|
2589
|
+
const observeMs = Date.now() - statusStartedAt;
|
|
2590
|
+
// If we forced a permanent-error fail-fast (status='failed' even
|
|
2591
|
+
// though the underlying instance is still 'running' mid-retry),
|
|
2592
|
+
// terminate on the CF side once so the retry loop stops burning
|
|
2593
|
+
// compute. Best-effort: tolerate races where the instance has
|
|
2594
|
+
// already gone terminal between our status() and terminate().
|
|
2595
|
+
if (
|
|
2596
|
+
result.status === 'failed' &&
|
|
2597
|
+
status.status !== 'errored' &&
|
|
2598
|
+
status.status !== 'terminated' &&
|
|
2599
|
+
status.status !== 'complete'
|
|
2600
|
+
) {
|
|
2601
|
+
await instance.terminate().catch((error: unknown) => {
|
|
2602
|
+
const message =
|
|
2603
|
+
error instanceof Error ? error.message : String(error);
|
|
2604
|
+
if (
|
|
2605
|
+
!/complete|terminated|errored|finite state|cannot[ _]terminate|not[ _]implemented|not[ _]found|404/i.test(
|
|
2606
|
+
message,
|
|
2607
|
+
)
|
|
2608
|
+
) {
|
|
2609
|
+
console.warn(
|
|
2610
|
+
'[coordinator] terminate-after-permanent-error failed',
|
|
2611
|
+
{
|
|
2612
|
+
runId,
|
|
2613
|
+
error: message,
|
|
2614
|
+
},
|
|
2615
|
+
);
|
|
2616
|
+
}
|
|
2617
|
+
});
|
|
2618
|
+
}
|
|
2619
|
+
const coordinatorTrace = includeTrace
|
|
2620
|
+
? await listCoordinatorPerfTrace(env, runId).catch(() => [])
|
|
2621
|
+
: [];
|
|
2622
|
+
return Response.json({
|
|
2623
|
+
...result,
|
|
2624
|
+
coordinatorObserve: {
|
|
2625
|
+
ms: observeMs,
|
|
2626
|
+
waitMs: observeWaitMs,
|
|
2627
|
+
workflowStatus: status.status,
|
|
2628
|
+
statusPolls,
|
|
2629
|
+
instanceId: instance.id,
|
|
2630
|
+
},
|
|
2631
|
+
...(includeTrace ? { coordinatorTrace } : {}),
|
|
2632
|
+
});
|
|
2633
|
+
}
|
|
2634
|
+
return new Response('not found', { status: 404 });
|
|
2635
|
+
} finally {
|
|
2636
|
+
disposeRpcStub(instance);
|
|
2637
|
+
}
|
|
2638
|
+
}
|
|
2639
|
+
|
|
2640
|
+
/**
 * Derives a workflow instance id from a run id.
 *
 * The run id is lower-cased and every run of characters outside
 * `[a-z0-9_-]` is collapsed into a single `-`. If the resulting slug is
 * between 1 and 100 characters long it is returned as-is; otherwise (empty
 * or too long) the id is `run-` followed by `stableHash(runId)` of the
 * original, un-normalized run id.
 *
 * @param runId Run identifier to convert.
 * @returns The slug, or the hashed fallback id.
 */
function workflowInstanceId(runId: string): string {
  const slug = runId.toLowerCase().replace(/[^a-z0-9_-]+/g, '-');
  const fitsAsIs = slug.length >= 1 && slug.length <= 100;
  return fitsAsIs ? slug : `run-${stableHash(runId)}`;
}
|
|
2647
|
+
|
|
2648
|
+
/**
 * Deterministic 32-bit hash of a string, rendered in base 36.
 *
 * Starts from 2166136261 and, for each UTF-16 code unit, XORs the unit in
 * and multiplies by 16777619 using 32-bit integer multiplication
 * (`Math.imul`). The final value is reinterpreted as unsigned before being
 * formatted.
 *
 * @param value Text to hash; the empty string hashes to the initial value.
 * @returns Lower-case base-36 representation of the unsigned 32-bit hash.
 */
function stableHash(value: string): string {
  const offsetBasis = 2166136261;
  const prime = 16777619;
  let acc = offsetBasis;
  let position = 0;
  while (position < value.length) {
    acc = Math.imul(acc ^ value.charCodeAt(position), prime);
    position += 1;
  }
  return (acc >>> 0).toString(36);
}
|
|
2656
|
+
|
|
2657
|
+
/**
 * Obtains the dynamic play Worker stub from `env.LOADER.get` without
 * awaiting anything, so it can be called from the
 * createDynamicWorkflowEntrypoint loader callback.
 *
 * The framework's loader type permits an async callback, but the official
 * example (cloudflare/dynamic-workflows repo) is synchronous; the microtask
 * hop a top-level `await` introduces appears to interact badly with how the
 * framework wires up the returned entrypoint stub for runner.run() RPC.
 *
 * @param env Coordinator bindings; both `LOADER` and `HARNESS` must be set.
 * @param metadata Provides `graphHash` and `artifactStorageKey` (both
 *   required after trimming) plus optional `artifactHash` and `runId`.
 * @param trace Perf-trace sink; defaults to `recordCoordinatorPerfTrace`.
 * @returns Whatever `env.LOADER.get` returns for the computed cache key.
 * @throws Error if a required binding is missing or a required metadata
 *   field is blank.
 */
function loadDynamicPlayWorkerSync(
  env: CoordinatorEnv,
  metadata: DynamicWorkflowMetadata,
  trace: CoordinatorPerfTraceSink = recordCoordinatorPerfTrace,
): {
  getEntrypoint(name?: string): Fetcher & WorkflowRunner;
} {
  // Binding checks come first and in this order, so a misconfigured
  // coordinator reports the LOADER problem before the HARNESS one.
  if (!env.LOADER) {
    throw new Error(
      'Cloudflare Dynamic Workflows require a Worker Loader binding named LOADER.',
    );
  }
  if (!env.HARNESS) {
    throw new Error(
      'Cloudflare Dynamic Workflows require a service binding named HARNESS. ' +
        'Start apps/play-harness-worker before the coordinator or fix wrangler.toml services.',
    );
  }

  const graphHash = metadata.graphHash.trim();
  if (graphHash.length === 0) {
    throw new Error('Dynamic play worker requires graphHash metadata.');
  }
  const artifactStorageKey = metadata.artifactStorageKey.trim();
  if (artifactStorageKey.length === 0) {
    throw new Error(
      'Dynamic play worker requires artifactStorageKey metadata.',
    );
  }

  // Prefer the declared artifact hash; fall back to a hash of the storage
  // key when it is absent or blank.
  const declaredArtifactHash = metadata.artifactHash?.trim();
  const artifactIdentity = declaredArtifactHash
    ? declaredArtifactHash
    : stableHash(artifactStorageKey);
  const workerCacheKey = `play:${graphHash}:${artifactIdentity}:harness=h3`;
  const runIdForTrace = metadata.runId ?? graphHash;

  const loaderGetStartedAt = Date.now();
  const stub = env.LOADER.get(workerCacheKey, async () => {
    const bundledCode = await loadDynamicWorkerBundledCode({
      env,
      metadata,
      graphHash,
      artifactStorageKey,
      workerCacheKey,
      runIdForTrace,
      trace,
    });
    return {
      compatibilityDate: DYNAMIC_WORKER_COMPATIBILITY_DATE,
      compatibilityFlags: ['nodejs_compat'],
      mainModule: 'index.js',
      modules: { 'index.js': bundledCode },
      env: {
        // Service binding to the long-lived Play Harness Worker. Per-play
        // code reaches it via `env.HARNESS.<method>(...)` — see
        // sdk/src/plays/harness-stub.ts. This function has already thrown
        // above when the binding is missing, so it is set whenever the
        // loader is invoked through this path.
        HARNESS: env.HARNESS,
        // `env.PLAYS_BUCKET` (an R2Bucket binding) is deliberately NOT
        // forwarded. The original authors observed that a raw R2Bucket in
        // the dynamically-loaded Worker's env makes Cloudflare Workflows
        // fail at workflow init with
        //   "Could not serialize object of type 'R2Bucket'."
        // because workflow state is persisted through a serialization path
        // that R2Bucket bindings do not support. Per-play reads (CSVs,
        // packaged imports) go through the HARNESS Worker instead.
      },
    };
  });
  // Times only the synchronous `LOADER.get` call, not the async callback.
  trace({
    runId: runIdForTrace,
    phase: 'coordinator.loader_get_call',
    ms: Date.now() - loaderGetStartedAt,
    graphHash,
    extra: { workerCacheKey },
  });
  return stub;
}
|
|
2741
|
+
|
|
2742
|
+
/**
 * Promise-returning variant of `loadDynamicPlayWorkerSync`.
 *
 * This used to be a line-for-line copy of the synchronous loader (same
 * binding checks, same cache-key format, same loader callback and trace
 * event). It now delegates so the two cannot drift apart — for example when
 * the `harness=` cache-key version is bumped in only one of them.
 *
 * Because the function is `async`, any error thrown by the synchronous
 * loader (missing `LOADER`/`HARNESS` binding, blank `graphHash` or
 * `artifactStorageKey`) still surfaces as a rejected promise, exactly as
 * before.
 *
 * @param env Coordinator bindings; `LOADER` and `HARNESS` must be set.
 * @param metadata Dynamic workflow metadata forwarded to the sync loader.
 * @param trace Perf-trace sink; defaults to `recordCoordinatorPerfTrace`.
 * @returns Promise resolving to the stub returned by `env.LOADER.get`.
 */
async function loadDynamicPlayWorker(
  env: CoordinatorEnv,
  metadata: DynamicWorkflowMetadata,
  trace: CoordinatorPerfTraceSink = recordCoordinatorPerfTrace,
): Promise<{
  getEntrypoint(name?: string): Fetcher & WorkflowRunner;
}> {
  return loadDynamicPlayWorkerSync(env, metadata, trace);
}
|
|
2808
|
+
|
|
2809
|
+
/**
 * Resolves the bundled source for a dynamic play Worker.
 *
 * Inline code from `metadata.dynamicWorkerCode` is used when it is a
 * non-empty string; otherwise the stored artifact is loaded via
 * `loadStoredPlayArtifactFromR2` and its `artifact.bundledCode` is used.
 *
 * Fix: the previous version decided whether to hit R2 with a truthiness
 * test but then picked the final code with `??`. An empty-string
 * `dynamicWorkerCode` therefore triggered the R2 load and then shadowed the
 * loaded code, producing a misleading "does not contain bundledCode" error.
 * Both decisions now share one predicate.
 *
 * Trace events emitted through `input.trace`:
 * - `coordinator.loader_code_r2_get` — only on the R2 path, emitted even if
 *   the load throws (it runs in `finally`).
 * - `coordinator.loader_code_callback` — once, on success.
 *
 * @param input Bindings, metadata, precomputed keys, and the trace sink.
 * @returns The non-empty bundled code string.
 * @throws Error when neither source yields a non-empty string; errors from
 *   the R2 load propagate unchanged.
 */
async function loadDynamicWorkerBundledCode(input: {
  env: CoordinatorEnv;
  metadata: DynamicWorkflowMetadata;
  graphHash: string;
  artifactStorageKey: string;
  workerCacheKey: string;
  runIdForTrace: string;
  trace: CoordinatorPerfTraceSink;
}): Promise<string> {
  const callbackStartedAt = Date.now();
  const inlineCode = input.metadata.dynamicWorkerCode;
  const hasInlineCode = typeof inlineCode === 'string' && inlineCode.length > 0;
  const codeSource: 'inline' | 'r2' = hasInlineCode ? 'inline' : 'r2';
  let r2Ms = 0;
  let bundledCode: unknown = hasInlineCode ? inlineCode : undefined;

  if (!hasInlineCode) {
    const r2StartedAt = Date.now();
    try {
      const stored = await loadStoredPlayArtifactFromR2(
        input.env,
        input.artifactStorageKey,
      );
      bundledCode = stored?.artifact?.bundledCode;
    } finally {
      // Record the R2 latency whether the load succeeded or threw.
      r2Ms = Date.now() - r2StartedAt;
      input.trace({
        runId: input.runIdForTrace,
        phase: 'coordinator.loader_code_r2_get',
        ms: r2Ms,
        graphHash: input.graphHash,
        extra: { artifactStorageKey: input.artifactStorageKey },
      });
    }
  }

  if (typeof bundledCode !== 'string' || bundledCode.length === 0) {
    throw new Error(
      `Stored play artifact ${input.artifactStorageKey} does not contain bundledCode.`,
    );
  }

  input.trace({
    runId: input.runIdForTrace,
    phase: 'coordinator.loader_code_callback',
    ms: Date.now() - callbackStartedAt,
    graphHash: input.graphHash,
    extra: {
      codeSource,
      r2Ms,
      workerCacheKey: input.workerCacheKey,
      // String length (UTF-16 code units), not an encoded byte count.
      bundledCodeBytes: bundledCode.length,
    },
  });
  return bundledCode;
}
|
|
2863
|
+
|
|
2864
|
+
/**
 * Coerces an untrusted value into a list of packaged-file descriptors.
 *
 * Anything that is not an array yields an empty list. Within an array,
 * only plain (non-null, non-array) objects are considered. For each one:
 * - `playPath` is stringified (nullish becomes `''`) and a single leading
 *   `./` is stripped;
 * - `storageKey` is stringified (nullish becomes `''`);
 * - `inlineText` is kept only when it is a string, otherwise `undefined`.
 * Entries whose `playPath` or `storageKey` ends up empty are dropped.
 *
 * @param value Arbitrary input, typically parsed JSON.
 * @returns The normalized descriptors in their original order.
 */
function normalizePackagedFiles(
  value: unknown,
): NonNullable<DynamicWorkflowMetadata['packagedFiles']> {
  const normalized: NonNullable<DynamicWorkflowMetadata['packagedFiles']> = [];
  if (!Array.isArray(value)) {
    return normalized;
  }
  for (const candidate of value) {
    const isPlainRecord =
      Boolean(candidate) &&
      typeof candidate === 'object' &&
      !Array.isArray(candidate);
    if (!isPlainRecord) {
      continue;
    }
    const record = candidate as Record<string, unknown>;
    const playPath = String(record.playPath ?? '').replace(/^\.\//, '');
    const storageKey = String(record.storageKey ?? '');
    const inlineText =
      typeof record.inlineText === 'string' ? record.inlineText : undefined;
    if (playPath.length === 0 || storageKey.length === 0) {
      continue;
    }
    normalized.push({ playPath, storageKey, inlineText });
  }
  return normalized;
}
|
|
2882
|
+
|
|
2883
|
+
/**
 * Source text of the minimal Worker module used for coordinator warmups.
 *
 * `handleCoordinatorWarmup` stores this string as the `bundledCode` of a
 * warmup artifact and derives the artifact hash / storage key from
 * `stableHash` of the exact text, so changing any character (including
 * whitespace) changes that hash and key.
 *
 * The module defines:
 * - `TenantWorkflow`, a `WorkflowEntrypoint` whose `run` optionally POSTs
 *   `{}` to the runtime endpoint when `env.RUNTIME_API` is present
 *   (failures are swallowed), logs one `[perf-trace]` line, and returns
 *   `{ ok: true, warmup: true }`.
 * - a default `fetch` handler serving `POST /start` (creates a workflow via
 *   `env.WORKFLOWS.create`) and `GET /health`; everything else is 404.
 */
const COORDINATOR_WARMUP_DYNAMIC_WORKER_CODE = `
import { WorkflowEntrypoint } from "cloudflare:workers";

export class TenantWorkflow extends WorkflowEntrypoint {
  async run(event) {
    const payload = event && typeof event === "object" ? event.payload : null;
    const runId = payload && typeof payload.runId === "string" ? payload.runId : "warmup";
    const startedAt = Date.now();
    if (this.env.RUNTIME_API) {
      await this.env.RUNTIME_API.fetch(new Request("https://deepline.runtime.internal/api/v2/plays/internal/runtime", {
        method: "POST",
        headers: { "content-type": "application/json" },
        body: "{}"
      })).catch(() => null);
    }
    console.log("[perf-trace] " + JSON.stringify({
      ts: Date.now(),
      source: "dynamic_worker",
      runId,
      phase: "dynamic_worker.warmup_run",
      ms: Date.now() - startedAt,
      graphHash: "coordinator-warmup-v1"
    }));
    return { ok: true, warmup: true };
  }
}

export default {
  async fetch(request, env) {
    const url = new URL(request.url);
    if (request.method === "POST" && url.pathname === "/start") {
      const body = await request.json().catch(() => null);
      if (!body || typeof body.id !== "string" || !body.payload || typeof body.payload !== "object") {
        return new Response("invalid workflow start body", { status: 400 });
      }
      const createStartedAt = Date.now();
      const instance = await env.WORKFLOWS.create({ id: body.id, params: body.payload });
      const workflowCreateMs = Date.now() - createStartedAt;
      return Response.json({
        id: instance.id,
        status: "submitted",
        timingsMs: { workflowCreate: workflowCreateMs }
      });
    }
    if (request.method === "GET" && url.pathname === "/health") {
      return new Response("ok", { status: 200 });
    }
    return new Response("not found", { status: 404 });
  }
};
`;
|
|
2934
|
+
|
|
2935
|
+
/**
 * Handles a coordinator warmup request.
 *
 * Steps, in order: store the warmup artifact in `PLAYS_BUCKET`, submit it
 * through `handleWorkflowRoute` with action `submit`, poll the workflow
 * instance for up to ten seconds until it is completed / failed /
 * cancelled, record two perf-trace events, kick off a workflow-pool refill,
 * and return a JSON summary that includes the refill result.
 *
 * Response status:
 * - 405 for non-POST requests;
 * - 401 when a bypass token is configured and the
 *   `x-vercel-protection-bypass` header does not match it;
 * - 200 when the submit succeeded and the run reached `completed`;
 * - 504 when the last observed state is `running` or `sleeping`;
 * - 500 when the submit succeeded but the run ended any other way;
 * - otherwise the submit response's own status.
 *
 * NOTE(review): when `VERCEL_PROTECTION_BYPASS_TOKEN` is unset or blank the
 * header check is skipped entirely — confirm that is intended.
 *
 * @param request Incoming request; `?label=` customizes the run id.
 * @param env Coordinator bindings.
 * @param ctx Optional execution context used to keep the pool refill alive.
 */
async function handleCoordinatorWarmup(
  request: Request,
  env: CoordinatorEnv,
  ctx?: ExecutionContext,
): Promise<Response> {
  if (request.method !== 'POST') {
    return new Response('method not allowed', { status: 405 });
  }

  const expectedToken = env.VERCEL_PROTECTION_BYPASS_TOKEN?.trim();
  if (expectedToken) {
    const presentedToken = request.headers.get('x-vercel-protection-bypass');
    if (presentedToken !== expectedToken) {
      return new Response('unauthorized', { status: 401 });
    }
  }

  const label =
    new URL(request.url).searchParams.get('label')?.trim() || 'warmup';
  const runId = `coordinator/warmup/${label}/${Date.now().toString(36)}`;
  const artifactHash = `coordinator-warmup-${stableHash(COORDINATOR_WARMUP_DYNAMIC_WORKER_CODE)}`;
  const artifactStorageKey = `coordinator-warmup/${artifactHash}.json`;

  // The storage key is derived from the code hash, so repeated warmups
  // overwrite the same object with the same content.
  const storedArtifact = JSON.stringify({
    artifact: {
      artifactKind: 'esm_workers',
      bundledCode: COORDINATOR_WARMUP_DYNAMIC_WORKER_CODE,
    },
  } satisfies StoredPlayArtifactPayload);
  await env.PLAYS_BUCKET.put(artifactStorageKey, storedArtifact, {
    httpMetadata: {
      contentType: 'application/json',
    },
  });

  const params: PlayWorkflowParams = {
    runId,
    playId: runId,
    playName: 'coordinator-warmup',
    artifactStorageKey,
    artifactHash,
    graphHash: artifactHash,
    input: {},
    inputFile: null,
    inlineCsv: null,
    packagedFiles: null,
    contractSnapshot: null,
    executionPlan: null,
    playCallGovernance: null,
    dynamicWorkerCode: null,
    executorToken: 'coordinator-warmup',
    baseUrl: env.DEEPLINE_API_BASE_URL,
    orgId: 'coordinator-warmup',
    userEmail: 'coordinator-warmup@deepline.local',
    userId: 'coordinator-warmup',
    runtimeBackend: 'cf_workflows_dynamic_worker',
    dedupBackend: 'durable_object',
  };

  const startedAt = Date.now();
  const submitRequest = new Request(
    `https://deepline.coordinator.internal/workflow/${encodeURIComponent(runId)}/submit`,
    {
      method: 'POST',
      headers: { 'content-type': 'application/json' },
      body: JSON.stringify(params),
    },
  );
  const response = await handleWorkflowRoute({
    runId,
    action: 'submit',
    request: submitRequest,
    env,
  });
  const text = await response.text().catch(() => '');

  // Poll for a terminal state only when the submit itself succeeded.
  const terminalWaitStartedAt = Date.now();
  let terminalState: Record<string, unknown> | null = null;
  if (response.ok) {
    const instanceId = await resolveWorkflowInstanceIdForRun(env, runId);
    const instance = await env.PLAY_WORKFLOW.get(instanceId);
    try {
      while (Date.now() - terminalWaitStartedAt < 10_000) {
        terminalState = mapWorkflowResult(runId, await instance.status());
        const observed = String(terminalState.status ?? '');
        const reachedTerminal =
          observed === 'completed' ||
          observed === 'failed' ||
          observed === 'cancelled';
        if (reachedTerminal) {
          break;
        }
        await new Promise((resolve) => setTimeout(resolve, 100));
      }
    } finally {
      disposeRpcStub(instance);
    }
  }

  recordCoordinatorPerfTrace({
    runId,
    phase: 'coordinator.warmup_terminal_wait',
    ms: Date.now() - terminalWaitStartedAt,
    graphHash: params.graphHash,
    extra: { status: terminalState?.status ?? null, label },
  });
  recordCoordinatorPerfTrace({
    runId,
    phase: 'coordinator.warmup_submit_total',
    ms: Date.now() - startedAt,
    graphHash: params.graphHash,
    extra: { status: response.status, label },
  });

  // A failed refill degrades to an all-zero summary instead of failing the
  // warmup response.
  const poolRefillPromise = refillWorkflowPool(env, {
    waitReady: true,
    minAvailable: 1,
  }).catch(() => ({
    available: 0,
    warming: 0,
    target: 0,
    created: 0,
    promoted: 0,
    removed: 0,
    waitedMs: 0,
    waitIterations: 0,
  }));
  ctx?.waitUntil(poolRefillPromise.then(() => undefined));

  // Echo the submit body as JSON when it parses, otherwise as raw text.
  let body: unknown = null;
  if (text) {
    try {
      body = JSON.parse(text);
    } catch {
      body = text;
    }
  }

  const terminalStatus = String(terminalState?.status ?? '');
  const warmupCompleted = response.ok && terminalStatus === 'completed';
  let responseStatus: number;
  if (warmupCompleted) {
    responseStatus = 200;
  } else if (terminalStatus === 'running' || terminalStatus === 'sleeping') {
    responseStatus = 504;
  } else if (response.ok) {
    responseStatus = 500;
  } else {
    responseStatus = response.status;
  }

  const workflowPool = await poolRefillPromise;
  return Response.json(
    {
      ok: warmupCompleted,
      runId,
      status: response.status,
      body,
      terminalState,
      workflowPool,
    },
    { status: responseStatus },
  );
}
|
|
3086
|
+
|
|
3087
|
+
/**
 * Creates the `RuntimeApi` binding stub by invoking the `RuntimeApi` factory
 * found on the `cloudflare:workers` exports object with
 * `{ props: undefined }`.
 *
 * Background, as recorded by the original authors: the stub is intended to
 * be placed in the per-graphHash play Worker's `env.RUNTIME_API`, so that
 * `env.RUNTIME_API.fetch(req)` is RPC-dispatched to `RuntimeApi.fetch` on
 * the coordinator, which path-allowlists the request and forwards it to
 * `DEEPLINE_API_BASE_URL` directly — skipping the public *.workers.dev → CF
 * edge → cloudflared → localhost chain used by the harness's older
 * `fetch(req.baseUrl + path)` route. A WorkerEntrypoint stub is used rather
 * than a plain closure because Cloudflare Workflows serializes the dynamic
 * Worker's env when persisting workflow state, and closures over captured
 * locals are not structured-cloneable, whereas WorkerEntrypoint stubs are
 * (the same approach `makePlayAssetsBinding` uses). When the binding is
 * absent from a Worker's env, the harness is said to fall back to its
 * `fetch(req.baseUrl + path)` route.
 *
 * @returns An object exposing `fetch(req)`.
 * @throws Error if `RuntimeApi` is not a function on the exports object.
 */
function makeRuntimeApiBinding(): { fetch(req: Request): Promise<Response> } {
  type RuntimeApiFactory = (init: { props: undefined }) => {
    fetch(req: Request): Promise<Response>;
  };
  const { RuntimeApi: factory } = workersExports as unknown as {
    RuntimeApi?: RuntimeApiFactory;
  };
  if (typeof factory === 'function') {
    return factory({ props: undefined });
  }
  throw new Error(
    'RuntimeApi is not registered on cloudflare:workers exports.',
  );
}
|
|
3121
|
+
|
|
3122
|
+
/**
 * Builds the `CoordinatorControl` stub from the `cloudflare:workers` exports
 * object. The stub exposes `submitChild`, `signal` and `recordPerfTrace`.
 *
 * @returns The stub produced by calling the registered `CoordinatorControl`
 *   factory with `{ props: undefined }`.
 * @throws Error when `CoordinatorControl` is not a function on the exports
 *   object.
 */
function makeCoordinatorControlBinding(): {
  submitChild(
    parentRunId: string,
    body: Record<string, unknown>,
  ): Promise<{ workflowId?: string; runId?: string; error?: unknown }>;
  signal(
    runId: string,
    body: Record<string, unknown>,
  ): Promise<Record<string, unknown>>;
  recordPerfTrace(
    runId: string,
    payload: CoordinatorPerfTracePayload,
  ): Promise<void>;
} {
  // Derive the stub shape from the declared return type so it is spelled out
  // only once.
  type ControlStub = ReturnType<typeof makeCoordinatorControlBinding>;
  type ControlFactory = (init: { props: undefined }) => ControlStub;

  const { CoordinatorControl: createStub } = workersExports as unknown as {
    CoordinatorControl?: ControlFactory;
  };

  if (typeof createStub === 'function') {
    return createStub({ props: undefined });
  }

  throw new Error(
    'CoordinatorControl is not registered on cloudflare:workers exports.',
  );
}
|
|
3160
|
+
|
|
3161
|
+
/**
 * Fetches the object stored under `storageKey` in `env.PLAYS_BUCKET` and
 * returns its text content parsed as JSON.
 *
 * The parsed value is cast to `StoredPlayArtifactPayload` without any shape
 * validation.
 *
 * @param env - Coordinator environment providing the `PLAYS_BUCKET` binding.
 * @param storageKey - Key of the artifact object in the bucket.
 * @returns The parsed artifact payload.
 * @throws Error when the bucket has no object for `storageKey`; JSON parse
 *   errors from malformed content propagate unchanged.
 */
async function loadStoredPlayArtifactFromR2(
  env: CoordinatorEnv,
  storageKey: string,
): Promise<StoredPlayArtifactPayload> {
  const stored = await env.PLAYS_BUCKET.get(storageKey);
  if (stored) {
    const rawJson = await stored.text();
    return JSON.parse(rawJson) as StoredPlayArtifactPayload;
  }
  throw new Error(`Stored play artifact missing from R2: ${storageKey}`);
}
|
|
3171
|
+
|
|
3172
|
+
/**
 * Reads a required string field from dynamic workflow metadata.
 *
 * @param metadata - Untyped metadata record.
 * @param key - Name of the field to read.
 * @returns The stored string exactly as found (it is not trimmed).
 * @throws Error when the field is absent, is not a string, or contains only
 *   whitespace.
 */
function readMetadataString(
  metadata: Record<string, unknown>,
  key: keyof DynamicWorkflowMetadata,
): string {
  const candidate = metadata[key];
  if (typeof candidate === 'string' && candidate.trim() !== '') {
    return candidate;
  }
  throw new Error(`Dynamic workflow metadata missing ${key}.`);
}
|
|
3182
|
+
|
|
3183
|
+
/**
 * Converts a workflow instance status into the run-result record returned to
 * callers.
 *
 * - `status` is the value from `resolveTerminalStatus`, so an in-flight
 *   instance that already carries an error is reported as `failed`.
 * - `result` prefers `output.result`, then the raw output, then `null`.
 * - `totalRows` prefers `output.totalRows`, then `output.outputRows`.
 * - `wait` is populated only while the run is `sleeping`.
 *
 * @param runId - Identifier echoed back in the record.
 * @param status - Instance status to translate.
 * @returns The result record.
 */
function mapWorkflowResult(
  runId: string,
  status: InstanceStatus,
): Record<string, unknown> {
  const error = readWorkflowError(status);
  const resolvedStatus = resolveTerminalStatus(status, error);

  // Only an object-valued output can carry the named result fields.
  let outputFields: Record<string, unknown> | null = null;
  if (status.output && typeof status.output === 'object') {
    outputFields = status.output as Record<string, unknown>;
  }

  const waitDescriptor =
    resolvedStatus === 'sleeping' ? { kind: 'integration_event_batch' } : null;

  return {
    runId,
    status: resolvedStatus,
    result: outputFields?.result ?? status.output ?? null,
    error,
    totalRows: outputFields?.totalRows ?? outputFields?.outputRows ?? null,
    durationMs: outputFields?.durationMs ?? null,
    wait: waitDescriptor,
  };
}
|
|
3208
|
+
|
|
3209
|
+
/**
 * Translates a workflow instance's `status` field into the coordinator's run
 * status vocabulary.
 *
 * `complete` → `completed`, `errored` → `failed`, `terminated` →
 * `cancelled`, `waiting`/`paused` → `sleeping`; any other value is reported
 * as `running`.
 *
 * @param status - Instance status whose `status` field is inspected.
 * @returns The mapped run status.
 */
function mapWorkflowStatus(
  status: InstanceStatus,
): 'running' | 'sleeping' | 'completed' | 'failed' | 'cancelled' {
  switch (status.status) {
    case 'complete':
      return 'completed';
    case 'errored':
      return 'failed';
    case 'terminated':
      return 'cancelled';
    case 'waiting':
    case 'paused':
      return 'sleeping';
    default:
      return 'running';
  }
}
|
|
3219
|
+
|
|
3220
|
+
/**
 * Maps an instance status to a run status, failing fast when an in-flight
 * instance already carries an error.
 *
 * CF Dynamic Workflows silently retries a failed workflow body for roughly
 * three minutes before moving the instance to `errored`. Throughout that
 * window `status.status` remains `running`/`paused`/`waiting`, yet
 * `status.error` is filled in as soon as the first attempt fails — always
 * with CF's generic `"internal error; reference = <id>"`, never the real JS
 * error message. An error on a non-terminal instance therefore means at
 * least one attempt has thrown, and reporting `failed` immediately serves
 * the user better than waiting out CF's retry budget.
 *
 * If the failure was transient and a later retry would have succeeded, the
 * user can simply re-run. Matching on "permanent error" patterns was tried
 * before and did not help, because the message CF exposes is only ever the
 * generic reference id, not a JS error name.
 *
 * @param status - Instance status to translate.
 * @param error - Error text already extracted from `status`, or `null`.
 * @returns `failed` when the mapped status is `running`/`sleeping` and
 *   `error` is truthy; otherwise the plain mapped status.
 */
function resolveTerminalStatus(
  status: InstanceStatus,
  error: string | null,
): 'running' | 'sleeping' | 'completed' | 'failed' | 'cancelled' {
  const mapped = mapWorkflowStatus(status);
  const stillInFlight = mapped === 'running' || mapped === 'sleeping';
  return stillInFlight && error ? 'failed' : mapped;
}
|
|
3247
|
+
|
|
3248
|
+
/**
 * Extracts a human-readable error description from a workflow instance
 * status.
 *
 * A present error always yields a non-empty string. Callers use truthiness
 * of the result to decide whether the instance has failed, so an error
 * object with a missing or empty `message` must not collapse to `''`, and a
 * plain object must not be rendered as `"[object Object]"`.
 *
 * Resolution order for object errors: non-empty `message`, non-empty string
 * `name`, JSON rendering of the object, then a fixed placeholder.
 *
 * @param status - Instance status whose `error` field is inspected.
 * @returns `null` when `status.error` is falsy, otherwise a non-empty
 *   description of the error.
 */
function readWorkflowError(status: InstanceStatus): string | null {
  const error = status.error as unknown;
  if (!error) return null;
  if (typeof error === 'string') return error;

  if (typeof error === 'object') {
    const { message, name } = error as { message?: unknown; name?: unknown };

    const messageText = message == null ? '' : String(message);
    if (messageText) return messageText;

    if (typeof name === 'string' && name) return name;

    try {
      const serialized = JSON.stringify(error);
      // '{}' carries no information (e.g. an Error with an empty message).
      if (serialized && serialized !== '{}') return serialized;
    } catch {
      // Unserializable (e.g. circular) error objects fall through to the
      // placeholder below.
    }
    return 'Unknown workflow error';
  }

  return String(error);
}
|