deepline 0.1.20 → 0.1.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,6 +23,7 @@ import {
23
23
  } from '@cloudflare/dynamic-workflows';
24
24
  import type { ExecutionPlan } from '../../../shared_libs/play-runtime/execution-plan';
25
25
  import type { PlayCallGovernanceSnapshot } from '../../../shared_libs/play-runtime/scheduler-backend';
26
+ import type { PreloadedRuntimeDbSession } from '../../../shared_libs/play-runtime/db-session';
26
27
  import type {
27
28
  PlayRuntimeManifest,
28
29
  PlayRuntimeManifestMap,
@@ -54,6 +55,7 @@ export type PlayWorkflowParams = {
54
55
  executionPlan?: ExecutionPlan | null;
55
56
  childPlayManifests?: PlayRuntimeManifestMap | null;
56
57
  playCallGovernance?: PlayCallGovernanceSnapshot | null;
58
+ preloadedDbSessions?: PreloadedRuntimeDbSession[] | null;
57
59
  dynamicWorkerCode?: string | null;
58
60
  executorToken: string;
59
61
  baseUrl: string;
@@ -115,6 +117,57 @@ type CoordinatorPerfTraceInput = {
115
117
 
116
118
  type CoordinatorPerfTraceSink = (event: CoordinatorPerfTraceInput) => void;
117
119
 
120
/**
 * Final outcome of a run, as exchanged with the coordinator's
 * `terminal-set` / `terminal-get` storage endpoints.
 */
type CoordinatorTerminalState = {
  /** Identifier of the run this record belongs to. */
  runId: string;
  /** Terminal outcome of the run. */
  status: 'completed' | 'failed' | 'cancelled';

  /** Run output, when one was produced. */
  result?: unknown;
  /** Failure message, when one is available. */
  error?: string | null;

  /** Row count reported by the run; left untyped because it is stored as-is. */
  totalRows?: unknown;
  /** Duration reported by the run; left untyped because it is stored as-is. */
  durationMs?: unknown;
  /** Name of the play that ran, when known. */
  playName?: string | null;

  /** Completion timestamp; writers in this file fill it from `Date.now()` when omitted. */
  completedAt?: number;
};
130
+
131
/** Envelope fields carried by every coordinator run event. */
type CoordinatorRunEventBase = {
  /** Position in the run's event log — presumably assigned by the event store; confirm. */
  seq?: number;
  /** Run the event belongs to. */
  runId: string;
  /** Event timestamp (producers in this file use `Date.now()`). */
  ts: number;
};

/**
 * Live event emitted for a run, discriminated by `type`:
 * - `status`: a status change, optionally with log lines.
 * - `log`: a single log line.
 * - `progress`: a status update plus the currently active node/table.
 * - `terminal`: the final outcome of the run.
 */
type CoordinatorRunEvent =
  | (CoordinatorRunEventBase & {
      type: 'status';
      status: string;
      logs?: string[];
    })
  | (CoordinatorRunEventBase & {
      type: 'log';
      line: string;
    })
  | (CoordinatorRunEventBase & {
      type: 'progress';
      status: string;
      logs?: string[];
      activeNodeId?: string | null;
      activeArtifactTableNamespace?: string | null;
      updatedAt?: number | null;
    })
  | (CoordinatorRunEventBase & {
      type: 'terminal';
      status: 'completed' | 'failed' | 'cancelled';
      result?: unknown;
      error?: string | null;
      totalRows?: unknown;
      durationMs?: unknown;
      playName?: string | null;
    });
170
+
118
171
  type InlineWorkerRunResponse = {
119
172
  status?: 'completed' | 'failed';
120
173
  result?: unknown;
@@ -176,7 +229,12 @@ interface CoordinatorEnv {
176
229
  HARNESS?: import('../../play-harness-worker/src/rpc-types').PlayHarnessRpc;
177
230
  }
178
231
 
179
- const WORKFLOW_READ_ONLY_ACTIONS = new Set(['', 'observe', 'result', 'status']);
232
+ const WORKFLOW_READ_ONLY_ACTIONS = new Set([
233
+ '',
234
+ 'result',
235
+ 'status',
236
+ 'tail',
237
+ ]);
180
238
 
181
239
  function authorizeCoordinatorControlRequest(input: {
182
240
  request: Request;
@@ -327,6 +385,106 @@ async function listCoordinatorPerfTrace(
327
385
  );
328
386
  }
329
387
 
388
/**
 * Stores the terminal state of a run in the `PLAY_DEDUP` object addressed by
 * the run id.
 *
 * `completedAt` is filled with the current time when the caller omitted it.
 *
 * @param env - Coordinator bindings; only `PLAY_DEDUP` is used.
 * @param state - Terminal state to persist.
 * @throws Error when the storage endpoint answers with a non-2xx status.
 */
async function writeCoordinatorTerminalState(
  env: CoordinatorEnv,
  state: CoordinatorTerminalState,
): Promise<void> {
  const dedupStub = env.PLAY_DEDUP.get(env.PLAY_DEDUP.idFromName(state.runId));
  const payload = {
    ...state,
    completedAt: state.completedAt ?? Date.now(),
  };
  const response = await dedupStub.fetch(
    'https://deepline.dedup.internal/terminal-set',
    {
      method: 'POST',
      headers: { 'content-type': 'application/json' },
      body: JSON.stringify(payload),
    },
  );
  if (response.ok) return;
  throw new Error(`coordinator terminal set failed ${response.status}`);
}
408
+
409
/**
 * Appends one event to the run's event log, held by the `PLAY_DEDUP` object
 * addressed by `event.runId`.
 *
 * @param env - Coordinator bindings; only `PLAY_DEDUP` is used.
 * @param event - Event to append; it is sent as JSON unchanged.
 * @throws Error when the storage endpoint answers with a non-2xx status.
 */
async function appendCoordinatorRunEvent(
  env: CoordinatorEnv,
  event: CoordinatorRunEvent,
): Promise<void> {
  const dedupStub = env.PLAY_DEDUP.get(env.PLAY_DEDUP.idFromName(event.runId));
  const request = {
    method: 'POST',
    headers: { 'content-type': 'application/json' },
    body: JSON.stringify(event),
  };
  const response = await dedupStub.fetch(
    'https://deepline.dedup.internal/event-add',
    request,
  );
  if (response.ok) return;
  throw new Error(`coordinator event append failed ${response.status}`);
}
423
+
424
/**
 * Reads the events of a run that come after a cursor from the `PLAY_DEDUP`
 * object addressed by the run id.
 *
 * @param input.env - Coordinator bindings; only `PLAY_DEDUP` is used.
 * @param input.runId - Run whose events are requested.
 * @param input.afterSeq - Cursor; sent as a non-negative integer.
 * @param input.timeoutMs - Forwarded to the endpoint as a non-negative integer
 *   (presumably a long-poll budget — confirm against the storage object).
 * @returns The events that belong to `runId` and carry a string `type` and a
 *   numeric `ts`, plus the latest sequence number reported by the endpoint.
 *   When the endpoint reports none, the cursor that was sent is returned.
 * @throws Error when the storage endpoint answers with a non-2xx status.
 */
async function listCoordinatorRunEvents(input: {
  env: CoordinatorEnv;
  runId: string;
  afterSeq: number;
  timeoutMs: number;
}): Promise<{ events: CoordinatorRunEvent[]; latestSeq: number }> {
  // Math.max(0, NaN) is NaN and Math.floor(Infinity) is Infinity, so without
  // this guard a bad cursor/timeout would be sent as "NaN" or "Infinity".
  const toNonNegativeInteger = (value: number): number =>
    Number.isFinite(value) ? Math.max(0, Math.floor(value)) : 0;
  const afterSeq = toNonNegativeInteger(input.afterSeq);
  const timeoutMs = toNonNegativeInteger(input.timeoutMs);

  const query = new URLSearchParams({
    afterSeq: String(afterSeq),
    timeoutMs: String(timeoutMs),
  });
  const stub = input.env.PLAY_DEDUP.get(
    input.env.PLAY_DEDUP.idFromName(input.runId),
  );
  const response = await stub.fetch(
    `https://deepline.dedup.internal/event-list?${query.toString()}`,
  );
  if (!response.ok) {
    throw new Error(`coordinator event list failed ${response.status}`);
  }

  // An unparsable body is treated as "no events".
  const body = (await response.json().catch(() => ({}))) as {
    events?: unknown;
    latestSeq?: unknown;
  };

  // Only the envelope (runId/type/ts) is validated here; per-type fields are
  // passed through as stored.
  const events = Array.isArray(body.events)
    ? body.events.filter(
        (event): event is CoordinatorRunEvent =>
          isRecord(event) &&
          typeof event.runId === 'string' &&
          event.runId === input.runId &&
          typeof event.type === 'string' &&
          typeof event.ts === 'number',
      )
    : [];

  // Fall back to the cursor that was actually sent so the caller never
  // receives a NaN/negative cursor back.
  const latestSeq =
    typeof body.latestSeq === 'number' && Number.isFinite(body.latestSeq)
      ? body.latestSeq
      : afterSeq;

  return { events, latestSeq };
}
461
+
462
/**
 * Loads the stored terminal state of a run from the `PLAY_DEDUP` object
 * addressed by the run id.
 *
 * @param env - Coordinator bindings; only `PLAY_DEDUP` is used.
 * @param runId - Run whose terminal state is requested.
 * @returns The stored state, or `null` when nothing usable is stored: the
 *   payload is not a record, belongs to another run, or its status is not one
 *   of `completed` / `failed` / `cancelled`.
 * @throws Error when the storage endpoint answers with a non-2xx status.
 */
async function readCoordinatorTerminalState(
  env: CoordinatorEnv,
  runId: string,
): Promise<CoordinatorTerminalState | null> {
  const dedupStub = env.PLAY_DEDUP.get(env.PLAY_DEDUP.idFromName(runId));
  const response = await dedupStub.fetch(
    'https://deepline.dedup.internal/terminal-get',
  );
  if (!response.ok) {
    throw new Error(`coordinator terminal get failed ${response.status}`);
  }

  // An unparsable body is treated as "nothing stored".
  const { state } = (await response.json().catch(() => ({}))) as {
    state?: unknown;
  };
  if (!isRecord(state)) return null;
  if (state.runId !== runId) return null;

  switch (state.status) {
    case 'completed':
    case 'failed':
    case 'cancelled':
      return state as CoordinatorTerminalState;
    default:
      return null;
  }
}
487
+
330
488
  function workflowEventType(name: string): string {
331
489
  const normalized = name
332
490
  .trim()
@@ -361,17 +519,20 @@ type PooledWorkflowBootstrapPayload = {
361
519
  };
362
520
 
363
521
  const WORKFLOW_POOL_PROTOCOL_VERSION =
364
- 'pooled-workflow-wait-v4-waiting-only';
522
+ 'pooled-workflow-wait-v14-ready-signal-http-storage';
365
523
  const WORKFLOW_POOL_DO_NAME = 'workflow-pool:v2';
366
524
  const WORKFLOW_POOL_START_EVENT_TYPE = 'play_start';
367
525
  const WORKFLOW_POOL_TTL_MS = 8 * 60 * 1000;
368
- const WORKFLOW_POOL_TARGET_SIZE = 2;
526
+ const WORKFLOW_POOL_TARGET_SIZE = 16;
369
527
  const WORKFLOW_POOL_READY_TIMEOUT_MS = 1_500;
370
528
  const WORKFLOW_POOL_READY_POLL_MS = 250;
371
529
  const WORKFLOW_POOL_REFILL_ON_MISS_TIMEOUT_MS = 2_500;
372
- const WORKFLOW_POOL_REFILL_ON_MISS_MIN_AVAILABLE = 1;
530
+ const WORKFLOW_POOL_REFILL_ON_MISS_MIN_AVAILABLE = 4;
373
531
  const WORKFLOW_POOL_CONTROL_TIMEOUT_MS = 750;
374
-
532
+ const SUBMIT_INITIAL_STATE_MAX_WAIT_MS = 0;
533
+ const SUBMIT_INITIAL_STATE_POLL_MS = 50;
534
+ const WORKFLOW_POOL_DISABLED_REASON =
535
+ 'Cloudflare Workflows start runs directly; waitForEvent is reserved for real durable external waits.';
375
536
  function buildDynamicWorkflowMetadata(
376
537
  params: PlayWorkflowParams,
377
538
  ): DynamicWorkflowMetadata {
@@ -442,13 +603,45 @@ function readWorkflowTraceContext(event: unknown): {
442
603
  }
443
604
 
444
605
  function workflowPoolEnabled(): boolean {
445
- return true;
606
+ return false;
446
607
  }
447
608
 
448
609
  function workflowPoolTargetSize(): number {
449
610
  return WORKFLOW_POOL_TARGET_SIZE;
450
611
  }
451
612
 
613
/**
 * Polls a freshly submitted workflow instance for a short, bounded time and
 * returns its mapped result if it reaches a terminal status within that time.
 *
 * The wait budget is `input.waitMs`, floored and clamped to the range
 * `[0, SUBMIT_INITIAL_STATE_MAX_WAIT_MS]`; a budget of zero skips polling.
 *
 * @param input.instance - Workflow instance to poll.
 * @param input.runId - Run id passed to `mapWorkflowResult`.
 * @param input.waitMs - Requested wait budget in milliseconds.
 * @returns The mapped terminal result (`completed`, `failed` or `cancelled`),
 *   or `null` when there is no budget or the run is not terminal in time.
 */
async function waitForSubmitInitialState(input: {
  instance: WorkflowInstance;
  runId: string;
  waitMs: number;
}): Promise<Record<string, unknown> | null> {
  const requestedMs = Math.floor(input.waitMs);
  const budgetMs = Math.max(
    0,
    Math.min(requestedMs, SUBMIT_INITIAL_STATE_MAX_WAIT_MS),
  );
  if (budgetMs <= 0) return null;

  const startedAt = Date.now();
  let latest = await input.instance.status();

  // Maps the most recent status and yields it only when it is terminal.
  const terminalOrNull = (): Record<string, unknown> | null => {
    const mapped = mapWorkflowResult(input.runId, latest);
    switch (mapped.status) {
      case 'completed':
      case 'failed':
      case 'cancelled':
        return mapped as unknown as Record<string, unknown>;
      default:
        return null;
    }
  };

  while (Date.now() - startedAt < budgetMs) {
    const settled = terminalOrNull();
    if (settled !== null) return settled;
    await sleep(SUBMIT_INITIAL_STATE_POLL_MS);
    latest = await input.instance.status();
  }

  // Budget exhausted: report the last observed status if it is terminal.
  return terminalOrNull();
}
644
+
452
645
  async function createDynamicWorkflowInstance(input: {
453
646
  env: CoordinatorEnv;
454
647
  id: string;
@@ -477,23 +670,32 @@ async function callWorkflowPool<T>(
477
670
  1,
478
671
  Math.floor(init?.timeoutMs ?? WORKFLOW_POOL_CONTROL_TIMEOUT_MS),
479
672
  );
480
- const controller = new AbortController();
481
- const timer = setTimeout(() => controller.abort(), timeoutMs);
673
+ let timeoutId: ReturnType<typeof setTimeout> | null = null;
482
674
  try {
483
675
  const fetchInit: RequestInit = { ...(init ?? {}) };
484
676
  delete (fetchInit as { timeoutMs?: number }).timeoutMs;
485
677
  delete fetchInit.signal;
486
- const response = await workflowPoolDurableObject(env).fetch(
487
- `https://deepline.workflow-pool.internal${path}`,
488
- {
489
- ...fetchInit,
490
- signal: controller.signal,
491
- headers: {
492
- 'content-type': 'application/json',
493
- ...(init?.headers ?? {}),
678
+ const response = await Promise.race([
679
+ workflowPoolDurableObject(env).fetch(
680
+ `https://deepline.workflow-pool.internal${path}`,
681
+ {
682
+ ...fetchInit,
683
+ headers: {
684
+ 'content-type': 'application/json',
685
+ ...(init?.headers ?? {}),
686
+ },
494
687
  },
495
- },
496
- );
688
+ ),
689
+ new Promise<Response>((_, reject) => {
690
+ timeoutId = setTimeout(
691
+ () =>
692
+ reject(
693
+ new Error(`workflow pool ${path} timed out after ${timeoutMs}ms`),
694
+ ),
695
+ timeoutMs,
696
+ );
697
+ }),
698
+ ]);
497
699
  if (!response.ok) {
498
700
  throw new Error(
499
701
  `workflow pool ${path} failed ${response.status}: ${(
@@ -511,7 +713,7 @@ async function callWorkflowPool<T>(
511
713
  }
512
714
  throw error;
513
715
  } finally {
514
- clearTimeout(timer);
716
+ if (timeoutId) clearTimeout(timeoutId);
515
717
  }
516
718
  }
517
719
 
@@ -531,6 +733,7 @@ type WorkflowPoolRefillResult = WorkflowPoolCounts & {
531
733
 
532
734
  type WorkflowPoolListEntry = {
533
735
  id: string;
736
+ state: string;
534
737
  createdAt: number;
535
738
  readyAt: number | null;
536
739
  expiresAt: number;
@@ -568,6 +771,7 @@ async function listWorkflowPoolEntries(
568
771
  )
569
772
  .map((entry) => ({
570
773
  id: typeof entry.id === 'string' ? entry.id : '',
774
+ state: typeof entry.state === 'string' ? entry.state : '',
571
775
  createdAt:
572
776
  typeof entry.createdAt === 'number' && Number.isFinite(entry.createdAt)
573
777
  ? entry.createdAt
@@ -602,6 +806,24 @@ async function addWorkflowPoolIds(
602
806
  });
603
807
  }
604
808
 
809
/**
 * Sends the ready signal for a pooled workflow to the pool's `/pool-ready`
 * endpoint, tagged with the current pool protocol version.
 *
 * @param env - Coordinator bindings forwarded to `callWorkflowPool`.
 * @param poolId - Id of the pooled workflow instance reporting readiness.
 * @returns `true` only when the pool answered with `ready: true`.
 */
async function markWorkflowPoolIdReady(
  env: CoordinatorEnv,
  poolId: string,
): Promise<boolean> {
  const payload = JSON.stringify({
    poolId,
    version: WORKFLOW_POOL_PROTOCOL_VERSION,
  });
  const { ready } = await callWorkflowPool<{ ready?: unknown }>(
    env,
    '/pool-ready',
    { method: 'POST', body: payload },
  );
  return ready === true;
}
826
+
605
827
  async function promoteWorkflowPoolIds(
606
828
  env: CoordinatorEnv,
607
829
  ids: string[],
@@ -632,13 +854,14 @@ async function deleteWorkflowPoolIds(
632
854
 
633
855
  async function leaseWorkflowPoolId(
634
856
  env: CoordinatorEnv,
857
+ runId: string,
635
858
  ): Promise<string | null> {
636
859
  const body = await callWorkflowPool<{ id?: unknown }>(
637
860
  env,
638
- `/pool-lease?version=${encodeURIComponent(WORKFLOW_POOL_PROTOCOL_VERSION)}`,
861
+ `/pool-claim?version=${encodeURIComponent(WORKFLOW_POOL_PROTOCOL_VERSION)}`,
639
862
  {
640
863
  method: 'POST',
641
- body: '{}',
864
+ body: JSON.stringify({ runId }),
642
865
  },
643
866
  );
644
867
  return typeof body.id === 'string' && body.id ? body.id : null;
@@ -702,27 +925,36 @@ function workflowStatusName(status: InstanceStatus | null): string {
702
925
  }
703
926
 
704
927
  function workflowPoolStatusIsReady(statusName: string): boolean {
705
- // Only a Workflow explicitly blocked on waitForEvent is safe to lease.
706
- // A generic "running" instance may already be executing a previous play; if
707
- // we send a new start event to it, the submitted run can stay running forever
708
- // while callbacks continue updating the previous runId.
709
- return statusName === 'waiting';
928
+ // This is only a liveness guard. Readiness itself comes from the pooled
929
+ // Workflow calling /pool-ready after waitForEvent("play_start") has been
930
+ // created, because Cloudflare may report an armed wait as "running".
931
+ return statusName === 'running' || statusName === 'waiting';
710
932
  }
711
933
 
712
- async function waitForWorkflowPoolReady(instance: WorkflowInstance): Promise<{
934
+ async function waitForWorkflowPoolReadySignal(input: {
935
+ env: CoordinatorEnv;
936
+ instance: WorkflowInstance;
937
+ poolId: string;
938
+ }): Promise<{
713
939
  ready: boolean;
714
940
  status: string;
715
941
  ms: number;
716
942
  polls: number;
717
943
  }> {
718
944
  const startedAt = Date.now();
719
- let lastStatus: InstanceStatus | null = null;
945
+ let lastStatusName = 'unknown';
720
946
  let polls = 0;
721
947
  while (Date.now() - startedAt < WORKFLOW_POOL_READY_TIMEOUT_MS) {
722
- lastStatus = await instance.status();
723
948
  polls += 1;
724
- const statusName = workflowStatusName(lastStatus);
725
- if (workflowPoolStatusIsReady(statusName)) {
949
+ const [entry, status] = await Promise.all([
950
+ listWorkflowPoolEntries(input.env)
951
+ .then((entries) => entries.find((candidate) => candidate.id === input.poolId))
952
+ .catch(() => undefined),
953
+ input.instance.status().catch(() => null),
954
+ ]);
955
+ const statusName = workflowStatusName(status);
956
+ lastStatusName = statusName;
957
+ if (entry?.state === 'ready' && entry.readyAt !== null) {
726
958
  return {
727
959
  ready: true,
728
960
  status: statusName,
@@ -733,7 +965,8 @@ async function waitForWorkflowPoolReady(instance: WorkflowInstance): Promise<{
733
965
  if (
734
966
  statusName === 'complete' ||
735
967
  statusName === 'errored' ||
736
- statusName === 'terminated'
968
+ statusName === 'terminated' ||
969
+ statusName === 'unknown'
737
970
  ) {
738
971
  return {
739
972
  ready: false,
@@ -742,13 +975,11 @@ async function waitForWorkflowPoolReady(instance: WorkflowInstance): Promise<{
742
975
  polls,
743
976
  };
744
977
  }
745
- await new Promise((resolve) =>
746
- setTimeout(resolve, WORKFLOW_POOL_READY_POLL_MS),
747
- );
978
+ await sleep(WORKFLOW_POOL_READY_POLL_MS);
748
979
  }
749
980
  return {
750
981
  ready: false,
751
- status: workflowStatusName(lastStatus),
982
+ status: lastStatusName,
752
983
  ms: Date.now() - startedAt,
753
984
  polls,
754
985
  };
@@ -775,11 +1006,13 @@ async function refillWorkflowPoolOnce(
775
1006
  for (const entry of warmingEntries) {
776
1007
  const instance = await env.PLAY_WORKFLOW.get(entry.id);
777
1008
  try {
1009
+ if (entry.state === 'ready' && entry.readyAt !== null) {
1010
+ promotedIds.push(entry.id);
1011
+ continue;
1012
+ }
778
1013
  const status = await instance.status().catch(() => null);
779
1014
  const statusName = workflowStatusName(status);
780
- if (workflowPoolStatusIsReady(statusName)) {
781
- promotedIds.push(entry.id);
782
- } else if (
1015
+ if (
783
1016
  statusName === 'complete' ||
784
1017
  statusName === 'errored' ||
785
1018
  statusName === 'terminated' ||
@@ -808,48 +1041,64 @@ async function refillWorkflowPoolOnce(
808
1041
  removed: removedIds.length,
809
1042
  };
810
1043
  }
811
- const readyCreatedIds: string[] = [];
812
- const warmingCreatedIds: string[] = [];
813
- for (let i = 0; i < needed; i += 1) {
814
- const poolId = `pool-v2-${Date.now().toString(36)}-${crypto.randomUUID().slice(0, 12)}`;
815
- const instance = await env.PLAY_WORKFLOW.create({
816
- id: poolId,
817
- params: {
818
- __deeplinePooledWorkflow: true,
819
- poolId,
820
- createdAt: Date.now(),
821
- } satisfies PooledWorkflowBootstrapPayload,
822
- });
823
- try {
824
- const readiness = await waitForWorkflowPoolReady(instance);
825
- recordCoordinatorPerfTrace({
826
- runId: poolId,
827
- phase: 'coordinator.workflow_pool_ready',
828
- ms: readiness.ms,
829
- graphHash: 'workflow-pool',
830
- extra: {
831
- ready: readiness.ready,
832
- status: readiness.status,
833
- polls: readiness.polls,
834
- },
1044
+ const created = await Promise.all(
1045
+ Array.from({ length: needed }, async () => {
1046
+ const poolId = `pool-v2-${Date.now().toString(36)}-${crypto.randomUUID().slice(0, 12)}`;
1047
+ await addWorkflowPoolIds(env, [poolId], { ready: false });
1048
+ const instance = await env.PLAY_WORKFLOW.create({
1049
+ id: poolId,
1050
+ params: {
1051
+ __deeplinePooledWorkflow: true,
1052
+ poolId,
1053
+ createdAt: Date.now(),
1054
+ } satisfies PooledWorkflowBootstrapPayload,
835
1055
  });
836
- if (readiness.ready) {
837
- readyCreatedIds.push(poolId);
838
- } else if (
839
- readiness.status === 'complete' ||
840
- readiness.status === 'errored' ||
841
- readiness.status === 'terminated' ||
842
- readiness.status === 'unknown'
843
- ) {
844
- removedIds.push(poolId);
845
- await instance.terminate().catch(() => undefined);
846
- } else {
847
- warmingCreatedIds.push(poolId);
1056
+ try {
1057
+ const readiness = await waitForWorkflowPoolReadySignal({
1058
+ env,
1059
+ instance,
1060
+ poolId,
1061
+ });
1062
+ recordCoordinatorPerfTrace({
1063
+ runId: poolId,
1064
+ phase: 'coordinator.workflow_pool_ready',
1065
+ ms: readiness.ms,
1066
+ graphHash: 'workflow-pool',
1067
+ extra: {
1068
+ ready: readiness.ready,
1069
+ status: readiness.status,
1070
+ polls: readiness.polls,
1071
+ },
1072
+ });
1073
+ if (readiness.ready) {
1074
+ return { id: poolId, state: 'ready' as const };
1075
+ }
1076
+ if (
1077
+ readiness.status === 'complete' ||
1078
+ readiness.status === 'errored' ||
1079
+ readiness.status === 'terminated' ||
1080
+ readiness.status === 'unknown'
1081
+ ) {
1082
+ await instance.terminate().catch(() => undefined);
1083
+ return { id: poolId, state: 'removed' as const };
1084
+ }
1085
+ return { id: poolId, state: 'warming' as const };
1086
+ } finally {
1087
+ disposeRpcStub(instance);
848
1088
  }
849
- } finally {
850
- disposeRpcStub(instance);
851
- }
852
- }
1089
+ }),
1090
+ );
1091
+ const readyCreatedIds = created
1092
+ .filter((entry) => entry.state === 'ready')
1093
+ .map((entry) => entry.id);
1094
+ const warmingCreatedIds = created
1095
+ .filter((entry) => entry.state === 'warming')
1096
+ .map((entry) => entry.id);
1097
+ removedIds.push(
1098
+ ...created
1099
+ .filter((entry) => entry.state === 'removed')
1100
+ .map((entry) => entry.id),
1101
+ );
853
1102
  await Promise.all([
854
1103
  addWorkflowPoolIds(env, readyCreatedIds, { ready: true }),
855
1104
  addWorkflowPoolIds(env, warmingCreatedIds, { ready: false }),
@@ -928,7 +1177,10 @@ async function submitViaPooledWorkflow(input: {
928
1177
  }
929
1178
  const leaseStartedAt = Date.now();
930
1179
  let leaseError: string | null = null;
931
- let pooledInstanceId = await leaseWorkflowPoolId(input.env).catch((error) => {
1180
+ const pooledInstanceId = await leaseWorkflowPoolId(
1181
+ input.env,
1182
+ input.params.runId,
1183
+ ).catch((error) => {
932
1184
  leaseError = error instanceof Error ? error.message : String(error);
933
1185
  return null;
934
1186
  });
@@ -952,50 +1204,26 @@ async function submitViaPooledWorkflow(input: {
952
1204
  });
953
1205
 
954
1206
  if (!pooledInstanceId) {
955
- // A pool miss is often a timing gap rather than a true lack of warm
956
- // capacity. Wait briefly for refill/promotion, then retry lease once
957
- // before falling back to a cold workflow create.
958
- const refillStartedAt = Date.now();
959
- const refillResult = await refillWorkflowPool(input.env, {
960
- waitReady: true,
961
- minAvailable: WORKFLOW_POOL_REFILL_ON_MISS_MIN_AVAILABLE,
962
- waitTimeoutMs: WORKFLOW_POOL_REFILL_ON_MISS_TIMEOUT_MS,
963
- }).catch(() => null);
1207
+ // A pool miss must not block the user path. Refilling is handled by the
1208
+ // caller's waitUntil after submit, so fall through to cold create now.
1209
+ const counts = missCounts ?? (await workflowPoolCount(input.env).catch(() => null));
964
1210
  input.recordSubmitTiming({
965
1211
  phase: 'coordinator.workflow_pool_refill_on_miss',
966
- ms: Date.now() - refillStartedAt,
1212
+ ms: 0,
967
1213
  graphHash: input.params.graphHash ?? null,
968
- extra:
969
- refillResult === null
970
- ? { ok: false }
971
- : {
972
- ok: true,
973
- available: refillResult.available,
974
- warming: refillResult.warming,
975
- created: refillResult.created,
976
- promoted: refillResult.promoted,
977
- removed: refillResult.removed,
978
- waitedMs: refillResult.waitedMs,
979
- waitIterations: refillResult.waitIterations,
980
- },
1214
+ extra: {
1215
+ skipped: true,
1216
+ reason: 'pool_miss_does_not_block_submit',
1217
+ ...(counts
1218
+ ? {
1219
+ available: counts.available,
1220
+ warming: counts.warming,
1221
+ waitedMs: 0,
1222
+ waitIterations: 0,
1223
+ }
1224
+ : {}),
1225
+ },
981
1226
  });
982
- if (refillResult?.available) {
983
- const retryStartedAt = Date.now();
984
- let retryLeaseError: string | null = null;
985
- pooledInstanceId = await leaseWorkflowPoolId(input.env).catch((error) => {
986
- retryLeaseError = error instanceof Error ? error.message : String(error);
987
- return null;
988
- });
989
- input.recordSubmitTiming({
990
- phase: 'coordinator.workflow_pool_lease_retry',
991
- ms: Date.now() - retryStartedAt,
992
- graphHash: input.params.graphHash ?? null,
993
- extra: {
994
- pooled: Boolean(pooledInstanceId),
995
- ...(retryLeaseError ? { error: retryLeaseError } : {}),
996
- },
997
- });
998
- }
999
1227
  }
1000
1228
 
1001
1229
  if (!pooledInstanceId) {
@@ -1032,20 +1260,6 @@ async function submitViaPooledWorkflow(input: {
1032
1260
  });
1033
1261
  return null;
1034
1262
  }
1035
- try {
1036
- await mapRunToWorkflowInstance({
1037
- env: input.env,
1038
- runId: input.params.runId,
1039
- instanceId: pooledInstanceId,
1040
- });
1041
- } catch (error) {
1042
- disposeRpcStub(instance);
1043
- throw new Error(
1044
- `workflow pool mapRunToWorkflowInstance failed after pooled workflow start for ${input.params.runId}: ${
1045
- error instanceof Error ? error.message : String(error)
1046
- }`,
1047
- );
1048
- }
1049
1263
  input.recordSubmitTiming({
1050
1264
  phase: 'coordinator.workflow_pool_send_event',
1051
1265
  ms: Date.now() - sendStartedAt,
@@ -1416,6 +1630,7 @@ function runRequestFromPlayWorkflowParams(params: PlayWorkflowParams): Record<st
1416
1630
  executionPlan: params.executionPlan ?? null,
1417
1631
  childPlayManifests: params.childPlayManifests ?? null,
1418
1632
  playCallGovernance: params.playCallGovernance ?? null,
1633
+ preloadedDbSessions: params.preloadedDbSessions ?? null,
1419
1634
  coordinatorUrl: params.coordinatorUrl ?? null,
1420
1635
  totalRows: params.totalRows,
1421
1636
  };
@@ -1770,6 +1985,13 @@ export class RuntimeApi extends WorkerEntrypoint<CoordinatorEnv, undefined> {
1770
1985
  ? this.env.DEEPLINE_API_BASE_URL.trim()
1771
1986
  : 'https://code.deepline.com';
1772
1987
  const target = new URL(incoming.pathname + incoming.search, apiBaseUrl);
1988
+ const runtimeStatusBody =
1989
+ incoming.pathname === '/api/v2/plays/internal/runtime'
1990
+ ? await request
1991
+ .clone()
1992
+ .json()
1993
+ .catch(() => null)
1994
+ : null;
1773
1995
  const forwarded = new Request(target.toString(), request);
1774
1996
  const bypassToken = this.env.VERCEL_PROTECTION_BYPASS_TOKEN;
1775
1997
  if (typeof bypassToken === 'string' && bypassToken) {
@@ -1785,9 +2007,41 @@ export class RuntimeApi extends WorkerEntrypoint<CoordinatorEnv, undefined> {
1785
2007
  `[RUNTIME_API] ${incoming.pathname} failed: status=${res.status} ` +
1786
2008
  `target=${target.toString()} body=${body.slice(0, 500)}`,
1787
2009
  );
2010
+ } else {
2011
+ await this.recordRuntimeStatusEvent(runtimeStatusBody).catch(() => null);
1788
2012
  }
1789
2013
  return res;
1790
2014
  }
2015
+
2016
/**
 * Mirrors an `update_run_status` runtime request into the run's event log as
 * a `progress` event.
 *
 * Bodies that are not records, carry another action, or lack a non-empty
 * string `playId` / `status` are ignored.
 *
 * @param body - Parsed JSON body of the forwarded runtime request.
 */
private async recordRuntimeStatusEvent(body: unknown): Promise<void> {
  if (!isRecord(body)) return;
  if (body.action !== 'update_run_status') return;

  const {
    playId,
    status,
    liveLogs,
    activeNodeId,
    activeArtifactTableNamespace,
    lastCheckpointAt,
  } = body;
  // The request's playId is what the event log uses as the run id.
  if (typeof playId !== 'string' || playId === '') return;
  if (typeof status !== 'string' || status === '') return;

  // Keep only string log lines; omit the field when no array was sent.
  const logs = Array.isArray(liveLogs)
    ? liveLogs.filter((line): line is string => typeof line === 'string')
    : undefined;

  await appendCoordinatorRunEvent(this.env, {
    runId: playId,
    type: 'progress',
    status,
    ts: Date.now(),
    logs,
    activeNodeId: typeof activeNodeId === 'string' ? activeNodeId : null,
    activeArtifactTableNamespace:
      typeof activeArtifactTableNamespace === 'string'
        ? activeArtifactTableNamespace
        : null,
    updatedAt: typeof lastCheckpointAt === 'number' ? lastCheckpointAt : null,
  });
}
1791
2045
  }
1792
2046
 
1793
2047
  export class CoordinatorControl extends WorkerEntrypoint<
@@ -1855,6 +2109,13 @@ export class CoordinatorControl extends WorkerEntrypoint<
1855
2109
  }
1856
2110
  await appendCoordinatorPerfTrace(this.env, payload);
1857
2111
  }
2112
+
2113
/**
 * Appends an event to a run's event log after checking that the event is
 * addressed to that run.
 *
 * @param runId - Run the caller intends to write to; must be non-empty.
 * @param event - Event to append; its `runId` must equal `runId`.
 * @throws Error when `runId` is empty or differs from `event.runId`.
 */
async recordRunEvent(runId: string, event: CoordinatorRunEvent): Promise<void> {
  const addressedToRun = Boolean(runId) && event.runId === runId;
  if (!addressedToRun) {
    throw new Error('Run event runId mismatch.');
  }
  await appendCoordinatorRunEvent(this.env, event);
}
1858
2119
  }
1859
2120
 
1860
2121
  /**
@@ -1889,6 +2150,7 @@ export class DynamicWorkflow extends WorkflowEntrypoint<
1889
2150
  });
1890
2151
  let dispatchedEvent = event;
1891
2152
  if (isPooledWorkflowBootstrapPayload(workflowEvent.payload)) {
2153
+ const pooledPayload = workflowEvent.payload;
1892
2154
  const waitingStep = step as {
1893
2155
  waitForEvent<T>(
1894
2156
  name: string,
@@ -1896,10 +2158,19 @@ export class DynamicWorkflow extends WorkflowEntrypoint<
1896
2158
  ): Promise<{ payload: Readonly<T>; timestamp: Date; type: string }>;
1897
2159
  };
1898
2160
  const waitStartedAt = Date.now();
1899
- const startEvent = await waitingStep.waitForEvent<DispatcherEnvelope>(
2161
+ const startEventPromise = waitingStep.waitForEvent<DispatcherEnvelope>(
1900
2162
  'wait for pooled play start',
1901
2163
  { type: WORKFLOW_POOL_START_EVENT_TYPE, timeout: '10 minutes' },
1902
2164
  );
2165
+ await markWorkflowPoolIdReady(this.env, pooledPayload.poolId).catch(
2166
+ (error) => {
2167
+ console.warn('[coordinator.workflow_pool] ready signal failed', {
2168
+ poolId: pooledPayload.poolId,
2169
+ message: error instanceof Error ? error.message : String(error),
2170
+ });
2171
+ },
2172
+ );
2173
+ const startEvent = await startEventPromise;
1903
2174
  dispatchedEvent = {
1904
2175
  payload: startEvent.payload,
1905
2176
  timestamp: startEvent.timestamp,
@@ -2001,6 +2272,18 @@ export class DynamicWorkflow extends WorkflowEntrypoint<
2001
2272
  run(e: unknown, s: unknown): Promise<unknown>;
2002
2273
  }
2003
2274
  ).run(innerEvent, innerStep);
2275
+ const output = isRecord(result) ? result : null;
2276
+ await writeCoordinatorTerminalState(env, {
2277
+ runId: runIdForTrace,
2278
+ status: 'completed',
2279
+ result: output?.result ?? result,
2280
+ totalRows: output?.totalRows ?? output?.outputRows ?? null,
2281
+ durationMs: output?.durationMs ?? null,
2282
+ playName:
2283
+ typeof output?.playName === 'string'
2284
+ ? output.playName
2285
+ : null,
2286
+ });
2004
2287
  trace({
2005
2288
  runId: runIdForTrace,
2006
2289
  phase: 'coordinator.runner_run',
@@ -2038,6 +2321,14 @@ export class DynamicWorkflow extends WorkflowEntrypoint<
2038
2321
  },
2039
2322
  );
2040
2323
  });
2324
+ await writeCoordinatorTerminalState(env, {
2325
+ runId: runIdForTrace,
2326
+ status: 'failed',
2327
+ error:
2328
+ innerError instanceof Error
2329
+ ? innerError.message
2330
+ : String(innerError),
2331
+ }).catch(() => undefined);
2041
2332
  throw innerError;
2042
2333
  }
2043
2334
  },
@@ -2051,7 +2342,7 @@ const coordinatorEntrypoint = {
2051
2342
  /**
2052
2343
  * HTTP entrypoint for the Vercel app to dispatch into. Routes:
2053
2344
  * POST /workflow/{runId}/submit → PLAY_WORKFLOW.create({ id, params })
2054
- * GET /workflow/{runId}/observe polling-compatible status snapshot
2345
+ * GET /workflow/{runId}/tail ordered live run events after a cursor
2055
2346
  * POST /workflow/{runId}/cancel → Workflow instance terminate
2056
2347
  * POST /workflow/{runId}/signal → integration_event
2057
2348
  * GET /workflow/{runId}/result → terminal envelope
@@ -2097,6 +2388,11 @@ const coordinatorEntrypoint = {
2097
2388
  deployMarker: env.DEEPLINE_COORDINATOR_DEPLOY_MARKER ?? null,
2098
2389
  });
2099
2390
  }
2391
+ if (url.pathname === '/staged-files/put') {
2392
+ const authError = authorizeCoordinatorControlRequest({ request, env });
2393
+ if (authError) return authError;
2394
+ return await handleStagedFilePut(request, env);
2395
+ }
2100
2396
  if (url.pathname === '/workflow-pool/refill') {
2101
2397
  const internalAuthError = authorizeCoordinatorControlRequest({
2102
2398
  request,
@@ -2151,6 +2447,33 @@ const coordinatorEntrypoint = {
2151
2447
  ms: Date.now() - startedAt,
2152
2448
  });
2153
2449
  }
2450
+ if (url.pathname === '/workflow-pool/debug') {
2451
+ const internalAuthError = authorizeCoordinatorControlRequest({
2452
+ request,
2453
+ env,
2454
+ });
2455
+ if (internalAuthError) return internalAuthError;
2456
+ const entries = await listWorkflowPoolEntries(env);
2457
+ const detailed = [];
2458
+ for (const entry of entries) {
2459
+ const instance = await env.PLAY_WORKFLOW.get(entry.id);
2460
+ try {
2461
+ const status = await instance.status().catch(() => null);
2462
+ detailed.push({
2463
+ ...entry,
2464
+ status: workflowStatusName(status),
2465
+ mappedStatus: status ? mapWorkflowStatus(status) : 'running',
2466
+ });
2467
+ } finally {
2468
+ disposeRpcStub(instance);
2469
+ }
2470
+ }
2471
+ return Response.json({
2472
+ ok: true,
2473
+ enabled: workflowPoolEnabled(),
2474
+ entries: detailed,
2475
+ });
2476
+ }
2154
2477
 
2155
2478
  // Workflow routes: /workflow/{runId}/{action}
2156
2479
  const wfMatch = url.pathname.match(/^\/workflow\/([^/]+)(?:\/(.+))?$/);
@@ -2362,45 +2685,54 @@ async function handleWorkflowRoute(input: {
2362
2685
  let instance: WorkflowInstance | null = null;
2363
2686
  try {
2364
2687
  const dispatchStartedAt = Date.now();
2365
- const poolStartedAt = Date.now();
2366
- instance = await submitViaPooledWorkflow({
2688
+ recordSubmitTiming({
2689
+ phase: 'coordinator.workflow_pool_attempt',
2690
+ ms: 0,
2691
+ graphHash: params.graphHash ?? null,
2692
+ extra: {
2693
+ usedPool: false,
2694
+ disabled: true,
2695
+ reason: WORKFLOW_POOL_DISABLED_REASON,
2696
+ },
2697
+ });
2698
+ const createStartedAt = Date.now();
2699
+ instance = await createDynamicWorkflowInstance({
2367
2700
  env,
2701
+ id: defaultInstanceId,
2368
2702
  params,
2369
- recordSubmitTiming,
2370
2703
  });
2371
- const usedWorkflowPool = Boolean(instance);
2372
2704
  recordSubmitTiming({
2373
- phase: 'coordinator.workflow_pool_attempt',
2374
- ms: Date.now() - poolStartedAt,
2705
+ phase: 'coordinator.workflow_create',
2706
+ ms: Date.now() - createStartedAt,
2375
2707
  graphHash: params.graphHash ?? null,
2376
- extra: { usedPool: usedWorkflowPool },
2708
+ extra: { instanceId: instance.id },
2377
2709
  });
2378
- if (!instance) {
2379
- const createStartedAt = Date.now();
2380
- instance = await createDynamicWorkflowInstance({
2381
- env,
2382
- id: defaultInstanceId,
2383
- params,
2384
- });
2385
- recordSubmitTiming({
2386
- phase: 'coordinator.workflow_create',
2387
- ms: Date.now() - createStartedAt,
2388
- graphHash: params.graphHash ?? null,
2389
- extra: { instanceId: instance.id, pooled: false },
2390
- });
2391
- } else {
2392
- recordSubmitTiming({
2393
- phase: 'coordinator.workflow_create',
2394
- ms: 0,
2395
- graphHash: params.graphHash ?? null,
2396
- extra: { instanceId: instance.id, pooled: true },
2397
- });
2398
- }
2399
2710
  recordSubmitTiming({
2400
2711
  phase: 'coordinator.dispatch_workflow',
2401
2712
  ms: Date.now() - dispatchStartedAt,
2402
2713
  graphHash: params.graphHash ?? null,
2403
- extra: { pooled: usedWorkflowPool },
2714
+ extra: { startMode: 'direct_workflow_create' },
2715
+ });
2716
+ const initialWaitMsRaw = Number(
2717
+ new URL(request.url).searchParams.get('initialWaitMs') ?? '0',
2718
+ );
2719
+ const initialStateStartedAt = Date.now();
2720
+ const instanceState = await waitForSubmitInitialState({
2721
+ instance,
2722
+ runId: submittedRunId,
2723
+ waitMs: Number.isFinite(initialWaitMsRaw) ? initialWaitMsRaw : 0,
2724
+ });
2725
+ recordSubmitTiming({
2726
+ phase: 'coordinator.submit_initial_state',
2727
+ ms: Date.now() - initialStateStartedAt,
2728
+ graphHash: params.graphHash ?? null,
2729
+ extra: {
2730
+ waitMs: Number.isFinite(initialWaitMsRaw) ? initialWaitMsRaw : 0,
2731
+ status:
2732
+ typeof instanceState?.status === 'string'
2733
+ ? instanceState.status
2734
+ : null,
2735
+ },
2404
2736
  });
2405
2737
  const totalMs = Date.now() - submitStartedAt;
2406
2738
  recordSubmitTiming({
@@ -2416,11 +2748,11 @@ async function handleWorkflowRoute(input: {
2416
2748
  return Response.json({
2417
2749
  runId,
2418
2750
  status: 'submitted',
2419
- instanceState: null,
2751
+ workflowInstanceId: instance.id,
2752
+ instanceState,
2420
2753
  coordinatorTimings,
2421
2754
  });
2422
2755
  } finally {
2423
- input.ctx?.waitUntil(refillWorkflowPool(env).catch(() => undefined));
2424
2756
  disposeRpcStub(instance);
2425
2757
  }
2426
2758
  }
@@ -2650,12 +2982,84 @@ async function handleWorkflowRoute(input: {
2650
2982
  }
2651
2983
  }
2652
2984
 
2985
+ if (action === 'tail') {
2986
+ const url = new URL(request.url);
2987
+ const waitMs = Math.min(
2988
+ Math.max(Number(url.searchParams.get('waitMs') ?? '0'), 0),
2989
+ 30_000,
2990
+ );
2991
+ const afterSeq = Math.max(
2992
+ 0,
2993
+ Math.floor(Number(url.searchParams.get('afterSeq') ?? '0')),
2994
+ );
2995
+ const includeTrace = url.searchParams.get('trace') === '1';
2996
+ const statusStartedAt = Date.now();
2997
+ const eventResult = await listCoordinatorRunEvents({
2998
+ env,
2999
+ runId,
3000
+ afterSeq,
3001
+ timeoutMs: waitMs,
3002
+ }).catch(() => null);
3003
+ const coordinatorTrace =
3004
+ includeTrace && eventResult?.events.length
3005
+ ? await listCoordinatorPerfTrace(env, runId).catch(() => [])
3006
+ : [];
3007
+ const terminalEvent = eventResult?.events.find(
3008
+ (event): event is Extract<CoordinatorRunEvent, { type: 'terminal' }> =>
3009
+ event.type === 'terminal',
3010
+ );
3011
+ if (terminalEvent) {
3012
+ return Response.json({
3013
+ runId,
3014
+ ...(terminalEvent.playName ? { playName: terminalEvent.playName } : {}),
3015
+ status: terminalEvent.status,
3016
+ result: terminalEvent.result ?? null,
3017
+ error: terminalEvent.error ?? null,
3018
+ totalRows: terminalEvent.totalRows ?? null,
3019
+ durationMs: terminalEvent.durationMs ?? null,
3020
+ events: eventResult?.events ?? [],
3021
+ latestSeq: eventResult?.latestSeq ?? afterSeq,
3022
+ wait: null,
3023
+ coordinatorObserve: {
3024
+ ms: Date.now() - statusStartedAt,
3025
+ waitMs,
3026
+ workflowStatus: 'terminal-event',
3027
+ statusPolls: 0,
3028
+ instanceId: null,
3029
+ },
3030
+ ...(includeTrace ? { coordinatorTrace } : {}),
3031
+ });
3032
+ }
3033
+ return Response.json({
3034
+ runId,
3035
+ status: 'running',
3036
+ events: eventResult?.events ?? [],
3037
+ latestSeq: eventResult?.latestSeq ?? afterSeq,
3038
+ wait: null,
3039
+ coordinatorObserve: {
3040
+ ms: Date.now() - statusStartedAt,
3041
+ waitMs,
3042
+ workflowStatus:
3043
+ eventResult?.events.length ? 'event' : 'event-timeout',
3044
+ statusPolls: 0,
3045
+ instanceId: null,
3046
+ },
3047
+ ...(includeTrace ? { coordinatorTrace } : {}),
3048
+ });
3049
+ }
3050
+
2653
3051
  // get() throws if the instance doesn't exist (Workflows local-mode wipes
2654
3052
  // state on wrangler dev reload, and superseded `--force` runs may target
2655
3053
  // an instance that was never created). Treat that as a no-op cancel.
2656
3054
  let instance: WorkflowInstance | null = null;
2657
3055
  try {
2658
- const instanceId = await resolveWorkflowInstanceIdForRun(env, runId);
3056
+ const requestedInstanceId = new URL(request.url).searchParams
3057
+ .get('instanceId')
3058
+ ?.trim();
3059
+ const instanceId =
3060
+ requestedInstanceId && !isWorkflowMutatingAction(action)
3061
+ ? requestedInstanceId
3062
+ : await resolveWorkflowInstanceIdForRun(env, runId);
2659
3063
  instance = await env.PLAY_WORKFLOW.get(instanceId);
2660
3064
  } catch (error) {
2661
3065
  const message = error instanceof Error ? error.message : String(error);
@@ -2723,42 +3127,38 @@ async function handleWorkflowRoute(input: {
2723
3127
  });
2724
3128
  }
2725
3129
  if (
2726
- action === 'result' ||
2727
- action === 'status' ||
2728
- action === 'observe' ||
2729
- action === ''
3130
+ action === 'result' || action === 'status' || action === ''
2730
3131
  ) {
2731
- const observeWaitMs =
2732
- action === 'observe'
2733
- ? Math.min(
2734
- Math.max(
2735
- Number(new URL(request.url).searchParams.get('waitMs') ?? '0'),
2736
- 0,
2737
- ),
2738
- 2_000,
2739
- )
2740
- : 0;
2741
3132
  const includeTrace =
2742
3133
  new URL(request.url).searchParams.get('trace') === '1';
2743
3134
  const statusStartedAt = Date.now();
2744
- let status = await instance.status();
2745
- let statusPolls = 1;
2746
- while (
2747
- observeWaitMs > 0 &&
2748
- Date.now() - statusStartedAt < observeWaitMs
2749
- ) {
2750
- const result = mapWorkflowResult(runId, status);
2751
- if (
2752
- result.status === 'completed' ||
2753
- result.status === 'failed' ||
2754
- result.status === 'cancelled'
2755
- ) {
2756
- break;
2757
- }
2758
- await new Promise((resolve) => setTimeout(resolve, 75));
2759
- status = await instance.status();
2760
- statusPolls += 1;
3135
+ const terminalState = await readCoordinatorTerminalState(env, runId).catch(
3136
+ () => null,
3137
+ );
3138
+ if (terminalState) {
3139
+ const coordinatorTrace = includeTrace
3140
+ ? await listCoordinatorPerfTrace(env, runId).catch(() => [])
3141
+ : [];
3142
+ return Response.json({
3143
+ runId,
3144
+ ...(terminalState.playName ? { playName: terminalState.playName } : {}),
3145
+ status: terminalState.status,
3146
+ result: terminalState.result ?? null,
3147
+ error: terminalState.error ?? null,
3148
+ totalRows: terminalState.totalRows ?? null,
3149
+ durationMs: terminalState.durationMs ?? null,
3150
+ wait: null,
3151
+ coordinatorObserve: {
3152
+ ms: Date.now() - statusStartedAt,
3153
+ waitMs: 0,
3154
+ workflowStatus: 'terminal-cache',
3155
+ statusPolls: 0,
3156
+ instanceId: instance.id,
3157
+ },
3158
+ ...(includeTrace ? { coordinatorTrace } : {}),
3159
+ });
2761
3160
  }
3161
+ const status = await instance.status();
2762
3162
  const result = mapWorkflowResult(runId, status);
2763
3163
  const observeMs = Date.now() - statusStartedAt;
2764
3164
  // If we forced a permanent-error fail-fast (status='failed' even
@@ -2797,9 +3197,9 @@ async function handleWorkflowRoute(input: {
2797
3197
  ...result,
2798
3198
  coordinatorObserve: {
2799
3199
  ms: observeMs,
2800
- waitMs: observeWaitMs,
3200
+ waitMs: 0,
2801
3201
  workflowStatus: status.status,
2802
- statusPolls,
3202
+ statusPolls: 1,
2803
3203
  instanceId: instance.id,
2804
3204
  },
2805
3205
  ...(includeTrace ? { coordinatorTrace } : {}),
@@ -2828,6 +3228,54 @@ function stableHash(value: string): string {
2828
3228
  return (hash >>> 0).toString(36);
2829
3229
  }
2830
3230
 
3231
/**
 * Harness version tag embedded in every key produced by
 * `dynamicPlayWorkerCacheKey`.
 */
const DYNAMIC_PLAY_WORKER_HARNESS_VERSION =
  'h6-runtime-api-coordinator-deploy-scoped';

/** Upper bound on the number of entries kept in the bundled-code cache. */
const DYNAMIC_WORKER_BUNDLED_CODE_CACHE_MAX_ENTRIES = 64;

/**
 * In-memory cache of bundled worker code, keyed by
 * `dynamicWorkerBundledCodeCacheKey`. Map insertion order doubles as the
 * recency order: the first key is the least recently used entry.
 */
const dynamicWorkerBundledCodeCache = new Map<string, string>();

/**
 * Builds the loader cache key for a dynamic play worker.
 *
 * The key joins, with `:`, the literal `play`, the graph hash, the artifact
 * identity, the harness version and the coordinator deploy marker. A missing
 * or blank deploy marker falls back to `local`.
 *
 * @param input.env Coordinator environment; only
 *   `DEEPLINE_COORDINATOR_DEPLOY_MARKER` is read.
 * @param input.graphHash Hash identifying the play graph.
 * @param input.artifactIdentity Artifact hash (or a stable stand-in for it).
 * @returns The composed cache key.
 */
function dynamicPlayWorkerCacheKey(input: {
  env: CoordinatorEnv;
  graphHash: string;
  artifactIdentity: string;
}): string {
  const deployMarker =
    input.env.DEEPLINE_COORDINATOR_DEPLOY_MARKER?.trim() || 'local';
  return [
    'play',
    input.graphHash,
    input.artifactIdentity,
    `harness=${DYNAMIC_PLAY_WORKER_HARNESS_VERSION}`,
    `deploy=${deployMarker}`,
  ].join(':');
}

/**
 * Builds the in-memory cache key for an artifact's bundled code.
 *
 * @param input.artifactStorageKey Storage key of the artifact.
 * @param input.artifactHash Optional artifact hash; a missing or blank value
 *   is replaced by the literal `no-hash`.
 * @returns `<hash-or-no-hash>:<artifactStorageKey>`.
 */
function dynamicWorkerBundledCodeCacheKey(input: {
  artifactStorageKey: string;
  artifactHash?: string | null;
}): string {
  return `${input.artifactHash?.trim() || 'no-hash'}:${input.artifactStorageKey}`;
}

/**
 * Looks up bundled code in the in-memory cache.
 *
 * A hit is re-inserted so the entry becomes the most recently used one.
 *
 * @param key Key produced by `dynamicWorkerBundledCodeCacheKey`.
 * @returns The cached code, or `null` when the key is not cached.
 */
function readDynamicWorkerBundledCodeCache(key: string): string | null {
  const cached = dynamicWorkerBundledCodeCache.get(key);
  if (cached === undefined) return null;
  // Delete + set moves the entry to the end of the Map's iteration order.
  dynamicWorkerBundledCodeCache.delete(key);
  dynamicWorkerBundledCodeCache.set(key, cached);
  return cached;
}

/**
 * Stores bundled code in the in-memory cache and evicts the least recently
 * used entries while the cache exceeds
 * `DYNAMIC_WORKER_BUNDLED_CODE_CACHE_MAX_ENTRIES`.
 *
 * @param key Key produced by `dynamicWorkerBundledCodeCacheKey`.
 * @param code Bundled worker code to cache.
 */
function writeDynamicWorkerBundledCodeCache(key: string, code: string): void {
  // Map.set() on an existing key keeps its original insertion position, so
  // delete first: otherwise a just-rewritten entry could still be the
  // "oldest" one and be evicted immediately.
  dynamicWorkerBundledCodeCache.delete(key);
  dynamicWorkerBundledCodeCache.set(key, code);
  while (
    dynamicWorkerBundledCodeCache.size >
    DYNAMIC_WORKER_BUNDLED_CODE_CACHE_MAX_ENTRIES
  ) {
    const oldestKey = dynamicWorkerBundledCodeCache.keys().next().value;
    if (typeof oldestKey !== 'string') break;
    dynamicWorkerBundledCodeCache.delete(oldestKey);
  }
}
3278
+
2831
3279
  /**
2832
3280
  * Synchronous wrapper around env.LOADER.get for use inside the
2833
3281
  * createDynamicWorkflowEntrypoint loader callback. The framework's loader
@@ -2866,7 +3314,11 @@ function loadDynamicPlayWorkerSync(
2866
3314
  }
2867
3315
  const artifactIdentity =
2868
3316
  metadata.artifactHash?.trim() || stableHash(artifactStorageKey);
2869
- const workerCacheKey = `play:${graphHash}:${artifactIdentity}:harness=h5-runtime-api-coordinator`;
3317
+ const workerCacheKey = dynamicPlayWorkerCacheKey({
3318
+ env,
3319
+ graphHash,
3320
+ artifactIdentity,
3321
+ });
2870
3322
  const runIdForTrace = metadata.runId ?? graphHash;
2871
3323
  const loaderGetStartedAt = Date.now();
2872
3324
  const stub = env.LOADER.get(workerCacheKey, async () => {
@@ -2952,7 +3404,11 @@ async function loadDynamicPlayWorker(
2952
3404
  }
2953
3405
  const artifactIdentity =
2954
3406
  metadata.artifactHash?.trim() || stableHash(artifactStorageKey);
2955
- const workerCacheKey = `play:${graphHash}:${artifactIdentity}:harness=h5-runtime-api-coordinator`;
3407
+ const workerCacheKey = dynamicPlayWorkerCacheKey({
3408
+ env,
3409
+ graphHash,
3410
+ artifactIdentity,
3411
+ });
2956
3412
  const runIdForTrace = metadata.runId ?? graphHash;
2957
3413
  const loaderGetStartedAt = Date.now();
2958
3414
  const stub = env.LOADER.get(workerCacheKey, async () => {
@@ -3002,31 +3458,42 @@ async function loadDynamicWorkerBundledCode(input: {
3002
3458
  trace: CoordinatorPerfTraceSink;
3003
3459
  }): Promise<string> {
3004
3460
  const callbackStartedAt = Date.now();
3005
- let codeSource: 'inline' | 'r2' = 'inline';
3461
+ let codeSource: 'inline' | 'r2' | 'memory' = 'inline';
3006
3462
  let r2Ms = 0;
3007
- const artifact = input.metadata.dynamicWorkerCode
3008
- ? null
3009
- : await (async () => {
3010
- codeSource = 'r2';
3011
- const r2StartedAt = Date.now();
3012
- try {
3013
- return await loadStoredPlayArtifactFromR2(
3014
- input.env,
3015
- input.artifactStorageKey,
3016
- );
3017
- } finally {
3018
- r2Ms = Date.now() - r2StartedAt;
3019
- input.trace({
3020
- runId: input.runIdForTrace,
3021
- phase: 'coordinator.loader_code_r2_get',
3022
- ms: r2Ms,
3023
- graphHash: input.graphHash,
3024
- extra: { artifactStorageKey: input.artifactStorageKey },
3025
- });
3026
- }
3027
- })();
3028
- const bundledCode =
3029
- input.metadata.dynamicWorkerCode ?? artifact?.artifact?.bundledCode;
3463
+ const codeCacheKey = dynamicWorkerBundledCodeCacheKey({
3464
+ artifactStorageKey: input.artifactStorageKey,
3465
+ artifactHash: input.metadata.artifactHash,
3466
+ });
3467
+ let bundledCode = input.metadata.dynamicWorkerCode ?? null;
3468
+ if (!bundledCode) {
3469
+ bundledCode = readDynamicWorkerBundledCodeCache(codeCacheKey);
3470
+ if (bundledCode) {
3471
+ codeSource = 'memory';
3472
+ }
3473
+ }
3474
+ if (!bundledCode) {
3475
+ codeSource = 'r2';
3476
+ const r2StartedAt = Date.now();
3477
+ try {
3478
+ const artifact = await loadStoredPlayArtifactFromR2(
3479
+ input.env,
3480
+ input.artifactStorageKey,
3481
+ );
3482
+ bundledCode = artifact?.artifact?.bundledCode ?? null;
3483
+ if (typeof bundledCode === 'string' && bundledCode.length > 0) {
3484
+ writeDynamicWorkerBundledCodeCache(codeCacheKey, bundledCode);
3485
+ }
3486
+ } finally {
3487
+ r2Ms = Date.now() - r2StartedAt;
3488
+ input.trace({
3489
+ runId: input.runIdForTrace,
3490
+ phase: 'coordinator.loader_code_r2_get',
3491
+ ms: r2Ms,
3492
+ graphHash: input.graphHash,
3493
+ extra: { artifactStorageKey: input.artifactStorageKey },
3494
+ });
3495
+ }
3496
+ }
3030
3497
  if (typeof bundledCode !== 'string' || bundledCode.length === 0) {
3031
3498
  throw new Error(
3032
3499
  `Stored play artifact ${input.artifactStorageKey} does not contain bundledCode.`,
@@ -3118,6 +3585,192 @@ export default {
3118
3585
  };
3119
3586
  `;
3120
3587
 
3588
/**
 * Decides whether a storage key may be used for a staged-file upload.
 *
 * A key is accepted only when all of the following hold:
 *  - it is non-empty and at most 2,048 characters long;
 *  - it does not start with `/` and contains neither `..` nor a backslash;
 *  - it starts with `plays/v2/orgs/` or contains `/plays/v2/orgs/`.
 *
 * @param key Candidate storage key.
 * @returns `true` when the key passes every check.
 */
function isAllowedStagedFileStorageKey(key: string): boolean {
  const withinLength = key.length > 0 && key.length <= 2_048;
  const hasUnsafePart =
    key.startsWith('/') ||
    ['..', '\\'].some((fragment) => key.includes(fragment));
  const inPlaysNamespace =
    key.startsWith('plays/v2/orgs/') || key.includes('/plays/v2/orgs/');
  return withinLength && !hasUnsafePart && inPlaysNamespace;
}
3595
+
3596
/**
 * Decodes a base64 string into its raw bytes.
 *
 * `atob` yields a binary string with one character per byte; each character
 * code is copied into the resulting array. Any error thrown by `atob` for
 * malformed input propagates to the caller.
 *
 * @param value Base64-encoded payload.
 * @returns The decoded bytes.
 */
function decodeBase64ToUint8Array(value: string): Uint8Array {
  return Uint8Array.from(atob(value), (char) => char.charCodeAt(0));
}
3604
+
3605
/**
 * Logs and builds the response used when the staged file is already stored
 * with the expected size, so the upload can be skipped.
 */
function respondStagedFileAlreadyStored(input: {
  key: string;
  bytes: number;
  headMs: number;
  transport: 'raw' | 'base64';
}): Response {
  console.info('[perf][coordinator.staged_file_put]', {
    key: input.key,
    bytes: input.bytes,
    headMs: input.headMs,
    putMs: 0,
    ms: input.headMs,
    transport: input.transport,
    skipped: true,
  });
  return Response.json({
    ok: true,
    key: input.key,
    bytes: input.bytes,
    existed: true,
    timingsMs: { head: input.headMs, put: 0 },
  });
}

/**
 * Handles `POST /staged-files/put`: stores a staged file in `PLAYS_BUCKET`.
 *
 * Two transports are supported:
 *  - **raw**: `key`, `bytes` and optional `contentType` arrive as query
 *    parameters and the request body is the file content;
 *  - **base64**: the body is JSON with `key`, `contentBase64`, `bytes` and
 *    optional `contentType`.
 *
 * In both transports the key must pass `isAllowedStagedFileStorageKey`,
 * `bytes` must be a non-negative safe integer matching the payload length,
 * and the write is skipped when `headExistingStagedFile` reports an existing
 * object of the expected size.
 *
 * @param request Incoming request; any method other than POST yields 405.
 * @param env Coordinator environment providing `PLAYS_BUCKET`.
 * @returns A JSON response: `{ ok: true, ... }` on success or skip, or
 *   `{ error }` with status 400 for invalid input.
 */
async function handleStagedFilePut(
  request: Request,
  env: CoordinatorEnv,
): Promise<Response> {
  if (request.method !== 'POST') {
    return new Response('method not allowed', { status: 405 });
  }
  const url = new URL(request.url);
  const rawKey = url.searchParams.get('key')?.trim() ?? '';

  // Raw transport: metadata in the query string, content in the body.
  if (rawKey) {
    const key = rawKey;
    const contentType =
      url.searchParams.get('contentType')?.trim() ||
      'application/octet-stream';
    // Number('') is 0, so an empty `bytes=` must be treated as missing
    // instead of silently meaning "zero bytes".
    const rawBytes = url.searchParams.get('bytes')?.trim() ?? '';
    const expectedBytes = rawBytes ? Number(rawBytes) : NaN;
    if (!isAllowedStagedFileStorageKey(key)) {
      return Response.json(
        { error: 'invalid staged file key' },
        { status: 400 },
      );
    }
    if (!Number.isSafeInteger(expectedBytes) || expectedBytes < 0) {
      return Response.json({ error: 'bytes is required' }, { status: 400 });
    }
    const existing = await headExistingStagedFile(env, key, expectedBytes);
    if (existing.exists) {
      return respondStagedFileAlreadyStored({
        key,
        bytes: expectedBytes,
        headMs: existing.ms,
        transport: 'raw',
      });
    }
    const readStartedAt = Date.now();
    const bytes = new Uint8Array(await request.arrayBuffer());
    const readMs = Date.now() - readStartedAt;
    if (bytes.byteLength !== expectedBytes) {
      return Response.json(
        { error: 'staged file byte length mismatch' },
        { status: 400 },
      );
    }
    const putStartedAt = Date.now();
    await env.PLAYS_BUCKET.put(key, bytes, {
      httpMetadata: { contentType },
    });
    const putMs = Date.now() - putStartedAt;
    console.info('[perf][coordinator.staged_file_put]', {
      key,
      bytes: bytes.byteLength,
      readMs,
      putMs,
      ms: readMs + putMs,
      transport: 'raw',
    });
    return Response.json({
      ok: true,
      key,
      bytes: bytes.byteLength,
      timingsMs: { read: readMs, put: putMs },
    });
  }

  // Base64 transport: everything arrives in a JSON body. An unparsable body
  // becomes `null` and is rejected by the validation below.
  const body = (await request.json().catch(() => null)) as
    | {
        key?: unknown;
        contentBase64?: unknown;
        contentType?: unknown;
        bytes?: unknown;
      }
    | null;
  const key = typeof body?.key === 'string' ? body.key.trim() : '';
  const contentBase64 =
    typeof body?.contentBase64 === 'string' ? body.contentBase64 : '';
  const contentType =
    typeof body?.contentType === 'string' && body.contentType.trim()
      ? body.contentType.trim()
      : 'application/octet-stream';
  const expectedBytes = typeof body?.bytes === 'number' ? body.bytes : NaN;
  if (!isAllowedStagedFileStorageKey(key)) {
    return Response.json(
      { error: 'invalid staged file key' },
      { status: 400 },
    );
  }
  if (
    !contentBase64 ||
    !Number.isSafeInteger(expectedBytes) ||
    expectedBytes < 0
  ) {
    return Response.json(
      { error: 'contentBase64 and bytes are required' },
      { status: 400 },
    );
  }
  const existing = await headExistingStagedFile(env, key, expectedBytes);
  if (existing.exists) {
    return respondStagedFileAlreadyStored({
      key,
      bytes: expectedBytes,
      headMs: existing.ms,
      transport: 'base64',
    });
  }
  const decodeStartedAt = Date.now();
  let bytes: Uint8Array;
  try {
    bytes = decodeBase64ToUint8Array(contentBase64);
  } catch {
    // atob throws on malformed input; that is a client error, not a crash.
    return Response.json(
      { error: 'contentBase64 is not valid base64' },
      { status: 400 },
    );
  }
  const decodeMs = Date.now() - decodeStartedAt;
  if (bytes.byteLength !== expectedBytes) {
    return Response.json(
      { error: 'staged file byte length mismatch' },
      { status: 400 },
    );
  }
  const putStartedAt = Date.now();
  await env.PLAYS_BUCKET.put(key, bytes, {
    httpMetadata: { contentType },
  });
  const putMs = Date.now() - putStartedAt;
  console.info('[perf][coordinator.staged_file_put]', {
    key,
    bytes: bytes.byteLength,
    decodeMs,
    putMs,
    ms: decodeMs + putMs,
    transport: 'base64',
  });
  return Response.json({
    ok: true,
    key,
    bytes: bytes.byteLength,
    timingsMs: { decode: decodeMs, put: putMs },
  });
}
3756
+
3757
/**
 * Checks whether a staged file is already stored under `key` with the
 * expected size.
 *
 * A failing `head` call is treated the same as a missing object. When the
 * stored object exposes a numeric `size` that differs from `expectedBytes`,
 * the file is reported as not existing.
 *
 * @param env Coordinator environment providing `PLAYS_BUCKET`.
 * @param key Storage key to probe.
 * @param expectedBytes Size the stored object must have to count as present.
 * @returns `exists` plus `ms`, the duration of the `head` call in
 *   milliseconds.
 */
async function headExistingStagedFile(
  env: CoordinatorEnv,
  key: string,
  expectedBytes: number,
): Promise<{ exists: boolean; ms: number }> {
  const headStartedAt = Date.now();
  const stored = await env.PLAYS_BUCKET.head(key).catch(() => null);
  const ms = Date.now() - headStartedAt;
  const sizeMatches =
    typeof stored?.size !== 'number' || stored.size === expectedBytes;
  return { exists: Boolean(stored) && sizeMatches, ms };
}
3773
+
3121
3774
  async function handleCoordinatorWarmup(
3122
3775
  request: Request,
3123
3776
  env: CoordinatorEnv,
@@ -3329,6 +3982,10 @@ function makeCoordinatorControlBinding():
3329
3982
  runId: string,
3330
3983
  payload: CoordinatorPerfTracePayload,
3331
3984
  ): Promise<void>;
3985
+ recordRunEvent(
3986
+ runId: string,
3987
+ event: CoordinatorRunEvent,
3988
+ ): Promise<void>;
3332
3989
  };
3333
3990
  }
3334
3991
  | Record<string, never> {
@@ -3346,6 +4003,10 @@ function makeCoordinatorControlBinding():
3346
4003
  runId: string,
3347
4004
  payload: CoordinatorPerfTracePayload,
3348
4005
  ): Promise<void>;
4006
+ recordRunEvent(
4007
+ runId: string,
4008
+ event: CoordinatorRunEvent,
4009
+ ): Promise<void>;
3349
4010
  };
3350
4011
  };
3351
4012
  const ctor = exports.CoordinatorControl;