deepline 0.1.78 → 0.1.80

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. package/dist/cli/index.js +69 -37
  2. package/dist/cli/index.mjs +69 -37
  3. package/dist/index.d.mts +32 -1
  4. package/dist/index.d.ts +32 -1
  5. package/dist/index.js +7 -4
  6. package/dist/index.mjs +7 -4
  7. package/dist/repo/apps/play-runner-workers/src/child-play-await.ts +192 -0
  8. package/dist/repo/apps/play-runner-workers/src/coordinator-entry.ts +1320 -1644
  9. package/dist/repo/apps/play-runner-workers/src/dedup-do.ts +515 -648
  10. package/dist/repo/apps/play-runner-workers/src/entry.ts +896 -354
  11. package/dist/repo/apps/play-runner-workers/src/workflow-retry-state.ts +209 -0
  12. package/dist/repo/sdk/src/client.ts +9 -2
  13. package/dist/repo/sdk/src/release.ts +2 -2
  14. package/dist/repo/sdk/src/types.ts +5 -0
  15. package/dist/repo/shared_libs/play-runtime/governor/coordinator-rate-state-backend.ts +231 -0
  16. package/dist/repo/shared_libs/play-runtime/governor/governor.ts +376 -0
  17. package/dist/repo/shared_libs/play-runtime/governor/policy.ts +179 -0
  18. package/dist/repo/shared_libs/play-runtime/governor/rate-state-backend.ts +87 -0
  19. package/dist/repo/shared_libs/play-runtime/run-failure.ts +12 -0
  20. package/dist/repo/shared_libs/play-runtime/scheduler-backend.ts +24 -0
  21. package/dist/repo/shared_libs/play-runtime/submit-limits.ts +35 -0
  22. package/dist/repo/shared_libs/plays/bundling/index.ts +4 -12
  23. package/dist/repo/shared_libs/plays/bundling/limits.ts +29 -0
  24. package/dist/repo/shared_libs/plays/static-pipeline.ts +314 -1
  25. package/dist/repo/shared_libs/temporal/constants.ts +38 -0
  26. package/package.json +1 -1
@@ -53,6 +53,14 @@ import {
53
53
  decideWorkflowPlatformRetry,
54
54
  PLATFORM_DEPLOY_WORKFLOW_RETRY_LIMIT,
55
55
  } from './workflow-retry';
56
+ import {
57
+ WORKFLOW_RETRY_PARAMS_EXTERNALIZE_AFTER_BYTES,
58
+ WORKFLOW_RETRY_PARAMS_MAX_BYTES,
59
+ buildWorkflowRetryParams,
60
+ jsonByteLength,
61
+ workflowRetryParamsStorageKey,
62
+ type WorkflowRetryParamsRef,
63
+ } from './workflow-retry-state';
56
64
  import { sanitizeLiveLogLines } from './runtime/live-progress';
57
65
 
58
66
  export { DynamicWorkflowBinding };
@@ -104,6 +112,7 @@ export type PlayWorkflowParams = {
104
112
  totalRows?: number;
105
113
  coordinatorUrl?: string | null;
106
114
  coordinatorInternalToken?: string | null;
115
+ submittedAt?: number | null;
107
116
  };
108
117
 
109
118
  type InlineChildSubmitResult =
@@ -167,6 +176,12 @@ type CoordinatorTerminalState = {
167
176
  completedAt?: number;
168
177
  };
169
178
 
179
+ type CoordinatorChildTerminalState = {
180
+ eventKey: string;
181
+ data: unknown;
182
+ storedAt: number;
183
+ };
184
+
170
185
  type CoordinatorRunEvent =
171
186
  | {
172
187
  seq?: number;
@@ -216,6 +231,11 @@ type InlineWorkerRunResponse = {
216
231
  outputRows?: number;
217
232
  durationMs?: number;
218
233
  parseMs?: number;
234
+ timings?: Array<{
235
+ phase?: unknown;
236
+ ms?: unknown;
237
+ extra?: unknown;
238
+ }>;
219
239
  events?: Array<
220
240
  | { type: 'log'; message?: string; level?: string; ts?: number }
221
241
  | { type: 'result'; result?: unknown; outputRows?: number; ts?: number }
@@ -230,7 +250,7 @@ function isRecord(value: unknown): value is Record<string, unknown> {
230
250
  }
231
251
 
232
252
  interface CoordinatorEnv {
233
- PLAY_WORKFLOW: Workflow<PlayWorkflowParams | PooledWorkflowBootstrapPayload>;
253
+ PLAY_WORKFLOW: Workflow<PlayWorkflowParams>;
234
254
  PLAY_DEDUP: DurableObjectNamespace;
235
255
  LOADER?: {
236
256
  get(
@@ -523,6 +543,67 @@ async function readCoordinatorTerminalState(
523
543
  return state as CoordinatorTerminalState;
524
544
  }
525
545
 
546
/**
 * Posts a child-terminal record to the Durable Object addressed by the parent
 * run id.
 *
 * The request is a JSON POST to `/child-terminal-set` carrying `eventKey`,
 * `data`, and a `storedAt` timestamp read from `Date.now()` at call time.
 *
 * @param input.env Environment exposing the `PLAY_DEDUP` namespace.
 * @param input.parentRunId Name used to look up the Durable Object.
 * @param input.eventKey Key sent alongside the payload.
 * @param input.data Payload; serialized with `JSON.stringify`.
 * @throws Error when the Durable Object answers with a non-OK status.
 */
async function writeCoordinatorChildTerminalState(input: {
  env: CoordinatorEnv;
  parentRunId: string;
  eventKey: string;
  data: unknown;
}): Promise<void> {
  const { env, parentRunId, eventKey, data } = input;
  const objectId = env.PLAY_DEDUP.idFromName(parentRunId);
  const durableObject = env.PLAY_DEDUP.get(objectId);
  const payload = JSON.stringify({
    eventKey,
    data,
    storedAt: Date.now(),
  });
  const response = await durableObject.fetch(
    'https://deepline.dedup.internal/child-terminal-set',
    {
      method: 'POST',
      headers: { 'content-type': 'application/json' },
      body: payload,
    },
  );
  if (response.ok) return;
  throw new Error(`coordinator child terminal set failed ${response.status}`);
}
571
+
572
/**
 * Fetches the child-terminal record for `eventKey` from the Durable Object
 * addressed by the parent run id.
 *
 * When `timeoutMs` is a positive number the `child-terminal-await` endpoint is
 * called with the floored timeout as a query parameter; otherwise
 * `child-terminal-get` is called with no timeout parameter.
 *
 * @param input.env Environment exposing the `PLAY_DEDUP` namespace.
 * @param input.parentRunId Name used to look up the Durable Object.
 * @param input.eventKey Key to look up; also used to validate the response.
 * @param input.timeoutMs Optional timeout forwarded to the await endpoint.
 * @returns The `state` object from the response, or `null` when it is not a
 *   record or its `eventKey` does not equal the requested one.
 * @throws Error when the Durable Object answers with a non-OK status.
 */
async function readCoordinatorChildTerminalState(input: {
  env: CoordinatorEnv;
  parentRunId: string;
  eventKey: string;
  timeoutMs?: number;
}): Promise<CoordinatorChildTerminalState | null> {
  const { env, parentRunId, eventKey, timeoutMs } = input;
  const durableObject = env.PLAY_DEDUP.get(
    env.PLAY_DEDUP.idFromName(parentRunId),
  );

  // A missing, zero, negative, or NaN timeout selects the non-waiting lookup.
  const waitMs = timeoutMs && timeoutMs > 0 ? timeoutMs : null;
  const endpoint =
    waitMs === null ? 'child-terminal-get' : 'child-terminal-await';

  let query = `eventKey=${encodeURIComponent(eventKey)}`;
  if (waitMs !== null) {
    query += `&timeoutMs=${encodeURIComponent(String(Math.floor(waitMs)))}`;
  }

  const response = await durableObject.fetch(
    `https://deepline.dedup.internal/${endpoint}?${query}`,
  );
  if (!response.ok) {
    throw new Error(
      `coordinator child terminal ${endpoint} failed ${response.status}`,
    );
  }

  // An unparseable body is treated the same as a body without `state`.
  const parsed = (await response.json().catch(() => ({}))) as {
    state?: unknown;
  };
  const candidate = parsed.state;
  if (isRecord(candidate) && candidate.eventKey === eventKey) {
    return candidate as CoordinatorChildTerminalState;
  }
  return null;
}
606
+
526
607
  function workflowEventType(name: string): string {
527
608
  const normalized = name
528
609
  .trim()
@@ -547,36 +628,9 @@ type DynamicWorkflowMetadata = {
547
628
  }> | null;
548
629
  };
549
630
 
550
- type DispatcherEnvelope = {
551
- __dispatcherMetadata: DynamicWorkflowMetadata;
552
- params: PlayWorkflowParams;
553
- };
554
-
555
- type PooledWorkflowBootstrapPayload = {
556
- __deeplinePooledWorkflow: true;
557
- poolId: string;
558
- createdAt: number;
559
- };
560
-
561
- const WORKFLOW_POOL_PROTOCOL_VERSION =
562
- 'pooled-workflow-wait-v14-ready-signal-http-storage';
563
- const WORKFLOW_POOL_DO_NAME = 'workflow-pool:v2';
564
- const WORKFLOW_POOL_START_EVENT_TYPE = 'play_start';
565
- const WORKFLOW_POOL_TTL_MS = 8 * 60 * 1000;
566
- const WORKFLOW_POOL_TARGET_SIZE = 0;
567
- const WORKFLOW_POOL_READY_TIMEOUT_MS = 1_500;
568
- const WORKFLOW_POOL_READY_POLL_MS = 250;
569
- const WORKFLOW_POOL_REFILL_ON_MISS_TIMEOUT_MS = 2_500;
570
- const WORKFLOW_POOL_REFILL_ON_MISS_MIN_AVAILABLE = 4;
571
- const WORKFLOW_POOL_CONTROL_TIMEOUT_MS = 750;
572
- const WORKFLOW_POOL_START_ACK_TIMEOUT_MS = 750;
573
- const WORKFLOW_POOL_START_ACK_POLL_MS = 25;
574
631
  const SUBMIT_INITIAL_STATE_MAX_WAIT_MS = 0;
575
632
  const SUBMIT_INITIAL_STATE_POLL_MS = 50;
576
633
  const WORKFLOW_RETRY_STATE_TTL_MS = 60 * 60 * 1000;
577
- const WORKFLOW_POOL_PREWARM_ESCALATE_TARGET_AFTER_MS = 250;
578
- const WORKFLOW_POOL_SCHEDULED_REFILL_MIN_AVAILABLE = 1;
579
- const WORKFLOW_POOL_SCHEDULED_REFILL_TIMEOUT_MS = 10_000;
580
634
 
581
635
  function buildDynamicWorkflowMetadata(
582
636
  params: PlayWorkflowParams,
@@ -591,35 +645,11 @@ function buildDynamicWorkflowMetadata(
591
645
  };
592
646
  }
593
647
 
594
- function buildDispatcherEnvelope(
595
- params: PlayWorkflowParams,
596
- ): DispatcherEnvelope {
597
- // Mirrors @cloudflare/dynamic-workflows' envelope. We need to send the
598
- // dispatcher payload via Workflow sendEvent for prewarmed instances; the
599
- // public wrapper only applies this envelope to create() params.
600
- return {
601
- __dispatcherMetadata: buildDynamicWorkflowMetadata(params),
602
- params,
603
- };
604
- }
605
-
606
- function isPooledWorkflowBootstrapPayload(
607
- value: unknown,
608
- ): value is PooledWorkflowBootstrapPayload {
609
- return (
610
- Boolean(value) &&
611
- typeof value === 'object' &&
612
- !Array.isArray(value) &&
613
- (value as Record<string, unknown>).__deeplinePooledWorkflow === true &&
614
- typeof (value as Record<string, unknown>).poolId === 'string'
615
- );
616
- }
617
-
618
648
  function readWorkflowTraceContext(event: unknown): {
619
649
  runId: string;
620
650
  graphHash: string | null;
621
651
  instanceId: string | null;
622
- pooledBootstrap: boolean;
652
+ submittedAt: number | null;
623
653
  } {
624
654
  const record = isRecord(event) ? event : {};
625
655
  const payload = isRecord(record.payload) ? record.payload : {};
@@ -627,17 +657,15 @@ function readWorkflowTraceContext(event: unknown): {
627
657
  const metadata = isRecord(payload.__dispatcherMetadata)
628
658
  ? payload.__dispatcherMetadata
629
659
  : null;
630
- const pooled = isPooledWorkflowBootstrapPayload(payload);
631
660
  const runId =
632
661
  (typeof params?.runId === 'string' && params.runId) ||
633
662
  (typeof metadata?.runId === 'string' && metadata.runId) ||
634
- (pooled && typeof payload.poolId === 'string' ? payload.poolId : null) ||
635
663
  (typeof record.instanceId === 'string' && record.instanceId) ||
636
664
  'unknown-workflow-run';
637
665
  const graphHash =
638
666
  (typeof params?.graphHash === 'string' && params.graphHash) ||
639
667
  (typeof metadata?.graphHash === 'string' && metadata.graphHash) ||
640
- (pooled ? 'workflow-pool' : null);
668
+ null;
641
669
  return {
642
670
  runId,
643
671
  graphHash,
@@ -645,18 +673,14 @@ function readWorkflowTraceContext(event: unknown): {
645
673
  typeof record.instanceId === 'string' && record.instanceId
646
674
  ? record.instanceId
647
675
  : null,
648
- pooledBootstrap: pooled,
676
+ submittedAt:
677
+ typeof params?.submittedAt === 'number' &&
678
+ Number.isFinite(params.submittedAt)
679
+ ? params.submittedAt
680
+ : null,
649
681
  };
650
682
  }
651
683
 
652
- function workflowPoolEnabled(): boolean {
653
- return WORKFLOW_POOL_TARGET_SIZE > 0;
654
- }
655
-
656
- function workflowPoolTargetSize(): number {
657
- return WORKFLOW_POOL_TARGET_SIZE;
658
- }
659
-
660
684
  async function waitForSubmitInitialState(input: {
661
685
  instance: WorkflowInstance;
662
686
  runId: string;
@@ -704,87 +728,110 @@ async function createDynamicWorkflowInstance(input: {
704
728
  });
705
729
  }
706
730
 
731
/**
 * Returns the `PLAY_DEDUP` Durable Object stub whose name is the given run id.
 *
 * @param env Environment exposing the `PLAY_DEDUP` namespace.
 * @param runId Run id used verbatim as the Durable Object name.
 */
function runScopedDurableObject(
  env: CoordinatorEnv,
  runId: string,
): DurableObjectStub {
  const namespace = env.PLAY_DEDUP;
  const objectId = namespace.idFromName(runId);
  return namespace.get(objectId);
}
737
+
707
738
  /**
708
- * Returns the slug-rooted Durable Scope key for the workflow pool DO.
709
- *
710
- * The pool DO name is a *shared* (non-runId) key, so without slug-scoping
711
- * a single Cloudflare account running multiple PR previews would collide
712
- * on `workflow-pool:v2`. The slug isolates the pool per-preview.
713
- *
714
- * See docs/adr/0005-durable-scope.md.
739
+ * Address the rate-state Durable Object for a single `<orgId>:<provider>`
740
+ * bucket. Keying the DO by the bucket id (not the run id) makes one
741
+ * single-threaded instance own that bucket's request window across every
742
+ * isolate of every run in the org, which is exactly what the distributed Rate
743
+ * State Backend needs. Reuses the PlayDedup namespace (the DO already hosts the
744
+ * token-bucket handlers) so no extra binding is required.
715
745
  */
716
- function workflowPoolDurableObjectName(env: CoordinatorEnv): string {
717
- const slug = env.DEEPLINE_PLAY_PREVIEW_SLUG?.trim();
718
- return slug ? `${slug}:${WORKFLOW_POOL_DO_NAME}` : WORKFLOW_POOL_DO_NAME;
746
function rateBucketDurableObject(
  env: CoordinatorEnv,
  bucketId: string,
): DurableObjectStub {
  // Bucket objects share the PLAY_DEDUP namespace and are addressed by the
  // bucket id behind a `rate:` name prefix.
  const namespace = env.PLAY_DEDUP;
  const objectName = `rate:${bucketId}`;
  return namespace.get(namespace.idFromName(objectName));
}
720
752
 
721
- function workflowPoolDurableObject(env: CoordinatorEnv): DurableObjectStub {
722
- return env.PLAY_DEDUP.get(
723
- env.PLAY_DEDUP.idFromName(workflowPoolDurableObjectName(env)),
753
/**
 * Sends a JSON POST to the rate-bucket Durable Object for `bucketId` and
 * returns the parsed JSON response.
 *
 * @param env Environment passed through to `rateBucketDurableObject`.
 * @param bucketId Bucket whose Durable Object receives the request.
 * @param path Request path appended to the internal rate-state origin.
 * @param body Value serialized with `JSON.stringify` as the request body.
 * @returns The response JSON, cast to `T` without validation.
 * @throws Error on a non-OK status; the message carries the status and up to
 *   400 characters of the response text.
 */
async function callRateBucketControl<T>(
  env: CoordinatorEnv,
  bucketId: string,
  path: string,
  body: unknown,
): Promise<T> {
  const bucket = rateBucketDurableObject(env, bucketId);
  const response = await bucket.fetch(
    `https://deepline.rate-state.internal${path}`,
    {
      method: 'POST',
      headers: { 'content-type': 'application/json' },
      body: JSON.stringify(body),
    },
  );
  if (response.ok) {
    return (await response.json()) as T;
  }
  // A failure to read the error body degrades to an empty detail string.
  const detail = await response.text().catch(() => '');
  throw new Error(
    `rate state ${path} failed ${response.status}: ${detail.slice(0, 400)}`,
  );
}
726
776
 
727
- function runScopedDurableObject(
777
+ async function callRunScopedControl<T>(
728
778
  env: CoordinatorEnv,
729
779
  runId: string,
730
- ): DurableObjectStub {
731
- return env.PLAY_DEDUP.get(env.PLAY_DEDUP.idFromName(runId));
732
- }
733
-
734
- async function callWorkflowPool<T>(
735
- env: CoordinatorEnv,
736
780
  path: string,
737
- init?: RequestInit & { timeoutMs?: number },
781
+ init?: RequestInit,
738
782
  ): Promise<T> {
739
- const timeoutMs = Math.max(
740
- 1,
741
- Math.floor(init?.timeoutMs ?? WORKFLOW_POOL_CONTROL_TIMEOUT_MS),
783
+ const response = await runScopedDurableObject(env, runId).fetch(
784
+ `https://deepline.run-state.internal${path}`,
785
+ {
786
+ ...(init ?? {}),
787
+ headers: {
788
+ 'content-type': 'application/json',
789
+ ...(init?.headers ?? {}),
790
+ },
791
+ },
742
792
  );
743
- let timeoutId: ReturnType<typeof setTimeout> | null = null;
744
- try {
745
- const fetchInit: RequestInit = { ...(init ?? {}) };
746
- delete (fetchInit as { timeoutMs?: number }).timeoutMs;
747
- delete fetchInit.signal;
748
- const response = await Promise.race([
749
- workflowPoolDurableObject(env).fetch(
750
- `https://deepline.workflow-pool.internal${path}`,
751
- {
752
- ...fetchInit,
753
- headers: {
754
- 'content-type': 'application/json',
755
- ...(init?.headers ?? {}),
756
- },
757
- },
758
- ),
759
- new Promise<Response>((_, reject) => {
760
- timeoutId = setTimeout(
761
- () =>
762
- reject(
763
- new Error(`workflow pool ${path} timed out after ${timeoutMs}ms`),
764
- ),
765
- timeoutMs,
766
- );
767
- }),
768
- ]);
769
- if (!response.ok) {
770
- throw new Error(
771
- `workflow pool ${path} failed ${response.status}: ${(
772
- await response.text().catch(() => '')
773
- ).slice(0, 400)}`,
774
- );
775
- }
776
- return (await response.json()) as T;
777
- } catch (error) {
778
- if (
779
- error instanceof Error &&
780
- (error.name === 'AbortError' || error.message.includes('aborted'))
781
- ) {
782
- throw new Error(`workflow pool ${path} timed out after ${timeoutMs}ms`);
783
- }
784
- throw error;
785
- } finally {
786
- if (timeoutId) clearTimeout(timeoutId);
793
+ if (!response.ok) {
794
+ throw new Error(
795
+ `run state ${path} failed ${response.status}: ${(
796
+ await response.text().catch(() => '')
797
+ ).slice(0, 400)}`,
798
+ );
787
799
  }
800
+ return (await response.json()) as T;
801
+ }
802
+
803
/**
 * Sends the workflow instance id for a run to the run-scoped Durable Object.
 *
 * Issues a POST to `/workflow-instance-put` through `callRunScopedControl`
 * with `runId`, `instanceId`, and `ttlMs` set to `WORKFLOW_RETRY_STATE_TTL_MS`.
 * Any error from `callRunScopedControl` propagates to the caller.
 *
 * @param input.env Environment passed through to `callRunScopedControl`.
 * @param input.runId Run whose Durable Object receives the request.
 * @param input.instanceId Workflow instance id sent in the request body.
 */
async function recordWorkflowInstanceId(input: {
  env: CoordinatorEnv;
  runId: string;
  instanceId: string;
}): Promise<void> {
  const { env, runId, instanceId } = input;
  const payload = JSON.stringify({
    runId,
    instanceId,
    ttlMs: WORKFLOW_RETRY_STATE_TTL_MS,
  });
  await callRunScopedControl<{ ok?: unknown }>(
    env,
    runId,
    '/workflow-instance-put',
    { method: 'POST', body: payload },
  );
}
822
+
823
/**
 * Resolves the workflow instance id to use for a run.
 *
 * Queries `/workflow-instance-get` on the run-scoped Durable Object. A
 * non-empty string `instanceId` in the response is returned as-is; a failed
 * lookup or any other value falls back to `workflowInstanceId(runId)`.
 *
 * @param env Environment passed through to `callRunScopedControl`.
 * @param runId Run to resolve.
 * @returns The stored instance id, or the value derived from `runId`.
 */
async function resolveWorkflowInstanceIdForRun(
  env: CoordinatorEnv,
  runId: string,
): Promise<string> {
  const path = `/workflow-instance-get?runId=${encodeURIComponent(runId)}`;
  let lookup: { instanceId?: unknown };
  try {
    lookup = await callRunScopedControl<{ instanceId?: unknown }>(
      env,
      runId,
      path,
    );
  } catch {
    // Best-effort lookup: any failure is treated as "nothing recorded".
    lookup = { instanceId: null };
  }
  const stored = lookup.instanceId;
  if (typeof stored === 'string' && stored) {
    return stored;
  }
  return workflowInstanceId(runId);
}
789
836
 
790
837
  function assertEncryptedPreloadedDbSessions(
@@ -804,49 +851,6 @@ function assertEncryptedPreloadedDbSessions(
804
851
  }
805
852
  }
806
853
 
807
- async function persistWorkflowDbSessions(input: {
808
- env: CoordinatorEnv;
809
- runId: string;
810
- sessions: PreloadedRuntimeDbSession[];
811
- }): Promise<NonNullable<PlayWorkflowParams['preloadedDbSessionRef']>> {
812
- assertEncryptedPreloadedDbSessions(input.sessions);
813
- const response = await runScopedDurableObject(input.env, input.runId).fetch(
814
- 'https://deepline.dedup.internal/db-sessions-put',
815
- {
816
- method: 'POST',
817
- headers: { 'content-type': 'application/json' },
818
- body: JSON.stringify({
819
- runId: input.runId,
820
- sessions: input.sessions,
821
- ttlMs: DB_SESSION_DEFAULT_TTL_SECONDS * 1000,
822
- }),
823
- },
824
- );
825
- if (!response.ok) {
826
- throw new Error(
827
- `workflow db session storage failed ${response.status}: ${(
828
- await response.text().catch(() => '')
829
- ).slice(0, 400)}`,
830
- );
831
- }
832
- const body = (await response.json().catch(() => ({}))) as {
833
- sessionCount?: unknown;
834
- expiresAt?: unknown;
835
- };
836
- return {
837
- runId: input.runId,
838
- sessionCount:
839
- typeof body.sessionCount === 'number' &&
840
- Number.isFinite(body.sessionCount)
841
- ? body.sessionCount
842
- : input.sessions.length,
843
- expiresAt:
844
- typeof body.expiresAt === 'number' && Number.isFinite(body.expiresAt)
845
- ? body.expiresAt
846
- : Date.now() + DB_SESSION_DEFAULT_TTL_SECONDS * 1000,
847
- };
848
- }
849
-
850
854
  async function readWorkflowDbSessions(input: {
851
855
  env: CoordinatorEnv;
852
856
  ref: NonNullable<PlayWorkflowParams['preloadedDbSessionRef']>;
@@ -877,34 +881,56 @@ async function readWorkflowDbSessions(input: {
877
881
  return sessions;
878
882
  }
879
883
 
880
- async function externalizeWorkflowDbSessions(input: {
884
/**
 * Calls `readWorkflowDbSessions`, retrying only when the thrown error message
 * contains `workflow db session lookup failed 404`.
 *
 * Up to five attempts are made, sleeping 25, 50, 100, then 200 ms between
 * them. Any other error, or a matching error on the final attempt, is
 * rethrown unchanged.
 *
 * @param input.env Environment passed through to `readWorkflowDbSessions`.
 * @param input.ref Session reference passed through to `readWorkflowDbSessions`.
 * @returns The sessions returned by the first successful read.
 */
async function readWorkflowDbSessionsWithRetry(input: {
  env: CoordinatorEnv;
  ref: NonNullable<PlayWorkflowParams['preloadedDbSessionRef']>;
}): Promise<PreloadedRuntimeDbSession[]> {
  const backoffMs = [25, 50, 100, 200] as const;
  let attempt = 0;
  for (;;) {
    try {
      return await readWorkflowDbSessions(input);
    } catch (error) {
      const text = error instanceof Error ? error.message : String(error);
      const isMissingLookup = text.includes(
        'workflow db session lookup failed 404',
      );
      const outOfRetries = attempt >= backoffMs.length;
      if (!isMissingLookup || outOfRetries) {
        throw error;
      }
      await sleep(backoffMs[attempt]);
      attempt += 1;
    }
  }
}
907
+
908
+ function externalizedWorkflowDbSessionParams(input: {
882
909
  params: PlayWorkflowParams;
883
- recordSubmitTiming?: (timing: CoordinatorTiming) => void;
884
- }): Promise<PlayWorkflowParams> {
910
+ }): {
911
+ params: PlayWorkflowParams;
912
+ sessions: PreloadedRuntimeDbSession[];
913
+ ref: NonNullable<PlayWorkflowParams['preloadedDbSessionRef']> | null;
914
+ } {
885
915
  const sessions = Array.isArray(input.params.preloadedDbSessions)
886
916
  ? input.params.preloadedDbSessions
887
917
  : [];
888
- if (sessions.length === 0) return input.params;
889
- const startedAt = Date.now();
890
- const ref = await persistWorkflowDbSessions({
891
- env: input.env,
918
+ if (sessions.length === 0) {
919
+ return { params: input.params, sessions, ref: null };
920
+ }
921
+ const ref: NonNullable<PlayWorkflowParams['preloadedDbSessionRef']> = {
892
922
  runId: input.params.runId,
893
- sessions,
894
- });
895
- input.recordSubmitTiming?.({
896
- phase: 'coordinator.workflow_db_sessions_externalized',
897
- ms: Date.now() - startedAt,
898
- graphHash: input.params.graphHash ?? null,
899
- extra: {
900
- sessions: sessions.length,
901
- expiresAt: ref.expiresAt,
902
- },
903
- });
923
+ sessionCount: sessions.length,
924
+ expiresAt: Date.now() + DB_SESSION_DEFAULT_TTL_SECONDS * 1000,
925
+ };
904
926
  return {
905
- ...input.params,
906
- preloadedDbSessions: null,
907
- preloadedDbSessionRef: ref,
927
+ params: {
928
+ ...input.params,
929
+ preloadedDbSessions: null,
930
+ preloadedDbSessionRef: ref,
931
+ },
932
+ sessions,
933
+ ref,
908
934
  };
909
935
  }
910
936
 
@@ -941,7 +967,10 @@ async function hydrateWorkflowDbSessions(input: {
941
967
  const ref = readPreloadedDbSessionRef(params.preloadedDbSessionRef);
942
968
  if (!ref) return input.event;
943
969
  const startedAt = Date.now();
944
- const sessions = await readWorkflowDbSessions({ env: input.env, ref });
970
+ const sessions = await readWorkflowDbSessionsWithRetry({
971
+ env: input.env,
972
+ ref,
973
+ });
945
974
  input.trace({
946
975
  runId: ref.runId,
947
976
  phase: 'coordinator.workflow_db_sessions_hydrated',
@@ -969,278 +998,312 @@ async function hydrateWorkflowDbSessions(input: {
969
998
  };
970
999
  }
971
1000
 
972
- type WorkflowPoolCounts = {
973
- available: number;
974
- warming: number;
975
- };
976
-
977
- type WorkflowPoolRefillResult = WorkflowPoolCounts & {
978
- target: number;
979
- created: number;
980
- promoted: number;
981
- removed: number;
982
- waitedMs: number;
983
- waitIterations: number;
984
- };
985
-
986
- type WorkflowPoolListEntry = {
987
- id: string;
988
- state: string;
989
- createdAt: number;
990
- readyAt: number | null;
991
- expiresAt: number;
992
- };
993
-
994
- async function workflowPoolCount(
995
- env: CoordinatorEnv,
996
- ): Promise<WorkflowPoolCounts> {
997
- const body = await callWorkflowPool<{
998
- available?: unknown;
999
- warming?: unknown;
1000
- }>(
1001
- env,
1002
- `/pool-count?version=${encodeURIComponent(WORKFLOW_POOL_PROTOCOL_VERSION)}`,
1003
- );
1004
- return {
1005
- available: typeof body.available === 'number' ? body.available : 0,
1006
- warming: typeof body.warming === 'number' ? body.warming : 0,
1007
- };
1008
- }
1009
-
1010
1001
  function sleep(ms: number): Promise<void> {
1011
1002
  return new Promise((resolve) => setTimeout(resolve, ms));
1012
1003
  }
1013
1004
 
1014
- async function listWorkflowPoolEntries(
1015
- env: CoordinatorEnv,
1016
- ): Promise<WorkflowPoolListEntry[]> {
1017
- const body = await callWorkflowPool<{ entries?: unknown }>(
1018
- env,
1019
- `/pool-list?version=${encodeURIComponent(WORKFLOW_POOL_PROTOCOL_VERSION)}`,
1020
- );
1021
- if (!Array.isArray(body.entries)) return [];
1022
- return body.entries
1023
- .filter((entry): entry is Record<string, unknown> =>
1024
- Boolean(entry && typeof entry === 'object' && !Array.isArray(entry)),
1025
- )
1026
- .map((entry) => ({
1027
- id: typeof entry.id === 'string' ? entry.id : '',
1028
- state: typeof entry.state === 'string' ? entry.state : '',
1029
- createdAt:
1030
- typeof entry.createdAt === 'number' && Number.isFinite(entry.createdAt)
1031
- ? entry.createdAt
1032
- : 0,
1033
- readyAt:
1034
- typeof entry.readyAt === 'number' && Number.isFinite(entry.readyAt)
1035
- ? entry.readyAt
1036
- : null,
1037
- expiresAt:
1038
- typeof entry.expiresAt === 'number' && Number.isFinite(entry.expiresAt)
1039
- ? entry.expiresAt
1040
- : 0,
1041
- }))
1042
- .filter((entry) => entry.id);
1005
/**
 * Extracts the parameter record from a workflow event.
 *
 * @param event Value expected to carry a `payload` record.
 * @returns `payload.params` when that is a record, otherwise `payload`
 *   itself; `null` when `event` or `event.payload` is not a record.
 */
function readWorkflowPayload(event: unknown): Record<string, unknown> | null {
  if (!isRecord(event)) {
    return null;
  }
  const { payload } = event;
  if (!isRecord(payload)) {
    return null;
  }
  // Prefer the nested `params` record when the payload carries one.
  const nested = payload.params;
  if (isRecord(nested)) {
    return nested;
  }
  return payload;
}
1044
1011
 
1045
- async function addWorkflowPoolIds(
1046
- env: CoordinatorEnv,
1047
- ids: string[],
1048
- options?: { ready?: boolean },
1049
- ): Promise<void> {
1050
- if (ids.length === 0) return;
1051
- await callWorkflowPool(env, '/pool-add', {
1052
- method: 'POST',
1053
- body: JSON.stringify({
1054
- ids,
1055
- ttlMs: WORKFLOW_POOL_TTL_MS,
1056
- version: WORKFLOW_POOL_PROTOCOL_VERSION,
1057
- ready: options?.ready === true,
1058
- ...(options?.ready === true ? { readyAt: Date.now() } : {}),
1059
- }),
1012
+ async function markWorkflowRuntimeFailure(input: {
1013
+ env: CoordinatorEnv;
1014
+ event: unknown;
1015
+ error: unknown;
1016
+ }): Promise<void> {
1017
+ const payload = readWorkflowPayload(input.event);
1018
+ if (!payload) return;
1019
+ const runId = typeof payload.runId === 'string' ? payload.runId : null;
1020
+ const baseUrl = typeof payload.baseUrl === 'string' ? payload.baseUrl : null;
1021
+ const executorToken =
1022
+ typeof payload.executorToken === 'string' ? payload.executorToken : null;
1023
+ if (!runId || !baseUrl || !executorToken) return;
1024
+ const errorName =
1025
+ input.error instanceof Error && input.error.name
1026
+ ? input.error.name
1027
+ : 'Error';
1028
+ const errorMessage =
1029
+ input.error instanceof Error ? input.error.message : String(input.error);
1030
+ const errorStack =
1031
+ input.error instanceof Error && typeof input.error.stack === 'string'
1032
+ ? input.error.stack.split('\n').slice(0, 12).join('\n')
1033
+ : null;
1034
+ const headers = new Headers({
1035
+ authorization: `Bearer ${executorToken}`,
1036
+ 'content-type': 'application/json',
1060
1037
  });
1061
- }
1062
-
1063
- async function markWorkflowPoolIdReady(
1064
- env: CoordinatorEnv,
1065
- poolId: string,
1066
- ): Promise<boolean> {
1067
- const body = await callWorkflowPool<{ ready?: unknown }>(env, '/pool-ready', {
1068
- method: 'POST',
1069
- body: JSON.stringify({
1070
- poolId,
1071
- version: WORKFLOW_POOL_PROTOCOL_VERSION,
1072
- }),
1038
+ const bypass = input.env.VERCEL_PROTECTION_BYPASS_TOKEN?.trim();
1039
+ if (bypass) headers.set('x-vercel-protection-bypass', bypass);
1040
+ const body = JSON.stringify({
1041
+ action: 'append_run_events',
1042
+ playId: runId,
1043
+ events: [
1044
+ {
1045
+ type: 'run.failed',
1046
+ runId,
1047
+ source: 'coordinator',
1048
+ occurredAt: Date.now(),
1049
+ error: `DynamicWorkflow runner failed: ${errorName}: ${errorMessage}${
1050
+ errorStack ? `\n${errorStack}` : ''
1051
+ }`,
1052
+ } satisfies PlayRunLedgerEvent,
1053
+ ],
1073
1054
  });
1074
- return body.ready === true;
1075
- }
1076
-
1077
- async function promoteWorkflowPoolIds(
1078
- env: CoordinatorEnv,
1079
- ids: string[],
1080
- ): Promise<void> {
1081
- if (ids.length === 0) return;
1082
- await callWorkflowPool(env, '/pool-promote', {
1083
- method: 'POST',
1084
- body: JSON.stringify({
1085
- ids,
1086
- version: WORKFLOW_POOL_PROTOCOL_VERSION,
1087
- }),
1055
+ const url = `${baseUrl.replace(/\/$/, '')}/api/v2/plays/internal/runtime`;
1056
+ const backoffMs = [200, 500, 1500];
1057
+ let lastError: unknown = null;
1058
+ for (let attempt = 0; attempt <= backoffMs.length; attempt += 1) {
1059
+ try {
1060
+ const response = await fetch(url, { method: 'POST', headers, body });
1061
+ if (response.ok) return;
1062
+ lastError = new Error(
1063
+ `runtime API responded ${response.status}: ${(await response.text().catch(() => '')).slice(0, 400)}`,
1064
+ );
1065
+ if (
1066
+ response.status >= 400 &&
1067
+ response.status < 500 &&
1068
+ response.status !== 408 &&
1069
+ response.status !== 429
1070
+ ) {
1071
+ break;
1072
+ }
1073
+ } catch (error) {
1074
+ lastError = error;
1075
+ }
1076
+ if (attempt < backoffMs.length) {
1077
+ await new Promise((resolve) => setTimeout(resolve, backoffMs[attempt]));
1078
+ }
1079
+ }
1080
+ console.error('[coordinator] failed to mark workflow runtime failure', {
1081
+ runId,
1082
+ message: lastError instanceof Error ? lastError.message : String(lastError),
1088
1083
  });
1089
1084
  }
1090
1085
 
1091
- async function deleteWorkflowPoolIds(
1092
- env: CoordinatorEnv,
1093
- ids: string[],
1094
- ): Promise<void> {
1095
- if (ids.length === 0) return;
1096
- await callWorkflowPool(env, '/pool-delete', {
1097
- method: 'POST',
1098
- body: JSON.stringify({
1099
- ids,
1100
- version: WORKFLOW_POOL_PROTOCOL_VERSION,
1101
- }),
1102
- });
1103
- }
1086
+ type StoredPlayArtifactPayload = {
1087
+ artifact?: {
1088
+ bundledCode?: string;
1089
+ artifactKind?: string;
1090
+ };
1091
+ };
1104
1092
 
1105
- async function leaseWorkflowPoolId(
1106
- env: CoordinatorEnv,
1107
- runId: string,
1108
- ): Promise<string | null> {
1109
- const body = await callWorkflowPool<{ id?: unknown }>(
1110
- env,
1111
- `/pool-claim?version=${encodeURIComponent(WORKFLOW_POOL_PROTOCOL_VERSION)}`,
1112
- {
1113
- method: 'POST',
1114
- body: JSON.stringify({ runId }),
1115
- },
1116
- );
1117
- return typeof body.id === 'string' && body.id ? body.id : null;
1118
- }
1093
+ const DYNAMIC_WORKER_COMPATIBILITY_DATE = '2026-05-01';
1119
1094
 
1120
- async function mapRunToWorkflowInstance(input: {
1095
+ async function persistWorkflowRetryState(input: {
1121
1096
  env: CoordinatorEnv;
1122
1097
  runId: string;
1123
- instanceId: string;
1124
- started?: boolean;
1125
- }): Promise<boolean> {
1126
- const body = await callWorkflowPool<{ mapped?: unknown }>(
1098
+ params: PlayWorkflowParams;
1099
+ }): Promise<void> {
1100
+ const retryParams = buildWorkflowRetryParams(input.params);
1101
+ const paramsBytes = jsonByteLength(retryParams);
1102
+ if (paramsBytes > WORKFLOW_RETRY_PARAMS_MAX_BYTES) {
1103
+ throw new Error(
1104
+ `workflow retry params too large: ${paramsBytes} bytes exceeds ${WORKFLOW_RETRY_PARAMS_MAX_BYTES}. Pass large payloads as staged files or ctx.csv inputs instead of inline JSON.`,
1105
+ );
1106
+ }
1107
+ let body: {
1108
+ runId: string;
1109
+ params?: PlayWorkflowParams;
1110
+ paramsRef?: WorkflowRetryParamsRef;
1111
+ paramsBytes: number;
1112
+ ttlMs: number;
1113
+ };
1114
+ if (paramsBytes > WORKFLOW_RETRY_PARAMS_EXTERNALIZE_AFTER_BYTES) {
1115
+ const serialized = JSON.stringify(retryParams);
1116
+ const hash = stableHash(serialized);
1117
+ const storageKey = workflowRetryParamsStorageKey({
1118
+ runId: input.runId,
1119
+ hash,
1120
+ });
1121
+ await input.env.PLAYS_BUCKET.put(storageKey, serialized, {
1122
+ httpMetadata: { contentType: 'application/json' },
1123
+ });
1124
+ body = {
1125
+ runId: input.runId,
1126
+ paramsRef: {
1127
+ storageKind: 'r2',
1128
+ storageKey,
1129
+ bytes: paramsBytes,
1130
+ hash,
1131
+ expiresAt: Date.now() + WORKFLOW_RETRY_STATE_TTL_MS,
1132
+ },
1133
+ paramsBytes,
1134
+ ttlMs: WORKFLOW_RETRY_STATE_TTL_MS,
1135
+ };
1136
+ } else {
1137
+ body = {
1138
+ runId: input.runId,
1139
+ params: retryParams,
1140
+ paramsBytes,
1141
+ ttlMs: WORKFLOW_RETRY_STATE_TTL_MS,
1142
+ };
1143
+ }
1144
+ await callRunScopedControl<{ ok?: unknown }>(
1127
1145
  input.env,
1128
- '/pool-map-run',
1146
+ input.runId,
1147
+ '/run-retry-state-put',
1129
1148
  {
1130
1149
  method: 'POST',
1131
- body: JSON.stringify({
1132
- runId: input.runId,
1133
- instanceId: input.instanceId,
1134
- started: input.started === true,
1135
- version: WORKFLOW_POOL_PROTOCOL_VERSION,
1136
- }),
1150
+ body: JSON.stringify(body),
1137
1151
  },
1138
1152
  );
1139
- return body.mapped !== false;
1140
1153
  }
1141
1154
 
1142
- async function blockWorkflowPoolRun(input: {
1155
+ async function persistWorkflowLaunchState(input: {
1143
1156
  env: CoordinatorEnv;
1144
1157
  runId: string;
1145
- instanceId: string;
1146
- }): Promise<{ blocked: boolean; started: boolean }> {
1147
- const body = await callWorkflowPool<{
1148
- blocked?: unknown;
1149
- started?: unknown;
1150
- }>(input.env, '/pool-block-run', {
1151
- method: 'POST',
1152
- body: JSON.stringify({
1158
+ params: PlayWorkflowParams;
1159
+ sessions: PreloadedRuntimeDbSession[];
1160
+ }): Promise<{
1161
+ retryExpiresAt?: number;
1162
+ dbSessionsExpiresAt?: number;
1163
+ sessionCount?: number;
1164
+ }> {
1165
+ if (input.sessions.length === 0) {
1166
+ await persistWorkflowRetryState({
1167
+ env: input.env,
1153
1168
  runId: input.runId,
1154
- instanceId: input.instanceId,
1155
- version: WORKFLOW_POOL_PROTOCOL_VERSION,
1156
- }),
1169
+ params: input.params,
1170
+ });
1171
+ return {};
1172
+ }
1173
+ const retryParams = buildWorkflowRetryParams(input.params);
1174
+ const paramsBytes = jsonByteLength(retryParams);
1175
+ let body: {
1176
+ runId: string;
1177
+ params?: PlayWorkflowParams;
1178
+ paramsRef?: WorkflowRetryParamsRef;
1179
+ paramsBytes: number;
1180
+ sessions: PreloadedRuntimeDbSession[];
1181
+ retryTtlMs: number;
1182
+ dbSessionsTtlMs: number;
1183
+ };
1184
+ if (paramsBytes > WORKFLOW_RETRY_PARAMS_EXTERNALIZE_AFTER_BYTES) {
1185
+ const serialized = JSON.stringify(retryParams);
1186
+ const hash = stableHash(serialized);
1187
+ const storageKey = workflowRetryParamsStorageKey({
1188
+ runId: input.runId,
1189
+ hash,
1190
+ });
1191
+ await input.env.PLAYS_BUCKET.put(storageKey, serialized, {
1192
+ httpMetadata: { contentType: 'application/json' },
1193
+ });
1194
+ body = {
1195
+ runId: input.runId,
1196
+ paramsRef: {
1197
+ storageKind: 'r2',
1198
+ storageKey,
1199
+ bytes: paramsBytes,
1200
+ hash,
1201
+ expiresAt: Date.now() + WORKFLOW_RETRY_STATE_TTL_MS,
1202
+ },
1203
+ paramsBytes,
1204
+ sessions: input.sessions,
1205
+ retryTtlMs: WORKFLOW_RETRY_STATE_TTL_MS,
1206
+ dbSessionsTtlMs: DB_SESSION_DEFAULT_TTL_SECONDS * 1000,
1207
+ };
1208
+ } else {
1209
+ body = {
1210
+ runId: input.runId,
1211
+ params: retryParams,
1212
+ paramsBytes,
1213
+ sessions: input.sessions,
1214
+ retryTtlMs: WORKFLOW_RETRY_STATE_TTL_MS,
1215
+ dbSessionsTtlMs: DB_SESSION_DEFAULT_TTL_SECONDS * 1000,
1216
+ };
1217
+ }
1218
+ const response = await callRunScopedControl<{
1219
+ ok?: unknown;
1220
+ retryExpiresAt?: unknown;
1221
+ dbSessionsExpiresAt?: unknown;
1222
+ sessionCount?: unknown;
1223
+ }>(input.env, input.runId, '/run-launch-state-put', {
1224
+ method: 'POST',
1225
+ body: JSON.stringify(body),
1157
1226
  });
1158
1227
  return {
1159
- blocked: body.blocked === true,
1160
- started: body.started === true,
1228
+ retryExpiresAt:
1229
+ typeof response.retryExpiresAt === 'number'
1230
+ ? response.retryExpiresAt
1231
+ : undefined,
1232
+ dbSessionsExpiresAt:
1233
+ typeof response.dbSessionsExpiresAt === 'number'
1234
+ ? response.dbSessionsExpiresAt
1235
+ : undefined,
1236
+ sessionCount:
1237
+ typeof response.sessionCount === 'number'
1238
+ ? response.sessionCount
1239
+ : undefined,
1161
1240
  };
1162
1241
  }
1163
1242
 
1164
- async function readWorkflowPoolRunMapping(input: {
1243
+ async function hydrateWorkflowRetryParams(input: {
1165
1244
  env: CoordinatorEnv;
1166
- runId: string;
1167
- }): Promise<{ instanceId: string | null; startedAt: number | null }> {
1168
- const body = await callWorkflowPool<{
1169
- instanceId?: unknown;
1170
- startedAt?: unknown;
1171
- }>(
1172
- input.env,
1173
- `/pool-resolve-run?runId=${encodeURIComponent(input.runId)}&version=${encodeURIComponent(
1174
- WORKFLOW_POOL_PROTOCOL_VERSION,
1175
- )}`,
1176
- ).catch(() => ({ instanceId: null, startedAt: null }));
1177
- return {
1178
- instanceId:
1179
- typeof body.instanceId === 'string' && body.instanceId
1180
- ? body.instanceId
1181
- : null,
1182
- startedAt:
1183
- typeof body.startedAt === 'number' && Number.isFinite(body.startedAt)
1184
- ? body.startedAt
1185
- : null,
1186
- };
1245
+ params: unknown;
1246
+ paramsRef: unknown;
1247
+ }): Promise<PlayWorkflowParams | null> {
1248
+ if (isRecord(input.params)) {
1249
+ return input.params as PlayWorkflowParams;
1250
+ }
1251
+ if (!isRecord(input.paramsRef)) {
1252
+ return null;
1253
+ }
1254
+ const storageKind = input.paramsRef.storageKind;
1255
+ const storageKey = input.paramsRef.storageKey;
1256
+ const expectedBytes = input.paramsRef.bytes;
1257
+ const expectedHash = input.paramsRef.hash;
1258
+ if (
1259
+ storageKind !== 'r2' ||
1260
+ typeof storageKey !== 'string' ||
1261
+ !storageKey.startsWith('plays/workflow-retry-params/') ||
1262
+ typeof expectedBytes !== 'number' ||
1263
+ !Number.isFinite(expectedBytes) ||
1264
+ typeof expectedHash !== 'string' ||
1265
+ !expectedHash
1266
+ ) {
1267
+ throw new Error('Invalid workflow retry params reference.');
1268
+ }
1269
+ const object = await input.env.PLAYS_BUCKET.get(storageKey);
1270
+ if (!object) {
1271
+ throw new Error(`Workflow retry params missing from R2: ${storageKey}`);
1272
+ }
1273
+ const text = await object.text();
1274
+ const actualBytes = new TextEncoder().encode(text).length;
1275
+ if (actualBytes !== expectedBytes) {
1276
+ throw new Error(
1277
+ `Workflow retry params byte length mismatch: expected ${expectedBytes}, got ${actualBytes}.`,
1278
+ );
1279
+ }
1280
+ const actualHash = stableHash(text);
1281
+ if (actualHash !== expectedHash) {
1282
+ throw new Error('Workflow retry params hash mismatch.');
1283
+ }
1284
+ const parsed = JSON.parse(text) as unknown;
1285
+ return isRecord(parsed) ? (parsed as PlayWorkflowParams) : null;
1187
1286
  }
1188
1287
 
1189
- async function persistWorkflowRetryState(input: {
1190
- env: CoordinatorEnv;
1288
+ function workflowRetryStatePersistenceErrorResponse(input: {
1191
1289
  runId: string;
1192
- params: PlayWorkflowParams;
1193
- }): Promise<void> {
1194
- const retryParams: PlayWorkflowParams = {
1195
- ...input.params,
1196
- dynamicWorkerCode: null,
1197
- contractSnapshot: stripRetrySourceSnapshot(input.params.contractSnapshot),
1198
- childPlayManifests: stripRetryChildManifestCode(
1199
- input.params.childPlayManifests,
1200
- ),
1201
- packagedFiles:
1202
- input.params.packagedFiles?.map((file) => ({
1203
- playPath: file.playPath,
1204
- storageKey: file.storageKey,
1205
- contentType: file.contentType,
1206
- bytes: file.bytes,
1207
- })) ?? null,
1208
- };
1209
- await callWorkflowPool<{ ok?: unknown }>(input.env, '/run-retry-state-put', {
1210
- method: 'POST',
1211
- body: JSON.stringify({
1212
- runId: input.runId,
1213
- params: retryParams,
1214
- ttlMs: WORKFLOW_RETRY_STATE_TTL_MS,
1215
- }),
1216
- }).catch((error) => {
1217
- console.warn('[coordinator] workflow retry state persistence skipped', {
1218
- runId: input.runId,
1219
- error: error instanceof Error ? error.message : String(error),
1220
- });
1221
- });
1222
- }
1223
-
1224
- function stripRetrySourceSnapshot(snapshot: unknown): unknown {
1225
- if (!isRecord(snapshot)) return snapshot;
1226
- const rest = { ...snapshot };
1227
- delete rest.sourceCode;
1228
- delete rest.sourceFiles;
1229
- return rest;
1230
- }
1231
-
1232
- function stripRetryChildManifestCode(
1233
- manifests: PlayRuntimeManifestMap | null | undefined,
1234
- ): PlayRuntimeManifestMap | null {
1235
- if (!manifests) return null;
1236
- const stripped: PlayRuntimeManifestMap = {};
1237
- for (const [key, manifest] of Object.entries(manifests)) {
1238
- const rest = { ...manifest };
1239
- delete rest.bundledCode;
1240
- delete rest.sourceCode;
1241
- stripped[key] = rest;
1242
- }
1243
- return stripped;
1290
+ error: unknown;
1291
+ }): Response {
1292
+ const message =
1293
+ input.error instanceof Error ? input.error.message : String(input.error);
1294
+ return Response.json(
1295
+ {
1296
+ error: {
1297
+ code: 'WORKFLOW_RETRY_STATE_PERSISTENCE_FAILED',
1298
+ message:
1299
+ 'Failed to persist workflow retry state before dispatching the play run.',
1300
+ phase: 'coordinator_retry_state_persistence',
1301
+ runId: input.runId,
1302
+ cause: message,
1303
+ },
1304
+ },
1305
+ { status: 503 },
1306
+ );
1244
1307
  }
1245
1308
 
1246
1309
  async function claimWorkflowPlatformRetry(input: {
@@ -1251,21 +1314,27 @@ async function claimWorkflowPlatformRetry(input: {
1251
1314
  attempts: number;
1252
1315
  params: PlayWorkflowParams | null;
1253
1316
  }> {
1254
- const body = await callWorkflowPool<{
1317
+ const body = await callRunScopedControl<{
1255
1318
  claimed?: unknown;
1256
1319
  attempts?: unknown;
1257
1320
  params?: unknown;
1258
- }>(input.env, '/run-retry-claim', {
1321
+ paramsRef?: unknown;
1322
+ }>(input.env, input.runId, '/run-retry-claim', {
1259
1323
  method: 'POST',
1260
1324
  body: JSON.stringify({
1261
1325
  runId: input.runId,
1262
1326
  maxAttempts: PLATFORM_DEPLOY_WORKFLOW_RETRY_LIMIT,
1263
1327
  }),
1264
1328
  });
1329
+ const params = await hydrateWorkflowRetryParams({
1330
+ env: input.env,
1331
+ params: body.params,
1332
+ paramsRef: body.paramsRef,
1333
+ });
1265
1334
  return {
1266
1335
  claimed: body.claimed === true,
1267
1336
  attempts: typeof body.attempts === 'number' ? body.attempts : 0,
1268
- params: isRecord(body.params) ? (body.params as PlayWorkflowParams) : null,
1337
+ params,
1269
1338
  };
1270
1339
  }
1271
1340
 
@@ -1302,690 +1371,60 @@ async function restartWorkflowAfterPlatformReset(input: {
1302
1371
  console.warn('[coordinator] workflow platform retry claim failed', {
1303
1372
  runId: input.runId,
1304
1373
  error: error instanceof Error ? error.message : String(error),
1305
- });
1306
- return null;
1307
- });
1308
- if (!claim?.claimed || !claim.params) {
1309
- return { retried: false, result: null };
1310
- }
1311
- const retryInstanceId = workflowRetryInstanceId(input.runId, claim.attempts);
1312
- const retryStartedAt = Date.now();
1313
- let retryInstance: WorkflowInstance | null = null;
1314
- try {
1315
- retryInstance = await createDynamicWorkflowInstance({
1316
- env: input.env,
1317
- id: retryInstanceId,
1318
- params: claim.params,
1319
- });
1320
- await mapRunToWorkflowInstance({
1321
- env: input.env,
1322
- runId: input.runId,
1323
- instanceId: retryInstance.id,
1324
- started: true,
1325
- });
1326
- input.ctx?.waitUntil(input.oldInstance.terminate().catch(() => undefined));
1327
- recordCoordinatorPerfTraceBuffered(input.env, input.ctx, {
1328
- runId: input.runId,
1329
- phase: 'coordinator.platform_deploy_retry',
1330
- ms: Date.now() - retryStartedAt,
1331
- graphHash: claim.params.graphHash ?? null,
1332
- extra: {
1333
- retryAttempt: claim.attempts,
1334
- retryInstanceId: retryInstance.id,
1335
- reason: decision.reason,
1336
- },
1337
- });
1338
- return {
1339
- retried: true,
1340
- result: {
1341
- runId: input.runId,
1342
- playName: claim.params.playName,
1343
- status: 'running',
1344
- result: null,
1345
- error: null,
1346
- retry: {
1347
- reason: decision.reason,
1348
- attempt: claim.attempts,
1349
- message: decision.message,
1350
- },
1351
- },
1352
- };
1353
- } finally {
1354
- disposeRpcStub(retryInstance);
1355
- }
1356
- }
1357
-
1358
- async function waitForWorkflowPoolStartAck(input: {
1359
- env: CoordinatorEnv;
1360
- runId: string;
1361
- instanceId: string;
1362
- timeoutMs: number;
1363
- }): Promise<{
1364
- acknowledged: boolean;
1365
- ms: number;
1366
- polls: number;
1367
- startedAt: number | null;
1368
- mappedInstanceId: string | null;
1369
- }> {
1370
- const startedAt = Date.now();
1371
- let polls = 0;
1372
- let latestMapping: { instanceId: string | null; startedAt: number | null } = {
1373
- instanceId: null,
1374
- startedAt: null,
1375
- };
1376
- while (Date.now() - startedAt < input.timeoutMs) {
1377
- polls += 1;
1378
- latestMapping = await readWorkflowPoolRunMapping({
1379
- env: input.env,
1380
- runId: input.runId,
1381
- });
1382
- if (
1383
- latestMapping.instanceId === input.instanceId &&
1384
- latestMapping.startedAt !== null
1385
- ) {
1386
- return {
1387
- acknowledged: true,
1388
- ms: Date.now() - startedAt,
1389
- polls,
1390
- startedAt: latestMapping.startedAt,
1391
- mappedInstanceId: latestMapping.instanceId,
1392
- };
1393
- }
1394
- await sleep(WORKFLOW_POOL_START_ACK_POLL_MS);
1395
- }
1396
- return {
1397
- acknowledged: false,
1398
- ms: Date.now() - startedAt,
1399
- polls,
1400
- startedAt: latestMapping.startedAt,
1401
- mappedInstanceId: latestMapping.instanceId,
1402
- };
1403
- }
1404
-
1405
- async function resolveWorkflowInstanceIdForRun(
1406
- env: CoordinatorEnv,
1407
- runId: string,
1408
- ): Promise<string> {
1409
- if (!workflowPoolEnabled()) {
1410
- return workflowInstanceId(runId);
1411
- }
1412
- const mapping = await readWorkflowPoolRunMapping({ env, runId });
1413
- return mapping.instanceId ? mapping.instanceId : workflowInstanceId(runId);
1414
- }
1415
-
1416
- async function clearWorkflowPool(env: CoordinatorEnv): Promise<number> {
1417
- const entries = await listWorkflowPoolEntries(env).catch(() => []);
1418
- const body = await callWorkflowPool<{ deleted?: unknown }>(
1419
- env,
1420
- `/pool-clear?version=${encodeURIComponent(WORKFLOW_POOL_PROTOCOL_VERSION)}`,
1421
- { method: 'POST', body: '{}' },
1422
- );
1423
- await Promise.all(
1424
- entries.map(async (entry) => {
1425
- const instance = await getWorkflowPoolInstance(env, entry.id);
1426
- if (!instance) {
1427
- return;
1428
- }
1429
- try {
1430
- await instance.terminate().catch(() => undefined);
1431
- } finally {
1432
- disposeRpcStub(instance);
1433
- }
1434
- }),
1435
- );
1436
- return typeof body.deleted === 'number' ? body.deleted : 0;
1437
- }
1438
-
1439
- function workflowStatusName(status: InstanceStatus | null): string {
1440
- return typeof status?.status === 'string' ? status.status : 'unknown';
1441
- }
1442
-
1443
- function isWorkflowInstanceNotFoundError(error: unknown): boolean {
1444
- const message = error instanceof Error ? error.message : String(error);
1445
- return /not[ _]found|not_found|does not exist|no such instance|404/i.test(
1446
- message,
1447
- );
1448
- }
1449
-
1450
- async function getWorkflowPoolInstance(
1451
- env: CoordinatorEnv,
1452
- instanceId: string,
1453
- ): Promise<WorkflowInstance | null> {
1454
- try {
1455
- return await env.PLAY_WORKFLOW.get(instanceId);
1456
- } catch (error) {
1457
- if (isWorkflowInstanceNotFoundError(error)) {
1458
- return null;
1459
- }
1460
- throw error;
1461
- }
1462
- }
1463
-
1464
- function workflowPoolStatusIsReady(statusName: string): boolean {
1465
- // This is only a liveness guard. Readiness itself comes from the pooled
1466
- // Workflow calling /pool-ready after waitForEvent("play_start") has been
1467
- // created, because Cloudflare may report an armed wait as "running".
1468
- return statusName === 'running' || statusName === 'waiting';
1469
- }
1470
-
1471
- async function waitForWorkflowPoolReadySignal(input: {
1472
- env: CoordinatorEnv;
1473
- instance: WorkflowInstance;
1474
- poolId: string;
1475
- }): Promise<{
1476
- ready: boolean;
1477
- status: string;
1478
- ms: number;
1479
- polls: number;
1480
- }> {
1481
- const startedAt = Date.now();
1482
- let lastStatusName = 'unknown';
1483
- let polls = 0;
1484
- while (Date.now() - startedAt < WORKFLOW_POOL_READY_TIMEOUT_MS) {
1485
- polls += 1;
1486
- const [entry, status] = await Promise.all([
1487
- listWorkflowPoolEntries(input.env)
1488
- .then((entries) =>
1489
- entries.find((candidate) => candidate.id === input.poolId),
1490
- )
1491
- .catch(() => undefined),
1492
- input.instance.status().catch(() => null),
1493
- ]);
1494
- const statusName = workflowStatusName(status);
1495
- lastStatusName = statusName;
1496
- if (entry?.state === 'ready' && entry.readyAt !== null) {
1497
- return {
1498
- ready: true,
1499
- status: statusName,
1500
- ms: Date.now() - startedAt,
1501
- polls,
1502
- };
1503
- }
1504
- if (
1505
- statusName === 'complete' ||
1506
- statusName === 'errored' ||
1507
- statusName === 'terminated' ||
1508
- statusName === 'unknown'
1509
- ) {
1510
- return {
1511
- ready: false,
1512
- status: statusName,
1513
- ms: Date.now() - startedAt,
1514
- polls,
1515
- };
1516
- }
1517
- await sleep(WORKFLOW_POOL_READY_POLL_MS);
1518
- }
1519
- return {
1520
- ready: false,
1521
- status: lastStatusName,
1522
- ms: Date.now() - startedAt,
1523
- polls,
1524
- };
1525
- }
1526
-
1527
- async function refillWorkflowPoolOnce(
1528
- env: CoordinatorEnv,
1529
- ): Promise<Omit<WorkflowPoolRefillResult, 'waitedMs' | 'waitIterations'>> {
1530
- if (!workflowPoolEnabled()) {
1531
- return {
1532
- available: 0,
1533
- warming: 0,
1534
- target: 0,
1535
- created: 0,
1536
- promoted: 0,
1537
- removed: 0,
1538
- };
1539
- }
1540
- const target = workflowPoolTargetSize();
1541
- const entries = await listWorkflowPoolEntries(env);
1542
- const warmingEntries = entries.filter((entry) => entry.readyAt === null);
1543
- const promotedIds: string[] = [];
1544
- const removedIds: string[] = [];
1545
- for (const entry of warmingEntries) {
1546
- const instance = await getWorkflowPoolInstance(env, entry.id);
1547
- if (!instance) {
1548
- removedIds.push(entry.id);
1549
- continue;
1550
- }
1551
- try {
1552
- if (entry.state === 'ready' && entry.readyAt !== null) {
1553
- promotedIds.push(entry.id);
1554
- continue;
1555
- }
1556
- const status = await instance.status().catch(() => null);
1557
- const statusName = workflowStatusName(status);
1558
- if (
1559
- statusName === 'complete' ||
1560
- statusName === 'errored' ||
1561
- statusName === 'terminated' ||
1562
- statusName === 'unknown'
1563
- ) {
1564
- removedIds.push(entry.id);
1565
- }
1566
- } finally {
1567
- disposeRpcStub(instance);
1568
- }
1569
- }
1570
- await Promise.all([
1571
- promoteWorkflowPoolIds(env, promotedIds),
1572
- deleteWorkflowPoolIds(env, removedIds),
1573
- ]);
1574
- const counts = await workflowPoolCount(env);
1575
- const totalTracked = counts.available + counts.warming;
1576
- const needed = Math.max(0, target - totalTracked);
1577
- if (needed === 0) {
1578
- return {
1579
- available: counts.available,
1580
- warming: counts.warming,
1581
- target,
1582
- created: 0,
1583
- promoted: promotedIds.length,
1584
- removed: removedIds.length,
1585
- };
1586
- }
1587
- const created = await Promise.all(
1588
- Array.from({ length: needed }, async () => {
1589
- const poolId = `pool-v2-${Date.now().toString(36)}-${crypto.randomUUID().slice(0, 12)}`;
1590
- await addWorkflowPoolIds(env, [poolId], { ready: false });
1591
- const instance = await env.PLAY_WORKFLOW.create({
1592
- id: poolId,
1593
- params: {
1594
- __deeplinePooledWorkflow: true,
1595
- poolId,
1596
- createdAt: Date.now(),
1597
- } satisfies PooledWorkflowBootstrapPayload,
1598
- });
1599
- try {
1600
- const readiness = await waitForWorkflowPoolReadySignal({
1601
- env,
1602
- instance,
1603
- poolId,
1604
- });
1605
- recordCoordinatorPerfTrace({
1606
- runId: poolId,
1607
- phase: 'coordinator.workflow_pool_ready',
1608
- ms: readiness.ms,
1609
- graphHash: 'workflow-pool',
1610
- extra: {
1611
- ready: readiness.ready,
1612
- status: readiness.status,
1613
- polls: readiness.polls,
1614
- },
1615
- });
1616
- if (readiness.ready) {
1617
- return { id: poolId, state: 'ready' as const };
1618
- }
1619
- if (
1620
- readiness.status === 'complete' ||
1621
- readiness.status === 'errored' ||
1622
- readiness.status === 'terminated' ||
1623
- readiness.status === 'unknown'
1624
- ) {
1625
- await instance.terminate().catch(() => undefined);
1626
- return { id: poolId, state: 'removed' as const };
1627
- }
1628
- return { id: poolId, state: 'warming' as const };
1629
- } finally {
1630
- disposeRpcStub(instance);
1631
- }
1632
- }),
1633
- );
1634
- const readyCreatedIds = created
1635
- .filter((entry) => entry.state === 'ready')
1636
- .map((entry) => entry.id);
1637
- const warmingCreatedIds = created
1638
- .filter((entry) => entry.state === 'warming')
1639
- .map((entry) => entry.id);
1640
- removedIds.push(
1641
- ...created
1642
- .filter((entry) => entry.state === 'removed')
1643
- .map((entry) => entry.id),
1644
- );
1645
- await Promise.all([
1646
- addWorkflowPoolIds(env, readyCreatedIds, { ready: true }),
1647
- addWorkflowPoolIds(env, warmingCreatedIds, { ready: false }),
1648
- ]);
1649
- const finalCounts = await workflowPoolCount(env);
1650
- return {
1651
- available: finalCounts.available,
1652
- warming: finalCounts.warming,
1653
- target,
1654
- created: readyCreatedIds.length + warmingCreatedIds.length,
1655
- promoted: promotedIds.length,
1656
- removed: removedIds.length,
1657
- };
1658
- }
1659
-
1660
- async function refillWorkflowPool(
1661
- env: CoordinatorEnv,
1662
- options?: {
1663
- minAvailable?: number;
1664
- waitReady?: boolean;
1665
- waitTimeoutMs?: number;
1666
- },
1667
- ): Promise<WorkflowPoolRefillResult> {
1668
- const startedAt = Date.now();
1669
- const minAvailable = Math.max(1, Math.floor(options?.minAvailable ?? 1));
1670
- const waitReady = options?.waitReady === true;
1671
- const waitTimeoutMs =
1672
- typeof options?.waitTimeoutMs === 'number' &&
1673
- Number.isFinite(options.waitTimeoutMs) &&
1674
- options.waitTimeoutMs > 0
1675
- ? Math.min(Math.floor(options.waitTimeoutMs), 15_000)
1676
- : 4_000;
1677
- let totals = await refillWorkflowPoolOnce(env);
1678
- let iterations = 0;
1679
- const readyWaitStartedAt = Date.now();
1680
-
1681
- while (
1682
- workflowPoolEnabled() &&
1683
- waitReady &&
1684
- totals.available < minAvailable &&
1685
- Date.now() - readyWaitStartedAt < waitTimeoutMs
1686
- ) {
1687
- iterations += 1;
1688
- await sleep(WORKFLOW_POOL_READY_POLL_MS);
1689
- const next = await refillWorkflowPoolOnce(env);
1690
- totals = {
1691
- ...next,
1692
- created: totals.created + next.created,
1693
- promoted: totals.promoted + next.promoted,
1694
- removed: totals.removed + next.removed,
1695
- };
1696
- }
1697
-
1698
- const result: WorkflowPoolRefillResult = {
1699
- ...totals,
1700
- waitedMs: Date.now() - startedAt,
1701
- waitIterations: iterations,
1702
- };
1703
- recordCoordinatorPerfTrace({
1704
- runId: 'workflow-pool',
1705
- phase: 'coordinator.workflow_pool_refill',
1706
- ms: result.waitedMs,
1707
- graphHash: 'workflow-pool',
1708
- extra: result,
1709
- });
1710
- return result;
1711
- }
1712
-
1713
- async function submitViaPooledWorkflow(input: {
1714
- env: CoordinatorEnv;
1715
- params: PlayWorkflowParams;
1716
- recordSubmitTiming: (timing: CoordinatorTiming) => void;
1717
- }): Promise<WorkflowInstance | null> {
1718
- if (!workflowPoolEnabled()) {
1719
- return null;
1720
- }
1721
- const leaseStartedAt = Date.now();
1722
- let leaseError: string | null = null;
1723
- const pooledInstanceId = await leaseWorkflowPoolId(
1724
- input.env,
1725
- input.params.runId,
1726
- ).catch((error) => {
1727
- leaseError = error instanceof Error ? error.message : String(error);
1728
- return null;
1729
- });
1730
- const missCounts = pooledInstanceId
1731
- ? null
1732
- : await workflowPoolCount(input.env).catch(() => null);
1733
- input.recordSubmitTiming({
1734
- phase: 'coordinator.workflow_pool_lease',
1735
- ms: Date.now() - leaseStartedAt,
1736
- graphHash: input.params.graphHash ?? null,
1737
- extra: {
1738
- pooled: Boolean(pooledInstanceId),
1739
- ...(leaseError ? { error: leaseError } : {}),
1740
- ...(missCounts
1741
- ? {
1742
- availableAfterMiss: missCounts.available,
1743
- warmingAfterMiss: missCounts.warming,
1744
- }
1745
- : {}),
1746
- },
1747
- });
1748
-
1749
- if (!pooledInstanceId) {
1750
- // A pool miss must not block the user path. Refilling is handled by the
1751
- // caller's waitUntil after submit, so fall through to cold create now.
1752
- const counts =
1753
- missCounts ?? (await workflowPoolCount(input.env).catch(() => null));
1754
- input.recordSubmitTiming({
1755
- phase: 'coordinator.workflow_pool_refill_on_miss',
1756
- ms: 0,
1757
- graphHash: input.params.graphHash ?? null,
1758
- extra: {
1759
- skipped: true,
1760
- reason: 'pool_miss_does_not_block_submit',
1761
- ...(counts
1762
- ? {
1763
- available: counts.available,
1764
- warming: counts.warming,
1765
- waitedMs: 0,
1766
- waitIterations: 0,
1767
- }
1768
- : {}),
1769
- },
1770
- });
1771
- }
1772
-
1773
- if (!pooledInstanceId) {
1774
- return null;
1775
- }
1776
-
1777
- const instance = await getWorkflowPoolInstance(input.env, pooledInstanceId);
1778
- if (!instance) {
1779
- await blockWorkflowPoolRun({
1780
- env: input.env,
1781
- runId: input.params.runId,
1782
- instanceId: pooledInstanceId,
1783
- }).catch(() => undefined);
1784
- input.recordSubmitTiming({
1785
- phase: 'coordinator.workflow_pool_ready_check',
1786
- ms: Date.now() - leaseStartedAt,
1787
- graphHash: input.params.graphHash ?? null,
1788
- extra: { instanceId: pooledInstanceId, status: 'missing' },
1789
- });
1790
- return null;
1791
- }
1792
- const readyCheckStartedAt = Date.now();
1793
- const status = await instance.status().catch(() => null);
1794
- const statusName = workflowStatusName(status);
1795
- input.recordSubmitTiming({
1796
- phase: 'coordinator.workflow_pool_ready_check',
1797
- ms: Date.now() - readyCheckStartedAt,
1798
- graphHash: input.params.graphHash ?? null,
1799
- extra: { instanceId: pooledInstanceId, status: statusName },
1800
- });
1801
- if (!workflowPoolStatusIsReady(statusName)) {
1802
- await blockWorkflowPoolRun({
1803
- env: input.env,
1804
- runId: input.params.runId,
1805
- instanceId: pooledInstanceId,
1806
- }).catch(() => undefined);
1807
- await instance.terminate().catch(() => undefined);
1808
- disposeRpcStub(instance);
1374
+ });
1809
1375
  return null;
1376
+ });
1377
+ if (!claim?.claimed || !claim.params) {
1378
+ return { retried: false, result: null };
1810
1379
  }
1811
- const sendStartedAt = Date.now();
1380
+ const retryInstanceId = workflowRetryInstanceId(input.runId, claim.attempts);
1381
+ const retryStartedAt = Date.now();
1382
+ let retryInstance: WorkflowInstance | null = null;
1812
1383
  try {
1813
- await instance.sendEvent({
1814
- type: WORKFLOW_POOL_START_EVENT_TYPE,
1815
- payload: buildDispatcherEnvelope(input.params),
1384
+ retryInstance = await createDynamicWorkflowInstance({
1385
+ env: input.env,
1386
+ id: retryInstanceId,
1387
+ params: claim.params,
1816
1388
  });
1817
- } catch (error) {
1818
- await blockWorkflowPoolRun({
1389
+ await recordWorkflowInstanceId({
1819
1390
  env: input.env,
1820
- runId: input.params.runId,
1821
- instanceId: pooledInstanceId,
1822
- }).catch(() => undefined);
1823
- disposeRpcStub(instance);
1824
- console.warn('[coordinator.workflow_pool] sendEvent failed; falling back', {
1825
- runId: input.params.runId,
1826
- pooledInstanceId,
1827
- error: error instanceof Error ? error.message : String(error),
1391
+ runId: input.runId,
1392
+ instanceId: retryInstance.id,
1828
1393
  });
1829
- return null;
1830
- }
1831
- input.recordSubmitTiming({
1832
- phase: 'coordinator.workflow_pool_send_event',
1833
- ms: Date.now() - sendStartedAt,
1834
- graphHash: input.params.graphHash ?? null,
1835
- extra: { instanceId: pooledInstanceId },
1836
- });
1837
- const ack = await waitForWorkflowPoolStartAck({
1838
- env: input.env,
1839
- runId: input.params.runId,
1840
- instanceId: pooledInstanceId,
1841
- timeoutMs: WORKFLOW_POOL_START_ACK_TIMEOUT_MS,
1842
- });
1843
- if (ack.acknowledged) {
1844
- input.recordSubmitTiming({
1845
- phase: 'coordinator.workflow_pool_start_ack',
1846
- ms: ack.ms,
1847
- graphHash: input.params.graphHash ?? null,
1394
+ input.ctx?.waitUntil(input.oldInstance.terminate().catch(() => undefined));
1395
+ recordCoordinatorPerfTraceBuffered(input.env, input.ctx, {
1396
+ runId: input.runId,
1397
+ phase: 'coordinator.platform_deploy_retry',
1398
+ ms: Date.now() - retryStartedAt,
1399
+ graphHash: claim.params.graphHash ?? null,
1848
1400
  extra: {
1849
- acknowledged: true,
1850
- instanceId: pooledInstanceId,
1851
- polls: ack.polls,
1852
- startedAt: ack.startedAt,
1401
+ retryAttempt: claim.attempts,
1402
+ retryInstanceId: retryInstance.id,
1403
+ reason: decision.reason,
1853
1404
  },
1854
1405
  });
1855
- return instance;
1856
- }
1857
-
1858
- const blockStartedAt = Date.now();
1859
- const block = await blockWorkflowPoolRun({
1860
- env: input.env,
1861
- runId: input.params.runId,
1862
- instanceId: pooledInstanceId,
1863
- }).catch(() => ({ blocked: false, started: false }));
1864
- input.recordSubmitTiming({
1865
- phase: 'coordinator.workflow_pool_start_ack',
1866
- ms: ack.ms,
1867
- graphHash: input.params.graphHash ?? null,
1868
- extra: {
1869
- acknowledged: block.started,
1870
- instanceId: pooledInstanceId,
1871
- polls: ack.polls,
1872
- startedAt: ack.startedAt,
1873
- mappedInstanceId: ack.mappedInstanceId,
1874
- blocked: block.blocked,
1875
- blockMs: Date.now() - blockStartedAt,
1876
- },
1877
- });
1878
- if (block.started) {
1879
- return instance;
1880
- }
1881
- await instance.terminate().catch(() => undefined);
1882
- disposeRpcStub(instance);
1883
- input.recordSubmitTiming({
1884
- phase: 'coordinator.workflow_pool_fallback',
1885
- ms: Date.now() - sendStartedAt,
1886
- graphHash: input.params.graphHash ?? null,
1887
- extra: {
1888
- reason: 'start_ack_timeout',
1889
- instanceId: pooledInstanceId,
1890
- ackTimeoutMs: WORKFLOW_POOL_START_ACK_TIMEOUT_MS,
1891
- },
1892
- });
1893
- return null;
1894
- }
1895
-
1896
- function readWorkflowPayload(event: unknown): Record<string, unknown> | null {
1897
- if (!isRecord(event)) return null;
1898
- const payload = event.payload;
1899
- if (!isRecord(payload)) return null;
1900
- return isRecord(payload.params) ? payload.params : payload;
1901
- }
1902
-
1903
- async function markWorkflowRuntimeFailure(input: {
1904
- env: CoordinatorEnv;
1905
- event: unknown;
1906
- error: unknown;
1907
- }): Promise<void> {
1908
- const payload = readWorkflowPayload(input.event);
1909
- if (!payload) return;
1910
- const runId = typeof payload.runId === 'string' ? payload.runId : null;
1911
- const baseUrl = typeof payload.baseUrl === 'string' ? payload.baseUrl : null;
1912
- const executorToken =
1913
- typeof payload.executorToken === 'string' ? payload.executorToken : null;
1914
- if (!runId || !baseUrl || !executorToken) return;
1915
- const errorName =
1916
- input.error instanceof Error && input.error.name
1917
- ? input.error.name
1918
- : 'Error';
1919
- const errorMessage =
1920
- input.error instanceof Error ? input.error.message : String(input.error);
1921
- const errorStack =
1922
- input.error instanceof Error && typeof input.error.stack === 'string'
1923
- ? input.error.stack.split('\n').slice(0, 12).join('\n')
1924
- : null;
1925
- const headers = new Headers({
1926
- authorization: `Bearer ${executorToken}`,
1927
- 'content-type': 'application/json',
1928
- });
1929
- const bypass = input.env.VERCEL_PROTECTION_BYPASS_TOKEN?.trim();
1930
- if (bypass) headers.set('x-vercel-protection-bypass', bypass);
1931
- const body = JSON.stringify({
1932
- action: 'append_run_events',
1933
- playId: runId,
1934
- events: [
1935
- {
1936
- type: 'run.failed',
1937
- runId,
1938
- source: 'coordinator',
1939
- occurredAt: Date.now(),
1940
- error: `DynamicWorkflow runner failed: ${errorName}: ${errorMessage}${
1941
- errorStack ? `\n${errorStack}` : ''
1942
- }`,
1943
- } satisfies PlayRunLedgerEvent,
1944
- ],
1945
- });
1946
- const url = `${baseUrl.replace(/\/$/, '')}/api/v2/plays/internal/runtime`;
1947
- const backoffMs = [200, 500, 1500];
1948
- let lastError: unknown = null;
1949
- for (let attempt = 0; attempt <= backoffMs.length; attempt += 1) {
1950
- try {
1951
- const response = await fetch(url, { method: 'POST', headers, body });
1952
- if (response.ok) return;
1953
- lastError = new Error(
1954
- `runtime API responded ${response.status}: ${(await response.text().catch(() => '')).slice(0, 400)}`,
1955
- );
1956
- if (
1957
- response.status >= 400 &&
1958
- response.status < 500 &&
1959
- response.status !== 408 &&
1960
- response.status !== 429
1961
- ) {
1962
- break;
1963
- }
1964
- } catch (error) {
1965
- lastError = error;
1966
- }
1967
- if (attempt < backoffMs.length) {
1968
- await new Promise((resolve) => setTimeout(resolve, backoffMs[attempt]));
1969
- }
1406
+ return {
1407
+ retried: true,
1408
+ result: {
1409
+ runId: input.runId,
1410
+ playName: claim.params.playName,
1411
+ status: 'running',
1412
+ result: null,
1413
+ error: null,
1414
+ retry: {
1415
+ reason: decision.reason,
1416
+ attempt: claim.attempts,
1417
+ message: decision.message,
1418
+ },
1419
+ },
1420
+ };
1421
+ } finally {
1422
+ disposeRpcStub(retryInstance);
1970
1423
  }
1971
- console.error('[coordinator] failed to mark workflow runtime failure', {
1972
- runId,
1973
- message: lastError instanceof Error ? lastError.message : String(lastError),
1974
- });
1975
1424
  }
1976
1425
 
1977
- type StoredPlayArtifactPayload = {
1978
- artifact?: {
1979
- bundledCode?: string;
1980
- artifactKind?: string;
1981
- };
1982
- };
1983
-
1984
- const DYNAMIC_WORKER_COMPATIBILITY_DATE = '2026-05-01';
1985
-
1986
1426
  async function mintChildWorkflowExecutorToken(input: {
1987
1427
  env: CoordinatorEnv;
1988
- baseUrl: string;
1989
1428
  parentExecutorToken: string;
1990
1429
  parentRunId: string;
1991
1430
  parentPlayName: string;
@@ -1993,37 +1432,27 @@ async function mintChildWorkflowExecutorToken(input: {
1993
1432
  childPlayName: string;
1994
1433
  maxCreditsPerRun?: number | null;
1995
1434
  }): Promise<string> {
1996
- const url = `${input.baseUrl.replace(/\/$/, '')}/api/v2/plays/internal/child-executor-token`;
1997
- const headers = new Headers({
1998
- authorization: `Bearer ${input.parentExecutorToken}`,
1999
- 'content-type': 'application/json',
2000
- 'x-deepline-request-id': crypto.randomUUID(),
2001
- });
2002
- if (input.env.VERCEL_PROTECTION_BYPASS_TOKEN?.trim()) {
2003
- headers.set(
2004
- 'x-vercel-protection-bypass',
2005
- input.env.VERCEL_PROTECTION_BYPASS_TOKEN.trim(),
2006
- );
2007
- }
2008
- const response = await fetch(url, {
2009
- method: 'POST',
2010
- headers,
2011
- body: JSON.stringify({
1435
+ const response = await input.env.HARNESS.runtimeApiCall({
1436
+ executorToken: input.parentExecutorToken,
1437
+ path: '/api/v2/plays/internal/child-executor-token',
1438
+ headers: { 'x-deepline-request-id': crypto.randomUUID() },
1439
+ timeoutMs: 15_000,
1440
+ body: {
2012
1441
  parentRunId: input.parentRunId,
2013
1442
  parentPlayName: input.parentPlayName,
2014
1443
  childRunId: input.childRunId,
2015
1444
  childPlayName: input.childPlayName,
2016
1445
  maxCreditsPerRun: input.maxCreditsPerRun ?? null,
2017
- }),
1446
+ },
2018
1447
  });
2019
- const text = await response.text().catch(() => '');
1448
+ const text = response.body;
2020
1449
  let parsed: Record<string, unknown> = {};
2021
1450
  try {
2022
1451
  parsed = text ? (JSON.parse(text) as Record<string, unknown>) : {};
2023
1452
  } catch {
2024
1453
  parsed = {};
2025
1454
  }
2026
- if (!response.ok) {
1455
+ if (response.status < 200 || response.status >= 300) {
2027
1456
  const error = isRecord(parsed.error) ? parsed.error : null;
2028
1457
  const message =
2029
1458
  (typeof error?.message === 'string' && error.message.trim()) ||
@@ -2106,7 +1535,6 @@ async function reencryptChildDbSessionForExecutor(input: {
2106
1535
 
2107
1536
  async function createChildRuntimeDbSession(input: {
2108
1537
  env: CoordinatorEnv;
2109
- baseUrl: string;
2110
1538
  childExecutorToken: string;
2111
1539
  childPlayName: string;
2112
1540
  requirement: RuntimeDbSessionRequirement;
@@ -2114,22 +1542,12 @@ async function createChildRuntimeDbSession(input: {
2114
1542
  orgId: string;
2115
1543
  }): Promise<CreateDbSessionResponse> {
2116
1544
  const decryptionKey = await generateDbSessionPostgresUrlDecryptionKey();
2117
- const url = `${input.baseUrl.replace(/\/$/, '')}/api/v2/plays/internal/runtime`;
2118
- const headers = new Headers({
2119
- authorization: `Bearer ${input.childExecutorToken}`,
2120
- 'content-type': 'application/json',
2121
- 'x-deepline-request-id': crypto.randomUUID(),
2122
- });
2123
- if (input.env.VERCEL_PROTECTION_BYPASS_TOKEN?.trim()) {
2124
- headers.set(
2125
- 'x-vercel-protection-bypass',
2126
- input.env.VERCEL_PROTECTION_BYPASS_TOKEN.trim(),
2127
- );
2128
- }
2129
- const response = await fetch(url, {
2130
- method: 'POST',
2131
- headers,
2132
- body: JSON.stringify({
1545
+ const response = await input.env.HARNESS.runtimeApiCall({
1546
+ executorToken: input.childExecutorToken,
1547
+ path: '/api/v2/plays/internal/runtime',
1548
+ headers: { 'x-deepline-request-id': crypto.randomUUID() },
1549
+ timeoutMs: 15_000,
1550
+ body: {
2133
1551
  action: 'create_db_session',
2134
1552
  playName: input.childPlayName,
2135
1553
  target: {
@@ -2142,16 +1560,16 @@ async function createChildRuntimeDbSession(input: {
2142
1560
  ttlSeconds: DB_SESSION_DEFAULT_TTL_SECONDS,
2143
1561
  userEmail: input.userEmail,
2144
1562
  postgresUrlEncryption: decryptionKey.request,
2145
- }),
1563
+ },
2146
1564
  });
2147
- const text = await response.text().catch(() => '');
1565
+ const text = response.body;
2148
1566
  let parsed: unknown = {};
2149
1567
  try {
2150
1568
  parsed = text ? JSON.parse(text) : {};
2151
1569
  } catch {
2152
1570
  parsed = {};
2153
1571
  }
2154
- if (!response.ok) {
1572
+ if (response.status < 200 || response.status >= 300) {
2155
1573
  const error =
2156
1574
  isRecord(parsed) && isRecord(parsed.error) ? parsed.error : {};
2157
1575
  const message =
@@ -2172,52 +1590,224 @@ async function createChildRuntimeDbSession(input: {
2172
1590
  orgId: input.orgId,
2173
1591
  childPlayName: input.childPlayName,
2174
1592
  });
2175
- return await reencryptChildDbSessionForExecutor({
2176
- session,
2177
- decryptionKey,
2178
- childExecutorToken: input.childExecutorToken,
1593
+ return await reencryptChildDbSessionForExecutor({
1594
+ session,
1595
+ decryptionKey,
1596
+ childExecutorToken: input.childExecutorToken,
1597
+ });
1598
+ }
1599
+
1600
+ async function preloadChildRuntimeDbSessions(input: {
1601
+ env: CoordinatorEnv;
1602
+ childExecutorToken: string;
1603
+ childRunId: string;
1604
+ childPlayName: string;
1605
+ manifest: PlayRuntimeManifest;
1606
+ orgId: string;
1607
+ userEmail: string;
1608
+ }): Promise<PreloadedRuntimeDbSession[]> {
1609
+ const startedAt = Date.now();
1610
+ const requirements = planRuntimeDbSessionRequirements(
1611
+ input.manifest.staticPipeline ?? null,
1612
+ );
1613
+ const sessions = await Promise.all(
1614
+ requirements.map(async (requirement) => ({
1615
+ tableNamespace: requirement.tableNamespace,
1616
+ logicalTable: requirement.logicalTable,
1617
+ operations: requirement.operations,
1618
+ ...(requirement.limits ? { limits: requirement.limits } : {}),
1619
+ session: await createChildRuntimeDbSession({
1620
+ env: input.env,
1621
+ childExecutorToken: input.childExecutorToken,
1622
+ childPlayName: input.childPlayName,
1623
+ requirement,
1624
+ userEmail: input.userEmail,
1625
+ orgId: input.orgId,
1626
+ }),
1627
+ })),
1628
+ );
1629
+ recordCoordinatorPerfTrace({
1630
+ runId: input.childRunId,
1631
+ phase: 'coordinator.child_db_session_preload',
1632
+ ms: Date.now() - startedAt,
1633
+ graphHash: input.manifest.graphHash,
1634
+ extra: { sessions: sessions.length },
1635
+ });
1636
+ return sessions;
1637
+ }
1638
+
1639
+ async function registerInlineChildRunWithRuntime(input: {
1640
+ env: CoordinatorEnv;
1641
+ childExecutorToken: string;
1642
+ childRunId: string;
1643
+ childPlayName: string;
1644
+ manifest: PlayRuntimeManifest;
1645
+ governance: PlayCallGovernanceSnapshot;
1646
+ }): Promise<void> {
1647
+ const response = await input.env.HARNESS.runtimeApiCall({
1648
+ executorToken: input.childExecutorToken,
1649
+ path: '/api/v2/plays/internal/runtime',
1650
+ headers: { 'x-deepline-request-id': crypto.randomUUID() },
1651
+ timeoutMs: 15_000,
1652
+ body: {
1653
+ action: 'start_inline_child_run',
1654
+ playName: input.childPlayName,
1655
+ runId: input.childRunId,
1656
+ workflowFamilyKey:
1657
+ input.governance.rootRunId ??
1658
+ input.governance.parentRunId ??
1659
+ input.childRunId,
1660
+ artifactStorageKey: input.manifest.artifactStorageKey,
1661
+ artifactHash: input.manifest.artifactHash,
1662
+ graphHash: input.manifest.graphHash,
1663
+ runtimeBackend: 'workers_edge',
1664
+ schedulerBackend: 'inline_child',
1665
+ executionProfile: 'workers_edge',
1666
+ ...(typeof input.manifest.maxCreditsPerRun === 'number'
1667
+ ? { maxCreditsPerRun: input.manifest.maxCreditsPerRun }
1668
+ : {}),
1669
+ staticPipeline: input.manifest.staticPipeline ?? null,
1670
+ source: 'published',
1671
+ },
1672
+ });
1673
+ if (response.status < 200 || response.status >= 300) {
1674
+ const text = response.body ?? '';
1675
+ throw new Error(
1676
+ `Inline child run registration failed ${response.status}: ${text.slice(0, 800)}`,
1677
+ );
1678
+ }
1679
+ }
1680
+
1681
+ type CoordinatorRuntimeApiTiming = {
1682
+ phase: string;
1683
+ ms: number;
1684
+ bytes?: number;
1685
+ };
1686
+
1687
+ async function callRuntimeApiFromCoordinator(input: {
1688
+ env: CoordinatorEnv;
1689
+ executorToken: string;
1690
+ body: unknown;
1691
+ }): Promise<{
1692
+ status: number;
1693
+ body: string;
1694
+ timings: CoordinatorRuntimeApiTiming[];
1695
+ }> {
1696
+ const timings: CoordinatorRuntimeApiTiming[] = [];
1697
+ const totalStartedAt = Date.now();
1698
+ const recordTiming = (
1699
+ phase: string,
1700
+ startedAt: number,
1701
+ extra?: { bytes?: number },
1702
+ ): void => {
1703
+ timings.push({
1704
+ phase,
1705
+ ms: Date.now() - startedAt,
1706
+ ...(extra?.bytes !== undefined ? { bytes: extra.bytes } : {}),
1707
+ });
1708
+ };
1709
+
1710
+ const buildStartedAt = Date.now();
1711
+ const body = input.body ?? {};
1712
+ const serializedBody = JSON.stringify(body);
1713
+ recordTiming('coordinator.runtime_api.build_request', buildStartedAt, {
1714
+ bytes: serializedBody.length,
1715
+ });
1716
+
1717
+ const fetchStartedAt = Date.now();
1718
+ const response = await input.env.HARNESS.runtimeApiCall({
1719
+ executorToken: input.executorToken,
1720
+ path: '/api/v2/plays/internal/runtime',
1721
+ body,
1722
+ headers: {
1723
+ 'x-deepline-request-id': crypto.randomUUID(),
1724
+ },
1725
+ });
1726
+ recordTiming('coordinator.runtime_api.fetch', fetchStartedAt);
1727
+
1728
+ const bodyStartedAt = Date.now();
1729
+ const responseBody = response.body;
1730
+ recordTiming('coordinator.runtime_api.body', bodyStartedAt, {
1731
+ bytes: responseBody.length,
2179
1732
  });
1733
+ recordTiming('coordinator.runtime_api.total', totalStartedAt);
1734
+ return {
1735
+ status: response.status,
1736
+ body: responseBody,
1737
+ timings,
1738
+ };
2180
1739
  }
2181
1740
 
2182
- async function preloadChildRuntimeDbSessions(input: {
1741
+ async function prepareInlineChildRunWithRuntime(input: {
2183
1742
  env: CoordinatorEnv;
2184
- baseUrl: string;
2185
- childExecutorToken: string;
1743
+ parentExecutorToken: string;
1744
+ parentRunId: string;
1745
+ parentPlayName: string;
2186
1746
  childRunId: string;
2187
1747
  childPlayName: string;
2188
1748
  manifest: PlayRuntimeManifest;
2189
- orgId: string;
1749
+ governance: PlayCallGovernanceSnapshot;
2190
1750
  userEmail: string;
2191
- }): Promise<PreloadedRuntimeDbSession[]> {
2192
- const startedAt = Date.now();
2193
- const requirements = planRuntimeDbSessionRequirements(
2194
- input.manifest.staticPipeline ?? null,
2195
- );
2196
- const sessions = await Promise.all(
2197
- requirements.map(async (requirement) => ({
2198
- tableNamespace: requirement.tableNamespace,
2199
- logicalTable: requirement.logicalTable,
2200
- operations: requirement.operations,
2201
- ...(requirement.limits ? { limits: requirement.limits } : {}),
2202
- session: await createChildRuntimeDbSession({
2203
- env: input.env,
2204
- baseUrl: input.baseUrl,
2205
- childExecutorToken: input.childExecutorToken,
2206
- childPlayName: input.childPlayName,
2207
- requirement,
2208
- userEmail: input.userEmail,
2209
- orgId: input.orgId,
2210
- }),
2211
- })),
2212
- );
2213
- recordCoordinatorPerfTrace({
2214
- runId: input.childRunId,
2215
- phase: 'coordinator.child_db_session_preload',
2216
- ms: Date.now() - startedAt,
2217
- graphHash: input.manifest.graphHash,
2218
- extra: { sessions: sessions.length },
1751
+ }): Promise<{
1752
+ childToken: string;
1753
+ preloadedDbSessions: PreloadedRuntimeDbSession[];
1754
+ prepareTimings: unknown[];
1755
+ transportTimings: unknown[];
1756
+ }> {
1757
+ const response = await callRuntimeApiFromCoordinator({
1758
+ env: input.env,
1759
+ executorToken: input.parentExecutorToken,
1760
+ body: {
1761
+ action: 'prepare_inline_child_run',
1762
+ parentRunId: input.parentRunId,
1763
+ parentPlayName: input.parentPlayName,
1764
+ childRunId: input.childRunId,
1765
+ childPlayName: input.childPlayName,
1766
+ workflowFamilyKey:
1767
+ input.governance.rootRunId ??
1768
+ input.governance.parentRunId ??
1769
+ input.childRunId,
1770
+ artifactStorageKey: input.manifest.artifactStorageKey,
1771
+ artifactHash: input.manifest.artifactHash,
1772
+ graphHash: input.manifest.graphHash,
1773
+ runtimeBackend: 'workers_edge',
1774
+ schedulerBackend: 'inline_child',
1775
+ executionProfile: 'workers_edge',
1776
+ ...(typeof input.manifest.maxCreditsPerRun === 'number'
1777
+ ? { maxCreditsPerRun: input.manifest.maxCreditsPerRun }
1778
+ : {}),
1779
+ staticPipeline: input.manifest.staticPipeline ?? null,
1780
+ source: 'published',
1781
+ userEmail: input.userEmail,
1782
+ },
2219
1783
  });
2220
- return sessions;
1784
+ const text = response.body;
1785
+ let parsed: unknown = {};
1786
+ try {
1787
+ parsed = text ? JSON.parse(text) : {};
1788
+ } catch {
1789
+ parsed = {};
1790
+ }
1791
+ if (response.status < 200 || response.status >= 300) {
1792
+ throw new Error(
1793
+ `Inline child prepare failed ${response.status}: ${text.slice(0, 800)}`,
1794
+ );
1795
+ }
1796
+ if (!isRecord(parsed) || typeof parsed.executorToken !== 'string') {
1797
+ throw new Error('Inline child prepare response was missing executorToken.');
1798
+ }
1799
+ const preloadedDbSessions = Array.isArray(parsed.preloadedDbSessions)
1800
+ ? (parsed.preloadedDbSessions as PreloadedRuntimeDbSession[])
1801
+ : [];
1802
+ const prepareTimings = Array.isArray(parsed.prepareTimings)
1803
+ ? parsed.prepareTimings
1804
+ : [];
1805
+ return {
1806
+ childToken: parsed.executorToken,
1807
+ preloadedDbSessions,
1808
+ prepareTimings,
1809
+ transportTimings: response.timings,
1810
+ };
2221
1811
  }
2222
1812
 
2223
1813
  function buildChildRunId(playName: string): string {
@@ -2476,6 +2066,8 @@ function runRequestFromPlayWorkflowParams(
2476
2066
  childPlayManifests: params.childPlayManifests ?? null,
2477
2067
  playCallGovernance: params.playCallGovernance ?? null,
2478
2068
  preloadedDbSessions: params.preloadedDbSessions ?? null,
2069
+ inlineChildRunRegistered:
2070
+ params.runtimeBackend === 'cf_workflows_dynamic_worker_inline_child',
2479
2071
  coordinatorUrl: params.coordinatorUrl ?? null,
2480
2072
  totalRows: params.totalRows,
2481
2073
  };
@@ -2606,38 +2198,41 @@ async function executeChildInline(input: {
2606
2198
  },
2607
2199
  });
2608
2200
 
2609
- const tokenStartedAt = Date.now();
2610
- const baseUrl = resolveRuntimeBaseUrl(input.env, input.body);
2611
- const childToken = await mintChildWorkflowExecutorToken({
2612
- env: input.env,
2613
- baseUrl,
2614
- parentExecutorToken,
2615
- parentRunId: input.parentRunId,
2616
- parentPlayName:
2617
- typeof input.body.parentPlayName === 'string' &&
2618
- input.body.parentPlayName.trim()
2619
- ? input.body.parentPlayName.trim()
2620
- : governance.parentPlayName,
2621
- childRunId,
2622
- childPlayName,
2623
- maxCreditsPerRun: manifest.maxCreditsPerRun ?? null,
2201
+ const loaderStartedAt = Date.now();
2202
+ const stub = loadDynamicPlayWorker(input.env, {
2203
+ runId: childRunId,
2204
+ graphHash: manifest.graphHash,
2205
+ artifactStorageKey: manifest.artifactStorageKey,
2206
+ artifactHash: manifest.artifactHash,
2207
+ dynamicWorkerCode:
2208
+ typeof manifest.bundledCode === 'string' ? manifest.bundledCode : null,
2209
+ packagedFiles: null,
2624
2210
  });
2625
- trace('coordinator.inline_child_token', tokenStartedAt);
2211
+ trace('coordinator.inline_child_loader_get', loaderStartedAt);
2626
2212
 
2627
- const dbSessionStartedAt = Date.now();
2628
- const preloadedDbSessions = await preloadChildRuntimeDbSessions({
2629
- env: input.env,
2630
- baseUrl,
2631
- childExecutorToken: childToken,
2632
- childRunId,
2633
- childPlayName,
2634
- manifest,
2635
- orgId,
2636
- userEmail:
2637
- typeof input.body.userEmail === 'string' ? input.body.userEmail : '',
2638
- });
2639
- trace('coordinator.inline_child_db_session_preload', dbSessionStartedAt, {
2213
+ const prepareStartedAt = Date.now();
2214
+ const parentPlayName =
2215
+ typeof input.body.parentPlayName === 'string' &&
2216
+ input.body.parentPlayName.trim()
2217
+ ? input.body.parentPlayName.trim()
2218
+ : governance.parentPlayName;
2219
+ const { childToken, preloadedDbSessions, prepareTimings, transportTimings } =
2220
+ await prepareInlineChildRunWithRuntime({
2221
+ env: input.env,
2222
+ parentExecutorToken,
2223
+ parentRunId: input.parentRunId,
2224
+ parentPlayName,
2225
+ childRunId,
2226
+ childPlayName,
2227
+ manifest,
2228
+ governance,
2229
+ userEmail:
2230
+ typeof input.body.userEmail === 'string' ? input.body.userEmail : '',
2231
+ });
2232
+ trace('coordinator.inline_child_prepare', prepareStartedAt, {
2640
2233
  sessions: preloadedDbSessions.length,
2234
+ prepareTimings,
2235
+ transportTimings,
2641
2236
  });
2642
2237
 
2643
2238
  const params = buildChildWorkflowParams({
@@ -2655,17 +2250,6 @@ async function executeChildInline(input: {
2655
2250
  preloadedDbSessions:
2656
2251
  preloadedDbSessions.length > 0 ? preloadedDbSessions : null,
2657
2252
  });
2658
- const loaderStartedAt = Date.now();
2659
- const stub = loadDynamicPlayWorker(input.env, {
2660
- runId: childRunId,
2661
- graphHash: manifest.graphHash,
2662
- artifactStorageKey: manifest.artifactStorageKey,
2663
- artifactHash: manifest.artifactHash,
2664
- dynamicWorkerCode:
2665
- typeof manifest.bundledCode === 'string' ? manifest.bundledCode : null,
2666
- packagedFiles: null,
2667
- });
2668
- trace('coordinator.inline_child_loader_get', loaderStartedAt);
2669
2253
 
2670
2254
  let entrypoint: ReturnType<Awaited<typeof stub>['getEntrypoint']> | null =
2671
2255
  null;
@@ -2676,44 +2260,54 @@ async function executeChildInline(input: {
2676
2260
  entrypoint = awaitedStub.getEntrypoint();
2677
2261
  trace('coordinator.inline_child_get_entrypoint', entrypointStartedAt);
2678
2262
  const fetchStartedAt = Date.now();
2679
- response = await entrypoint.fetch(
2263
+ const inlineResponse = await entrypoint.fetch(
2680
2264
  new Request('https://deepline.dynamic.internal/run-inline', {
2681
2265
  method: 'POST',
2682
2266
  headers: { 'content-type': 'application/json' },
2683
2267
  body: JSON.stringify(runRequestFromPlayWorkflowParams(params)),
2684
2268
  }),
2685
2269
  );
2270
+ if (!inlineResponse) {
2271
+ throw new Error('Inline child Worker returned no response.');
2272
+ }
2273
+ let workerResponse = inlineResponse as Response;
2274
+ response = workerResponse;
2686
2275
  trace('coordinator.inline_child_worker_fetch', fetchStartedAt, {
2687
- status: response.status,
2276
+ status: workerResponse.status,
2688
2277
  endpoint: '/run-inline',
2689
2278
  });
2690
2279
  let usedLegacyRunStream = false;
2691
- if (response.status === 404) {
2692
- disposeRpcStub(response);
2280
+ if (workerResponse.status === 404) {
2281
+ disposeRpcStub(workerResponse);
2693
2282
  const legacyFetchStartedAt = Date.now();
2694
- response = await entrypoint.fetch(
2283
+ const legacyResponse = await entrypoint.fetch(
2695
2284
  new Request('https://deepline.dynamic.internal/run', {
2696
2285
  method: 'POST',
2697
2286
  headers: { 'content-type': 'application/json' },
2698
2287
  body: JSON.stringify(runRequestFromPlayWorkflowParams(params)),
2699
2288
  }),
2700
2289
  );
2290
+ if (!legacyResponse) {
2291
+ throw new Error('Legacy inline child Worker returned no response.');
2292
+ }
2293
+ workerResponse = legacyResponse as Response;
2294
+ response = workerResponse;
2701
2295
  usedLegacyRunStream = true;
2702
2296
  trace('coordinator.inline_child_worker_fetch', legacyFetchStartedAt, {
2703
- status: response.status,
2297
+ status: workerResponse.status,
2704
2298
  endpoint: '/run',
2705
2299
  compatibility: 'legacy_stream',
2706
2300
  });
2707
2301
  }
2708
- if (!response.ok) {
2709
- const text = await response.text().catch(() => '');
2302
+ if (!workerResponse.ok) {
2303
+ const text = await workerResponse.text().catch(() => '');
2710
2304
  throw new Error(
2711
- `Inline child Worker failed ${response.status}: ${text.slice(0, 800)}`,
2305
+ `Inline child Worker failed ${workerResponse.status}: ${text.slice(0, 800)}`,
2712
2306
  );
2713
2307
  }
2714
2308
  const responseStartedAt = Date.now();
2715
2309
  const parsed: InlineWorkerRunResponse = usedLegacyRunStream
2716
- ? await readLegacyRunStream(response).then((legacy) => ({
2310
+ ? await readLegacyRunStream(workerResponse).then((legacy) => ({
2717
2311
  status: legacy.error ? 'failed' : 'completed',
2718
2312
  result: legacy.result,
2719
2313
  outputRows: legacy.outputRows ?? undefined,
@@ -2723,7 +2317,7 @@ async function executeChildInline(input: {
2723
2317
  })),
2724
2318
  error: legacy.error ?? undefined,
2725
2319
  }))
2726
- : ((await response.json()) as InlineWorkerRunResponse);
2320
+ : ((await workerResponse.json()) as InlineWorkerRunResponse);
2727
2321
  const logs = (parsed.events ?? []).flatMap((event) => {
2728
2322
  if (
2729
2323
  event &&
@@ -2744,6 +2338,34 @@ async function executeChildInline(input: {
2744
2338
  durationMs:
2745
2339
  typeof parsed.durationMs === 'number' ? parsed.durationMs : null,
2746
2340
  });
2341
+ for (const timing of parsed.timings ?? []) {
2342
+ if (
2343
+ !timing ||
2344
+ typeof timing !== 'object' ||
2345
+ typeof timing.phase !== 'string' ||
2346
+ typeof timing.ms !== 'number' ||
2347
+ !Number.isFinite(timing.ms)
2348
+ ) {
2349
+ continue;
2350
+ }
2351
+ recordCoordinatorPerfTrace({
2352
+ runId: childRunId,
2353
+ phase: `dynamic_worker.${timing.phase}`,
2354
+ ms: Math.max(0, Math.round(timing.ms)),
2355
+ graphHash: manifest.graphHash,
2356
+ extra: {
2357
+ parentRunId: input.parentRunId,
2358
+ mode: 'inline_dynamic_worker',
2359
+ ...(isRecord(timing.extra) ? timing.extra : {}),
2360
+ },
2361
+ });
2362
+ timings.push({
2363
+ phase: `dynamic_worker.${timing.phase}`,
2364
+ ms: Math.max(0, Math.round(timing.ms)),
2365
+ graphHash: manifest.graphHash,
2366
+ ...(isRecord(timing.extra) ? { extra: timing.extra } : {}),
2367
+ });
2368
+ }
2747
2369
  trace('coordinator.inline_child_total', startedAt);
2748
2370
  if (parsed.status === 'failed' || parsed.error) {
2749
2371
  const error = {
@@ -2788,29 +2410,176 @@ async function executeChildInline(input: {
2788
2410
  action: 'completed',
2789
2411
  mode: 'inline_dynamic_worker',
2790
2412
  },
2791
- });
2792
- return {
2793
- workflowId: childRunId,
2794
- runId: childRunId,
2795
- status: 'completed',
2796
- mode: 'inline_dynamic_worker',
2797
- result: parsed.result,
2798
- output: parsed.result,
2799
- logs,
2800
- timings,
2801
- };
2802
- } finally {
2803
- disposeRpcStub(response);
2804
- disposeRpcStub(entrypoint);
2805
- disposeRpcStub(await stub.catch(() => null));
2806
- }
2413
+ });
2414
+ return {
2415
+ workflowId: childRunId,
2416
+ runId: childRunId,
2417
+ status: 'completed',
2418
+ mode: 'inline_dynamic_worker',
2419
+ result: parsed.result,
2420
+ output: parsed.result,
2421
+ logs,
2422
+ timings,
2423
+ };
2424
+ } finally {
2425
+ disposeRpcStub(response);
2426
+ disposeRpcStub(entrypoint);
2427
+ disposeRpcStub(await stub.catch(() => null));
2428
+ }
2429
+ }
2430
+
2431
+ async function submitChildWorkflowThroughCoordinator(input: {
2432
+ env: CoordinatorEnv;
2433
+ parentRunId: string;
2434
+ body: Record<string, unknown>;
2435
+ coordinatorUrl: string | null;
2436
+ }): Promise<{
2437
+ response: Response;
2438
+ responseText: string;
2439
+ childRunId: string;
2440
+ childPlayName: string;
2441
+ startedAt: number;
2442
+ timings: CoordinatorTiming[];
2443
+ }> {
2444
+ const startedAt = Date.now();
2445
+ const timings: CoordinatorTiming[] = [];
2446
+ const trace = (
2447
+ phase: string,
2448
+ phaseStartedAt: number,
2449
+ graphHash?: string | null,
2450
+ extra?: Record<string, unknown>,
2451
+ ): void => {
2452
+ const timing: CoordinatorTiming = {
2453
+ phase,
2454
+ ms: Date.now() - phaseStartedAt,
2455
+ ...(graphHash ? { graphHash } : {}),
2456
+ ...(extra ? { extra } : {}),
2457
+ };
2458
+ timings.push(timing);
2459
+ recordCoordinatorPerfTrace({
2460
+ runId: input.parentRunId,
2461
+ phase,
2462
+ ms: timing.ms,
2463
+ graphHash: graphHash ?? undefined,
2464
+ extra,
2465
+ });
2466
+ };
2467
+ const validated = validateChildSubmitBody({
2468
+ parentRunId: input.parentRunId,
2469
+ body: input.body,
2470
+ });
2471
+ if (!validated.ok) {
2472
+ return {
2473
+ response: Response.json(
2474
+ { error: validated.error },
2475
+ { status: validated.status },
2476
+ ),
2477
+ responseText: '',
2478
+ childRunId: '',
2479
+ childPlayName: '',
2480
+ startedAt,
2481
+ timings,
2482
+ };
2483
+ }
2484
+ const { manifest, governance, childPlayName, orgId, parentExecutorToken } =
2485
+ validated;
2486
+ const childRunId = buildChildRunId(childPlayName);
2487
+ const baseUrl = resolveRuntimeBaseUrl(input.env, input.body);
2488
+
2489
+ const tokenStartedAt = Date.now();
2490
+ const childToken = await mintChildWorkflowExecutorToken({
2491
+ env: input.env,
2492
+ parentExecutorToken,
2493
+ parentRunId: input.parentRunId,
2494
+ parentPlayName:
2495
+ typeof input.body.parentPlayName === 'string' &&
2496
+ input.body.parentPlayName.trim()
2497
+ ? input.body.parentPlayName.trim()
2498
+ : governance.parentPlayName,
2499
+ childRunId,
2500
+ childPlayName,
2501
+ maxCreditsPerRun: manifest.maxCreditsPerRun ?? null,
2502
+ });
2503
+ trace('coordinator.child_submit_token', tokenStartedAt, manifest.graphHash, {
2504
+ childRunId,
2505
+ childPlayName,
2506
+ });
2507
+
2508
+ const dbSessionStartedAt = Date.now();
2509
+ const preloadedDbSessions = await preloadChildRuntimeDbSessions({
2510
+ env: input.env,
2511
+ childExecutorToken: childToken,
2512
+ childRunId,
2513
+ childPlayName,
2514
+ manifest,
2515
+ orgId,
2516
+ userEmail:
2517
+ typeof input.body.userEmail === 'string' ? input.body.userEmail : '',
2518
+ });
2519
+ trace(
2520
+ 'coordinator.child_submit_db_session_preload',
2521
+ dbSessionStartedAt,
2522
+ manifest.graphHash,
2523
+ { childRunId, sessions: preloadedDbSessions.length },
2524
+ );
2525
+
2526
+ const params = buildChildWorkflowParams({
2527
+ env: input.env,
2528
+ body: input.body,
2529
+ manifest,
2530
+ governance,
2531
+ childRunId,
2532
+ childPlayName,
2533
+ childToken,
2534
+ orgId,
2535
+ coordinatorUrl: input.coordinatorUrl,
2536
+ runtimeBackend: 'cf_workflows_dynamic_worker',
2537
+ dynamicWorkerCode:
2538
+ typeof manifest.bundledCode === 'string' ? manifest.bundledCode : null,
2539
+ preloadedDbSessions:
2540
+ preloadedDbSessions.length > 0 ? preloadedDbSessions : null,
2541
+ });
2542
+
2543
+ const workflowSubmitStartedAt = Date.now();
2544
+ const response = await handleWorkflowRoute({
2545
+ runId: childRunId,
2546
+ action: 'submit',
2547
+ request: new Request(
2548
+ `https://deepline.coordinator.internal/workflow/${encodeURIComponent(
2549
+ childRunId,
2550
+ )}/submit`,
2551
+ {
2552
+ method: 'POST',
2553
+ headers: { 'content-type': 'application/json' },
2554
+ body: JSON.stringify(params),
2555
+ },
2556
+ ),
2557
+ env: input.env,
2558
+ });
2559
+ trace(
2560
+ 'coordinator.child_submit_workflow',
2561
+ workflowSubmitStartedAt,
2562
+ manifest.graphHash,
2563
+ { childRunId, status: response.status },
2564
+ );
2565
+ const responseText = await response.text().catch(() => '');
2566
+ return {
2567
+ response,
2568
+ responseText,
2569
+ childRunId,
2570
+ childPlayName,
2571
+ startedAt,
2572
+ timings,
2573
+ };
2807
2574
  }
2808
2575
 
2809
2576
  /**
2810
2577
  * In-process Fetcher handed to each per-graphHash play Worker as
2811
- * `env.RUNTIME_API`. Runs in the coordinator's isolate (not the play's), so
2812
- * `fetch(target)` here can reach `http://localhost:3000` directly in dev —
2813
- * no public *.workers.dev CF edge cloudflared localhost chain.
2578
+ * `env.RUNTIME_API`. Runs in the coordinator's isolate. Forwards runtime
2579
+ * callbacks to DEEPLINE_API_BASE_URL: in dev (the only mode deployed CF
2580
+ * coordinator + local app) that is the cloudflared tunnel URL exposing the
2581
+ * laptop's app; in prod it is the deployed app URL. There is no
2582
+ * direct-to-localhost path (the local-workerd dev mode was removed).
2814
2583
  *
2815
2584
  * Has to be a `WorkerEntrypoint` (not a plain closure) because closures
2816
2585
  * containing captured state aren't structured-cloneable, and Cloudflare
@@ -2882,6 +2651,49 @@ export class CoordinatorControl extends WorkerEntrypoint<
2882
2651
  });
2883
2652
  }
2884
2653
 
2654
+ async submitWorkflowChild(
2655
+ parentRunId: string,
2656
+ body: Record<string, unknown>,
2657
+ ): Promise<{
2658
+ workflowId?: string;
2659
+ runId?: string;
2660
+ status?: string;
2661
+ mode?: string;
2662
+ timings?: CoordinatorTiming[];
2663
+ coordinator?: unknown;
2664
+ error?: unknown;
2665
+ }> {
2666
+ const { response, responseText, childRunId, timings } =
2667
+ await submitChildWorkflowThroughCoordinator({
2668
+ env: this.env,
2669
+ parentRunId,
2670
+ body,
2671
+ coordinatorUrl: null,
2672
+ });
2673
+ let parsed: unknown = {};
2674
+ try {
2675
+ parsed = responseText ? JSON.parse(responseText) : {};
2676
+ } catch {
2677
+ parsed = { error: responseText };
2678
+ }
2679
+ if (!response.ok) {
2680
+ return {
2681
+ runId: childRunId || undefined,
2682
+ workflowId: childRunId || undefined,
2683
+ status: 'failed',
2684
+ error: isRecord(parsed) ? (parsed.error ?? parsed) : parsed,
2685
+ };
2686
+ }
2687
+ return {
2688
+ workflowId: childRunId,
2689
+ runId: childRunId,
2690
+ status: 'started',
2691
+ mode: 'workflow_rpc',
2692
+ coordinator: parsed,
2693
+ timings,
2694
+ };
2695
+ }
2696
+
2885
2697
  async signal(
2886
2698
  runId: string,
2887
2699
  body: Record<string, unknown>,
@@ -2942,6 +2754,75 @@ export class CoordinatorControl extends WorkerEntrypoint<
2942
2754
  }
2943
2755
  await appendCoordinatorRunEvent(this.env, event);
2944
2756
  }
2757
+
2758
+ async readTerminalState(
2759
+ runId: string,
2760
+ ): Promise<CoordinatorTerminalState | null> {
2761
+ if (!runId) {
2762
+ throw new Error('runId is required.');
2763
+ }
2764
+ return await readCoordinatorTerminalState(this.env, runId);
2765
+ }
2766
+
2767
+ async readChildTerminalState(
2768
+ parentRunId: string,
2769
+ eventKey: string,
2770
+ timeoutMs?: number,
2771
+ ): Promise<CoordinatorChildTerminalState | null> {
2772
+ if (!parentRunId || !eventKey) {
2773
+ throw new Error('parentRunId and eventKey are required.');
2774
+ }
2775
+ return await readCoordinatorChildTerminalState({
2776
+ env: this.env,
2777
+ parentRunId,
2778
+ eventKey,
2779
+ timeoutMs:
2780
+ typeof timeoutMs === 'number' && Number.isFinite(timeoutMs)
2781
+ ? Math.max(0, Math.min(Math.floor(timeoutMs), 30_000))
2782
+ : undefined,
2783
+ });
2784
+ }
2785
+
2786
+ /**
2787
+ * Distributed Rate State Backend acquire: lease up to `requested` request-
2788
+ * window permits for `bucketId` (`<orgId>:<provider>`) from the per-bucket
2789
+ * rate-state Durable Object. See CoordinatorRateStateBackend + dedup-do.ts.
2790
+ */
2791
+ async rateAcquire(input: {
2792
+ bucketId: string;
2793
+ rules: Array<{
2794
+ ruleId: string;
2795
+ requestsPerWindow: number;
2796
+ windowMs: number;
2797
+ maxConcurrency: number | null;
2798
+ }>;
2799
+ requested: number;
2800
+ }): Promise<{ granted: number; waitMs: number }> {
2801
+ if (!input.bucketId || !input.bucketId.trim()) {
2802
+ throw new Error('bucketId is required.');
2803
+ }
2804
+ return await callRateBucketControl<{ granted: number; waitMs: number }>(
2805
+ this.env,
2806
+ input.bucketId,
2807
+ '/rate-acquire',
2808
+ input,
2809
+ );
2810
+ }
2811
+
2812
+ async ratePenalize(input: {
2813
+ bucketId: string;
2814
+ cooldownMs: number;
2815
+ }): Promise<void> {
2816
+ if (!input.bucketId || !input.bucketId.trim()) {
2817
+ throw new Error('bucketId is required.');
2818
+ }
2819
+ await callRateBucketControl<{ ok?: unknown }>(
2820
+ this.env,
2821
+ input.bucketId,
2822
+ '/rate-penalize',
2823
+ input,
2824
+ );
2825
+ }
2945
2826
  }
2946
2827
 
2947
2828
  /**
@@ -2971,80 +2852,20 @@ export class DynamicWorkflow extends WorkflowEntrypoint<
2971
2852
  graphHash: entryTrace.graphHash,
2972
2853
  extra: {
2973
2854
  instanceId: entryTrace.instanceId,
2974
- pooledBootstrap: entryTrace.pooledBootstrap,
2975
2855
  },
2976
2856
  });
2977
- let dispatchedEvent = event;
2978
- if (isPooledWorkflowBootstrapPayload(workflowEvent.payload)) {
2979
- const pooledPayload = workflowEvent.payload;
2980
- const waitingStep = step as {
2981
- waitForEvent<T>(
2982
- name: string,
2983
- options: { type: string; timeout?: string | number },
2984
- ): Promise<{ payload: Readonly<T>; timestamp: Date; type: string }>;
2985
- };
2986
- const waitStartedAt = Date.now();
2987
- const startEventPromise = waitingStep.waitForEvent<DispatcherEnvelope>(
2988
- 'wait for pooled play start',
2989
- { type: WORKFLOW_POOL_START_EVENT_TYPE, timeout: '10 minutes' },
2990
- );
2991
- await markWorkflowPoolIdReady(this.env, pooledPayload.poolId).catch(
2992
- (error) => {
2993
- console.warn('[coordinator.workflow_pool] ready signal failed', {
2994
- poolId: pooledPayload.poolId,
2995
- message: error instanceof Error ? error.message : String(error),
2996
- });
2997
- },
2998
- );
2999
- const startEvent = await startEventPromise;
3000
- dispatchedEvent = {
3001
- payload: startEvent.payload,
3002
- timestamp: startEvent.timestamp,
3003
- instanceId: workflowEvent.instanceId ?? pooledPayload.poolId,
3004
- };
3005
- const dispatchedTrace = readWorkflowTraceContext(dispatchedEvent);
3006
- const mapped = await mapRunToWorkflowInstance({
3007
- env: this.env,
3008
- runId: dispatchedTrace.runId,
3009
- instanceId: pooledPayload.poolId,
3010
- started: true,
3011
- }).catch((error) => {
3012
- console.warn('[coordinator.workflow_pool] start ack failed', {
3013
- poolId: pooledPayload.poolId,
3014
- runId: dispatchedTrace.runId,
3015
- message: error instanceof Error ? error.message : String(error),
3016
- });
3017
- return false;
3018
- });
3019
- if (!mapped) {
3020
- trace({
3021
- runId: dispatchedTrace.runId,
3022
- phase: 'coordinator.workflow_pool_start_blocked',
3023
- ms: 0,
3024
- graphHash: dispatchedTrace.graphHash,
3025
- extra: {
3026
- instanceId: pooledPayload.poolId,
3027
- eventType: startEvent.type,
3028
- },
3029
- });
3030
- return { ok: false, blocked: true, runId: dispatchedTrace.runId };
3031
- }
3032
- const eventDeliveryMs = Math.max(
3033
- 0,
3034
- Date.now() - startEvent.timestamp.getTime(),
3035
- );
2857
+ if (entryTrace.submittedAt !== null) {
3036
2858
  trace({
3037
- runId: dispatchedTrace.runId,
3038
- phase: 'coordinator.workflow_pool_start_event',
3039
- ms: eventDeliveryMs,
3040
- graphHash: dispatchedTrace.graphHash,
2859
+ runId: entryTrace.runId,
2860
+ phase: 'coordinator.workflow_start_gap',
2861
+ ms: Math.max(0, Date.now() - entryTrace.submittedAt),
2862
+ graphHash: entryTrace.graphHash,
3041
2863
  extra: {
3042
- instanceId: dispatchedTrace.instanceId,
3043
- eventType: startEvent.type,
3044
- poolWaitAgeMs: Date.now() - waitStartedAt,
2864
+ instanceId: entryTrace.instanceId,
3045
2865
  },
3046
2866
  });
3047
2867
  }
2868
+ let dispatchedEvent = event;
3048
2869
  dispatchedEvent = await hydrateWorkflowDbSessions({
3049
2870
  env: this.env,
3050
2871
  event: dispatchedEvent,
@@ -3058,7 +2879,6 @@ export class DynamicWorkflow extends WorkflowEntrypoint<
3058
2879
  graphHash: dispatchTrace.graphHash,
3059
2880
  extra: {
3060
2881
  instanceId: dispatchTrace.instanceId,
3061
- pooledBootstrap: dispatchTrace.pooledBootstrap,
3062
2882
  },
3063
2883
  });
3064
2884
 
@@ -3212,9 +3032,6 @@ const coordinatorEntrypoint = {
3212
3032
  ): Promise<Response> {
3213
3033
  const url = new URL(request.url);
3214
3034
  if (url.pathname === '/health') {
3215
- if (workflowPoolEnabled()) {
3216
- ctx?.waitUntil(refillWorkflowPool(env).catch(() => undefined));
3217
- }
3218
3035
  return new Response('ok', { status: 200 });
3219
3036
  }
3220
3037
  if (url.pathname === '/warmup/submit') {
@@ -3250,100 +3067,6 @@ const coordinatorEntrypoint = {
3250
3067
  if (authError) return authError;
3251
3068
  return await handleStagedFilePut(request, env);
3252
3069
  }
3253
- if (url.pathname === '/workflow-pool/refill') {
3254
- const internalAuthError = authorizeCoordinatorControlRequest({
3255
- request,
3256
- env,
3257
- });
3258
- if (internalAuthError) return internalAuthError;
3259
- const warmupToken = env.VERCEL_PROTECTION_BYPASS_TOKEN?.trim();
3260
- if (
3261
- warmupToken &&
3262
- request.headers.get('x-vercel-protection-bypass') !== warmupToken
3263
- ) {
3264
- return new Response('unauthorized', { status: 401 });
3265
- }
3266
- const startedAt = Date.now();
3267
- const minAvailableRaw = Number(
3268
- url.searchParams.get('minAvailable') ?? '',
3269
- );
3270
- const waitTimeoutMsRaw = Number(
3271
- url.searchParams.get('waitTimeoutMs') ?? '',
3272
- );
3273
- const result = await refillWorkflowPool(env, {
3274
- waitReady: url.searchParams.get('waitReady') === '1',
3275
- minAvailable:
3276
- Number.isFinite(minAvailableRaw) && minAvailableRaw > 0
3277
- ? minAvailableRaw
3278
- : undefined,
3279
- waitTimeoutMs:
3280
- Number.isFinite(waitTimeoutMsRaw) && waitTimeoutMsRaw > 0
3281
- ? waitTimeoutMsRaw
3282
- : undefined,
3283
- });
3284
- return Response.json({
3285
- ok: true,
3286
- ...result,
3287
- ms: Date.now() - startedAt,
3288
- });
3289
- }
3290
- if (url.pathname === '/workflow-pool/clear') {
3291
- const internalAuthError = authorizeCoordinatorControlRequest({
3292
- request,
3293
- env,
3294
- });
3295
- if (internalAuthError) return internalAuthError;
3296
- const warmupToken = env.VERCEL_PROTECTION_BYPASS_TOKEN?.trim();
3297
- if (
3298
- warmupToken &&
3299
- request.headers.get('x-vercel-protection-bypass') !== warmupToken
3300
- ) {
3301
- return new Response('unauthorized', { status: 401 });
3302
- }
3303
- const startedAt = Date.now();
3304
- const deleted = await clearWorkflowPool(env);
3305
- return Response.json({
3306
- ok: true,
3307
- deleted,
3308
- ms: Date.now() - startedAt,
3309
- });
3310
- }
3311
- if (url.pathname === '/workflow-pool/debug') {
3312
- const internalAuthError = authorizeCoordinatorControlRequest({
3313
- request,
3314
- env,
3315
- });
3316
- if (internalAuthError) return internalAuthError;
3317
- const entries = await listWorkflowPoolEntries(env);
3318
- const detailed = [];
3319
- for (const entry of entries) {
3320
- const instance = await getWorkflowPoolInstance(env, entry.id);
3321
- if (!instance) {
3322
- detailed.push({
3323
- ...entry,
3324
- status: 'missing',
3325
- mappedStatus: 'failed',
3326
- });
3327
- continue;
3328
- }
3329
- try {
3330
- const status = await instance.status().catch(() => null);
3331
- detailed.push({
3332
- ...entry,
3333
- status: workflowStatusName(status),
3334
- mappedStatus: status ? mapWorkflowStatus(status) : 'running',
3335
- });
3336
- } finally {
3337
- disposeRpcStub(instance);
3338
- }
3339
- }
3340
- return Response.json({
3341
- ok: true,
3342
- enabled: workflowPoolEnabled(),
3343
- entries: detailed,
3344
- });
3345
- }
3346
-
3347
3070
  // Workflow routes: /workflow/{runId}/{action}
3348
3071
  const wfMatch = url.pathname.match(/^\/workflow\/([^/]+)(?:\/(.+))?$/);
3349
3072
  if (wfMatch) {
@@ -3391,12 +3114,9 @@ const coordinatorEntrypoint = {
3391
3114
  },
3392
3115
  async scheduled(
3393
3116
  _controller: unknown,
3394
- env: CoordinatorEnv,
3395
- ctx?: ExecutionContext,
3396
- ): Promise<void> {
3397
- if (!workflowPoolEnabled()) return;
3398
- ctx?.waitUntil(refillWorkflowPool(env).catch(() => undefined));
3399
- },
3117
+ _env: CoordinatorEnv,
3118
+ _ctx?: ExecutionContext,
3119
+ ): Promise<void> {},
3400
3120
  };
3401
3121
 
3402
3122
  export default coordinatorEntrypoint;
@@ -3601,69 +3321,101 @@ async function handleWorkflowRoute(input: {
3601
3321
  });
3602
3322
  input.ctx?.waitUntil(prewarmPromise);
3603
3323
  }
3604
- const workflowParams = await externalizeWorkflowDbSessions({
3605
- env,
3324
+ const dbSessionExternalization = externalizedWorkflowDbSessionParams({
3606
3325
  params,
3607
- recordSubmitTiming,
3608
- });
3609
- await persistWorkflowRetryState({
3610
- env,
3611
- runId: submittedRunId,
3612
- params: workflowParams,
3613
3326
  });
3614
- let instance: WorkflowInstance | null = null;
3327
+ const workflowParams = dbSessionExternalization.params;
3615
3328
  try {
3616
- const statusEventStartedAt = Date.now();
3617
- await appendCoordinatorRunEvent(env, {
3329
+ const retryStateStartedAt = Date.now();
3330
+ const launchState = await persistWorkflowLaunchState({
3331
+ env,
3618
3332
  runId: submittedRunId,
3619
- type: 'status',
3620
- status: 'running',
3621
- ts: Date.now(),
3333
+ params: workflowParams,
3334
+ sessions: dbSessionExternalization.sessions,
3335
+ });
3336
+ const persistedAt = Date.now();
3337
+ if (dbSessionExternalization.sessions.length > 0) {
3338
+ recordSubmitTiming({
3339
+ phase: 'coordinator.workflow_db_sessions_externalized',
3340
+ ms: persistedAt - retryStateStartedAt,
3341
+ graphHash: params.graphHash ?? null,
3342
+ extra: {
3343
+ sessions:
3344
+ launchState.sessionCount ??
3345
+ dbSessionExternalization.sessions.length,
3346
+ expiresAt:
3347
+ launchState.dbSessionsExpiresAt ??
3348
+ dbSessionExternalization.ref?.expiresAt,
3349
+ combinedLaunchState: true,
3350
+ },
3351
+ });
3352
+ }
3353
+ recordSubmitTiming({
3354
+ phase: 'coordinator.retry_state_persistence',
3355
+ ms: persistedAt - retryStateStartedAt,
3356
+ graphHash: params.graphHash ?? null,
3357
+ extra: {
3358
+ combinedLaunchState: dbSessionExternalization.sessions.length > 0,
3359
+ },
3360
+ });
3361
+ } catch (error) {
3362
+ const errorMessage =
3363
+ error instanceof Error ? error.message : String(error);
3364
+ console.error('[coordinator] workflow retry state persistence failed', {
3365
+ code: 'WORKFLOW_RETRY_STATE_PERSISTENCE_FAILED',
3366
+ runId: submittedRunId,
3367
+ error: errorMessage,
3622
3368
  });
3623
3369
  recordSubmitTiming({
3624
- phase: 'coordinator.submit_status_event',
3625
- ms: Date.now() - statusEventStartedAt,
3370
+ phase: 'coordinator.retry_state_persistence',
3371
+ ms: 0,
3626
3372
  graphHash: params.graphHash ?? null,
3373
+ extra: {
3374
+ status: 'failed',
3375
+ error: errorMessage,
3376
+ },
3377
+ });
3378
+ return workflowRetryStatePersistenceErrorResponse({
3379
+ runId: submittedRunId,
3380
+ error,
3627
3381
  });
3382
+ }
3383
+ workflowParams.submittedAt = Date.now();
3384
+ let instance: WorkflowInstance | null = null;
3385
+ try {
3628
3386
  const dispatchStartedAt = Date.now();
3629
- const poolAttemptStartedAt = Date.now();
3630
- instance = await submitViaPooledWorkflow({
3387
+ const createStartedAt = Date.now();
3388
+ instance = await createDynamicWorkflowInstance({
3631
3389
  env,
3390
+ id: defaultInstanceId,
3632
3391
  params: workflowParams,
3633
- recordSubmitTiming,
3634
3392
  });
3393
+ const workflowCreatedAt = Date.now();
3635
3394
  recordSubmitTiming({
3636
- phase: 'coordinator.workflow_pool_attempt',
3637
- ms: Date.now() - poolAttemptStartedAt,
3395
+ phase: 'coordinator.workflow_create',
3396
+ ms: workflowCreatedAt - createStartedAt,
3638
3397
  graphHash: params.graphHash ?? null,
3639
- extra: {
3640
- usedPool: Boolean(instance),
3641
- enabled: workflowPoolEnabled(),
3642
- },
3398
+ extra: { instanceId: instance.id },
3643
3399
  });
3644
- if (!instance) {
3645
- const createStartedAt = Date.now();
3646
- instance = await createDynamicWorkflowInstance({
3647
- env,
3648
- id: defaultInstanceId,
3649
- params: workflowParams,
3650
- });
3651
- recordSubmitTiming({
3652
- phase: 'coordinator.workflow_create',
3653
- ms: Date.now() - createStartedAt,
3654
- graphHash: params.graphHash ?? null,
3655
- extra: { instanceId: instance.id },
3400
+ const instanceIdRecord = recordWorkflowInstanceId({
3401
+ env,
3402
+ runId: submittedRunId,
3403
+ instanceId: instance.id,
3404
+ }).catch((error) => {
3405
+ console.warn('[coordinator] workflow instance id record failed', {
3406
+ runId: submittedRunId,
3407
+ instanceId: instance?.id ?? null,
3408
+ error: error instanceof Error ? error.message : String(error),
3656
3409
  });
3657
- }
3410
+ });
3411
+ input.ctx?.waitUntil(instanceIdRecord);
3658
3412
  recordSubmitTiming({
3659
3413
  phase: 'coordinator.dispatch_workflow',
3660
3414
  ms: Date.now() - dispatchStartedAt,
3661
3415
  graphHash: params.graphHash ?? null,
3662
3416
  extra: {
3663
- startMode:
3664
- instance.id === defaultInstanceId
3665
- ? 'direct_workflow_create'
3666
- : 'pooled_workflow_start_event',
3417
+ startMode: 'direct_workflow_create',
3418
+ instanceIdRecord: 'waitUntil',
3667
3419
  },
3668
3420
  });
3669
3421
  const initialWaitMsRaw = Number(
@@ -3698,9 +3450,6 @@ async function handleWorkflowRoute(input: {
3698
3450
  ms: totalMs,
3699
3451
  graphHash: params.graphHash ?? null,
3700
3452
  });
3701
- if (workflowPoolEnabled() && instance.id === defaultInstanceId) {
3702
- input.ctx?.waitUntil(refillWorkflowPool(env).catch(() => undefined));
3703
- }
3704
3453
  return Response.json({
3705
3454
  runId,
3706
3455
  status: 'submitted',
@@ -3733,126 +3482,17 @@ async function handleWorkflowRoute(input: {
3733
3482
  { status: 400 },
3734
3483
  );
3735
3484
  }
3736
- const manifest = body.manifest as PlayRuntimeManifest | undefined;
3737
- const governance = body.internalRunPlay as
3738
- | PlayCallGovernanceSnapshot
3739
- | undefined;
3740
- const childPlayName =
3741
- typeof body.name === 'string' && body.name.trim()
3742
- ? body.name.trim()
3743
- : manifest?.playName?.trim();
3744
- if (
3745
- !manifest ||
3746
- !childPlayName ||
3747
- !manifest.artifactStorageKey ||
3748
- !manifest.artifactHash ||
3749
- !manifest.graphHash ||
3750
- !governance
3751
- ) {
3752
- return Response.json(
3753
- {
3754
- error: {
3755
- code: 'CHILD_MANIFEST_REQUIRED',
3756
- message:
3757
- 'submit-child requires a trusted child manifest and lineage.',
3758
- phase: 'coordinator_child_submit',
3759
- parentRunId: runId,
3760
- },
3761
- },
3762
- { status: 400 },
3763
- );
3764
- }
3765
- const childRunId = buildChildRunId(childPlayName);
3766
- const orgId = typeof body.orgId === 'string' ? body.orgId : '';
3767
- if (!orgId) {
3768
- return Response.json(
3769
- {
3770
- error: {
3771
- code: 'CHILD_ORG_REQUIRED',
3772
- message: 'submit-child requires orgId from the parent runtime.',
3773
- phase: 'coordinator_child_submit',
3774
- parentRunId: runId,
3775
- },
3776
- },
3777
- { status: 400 },
3778
- );
3779
- }
3780
- const parentExecutorToken =
3781
- typeof body.parentExecutorToken === 'string'
3782
- ? body.parentExecutorToken.trim()
3783
- : '';
3784
- if (!parentExecutorToken) {
3785
- return Response.json(
3786
- {
3787
- error: {
3788
- code: 'PARENT_EXECUTOR_TOKEN_REQUIRED',
3789
- message:
3790
- 'submit-child requires the parent executor token for origin-scoped child token minting.',
3791
- phase: 'coordinator_child_submit',
3792
- parentRunId: runId,
3793
- },
3794
- },
3795
- { status: 400 },
3796
- );
3797
- }
3798
- const baseUrl = resolveRuntimeBaseUrl(env, body);
3799
- const childToken = await mintChildWorkflowExecutorToken({
3800
- env,
3801
- baseUrl,
3802
- parentExecutorToken,
3803
- parentRunId: runId,
3804
- parentPlayName:
3805
- typeof body.parentPlayName === 'string' && body.parentPlayName.trim()
3806
- ? body.parentPlayName.trim()
3807
- : governance.parentPlayName,
3808
- childRunId,
3809
- childPlayName,
3810
- maxCreditsPerRun: manifest.maxCreditsPerRun ?? null,
3811
- });
3812
- const preloadedDbSessions = await preloadChildRuntimeDbSessions({
3813
- env,
3814
- baseUrl,
3815
- childExecutorToken: childToken,
3485
+ const {
3486
+ response: submitResponse,
3487
+ responseText,
3816
3488
  childRunId,
3817
3489
  childPlayName,
3818
- manifest,
3819
- orgId,
3820
- userEmail: typeof body.userEmail === 'string' ? body.userEmail : '',
3821
- });
3822
- const params = buildChildWorkflowParams({
3490
+ } = await submitChildWorkflowThroughCoordinator({
3823
3491
  env,
3492
+ parentRunId: runId,
3824
3493
  body,
3825
- manifest,
3826
- governance,
3827
- childRunId,
3828
- childPlayName,
3829
- childToken,
3830
- orgId,
3831
3494
  coordinatorUrl: new URL(request.url).origin,
3832
- runtimeBackend: 'cf_workflows_dynamic_worker',
3833
- dynamicWorkerCode:
3834
- typeof manifest.bundledCode === 'string'
3835
- ? manifest.bundledCode
3836
- : null,
3837
- preloadedDbSessions:
3838
- preloadedDbSessions.length > 0 ? preloadedDbSessions : null,
3839
- });
3840
- const submitResponse = await handleWorkflowRoute({
3841
- runId: childRunId,
3842
- action: 'submit',
3843
- request: new Request(
3844
- `https://deepline.coordinator.internal/workflow/${encodeURIComponent(
3845
- childRunId,
3846
- )}/submit`,
3847
- {
3848
- method: 'POST',
3849
- headers: { 'content-type': 'application/json' },
3850
- body: JSON.stringify(params),
3851
- },
3852
- ),
3853
- env,
3854
3495
  });
3855
- const responseText = await submitResponse.text().catch(() => '');
3856
3496
  recordCoordinatorPerfTrace({
3857
3497
  runId,
3858
3498
  phase: 'coordinator.child_submit',
@@ -4004,7 +3644,8 @@ async function handleWorkflowRoute(input: {
4004
3644
  .get('instanceId')
4005
3645
  ?.trim();
4006
3646
  const instanceId =
4007
- requestedInstanceId && !isWorkflowMutatingAction(action)
3647
+ requestedInstanceId &&
3648
+ isWorkflowInstanceIdForRun(runId, requestedInstanceId)
4008
3649
  ? requestedInstanceId
4009
3650
  : await resolveWorkflowInstanceIdForRun(env, runId);
4010
3651
  instance = await env.PLAY_WORKFLOW.get(instanceId);
@@ -4064,6 +3705,20 @@ async function handleWorkflowRoute(input: {
4064
3705
  : eventKey
4065
3706
  ? `integration_event_${eventKey}`
4066
3707
  : 'integration_event';
3708
+ if (body.signal === 'integration_event' && eventKey) {
3709
+ await writeCoordinatorChildTerminalState({
3710
+ env,
3711
+ parentRunId: runId,
3712
+ eventKey,
3713
+ data: body.data ?? body,
3714
+ }).catch((error: unknown) => {
3715
+ console.warn('[coordinator] child terminal cache write failed', {
3716
+ runId,
3717
+ eventKey,
3718
+ error: error instanceof Error ? error.message : String(error),
3719
+ });
3720
+ });
3721
+ }
4067
3722
  await instance.sendEvent({
4068
3723
  type: workflowEventType(eventType),
4069
3724
  payload: body,
@@ -4200,6 +3855,16 @@ function workflowInstanceId(runId: string): string {
4200
3855
  return `run-${stableHash(runId)}`;
4201
3856
  }
4202
3857
 
3858
+ function isWorkflowInstanceIdForRun(
3859
+ runId: string,
3860
+ instanceId: string,
3861
+ ): boolean {
3862
+ const canonical = workflowInstanceId(runId);
3863
+ return (
3864
+ instanceId === canonical || instanceId.startsWith(`${canonical}-retry-`)
3865
+ );
3866
+ }
3867
+
4203
3868
  function stableHash(value: string): string {
4204
3869
  let hash = 2166136261;
4205
3870
  for (let index = 0; index < value.length; index += 1) {
@@ -4323,10 +3988,11 @@ function loadDynamicPlayWorkerSync(
4323
3988
  // miswired environments fail before user code starts.
4324
3989
  HARNESS: env.HARNESS,
4325
3990
  VERCEL_PROTECTION_BYPASS_TOKEN: env.VERCEL_PROTECTION_BYPASS_TOKEN,
4326
- // In-process runtime API bridge used by the play harness for status,
4327
- // tool execution, DB session, and artifact callbacks. This avoids a
4328
- // public fetch hop when Cloudflare exposes the RuntimeApi export.
4329
- ...makeRuntimeApiEnvBinding(),
3991
+ // Runtime API bridge used by the play harness for status, tool
3992
+ // execution, DB session, and artifact callbacks. This uses the
3993
+ // long-lived HARNESS service binding, avoiding public callback HTTP
3994
+ // without relying on dynamic-worker access to named exports.
3995
+ ...makeRuntimeApiEnvBinding(env),
4330
3996
  // In-process coordinator control bridge used by ctx.runPlay and
4331
3997
  // parent terminal signals. This keeps scalar child plays inline with
4332
3998
  // the parent instead of round-tripping through nested Workflow waits.
@@ -4411,7 +4077,7 @@ async function loadDynamicPlayWorker(
4411
4077
  // HARNESS, and child workflow control uses the COORDINATOR binding.
4412
4078
  HARNESS: env.HARNESS,
4413
4079
  VERCEL_PROTECTION_BYPASS_TOKEN: env.VERCEL_PROTECTION_BYPASS_TOKEN,
4414
- ...makeRuntimeApiEnvBinding(),
4080
+ ...makeRuntimeApiEnvBinding(env),
4415
4081
  ...makeCoordinatorControlBinding(),
4416
4082
  },
4417
4083
  };
@@ -4861,20 +4527,6 @@ async function handleCoordinatorWarmup(
4861
4527
  graphHash: params.graphHash,
4862
4528
  extra: { status: response.status, label },
4863
4529
  });
4864
- const poolRefillPromise = refillWorkflowPool(env, {
4865
- waitReady: true,
4866
- minAvailable: 1,
4867
- }).catch(() => ({
4868
- available: 0,
4869
- warming: 0,
4870
- target: 0,
4871
- created: 0,
4872
- promoted: 0,
4873
- removed: 0,
4874
- waitedMs: 0,
4875
- waitIterations: 0,
4876
- }));
4877
- ctx?.waitUntil(poolRefillPromise.then(() => undefined));
4878
4530
  let body: unknown = null;
4879
4531
  try {
4880
4532
  body = text ? JSON.parse(text) : null;
@@ -4897,54 +4549,26 @@ async function handleCoordinatorWarmup(
4897
4549
  status: response.status,
4898
4550
  body,
4899
4551
  terminalState,
4900
- workflowPool: await poolRefillPromise,
4901
4552
  },
4902
4553
  { status: responseStatus },
4903
4554
  );
4904
4555
  }
4905
4556
 
4906
4557
  /**
4907
- * Returns a structured-cloneable `Fetcher` stub for the `RuntimeApi`
4908
- * WorkerEntrypoint. The stub goes into the per-graphHash play Worker's
4909
- * `env.RUNTIME_API`. When the harness calls `env.RUNTIME_API.fetch(req)`,
4910
- * the request is RPC-dispatched into the `RuntimeApi.fetch` method on the
4911
- * coordinator side, which path-allowlists it and forwards to
4912
- * `DEEPLINE_API_BASE_URL` directly. Skips the public *.workers.dev CF
4913
- * edge cloudflared localhost chain that the harness's old
4914
- * `fetch(req.baseUrl + path)` path traverses.
4915
- *
4916
- * Implemented as a WorkerEntrypoint (not a plain closure) because Cloudflare
4917
- * Workflows serializes the dynamic Worker's env when persisting workflow
4918
- * state, and closures containing captured locals aren't
4919
- * structured-cloneable. WorkerEntrypoint stubs ARE cloneable — same trick
4920
- * `makePlayAssetsBinding` already uses.
4921
- *
4922
- * Falls back transparently when Cloudflare does not expose module exports in
4923
- * the current execution path: if the binding is omitted from `env`, the play
4924
- * worker uses its existing `fetch(req.baseUrl + path)` transport.
4558
+ * Returns a structured-cloneable runtime API binding for the per-graphHash
4559
+ * play Worker's `env.RUNTIME_API`. We intentionally pass the long-lived
4560
+ * HARNESS WorkerEntrypoint service binding instead of a plain closure: the
4561
+ * dynamic Worker env is serialized by Cloudflare Workflows, and service
4562
+ * bindings are cloneable while closures are not. The per-play runtime accepts
4563
+ * this binding via `runtimeApiCall(...)`, so callbacks still stay on
4564
+ * service bindings and never fall back to public HTTP.
4925
4565
  */
4926
- let loggedMissingRuntimeApiExport = false;
4927
4566
  let loggedMissingCoordinatorControlExport = false;
4928
4567
 
4929
- function makeRuntimeApiEnvBinding():
4930
- | { RUNTIME_API: { fetch(req: Request): Promise<Response> } }
4931
- | Record<string, never> {
4932
- const exports = workersExports as unknown as {
4933
- RuntimeApi?: (init: { props: undefined }) => {
4934
- fetch(req: Request): Promise<Response>;
4935
- };
4936
- };
4937
- const ctor = exports.RuntimeApi;
4938
- if (typeof ctor !== 'function') {
4939
- if (!loggedMissingRuntimeApiExport) {
4940
- loggedMissingRuntimeApiExport = true;
4941
- console.warn(
4942
- '[coordinator] RuntimeApi is not registered on cloudflare:workers exports; using public runtime API transport.',
4943
- );
4944
- }
4945
- return {};
4946
- }
4947
- return { RUNTIME_API: ctor({ props: undefined }) };
4568
+ function makeRuntimeApiEnvBinding(env: CoordinatorEnv): {
4569
+ RUNTIME_API: CoordinatorEnv['HARNESS'];
4570
+ } {
4571
+ return { RUNTIME_API: env.HARNESS };
4948
4572
  }
4949
4573
 
4950
4574
  function makeCoordinatorControlBinding():
@@ -4954,6 +4578,10 @@ function makeCoordinatorControlBinding():
4954
4578
  parentRunId: string,
4955
4579
  body: Record<string, unknown>,
4956
4580
  ): Promise<{ workflowId?: string; runId?: string; error?: unknown }>;
4581
+ submitWorkflowChild(
4582
+ parentRunId: string,
4583
+ body: Record<string, unknown>,
4584
+ ): Promise<{ workflowId?: string; runId?: string; error?: unknown }>;
4957
4585
  signal(
4958
4586
  runId: string,
4959
4587
  body: Record<string, unknown>,
@@ -4966,6 +4594,28 @@ function makeCoordinatorControlBinding():
4966
4594
  runId: string,
4967
4595
  event: CoordinatorRunEvent,
4968
4596
  ): Promise<void>;
4597
+ readTerminalState(
4598
+ runId: string,
4599
+ ): Promise<CoordinatorTerminalState | null>;
4600
+ readChildTerminalState(
4601
+ parentRunId: string,
4602
+ eventKey: string,
4603
+ timeoutMs?: number,
4604
+ ): Promise<CoordinatorChildTerminalState | null>;
4605
+ rateAcquire(input: {
4606
+ bucketId: string;
4607
+ rules: Array<{
4608
+ ruleId: string;
4609
+ requestsPerWindow: number;
4610
+ windowMs: number;
4611
+ maxConcurrency: number | null;
4612
+ }>;
4613
+ requested: number;
4614
+ }): Promise<{ granted: number; waitMs: number }>;
4615
+ ratePenalize(input: {
4616
+ bucketId: string;
4617
+ cooldownMs: number;
4618
+ }): Promise<void>;
4969
4619
  };
4970
4620
  }
4971
4621
  | Record<string, never> {
@@ -4975,6 +4625,10 @@ function makeCoordinatorControlBinding():
4975
4625
  parentRunId: string,
4976
4626
  body: Record<string, unknown>,
4977
4627
  ): Promise<{ workflowId?: string; runId?: string; error?: unknown }>;
4628
+ submitWorkflowChild(
4629
+ parentRunId: string,
4630
+ body: Record<string, unknown>,
4631
+ ): Promise<{ workflowId?: string; runId?: string; error?: unknown }>;
4978
4632
  signal(
4979
4633
  runId: string,
4980
4634
  body: Record<string, unknown>,
@@ -4984,6 +4638,28 @@ function makeCoordinatorControlBinding():
4984
4638
  payload: CoordinatorPerfTracePayload,
4985
4639
  ): Promise<void>;
4986
4640
  recordRunEvent(runId: string, event: CoordinatorRunEvent): Promise<void>;
4641
+ readTerminalState(
4642
+ runId: string,
4643
+ ): Promise<CoordinatorTerminalState | null>;
4644
+ readChildTerminalState(
4645
+ parentRunId: string,
4646
+ eventKey: string,
4647
+ timeoutMs?: number,
4648
+ ): Promise<CoordinatorChildTerminalState | null>;
4649
+ rateAcquire(input: {
4650
+ bucketId: string;
4651
+ rules: Array<{
4652
+ ruleId: string;
4653
+ requestsPerWindow: number;
4654
+ windowMs: number;
4655
+ maxConcurrency: number | null;
4656
+ }>;
4657
+ requested: number;
4658
+ }): Promise<{ granted: number; waitMs: number }>;
4659
+ ratePenalize(input: {
4660
+ bucketId: string;
4661
+ cooldownMs: number;
4662
+ }): Promise<void>;
4987
4663
  };
4988
4664
  };
4989
4665
  const ctor = exports.CoordinatorControl;