deepline 0.1.79 → 0.1.81

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. package/README.md +2 -1
  2. package/dist/cli/index.js +76 -42
  3. package/dist/cli/index.mjs +76 -42
  4. package/dist/index.d.mts +9 -1
  5. package/dist/index.d.ts +9 -1
  6. package/dist/index.js +13 -10
  7. package/dist/index.mjs +13 -10
  8. package/dist/repo/apps/play-runner-workers/src/child-play-await.ts +192 -0
  9. package/dist/repo/apps/play-runner-workers/src/coordinator-entry.ts +1103 -1617
  10. package/dist/repo/apps/play-runner-workers/src/dedup-do.ts +506 -654
  11. package/dist/repo/apps/play-runner-workers/src/entry.ts +1148 -598
  12. package/dist/repo/apps/play-runner-workers/src/runtime/tool-http-errors.ts +43 -1
  13. package/dist/repo/apps/play-runner-workers/src/workflow-retry-state.ts +8 -2
  14. package/dist/repo/sdk/src/client.ts +15 -8
  15. package/dist/repo/sdk/src/release.ts +2 -2
  16. package/dist/repo/sdk/src/types.ts +5 -0
  17. package/dist/repo/shared_libs/play-runtime/governor/coordinator-rate-state-backend.ts +231 -0
  18. package/dist/repo/shared_libs/play-runtime/governor/governor.ts +376 -0
  19. package/dist/repo/shared_libs/play-runtime/governor/policy.ts +179 -0
  20. package/dist/repo/shared_libs/play-runtime/governor/rate-state-backend.ts +87 -0
  21. package/dist/repo/shared_libs/play-runtime/run-failure.ts +12 -0
  22. package/dist/repo/shared_libs/play-runtime/scheduler-backend.ts +24 -0
  23. package/dist/repo/shared_libs/play-runtime/submit-limits.ts +35 -0
  24. package/dist/repo/shared_libs/plays/bundling/index.ts +4 -12
  25. package/dist/repo/shared_libs/plays/bundling/limits.ts +29 -0
  26. package/dist/repo/shared_libs/plays/static-pipeline.ts +56 -3
  27. package/dist/repo/shared_libs/temporal/constants.ts +38 -0
  28. package/package.json +1 -1
  29. package/dist/repo/shared_libs/play-runtime/tool-batch-executor.ts +0 -149
@@ -0,0 +1,376 @@
1
+ /**
2
+ * Play Execution Governor — the deep module that owns execution policy.
3
+ *
4
+ * Both runner Adapters (`cjs_node20`, `esm_workers`) gate their work through one
5
+ * Governor instance per run-attempt so the substrates cannot diverge on
6
+ * concurrency, budgets, or pacing. Adapters keep only substrate mechanism (how
7
+ * to resolve a row, batch a tool call, or submit a child); the Governor owns the
8
+ * "may I, and how many at once" policy. See ADR 0007 + CONTEXT.md.
9
+ *
10
+ * Surface (small, by design):
11
+ * - acquireRowSlot / acquireChildPlaySlot / acquireToolSlot → blocking leases
12
+ * - chargeBudget → throws on breach
13
+ * - forkChild → child lineage snapshot
14
+ * - resolveRowConcurrency / reportProviderBackpressure / snapshot
15
+ */
16
+ import {
17
+ type AdapterId,
18
+ type ResolvedExecutionPolicy,
19
+ resolveExecutionPolicy,
20
+ resolveRowConcurrency,
21
+ } from './policy';
22
+ import {
23
+ noopPacingPermit,
24
+ type PacingRule,
25
+ type RateStateBackend,
26
+ } from './rate-state-backend';
27
+
28
/** Handle for a held slot and/or pacing permit, returned by the acquire methods. */
export interface WorkLease {
  /**
   * Give back the slot / pacing permit this lease holds.
   * Calling it more than once is harmless; callers MUST invoke it from a
   * `finally` block so the capacity is returned on every exit path.
   */
  release(): void;
}
32
+
33
/** Names of the run-wide counters that `chargeBudget` knows how to increment. */
export type BudgetKind =
  | 'playCall'
  | 'toolCall'
  | 'retry'
  | 'descendant'
  | 'waterfallStep';
39
+
40
/**
 * Lineage position plus accumulated budget counters for one run.
 *
 * A snapshot is what gets threaded from a parent run into a child run so that
 * the counters keep accumulating down the dispatch lineage.
 */
export interface GovernanceSnapshot {
  /** Run id of the root of the lineage. */
  rootRunId: string;
  /** Run id this snapshot belongs to. */
  currentRunId: string;
  /** Play id this snapshot belongs to. */
  currentPlayId: string;
  /** Play ids from the root down to (and including) the current play. */
  ancestryPlayIds: string[];
  /** Run ids from the root down to (and including) the current run. */
  ancestryRunIds: string[];
  /** Nesting depth of the current play within the lineage. */
  callDepth: number;

  // Monotonic budget counters.
  playCallCount: number;
  toolCallCount: number;
  retryCount: number;
  descendantCount: number;
  waterfallStepExecutions: number;

  /** Number of direct child plays launched, keyed by the parent's play id. */
  parentChildCalls: Record<string, number>;
}
56
+
57
/**
 * Error raised when a Governor counter goes past its configured limit.
 *
 * Carries which budget tripped together with the observed value and the limit,
 * so callers can report the breach without parsing the message.
 */
export class GovernorBudgetError extends Error {
  constructor(
    readonly budget: BudgetKind | 'playDepth' | 'childPerParent',
    readonly observed: number,
    readonly limit: number,
  ) {
    // Depth breaches get their own wording; every other budget shares one template.
    let text: string;
    if (budget === 'playDepth') {
      text = `Play-call depth exceeded (${observed}/${limit}).`;
    } else {
      text = `Play execution ${budget} budget exceeded (${observed}/${limit}).`;
    }
    super(text);
    this.name = 'GovernorBudgetError';
  }
}
71
+
72
/**
 * Async lookup from a tool id to the provider it belongs to and that provider's
 * resolved pacing rules (derived from the rate-limit definitions).
 *
 * The Governor treats a `null` result, or an empty `rules` list, as "no pacing
 * for this tool".
 */
export type PacingResolver = (
  toolId: string,
) => Promise<{ provider: string; rules: PacingRule[] } | null>;
76
+
77
/** The policy surface every runner Adapter gates its work through. */
export interface PlayExecutionGovernor {
  readonly adapter: AdapterId;
  readonly policy: ResolvedExecutionPolicy;

  /** Waits for a free map-row slot and returns a lease on it. */
  acquireRowSlot(opts?: { signal?: AbortSignal }): Promise<WorkLease>;

  /** Waits for a free child-play slot and returns a lease on it. */
  acquireChildPlaySlot(opts?: { signal?: AbortSignal }): Promise<WorkLease>;

  /**
   * Waits for BOTH a global tool-concurrency slot and the per-(org, provider)
   * pacing permit, then charges the tool-call budget and hands back a lease.
   *
   * Ordering is: concurrency slot, then provider pacing, then tool budget. The
   * budget is charged last so that an acquire which fails or is aborted never
   * consumes budget. The flip side is that a run already over its tool budget
   * still takes (and briefly holds) a slot and a pacing permit; the breach is
   * only reported once the call would otherwise have been cleared to run.
   */
  acquireToolSlot(
    toolId: string,
    opts?: { signal?: AbortSignal },
  ): Promise<WorkLease>;

  /**
   * Batch parallelism to suggest for a tool: the provider's own rate hints,
   * tightened to the policy's suggested ceiling. When the tool has no hints,
   * `fallback` is returned.
   */
  suggestedParallelism(toolId: string, fallback: number): Promise<number>;

  /**
   * Adds to a monotonic budget counter.
   * Throws GovernorBudgetError when the counter goes past its limit.
   */
  chargeBudget(kind: BudgetKind, amount?: number): void;

  /**
   * Reserves depth, per-parent and descendant budget for a child play, and
   * returns the snapshot to thread into the child run so that budgets keep
   * accumulating across the lineage (and across isolates on `esm_workers`).
   * Throws on breach.
   *
   * In contrast to {@link acquireToolSlot} (which charges last so an aborted
   * acquire costs nothing), the child-lineage counters are charged here, at
   * fork time, BEFORE the caller acquires a child-play slot. If that later slot
   * acquire fails (for example on abort) the counters are NOT refunded. That is
   * deliberate and safe given the 100k child caps: the charge reserves
   * lineage-global capacity for a launch the caller has committed to, and
   * because forkChild has to return the threaded snapshot synchronously the
   * charge cannot be postponed until after the async slot acquire.
   */
  forkChild(input: {
    childPlayName: string;
    childRunId: string;
  }): GovernanceSnapshot;

  /**
   * Effective row concurrency: an explicit request clamped to `[1, rowMax]`,
   * otherwise the policy default.
   */
  resolveRowConcurrency(requested?: number): number;

  /** Reports a provider's Retry-After so the shared pacer can back off. */
  reportProviderBackpressure(input: {
    provider: string;
    retryAfterMs: number;
  }): void;

  /** Current governance counters and lineage position. */
  snapshot(): GovernanceSnapshot;
}
137
+
138
/** Everything `createPlayExecutionGovernor` needs to build a Governor. */
interface GovernorInput {
  /** Which runner substrate the Governor is created for; selects the policy. */
  adapter: AdapterId;
  /** Org and root run the Governor is scoped to. */
  scope: { orgId: string; rootRunId: string };
  /** Backend that holds the per-(org, provider) rate state. */
  rateState: RateStateBackend;
  /** Resolves a tool id to its provider and pacing rules. */
  resolvePacing: PacingResolver;
  /** Snapshot to start from instead of a fresh default one. */
  resume?: GovernanceSnapshot;
}
145
+
146
/**
 * Counting semaphore with abortable waits.
 *
 * At most `limit` leases are outstanding at a time; further `acquire` calls park
 * until a lease is released or their signal aborts. Waiters are woken in FIFO
 * order, but a woken waiter re-checks capacity and parks again if a concurrent
 * `acquire` took the slot first.
 */
class Semaphore {
  private inFlight = 0;
  private readonly waiters: Array<() => void> = [];
  constructor(private readonly limit: number) {}

  /**
   * Wait for a free slot and return a lease on it.
   *
   * @param signal Optional abort signal; aborting rejects the pending acquire.
   * @returns A lease whose `release()` is idempotent.
   * @throws The signal's `reason` when it is an `Error`, otherwise
   *   `Error('Slot acquire aborted.')`.
   */
  async acquire(signal?: AbortSignal): Promise<WorkLease> {
    // Fail fast on an already-aborted signal: the abort listener registered
    // while parked never fires for a signal that was aborted beforehand, so
    // without this check a full pool would block the caller until a slot frees
    // (or forever if it never drains).
    if (signal?.aborted) throw Semaphore.abortReason(signal);

    while (this.inFlight >= this.limit) {
      await this.park(signal);
      if (signal?.aborted) {
        // This waiter was woken by a release (its abort listener was already
        // detached) and then aborted before it could resume. It consumed a
        // wake-up it is not going to use, so hand it to the next waiter;
        // otherwise the freed slot would sit idle while everyone else stays
        // parked, possibly forever if no further release happens.
        this.wakeNext();
        throw Semaphore.abortReason(signal);
      }
    }

    this.inFlight += 1;
    let released = false;
    return {
      release: () => {
        if (released) return;
        released = true;
        this.inFlight = Math.max(0, this.inFlight - 1);
        this.wakeNext();
      },
    };
  }

  /** Park the caller until it is woken by a release or rejected by an abort. */
  private park(signal?: AbortSignal): Promise<void> {
    return new Promise<void>((resolve, reject) => {
      const wake = () => {
        signal?.removeEventListener('abort', onAbort);
        resolve();
      };
      const onAbort = () => {
        // Still queued: remove ourselves so a later release does not waste its
        // wake-up on a waiter that has already given up.
        const idx = this.waiters.indexOf(wake);
        if (idx >= 0) this.waiters.splice(idx, 1);
        reject(Semaphore.abortReason(signal));
      };
      this.waiters.push(wake);
      signal?.addEventListener('abort', onAbort, { once: true });
    });
  }

  /** Wake the longest-parked waiter, if any. */
  private wakeNext(): void {
    this.waiters.shift()?.();
  }

  /** Error to surface for an aborted acquire. */
  private static abortReason(signal: AbortSignal | undefined): Error {
    return signal?.reason instanceof Error
      ? signal.reason
      : new Error('Slot acquire aborted.');
  }
}
197
+
198
/**
 * Build the starting snapshot for a root run: all counters at zero.
 *
 * When a non-empty `rootPlayId` is given the lineage starts at depth 1 with that
 * play as the only ancestor; otherwise depth is 0, the ancestry is empty and the
 * root run id doubles as the current play id. An empty-string `rootPlayId` is
 * treated exactly like a missing one, so `currentPlayId`, `ancestryPlayIds` and
 * `callDepth` always agree with each other.
 *
 * @param scope Root identifiers. `orgId` is accepted for call-site symmetry but
 *   is not read here.
 * @returns A fresh snapshot; no object is shared with `scope`.
 */
export function createDefaultGovernanceSnapshot(scope: {
  orgId: string;
  rootRunId: string;
  rootPlayId?: string;
}): GovernanceSnapshot {
  // Normalise once so every field below applies the same "is there a root
  // play?" rule (previously `??` kept '' while the truthiness checks dropped it).
  const rootPlayId = scope.rootPlayId ? scope.rootPlayId : undefined;
  return {
    rootRunId: scope.rootRunId,
    currentRunId: scope.rootRunId,
    currentPlayId: rootPlayId ?? scope.rootRunId,
    ancestryPlayIds: rootPlayId ? [rootPlayId] : [],
    ancestryRunIds: [scope.rootRunId],
    callDepth: rootPlayId ? 1 : 0,
    playCallCount: 0,
    toolCallCount: 0,
    retryCount: 0,
    descendantCount: 0,
    waterfallStepExecutions: 0,
    parentChildCalls: {},
  };
}
218
+
219
/**
 * Build the Governor for one run-attempt.
 *
 * Resolves the policy for `input.adapter`, creates one semaphore per concurrency
 * class (rows, child plays, tool calls) and keeps the budget counters in a
 * single state object. When `input.resume` is supplied that object itself
 * becomes the live state and is mutated by later charges; otherwise a default
 * snapshot is created from `input.scope`.
 *
 * @param input Adapter id, org/run scope, rate-state backend, pacing resolver
 *   and an optional snapshot to resume from.
 * @returns The Governor facade for this run-attempt.
 */
export function createPlayExecutionGovernor(
  input: GovernorInput,
): PlayExecutionGovernor {
  const policy = resolveExecutionPolicy(input.adapter);
  const state: GovernanceSnapshot =
    input.resume ?? createDefaultGovernanceSnapshot(input.scope);

  const rowSlots = new Semaphore(policy.concurrency.rowMax);
  const childPlaySlots = new Semaphore(policy.concurrency.childPlay);
  const toolSlots = new Semaphore(policy.concurrency.toolCalls);

  // Rate windows are keyed per (org, provider).
  const bucketId = (provider: string) => `${input.scope.orgId}:${provider}`;

  /**
   * Release the pacing permit and then the concurrency slot. The slot is
   * returned even when the permit's release throws, so a misbehaving backend
   * cannot permanently shrink the tool-concurrency pool.
   */
  function releaseToolResources(
    permit: { release(): void },
    slot: { release(): void },
  ): void {
    try {
      permit.release();
    } finally {
      slot.release();
    }
  }

  /**
   * Add `amount` to the counter for `kind`; throw GovernorBudgetError when the
   * counter ends up above its limit (the counter keeps the charged value).
   *
   * @throws RangeError when `amount` is not a finite, non-negative number or
   *   `kind` is not a known budget. A NaN charge would otherwise turn the
   *   counter into NaN, and `NaN > limit` is always false, silently disabling
   *   the budget for the rest of the run.
   */
  function chargeBudget(kind: BudgetKind, amount = 1): void {
    if (!Number.isFinite(amount) || amount < 0) {
      throw new RangeError(
        `Budget charge amount must be a finite, non-negative number (got ${amount}).`,
      );
    }
    switch (kind) {
      case 'playCall':
        state.playCallCount += amount;
        if (state.playCallCount > policy.budgets.maxPlayCallCount)
          throw new GovernorBudgetError('playCall', state.playCallCount, policy.budgets.maxPlayCallCount);
        return;
      case 'toolCall':
        state.toolCallCount += amount;
        if (state.toolCallCount > policy.budgets.maxToolCallCount)
          throw new GovernorBudgetError('toolCall', state.toolCallCount, policy.budgets.maxToolCallCount);
        return;
      case 'retry':
        state.retryCount += amount;
        if (state.retryCount > policy.budgets.maxRetryCount)
          throw new GovernorBudgetError('retry', state.retryCount, policy.budgets.maxRetryCount);
        return;
      case 'descendant':
        state.descendantCount += amount;
        if (state.descendantCount > policy.budgets.maxDescendants)
          throw new GovernorBudgetError('descendant', state.descendantCount, policy.budgets.maxDescendants);
        return;
      case 'waterfallStep':
        state.waterfallStepExecutions += amount;
        if (state.waterfallStepExecutions > policy.budgets.maxWaterfallStepExecutions)
          throw new GovernorBudgetError('waterfallStep', state.waterfallStepExecutions, policy.budgets.maxWaterfallStepExecutions);
        return;
      default: {
        // Compile-time exhaustiveness check; at runtime an unknown kind must
        // not pass through uncounted.
        const unknownKind: never = kind;
        throw new RangeError(`Unknown budget kind: ${String(unknownKind)}`);
      }
    }
  }

  return {
    adapter: input.adapter,
    policy,

    acquireRowSlot: (opts) => rowSlots.acquire(opts?.signal),
    acquireChildPlaySlot: (opts) => childPlaySlots.acquire(opts?.signal),

    async acquireToolSlot(toolId, opts) {
      // 1. global tool-concurrency slot.
      const slot = await toolSlots.acquire(opts?.signal);
      // 2. per-(org,provider) pacing. The provider comes from the pacing
      //    resolver, so callers only need the toolId. No rules → no pacing.
      let permit: { release(): void };
      try {
        const pacing = await input.resolvePacing(toolId);
        permit =
          pacing && pacing.rules.length > 0
            ? await input.rateState.acquire({
                bucketId: bucketId(pacing.provider),
                rules: pacing.rules,
                signal: opts?.signal,
              })
            : noopPacingPermit();
      } catch (error) {
        slot.release();
        throw error;
      }
      // 3. charge the budget only once the call is actually cleared to run, so a
      //    failed/aborted acquisition never permanently consumes tool budget.
      try {
        chargeBudget('toolCall');
      } catch (error) {
        releaseToolResources(permit, slot);
        throw error;
      }
      let released = false;
      return {
        release: () => {
          if (released) return;
          released = true;
          releaseToolResources(permit, slot);
        },
      };
    },

    async suggestedParallelism(toolId, fallback) {
      const pacing = await input.resolvePacing(toolId);
      if (!pacing || pacing.rules.length === 0) return fallback;
      // Every rule contributes its window size and, when set, its in-flight cap;
      // the tightest of those and the policy ceiling wins, floored at 1.
      const limits = pacing.rules.flatMap((rule) =>
        rule.maxConcurrency != null
          ? [rule.requestsPerWindow, rule.maxConcurrency]
          : [rule.requestsPerWindow],
      );
      return Math.max(
        1,
        Math.min(policy.pacing.suggestedMaxParallelism, ...limits),
      );
    },

    chargeBudget,

    forkChild(childInput) {
      // A play that already appears in its own ancestry is a cycle.
      if (state.ancestryPlayIds.includes(childInput.childPlayName)) {
        const chain = [...state.ancestryPlayIds, childInput.childPlayName].join(' -> ');
        throw new Error(`Recursive play graph detected: ${chain}`);
      }
      const nextDepth = state.callDepth + 1;
      if (nextDepth > policy.budgets.maxPlayCallDepth)
        throw new GovernorBudgetError('playDepth', nextDepth, policy.budgets.maxPlayCallDepth);
      const parentKey = state.currentPlayId;
      const nextParent = (state.parentChildCalls[parentKey] ?? 0) + 1;
      if (nextParent > policy.budgets.maxChildPlayCallsPerParent)
        throw new GovernorBudgetError('childPerParent', nextParent, policy.budgets.maxChildPlayCallsPerParent);
      // Charge the run-wide play/descendant budgets on the parent. Charged at
      // fork time (not after the caller's child-play slot acquire) and never
      // refunded if that acquire fails.
      chargeBudget('playCall');
      chargeBudget('descendant');
      state.parentChildCalls[parentKey] = nextParent;
      // Child seeds from the parent's accumulated counters → lineage-global budget.
      return {
        rootRunId: state.rootRunId,
        currentRunId: childInput.childRunId,
        currentPlayId: childInput.childPlayName,
        ancestryPlayIds: [...state.ancestryPlayIds, childInput.childPlayName],
        ancestryRunIds: [...state.ancestryRunIds, childInput.childRunId],
        callDepth: nextDepth,
        playCallCount: state.playCallCount,
        toolCallCount: state.toolCallCount,
        retryCount: state.retryCount,
        descendantCount: state.descendantCount,
        waterfallStepExecutions: state.waterfallStepExecutions,
        parentChildCalls: {},
      };
    },

    resolveRowConcurrency: (requested) => resolveRowConcurrency(policy, requested),

    reportProviderBackpressure(bp) {
      input.rateState.penalize({
        bucketId: bucketId(bp.provider),
        cooldownMs: bp.retryAfterMs,
      });
    },

    // Copies the arrays and the per-parent map so callers cannot mutate live state.
    snapshot: () => ({
      ...state,
      ancestryPlayIds: [...state.ancestryPlayIds],
      ancestryRunIds: [...state.ancestryRunIds],
      parentChildCalls: { ...state.parentChildCalls },
    }),
  };
}
@@ -0,0 +1,179 @@
1
+ /**
2
+ * Play Execution Governor — the single policy table.
3
+ *
4
+ * This is the ONE source of truth for every concurrency, budget, and pacing
5
+ * number in the play runtime. Both runner Adapters (`cjs_node20` in-process and
6
+ * `esm_workers` cloud) resolve their limits from here, so the two substrates
7
+ * cannot drift. See ADR 0007 and the "Play Execution Governor" entry in
8
+ * CONTEXT.md.
9
+ *
10
+ * Tuning philosophy (product decision):
11
+ * - Caps are GENEROUS and tuned for fast execution. They are runaway /
12
+ * anti-starvation guards, NOT per-workload throttles. A legitimate large run
13
+ * should never hit them.
14
+ * - The real-time bounds on a run are (a) the wall-clock runtime cap and
15
+ * (b) per-provider rate pacing. The counters below only stop pathological
16
+ * recursion / fan-out.
17
+ * - Every value is an EXPLICIT, finite cap. Nothing here is unbounded — there
18
+ * is no `null`, no "off", no implicit infinity. Unaccounted resource use
19
+ * must be impossible.
20
+ *
21
+ * The docs catalog (`src/lib/plays/limits-catalog.ts`) and its generated public
22
+ * + internal pages source their numbers from this table.
23
+ */
24
+
25
/** How much work of each class may be in flight at the same time. */
export interface ExecutionConcurrencyPolicy {
  /** Row concurrency used for a map when no explicit `concurrency` is given. */
  readonly rowDefault: number;
  /** Upper bound for an explicit map `concurrency`; larger requests are clamped. */
  readonly rowMax: number;
  /** Child plays (`ctx.runPlay`) allowed in flight at once; extra launches block. */
  readonly childPlay: number;
  /** Global backstop on in-flight tool calls, summed over all providers. */
  readonly toolCalls: number;
}
36
+
37
/**
 * Per-run budgets: how many attempts of each kind are allowed before the run is
 * considered runaway.
 *
 * The counters accumulate down the dispatch lineage (a child starts from its
 * parent's totals), which makes each budget global to the nested-play tree
 * rather than per-worker.
 */
export interface ExecutionBudgetPolicy {
  /** Deepest allowed `ctx.runPlay` nesting; anything deeper is almost certainly a cycle. */
  readonly maxPlayCallDepth: number;
  /** Total `ctx.runPlay` calls allowed along a lineage. */
  readonly maxPlayCallCount: number;
  /** Direct child plays a single play may launch. */
  readonly maxChildPlayCallsPerParent: number;
  /** Nested-play descendants that may be created during a run. */
  readonly maxDescendants: number;
  /** Total tool calls allowed in a run. */
  readonly maxToolCallCount: number;
  /** Total retries allowed, summed over all steps and tools. */
  readonly maxRetryCount: number;
  /** Total waterfall-step executions allowed in a run. */
  readonly maxWaterfallStepExecutions: number;
}
58
+
59
/**
 * Per-provider rate pacing, which is the real governor of outbound throughput.
 *
 * SUBSTRATE NOTE: the request RATE of a provider (`requestsPerWindow` /
 * `windowMs` on its `PacingRule`s) is enforced on BOTH substrates. The optional
 * per-rule `maxConcurrency` (a cap on simultaneous in-flight calls) is enforced
 * on `cjs_node20` only. On `esm_workers` it is deliberately left out of the
 * pacing contract: a fanned-out run cannot guarantee the per-isolate release
 * signal that an in-flight count depends on, so only the org-wide
 * tool-concurrency backstop applies there. See PacingRule.maxConcurrency in
 * rate-state-backend.ts.
 */
export interface ExecutionPacingPolicy {
  /** Requests per second for a provider that declares no rate limit of its own. */
  readonly defaultProviderRequestsPerSecond: number;
  /** Parallelism the scheduler suggests before a provider's hints tighten it. */
  readonly suggestedMaxParallelism: number;
}
76
+
77
/** A fully resolved policy: one section each for concurrency, budgets and pacing. */
export interface ResolvedExecutionPolicy {
  readonly concurrency: ExecutionConcurrencyPolicy;
  readonly budgets: ExecutionBudgetPolicy;
  readonly pacing: ExecutionPacingPolicy;
}
82
+
83
/**
 * Default policy shared by both substrates. It is used as-is unless
 * {@link ADAPTER_POLICY_OVERRIDES} carries a documented per-adapter difference.
 */
export const SHARED_EXECUTION_POLICY: ResolvedExecutionPolicy = {
  concurrency: {
    // Row concurrency is a platform value, not a customer knob, so every map
    // runs at the ceiling. It is set high enough that per-provider pacing, not
    // the row pool, is the bottleneck even for multi-step waterfall rows.
    // Pure-JS maps take a separate fast path and are not bound by it. Kept equal
    // to rowMax so the default and the cap are one and the same number.
    rowDefault: 2_000,
    // Hard cap. Past this point extra in-flight rows only hold memory, because
    // outbound traffic is already bounded by tool concurrency and pacing.
    rowMax: 2_000,
    // Concurrent child-play launches. Generous, but each child is a real
    // launch, which makes this the value most likely to need a documented
    // esm_workers override should isolate pressure show up in E2E.
    childPlay: 32,
    // Backstop across all providers. Pacing is the real limit; this only keeps
    // a single run from opening an absurd number of sockets at once.
    toolCalls: 256,
  },
  budgets: {
    // These guard against runaway runs; they are not workload limits. A
    // 5,000-row map calling several tools per row is normal and has to fit
    // comfortably underneath them.
    maxPlayCallDepth: 8,
    maxPlayCallCount: 100_000,
    maxChildPlayCallsPerParent: 100_000,
    maxDescendants: 100_000,
    maxToolCallCount: 5_000_000,
    maxRetryCount: 100_000,
    maxWaterfallStepExecutions: 5_000_000,
  },
  pacing: {
    // Applies to undeclared providers only; providers declared in
    // rate-limit-definitions.ts take precedence.
    defaultProviderRequestsPerSecond: 30,
    suggestedMaxParallelism: 50,
  },
};
123
+
124
/** Identifier of a runner substrate the policy can be resolved for. */
export type AdapterId =
  | 'cjs_node20'
  | 'esm_workers';
125
+
126
/**
 * Shape of a per-adapter override: `{ section: { key: value } }`.
 *
 * Deliberately one level deep. Every policy section is a flat record of
 * numbers and {@link resolveExecutionPolicy} merges exactly one level, so this
 * type mirrors the merge and cannot promise nested overrides that the merge
 * would not apply.
 */
type PolicyOverride = {
  [Section in keyof ResolvedExecutionPolicy]?: Partial<
    ResolvedExecutionPolicy[Section]
  >;
};
135
+
136
/**
 * The one sanctioned place for per-substrate divergence, and the anti-drift
 * seam of the policy: differences live here, explicit and justified, never by
 * accident.
 *
 * Empty by design, so both substrates run the shared policy. Any entry added
 * MUST come with a one-line comment naming the substrate constraint that forces
 * it (for example isolate CPU/memory). CI may assert that this map stays small.
 */
export const ADAPTER_POLICY_OVERRIDES: Record<AdapterId, PolicyOverride> = {
  cjs_node20: {},
  esm_workers: {},
};
147
+
148
/**
 * Overlay one policy section with an adapter's override for that section.
 *
 * Keys whose override value is `undefined` are skipped: `Partial<>` permits an
 * explicit `undefined`, and spreading it would replace a finite shared cap with
 * `undefined`, which downstream comparisons treat as "no limit".
 */
function mergePolicySection<T extends object>(
  base: T,
  patch: Partial<T> | undefined,
): T {
  const defined = Object.fromEntries(
    Object.entries(patch ?? {}).filter(([, value]) => value !== undefined),
  );
  return { ...base, ...defined };
}

/**
 * Merge the shared policy with any documented Adapter override.
 *
 * The merge is one level deep (section → key). An adapter with no entry in
 * {@link ADAPTER_POLICY_OVERRIDES} (for example an id that arrived over the
 * wire and is not known to this build) resolves to the shared policy instead of
 * failing on a missing override.
 *
 * @param adapter Substrate to resolve the policy for.
 * @returns A new policy object; the shared table is never returned by reference.
 */
export function resolveExecutionPolicy(
  adapter: AdapterId,
): ResolvedExecutionPolicy {
  const override: PolicyOverride = ADAPTER_POLICY_OVERRIDES[adapter] ?? {};
  return {
    concurrency: mergePolicySection(
      SHARED_EXECUTION_POLICY.concurrency,
      override.concurrency,
    ),
    budgets: mergePolicySection(SHARED_EXECUTION_POLICY.budgets, override.budgets),
    pacing: mergePolicySection(SHARED_EXECUTION_POLICY.pacing, override.pacing),
  };
}
162
+
163
/**
 * Effective row concurrency for a map. Single helper, used by both substrates.
 *
 * An explicit request that is a finite, positive number is floored to an
 * integer and clamped to `[1, rowMax]`. The lower clamp matters for fractional
 * requests such as `0.5`: they pass the "positive" check but floor to 0, and a
 * concurrency of 0 would let no row run. Anything else (missing, non-number,
 * NaN, infinite, zero or negative) yields `rowDefault`.
 *
 * @param policy Resolved policy supplying `rowDefault` and `rowMax`.
 * @param requested Optional explicit concurrency.
 * @returns The concurrency to use.
 */
export function resolveRowConcurrency(
  policy: ResolvedExecutionPolicy,
  requested?: number,
): number {
  const usable =
    typeof requested === 'number' && Number.isFinite(requested) && requested > 0;
  if (!usable) return policy.concurrency.rowDefault;
  const wholeRows = Math.max(1, Math.floor(requested));
  return Math.min(wholeRows, policy.concurrency.rowMax);
}
@@ -0,0 +1,87 @@
1
+ /**
2
+ * Rate State Backend — the one swappable seam of the Play Execution Governor.
3
+ *
4
+ * Per-`(org, provider)` rate windows are the only execution state that cannot be
5
+ * process-local: on `esm_workers` a run fans child plays across isolates, so the
6
+ * window must be shared. Everything else the Governor owns (budgets, concurrency
7
+ * slots) stays Governor-local and threads down the lineage via the snapshot.
8
+ *
9
+ * Adapters: an in-memory backend for the single-process `cjs_node20` runner, and
10
+ * a shared backend (Run Coordination Cache Durable Object, or server-side Redis)
11
+ * for `esm_workers`. See ADR 0007 and CONTEXT.md (Rate State Backend).
12
+ */
13
+
14
/** One resolved rate rule for a bucket (sourced from rate-limit-definitions). */
export interface PacingRule {
  readonly ruleId: string;
  /** Requests allowed per window of `windowMs` milliseconds. */
  readonly requestsPerWindow: number;
  readonly windowMs: number;
  /**
   * Optional cap on simultaneous in-flight calls for this rule.
   *
   * SUBSTRATE NOTE: enforced ONLY on `cjs_node20`, where the single-process
   * {@link RateStateBackend} can keep a reliable in-flight count and give it
   * back on permit.release. On `esm_workers` it is intentionally NOT enforced:
   * a run fans out across V8 isolates, and an isolate that dies cannot
   * guarantee the release signal a leased in-flight count would need, so the
   * coordinator DO debits `requestsPerWindow` only. There, the Governor's global
   * tool-concurrency semaphore is the cross-isolate in-flight backstop.
   * `requestsPerWindow` / `windowMs` (the request-RATE governor) ARE enforced
   * on both substrates. For the workers side see
   * coordinator-rate-state-backend.ts and dedup-do.ts (computeRateAcquire).
   */
  readonly maxConcurrency: number | null;
}
35
+
36
/**
 * Raw per-tool rate-limit metadata ("queue hint").
 *
 * Hints are produced by the runtime tool catalog
 * (`src/lib/plays/runtime-tool-metadata.ts`) and reach the runtime through
 * `ContextOptions.getToolQueueHints`. The runtime turns each hint into one
 * {@link PacingRule} before passing it to the Governor's pacing resolver.
 * `provider` selects the pacing bucket; `bucketId` and `operation` are
 * descriptive and serve logging/grouping.
 */
export interface PlayQueueHint {
  bucketId: string;
  provider: string;
  operation: string;
  ruleId: string;
  requestsPerWindow: number;
  windowMs: number;
  maxConcurrency: number | null;
}
53
+
54
/**
 * What `acquire()` hands back. Calling `release` frees whatever concurrency the
 * rules were holding for this call.
 */
export interface PacingPermit {
  release(): void;
}
58
+
59
/** Storage seam for per-(org, provider) rate state used by the Governor. */
export interface RateStateBackend {
  /**
   * Waits until one outbound call for `bucketId` is allowed under every rule in
   * `rules`, debits it, and returns a permit.
   *
   * Request windows are always honoured; a rule's `maxConcurrency` only on the
   * in-memory `cjs_node20` backend (see {@link PacingRule.maxConcurrency}).
   * `bucketId` has the form `${orgId}:${provider}`, which makes the window
   * global per (org, provider). With an empty `rules` list the call resolves
   * immediately: the provider has no configured limit, so pacing is a no-op and
   * only the global tool-concurrency backstop applies.
   */
  acquire(input: {
    bucketId: string;
    rules: readonly PacingRule[];
    signal?: AbortSignal;
  }): Promise<PacingPermit>;

  /**
   * Reports a server-observed Retry-After so that later acquires for the bucket
   * back off. Advisory and idempotent; it never un-charges a call in flight.
   */
  penalize(input: { bucketId: string; cooldownMs: number }): void;
}
81
+
82
/** Single shared do-nothing permit; its `release` has an empty body. */
const NOOP_PERMIT: PacingPermit = {
  release(): void {},
};

/**
 * Permit for a bucket without rules, where there is nothing to debit and
 * nothing to release. Every call returns the same shared instance.
 */
export function noopPacingPermit(): PacingPermit {
  return NOOP_PERMIT;
}
@@ -38,6 +38,18 @@ export function normalizePlayRunFailure(error: unknown): PlayRunFailureDetails {
38
38
  cause: CLOUDFLARE_DURABLE_OBJECT_CODE_UPDATED_ERROR,
39
39
  };
40
40
  }
41
+ const playDepthBudgetMatch = cause.match(
42
+ /Play execution playDepth budget exceeded \((\d+)\/(\d+)\)\./,
43
+ );
44
+ if (playDepthBudgetMatch) {
45
+ return {
46
+ code: 'PLAY_CALL_DEPTH_EXCEEDED',
47
+ phase: 'runtime',
48
+ message: `Play-call depth exceeded (${playDepthBudgetMatch[1]}/${playDepthBudgetMatch[2]}).`,
49
+ retryable: false,
50
+ cause,
51
+ };
52
+ }
41
53
  return {
42
54
  code: 'RUN_FAILED',
43
55
  phase: 'runtime',
@@ -35,6 +35,30 @@ export type PlayCallGovernanceSnapshot = {
35
35
  key: string;
36
36
  ancestryPlayIds: string[];
37
37
  callDepth: number;
38
+ /**
39
+ * Cumulative lineage-global budget counters consumed by ancestors at the
40
+ * moment this child was launched. The child seeds its own counters from these
41
+ * so the corresponding budgets (`maxPlayCallCount`, `maxToolCallCount`,
42
+ * `maxRetryCount`, `maxDescendants`, `maxWaterfallStepExecutions`) accumulate
43
+ * down the dispatch lineage instead of resetting to 0 in each worker isolate —
44
+ * matching the cjs forkChild path, which threads all of them. The Governor
45
+ * documents these budgets as global across the nested-play tree, not
46
+ * per-worker (see policy.ts / rate-state-backend.ts); threading them here is
47
+ * what makes that true on `esm_workers`.
48
+ *
49
+ * `descendantCount` is load-bearing for fan-out: forkChild charges
50
+ * `descendant` on every child launch, so without threading it a deep/wide tree
51
+ * would reset descendant accounting at each isolate and never converge on the
52
+ * lineage-global cap.
53
+ *
54
+ * All optional and fail-safe: if absent (older callers / dropped in transit)
55
+ * the child falls back to 0, i.e. prior behavior.
56
+ */
57
+ playCallCount?: number;
58
+ toolCallCount?: number;
59
+ retryCount?: number;
60
+ descendantCount?: number;
61
+ waterfallStepExecutions?: number;
38
62
  };
39
63
 
40
64
  export type PlaySchedulerSubmitInput = {