deepline 0.1.79 → 0.1.81
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/dist/cli/index.js +76 -42
- package/dist/cli/index.mjs +76 -42
- package/dist/index.d.mts +9 -1
- package/dist/index.d.ts +9 -1
- package/dist/index.js +13 -10
- package/dist/index.mjs +13 -10
- package/dist/repo/apps/play-runner-workers/src/child-play-await.ts +192 -0
- package/dist/repo/apps/play-runner-workers/src/coordinator-entry.ts +1103 -1617
- package/dist/repo/apps/play-runner-workers/src/dedup-do.ts +506 -654
- package/dist/repo/apps/play-runner-workers/src/entry.ts +1148 -598
- package/dist/repo/apps/play-runner-workers/src/runtime/tool-http-errors.ts +43 -1
- package/dist/repo/apps/play-runner-workers/src/workflow-retry-state.ts +8 -2
- package/dist/repo/sdk/src/client.ts +15 -8
- package/dist/repo/sdk/src/release.ts +2 -2
- package/dist/repo/sdk/src/types.ts +5 -0
- package/dist/repo/shared_libs/play-runtime/governor/coordinator-rate-state-backend.ts +231 -0
- package/dist/repo/shared_libs/play-runtime/governor/governor.ts +376 -0
- package/dist/repo/shared_libs/play-runtime/governor/policy.ts +179 -0
- package/dist/repo/shared_libs/play-runtime/governor/rate-state-backend.ts +87 -0
- package/dist/repo/shared_libs/play-runtime/run-failure.ts +12 -0
- package/dist/repo/shared_libs/play-runtime/scheduler-backend.ts +24 -0
- package/dist/repo/shared_libs/play-runtime/submit-limits.ts +35 -0
- package/dist/repo/shared_libs/plays/bundling/index.ts +4 -12
- package/dist/repo/shared_libs/plays/bundling/limits.ts +29 -0
- package/dist/repo/shared_libs/plays/static-pipeline.ts +56 -3
- package/dist/repo/shared_libs/temporal/constants.ts +38 -0
- package/package.json +1 -1
- package/dist/repo/shared_libs/play-runtime/tool-batch-executor.ts +0 -149
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Play Execution Governor — the deep module that owns execution policy.
|
|
3
|
+
*
|
|
4
|
+
* Both runner Adapters (`cjs_node20`, `esm_workers`) gate their work through one
|
|
5
|
+
* Governor instance per run-attempt so the substrates cannot diverge on
|
|
6
|
+
* concurrency, budgets, or pacing. Adapters keep only substrate mechanism (how
|
|
7
|
+
* to resolve a row, batch a tool call, or submit a child); the Governor owns the
|
|
8
|
+
* "may I, and how many at once" policy. See ADR 0007 + CONTEXT.md.
|
|
9
|
+
*
|
|
10
|
+
* Surface (small, by design):
|
|
11
|
+
* - acquireRowSlot / acquireChildPlaySlot / acquireToolSlot → blocking leases
|
|
12
|
+
* - chargeBudget → throws on breach
|
|
13
|
+
* - forkChild → child lineage snapshot
|
|
14
|
+
* - resolveRowConcurrency / reportProviderBackpressure / snapshot
|
|
15
|
+
*/
|
|
16
|
+
import {
|
|
17
|
+
type AdapterId,
|
|
18
|
+
type ResolvedExecutionPolicy,
|
|
19
|
+
resolveExecutionPolicy,
|
|
20
|
+
resolveRowConcurrency,
|
|
21
|
+
} from './policy';
|
|
22
|
+
import {
|
|
23
|
+
noopPacingPermit,
|
|
24
|
+
type PacingRule,
|
|
25
|
+
type RateStateBackend,
|
|
26
|
+
} from './rate-state-backend';
|
|
27
|
+
|
|
28
|
+
export interface WorkLease {
|
|
29
|
+
/** Free the slot / pacing permit. Idempotent. MUST be called in a finally. */
|
|
30
|
+
release(): void;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
export type BudgetKind =
|
|
34
|
+
| 'playCall'
|
|
35
|
+
| 'toolCall'
|
|
36
|
+
| 'retry'
|
|
37
|
+
| 'descendant'
|
|
38
|
+
| 'waterfallStep';
|
|
39
|
+
|
|
40
|
+
/** Counters that accumulate down the dispatch lineage; thread via the snapshot. */
|
|
41
|
+
export interface GovernanceSnapshot {
|
|
42
|
+
rootRunId: string;
|
|
43
|
+
currentRunId: string;
|
|
44
|
+
currentPlayId: string;
|
|
45
|
+
ancestryPlayIds: string[];
|
|
46
|
+
ancestryRunIds: string[];
|
|
47
|
+
callDepth: number;
|
|
48
|
+
playCallCount: number;
|
|
49
|
+
toolCallCount: number;
|
|
50
|
+
retryCount: number;
|
|
51
|
+
descendantCount: number;
|
|
52
|
+
waterfallStepExecutions: number;
|
|
53
|
+
/** Direct child plays launched, keyed by parent play id. */
|
|
54
|
+
parentChildCalls: Record<string, number>;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
/**
 * Raised when a Governor budget or structural limit is breached.
 *
 * Carries the breached budget kind together with the observed value and the
 * configured limit so callers can report the breach without parsing the
 * message. `playDepth` gets its own wording; every other kind shares one
 * message template.
 */
export class GovernorBudgetError extends Error {
  /** Which budget (or structural limit) was exceeded. */
  readonly budget: BudgetKind | 'playDepth' | 'childPerParent';
  /** The value that tripped the limit. */
  readonly observed: number;
  /** The configured ceiling. */
  readonly limit: number;

  constructor(
    budget: BudgetKind | 'playDepth' | 'childPerParent',
    observed: number,
    limit: number,
  ) {
    let text: string;
    if (budget === 'playDepth') {
      text = `Play-call depth exceeded (${observed}/${limit}).`;
    } else {
      text = `Play execution ${budget} budget exceeded (${observed}/${limit}).`;
    }
    super(text);
    this.budget = budget;
    this.observed = observed;
    this.limit = limit;
    this.name = 'GovernorBudgetError';
  }
}
|
|
71
|
+
|
|
72
|
+
/** Maps a toolId to its provider + resolved pacing rules (from rate-limit defs). */
|
|
73
|
+
export type PacingResolver = (
|
|
74
|
+
toolId: string,
|
|
75
|
+
) => Promise<{ provider: string; rules: PacingRule[] } | null>;
|
|
76
|
+
|
|
77
|
+
export interface PlayExecutionGovernor {
|
|
78
|
+
readonly adapter: AdapterId;
|
|
79
|
+
readonly policy: ResolvedExecutionPolicy;
|
|
80
|
+
|
|
81
|
+
/** Block until a map-row slot is free. */
|
|
82
|
+
acquireRowSlot(opts?: { signal?: AbortSignal }): Promise<WorkLease>;
|
|
83
|
+
/** Block until a child-play slot is free. */
|
|
84
|
+
acquireChildPlaySlot(opts?: { signal?: AbortSignal }): Promise<WorkLease>;
|
|
85
|
+
/**
|
|
86
|
+
* Block until a global tool-concurrency slot AND the per-(org,provider) pacer
|
|
87
|
+
* permit are free, then charge the tool-call budget and return a lease. Order:
|
|
88
|
+
* concurrency slot → provider pace → tool budget (charged last so a
|
|
89
|
+
* failed/aborted acquire never consumes budget). A run over tool budget still
|
|
90
|
+
* acquires and holds a slot + pacing permit before the breach is detected; the
|
|
91
|
+
* breach surfaces only once the call is otherwise cleared to run.
|
|
92
|
+
*/
|
|
93
|
+
acquireToolSlot(
|
|
94
|
+
toolId: string,
|
|
95
|
+
opts?: { signal?: AbortSignal },
|
|
96
|
+
): Promise<WorkLease>;
|
|
97
|
+
|
|
98
|
+
/**
|
|
99
|
+
* Suggested batch parallelism for a tool: the provider's own rate hints
|
|
100
|
+
* tightened to the policy's suggested ceiling. No hints → the fallback.
|
|
101
|
+
*/
|
|
102
|
+
suggestedParallelism(toolId: string, fallback: number): Promise<number>;
|
|
103
|
+
|
|
104
|
+
/** Increment a monotonic budget counter; throws GovernorBudgetError on breach. */
|
|
105
|
+
chargeBudget(kind: BudgetKind, amount?: number): void;
|
|
106
|
+
|
|
107
|
+
/**
|
|
108
|
+
* Reserve depth + per-parent + descendant budget for a child play and return
|
|
109
|
+
* the snapshot to thread into the child run so budgets accumulate across the
|
|
110
|
+
* lineage (and across isolates on `esm_workers`). Throws on breach.
|
|
111
|
+
*
|
|
112
|
+
* Unlike {@link acquireToolSlot} (which charges last so an aborted acquire
|
|
113
|
+
* never consumes budget), child-lineage counters are charged here at fork
|
|
114
|
+
* time, BEFORE the caller acquires a child-play slot. A slot acquire that then
|
|
115
|
+
* fails (e.g. abort) does NOT refund these counters. This is intentional and
|
|
116
|
+
* safe given the 100k child caps: the charge reserves lineage-global capacity
|
|
117
|
+
* for a launch the caller has committed to, and forkChild must return the
|
|
118
|
+
* threaded snapshot synchronously, so the charge cannot be deferred behind the
|
|
119
|
+
* async slot acquire.
|
|
120
|
+
*/
|
|
121
|
+
forkChild(input: {
|
|
122
|
+
childPlayName: string;
|
|
123
|
+
childRunId: string;
|
|
124
|
+
}): GovernanceSnapshot;
|
|
125
|
+
|
|
126
|
+
/** Effective row concurrency: explicit request clamped to [1, rowMax], else default. */
|
|
127
|
+
resolveRowConcurrency(requested?: number): number;
|
|
128
|
+
|
|
129
|
+
/** Feed a provider's Retry-After back into the shared pacer. */
|
|
130
|
+
reportProviderBackpressure(input: {
|
|
131
|
+
provider: string;
|
|
132
|
+
retryAfterMs: number;
|
|
133
|
+
}): void;
|
|
134
|
+
|
|
135
|
+
snapshot(): GovernanceSnapshot;
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
interface GovernorInput {
|
|
139
|
+
adapter: AdapterId;
|
|
140
|
+
scope: { orgId: string; rootRunId: string };
|
|
141
|
+
rateState: RateStateBackend;
|
|
142
|
+
resolvePacing: PacingResolver;
|
|
143
|
+
resume?: GovernanceSnapshot;
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
/**
 * Counting semaphore with a FIFO waiter queue and AbortSignal support.
 *
 * `acquire` resolves with a lease once fewer than `limit` leases are held;
 * `lease.release()` is idempotent and wakes the oldest parked waiter. A woken
 * waiter re-checks the count before taking the slot, so a caller that grabbed
 * the slot in between simply sends the waiter back to the queue.
 */
class Semaphore {
  private inFlight = 0;
  private readonly waiters: Array<() => void> = [];
  constructor(private readonly limit: number) {}

  /** Error to surface for an aborted acquire: the signal's own reason when it is an Error. */
  private static abortError(signal?: AbortSignal): Error {
    return signal?.reason instanceof Error
      ? signal.reason
      : new Error('Slot acquire aborted.');
  }

  /**
   * Wait for a free slot and return a lease on it.
   *
   * @param signal Optional abort signal. An aborted signal rejects the acquire
   *   with the signal's reason (or a generic Error) and never takes a slot.
   * @returns A lease whose `release()` frees the slot; calling it twice is a no-op.
   * @throws The abort reason when `signal` is (or becomes) aborted before the
   *   slot is taken.
   */
  async acquire(signal?: AbortSignal): Promise<WorkLease> {
    // Fail fast on an already-aborted signal: the abort listener registered
    // while parked never fires for a signal aborted before we parked, so
    // without this check a full pool would block the waiter until a slot frees.
    if (signal?.aborted) {
      throw Semaphore.abortError(signal);
    }
    while (this.inFlight >= this.limit) {
      await new Promise<void>((resolve, reject) => {
        const onResolve = () => {
          signal?.removeEventListener('abort', onAbort);
          resolve();
        };
        const onAbort = () => {
          // Leave the queue so a later release does not wake a dead waiter.
          const idx = this.waiters.indexOf(onResolve);
          if (idx >= 0) this.waiters.splice(idx, 1);
          reject(Semaphore.abortError(signal));
        };
        this.waiters.push(onResolve);
        signal?.addEventListener('abort', onAbort, { once: true });
      });
      if (signal?.aborted) {
        // We were woken by a release (which already removed us from the queue
        // and detached the abort listener) but the signal aborted before this
        // continuation ran. We are giving the slot up, so pass the wake-up on;
        // otherwise the freed slot would sit idle while other waiters stay
        // parked with nobody left to wake them.
        if (this.inFlight < this.limit) this.waiters.shift()?.();
        throw Semaphore.abortError(signal);
      }
    }
    this.inFlight += 1;
    let released = false;
    return {
      release: () => {
        if (released) return;
        released = true;
        this.inFlight = Math.max(0, this.inFlight - 1);
        this.waiters.shift()?.();
      },
    };
  }
}
|
|
197
|
+
|
|
198
|
+
/**
 * Build the zeroed snapshot for the root of a dispatch lineage.
 *
 * The root run is both `rootRunId` and `currentRunId`. When a `rootPlayId` is
 * supplied (and non-empty) it seeds the play ancestry and the lineage starts at
 * depth 1; otherwise the ancestry is empty and the depth is 0. All budget
 * counters start at zero.
 *
 * @param scope Identifiers for the root run; `orgId` is accepted but not
 *   recorded in the snapshot.
 * @returns A fresh snapshot object with its own arrays and child-call map.
 */
export function createDefaultGovernanceSnapshot(scope: {
  orgId: string;
  rootRunId: string;
  rootPlayId?: string;
}): GovernanceSnapshot {
  const { rootRunId, rootPlayId } = scope;

  const ancestryPlayIds: string[] = [];
  let callDepth = 0;
  if (rootPlayId) {
    ancestryPlayIds.push(rootPlayId);
    callDepth = 1;
  }

  return {
    rootRunId,
    currentRunId: rootRunId,
    // Nullish fallback only: with no play id the run id doubles as the play key.
    currentPlayId: rootPlayId ?? rootRunId,
    ancestryPlayIds,
    ancestryRunIds: [rootRunId],
    callDepth,
    playCallCount: 0,
    toolCallCount: 0,
    retryCount: 0,
    descendantCount: 0,
    waterfallStepExecutions: 0,
    parentChildCalls: {},
  };
}
|
|
218
|
+
|
|
219
|
+
/**
 * Build the Governor for one run-attempt.
 *
 * The Governor resolves its policy from the adapter id, owns three in-process
 * semaphores (rows, child plays, tool calls), and keeps a private copy of the
 * governance counters. When `input.resume` is given the counters are seeded
 * from a COPY of it, so the caller's snapshot object is never mutated; read
 * the live counters through `snapshot()`.
 *
 * @param input Adapter id, org/run scope, the rate-state backend, the pacing
 *   resolver, and an optional snapshot to resume from.
 * @returns The Governor facade for this run-attempt.
 */
export function createPlayExecutionGovernor(
  input: GovernorInput,
): PlayExecutionGovernor {
  const policy = resolveExecutionPolicy(input.adapter);

  /** Copy a snapshot including its arrays and child-call map. */
  const copySnapshot = (source: GovernanceSnapshot): GovernanceSnapshot => ({
    ...source,
    ancestryPlayIds: [...source.ancestryPlayIds],
    ancestryRunIds: [...source.ancestryRunIds],
    parentChildCalls: { ...source.parentChildCalls },
  });

  // Private mutable state. Copying `resume` keeps budget charges from leaking
  // into an object the caller still holds.
  const state: GovernanceSnapshot = input.resume
    ? copySnapshot(input.resume)
    : createDefaultGovernanceSnapshot(input.scope);

  const rowSlots = new Semaphore(policy.concurrency.rowMax);
  const childPlaySlots = new Semaphore(policy.concurrency.childPlay);
  const toolSlots = new Semaphore(policy.concurrency.toolCalls);

  const bucketId = (provider: string) => `${input.scope.orgId}:${provider}`;

  /** Release the pacing permit, then the slot — the slot is freed even if the permit throws. */
  const releaseBoth = (permit: { release(): void }, slot: WorkLease): void => {
    try {
      permit.release();
    } finally {
      slot.release();
    }
  };

  /**
   * Add `amount` to the counter for `kind`, then throw GovernorBudgetError if
   * the counter now exceeds its limit. The increment is kept on breach.
   */
  function chargeBudget(kind: BudgetKind, amount = 1): void {
    switch (kind) {
      case 'playCall':
        state.playCallCount += amount;
        if (state.playCallCount > policy.budgets.maxPlayCallCount)
          throw new GovernorBudgetError('playCall', state.playCallCount, policy.budgets.maxPlayCallCount);
        return;
      case 'toolCall':
        state.toolCallCount += amount;
        if (state.toolCallCount > policy.budgets.maxToolCallCount)
          throw new GovernorBudgetError('toolCall', state.toolCallCount, policy.budgets.maxToolCallCount);
        return;
      case 'retry':
        state.retryCount += amount;
        if (state.retryCount > policy.budgets.maxRetryCount)
          throw new GovernorBudgetError('retry', state.retryCount, policy.budgets.maxRetryCount);
        return;
      case 'descendant':
        state.descendantCount += amount;
        if (state.descendantCount > policy.budgets.maxDescendants)
          throw new GovernorBudgetError('descendant', state.descendantCount, policy.budgets.maxDescendants);
        return;
      case 'waterfallStep':
        state.waterfallStepExecutions += amount;
        if (state.waterfallStepExecutions > policy.budgets.maxWaterfallStepExecutions)
          throw new GovernorBudgetError('waterfallStep', state.waterfallStepExecutions, policy.budgets.maxWaterfallStepExecutions);
        return;
    }
  }

  return {
    adapter: input.adapter,
    policy,

    acquireRowSlot: (opts?: { signal?: AbortSignal }) => rowSlots.acquire(opts?.signal),
    acquireChildPlaySlot: (opts?: { signal?: AbortSignal }) => childPlaySlots.acquire(opts?.signal),

    async acquireToolSlot(
      toolId: string,
      opts?: { signal?: AbortSignal },
    ): Promise<WorkLease> {
      // 1. global tool-concurrency slot.
      const slot = await toolSlots.acquire(opts?.signal);
      // 2. per-(org,provider) pacing. The provider comes from the pacing
      //    resolver, so callers only need the toolId. No rules → no pacing.
      let permit: { release(): void };
      try {
        const pacing = await input.resolvePacing(toolId);
        permit =
          pacing && pacing.rules.length > 0
            ? await input.rateState.acquire({
                bucketId: bucketId(pacing.provider),
                rules: pacing.rules,
                signal: opts?.signal,
              })
            : noopPacingPermit();
      } catch (error) {
        slot.release();
        throw error;
      }
      // 3. charge the budget only once the call is actually cleared to run, so a
      //    failed/aborted acquisition never permanently consumes tool budget.
      try {
        chargeBudget('toolCall');
      } catch (error) {
        releaseBoth(permit, slot);
        throw error;
      }
      let released = false;
      return {
        release: () => {
          if (released) return;
          released = true;
          releaseBoth(permit, slot);
        },
      };
    },

    async suggestedParallelism(toolId: string, fallback: number): Promise<number> {
      const pacing = await input.resolvePacing(toolId);
      if (!pacing || pacing.rules.length === 0) return fallback;
      // Every rule contributes its window size, plus its in-flight cap when set;
      // the tightest of those and the policy ceiling wins, floored at 1.
      const limits = pacing.rules.flatMap((rule) =>
        rule.maxConcurrency != null
          ? [rule.requestsPerWindow, rule.maxConcurrency]
          : [rule.requestsPerWindow],
      );
      return Math.max(
        1,
        Math.min(policy.pacing.suggestedMaxParallelism, ...limits),
      );
    },

    chargeBudget,

    forkChild(childInput: {
      childPlayName: string;
      childRunId: string;
    }): GovernanceSnapshot {
      // A play that already appears in its own ancestry is a cycle.
      if (state.ancestryPlayIds.includes(childInput.childPlayName)) {
        const chain = [...state.ancestryPlayIds, childInput.childPlayName].join(' -> ');
        throw new Error(`Recursive play graph detected: ${chain}`);
      }
      const nextDepth = state.callDepth + 1;
      if (nextDepth > policy.budgets.maxPlayCallDepth)
        throw new GovernorBudgetError('playDepth', nextDepth, policy.budgets.maxPlayCallDepth);
      const parentKey = state.currentPlayId;
      const nextParent = (state.parentChildCalls[parentKey] ?? 0) + 1;
      if (nextParent > policy.budgets.maxChildPlayCallsPerParent)
        throw new GovernorBudgetError('childPerParent', nextParent, policy.budgets.maxChildPlayCallsPerParent);
      // Charge the run-wide play/descendant budgets on the parent. Charged at
      // fork time (not after the caller's child-play slot acquire) and never
      // refunded if that acquire fails.
      chargeBudget('playCall');
      chargeBudget('descendant');
      state.parentChildCalls[parentKey] = nextParent;
      // Child seeds from the parent's accumulated counters → lineage-global budget.
      return {
        rootRunId: state.rootRunId,
        currentRunId: childInput.childRunId,
        currentPlayId: childInput.childPlayName,
        ancestryPlayIds: [...state.ancestryPlayIds, childInput.childPlayName],
        ancestryRunIds: [...state.ancestryRunIds, childInput.childRunId],
        callDepth: nextDepth,
        playCallCount: state.playCallCount,
        toolCallCount: state.toolCallCount,
        retryCount: state.retryCount,
        descendantCount: state.descendantCount,
        waterfallStepExecutions: state.waterfallStepExecutions,
        parentChildCalls: {},
      };
    },

    resolveRowConcurrency: (requested?: number) => resolveRowConcurrency(policy, requested),

    reportProviderBackpressure(bp: { provider: string; retryAfterMs: number }): void {
      input.rateState.penalize({
        bucketId: bucketId(bp.provider),
        cooldownMs: bp.retryAfterMs,
      });
    },

    snapshot: (): GovernanceSnapshot => copySnapshot(state),
  };
}
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Play Execution Governor — the single policy table.
|
|
3
|
+
*
|
|
4
|
+
* This is the ONE source of truth for every concurrency, budget, and pacing
|
|
5
|
+
* number in the play runtime. Both runner Adapters (`cjs_node20` in-process and
|
|
6
|
+
* `esm_workers` cloud) resolve their limits from here, so the two substrates
|
|
7
|
+
* cannot drift. See ADR 0007 and the "Play Execution Governor" entry in
|
|
8
|
+
* CONTEXT.md.
|
|
9
|
+
*
|
|
10
|
+
* Tuning philosophy (product decision):
|
|
11
|
+
* - Caps are GENEROUS and tuned for fast execution. They are runaway /
|
|
12
|
+
* anti-starvation guards, NOT per-workload throttles. A legitimate large run
|
|
13
|
+
* should never hit them.
|
|
14
|
+
* - The real-time bounds on a run are (a) the wall-clock runtime cap and
|
|
15
|
+
* (b) per-provider rate pacing. The counters below only stop pathological
|
|
16
|
+
* recursion / fan-out.
|
|
17
|
+
* - Every value is an EXPLICIT, finite cap. Nothing here is unbounded — there
|
|
18
|
+
* is no `null`, no "off", no implicit infinity. Unaccounted resource use
|
|
19
|
+
* must be impossible.
|
|
20
|
+
*
|
|
21
|
+
* The docs catalog (`src/lib/plays/limits-catalog.ts`) and its generated public
|
|
22
|
+
* + internal pages source their numbers from this table.
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
/** Concurrency ceilings — how much may run at once. */
|
|
26
|
+
export interface ExecutionConcurrencyPolicy {
|
|
27
|
+
/** Map rows resolving their fields concurrently when no `concurrency` is given. */
|
|
28
|
+
readonly rowDefault: number;
|
|
29
|
+
/** Hard ceiling for an explicit map `concurrency` value; larger is clamped. */
|
|
30
|
+
readonly rowMax: number;
|
|
31
|
+
/** Concurrently in-flight child plays (`ctx.runPlay`). Excess launches block. */
|
|
32
|
+
readonly childPlay: number;
|
|
33
|
+
/** Global backstop on concurrently in-flight tool calls across all providers. */
|
|
34
|
+
readonly toolCalls: number;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* Per-run budgets — total attempts allowed before a run is treated as runaway.
|
|
39
|
+
* Accumulated down the dispatch lineage (a child seeds from the parent), so the
|
|
40
|
+
* budget is global across the nested-play tree, not per-worker.
|
|
41
|
+
*/
|
|
42
|
+
export interface ExecutionBudgetPolicy {
|
|
43
|
+
/** Max nesting depth of `ctx.runPlay` chains. Deeper is almost certainly a cycle. */
|
|
44
|
+
readonly maxPlayCallDepth: number;
|
|
45
|
+
/** Max total `ctx.runPlay` calls along a lineage. */
|
|
46
|
+
readonly maxPlayCallCount: number;
|
|
47
|
+
/** Max direct child plays one play may launch. */
|
|
48
|
+
readonly maxChildPlayCallsPerParent: number;
|
|
49
|
+
/** Max nested-play descendants created during a run. */
|
|
50
|
+
readonly maxDescendants: number;
|
|
51
|
+
/** Max total tool calls in a run. */
|
|
52
|
+
readonly maxToolCallCount: number;
|
|
53
|
+
/** Max total retries across all steps/tools. */
|
|
54
|
+
readonly maxRetryCount: number;
|
|
55
|
+
/** Max total waterfall-step executions in a run. */
|
|
56
|
+
readonly maxWaterfallStepExecutions: number;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
/**
|
|
60
|
+
* Per-provider rate pacing — the real outbound throughput governor.
|
|
61
|
+
*
|
|
62
|
+
* SUBSTRATE NOTE: the per-provider request RATE (`requestsPerWindow`/`windowMs`
|
|
63
|
+
* from a provider's `PacingRule`s) is enforced on BOTH substrates. A rule's
|
|
64
|
+
* optional `maxConcurrency` (simultaneous-in-flight cap) is enforced only on
|
|
65
|
+
* `cjs_node20`; on `esm_workers` it is intentionally excluded from the pacing
|
|
66
|
+
* contract because a fanned-out run cannot guarantee the per-isolate release
|
|
67
|
+
* signal an in-flight count needs, so only the org-wide tool-concurrency
|
|
68
|
+
* backstop applies there. See PacingRule.maxConcurrency in rate-state-backend.ts.
|
|
69
|
+
*/
|
|
70
|
+
export interface ExecutionPacingPolicy {
|
|
71
|
+
/** RPS applied to a provider that declares no explicit rate limit. */
|
|
72
|
+
readonly defaultProviderRequestsPerSecond: number;
|
|
73
|
+
/** Parallelism the scheduler suggests before a provider's own hints tighten it. */
|
|
74
|
+
readonly suggestedMaxParallelism: number;
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
export interface ResolvedExecutionPolicy {
|
|
78
|
+
readonly concurrency: ExecutionConcurrencyPolicy;
|
|
79
|
+
readonly budgets: ExecutionBudgetPolicy;
|
|
80
|
+
readonly pacing: ExecutionPacingPolicy;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
/**
|
|
84
|
+
* The shared default policy. Both substrates use this verbatim unless an entry
|
|
85
|
+
* in {@link ADAPTER_POLICY_OVERRIDES} forces a documented difference.
|
|
86
|
+
*/
|
|
87
|
+
export const SHARED_EXECUTION_POLICY: ResolvedExecutionPolicy = {
|
|
88
|
+
concurrency: {
|
|
89
|
+
// Map row concurrency is platform-controlled — customers cannot set it — so
|
|
90
|
+
// every map runs at the ceiling: high enough that per-provider pacing (not
|
|
91
|
+
// the row pool) is the bottleneck even for multi-step waterfall rows. Pure-JS
|
|
92
|
+
// maps use a separate fast path and are not bound by this. Kept == rowMax so
|
|
93
|
+
// the default and the cap are the same single platform value.
|
|
94
|
+
rowDefault: 2_000,
|
|
95
|
+
// Hard cap. Above this, more in-flight rows only park memory — outbound is
|
|
96
|
+
// already bounded by tool concurrency + per-provider pacing.
|
|
97
|
+
rowMax: 2_000,
|
|
98
|
+
// Concurrent child-play launches. Generous; each child is a real launch, so
|
|
99
|
+
// this is the one value most likely to need a documented esm_workers
|
|
100
|
+
// override if isolate pressure shows up in E2E.
|
|
101
|
+
childPlay: 32,
|
|
102
|
+
// Global all-provider backstop. Per-provider pacing is the real limit; this
|
|
103
|
+
// just stops a single run from opening an absurd number of sockets at once.
|
|
104
|
+
toolCalls: 256,
|
|
105
|
+
},
|
|
106
|
+
budgets: {
|
|
107
|
+
// Runaway guards, not workload limits. A 5,000-row map calling several tools
|
|
108
|
+
// per row is normal and must fit comfortably under these.
|
|
109
|
+
maxPlayCallDepth: 8,
|
|
110
|
+
maxPlayCallCount: 100_000,
|
|
111
|
+
maxChildPlayCallsPerParent: 100_000,
|
|
112
|
+
maxDescendants: 100_000,
|
|
113
|
+
maxToolCallCount: 5_000_000,
|
|
114
|
+
maxRetryCount: 100_000,
|
|
115
|
+
maxWaterfallStepExecutions: 5_000_000,
|
|
116
|
+
},
|
|
117
|
+
pacing: {
|
|
118
|
+
// Undeclared providers; declared providers (rate-limit-definitions.ts) win.
|
|
119
|
+
defaultProviderRequestsPerSecond: 30,
|
|
120
|
+
suggestedMaxParallelism: 50,
|
|
121
|
+
},
|
|
122
|
+
};
|
|
123
|
+
|
|
124
|
+
export type AdapterId = 'cjs_node20' | 'esm_workers';
|
|
125
|
+
|
|
126
|
+
/**
|
|
127
|
+
* One level deep on purpose: each policy section is a flat record of numbers, so
|
|
128
|
+
* an override is `{ section: { key: value } }`. The merge in
|
|
129
|
+
* {@link resolveExecutionPolicy} is one level; this type matches it exactly so
|
|
130
|
+
* it can never advertise nested-override support the merge doesn't implement.
|
|
131
|
+
*/
|
|
132
|
+
type PolicyOverride = {
|
|
133
|
+
[S in keyof ResolvedExecutionPolicy]?: Partial<ResolvedExecutionPolicy[S]>;
|
|
134
|
+
};
|
|
135
|
+
|
|
136
|
+
/**
|
|
137
|
+
* The ONLY sanctioned per-substrate divergence. Empty by design — both
|
|
138
|
+
* substrates run the shared policy. Every entry added here MUST carry a
|
|
139
|
+
* one-line comment citing the substrate constraint that forces it (e.g. isolate
|
|
140
|
+
* CPU/memory). CI may assert this map stays small. This is the anti-drift seam:
|
|
141
|
+
* differences are explicit and justified, never accidental.
|
|
142
|
+
*/
|
|
143
|
+
export const ADAPTER_POLICY_OVERRIDES: Record<AdapterId, PolicyOverride> = {
|
|
144
|
+
cjs_node20: {},
|
|
145
|
+
esm_workers: {},
|
|
146
|
+
};
|
|
147
|
+
|
|
148
|
+
/**
 * Resolve the effective policy for an Adapter.
 *
 * Each of the three sections is a shallow merge: the shared defaults first,
 * then that Adapter's override entries for the same section on top. The result
 * is built from fresh section objects, so it never aliases the shared table.
 *
 * @param adapter Which runner substrate to resolve the policy for.
 * @returns The merged concurrency, budget, and pacing sections.
 */
export function resolveExecutionPolicy(
  adapter: AdapterId,
): ResolvedExecutionPolicy {
  const override = ADAPTER_POLICY_OVERRIDES[adapter];
  const shared = SHARED_EXECUTION_POLICY;

  const concurrency = Object.assign({}, shared.concurrency, override.concurrency);
  const budgets = Object.assign({}, shared.budgets, override.budgets);
  const pacing = Object.assign({}, shared.pacing, override.pacing);

  return { concurrency, budgets, pacing };
}
|
|
162
|
+
|
|
163
|
+
/**
 * Effective row concurrency for a map: an explicit positive request clamped to
 * `[1, rowMax]`, otherwise the default. Single helper, used by both substrates.
 *
 * A request is honoured only when it is a finite number greater than zero. It
 * is floored to an integer, then raised to at least 1 so a fractional request
 * such as `0.5` cannot resolve to a concurrency of 0, and finally capped at
 * `rowMax`.
 *
 * @param policy Resolved policy supplying `rowDefault` and `rowMax`.
 * @param requested Optional explicit concurrency.
 * @returns The clamped request, or `rowDefault` when no usable request is given.
 */
export function resolveRowConcurrency(
  policy: ResolvedExecutionPolicy,
  requested?: number,
): number {
  const { rowDefault, rowMax } = policy.concurrency;
  if (
    typeof requested !== 'number' ||
    !Number.isFinite(requested) ||
    requested <= 0
  ) {
    return rowDefault;
  }
  // Lower bound first, cap last: rowMax stays the authoritative ceiling.
  return Math.min(Math.max(1, Math.floor(requested)), rowMax);
}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Rate State Backend — the one swappable seam of the Play Execution Governor.
|
|
3
|
+
*
|
|
4
|
+
* Per-`(org, provider)` rate windows are the only execution state that cannot be
|
|
5
|
+
* process-local: on `esm_workers` a run fans child plays across isolates, so the
|
|
6
|
+
* window must be shared. Everything else the Governor owns (budgets, concurrency
|
|
7
|
+
* slots) stays Governor-local and threads down the lineage via the snapshot.
|
|
8
|
+
*
|
|
9
|
+
* Adapters: an in-memory backend for the single-process `cjs_node20` runner, and
|
|
10
|
+
* a shared backend (Run Coordination Cache Durable Object, or server-side Redis)
|
|
11
|
+
* for `esm_workers`. See ADR 0007 and CONTEXT.md (Rate State Backend).
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
/** A single resolved rate rule for a bucket (from rate-limit-definitions). */
|
|
15
|
+
export interface PacingRule {
|
|
16
|
+
readonly ruleId: string;
|
|
17
|
+
readonly requestsPerWindow: number;
|
|
18
|
+
readonly windowMs: number;
|
|
19
|
+
/**
|
|
20
|
+
* Optional simultaneous-in-flight cap for this rule.
|
|
21
|
+
*
|
|
22
|
+
* SUBSTRATE NOTE: `maxConcurrency` is enforced ONLY on `cjs_node20` (the
|
|
23
|
+
* single-process {@link RateStateBackend} can hold a reliable in-flight count
|
|
24
|
+
* and release it on permit.release). On `esm_workers` it is intentionally NOT
|
|
25
|
+
* enforced: a run fans across V8 isolates and a dying isolate cannot guarantee
|
|
26
|
+
* the release signal a leased in-flight count would require, so the
|
|
27
|
+
* coordinator DO debits `requestsPerWindow` only. The Governor's global
|
|
28
|
+
* tool-concurrency semaphore is the cross-isolate in-flight backstop there.
|
|
29
|
+
* `requestsPerWindow`/`windowMs` (the request-RATE governor) ARE enforced on
|
|
30
|
+
* both substrates. See coordinator-rate-state-backend.ts and dedup-do.ts
|
|
31
|
+
* (computeRateAcquire) for the workers side.
|
|
32
|
+
*/
|
|
33
|
+
readonly maxConcurrency: number | null;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
/**
|
|
37
|
+
* Per-tool queue-hint metadata produced by the runtime tool catalog
|
|
38
|
+
* (`src/lib/plays/runtime-tool-metadata.ts`) and surfaced to the runtime via
|
|
39
|
+
* `ContextOptions.getToolQueueHints`. It is the raw provider rate-limit metadata
|
|
40
|
+
* that the runtime maps into one {@link PacingRule} per hint before handing it
|
|
41
|
+
* to the Governor's pacing resolver. The `provider` field identifies the pacing
|
|
42
|
+
* bucket; `bucketId`/`operation` are descriptive and used for logging/grouping.
|
|
43
|
+
*/
|
|
44
|
+
export interface PlayQueueHint {
|
|
45
|
+
bucketId: string;
|
|
46
|
+
provider: string;
|
|
47
|
+
operation: string;
|
|
48
|
+
ruleId: string;
|
|
49
|
+
requestsPerWindow: number;
|
|
50
|
+
windowMs: number;
|
|
51
|
+
maxConcurrency: number | null;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
/** Handle returned by acquire(); release frees any concurrency held by the rules. */
|
|
55
|
+
export interface PacingPermit {
|
|
56
|
+
release(): void;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
export interface RateStateBackend {
|
|
60
|
+
/**
|
|
61
|
+
* Block until one outbound call is permitted for `bucketId` under all `rules`
|
|
62
|
+
* (request windows always; per-rule `maxConcurrency` only on the in-memory
|
|
63
|
+
* `cjs_node20` backend — see {@link PacingRule.maxConcurrency}), then debit and
|
|
64
|
+
* return a permit. `bucketId` is `${orgId}:${provider}` so the window is global
|
|
65
|
+
* per (org, provider). Resolves immediately when `rules` is empty (provider has
|
|
66
|
+
* no configured limit — pacing is a no-op, the global tool-concurrency backstop
|
|
67
|
+
* still applies).
|
|
68
|
+
*/
|
|
69
|
+
acquire(input: {
|
|
70
|
+
bucketId: string;
|
|
71
|
+
rules: readonly PacingRule[];
|
|
72
|
+
signal?: AbortSignal;
|
|
73
|
+
}): Promise<PacingPermit>;
|
|
74
|
+
|
|
75
|
+
/**
|
|
76
|
+
* Feed a server-observed Retry-After back so future acquires for this bucket
|
|
77
|
+
* back off. Advisory and idempotent; never un-charges an in-flight call.
|
|
78
|
+
*/
|
|
79
|
+
penalize(input: { bucketId: string; cooldownMs: number }): void;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
/** Shared stateless permit: releasing it does nothing, so one instance serves every caller. */
const NOOP_PERMIT: PacingPermit = {
  release(): void {
    // Nothing was debited, so there is nothing to free.
  },
};

/**
 * Permit for a bucket that has no pacing rules.
 *
 * @returns The shared no-op permit; the same object is returned on every call.
 */
export function noopPacingPermit(): PacingPermit {
  return NOOP_PERMIT;
}
|
|
@@ -38,6 +38,18 @@ export function normalizePlayRunFailure(error: unknown): PlayRunFailureDetails {
|
|
|
38
38
|
cause: CLOUDFLARE_DURABLE_OBJECT_CODE_UPDATED_ERROR,
|
|
39
39
|
};
|
|
40
40
|
}
|
|
41
|
+
const playDepthBudgetMatch = cause.match(
|
|
42
|
+
/Play execution playDepth budget exceeded \((\d+)\/(\d+)\)\./,
|
|
43
|
+
);
|
|
44
|
+
if (playDepthBudgetMatch) {
|
|
45
|
+
return {
|
|
46
|
+
code: 'PLAY_CALL_DEPTH_EXCEEDED',
|
|
47
|
+
phase: 'runtime',
|
|
48
|
+
message: `Play-call depth exceeded (${playDepthBudgetMatch[1]}/${playDepthBudgetMatch[2]}).`,
|
|
49
|
+
retryable: false,
|
|
50
|
+
cause,
|
|
51
|
+
};
|
|
52
|
+
}
|
|
41
53
|
return {
|
|
42
54
|
code: 'RUN_FAILED',
|
|
43
55
|
phase: 'runtime',
|
|
@@ -35,6 +35,30 @@ export type PlayCallGovernanceSnapshot = {
|
|
|
35
35
|
key: string;
|
|
36
36
|
ancestryPlayIds: string[];
|
|
37
37
|
callDepth: number;
|
|
38
|
+
/**
|
|
39
|
+
* Cumulative lineage-global budget counters consumed by ancestors at the
|
|
40
|
+
* moment this child was launched. The child seeds its own counters from these
|
|
41
|
+
* so the corresponding budgets (`maxPlayCallCount`, `maxToolCallCount`,
|
|
42
|
+
* `maxRetryCount`, `maxDescendants`, `maxWaterfallStepExecutions`) accumulate
|
|
43
|
+
* down the dispatch lineage instead of resetting to 0 in each worker isolate —
|
|
44
|
+
* matching the cjs forkChild path, which threads all of them. The Governor
|
|
45
|
+
* documents these budgets as global across the nested-play tree, not
|
|
46
|
+
* per-worker (see policy.ts / rate-state-backend.ts); threading them here is
|
|
47
|
+
* what makes that true on `esm_workers`.
|
|
48
|
+
*
|
|
49
|
+
* `descendantCount` is load-bearing for fan-out: forkChild charges
|
|
50
|
+
* `descendant` on every child launch, so without threading it a deep/wide tree
|
|
51
|
+
* would reset descendant accounting at each isolate and never converge on the
|
|
52
|
+
* lineage-global cap.
|
|
53
|
+
*
|
|
54
|
+
* All optional and fail-safe: if absent (older callers / dropped in transit)
|
|
55
|
+
* the child falls back to 0, i.e. prior behavior.
|
|
56
|
+
*/
|
|
57
|
+
playCallCount?: number;
|
|
58
|
+
toolCallCount?: number;
|
|
59
|
+
retryCount?: number;
|
|
60
|
+
descendantCount?: number;
|
|
61
|
+
waterfallStepExecutions?: number;
|
|
38
62
|
};
|
|
39
63
|
|
|
40
64
|
export type PlaySchedulerSubmitInput = {
|