deepline 0.1.79 → 0.1.80
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +68 -31
- package/dist/cli/index.mjs +68 -31
- package/dist/index.d.mts +9 -1
- package/dist/index.d.ts +9 -1
- package/dist/index.js +7 -4
- package/dist/index.mjs +7 -4
- package/dist/repo/apps/play-runner-workers/src/child-play-await.ts +192 -0
- package/dist/repo/apps/play-runner-workers/src/coordinator-entry.ts +1102 -1616
- package/dist/repo/apps/play-runner-workers/src/dedup-do.ts +506 -654
- package/dist/repo/apps/play-runner-workers/src/entry.ts +896 -354
- package/dist/repo/apps/play-runner-workers/src/workflow-retry-state.ts +8 -2
- package/dist/repo/sdk/src/client.ts +9 -2
- package/dist/repo/sdk/src/release.ts +2 -2
- package/dist/repo/sdk/src/types.ts +5 -0
- package/dist/repo/shared_libs/play-runtime/governor/coordinator-rate-state-backend.ts +231 -0
- package/dist/repo/shared_libs/play-runtime/governor/governor.ts +376 -0
- package/dist/repo/shared_libs/play-runtime/governor/policy.ts +179 -0
- package/dist/repo/shared_libs/play-runtime/governor/rate-state-backend.ts +87 -0
- package/dist/repo/shared_libs/play-runtime/run-failure.ts +12 -0
- package/dist/repo/shared_libs/play-runtime/scheduler-backend.ts +24 -0
- package/dist/repo/shared_libs/play-runtime/submit-limits.ts +35 -0
- package/dist/repo/shared_libs/plays/bundling/index.ts +4 -12
- package/dist/repo/shared_libs/plays/bundling/limits.ts +29 -0
- package/dist/repo/shared_libs/plays/static-pipeline.ts +56 -3
- package/dist/repo/shared_libs/temporal/constants.ts +38 -0
- package/package.json +1 -1
|
@@ -6,10 +6,16 @@ import type {
|
|
|
6
6
|
PlayRuntimeManifestMap,
|
|
7
7
|
} from '../../../shared_libs/plays/compiler-manifest';
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
import {
|
|
10
|
+
PLAY_SUBMIT_INPUT_INLINE_MAX_BYTES,
|
|
11
|
+
PLAY_SUBMIT_INPUT_MAX_BYTES,
|
|
12
|
+
} from '../../../shared_libs/play-runtime/submit-limits';
|
|
13
|
+
|
|
14
|
+
export const WORKFLOW_RETRY_STATE_TARGET_BYTES =
|
|
15
|
+
PLAY_SUBMIT_INPUT_INLINE_MAX_BYTES;
|
|
10
16
|
export const WORKFLOW_RETRY_PARAMS_EXTERNALIZE_AFTER_BYTES =
|
|
11
17
|
WORKFLOW_RETRY_STATE_TARGET_BYTES;
|
|
12
|
-
export const WORKFLOW_RETRY_PARAMS_MAX_BYTES =
|
|
18
|
+
export const WORKFLOW_RETRY_PARAMS_MAX_BYTES = PLAY_SUBMIT_INPUT_MAX_BYTES;
|
|
13
19
|
|
|
14
20
|
export type WorkflowRetryParamsRef = {
|
|
15
21
|
storageKind: 'r2';
|
|
@@ -642,6 +642,7 @@ export class DeeplineClient {
|
|
|
642
642
|
categories?: string;
|
|
643
643
|
grep?: string;
|
|
644
644
|
grepMode?: 'all' | 'any' | 'phrase';
|
|
645
|
+
compact?: boolean;
|
|
645
646
|
}): Promise<ToolDefinition[]> {
|
|
646
647
|
const params = new URLSearchParams();
|
|
647
648
|
if (options?.categories?.trim()) {
|
|
@@ -651,6 +652,7 @@ export class DeeplineClient {
|
|
|
651
652
|
params.set('grep', options.grep.trim());
|
|
652
653
|
params.set('grep_mode', options.grepMode ?? 'all');
|
|
653
654
|
}
|
|
655
|
+
params.set('compact', options?.compact === true ? 'true' : 'false');
|
|
654
656
|
const suffix = params.toString() ? `?${params.toString()}` : '';
|
|
655
657
|
const res = await this.http.get<{ tools: ToolDefinition[] }>(
|
|
656
658
|
`/api/v2/tools${suffix}`,
|
|
@@ -1433,6 +1435,7 @@ export class DeeplineClient {
|
|
|
1433
1435
|
if (status) {
|
|
1434
1436
|
params.set('status', status);
|
|
1435
1437
|
}
|
|
1438
|
+
params.set('compact', 'true');
|
|
1436
1439
|
const response = await this.http.get<{ runs: PlayRunListItem[] }>(
|
|
1437
1440
|
`/api/v2/runs?${params.toString()}`,
|
|
1438
1441
|
);
|
|
@@ -1636,10 +1639,14 @@ export class DeeplineClient {
|
|
|
1636
1639
|
* @param name - Play name
|
|
1637
1640
|
* @returns Version list (newest first)
|
|
1638
1641
|
*/
|
|
1639
|
-
async listPlayVersions(
|
|
1642
|
+
async listPlayVersions(
|
|
1643
|
+
name: string,
|
|
1644
|
+
options?: { full?: boolean },
|
|
1645
|
+
): Promise<PlayRevisionSummary[]> {
|
|
1640
1646
|
const encodedName = encodeURIComponent(name);
|
|
1647
|
+
const suffix = options?.full ? '?full=true' : '';
|
|
1641
1648
|
const response = await this.http.get<{ versions: PlayRevisionSummary[] }>(
|
|
1642
|
-
`/api/v2/plays/${encodedName}/versions`,
|
|
1649
|
+
`/api/v2/plays/${encodedName}/versions${suffix}`,
|
|
1643
1650
|
);
|
|
1644
1651
|
return response.versions ?? [];
|
|
1645
1652
|
}
|
|
@@ -50,10 +50,10 @@ export type SdkRelease = {
|
|
|
50
50
|
};
|
|
51
51
|
|
|
52
52
|
export const SDK_RELEASE = {
|
|
53
|
-
version: '0.1.79',
|
|
53
|
+
version: '0.1.80',
|
|
54
54
|
apiContract: '2026-06-dataset-column-cell-stale-hard-cutover',
|
|
55
55
|
supportPolicy: {
|
|
56
|
-
latest: '0.1.79',
|
|
56
|
+
latest: '0.1.80',
|
|
57
57
|
minimumSupported: '0.1.53',
|
|
58
58
|
deprecatedBelow: '0.1.53',
|
|
59
59
|
},
|
|
@@ -131,6 +131,10 @@ export interface ToolDefinition {
|
|
|
131
131
|
operationId?: string;
|
|
132
132
|
/** Alternative names that resolve to this tool. */
|
|
133
133
|
operationAliases?: string[];
|
|
134
|
+
/** Whether detailed input schema is available from `tools describe`. */
|
|
135
|
+
hasInputSchema?: boolean;
|
|
136
|
+
/** Whether detailed output schema is available from `tools describe`. */
|
|
137
|
+
hasOutputSchema?: boolean;
|
|
134
138
|
/** JSON Schema describing the tool's input parameters. */
|
|
135
139
|
inputSchema?: Record<string, unknown>;
|
|
136
140
|
/** JSON Schema describing the tool's output shape. */
|
|
@@ -661,6 +665,7 @@ export interface PlayListItem {
|
|
|
661
665
|
currentPublishedVersion?: number | null;
|
|
662
666
|
tableNamespace?: string | null;
|
|
663
667
|
isDraftDirty?: boolean;
|
|
668
|
+
hasInputSchema?: boolean;
|
|
664
669
|
inputSchema?: Record<string, unknown> | null;
|
|
665
670
|
outputSchema?: Record<string, unknown> | null;
|
|
666
671
|
staticPipeline?: unknown;
|
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
import {
|
|
2
|
+
noopPacingPermit,
|
|
3
|
+
type PacingPermit,
|
|
4
|
+
type PacingRule,
|
|
5
|
+
type RateStateBackend,
|
|
6
|
+
} from './rate-state-backend';
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* Distributed Rate State Backend for the `esm_workers` substrate.
|
|
10
|
+
*
|
|
11
|
+
* On Cloudflare a single play run fans child plays and map rows across many
|
|
12
|
+
* V8 isolates, so the per-`(org, provider)` request window cannot be
|
|
13
|
+
* process-local — each isolate would otherwise pace against its own private
|
|
14
|
+
* counter and the org could blow past a provider's real limit by the number of
|
|
15
|
+
* isolates. This backend makes the window GLOBAL by RPCing the coordinator
|
|
16
|
+
* Durable Object addressed per bucket (`idFromName('rate:<orgId>:<provider>')`).
|
|
17
|
+
* The DO is single-threaded, so it runs the same sliding-window algorithm as
|
|
18
|
+
* `InMemoryRateStateBackend` correctly for all isolates at once.
|
|
19
|
+
*
|
|
20
|
+
* Latency: a full DO round-trip on every outbound tool call would tax the
|
|
21
|
+
* hello-world latency baseline. Instead the backend LEASES SMALL PERMIT BLOCKS:
|
|
22
|
+
* one `/rate-acquire` round-trip debits up to {@link LEASE_BLOCK_SIZE} permits
|
|
23
|
+
* from the global window, and subsequent acquires draw from the local block
|
|
24
|
+
* until it is exhausted or its short TTL expires. This bounds round-trips to
|
|
25
|
+
* roughly `calls / LEASE_BLOCK_SIZE` while keeping over-issuance bounded by one
|
|
26
|
+
* block per isolate per window.
|
|
27
|
+
*
|
|
28
|
+
* Fail-open: if the coordinator is unreachable the backend logs once and
|
|
29
|
+
* PROCEEDS (grants the permit) rather than stalling the run, matching the
|
|
30
|
+
* semantics of `src/lib/redis/customer-rate-limiter.ts` — a degraded limiter
|
|
31
|
+
* must never become an availability outage. The Governor's global
|
|
32
|
+
* tool-concurrency semaphore remains the unconditional backstop.
|
|
33
|
+
*/
|
|
34
|
+
|
|
35
|
+
/**
 * Permit count passed as `requested` on every `rateAcquire` round-trip.
 * Chosen to amortize the coordinator hop, not to batch work.
 */
const LEASE_BLOCK_SIZE = 16;

/**
 * Lifetime, in milliseconds, of a locally cached block of leased permits.
 *
 * Leased permits were already debited from the global window, but a block held
 * for roughly a whole window could let one isolate run ahead of a window that
 * has since rolled over. Expiring blocks quickly keeps that drift sub-window.
 */
const LEASE_BLOCK_TTL_MS = 250;

/**
 * Ceiling, in milliseconds, on how long an acquire parks against a saturated
 * window before giving up on waiting.
 */
const MAX_ACQUIRE_WAIT_MS = 5000;
|
|
45
|
+
|
|
46
|
+
/**
 * Transport used by the rate-state backend to reach the coordinator that owns
 * the shared request window for a bucket.
 */
export interface CoordinatorRatePort {
  /**
   * Ask the coordinator for up to `requested` request-window permits on
   * `bucketId`, evaluated against every entry in `rules`.
   *
   * @returns `granted` — the number of permits issued (0 when the window is
   *   saturated) — and `waitMs`, a hint for how long to wait before retrying.
   */
  rateAcquire(input: {
    bucketId: string;
    rules: PacingRule[];
    requested: number;
  }): Promise<{ granted: number; waitMs: number }>;

  /** Report a Retry-After style cooldown so it applies to the global bucket. */
  ratePenalize(input: { bucketId: string; cooldownMs: number }): Promise<void>;
}
|
|
60
|
+
|
|
61
|
+
/** Permits already leased from the coordinator and cached locally for one bucket. */
interface LeasedBlock {
  /** Permits still available to hand out without another round-trip. */
  remaining: number;
  /** Clock value (same clock as the backend's `now`) after which the block is stale. */
  expiresAt: number;
  /** Stable signature of the rule set the block was leased under. */
  rulesKey: string;
}
|
|
67
|
+
|
|
68
|
+
/** Optional collaborators for the backend; each one has a built-in default. */
interface Options {
  /** Clock source; the backend falls back to `Date.now`. */
  now?: () => number;
  /** Delay primitive; the backend falls back to a `setTimeout`-based sleep. */
  sleep?: (ms: number) => Promise<void>;
  /** Invoked when a coordinator call fails and the backend proceeds anyway. */
  onDegraded?: (info: { bucketId: string; error: string }) => void;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Build an order-independent signature for a set of pacing rules.
 *
 * Each rule is rendered as `ruleId:requestsPerWindow:windowMs:maxConcurrency`
 * (an absent `maxConcurrency` renders as an empty field); the rendered entries
 * are sorted and joined with `|`. The input array is not mutated.
 *
 * @param rules - Rules to fingerprint.
 * @returns The joined signature, or an empty string for an empty rule list.
 */
function rulesSignature(rules: readonly PacingRule[]): string {
  const entries: string[] = [];
  for (const rule of rules) {
    const concurrency = rule.maxConcurrency ?? '';
    entries.push(
      `${rule.ruleId}:${rule.requestsPerWindow}:${rule.windowMs}:${concurrency}`,
    );
  }
  entries.sort();
  return entries.join('|');
}
|
|
83
|
+
|
|
84
|
+
/**
 * Rate State Backend that paces against a window held by a coordinator
 * (reached through a {@link CoordinatorRatePort}) instead of a local counter.
 *
 * To avoid a round-trip per call, each `rateAcquire` requests a block of
 * `LEASE_BLOCK_SIZE` permits; the unused remainder is cached per bucket and
 * served locally until it runs out or passes `LEASE_BLOCK_TTL_MS`.
 *
 * The backend fails open: when the coordinator call throws, or the window stays
 * saturated for `MAX_ACQUIRE_WAIT_MS`, `acquire` returns a permit anyway.
 */
export class CoordinatorRateStateBackend implements RateStateBackend {
  private readonly port: CoordinatorRatePort;
  private readonly now: () => number;
  private readonly sleep: (ms: number) => Promise<void>;
  private readonly onDegraded: (info: {
    bucketId: string;
    error: string;
  }) => void;
  /** Locally cached permit blocks, keyed by bucket id. */
  private readonly blocks = new Map<string, LeasedBlock>();
  /** Latch so the default degraded handler warns at most once per instance. */
  private degradedLogged = false;

  /**
   * @param port - Transport to the coordinator.
   * @param options - Optional clock, sleep, and degraded-callback overrides.
   */
  constructor(port: CoordinatorRatePort, options: Options = {}) {
    this.port = port;
    this.now = options.now ?? (() => Date.now());
    this.sleep =
      options.sleep ??
      ((ms: number) => new Promise((resolve) => setTimeout(resolve, ms)));
    this.onDegraded =
      options.onDegraded ??
      ((info) => {
        if (this.degradedLogged) return;
        this.degradedLogged = true;
        console.warn('[coordinator-rate-state] acquire failed open', info);
      });
  }

  /**
   * Obtain one pacing permit for `bucketId`.
   *
   * Serves from the cached block when possible; otherwise leases a new block
   * from the coordinator, parking on the coordinator's `waitMs` hint while the
   * window is saturated. Total parking time is bounded by
   * `MAX_ACQUIRE_WAIT_MS`: each sleep is clamped to the budget that remains.
   *
   * @returns A no-op permit (nothing needs releasing for a window debit).
   * @throws The signal's reason (or a generic Error) when `signal` is found
   *   aborted at the start of a loop iteration.
   */
  async acquire(input: {
    bucketId: string;
    rules: readonly PacingRule[];
    signal?: AbortSignal;
  }): Promise<PacingPermit> {
    const { bucketId, rules, signal } = input;
    if (rules.length === 0) {
      return noopPacingPermit();
    }
    const rulesKey = rulesSignature(rules);

    // Draw from a still-valid local block first — no round-trip.
    if (this.drawFromBlock(bucketId, rulesKey)) {
      return noopPacingPermit();
    }

    const waitStartedAt = this.now();
    while (true) {
      if (signal?.aborted) {
        throw signal.reason instanceof Error
          ? signal.reason
          : new Error('Rate-state acquire aborted.');
      }
      let response: { granted: number; waitMs: number };
      try {
        response = await this.port.rateAcquire({
          bucketId,
          rules: [...rules],
          requested: LEASE_BLOCK_SIZE,
        });
      } catch (error) {
        // Fail open: a degraded coordinator must not stall the run.
        this.onDegraded({
          bucketId,
          error: error instanceof Error ? error.message : String(error),
        });
        return noopPacingPermit();
      }
      if (response.granted > 0) {
        // Consume one for this call; cache the rest as a short-lived block.
        const remaining = response.granted - 1;
        if (remaining > 0) {
          this.mergeBlock(bucketId, remaining, rulesKey);
        }
        return noopPacingPermit();
      }
      // Window saturated. Stop waiting once the wall-clock budget is spent so a
      // stuck bucket cannot hang the caller forever.
      const remainingBudgetMs =
        MAX_ACQUIRE_WAIT_MS - (this.now() - waitStartedAt);
      if (remainingBudgetMs <= 0) {
        return noopPacingPermit();
      }
      // Clamp the hint to what is left of the budget (not to the full cap) so
      // the total wait never overshoots MAX_ACQUIRE_WAIT_MS. A missing or NaN
      // hint degrades to the 1 ms minimum instead of producing a NaN delay.
      const boundedHintMs = Math.min(response.waitMs, remainingBudgetMs);
      const waitMs = Number.isNaN(boundedHintMs)
        ? 1
        : Math.max(1, boundedHintMs);
      await this.sleep(waitMs);
    }
  }

  /**
   * Forward a provider cooldown to the coordinator. Non-positive cooldowns are
   * ignored. The call is fire-and-forget; a failure is reported through the
   * degraded callback rather than thrown.
   */
  penalize(input: { bucketId: string; cooldownMs: number }): void {
    if (input.cooldownMs <= 0) return;
    // Drop any cached block for this bucket so the cooldown takes effect on the
    // very next acquire instead of being masked by already-leased permits.
    this.blocks.delete(input.bucketId);
    void this.port
      .ratePenalize({
        bucketId: input.bucketId,
        cooldownMs: input.cooldownMs,
      })
      .catch((error) => {
        this.onDegraded({
          bucketId: input.bucketId,
          error: error instanceof Error ? error.message : String(error),
        });
      });
  }

  /**
   * Add freshly leased permits to the bucket's block instead of overwriting it.
   *
   * Two concurrent acquires can both miss the local block and both round-trip;
   * each one debited the global window, so overwriting would discard permits
   * that were already paid for. Merging keeps them:
   *  - same rulesKey and still valid → sum `remaining` and keep the earlier
   *    expiry, so the merged block never outlives the older lease;
   *  - missing, stale, or leased under different rules → start a fresh block.
   */
  private mergeBlock(
    bucketId: string,
    remaining: number,
    rulesKey: string,
  ): void {
    const freshExpiresAt = this.now() + LEASE_BLOCK_TTL_MS;
    const existing = this.blocks.get(bucketId);
    if (
      existing &&
      existing.rulesKey === rulesKey &&
      existing.expiresAt > this.now()
    ) {
      existing.remaining += remaining;
      existing.expiresAt = Math.min(existing.expiresAt, freshExpiresAt);
      return;
    }
    this.blocks.set(bucketId, {
      remaining,
      expiresAt: freshExpiresAt,
      rulesKey,
    });
  }

  /**
   * Take one permit from the bucket's cached block.
   *
   * @returns `true` when a permit was taken; `false` when there is no block or
   *   the block is stale / leased under different rules (it is then evicted).
   */
  private drawFromBlock(bucketId: string, rulesKey: string): boolean {
    const block = this.blocks.get(bucketId);
    if (!block) return false;
    if (block.rulesKey !== rulesKey || block.expiresAt <= this.now()) {
      this.blocks.delete(bucketId);
      return false;
    }
    block.remaining -= 1;
    if (block.remaining <= 0) {
      this.blocks.delete(bucketId);
    }
    return true;
  }
}
|
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Play Execution Governor — the deep module that owns execution policy.
|
|
3
|
+
*
|
|
4
|
+
* Both runner Adapters (`cjs_node20`, `esm_workers`) gate their work through one
|
|
5
|
+
* Governor instance per run-attempt so the substrates cannot diverge on
|
|
6
|
+
* concurrency, budgets, or pacing. Adapters keep only substrate mechanism (how
|
|
7
|
+
* to resolve a row, batch a tool call, or submit a child); the Governor owns the
|
|
8
|
+
* "may I, and how many at once" policy. See ADR 0007 + CONTEXT.md.
|
|
9
|
+
*
|
|
10
|
+
* Surface (small, by design):
|
|
11
|
+
* - acquireRowSlot / acquireChildPlaySlot / acquireToolSlot → blocking leases
|
|
12
|
+
* - chargeBudget → throws on breach
|
|
13
|
+
* - forkChild → child lineage snapshot
|
|
14
|
+
* - resolveRowConcurrency / reportProviderBackpressure / snapshot
|
|
15
|
+
*/
|
|
16
|
+
import {
|
|
17
|
+
type AdapterId,
|
|
18
|
+
type ResolvedExecutionPolicy,
|
|
19
|
+
resolveExecutionPolicy,
|
|
20
|
+
resolveRowConcurrency,
|
|
21
|
+
} from './policy';
|
|
22
|
+
import {
|
|
23
|
+
noopPacingPermit,
|
|
24
|
+
type PacingRule,
|
|
25
|
+
type RateStateBackend,
|
|
26
|
+
} from './rate-state-backend';
|
|
27
|
+
|
|
28
|
+
/** Handle for a held slot and/or pacing permit. */
export interface WorkLease {
  /**
   * Give the slot / pacing permit back. Safe to call more than once; callers
   * MUST invoke it from a `finally` block.
   */
  release(): void;
}
|
|
32
|
+
|
|
33
|
+
/** Names of the monotonic budget counters that can be charged directly. */
export type BudgetKind =
  'playCall' | 'toolCall' | 'retry' | 'descendant' | 'waterfallStep';
|
|
39
|
+
|
|
40
|
+
/**
 * Lineage identity plus the budget counters that accumulate down the dispatch
 * lineage. The snapshot is what gets threaded from a parent run into a child.
 */
export interface GovernanceSnapshot {
  // Lineage identity.
  rootRunId: string;
  currentRunId: string;
  currentPlayId: string;
  ancestryPlayIds: string[];
  ancestryRunIds: string[];
  callDepth: number;

  // Accumulated budget counters.
  playCallCount: number;
  toolCallCount: number;
  retryCount: number;
  descendantCount: number;
  waterfallStepExecutions: number;

  /** Count of direct child plays launched, keyed by the parent play id. */
  parentChildCalls: Record<string, number>;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Raised when a governed counter goes past its limit.
 *
 * `budget` names the counter, `observed` is the value that breached, and
 * `limit` is the configured ceiling. Depth breaches get their own wording;
 * every other budget shares one message template.
 */
export class GovernorBudgetError extends Error {
  constructor(
    readonly budget: BudgetKind | 'playDepth' | 'childPerParent',
    readonly observed: number,
    readonly limit: number,
  ) {
    super(
      budget !== 'playDepth'
        ? `Play execution ${budget} budget exceeded (${observed}/${limit}).`
        : `Play-call depth exceeded (${observed}/${limit}).`,
    );
    this.name = 'GovernorBudgetError';
  }
}
|
|
71
|
+
|
|
72
|
+
/**
 * Looks up the provider and resolved pacing rules for a tool id. Resolves to
 * `null` when nothing is known about the tool.
 */
export type PacingResolver = (
  toolId: string,
) => Promise<{ provider: string; rules: PacingRule[] } | null>;
|
|
76
|
+
|
|
77
|
+
/** Policy surface that runner adapters gate their work through. */
export interface PlayExecutionGovernor {
  /** Adapter this governor was created for. */
  readonly adapter: AdapterId;
  /** Execution policy resolved for that adapter. */
  readonly policy: ResolvedExecutionPolicy;

  /** Wait for a free map-row slot and return its lease. */
  acquireRowSlot(opts?: { signal?: AbortSignal }): Promise<WorkLease>;

  /** Wait for a free child-play slot and return its lease. */
  acquireChildPlaySlot(opts?: { signal?: AbortSignal }): Promise<WorkLease>;

  /**
   * Wait for both a global tool-concurrency slot and the per-(org, provider)
   * pacing permit, then charge the tool-call budget and hand back one lease
   * covering all of it.
   *
   * Ordering is concurrency slot → provider pace → tool budget. The budget is
   * charged last so that an acquire which fails or is aborted never consumes
   * budget. The flip side: a run that is already over its tool budget still
   * takes and holds a slot and a pacing permit before the breach is noticed,
   * because the breach only surfaces once the call is otherwise clear to run.
   */
  acquireToolSlot(
    toolId: string,
    opts?: { signal?: AbortSignal },
  ): Promise<WorkLease>;

  /**
   * Recommended batch parallelism for a tool: the provider's rate hints capped
   * by the policy's suggested ceiling. Tools without hints get `fallback`.
   */
  suggestedParallelism(toolId: string, fallback: number): Promise<number>;

  /**
   * Add to a monotonic budget counter.
   *
   * @throws GovernorBudgetError when the counter goes past its limit.
   */
  chargeBudget(kind: BudgetKind, amount?: number): void;

  /**
   * Reserve depth, per-parent, and descendant budget for a child play, and
   * return the snapshot to thread into the child run so budgets accumulate
   * across the lineage (and across isolates on `esm_workers`). Throws on
   * breach.
   *
   * In contrast to {@link acquireToolSlot}, which charges last so an aborted
   * acquire costs nothing, the child-lineage counters are charged at fork
   * time — BEFORE the caller acquires a child-play slot — and are NOT refunded
   * if that later acquire fails (e.g. on abort). This is deliberate and safe
   * given the 100k child caps: the charge reserves lineage-global capacity for
   * a launch the caller has committed to, and because forkChild has to return
   * the threaded snapshot synchronously the charge cannot wait behind the
   * asynchronous slot acquire.
   */
  forkChild(input: {
    childPlayName: string;
    childRunId: string;
  }): GovernanceSnapshot;

  /**
   * Effective row concurrency: an explicit request clamped to [1, rowMax],
   * otherwise the default.
   */
  resolveRowConcurrency(requested?: number): number;

  /** Push a provider's Retry-After into the shared pacer. */
  reportProviderBackpressure(input: {
    provider: string;
    retryAfterMs: number;
  }): void;

  /** Current governance counters and lineage. */
  snapshot(): GovernanceSnapshot;
}
|
|
137
|
+
|
|
138
|
+
/** Everything needed to construct a governor for one run-attempt. */
interface GovernorInput {
  /** Adapter whose execution policy should be resolved. */
  adapter: AdapterId;
  /** Org and root run the governor is scoped to. */
  scope: { orgId: string; rootRunId: string };
  /** Backend that holds the provider pacing window. */
  rateState: RateStateBackend;
  /** Lookup from tool id to provider + pacing rules. */
  resolvePacing: PacingResolver;
  /** Snapshot to continue from; a default snapshot is used when absent. */
  resume?: GovernanceSnapshot;
}
|
|
145
|
+
|
|
146
|
+
/**
 * Counting semaphore with abortable waiting.
 *
 * At most `limit` leases are outstanding at once. Callers that arrive while
 * the pool is full park in a FIFO queue; each `release()` wakes the waiter at
 * the head of the queue, which then re-checks capacity before taking a slot.
 */
class Semaphore {
  private inFlight = 0;
  /** Resolvers of parked acquirers, oldest first. */
  private readonly waiters: Array<() => void> = [];
  constructor(private readonly limit: number) {}

  /** Error to throw for an aborted signal: its reason when that is an Error. */
  private static abortError(signal: AbortSignal | undefined): Error {
    return signal?.reason instanceof Error
      ? signal.reason
      : new Error('Slot acquire aborted.');
  }

  /**
   * Wait for a free slot and return a lease on it.
   *
   * @param signal - Optional abort signal; aborting rejects a parked acquire.
   * @returns A lease whose `release()` is idempotent.
   * @throws The signal's reason (or a generic Error) when the signal is
   *   already aborted, is aborted while parked, or is found aborted on wake-up.
   */
  async acquire(signal?: AbortSignal): Promise<WorkLease> {
    // Fail fast on an already-aborted signal: the abort listener registered
    // while parked never fires for a signal that was aborted beforehand, so
    // without this check a full pool would block until a slot frees.
    if (signal?.aborted) {
      throw Semaphore.abortError(signal);
    }
    while (this.inFlight >= this.limit) {
      await new Promise<void>((resolve, reject) => {
        const onResolve = () => {
          signal?.removeEventListener('abort', onAbort);
          resolve();
        };
        const onAbort = () => {
          const idx = this.waiters.indexOf(onResolve);
          if (idx >= 0) this.waiters.splice(idx, 1);
          reject(Semaphore.abortError(signal));
        };
        this.waiters.push(onResolve);
        signal?.addEventListener('abort', onAbort, { once: true });
      });
      if (signal?.aborted) {
        // We were woken by a release but aborted before resuming. That
        // wake-up belonged to a freed slot we are not going to take, so pass
        // it to the next waiter; otherwise the slot would sit idle while the
        // rest of the queue stays parked.
        if (this.inFlight < this.limit) {
          this.waiters.shift()?.();
        }
        throw Semaphore.abortError(signal);
      }
    }
    this.inFlight += 1;
    let released = false;
    return {
      release: () => {
        if (released) return;
        released = true;
        this.inFlight = Math.max(0, this.inFlight - 1);
        this.waiters.shift()?.();
      },
    };
  }
}
|
|
197
|
+
|
|
198
|
+
/**
 * Build the starting snapshot for a root run: every counter at zero and the
 * lineage containing only the root.
 *
 * When `rootPlayId` is supplied (and non-empty) it seeds the play ancestry and
 * the call depth starts at 1; otherwise the ancestry is empty and depth is 0.
 * `currentPlayId` falls back to `rootRunId` only when `rootPlayId` is nullish.
 *
 * @param scope - Root identifiers; `orgId` is accepted but not stored.
 * @returns A fresh snapshot that shares no arrays or maps with any other.
 */
export function createDefaultGovernanceSnapshot(scope: {
  orgId: string;
  rootRunId: string;
  rootPlayId?: string;
}): GovernanceSnapshot {
  const { rootRunId, rootPlayId } = scope;

  let ancestryPlayIds: string[] = [];
  let callDepth = 0;
  if (rootPlayId) {
    ancestryPlayIds = [rootPlayId];
    callDepth = 1;
  }

  const snapshot: GovernanceSnapshot = {
    rootRunId,
    currentRunId: rootRunId,
    currentPlayId: rootPlayId ?? rootRunId,
    ancestryPlayIds,
    ancestryRunIds: [rootRunId],
    callDepth,
    playCallCount: 0,
    toolCallCount: 0,
    retryCount: 0,
    descendantCount: 0,
    waterfallStepExecutions: 0,
    parentChildCalls: {},
  };
  return snapshot;
}
|
|
218
|
+
|
|
219
|
+
/**
 * Create the governor for one run-attempt.
 *
 * Resolves the execution policy for `input.adapter`, starts from
 * `input.resume` when given (otherwise from a default snapshot for
 * `input.scope`), and sets up one semaphore each for rows, child plays, and
 * tool calls. The snapshot object is updated in place as budgets are charged.
 *
 * @param input - Adapter, scope, rate-state backend, pacing resolver, and an
 *   optional snapshot to resume from.
 * @returns The governor bound to that state.
 */
export function createPlayExecutionGovernor(
  input: GovernorInput,
): PlayExecutionGovernor {
  const policy = resolveExecutionPolicy(input.adapter);
  const state: GovernanceSnapshot =
    input.resume ?? createDefaultGovernanceSnapshot(input.scope);

  const rowSlots = new Semaphore(policy.concurrency.rowMax);
  const childPlaySlots = new Semaphore(policy.concurrency.childPlay);
  const toolSlots = new Semaphore(policy.concurrency.toolCalls);

  /** Pacing bucket key for a provider within this org. */
  function bucketId(provider: string): string {
    return `${input.scope.orgId}:${provider}`;
  }

  /** Increment the named counter first, then throw if it is past its limit. */
  function chargeBudget(kind: BudgetKind, amount = 1): void {
    const enforce = (observed: number, limit: number): void => {
      if (observed > limit) {
        throw new GovernorBudgetError(kind, observed, limit);
      }
    };
    const budgets = policy.budgets;
    switch (kind) {
      case 'playCall':
        state.playCallCount += amount;
        enforce(state.playCallCount, budgets.maxPlayCallCount);
        return;
      case 'toolCall':
        state.toolCallCount += amount;
        enforce(state.toolCallCount, budgets.maxToolCallCount);
        return;
      case 'retry':
        state.retryCount += amount;
        enforce(state.retryCount, budgets.maxRetryCount);
        return;
      case 'descendant':
        state.descendantCount += amount;
        enforce(state.descendantCount, budgets.maxDescendants);
        return;
      case 'waterfallStep':
        state.waterfallStepExecutions += amount;
        enforce(
          state.waterfallStepExecutions,
          budgets.maxWaterfallStepExecutions,
        );
        return;
    }
  }

  const governor: PlayExecutionGovernor = {
    adapter: input.adapter,
    policy,

    acquireRowSlot(opts) {
      return rowSlots.acquire(opts?.signal);
    },

    acquireChildPlaySlot(opts) {
      return childPlaySlots.acquire(opts?.signal);
    },

    async acquireToolSlot(toolId, opts) {
      // Step 1: global tool-concurrency slot.
      const slot = await toolSlots.acquire(opts?.signal);

      // Step 2: per-(org, provider) pacing. The resolver supplies the provider,
      // so callers pass only the tool id. No rules means no pacing.
      let permit: { release(): void };
      try {
        const pacing = await input.resolvePacing(toolId);
        if (pacing && pacing.rules.length > 0) {
          permit = await input.rateState.acquire({
            bucketId: bucketId(pacing.provider),
            rules: pacing.rules,
            signal: opts?.signal,
          });
        } else {
          permit = noopPacingPermit();
        }
      } catch (error) {
        slot.release();
        throw error;
      }

      // Step 3: charge the budget only now that the call is cleared to run, so
      // a failed or aborted acquisition never permanently consumes budget.
      try {
        chargeBudget('toolCall');
      } catch (error) {
        permit.release();
        slot.release();
        throw error;
      }

      let released = false;
      const lease: WorkLease = {
        release: () => {
          if (released) return;
          released = true;
          permit.release();
          slot.release();
        },
      };
      return lease;
    },

    async suggestedParallelism(toolId, fallback) {
      const pacing = await input.resolvePacing(toolId);
      if (!pacing || pacing.rules.length === 0) return fallback;

      // Every rule contributes its window size, plus its concurrency cap if set.
      const limits: number[] = [];
      for (const rule of pacing.rules) {
        limits.push(rule.requestsPerWindow);
        if (rule.maxConcurrency != null) {
          limits.push(rule.maxConcurrency);
        }
      }
      const tightest = Math.min(
        policy.pacing.suggestedMaxParallelism,
        ...limits,
      );
      return Math.max(1, tightest);
    },

    chargeBudget,

    forkChild(childInput) {
      const { childPlayName, childRunId } = childInput;

      // Refuse a play that already appears in its own ancestry.
      if (state.ancestryPlayIds.includes(childPlayName)) {
        const chain = [...state.ancestryPlayIds, childPlayName].join(' -> ');
        throw new Error(`Recursive play graph detected: ${chain}`);
      }

      const nextDepth = state.callDepth + 1;
      if (nextDepth > policy.budgets.maxPlayCallDepth) {
        throw new GovernorBudgetError(
          'playDepth',
          nextDepth,
          policy.budgets.maxPlayCallDepth,
        );
      }

      const parentKey = state.currentPlayId;
      const nextParent = (state.parentChildCalls[parentKey] ?? 0) + 1;
      if (nextParent > policy.budgets.maxChildPlayCallsPerParent) {
        throw new GovernorBudgetError(
          'childPerParent',
          nextParent,
          policy.budgets.maxChildPlayCallsPerParent,
        );
      }

      // Run-wide play/descendant budgets are charged on the parent at fork
      // time, ahead of the caller's child-play slot acquire, and are not
      // refunded if that acquire later fails.
      chargeBudget('playCall');
      chargeBudget('descendant');
      state.parentChildCalls[parentKey] = nextParent;

      // The child starts from the parent's accumulated counters, which is what
      // makes the budget lineage-global.
      const childSnapshot: GovernanceSnapshot = {
        rootRunId: state.rootRunId,
        currentRunId: childRunId,
        currentPlayId: childPlayName,
        ancestryPlayIds: [...state.ancestryPlayIds, childPlayName],
        ancestryRunIds: [...state.ancestryRunIds, childRunId],
        callDepth: nextDepth,
        playCallCount: state.playCallCount,
        toolCallCount: state.toolCallCount,
        retryCount: state.retryCount,
        descendantCount: state.descendantCount,
        waterfallStepExecutions: state.waterfallStepExecutions,
        parentChildCalls: {},
      };
      return childSnapshot;
    },

    resolveRowConcurrency(requested) {
      return resolveRowConcurrency(policy, requested);
    },

    reportProviderBackpressure(bp) {
      input.rateState.penalize({
        bucketId: bucketId(bp.provider),
        cooldownMs: bp.retryAfterMs,
      });
    },

    snapshot() {
      // Copy the containers so callers cannot mutate governor state.
      return {
        ...state,
        ancestryPlayIds: [...state.ancestryPlayIds],
        ancestryRunIds: [...state.ancestryRunIds],
        parentChildCalls: { ...state.parentChildCalls },
      };
    },
  };
  return governor;
}
|