deepline 0.1.79 → 0.1.80

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. package/dist/cli/index.js +68 -31
  2. package/dist/cli/index.mjs +68 -31
  3. package/dist/index.d.mts +9 -1
  4. package/dist/index.d.ts +9 -1
  5. package/dist/index.js +7 -4
  6. package/dist/index.mjs +7 -4
  7. package/dist/repo/apps/play-runner-workers/src/child-play-await.ts +192 -0
  8. package/dist/repo/apps/play-runner-workers/src/coordinator-entry.ts +1102 -1616
  9. package/dist/repo/apps/play-runner-workers/src/dedup-do.ts +506 -654
  10. package/dist/repo/apps/play-runner-workers/src/entry.ts +896 -354
  11. package/dist/repo/apps/play-runner-workers/src/workflow-retry-state.ts +8 -2
  12. package/dist/repo/sdk/src/client.ts +9 -2
  13. package/dist/repo/sdk/src/release.ts +2 -2
  14. package/dist/repo/sdk/src/types.ts +5 -0
  15. package/dist/repo/shared_libs/play-runtime/governor/coordinator-rate-state-backend.ts +231 -0
  16. package/dist/repo/shared_libs/play-runtime/governor/governor.ts +376 -0
  17. package/dist/repo/shared_libs/play-runtime/governor/policy.ts +179 -0
  18. package/dist/repo/shared_libs/play-runtime/governor/rate-state-backend.ts +87 -0
  19. package/dist/repo/shared_libs/play-runtime/run-failure.ts +12 -0
  20. package/dist/repo/shared_libs/play-runtime/scheduler-backend.ts +24 -0
  21. package/dist/repo/shared_libs/play-runtime/submit-limits.ts +35 -0
  22. package/dist/repo/shared_libs/plays/bundling/index.ts +4 -12
  23. package/dist/repo/shared_libs/plays/bundling/limits.ts +29 -0
  24. package/dist/repo/shared_libs/plays/static-pipeline.ts +56 -3
  25. package/dist/repo/shared_libs/temporal/constants.ts +38 -0
  26. package/package.json +1 -1
@@ -6,10 +6,16 @@ import type {
6
6
  PlayRuntimeManifestMap,
7
7
  } from '../../../shared_libs/plays/compiler-manifest';
8
8
 
9
- export const WORKFLOW_RETRY_STATE_TARGET_BYTES = 100_000;
9
+ import {
10
+ PLAY_SUBMIT_INPUT_INLINE_MAX_BYTES,
11
+ PLAY_SUBMIT_INPUT_MAX_BYTES,
12
+ } from '../../../shared_libs/play-runtime/submit-limits';
13
+
14
+ export const WORKFLOW_RETRY_STATE_TARGET_BYTES =
15
+ PLAY_SUBMIT_INPUT_INLINE_MAX_BYTES;
10
16
  export const WORKFLOW_RETRY_PARAMS_EXTERNALIZE_AFTER_BYTES =
11
17
  WORKFLOW_RETRY_STATE_TARGET_BYTES;
12
- export const WORKFLOW_RETRY_PARAMS_MAX_BYTES = 1024 * 1024;
18
+ export const WORKFLOW_RETRY_PARAMS_MAX_BYTES = PLAY_SUBMIT_INPUT_MAX_BYTES;
13
19
 
14
20
  export type WorkflowRetryParamsRef = {
15
21
  storageKind: 'r2';
@@ -642,6 +642,7 @@ export class DeeplineClient {
642
642
  categories?: string;
643
643
  grep?: string;
644
644
  grepMode?: 'all' | 'any' | 'phrase';
645
+ compact?: boolean;
645
646
  }): Promise<ToolDefinition[]> {
646
647
  const params = new URLSearchParams();
647
648
  if (options?.categories?.trim()) {
@@ -651,6 +652,7 @@ export class DeeplineClient {
651
652
  params.set('grep', options.grep.trim());
652
653
  params.set('grep_mode', options.grepMode ?? 'all');
653
654
  }
655
+ params.set('compact', options?.compact === true ? 'true' : 'false');
654
656
  const suffix = params.toString() ? `?${params.toString()}` : '';
655
657
  const res = await this.http.get<{ tools: ToolDefinition[] }>(
656
658
  `/api/v2/tools${suffix}`,
@@ -1433,6 +1435,7 @@ export class DeeplineClient {
1433
1435
  if (status) {
1434
1436
  params.set('status', status);
1435
1437
  }
1438
+ params.set('compact', 'true');
1436
1439
  const response = await this.http.get<{ runs: PlayRunListItem[] }>(
1437
1440
  `/api/v2/runs?${params.toString()}`,
1438
1441
  );
@@ -1636,10 +1639,14 @@ export class DeeplineClient {
1636
1639
  * @param name - Play name
1637
1640
  * @returns Version list (newest first)
1638
1641
  */
1639
- async listPlayVersions(name: string): Promise<PlayRevisionSummary[]> {
1642
+ async listPlayVersions(
1643
+ name: string,
1644
+ options?: { full?: boolean },
1645
+ ): Promise<PlayRevisionSummary[]> {
1640
1646
  const encodedName = encodeURIComponent(name);
1647
+ const suffix = options?.full ? '?full=true' : '';
1641
1648
  const response = await this.http.get<{ versions: PlayRevisionSummary[] }>(
1642
- `/api/v2/plays/${encodedName}/versions`,
1649
+ `/api/v2/plays/${encodedName}/versions${suffix}`,
1643
1650
  );
1644
1651
  return response.versions ?? [];
1645
1652
  }
@@ -50,10 +50,10 @@ export type SdkRelease = {
50
50
  };
51
51
 
52
52
  export const SDK_RELEASE = {
53
- version: '0.1.79',
53
+ version: '0.1.80',
54
54
  apiContract: '2026-06-dataset-column-cell-stale-hard-cutover',
55
55
  supportPolicy: {
56
- latest: '0.1.79',
56
+ latest: '0.1.80',
57
57
  minimumSupported: '0.1.53',
58
58
  deprecatedBelow: '0.1.53',
59
59
  },
@@ -131,6 +131,10 @@ export interface ToolDefinition {
131
131
  operationId?: string;
132
132
  /** Alternative names that resolve to this tool. */
133
133
  operationAliases?: string[];
134
+ /** Whether detailed input schema is available from `tools describe`. */
135
+ hasInputSchema?: boolean;
136
+ /** Whether detailed output schema is available from `tools describe`. */
137
+ hasOutputSchema?: boolean;
134
138
  /** JSON Schema describing the tool's input parameters. */
135
139
  inputSchema?: Record<string, unknown>;
136
140
  /** JSON Schema describing the tool's output shape. */
@@ -661,6 +665,7 @@ export interface PlayListItem {
661
665
  currentPublishedVersion?: number | null;
662
666
  tableNamespace?: string | null;
663
667
  isDraftDirty?: boolean;
668
+ hasInputSchema?: boolean;
664
669
  inputSchema?: Record<string, unknown> | null;
665
670
  outputSchema?: Record<string, unknown> | null;
666
671
  staticPipeline?: unknown;
@@ -0,0 +1,231 @@
1
+ import {
2
+ noopPacingPermit,
3
+ type PacingPermit,
4
+ type PacingRule,
5
+ type RateStateBackend,
6
+ } from './rate-state-backend';
7
+
8
+ /**
9
+ * Distributed Rate State Backend for the `esm_workers` substrate.
10
+ *
11
+ * On Cloudflare a single play run fans child plays and map rows across many
12
+ * V8 isolates, so the per-`(org, provider)` request window cannot be
13
+ * process-local — each isolate would otherwise pace against its own private
14
+ * counter and the org could blow past a provider's real limit by the number of
15
+ * isolates. This backend makes the window GLOBAL by RPCing the coordinator
16
+ * Durable Object addressed per bucket (`idFromName('rate:<orgId>:<provider>')`).
17
+ * The DO is single-threaded, so it runs the same sliding-window algorithm as
18
+ * `InMemoryRateStateBackend` correctly for all isolates at once.
19
+ *
20
+ * Latency: a full DO round-trip on every outbound tool call would tax the
21
+ * hello-world latency baseline. Instead the backend LEASES SMALL PERMIT BLOCKS:
22
+ * one `/rate-acquire` round-trip debits up to {@link LEASE_BLOCK_SIZE} permits
23
+ * from the global window, and subsequent acquires draw from the local block
24
+ * until it is exhausted or its short TTL expires. This bounds round-trips to
25
+ * roughly `calls / LEASE_BLOCK_SIZE` while keeping over-issuance bounded by one
26
+ * block per isolate per window.
27
+ *
28
+ * Fail-open: if the coordinator is unreachable the backend logs once and
29
+ * PROCEEDS (grants the permit) rather than stalling the run, matching the
30
+ * semantics of `src/lib/redis/customer-rate-limiter.ts` — a degraded limiter
31
+ * must never become an availability outage. The Governor's global
32
+ * tool-concurrency semaphore remains the unconditional backstop.
33
+ */
34
+
35
/**
 * Number of permits requested from the coordinator per `rateAcquire`
 * round-trip. Tuned to amortize the DO hop, not to batch.
 */
const LEASE_BLOCK_SIZE = 16;
/**
 * Max age of a leased block, in milliseconds. A leased permit debited the
 * global window already, but if it is held past roughly one window it could let
 * an isolate run ahead of a rolled-over window. Discarding stale blocks bounds
 * that to sub-window.
 */
const LEASE_BLOCK_TTL_MS = 250;
/**
 * Cap, in milliseconds, on how long the backend will park waiting on a
 * saturated window.
 */
const MAX_ACQUIRE_WAIT_MS = 5_000;
45
+
46
/**
 * Transport the rate-state backend uses to talk to the coordinator. Both calls
 * are asynchronous; the backend treats a rejected `rateAcquire` as "coordinator
 * degraded" and fails open.
 */
export interface CoordinatorRatePort {
  /**
   * Lease up to `requested` request-window permits for `bucketId` under all
   * `rules` from the coordinator DO. Returns how many were `granted` (0 when the
   * window is saturated) and a `waitMs` hint before retrying.
   */
  rateAcquire(input: {
    bucketId: string;
    rules: PacingRule[];
    requested: number;
  }): Promise<{ granted: number; waitMs: number }>;
  /** Feed a Retry-After cooldown (`cooldownMs`) back into the global bucket. */
  ratePenalize(input: { bucketId: string; cooldownMs: number }): Promise<void>;
}
60
+
61
/** Locally cached permits for one bucket, already debited from the global window. */
interface LeasedBlock {
  /** Permits still available to hand out without a round-trip. */
  remaining: number;
  /** Timestamp (same clock as the backend's `now()`) after which the block is discarded. */
  expiresAt: number;
  /** Stable signature of the rules this block was leased under. */
  rulesKey: string;
}
67
+
68
/** Optional injection points for the backend; each falls back to a built-in default. */
interface Options {
  /** Clock in milliseconds; defaults to `Date.now()`. */
  now?: () => number;
  /** Delay primitive; defaults to a `setTimeout`-based promise. */
  sleep?: (ms: number) => Promise<void>;
  /** Called when a coordinator call fails; defaults to a warn-once console logger. */
  onDegraded?: (info: { bucketId: string; error: string }) => void;
}
73
+
74
/**
 * Build an order-independent fingerprint of a rule set. Each rule is rendered
 * as `ruleId:requestsPerWindow:windowMs:maxConcurrency` (empty when
 * `maxConcurrency` is null/undefined); the rendered entries are sorted and
 * joined with `|`. The input array is not mutated.
 */
function rulesSignature(rules: readonly PacingRule[]): string {
  const entries: string[] = [];
  for (const rule of rules) {
    entries.push(
      `${rule.ruleId}:${rule.requestsPerWindow}:${rule.windowMs}:${rule.maxConcurrency ?? ''}`,
    );
  }
  entries.sort();
  return entries.join('|');
}
83
+
84
/**
 * {@link RateStateBackend} that paces against a request window held by the
 * coordinator, reached through a {@link CoordinatorRatePort}.
 *
 * Permits are leased in blocks of up to `LEASE_BLOCK_SIZE`: one is consumed by
 * the acquiring call and the rest are cached per bucket for at most
 * `LEASE_BLOCK_TTL_MS`, so most acquires need no round-trip.
 *
 * The backend fails open: a rejected `rateAcquire`, or a window that stays
 * saturated for `MAX_ACQUIRE_WAIT_MS`, yields a no-op permit instead of an
 * error. Only an aborted signal makes `acquire` reject.
 */
export class CoordinatorRateStateBackend implements RateStateBackend {
  private readonly port: CoordinatorRatePort;
  private readonly now: () => number;
  private readonly sleep: (ms: number) => Promise<void>;
  private readonly onDegraded: (info: {
    bucketId: string;
    error: string;
  }) => void;
  /** Cached leased permits, keyed by bucket id. */
  private readonly blocks = new Map<string, LeasedBlock>();
  /** Guards the default degraded logger so it warns only once. */
  private degradedLogged = false;

  /**
   * @param port - Transport to the coordinator.
   * @param options - Optional clock / sleep / degraded-callback overrides.
   */
  constructor(port: CoordinatorRatePort, options: Options = {}) {
    this.port = port;
    this.now = options.now ?? (() => Date.now());
    this.sleep =
      options.sleep ??
      ((ms: number) => new Promise((resolve) => setTimeout(resolve, ms)));
    this.onDegraded =
      options.onDegraded ??
      ((info) => {
        if (this.degradedLogged) return;
        this.degradedLogged = true;
        console.warn('[coordinator-rate-state] acquire failed open', info);
      });
  }

  /**
   * Obtain one pacing permit for `bucketId` under `rules`.
   *
   * @returns A no-op permit once a permit was drawn or leased, or when the
   *   backend fails open (coordinator error, or wait cap reached).
   * @throws The signal's reason (or a generic `Error`) when `signal` is aborted
   *   while a coordinator round-trip is needed.
   */
  async acquire(input: {
    bucketId: string;
    rules: readonly PacingRule[];
    signal?: AbortSignal;
  }): Promise<PacingPermit> {
    const { bucketId, rules, signal } = input;
    if (rules.length === 0) {
      return noopPacingPermit();
    }
    const rulesKey = rulesSignature(rules);

    // Draw from a still-valid local block first — no round-trip.
    if (this.drawFromBlock(bucketId, rulesKey)) {
      return noopPacingPermit();
    }

    const waitStartedAt = this.now();
    while (true) {
      if (signal?.aborted) {
        throw signal.reason instanceof Error
          ? signal.reason
          : new Error('Rate-state acquire aborted.');
      }
      let response: { granted: number; waitMs: number };
      try {
        response = await this.port.rateAcquire({
          bucketId,
          rules: [...rules],
          requested: LEASE_BLOCK_SIZE,
        });
      } catch (error) {
        // Fail open: a degraded coordinator must not stall the run.
        this.onDegraded({
          bucketId,
          error: error instanceof Error ? error.message : String(error),
        });
        return noopPacingPermit();
      }
      if (response.granted > 0) {
        // Consume one for this call; cache the rest as a short-lived block.
        const remaining = response.granted - 1;
        if (remaining > 0) {
          this.mergeBlock(bucketId, remaining, rulesKey);
        }
        return noopPacingPermit();
      }
      // Window saturated. Park for the hint, then re-acquire. The TOTAL time
      // parked is capped at MAX_ACQUIRE_WAIT_MS: each sleep is clamped to the
      // budget that is left, so a late, large hint cannot push the overall wait
      // well past the cap.
      const budgetLeftMs = MAX_ACQUIRE_WAIT_MS - (this.now() - waitStartedAt);
      if (budgetLeftMs <= 0) {
        return noopPacingPermit();
      }
      // A malformed (NaN / non-number) hint degrades to the 1ms minimum rather
      // than being passed to sleep().
      const hintMs =
        typeof response.waitMs === 'number' && !Number.isNaN(response.waitMs)
          ? response.waitMs
          : 1;
      const waitMs = Math.max(1, Math.min(hintMs, budgetLeftMs));
      await this.sleepUnlessAborted(waitMs, signal);
    }
  }

  /**
   * Report a provider cooldown. Non-positive cooldowns are ignored. The call to
   * the coordinator is fire-and-forget; a failure is routed to `onDegraded`.
   */
  penalize(input: { bucketId: string; cooldownMs: number }): void {
    if (input.cooldownMs <= 0) return;
    // Drop any cached block for this bucket so the cooldown takes effect on the
    // very next acquire instead of being masked by already-leased permits.
    this.blocks.delete(input.bucketId);
    void this.port
      .ratePenalize({
        bucketId: input.bucketId,
        cooldownMs: input.cooldownMs,
      })
      .catch((error) => {
        this.onDegraded({
          bucketId: input.bucketId,
          error: error instanceof Error ? error.message : String(error),
        });
      });
  }

  /**
   * Sleep for `ms`, returning early if `signal` aborts. This only returns; the
   * caller's loop re-checks the signal and throws the abort. Without a signal
   * this is a plain `sleep(ms)`.
   */
  private async sleepUnlessAborted(
    ms: number,
    signal?: AbortSignal,
  ): Promise<void> {
    if (!signal) {
      await this.sleep(ms);
      return;
    }
    // An 'abort' listener never fires for an already-aborted signal.
    if (signal.aborted) return;
    await new Promise<void>((resolve, reject) => {
      const onAbort = (): void => resolve();
      signal.addEventListener('abort', onAbort, { once: true });
      this.sleep(ms).then(
        () => {
          signal.removeEventListener('abort', onAbort);
          resolve();
        },
        (error: unknown) => {
          signal.removeEventListener('abort', onAbort);
          reject(error);
        },
      );
    });
  }

  /**
   * Add freshly-leased permits to the bucket's block instead of overwriting it.
   * Two concurrent acquires can both miss the local block and both round-trip;
   * each debited the global window, so the DO already issued both blocks'
   * permits. Overwriting would drop one set — under-issuance that wastes window
   * capacity and over-throttles. Merging preserves every debited permit:
   *  - same rulesKey + still valid → sum remaining, keep the earlier expiry so
   *    the merged block never outlives the older lease's sub-window bound.
   *  - missing / stale / different rules → start fresh from this lease.
   */
  private mergeBlock(
    bucketId: string,
    remaining: number,
    rulesKey: string,
  ): void {
    const freshExpiresAt = this.now() + LEASE_BLOCK_TTL_MS;
    const existing = this.blocks.get(bucketId);
    if (
      existing &&
      existing.rulesKey === rulesKey &&
      existing.expiresAt > this.now()
    ) {
      existing.remaining += remaining;
      existing.expiresAt = Math.min(existing.expiresAt, freshExpiresAt);
      return;
    }
    this.blocks.set(bucketId, {
      remaining,
      expiresAt: freshExpiresAt,
      rulesKey,
    });
  }

  /**
   * Take one permit from the bucket's cached block.
   *
   * @returns `true` when a permit was taken; `false` when there is no block, or
   *   the block was leased under different rules or has expired (in which case
   *   it is discarded).
   */
  private drawFromBlock(bucketId: string, rulesKey: string): boolean {
    const block = this.blocks.get(bucketId);
    if (!block) return false;
    if (block.rulesKey !== rulesKey || block.expiresAt <= this.now()) {
      this.blocks.delete(bucketId);
      return false;
    }
    block.remaining -= 1;
    if (block.remaining <= 0) {
      this.blocks.delete(bucketId);
    }
    return true;
  }
}
@@ -0,0 +1,376 @@
1
+ /**
2
+ * Play Execution Governor — the deep module that owns execution policy.
3
+ *
4
+ * Both runner Adapters (`cjs_node20`, `esm_workers`) gate their work through one
5
+ * Governor instance per run-attempt so the substrates cannot diverge on
6
+ * concurrency, budgets, or pacing. Adapters keep only substrate mechanism (how
7
+ * to resolve a row, batch a tool call, or submit a child); the Governor owns the
8
+ * "may I, and how many at once" policy. See ADR 0007 + CONTEXT.md.
9
+ *
10
+ * Surface (small, by design):
11
+ * - acquireRowSlot / acquireChildPlaySlot / acquireToolSlot → blocking leases
12
+ * - chargeBudget → throws on breach
13
+ * - forkChild → child lineage snapshot
14
+ * - resolveRowConcurrency / reportProviderBackpressure / snapshot
15
+ */
16
+ import {
17
+ type AdapterId,
18
+ type ResolvedExecutionPolicy,
19
+ resolveExecutionPolicy,
20
+ resolveRowConcurrency,
21
+ } from './policy';
22
+ import {
23
+ noopPacingPermit,
24
+ type PacingRule,
25
+ type RateStateBackend,
26
+ } from './rate-state-backend';
27
+
28
/** Handle returned by the Governor's slot-acquire methods. */
export interface WorkLease {
  /** Free the slot / pacing permit. Idempotent. MUST be called in a finally. */
  release(): void;
}
32
+
33
/** Names of the budget counters that `chargeBudget` can increment. */
export type BudgetKind =
  | 'playCall'
  | 'toolCall'
  | 'retry'
  | 'descendant'
  | 'waterfallStep';
39
+
40
/**
 * Counters that accumulate down the dispatch lineage; thread via the snapshot.
 * A child's snapshot is seeded from its parent's counters when the child is
 * forked.
 */
export interface GovernanceSnapshot {
  rootRunId: string;
  currentRunId: string;
  currentPlayId: string;
  /** Play ids from the root down to the current play; used for recursion detection. */
  ancestryPlayIds: string[];
  /** Run ids from the root down to the current run. */
  ancestryRunIds: string[];
  callDepth: number;
  playCallCount: number;
  toolCallCount: number;
  retryCount: number;
  descendantCount: number;
  waterfallStepExecutions: number;
  /** Direct child plays launched, keyed by parent play id. */
  parentChildCalls: Record<string, number>;
}
56
+
57
/**
 * Thrown when a Governor budget or limit is breached. Exposes which budget was
 * hit together with the observed value and the configured limit; the message
 * uses a dedicated wording for the play-call depth limit.
 */
export class GovernorBudgetError extends Error {
  /** Budget or limit that was breached. */
  readonly budget: BudgetKind | 'playDepth' | 'childPerParent';
  /** Value that triggered the breach. */
  readonly observed: number;
  /** Configured ceiling for `budget`. */
  readonly limit: number;

  constructor(
    budget: BudgetKind | 'playDepth' | 'childPerParent',
    observed: number,
    limit: number,
  ) {
    let text: string;
    if (budget === 'playDepth') {
      text = `Play-call depth exceeded (${observed}/${limit}).`;
    } else {
      text = `Play execution ${budget} budget exceeded (${observed}/${limit}).`;
    }
    super(text);
    this.budget = budget;
    this.observed = observed;
    this.limit = limit;
    this.name = 'GovernorBudgetError';
  }
}
71
+
72
/**
 * Maps a toolId to its provider + resolved pacing rules (from rate-limit defs).
 * Resolves to `null` when the tool has no pacing information; callers then skip
 * pacing for that tool.
 */
export type PacingResolver = (
  toolId: string,
) => Promise<{ provider: string; rules: PacingRule[] } | null>;
76
+
77
/**
 * Execution-policy surface shared by the runner adapters: blocking slot leases,
 * budget charging, child-lineage forking, and provider backpressure reporting.
 */
export interface PlayExecutionGovernor {
  /** Adapter this governor was created for. */
  readonly adapter: AdapterId;
  /** Execution policy resolved for {@link adapter}. */
  readonly policy: ResolvedExecutionPolicy;

  /** Block until a map-row slot is free. */
  acquireRowSlot(opts?: { signal?: AbortSignal }): Promise<WorkLease>;
  /** Block until a child-play slot is free. */
  acquireChildPlaySlot(opts?: { signal?: AbortSignal }): Promise<WorkLease>;
  /**
   * Block until a global tool-concurrency slot AND the per-(org,provider) pacer
   * permit are free, then charge the tool-call budget and return a lease. Order:
   * concurrency slot → provider pace → tool budget (charged last so a
   * failed/aborted acquire never consumes budget). A run over tool budget still
   * acquires and holds a slot + pacing permit before the breach is detected; the
   * breach surfaces only once the call is otherwise cleared to run.
   */
  acquireToolSlot(
    toolId: string,
    opts?: { signal?: AbortSignal },
  ): Promise<WorkLease>;

  /**
   * Suggested batch parallelism for a tool: the provider's own rate hints
   * tightened to the policy's suggested ceiling. No hints → the fallback.
   */
  suggestedParallelism(toolId: string, fallback: number): Promise<number>;

  /** Increment a monotonic budget counter; throws GovernorBudgetError on breach. */
  chargeBudget(kind: BudgetKind, amount?: number): void;

  /**
   * Reserve depth + per-parent + descendant budget for a child play and return
   * the snapshot to thread into the child run so budgets accumulate across the
   * lineage (and across isolates on `esm_workers`). Throws on breach.
   *
   * Unlike {@link acquireToolSlot} (which charges last so an aborted acquire
   * never consumes budget), child-lineage counters are charged here at fork
   * time, BEFORE the caller acquires a child-play slot. A slot acquire that then
   * fails (e.g. abort) does NOT refund these counters. This is intentional and
   * safe given the 100k child caps: the charge reserves lineage-global capacity
   * for a launch the caller has committed to, and forkChild must return the
   * threaded snapshot synchronously, so the charge cannot be deferred behind the
   * async slot acquire.
   */
  forkChild(input: {
    childPlayName: string;
    childRunId: string;
  }): GovernanceSnapshot;

  /** Effective row concurrency: explicit request clamped to [1, rowMax], else default. */
  resolveRowConcurrency(requested?: number): number;

  /** Feed a provider's Retry-After back into the shared pacer. */
  reportProviderBackpressure(input: {
    provider: string;
    retryAfterMs: number;
  }): void;

  /** Copy of the current counters and lineage, detached from the live state. */
  snapshot(): GovernanceSnapshot;
}
137
+
138
/** Construction input for `createPlayExecutionGovernor`. */
interface GovernorInput {
  /** Adapter whose execution policy is resolved. */
  adapter: AdapterId;
  /** `orgId` prefixes pacing bucket ids; both fields seed the default snapshot. */
  scope: { orgId: string; rootRunId: string };
  /** Backend that issues pacing permits and receives cooldown penalties. */
  rateState: RateStateBackend;
  /** Resolves a tool id to its provider and pacing rules. */
  resolvePacing: PacingResolver;
  /** Snapshot to continue from; when omitted a default snapshot is created. */
  resume?: GovernanceSnapshot;
}
145
+
146
/**
 * Counting semaphore with FIFO wake-up and abort support.
 *
 * At most `limit` leases are outstanding at once. `acquire` parks callers in a
 * queue while the pool is full; releasing a lease wakes the oldest parked
 * caller, which then re-checks capacity before taking the slot.
 */
class Semaphore {
  private inFlight = 0;
  /** Resolvers for parked `acquire` calls, oldest first. */
  private readonly waiters: Array<() => void> = [];
  constructor(private readonly limit: number) {}

  /** Error to throw for an aborted signal: its reason when that is an Error. */
  private static abortError(signal: AbortSignal | undefined): Error {
    return signal?.reason instanceof Error
      ? signal.reason
      : new Error('Slot acquire aborted.');
  }

  /**
   * Wait for a free slot and take it.
   *
   * @param signal - Optional abort signal; aborting rejects a parked acquire.
   * @returns A lease whose `release()` frees the slot (idempotent).
   * @throws The signal's reason (or a generic `Error`) when `signal` is aborted.
   */
  async acquire(signal?: AbortSignal): Promise<WorkLease> {
    // Fail fast on an already-aborted signal: the parked-promise abort listener
    // below registers with { once: true } and never fires for a signal that was
    // aborted before we parked, so without this check a full pool would block
    // the waiter until a slot frees (or forever if it never drains).
    if (signal?.aborted) {
      throw Semaphore.abortError(signal);
    }
    while (this.inFlight >= this.limit) {
      await new Promise<void>((resolve, reject) => {
        const onResolve = () => {
          signal?.removeEventListener('abort', onAbort);
          resolve();
        };
        const onAbort = () => {
          // Still parked: leave the queue so a later release skips this waiter.
          const idx = this.waiters.indexOf(onResolve);
          if (idx >= 0) this.waiters.splice(idx, 1);
          reject(Semaphore.abortError(signal));
        };
        this.waiters.push(onResolve);
        signal?.addEventListener('abort', onAbort, { once: true });
      });
      if (signal?.aborted) {
        // We were woken by a release (and dequeued) but the signal aborted
        // before this continuation ran. We will not take the slot, so hand the
        // wake-up to the next waiter; otherwise it would stay parked even
        // though a slot is free (forever, if nothing else is in flight).
        if (this.inFlight < this.limit) {
          this.waiters.shift()?.();
        }
        throw Semaphore.abortError(signal);
      }
    }
    this.inFlight += 1;
    let released = false;
    return {
      release: () => {
        if (released) return;
        released = true;
        this.inFlight = Math.max(0, this.inFlight - 1);
        this.waiters.shift()?.();
      },
    };
  }
}
197
+
198
/**
 * Build the starting snapshot for a root run: all counters at zero and the
 * lineage containing only the root run.
 *
 * When `rootPlayId` is provided (truthy) it becomes the first ancestry entry
 * and the call depth starts at 1; otherwise the ancestry is empty and the depth
 * is 0. `currentPlayId` falls back to `rootRunId` only when `rootPlayId` is
 * null/undefined. `orgId` is accepted but not read here.
 */
export function createDefaultGovernanceSnapshot(scope: {
  orgId: string;
  rootRunId: string;
  rootPlayId?: string;
}): GovernanceSnapshot {
  const { rootRunId, rootPlayId } = scope;

  let ancestryPlayIds: string[] = [];
  let callDepth = 0;
  if (rootPlayId) {
    ancestryPlayIds = [rootPlayId];
    callDepth = 1;
  }

  const snapshot: GovernanceSnapshot = {
    rootRunId,
    currentRunId: rootRunId,
    currentPlayId: rootPlayId ?? rootRunId,
    ancestryPlayIds,
    ancestryRunIds: [rootRunId],
    callDepth,
    playCallCount: 0,
    toolCallCount: 0,
    retryCount: 0,
    descendantCount: 0,
    waterfallStepExecutions: 0,
    parentChildCalls: {},
  };
  return snapshot;
}
218
+
219
/**
 * Create the Governor for one run-attempt.
 *
 * The governor resolves the execution policy for `input.adapter`, creates one
 * semaphore each for map rows, child plays and tool calls, and keeps the budget
 * counters in a private {@link GovernanceSnapshot}.
 *
 * @param input - Adapter, scope, rate-state backend, pacing resolver and an
 *   optional snapshot to resume from. The `resume` snapshot is copied, never
 *   mutated: charging budgets on this governor does not change the caller's
 *   object, so the same snapshot can safely seed another attempt.
 * @returns The governor; read counters through `snapshot()`.
 */
export function createPlayExecutionGovernor(
  input: GovernorInput,
): PlayExecutionGovernor {
  const policy = resolveExecutionPolicy(input.adapter);
  // Copy the resume snapshot (including its arrays and per-parent map) so the
  // in-place counter updates below stay private to this governor.
  const state: GovernanceSnapshot = input.resume
    ? {
        ...input.resume,
        ancestryPlayIds: [...input.resume.ancestryPlayIds],
        ancestryRunIds: [...input.resume.ancestryRunIds],
        parentChildCalls: { ...input.resume.parentChildCalls },
      }
    : createDefaultGovernanceSnapshot(input.scope);

  const rowSlots = new Semaphore(policy.concurrency.rowMax);
  const childPlaySlots = new Semaphore(policy.concurrency.childPlay);
  const toolSlots = new Semaphore(policy.concurrency.toolCalls);

  /** Pacing bucket id for a provider within this org. */
  const bucketId = (provider: string) => `${input.scope.orgId}:${provider}`;

  /**
   * Add `amount` to the counter for `kind`, then throw GovernorBudgetError if
   * the counter now exceeds its limit. The increment is kept on breach.
   */
  function chargeBudget(kind: BudgetKind, amount = 1): void {
    switch (kind) {
      case 'playCall':
        state.playCallCount += amount;
        if (state.playCallCount > policy.budgets.maxPlayCallCount)
          throw new GovernorBudgetError('playCall', state.playCallCount, policy.budgets.maxPlayCallCount);
        return;
      case 'toolCall':
        state.toolCallCount += amount;
        if (state.toolCallCount > policy.budgets.maxToolCallCount)
          throw new GovernorBudgetError('toolCall', state.toolCallCount, policy.budgets.maxToolCallCount);
        return;
      case 'retry':
        state.retryCount += amount;
        if (state.retryCount > policy.budgets.maxRetryCount)
          throw new GovernorBudgetError('retry', state.retryCount, policy.budgets.maxRetryCount);
        return;
      case 'descendant':
        state.descendantCount += amount;
        if (state.descendantCount > policy.budgets.maxDescendants)
          throw new GovernorBudgetError('descendant', state.descendantCount, policy.budgets.maxDescendants);
        return;
      case 'waterfallStep':
        state.waterfallStepExecutions += amount;
        if (state.waterfallStepExecutions > policy.budgets.maxWaterfallStepExecutions)
          throw new GovernorBudgetError('waterfallStep', state.waterfallStepExecutions, policy.budgets.maxWaterfallStepExecutions);
        return;
    }
  }

  return {
    adapter: input.adapter,
    policy,

    acquireRowSlot: (opts) => rowSlots.acquire(opts?.signal),
    acquireChildPlaySlot: (opts) => childPlaySlots.acquire(opts?.signal),

    async acquireToolSlot(toolId, opts) {
      // 1. global tool-concurrency slot.
      const slot = await toolSlots.acquire(opts?.signal);
      // 2. per-(org,provider) pacing. The provider comes from the pacing
      //    resolver, so callers only need the toolId. No rules → no pacing.
      let permit: { release(): void };
      try {
        const pacing = await input.resolvePacing(toolId);
        permit =
          pacing && pacing.rules.length > 0
            ? await input.rateState.acquire({
                bucketId: bucketId(pacing.provider),
                rules: pacing.rules,
                signal: opts?.signal,
              })
            : noopPacingPermit();
      } catch (error) {
        // Resolver or pacer failed: give the concurrency slot back.
        slot.release();
        throw error;
      }
      // 3. charge the budget only once the call is actually cleared to run, so a
      //    failed/aborted acquisition never permanently consumes tool budget.
      try {
        chargeBudget('toolCall');
      } catch (error) {
        permit.release();
        slot.release();
        throw error;
      }
      let released = false;
      return {
        release: () => {
          if (released) return;
          released = true;
          permit.release();
          slot.release();
        },
      };
    },

    async suggestedParallelism(toolId, fallback) {
      const pacing = await input.resolvePacing(toolId);
      if (!pacing || pacing.rules.length === 0) return fallback;
      // Every rule contributes its window size and, when set, its concurrency
      // cap; the tightest of those and the policy ceiling wins (minimum 1).
      const limits = pacing.rules.flatMap((rule) =>
        rule.maxConcurrency != null
          ? [rule.requestsPerWindow, rule.maxConcurrency]
          : [rule.requestsPerWindow],
      );
      return Math.max(
        1,
        Math.min(policy.pacing.suggestedMaxParallelism, ...limits),
      );
    },

    chargeBudget,

    forkChild(childInput) {
      // A play that already appears in the ancestry would recurse.
      if (state.ancestryPlayIds.includes(childInput.childPlayName)) {
        const chain = [...state.ancestryPlayIds, childInput.childPlayName].join(' -> ');
        throw new Error(`Recursive play graph detected: ${chain}`);
      }
      const nextDepth = state.callDepth + 1;
      if (nextDepth > policy.budgets.maxPlayCallDepth)
        throw new GovernorBudgetError('playDepth', nextDepth, policy.budgets.maxPlayCallDepth);
      const parentKey = state.currentPlayId;
      const nextParent = (state.parentChildCalls[parentKey] ?? 0) + 1;
      if (nextParent > policy.budgets.maxChildPlayCallsPerParent)
        throw new GovernorBudgetError('childPerParent', nextParent, policy.budgets.maxChildPlayCallsPerParent);
      // Charge the run-wide play/descendant budgets on the parent. Charged at
      // fork time (not after the caller's child-play slot acquire) and never
      // refunded if that acquire fails.
      chargeBudget('playCall');
      chargeBudget('descendant');
      state.parentChildCalls[parentKey] = nextParent;
      // Child seeds from the parent's accumulated counters → lineage-global budget.
      return {
        rootRunId: state.rootRunId,
        currentRunId: childInput.childRunId,
        currentPlayId: childInput.childPlayName,
        ancestryPlayIds: [...state.ancestryPlayIds, childInput.childPlayName],
        ancestryRunIds: [...state.ancestryRunIds, childInput.childRunId],
        callDepth: nextDepth,
        playCallCount: state.playCallCount,
        toolCallCount: state.toolCallCount,
        retryCount: state.retryCount,
        descendantCount: state.descendantCount,
        waterfallStepExecutions: state.waterfallStepExecutions,
        parentChildCalls: {},
      };
    },

    resolveRowConcurrency: (requested) => resolveRowConcurrency(policy, requested),

    reportProviderBackpressure(bp) {
      input.rateState.penalize({
        bucketId: bucketId(bp.provider),
        cooldownMs: bp.retryAfterMs,
      });
    },

    // Detached copy: callers can keep or mutate it without touching live state.
    snapshot: () => ({
      ...state,
      ancestryPlayIds: [...state.ancestryPlayIds],
      ancestryRunIds: [...state.ancestryRunIds],
      parentChildCalls: { ...state.parentChildCalls },
    }),
  };
}