throttleai 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +308 -0
- package/dist/adapters/express.cjs +75 -0
- package/dist/adapters/express.cjs.map +1 -0
- package/dist/adapters/express.d.cts +79 -0
- package/dist/adapters/express.d.ts +79 -0
- package/dist/adapters/express.js +50 -0
- package/dist/adapters/express.js.map +1 -0
- package/dist/adapters/fetch.cjs +99 -0
- package/dist/adapters/fetch.cjs.map +1 -0
- package/dist/adapters/fetch.d.cts +69 -0
- package/dist/adapters/fetch.d.ts +69 -0
- package/dist/adapters/fetch.js +68 -0
- package/dist/adapters/fetch.js.map +1 -0
- package/dist/adapters/hono.cjs +74 -0
- package/dist/adapters/hono.cjs.map +1 -0
- package/dist/adapters/hono.d.cts +73 -0
- package/dist/adapters/hono.d.ts +73 -0
- package/dist/adapters/hono.js +49 -0
- package/dist/adapters/hono.js.map +1 -0
- package/dist/adapters/openai.cjs +103 -0
- package/dist/adapters/openai.cjs.map +1 -0
- package/dist/adapters/openai.d.cts +102 -0
- package/dist/adapters/openai.d.ts +102 -0
- package/dist/adapters/openai.js +70 -0
- package/dist/adapters/openai.js.map +1 -0
- package/dist/adapters/tools.cjs +80 -0
- package/dist/adapters/tools.cjs.map +1 -0
- package/dist/adapters/tools.d.cts +56 -0
- package/dist/adapters/tools.d.ts +56 -0
- package/dist/adapters/tools.js +49 -0
- package/dist/adapters/tools.js.map +1 -0
- package/dist/chunk-YHOXYRXL.js +11 -0
- package/dist/chunk-YHOXYRXL.js.map +1 -0
- package/dist/governor-MVaCesqM.d.cts +206 -0
- package/dist/governor-MVaCesqM.d.ts +206 -0
- package/dist/index.cjs +1163 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +213 -0
- package/dist/index.d.ts +213 -0
- package/dist/index.js +1128 -0
- package/dist/index.js.map +1 -0
- package/dist/types-BkfBESR2.d.ts +47 -0
- package/dist/types-DOUI5hr7.d.cts +47 -0
- package/package.json +114 -0
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import {
|
|
2
|
+
classifyOutcome
|
|
3
|
+
} from "../chunk-YHOXYRXL.js";
|
|
4
|
+
|
|
5
|
+
// src/adapters/tools.ts
|
|
6
|
+
/**
 * Wrap an async function so that every call is gated by a governor lease.
 *
 * Each invocation of the returned function:
 *   1. calls `governor.acquire` with action `tool.<toolId>` and a concurrency
 *      weight of `costWeight`;
 *   2. if the lease is denied, resolves to `{ ok: false, decision }` without
 *      calling `fn`;
 *   3. otherwise awaits `fn(...args)` and releases the lease exactly once,
 *      reporting the outcome (`"success"` or `"error"`) and the measured
 *      latency in milliseconds.
 *
 * @param fn       The async function to throttle.
 * @param options  `governor` (object exposing `acquire` / `release`),
 *                 `toolId`, and optional `actorId` (default `"default"`),
 *                 `priority` (default `"background"`) and `costWeight`
 *                 (default `1`).
 * @returns An async function taking the same arguments as `fn` and resolving
 *          to `{ ok: true, result, latencyMs }` when granted, or
 *          `{ ok: false, decision }` when denied.
 * @throws Re-throws whatever `fn` throws (after releasing the lease with
 *         outcome `"error"`), and propagates any error thrown by the governor.
 */
function wrapTool(fn, options) {
  const {
    governor,
    toolId,
    actorId = "default",
    priority = "background",
    costWeight = 1
  } = options;
  return async (...args) => {
    const decision = governor.acquire({
      actorId,
      action: `tool.${toolId}`,
      priority,
      estimate: { weight: costWeight }
    });
    if (!decision.granted) {
      return {
        ok: false,
        decision
      };
    }
    const start = Date.now();
    let result;
    // Only the tool call itself is guarded. Keeping the success-path release
    // outside this try block guarantees the lease is released exactly once:
    // a throwing release() can no longer fall into the catch handler and
    // trigger a second release that mislabels a successful call as "error".
    try {
      result = await fn(...args);
    } catch (err) {
      governor.release(decision.leaseId, {
        outcome: "error",
        latencyMs: Date.now() - start
      });
      throw err;
    }
    const latencyMs = Date.now() - start;
    governor.release(decision.leaseId, {
      outcome: "success",
      latencyMs
    });
    return { ok: true, result, latencyMs };
  };
}
|
|
45
|
+
export {
|
|
46
|
+
classifyOutcome,
|
|
47
|
+
wrapTool
|
|
48
|
+
};
|
|
49
|
+
//# sourceMappingURL=tools.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../src/adapters/tools.ts"],"sourcesContent":["/**\n * ThrottleAI tool adapter — wraps any async function (embeddings, rerankers, etc).\n *\n * Unifies throttling across model calls + tool calls so you can control\n * the total throughput of your AI pipeline, not just the LLM calls.\n *\n * @module throttleai/adapters/tools\n */\n\nexport type {\n AdapterGovernor,\n AdapterOptions,\n AdapterResult,\n AdapterGranted,\n AdapterDenied,\n} from \"./types.js\";\n\nexport { classifyOutcome } from \"./types.js\";\n\nimport type { Priority, AcquireDecision } from \"../types.js\";\nimport type { AdapterGovernor, AdapterResult } from \"./types.js\";\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\n/** Options for wrapping a tool function. */\nexport interface WrapToolOptions {\n /** Governor instance. */\n governor: AdapterGovernor;\n /** Tool identifier used as the action string (e.g., \"embed\", \"rerank\"). */\n toolId: string;\n /** Actor ID (default: \"default\"). */\n actorId?: string;\n /** Priority (default: \"background\"). Tools default to background. */\n priority?: Priority;\n /** Concurrency weight for this tool (default: 1). Heavy tools consume more capacity. */\n costWeight?: number;\n}\n\n// ---------------------------------------------------------------------------\n// wrapTool\n// ---------------------------------------------------------------------------\n\n/**\n * Wrap any async function with governor throttling.\n *\n * Acquires a lease (with configurable weight), runs the function,\n * and releases with outcome + latency. 
Useful for embeddings,\n * rerankers, vector DB calls, file operations, etc.\n *\n * ```ts\n * import { createGovernor, presets } from \"throttleai\";\n * import { wrapTool } from \"throttleai/adapters/tools\";\n *\n * const gov = createGovernor(presets.balanced());\n *\n * const embed = wrapTool(\n * (text: string) => embeddingModel.embed(text),\n * { governor: gov, toolId: \"embed\", costWeight: 1 },\n * );\n *\n * const rerank = wrapTool(\n * (docs: string[]) => reranker.rerank(docs),\n * { governor: gov, toolId: \"rerank\", costWeight: 2 },\n * );\n *\n * const embedResult = await embed(\"hello world\");\n * if (embedResult.ok) console.log(embedResult.result);\n * ```\n */\nexport function wrapTool<TArgs extends unknown[], TResult>(\n fn: (...args: TArgs) => Promise<TResult>,\n options: WrapToolOptions,\n): (...args: TArgs) => Promise<AdapterResult<TResult>> {\n const {\n governor,\n toolId,\n actorId = \"default\",\n priority = \"background\",\n costWeight = 1,\n } = options;\n\n return async (...args) => {\n const decision = governor.acquire({\n actorId,\n action: `tool.${toolId}`,\n priority,\n estimate: { weight: costWeight },\n });\n\n if (!decision.granted) {\n return {\n ok: false,\n decision: decision as AcquireDecision & { granted: false },\n };\n }\n\n const start = Date.now();\n\n try {\n const result = await fn(...args);\n const latencyMs = Date.now() - start;\n\n governor.release(decision.leaseId, {\n outcome: \"success\",\n latencyMs,\n });\n\n return { ok: true, result, latencyMs };\n } catch (err) {\n governor.release(decision.leaseId, {\n outcome: \"error\",\n latencyMs: Date.now() - start,\n });\n throw err;\n }\n 
};\n}\n"],"mappings":";;;;;AAuEO,SAAS,SACd,IACA,SACqD;AACrD,QAAM;AAAA,IACJ;AAAA,IACA;AAAA,IACA,UAAU;AAAA,IACV,WAAW;AAAA,IACX,aAAa;AAAA,EACf,IAAI;AAEJ,SAAO,UAAU,SAAS;AACxB,UAAM,WAAW,SAAS,QAAQ;AAAA,MAChC;AAAA,MACA,QAAQ,QAAQ,MAAM;AAAA,MACtB;AAAA,MACA,UAAU,EAAE,QAAQ,WAAW;AAAA,IACjC,CAAC;AAED,QAAI,CAAC,SAAS,SAAS;AACrB,aAAO;AAAA,QACL,IAAI;AAAA,QACJ;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,KAAK,IAAI;AAEvB,QAAI;AACF,YAAM,SAAS,MAAM,GAAG,GAAG,IAAI;AAC/B,YAAM,YAAY,KAAK,IAAI,IAAI;AAE/B,eAAS,QAAQ,SAAS,SAAS;AAAA,QACjC,SAAS;AAAA,QACT;AAAA,MACF,CAAC;AAED,aAAO,EAAE,IAAI,MAAM,QAAQ,UAAU;AAAA,IACvC,SAAS,KAAK;AACZ,eAAS,QAAQ,SAAS,SAAS;AAAA,QACjC,SAAS;AAAA,QACT,WAAW,KAAK,IAAI,IAAI;AAAA,MAC1B,CAAC;AACD,YAAM;AAAA,IACR;AAAA,EACF;AACF;","names":[]}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
// src/adapters/types.ts
|
|
2
|
+
/**
 * Map the result of an operation to a lease outcome string.
 *
 * Returns "error" when `error` is truthy, or when `statusCode` is provided
 * and is at least 400; returns "success" in every other case.
 */
function classifyOutcome(error, statusCode) {
  const statusIndicatesFailure = statusCode !== undefined && statusCode >= 400;
  return error || statusIndicatesFailure ? "error" : "success";
}
|
|
7
|
+
|
|
8
|
+
export {
|
|
9
|
+
classifyOutcome
|
|
10
|
+
};
|
|
11
|
+
//# sourceMappingURL=chunk-YHOXYRXL.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/adapters/types.ts"],"sourcesContent":["/**\n * Shared types for all ThrottleAI adapters.\n *\n * Adapters live in separate entrypoints so bundlers can tree-shake them.\n * Core ThrottleAI never imports from adapters.\n */\n\nimport type { Governor } from \"../governor.js\";\nimport type { Priority, LeaseOutcome, AcquireDecision } from \"../types.js\";\n\n// ---------------------------------------------------------------------------\n// Adapter context — what every adapter needs\n// ---------------------------------------------------------------------------\n\n/** Minimal governor interface that adapters depend on. */\nexport interface AdapterGovernor {\n acquire: Governor[\"acquire\"];\n release: Governor[\"release\"];\n}\n\n/** Options common to all adapters. */\nexport interface AdapterOptions {\n /** Governor instance to use for throttling. */\n governor: AdapterGovernor;\n /** Actor ID for the request (default: \"default\"). */\n actorId?: string;\n /** Priority for the request (default: \"interactive\"). */\n priority?: Priority;\n}\n\n// ---------------------------------------------------------------------------\n// Adapter result — consistent deny shape across all adapters\n// ---------------------------------------------------------------------------\n\n/** Successful adapter result. */\nexport interface AdapterGranted<T> {\n ok: true;\n result: T;\n latencyMs: number;\n}\n\n/** Denied adapter result — consistent shape across all adapters. */\nexport interface AdapterDenied {\n ok: false;\n decision: AcquireDecision & { granted: false };\n}\n\n/** Union result from any adapter. 
*/\nexport type AdapterResult<T> = AdapterGranted<T> | AdapterDenied;\n\n// ---------------------------------------------------------------------------\n// Usage extraction — optional hook for adapters to report actual tokens\n// ---------------------------------------------------------------------------\n\n/** Token usage as reported by a provider response. */\nexport interface ProviderUsage {\n promptTokens?: number;\n outputTokens?: number;\n}\n\n/** Classify the outcome of an operation. */\nexport function classifyOutcome(\n error: unknown,\n statusCode?: number,\n): LeaseOutcome {\n if (error) return \"error\";\n if (statusCode !== undefined && statusCode >= 400) return \"error\";\n return \"success\";\n}\n"],"mappings":";AA6DO,SAAS,gBACd,OACA,YACc;AACd,MAAI,MAAO,QAAO;AAClB,MAAI,eAAe,UAAa,cAAc,IAAK,QAAO;AAC1D,SAAO;AACT;","names":[]}
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
interface ConcurrencyConfig {
|
|
2
|
+
/** Maximum total in-flight weight (equals the number of in-flight leases when every lease has weight 1). */
|
|
3
|
+
maxInFlight: number;
|
|
4
|
+
/** Slots reserved for interactive priority (default 0). */
|
|
5
|
+
interactiveReserve?: number;
|
|
6
|
+
}
|
|
7
|
+
interface RateConfig {
|
|
8
|
+
/** Maximum requests allowed within the rolling window. */
|
|
9
|
+
requestsPerMinute?: number;
|
|
10
|
+
/** Maximum tokens allowed within the rolling window. */
|
|
11
|
+
tokensPerMinute?: number;
|
|
12
|
+
/** Rolling window size in ms (default 60 000). */
|
|
13
|
+
windowMs?: number;
|
|
14
|
+
}
|
|
15
|
+
interface FairnessConfig {
|
|
16
|
+
/** Fraction of maxInFlight weight an actor can hold before soft-cap (default 0.6). */
|
|
17
|
+
softCapRatio?: number;
|
|
18
|
+
/** How long a denied actor gets priority boost in ms (default 5 000). */
|
|
19
|
+
starvationWindowMs?: number;
|
|
20
|
+
}
|
|
21
|
+
interface AdaptiveConfig {
|
|
22
|
+
/** EMA smoothing factor (0–1, default 0.2). Higher = more responsive. */
|
|
23
|
+
alpha?: number;
|
|
24
|
+
/** Target deny rate (0–1, default 0.05). Above this → reduce concurrency. */
|
|
25
|
+
targetDenyRate?: number;
|
|
26
|
+
/** Latency increase ratio that triggers reduction (default 1.5). */
|
|
27
|
+
latencyThreshold?: number;
|
|
28
|
+
/** How often to recalculate in ms (default 5 000). */
|
|
29
|
+
adjustIntervalMs?: number;
|
|
30
|
+
/** Minimum effective concurrency (default 1). */
|
|
31
|
+
minConcurrency?: number;
|
|
32
|
+
}
|
|
33
|
+
interface GovernorConfig {
|
|
34
|
+
concurrency?: ConcurrencyConfig;
|
|
35
|
+
rate?: RateConfig;
|
|
36
|
+
/** Fairness settings. Set to `true` for defaults, or pass a config object. Only active when concurrency is configured. */
|
|
37
|
+
fairness?: boolean | FairnessConfig;
|
|
38
|
+
/** Adaptive concurrency tuning. Set to `true` for defaults, or pass a config. Only active when concurrency is configured. */
|
|
39
|
+
adaptive?: boolean | AdaptiveConfig;
|
|
40
|
+
/** Lease time-to-live in ms (default 60 000). */
|
|
41
|
+
leaseTtlMs?: number;
|
|
42
|
+
/** How often the reaper sweeps expired leases in ms (default 5 000). */
|
|
43
|
+
reaperIntervalMs?: number;
|
|
44
|
+
/** Optional event handler. Receives structured events for acquire/deny/release/expire, plus "warn" events in strict mode. No logging by default. */
|
|
45
|
+
onEvent?: GovernorEventHandler;
|
|
46
|
+
/**
|
|
47
|
+
* Enable strict mode for development.
|
|
48
|
+
*
|
|
49
|
+
* When `true`:
|
|
50
|
+
* - Double release throws an error
|
|
51
|
+
* - Releasing an unknown lease ID throws an error
|
|
52
|
+
* - Long-held leases (>80% of TTL) emit a "warn" event via onEvent
|
|
53
|
+
*/
|
|
54
|
+
strict?: boolean;
|
|
55
|
+
}
|
|
56
|
+
type Priority = "interactive" | "background";
|
|
57
|
+
type DenyReason = "concurrency" | "rate" | "budget" | "policy";
|
|
58
|
+
type LeaseOutcome = "success" | "error" | "timeout" | "cancelled";
|
|
59
|
+
interface TokenEstimate {
|
|
60
|
+
promptTokens?: number;
|
|
61
|
+
maxOutputTokens?: number;
|
|
62
|
+
/** Concurrency weight for this call (default 1). Heavy calls consume more capacity. */
|
|
63
|
+
weight?: number;
|
|
64
|
+
}
|
|
65
|
+
interface AcquireRequest {
|
|
66
|
+
actorId: string;
|
|
67
|
+
action: string;
|
|
68
|
+
estimate?: TokenEstimate;
|
|
69
|
+
idempotencyKey?: string;
|
|
70
|
+
priority?: Priority;
|
|
71
|
+
}
|
|
72
|
+
interface Constraints {
|
|
73
|
+
maxOutputTokens?: number;
|
|
74
|
+
}
|
|
75
|
+
/** Structured hint about the limits that caused a denial. */
|
|
76
|
+
interface LimitsHint {
|
|
77
|
+
/** Current in-flight weight (concurrency denials). */
|
|
78
|
+
inFlight?: number;
|
|
79
|
+
/** Maximum allowed in-flight weight (concurrency denials). */
|
|
80
|
+
maxInFlight?: number;
|
|
81
|
+
/** Current request count in window (rate denials). */
|
|
82
|
+
rateUsed?: number;
|
|
83
|
+
/** Rate limit (rate denials). */
|
|
84
|
+
rateLimit?: number;
|
|
85
|
+
}
|
|
86
|
+
type AcquireDecision = {
|
|
87
|
+
granted: true;
|
|
88
|
+
leaseId: string;
|
|
89
|
+
expiresAt: number;
|
|
90
|
+
constraints?: Constraints;
|
|
91
|
+
} | {
|
|
92
|
+
granted: false;
|
|
93
|
+
reason: DenyReason;
|
|
94
|
+
retryAfterMs: number;
|
|
95
|
+
recommendation: string;
|
|
96
|
+
/** Structured hint about the limits that caused the denial. */
|
|
97
|
+
limitsHint?: LimitsHint;
|
|
98
|
+
};
|
|
99
|
+
interface ReleaseReport {
|
|
100
|
+
outcome: LeaseOutcome;
|
|
101
|
+
usage?: {
|
|
102
|
+
promptTokens?: number;
|
|
103
|
+
outputTokens?: number;
|
|
104
|
+
};
|
|
105
|
+
actualCostCents?: number;
|
|
106
|
+
/** Actual latency of the operation in ms. Used by adaptive tuning. */
|
|
107
|
+
latencyMs?: number;
|
|
108
|
+
}
|
|
109
|
+
interface GovernorSnapshot {
|
|
110
|
+
/** Timestamp when the snapshot was taken. */
|
|
111
|
+
timestamp: number;
|
|
112
|
+
/** Number of active leases. */
|
|
113
|
+
activeLeases: number;
|
|
114
|
+
concurrency: {
|
|
115
|
+
/** Current in-flight weight (sum of all active lease weights). */
|
|
116
|
+
inFlightWeight: number;
|
|
117
|
+
/** Current in-flight count (number of active leases). */
|
|
118
|
+
inFlightCount: number;
|
|
119
|
+
/** Available weight capacity. */
|
|
120
|
+
available: number;
|
|
121
|
+
/** Configured max in-flight weight. */
|
|
122
|
+
max: number;
|
|
123
|
+
/** Effective max (may be lower when adaptive is active). */
|
|
124
|
+
effectiveMax: number;
|
|
125
|
+
/** @deprecated Use `inFlightWeight` instead. */
|
|
126
|
+
active: number;
|
|
127
|
+
} | null;
|
|
128
|
+
requestRate: {
|
|
129
|
+
current: number;
|
|
130
|
+
limit: number;
|
|
131
|
+
} | null;
|
|
132
|
+
tokenRate: {
|
|
133
|
+
current: number;
|
|
134
|
+
limit: number;
|
|
135
|
+
} | null;
|
|
136
|
+
fairness: boolean;
|
|
137
|
+
adaptive: boolean;
|
|
138
|
+
/** Most recent deny event, if any. */
|
|
139
|
+
lastDeny: {
|
|
140
|
+
reason: DenyReason;
|
|
141
|
+
timestamp: number;
|
|
142
|
+
actorId?: string;
|
|
143
|
+
} | null;
|
|
144
|
+
}
|
|
145
|
+
type GovernorEventType = "acquire" | "deny" | "release" | "expire" | "warn";
|
|
146
|
+
interface GovernorEvent {
|
|
147
|
+
type: GovernorEventType;
|
|
148
|
+
timestamp: number;
|
|
149
|
+
leaseId?: string;
|
|
150
|
+
actorId?: string;
|
|
151
|
+
action?: string;
|
|
152
|
+
reason?: DenyReason;
|
|
153
|
+
retryAfterMs?: number;
|
|
154
|
+
/** Recommendation string (only for "deny" events). */
|
|
155
|
+
recommendation?: string;
|
|
156
|
+
weight?: number;
|
|
157
|
+
outcome?: LeaseOutcome;
|
|
158
|
+
/** Warning message (only for "warn" events). */
|
|
159
|
+
message?: string;
|
|
160
|
+
}
|
|
161
|
+
type GovernorEventHandler = (event: GovernorEvent) => void;
|
|
162
|
+
|
|
163
|
+
declare class Governor {
|
|
164
|
+
private readonly _store;
|
|
165
|
+
private readonly _concurrency;
|
|
166
|
+
private readonly _rate;
|
|
167
|
+
private readonly _tokenRate;
|
|
168
|
+
private readonly _fairness;
|
|
169
|
+
private readonly _adaptive;
|
|
170
|
+
private readonly _ttlMs;
|
|
171
|
+
private readonly _onEvent;
|
|
172
|
+
private readonly _strict;
|
|
173
|
+
/** Track recently-released lease IDs in strict mode (to detect double release). */
|
|
174
|
+
private readonly _releasedIds;
|
|
175
|
+
/** Max size of _releasedIds before pruning (prevent unbounded growth). */
|
|
176
|
+
private static readonly _RELEASED_CACHE_SIZE;
|
|
177
|
+
/** Track the most recent deny for snapshot(). */
|
|
178
|
+
private _lastDeny;
|
|
179
|
+
constructor(config: GovernorConfig);
|
|
180
|
+
acquire(request: AcquireRequest): AcquireDecision;
|
|
181
|
+
release(leaseId: string, report?: ReleaseReport): void;
|
|
182
|
+
get activeLeases(): number;
|
|
183
|
+
/** Current in-flight weight (or count when all weights are 1). */
|
|
184
|
+
get concurrencyActive(): number;
|
|
185
|
+
/** Available weight capacity. */
|
|
186
|
+
get concurrencyAvailable(): number;
|
|
187
|
+
/** Effective concurrency limit (may be lower than configured max when adaptive is active). */
|
|
188
|
+
get concurrencyEffectiveMax(): number;
|
|
189
|
+
get rateCount(): number;
|
|
190
|
+
get rateLimit(): number;
|
|
191
|
+
/** Current tokens consumed in the active window. */
|
|
192
|
+
get tokenRateCount(): number;
|
|
193
|
+
/** Token-rate limit. */
|
|
194
|
+
get tokenRateLimit(): number;
|
|
195
|
+
/** Return a read-only snapshot of current governor state. */
|
|
196
|
+
snapshot(): GovernorSnapshot;
|
|
197
|
+
dispose(): void;
|
|
198
|
+
private _rollbackConcurrency;
|
|
199
|
+
/** Estimate total tokens from the request's estimate. */
|
|
200
|
+
private _estimateTokens;
|
|
201
|
+
private _onExpired;
|
|
202
|
+
private _recordDeny;
|
|
203
|
+
private _emit;
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
export { type AcquireDecision as A, type ConcurrencyConfig as C, type DenyReason as D, type FairnessConfig as F, type GovernorConfig as G, type LeaseOutcome as L, type Priority as P, type RateConfig as R, type TokenEstimate as T, Governor as a, type AcquireRequest as b, type GovernorEvent as c, type GovernorSnapshot as d, type AdaptiveConfig as e, type Constraints as f, type GovernorEventHandler as g, type GovernorEventType as h, type LimitsHint as i, type ReleaseReport as j };
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
interface ConcurrencyConfig {
|
|
2
|
+
/** Maximum total in-flight weight (equals the number of in-flight leases when every lease has weight 1). */
|
|
3
|
+
maxInFlight: number;
|
|
4
|
+
/** Slots reserved for interactive priority (default 0). */
|
|
5
|
+
interactiveReserve?: number;
|
|
6
|
+
}
|
|
7
|
+
interface RateConfig {
|
|
8
|
+
/** Maximum requests allowed within the rolling window. */
|
|
9
|
+
requestsPerMinute?: number;
|
|
10
|
+
/** Maximum tokens allowed within the rolling window. */
|
|
11
|
+
tokensPerMinute?: number;
|
|
12
|
+
/** Rolling window size in ms (default 60 000). */
|
|
13
|
+
windowMs?: number;
|
|
14
|
+
}
|
|
15
|
+
interface FairnessConfig {
|
|
16
|
+
/** Fraction of maxInFlight weight an actor can hold before soft-cap (default 0.6). */
|
|
17
|
+
softCapRatio?: number;
|
|
18
|
+
/** How long a denied actor gets priority boost in ms (default 5 000). */
|
|
19
|
+
starvationWindowMs?: number;
|
|
20
|
+
}
|
|
21
|
+
interface AdaptiveConfig {
|
|
22
|
+
/** EMA smoothing factor (0–1, default 0.2). Higher = more responsive. */
|
|
23
|
+
alpha?: number;
|
|
24
|
+
/** Target deny rate (0–1, default 0.05). Above this → reduce concurrency. */
|
|
25
|
+
targetDenyRate?: number;
|
|
26
|
+
/** Latency increase ratio that triggers reduction (default 1.5). */
|
|
27
|
+
latencyThreshold?: number;
|
|
28
|
+
/** How often to recalculate in ms (default 5 000). */
|
|
29
|
+
adjustIntervalMs?: number;
|
|
30
|
+
/** Minimum effective concurrency (default 1). */
|
|
31
|
+
minConcurrency?: number;
|
|
32
|
+
}
|
|
33
|
+
interface GovernorConfig {
|
|
34
|
+
concurrency?: ConcurrencyConfig;
|
|
35
|
+
rate?: RateConfig;
|
|
36
|
+
/** Fairness settings. Set to `true` for defaults, or pass a config object. Only active when concurrency is configured. */
|
|
37
|
+
fairness?: boolean | FairnessConfig;
|
|
38
|
+
/** Adaptive concurrency tuning. Set to `true` for defaults, or pass a config. Only active when concurrency is configured. */
|
|
39
|
+
adaptive?: boolean | AdaptiveConfig;
|
|
40
|
+
/** Lease time-to-live in ms (default 60 000). */
|
|
41
|
+
leaseTtlMs?: number;
|
|
42
|
+
/** How often the reaper sweeps expired leases in ms (default 5 000). */
|
|
43
|
+
reaperIntervalMs?: number;
|
|
44
|
+
/** Optional event handler. Receives structured events for acquire/deny/release/expire, plus "warn" events in strict mode. No logging by default. */
|
|
45
|
+
onEvent?: GovernorEventHandler;
|
|
46
|
+
/**
|
|
47
|
+
* Enable strict mode for development.
|
|
48
|
+
*
|
|
49
|
+
* When `true`:
|
|
50
|
+
* - Double release throws an error
|
|
51
|
+
* - Releasing an unknown lease ID throws an error
|
|
52
|
+
* - Long-held leases (>80% of TTL) emit a "warn" event via onEvent
|
|
53
|
+
*/
|
|
54
|
+
strict?: boolean;
|
|
55
|
+
}
|
|
56
|
+
type Priority = "interactive" | "background";
|
|
57
|
+
type DenyReason = "concurrency" | "rate" | "budget" | "policy";
|
|
58
|
+
type LeaseOutcome = "success" | "error" | "timeout" | "cancelled";
|
|
59
|
+
interface TokenEstimate {
|
|
60
|
+
promptTokens?: number;
|
|
61
|
+
maxOutputTokens?: number;
|
|
62
|
+
/** Concurrency weight for this call (default 1). Heavy calls consume more capacity. */
|
|
63
|
+
weight?: number;
|
|
64
|
+
}
|
|
65
|
+
interface AcquireRequest {
|
|
66
|
+
actorId: string;
|
|
67
|
+
action: string;
|
|
68
|
+
estimate?: TokenEstimate;
|
|
69
|
+
idempotencyKey?: string;
|
|
70
|
+
priority?: Priority;
|
|
71
|
+
}
|
|
72
|
+
interface Constraints {
|
|
73
|
+
maxOutputTokens?: number;
|
|
74
|
+
}
|
|
75
|
+
/** Structured hint about the limits that caused a denial. */
|
|
76
|
+
interface LimitsHint {
|
|
77
|
+
/** Current in-flight weight (concurrency denials). */
|
|
78
|
+
inFlight?: number;
|
|
79
|
+
/** Maximum allowed in-flight weight (concurrency denials). */
|
|
80
|
+
maxInFlight?: number;
|
|
81
|
+
/** Current request count in window (rate denials). */
|
|
82
|
+
rateUsed?: number;
|
|
83
|
+
/** Rate limit (rate denials). */
|
|
84
|
+
rateLimit?: number;
|
|
85
|
+
}
|
|
86
|
+
type AcquireDecision = {
|
|
87
|
+
granted: true;
|
|
88
|
+
leaseId: string;
|
|
89
|
+
expiresAt: number;
|
|
90
|
+
constraints?: Constraints;
|
|
91
|
+
} | {
|
|
92
|
+
granted: false;
|
|
93
|
+
reason: DenyReason;
|
|
94
|
+
retryAfterMs: number;
|
|
95
|
+
recommendation: string;
|
|
96
|
+
/** Structured hint about the limits that caused the denial. */
|
|
97
|
+
limitsHint?: LimitsHint;
|
|
98
|
+
};
|
|
99
|
+
interface ReleaseReport {
|
|
100
|
+
outcome: LeaseOutcome;
|
|
101
|
+
usage?: {
|
|
102
|
+
promptTokens?: number;
|
|
103
|
+
outputTokens?: number;
|
|
104
|
+
};
|
|
105
|
+
actualCostCents?: number;
|
|
106
|
+
/** Actual latency of the operation in ms. Used by adaptive tuning. */
|
|
107
|
+
latencyMs?: number;
|
|
108
|
+
}
|
|
109
|
+
interface GovernorSnapshot {
|
|
110
|
+
/** Timestamp when the snapshot was taken. */
|
|
111
|
+
timestamp: number;
|
|
112
|
+
/** Number of active leases. */
|
|
113
|
+
activeLeases: number;
|
|
114
|
+
concurrency: {
|
|
115
|
+
/** Current in-flight weight (sum of all active lease weights). */
|
|
116
|
+
inFlightWeight: number;
|
|
117
|
+
/** Current in-flight count (number of active leases). */
|
|
118
|
+
inFlightCount: number;
|
|
119
|
+
/** Available weight capacity. */
|
|
120
|
+
available: number;
|
|
121
|
+
/** Configured max in-flight weight. */
|
|
122
|
+
max: number;
|
|
123
|
+
/** Effective max (may be lower when adaptive is active). */
|
|
124
|
+
effectiveMax: number;
|
|
125
|
+
/** @deprecated Use `inFlightWeight` instead. */
|
|
126
|
+
active: number;
|
|
127
|
+
} | null;
|
|
128
|
+
requestRate: {
|
|
129
|
+
current: number;
|
|
130
|
+
limit: number;
|
|
131
|
+
} | null;
|
|
132
|
+
tokenRate: {
|
|
133
|
+
current: number;
|
|
134
|
+
limit: number;
|
|
135
|
+
} | null;
|
|
136
|
+
fairness: boolean;
|
|
137
|
+
adaptive: boolean;
|
|
138
|
+
/** Most recent deny event, if any. */
|
|
139
|
+
lastDeny: {
|
|
140
|
+
reason: DenyReason;
|
|
141
|
+
timestamp: number;
|
|
142
|
+
actorId?: string;
|
|
143
|
+
} | null;
|
|
144
|
+
}
|
|
145
|
+
type GovernorEventType = "acquire" | "deny" | "release" | "expire" | "warn";
|
|
146
|
+
interface GovernorEvent {
|
|
147
|
+
type: GovernorEventType;
|
|
148
|
+
timestamp: number;
|
|
149
|
+
leaseId?: string;
|
|
150
|
+
actorId?: string;
|
|
151
|
+
action?: string;
|
|
152
|
+
reason?: DenyReason;
|
|
153
|
+
retryAfterMs?: number;
|
|
154
|
+
/** Recommendation string (only for "deny" events). */
|
|
155
|
+
recommendation?: string;
|
|
156
|
+
weight?: number;
|
|
157
|
+
outcome?: LeaseOutcome;
|
|
158
|
+
/** Warning message (only for "warn" events). */
|
|
159
|
+
message?: string;
|
|
160
|
+
}
|
|
161
|
+
type GovernorEventHandler = (event: GovernorEvent) => void;
|
|
162
|
+
|
|
163
|
+
declare class Governor {
|
|
164
|
+
private readonly _store;
|
|
165
|
+
private readonly _concurrency;
|
|
166
|
+
private readonly _rate;
|
|
167
|
+
private readonly _tokenRate;
|
|
168
|
+
private readonly _fairness;
|
|
169
|
+
private readonly _adaptive;
|
|
170
|
+
private readonly _ttlMs;
|
|
171
|
+
private readonly _onEvent;
|
|
172
|
+
private readonly _strict;
|
|
173
|
+
/** Track recently-released lease IDs in strict mode (to detect double release). */
|
|
174
|
+
private readonly _releasedIds;
|
|
175
|
+
/** Max size of _releasedIds before pruning (prevent unbounded growth). */
|
|
176
|
+
private static readonly _RELEASED_CACHE_SIZE;
|
|
177
|
+
/** Track the most recent deny for snapshot(). */
|
|
178
|
+
private _lastDeny;
|
|
179
|
+
constructor(config: GovernorConfig);
|
|
180
|
+
acquire(request: AcquireRequest): AcquireDecision;
|
|
181
|
+
release(leaseId: string, report?: ReleaseReport): void;
|
|
182
|
+
get activeLeases(): number;
|
|
183
|
+
/** Current in-flight weight (or count when all weights are 1). */
|
|
184
|
+
get concurrencyActive(): number;
|
|
185
|
+
/** Available weight capacity. */
|
|
186
|
+
get concurrencyAvailable(): number;
|
|
187
|
+
/** Effective concurrency limit (may be lower than configured max when adaptive is active). */
|
|
188
|
+
get concurrencyEffectiveMax(): number;
|
|
189
|
+
get rateCount(): number;
|
|
190
|
+
get rateLimit(): number;
|
|
191
|
+
/** Current tokens consumed in the active window. */
|
|
192
|
+
get tokenRateCount(): number;
|
|
193
|
+
/** Token-rate limit. */
|
|
194
|
+
get tokenRateLimit(): number;
|
|
195
|
+
/** Return a read-only snapshot of current governor state. */
|
|
196
|
+
snapshot(): GovernorSnapshot;
|
|
197
|
+
dispose(): void;
|
|
198
|
+
private _rollbackConcurrency;
|
|
199
|
+
/** Estimate total tokens from the request's estimate. */
|
|
200
|
+
private _estimateTokens;
|
|
201
|
+
private _onExpired;
|
|
202
|
+
private _recordDeny;
|
|
203
|
+
private _emit;
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
export { type AcquireDecision as A, type ConcurrencyConfig as C, type DenyReason as D, type FairnessConfig as F, type GovernorConfig as G, type LeaseOutcome as L, type Priority as P, type RateConfig as R, type TokenEstimate as T, Governor as a, type AcquireRequest as b, type GovernorEvent as c, type GovernorSnapshot as d, type AdaptiveConfig as e, type Constraints as f, type GovernorEventHandler as g, type GovernorEventType as h, type LimitsHint as i, type ReleaseReport as j };
|