@ekaone/llm-gate 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +345 -0
- package/dist/index.cjs +2 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +116 -0
- package/dist/index.d.ts +116 -0
- package/dist/index.mjs +2 -0
- package/dist/index.mjs.map +1 -0
- package/package.json +66 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Eka Prasetia <ekaone3033@gmail.com> (https://prasetia.me)
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,345 @@
|
|
|
1
|
+
# @ekaone/llm-gate
|
|
2
|
+
|
|
3
|
+
![@ekaone/llm-gate banner](https://res.cloudinary.com/ddjsyskef/image/upload/v1774001133/Github/gqyyegcemo7sc2eggson.png)
|
|
4
|
+
|
|
5
|
+
> Lightweight LLM budget & token guard. Prevents **Denial of Wallet** attacks with a zero-dependency circuit-breaker state machine.
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Why
|
|
10
|
+
|
|
11
|
+
AI agents can silently burn through your API credits if they enter infinite loops or receive malicious prompts designed to trigger excessive token consumption. `@ekaone/llm-gate` wraps every LLM call with a stateful circuit breaker that tracks token usage, cost, and request frequency — then trips before the damage is done.
|
|
12
|
+
|
|
13
|
+
```
|
|
14
|
+
OPEN ──(80% threshold)──► THROTTLED ──(100% limit)──► TRIPPED
|
|
15
|
+
▲ │
|
|
16
|
+
└──────────────────(windowMs elapsed)───────────────────┘
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## Install
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
npm install @ekaone/llm-gate
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
pnpm install @ekaone/llm-gate
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
yarn add @ekaone/llm-gate
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
## Quick Start
|
|
38
|
+
|
|
39
|
+
```ts
|
|
40
|
+
import { createGate } from "@ekaone/llm-gate"
|
|
41
|
+
|
|
42
|
+
const gate = createGate({
|
|
43
|
+
maxTokens: 50_000, // trip at 50k tokens per window
|
|
44
|
+
maxBudget: 0.10, // trip at $0.10 USD per window
|
|
45
|
+
maxRequests: 100, // trip at 100 requests per window
|
|
46
|
+
windowMs: 60_000, // 1 minute fixed window (counters reset once it elapses)
|
|
47
|
+
|
|
48
|
+
onThrottled: (status) => console.warn("⚠️ Approaching limit", status.tokens),
|
|
49
|
+
onTripped: (status) => console.error("🚫 Gate tripped!", status.reason),
|
|
50
|
+
onReset: (status) => console.log("✅ Gate reset, window fresh"),
|
|
51
|
+
})
|
|
52
|
+
|
|
53
|
+
// After every LLM response — feed usage back into the gate
|
|
54
|
+
gate.record({
|
|
55
|
+
model: "claude-sonnet-4-20250514",
|
|
56
|
+
inputTokens: 312,
|
|
57
|
+
outputTokens: 89,
|
|
58
|
+
})
|
|
59
|
+
|
|
60
|
+
// Before the next LLM call — check the gate
|
|
61
|
+
const status = gate.check()
|
|
62
|
+
if (!status.allowed) {
|
|
63
|
+
console.log(`Blocked. Resets at ${status.resets.toLocaleTimeString()}`)
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
// Or throw-style for agent pipelines
|
|
67
|
+
gate.guard() // throws BudgetExceededError if TRIPPED
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
## API
|
|
73
|
+
|
|
74
|
+
### `createGate(options)`
|
|
75
|
+
|
|
76
|
+
Creates a new gate instance. At least one of `maxTokens`, `maxBudget`, or `maxRequests` is required.
|
|
77
|
+
|
|
78
|
+
```ts
|
|
79
|
+
const gate = createGate(options: GateOptions): GateInstance
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
#### `GateOptions`
|
|
83
|
+
|
|
84
|
+
| Option | Type | Default | Description |
|
|
85
|
+
|---|---|---|---|
|
|
86
|
+
| `maxTokens` | `number` | — | Max total tokens (input + output) per window |
|
|
87
|
+
| `maxBudget` | `number` | — | Max cost in USD per window |
|
|
88
|
+
| `maxRequests` | `number` | — | Max LLM calls per window |
|
|
89
|
+
| `windowMs` | `number` | `60_000` | Window duration in milliseconds |
|
|
90
|
+
| `throttleAt` | `number` | `0.8` | Fraction of limit that triggers `THROTTLED` (0.0–1.0) |
|
|
91
|
+
| `pricing` | `PricingTable` | built-in | Custom model pricing — merged over defaults |
|
|
92
|
+
| `onThrottled` | `(status) => void` | — | Fires once on entry to `THROTTLED` state |
|
|
93
|
+
| `onTripped` | `(status) => void` | — | Fires once on entry to `TRIPPED` state |
|
|
94
|
+
| `onReset` | `(status) => void` | — | Fires when `reset()` is called, or when an expired window rolls over while the gate was `THROTTLED` or `TRIPPED` |
|
|
95
|
+
|
|
96
|
+
---
|
|
97
|
+
|
|
98
|
+
### `gate.record(usage)`
|
|
99
|
+
|
|
100
|
+
Feed token usage from an LLM response back into the gate. Call this after every successful LLM response.
|
|
101
|
+
|
|
102
|
+
```ts
|
|
103
|
+
gate.record({
|
|
104
|
+
model: "claude-sonnet-4-20250514",
|
|
105
|
+
inputTokens: 312,
|
|
106
|
+
outputTokens: 89,
|
|
107
|
+
})
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
For convenience, use the built-in adapters to map provider responses directly:
|
|
111
|
+
|
|
112
|
+
```ts
|
|
113
|
+
import { fromAnthropic, fromOpenAI, fromResponse } from "@ekaone/llm-gate"
|
|
114
|
+
|
|
115
|
+
// Anthropic
|
|
116
|
+
gate.record(fromAnthropic(anthropicResponse))
|
|
117
|
+
|
|
118
|
+
// OpenAI
|
|
119
|
+
gate.record(fromOpenAI(openaiResponse))
|
|
120
|
+
|
|
121
|
+
// Auto-detect provider from response shape
|
|
122
|
+
gate.record(fromResponse(anyResponse))
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
---
|
|
126
|
+
|
|
127
|
+
### `gate.check()`
|
|
128
|
+
|
|
129
|
+
Returns the current `GateStatus` — never throws.
|
|
130
|
+
|
|
131
|
+
```ts
|
|
132
|
+
const status = gate.check()
|
|
133
|
+
|
|
134
|
+
// status.state → "OPEN" | "THROTTLED" | "TRIPPED"
|
|
135
|
+
// status.allowed → boolean
|
|
136
|
+
// status.reason → string | null
|
|
137
|
+
// status.tokens → { used, remaining, limit }
|
|
138
|
+
// status.budget → { used, remaining, limit }
|
|
139
|
+
// status.requests → { used, remaining, limit }
|
|
140
|
+
// status.resets → Date
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
#### Example status when TRIPPED
|
|
144
|
+
|
|
145
|
+
```ts
|
|
146
|
+
{
|
|
147
|
+
state: "TRIPPED",
|
|
148
|
+
allowed: false,
|
|
149
|
+
reason: "token_limit_exceeded",
|
|
150
|
+
tokens: { used: 51_200, remaining: 0, limit: 50_000 },
|
|
151
|
+
budget: { used: 0.094, remaining: 0.006, limit: 0.10 },
|
|
152
|
+
requests: { used: 87, remaining: 13, limit: 100 },
|
|
153
|
+
resets: Date <2026-03-20T09:01:00Z>
|
|
154
|
+
}
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
---
|
|
158
|
+
|
|
159
|
+
### `gate.guard()`
|
|
160
|
+
|
|
161
|
+
Check and throw `BudgetExceededError` if the gate is `TRIPPED`. Ideal for agent pipelines where you want to bail early.
|
|
162
|
+
|
|
163
|
+
```ts
|
|
164
|
+
try {
|
|
165
|
+
gate.guard()
|
|
166
|
+
const response = await llm.call(prompt)
|
|
167
|
+
gate.record(fromAnthropic(response))
|
|
168
|
+
} catch (err) {
|
|
169
|
+
if (err instanceof BudgetExceededError) {
|
|
170
|
+
console.log(err.reason) // "token_limit_exceeded"
|
|
171
|
+
console.log(err.resets) // Date
|
|
172
|
+
console.log(err.snapshot) // full GateStatus at time of trip
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
---
|
|
178
|
+
|
|
179
|
+
### `gate.snapshot()`
|
|
180
|
+
|
|
181
|
+
Read-only view of current state. Returns the same `GateStatus` shape as `check()`, but never rolls over an expired window or fires `onReset` — it semantically signals "I'm just observing."
|
|
182
|
+
|
|
183
|
+
```ts
|
|
184
|
+
const snap = gate.snapshot()
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
---
|
|
188
|
+
|
|
189
|
+
### `gate.reset()`
|
|
190
|
+
|
|
191
|
+
Manually reset the gate to `OPEN` and clear all counters. Useful for new user sessions or test teardown.
|
|
192
|
+
|
|
193
|
+
```ts
|
|
194
|
+
gate.reset()
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
---
|
|
198
|
+
|
|
199
|
+
## Built-in Adapters
|
|
200
|
+
|
|
201
|
+
Tree-shakeable — only bundled if imported.
|
|
202
|
+
|
|
203
|
+
```ts
|
|
204
|
+
import { fromAnthropic } from "@ekaone/llm-gate" // Anthropic only
|
|
205
|
+
import { fromOpenAI } from "@ekaone/llm-gate" // OpenAI only
|
|
206
|
+
import { fromResponse } from "@ekaone/llm-gate" // auto-detect
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
---
|
|
210
|
+
|
|
211
|
+
## Custom Pricing
|
|
212
|
+
|
|
213
|
+
The built-in pricing table covers common Anthropic and OpenAI models. Override or extend it via the `pricing` option:
|
|
214
|
+
|
|
215
|
+
```ts
|
|
216
|
+
const gate = createGate({
|
|
217
|
+
maxBudget: 1.00,
|
|
218
|
+
pricing: {
|
|
219
|
+
"my-fine-tuned-model": {
|
|
220
|
+
inputPerToken: 0.000005,
|
|
221
|
+
outputPerToken: 0.000015,
|
|
222
|
+
},
|
|
223
|
+
},
|
|
224
|
+
})
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
Custom entries are **merged** over the defaults — you only need to specify models you want to override.
|
|
228
|
+
|
|
229
|
+
---
|
|
230
|
+
|
|
231
|
+
## Real Use Cases
|
|
232
|
+
|
|
233
|
+
### 1. Autonomous agent loop guard
|
|
234
|
+
|
|
235
|
+
```ts
|
|
236
|
+
const gate = createGate({
|
|
237
|
+
maxTokens: 200_000,
|
|
238
|
+
maxBudget: 0.50,
|
|
239
|
+
onTripped: (s) => alertOps("Agent loop detected", s),
|
|
240
|
+
})
|
|
241
|
+
|
|
242
|
+
while (agentHasWork()) {
|
|
243
|
+
gate.guard() // bail if budget blown
|
|
244
|
+
const res = await agent.step()
|
|
245
|
+
gate.record(fromAnthropic(res))
|
|
246
|
+
}
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
### 2. Per-user session budget
|
|
250
|
+
|
|
251
|
+
```ts
|
|
252
|
+
const sessions = new Map<string, ReturnType<typeof createGate>>()
|
|
253
|
+
|
|
254
|
+
function getGate(userId: string) {
|
|
255
|
+
if (!sessions.has(userId)) {
|
|
256
|
+
sessions.set(userId, createGate({
|
|
257
|
+
maxTokens: 20_000,
|
|
258
|
+
windowMs: 24 * 60 * 60 * 1000, // 24h
|
|
259
|
+
}))
|
|
260
|
+
}
|
|
261
|
+
return sessions.get(userId)!
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
// In your chat handler
|
|
265
|
+
const gate = getGate(req.userId)
|
|
266
|
+
const status = gate.check()
|
|
267
|
+
if (!status.allowed) {
|
|
268
|
+
return res.status(429).json({
|
|
269
|
+
error: "Daily limit reached",
|
|
270
|
+
resets: status.resets,
|
|
271
|
+
})
|
|
272
|
+
}
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
### 3. Multi-step RAG pipeline
|
|
276
|
+
|
|
277
|
+
```ts
|
|
278
|
+
const gate = createGate({ maxTokens: 10_000, maxRequests: 5 })
|
|
279
|
+
|
|
280
|
+
const retrieved = await retrieve(query); gate.record(fromAnthropic(retrieved))
|
|
281
|
+
const reranked = await rerank(retrieved); gate.record(fromAnthropic(reranked))
|
|
282
|
+
const summarized = await summarize(reranked); gate.record(fromAnthropic(summarized))
|
|
283
|
+
|
|
284
|
+
gate.guard() // only answer if still within budget
|
|
285
|
+
const result = await answer(summarized, query); gate.record(fromAnthropic(result))
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
### 4. Dev/test budget cap
|
|
289
|
+
|
|
290
|
+
```ts
|
|
291
|
+
// vitest setup
|
|
292
|
+
import { createGate } from "@ekaone/llm-gate"
|
|
293
|
+
|
|
294
|
+
export const testGate = createGate({
|
|
295
|
+
maxBudget: 0.05, // $0.05 max spend per test run
|
|
296
|
+
onTripped: () => { throw new Error("Test suite exceeded LLM budget!") }
|
|
297
|
+
})
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
---
|
|
301
|
+
|
|
302
|
+
## How Token Counting Works
|
|
303
|
+
|
|
304
|
+
`@ekaone/llm-gate` does **not** tokenize text. It reads the `usage` field that every LLM provider returns in the response — this is the authoritative count from the model itself.
|
|
305
|
+
|
|
306
|
+
```
|
|
307
|
+
You set maxTokens: 50_000
|
|
308
|
+
↓
|
|
309
|
+
LLM call happens (gate doesn't intercept this)
|
|
310
|
+
↓
|
|
311
|
+
API returns usage → { input_tokens: 312, output_tokens: 89 }
|
|
312
|
+
↓
|
|
313
|
+
gate.record(...) → tokensUsed += 312 + 89 = 401
|
|
314
|
+
↓
|
|
315
|
+
gate.check() → 401 < 50_000 → OPEN ✅
|
|
316
|
+
```
|
|
317
|
+
|
|
318
|
+
This means the gate cannot prevent a single oversized request — it stops the **next** call after the limit is hit. This is a deliberate tradeoff: no tokenizer dependency, works across all providers, zero overhead.
|
|
319
|
+
|
|
320
|
+
---
|
|
321
|
+
|
|
322
|
+
## TypeScript
|
|
323
|
+
|
|
324
|
+
Fully typed. All types are exported:
|
|
325
|
+
|
|
326
|
+
```ts
|
|
327
|
+
import type {
|
|
328
|
+
GateOptions,
|
|
329
|
+
GateInstance,
|
|
330
|
+
GateStatus,
|
|
331
|
+
GateMetric,
|
|
332
|
+
CircuitState,
|
|
333
|
+
TripReason,
|
|
334
|
+
ThrottleReason,
|
|
335
|
+
UsageRecord,
|
|
336
|
+
ModelPricing,
|
|
337
|
+
PricingTable,
|
|
338
|
+
} from "@ekaone/llm-gate"
|
|
339
|
+
```
|
|
340
|
+
|
|
341
|
+
---
|
|
342
|
+
|
|
343
|
+
## License
|
|
344
|
+
|
|
345
|
+
MIT © [ekaone](https://github.com/ekaone)
|
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
'use strict';
/*
 * @ekaone/llm-gate — CommonJS build.
 *
 * Expanded from the single-line minified bundle so it can be read and
 * reviewed. NOTE: index.cjs.map was generated for the minified layout, so its
 * mappings do not line up with this formatting.
 */

/**
 * Error thrown by `guard()` when the gate is TRIPPED.
 *
 * Instance fields:
 *  - `reason`   — trip reason copied from the status
 *  - `resets`   — Date at which the current window ends
 *  - `snapshot` — the full status object the error was built from
 */
class BudgetExceededError extends Error {
  /**
   * @param {object} status Gate status; `reason` and `resets` are read from it.
   */
  constructor(status) {
    super(`LLM gate tripped: ${status.reason ?? "limit_exceeded"}. Resets at ${status.resets.toISOString()}.`);
    this.name = "BudgetExceededError";
    this.reason = status.reason;
    this.resets = status.resets;
    this.snapshot = status;
    // Keep `instanceof` working when the class is transpiled to ES5.
    Object.setPrototypeOf(this, new.target.prototype);
  }
}

/**
 * Built-in pricing table: USD per token, keyed by model name.
 * Merged under any `options.pricing` passed to `createGate`.
 */
const defaultPricing = {
  // Anthropic
  "claude-opus-4-20250514": { inputPerToken: 0.000015, outputPerToken: 0.000075 },
  "claude-sonnet-4-20250514": { inputPerToken: 0.000003, outputPerToken: 0.000015 },
  "claude-haiku-4-5-20251001": { inputPerToken: 0.0000008, outputPerToken: 0.000004 },
  // OpenAI
  "gpt-4o": { inputPerToken: 0.0000025, outputPerToken: 0.00001 },
  "gpt-4o-mini": { inputPerToken: 0.00000015, outputPerToken: 0.0000006 },
  "gpt-4-turbo": { inputPerToken: 0.00001, outputPerToken: 0.00003 },
  o3: { inputPerToken: 0.00001, outputPerToken: 0.00004 },
  "o4-mini": { inputPerToken: 0.0000011, outputPerToken: 0.0000044 },
};

/**
 * Resolve the USD cost of one call.
 *
 * @param {string} model         Model name used as the key into `pricing`.
 * @param {number} inputTokens   Prompt/input token count.
 * @param {number} outputTokens  Completion/output token count.
 * @param {object} pricing       Pricing table (`{ [model]: { inputPerToken, outputPerToken } }`).
 * @returns {number} The cost, or 0 when `model` has no entry of its own in `pricing`.
 */
function resolveCost(model, inputTokens, outputTokens, pricing) {
  // Only own keys count: on a plain object, names such as "constructor" or
  // "toString" would otherwise resolve to inherited Object.prototype members
  // and produce a NaN cost.
  if (!Object.prototype.hasOwnProperty.call(pricing, model)) return 0;
  const entry = pricing[model];
  if (!entry) return 0;
  return inputTokens * entry.inputPerToken + outputTokens * entry.outputPerToken;
}

const DEFAULT_WINDOW_MS = 60000; // 1 minute
const DEFAULT_THROTTLE = 0.8; // fraction of a limit that triggers THROTTLED
const SENTINEL_LIMIT = Infinity; // marks a dimension that was not configured

/**
 * Build the `{ used, remaining, limit }` view of one dimension.
 * An unconfigured dimension reports `remaining: Infinity` and `limit: -1`.
 */
function makeMetric(used, limit) {
  const unlimited = limit === Infinity;
  return {
    used,
    remaining: unlimited ? Infinity : Math.max(0, limit - used),
    limit: unlimited ? -1 : limit,
  };
}

/**
 * Normalise a usage figure before it is accumulated.
 * Anything that is not a finite positive number counts as 0, so one malformed
 * record cannot turn a counter into NaN (which would make every `>=` limit
 * comparison false for the rest of the window).
 */
function toCount(value) {
  return Number.isFinite(value) && value > 0 ? value : 0;
}

/**
 * Create a gate that tracks tokens, cost and request count per fixed window.
 *
 * @param {object} [options] See `GateOptions` in the type declarations.
 * @returns {{record: Function, check: Function, guard: Function, snapshot: Function, reset: Function}}
 * @throws {Error} When none of `maxTokens`, `maxBudget`, `maxRequests` is set.
 */
function createGate(options = {}) {
  const maxTokens = options.maxTokens ?? SENTINEL_LIMIT;
  const maxBudget = options.maxBudget ?? SENTINEL_LIMIT;
  const maxRequests = options.maxRequests ?? SENTINEL_LIMIT;
  const windowMs = options.windowMs ?? DEFAULT_WINDOW_MS;
  const throttleAt = options.throttleAt ?? DEFAULT_THROTTLE;
  const pricing = { ...defaultPricing, ...options.pricing };

  if (
    maxTokens === SENTINEL_LIMIT &&
    maxBudget === SENTINEL_LIMIT &&
    maxRequests === SENTINEL_LIMIT
  ) {
    throw new Error("[llm-gate] At least one limit must be set: maxTokens, maxBudget, or maxRequests.");
  }

  let tokensUsed = 0;
  let budgetUsed = 0;
  let requestCount = 0;
  let state = "OPEN";
  let windowStart = Date.now();

  /** Start a fresh window once `windowMs` has elapsed; notifies `onReset` only if the gate was not OPEN. */
  function checkWindowReset() {
    const now = Date.now();
    if (now - windowStart >= windowMs) {
      const prevState = state;
      tokensUsed = 0;
      budgetUsed = 0;
      requestCount = 0;
      state = "OPEN";
      windowStart = now;
      if (prevState !== "OPEN") {
        options.onReset?.(buildStatus());
      }
    }
  }

  /** Assemble the status object from the current counters and state. */
  function buildStatus() {
    const resets = new Date(windowStart + windowMs);
    let reason = null;

    // The first matching dimension wins: tokens, then budget, then requests.
    if (state === "TRIPPED") {
      if (tokensUsed >= maxTokens) reason = "token_limit_exceeded";
      else if (budgetUsed >= maxBudget) reason = "budget_limit_exceeded";
      else reason = "request_limit_exceeded";
    } else if (state === "THROTTLED") {
      if (tokensUsed >= maxTokens * throttleAt) reason = "approaching_token_limit";
      else if (budgetUsed >= maxBudget * throttleAt) reason = "approaching_budget_limit";
      else reason = "approaching_request_limit";
    }

    return {
      state,
      allowed: state !== "TRIPPED",
      reason,
      tokens: makeMetric(tokensUsed, maxTokens),
      budget: makeMetric(budgetUsed, maxBudget),
      requests: makeMetric(requestCount, maxRequests),
      resets,
    };
  }

  /** Recompute the state from the counters and fire the callback on entry to a new state. */
  function evaluateState() {
    const prevState = state;

    // Hard limits first.
    if (tokensUsed >= maxTokens || budgetUsed >= maxBudget || requestCount >= maxRequests) {
      state = "TRIPPED";
      if (prevState !== "TRIPPED") {
        options.onTripped?.(buildStatus());
      }
      return;
    }

    // Soft warning threshold.
    if (
      tokensUsed >= maxTokens * throttleAt ||
      budgetUsed >= maxBudget * throttleAt ||
      requestCount >= maxRequests * throttleAt
    ) {
      state = "THROTTLED";
      if (prevState !== "THROTTLED") {
        options.onThrottled?.(buildStatus());
      }
      return;
    }

    state = "OPEN";
  }

  /** Add one call's usage to the window counters and re-evaluate the state. */
  function record(usage) {
    checkWindowReset();

    const inputTokens = toCount(usage.inputTokens);
    const outputTokens = toCount(usage.outputTokens);
    const cost = resolveCost(usage.model, inputTokens, outputTokens, pricing);

    tokensUsed += inputTokens + outputTokens;
    budgetUsed += toCount(cost);
    requestCount += 1;

    evaluateState();
  }

  /** Roll the window over if it expired, then return the current status. */
  function check() {
    checkWindowReset();
    return buildStatus();
  }

  /** Like `check()`, but throws `BudgetExceededError` when the gate is TRIPPED. */
  function guard() {
    checkWindowReset();
    const status = buildStatus();
    if (status.state === "TRIPPED") {
      throw new BudgetExceededError(status);
    }
  }

  /** Return the current status without performing the window-expiry check. */
  function snapshot() {
    return buildStatus();
  }

  /** Clear all counters, reopen the gate, start a new window and notify `onReset`. */
  function reset() {
    tokensUsed = 0;
    budgetUsed = 0;
    requestCount = 0;
    state = "OPEN";
    windowStart = Date.now();
    options.onReset?.(buildStatus());
  }

  return { record, check, guard, snapshot, reset };
}

/** Map an Anthropic-shaped response (`usage.input_tokens` / `usage.output_tokens`) to a usage record. */
function fromAnthropic(response) {
  return {
    model: response.model,
    inputTokens: response.usage.input_tokens,
    outputTokens: response.usage.output_tokens,
  };
}

/** Map an OpenAI-shaped response (`usage.prompt_tokens` / `usage.completion_tokens`) to a usage record. */
function fromOpenAI(response) {
  return {
    model: response.model,
    inputTokens: response.usage.prompt_tokens,
    outputTokens: response.usage.completion_tokens,
  };
}

/** True when the response's `usage` object carries an `input_tokens` key. */
function isAnthropic(response) {
  return "input_tokens" in response.usage;
}

/**
 * Pick the adapter from the response shape: Anthropic when `usage` has
 * `input_tokens`, OpenAI otherwise. `response.usage` must be an object.
 */
function fromResponse(response) {
  return isAnthropic(response) ? fromAnthropic(response) : fromOpenAI(response);
}

exports.BudgetExceededError = BudgetExceededError;
exports.createGate = createGate;
exports.defaultPricing = defaultPricing;
exports.fromAnthropic = fromAnthropic;
exports.fromOpenAI = fromOpenAI;
exports.fromResponse = fromResponse;
exports.resolveCost = resolveCost;
//# sourceMappingURL=index.cjs.map
|
|
2
|
+
//# sourceMappingURL=index.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/errors/index.ts","../src/pricing/index.ts","../src/gate.ts","../src/adapters/index.ts"],"names":["BudgetExceededError","status","defaultPricing","resolveCost","model","inputTokens","outputTokens","pricing","entry","DEFAULT_WINDOW_MS","DEFAULT_THROTTLE","SENTINEL_LIMIT","makeMetric","used","limit","createGate","options","maxTokens","maxBudget","maxRequests","windowMs","throttleAt","tokensUsed","budgetUsed","requestCount","state","windowStart","checkWindowReset","now","prevState","buildStatus","resets","reason","evaluateState","record","usage","cost","check","guard","snapshot","reset","fromAnthropic","response","fromOpenAI","isAnthropic","r","fromResponse"],"mappings":"aAEO,IAAMA,CAAAA,CAAN,cAAkC,KAAM,CAK7C,WAAA,CAAYC,CAAAA,CAAoB,CAC9B,KAAA,CACE,CAAA,kBAAA,EAAqBA,CAAAA,CAAO,MAAA,EAAU,gBAAgB,CAAA,YAAA,EACzCA,CAAAA,CAAO,MAAA,CAAO,WAAA,EAAa,CAAA,CAAA,CAC1C,CAAA,CACA,IAAA,CAAK,IAAA,CAAO,qBAAA,CACZ,IAAA,CAAK,MAAA,CAASA,CAAAA,CAAO,MAAA,CACrB,IAAA,CAAK,MAAA,CAASA,EAAO,MAAA,CACrB,IAAA,CAAK,QAAA,CAAWA,CAAAA,CAGhB,MAAA,CAAO,cAAA,CAAe,IAAA,CAAM,GAAA,CAAA,MAAA,CAAW,SAAS,EAClD,CACF,ECXO,IAAMC,CAAAA,CAA+B,CAE1C,wBAAA,CAA0B,CACxB,aAAA,CAAe,KAAA,CACf,cAAA,CAAgB,KAClB,CAAA,CACA,0BAAA,CAA4B,CAC1B,aAAA,CAAe,IAAA,CACf,cAAA,CAAgB,KAClB,CAAA,CACA,2BAAA,CAA6B,CAC3B,aAAA,CAAe,KACf,cAAA,CAAgB,IAClB,CAAA,CAGA,QAAA,CAAU,CACR,aAAA,CAAe,KAAA,CACf,cAAA,CAAgB,IAClB,CAAA,CACA,aAAA,CAAe,CACb,aAAA,CAAe,KAAA,CACf,cAAA,CAAgB,IAClB,CAAA,CACA,aAAA,CAAe,CACb,aAAA,CAAe,IAAA,CACf,cAAA,CAAgB,IAClB,CAAA,CACA,EAAA,CAAI,CACF,aAAA,CAAe,IAAA,CACf,cAAA,CAAgB,IAClB,CAAA,CACA,SAAA,CAAW,CACT,aAAA,CAAe,KAAA,CACf,cAAA,CAAgB,KAClB,CACF,EAMO,SAASC,CAAAA,CACdC,CAAAA,CACAC,CAAAA,CACAC,CAAAA,CACAC,CAAAA,CACQ,CACR,IAAMC,CAAAA,CAAQD,CAAAA,CAAQH,CAAK,CAAA,CAC3B,OAAKI,CAAAA,CAEHH,CAAAA,CAAcG,CAAAA,CAAM,aAAA,CAAgBF,CAAAA,CAAeE,CAAAA,CAAM,cAAA,CAFxC,CAIrB,CCjDA,IAAMC,CAAAA,CAAoB,GAAA,CACpBC,CAAAA,CAAmB,GACnBC,CAAAA,CAAiB,CAAA,CAAA,CAAA,CAEvB,SAASC,CAAAA,CAAWC,CAAAA,CAAcC,CAAAA,CAA2B,CAC3D,OAAO,CACL,
IAAA,CAAAD,CAAAA,CACA,SAAA,CAAWC,CAAAA,GAAU,CAAA,CAAA,CAAA,CAAW,CAAA,CAAA,CAAA,CAAW,IAAA,CAAK,GAAA,CAAI,CAAA,CAAGA,CAAAA,CAAQD,CAAI,CAAA,CACnE,KAAA,CAAOC,CAAAA,GAAU,CAAA,CAAA,CAAA,CAAW,EAAA,CAAKA,CACnC,CACF,CAkBO,SAASC,CAAAA,CAAWC,CAAAA,CAAuB,GAAkB,CAElE,IAAMC,CAAAA,CAAYD,CAAAA,CAAQ,SAAA,EAAaL,CAAAA,CACjCO,CAAAA,CAAYF,CAAAA,CAAQ,SAAA,EAAaL,CAAAA,CACjCQ,CAAAA,CAAcH,CAAAA,CAAQ,WAAA,EAAeL,CAAAA,CACrCS,CAAAA,CAAWJ,CAAAA,CAAQ,QAAA,EAAYP,CAAAA,CAC/BY,CAAAA,CAAaL,CAAAA,CAAQ,UAAA,EAAcN,CAAAA,CACnCH,CAAAA,CAAU,CAAE,GAAGL,CAAAA,CAAgB,GAAGc,CAAAA,CAAQ,OAAQ,CAAA,CAExD,GACEC,CAAAA,GAAcN,GACdO,CAAAA,GAAcP,CAAAA,EACdQ,CAAAA,GAAgBR,CAAAA,CAEhB,MAAM,IAAI,KAAA,CACR,kFACF,CAAA,CAIF,IAAIW,CAAAA,CAAa,CAAA,CACbC,CAAAA,CAAa,CAAA,CACbC,CAAAA,CAAe,CAAA,CACfC,CAAAA,CAAsB,MAAA,CACtBC,CAAAA,CAAc,IAAA,CAAK,GAAA,EAAI,CAG3B,SAASC,CAAAA,EAAyB,CAChC,IAAMC,CAAAA,CAAM,IAAA,CAAK,GAAA,EAAI,CACrB,GAAIA,CAAAA,CAAMF,GAAeN,CAAAA,CAAU,CACjC,IAAMS,CAAAA,CAAYJ,CAAAA,CAClBH,CAAAA,CAAa,CAAA,CACbC,CAAAA,CAAa,CAAA,CACbC,CAAAA,CAAe,CAAA,CACfC,CAAAA,CAAQ,MAAA,CACRC,CAAAA,CAAcE,CAAAA,CAEVC,CAAAA,GAAc,QAChBb,CAAAA,CAAQ,OAAA,GAAUc,CAAAA,EAAa,EAEnC,CACF,CAGA,SAASA,CAAAA,EAA0B,CACjC,IAAMC,CAAAA,CAAS,IAAI,IAAA,CAAKL,CAAAA,CAAcN,CAAQ,EAE1CY,CAAAA,CAAsC,IAAA,CAE1C,OAAIP,CAAAA,GAAU,SAAA,CACRH,CAAAA,EAAcL,CAAAA,CAAWe,CAAAA,CAAS,sBAAA,CAC7BT,CAAAA,EAAcL,CAAAA,CAAWc,CAAAA,CAAS,uBAAA,CACtCA,CAAAA,CAAS,wBAAA,CACLP,CAAAA,GAAU,WAAA,GACfH,CAAAA,EAAcL,CAAAA,CAAYI,CAAAA,CAC5BW,CAAAA,CAAS,yBAAA,CACFT,CAAAA,EAAcL,CAAAA,CAAYG,CAAAA,CACjCW,CAAAA,CAAS,0BAAA,CACNA,CAAAA,CAAS,2BAAA,CAAA,CAGT,CACL,KAAA,CAAAP,CAAAA,CACA,QAASA,CAAAA,GAAU,SAAA,CACnB,MAAA,CAAAO,CAAAA,CACA,MAAA,CAAQpB,CAAAA,CAAWU,CAAAA,CAAYL,CAAS,CAAA,CACxC,MAAA,CAAQL,CAAAA,CAAWW,CAAAA,CAAYL,CAAS,CAAA,CACxC,QAAA,CAAUN,CAAAA,CAAWY,CAAAA,CAAcL,CAAW,CAAA,CAC9C,MAAA,CAAAY,CACF,CACF,CAGA,SAASE,CAAAA,EAAsB,CAC7B,IAAMJ,CAAAA,CAAYJ,CAAAA,CAGlB,GACEH,CAAAA,EAAcL,CAAAA,EACdM,GAAcL,CAAAA,EACdM,CAAAA,EAAgBL,CAAAA,CAChB,CACAM,CAAAA,CAAQ,SAAA,CACJI,CAAAA,GAAc,SAAA,EAChBb,CAAAA,CAAQ,SAAA,GAAYc,
CAAAA,EAAa,CAAA,CAEnC,MACF,CAGA,GACER,CAAAA,EAAcL,CAAAA,CAAYI,CAAAA,EAC1BE,CAAAA,EAAcL,CAAAA,CAAYG,CAAAA,EAC1BG,CAAAA,EAAgBL,CAAAA,CAAcE,CAAAA,CAC9B,CACAI,CAAAA,CAAQ,WAAA,CACJI,CAAAA,GAAc,WAAA,EAChBb,CAAAA,CAAQ,WAAA,GAAcc,GAAa,CAAA,CAErC,MACF,CAEAL,CAAAA,CAAQ,OACV,CAIA,SAASS,CAAAA,CAAOC,CAAAA,CAA0B,CACxCR,CAAAA,EAAiB,CAEjB,IAAMS,CAAAA,CAAOjC,CAAAA,CACXgC,CAAAA,CAAM,KAAA,CACNA,CAAAA,CAAM,WAAA,CACNA,CAAAA,CAAM,YAAA,CACN5B,CACF,CAAA,CAEAe,CAAAA,EAAca,CAAAA,CAAM,WAAA,CAAcA,CAAAA,CAAM,YAAA,CACxCZ,CAAAA,EAAca,CAAAA,CACdZ,CAAAA,EAAgB,EAEhBS,CAAAA,GACF,CAEA,SAASI,CAAAA,EAAoB,CAC3B,OAAAV,CAAAA,EAAiB,CACVG,CAAAA,EACT,CAEA,SAASQ,CAAAA,EAAc,CACrBX,CAAAA,EAAiB,CACjB,IAAM1B,CAAAA,CAAS6B,CAAAA,EAAY,CAC3B,GAAI7B,CAAAA,CAAO,KAAA,GAAU,SAAA,CACnB,MAAM,IAAID,CAAAA,CAAoBC,CAAM,CAExC,CAEA,SAASsC,GAAuB,CAC9B,OAAOT,CAAAA,EACT,CAEA,SAASU,CAAAA,EAAc,CACrBlB,CAAAA,CAAa,CAAA,CACbC,CAAAA,CAAa,CAAA,CACbC,CAAAA,CAAe,CAAA,CACfC,CAAAA,CAAQ,MAAA,CACRC,CAAAA,CAAc,IAAA,CAAK,GAAA,EAAI,CACvBV,CAAAA,CAAQ,OAAA,GAAUc,CAAAA,EAAa,EACjC,CAEA,OAAO,CAAE,MAAA,CAAAI,CAAAA,CAAQ,KAAA,CAAAG,CAAAA,CAAO,MAAAC,CAAAA,CAAO,QAAA,CAAAC,CAAAA,CAAU,KAAA,CAAAC,CAAM,CACjD,CCnLO,SAASC,CAAAA,CAAcC,CAAAA,CAA0C,CACtE,OAAO,CACL,KAAA,CAAOA,CAAAA,CAAS,KAAA,CAChB,WAAA,CAAaA,CAAAA,CAAS,KAAA,CAAM,YAAA,CAC5B,YAAA,CAAcA,CAAAA,CAAS,KAAA,CAAM,aAC/B,CACF,CAaO,SAASC,CAAAA,CAAWD,CAAAA,CAAuC,CAChE,OAAO,CACL,MAAOA,CAAAA,CAAS,KAAA,CAChB,WAAA,CAAaA,CAAAA,CAAS,KAAA,CAAM,aAAA,CAC5B,YAAA,CAAcA,CAAAA,CAAS,KAAA,CAAM,iBAC/B,CACF,CAKA,SAASE,CAAAA,CAAYC,CAAAA,CAA2C,CAC9D,OAAO,cAAA,GAAmBA,CAAAA,CAAwB,KACpD,CAMO,SAASC,CAAAA,CAAaJ,CAAAA,CAAuC,CAClE,OAAIE,CAAAA,CAAYF,CAAQ,CAAA,CAAUD,CAAAA,CAAcC,CAAQ,CAAA,CACjDC,CAAAA,CAAWD,CAA0B,CAC9C","file":"index.cjs","sourcesContent":["import type { GateStatus, TripReason } from \"../types/index.js\"\n\nexport class BudgetExceededError extends Error {\n readonly reason: TripReason\n readonly resets: Date\n readonly snapshot: GateStatus\n\n constructor(status: GateStatus) {\n super(\n `LLM gate tripped: ${status.reason ?? 
\"limit_exceeded\"}. ` +\n `Resets at ${status.resets.toISOString()}.`\n )\n this.name = \"BudgetExceededError\"\n this.reason = status.reason as TripReason\n this.resets = status.resets\n this.snapshot = status\n\n // Maintain proper prototype chain in transpiled environments\n Object.setPrototypeOf(this, new.target.prototype)\n }\n}\n","import type { PricingTable } from \"../types/index.js\";\n\n/**\n * Default pricing table for common models.\n * Prices are per token in USD.\n * Users can override via GateOptions.pricing.\n *\n * Last updated: 2026-03\n */\nexport const defaultPricing: PricingTable = {\n // Anthropic\n \"claude-opus-4-20250514\": {\n inputPerToken: 0.000015,\n outputPerToken: 0.000075,\n },\n \"claude-sonnet-4-20250514\": {\n inputPerToken: 0.000003,\n outputPerToken: 0.000015,\n },\n \"claude-haiku-4-5-20251001\": {\n inputPerToken: 0.0000008,\n outputPerToken: 0.000004,\n },\n\n // OpenAI\n \"gpt-4o\": {\n inputPerToken: 0.0000025,\n outputPerToken: 0.00001,\n },\n \"gpt-4o-mini\": {\n inputPerToken: 0.00000015,\n outputPerToken: 0.0000006,\n },\n \"gpt-4-turbo\": {\n inputPerToken: 0.00001,\n outputPerToken: 0.00003,\n },\n o3: {\n inputPerToken: 0.00001,\n outputPerToken: 0.00004,\n },\n \"o4-mini\": {\n inputPerToken: 0.0000011,\n outputPerToken: 0.0000044,\n },\n};\n\n/**\n * Resolve cost for a given model and token counts.\n * Falls back to 0 if model is not found in the table.\n */\nexport function resolveCost(\n model: string,\n inputTokens: number,\n outputTokens: number,\n pricing: PricingTable,\n): number {\n const entry = pricing[model];\n if (!entry) return 0;\n return (\n inputTokens * entry.inputPerToken + outputTokens * entry.outputPerToken\n );\n}\n","import { BudgetExceededError } from \"./errors/index.js\";\nimport { defaultPricing, resolveCost } from \"./pricing/index.js\";\nimport type {\n CircuitState,\n GateInstance,\n GateMetric,\n GateOptions,\n GateStatus,\n ThrottleReason,\n TripReason,\n UsageRecord,\n} from 
\"./types/index.js\";\n\nconst DEFAULT_WINDOW_MS = 60_000; // 1 minute\nconst DEFAULT_THROTTLE = 0.8; // 80% of limit triggers THROTTLED\nconst SENTINEL_LIMIT = Infinity; // when a dimension is not configured\n\nfunction makeMetric(used: number, limit: number): GateMetric {\n return {\n used,\n remaining: limit === Infinity ? Infinity : Math.max(0, limit - used),\n limit: limit === Infinity ? -1 : limit,\n };\n}\n\n/**\n * createGate — lightweight LLM budget & token guard.\n *\n * @example\n * ```ts\n * const gate = createGate({\n * maxTokens: 5000,\n * maxBudget: 0.10,\n * maxRequests: 100,\n * windowMs: 60_000,\n * onThrottled: (s) => console.warn(\"Throttled\", s.tokens),\n * onTripped: (s) => console.error(\"Tripped!\", s.reason),\n * onReset: (s) => console.log(\"Gate reset\"),\n * })\n * ```\n */\nexport function createGate(options: GateOptions = {}): GateInstance {\n // Config\n const maxTokens = options.maxTokens ?? SENTINEL_LIMIT;\n const maxBudget = options.maxBudget ?? SENTINEL_LIMIT;\n const maxRequests = options.maxRequests ?? SENTINEL_LIMIT;\n const windowMs = options.windowMs ?? DEFAULT_WINDOW_MS;\n const throttleAt = options.throttleAt ?? 
DEFAULT_THROTTLE;\n const pricing = { ...defaultPricing, ...options.pricing };\n\n if (\n maxTokens === SENTINEL_LIMIT &&\n maxBudget === SENTINEL_LIMIT &&\n maxRequests === SENTINEL_LIMIT\n ) {\n throw new Error(\n \"[llm-gate] At least one limit must be set: maxTokens, maxBudget, or maxRequests.\",\n );\n }\n\n // Internal State\n let tokensUsed = 0;\n let budgetUsed = 0;\n let requestCount = 0;\n let state: CircuitState = \"OPEN\";\n let windowStart = Date.now();\n\n // Window Management\n function checkWindowReset(): void {\n const now = Date.now();\n if (now - windowStart >= windowMs) {\n const prevState = state;\n tokensUsed = 0;\n budgetUsed = 0;\n requestCount = 0;\n state = \"OPEN\";\n windowStart = now;\n\n if (prevState !== \"OPEN\") {\n options.onReset?.(buildStatus());\n }\n }\n }\n\n // Status Builder\n function buildStatus(): GateStatus {\n const resets = new Date(windowStart + windowMs);\n\n let reason: TripReason | ThrottleReason = null;\n\n if (state === \"TRIPPED\") {\n if (tokensUsed >= maxTokens) reason = \"token_limit_exceeded\";\n else if (budgetUsed >= maxBudget) reason = \"budget_limit_exceeded\";\n else reason = \"request_limit_exceeded\";\n } else if (state === \"THROTTLED\") {\n if (tokensUsed >= maxTokens * throttleAt)\n reason = \"approaching_token_limit\";\n else if (budgetUsed >= maxBudget * throttleAt)\n reason = \"approaching_budget_limit\";\n else reason = \"approaching_request_limit\";\n }\n\n return {\n state,\n allowed: state !== \"TRIPPED\",\n reason,\n tokens: makeMetric(tokensUsed, maxTokens),\n budget: makeMetric(budgetUsed, maxBudget),\n requests: makeMetric(requestCount, maxRequests),\n resets,\n };\n }\n\n // State Machine\n function evaluateState(): void {\n const prevState = state;\n\n // Check TRIPPED first — hard limits\n if (\n tokensUsed >= maxTokens ||\n budgetUsed >= maxBudget ||\n requestCount >= maxRequests\n ) {\n state = \"TRIPPED\";\n if (prevState !== \"TRIPPED\") {\n options.onTripped?.(buildStatus());\n 
}\n return;\n }\n\n // Check THROTTLED — soft warning threshold\n if (\n tokensUsed >= maxTokens * throttleAt ||\n budgetUsed >= maxBudget * throttleAt ||\n requestCount >= maxRequests * throttleAt\n ) {\n state = \"THROTTLED\";\n if (prevState !== \"THROTTLED\") {\n options.onThrottled?.(buildStatus());\n }\n return;\n }\n\n state = \"OPEN\";\n }\n\n // Public API\n\n function record(usage: UsageRecord): void {\n checkWindowReset();\n\n const cost = resolveCost(\n usage.model,\n usage.inputTokens,\n usage.outputTokens,\n pricing,\n );\n\n tokensUsed += usage.inputTokens + usage.outputTokens;\n budgetUsed += cost;\n requestCount += 1;\n\n evaluateState();\n }\n\n function check(): GateStatus {\n checkWindowReset();\n return buildStatus();\n }\n\n function guard(): void {\n checkWindowReset();\n const status = buildStatus();\n if (status.state === \"TRIPPED\") {\n throw new BudgetExceededError(status);\n }\n }\n\n function snapshot(): GateStatus {\n return buildStatus();\n }\n\n function reset(): void {\n tokensUsed = 0;\n budgetUsed = 0;\n requestCount = 0;\n state = \"OPEN\";\n windowStart = Date.now();\n options.onReset?.(buildStatus());\n }\n\n return { record, check, guard, snapshot, reset };\n}\n","import type { UsageRecord } from \"../types/index.js\";\n\n// Anthropic\nexport interface AnthropicUsage {\n input_tokens: number;\n output_tokens: number;\n}\n\nexport interface AnthropicResponse {\n model: string;\n usage: AnthropicUsage;\n}\n\nexport function fromAnthropic(response: AnthropicResponse): UsageRecord {\n return {\n model: response.model,\n inputTokens: response.usage.input_tokens,\n outputTokens: response.usage.output_tokens,\n };\n}\n\n// OpenAI\nexport interface OpenAIUsage {\n prompt_tokens: number;\n completion_tokens: number;\n}\n\nexport interface OpenAIResponse {\n model: string;\n usage: OpenAIUsage;\n}\n\nexport function fromOpenAI(response: OpenAIResponse): UsageRecord {\n return {\n model: response.model,\n inputTokens: 
response.usage.prompt_tokens,\n outputTokens: response.usage.completion_tokens,\n };\n}\n\n// Auto-detect\ntype AnyLLMResponse = AnthropicResponse | OpenAIResponse;\n\nfunction isAnthropic(r: AnyLLMResponse): r is AnthropicResponse {\n return \"input_tokens\" in (r as AnthropicResponse).usage;\n}\n\n/**\n * Auto-detect provider from response shape and normalize to UsageRecord.\n * Supports Anthropic and OpenAI response formats.\n */\nexport function fromResponse(response: AnyLLMResponse): UsageRecord {\n if (isAnthropic(response)) return fromAnthropic(response);\n return fromOpenAI(response as OpenAIResponse);\n}\n"]}
|
package/dist/index.d.cts
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
/** Gate state: "OPEN" (below every throttle threshold), "THROTTLED" (some counter reached limit × throttleAt) or "TRIPPED" (some counter reached its limit). */
type CircuitState = "OPEN" | "THROTTLED" | "TRIPPED";
|
|
2
|
+
/** Why the gate is TRIPPED. When several limits are hit, tokens are reported first, then budget, then requests. */
type TripReason = "token_limit_exceeded" | "budget_limit_exceeded" | "request_limit_exceeded" | null;
|
|
3
|
+
/** Why the gate is THROTTLED. When several thresholds are hit, tokens are reported first, then budget, then requests. */
type ThrottleReason = "approaching_token_limit" | "approaching_budget_limit" | "approaching_request_limit" | null;
|
|
4
|
+
/** Counters for one limited dimension (tokens, budget or requests) in the current window. */
interface GateMetric {
|
|
5
|
+
/** Amount consumed so far in the current window. */
used: number;
|
|
6
|
+
/** `max(0, limit - used)`, or `Infinity` when this dimension has no configured limit. */
remaining: number;
|
|
7
|
+
/** The configured limit, or `-1` when this dimension has no configured limit. */
limit: number;
|
|
8
|
+
}
|
|
9
|
+
/** Status object returned by `check()` and `snapshot()` and passed to the option callbacks. */
interface GateStatus {
|
|
10
|
+
/** Current circuit state. */
state: CircuitState;
|
|
11
|
+
/** `false` only while `state` is "TRIPPED". */
allowed: boolean;
|
|
12
|
+
/** Trip or throttle reason; `null` while the gate is OPEN. */
reason: TripReason | ThrottleReason;
|
|
13
|
+
/** Token usage (input + output) in the current window. */
tokens: GateMetric;
|
|
14
|
+
/** Accumulated cost in the current window, from the pricing table. */
budget: GateMetric;
|
|
15
|
+
/** Number of `record()` calls in the current window. */
requests: GateMetric;
|
|
16
|
+
/** End of the current window (window start + `windowMs`). */
resets: Date;
|
|
17
|
+
}
|
|
18
|
+
/** Usage of one LLM call, as passed to `record()`. */
interface UsageRecord {
|
|
19
|
+
/** Model name; used as the key into the pricing table (models without an entry cost 0). */
model: string;
|
|
20
|
+
/** Input/prompt tokens of the call. */
inputTokens: number;
|
|
21
|
+
/** Output/completion tokens of the call. */
outputTokens: number;
|
|
22
|
+
}
|
|
23
|
+
/** Per-token prices for one model; cost = inputTokens × inputPerToken + outputTokens × outputPerToken. */
interface ModelPricing {
|
|
24
|
+
/** Price of one input token. */
inputPerToken: number;
|
|
25
|
+
/** Price of one output token. */
outputPerToken: number;
|
|
26
|
+
}
|
|
27
|
+
/** Pricing entries keyed by model name; a table passed as `GateOptions.pricing` is merged over the built-in defaults. */
type PricingTable = Record<string, ModelPricing>;
|
|
28
|
+
/** Configuration for `createGate`; at least one of `maxTokens`, `maxBudget` or `maxRequests` must be set, otherwise `createGate` throws. */
interface GateOptions {
|
|
29
|
+
/** Maximum combined input + output tokens per window; unlimited when omitted. */
maxTokens?: number;
|
|
30
|
+
/** Maximum accumulated cost per window, in the pricing table's currency; unlimited when omitted. */
maxBudget?: number;
|
|
31
|
+
/** Maximum number of recorded requests per window; unlimited when omitted. */
maxRequests?: number;
|
|
32
|
+
/** Window length in milliseconds (default 60000); counters reset on the first `record`, `check` or `guard` call after it elapses. */
windowMs?: number;
|
|
33
|
+
/** Fraction of a limit at which the gate becomes THROTTLED (default 0.8). */
throttleAt?: number;
|
|
34
|
+
/** Per-model prices merged over `defaultPricing`; entries given here win on key collisions. */
pricing?: PricingTable;
|
|
35
|
+
/** Invoked by `record` when the state changes to THROTTLED. */
onThrottled?: (status: GateStatus) => void;
|
|
36
|
+
/** Invoked by `record` when the state changes to TRIPPED. */
onTripped?: (status: GateStatus) => void;
|
|
37
|
+
/** Invoked on every manual `reset()`, and on an automatic window reset when the gate was not OPEN. */
onReset?: (status: GateStatus) => void;
|
|
38
|
+
}
|
|
39
|
+
/** Handle returned by `createGate`; all methods operate on the same in-memory counters. */
interface GateInstance {
|
|
40
|
+
/** Record usage from one LLM response: adds input + output tokens, the resolved cost and one request, then re-evaluates the state. */
record: (usage: UsageRecord) => void;
|
|
41
|
+
/** Roll the window over if it has elapsed, then return the current status — never throws. */
check: () => GateStatus;
|
|
42
|
+
/** Same window handling as `check`, but throws BudgetExceededError if the gate is TRIPPED. */
guard: () => void;
|
|
43
|
+
/** Read-only snapshot of the current state; unlike `check`, it does not roll an elapsed window over first. */
snapshot: () => GateStatus;
|
|
44
|
+
/** Manually zero all counters, start a new window, set the state to OPEN and invoke `onReset`. */
reset: () => void;
|
|
45
|
+
}
|
|
51
|
+
|
|
52
|
+
/**
|
|
53
|
+
* createGate — lightweight LLM budget & token guard.
* Tracks tokens, cost and request count per window of `windowMs` milliseconds.
* @param options - Limits, window length, throttle ratio, pricing overrides and callbacks.
* @returns A gate exposing `record`, `check`, `guard`, `snapshot` and `reset`.
* @throws Error when none of `maxTokens`, `maxBudget` or `maxRequests` is set.
|
|
54
|
+
*
|
|
55
|
+
* @example
|
|
56
|
+
* ```ts
|
|
57
|
+
* const gate = createGate({
|
|
58
|
+
* maxTokens: 5000,
|
|
59
|
+
* maxBudget: 0.10,
|
|
60
|
+
* maxRequests: 100,
|
|
61
|
+
* windowMs: 60_000,
|
|
62
|
+
* onThrottled: (s) => console.warn("Throttled", s.tokens),
|
|
63
|
+
* onTripped: (s) => console.error("Tripped!", s.reason),
|
|
64
|
+
* onReset: (s) => console.log("Gate reset"),
|
|
65
|
+
* })
|
|
66
|
+
* ```
|
|
67
|
+
*/
|
|
68
|
+
declare function createGate(options?: GateOptions): GateInstance;
|
|
69
|
+
|
|
70
|
+
/** Error thrown by `GateInstance.guard()` when the gate is TRIPPED. */
declare class BudgetExceededError extends Error {
|
|
71
|
+
/** Trip reason copied from the status passed to the constructor. */
readonly reason: TripReason;
|
|
72
|
+
/** Time at which the current window ends. */
readonly resets: Date;
|
|
73
|
+
/** The full status the error was created from. */
readonly snapshot: GateStatus;
|
|
74
|
+
/** @param status - Gate status to report; its `reason` and `resets` populate the message and the fields above. */
constructor(status: GateStatus);
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
/** The two usage fields read by `fromAnthropic`. */
interface AnthropicUsage {
|
|
78
|
+
/** Mapped to `UsageRecord.inputTokens`. */
input_tokens: number;
|
|
79
|
+
/** Mapped to `UsageRecord.outputTokens`. */
output_tokens: number;
|
|
80
|
+
}
|
|
81
|
+
/** Minimal Anthropic-shaped response accepted by `fromAnthropic`. */
interface AnthropicResponse {
|
|
82
|
+
/** Model identifier, copied to `UsageRecord.model`. */
model: string;
|
|
83
|
+
/** Token usage reported with the response. */
usage: AnthropicUsage;
|
|
84
|
+
}
|
|
85
|
+
/** Convert an Anthropic-shaped response to a UsageRecord (`usage.input_tokens` becomes `inputTokens`, `usage.output_tokens` becomes `outputTokens`). */
declare function fromAnthropic(response: AnthropicResponse): UsageRecord;
|
|
86
|
+
/** The two usage fields read by `fromOpenAI`. */
interface OpenAIUsage {
|
|
87
|
+
/** Mapped to `UsageRecord.inputTokens`. */
prompt_tokens: number;
|
|
88
|
+
/** Mapped to `UsageRecord.outputTokens`. */
completion_tokens: number;
|
|
89
|
+
}
|
|
90
|
+
/** Minimal OpenAI-shaped response accepted by `fromOpenAI`. */
interface OpenAIResponse {
|
|
91
|
+
/** Model identifier, copied to `UsageRecord.model`. */
model: string;
|
|
92
|
+
/** Token usage reported with the response. */
usage: OpenAIUsage;
|
|
93
|
+
}
|
|
94
|
+
/** Convert an OpenAI-shaped response to a UsageRecord (`usage.prompt_tokens` becomes `inputTokens`, `usage.completion_tokens` becomes `outputTokens`). */
declare function fromOpenAI(response: OpenAIResponse): UsageRecord;
|
|
95
|
+
/** Any response shape accepted by `fromResponse`. */
type AnyLLMResponse = AnthropicResponse | OpenAIResponse;
|
|
96
|
+
/**
|
|
97
|
+
* Auto-detect provider from response shape and normalize to UsageRecord.
|
|
98
|
+
* Supports Anthropic and OpenAI response formats.
* A response is treated as Anthropic when its `usage` object has an `input_tokens` key, and as OpenAI otherwise.
* @param response - Anthropic- or OpenAI-shaped response carrying `model` and `usage`.
* @returns The normalized usage record.
|
|
99
|
+
*/
|
|
100
|
+
declare function fromResponse(response: AnyLLMResponse): UsageRecord;
|
|
101
|
+
|
|
102
|
+
/**
|
|
103
|
+
* Default pricing table for common models.
|
|
104
|
+
* Prices are per token in USD.
|
|
105
|
+
* Users can override via GateOptions.pricing.
* Keys are model identifiers such as "gpt-4o" or "claude-sonnet-4-20250514".
|
|
106
|
+
*
|
|
107
|
+
* Last updated: 2026-03
|
|
108
|
+
*/
|
|
109
|
+
declare const defaultPricing: PricingTable;
|
|
110
|
+
/**
|
|
111
|
+
* Resolve cost for a given model and token counts.
|
|
112
|
+
* Falls back to 0 if model is not found in the table.
* @param model - Model identifier used as the key into `pricing`.
* @param inputTokens - Number of input tokens.
* @param outputTokens - Number of output tokens.
* @param pricing - Table to look the model up in.
* @returns `inputTokens * inputPerToken + outputTokens * outputPerToken` for the matching entry.
|
|
113
|
+
*/
|
|
114
|
+
declare function resolveCost(model: string, inputTokens: number, outputTokens: number, pricing: PricingTable): number;
|
|
115
|
+
|
|
116
|
+
export { type AnthropicResponse, BudgetExceededError, type CircuitState, type GateInstance, type GateMetric, type GateOptions, type GateStatus, type ModelPricing, type OpenAIResponse, type PricingTable, type ThrottleReason, type TripReason, type UsageRecord, createGate, defaultPricing, fromAnthropic, fromOpenAI, fromResponse, resolveCost };
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
/** Gate state: "OPEN" (below every threshold), "THROTTLED" (a soft warning threshold was reached) or "TRIPPED" (a hard limit was reached). */
type CircuitState = "OPEN" | "THROTTLED" | "TRIPPED";
|
|
2
|
+
/** Hard limit reported while the gate is TRIPPED; `null` otherwise. When several limits are hit, tokens are reported before budget, and budget before requests. */
type TripReason = "token_limit_exceeded" | "budget_limit_exceeded" | "request_limit_exceeded" | null;
|
|
3
|
+
/** Soft threshold reported while the gate is THROTTLED; `null` otherwise. Checked in the same order as TripReason: tokens, then budget, then requests. */
type ThrottleReason = "approaching_token_limit" | "approaching_budget_limit" | "approaching_request_limit" | null;
|
|
4
|
+
/** Usage figures for one tracked dimension (tokens, budget or requests) within the current window. */
interface GateMetric {
|
|
5
|
+
/** Amount consumed so far in the current window. */
used: number;
|
|
6
|
+
/** Amount left before the limit (never below 0); `Infinity` when this dimension has no configured limit. */
remaining: number;
|
|
7
|
+
/** Configured limit; `-1` when this dimension has no configured limit. */
limit: number;
|
|
8
|
+
}
|
|
9
|
+
/** Point-in-time view of the gate, as returned by `check()` / `snapshot()` and passed to the callbacks. */
interface GateStatus {
|
|
10
|
+
/** Current circuit state. */
state: CircuitState;
|
|
11
|
+
/** `false` only while the state is "TRIPPED". */
allowed: boolean;
|
|
12
|
+
/** Why the gate is TRIPPED or THROTTLED; `null` while OPEN. */
reason: TripReason | ThrottleReason;
|
|
13
|
+
/** Combined input + output tokens recorded in the current window. */
tokens: GateMetric;
|
|
14
|
+
/** Accumulated cost in the current window, computed from the pricing table. */
budget: GateMetric;
|
|
15
|
+
/** Number of `record()` calls in the current window. */
requests: GateMetric;
|
|
16
|
+
/** End of the current window (window start + `windowMs`). */
resets: Date;
|
|
17
|
+
}
|
|
18
|
+
/** Provider-agnostic usage of a single LLM call, as consumed by `GateInstance.record`. */
interface UsageRecord {
|
|
19
|
+
/** Model identifier; used as the key into the pricing table. */
model: string;
|
|
20
|
+
/** Input (prompt) token count. */
inputTokens: number;
|
|
21
|
+
/** Output (completion) token count. */
outputTokens: number;
|
|
22
|
+
}
|
|
23
|
+
/** Per-token prices for a single model. */
interface ModelPricing {
|
|
24
|
+
/** Price of one input token. */
inputPerToken: number;
|
|
25
|
+
/** Price of one output token. */
outputPerToken: number;
|
|
26
|
+
}
|
|
27
|
+
/** Pricing entries keyed by model identifier (for example "gpt-4o"). */
type PricingTable = Record<string, ModelPricing>;
|
|
28
|
+
/** Configuration for `createGate`; at least one of `maxTokens`, `maxBudget` or `maxRequests` must be set, otherwise `createGate` throws. */
interface GateOptions {
|
|
29
|
+
/** Maximum combined input + output tokens per window; unlimited when omitted. */
maxTokens?: number;
|
|
30
|
+
/** Maximum accumulated cost per window, in the pricing table's currency; unlimited when omitted. */
maxBudget?: number;
|
|
31
|
+
/** Maximum number of recorded requests per window; unlimited when omitted. */
maxRequests?: number;
|
|
32
|
+
/** Window length in milliseconds (default 60000); counters reset on the first `record`, `check` or `guard` call after it elapses. */
windowMs?: number;
|
|
33
|
+
/** Fraction of a limit at which the gate becomes THROTTLED (default 0.8). */
throttleAt?: number;
|
|
34
|
+
/** Per-model prices merged over `defaultPricing`; entries given here win on key collisions. */
pricing?: PricingTable;
|
|
35
|
+
/** Invoked by `record` when the state changes to THROTTLED. */
onThrottled?: (status: GateStatus) => void;
|
|
36
|
+
/** Invoked by `record` when the state changes to TRIPPED. */
onTripped?: (status: GateStatus) => void;
|
|
37
|
+
/** Invoked on every manual `reset()`, and on an automatic window reset when the gate was not OPEN. */
onReset?: (status: GateStatus) => void;
|
|
38
|
+
}
|
|
39
|
+
/** Handle returned by `createGate`; all methods operate on the same in-memory counters. */
interface GateInstance {
|
|
40
|
+
/** Record usage from one LLM response: adds input + output tokens, the resolved cost and one request, then re-evaluates the state. */
record: (usage: UsageRecord) => void;
|
|
41
|
+
/** Roll the window over if it has elapsed, then return the current status — never throws. */
check: () => GateStatus;
|
|
42
|
+
/** Same window handling as `check`, but throws BudgetExceededError if the gate is TRIPPED. */
guard: () => void;
|
|
43
|
+
/** Read-only snapshot of the current state; unlike `check`, it does not roll an elapsed window over first. */
snapshot: () => GateStatus;
|
|
44
|
+
/** Manually zero all counters, start a new window, set the state to OPEN and invoke `onReset`. */
reset: () => void;
|
|
45
|
+
}
|
|
51
|
+
|
|
52
|
+
/**
|
|
53
|
+
* createGate — lightweight LLM budget & token guard.
* Tracks tokens, cost and request count per window of `windowMs` milliseconds.
* @param options - Limits, window length, throttle ratio, pricing overrides and callbacks.
* @returns A gate exposing `record`, `check`, `guard`, `snapshot` and `reset`.
* @throws Error when none of `maxTokens`, `maxBudget` or `maxRequests` is set.
|
|
54
|
+
*
|
|
55
|
+
* @example
|
|
56
|
+
* ```ts
|
|
57
|
+
* const gate = createGate({
|
|
58
|
+
* maxTokens: 5000,
|
|
59
|
+
* maxBudget: 0.10,
|
|
60
|
+
* maxRequests: 100,
|
|
61
|
+
* windowMs: 60_000,
|
|
62
|
+
* onThrottled: (s) => console.warn("Throttled", s.tokens),
|
|
63
|
+
* onTripped: (s) => console.error("Tripped!", s.reason),
|
|
64
|
+
* onReset: (s) => console.log("Gate reset"),
|
|
65
|
+
* })
|
|
66
|
+
* ```
|
|
67
|
+
*/
|
|
68
|
+
declare function createGate(options?: GateOptions): GateInstance;
|
|
69
|
+
|
|
70
|
+
/** Error thrown by `GateInstance.guard()` when the gate is TRIPPED. */
declare class BudgetExceededError extends Error {
|
|
71
|
+
/** Trip reason copied from the status passed to the constructor. */
readonly reason: TripReason;
|
|
72
|
+
/** Time at which the current window ends. */
readonly resets: Date;
|
|
73
|
+
/** The full status the error was created from. */
readonly snapshot: GateStatus;
|
|
74
|
+
/** @param status - Gate status to report; its `reason` and `resets` populate the message and the fields above. */
constructor(status: GateStatus);
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
/** The two usage fields read by `fromAnthropic`. */
interface AnthropicUsage {
|
|
78
|
+
/** Mapped to `UsageRecord.inputTokens`. */
input_tokens: number;
|
|
79
|
+
/** Mapped to `UsageRecord.outputTokens`. */
output_tokens: number;
|
|
80
|
+
}
|
|
81
|
+
/** Minimal Anthropic-shaped response accepted by `fromAnthropic`. */
interface AnthropicResponse {
|
|
82
|
+
/** Model identifier, copied to `UsageRecord.model`. */
model: string;
|
|
83
|
+
/** Token usage reported with the response. */
usage: AnthropicUsage;
|
|
84
|
+
}
|
|
85
|
+
/** Convert an Anthropic-shaped response to a UsageRecord (`usage.input_tokens` becomes `inputTokens`, `usage.output_tokens` becomes `outputTokens`). */
declare function fromAnthropic(response: AnthropicResponse): UsageRecord;
|
|
86
|
+
/** The two usage fields read by `fromOpenAI`. */
interface OpenAIUsage {
|
|
87
|
+
/** Mapped to `UsageRecord.inputTokens`. */
prompt_tokens: number;
|
|
88
|
+
/** Mapped to `UsageRecord.outputTokens`. */
completion_tokens: number;
|
|
89
|
+
}
|
|
90
|
+
/** Minimal OpenAI-shaped response accepted by `fromOpenAI`. */
interface OpenAIResponse {
|
|
91
|
+
/** Model identifier, copied to `UsageRecord.model`. */
model: string;
|
|
92
|
+
/** Token usage reported with the response. */
usage: OpenAIUsage;
|
|
93
|
+
}
|
|
94
|
+
/** Convert an OpenAI-shaped response to a UsageRecord (`usage.prompt_tokens` becomes `inputTokens`, `usage.completion_tokens` becomes `outputTokens`). */
declare function fromOpenAI(response: OpenAIResponse): UsageRecord;
|
|
95
|
+
/** Any response shape accepted by `fromResponse`. */
type AnyLLMResponse = AnthropicResponse | OpenAIResponse;
|
|
96
|
+
/**
|
|
97
|
+
* Auto-detect provider from response shape and normalize to UsageRecord.
|
|
98
|
+
* Supports Anthropic and OpenAI response formats.
* A response is treated as Anthropic when its `usage` object has an `input_tokens` key, and as OpenAI otherwise.
* @param response - Anthropic- or OpenAI-shaped response carrying `model` and `usage`.
* @returns The normalized usage record.
|
|
99
|
+
*/
|
|
100
|
+
declare function fromResponse(response: AnyLLMResponse): UsageRecord;
|
|
101
|
+
|
|
102
|
+
/**
|
|
103
|
+
* Default pricing table for common models.
|
|
104
|
+
* Prices are per token in USD.
|
|
105
|
+
* Users can override via GateOptions.pricing.
* Keys are model identifiers such as "gpt-4o" or "claude-sonnet-4-20250514".
|
|
106
|
+
*
|
|
107
|
+
* Last updated: 2026-03
|
|
108
|
+
*/
|
|
109
|
+
declare const defaultPricing: PricingTable;
|
|
110
|
+
/**
|
|
111
|
+
* Resolve cost for a given model and token counts.
|
|
112
|
+
* Falls back to 0 if model is not found in the table.
* @param model - Model identifier used as the key into `pricing`.
* @param inputTokens - Number of input tokens.
* @param outputTokens - Number of output tokens.
* @param pricing - Table to look the model up in.
* @returns `inputTokens * inputPerToken + outputTokens * outputPerToken` for the matching entry.
|
|
113
|
+
*/
|
|
114
|
+
declare function resolveCost(model: string, inputTokens: number, outputTokens: number, pricing: PricingTable): number;
|
|
115
|
+
|
|
116
|
+
export { type AnthropicResponse, BudgetExceededError, type CircuitState, type GateInstance, type GateMetric, type GateOptions, type GateStatus, type ModelPricing, type OpenAIResponse, type PricingTable, type ThrottleReason, type TripReason, type UsageRecord, createGate, defaultPricing, fromAnthropic, fromOpenAI, fromResponse, resolveCost };
|
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
var f=class extends Error{constructor(t){super(`LLM gate tripped: ${t.reason??"limit_exceeded"}. Resets at ${t.resets.toISOString()}.`),this.name="BudgetExceededError",this.reason=t.reason,this.resets=t.resets,this.snapshot=t,Object.setPrototypeOf(this,new.target.prototype);}};var R={"claude-opus-4-20250514":{inputPerToken:15e-6,outputPerToken:75e-6},"claude-sonnet-4-20250514":{inputPerToken:3e-6,outputPerToken:15e-6},"claude-haiku-4-5-20251001":{inputPerToken:8e-7,outputPerToken:4e-6},"gpt-4o":{inputPerToken:25e-7,outputPerToken:1e-5},"gpt-4o-mini":{inputPerToken:15e-8,outputPerToken:6e-7},"gpt-4-turbo":{inputPerToken:1e-5,outputPerToken:3e-5},o3:{inputPerToken:1e-5,outputPerToken:4e-5},"o4-mini":{inputPerToken:11e-7,outputPerToken:44e-7}};function k(e,t,s,u){let c=u[e];return c?t*c.inputPerToken+s*c.outputPerToken:0}var S=6e4,G=.8,m=1/0;function P(e,t){return {used:e,remaining:t===1/0?1/0:Math.max(0,t-e),limit:t===1/0?-1:t}}function w(e={}){let t=e.maxTokens??m,s=e.maxBudget??m,u=e.maxRequests??m,c=e.windowMs??S,T=e.throttleAt??G,A={...R,...e.pricing};if(t===m&&s===m&&u===m)throw new Error("[llm-gate] At least one limit must be set: maxTokens, maxBudget, or maxRequests.");let i=0,a=0,d=0,o="OPEN",l=Date.now();function g(){let n=Date.now();if(n-l>=c){let r=o;i=0,a=0,d=0,o="OPEN",l=n,r!=="OPEN"&&e.onReset?.(p());}}function p(){let n=new Date(l+c),r=null;return o==="TRIPPED"?i>=t?r="token_limit_exceeded":a>=s?r="budget_limit_exceeded":r="request_limit_exceeded":o==="THROTTLED"&&(i>=t*T?r="approaching_token_limit":a>=s*T?r="approaching_budget_limit":r="approaching_request_limit"),{state:o,allowed:o!=="TRIPPED",reason:r,tokens:P(i,t),budget:P(a,s),requests:P(d,u),resets:n}}function _(){let n=o;if(i>=t||a>=s||d>=u){o="TRIPPED",n!=="TRIPPED"&&e.onTripped?.(p());return}if(i>=t*T||a>=s*T||d>=u*T){o="THROTTLED",n!=="THROTTLED"&&e.onThrottled?.(p());return}o="OPEN";}function I(n){g();let 
r=k(n.model,n.inputTokens,n.outputTokens,A);i+=n.inputTokens+n.outputTokens,a+=r,d+=1,_();}function y(){return g(),p()}function O(){g();let n=p();if(n.state==="TRIPPED")throw new f(n)}function E(){return p()}function b(){i=0,a=0,d=0,o="OPEN",l=Date.now(),e.onReset?.(p());}return {record:I,check:y,guard:O,snapshot:E,reset:b}}function x(e){return {model:e.model,inputTokens:e.usage.input_tokens,outputTokens:e.usage.output_tokens}}function h(e){return {model:e.model,inputTokens:e.usage.prompt_tokens,outputTokens:e.usage.completion_tokens}}function D(e){return "input_tokens"in e.usage}function L(e){return D(e)?x(e):h(e)}export{f as BudgetExceededError,w as createGate,R as defaultPricing,x as fromAnthropic,h as fromOpenAI,L as fromResponse,k as resolveCost};//# sourceMappingURL=index.mjs.map
|
|
2
|
+
//# sourceMappingURL=index.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/errors/index.ts","../src/pricing/index.ts","../src/gate.ts","../src/adapters/index.ts"],"names":["BudgetExceededError","status","defaultPricing","resolveCost","model","inputTokens","outputTokens","pricing","entry","DEFAULT_WINDOW_MS","DEFAULT_THROTTLE","SENTINEL_LIMIT","makeMetric","used","limit","createGate","options","maxTokens","maxBudget","maxRequests","windowMs","throttleAt","tokensUsed","budgetUsed","requestCount","state","windowStart","checkWindowReset","now","prevState","buildStatus","resets","reason","evaluateState","record","usage","cost","check","guard","snapshot","reset","fromAnthropic","response","fromOpenAI","isAnthropic","r","fromResponse"],"mappings":"AAEO,IAAMA,CAAAA,CAAN,cAAkC,KAAM,CAK7C,WAAA,CAAYC,CAAAA,CAAoB,CAC9B,KAAA,CACE,CAAA,kBAAA,EAAqBA,CAAAA,CAAO,MAAA,EAAU,gBAAgB,CAAA,YAAA,EACzCA,CAAAA,CAAO,MAAA,CAAO,WAAA,EAAa,CAAA,CAAA,CAC1C,CAAA,CACA,IAAA,CAAK,IAAA,CAAO,qBAAA,CACZ,IAAA,CAAK,MAAA,CAASA,CAAAA,CAAO,MAAA,CACrB,IAAA,CAAK,MAAA,CAASA,EAAO,MAAA,CACrB,IAAA,CAAK,QAAA,CAAWA,CAAAA,CAGhB,MAAA,CAAO,cAAA,CAAe,IAAA,CAAM,GAAA,CAAA,MAAA,CAAW,SAAS,EAClD,CACF,ECXO,IAAMC,CAAAA,CAA+B,CAE1C,wBAAA,CAA0B,CACxB,aAAA,CAAe,KAAA,CACf,cAAA,CAAgB,KAClB,CAAA,CACA,0BAAA,CAA4B,CAC1B,aAAA,CAAe,IAAA,CACf,cAAA,CAAgB,KAClB,CAAA,CACA,2BAAA,CAA6B,CAC3B,aAAA,CAAe,KACf,cAAA,CAAgB,IAClB,CAAA,CAGA,QAAA,CAAU,CACR,aAAA,CAAe,KAAA,CACf,cAAA,CAAgB,IAClB,CAAA,CACA,aAAA,CAAe,CACb,aAAA,CAAe,KAAA,CACf,cAAA,CAAgB,IAClB,CAAA,CACA,aAAA,CAAe,CACb,aAAA,CAAe,IAAA,CACf,cAAA,CAAgB,IAClB,CAAA,CACA,EAAA,CAAI,CACF,aAAA,CAAe,IAAA,CACf,cAAA,CAAgB,IAClB,CAAA,CACA,SAAA,CAAW,CACT,aAAA,CAAe,KAAA,CACf,cAAA,CAAgB,KAClB,CACF,EAMO,SAASC,CAAAA,CACdC,CAAAA,CACAC,CAAAA,CACAC,CAAAA,CACAC,CAAAA,CACQ,CACR,IAAMC,CAAAA,CAAQD,CAAAA,CAAQH,CAAK,CAAA,CAC3B,OAAKI,CAAAA,CAEHH,CAAAA,CAAcG,CAAAA,CAAM,aAAA,CAAgBF,CAAAA,CAAeE,CAAAA,CAAM,cAAA,CAFxC,CAIrB,CCjDA,IAAMC,CAAAA,CAAoB,GAAA,CACpBC,CAAAA,CAAmB,GACnBC,CAAAA,CAAiB,CAAA,CAAA,CAAA,CAEvB,SAASC,CAAAA,CAAWC,CAAAA,CAAcC,CAAAA,CAA2B,CAC3D,OAAO,CACL,
IAAA,CAAAD,CAAAA,CACA,SAAA,CAAWC,CAAAA,GAAU,CAAA,CAAA,CAAA,CAAW,CAAA,CAAA,CAAA,CAAW,IAAA,CAAK,GAAA,CAAI,CAAA,CAAGA,CAAAA,CAAQD,CAAI,CAAA,CACnE,KAAA,CAAOC,CAAAA,GAAU,CAAA,CAAA,CAAA,CAAW,EAAA,CAAKA,CACnC,CACF,CAkBO,SAASC,CAAAA,CAAWC,CAAAA,CAAuB,GAAkB,CAElE,IAAMC,CAAAA,CAAYD,CAAAA,CAAQ,SAAA,EAAaL,CAAAA,CACjCO,CAAAA,CAAYF,CAAAA,CAAQ,SAAA,EAAaL,CAAAA,CACjCQ,CAAAA,CAAcH,CAAAA,CAAQ,WAAA,EAAeL,CAAAA,CACrCS,CAAAA,CAAWJ,CAAAA,CAAQ,QAAA,EAAYP,CAAAA,CAC/BY,CAAAA,CAAaL,CAAAA,CAAQ,UAAA,EAAcN,CAAAA,CACnCH,CAAAA,CAAU,CAAE,GAAGL,CAAAA,CAAgB,GAAGc,CAAAA,CAAQ,OAAQ,CAAA,CAExD,GACEC,CAAAA,GAAcN,GACdO,CAAAA,GAAcP,CAAAA,EACdQ,CAAAA,GAAgBR,CAAAA,CAEhB,MAAM,IAAI,KAAA,CACR,kFACF,CAAA,CAIF,IAAIW,CAAAA,CAAa,CAAA,CACbC,CAAAA,CAAa,CAAA,CACbC,CAAAA,CAAe,CAAA,CACfC,CAAAA,CAAsB,MAAA,CACtBC,CAAAA,CAAc,IAAA,CAAK,GAAA,EAAI,CAG3B,SAASC,CAAAA,EAAyB,CAChC,IAAMC,CAAAA,CAAM,IAAA,CAAK,GAAA,EAAI,CACrB,GAAIA,CAAAA,CAAMF,GAAeN,CAAAA,CAAU,CACjC,IAAMS,CAAAA,CAAYJ,CAAAA,CAClBH,CAAAA,CAAa,CAAA,CACbC,CAAAA,CAAa,CAAA,CACbC,CAAAA,CAAe,CAAA,CACfC,CAAAA,CAAQ,MAAA,CACRC,CAAAA,CAAcE,CAAAA,CAEVC,CAAAA,GAAc,QAChBb,CAAAA,CAAQ,OAAA,GAAUc,CAAAA,EAAa,EAEnC,CACF,CAGA,SAASA,CAAAA,EAA0B,CACjC,IAAMC,CAAAA,CAAS,IAAI,IAAA,CAAKL,CAAAA,CAAcN,CAAQ,EAE1CY,CAAAA,CAAsC,IAAA,CAE1C,OAAIP,CAAAA,GAAU,SAAA,CACRH,CAAAA,EAAcL,CAAAA,CAAWe,CAAAA,CAAS,sBAAA,CAC7BT,CAAAA,EAAcL,CAAAA,CAAWc,CAAAA,CAAS,uBAAA,CACtCA,CAAAA,CAAS,wBAAA,CACLP,CAAAA,GAAU,WAAA,GACfH,CAAAA,EAAcL,CAAAA,CAAYI,CAAAA,CAC5BW,CAAAA,CAAS,yBAAA,CACFT,CAAAA,EAAcL,CAAAA,CAAYG,CAAAA,CACjCW,CAAAA,CAAS,0BAAA,CACNA,CAAAA,CAAS,2BAAA,CAAA,CAGT,CACL,KAAA,CAAAP,CAAAA,CACA,QAASA,CAAAA,GAAU,SAAA,CACnB,MAAA,CAAAO,CAAAA,CACA,MAAA,CAAQpB,CAAAA,CAAWU,CAAAA,CAAYL,CAAS,CAAA,CACxC,MAAA,CAAQL,CAAAA,CAAWW,CAAAA,CAAYL,CAAS,CAAA,CACxC,QAAA,CAAUN,CAAAA,CAAWY,CAAAA,CAAcL,CAAW,CAAA,CAC9C,MAAA,CAAAY,CACF,CACF,CAGA,SAASE,CAAAA,EAAsB,CAC7B,IAAMJ,CAAAA,CAAYJ,CAAAA,CAGlB,GACEH,CAAAA,EAAcL,CAAAA,EACdM,GAAcL,CAAAA,EACdM,CAAAA,EAAgBL,CAAAA,CAChB,CACAM,CAAAA,CAAQ,SAAA,CACJI,CAAAA,GAAc,SAAA,EAChBb,CAAAA,CAAQ,SAAA,GAAYc,
CAAAA,EAAa,CAAA,CAEnC,MACF,CAGA,GACER,CAAAA,EAAcL,CAAAA,CAAYI,CAAAA,EAC1BE,CAAAA,EAAcL,CAAAA,CAAYG,CAAAA,EAC1BG,CAAAA,EAAgBL,CAAAA,CAAcE,CAAAA,CAC9B,CACAI,CAAAA,CAAQ,WAAA,CACJI,CAAAA,GAAc,WAAA,EAChBb,CAAAA,CAAQ,WAAA,GAAcc,GAAa,CAAA,CAErC,MACF,CAEAL,CAAAA,CAAQ,OACV,CAIA,SAASS,CAAAA,CAAOC,CAAAA,CAA0B,CACxCR,CAAAA,EAAiB,CAEjB,IAAMS,CAAAA,CAAOjC,CAAAA,CACXgC,CAAAA,CAAM,KAAA,CACNA,CAAAA,CAAM,WAAA,CACNA,CAAAA,CAAM,YAAA,CACN5B,CACF,CAAA,CAEAe,CAAAA,EAAca,CAAAA,CAAM,WAAA,CAAcA,CAAAA,CAAM,YAAA,CACxCZ,CAAAA,EAAca,CAAAA,CACdZ,CAAAA,EAAgB,EAEhBS,CAAAA,GACF,CAEA,SAASI,CAAAA,EAAoB,CAC3B,OAAAV,CAAAA,EAAiB,CACVG,CAAAA,EACT,CAEA,SAASQ,CAAAA,EAAc,CACrBX,CAAAA,EAAiB,CACjB,IAAM1B,CAAAA,CAAS6B,CAAAA,EAAY,CAC3B,GAAI7B,CAAAA,CAAO,KAAA,GAAU,SAAA,CACnB,MAAM,IAAID,CAAAA,CAAoBC,CAAM,CAExC,CAEA,SAASsC,GAAuB,CAC9B,OAAOT,CAAAA,EACT,CAEA,SAASU,CAAAA,EAAc,CACrBlB,CAAAA,CAAa,CAAA,CACbC,CAAAA,CAAa,CAAA,CACbC,CAAAA,CAAe,CAAA,CACfC,CAAAA,CAAQ,MAAA,CACRC,CAAAA,CAAc,IAAA,CAAK,GAAA,EAAI,CACvBV,CAAAA,CAAQ,OAAA,GAAUc,CAAAA,EAAa,EACjC,CAEA,OAAO,CAAE,MAAA,CAAAI,CAAAA,CAAQ,KAAA,CAAAG,CAAAA,CAAO,MAAAC,CAAAA,CAAO,QAAA,CAAAC,CAAAA,CAAU,KAAA,CAAAC,CAAM,CACjD,CCnLO,SAASC,CAAAA,CAAcC,CAAAA,CAA0C,CACtE,OAAO,CACL,KAAA,CAAOA,CAAAA,CAAS,KAAA,CAChB,WAAA,CAAaA,CAAAA,CAAS,KAAA,CAAM,YAAA,CAC5B,YAAA,CAAcA,CAAAA,CAAS,KAAA,CAAM,aAC/B,CACF,CAaO,SAASC,CAAAA,CAAWD,CAAAA,CAAuC,CAChE,OAAO,CACL,MAAOA,CAAAA,CAAS,KAAA,CAChB,WAAA,CAAaA,CAAAA,CAAS,KAAA,CAAM,aAAA,CAC5B,YAAA,CAAcA,CAAAA,CAAS,KAAA,CAAM,iBAC/B,CACF,CAKA,SAASE,CAAAA,CAAYC,CAAAA,CAA2C,CAC9D,OAAO,cAAA,GAAmBA,CAAAA,CAAwB,KACpD,CAMO,SAASC,CAAAA,CAAaJ,CAAAA,CAAuC,CAClE,OAAIE,CAAAA,CAAYF,CAAQ,CAAA,CAAUD,CAAAA,CAAcC,CAAQ,CAAA,CACjDC,CAAAA,CAAWD,CAA0B,CAC9C","file":"index.mjs","sourcesContent":["import type { GateStatus, TripReason } from \"../types/index.js\"\n\nexport class BudgetExceededError extends Error {\n readonly reason: TripReason\n readonly resets: Date\n readonly snapshot: GateStatus\n\n constructor(status: GateStatus) {\n super(\n `LLM gate tripped: ${status.reason ?? 
\"limit_exceeded\"}. ` +\n `Resets at ${status.resets.toISOString()}.`\n )\n this.name = \"BudgetExceededError\"\n this.reason = status.reason as TripReason\n this.resets = status.resets\n this.snapshot = status\n\n // Maintain proper prototype chain in transpiled environments\n Object.setPrototypeOf(this, new.target.prototype)\n }\n}\n","import type { PricingTable } from \"../types/index.js\";\n\n/**\n * Default pricing table for common models.\n * Prices are per token in USD.\n * Users can override via GateOptions.pricing.\n *\n * Last updated: 2026-03\n */\nexport const defaultPricing: PricingTable = {\n // Anthropic\n \"claude-opus-4-20250514\": {\n inputPerToken: 0.000015,\n outputPerToken: 0.000075,\n },\n \"claude-sonnet-4-20250514\": {\n inputPerToken: 0.000003,\n outputPerToken: 0.000015,\n },\n \"claude-haiku-4-5-20251001\": {\n inputPerToken: 0.0000008,\n outputPerToken: 0.000004,\n },\n\n // OpenAI\n \"gpt-4o\": {\n inputPerToken: 0.0000025,\n outputPerToken: 0.00001,\n },\n \"gpt-4o-mini\": {\n inputPerToken: 0.00000015,\n outputPerToken: 0.0000006,\n },\n \"gpt-4-turbo\": {\n inputPerToken: 0.00001,\n outputPerToken: 0.00003,\n },\n o3: {\n inputPerToken: 0.00001,\n outputPerToken: 0.00004,\n },\n \"o4-mini\": {\n inputPerToken: 0.0000011,\n outputPerToken: 0.0000044,\n },\n};\n\n/**\n * Resolve cost for a given model and token counts.\n * Falls back to 0 if model is not found in the table.\n */\nexport function resolveCost(\n model: string,\n inputTokens: number,\n outputTokens: number,\n pricing: PricingTable,\n): number {\n const entry = pricing[model];\n if (!entry) return 0;\n return (\n inputTokens * entry.inputPerToken + outputTokens * entry.outputPerToken\n );\n}\n","import { BudgetExceededError } from \"./errors/index.js\";\nimport { defaultPricing, resolveCost } from \"./pricing/index.js\";\nimport type {\n CircuitState,\n GateInstance,\n GateMetric,\n GateOptions,\n GateStatus,\n ThrottleReason,\n TripReason,\n UsageRecord,\n} from 
\"./types/index.js\";\n\nconst DEFAULT_WINDOW_MS = 60_000; // 1 minute\nconst DEFAULT_THROTTLE = 0.8; // 80% of limit triggers THROTTLED\nconst SENTINEL_LIMIT = Infinity; // when a dimension is not configured\n\nfunction makeMetric(used: number, limit: number): GateMetric {\n return {\n used,\n remaining: limit === Infinity ? Infinity : Math.max(0, limit - used),\n limit: limit === Infinity ? -1 : limit,\n };\n}\n\n/**\n * createGate — lightweight LLM budget & token guard.\n *\n * @example\n * ```ts\n * const gate = createGate({\n * maxTokens: 5000,\n * maxBudget: 0.10,\n * maxRequests: 100,\n * windowMs: 60_000,\n * onThrottled: (s) => console.warn(\"Throttled\", s.tokens),\n * onTripped: (s) => console.error(\"Tripped!\", s.reason),\n * onReset: (s) => console.log(\"Gate reset\"),\n * })\n * ```\n */\nexport function createGate(options: GateOptions = {}): GateInstance {\n // Config\n const maxTokens = options.maxTokens ?? SENTINEL_LIMIT;\n const maxBudget = options.maxBudget ?? SENTINEL_LIMIT;\n const maxRequests = options.maxRequests ?? SENTINEL_LIMIT;\n const windowMs = options.windowMs ?? DEFAULT_WINDOW_MS;\n const throttleAt = options.throttleAt ?? 
DEFAULT_THROTTLE;\n const pricing = { ...defaultPricing, ...options.pricing };\n\n if (\n maxTokens === SENTINEL_LIMIT &&\n maxBudget === SENTINEL_LIMIT &&\n maxRequests === SENTINEL_LIMIT\n ) {\n throw new Error(\n \"[llm-gate] At least one limit must be set: maxTokens, maxBudget, or maxRequests.\",\n );\n }\n\n // Internal State\n let tokensUsed = 0;\n let budgetUsed = 0;\n let requestCount = 0;\n let state: CircuitState = \"OPEN\";\n let windowStart = Date.now();\n\n // Window Management\n function checkWindowReset(): void {\n const now = Date.now();\n if (now - windowStart >= windowMs) {\n const prevState = state;\n tokensUsed = 0;\n budgetUsed = 0;\n requestCount = 0;\n state = \"OPEN\";\n windowStart = now;\n\n if (prevState !== \"OPEN\") {\n options.onReset?.(buildStatus());\n }\n }\n }\n\n // Status Builder\n function buildStatus(): GateStatus {\n const resets = new Date(windowStart + windowMs);\n\n let reason: TripReason | ThrottleReason = null;\n\n if (state === \"TRIPPED\") {\n if (tokensUsed >= maxTokens) reason = \"token_limit_exceeded\";\n else if (budgetUsed >= maxBudget) reason = \"budget_limit_exceeded\";\n else reason = \"request_limit_exceeded\";\n } else if (state === \"THROTTLED\") {\n if (tokensUsed >= maxTokens * throttleAt)\n reason = \"approaching_token_limit\";\n else if (budgetUsed >= maxBudget * throttleAt)\n reason = \"approaching_budget_limit\";\n else reason = \"approaching_request_limit\";\n }\n\n return {\n state,\n allowed: state !== \"TRIPPED\",\n reason,\n tokens: makeMetric(tokensUsed, maxTokens),\n budget: makeMetric(budgetUsed, maxBudget),\n requests: makeMetric(requestCount, maxRequests),\n resets,\n };\n }\n\n // State Machine\n function evaluateState(): void {\n const prevState = state;\n\n // Check TRIPPED first — hard limits\n if (\n tokensUsed >= maxTokens ||\n budgetUsed >= maxBudget ||\n requestCount >= maxRequests\n ) {\n state = \"TRIPPED\";\n if (prevState !== \"TRIPPED\") {\n options.onTripped?.(buildStatus());\n 
}\n return;\n }\n\n // Check THROTTLED — soft warning threshold\n if (\n tokensUsed >= maxTokens * throttleAt ||\n budgetUsed >= maxBudget * throttleAt ||\n requestCount >= maxRequests * throttleAt\n ) {\n state = \"THROTTLED\";\n if (prevState !== \"THROTTLED\") {\n options.onThrottled?.(buildStatus());\n }\n return;\n }\n\n state = \"OPEN\";\n }\n\n // Public API\n\n function record(usage: UsageRecord): void {\n checkWindowReset();\n\n const cost = resolveCost(\n usage.model,\n usage.inputTokens,\n usage.outputTokens,\n pricing,\n );\n\n tokensUsed += usage.inputTokens + usage.outputTokens;\n budgetUsed += cost;\n requestCount += 1;\n\n evaluateState();\n }\n\n function check(): GateStatus {\n checkWindowReset();\n return buildStatus();\n }\n\n function guard(): void {\n checkWindowReset();\n const status = buildStatus();\n if (status.state === \"TRIPPED\") {\n throw new BudgetExceededError(status);\n }\n }\n\n function snapshot(): GateStatus {\n return buildStatus();\n }\n\n function reset(): void {\n tokensUsed = 0;\n budgetUsed = 0;\n requestCount = 0;\n state = \"OPEN\";\n windowStart = Date.now();\n options.onReset?.(buildStatus());\n }\n\n return { record, check, guard, snapshot, reset };\n}\n","import type { UsageRecord } from \"../types/index.js\";\n\n// Anthropic\nexport interface AnthropicUsage {\n input_tokens: number;\n output_tokens: number;\n}\n\nexport interface AnthropicResponse {\n model: string;\n usage: AnthropicUsage;\n}\n\nexport function fromAnthropic(response: AnthropicResponse): UsageRecord {\n return {\n model: response.model,\n inputTokens: response.usage.input_tokens,\n outputTokens: response.usage.output_tokens,\n };\n}\n\n// OpenAI\nexport interface OpenAIUsage {\n prompt_tokens: number;\n completion_tokens: number;\n}\n\nexport interface OpenAIResponse {\n model: string;\n usage: OpenAIUsage;\n}\n\nexport function fromOpenAI(response: OpenAIResponse): UsageRecord {\n return {\n model: response.model,\n inputTokens: 
response.usage.prompt_tokens,\n outputTokens: response.usage.completion_tokens,\n };\n}\n\n// Auto-detect\ntype AnyLLMResponse = AnthropicResponse | OpenAIResponse;\n\nfunction isAnthropic(r: AnyLLMResponse): r is AnthropicResponse {\n return \"input_tokens\" in (r as AnthropicResponse).usage;\n}\n\n/**\n * Auto-detect provider from response shape and normalize to UsageRecord.\n * Supports Anthropic and OpenAI response formats.\n */\nexport function fromResponse(response: AnyLLMResponse): UsageRecord {\n if (isAnthropic(response)) return fromAnthropic(response);\n return fromOpenAI(response as OpenAIResponse);\n}\n"]}
|
package/package.json
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@ekaone/llm-gate",
|
|
3
|
+
"version": "0.0.1",
|
|
4
|
+
"description": "Lightweight LLM budget & token guard. Prevents Denial of Wallet attacks with a circuit-breaker state machine.",
|
|
5
|
+
"keywords": [
|
|
6
|
+
"llm",
|
|
7
|
+
"token",
|
|
8
|
+
"budget",
|
|
9
|
+
"guard",
|
|
10
|
+
"circuit-breaker",
|
|
11
|
+
"ai",
|
|
12
|
+
"anthropic",
|
|
13
|
+
"openai",
|
|
14
|
+
"rate-limit",
|
|
15
|
+
"agent"
|
|
16
|
+
],
|
|
17
|
+
"author": {
|
|
18
|
+
"name": "Eka Prasetia",
|
|
19
|
+
"email": "ekaone3033@gmail.com",
|
|
20
|
+
"url": "https://prasetia.me"
|
|
21
|
+
},
|
|
22
|
+
"license": "MIT",
|
|
23
|
+
"repository": {
|
|
24
|
+
"type": "git",
|
|
25
|
+
"url": "https://github.com/ekaone/llm-gate.git"
|
|
26
|
+
},
|
|
27
|
+
"type": "module",
|
|
28
|
+
"main": "./dist/index.cjs",
|
|
29
|
+
"module": "./dist/index.mjs",
|
|
30
|
+
"types": "./dist/index.d.ts",
|
|
31
|
+
"exports": {
|
|
32
|
+
".": {
|
|
33
|
+
"types": "./dist/index.d.ts",
|
|
34
|
+
"import": "./dist/index.mjs",
|
|
35
|
+
"require": "./dist/index.cjs"
|
|
36
|
+
}
|
|
37
|
+
},
|
|
38
|
+
"publishConfig": {
|
|
39
|
+
"access": "public"
|
|
40
|
+
},
|
|
41
|
+
"files": [
|
|
42
|
+
"dist",
|
|
43
|
+
"README.md",
|
|
44
|
+
"LICENSE"
|
|
45
|
+
],
|
|
46
|
+
"sideEffects": false,
|
|
47
|
+
"devDependencies": {
|
|
48
|
+
"@types/node": "^25.5.0",
|
|
49
|
+
"@vitest/coverage-v8": "^4.1.0",
|
|
50
|
+
"@vitest/ui": "^4.1.0",
|
|
51
|
+
"rimraf": "^6.1.3",
|
|
52
|
+
"tsup": "^8.5.1",
|
|
53
|
+
"typescript": "^5.9.3",
|
|
54
|
+
"vitest": "^4.1.0"
|
|
55
|
+
},
|
|
56
|
+
"scripts": {
|
|
57
|
+
"build": "tsup",
|
|
58
|
+
"dev": "tsup --watch",
|
|
59
|
+
"clean": "rimraf dist",
|
|
60
|
+
"typecheck": "tsc --noEmit",
|
|
61
|
+
"test": "vitest run",
|
|
62
|
+
"test:watch": "vitest",
|
|
63
|
+
"test:ui": "vitest --ui",
|
|
64
|
+
"test:coverage": "vitest run --coverage"
|
|
65
|
+
}
|
|
66
|
+
}
|