@tktideai/ai-api-cost-guard 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +360 -0
- package/dist/errors.d.ts +14 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +14 -0
- package/dist/errors.js.map +1 -0
- package/dist/guard.d.ts +99 -0
- package/dist/guard.d.ts.map +1 -0
- package/dist/guard.js +310 -0
- package/dist/guard.js.map +1 -0
- package/dist/index.d.ts +6 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +11 -0
- package/dist/index.js.map +1 -0
- package/dist/notifiers/email.d.ts +36 -0
- package/dist/notifiers/email.d.ts.map +1 -0
- package/dist/notifiers/email.js +284 -0
- package/dist/notifiers/email.js.map +1 -0
- package/dist/notifiers/index.d.ts +3 -0
- package/dist/notifiers/index.d.ts.map +1 -0
- package/dist/notifiers/index.js +6 -0
- package/dist/notifiers/index.js.map +1 -0
- package/dist/pricing.d.ts +11 -0
- package/dist/pricing.d.ts.map +1 -0
- package/dist/pricing.js +36 -0
- package/dist/pricing.js.map +1 -0
- package/dist/types.d.ts +127 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/package.json +78 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 TKTIDE (https://tktide.com)
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,360 @@
|
|
|
1
|
+
# api-cost-guard
|
|
2
|
+
|
|
3
|
+
> **Set a hard stop on your AI API spend — in one line of code.**
|
|
4
|
+
|
|
5
|
+
Your cloud provider's billing limit doesn't stop mid-request spikes, infinite loops, or per-user abuse. This does.
|
|
6
|
+
|
|
7
|
+
Built with TypeScript and money-safe decimal arithmetic. Zero floating-point surprises.
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## The problem
|
|
12
|
+
|
|
13
|
+
```
|
|
14
|
+
Your app has a bug → infinite loop → 1,000 API calls in 60 seconds
|
|
15
|
+
Your cloud billing limit? It fires an email. After the fact.
|
|
16
|
+
Your credit card? Already charged.
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
**api-cost-guard** intercepts every request **before** it reaches the provider. If the cost would exceed your limit — it never goes out.
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## Install
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
npm install @tktideai/ai-api-cost-guard
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## Quick start
|
|
32
|
+
|
|
33
|
+
```ts
|
|
34
|
+
import { ApiCostGuard } from '@tktideai/ai-api-cost-guard';
|
|
35
|
+
|
|
36
|
+
const guard = new ApiCostGuard({
|
|
37
|
+
hardLimit: 10.00, // stop all requests at $10
|
|
38
|
+
softLimit: 8.00, // warn at $8
|
|
39
|
+
perRequestLimit: 0.50, // no single request over $0.50
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
// Before every API call:
|
|
43
|
+
const check = guard.preflightCheck('gpt-4o', 500, 500);
|
|
44
|
+
if (!check.allowed) return; // blocked — never reaches OpenAI (with the default throwOnBlock: true, a block throws BudgetExceededError instead of returning)
|
|
45
|
+
|
|
46
|
+
const response = await openai.chat.completions.create({ ... });
|
|
47
|
+
|
|
48
|
+
// After — record real usage so future checks stay accurate:
|
|
49
|
+
guard.recordUsage('gpt-4o',
|
|
50
|
+
response.usage.prompt_tokens,
|
|
51
|
+
response.usage.completion_tokens,
|
|
52
|
+
);
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
## Simpler: use `wrap()`
|
|
58
|
+
|
|
59
|
+
```ts
|
|
60
|
+
const response = await guard.wrap(
|
|
61
|
+
() => openai.chat.completions.create({ model: 'gpt-4o', messages }),
|
|
62
|
+
{
|
|
63
|
+
model: 'gpt-4o',
|
|
64
|
+
estimatedInputTokens: 500,
|
|
65
|
+
estimatedOutputTokens: 500,
|
|
66
|
+
extractUsage: (res) => ({
|
|
67
|
+
inputTokens: res.usage.prompt_tokens,
|
|
68
|
+
outputTokens: res.usage.completion_tokens,
|
|
69
|
+
}),
|
|
70
|
+
},
|
|
71
|
+
);
|
|
72
|
+
// returns the result if allowed; if blocked, throws BudgetExceededError (default) or returns null when throwOnBlock: false
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
---
|
|
76
|
+
|
|
77
|
+
## Options
|
|
78
|
+
|
|
79
|
+
```ts
|
|
80
|
+
const guard = new ApiCostGuard({
|
|
81
|
+
// ── Limits ───────────────────────────────────────────────────────────
|
|
82
|
+
hardLimit: 10.00, // $ — block all requests at this cumulative spend
|
|
83
|
+
softLimit: 8.00, // $ — fire onWarning (does not block)
|
|
84
|
+
perRequestLimit: 0.50, // $ — block any single request above this cost
|
|
85
|
+
|
|
86
|
+
// ── Rolling window ───────────────────────────────────────────────────
|
|
87
|
+
windowMs: 3_600_000, // only count spend in the last 1 hour (optional)
|
|
88
|
+
|
|
89
|
+
// ── Callbacks ────────────────────────────────────────────────────────
|
|
90
|
+
onWarning: (details) => {
|
|
91
|
+
console.log(`⚠️ Approaching limit: $${details.projectedTotal}`);
|
|
92
|
+
},
|
|
93
|
+
onBlock: (error) => {
|
|
94
|
+
console.log(`🛑 Blocked: ${error.message}`);
|
|
95
|
+
},
|
|
96
|
+
|
|
97
|
+
// ── Behaviour ────────────────────────────────────────────────────────
|
|
98
|
+
throwOnBlock: true, // throw BudgetExceededError when blocked (default: true)
|
|
99
|
+
|
|
100
|
+
// ── Custom pricing ───────────────────────────────────────────────────
|
|
101
|
+
customPricing: {
|
|
102
|
+
'my-fine-tuned-model': { input: '0.008', output: '0.016', unit: 1000 },
|
|
103
|
+
},
|
|
104
|
+
});
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
---
|
|
108
|
+
|
|
109
|
+
## Supported models
|
|
110
|
+
|
|
111
|
+
Pricing is built-in and matched by substring — versioned names like `gpt-4o-mini-2024-07-18` work automatically.
|
|
112
|
+
|
|
113
|
+
### OpenAI
|
|
114
|
+
| Model | Input / 1k | Output / 1k |
|
|
115
|
+
|---|---|---|
|
|
116
|
+
| gpt-4o | $0.0025 | $0.0100 |
|
|
117
|
+
| gpt-4o-mini | $0.00015 | $0.0006 |
|
|
118
|
+
| gpt-4-turbo | $0.0100 | $0.0300 |
|
|
119
|
+
| gpt-4 | $0.0300 | $0.0600 |
|
|
120
|
+
| gpt-3.5-turbo | $0.0005 | $0.0015 |
|
|
121
|
+
| o1 | $0.0150 | $0.0600 |
|
|
122
|
+
| o3 | $0.0100 | $0.0400 |
|
|
123
|
+
| o4-mini | $0.0011 | $0.0044 |
|
|
124
|
+
| o1-pro | $0.1500 | $0.6000 |
|
|
125
|
+
|
|
126
|
+
### Anthropic
|
|
127
|
+
| Model | Input / 1k | Output / 1k |
|
|
128
|
+
|---|---|---|
|
|
129
|
+
| claude-opus-4 | $0.0150 | $0.0750 |
|
|
130
|
+
| claude-sonnet-4 | $0.0030 | $0.0150 |
|
|
131
|
+
| claude-3-5-sonnet | $0.0030 | $0.0150 |
|
|
132
|
+
| claude-3-5-haiku | $0.0008 | $0.0040 |
|
|
133
|
+
| claude-3-opus | $0.0150 | $0.0750 |
|
|
134
|
+
| claude-3-haiku | $0.00025 | $0.00125 |
|
|
135
|
+
|
|
136
|
+
### Google Gemini
|
|
137
|
+
| Model | Input / 1k | Output / 1k |
|
|
138
|
+
|---|---|---|
|
|
139
|
+
| gemini-2.5-pro | $0.00125 | $0.0100 |
|
|
140
|
+
| gemini-2.0-flash | $0.0001 | $0.0004 |
|
|
141
|
+
| gemini-1.5-pro | $0.00125 | $0.0050 |
|
|
142
|
+
| gemini-1.5-flash | $0.000075 | $0.0003 |
|
|
143
|
+
|
|
144
|
+
> Prices may change. Always verify against provider docs and override via `customPricing` if needed.
|
|
145
|
+
|
|
146
|
+
---
|
|
147
|
+
|
|
148
|
+
## Check your spend
|
|
149
|
+
|
|
150
|
+
```ts
|
|
151
|
+
const stats = guard.getStats();
|
|
152
|
+
|
|
153
|
+
console.log(stats.totalSpend.toFixed(4)); // "3.4200"
|
|
154
|
+
console.log(stats.remainingBudget.toFixed(4)); // "6.5800"
|
|
155
|
+
console.log(stats.percentUsed); // "34.20"
|
|
156
|
+
console.log(stats.requestCount); // 47
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
---
|
|
160
|
+
|
|
161
|
+
## Express middleware
|
|
162
|
+
|
|
163
|
+
```ts
|
|
164
|
+
import { ApiCostGuard, createMiddleware } from '@tktideai/ai-api-cost-guard';
|
|
165
|
+
|
|
166
|
+
const guard = new ApiCostGuard({ hardLimit: 50 });
|
|
167
|
+
|
|
168
|
+
app.post('/api/chat', createMiddleware(guard), async (req, res) => {
|
|
169
|
+
// If we reach here, budget is OK
|
|
170
|
+
const response = await openai.chat.completions.create(req.body);
|
|
171
|
+
guard.recordUsage(
|
|
172
|
+
req.body.model,
|
|
173
|
+
response.usage.prompt_tokens,
|
|
174
|
+
response.usage.completion_tokens,
|
|
175
|
+
);
|
|
176
|
+
res.json(response);
|
|
177
|
+
});
|
|
178
|
+
// Returns 429 automatically if budget exceeded
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
---
|
|
182
|
+
|
|
183
|
+
## Per-user limits (SaaS)
|
|
184
|
+
|
|
185
|
+
```ts
|
|
186
|
+
// One guard per user — isolated budgets
|
|
187
|
+
const userGuards = new Map<string, ApiCostGuard>();
|
|
188
|
+
|
|
189
|
+
function getGuard(userId: string): ApiCostGuard {
|
|
190
|
+
if (!userGuards.has(userId)) {
|
|
191
|
+
userGuards.set(userId, new ApiCostGuard({
|
|
192
|
+
hardLimit: 5.00, // $5 per user
|
|
193
|
+
windowMs: 30 * 24 * 60 * 60 * 1000, // rolling 30-day window (≈ per month, not a calendar month)
|
|
194
|
+
}));
|
|
195
|
+
}
|
|
196
|
+
return userGuards.get(userId)!;
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
// In your route:
|
|
200
|
+
const guard = getGuard(req.user.id);
|
|
201
|
+
const check = guard.preflightCheck('gpt-4o', 500, 500);
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
---
|
|
205
|
+
|
|
206
|
+
## Email alerts (optional)
|
|
207
|
+
|
|
208
|
+
Get real email notifications when limits are hit. Uses Gmail SMTP — no external API key needed.
|
|
209
|
+
|
|
210
|
+
### Setup
|
|
211
|
+
|
|
212
|
+
**1. Get a Gmail app password for your sender address:**
|
|
213
|
+
```
|
|
214
|
+
Google Account → Security → 2-Step Verification → App Passwords → Generate
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
**2. Add to your `.env`:**
|
|
218
|
+
```env
|
|
219
|
+
GMAIL_FROM=your@gmail.com
|
|
220
|
+
GMAIL_PASS=xxxx xxxx xxxx xxxx
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
**3. Use in your app:**
|
|
224
|
+
```ts
|
|
225
|
+
import { ApiCostGuard } from '@tktideai/ai-api-cost-guard';
|
|
226
|
+
import { createEmailNotifier } from '@tktideai/ai-api-cost-guard/notifiers';
|
|
227
|
+
|
|
228
|
+
const notifier = createEmailNotifier({
|
|
229
|
+
from: process.env.GMAIL_FROM!, // your Gmail — used as the sender
|
|
230
|
+
password: process.env.GMAIL_PASS!, // your Gmail app password
|
|
231
|
+
to: 'you@example.com', // where alerts are delivered
|
|
232
|
+
message: 'Your AI budget needs attention.', // optional
|
|
233
|
+
});
|
|
234
|
+
|
|
235
|
+
const guard = new ApiCostGuard({
|
|
236
|
+
softLimit: 8,
|
|
237
|
+
hardLimit: 10,
|
|
238
|
+
onWarning: notifier,
|
|
239
|
+
onBlock: notifier,
|
|
240
|
+
});
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
### What the email looks like
|
|
244
|
+
|
|
245
|
+
```
|
|
246
|
+
Subject: ⚠️ API Cost Alert — Budget Warning | TKTIDE
|
|
247
|
+
|
|
248
|
+
┌─────────────────────────────────────┐
|
|
249
|
+
│ ⚠️ Approaching Budget Limit │
|
|
250
|
+
├─────────────────────────────────────┤
|
|
251
|
+
│ Your AI budget needs attention. │ ← your message
|
|
252
|
+
│ │
|
|
253
|
+
│ Trigger Soft Limit Warning │
|
|
254
|
+
│ Current Spend $7.9900 │
|
|
255
|
+
│ Projected $8.0075 │
|
|
256
|
+
│ Limit $8.0000 │
|
|
257
|
+
├─────────────────────────────────────┤
|
|
258
|
+
│ [TKTIDE logo] │
|
|
259
|
+
│ TKTIDE is here to help │
|
|
260
|
+
│ https://tktide.com │
|
|
261
|
+
└─────────────────────────────────────┘
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
> Email failures are non-fatal — they never crash or block your app.
|
|
265
|
+
|
|
266
|
+
---
|
|
267
|
+
|
|
268
|
+
## Why TypeScript + Decimal.js?
|
|
269
|
+
|
|
270
|
+
Plain JavaScript has a well-known problem with money math:
|
|
271
|
+
|
|
272
|
+
```js
|
|
273
|
+
// JavaScript — this is real:
|
|
274
|
+
0.1 + 0.2 === 0.30000000000000004 // true
|
|
275
|
+
|
|
276
|
+
// A running total that should be exactly $0.30 is not — so an equality check
|
|
277
|
+
// against your $0.30 limit never matches, and boundary comparisons can land on the wrong side
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
**api-cost-guard** uses [Decimal.js](https://mikemcl.github.io/decimal.js/) for all arithmetic:
|
|
281
|
+
|
|
282
|
+
```ts
|
|
283
|
+
new Decimal('0.10').plus('0.20').equals('0.30') // always true
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
Every cost, limit, and comparison is exact.
|
|
287
|
+
|
|
288
|
+
---
|
|
289
|
+
|
|
290
|
+
## Error handling
|
|
291
|
+
|
|
292
|
+
```ts
|
|
293
|
+
import { ApiCostGuard, BudgetExceededError } from '@tktideai/ai-api-cost-guard';
|
|
294
|
+
|
|
295
|
+
try {
|
|
296
|
+
guard.preflightCheck('gpt-4o', 5000, 2000);
|
|
297
|
+
} catch (e) {
|
|
298
|
+
if (e instanceof BudgetExceededError) {
|
|
299
|
+
console.log(e.details.type); // 'hard_limit' | 'per_request_limit'
|
|
300
|
+
console.log(e.details.limit); // Decimal
|
|
301
|
+
console.log(e.details.projectedTotal); // Decimal
|
|
302
|
+
console.log(e.details.currentSpend); // Decimal
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
```
|
|
306
|
+
|
|
307
|
+
Or opt out of throwing:
|
|
308
|
+
|
|
309
|
+
```ts
|
|
310
|
+
const guard = new ApiCostGuard({
|
|
311
|
+
hardLimit: 10,
|
|
312
|
+
throwOnBlock: false, // returns { allowed: false } instead of throwing
|
|
313
|
+
});
|
|
314
|
+
|
|
315
|
+
const check = guard.preflightCheck('gpt-4o', 5000, 2000);
|
|
316
|
+
if (!check.allowed) {
|
|
317
|
+
console.log(check.reason); // 'hard_limit' | 'per_request_limit'
|
|
318
|
+
}
|
|
319
|
+
```
|
|
320
|
+
|
|
321
|
+
---
|
|
322
|
+
|
|
323
|
+
## Full API reference
|
|
324
|
+
|
|
325
|
+
### `new ApiCostGuard(options)`
|
|
326
|
+
Creates a new guard instance.
|
|
327
|
+
|
|
328
|
+
### `guard.preflightCheck(model, inputTokens, outputTokens?)`
|
|
329
|
+
Check if a request is within budget before sending.
|
|
330
|
+
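Returns `PreflightResult` — `{ allowed: true }` or `{ allowed: false, reason }`. With the default `throwOnBlock: true`, a blocked check throws `BudgetExceededError` instead of returning `{ allowed: false }`.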
|
|
331
|
+
|
|
332
|
+
### `guard.recordUsage(model, inputTokens, outputTokens, metadata?)`
|
|
333
|
+
Record actual token usage after a successful API response.
|
|
334
|
+
Returns the cost as a `Decimal`.
|
|
335
|
+
|
|
336
|
+
### `guard.wrap(apiFn, options)`
|
|
337
|
+
Combines preflight + API call + usage recording in one call.
|
|
338
|
+
Returns the API result or `null` if blocked (when `throwOnBlock: false`).
|
|
339
|
+
|
|
340
|
+
### `guard.getStats()`
|
|
341
|
+
Returns a snapshot of current spend, request count, remaining budget, and the full ledger.
|
|
342
|
+
|
|
343
|
+
### `guard.reset()`
|
|
344
|
+
Clears all spend records. Useful between billing periods or test runs.
|
|
345
|
+
|
|
346
|
+
---
|
|
347
|
+
|
|
348
|
+
## Scripts
|
|
349
|
+
|
|
350
|
+
```bash
|
|
351
|
+
npm test # run all 62 tests
|
|
352
|
+
npm run build # compile TypeScript to dist/
|
|
353
|
+
npm run test:watch # watch mode
|
|
354
|
+
```
|
|
355
|
+
|
|
356
|
+
---
|
|
357
|
+
|
|
358
|
+
## License
|
|
359
|
+
|
|
360
|
+
MIT — built with ❤️ by [TKTIDE](https://tktide.com)
|
package/dist/errors.d.ts
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import type { BlockReason } from './types';
|
|
2
|
+
import Decimal from 'decimal.js';
|
|
3
|
+
/**
 * Structured context describing why a request was blocked.
 * Carried on `BudgetExceededError.details`; all monetary values are `Decimal` instances.
 */
export interface BudgetExceededDetails {
|
|
4
|
+
/** Which limit caused the block (the README lists 'hard_limit' and 'per_request_limit'). */
readonly type: BlockReason;
|
|
5
|
+
/** Spend that the request was projected to reach — presumably currentSpend plus the estimated cost; TODO confirm against guard.ts. */
readonly projectedTotal: Decimal;
|
|
6
|
+
/** Spend already recorded when the check ran. */
readonly currentSpend: Decimal;
|
|
7
|
+
/** The configured limit value the request was checked against. */
readonly limit: Decimal;
|
|
8
|
+
/** Estimated cost of the blocked request. Nullable — presumably null when the model has no pricing entry; TODO confirm. */
readonly estimatedCost: Decimal | null;
|
|
9
|
+
}
|
|
10
|
+
/**
 * Error type used to signal that a request was blocked by a budget limit.
 * Callers can narrow with `instanceof BudgetExceededError` and inspect `details`.
 */
export declare class BudgetExceededError extends Error {
|
|
11
|
+
/** Structured information about the limit that was hit and the spend figures involved. */
readonly details: BudgetExceededDetails;
|
|
12
|
+
/**
 * @param message Human-readable description of the block.
 * @param details Structured context stored on `this.details`.
 */
constructor(message: string, details: BudgetExceededDetails);
|
|
13
|
+
}
|
|
14
|
+
//# sourceMappingURL=errors.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"errors.d.ts","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AAC3C,OAAO,OAAO,MAAM,YAAY,CAAC;AAEjC,MAAM,WAAW,qBAAqB;IACpC,QAAQ,CAAC,IAAI,EAAE,WAAW,CAAC;IAC3B,QAAQ,CAAC,cAAc,EAAE,OAAO,CAAC;IACjC,QAAQ,CAAC,YAAY,EAAE,OAAO,CAAC;IAC/B,QAAQ,CAAC,KAAK,EAAE,OAAO,CAAC;IACxB,QAAQ,CAAC,aAAa,EAAE,OAAO,GAAG,IAAI,CAAC;CACxC;AAED,qBAAa,mBAAoB,SAAQ,KAAK;IAC5C,SAAgB,OAAO,EAAE,qBAAqB,CAAC;gBAEnC,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,qBAAqB;CAQ5D"}
|
package/dist/errors.js
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.BudgetExceededError = void 0;
|
|
4
|
+
/**
 * Error raised when a request is blocked because it would exceed a configured
 * budget limit.
 *
 * The structured context supplied by the caller is stored unchanged on
 * `details`, so handlers can inspect it after an `instanceof` check.
 */
class BudgetExceededError extends Error {
    /**
     * @param message Human-readable description of why the request was blocked.
     * @param details Structured context for the block; stored as-is on `this.details`.
     */
    constructor(message, details) {
        super(message);
        this.name = 'BudgetExceededError';
        this.details = details;
        // Re-assert the prototype chain for environments where extending Error
        // loses it. Using `new.target.prototype` instead of a hard-coded
        // BudgetExceededError.prototype keeps subclasses intact: the hard-coded
        // form would reset a subclass instance back to this class, breaking
        // `instanceof Subclass` and hiding the subclass's own methods.
        Object.setPrototypeOf(this, new.target.prototype);
    }
}
|
|
13
|
+
exports.BudgetExceededError = BudgetExceededError;
|
|
14
|
+
//# sourceMappingURL=errors.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"errors.js","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":";;;AAWA,MAAa,mBAAoB,SAAQ,KAAK;IAG5C,YAAY,OAAe,EAAE,OAA8B;QACzD,KAAK,CAAC,OAAO,CAAC,CAAC;QACf,IAAI,CAAC,IAAI,GAAG,qBAAqB,CAAC;QAClC,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QAEvB,iDAAiD;QACjD,MAAM,CAAC,cAAc,CAAC,IAAI,EAAE,mBAAmB,CAAC,SAAS,CAAC,CAAC;IAC7D,CAAC;CACF;AAXD,kDAWC"}
|
package/dist/guard.d.ts
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import Decimal from 'decimal.js';
|
|
2
|
+
import type { ApiCostGuardOptions, PreflightResult, GuardStats, WrapOptions } from './types';
|
|
3
|
+
/**
 * Budget guard for AI API calls: estimates request cost, checks it against the
 * configured limits before the request is sent, and records actual usage afterwards.
 *
 * Typical flow: `preflightCheck` → make the API call → `recordUsage`, or use `wrap`
 * to do all three in one call.
 */
export declare class ApiCostGuard {
|
|
4
|
+
/** Cumulative spend ceiling; per the README, requests are blocked once this is reached. */
private readonly hardLimit;
|
|
5
|
+
/** Warning threshold; per the README it fires `onWarning` and does not block. */
private readonly softLimit;
|
|
6
|
+
/** Maximum cost allowed for any single request. */
private readonly perRequestLimit;
|
|
7
|
+
/** Length of the rolling spend window in milliseconds (optional per the README). */
private readonly windowMs;
|
|
8
|
+
/** When true, a blocked check throws `BudgetExceededError` instead of returning a result. */
private readonly throwOnBlock;
|
|
9
|
+
/** Callback invoked on a soft-limit warning. */
private readonly onWarning;
|
|
10
|
+
/** Callback invoked when a request is blocked. */
private readonly onBlock;
|
|
11
|
+
/** Pricing table used for cost estimates — presumably built-in prices plus `customPricing`; confirm in guard.ts. */
private readonly pricing;
|
|
12
|
+
/** Recorded usage entries (the README documents `getStats()` as returning the full ledger). */
private ledger;
|
|
13
|
+
/** Running spend total. NOTE(review): how this relates to windowed spend is not visible from this declaration. */
private totalSpend;
|
|
14
|
+
/**
 * @param options Limits, callbacks, behaviour flags and custom pricing; the whole object is optional.
 */
constructor(options?: ApiCostGuardOptions);
|
|
15
|
+
/**
|
|
16
|
+
* Estimate the cost of a request without checking limits.
|
|
17
|
+
* Returns null if the model is not in the pricing table.
* @param model Model name to price.
* @param inputTokens Number of input (prompt) tokens.
* @param outputTokens Number of output (completion) tokens; optional.
|
|
18
|
+
*/
|
|
19
|
+
estimateCost(model: string, inputTokens: number, outputTokens?: number): Decimal | null;
|
|
20
|
+
/**
|
|
21
|
+
* Check whether a request should be allowed based on current spend.
|
|
22
|
+
* Call this BEFORE making the API request.
|
|
23
|
+
*
* @param model Model name the request will use.
* @param estimatedInputTokens Expected input token count.
* @param estimatedOutputTokens Expected output token count; optional.
* @returns Per the README, `{ allowed: true }` or `{ allowed: false, reason }`.
|
|
24
|
+
* @throws {BudgetExceededError} if blocked and `throwOnBlock` is true
|
|
25
|
+
*/
|
|
26
|
+
preflightCheck(model: string, estimatedInputTokens: number, estimatedOutputTokens?: number): PreflightResult;
|
|
27
|
+
/**
|
|
28
|
+
* Record the actual token usage AFTER a successful API response.
|
|
29
|
+
* This keeps the spend ledger accurate.
|
|
30
|
+
*
* @param model Model name that served the request.
* @param inputTokens Actual input token count reported by the provider.
* @param outputTokens Actual output token count reported by the provider.
* @param metadata Optional caller-defined data for this entry — presumably stored alongside it in the ledger; confirm in guard.ts.
|
|
31
|
+
* @returns The actual cost of this request
|
|
32
|
+
*/
|
|
33
|
+
recordUsage(model: string, inputTokens: number, outputTokens: number, metadata?: Record<string, unknown>): Decimal;
|
|
34
|
+
/**
|
|
35
|
+
* Wrap any async API call with automatic preflight + usage recording.
|
|
36
|
+
*
* @param apiFn Function that performs the API call; presumably only invoked when the preflight check allows it.
* @param options Model name, token estimates and an `extractUsage` callback (see the example below).
|
|
37
|
+
* @returns The API result, or null if blocked (when throwOnBlock=false)
|
|
38
|
+
* @throws {BudgetExceededError} if blocked and throwOnBlock=true
|
|
39
|
+
*
|
|
40
|
+
* @example
|
|
41
|
+
* const response = await guard.wrap(
|
|
42
|
+
* () => openai.chat.completions.create({ model: 'gpt-4o', messages }),
|
|
43
|
+
* {
|
|
44
|
+
* model: 'gpt-4o',
|
|
45
|
+
* estimatedInputTokens: 500,
|
|
46
|
+
* estimatedOutputTokens: 500,
|
|
47
|
+
* extractUsage: (res) => ({
|
|
48
|
+
* inputTokens: res.usage.prompt_tokens,
|
|
49
|
+
* outputTokens: res.usage.completion_tokens,
|
|
50
|
+
* }),
|
|
51
|
+
* },
|
|
52
|
+
* );
|
|
53
|
+
*/
|
|
54
|
+
wrap<T>(apiFn: () => Promise<T>, options: WrapOptions<T>): Promise<T | null>;
|
|
55
|
+
/**
|
|
56
|
+
* Get a snapshot of current spend and budget status.
|
|
57
|
+
*/
|
|
58
|
+
getStats(): GuardStats;
|
|
59
|
+
/**
|
|
60
|
+
* Reset all spend records. Useful between test runs or billing periods.
|
|
61
|
+
*/
|
|
62
|
+
reset(): void;
|
|
63
|
+
// Internal helpers below: their signatures and bodies are not exposed in this declaration file.
private getWindowSpend;
|
|
64
|
+
private pruneOldEntries;
|
|
65
|
+
private resolvePricing;
|
|
66
|
+
private handleBlock;
|
|
67
|
+
private validateOptions;
|
|
68
|
+
}
|
|
69
|
+
// Loose request type used by the middleware signature.
// NOTE(review): presumably `any` to avoid depending on Express typings — confirm.
type AnyRequest = any;
|
|
70
|
+
// Loose response type used by the middleware signature (same caveat as AnyRequest).
type AnyResponse = any;
|
|
71
|
+
// Express/Connect-style `next` callback; may be called with an optional error value.
type NextFn = (err?: unknown) => void;
|
|
72
|
+
/**
 * Optional hooks that tell `createMiddleware` how to read the model name and
 * the token estimates from an incoming request. Both have documented defaults.
 */
export interface MiddlewareOptions {
|
|
73
|
+
/** Extract model name from the request. Default: req.body?.model */
|
|
74
|
+
getModel?: (req: AnyRequest) => string | undefined;
|
|
75
|
+
/** Extract estimated token counts. Default: req.body?.max_tokens for both */
|
|
76
|
+
getTokens?: (req: AnyRequest) => {
|
|
77
|
+
input: number;
|
|
78
|
+
output: number;
|
|
79
|
+
};
|
|
80
|
+
}
|
|
81
|
+
/**
|
|
82
|
+
* Express/Connect-compatible middleware.
|
|
83
|
+
* Runs a preflight check on every incoming request.
|
|
84
|
+
* Returns 429 with JSON error body if budget is exceeded.
*
* Note that the middleware only performs the preflight step; as the example
* shows, the route handler is still responsible for calling `recordUsage`.
*
* @param guard Guard instance whose budget is checked for each request.
* @param options Optional extractors for the model name and token estimates.
* @returns A `(req, res, next)` handler suitable for `app.use` / route registration.
|
|
85
|
+
*
|
|
86
|
+
* @example
|
|
87
|
+
* const guard = new ApiCostGuard({ hardLimit: 50 });
|
|
88
|
+
* app.post('/api/chat', createMiddleware(guard), async (req, res) => {
|
|
89
|
+
* const response = await openai.chat.completions.create(req.body);
|
|
90
|
+
* guard.recordUsage(req.body.model,
|
|
91
|
+
* response.usage.prompt_tokens,
|
|
92
|
+
* response.usage.completion_tokens,
|
|
93
|
+
* );
|
|
94
|
+
* res.json(response);
|
|
95
|
+
* });
|
|
96
|
+
*/
|
|
97
|
+
export declare function createMiddleware(guard: ApiCostGuard, options?: MiddlewareOptions): (req: AnyRequest, res: AnyResponse, next: NextFn) => void;
|
|
98
|
+
export {};
|
|
99
|
+
//# sourceMappingURL=guard.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"guard.d.ts","sourceRoot":"","sources":["../src/guard.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,YAAY,CAAC;AAGjC,OAAO,KAAK,EACV,mBAAmB,EAGnB,eAAe,EAEf,UAAU,EACV,WAAW,EAEZ,MAAM,SAAS,CAAC;AAUjB,qBAAa,YAAY;IAEvB,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAuB;IACjD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAuB;IACjD,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAiB;IACjD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAwB;IACjD,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAa;IAG1C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAA6C;IACvE,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAkD;IAG1E,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAa;IAGrC,OAAO,CAAC,MAAM,CAAyB;IACvC,OAAO,CAAC,UAAU,CAAiC;gBAIvC,OAAO,GAAE,mBAAwB;IAiB7C;;;OAGG;IACI,YAAY,CACjB,KAAK,EAAE,MAAM,EACb,WAAW,EAAE,MAAM,EACnB,YAAY,GAAE,MAAU,GACvB,OAAO,GAAG,IAAI;IAiBjB;;;;;OAKG;IACI,cAAc,CACnB,KAAK,EAAE,MAAM,EACb,oBAAoB,EAAE,MAAM,EAC5B,qBAAqB,GAAE,MAAU,GAChC,eAAe;IA+ClB;;;;;OAKG;IACI,WAAW,CAChB,KAAK,EAAE,MAAM,EACb,WAAW,EAAE,MAAM,EACnB,YAAY,EAAE,MAAM,EACpB,QAAQ,GAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAM,GACrC,OAAO;IAqBV;;;;;;;;;;;;;;;;;;;OAmBG;IACU,IAAI,CAAC,CAAC,EACjB,KAAK,EAAE,MAAM,OAAO,CAAC,CAAC,CAAC,EACvB,OAAO,EAAE,WAAW,CAAC,CAAC,CAAC,GACtB,OAAO,CAAC,CAAC,GAAG,IAAI,CAAC;IAiCpB;;OAEG;IACI,QAAQ,IAAI,UAAU;IAmB7B;;OAEG;IACI,KAAK,IAAI,IAAI;IAOpB,OAAO,CAAC,cAAc;IAStB,OAAO,CAAC,eAAe;IAavB,OAAO,CAAC,cAAc;IActB,OAAO,CAAC,WAAW;IAmBnB,OAAO,CAAC,eAAe;CAiBxB;AAKD,KAAK,UAAU,GAAI,GAAG,CAAC;AAEvB,KAAK,WAAW,GAAG,GAAG,CAAC;AACvB,KAAK,MAAM,GAAQ,CAAC,GAAG,CAAC,EAAE,OAAO,KAAK,IAAI,CAAC;AAE3C,MAAM,WAAW,iBAAiB;IAChC,oEAAoE;IACpE,QAAQ,CAAC,EAAG,CAAC,GAAG,EAAE,UAAU,KAAK,MAAM,GAAG,SAAS,CAAC;IACpD,6EAA6E;IAC7E,SAAS,CAAC,EAAE,CAAC,GAAG,EAAE,UAAU,KAAK;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC;CACpE;AAED;;;;;;;;;;;;;;;GAeG;AACH,wBAAgB,gBAAgB,CAC9B,KAAK,EAAI,YAAY,EACrB,OAAO,GAAE,iBAAsB,IAQvB,KAAK,UAAU,EAAE,KAAK,WAAW,EAAE,MAAM,MAAM,KAAG,IAAI,CAsC/D"}
|