@salimassili/ai-costguard 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +62 -46
- package/LICENSE +21 -21
- package/README.md +421 -314
- package/benchmarks/run.mjs +229 -229
- package/benchmarks/token-accuracy.mjs +182 -0
- package/dist/cli.d.ts +11 -0
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +63 -2
- package/dist/cli.js.map +1 -1
- package/dist/core/CostGuard.d.ts +4 -2
- package/dist/core/CostGuard.d.ts.map +1 -1
- package/dist/core/CostGuard.js +2 -1
- package/dist/core/CostGuard.js.map +1 -1
- package/dist/core/GuardCore.d.ts +2 -0
- package/dist/core/GuardCore.d.ts.map +1 -1
- package/dist/core/GuardCore.js +37 -5
- package/dist/core/GuardCore.js.map +1 -1
- package/dist/core/GuardPro.d.ts +1 -13
- package/dist/core/GuardPro.d.ts.map +1 -1
- package/dist/core/GuardPro.js +7 -19
- package/dist/core/GuardPro.js.map +1 -1
- package/dist/core/tokenizer.d.ts +18 -0
- package/dist/core/tokenizer.d.ts.map +1 -1
- package/dist/core/tokenizer.js +45 -1
- package/dist/core/tokenizer.js.map +1 -1
- package/dist/core/types.d.ts +12 -3
- package/dist/core/types.d.ts.map +1 -1
- package/dist/dashboard.js +49 -49
- package/dist/index.d.ts +4 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -1
- package/dist/index.js.map +1 -1
- package/dist/pricing/index.d.ts +24 -0
- package/dist/pricing/index.d.ts.map +1 -1
- package/dist/pricing/index.js +31 -5
- package/dist/pricing/index.js.map +1 -1
- package/dist/pro.d.ts +1 -1
- package/dist/pro.d.ts.map +1 -1
- package/dist/pro.js +1 -1
- package/dist/pro.js.map +1 -1
- package/docs/BENCHMARKS.md +54 -35
- package/docs/DASHBOARD.md +61 -61
- package/docs/INTEGRATIONS.md +153 -153
- package/examples/integrations/anthropic-workflow-budget.mjs +36 -36
- package/examples/integrations/ci-budget-check.mjs +32 -32
- package/examples/integrations/crewai-budget-gate.mjs +31 -31
- package/examples/integrations/langchain-retry-storm.mjs +32 -32
- package/examples/integrations/mastra-agent.mjs +41 -41
- package/examples/integrations/openai-agent-loop.mjs +44 -44
- package/examples/integrations/vercel-ai-chatbot.mjs +29 -29
- package/package.json +71 -69
package/README.md
CHANGED
|
@@ -1,380 +1,487 @@
|
|
|
1
|
-
# AI CostGuard
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
1
|
+
# AI CostGuard
|
|
2
|
+
[npm package](https://www.npmjs.com/package/@salimassili/ai-costguard)
|
|
3
|
+
|
|
4
|
+
AI CostGuard is a local-first runtime safety layer for AI agents that stops runaway costs, repeated prompt loops, retry storms, and budget overruns before API calls execute. It wraps OpenAI-compatible clients and function-style SDK calls, estimates request cost locally, blocks budget overruns, detects repeated prompts, emits structured events, and exposes CLI checks plus a local dashboard.
|
|
5
|
+
|
|
5
6
|
It is local-first. It does not include a SaaS control plane, cloud dashboard, proxy gateway, telemetry service, billing reconciliation service, or hard security boundary.
|
|
6
7
|
|
|
7
|
-
##
|
|
8
|
+
## What AI CostGuard Does
|
|
8
9
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
10
|
+
- Checks selected AI SDK calls before they execute.
|
|
11
|
+
- Estimates request cost from model pricing, prompt text, and reserved output tokens.
|
|
12
|
+
- Blocks unknown models unless explicit pricing is supplied.
|
|
13
|
+
- Blocks budget overruns, repeated prompt loops, retry storms, and max-step overruns.
|
|
14
|
+
- Emits structured errors and local events your app can handle.
|
|
12
15
|
|
|
13
|
-
##
|
|
16
|
+
## What AI CostGuard Does Not Do
|
|
14
17
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
+
- It does not call providers for real-time pricing.
|
|
19
|
+
- It does not reconcile provider invoices or replace provider billing alerts.
|
|
20
|
+
- It does not provide auth, API-key security, or a hard security boundary.
|
|
21
|
+
- It does not run a hosted dashboard, SaaS backend, or cloud telemetry service.
|
|
22
|
+
- It does not guarantee exact tokenizer parity with OpenAI, Anthropic, or other providers.
|
|
18
23
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
24
|
+
## Install
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
npm install @salimassili/ai-costguard
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Quick Start
|
|
31
|
+
|
|
32
|
+
```ts
|
|
33
|
+
import OpenAI from 'openai';
|
|
34
|
+
import { guard, GuardError } from '@salimassili/ai-costguard';
|
|
35
|
+
|
|
36
|
+
const openai = guard(new OpenAI({ apiKey: process.env.OPENAI_API_KEY }), {
|
|
37
|
+
budget: 5,
|
|
38
|
+
maxSteps: 50,
|
|
39
|
+
scope: { projectId: 'my-app' },
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
try {
|
|
43
|
+
const response = await openai.chat.completions.create({
|
|
44
|
+
model: 'gpt-4o-mini',
|
|
45
|
+
messages: [{ role: 'user', content: 'Write a short summary.' }],
|
|
46
|
+
max_tokens: 200,
|
|
47
|
+
});
|
|
48
|
+
|
|
49
|
+
console.log(response.choices[0]?.message?.content);
|
|
50
|
+
} catch (error) {
|
|
51
|
+
if (error instanceof GuardError) {
|
|
52
|
+
console.error(error.code, error.message, error.context);
|
|
53
|
+
} else {
|
|
54
|
+
throw error;
|
|
55
|
+
}
|
|
39
56
|
}
|
|
40
57
|
```
|
|
41
58
|
|
|
42
|
-
##
|
|
43
|
-
|
|
44
|
-
By default AI CostGuard evaluates these SDK method paths:
|
|
45
|
-
|
|
46
|
-
- `chat.completions.create`
|
|
47
|
-
- `completions.create`
|
|
48
|
-
- `responses.create`
|
|
49
|
-
- `messages.create`
|
|
59
|
+
## Before / After
|
|
50
60
|
|
|
51
|
-
|
|
61
|
+
Without AI CostGuard:
|
|
52
62
|
|
|
53
63
|
```ts
|
|
54
|
-
|
|
55
|
-
budget: 2,
|
|
56
|
-
guardedMethods: ['agent.run'],
|
|
57
|
-
pricingOverrides: [
|
|
58
|
-
{
|
|
59
|
-
model: 'internal-model',
|
|
60
|
-
inputPer1kTokens: 0.001,
|
|
61
|
-
outputPer1kTokens: 0.002,
|
|
62
|
-
lastUpdated: '2026-06-07',
|
|
63
|
-
source: 'internal pricing sheet',
|
|
64
|
-
},
|
|
65
|
-
],
|
|
66
|
-
});
|
|
64
|
+
await openai.chat.completions.create(request);
|
|
67
65
|
```
|
|
68
66
|
|
|
69
|
-
|
|
67
|
+
With AI CostGuard:
|
|
70
68
|
|
|
71
69
|
```ts
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
scope: { projectId: 'chatbot' },
|
|
70
|
+
const openai = guard(new OpenAI({ apiKey: process.env.OPENAI_API_KEY }), {
|
|
71
|
+
budget: 5,
|
|
72
|
+
maxSteps: 50,
|
|
73
|
+
scope: { projectId: 'agent-api', sessionId: runId },
|
|
77
74
|
});
|
|
78
75
|
|
|
79
|
-
await
|
|
80
|
-
model: 'gpt-4o-mini',
|
|
81
|
-
prompt: 'Answer the user in one paragraph.',
|
|
82
|
-
max_tokens: 200,
|
|
83
|
-
});
|
|
76
|
+
await openai.chat.completions.create(request);
|
|
84
77
|
```
|
|
85
78
|
|
|
86
|
-
##
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
79
|
+
## What It Guards
|
|
80
|
+
|
|
81
|
+
By default AI CostGuard evaluates these SDK method paths:
|
|
82
|
+
|
|
83
|
+
- `chat.completions.create`
|
|
84
|
+
- `completions.create`
|
|
85
|
+
- `responses.create`
|
|
86
|
+
- `messages.create`
|
|
87
|
+
|
|
88
|
+
Other client methods are passed through without cost checks. To protect a custom client method:
|
|
89
|
+
|
|
90
|
+
```ts
|
|
91
|
+
const client = guard(customClient, {
|
|
92
|
+
budget: 2,
|
|
93
|
+
guardedMethods: ['agent.run'],
|
|
94
|
+
pricingOverrides: [
|
|
95
|
+
{
|
|
96
|
+
model: 'internal-model',
|
|
97
|
+
inputPer1kTokens: 0.001,
|
|
98
|
+
outputPer1kTokens: 0.002,
|
|
99
|
+
lastUpdated: '2026-06-07',
|
|
100
|
+
source: 'internal pricing sheet',
|
|
101
|
+
},
|
|
102
|
+
],
|
|
103
|
+
});
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
For function-style SDKs such as Vercel AI SDK adapters, LangChain wrappers, or agent runners:
|
|
107
|
+
|
|
108
|
+
```ts
|
|
109
|
+
import { guardFunction } from '@salimassili/ai-costguard';
|
|
110
|
+
|
|
111
|
+
const guardedGenerateText = guardFunction(generateTextAdapter, {
|
|
112
|
+
budget: 1,
|
|
113
|
+
scope: { projectId: 'chatbot' },
|
|
114
|
+
});
|
|
115
|
+
|
|
116
|
+
await guardedGenerateText({
|
|
117
|
+
model: 'gpt-4o-mini',
|
|
118
|
+
prompt: 'Answer the user in one paragraph.',
|
|
119
|
+
max_tokens: 200,
|
|
120
|
+
});
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## Decisions And Errors
|
|
124
|
+
|
|
125
|
+
Blocked requests throw `GuardError` before the provider method is called.
|
|
126
|
+
|
|
127
|
+
```ts
|
|
128
|
+
try {
|
|
129
|
+
await openai.chat.completions.create(request);
|
|
130
|
+
} catch (error) {
|
|
131
|
+
if (error instanceof GuardError) {
|
|
132
|
+
console.log(error.code);
|
|
133
|
+
console.log(error.metadata);
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
Current runtime block codes:
|
|
139
|
+
|
|
140
|
+
- `UNKNOWN_MODEL`
|
|
141
|
+
- `BUDGET_EXCEEDED`
|
|
142
|
+
- `MAX_STEPS_EXCEEDED`
|
|
143
|
+
- `LOOP_DETECTED`
|
|
144
|
+
- `RETRY_STORM_DETECTED`
|
|
145
|
+
|
|
111
146
|
## Configuration
|
|
112
|
-
|
|
113
|
-
```ts
|
|
114
|
-
guard(client, {
|
|
115
|
-
budget: 10,
|
|
116
|
-
maxSteps: 100,
|
|
147
|
+
|
|
148
|
+
```ts
|
|
149
|
+
guard(client, {
|
|
150
|
+
budget: 10,
|
|
151
|
+
maxSteps: 100,
|
|
117
152
|
behaviorAnalysis: true,
|
|
118
153
|
maxHistory: 32,
|
|
119
154
|
historyTtlMs: 5 * 60 * 1000,
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
projectId: 'production-api',
|
|
125
|
-
userId: 'optional-user',
|
|
126
|
-
sessionId: 'optional-agent-run',
|
|
155
|
+
loopDetection: {
|
|
156
|
+
similarityThreshold: 0.85,
|
|
157
|
+
minHistorySize: 2,
|
|
158
|
+
windowSize: 5,
|
|
127
159
|
},
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
160
|
+
retryThreshold: 2,
|
|
161
|
+
scope: {
|
|
162
|
+
projectId: 'production-api',
|
|
163
|
+
userId: 'optional-user',
|
|
164
|
+
sessionId: 'optional-agent-run',
|
|
165
|
+
},
|
|
166
|
+
guardedMethods: ['chat.completions.create', 'responses.create'],
|
|
167
|
+
pricingOverrides: [],
|
|
168
|
+
webhooks: {
|
|
169
|
+
slack: process.env.SLACK_WEBHOOK,
|
|
170
|
+
discord: process.env.DISCORD_WEBHOOK,
|
|
171
|
+
retries: 2,
|
|
172
|
+
timeoutMs: 1500,
|
|
173
|
+
},
|
|
174
|
+
eventLogPath: '.ai-costguard/events.jsonl',
|
|
175
|
+
eventLogPrompt: 'none',
|
|
176
|
+
});
|
|
177
|
+
```
|
|
178
|
+
|
|
141
179
|
`scope` isolates budgets and behavior history. If no scope is supplied, the guard uses one process-local default scope.
|
|
142
180
|
|
|
143
|
-
##
|
|
144
|
-
|
|
145
|
-
AI CostGuard is a pre-call estimator, not a billing ledger.
|
|
181
|
+
## Loop Detection Tuning
|
|
146
182
|
|
|
147
|
-
|
|
148
|
-
- `totalCost`: estimated cost of allowed calls.
|
|
149
|
-
- `blockedCost`: estimated cost stopped before provider execution.
|
|
150
|
-
- `actualCost`: provider-reported usage cost when the response includes recognizable `usage` fields.
|
|
183
|
+
Default loop detection uses character trigram cosine similarity with:
|
|
151
184
|
|
|
152
|
-
|
|
185
|
+
- `loopDetection.similarityThreshold: 0.85`
|
|
186
|
+
- `loopDetection.minHistorySize: 2`
|
|
187
|
+
- `loopDetection.windowSize: 5`
|
|
153
188
|
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
189
|
+
- Higher threshold, such as `0.95`: fewer false positives, but near-duplicate loops can slip through.
|
|
190
|
+
- Lower threshold, such as `0.75`: catches looser repeats, but unrelated prompts can be blocked.
|
|
191
|
+
- Higher `minHistorySize`: waits for more repeated prompts before blocking.
|
|
192
|
+
- Lower `minHistorySize`: blocks faster, but is more aggressive.
|
|
193
|
+
- Smaller `windowSize`: compares fewer recent prompts, reducing old-history false positives.
|
|
194
|
+
- Larger `windowSize`: compares more history, improving catch rate but increasing false-positive risk in repetitive workflows.
|
|
157
195
|
|
|
158
196
|
```ts
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
registerPricing([
|
|
162
|
-
{
|
|
163
|
-
model: 'my-company-model',
|
|
164
|
-
inputPer1kTokens: 0.001,
|
|
165
|
-
outputPer1kTokens: 0.002,
|
|
166
|
-
lastUpdated: '2026-06-07',
|
|
167
|
-
source: 'internal',
|
|
168
|
-
},
|
|
169
|
-
]);
|
|
170
|
-
```
|
|
171
|
-
|
|
172
|
-
If you intentionally want fallback pricing for unknown models:
|
|
173
|
-
|
|
174
|
-
```ts
|
|
175
|
-
guard(client, {
|
|
197
|
+
const openai = guard(client, {
|
|
176
198
|
budget: 5,
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
outputPer1kTokens: 0.002,
|
|
182
|
-
lastUpdated: '2026-06-07',
|
|
183
|
-
source: 'application fallback',
|
|
199
|
+
loopDetection: {
|
|
200
|
+
similarityThreshold: 0.9,
|
|
201
|
+
minHistorySize: 3,
|
|
202
|
+
windowSize: 6,
|
|
184
203
|
},
|
|
204
|
+
scope: { sessionId: 'agent-run-123' },
|
|
185
205
|
});
|
|
186
206
|
```
|
|
187
207
|
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
## Events
|
|
191
|
-
|
|
192
|
-
```ts
|
|
193
|
-
const unsubscribe = openai.on('block', (event) => {
|
|
194
|
-
console.log(event.code, event.reason, event.context.estimatedCost);
|
|
195
|
-
});
|
|
196
|
-
|
|
197
|
-
unsubscribe();
|
|
198
|
-
```
|
|
199
|
-
|
|
200
|
-
Supported events are `cost`, `allow`, and `block`. Handler errors are swallowed so observability code cannot change guard decisions.
|
|
208
|
+
Legacy `loopSimilarityThreshold` and `loopMinRepeats` config fields are still accepted, but `loopDetection` takes precedence. Loop detection is heuristic. Expect false positives and false negatives, especially for short prompts, templated prompts, and prompts that share a lot of boilerplate.
|
|
201
209
|
|
|
202
|
-
##
|
|
203
|
-
|
|
204
|
-
|
|
210
|
+
## Accounting Semantics
|
|
211
|
+
|
|
212
|
+
AI CostGuard is a pre-call estimator, not a billing ledger.
|
|
213
|
+
|
|
214
|
+
- `attemptedCost`: estimated cost of every guarded attempt, including blocked attempts.
|
|
215
|
+
- `totalCost`: estimated cost of allowed calls.
|
|
216
|
+
- `blockedCost`: estimated cost stopped before provider execution.
|
|
217
|
+
- `actualCost`: provider-reported usage cost when the response includes recognizable `usage` fields.
|
|
218
|
+
|
|
219
|
+
Budget decisions use estimated allowed cost. Actual usage is recorded for observability but does not rewrite earlier decisions.
|
|
220
|
+
|
|
221
|
+
## Pricing
|
|
205
222
|
|
|
206
|
-
|
|
207
|
-
const openai = guard(client, {
|
|
208
|
-
budget: 5,
|
|
209
|
-
eventLogPath: '.ai-costguard/events.jsonl',
|
|
210
|
-
});
|
|
211
|
-
```
|
|
223
|
+
Known model pricing comes from built-in registry entries, runtime registrations, or per-guard overrides. Unknown models are blocked by default.
|
|
212
224
|
|
|
213
|
-
|
|
225
|
+
Pricing last updated: `2026-06-07`. Provider pricing changes; AI CostGuard does not fetch real-time pricing. Override pricing manually when provider pages or your contract pricing differ from the built-ins.
|
|
226
|
+
|
|
227
|
+
```ts
|
|
228
|
+
import { getPricingMeta, registerPricing } from '@salimassili/ai-costguard';
|
|
229
|
+
|
|
230
|
+
registerPricing([
|
|
231
|
+
{
|
|
232
|
+
model: 'my-company-model',
|
|
233
|
+
inputPer1kTokens: 0.001,
|
|
234
|
+
outputPer1kTokens: 0.002,
|
|
235
|
+
lastUpdated: '2026-06-07',
|
|
236
|
+
source: 'internal',
|
|
237
|
+
},
|
|
238
|
+
]);
|
|
214
239
|
|
|
215
|
-
|
|
216
|
-
ai-costguard dashboard --events .ai-costguard/events.jsonl --budget 5
|
|
240
|
+
console.log(getPricingMeta('gpt-4o-mini'));
|
|
217
241
|
```
|
|
218
242
|
|
|
219
|
-
|
|
243
|
+
Check built-in pricing freshness from CI or a release script:
|
|
220
244
|
|
|
221
245
|
```bash
|
|
222
|
-
|
|
246
|
+
aifw pricing --check-stale --days 30
|
|
223
247
|
```
|
|
224
248
|
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
```
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
- CrewAI launch/budget gate
|
|
245
|
-
- CI budget checks
|
|
246
|
-
|
|
247
|
-
See `docs/INTEGRATIONS.md` and `examples/integrations`.
|
|
249
|
+
The command exits `0` when all registry entries are within the threshold and `1` when one or more entries are stale.
|
|
250
|
+
|
|
251
|
+
If you intentionally want fallback pricing for unknown models:
|
|
252
|
+
|
|
253
|
+
```ts
|
|
254
|
+
guard(client, {
|
|
255
|
+
budget: 5,
|
|
256
|
+
unknownModelPolicy: 'fallback',
|
|
257
|
+
unknownModelPricing: {
|
|
258
|
+
model: 'fallback',
|
|
259
|
+
inputPer1kTokens: 0.001,
|
|
260
|
+
outputPer1kTokens: 0.002,
|
|
261
|
+
lastUpdated: '2026-06-07',
|
|
262
|
+
source: 'application fallback',
|
|
263
|
+
},
|
|
264
|
+
});
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
Pricing changes frequently. Verify provider pricing before production use and override entries when needed.
|
|
248
268
|
|
|
249
|
-
##
|
|
269
|
+
## Token Counting Accuracy
|
|
250
270
|
|
|
251
|
-
|
|
271
|
+
AI CostGuard ships with a dependency-free token estimator so the root package stays small. It warns once per model/scope when approximate counting is used:
|
|
252
272
|
|
|
253
|
-
```
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
app.use(middleware({ budget: 2 }));
|
|
257
|
-
|
|
258
|
-
app.post('/chat', async (req, res, next) => {
|
|
259
|
-
try {
|
|
260
|
-
req.localSafety.check({
|
|
261
|
-
model: 'gpt-4o-mini',
|
|
262
|
-
tokens: 500,
|
|
263
|
-
inputTokens: 100,
|
|
264
|
-
outputTokens: 400,
|
|
265
|
-
estimatedCost: 0.0003,
|
|
266
|
-
timestamp: Date.now(),
|
|
267
|
-
prompt: String(req.body?.prompt ?? ''),
|
|
268
|
-
});
|
|
269
|
-
|
|
270
|
-
res.json({ ok: true });
|
|
271
|
-
} catch (error) {
|
|
272
|
-
if (error instanceof GuardError) {
|
|
273
|
-
res.status(403).json({ code: error.code, reason: error.message });
|
|
274
|
-
return;
|
|
275
|
-
}
|
|
276
|
-
next(error);
|
|
277
|
-
}
|
|
278
|
-
});
|
|
273
|
+
```text
|
|
274
|
+
[ai-costguard] Using approximate token counting for model: gpt-4o-mini. Register an exact tokenizer via registerTokenizer() for production use.
|
|
279
275
|
```
|
|
280
276
|
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
Redis-backed shared spend tracking is isolated behind a subpath import:
|
|
277
|
+
For production budgets that need tighter input-token estimates, register a provider tokenizer:
|
|
284
278
|
|
|
285
279
|
```ts
|
|
286
|
-
import {
|
|
280
|
+
import { registerTokenizer } from '@salimassili/ai-costguard';
|
|
287
281
|
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
budget: 25,
|
|
291
|
-
windowSeconds: 86400,
|
|
282
|
+
registerTokenizer('gpt-4o-mini', (text) => {
|
|
283
|
+
return myTokenizer.encode(text).length;
|
|
292
284
|
});
|
|
293
285
|
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
`ioredis` is an optional dependency and is not loaded by the root import.
|
|
299
|
-
|
|
300
|
-
`licenseKey` is accepted as a deprecated compatibility field only. AI CostGuard does not enforce commercial licenses locally, and `validateLicense()` is a format sanity helper, not security.
|
|
301
|
-
|
|
302
|
-
## CLI
|
|
303
|
-
|
|
304
|
-
```bash
|
|
305
|
-
aifw check --budget 1 --model gpt-4o-mini --input-tokens 500 --tokens 1000 --max-steps 5
|
|
306
|
-
```
|
|
307
|
-
|
|
308
|
-
The package also installs an `ai-costguard` bin alias:
|
|
309
|
-
|
|
310
|
-
```bash
|
|
311
|
-
ai-costguard check --budget 1 --model gpt-4o-mini --tokens 1000 --max-steps 5
|
|
312
|
-
ai-costguard dashboard --events .ai-costguard/events.jsonl --budget 5
|
|
313
|
-
```
|
|
314
|
-
|
|
315
|
-
For custom models:
|
|
316
|
-
|
|
317
|
-
```bash
|
|
318
|
-
aifw check --budget 1 --model internal-model --tokens 1000 --input-price-per-1k 0.001 --output-price-per-1k 0.002
|
|
319
|
-
```
|
|
320
|
-
|
|
321
|
-
Exit codes:
|
|
322
|
-
|
|
323
|
-
- `0`: projected cost is within budget
|
|
324
|
-
- `1`: projected cost exceeds budget
|
|
325
|
-
- `2`: usage/config error
|
|
326
|
-
|
|
327
|
-
## Benchmarks
|
|
328
|
-
|
|
329
|
-
Run local benchmarks:
|
|
330
|
-
|
|
331
|
-
```bash
|
|
332
|
-
npm run build
|
|
333
|
-
npm run benchmark
|
|
286
|
+
registerTokenizer(/^claude-/u, (text) => {
|
|
287
|
+
return myAnthropicTokenizer.count(text);
|
|
288
|
+
});
|
|
334
289
|
```
|
|
335
290
|
|
|
291
|
+
String patterns match model-name substrings case-insensitively. `RegExp` patterns are tested against the original model string. If a registered tokenizer throws or returns an invalid count, AI CostGuard falls back to the built-in approximation and keeps guarding the call.
|
|
292
|
+
|
|
293
|
+
## Events
|
|
294
|
+
|
|
295
|
+
```ts
|
|
296
|
+
const unsubscribe = openai.on('block', (event) => {
|
|
297
|
+
console.log(event.code, event.reason, event.context.estimatedCost);
|
|
298
|
+
});
|
|
299
|
+
|
|
300
|
+
unsubscribe();
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
Supported events are `cost`, `allow`, and `block`. Handler errors are swallowed so observability code cannot change guard decisions.
|
|
304
|
+
|
|
305
|
+
## Local Dashboard
|
|
306
|
+
|
|
307
|
+
Opt into a local JSONL event log:
|
|
308
|
+
|
|
309
|
+
```ts
|
|
310
|
+
const openai = guard(client, {
|
|
311
|
+
budget: 5,
|
|
312
|
+
eventLogPath: '.ai-costguard/events.jsonl',
|
|
313
|
+
});
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
Start the local-only dashboard:
|
|
317
|
+
|
|
318
|
+
```bash
|
|
319
|
+
ai-costguard dashboard --events .ai-costguard/events.jsonl --budget 5
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
For one-off package execution:
|
|
323
|
+
|
|
324
|
+
```bash
|
|
325
|
+
npx @salimassili/ai-costguard dashboard --events .ai-costguard/events.jsonl --budget 5
|
|
326
|
+
```
|
|
327
|
+
|
|
328
|
+
If the package is installed locally, `npx ai-costguard dashboard` also works. The dashboard binds to `127.0.0.1` by default and reads only local event files.
|
|
329
|
+
|
|
330
|
+
For CI or terminal output:
|
|
331
|
+
|
|
332
|
+
```bash
|
|
333
|
+
ai-costguard dashboard --events .ai-costguard/events.jsonl --budget 5 --once --json
|
|
334
|
+
```
|
|
335
|
+
|
|
336
|
+
See `docs/DASHBOARD.md`.
|
|
337
|
+
|
|
338
|
+
## Integrations
|
|
339
|
+
|
|
340
|
+
Runnable mocked examples are included for:
|
|
341
|
+
|
|
342
|
+
- OpenAI SDK agent loop protection
|
|
343
|
+
- Anthropic SDK workflow budget guard
|
|
344
|
+
- Vercel AI SDK chatbot budget cap
|
|
345
|
+
- LangChain retry-storm prevention
|
|
346
|
+
- Mastra-style agent runner protection
|
|
347
|
+
- CrewAI launch/budget gate
|
|
348
|
+
- CI budget checks
|
|
349
|
+
|
|
350
|
+
See `docs/INTEGRATIONS.md` and `examples/integrations`.
|
|
351
|
+
|
|
352
|
+
## Express Middleware
|
|
353
|
+
|
|
354
|
+
The middleware attaches a manual checker. It does not automatically parse or inspect every route.
|
|
355
|
+
|
|
356
|
+
```ts
|
|
357
|
+
import { middleware, GuardError } from '@salimassili/ai-costguard';
|
|
358
|
+
|
|
359
|
+
app.use(middleware({ budget: 2 }));
|
|
360
|
+
|
|
361
|
+
app.post('/chat', async (req, res, next) => {
|
|
362
|
+
try {
|
|
363
|
+
req.localSafety.check({
|
|
364
|
+
model: 'gpt-4o-mini',
|
|
365
|
+
tokens: 500,
|
|
366
|
+
inputTokens: 100,
|
|
367
|
+
outputTokens: 400,
|
|
368
|
+
estimatedCost: 0.0003,
|
|
369
|
+
timestamp: Date.now(),
|
|
370
|
+
prompt: String(req.body?.prompt ?? ''),
|
|
371
|
+
});
|
|
372
|
+
|
|
373
|
+
res.json({ ok: true });
|
|
374
|
+
} catch (error) {
|
|
375
|
+
if (error instanceof GuardError) {
|
|
376
|
+
res.status(403).json({ code: error.code, reason: error.message });
|
|
377
|
+
return;
|
|
378
|
+
}
|
|
379
|
+
next(error);
|
|
380
|
+
}
|
|
381
|
+
});
|
|
382
|
+
```
|
|
383
|
+
|
|
384
|
+
## Optional Redis / Pro Helper
|
|
385
|
+
|
|
386
|
+
Redis-backed shared spend tracking is isolated behind a subpath import:
|
|
387
|
+
|
|
388
|
+
```ts
|
|
389
|
+
import { GuardPro } from '@salimassili/ai-costguard/pro';
|
|
390
|
+
|
|
391
|
+
const pro = new GuardPro({
|
|
392
|
+
redisUrl: process.env.REDIS_URL ?? '',
|
|
393
|
+
budget: 25,
|
|
394
|
+
windowSeconds: 86400,
|
|
395
|
+
});
|
|
396
|
+
|
|
397
|
+
await pro.checkAndCharge('production', 0.0042);
|
|
398
|
+
await pro.shutdown();
|
|
399
|
+
```
|
|
400
|
+
|
|
401
|
+
`ioredis` is an optional dependency and is not loaded by the root import.
|
|
402
|
+
|
|
403
|
+
AI CostGuard does not include license-key checks or local commercial-license enforcement.
|
|
404
|
+
|
|
405
|
+
## CLI
|
|
406
|
+
|
|
407
|
+
```bash
|
|
408
|
+
aifw check --budget 1 --model gpt-4o-mini --input-tokens 500 --tokens 1000 --max-steps 5
|
|
409
|
+
```
|
|
410
|
+
|
|
411
|
+
The package also installs an `ai-costguard` bin alias:
|
|
412
|
+
|
|
413
|
+
```bash
|
|
414
|
+
ai-costguard check --budget 1 --model gpt-4o-mini --tokens 1000 --max-steps 5
|
|
415
|
+
ai-costguard dashboard --events .ai-costguard/events.jsonl --budget 5
|
|
416
|
+
```
|
|
417
|
+
|
|
418
|
+
For custom models:
|
|
419
|
+
|
|
420
|
+
```bash
|
|
421
|
+
aifw check --budget 1 --model internal-model --tokens 1000 --input-price-per-1k 0.001 --output-price-per-1k 0.002
|
|
422
|
+
```
|
|
423
|
+
|
|
424
|
+
Exit codes:
|
|
425
|
+
|
|
426
|
+
- `0`: projected cost is within budget
|
|
427
|
+
- `1`: projected cost exceeds budget
|
|
428
|
+
- `2`: usage/config error
|
|
429
|
+
|
|
430
|
+
## Benchmarks
|
|
431
|
+
|
|
432
|
+
Run local benchmarks:
|
|
433
|
+
|
|
434
|
+
```bash
|
|
435
|
+
npm run build
|
|
436
|
+
npm run benchmark
|
|
437
|
+
```
|
|
438
|
+
|
|
336
439
|
The script reports runtime overhead, approximate heap delta, false-positive scenarios, loop detection behavior, and cost-estimation boundaries. Results are local measurements, not universal guarantees. See `docs/BENCHMARKS.md`.
|
|
337
440
|
|
|
338
|
-
Latest local benchmark in this repo on Node `v24.14.1` / Windows measured `0.
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
-
|
|
347
|
-
-
|
|
348
|
-
-
|
|
349
|
-
-
|
|
350
|
-
-
|
|
351
|
-
-
|
|
352
|
-
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
npm
|
|
360
|
-
npm
|
|
441
|
+
Latest local benchmark in this repo on Node `v24.14.1` / Windows measured `0.023937 ms` added per mocked guarded call over `5000` iterations. Re-run on your target runtime before using this number in performance-sensitive claims.
|
|
442
|
+
|
|
443
|
+
Token accuracy benchmark (fixed proxy corpus, `24` samples): average error `237.76%`, median error `240.06%`, max error `390%`. The current dependency-free estimator is conservative and can substantially overestimate short prompts. Register an exact tokenizer for production use when token accuracy matters.
|
|
444
|
+
|
|
445
|
+
## Why Not 50 Lines Of Code?
|
|
446
|
+
|
|
447
|
+
A simple homemade budget check can stop one request after one counter crosses one number. AI CostGuard packages the parts that usually become messy once agents enter production:
|
|
448
|
+
|
|
449
|
+
- Provider pricing registry with runtime overrides and unknown-model blocking.
|
|
450
|
+
- Structured `GuardError` codes and metadata for API responses.
|
|
451
|
+
- Scoped budget and behavior state per project, user, or session.
|
|
452
|
+
- TTL-bounded prompt history.
|
|
453
|
+
- Loop and retry-storm detection.
|
|
454
|
+
- Estimated, attempted, blocked, and actual usage accounting.
|
|
455
|
+
- Method filtering so non-AI SDK calls are not charged.
|
|
456
|
+
- Event hooks, best-effort webhooks, JSONL event logs, and local dashboard visibility.
|
|
457
|
+
- CI budget checks and runnable integration examples.
|
|
458
|
+
|
|
459
|
+
## Development
|
|
460
|
+
|
|
461
|
+
```bash
|
|
462
|
+
npm ci
|
|
463
|
+
npm run build
|
|
464
|
+
npm run typecheck
|
|
465
|
+
npm test
|
|
361
466
|
npm run smoke
|
|
362
467
|
npm run benchmark
|
|
468
|
+
npm run benchmark:tokens
|
|
363
469
|
npm audit --omit=dev
|
|
364
470
|
npm pack --dry-run
|
|
365
|
-
```
|
|
366
|
-
|
|
367
|
-
## Limitations
|
|
368
|
-
|
|
369
|
-
- Token counting is approximate and dependency-free.
|
|
471
|
+
```
|
|
472
|
+
|
|
473
|
+
## Limitations
|
|
474
|
+
|
|
475
|
+
- Token counting is approximate and dependency-free unless you register an exact tokenizer.
|
|
476
|
+
- Token estimation is intentionally conservative and can overestimate materially; see the token accuracy benchmark.
|
|
370
477
|
- Pricing entries can become stale; override them for production.
|
|
371
|
-
- The free guard is process-local.
|
|
372
|
-
- Loop detection uses character trigram similarity, not embeddings.
|
|
373
|
-
- Retry detection is heuristic.
|
|
374
|
-
- Webhooks are best-effort and never affect enforcement.
|
|
375
|
-
- The dashboard reads local JSONL logs only; it is not a hosted analytics product.
|
|
376
|
-
- Provider usage reconciliation only works when responses expose recognizable `usage` fields.
|
|
377
|
-
|
|
378
|
-
## License
|
|
379
|
-
|
|
380
|
-
MIT
|
|
478
|
+
- The free guard is process-local.
|
|
479
|
+
- Loop detection uses character trigram similarity, not embeddings.
|
|
480
|
+
- Retry detection is heuristic.
|
|
481
|
+
- Webhooks are best-effort and never affect enforcement.
|
|
482
|
+
- The dashboard reads local JSONL logs only; it is not a hosted analytics product.
|
|
483
|
+
- Provider usage reconciliation only works when responses expose recognizable `usage` fields.
|
|
484
|
+
|
|
485
|
+
## License
|
|
486
|
+
|
|
487
|
+
MIT
|