@salimassili/ai-costguard 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +59 -50
- package/LICENSE +21 -21
- package/README.md +399 -339
- package/benchmarks/run.mjs +229 -229
- package/benchmarks/token-accuracy.mjs +86 -0
- package/dist/core/CostGuard.d.ts +1 -1
- package/dist/core/CostGuard.d.ts.map +1 -1
- package/dist/core/CostGuard.js +1 -1
- package/dist/core/CostGuard.js.map +1 -1
- package/dist/core/GuardPro.d.ts +1 -13
- package/dist/core/GuardPro.d.ts.map +1 -1
- package/dist/core/GuardPro.js +7 -19
- package/dist/core/GuardPro.js.map +1 -1
- package/dist/core/types.d.ts +1 -3
- package/dist/core/types.d.ts.map +1 -1
- package/dist/dashboard.js +49 -49
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -1
- package/dist/index.js.map +1 -1
- package/dist/pricing/index.d.ts +7 -0
- package/dist/pricing/index.d.ts.map +1 -1
- package/dist/pricing/index.js +7 -0
- package/dist/pricing/index.js.map +1 -1
- package/dist/pro.d.ts +1 -1
- package/dist/pro.d.ts.map +1 -1
- package/dist/pro.js +1 -1
- package/dist/pro.js.map +1 -1
- package/docs/BENCHMARKS.md +60 -41
- package/docs/DASHBOARD.md +61 -61
- package/docs/INTEGRATIONS.md +153 -153
- package/examples/integrations/anthropic-workflow-budget.mjs +36 -36
- package/examples/integrations/ci-budget-check.mjs +32 -32
- package/examples/integrations/crewai-budget-gate.mjs +31 -31
- package/examples/integrations/langchain-retry-storm.mjs +32 -32
- package/examples/integrations/mastra-agent.mjs +41 -41
- package/examples/integrations/openai-agent-loop.mjs +44 -44
- package/examples/integrations/vercel-ai-chatbot.mjs +29 -29
- package/package.json +71 -69
package/benchmarks/run.mjs
CHANGED
|
@@ -1,229 +1,229 @@
|
|
|
1
|
-
import { performance } from 'node:perf_hooks';
|
|
2
|
-
import { guardFunction } from '../dist/index.js';
|
|
3
|
-
import { GuardCore, GuardError } from '../dist/core/GuardCore.js';
|
|
4
|
-
import { estimateRequestTokens } from '../dist/core/tokenizer.js';
|
|
5
|
-
|
|
6
|
-
const iterations = readIterations(process.argv.slice(2));
|
|
7
|
-
|
|
8
|
-
const result = {
|
|
9
|
-
generatedAt: new Date().toISOString(),
|
|
10
|
-
node: process.version,
|
|
11
|
-
platform: process.platform,
|
|
12
|
-
iterations,
|
|
13
|
-
runtimeOverhead: await measureRuntimeOverhead(iterations),
|
|
14
|
-
memoryOverhead: await measureMemoryOverhead(Math.max(250, Math.min(iterations, 2000))),
|
|
15
|
-
falsePositiveScenarios: measureFalsePositiveScenarios(),
|
|
16
|
-
loopDetectionBehavior: measureLoopDetectionBehavior(),
|
|
17
|
-
costEstimationBoundaries: measureCostEstimationBoundaries(),
|
|
18
|
-
};
|
|
19
|
-
|
|
20
|
-
console.log(JSON.stringify(result, null, 2));
|
|
21
|
-
|
|
22
|
-
async function measureRuntimeOverhead(count) {
|
|
23
|
-
async function directCall(request) {
|
|
24
|
-
return { ok: true, usage: { prompt_tokens: 12, completion_tokens: request.max_tokens ?? 8 } };
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
const guardedCall = guardFunction(directCall, {
|
|
28
|
-
budget: 1_000,
|
|
29
|
-
behaviorAnalysis: false,
|
|
30
|
-
scope: { projectId: 'benchmark' },
|
|
31
|
-
});
|
|
32
|
-
|
|
33
|
-
const request = {
|
|
34
|
-
model: 'gpt-4o-mini',
|
|
35
|
-
prompt: 'benchmark request',
|
|
36
|
-
max_tokens: 8,
|
|
37
|
-
};
|
|
38
|
-
|
|
39
|
-
await warmup(directCall, guardedCall, request);
|
|
40
|
-
|
|
41
|
-
const directMs = await timeAsync(count, () => directCall(request));
|
|
42
|
-
const guardedMs = await timeAsync(count, () => guardedCall(request));
|
|
43
|
-
const directPerCallMs = directMs / count;
|
|
44
|
-
const guardedPerCallMs = guardedMs / count;
|
|
45
|
-
|
|
46
|
-
return {
|
|
47
|
-
directTotalMs: round(directMs, 3),
|
|
48
|
-
guardedTotalMs: round(guardedMs, 3),
|
|
49
|
-
directPerCallMs: round(directPerCallMs, 6),
|
|
50
|
-
guardedPerCallMs: round(guardedPerCallMs, 6),
|
|
51
|
-
addedPerCallMs: round(guardedPerCallMs - directPerCallMs, 6),
|
|
52
|
-
};
|
|
53
|
-
}
|
|
54
|
-
|
|
55
|
-
async function measureMemoryOverhead(count) {
|
|
56
|
-
const guardedCall = guardFunction(
|
|
57
|
-
async (request) => ({ ok: true, usage: { prompt_tokens: 12, completion_tokens: request.max_tokens ?? 8 } }),
|
|
58
|
-
{
|
|
59
|
-
budget: 1_000,
|
|
60
|
-
maxHistory: count,
|
|
61
|
-
loopMinRepeats: count + 1,
|
|
62
|
-
scope: { projectId: 'memory-benchmark' },
|
|
63
|
-
}
|
|
64
|
-
);
|
|
65
|
-
|
|
66
|
-
collectGarbageIfAvailable();
|
|
67
|
-
const beforeBytes = process.memoryUsage().heapUsed;
|
|
68
|
-
|
|
69
|
-
for (let index = 0; index < count; index++) {
|
|
70
|
-
await guardedCall({
|
|
71
|
-
model: 'gpt-4o-mini',
|
|
72
|
-
prompt: `memory benchmark unique prompt ${index}`,
|
|
73
|
-
max_tokens: 8,
|
|
74
|
-
});
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
collectGarbageIfAvailable();
|
|
78
|
-
const afterBytes = process.memoryUsage().heapUsed;
|
|
79
|
-
|
|
80
|
-
return {
|
|
81
|
-
calls: count,
|
|
82
|
-
heapDeltaBytes: afterBytes - beforeBytes,
|
|
83
|
-
heapDeltaPerCallBytes: round((afterBytes - beforeBytes) / count, 2),
|
|
84
|
-
gcAvailable: typeof globalThis.gc === 'function',
|
|
85
|
-
note: 'Heap measurements are process-local and noisy unless Node is run with --expose-gc.',
|
|
86
|
-
};
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
function measureFalsePositiveScenarios() {
|
|
90
|
-
const core = new GuardCore({
|
|
91
|
-
budget: 1,
|
|
92
|
-
retryThreshold: 2,
|
|
93
|
-
loopSimilarityThreshold: 0.9,
|
|
94
|
-
scope: { projectId: 'false-positive-benchmark' },
|
|
95
|
-
});
|
|
96
|
-
const prompts = [
|
|
97
|
-
'again compare the two product options',
|
|
98
|
-
'again summarize the second option with different tradeoffs',
|
|
99
|
-
'again write a new title for the launch note',
|
|
100
|
-
];
|
|
101
|
-
let blocked = 0;
|
|
102
|
-
|
|
103
|
-
for (const prompt of prompts) {
|
|
104
|
-
try {
|
|
105
|
-
core.check(context(prompt));
|
|
106
|
-
} catch (error) {
|
|
107
|
-
if (error instanceof GuardError) blocked += 1;
|
|
108
|
-
else throw error;
|
|
109
|
-
}
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
return {
|
|
113
|
-
scenario: 'Repeated benign "again" prompts without failure/error language',
|
|
114
|
-
prompts: prompts.length,
|
|
115
|
-
blocked,
|
|
116
|
-
};
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
function measureLoopDetectionBehavior() {
|
|
120
|
-
const core = new GuardCore({
|
|
121
|
-
budget: 1,
|
|
122
|
-
loopSimilarityThreshold: 0.9,
|
|
123
|
-
loopMinRepeats: 2,
|
|
124
|
-
scope: { projectId: 'loop-benchmark' },
|
|
125
|
-
});
|
|
126
|
-
const prompt = 'summarize the same tool observation and continue the agent plan';
|
|
127
|
-
|
|
128
|
-
for (let step = 1; step <= 5; step++) {
|
|
129
|
-
try {
|
|
130
|
-
core.check(context(prompt));
|
|
131
|
-
} catch (error) {
|
|
132
|
-
if (error instanceof GuardError) {
|
|
133
|
-
return {
|
|
134
|
-
repeatedPrompt: prompt,
|
|
135
|
-
blockedAtStep: step,
|
|
136
|
-
code: error.code,
|
|
137
|
-
reason: error.message,
|
|
138
|
-
};
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
throw error;
|
|
142
|
-
}
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
return {
|
|
146
|
-
repeatedPrompt: prompt,
|
|
147
|
-
blockedAtStep: null,
|
|
148
|
-
};
|
|
149
|
-
}
|
|
150
|
-
|
|
151
|
-
function measureCostEstimationBoundaries() {
|
|
152
|
-
const samples = [
|
|
153
|
-
{ label: 'short chat', request: { messages: [{ role: 'user', content: 'hello' }], max_tokens: 32 } },
|
|
154
|
-
{
|
|
155
|
-
label: 'long instruction',
|
|
156
|
-
request: {
|
|
157
|
-
messages: [{ role: 'user', content: 'Summarize the following requirements. '.repeat(120) }],
|
|
158
|
-
max_tokens: 256,
|
|
159
|
-
},
|
|
160
|
-
},
|
|
161
|
-
{
|
|
162
|
-
label: 'code-heavy prompt',
|
|
163
|
-
request: {
|
|
164
|
-
prompt: 'function example(value) { return value.map((item) => item.id).join(","); }\n'.repeat(40),
|
|
165
|
-
max_tokens: 128,
|
|
166
|
-
},
|
|
167
|
-
},
|
|
168
|
-
];
|
|
169
|
-
|
|
170
|
-
return {
|
|
171
|
-
tokenizer: 'dependency-free estimator',
|
|
172
|
-
doesNotClaimProviderExactness: true,
|
|
173
|
-
samples: samples.map((sample) => {
|
|
174
|
-
const estimate = estimateRequestTokens(sample.request);
|
|
175
|
-
return {
|
|
176
|
-
label: sample.label,
|
|
177
|
-
inputTokens: estimate.inputTokens,
|
|
178
|
-
outputTokens: estimate.outputTokens,
|
|
179
|
-
totalTokens: estimate.tokens,
|
|
180
|
-
};
|
|
181
|
-
}),
|
|
182
|
-
};
|
|
183
|
-
}
|
|
184
|
-
|
|
185
|
-
async function warmup(directCall, guardedCall, request) {
|
|
186
|
-
for (let index = 0; index < 100; index++) {
|
|
187
|
-
await directCall(request);
|
|
188
|
-
await guardedCall(request);
|
|
189
|
-
}
|
|
190
|
-
}
|
|
191
|
-
|
|
192
|
-
async function timeAsync(count, fn) {
|
|
193
|
-
const start = performance.now();
|
|
194
|
-
for (let index = 0; index < count; index++) {
|
|
195
|
-
await fn();
|
|
196
|
-
}
|
|
197
|
-
return performance.now() - start;
|
|
198
|
-
}
|
|
199
|
-
|
|
200
|
-
function context(prompt) {
|
|
201
|
-
return {
|
|
202
|
-
model: 'gpt-4o-mini',
|
|
203
|
-
pricingKnown: true,
|
|
204
|
-
tokens: 100,
|
|
205
|
-
inputTokens: 40,
|
|
206
|
-
outputTokens: 60,
|
|
207
|
-
estimatedCost: 0.0001,
|
|
208
|
-
timestamp: Date.now(),
|
|
209
|
-
prompt,
|
|
210
|
-
};
|
|
211
|
-
}
|
|
212
|
-
|
|
213
|
-
function collectGarbageIfAvailable() {
|
|
214
|
-
if (typeof globalThis.gc === 'function') {
|
|
215
|
-
globalThis.gc();
|
|
216
|
-
}
|
|
217
|
-
}
|
|
218
|
-
|
|
219
|
-
function readIterations(args) {
|
|
220
|
-
const index = args.indexOf('--iterations');
|
|
221
|
-
if (index === -1) return 5000;
|
|
222
|
-
const value = Number(args[index + 1]);
|
|
223
|
-
return Number.isFinite(value) && value > 0 ? Math.trunc(value) : 5000;
|
|
224
|
-
}
|
|
225
|
-
|
|
226
|
-
function round(value, digits) {
|
|
227
|
-
const factor = 10 ** digits;
|
|
228
|
-
return Math.round(value * factor) / factor;
|
|
229
|
-
}
|
|
1
|
+
import { performance } from 'node:perf_hooks';
|
|
2
|
+
import { guardFunction } from '../dist/index.js';
|
|
3
|
+
import { GuardCore, GuardError } from '../dist/core/GuardCore.js';
|
|
4
|
+
import { estimateRequestTokens } from '../dist/core/tokenizer.js';
|
|
5
|
+
|
|
6
|
+
// Benchmark entry point: read the iteration count from the CLI, run every
// measurement in a fixed order, and print a single JSON report to stdout.
const iterations = readIterations(process.argv.slice(2));

// The timestamp is taken before any measurement starts.
const generatedAt = new Date().toISOString();

// The two async measurements run one after the other, never in parallel.
const runtimeOverhead = await measureRuntimeOverhead(iterations);

// The memory run uses the iteration count clamped to the range 250..2000.
const memoryCalls = Math.max(250, Math.min(iterations, 2000));
const memoryOverhead = await measureMemoryOverhead(memoryCalls);

const falsePositiveScenarios = measureFalsePositiveScenarios();
const loopDetectionBehavior = measureLoopDetectionBehavior();
const costEstimationBoundaries = measureCostEstimationBoundaries();

const result = {
  generatedAt,
  node: process.version,
  platform: process.platform,
  iterations,
  runtimeOverhead,
  memoryOverhead,
  falsePositiveScenarios,
  loopDetectionBehavior,
  costEstimationBoundaries,
};

console.log(JSON.stringify(result, null, 2));
|
|
21
|
+
|
|
22
|
+
/**
 * Times `count` sequential calls of a stub function, first unwrapped and then
 * wrapped with `guardFunction`, and reports totals, per-call averages, and the
 * per-call difference between the two.
 *
 * @param {number} count - Number of timed calls for each variant.
 * @returns {Promise<object>} Millisecond totals (3 decimals) and per-call
 *   figures (6 decimals).
 */
async function measureRuntimeOverhead(count) {
  // Stub "provider": resolves immediately with a fixed usage payload.
  const directCall = async (request) => ({
    ok: true,
    usage: { prompt_tokens: 12, completion_tokens: request.max_tokens ?? 8 },
  });

  const guardOptions = {
    budget: 1_000,
    behaviorAnalysis: false,
    scope: { projectId: 'benchmark' },
  };
  const guardedCall = guardFunction(directCall, guardOptions);

  // The same request object is reused for every call.
  const request = { model: 'gpt-4o-mini', prompt: 'benchmark request', max_tokens: 8 };

  await warmup(directCall, guardedCall, request);

  const directMs = await timeAsync(count, () => directCall(request));
  const guardedMs = await timeAsync(count, () => guardedCall(request));

  const perCall = (totalMs) => totalMs / count;
  const directPerCallMs = perCall(directMs);
  const guardedPerCallMs = perCall(guardedMs);

  return {
    directTotalMs: round(directMs, 3),
    guardedTotalMs: round(guardedMs, 3),
    directPerCallMs: round(directPerCallMs, 6),
    guardedPerCallMs: round(guardedPerCallMs, 6),
    addedPerCallMs: round(guardedPerCallMs - directPerCallMs, 6),
  };
}
|
|
54
|
+
|
|
55
|
+
/**
 * Runs `count` guarded calls, each with a distinct prompt, and reports how
 * much `heapUsed` changed across the run.
 *
 * @param {number} count - Number of guarded calls to issue.
 * @returns {Promise<object>} Call count, total and per-call heap delta in
 *   bytes, whether `globalThis.gc` was available, and a caveat note.
 */
async function measureMemoryOverhead(count) {
  const stubProvider = async (request) => ({
    ok: true,
    usage: { prompt_tokens: 12, completion_tokens: request.max_tokens ?? 8 },
  });

  const guardedCall = guardFunction(stubProvider, {
    budget: 1_000,
    maxHistory: count,
    loopMinRepeats: count + 1,
    scope: { projectId: 'memory-benchmark' },
  });

  collectGarbageIfAvailable();
  const beforeBytes = process.memoryUsage().heapUsed;

  let callNumber = 0;
  while (callNumber < count) {
    await guardedCall({
      model: 'gpt-4o-mini',
      prompt: `memory benchmark unique prompt ${callNumber}`,
      max_tokens: 8,
    });
    callNumber += 1;
  }

  collectGarbageIfAvailable();
  const afterBytes = process.memoryUsage().heapUsed;
  const heapDeltaBytes = afterBytes - beforeBytes;

  return {
    calls: count,
    heapDeltaBytes,
    heapDeltaPerCallBytes: round(heapDeltaBytes / count, 2),
    gcAvailable: typeof globalThis.gc === 'function',
    note: 'Heap measurements are process-local and noisy unless Node is run with --expose-gc.',
  };
}
|
|
88
|
+
|
|
89
|
+
/**
 * Feeds three different prompts that all start with "again" through one
 * `GuardCore` instance and counts how many of them raise a `GuardError`.
 *
 * @returns {{scenario: string, prompts: number, blocked: number}}
 * @throws Any error from `core.check` that is not a `GuardError`.
 */
function measureFalsePositiveScenarios() {
  const core = new GuardCore({
    budget: 1,
    retryThreshold: 2,
    loopSimilarityThreshold: 0.9,
    scope: { projectId: 'false-positive-benchmark' },
  });

  const prompts = [
    'again compare the two product options',
    'again summarize the second option with different tradeoffs',
    'again write a new title for the launch note',
  ];

  // True when the guard rejects the prompt; unrelated errors propagate.
  const isBlocked = (prompt) => {
    try {
      core.check(context(prompt));
      return false;
    } catch (error) {
      if (!(error instanceof GuardError)) throw error;
      return true;
    }
  };

  // `filter` visits prompts in array order, so checks run in sequence.
  const blocked = prompts.filter((prompt) => isBlocked(prompt)).length;

  return {
    scenario: 'Repeated benign "again" prompts without failure/error language',
    prompts: prompts.length,
    blocked,
  };
}
|
|
118
|
+
|
|
119
|
+
/**
 * Submits the same prompt to one `GuardCore` instance up to five times and
 * reports the first step at which a `GuardError` is thrown.
 *
 * @returns {object} `{ repeatedPrompt, blockedAtStep, code, reason }` when a
 *   step is blocked, otherwise `{ repeatedPrompt, blockedAtStep: null }`.
 * @throws Any error from `core.check` that is not a `GuardError`.
 */
function measureLoopDetectionBehavior() {
  const core = new GuardCore({
    budget: 1,
    loopSimilarityThreshold: 0.9,
    loopMinRepeats: 2,
    scope: { projectId: 'loop-benchmark' },
  });
  const prompt = 'summarize the same tool observation and continue the agent plan';
  const maxSteps = 5;

  let step = 0;
  while (step < maxSteps) {
    step += 1; // steps are reported 1-based
    try {
      core.check(context(prompt));
    } catch (error) {
      if (!(error instanceof GuardError)) throw error;
      return {
        repeatedPrompt: prompt,
        blockedAtStep: step,
        code: error.code,
        reason: error.message,
      };
    }
  }

  return { repeatedPrompt: prompt, blockedAtStep: null };
}
|
|
150
|
+
|
|
151
|
+
/**
 * Runs `estimateRequestTokens` over three fixed sample requests (a short chat,
 * a long repeated instruction, and a code-heavy prompt) and reports the
 * estimated input, output, and total token counts for each.
 *
 * @returns {{tokenizer: string, doesNotClaimProviderExactness: boolean, samples: object[]}}
 */
function measureCostEstimationBoundaries() {
  const userMessages = (content) => [{ role: 'user', content }];

  const samples = [
    {
      label: 'short chat',
      request: { messages: userMessages('hello'), max_tokens: 32 },
    },
    {
      label: 'long instruction',
      request: {
        messages: userMessages('Summarize the following requirements. '.repeat(120)),
        max_tokens: 256,
      },
    },
    {
      label: 'code-heavy prompt',
      request: {
        prompt: 'function example(value) { return value.map((item) => item.id).join(","); }\n'.repeat(40),
        max_tokens: 128,
      },
    },
  ];

  const summarize = ({ label, request }) => {
    const { inputTokens, outputTokens, tokens } = estimateRequestTokens(request);
    return { label, inputTokens, outputTokens, totalTokens: tokens };
  };

  return {
    tokenizer: 'dependency-free estimator',
    doesNotClaimProviderExactness: true,
    samples: samples.map((sample) => summarize(sample)),
  };
}
|
|
184
|
+
|
|
185
|
+
/**
 * Calls the direct and guarded functions alternately, 100 times each, before
 * timing starts. Each call is awaited before the next one begins.
 *
 * @param {Function} directCall - Unwrapped async function.
 * @param {Function} guardedCall - Guard-wrapped async function.
 * @param {object} request - Argument passed to both functions on every call.
 * @returns {Promise<void>}
 */
async function warmup(directCall, guardedCall, request) {
  const totalPasses = 100;
  let pass = 0;
  while (pass < totalPasses) {
    await directCall(request);
    await guardedCall(request);
    pass += 1;
  }
}
|
|
191
|
+
|
|
192
|
+
/**
 * Awaits `fn()` sequentially `count` times and returns the elapsed wall-clock
 * time measured with `performance.now()`.
 *
 * @param {number} count - Number of sequential invocations.
 * @param {Function} fn - Function to invoke; its result is awaited.
 * @returns {Promise<number>} Elapsed milliseconds for the whole loop.
 */
async function timeAsync(count, fn) {
  const startedAt = performance.now();
  let remaining = count;
  while (remaining > 0) {
    await fn();
    remaining -= 1;
  }
  return performance.now() - startedAt;
}
|
|
199
|
+
|
|
200
|
+
/**
 * Builds a fixed request-context object for the given prompt. Everything
 * except `prompt` and `timestamp` (the current `Date.now()`) is constant.
 *
 * @param {string} prompt - Prompt text to embed in the context.
 * @returns {object} Context object handed to `core.check`.
 */
function context(prompt) {
  const inputTokens = 40;
  const outputTokens = 60;
  return {
    model: 'gpt-4o-mini',
    pricingKnown: true,
    tokens: inputTokens + outputTokens,
    inputTokens,
    outputTokens,
    estimatedCost: 0.0001,
    timestamp: Date.now(),
    prompt,
  };
}
|
|
212
|
+
|
|
213
|
+
/**
 * Calls `globalThis.gc()` when it is a function; otherwise does nothing.
 */
function collectGarbageIfAvailable() {
  const collect = globalThis.gc;
  if (typeof collect !== 'function') return;
  globalThis.gc();
}
|
|
218
|
+
|
|
219
|
+
/**
 * Reads the benchmark iteration count from CLI arguments.
 *
 * Looks for `--iterations` followed by a value. The value is truncated to a
 * whole number and accepted only if the truncated result is at least 1.
 * Truncating before the range check matters: a fractional value such as
 * `0.5` would otherwise pass a `> 0` test and then truncate to 0, which makes
 * the per-call averages divide by zero.
 *
 * @param {string[]} args - Command-line arguments (without node/script path).
 * @returns {number} A whole iteration count >= 1, or 5000 when the flag is
 *   missing or its value is absent, non-numeric, non-finite, or below 1.
 */
function readIterations(args) {
  const defaultIterations = 5000;

  const flagIndex = args.indexOf('--iterations');
  if (flagIndex === -1) return defaultIterations;

  // Number(undefined) is NaN, so a trailing flag with no value falls through
  // to the default as well.
  const wholeValue = Math.trunc(Number(args[flagIndex + 1]));
  return Number.isFinite(wholeValue) && wholeValue >= 1 ? wholeValue : defaultIterations;
}
|
|
225
|
+
|
|
226
|
+
/**
 * Rounds `value` to `digits` decimal places by scaling with a power of ten,
 * applying `Math.round`, and scaling back.
 *
 * @param {number} value - Number to round.
 * @param {number} digits - Number of decimal places to keep.
 * @returns {number} The rounded value.
 */
function round(value, digits) {
  const scale = Math.pow(10, digits);
  const scaled = Math.round(value * scale);
  return scaled / scale;
}
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import { estimateTokensFromText } from '../dist/core/tokenizer.js';
|
|
2
|
+
|
|
3
|
+
// Fixed text samples paired with a pre-recorded reference token count each.
const corpus = [
  {
    label: 'support summary',
    text: 'Summarize this support ticket in two bullets.',
    referenceTokens: 9,
  },
  {
    label: 'agent instruction',
    text: 'You are an agent. Use the search tool, then cite the result in JSON.',
    referenceTokens: 16,
  },
  {
    label: 'retry guard',
    text: 'The database migration failed with timeout 504. Retry only if the previous step is idempotent.',
    referenceTokens: 19,
  },
  {
    label: 'typescript code',
    text: 'function normalizeUser(user) { return { id: user.id, email: user.email?.toLowerCase() }; }',
    referenceTokens: 24,
  },
  {
    label: 'model comparison',
    text: 'Compare Claude Haiku and GPT-4o mini for a budget-sensitive chatbot.',
    referenceTokens: 17,
  },
  {
    label: 'customer apology',
    text: 'Write a concise customer apology for a delayed shipment and include one next step.',
    referenceTokens: 16,
  },
  {
    label: 'log line',
    text: 'Analyze these logs: ERROR rate_limit_exceeded request_id=req_123 retry_after=2s',
    referenceTokens: 24,
  },
  {
    label: 'interface request',
    text: 'Create a TypeScript interface for a webhook payload with cost, model, and reason fields.',
    referenceTokens: 17,
  },
];

// Compare the estimator's count with the reference count for every sample.
const samples = corpus.map(({ label, text, referenceTokens }) => {
  const estimatedTokens = estimateTokensFromText(text);
  const absoluteError = Math.abs(estimatedTokens - referenceTokens);
  const percentError = (absoluteError / referenceTokens) * 100;

  return {
    label,
    estimatedTokens,
    referenceTokens,
    absoluteError,
    percentError: round(percentError, 2),
  };
});

// Ascending list of the rounded per-sample percent errors.
const percentErrors = Array.from(samples, ({ percentError }) => percentError);
percentErrors.sort((left, right) => left - right);

const reference = {
  tokenizer: 'gpt-tokenizer cl100k_base fixture counts',
  note:
    'Reference counts are fixed corpus fixtures, not live provider calls. Use this to understand estimator bias, not to claim exact tokenizer parity.',
};

const report = {
  generatedAt: new Date().toISOString(),
  reference,
  sampleCount: samples.length,
  averageErrorPercent: round(average(percentErrors), 2),
  medianErrorPercent: round(median(percentErrors), 2),
  maxErrorPercent: Math.max(...percentErrors),
  samples,
};

console.log(JSON.stringify(report, null, 2));
|
|
72
|
+
|
|
73
|
+
/**
 * Returns the arithmetic mean of `values`, summing left to right from 0.
 * An empty array yields `NaN` (0 divided by 0).
 *
 * @param {number[]} values - Numbers to average.
 * @returns {number} The mean.
 */
function average(values) {
  let sum = 0;
  for (const entry of values) {
    sum = sum + entry;
  }
  return sum / values.length;
}
|
|
76
|
+
|
|
77
|
+
/**
 * Returns the median of a list of numbers.
 *
 * The input does not need to be sorted: a numerically sorted copy is made, so
 * the caller's array is never mutated and an unsorted argument cannot
 * silently produce a wrong result. For already-sorted input the result is the
 * same as taking the middle element(s) directly.
 *
 * @param {number[]} values - Numbers to summarize.
 * @returns {number} The middle value for odd lengths, the mean of the two
 *   middle values for even lengths, or `NaN` for an empty array.
 */
function median(values) {
  // Explicit empty case instead of relying on `undefined + undefined`.
  if (values.length === 0) return Number.NaN;

  // A comparator is required: the default sort compares as strings.
  const ordered = [...values].sort((left, right) => left - right);
  const midpoint = Math.floor(ordered.length / 2);

  if (ordered.length % 2 === 1) return ordered[midpoint];
  return (ordered[midpoint - 1] + ordered[midpoint]) / 2;
}
|
|
82
|
+
|
|
83
|
+
/**
 * Rounds `value` to `digits` decimal places: multiply by a power of ten,
 * apply `Math.round`, then divide by the same power of ten.
 *
 * @param {number} value - Number to round.
 * @param {number} digits - Number of decimal places to keep.
 * @returns {number} The rounded value.
 */
function round(value, digits) {
  const multiplier = Math.pow(10, digits);
  const shifted = value * multiplier;
  return Math.round(shifted) / multiplier;
}
|
package/dist/core/CostGuard.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
export { guard, guardFunction, GuardError, middleware } from './GuardFree.js';
|
|
2
2
|
export type { GuardedClient, GuardEventControls } from './GuardFree.js';
|
|
3
|
-
export { getPricing, registerPricing, listPricing } from '../pricing/index.js';
|
|
3
|
+
export { BUILTIN_PRICING_LAST_UPDATED, getPricing, registerPricing, listPricing } from '../pricing/index.js';
|
|
4
4
|
export type { ModelPricing } from '../pricing/index.js';
|
|
5
5
|
export type { GuardConfig, GuardErrorCode, GuardEvent, GuardEventHandler, GuardEventName, GuardScope, GuardState, GuardWebhookConfig, RequestContext, } from './types.js';
|
|
6
6
|
//# sourceMappingURL=CostGuard.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"CostGuard.d.ts","sourceRoot":"","sources":["../../src/core/CostGuard.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,aAAa,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAC9E,YAAY,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;AACxE,OAAO,EAAE,UAAU,EAAE,eAAe,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;
|
|
1
|
+
{"version":3,"file":"CostGuard.d.ts","sourceRoot":"","sources":["../../src/core/CostGuard.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,aAAa,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAC9E,YAAY,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;AACxE,OAAO,EAAE,4BAA4B,EAAE,UAAU,EAAE,eAAe,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAC7G,YAAY,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACxD,YAAY,EACV,WAAW,EACX,cAAc,EACd,UAAU,EACV,iBAAiB,EACjB,cAAc,EACd,UAAU,EACV,UAAU,EACV,kBAAkB,EAClB,cAAc,GACf,MAAM,YAAY,CAAC"}
|
package/dist/core/CostGuard.js
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
export { guard, guardFunction, GuardError, middleware } from './GuardFree.js';
|
|
2
|
-
export { getPricing, registerPricing, listPricing } from '../pricing/index.js';
|
|
2
|
+
export { BUILTIN_PRICING_LAST_UPDATED, getPricing, registerPricing, listPricing } from '../pricing/index.js';
|
|
3
3
|
//# sourceMappingURL=CostGuard.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"CostGuard.js","sourceRoot":"","sources":["../../src/core/CostGuard.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,aAAa,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE9E,OAAO,EAAE,UAAU,EAAE,eAAe,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC"}
|
|
1
|
+
{"version":3,"file":"CostGuard.js","sourceRoot":"","sources":["../../src/core/CostGuard.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,aAAa,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE9E,OAAO,EAAE,4BAA4B,EAAE,UAAU,EAAE,eAAe,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC"}
|
package/dist/core/GuardPro.d.ts
CHANGED
|
@@ -34,8 +34,6 @@ export interface GuardProConfig {
|
|
|
34
34
|
discordWebhook?: string;
|
|
35
35
|
/** Combined webhook configuration. */
|
|
36
36
|
webhooks?: GuardWebhookConfig;
|
|
37
|
-
/** Deprecated compatibility field. GuardPro does not enforce licenses locally. */
|
|
38
|
-
licenseKey?: string;
|
|
39
37
|
/** Optional Redis-compatible client. When omitted, GuardPro pools ioredis clients by URL. */
|
|
40
38
|
redisClient?: GuardProRedisClient;
|
|
41
39
|
}
|
|
@@ -90,18 +88,8 @@ export declare class GuardPro {
|
|
|
90
88
|
private getSpendKey;
|
|
91
89
|
private createContext;
|
|
92
90
|
}
|
|
93
|
-
/**
|
|
94
|
-
* Deprecated compatibility helper.
|
|
95
|
-
*
|
|
96
|
-
* This is a format sanity check only. It is not license enforcement and should
|
|
97
|
-
* not be used for commercial access control.
|
|
98
|
-
*/
|
|
99
|
-
export declare function validateLicense(key: string): boolean;
|
|
100
91
|
/**
|
|
101
92
|
* Creates GuardPro.
|
|
102
|
-
*
|
|
103
|
-
* The return type remains nullable for backwards compatibility with older
|
|
104
|
-
* callers, but local license rejection has intentionally been removed.
|
|
105
93
|
*/
|
|
106
|
-
export declare function getProGuard(config: GuardProConfig): GuardPro
|
|
94
|
+
export declare function getProGuard(config: GuardProConfig): GuardPro;
|
|
107
95
|
//# sourceMappingURL=GuardPro.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"GuardPro.d.ts","sourceRoot":"","sources":["../../src/core/GuardPro.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,kBAAkB,EAAkB,MAAM,YAAY,CAAC;AAGrE;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,wEAAwE;IACxE,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,4CAA4C;IAC5C,EAAE,CAAC,CAAC,SAAS,EAAE,OAAO,GAAG,OAAO,GAAG,OAAO,EAAE,OAAO,EAAE,MAAM,IAAI,GAAG,OAAO,CAAC;IAC1E,0DAA0D;IAC1D,OAAO,CAAC,IAAI,OAAO,CAAC,OAAO,CAAC,CAAC;IAC7B,uDAAuD;IACvD,IAAI,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;IACtG,qCAAqC;IACrC,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;IACzC,2BAA2B;IAC3B,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;IACnC,6BAA6B;IAC7B,IAAI,CAAC,IAAI,OAAO,CAAC,OAAO,CAAC,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,oFAAoF;IACpF,QAAQ,EAAE,MAAM,CAAC;IACjB,qDAAqD;IACrD,MAAM,EAAE,MAAM,CAAC;IACf,iDAAiD;IACjD,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,wDAAwD;IACxD,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,0DAA0D;IAC1D,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,sCAAsC;IACtC,QAAQ,CAAC,EAAE,kBAAkB,CAAC;IAC9B,
|
|
1
|
+
{"version":3,"file":"GuardPro.d.ts","sourceRoot":"","sources":["../../src/core/GuardPro.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,kBAAkB,EAAkB,MAAM,YAAY,CAAC;AAGrE;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,wEAAwE;IACxE,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,4CAA4C;IAC5C,EAAE,CAAC,CAAC,SAAS,EAAE,OAAO,GAAG,OAAO,GAAG,OAAO,EAAE,OAAO,EAAE,MAAM,IAAI,GAAG,OAAO,CAAC;IAC1E,0DAA0D;IAC1D,OAAO,CAAC,IAAI,OAAO,CAAC,OAAO,CAAC,CAAC;IAC7B,uDAAuD;IACvD,IAAI,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;IACtG,qCAAqC;IACrC,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;IACzC,2BAA2B;IAC3B,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;IACnC,6BAA6B;IAC7B,IAAI,CAAC,IAAI,OAAO,CAAC,OAAO,CAAC,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,oFAAoF;IACpF,QAAQ,EAAE,MAAM,CAAC;IACjB,qDAAqD;IACrD,MAAM,EAAE,MAAM,CAAC;IACf,iDAAiD;IACjD,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,wDAAwD;IACxD,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,0DAA0D;IAC1D,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,sCAAsC;IACtC,QAAQ,CAAC,EAAE,kBAAkB,CAAC;IAC9B,6FAA6F;IAC7F,WAAW,CAAC,EAAE,mBAAmB,CAAC;CACnC;AAcD;;GAEG;AACH,qBAAa,QAAQ;IACnB,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAqC;IAElE,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAsB;IACnD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAiB;IAC5C,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAS;IACvC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAqB;IAC/C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAuC;IAClE,OAAO,CAAC,iBAAiB,CAAS;IAClC,OAAO,CAAC,gBAAgB,CAAS;IAEjC;;OAEG;gBACS,MAAM,EAAE,cAAc;IAsBlC;;OAEG;IACG,cAAc,CAAC,SAAS,EAAE,MAAM,EAAE,aAAa,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAkB7E;;OAEG;IACG,QAAQ,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAclD;;OAEG;IACG,UAAU,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAalD;;OAEG;IACH,WAAW,IAAI,OAAO;IAKtB;;OAEG;IACG,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC;IAe/B,OAAO,CAAC,MAAM,CAAC,YAAY;IA+B3B,OAAO,CAAC,sBAAsB;YAchB,cAAc;YAed,OAAO;YAiBP,wBAAwB;YAcxB,cAAc;IAc5B,OAAO,CAAC,cAAc;IAOtB,OAAO,CAAC,QA
AQ;IAgBhB,OAAO,CAAC,gBAAgB;YAUV,QAAQ;IAQtB,OAAO,CAAC,WAAW;IAInB,OAAO,CAAC,aAAa;CAWtB;AAED;;GAEG;AACH,wBAAgB,WAAW,CAAC,MAAM,EAAE,cAAc,GAAG,QAAQ,CAE5D"}
|
package/dist/core/GuardPro.js
CHANGED
|
@@ -193,13 +193,13 @@ export class GuardPro {
|
|
|
193
193
|
}
|
|
194
194
|
}
|
|
195
195
|
async incrementRedis(redis, key, estimatedCost) {
|
|
196
|
-
const script = `
|
|
197
|
-
local total = redis.call("INCRBYFLOAT", KEYS[1], ARGV[1])
|
|
198
|
-
local ttl = redis.call("TTL", KEYS[1])
|
|
199
|
-
if ttl == -1 then
|
|
200
|
-
redis.call("EXPIRE", KEYS[1], ARGV[2])
|
|
201
|
-
end
|
|
202
|
-
return total
|
|
196
|
+
const script = `
|
|
197
|
+
local total = redis.call("INCRBYFLOAT", KEYS[1], ARGV[1])
|
|
198
|
+
local ttl = redis.call("TTL", KEYS[1])
|
|
199
|
+
if ttl == -1 then
|
|
200
|
+
redis.call("EXPIRE", KEYS[1], ARGV[2])
|
|
201
|
+
end
|
|
202
|
+
return total
|
|
203
203
|
`;
|
|
204
204
|
const total = await redis.eval(script, 1, key, estimatedCost.toString(), this.windowSeconds.toString());
|
|
205
205
|
return Number(total);
|
|
@@ -256,20 +256,8 @@ export class GuardPro {
|
|
|
256
256
|
};
|
|
257
257
|
}
|
|
258
258
|
}
|
|
259
|
-
/**
|
|
260
|
-
* Deprecated compatibility helper.
|
|
261
|
-
*
|
|
262
|
-
* This is a format sanity check only. It is not license enforcement and should
|
|
263
|
-
* not be used for commercial access control.
|
|
264
|
-
*/
|
|
265
|
-
export function validateLicense(key) {
|
|
266
|
-
return typeof key === 'string' && key.trim().length >= 16;
|
|
267
|
-
}
|
|
268
259
|
/**
|
|
269
260
|
* Creates GuardPro.
|
|
270
|
-
*
|
|
271
|
-
* The return type remains nullable for backwards compatibility with older
|
|
272
|
-
* callers, but local license rejection has intentionally been removed.
|
|
273
261
|
*/
|
|
274
262
|
export function getProGuard(config) {
|
|
275
263
|
return new GuardPro(config);
|