@salimassili/ai-costguard 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/CHANGELOG.md +62 -46
  2. package/LICENSE +21 -21
  3. package/README.md +421 -314
  4. package/benchmarks/run.mjs +229 -229
  5. package/benchmarks/token-accuracy.mjs +182 -0
  6. package/dist/cli.d.ts +11 -0
  7. package/dist/cli.d.ts.map +1 -1
  8. package/dist/cli.js +63 -2
  9. package/dist/cli.js.map +1 -1
  10. package/dist/core/CostGuard.d.ts +4 -2
  11. package/dist/core/CostGuard.d.ts.map +1 -1
  12. package/dist/core/CostGuard.js +2 -1
  13. package/dist/core/CostGuard.js.map +1 -1
  14. package/dist/core/GuardCore.d.ts +2 -0
  15. package/dist/core/GuardCore.d.ts.map +1 -1
  16. package/dist/core/GuardCore.js +37 -5
  17. package/dist/core/GuardCore.js.map +1 -1
  18. package/dist/core/GuardPro.d.ts +1 -13
  19. package/dist/core/GuardPro.d.ts.map +1 -1
  20. package/dist/core/GuardPro.js +7 -19
  21. package/dist/core/GuardPro.js.map +1 -1
  22. package/dist/core/tokenizer.d.ts +18 -0
  23. package/dist/core/tokenizer.d.ts.map +1 -1
  24. package/dist/core/tokenizer.js +45 -1
  25. package/dist/core/tokenizer.js.map +1 -1
  26. package/dist/core/types.d.ts +12 -3
  27. package/dist/core/types.d.ts.map +1 -1
  28. package/dist/dashboard.js +49 -49
  29. package/dist/index.d.ts +4 -2
  30. package/dist/index.d.ts.map +1 -1
  31. package/dist/index.js +2 -1
  32. package/dist/index.js.map +1 -1
  33. package/dist/pricing/index.d.ts +24 -0
  34. package/dist/pricing/index.d.ts.map +1 -1
  35. package/dist/pricing/index.js +31 -5
  36. package/dist/pricing/index.js.map +1 -1
  37. package/dist/pro.d.ts +1 -1
  38. package/dist/pro.d.ts.map +1 -1
  39. package/dist/pro.js +1 -1
  40. package/dist/pro.js.map +1 -1
  41. package/docs/BENCHMARKS.md +54 -35
  42. package/docs/DASHBOARD.md +61 -61
  43. package/docs/INTEGRATIONS.md +153 -153
  44. package/examples/integrations/anthropic-workflow-budget.mjs +36 -36
  45. package/examples/integrations/ci-budget-check.mjs +32 -32
  46. package/examples/integrations/crewai-budget-gate.mjs +31 -31
  47. package/examples/integrations/langchain-retry-storm.mjs +32 -32
  48. package/examples/integrations/mastra-agent.mjs +41 -41
  49. package/examples/integrations/openai-agent-loop.mjs +44 -44
  50. package/examples/integrations/vercel-ai-chatbot.mjs +29 -29
  51. package/package.json +71 -69
@@ -1,229 +1,229 @@
1
- import { performance } from 'node:perf_hooks';
2
- import { guardFunction } from '../dist/index.js';
3
- import { GuardCore, GuardError } from '../dist/core/GuardCore.js';
4
- import { estimateRequestTokens } from '../dist/core/tokenizer.js';
5
-
6
- const iterations = readIterations(process.argv.slice(2));
7
-
8
- const result = {
9
- generatedAt: new Date().toISOString(),
10
- node: process.version,
11
- platform: process.platform,
12
- iterations,
13
- runtimeOverhead: await measureRuntimeOverhead(iterations),
14
- memoryOverhead: await measureMemoryOverhead(Math.max(250, Math.min(iterations, 2000))),
15
- falsePositiveScenarios: measureFalsePositiveScenarios(),
16
- loopDetectionBehavior: measureLoopDetectionBehavior(),
17
- costEstimationBoundaries: measureCostEstimationBoundaries(),
18
- };
19
-
20
- console.log(JSON.stringify(result, null, 2));
21
-
22
- async function measureRuntimeOverhead(count) {
23
- async function directCall(request) {
24
- return { ok: true, usage: { prompt_tokens: 12, completion_tokens: request.max_tokens ?? 8 } };
25
- }
26
-
27
- const guardedCall = guardFunction(directCall, {
28
- budget: 1_000,
29
- behaviorAnalysis: false,
30
- scope: { projectId: 'benchmark' },
31
- });
32
-
33
- const request = {
34
- model: 'gpt-4o-mini',
35
- prompt: 'benchmark request',
36
- max_tokens: 8,
37
- };
38
-
39
- await warmup(directCall, guardedCall, request);
40
-
41
- const directMs = await timeAsync(count, () => directCall(request));
42
- const guardedMs = await timeAsync(count, () => guardedCall(request));
43
- const directPerCallMs = directMs / count;
44
- const guardedPerCallMs = guardedMs / count;
45
-
46
- return {
47
- directTotalMs: round(directMs, 3),
48
- guardedTotalMs: round(guardedMs, 3),
49
- directPerCallMs: round(directPerCallMs, 6),
50
- guardedPerCallMs: round(guardedPerCallMs, 6),
51
- addedPerCallMs: round(guardedPerCallMs - directPerCallMs, 6),
52
- };
53
- }
54
-
55
- async function measureMemoryOverhead(count) {
56
- const guardedCall = guardFunction(
57
- async (request) => ({ ok: true, usage: { prompt_tokens: 12, completion_tokens: request.max_tokens ?? 8 } }),
58
- {
59
- budget: 1_000,
60
- maxHistory: count,
61
- loopMinRepeats: count + 1,
62
- scope: { projectId: 'memory-benchmark' },
63
- }
64
- );
65
-
66
- collectGarbageIfAvailable();
67
- const beforeBytes = process.memoryUsage().heapUsed;
68
-
69
- for (let index = 0; index < count; index++) {
70
- await guardedCall({
71
- model: 'gpt-4o-mini',
72
- prompt: `memory benchmark unique prompt ${index}`,
73
- max_tokens: 8,
74
- });
75
- }
76
-
77
- collectGarbageIfAvailable();
78
- const afterBytes = process.memoryUsage().heapUsed;
79
-
80
- return {
81
- calls: count,
82
- heapDeltaBytes: afterBytes - beforeBytes,
83
- heapDeltaPerCallBytes: round((afterBytes - beforeBytes) / count, 2),
84
- gcAvailable: typeof globalThis.gc === 'function',
85
- note: 'Heap measurements are process-local and noisy unless Node is run with --expose-gc.',
86
- };
87
- }
88
-
89
- function measureFalsePositiveScenarios() {
90
- const core = new GuardCore({
91
- budget: 1,
92
- retryThreshold: 2,
93
- loopSimilarityThreshold: 0.9,
94
- scope: { projectId: 'false-positive-benchmark' },
95
- });
96
- const prompts = [
97
- 'again compare the two product options',
98
- 'again summarize the second option with different tradeoffs',
99
- 'again write a new title for the launch note',
100
- ];
101
- let blocked = 0;
102
-
103
- for (const prompt of prompts) {
104
- try {
105
- core.check(context(prompt));
106
- } catch (error) {
107
- if (error instanceof GuardError) blocked += 1;
108
- else throw error;
109
- }
110
- }
111
-
112
- return {
113
- scenario: 'Repeated benign "again" prompts without failure/error language',
114
- prompts: prompts.length,
115
- blocked,
116
- };
117
- }
118
-
119
- function measureLoopDetectionBehavior() {
120
- const core = new GuardCore({
121
- budget: 1,
122
- loopSimilarityThreshold: 0.9,
123
- loopMinRepeats: 2,
124
- scope: { projectId: 'loop-benchmark' },
125
- });
126
- const prompt = 'summarize the same tool observation and continue the agent plan';
127
-
128
- for (let step = 1; step <= 5; step++) {
129
- try {
130
- core.check(context(prompt));
131
- } catch (error) {
132
- if (error instanceof GuardError) {
133
- return {
134
- repeatedPrompt: prompt,
135
- blockedAtStep: step,
136
- code: error.code,
137
- reason: error.message,
138
- };
139
- }
140
-
141
- throw error;
142
- }
143
- }
144
-
145
- return {
146
- repeatedPrompt: prompt,
147
- blockedAtStep: null,
148
- };
149
- }
150
-
151
- function measureCostEstimationBoundaries() {
152
- const samples = [
153
- { label: 'short chat', request: { messages: [{ role: 'user', content: 'hello' }], max_tokens: 32 } },
154
- {
155
- label: 'long instruction',
156
- request: {
157
- messages: [{ role: 'user', content: 'Summarize the following requirements. '.repeat(120) }],
158
- max_tokens: 256,
159
- },
160
- },
161
- {
162
- label: 'code-heavy prompt',
163
- request: {
164
- prompt: 'function example(value) { return value.map((item) => item.id).join(","); }\n'.repeat(40),
165
- max_tokens: 128,
166
- },
167
- },
168
- ];
169
-
170
- return {
171
- tokenizer: 'dependency-free estimator',
172
- doesNotClaimProviderExactness: true,
173
- samples: samples.map((sample) => {
174
- const estimate = estimateRequestTokens(sample.request);
175
- return {
176
- label: sample.label,
177
- inputTokens: estimate.inputTokens,
178
- outputTokens: estimate.outputTokens,
179
- totalTokens: estimate.tokens,
180
- };
181
- }),
182
- };
183
- }
184
-
185
- async function warmup(directCall, guardedCall, request) {
186
- for (let index = 0; index < 100; index++) {
187
- await directCall(request);
188
- await guardedCall(request);
189
- }
190
- }
191
-
192
- async function timeAsync(count, fn) {
193
- const start = performance.now();
194
- for (let index = 0; index < count; index++) {
195
- await fn();
196
- }
197
- return performance.now() - start;
198
- }
199
-
200
- function context(prompt) {
201
- return {
202
- model: 'gpt-4o-mini',
203
- pricingKnown: true,
204
- tokens: 100,
205
- inputTokens: 40,
206
- outputTokens: 60,
207
- estimatedCost: 0.0001,
208
- timestamp: Date.now(),
209
- prompt,
210
- };
211
- }
212
-
213
- function collectGarbageIfAvailable() {
214
- if (typeof globalThis.gc === 'function') {
215
- globalThis.gc();
216
- }
217
- }
218
-
219
- function readIterations(args) {
220
- const index = args.indexOf('--iterations');
221
- if (index === -1) return 5000;
222
- const value = Number(args[index + 1]);
223
- return Number.isFinite(value) && value > 0 ? Math.trunc(value) : 5000;
224
- }
225
-
226
- function round(value, digits) {
227
- const factor = 10 ** digits;
228
- return Math.round(value * factor) / factor;
229
- }
1
+ import { performance } from 'node:perf_hooks';
2
+ import { guardFunction } from '../dist/index.js';
3
+ import { GuardCore, GuardError } from '../dist/core/GuardCore.js';
4
+ import { estimateRequestTokens } from '../dist/core/tokenizer.js';
5
+
6
+ const iterations = readIterations(process.argv.slice(2));
7
+
8
+ const result = {
9
+ generatedAt: new Date().toISOString(),
10
+ node: process.version,
11
+ platform: process.platform,
12
+ iterations,
13
+ runtimeOverhead: await measureRuntimeOverhead(iterations),
14
+ memoryOverhead: await measureMemoryOverhead(Math.max(250, Math.min(iterations, 2000))),
15
+ falsePositiveScenarios: measureFalsePositiveScenarios(),
16
+ loopDetectionBehavior: measureLoopDetectionBehavior(),
17
+ costEstimationBoundaries: measureCostEstimationBoundaries(),
18
+ };
19
+
20
+ console.log(JSON.stringify(result, null, 2));
21
+
22
/**
 * Times a stub call with and without the guardFunction wrapper.
 *
 * The stub and its guarded counterpart are warmed up first, then each is
 * awaited `count` times in sequence (direct first, guarded second).
 *
 * @param {number} count - Number of timed calls per variant.
 * @returns {Promise<object>} Rounded totals, per-call averages, and the
 *   per-call difference (guarded minus direct), all in milliseconds.
 */
async function measureRuntimeOverhead(count) {
  // Stub "provider" call: resolves immediately with a fixed usage payload.
  const directCall = async (request) => ({
    ok: true,
    usage: { prompt_tokens: 12, completion_tokens: request.max_tokens ?? 8 },
  });

  const guardOptions = {
    budget: 1_000,
    behaviorAnalysis: false,
    scope: { projectId: 'benchmark' },
  };
  const guardedCall = guardFunction(directCall, guardOptions);

  const request = { model: 'gpt-4o-mini', prompt: 'benchmark request', max_tokens: 8 };

  await warmup(directCall, guardedCall, request);

  // Property initialisers run in order, so the direct timing completes
  // before the guarded timing starts.
  const totalMs = {
    direct: await timeAsync(count, () => directCall(request)),
    guarded: await timeAsync(count, () => guardedCall(request)),
  };
  const perCallMs = {
    direct: totalMs.direct / count,
    guarded: totalMs.guarded / count,
  };

  return {
    directTotalMs: round(totalMs.direct, 3),
    guardedTotalMs: round(totalMs.guarded, 3),
    directPerCallMs: round(perCallMs.direct, 6),
    guardedPerCallMs: round(perCallMs.guarded, 6),
    addedPerCallMs: round(perCallMs.guarded - perCallMs.direct, 6),
  };
}
54
+
55
/**
 * Reports the change in `heapUsed` across `count` guarded calls, each with
 * a distinct prompt.
 *
 * A garbage collection is requested before each heap reading when
 * `globalThis.gc` is available.
 *
 * @param {number} count - Number of guarded calls to issue; also passed as
 *   `maxHistory` and (plus one) as `loopMinRepeats`.
 * @returns {Promise<object>} Call count, total and per-call heap delta in
 *   bytes, whether `gc` was available, and a caveat note.
 */
async function measureMemoryOverhead(count) {
  const guardedCall = guardFunction(
    async (request) => ({ ok: true, usage: { prompt_tokens: 12, completion_tokens: request.max_tokens ?? 8 } }),
    {
      budget: 1_000,
      maxHistory: count,
      loopMinRepeats: count + 1,
      scope: { projectId: 'memory-benchmark' },
    }
  );

  collectGarbageIfAvailable();
  const beforeBytes = process.memoryUsage().heapUsed;

  let callNumber = 0;
  while (callNumber < count) {
    await guardedCall({
      model: 'gpt-4o-mini',
      prompt: `memory benchmark unique prompt ${callNumber}`,
      max_tokens: 8,
    });
    callNumber += 1;
  }

  collectGarbageIfAvailable();
  const heapDeltaBytes = process.memoryUsage().heapUsed - beforeBytes;

  return {
    calls: count,
    heapDeltaBytes,
    heapDeltaPerCallBytes: round(heapDeltaBytes / count, 2),
    gcAvailable: typeof globalThis.gc === 'function',
    note: 'Heap measurements are process-local and noisy unless Node is run with --expose-gc.',
  };
}
88
+
89
/**
 * Feeds three different prompts that all begin with "again" through one
 * GuardCore instance and counts how many checks throw a GuardError.
 *
 * Any other error type is rethrown unchanged.
 *
 * @returns {object} Scenario description, number of prompts, and number
 *   of prompts that were blocked.
 */
function measureFalsePositiveScenarios() {
  const prompts = [
    'again compare the two product options',
    'again summarize the second option with different tradeoffs',
    'again write a new title for the launch note',
  ];
  const core = new GuardCore({
    budget: 1,
    retryThreshold: 2,
    loopSimilarityThreshold: 0.9,
    scope: { projectId: 'false-positive-benchmark' },
  });

  // The same core sees the prompts in array order, one check per prompt.
  const blocked = prompts.reduce((blockedSoFar, prompt) => {
    try {
      core.check(context(prompt));
      return blockedSoFar;
    } catch (error) {
      if (!(error instanceof GuardError)) throw error;
      return blockedSoFar + 1;
    }
  }, 0);

  return {
    scenario: 'Repeated benign "again" prompts without failure/error language',
    prompts: prompts.length,
    blocked,
  };
}
118
+
119
/**
 * Submits the same prompt to one GuardCore instance up to five times and
 * reports the first step at which the check throws a GuardError.
 *
 * Any other error type is rethrown unchanged.
 *
 * @returns {object} The repeated prompt and `blockedAtStep` (1-based, or
 *   `null` when no check was blocked); when blocked, also the error's
 *   `code` and its message as `reason`.
 */
function measureLoopDetectionBehavior() {
  const lastStep = 5;
  const prompt = 'summarize the same tool observation and continue the agent plan';
  const core = new GuardCore({
    budget: 1,
    loopSimilarityThreshold: 0.9,
    loopMinRepeats: 2,
    scope: { projectId: 'loop-benchmark' },
  });

  let step = 1;
  while (step <= lastStep) {
    try {
      core.check(context(prompt));
    } catch (error) {
      if (!(error instanceof GuardError)) throw error;

      const { code, message: reason } = error;
      return { repeatedPrompt: prompt, blockedAtStep: step, code, reason };
    }
    step += 1;
  }

  return { repeatedPrompt: prompt, blockedAtStep: null };
}
150
+
151
/**
 * Runs estimateRequestTokens over three fixed sample requests (a short
 * chat, a long repeated instruction, and a code-heavy prompt) and reports
 * the estimated input, output, and total token counts for each.
 *
 * @returns {object} Tokenizer label, the `doesNotClaimProviderExactness`
 *   flag, and one summary row per sample.
 */
function measureCostEstimationBoundaries() {
  const longInstruction = 'Summarize the following requirements. '.repeat(120);
  const codeHeavyPrompt =
    'function example(value) { return value.map((item) => item.id).join(","); }\n'.repeat(40);

  const samples = [
    { label: 'short chat', request: { messages: [{ role: 'user', content: 'hello' }], max_tokens: 32 } },
    {
      label: 'long instruction',
      request: { messages: [{ role: 'user', content: longInstruction }], max_tokens: 256 },
    },
    {
      label: 'code-heavy prompt',
      request: { prompt: codeHeavyPrompt, max_tokens: 128 },
    },
  ];

  // Flattens one estimator result into the row shape used by the report.
  const toRow = ({ label, request }) => {
    const { inputTokens, outputTokens, tokens: totalTokens } = estimateRequestTokens(request);
    return { label, inputTokens, outputTokens, totalTokens };
  };

  return {
    tokenizer: 'dependency-free estimator',
    doesNotClaimProviderExactness: true,
    samples: samples.map(toRow),
  };
}
184
+
185
/**
 * Awaits both call variants 100 times each, alternating direct then
 * guarded, before any timing is taken.
 *
 * @param {Function} directCall - Unwrapped call.
 * @param {Function} guardedCall - Guarded counterpart.
 * @param {object} request - Request passed to both on every round.
 * @returns {Promise<void>}
 */
async function warmup(directCall, guardedCall, request) {
  let roundsLeft = 100;
  while (roundsLeft > 0) {
    await directCall(request);
    await guardedCall(request);
    roundsLeft -= 1;
  }
}
191
+
192
/**
 * Awaits `fn` `count` times, one after another, and returns the elapsed
 * wall-clock time.
 *
 * @param {number} count - How many sequential invocations to time.
 * @param {Function} fn - Zero-argument function; its result is awaited.
 * @returns {Promise<number>} Elapsed milliseconds per `performance.now()`.
 */
async function timeAsync(count, fn) {
  const startedAt = performance.now();

  let completed = 0;
  while (completed < count) {
    await fn();
    completed += 1;
  }

  const finishedAt = performance.now();
  return finishedAt - startedAt;
}
199
+
200
/**
 * Builds the fixed check context passed to `core.check`, varying only the
 * prompt and the timestamp (taken from `Date.now()` at call time).
 *
 * @param {string} prompt - Prompt text placed in the context.
 * @returns {object} Context with model, pricing flag, token counts,
 *   estimated cost, timestamp, and prompt.
 */
function context(prompt) {
  const timestamp = Date.now();
  const fixedFields = {
    model: 'gpt-4o-mini',
    pricingKnown: true,
    tokens: 100,
    inputTokens: 40,
    outputTokens: 60,
    estimatedCost: 0.0001,
  };

  return { ...fixedFields, timestamp, prompt };
}
212
+
213
/**
 * Calls `globalThis.gc()` when it is a function; otherwise does nothing.
 *
 * @returns {void}
 */
function collectGarbageIfAvailable() {
  if (typeof globalThis.gc !== 'function') return;
  globalThis.gc();
}
218
+
219
/**
 * Reads the iteration count from `--iterations <n>` in the CLI arguments.
 *
 * The value is truncated to an integer BEFORE it is validated. Validating
 * first let fractional inputs such as `0.5` pass the positivity check and
 * then truncate to 0, which made the per-call averages divide by zero.
 *
 * @param {readonly string[]} args - CLI arguments (without node/script).
 * @returns {number} A whole number >= 1; 5000 when the flag is absent,
 *   has no value, or its value is not a finite number that truncates to
 *   at least 1.
 */
function readIterations(args) {
  const defaultIterations = 5000;

  const flagIndex = args.indexOf('--iterations');
  if (flagIndex === -1) return defaultIterations;

  // Number(undefined) is NaN, so a trailing flag without a value falls
  // through to the default as well.
  const requested = Math.trunc(Number(args[flagIndex + 1]));
  return Number.isFinite(requested) && requested >= 1 ? requested : defaultIterations;
}
225
+
226
/**
 * Rounds `value` to `digits` decimal places by scaling up by a power of
 * ten, applying `Math.round`, and scaling back down.
 *
 * @param {number} value - Number to round.
 * @param {number} digits - Decimal places to keep.
 * @returns {number} The rounded value.
 */
function round(value, digits) {
  const scale = Math.pow(10, digits);
  const scaled = Math.round(value * scale);
  return scaled / scale;
}
@@ -0,0 +1,182 @@
1
+ import { estimateTokensFromText } from '../dist/core/tokenizer.js';
2
+
3
+ const corpus = [
4
+ { label: 'short english', text: 'Summarize this ticket.', referenceTokens: 5 },
5
+ {
6
+ label: 'long english',
7
+ text:
8
+ 'A customer reports that their nightly invoice reconciliation agent loops after a database timeout. ' +
9
+ 'Explain the likely failure mode and propose two safe remediation steps.',
10
+ referenceTokens: 31,
11
+ },
12
+ { label: 'support summary', text: 'Summarize this support ticket in two bullets.', referenceTokens: 9 },
13
+ {
14
+ label: 'agent instruction',
15
+ text: 'You are an agent. Use the search tool, then cite the result in JSON.',
16
+ referenceTokens: 16,
17
+ },
18
+ {
19
+ label: 'retry guard',
20
+ text: 'The database migration failed with timeout 504. Retry only if the previous step is idempotent.',
21
+ referenceTokens: 19,
22
+ },
23
+ {
24
+ label: 'typescript code',
25
+ text: 'function normalizeUser(user) { return { id: user.id, email: user.email?.toLowerCase() }; }',
26
+ referenceTokens: 24,
27
+ },
28
+ {
29
+ label: 'python code',
30
+ text: 'def should_retry(error):\n return error.status in {429, 500, 503} and error.attempts < 3',
31
+ referenceTokens: 27,
32
+ },
33
+ {
34
+ label: 'json config',
35
+ text: '{"model":"gpt-4o-mini","max_tokens":400,"tools":[{"name":"search","strict":true}]}',
36
+ referenceTokens: 25,
37
+ },
38
+ {
39
+ label: 'markdown task',
40
+ text: '## Release checklist\n- Run tests\n- Verify npm pack\n- Publish patch\n- Tag the release',
41
+ referenceTokens: 22,
42
+ },
43
+ {
44
+ label: 'arabic prompt',
45
+ text: 'لخص محادثة الدعم هذه وحدد السبب الجذري والخطوة التالية.',
46
+ referenceTokens: 18,
47
+ },
48
+ {
49
+ label: 'french prompt',
50
+ text: 'Explique pourquoi le budget de l’agent a été dépassé et propose une limite plus sûre.',
51
+ referenceTokens: 20,
52
+ },
53
+ {
54
+ label: 'repeated loop',
55
+ text: 'retry retry retry retry retry retry retry retry retry retry',
56
+ referenceTokens: 10,
57
+ },
58
+ {
59
+ label: 'tool call like',
60
+ text:
61
+ 'tool_call: {"name":"fetch_invoice","arguments":{"customerId":"cus_123","month":"2026-06"}} ' +
62
+ 'result: {"status":"timeout"}',
63
+ referenceTokens: 34,
64
+ },
65
+ {
66
+ label: 'agent loop like',
67
+ text:
68
+ 'Step 12 failed. Try the same retrieval again with identical arguments, then repeat until the API returns data.',
69
+ referenceTokens: 22,
70
+ },
71
+ {
72
+ label: 'model comparison',
73
+ text: 'Compare Claude Haiku and GPT-4o mini for a budget-sensitive chatbot.',
74
+ referenceTokens: 17,
75
+ },
76
+ {
77
+ label: 'customer apology',
78
+ text: 'Write a concise customer apology for a delayed shipment and include one next step.',
79
+ referenceTokens: 16,
80
+ },
81
+ {
82
+ label: 'log line',
83
+ text: 'Analyze these logs: ERROR rate_limit_exceeded request_id=req_123 retry_after=2s',
84
+ referenceTokens: 24,
85
+ },
86
+ {
87
+ label: 'interface request',
88
+ text: 'Create a TypeScript interface for a webhook payload with cost, model, and reason fields.',
89
+ referenceTokens: 17,
90
+ },
91
+ {
92
+ label: 'sql query',
93
+ text: 'SELECT tenant_id, SUM(cost_usd) FROM ai_events WHERE blocked = false GROUP BY tenant_id;',
94
+ referenceTokens: 23,
95
+ },
96
+ {
97
+ label: 'error stack',
98
+ text: 'Error: Budget exceeded\n at GuardCore.check (src/core/GuardCore.ts:144:11)\n at agent.run (agent.ts:42:7)',
99
+ referenceTokens: 33,
100
+ },
101
+ {
102
+ label: 'anthropic workflow',
103
+ text:
104
+ 'Claude should inspect the document, call the classifier once, and stop if confidence is below 0.7.',
105
+ referenceTokens: 21,
106
+ },
107
+ {
108
+ label: 'vercel chatbot',
109
+ text:
110
+ 'streamText should answer the user only after the guard confirms the remaining session budget is sufficient.',
111
+ referenceTokens: 19,
112
+ },
113
+ {
114
+ label: 'ci budget check',
115
+ text: 'npx aifw check --budget 1 --model gpt-4o-mini --tokens 800 --max-steps 20',
116
+ referenceTokens: 29,
117
+ },
118
+ {
119
+ label: 'mixed punctuation',
120
+ text: 'Guard this: $$$, retries=3, model=gpt-4o-mini, scope=session:abc, max_tokens=250.',
121
+ referenceTokens: 28,
122
+ },
123
+ ];
124
+
125
+ const samples = corpus.map((sample) => {
126
+ const estimatedTokens = estimateTokensFromText(sample.text);
127
+ const absoluteError = Math.abs(estimatedTokens - sample.referenceTokens);
128
+ const percentError = (absoluteError / sample.referenceTokens) * 100;
129
+
130
+ return {
131
+ label: sample.label,
132
+ estimatedTokens,
133
+ referenceTokens: sample.referenceTokens,
134
+ absoluteError,
135
+ percentError: round(percentError, 2),
136
+ };
137
+ });
138
+
139
+ const percentErrors = samples.map((sample) => sample.percentError).sort((a, b) => a - b);
140
+ const report = {
141
+ generatedAt: new Date().toISOString(),
142
+ reference: {
143
+ tokenizer: 'fixed proxy fixture counts',
144
+ note:
145
+ 'Reference counts are dependency-free proxy fixtures, not live provider tokenizers. Use this to understand estimator bias and boundaries, not to claim exact provider parity.',
146
+ },
147
+ sampleCount: samples.length,
148
+ averageErrorPercent: round(average(percentErrors), 2),
149
+ medianErrorPercent: round(median(percentErrors), 2),
150
+ maxErrorPercent: Math.max(...percentErrors),
151
+ markdownTable: formatMarkdownTable(samples),
152
+ samples,
153
+ };
154
+
155
+ console.log(JSON.stringify(report, null, 2));
156
+
157
/**
 * Arithmetic mean of `values`: their sum divided by `values.length`.
 * An empty array yields NaN (0 / 0).
 *
 * @param {number[]} values - Numbers to average.
 * @returns {number} The mean.
 */
function average(values) {
  let sum = 0;
  values.forEach((value) => {
    sum += value;
  });
  return sum / values.length;
}
160
+
161
/**
 * Median of `values`.
 *
 * A numerically sorted copy is taken first, so the result no longer
 * depends on the caller having pre-sorted the array, and the caller's
 * array is never mutated. For already-sorted input the result is
 * unchanged.
 *
 * @param {number[]} values - Numbers in any order.
 * @returns {number} The middle element for odd lengths, the mean of the
 *   two middle elements for even lengths, and NaN for an empty array.
 */
function median(values) {
  const ordered = [...values].sort((a, b) => a - b);
  const midpoint = Math.floor(ordered.length / 2);

  if (ordered.length % 2 === 1) return ordered[midpoint];

  // For an empty array both lookups are undefined and the sum is NaN.
  return (ordered[midpoint - 1] + ordered[midpoint]) / 2;
}
166
+
167
/**
 * Rounds `value` to `digits` decimal places: multiply by 10^digits,
 * apply `Math.round`, then divide by the same factor.
 *
 * @param {number} value - Number to round.
 * @param {number} digits - Decimal places to keep.
 * @returns {number} The rounded value.
 */
function round(value, digits) {
  const multiplier = Math.pow(10, digits);
  return Math.round(value * multiplier) / multiplier;
}
171
+
172
/**
 * Renders the accuracy samples as a Markdown table with a header row, an
 * alignment row, and one line per sample.
 *
 * Pipe characters in a label are backslash-escaped so a label cannot
 * split its cell and break the row. Labels without `|` render exactly as
 * before.
 *
 * @param {Array<{label: string, estimatedTokens: number, referenceTokens: number, absoluteError: number, percentError: number}>} rows
 *   Sample rows to render.
 * @returns {string} Table lines joined with `\n` (no trailing newline).
 */
function formatMarkdownTable(rows) {
  const header = ['| sample | estimate | proxy | error | error % |', '| --- | ---: | ---: | ---: | ---: |'];

  const body = rows.map((row) => {
    const label = String(row.label).replace(/\|/g, '\\|');
    return `| ${label} | ${row.estimatedTokens} | ${row.referenceTokens} | ${row.absoluteError} | ${row.percentError}% |`;
  });

  return [...header, ...body].join('\n');
}
package/dist/cli.d.ts CHANGED
@@ -35,6 +35,13 @@ export interface CliDashboardOptions extends DashboardOptions {
35
35
  once: boolean;
36
36
  json: boolean;
37
37
  }
38
+ /**
39
+ * Parsed options for the pricing freshness command.
40
+ */
41
+ export interface CliPricingOptions {
42
+ checkStale: boolean;
43
+ days: number;
44
+ }
38
45
  /**
39
46
  * Parses arguments for `aifw check`.
40
47
  */
@@ -43,6 +50,10 @@ export declare function parseCheckArgs(args: readonly string[]): CliCheckOptions
43
50
  * Parses arguments for `aifw dashboard`.
44
51
  */
45
52
  export declare function parseDashboardArgs(args: readonly string[]): CliDashboardOptions;
53
+ /**
54
+ * Parses arguments for `aifw pricing`.
55
+ */
56
+ export declare function parsePricingArgs(args: readonly string[]): CliPricingOptions;
46
57
  /**
47
58
  * Runs the aifw CLI and returns a process exit code.
48
59
  */
package/dist/cli.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AAGA,OAAO,EAKL,KAAK,gBAAgB,EACtB,MAAM,gBAAgB,CAAC;AAExB;;GAEG;AACH,MAAM,WAAW,KAAK;IACpB,8BAA8B;IAC9B,MAAM,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,6BAA6B;IAC7B,MAAM,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;CAC/B;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,qBAAqB;IACrB,MAAM,EAAE,MAAM,CAAC;IACf,mCAAmC;IACnC,KAAK,EAAE,MAAM,CAAC;IACd,wCAAwC;IACxC,MAAM,EAAE,MAAM,CAAC;IACf,sDAAsD;IACtD,WAAW,EAAE,MAAM,CAAC;IACpB,6CAA6C;IAC7C,QAAQ,EAAE,MAAM,CAAC;IACjB,kDAAkD;IAClD,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,mDAAmD;IACnD,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,mBAAoB,SAAQ,gBAAgB;IAC3D,IAAI,EAAE,OAAO,CAAC;IACd,IAAI,EAAE,OAAO,CAAC;CACf;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,SAAS,MAAM,EAAE,GAAG,eAAe,CA0BvE;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,SAAS,MAAM,EAAE,GAAG,mBAAmB,CAgC/E;AAED;;GAEG;AACH,wBAAgB,MAAM,CAAC,IAAI,GAAE,SAAS,MAAM,EAA0B,EAAE,EAAE,GAAE,KAAiB,GAAG,MAAM,CA4ErG"}
1
+ {"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AAGA,OAAO,EAKL,KAAK,gBAAgB,EACtB,MAAM,gBAAgB,CAAC;AAExB;;GAEG;AACH,MAAM,WAAW,KAAK;IACpB,8BAA8B;IAC9B,MAAM,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,6BAA6B;IAC7B,MAAM,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;CAC/B;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,qBAAqB;IACrB,MAAM,EAAE,MAAM,CAAC;IACf,mCAAmC;IACnC,KAAK,EAAE,MAAM,CAAC;IACd,wCAAwC;IACxC,MAAM,EAAE,MAAM,CAAC;IACf,sDAAsD;IACtD,WAAW,EAAE,MAAM,CAAC;IACpB,6CAA6C;IAC7C,QAAQ,EAAE,MAAM,CAAC;IACjB,kDAAkD;IAClD,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,mDAAmD;IACnD,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,mBAAoB,SAAQ,gBAAgB;IAC3D,IAAI,EAAE,OAAO,CAAC;IACd,IAAI,EAAE,OAAO,CAAC;CACf;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,UAAU,EAAE,OAAO,CAAC;IACpB,IAAI,EAAE,MAAM,CAAC;CACd;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,SAAS,MAAM,EAAE,GAAG,eAAe,CA0BvE;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,SAAS,MAAM,EAAE,GAAG,mBAAmB,CAgC/E;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,SAAS,MAAM,EAAE,GAAG,iBAAiB,CA+B3E;AAED;;GAEG;AACH,wBAAgB,MAAM,CAAC,IAAI,GAAE,SAAS,MAAM,EAA0B,EAAE,EAAE,GAAE,KAAiB,GAAG,MAAM,CA6GrG"}