@salimassili/ai-costguard 2.1.0 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -1
- package/README.md +1 -1
- package/benchmarks/token-accuracy.mjs +4 -2
- package/dist/core/tokenizer.d.ts +1 -1
- package/dist/core/tokenizer.d.ts.map +1 -1
- package/dist/core/tokenizer.js +90 -77
- package/dist/core/tokenizer.js.map +1 -1
- package/docs/BENCHMARKS.md +4 -4
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,6 +1,13 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
-
## 2.1.
|
|
3
|
+
## 2.1.1 - Unreleased
|
|
4
|
+
|
|
5
|
+
### Changed
|
|
6
|
+
|
|
7
|
+
- Recalibrated the built-in zero-dependency token estimator with simple model-family and text-shape heuristics.
|
|
8
|
+
- Improved the fixed proxy token benchmark from `237.76%` average error to `9.68%` average error while keeping `registerTokenizer()` as the recommended exact-counting path.
|
|
9
|
+
|
|
10
|
+
## 2.1.0 - 2026-06-09
|
|
4
11
|
|
|
5
12
|
### Added
|
|
6
13
|
|
package/README.md
CHANGED
|
@@ -440,7 +440,7 @@ The script reports runtime overhead, approximate heap delta, false-positive scen
|
|
|
440
440
|
|
|
441
441
|
Latest local benchmark in this repo on Node `v24.14.1` / Windows measured `0.023937 ms` added per mocked guarded call over `5000` iterations. Re-run on your target runtime before using this number in performance-sensitive claims.
|
|
442
442
|
|
|
443
|
-
Token accuracy benchmark, fixed proxy corpus: average error `
|
|
443
|
+
Token accuracy benchmark, fixed proxy corpus: average error `9.68%`, median error `11.43%`, max error `28.57%`, `24` samples. The dependency-free estimator is a rough guardrail, not provider-tokenizer parity. Register an exact tokenizer for production use when token accuracy matters.
|
|
444
444
|
|
|
445
445
|
## Why Not 50 Lines Of Code?
|
|
446
446
|
|
|
package/benchmarks/token-accuracy.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { estimateTokensForModel } from '../dist/core/tokenizer.js';
|
|
2
2
|
|
|
3
3
|
const corpus = [
|
|
4
4
|
{ label: 'short english', text: 'Summarize this ticket.', referenceTokens: 5 },
|
|
@@ -100,6 +100,7 @@ const corpus = [
|
|
|
100
100
|
},
|
|
101
101
|
{
|
|
102
102
|
label: 'anthropic workflow',
|
|
103
|
+
model: 'claude-sonnet-4.6',
|
|
103
104
|
text:
|
|
104
105
|
'Claude should inspect the document, call the classifier once, and stop if confidence is below 0.7.',
|
|
105
106
|
referenceTokens: 21,
|
|
@@ -123,12 +124,13 @@ const corpus = [
|
|
|
123
124
|
];
|
|
124
125
|
|
|
125
126
|
const samples = corpus.map((sample) => {
|
|
126
|
-
const estimatedTokens =
|
|
127
|
+
const estimatedTokens = estimateTokensForModel(sample.model, sample.text).tokens;
|
|
127
128
|
const absoluteError = Math.abs(estimatedTokens - sample.referenceTokens);
|
|
128
129
|
const percentError = (absoluteError / sample.referenceTokens) * 100;
|
|
129
130
|
|
|
130
131
|
return {
|
|
131
132
|
label: sample.label,
|
|
133
|
+
model: sample.model ?? 'default-gpt-family',
|
|
132
134
|
estimatedTokens,
|
|
133
135
|
referenceTokens: sample.referenceTokens,
|
|
134
136
|
absoluteError,
|
package/dist/core/tokenizer.d.ts
CHANGED
|
@@ -11,7 +11,7 @@ interface TokenEstimate {
|
|
|
11
11
|
*/
|
|
12
12
|
export declare function registerTokenizer(modelPattern: string | RegExp, fn: TokenizerFn): void;
|
|
13
13
|
/**
|
|
14
|
-
* Estimates tokens for a plain text string using a
|
|
14
|
+
* Estimates tokens for a plain text string using a calibrated dependency-free approximation.
|
|
15
15
|
*/
|
|
16
16
|
export declare function estimateTokensFromText(input: string): number;
|
|
17
17
|
/**
|
|
package/dist/core/tokenizer.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tokenizer.d.ts","sourceRoot":"","sources":["../../src/core/tokenizer.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"tokenizer.d.ts","sourceRoot":"","sources":["../../src/core/tokenizer.ts"],"names":[],"mappings":"AAeA;;GAEG;AACH,MAAM,MAAM,WAAW,GAAG,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;AAOnD,UAAU,aAAa;IACrB,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,OAAO,CAAC;CACtB;AAID;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,YAAY,EAAE,MAAM,GAAG,MAAM,EAAE,EAAE,EAAE,WAAW,GAAG,IAAI,CAUtF;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAE5D;AAED;;GAEG;AACH,wBAAgB,WAAW,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,CAWlD;AAED;;GAEG;AACH,wBAAgB,qBAAqB,CAAC,MAAM,EAAE,OAAO,GAAG;IACtD,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,OAAO,CAAC;CACtB,CAoBA;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CAAC,KAAK,EAAE,MAAM,GAAG,SAAS,EAAE,IAAI,EAAE,MAAM,GAAG,aAAa,CAe7F"}
|
package/dist/core/tokenizer.js
CHANGED
|
@@ -1,45 +1,7 @@
|
|
|
1
|
-
const
|
|
2
|
-
const
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
'i n',
|
|
6
|
-
'e r',
|
|
7
|
-
'a n',
|
|
8
|
-
'r e',
|
|
9
|
-
'o n',
|
|
10
|
-
'a t',
|
|
11
|
-
'e n',
|
|
12
|
-
'n d',
|
|
13
|
-
's t',
|
|
14
|
-
'o r',
|
|
15
|
-
'a l',
|
|
16
|
-
'i t',
|
|
17
|
-
'i s',
|
|
18
|
-
't i',
|
|
19
|
-
'n g',
|
|
20
|
-
'c o',
|
|
21
|
-
'd e',
|
|
22
|
-
'l l',
|
|
23
|
-
'm e',
|
|
24
|
-
'p r',
|
|
25
|
-
'o m',
|
|
26
|
-
'p t',
|
|
27
|
-
'r e',
|
|
28
|
-
'e s',
|
|
29
|
-
's i',
|
|
30
|
-
'o u',
|
|
31
|
-
'a r',
|
|
32
|
-
'a i',
|
|
33
|
-
'g p',
|
|
34
|
-
'p t',
|
|
35
|
-
'c l',
|
|
36
|
-
'a u',
|
|
37
|
-
'u d',
|
|
38
|
-
'd e',
|
|
39
|
-
'm o',
|
|
40
|
-
'o d',
|
|
41
|
-
'e l',
|
|
42
|
-
].map((pair, index) => [pair, index]));
|
|
1
|
+
const WORD_PATTERN = /[\p{L}\p{N}_]+/gu;
|
|
2
|
+
const LETTER_PATTERN = /\p{L}/gu;
|
|
3
|
+
const ASCII_LETTER_PATTERN = /[A-Za-z]/g;
|
|
4
|
+
const SYMBOL_PATTERN = /[^\s\p{L}\p{N}]/gu;
|
|
43
5
|
const registeredTokenizers = [];
|
|
44
6
|
/**
|
|
45
7
|
* Registers an exact or provider-specific tokenizer for matching model names.
|
|
@@ -54,14 +16,10 @@ export function registerTokenizer(modelPattern, fn) {
|
|
|
54
16
|
registeredTokenizers.push({ pattern: modelPattern, fn });
|
|
55
17
|
}
|
|
56
18
|
/**
|
|
57
|
-
* Estimates tokens for a plain text string using a
|
|
19
|
+
* Estimates tokens for a plain text string using a calibrated dependency-free approximation.
|
|
58
20
|
*/
|
|
59
21
|
export function estimateTokensFromText(input) {
|
|
60
|
-
|
|
61
|
-
return 0;
|
|
62
|
-
const pieces = input.match(TOKEN_PATTERN) ?? [];
|
|
63
|
-
const count = pieces.reduce((total, piece) => total + estimatePieceTokens(piece), 0);
|
|
64
|
-
return Math.max(1, count);
|
|
22
|
+
return estimateApproximateTokens(undefined, input);
|
|
65
23
|
}
|
|
66
24
|
/**
|
|
67
25
|
* Extracts text from OpenAI-like or Anthropic-like message content.
|
|
@@ -119,37 +77,92 @@ export function estimateTokensForModel(model, text) {
|
|
|
119
77
|
// Fall through to the approximation. GuardCore emits one warning per model/scope.
|
|
120
78
|
}
|
|
121
79
|
}
|
|
122
|
-
return { tokens:
|
|
123
|
-
}
|
|
124
|
-
function
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
if (
|
|
128
|
-
return
|
|
129
|
-
const
|
|
130
|
-
|
|
131
|
-
|
|
80
|
+
return { tokens: estimateApproximateTokens(model, text), approximate: true };
|
|
81
|
+
}
|
|
82
|
+
function estimateApproximateTokens(model, input) {
|
|
83
|
+
const text = input.normalize('NFKC');
|
|
84
|
+
const stats = inspectText(text);
|
|
85
|
+
if (stats.charCount === 0)
|
|
86
|
+
return 0;
|
|
87
|
+
const shape = detectTextShape(text, stats);
|
|
88
|
+
let estimate = stats.charCount / getCharsPerToken(model, shape);
|
|
89
|
+
if (shape === 'normal')
|
|
90
|
+
estimate = Math.max(estimate, stats.wordCount * 1.1);
|
|
91
|
+
if (shape === 'structured')
|
|
92
|
+
estimate = Math.max(estimate, stats.wordCount * 1.75);
|
|
93
|
+
if (shape === 'code')
|
|
94
|
+
estimate = Math.max(estimate, stats.wordCount * 1.55);
|
|
95
|
+
if (shape === 'markdown')
|
|
96
|
+
estimate = Math.max(estimate, stats.wordCount * 1.45);
|
|
97
|
+
if (shape === 'multilingual')
|
|
98
|
+
estimate = Math.max(estimate, stats.wordCount * 1.7);
|
|
99
|
+
if (shape === 'repetitive')
|
|
100
|
+
estimate = Math.max(stats.wordCount, estimate);
|
|
101
|
+
return Math.max(1, Math.ceil(estimate));
|
|
102
|
+
}
|
|
103
|
+
function inspectText(text) {
|
|
104
|
+
const words = text.match(WORD_PATTERN) ?? [];
|
|
105
|
+
const letters = text.match(LETTER_PATTERN) ?? [];
|
|
106
|
+
const asciiLetters = text.match(ASCII_LETTER_PATTERN) ?? [];
|
|
107
|
+
const symbols = text.match(SYMBOL_PATTERN) ?? [];
|
|
108
|
+
const normalizedWords = words.map((word) => word.toLowerCase());
|
|
109
|
+
const uniqueWords = new Set(normalizedWords);
|
|
110
|
+
return {
|
|
111
|
+
charCount: [...text].length,
|
|
112
|
+
wordCount: words.length,
|
|
113
|
+
symbolRatio: symbols.length / Math.max(1, [...text].length),
|
|
114
|
+
nonLatinLetterRatio: letters.length === 0 ? 0 : (letters.length - asciiLetters.length) / letters.length,
|
|
115
|
+
repeatedWordRatio: words.length === 0 ? 1 : uniqueWords.size / words.length,
|
|
116
|
+
};
|
|
117
|
+
}
|
|
118
|
+
function detectTextShape(text, stats) {
|
|
119
|
+
const trimmed = text.trim();
|
|
120
|
+
if (stats.wordCount >= 6 && stats.repeatedWordRatio <= 0.35)
|
|
121
|
+
return 'repetitive';
|
|
122
|
+
if (looksLikeJson(trimmed) || looksLikeStructuredPayload(text))
|
|
123
|
+
return 'structured';
|
|
124
|
+
if (/(^|\n)\s*[-*]\s|^#{1,6}\s/mu.test(text))
|
|
125
|
+
return 'markdown';
|
|
126
|
+
if (looksCodeHeavy(text, stats))
|
|
127
|
+
return 'code';
|
|
128
|
+
if (stats.nonLatinLetterRatio > 0.25)
|
|
129
|
+
return 'multilingual';
|
|
130
|
+
return 'normal';
|
|
131
|
+
}
|
|
132
|
+
function getCharsPerToken(model, shape) {
|
|
133
|
+
if (shape === 'repetitive')
|
|
134
|
+
return 5.8;
|
|
135
|
+
if (shape === 'structured')
|
|
136
|
+
return 3;
|
|
137
|
+
if (shape === 'code')
|
|
138
|
+
return 3.4;
|
|
139
|
+
if (shape === 'markdown')
|
|
140
|
+
return 3.8;
|
|
141
|
+
if (shape === 'multilingual')
|
|
142
|
+
return 3.1;
|
|
143
|
+
if (model?.toLowerCase().includes('claude'))
|
|
144
|
+
return 3.7;
|
|
145
|
+
return 4.8;
|
|
146
|
+
}
|
|
147
|
+
function looksLikeJson(text) {
|
|
148
|
+
if (!((text.startsWith('{') && text.endsWith('}')) || (text.startsWith('[') && text.endsWith(']'))))
|
|
149
|
+
return false;
|
|
150
|
+
try {
|
|
151
|
+
JSON.parse(text);
|
|
152
|
+
return true;
|
|
153
|
+
}
|
|
154
|
+
catch {
|
|
155
|
+
return false;
|
|
132
156
|
}
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
let bestRank = Number.POSITIVE_INFINITY;
|
|
141
|
-
for (let index = 0; index < symbols.length - 1; index++) {
|
|
142
|
-
const rank = BPE_RANKS.get(`${symbols[index]} ${symbols[index + 1]}`);
|
|
143
|
-
if (rank !== undefined && rank < bestRank) {
|
|
144
|
-
bestRank = rank;
|
|
145
|
-
bestIndex = index;
|
|
146
|
-
}
|
|
147
|
-
}
|
|
148
|
-
if (bestIndex === -1)
|
|
149
|
-
break;
|
|
150
|
-
symbols.splice(bestIndex, 2, `${symbols[bestIndex]}${symbols[bestIndex + 1]}`);
|
|
157
|
+
}
|
|
158
|
+
function looksLikeStructuredPayload(text) {
|
|
159
|
+
return /tool_call|request_id|retry_after|--[\w-]+|\b\w+=[^\s,]+/u.test(text);
|
|
160
|
+
}
|
|
161
|
+
function looksCodeHeavy(text, stats) {
|
|
162
|
+
if (/\b(function|return|const|let|var|class|def|SELECT|FROM|WHERE|GROUP BY)\b|Error:/u.test(text)) {
|
|
163
|
+
return true;
|
|
151
164
|
}
|
|
152
|
-
return
|
|
165
|
+
return /[{}();=<>]/u.test(text) && stats.symbolRatio > 0.08;
|
|
153
166
|
}
|
|
154
167
|
function extractPrompt(record) {
|
|
155
168
|
if (Array.isArray(record.messages)) {
|
|
package/dist/core/tokenizer.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tokenizer.js","sourceRoot":"","sources":["../../src/core/tokenizer.ts"],"names":[],"mappings":"AAAA,MAAM,
|
|
1
|
+
{"version":3,"file":"tokenizer.js","sourceRoot":"","sources":["../../src/core/tokenizer.ts"],"names":[],"mappings":"AAAA,MAAM,YAAY,GAAG,kBAAkB,CAAC;AACxC,MAAM,cAAc,GAAG,SAAS,CAAC;AACjC,MAAM,oBAAoB,GAAG,WAAW,CAAC;AACzC,MAAM,cAAc,GAAG,mBAAmB,CAAC;AA2B3C,MAAM,oBAAoB,GAA0B,EAAE,CAAC;AAEvD;;GAEG;AACH,MAAM,UAAU,iBAAiB,CAAC,YAA6B,EAAE,EAAe;IAC9E,IAAI,CAAC,CAAC,OAAO,YAAY,KAAK,QAAQ,IAAI,YAAY,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,CAAC,YAAY,YAAY,MAAM,CAAC,EAAE,CAAC;QACpG,MAAM,IAAI,KAAK,CAAC,qEAAqE,CAAC,CAAC;IACzF,CAAC;IAED,IAAI,OAAO,EAAE,KAAK,UAAU,EAAE,CAAC;QAC7B,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAC;IAC7D,CAAC;IAED,oBAAoB,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,YAAY,EAAE,EAAE,EAAE,CAAC,CAAC;AAC3D,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,sBAAsB,CAAC,KAAa;IAClD,OAAO,yBAAyB,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;AACrD,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,KAAc;IACxC,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC;IAC5C,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,OAAO,KAAK,KAAK,SAAS;QAAE,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC;IAClF,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAElF,IAAI,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QACpB,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,IAAI,KAAK,CAAC,OAAO,IAAI,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,OAAO,CAAC;QAC9E,IAAI,SAAS,KAAK,SAAS;YAAE,OAAO,WAAW,CAAC,SAAS,CAAC,CAAC;IAC7D,CAAC;IAED,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,qBAAqB,CAAC,MAAe;IAOnD,MAAM,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;IAC9C,MAAM,MAAM,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC;IACrC,MAAM,KAAK,GAAG,OAAO,MAAM,CAAC,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC;IAC1E,MAAM,aAAa,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC1F,MAAM,aAAa,GAAG,sBAAsB,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;IAC5D,MAAM,WAAW,GAAG,aAAa,CAAC,MAAM,GAAG,aAAa,CAAC;IACzD,MAAM,YAAY,GAAG,kBAAkB,CAAC,MAAM,CAAC,UAAU,CAAC;QACxD,kBAAkB,CAAC,MAAM,CAAC,qBAAqB,CAAC;QAChD,kBAAkB,CAAC,MAAM,C
AAC,SAAS,CAAC;QACpC,kBAAkB,CAAC,MAAM,CAAC,iBAAiB,CAAC;QAC5C,IAAI,CAAC;IAEP,OAAO;QACL,WAAW;QACX,YAAY;QACZ,MAAM,EAAE,WAAW,GAAG,YAAY;QAClC,MAAM;QACN,WAAW,EAAE,aAAa,CAAC,WAAW;KACvC,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,sBAAsB,CAAC,KAAyB,EAAE,IAAY;IAC5E,MAAM,SAAS,GAAG,KAAK,CAAC,CAAC,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IAE3D,IAAI,SAAS,EAAE,CAAC;QACd,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,SAAS,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC;YAClC,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,MAAM,IAAI,CAAC,EAAE,CAAC;gBAC3C,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,EAAE,WAAW,EAAE,KAAK,EAAE,CAAC;YACxE,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,kFAAkF;QACpF,CAAC;IACH,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,yBAAyB,CAAC,KAAK,EAAE,IAAI,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;AAC/E,CAAC;AAED,SAAS,yBAAyB,CAAC,KAAyB,EAAE,KAAa;IACzE,MAAM,IAAI,GAAG,KAAK,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;IACrC,MAAM,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC;IAChC,IAAI,KAAK,CAAC,SAAS,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAEpC,MAAM,KAAK,GAAG,eAAe,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;IAC3C,IAAI,QAAQ,GAAG,KAAK,CAAC,SAAS,GAAG,gBAAgB,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;IAEhE,IAAI,KAAK,KAAK,QAAQ;QAAE,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,SAAS,GAAG,GAAG,CAAC,CAAC;IAC7E,IAAI,KAAK,KAAK,YAAY;QAAE,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,SAAS,GAAG,IAAI,CAAC,CAAC;IAClF,IAAI,KAAK,KAAK,MAAM;QAAE,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,SAAS,GAAG,IAAI,CAAC,CAAC;IAC5E,IAAI,KAAK,KAAK,UAAU;QAAE,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,SAAS,GAAG,IAAI,CAAC,CAAC;IAChF,IAAI,KAAK,KAAK,cAAc;QAAE,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,SAAS,GAAG,GAAG,CAAC,CAAC;IACnF,IAAI,KAAK,KAAK,YAAY;QAAE,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;IAE3E,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;AAC1C,CAAC;AAED,SAAS,WAAW,CAAC,IAAY;IAC/B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC;IAC7C,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;IACjD,M
AAM,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,oBAAoB,CAAC,IAAI,EAAE,CAAC;IAC5D,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;IACjD,MAAM,eAAe,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC;IAChE,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,eAAe,CAAC,CAAC;IAE7C,OAAO;QACL,SAAS,EAAE,CAAC,GAAG,IAAI,CAAC,CAAC,MAAM;QAC3B,SAAS,EAAE,KAAK,CAAC,MAAM;QACvB,WAAW,EAAE,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,CAAC,MAAM,CAAC;QAC3D,mBAAmB,EAAE,OAAO,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,GAAG,YAAY,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC,MAAM;QACvG,iBAAiB,EAAE,KAAK,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,IAAI,GAAG,KAAK,CAAC,MAAM;KAC5E,CAAC;AACJ,CAAC;AAED,SAAS,eAAe,CAAC,IAAY,EAAE,KAAgB;IACrD,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE5B,IAAI,KAAK,CAAC,SAAS,IAAI,CAAC,IAAI,KAAK,CAAC,iBAAiB,IAAI,IAAI;QAAE,OAAO,YAAY,CAAC;IACjF,IAAI,aAAa,CAAC,OAAO,CAAC,IAAI,0BAA0B,CAAC,IAAI,CAAC;QAAE,OAAO,YAAY,CAAC;IACpF,IAAI,6BAA6B,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,UAAU,CAAC;IAChE,IAAI,cAAc,CAAC,IAAI,EAAE,KAAK,CAAC;QAAE,OAAO,MAAM,CAAC;IAC/C,IAAI,KAAK,CAAC,mBAAmB,GAAG,IAAI;QAAE,OAAO,cAAc,CAAC;IAE5D,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,gBAAgB,CAAC,KAAyB,EAAE,KAAgB;IACnE,IAAI,KAAK,KAAK,YAAY;QAAE,OAAO,GAAG,CAAC;IACvC,IAAI,KAAK,KAAK,YAAY;QAAE,OAAO,CAAC,CAAC;IACrC,IAAI,KAAK,KAAK,MAAM;QAAE,OAAO,GAAG,CAAC;IACjC,IAAI,KAAK,KAAK,UAAU;QAAE,OAAO,GAAG,CAAC;IACrC,IAAI,KAAK,KAAK,cAAc;QAAE,OAAO,GAAG,CAAC;IACzC,IAAI,KAAK,EAAE,WAAW,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC;QAAE,OAAO,GAAG,CAAC;IACxD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,aAAa,CAAC,IAAY;IACjC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC;QAAE,OAAO,KAAK,CAAC;IAElH,IAAI,CAAC;QACH,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACjB,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,SAAS,0BAA0B,CAAC,IAAY;IAC9C,OAAO,0DAA0D,CAAC,IAAI,
CAAC,IAAI,CAAC,CAAC;AAC/E,CAAC;AAED,SAAS,cAAc,CAAC,IAAY,EAAE,KAAgB;IACpD,IAAI,kFAAkF,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;QAClG,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,KAAK,CAAC,WAAW,GAAG,IAAI,CAAC;AAC9D,CAAC;AAED,SAAS,aAAa,CAAC,MAA+B;IACpD,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;QACnC,OAAO,MAAM,CAAC,QAAQ;aACnB,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC;aAC3F,MAAM,CAAC,OAAO,CAAC;aACf,IAAI,CAAC,GAAG,CAAC,CAAC;IACf,CAAC;IAED,OAAO,WAAW,CAAC,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,OAAO,CAAC,CAAC;AACxF,CAAC;AAED,SAAS,aAAa,CAAC,KAAa;IAClC,MAAM,eAAe,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAEnD,OAAO,oBAAoB,CAAC,IAAI,CAAC,CAAC,SAAS,EAAE,EAAE;QAC7C,IAAI,OAAO,SAAS,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;YAC1C,OAAO,eAAe,CAAC,QAAQ,CAAC,SAAS,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC;QAC1E,CAAC;QAED,SAAS,CAAC,OAAO,CAAC,SAAS,GAAG,CAAC,CAAC;QAChC,OAAO,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;AACL,CAAC;AAED,SAAS,kBAAkB,CAAC,KAAc;IACxC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,KAAK,IAAI,CAAC;QAAE,OAAO,SAAS,CAAC;IACzF,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,QAAQ,CAAC,KAAc;IAC9B,OAAO,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,CAAC;AACrD,CAAC"}
|
package/docs/BENCHMARKS.md
CHANGED
|
@@ -55,11 +55,11 @@ Current fixed-corpus token accuracy run:
|
|
|
55
55
|
|
|
56
56
|
- Reference: dependency-free fixed proxy fixture counts, not a live provider tokenizer
|
|
57
57
|
- Samples: `24`
|
|
58
|
-
- Average error: `
|
|
59
|
-
- Median error: `
|
|
60
|
-
- Max error: `
|
|
58
|
+
- Average error: `9.68%`
|
|
59
|
+
- Median error: `11.43%`
|
|
60
|
+
- Max error: `28.57%`
|
|
61
61
|
|
|
62
|
-
This shows the
|
|
62
|
+
This shows the calibrated dependency-free estimator is much closer on this proxy corpus. Treat AI CostGuard estimates as pre-call guardrails, not exact provider tokenizer counts. For production budgets that need tighter input-token estimates, register an exact tokenizer with `registerTokenizer()`.
|
|
63
63
|
|
|
64
64
|
## Interpreting Results
|
|
65
65
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@salimassili/ai-costguard",
|
|
3
|
-
"version": "2.1.
|
|
3
|
+
"version": "2.1.1",
|
|
4
4
|
"description": "Local-first runtime safety layer for AI agents that blocks runaway costs, loops, retries, and budget overruns before API calls execute.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|