@salimassili/ai-costguard 2.1.0 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,6 +1,13 @@
1
1
  # Changelog
2
2
 
3
- ## 2.1.0 - Unreleased
3
+ ## 2.1.1 - Unreleased
4
+
5
+ ### Changed
6
+
7
+ - Recalibrated the built-in zero-dependency token estimator with simple model-family and text-shape heuristics.
8
+ - Reduced the average error on the fixed proxy token benchmark from `237.76%` to `9.68%` while keeping `registerTokenizer()` as the recommended exact-counting path.
9
+
10
+ ## 2.1.0 - 2026-06-09
4
11
 
5
12
  ### Added
6
13
 
package/README.md CHANGED
@@ -440,7 +440,7 @@ The script reports runtime overhead, approximate heap delta, false-positive scen
440
440
 
441
441
  Latest local benchmark in this repo on Node `v24.14.1` / Windows measured `0.023937 ms` added per mocked guarded call over `5000` iterations. Re-run on your target runtime before using this number in performance-sensitive claims.
442
442
 
443
- Token accuracy benchmark, fixed proxy corpus: average error `237.76%`, median error `240.06%`, max error `390%`, `24` samples. The current dependency-free estimator is conservative and can substantially overestimate short prompts. Register an exact tokenizer for production use when token accuracy matters.
443
+ Token accuracy benchmark, fixed proxy corpus: average error `9.68%`, median error `11.43%`, max error `28.57%`, `24` samples. The dependency-free estimator is a rough guardrail and does not aim for parity with provider tokenizers. Register an exact tokenizer for production use when token accuracy matters.
444
444
 
445
445
  ## Why Not 50 Lines Of Code?
446
446
 
@@ -1,4 +1,4 @@
1
- import { estimateTokensFromText } from '../dist/core/tokenizer.js';
1
+ import { estimateTokensForModel } from '../dist/core/tokenizer.js';
2
2
 
3
3
  const corpus = [
4
4
  { label: 'short english', text: 'Summarize this ticket.', referenceTokens: 5 },
@@ -100,6 +100,7 @@ const corpus = [
100
100
  },
101
101
  {
102
102
  label: 'anthropic workflow',
103
+ model: 'claude-sonnet-4.6',
103
104
  text:
104
105
  'Claude should inspect the document, call the classifier once, and stop if confidence is below 0.7.',
105
106
  referenceTokens: 21,
@@ -123,12 +124,13 @@ const corpus = [
123
124
  ];
124
125
 
125
126
  const samples = corpus.map((sample) => {
126
- const estimatedTokens = estimateTokensFromText(sample.text);
127
+ const estimatedTokens = estimateTokensForModel(sample.model, sample.text).tokens;
127
128
  const absoluteError = Math.abs(estimatedTokens - sample.referenceTokens);
128
129
  const percentError = (absoluteError / sample.referenceTokens) * 100;
129
130
 
130
131
  return {
131
132
  label: sample.label,
133
+ model: sample.model ?? 'default-gpt-family',
132
134
  estimatedTokens,
133
135
  referenceTokens: sample.referenceTokens,
134
136
  absoluteError,
@@ -11,7 +11,7 @@ interface TokenEstimate {
11
11
  */
12
12
  export declare function registerTokenizer(modelPattern: string | RegExp, fn: TokenizerFn): void;
13
13
  /**
14
- * Estimates tokens for a plain text string using a small inline BPE approximation.
14
+ * Estimates tokens for a plain text string using a calibrated dependency-free approximation.
15
15
  */
16
16
  export declare function estimateTokensFromText(input: string): number;
17
17
  /**
@@ -1 +1 @@
1
- {"version":3,"file":"tokenizer.d.ts","sourceRoot":"","sources":["../../src/core/tokenizer.ts"],"names":[],"mappings":"AA+CA;;GAEG;AACH,MAAM,MAAM,WAAW,GAAG,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;AAOnD,UAAU,aAAa;IACrB,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,OAAO,CAAC;CACtB;AAID;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,YAAY,EAAE,MAAM,GAAG,MAAM,EAAE,EAAE,EAAE,WAAW,GAAG,IAAI,CAUtF;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAO5D;AAED;;GAEG;AACH,wBAAgB,WAAW,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,CAWlD;AAED;;GAEG;AACH,wBAAgB,qBAAqB,CAAC,MAAM,EAAE,OAAO,GAAG;IACtD,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,OAAO,CAAC;CACtB,CAoBA;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CAAC,KAAK,EAAE,MAAM,GAAG,SAAS,EAAE,IAAI,EAAE,MAAM,GAAG,aAAa,CAe7F"}
1
+ {"version":3,"file":"tokenizer.d.ts","sourceRoot":"","sources":["../../src/core/tokenizer.ts"],"names":[],"mappings":"AAeA;;GAEG;AACH,MAAM,MAAM,WAAW,GAAG,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;AAOnD,UAAU,aAAa;IACrB,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,OAAO,CAAC;CACtB;AAID;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,YAAY,EAAE,MAAM,GAAG,MAAM,EAAE,EAAE,EAAE,WAAW,GAAG,IAAI,CAUtF;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAE5D;AAED;;GAEG;AACH,wBAAgB,WAAW,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,CAWlD;AAED;;GAEG;AACH,wBAAgB,qBAAqB,CAAC,MAAM,EAAE,OAAO,GAAG;IACtD,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,OAAO,CAAC;CACtB,CAoBA;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CAAC,KAAK,EAAE,MAAM,GAAG,SAAS,EAAE,IAAI,EAAE,MAAM,GAAG,aAAa,CAe7F"}
@@ -1,45 +1,7 @@
1
- const TOKEN_PATTERN = /'s|'t|'re|'ve|'m|'ll|'d| ?[\p{L}]+| ?\p{N}{1,3}| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+/gu;
2
- const BPE_RANKS = new Map([
3
- 't h',
4
- 'h e',
5
- 'i n',
6
- 'e r',
7
- 'a n',
8
- 'r e',
9
- 'o n',
10
- 'a t',
11
- 'e n',
12
- 'n d',
13
- 's t',
14
- 'o r',
15
- 'a l',
16
- 'i t',
17
- 'i s',
18
- 't i',
19
- 'n g',
20
- 'c o',
21
- 'd e',
22
- 'l l',
23
- 'm e',
24
- 'p r',
25
- 'o m',
26
- 'p t',
27
- 'r e',
28
- 'e s',
29
- 's i',
30
- 'o u',
31
- 'a r',
32
- 'a i',
33
- 'g p',
34
- 'p t',
35
- 'c l',
36
- 'a u',
37
- 'u d',
38
- 'd e',
39
- 'm o',
40
- 'o d',
41
- 'e l',
42
- ].map((pair, index) => [pair, index]));
1
+ const WORD_PATTERN = /[\p{L}\p{N}_]+/gu;
2
+ const LETTER_PATTERN = /\p{L}/gu;
3
+ const ASCII_LETTER_PATTERN = /[A-Za-z]/g;
4
+ const SYMBOL_PATTERN = /[^\s\p{L}\p{N}]/gu;
43
5
  const registeredTokenizers = [];
44
6
  /**
45
7
  * Registers an exact or provider-specific tokenizer for matching model names.
@@ -54,14 +16,10 @@ export function registerTokenizer(modelPattern, fn) {
54
16
  registeredTokenizers.push({ pattern: modelPattern, fn });
55
17
  }
56
18
  /**
57
- * Estimates tokens for a plain text string using a small inline BPE approximation.
19
+ * Estimates tokens for a plain text string using a calibrated dependency-free approximation.
58
20
  */
59
21
  export function estimateTokensFromText(input) {
60
- if (input.length === 0)
61
- return 0;
62
- const pieces = input.match(TOKEN_PATTERN) ?? [];
63
- const count = pieces.reduce((total, piece) => total + estimatePieceTokens(piece), 0);
64
- return Math.max(1, count);
22
+ return estimateApproximateTokens(undefined, input);
65
23
  }
66
24
  /**
67
25
  * Extracts text from OpenAI-like or Anthropic-like message content.
@@ -119,37 +77,92 @@ export function estimateTokensForModel(model, text) {
119
77
  // Fall through to the approximation. GuardCore emits one warning per model/scope.
120
78
  }
121
79
  }
122
- return { tokens: estimateTokensFromText(text), approximate: true };
123
- }
124
- function estimatePieceTokens(piece) {
125
- if (/^\s+$/u.test(piece))
126
- return 1;
127
- if (/^\p{N}{1,3}$/u.test(piece.trim()))
128
- return 1;
129
- const normalized = piece.normalize('NFKC');
130
- if (/^[\p{P}\p{S}\s]+$/u.test(normalized)) {
131
- return Math.max(1, Math.ceil([...normalized].length / 2));
80
+ return { tokens: estimateApproximateTokens(model, text), approximate: true };
81
+ }
82
+ function estimateApproximateTokens(model, input) {
83
+ const text = input.normalize('NFKC');
84
+ const stats = inspectText(text);
85
+ if (stats.charCount === 0)
86
+ return 0;
87
+ const shape = detectTextShape(text, stats);
88
+ let estimate = stats.charCount / getCharsPerToken(model, shape);
89
+ if (shape === 'normal')
90
+ estimate = Math.max(estimate, stats.wordCount * 1.1);
91
+ if (shape === 'structured')
92
+ estimate = Math.max(estimate, stats.wordCount * 1.75);
93
+ if (shape === 'code')
94
+ estimate = Math.max(estimate, stats.wordCount * 1.55);
95
+ if (shape === 'markdown')
96
+ estimate = Math.max(estimate, stats.wordCount * 1.45);
97
+ if (shape === 'multilingual')
98
+ estimate = Math.max(estimate, stats.wordCount * 1.7);
99
+ if (shape === 'repetitive')
100
+ estimate = Math.max(stats.wordCount, estimate);
101
+ return Math.max(1, Math.ceil(estimate));
102
+ }
103
+ function inspectText(text) {
104
+ const words = text.match(WORD_PATTERN) ?? [];
105
+ const letters = text.match(LETTER_PATTERN) ?? [];
106
+ const asciiLetters = text.match(ASCII_LETTER_PATTERN) ?? [];
107
+ const symbols = text.match(SYMBOL_PATTERN) ?? [];
108
+ const normalizedWords = words.map((word) => word.toLowerCase());
109
+ const uniqueWords = new Set(normalizedWords);
110
+ return {
111
+ charCount: [...text].length,
112
+ wordCount: words.length,
113
+ symbolRatio: symbols.length / Math.max(1, [...text].length),
114
+ nonLatinLetterRatio: letters.length === 0 ? 0 : (letters.length - asciiLetters.length) / letters.length,
115
+ repeatedWordRatio: words.length === 0 ? 1 : uniqueWords.size / words.length,
116
+ };
117
+ }
118
+ function detectTextShape(text, stats) {
119
+ const trimmed = text.trim();
120
+ if (stats.wordCount >= 6 && stats.repeatedWordRatio <= 0.35)
121
+ return 'repetitive';
122
+ if (looksLikeJson(trimmed) || looksLikeStructuredPayload(text))
123
+ return 'structured';
124
+ if (/(^|\n)\s*[-*]\s|^#{1,6}\s/mu.test(text))
125
+ return 'markdown';
126
+ if (looksCodeHeavy(text, stats))
127
+ return 'code';
128
+ if (stats.nonLatinLetterRatio > 0.25)
129
+ return 'multilingual';
130
+ return 'normal';
131
+ }
132
+ function getCharsPerToken(model, shape) {
133
+ if (shape === 'repetitive')
134
+ return 5.8;
135
+ if (shape === 'structured')
136
+ return 3;
137
+ if (shape === 'code')
138
+ return 3.4;
139
+ if (shape === 'markdown')
140
+ return 3.8;
141
+ if (shape === 'multilingual')
142
+ return 3.1;
143
+ if (model?.toLowerCase().includes('claude'))
144
+ return 3.7;
145
+ return 4.8;
146
+ }
147
+ function looksLikeJson(text) {
148
+ if (!((text.startsWith('{') && text.endsWith('}')) || (text.startsWith('[') && text.endsWith(']'))))
149
+ return false;
150
+ try {
151
+ JSON.parse(text);
152
+ return true;
153
+ }
154
+ catch {
155
+ return false;
132
156
  }
133
- const symbols = applyApproximateBpe([...normalized.toLowerCase()]);
134
- return Math.max(1, symbols.length);
135
- }
136
- function applyApproximateBpe(initialSymbols) {
137
- const symbols = [...initialSymbols];
138
- while (symbols.length > 1) {
139
- let bestIndex = -1;
140
- let bestRank = Number.POSITIVE_INFINITY;
141
- for (let index = 0; index < symbols.length - 1; index++) {
142
- const rank = BPE_RANKS.get(`${symbols[index]} ${symbols[index + 1]}`);
143
- if (rank !== undefined && rank < bestRank) {
144
- bestRank = rank;
145
- bestIndex = index;
146
- }
147
- }
148
- if (bestIndex === -1)
149
- break;
150
- symbols.splice(bestIndex, 2, `${symbols[bestIndex]}${symbols[bestIndex + 1]}`);
157
+ }
158
+ function looksLikeStructuredPayload(text) {
159
+ return /tool_call|request_id|retry_after|--[\w-]+|\b\w+=[^\s,]+/u.test(text);
160
+ }
161
+ function looksCodeHeavy(text, stats) {
162
+ if (/\b(function|return|const|let|var|class|def|SELECT|FROM|WHERE|GROUP BY)\b|Error:/u.test(text)) {
163
+ return true;
151
164
  }
152
- return symbols;
165
+ return /[{}();=<>]/u.test(text) && stats.symbolRatio > 0.08;
153
166
  }
154
167
  function extractPrompt(record) {
155
168
  if (Array.isArray(record.messages)) {
@@ -1 +1 @@
1
- {"version":3,"file":"tokenizer.js","sourceRoot":"","sources":["../../src/core/tokenizer.ts"],"names":[],"mappings":"AAAA,MAAM,aAAa,GACjB,oFAAoF,CAAC;AAEvF,MAAM,SAAS,GAAG,IAAI,GAAG,CACvB;IACE,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;CACN,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC,CACtC,CAAC;AAiBF,MAAM,oBAAoB,GAA0B,EAAE,CAAC;AAEvD;;GAEG;AACH,MAAM,UAAU,iBAAiB,CAAC,YAA6B,EAAE,EAAe;IAC9E,IAAI,CAAC,CAAC,OAAO,YAAY,KAAK,QAAQ,IAAI,YAAY,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,CAAC,YAAY,YAAY,MAAM,CAAC,EAAE,CAAC;QACpG,MAAM,IAAI,KAAK,CAAC,qEAAqE,CAAC,CAAC;IACzF,CAAC;IAED,IAAI,OAAO,EAAE,KAAK,UAAU,EAAE,CAAC;QAC7B,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAC;IAC7D,CAAC;IAED,oBAAoB,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,YAAY,EAAE,EAAE,EAAE,CAAC,CAAC;AAC3D,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,sBAAsB,CAAC,KAAa;IAClD,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAEjC,MAAM,MAAM,GAAG,KAAK,CAAC,KAAK,CAAC,aAAa,CAAC,IAAI,EAAE,CAAC;IAChD,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE,CAAC,KAAK,GAAG,mBAAmB,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC;IAErF,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;AAC5B,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,KAAc;IACxC,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC;IAC5C,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,OAAO,KAAK,KAAK,SAAS;QAAE,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC;IAClF,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAElF,IAAI,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QACpB,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,IAAI,KAAK,CAAC,OAAO,IAAI,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,OAAO,CAAC;QAC9E,IAAI,SAAS,KAAK,SAAS;YAAE,OAAO,WAAW,CAAC,SAAS,CAAC,CAAC;IAC7D,CA
AC;IAED,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,qBAAqB,CAAC,MAAe;IAOnD,MAAM,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;IAC9C,MAAM,MAAM,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC;IACrC,MAAM,KAAK,GAAG,OAAO,MAAM,CAAC,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC;IAC1E,MAAM,aAAa,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC1F,MAAM,aAAa,GAAG,sBAAsB,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;IAC5D,MAAM,WAAW,GAAG,aAAa,CAAC,MAAM,GAAG,aAAa,CAAC;IACzD,MAAM,YAAY,GAAG,kBAAkB,CAAC,MAAM,CAAC,UAAU,CAAC;QACxD,kBAAkB,CAAC,MAAM,CAAC,qBAAqB,CAAC;QAChD,kBAAkB,CAAC,MAAM,CAAC,SAAS,CAAC;QACpC,kBAAkB,CAAC,MAAM,CAAC,iBAAiB,CAAC;QAC5C,IAAI,CAAC;IAEP,OAAO;QACL,WAAW;QACX,YAAY;QACZ,MAAM,EAAE,WAAW,GAAG,YAAY;QAClC,MAAM;QACN,WAAW,EAAE,aAAa,CAAC,WAAW;KACvC,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,sBAAsB,CAAC,KAAyB,EAAE,IAAY;IAC5E,MAAM,SAAS,GAAG,KAAK,CAAC,CAAC,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IAE3D,IAAI,SAAS,EAAE,CAAC;QACd,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,SAAS,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC;YAClC,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,MAAM,IAAI,CAAC,EAAE,CAAC;gBAC3C,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,EAAE,WAAW,EAAE,KAAK,EAAE,CAAC;YACxE,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,kFAAkF;QACpF,CAAC;IACH,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,sBAAsB,CAAC,IAAI,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;AACrE,CAAC;AAED,SAAS,mBAAmB,CAAC,KAAa;IACxC,IAAI,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACnC,IAAI,eAAe,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;QAAE,OAAO,CAAC,CAAC;IAEjD,MAAM,UAAU,GAAG,KAAK,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;IAC3C,IAAI,oBAAoB,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC;QAC1C,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,UAAU,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;IAC5D,CAAC;IAED,MAAM,OAAO,GAAG,mBAAmB,CAAC,CAAC,GAAG,UAAU,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;IACnE,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,MAAM,CAA
C,CAAC;AACrC,CAAC;AAED,SAAS,mBAAmB,CAAC,cAAwB;IACnD,MAAM,OAAO,GAAG,CAAC,GAAG,cAAc,CAAC,CAAC;IAEpC,OAAO,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC1B,IAAI,SAAS,GAAG,CAAC,CAAC,CAAC;QACnB,IAAI,QAAQ,GAAG,MAAM,CAAC,iBAAiB,CAAC;QAExC,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC;YACxD,MAAM,IAAI,GAAG,SAAS,CAAC,GAAG,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,OAAO,CAAC,KAAK,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC;YACtE,IAAI,IAAI,KAAK,SAAS,IAAI,IAAI,GAAG,QAAQ,EAAE,CAAC;gBAC1C,QAAQ,GAAG,IAAI,CAAC;gBAChB,SAAS,GAAG,KAAK,CAAC;YACpB,CAAC;QACH,CAAC;QAED,IAAI,SAAS,KAAK,CAAC,CAAC;YAAE,MAAM;QAE5B,OAAO,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC,EAAE,GAAG,OAAO,CAAC,SAAS,CAAC,GAAG,OAAO,CAAC,SAAS,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC;IACjF,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,aAAa,CAAC,MAA+B;IACpD,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;QACnC,OAAO,MAAM,CAAC,QAAQ;aACnB,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC;aAC3F,MAAM,CAAC,OAAO,CAAC;aACf,IAAI,CAAC,GAAG,CAAC,CAAC;IACf,CAAC;IAED,OAAO,WAAW,CAAC,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,OAAO,CAAC,CAAC;AACxF,CAAC;AAED,SAAS,aAAa,CAAC,KAAa;IAClC,MAAM,eAAe,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAEnD,OAAO,oBAAoB,CAAC,IAAI,CAAC,CAAC,SAAS,EAAE,EAAE;QAC7C,IAAI,OAAO,SAAS,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;YAC1C,OAAO,eAAe,CAAC,QAAQ,CAAC,SAAS,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC;QAC1E,CAAC;QAED,SAAS,CAAC,OAAO,CAAC,SAAS,GAAG,CAAC,CAAC;QAChC,OAAO,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;AACL,CAAC;AAED,SAAS,kBAAkB,CAAC,KAAc;IACxC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,KAAK,IAAI,CAAC;QAAE,OAAO,SAAS,CAAC;IACzF,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,QAAQ,CAAC,KAAc;IAC9B,OAAO,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,CAAC;AACrD,CAAC"}
1
+ {"version":3,"file":"tokenizer.js","sourceRoot":"","sources":["../../src/core/tokenizer.ts"],"names":[],"mappings":"AAAA,MAAM,YAAY,GAAG,kBAAkB,CAAC;AACxC,MAAM,cAAc,GAAG,SAAS,CAAC;AACjC,MAAM,oBAAoB,GAAG,WAAW,CAAC;AACzC,MAAM,cAAc,GAAG,mBAAmB,CAAC;AA2B3C,MAAM,oBAAoB,GAA0B,EAAE,CAAC;AAEvD;;GAEG;AACH,MAAM,UAAU,iBAAiB,CAAC,YAA6B,EAAE,EAAe;IAC9E,IAAI,CAAC,CAAC,OAAO,YAAY,KAAK,QAAQ,IAAI,YAAY,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,CAAC,YAAY,YAAY,MAAM,CAAC,EAAE,CAAC;QACpG,MAAM,IAAI,KAAK,CAAC,qEAAqE,CAAC,CAAC;IACzF,CAAC;IAED,IAAI,OAAO,EAAE,KAAK,UAAU,EAAE,CAAC;QAC7B,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAC;IAC7D,CAAC;IAED,oBAAoB,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,YAAY,EAAE,EAAE,EAAE,CAAC,CAAC;AAC3D,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,sBAAsB,CAAC,KAAa;IAClD,OAAO,yBAAyB,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;AACrD,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,KAAc;IACxC,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC;IAC5C,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,OAAO,KAAK,KAAK,SAAS;QAAE,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC;IAClF,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAElF,IAAI,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QACpB,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,IAAI,KAAK,CAAC,OAAO,IAAI,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,OAAO,CAAC;QAC9E,IAAI,SAAS,KAAK,SAAS;YAAE,OAAO,WAAW,CAAC,SAAS,CAAC,CAAC;IAC7D,CAAC;IAED,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,qBAAqB,CAAC,MAAe;IAOnD,MAAM,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;IAC9C,MAAM,MAAM,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC;IACrC,MAAM,KAAK,GAAG,OAAO,MAAM,CAAC,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC;IAC1E,MAAM,aAAa,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC1F,MAAM,aAAa,GAAG,sBAAsB,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;IAC5D,MAAM,WAAW,GAAG,aAAa,CAAC,MAAM,GAAG,aAAa,CAAC;IACzD,MAAM,YAAY,GAAG,kBAAkB,CAAC,MAAM,CAAC,UAAU,CAAC;QACxD,kBAAkB,CAAC,MAAM,CAAC,qBAAqB,CAAC;QAChD,kBAAkB,CAAC,MAAM
,CAAC,SAAS,CAAC;QACpC,kBAAkB,CAAC,MAAM,CAAC,iBAAiB,CAAC;QAC5C,IAAI,CAAC;IAEP,OAAO;QACL,WAAW;QACX,YAAY;QACZ,MAAM,EAAE,WAAW,GAAG,YAAY;QAClC,MAAM;QACN,WAAW,EAAE,aAAa,CAAC,WAAW;KACvC,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,sBAAsB,CAAC,KAAyB,EAAE,IAAY;IAC5E,MAAM,SAAS,GAAG,KAAK,CAAC,CAAC,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IAE3D,IAAI,SAAS,EAAE,CAAC;QACd,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,SAAS,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC;YAClC,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,MAAM,IAAI,CAAC,EAAE,CAAC;gBAC3C,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,EAAE,WAAW,EAAE,KAAK,EAAE,CAAC;YACxE,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,kFAAkF;QACpF,CAAC;IACH,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,yBAAyB,CAAC,KAAK,EAAE,IAAI,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;AAC/E,CAAC;AAED,SAAS,yBAAyB,CAAC,KAAyB,EAAE,KAAa;IACzE,MAAM,IAAI,GAAG,KAAK,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;IACrC,MAAM,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC;IAChC,IAAI,KAAK,CAAC,SAAS,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAEpC,MAAM,KAAK,GAAG,eAAe,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;IAC3C,IAAI,QAAQ,GAAG,KAAK,CAAC,SAAS,GAAG,gBAAgB,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;IAEhE,IAAI,KAAK,KAAK,QAAQ;QAAE,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,SAAS,GAAG,GAAG,CAAC,CAAC;IAC7E,IAAI,KAAK,KAAK,YAAY;QAAE,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,SAAS,GAAG,IAAI,CAAC,CAAC;IAClF,IAAI,KAAK,KAAK,MAAM;QAAE,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,SAAS,GAAG,IAAI,CAAC,CAAC;IAC5E,IAAI,KAAK,KAAK,UAAU;QAAE,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,SAAS,GAAG,IAAI,CAAC,CAAC;IAChF,IAAI,KAAK,KAAK,cAAc;QAAE,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,SAAS,GAAG,GAAG,CAAC,CAAC;IACnF,IAAI,KAAK,KAAK,YAAY;QAAE,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;IAE3E,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;AAC1C,CAAC;AAED,SAAS,WAAW,CAAC,IAAY;IAC/B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC;IAC7C,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;IACjD
,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,oBAAoB,CAAC,IAAI,EAAE,CAAC;IAC5D,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;IACjD,MAAM,eAAe,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC;IAChE,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,eAAe,CAAC,CAAC;IAE7C,OAAO;QACL,SAAS,EAAE,CAAC,GAAG,IAAI,CAAC,CAAC,MAAM;QAC3B,SAAS,EAAE,KAAK,CAAC,MAAM;QACvB,WAAW,EAAE,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,CAAC,MAAM,CAAC;QAC3D,mBAAmB,EAAE,OAAO,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,GAAG,YAAY,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC,MAAM;QACvG,iBAAiB,EAAE,KAAK,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,IAAI,GAAG,KAAK,CAAC,MAAM;KAC5E,CAAC;AACJ,CAAC;AAED,SAAS,eAAe,CAAC,IAAY,EAAE,KAAgB;IACrD,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE5B,IAAI,KAAK,CAAC,SAAS,IAAI,CAAC,IAAI,KAAK,CAAC,iBAAiB,IAAI,IAAI;QAAE,OAAO,YAAY,CAAC;IACjF,IAAI,aAAa,CAAC,OAAO,CAAC,IAAI,0BAA0B,CAAC,IAAI,CAAC;QAAE,OAAO,YAAY,CAAC;IACpF,IAAI,6BAA6B,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,UAAU,CAAC;IAChE,IAAI,cAAc,CAAC,IAAI,EAAE,KAAK,CAAC;QAAE,OAAO,MAAM,CAAC;IAC/C,IAAI,KAAK,CAAC,mBAAmB,GAAG,IAAI;QAAE,OAAO,cAAc,CAAC;IAE5D,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,gBAAgB,CAAC,KAAyB,EAAE,KAAgB;IACnE,IAAI,KAAK,KAAK,YAAY;QAAE,OAAO,GAAG,CAAC;IACvC,IAAI,KAAK,KAAK,YAAY;QAAE,OAAO,CAAC,CAAC;IACrC,IAAI,KAAK,KAAK,MAAM;QAAE,OAAO,GAAG,CAAC;IACjC,IAAI,KAAK,KAAK,UAAU;QAAE,OAAO,GAAG,CAAC;IACrC,IAAI,KAAK,KAAK,cAAc;QAAE,OAAO,GAAG,CAAC;IACzC,IAAI,KAAK,EAAE,WAAW,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC;QAAE,OAAO,GAAG,CAAC;IACxD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,aAAa,CAAC,IAAY;IACjC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC;QAAE,OAAO,KAAK,CAAC;IAElH,IAAI,CAAC;QACH,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACjB,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,SAAS,0BAA0B,CAAC,IAAY;IAC9C,OAAO,0DAA0D,CAAC,IAA
I,CAAC,IAAI,CAAC,CAAC;AAC/E,CAAC;AAED,SAAS,cAAc,CAAC,IAAY,EAAE,KAAgB;IACpD,IAAI,kFAAkF,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;QAClG,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,KAAK,CAAC,WAAW,GAAG,IAAI,CAAC;AAC9D,CAAC;AAED,SAAS,aAAa,CAAC,MAA+B;IACpD,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;QACnC,OAAO,MAAM,CAAC,QAAQ;aACnB,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC;aAC3F,MAAM,CAAC,OAAO,CAAC;aACf,IAAI,CAAC,GAAG,CAAC,CAAC;IACf,CAAC;IAED,OAAO,WAAW,CAAC,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,OAAO,CAAC,CAAC;AACxF,CAAC;AAED,SAAS,aAAa,CAAC,KAAa;IAClC,MAAM,eAAe,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAEnD,OAAO,oBAAoB,CAAC,IAAI,CAAC,CAAC,SAAS,EAAE,EAAE;QAC7C,IAAI,OAAO,SAAS,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;YAC1C,OAAO,eAAe,CAAC,QAAQ,CAAC,SAAS,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC;QAC1E,CAAC;QAED,SAAS,CAAC,OAAO,CAAC,SAAS,GAAG,CAAC,CAAC;QAChC,OAAO,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;AACL,CAAC;AAED,SAAS,kBAAkB,CAAC,KAAc;IACxC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,KAAK,IAAI,CAAC;QAAE,OAAO,SAAS,CAAC;IACzF,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,QAAQ,CAAC,KAAc;IAC9B,OAAO,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,CAAC;AACrD,CAAC"}
@@ -55,11 +55,11 @@ Current fixed-corpus token accuracy run:
55
55
 
56
56
  - Reference: dependency-free fixed proxy fixture counts, not a live provider tokenizer
57
57
  - Samples: `24`
58
- - Average error: `237.76%`
59
- - Median error: `240.06%`
60
- - Max error: `390%`
58
+ - Average error: `9.68%`
59
+ - Median error: `11.43%`
60
+ - Max error: `28.57%`
61
61
 
62
- This shows the current dependency-free estimator is conservative and materially overestimates this proxy corpus. Treat AI CostGuard estimates as pre-call guardrails, not exact provider tokenizer counts. For production budgets that need tighter input-token estimates, register an exact tokenizer with `registerTokenizer()`.
62
+ This shows the calibrated dependency-free estimator tracks the reference counts on this proxy corpus much more closely than the previous estimator did. Treat AI CostGuard estimates as pre-call guardrails, not exact provider tokenizer counts. For production budgets that need tighter input-token estimates, register an exact tokenizer with `registerTokenizer()`.
63
63
 
64
64
  ## Interpreting Results
65
65
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@salimassili/ai-costguard",
3
- "version": "2.1.0",
3
+ "version": "2.1.1",
4
4
  "description": "Local-first runtime safety layer for AI agents that blocks runaway costs, loops, retries, and budget overruns before API calls execute.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",