@hyvmind/tiktoken-ts 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/LICENSE +21 -0
  2. package/README.md +557 -0
  3. package/dist/bpe.d.ts +171 -0
  4. package/dist/bpe.d.ts.map +1 -0
  5. package/dist/bpe.js +478 -0
  6. package/dist/bpe.js.map +1 -0
  7. package/dist/core/byte-pair-encoding.d.ts +49 -0
  8. package/dist/core/byte-pair-encoding.d.ts.map +1 -0
  9. package/dist/core/byte-pair-encoding.js +154 -0
  10. package/dist/core/byte-pair-encoding.js.map +1 -0
  11. package/dist/core/encoding-definitions.d.ts +95 -0
  12. package/dist/core/encoding-definitions.d.ts.map +1 -0
  13. package/dist/core/encoding-definitions.js +202 -0
  14. package/dist/core/encoding-definitions.js.map +1 -0
  15. package/dist/core/index.d.ts +12 -0
  16. package/dist/core/index.d.ts.map +1 -0
  17. package/dist/core/index.js +17 -0
  18. package/dist/core/index.js.map +1 -0
  19. package/dist/core/model-to-encoding.d.ts +36 -0
  20. package/dist/core/model-to-encoding.d.ts.map +1 -0
  21. package/dist/core/model-to-encoding.js +299 -0
  22. package/dist/core/model-to-encoding.js.map +1 -0
  23. package/dist/core/tiktoken.d.ts +126 -0
  24. package/dist/core/tiktoken.d.ts.map +1 -0
  25. package/dist/core/tiktoken.js +295 -0
  26. package/dist/core/tiktoken.js.map +1 -0
  27. package/dist/core/vocab-loader.d.ts +77 -0
  28. package/dist/core/vocab-loader.d.ts.map +1 -0
  29. package/dist/core/vocab-loader.js +176 -0
  30. package/dist/core/vocab-loader.js.map +1 -0
  31. package/dist/encodings/cl100k-base.d.ts +43 -0
  32. package/dist/encodings/cl100k-base.d.ts.map +1 -0
  33. package/dist/encodings/cl100k-base.js +142 -0
  34. package/dist/encodings/cl100k-base.js.map +1 -0
  35. package/dist/encodings/claude-estimation.d.ts +136 -0
  36. package/dist/encodings/claude-estimation.d.ts.map +1 -0
  37. package/dist/encodings/claude-estimation.js +160 -0
  38. package/dist/encodings/claude-estimation.js.map +1 -0
  39. package/dist/encodings/index.d.ts +9 -0
  40. package/dist/encodings/index.d.ts.map +1 -0
  41. package/dist/encodings/index.js +13 -0
  42. package/dist/encodings/index.js.map +1 -0
  43. package/dist/encodings/o200k-base.d.ts +58 -0
  44. package/dist/encodings/o200k-base.d.ts.map +1 -0
  45. package/dist/encodings/o200k-base.js +191 -0
  46. package/dist/encodings/o200k-base.js.map +1 -0
  47. package/dist/encodings/p50k-base.d.ts +44 -0
  48. package/dist/encodings/p50k-base.d.ts.map +1 -0
  49. package/dist/encodings/p50k-base.js +64 -0
  50. package/dist/encodings/p50k-base.js.map +1 -0
  51. package/dist/index.d.ts +61 -0
  52. package/dist/index.d.ts.map +1 -0
  53. package/dist/index.js +109 -0
  54. package/dist/index.js.map +1 -0
  55. package/dist/models.d.ts +92 -0
  56. package/dist/models.d.ts.map +1 -0
  57. package/dist/models.js +320 -0
  58. package/dist/models.js.map +1 -0
  59. package/dist/tiktoken.d.ts +198 -0
  60. package/dist/tiktoken.d.ts.map +1 -0
  61. package/dist/tiktoken.js +331 -0
  62. package/dist/tiktoken.js.map +1 -0
  63. package/dist/tokenizer.d.ts +181 -0
  64. package/dist/tokenizer.d.ts.map +1 -0
  65. package/dist/tokenizer.js +436 -0
  66. package/dist/tokenizer.js.map +1 -0
  67. package/dist/types.d.ts +127 -0
  68. package/dist/types.d.ts.map +1 -0
  69. package/dist/types.js +6 -0
  70. package/dist/types.js.map +1 -0
  71. package/dist/utils.d.ts +152 -0
  72. package/dist/utils.d.ts.map +1 -0
  73. package/dist/utils.js +244 -0
  74. package/dist/utils.js.map +1 -0
  75. package/package.json +78 -0
package/dist/bpe.d.ts ADDED
@@ -0,0 +1,171 @@
+ /**
+  * Byte-Pair Encoding (BPE) Implementation
+  * A pure TypeScript implementation of the BPE algorithm
+  *
+  * This implementation uses a hybrid approach:
+  * 1. Pattern-based tokenization for accuracy with special tokens
+  * 2. Statistical estimation for token counting (without the full vocabulary)
+  *
+  * For production use with full accuracy, consider loading the actual
+  * vocabulary files from tiktoken.
+  */
+ import type { EncodingConfig, EncodingName, SpecialTokens } from "./types.js";
+ /**
+  * Default special tokens for the OpenAI chat format
+  */
+ export declare const DEFAULT_SPECIAL_TOKENS: SpecialTokens;
+ /**
+  * cl100k_base encoding configuration
+  * Used by GPT-4 and GPT-3.5-turbo
+  */
+ export declare const CL100K_BASE_CONFIG: EncodingConfig;
+ /**
+  * o200k_base encoding configuration
+  * Used by GPT-4o, GPT-4.1, and GPT-5 models
+  */
+ export declare const O200K_BASE_CONFIG: EncodingConfig;
+ /**
+  * p50k_base encoding configuration (legacy)
+  * Used by older Codex models
+  */
+ export declare const P50K_BASE_CONFIG: EncodingConfig;
+ /**
+  * Claude estimation encoding configuration
+  *
+  * IMPORTANT: This is an ESTIMATION encoding, not an exact BPE tokenizer.
+  *
+  * Claude uses a proprietary tokenizer that is NOT publicly available.
+  * This encoding provides "safe" estimates that intentionally over-count
+  * tokens to prevent API truncation issues.
+  *
+  * Based on research findings (see claude-tokenizer-research.md):
+  * - Claude 3+ uses a ~22,000-token vocabulary (much smaller than OpenAI's 100K-200K)
+  * - Claude produces 16-30% MORE tokens than GPT-4 for equivalent content
+  * - Average ~3.5 characters per token (vs GPT-4's ~4)
+  *
+  * This encoding applies a 1.25x safety multiplier to cl100k_base estimates.
+  * For exact Claude token counts, use Anthropic's /v1/messages/count_tokens API.
+  *
+  * @see https://docs.anthropic.com/en/docs/build-with-claude/token-counting
+  */
+ export declare const CLAUDE_ESTIMATION_CONFIG: EncodingConfig;
+ /**
+  * Get encoding configuration by name
+  *
+  * @param name - Encoding name
+  * @returns Encoding configuration
+  */
+ export declare function getEncodingConfig(name: EncodingName): EncodingConfig;
+ /**
+  * Safety multiplier for Claude token estimation.
+  *
+  * Research shows Claude produces 16-30% more tokens than GPT-4 for equivalent content:
+  * - English articles: +16%
+  * - Math equations: +21%
+  * - Python code: +30%
+  *
+  * We use 1.25 (a 25% over-estimate) as a safe default that covers most cases
+  * without being excessively conservative.
+  */
+ export declare const CLAUDE_SAFETY_MULTIPLIER = 1.25;
+ /**
+  * BPE Tokenizer class
+  * Implements the core BPE algorithm with statistical estimation
+  */
+ export declare class BPETokenizer {
+     private readonly config;
+     private readonly specialTokenPatterns;
+     private readonly bytePairCache;
+     private readonly isClaudeEstimation;
+     constructor(encoding?: EncodingName);
+     /**
+      * Initialize special token patterns
+      */
+     private initializeSpecialTokens;
+     /**
+      * Get the encoding name
+      */
+     get encodingName(): EncodingName;
+     /**
+      * Encode text into token IDs
+      * Uses a hybrid approach: special token detection + BPE encoding
+      *
+      * @param text - Input text
+      * @returns Array of token IDs
+      */
+     encode(text: string): number[];
+     /**
+      * Decode token IDs back to text
+      * Note: This is a best-effort decode for estimated tokens
+      *
+      * @param tokens - Array of token IDs
+      * @returns Decoded text
+      */
+     decode(tokens: number[]): string;
+     /**
+      * Count the number of tokens in text
+      * This uses statistical estimation for efficiency
+      *
+      * For Claude models (claude_estimation encoding), this applies a safety
+      * multiplier to ensure estimates err on the side of over-counting,
+      * preventing API truncation issues.
+      *
+      * @param text - Input text
+      * @returns Estimated token count
+      */
+     countTokens(text: string): number;
+     /**
+      * Split text on special tokens while preserving them
+      *
+      * @param text - Input text
+      * @returns Array of segments
+      */
+     private splitOnSpecialTokens;
+     /**
+      * Encode regular text using BPE
+      * Uses pattern-based splitting and byte-pair encoding
+      *
+      * @param text - Text to encode (no special tokens)
+      * @returns Array of token IDs
+      */
+     private encodeText;
+     /**
+      * Apply BPE to a byte sequence
+      * Uses a simplified hash-based approach for estimation
+      *
+      * @param bytes - UTF-8 bytes
+      * @returns Array of token IDs
+      */
+     private bytePairEncode;
+     /**
+      * Estimate the boundary of the next token
+      *
+      * @param bytes - Full byte sequence
+      * @param start - Starting position
+      * @returns Number of bytes for the next token
+      */
+     private estimateTokenBoundary;
+     /**
+      * Convert bytes to a deterministic token ID
+      *
+      * @param bytes - Token bytes
+      * @returns Token ID
+      */
+     private bytesToTokenId;
+     /**
+      * Estimate the token count for text using statistical methods
+      * This is the primary method for fast token counting
+      *
+      * ACCURACY NOTES (validated against tiktoken-rs):
+      * - English text: typically within +/-10-15%
+      * - Numbers: models tiktoken's 1-3 digit grouping
+      * - CJK with o200k: highly efficient (roughly 2-3 chars per token)
+      * - CJK with cl100k: less efficient (1-2 tokens per char)
+      * - Emoji: 2-3 tokens each
+      *
+      * @param text - Text to estimate
+      * @returns Estimated token count
+      */
+     private estimateTokenCount;
+ }
+ //# sourceMappingURL=bpe.d.ts.map
package/dist/bpe.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"bpe.d.ts","sourceRoot":"","sources":["../src/bpe.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,KAAK,EAAE,cAAc,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAU9E;;GAEG;AACH,eAAO,MAAM,sBAAsB,EAAE,aAOpC,CAAC;AAEF;;;GAGG;AACH,eAAO,MAAM,kBAAkB,EAAE,cAOhC,CAAC;AAEF;;;GAGG;AACH,eAAO,MAAM,iBAAiB,EAAE,cAO/B,CAAC;AAEF;;;GAGG;AACH,eAAO,MAAM,gBAAgB,EAAE,cAO9B,CAAC;AAEF;;;;;;;;;;;;;;;;;;GAkBG;AACH,eAAO,MAAM,wBAAwB,EAAE,cAUtC,CAAC;AAEF;;;;;GAKG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,YAAY,GAAG,cAAc,CAapE;AAED;;;;;;;;;;GAUG;AACH,eAAO,MAAM,wBAAwB,OAAO,CAAC;AAE7C;;;GAGG;AACH,qBAAa,YAAY;IACvB,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAiB;IACxC,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAsB;IAC3D,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAwB;IACtD,OAAO,CAAC,QAAQ,CAAC,kBAAkB,CAAU;gBAEjC,QAAQ,GAAE,YAA2B;IAUjD;;OAEG;IACH,OAAO,CAAC,uBAAuB;IAgB/B;;OAEG;IACH,IAAI,YAAY,IAAI,YAAY,CAE/B;IAED;;;;;;OAMG;IACH,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE;IAqB9B;;;;;;OAMG;IACH,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM;IAoBhC;;;;;;;;;;OAUG;IACH,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM;IA2BjC;;;;;OAKG;IACH,OAAO,CAAC,oBAAoB;IAmC5B;;;;;;OAMG;IACH,OAAO,CAAC,UAAU;IA6BlB;;;;;;OAMG;IACH,OAAO,CAAC,cAAc;IA0BtB;;;;;;OAMG;IACH,OAAO,CAAC,qBAAqB;IAyC7B;;;;;OAKG;IACH,OAAO,CAAC,cAAc;IAStB;;;;;;;;;;;;;OAaG;IACH,OAAO,CAAC,kBAAkB;CA6F3B"}
package/dist/bpe.js ADDED
@@ -0,0 +1,478 @@
+ /**
+  * Byte-Pair Encoding (BPE) Implementation
+  * A pure TypeScript implementation of the BPE algorithm
+  *
+  * This implementation uses a hybrid approach:
+  * 1. Pattern-based tokenization for accuracy with special tokens
+  * 2. Statistical estimation for token counting (without the full vocabulary)
+  *
+  * For production use with full accuracy, consider loading the actual
+  * vocabulary files from tiktoken.
+  */
+ import { stringToBytes, bytesToString, containsEmoji, isAscii, countCodePoints, hashString } from "./utils.js";
+ /**
+  * Default special tokens for the OpenAI chat format
+  */
+ export const DEFAULT_SPECIAL_TOKENS = {
+     endOfText: "<|endoftext|>",
+     imStart: "<|im_start|>",
+     imEnd: "<|im_end|>",
+     imSep: "<|im_sep|>",
+     toolCall: "<|tool_call|>",
+     toolResult: "<|tool_result|>",
+ };
+ /**
+  * cl100k_base encoding configuration
+  * Used by GPT-4 and GPT-3.5-turbo
+  */
+ export const CL100K_BASE_CONFIG = {
+     name: "cl100k_base",
+     // JavaScript RegExp has no inline (?i:) flag, so the contraction
+     // alternatives are spelled out with explicit case classes.
+     patternSplit: /(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+/gu,
+     specialTokens: DEFAULT_SPECIAL_TOKENS,
+     vocabSize: 100256,
+     averageCharsPerToken: 3.8,
+ };
+ /**
+  * o200k_base encoding configuration
+  * Used by GPT-4o, GPT-4.1, and GPT-5 models
+  */
+ export const O200K_BASE_CONFIG = {
+     name: "o200k_base",
+     patternSplit: /[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]*[\p{Ll}\p{Lm}\p{Lo}\p{M}]+(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]+[\p{Ll}\p{Lm}\p{Lo}\p{M}]*(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+/gu,
+     specialTokens: DEFAULT_SPECIAL_TOKENS,
+     vocabSize: 200000,
+     averageCharsPerToken: 4.0,
+ };
+ /**
+  * p50k_base encoding configuration (legacy)
+  * Used by older Codex models
+  */
+ export const P50K_BASE_CONFIG = {
+     name: "p50k_base",
+     patternSplit: /'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+/gu,
+     specialTokens: DEFAULT_SPECIAL_TOKENS,
+     vocabSize: 50257,
+     averageCharsPerToken: 3.5,
+ };
+ /**
+  * Claude estimation encoding configuration
+  *
+  * IMPORTANT: This is an ESTIMATION encoding, not an exact BPE tokenizer.
+  *
+  * Claude uses a proprietary tokenizer that is NOT publicly available.
+  * This encoding provides "safe" estimates that intentionally over-count
+  * tokens to prevent API truncation issues.
+  *
+  * Based on research findings (see claude-tokenizer-research.md):
+  * - Claude 3+ uses a ~22,000-token vocabulary (much smaller than OpenAI's 100K-200K)
+  * - Claude produces 16-30% MORE tokens than GPT-4 for equivalent content
+  * - Average ~3.5 characters per token (vs GPT-4's ~4)
+  *
+  * This encoding applies a 1.25x safety multiplier to cl100k_base estimates.
+  * For exact Claude token counts, use Anthropic's /v1/messages/count_tokens API.
+  *
+  * @see https://docs.anthropic.com/en/docs/build-with-claude/token-counting
+  */
+ export const CLAUDE_ESTIMATION_CONFIG = {
+     name: "claude_estimation",
+     // Use the cl100k_base pattern as a base (70% vocabulary overlap with Claude 1/2)
+     patternSplit: /(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+/gu,
+     specialTokens: DEFAULT_SPECIAL_TOKENS,
+     // Claude's estimated vocabulary (~22K); kept for informational purposes
+     vocabSize: 22000,
+     // Claude averages ~3.5 chars per token, lower than cl100k_base
+     averageCharsPerToken: 3.5,
+ };
+ /**
+  * Get encoding configuration by name
+  *
+  * @param name - Encoding name
+  * @returns Encoding configuration
+  */
+ export function getEncodingConfig(name) {
+     switch (name) {
+         case "cl100k_base":
+             return CL100K_BASE_CONFIG;
+         case "o200k_base":
+             return O200K_BASE_CONFIG;
+         case "p50k_base":
+             return P50K_BASE_CONFIG;
+         case "claude_estimation":
+             return CLAUDE_ESTIMATION_CONFIG;
+         default:
+             return O200K_BASE_CONFIG;
+     }
+ }
+ /**
+  * Safety multiplier for Claude token estimation.
+  *
+  * Research shows Claude produces 16-30% more tokens than GPT-4 for equivalent content:
+  * - English articles: +16%
+  * - Math equations: +21%
+  * - Python code: +30%
+  *
+  * We use 1.25 (a 25% over-estimate) as a safe default that covers most cases
+  * without being excessively conservative.
+  */
+ export const CLAUDE_SAFETY_MULTIPLIER = 1.25;
+ /**
+  * BPE Tokenizer class
+  * Implements the core BPE algorithm with statistical estimation
+  */
+ export class BPETokenizer {
+     config;
+     specialTokenPatterns;
+     bytePairCache;
+     isClaudeEstimation;
+     constructor(encoding = "o200k_base") {
+         this.config = getEncodingConfig(encoding);
+         this.specialTokenPatterns = new Map();
+         this.bytePairCache = new Map();
+         this.isClaudeEstimation = encoding === "claude_estimation";
+         // Pre-compute special token patterns and their token IDs
+         this.initializeSpecialTokens();
+     }
+     /**
+      * Initialize special token patterns
+      */
+     initializeSpecialTokens() {
+         const tokens = this.config.specialTokens;
+         let tokenId = this.config.vocabSize - 10; // Reserve IDs at the end of the vocab
+         this.specialTokenPatterns.set(tokens.endOfText, tokenId++);
+         this.specialTokenPatterns.set(tokens.imStart, tokenId++);
+         this.specialTokenPatterns.set(tokens.imEnd, tokenId++);
+         this.specialTokenPatterns.set(tokens.imSep, tokenId++);
+         if (tokens.toolCall) {
+             this.specialTokenPatterns.set(tokens.toolCall, tokenId++);
+         }
+         if (tokens.toolResult) {
+             this.specialTokenPatterns.set(tokens.toolResult, tokenId++);
+         }
+     }
+     /**
+      * Get the encoding name
+      */
+     get encodingName() {
+         return this.config.name;
+     }
+     /**
+      * Encode text into token IDs
+      * Uses a hybrid approach: special token detection + BPE encoding
+      *
+      * @param text - Input text
+      * @returns Array of token IDs
+      */
+     encode(text) {
+         if (!text)
+             return [];
+         const tokens = [];
+         // First, split on special tokens
+         const segments = this.splitOnSpecialTokens(text);
+         for (const segment of segments) {
+             if (this.specialTokenPatterns.has(segment)) {
+                 // Special token - use its pre-assigned ID
+                 tokens.push(this.specialTokenPatterns.get(segment));
+             }
+             else {
+                 // Regular text - apply BPE
+                 tokens.push(...this.encodeText(segment));
+             }
+         }
+         return tokens;
+     }
+     /**
+      * Decode token IDs back to text
+      * Note: This is a best-effort decode for estimated tokens
+      *
+      * @param tokens - Array of token IDs
+      * @returns Decoded text
+      */
+     decode(tokens) {
+         const parts = [];
+         for (const token of tokens) {
+             // Special tokens decode exactly; skip the placeholder for them
+             let isSpecial = false;
+             for (const [text, id] of this.specialTokenPatterns) {
+                 if (id === token) {
+                     parts.push(text);
+                     isSpecial = true;
+                     break;
+                 }
+             }
+             if (isSpecial) {
+                 continue;
+             }
+             // For regular tokens, we'd need the vocabulary to decode properly
+             // Since we don't have it, we return a placeholder
+             parts.push(`<token:${token}>`);
+         }
+         return parts.join("");
+     }
+     /**
+      * Count the number of tokens in text
+      * This uses statistical estimation for efficiency
+      *
+      * For Claude models (claude_estimation encoding), this applies a safety
+      * multiplier to ensure estimates err on the side of over-counting,
+      * preventing API truncation issues.
+      *
+      * @param text - Input text
+      * @returns Estimated token count
+      */
+     countTokens(text) {
+         if (!text)
+             return 0;
+         let count = 0;
+         // Split on special tokens first
+         const segments = this.splitOnSpecialTokens(text);
+         for (const segment of segments) {
+             if (this.specialTokenPatterns.has(segment)) {
+                 count += 1;
+             }
+             else {
+                 count += this.estimateTokenCount(segment);
+             }
+         }
+         let result = Math.max(1, Math.round(count));
+         // Apply the Claude safety multiplier for the Claude estimation encoding
+         // This ensures we over-estimate to prevent API truncation
+         if (this.isClaudeEstimation) {
+             result = Math.ceil(result * CLAUDE_SAFETY_MULTIPLIER);
+         }
+         return result;
+     }
+     /**
+      * Split text on special tokens while preserving them
+      *
+      * @param text - Input text
+      * @returns Array of segments
+      */
+     splitOnSpecialTokens(text) {
+         const segments = [];
+         // Build a regex pattern matching any special token
+         const specialTokensList = Array.from(this.specialTokenPatterns.keys());
+         if (specialTokensList.length === 0) {
+             return [text];
+         }
+         const pattern = new RegExp(`(${specialTokensList.map((t) => t.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")).join("|")})`, "g");
+         let lastIndex = 0;
+         let match;
+         while ((match = pattern.exec(text)) !== null) {
+             // Add text before the special token
+             if (match.index > lastIndex) {
+                 segments.push(text.slice(lastIndex, match.index));
+             }
+             // Add the special token
+             segments.push(match[0]);
+             lastIndex = match.index + match[0].length;
+         }
+         // Add remaining text
+         if (lastIndex < text.length) {
+             segments.push(text.slice(lastIndex));
+         }
+         return segments.filter((s) => s.length > 0);
+     }
+     /**
+      * Encode regular text using BPE
+      * Uses pattern-based splitting and byte-pair encoding
+      *
+      * @param text - Text to encode (no special tokens)
+      * @returns Array of token IDs
+      */
+     encodeText(text) {
+         const tokens = [];
+         // Split text using the encoding's pattern
+         const matches = text.match(this.config.patternSplit) || [text];
+         for (const match of matches) {
+             // Check cache first
+             const cached = this.bytePairCache.get(match);
+             if (cached) {
+                 tokens.push(...cached);
+                 continue;
+             }
+             // Convert to bytes and apply BPE
+             const bytes = stringToBytes(match);
+             const encodedTokens = this.bytePairEncode(bytes);
+             // Cache the result (only short strings, to bound memory)
+             if (match.length < 100) {
+                 this.bytePairCache.set(match, encodedTokens);
+             }
+             tokens.push(...encodedTokens);
+         }
+         return tokens;
+     }
+     /**
+      * Apply BPE to a byte sequence
+      * Uses a simplified hash-based approach for estimation
+      *
+      * @param bytes - UTF-8 bytes
+      * @returns Array of token IDs
+      */
+     bytePairEncode(bytes) {
+         if (bytes.length === 0)
+             return [];
+         // For accurate BPE, we'd need the full vocabulary and merge rules.
+         // Instead, we use a deterministic hash-based approach that:
+         // 1. Groups bytes into likely token boundaries
+         // 2. Assigns consistent token IDs based on content
+         const tokens = [];
+         let i = 0;
+         while (i < bytes.length) {
+             // Determine a likely token boundary (typically 1-6 bytes)
+             const tokenLength = this.estimateTokenBoundary(bytes, i);
+             const tokenBytes = bytes.slice(i, i + tokenLength);
+             // Generate a consistent token ID from the bytes
+             const tokenId = this.bytesToTokenId(tokenBytes);
+             tokens.push(tokenId);
+             i += tokenLength;
+         }
+         return tokens;
+     }
+     /**
+      * Estimate the boundary of the next token
+      *
+      * @param bytes - Full byte sequence
+      * @param start - Starting position
+      * @returns Number of bytes for the next token
+      */
+     estimateTokenBoundary(bytes, start) {
+         const remaining = bytes.length - start;
+         if (remaining <= 0)
+             return 0;
+         // UTF-8 aware boundary detection
+         const firstByte = bytes[start];
+         // Printable ASCII characters often merge together
+         if (firstByte >= 0x20 && firstByte <= 0x7e) {
+             // Extend the run while the bytes stay alphanumeric
+             let end = start + 1;
+             while (end < bytes.length && end - start < 6) {
+                 const b = bytes[end];
+                 if (b >= 0x41 && b <= 0x5a) {
+                     // A-Z
+                     end++;
+                 }
+                 else if (b >= 0x61 && b <= 0x7a) {
+                     // a-z
+                     end++;
+                 }
+                 else if (b >= 0x30 && b <= 0x39) {
+                     // 0-9
+                     end++;
+                 }
+                 else {
+                     break;
+                 }
+             }
+             return Math.min(end - start, remaining);
+         }
+         // Multi-byte UTF-8 sequences
+         if ((firstByte & 0xe0) === 0xc0)
+             return Math.min(2, remaining);
+         if ((firstByte & 0xf0) === 0xe0)
+             return Math.min(3, remaining);
+         if ((firstByte & 0xf8) === 0xf0)
+             return Math.min(4, remaining);
+         return 1;
+     }
+     /**
+      * Convert bytes to a deterministic token ID
+      *
+      * @param bytes - Token bytes
+      * @returns Token ID
+      */
+     bytesToTokenId(bytes) {
+         // Use a hash to generate consistent token IDs
+         const text = bytesToString(bytes);
+         const hash = hashString(text);
+         // Map to the valid token range (avoid special tokens at the end)
+         return hash % (this.config.vocabSize - 20);
+     }
+     /**
+      * Estimate the token count for text using statistical methods
+      * This is the primary method for fast token counting
+      *
+      * ACCURACY NOTES (validated against tiktoken-rs):
+      * - English text: typically within +/-10-15%
+      * - Numbers: models tiktoken's 1-3 digit grouping
+      * - CJK with o200k: highly efficient (roughly 2-3 chars per token)
+      * - CJK with cl100k: less efficient (1-2 tokens per char)
+      * - Emoji: 2-3 tokens each
+      *
+      * @param text - Text to estimate
+      * @returns Estimated token count
+      */
+     estimateTokenCount(text) {
+         if (!text)
+             return 0;
+         const codePoints = countCodePoints(text);
+         const byteLength = stringToBytes(text).length;
+         // Start with a base estimate, rebuilt below for mixed content
+         let estimate = 0;
+         // Handle numbers separately - tiktoken splits digits into 1-3 digit groups
+         // This is CRITICAL: "12345678901234567890" = 7 tokens, not 20
+         const numberMatches = text.match(/\d+/g);
+         let numberCharsCount = 0;
+         let numberTokenCount = 0;
+         if (numberMatches) {
+             for (const num of numberMatches) {
+                 numberCharsCount += num.length;
+                 // tiktoken groups digits into 1-3 digit tokens
+                 numberTokenCount += Math.ceil(num.length / 3);
+             }
+         }
+         // Handle CJK characters - behavior differs by encoding
+         const cjkChars = text.match(/[\u4e00-\u9fff\u3400-\u4dbf\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]/g);
+         let cjkCharsCount = 0;
+         let cjkTokenCount = 0;
+         if (cjkChars) {
+             cjkCharsCount = cjkChars.length;
+             if (this.config.name === "o200k_base") {
+                 // o200k_base is HIGHLY efficient for CJK
+                 // "こんにちは世界" = 2 tokens (not one per character)
+                 // Typically 1-3 CJK chars per token
+                 cjkTokenCount = Math.ceil(cjkCharsCount / 2.5);
+             }
+             else {
+                 // cl100k_base: CJK chars are typically 1-2 tokens each
+                 cjkTokenCount = Math.ceil(cjkCharsCount * 1.2);
+             }
+         }
+         // Handle emoji - typically 2-3 tokens each
+         let emojiCount = 0;
+         let emojiTokenCount = 0;
+         if (containsEmoji(text)) {
+             // Extended_Pictographic avoids \p{Emoji}'s false positives
+             // (\p{Emoji} also matches the digits 0-9, '#', and '*')
+             const emojis = text.match(/\p{Extended_Pictographic}/gu);
+             if (emojis) {
+                 emojiCount = emojis.length;
+                 // Each emoji is typically 2-3 tokens
+                 emojiTokenCount = Math.ceil(emojiCount * 2.5);
+             }
+         }
+         // Calculate the remaining non-special characters
+         const specialCharsCount = numberCharsCount + cjkCharsCount + emojiCount;
+         const remainingChars = codePoints - specialCharsCount;
+         // Base estimate for the remaining (mostly ASCII/Latin) text
+         if (remainingChars > 0) {
+             estimate = remainingChars / this.config.averageCharsPerToken;
+             // Whitespace sequences add minimal overhead
+             const whitespaceSeqs = text.match(/\s+/g);
+             if (whitespaceSeqs) {
+                 estimate += whitespaceSeqs.length * 0.1;
+             }
+         }
+         // Add the special character token counts
+         estimate += numberTokenCount;
+         estimate += cjkTokenCount;
+         estimate += emojiTokenCount;
+         // Non-ASCII penalty for the remaining text (excluding CJK, handled above)
+         if (!isAscii(text) && remainingChars > 0) {
+             const nonAsciiRatio = (byteLength - codePoints) / byteLength;
+             // Only apply the penalty to the base estimate portion
+             const basePortion = remainingChars / this.config.averageCharsPerToken;
+             estimate += basePortion * nonAsciiRatio * 0.15;
+         }
+         // o200k_base tends to be slightly more efficient overall
+         if (this.config.name === "o200k_base" && remainingChars > 0) {
+             const basePortion = remainingChars / this.config.averageCharsPerToken;
+             estimate -= basePortion * 0.05;
+         }
+         return Math.max(1, Math.round(estimate));
+     }
+ }
+ //# sourceMappingURL=bpe.js.map
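To make the heuristics in estimateTokenCount concrete, here is the arithmetic traced by hand for three inputs (estimates under this package's heuristics, not tiktoken ground truth):

    // "12345678901234567890" (20 digits, cl100k_base):
    //   digits grouped 1-3 per token => numberTokenCount = ceil(20 / 3) = 7

    // "Hello, world!" (13 chars, pure ASCII, cl100k_base):
    //   base               = 13 / 3.8  ~= 3.42
    //   + 1 whitespace run * 0.1       ~= 3.52
    //   round, clamp to >= 1           => 4 tokens

    // "こんにちは世界" (7 CJK chars):
    //   o200k_base:  ceil(7 / 2.5) = 3 tokens
    //   cl100k_base: ceil(7 * 1.2) = 9 tokens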
package/dist/bpe.js.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"bpe.js","sourceRoot":"","sources":["../src/bpe.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAGH,OAAO,EACL,aAAa,EACb,aAAa,EACb,aAAa,EACb,OAAO,EACP,eAAe,EACf,UAAU,GACX,MAAM,YAAY,CAAC;AAEpB;;GAEG;AACH,MAAM,CAAC,MAAM,sBAAsB,GAAkB;IACnD,SAAS,EAAE,eAAe;IAC1B,OAAO,EAAE,cAAc;IACvB,KAAK,EAAE,YAAY;IACnB,KAAK,EAAE,YAAY;IACnB,QAAQ,EAAE,eAAe;IACzB,UAAU,EAAE,iBAAiB;CAC9B,CAAC;AAEF;;;GAGG;AACH,MAAM,CAAC,MAAM,kBAAkB,GAAmB;IAChD,IAAI,EAAE,aAAa;IACnB,YAAY,EACV,uHAAuH;IACzH,aAAa,EAAE,sBAAsB;IACrC,SAAS,EAAE,MAAM;IACjB,oBAAoB,EAAE,GAAG;CAC1B,CAAC;AAEF;;;GAGG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAAmB;IAC/C,IAAI,EAAE,YAAY;IAClB,YAAY,EACV,qRAAqR;IACvR,aAAa,EAAE,sBAAsB;IACrC,SAAS,EAAE,MAAM;IACjB,oBAAoB,EAAE,GAAG;CAC1B,CAAC;AAEF;;;GAGG;AACH,MAAM,CAAC,MAAM,gBAAgB,GAAmB;IAC9C,IAAI,EAAE,WAAW;IACjB,YAAY,EACV,8EAA8E;IAChF,aAAa,EAAE,sBAAsB;IACrC,SAAS,EAAE,KAAK;IAChB,oBAAoB,EAAE,GAAG;CAC1B,CAAC;AAEF;;;;;;;;;;;;;;;;;;GAkBG;AACH,MAAM,CAAC,MAAM,wBAAwB,GAAmB;IACtD,IAAI,EAAE,mBAAmB;IACzB,2EAA2E;IAC3E,YAAY,EACV,oJAAoJ;IACtJ,aAAa,EAAE,sBAAsB;IACrC,kFAAkF;IAClF,SAAS,EAAE,KAAK;IAChB,+DAA+D;IAC/D,oBAAoB,EAAE,GAAG;CAC1B,CAAC;AAEF;;;;;GAKG;AACH,MAAM,UAAU,iBAAiB,CAAC,IAAkB;IAClD,QAAQ,IAAI,EAAE,CAAC;QACb,KAAK,aAAa;YAChB,OAAO,kBAAkB,CAAC;QAC5B,KAAK,YAAY;YACf,OAAO,iBAAiB,CAAC;QAC3B,KAAK,WAAW;YACd,OAAO,gBAAgB,CAAC;QAC1B,KAAK,mBAAmB;YACtB,OAAO,wBAAwB,CAAC;QAClC;YACE,OAAO,iBAAiB,CAAC;IAC7B,CAAC;AACH,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,CAAC,MAAM,wBAAwB,GAAG,IAAI,CAAC;AAE7C;;;GAGG;AACH,MAAM,OAAO,YAAY;IACN,MAAM,CAAiB;IACvB,oBAAoB,CAAsB;IAC1C,aAAa,CAAwB;IACrC,kBAAkB,CAAU;IAE7C,YAAY,WAAyB,YAAY;QAC/C,IAAI,CAAC,MAAM,GAAG,iBAAiB,CAAC,QAAQ,CAAC,CAAC;QAC1C,IAAI,CAAC,oBAAoB,GAAG,IAAI,GAAG,EAAE,CAAC;QACtC,IAAI,CAAC,aAAa,GAAG,IAAI,GAAG,EAAE,CAAC;QAC/B,IAAI,CAAC,kBAAkB,GAAG,QAAQ,KAAK,mBAAmB,CAAC;QAE3D,yDAAyD;QACzD,IAAI,CAAC,uBAAuB,EAAE,CAAC;IACjC,CAAC;IAED;;OAEG;IACK,uBAAuB;QAC7B,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC;QACzC,IAAI,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,GAAG,EAAE,CAAC,CAAC,8BAA8B;QAExE,IAAI,CAAC,oBAAoB,CAAC,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,OAAO,EAAE,CAAC,CAAC;QAC3D,IAAI,CAAC,oBAAoB,CAAC,GAAG,CAAC,MAAM,CAAC,OAAO,EAAE,OAAO,EAAE,CAAC,CAAC;QACzD,IAAI,CAAC,oBAAoB,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC;QACvD,IAAI,CAAC,oBAAoB,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC;QACvD,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;YACpB,IAAI,CAAC,oBAAoB,CAAC,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,OAAO,EAAE,CAAC,CAAC;QAC5D,CAAC;QACD,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;YACtB,IAAI,CAAC,oBAAoB,CAAC,GAAG,CAAC,MAAM,CAAC,UAAU,EAAE,OAAO,EAAE,CAAC,CAAC;QAC9D,CAAC;IACH,CAAC;IAED;;OAEG;IACH,IAAI,YAAY;QACd,OAAO,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC;IAC1B,CAAC;IAED;;;;;;OAMG;IACH,MAAM,CAAC,IAAY;QACjB,IAAI,CAAC,IAAI;YAAE,OAAO,EAAE,CAAC;QAErB,MAAM,MAAM,GAAa,EAAE,CAAC;QAE5B,iCAAiC;QACjC,MAAM,QAAQ,GAAG,IAAI,CAAC,oBAAoB,CAAC,IAAI,CAAC,CAAC;QAEjD,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,IAAI,IAAI,CAAC,oBAAoB,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;gBAC3C,0CAA0C;gBAC1C,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,oBAAoB,CAAC,GAAG,CAAC,OAAO,CAAE,CAAC,CAAC;YACvD,CAAC;iBAAM,CAAC;gBACN,2BAA2B;gBAC3B,MAAM,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC;YAC3C,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;;;;;OAMG;IACH,MAAM,CAAC,MAAgB;QACrB,MAAM,KAAK,GAAa,EAAE,CAAC;QAE3B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,gCAAgC;YAChC,KAAK,MAAM,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,IAAI,CAAC,oBAAoB,EAAE,CAAC;gBACnD,IAAI,EAAE,KAAK,KAAK,EAAE,CAAC;oBACjB,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBACjB,MAAM;gBACR,CAAC;YACH,CAAC;YAED,kEAAkE;YAClE,kDAAkD;YAClD,KAAK,CAAC,IAAI,CAAC,UAAU,KAAK,GAAG,CAAC,CAAC;QACjC,CAAC;
QAED,OAAO,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACxB,CAAC;IAED;;;;;;;;;;OAUG;IACH,WAAW,CAAC,IAAY;QACtB,IAAI,CAAC,IAAI;YAAE,OAAO,CAAC,CAAC;QAEpB,IAAI,KAAK,GAAG,CAAC,CAAC;QAEd,gCAAgC;QAChC,MAAM,QAAQ,GAAG,IAAI,CAAC,oBAAoB,CAAC,IAAI,CAAC,CAAC;QAEjD,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,IAAI,IAAI,CAAC,oBAAoB,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;gBAC3C,KAAK,IAAI,CAAC,CAAC;YACb,CAAC;iBAAM,CAAC;gBACN,KAAK,IAAI,IAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,CAAC;YAC5C,CAAC;QACH,CAAC;QAED,IAAI,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC;QAE5C,gEAAgE;QAChE,0DAA0D;QAC1D,IAAI,IAAI,CAAC,kBAAkB,EAAE,CAAC;YAC5B,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,wBAAwB,CAAC,CAAC;QACxD,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;;;;OAKG;IACK,oBAAoB,CAAC,IAAY;QACvC,MAAM,QAAQ,GAAa,EAAE,CAAC;QAE9B,6CAA6C;QAC7C,MAAM,iBAAiB,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,oBAAoB,CAAC,IAAI,EAAE,CAAC,CAAC;QACvE,IAAI,iBAAiB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACnC,OAAO,CAAC,IAAI,CAAC,CAAC;QAChB,CAAC;QAED,MAAM,OAAO,GAAG,IAAI,MAAM,CACxB,IAAI,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,qBAAqB,EAAE,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,EACvF,GAAG,CACJ,CAAC;QAEF,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,IAAI,KAAK,CAAC;QAEV,OAAO,CAAC,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YAC7C,oCAAoC;YACpC,IAAI,KAAK,CAAC,KAAK,GAAG,SAAS,EAAE,CAAC;gBAC5B,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,SAAS,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC;YACpD,CAAC;YACD,wBAAwB;YACxB,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YACxB,SAAS,GAAG,KAAK,CAAC,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;QAC5C,CAAC;QAED,qBAAqB;QACrB,IAAI,SAAS,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;YAC5B,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC;QACvC,CAAC;QAED,OAAO,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC9C,CAAC;IAED;;;;;;OAMG;IACK,UAAU,CAAC,IAAY;QAC7B,MAAM,MAAM,GAAa,EAAE,CAAC;QAE5B,0CAA0C;QAC1C,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAE/D,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;YAC5B,oBAAoB;YACpB,MAAM,MAAM,GAAG,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;YAC7C,IAAI,MAAM,EAAE,CAAC;gBACX,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;gBACvB,SAAS;YACX,CAAC;YAED,iCAAiC;YACjC,MAAM,KAAK,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC;YACnC,MAAM,aAAa,GAAG,IAAI,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC;YAEjD,mBAAmB;YACnB,IAAI,KAAK,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;gBACvB,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,KAAK,EAAE,aAAa,CAAC,CAAC;YAC/C,CAAC;YAED,MAAM,CAAC,IAAI,CAAC,GAAG,aAAa,CAAC,CAAC;QAChC,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;;;;;OAMG;IACK,cAAc,CAAC,KAAiB;QACtC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAElC,kEAAkE;QAClE,4DAA4D;QAC5D,+CAA+C;QAC/C,mDAAmD;QAEnD,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,IAAI,CAAC,GAAG,CAAC,CAAC;QAEV,OAAO,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;YACxB,wDAAwD;YACxD,MAAM,WAAW,GAAG,IAAI,CAAC,qBAAqB,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;YACzD,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,CAAC;YAEnD,gDAAgD;YAChD,MAAM,OAAO,GAAG,IAAI,CAAC,cAAc,CAAC,UAAU,CAAC,CAAC;YAChD,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YAErB,CAAC,IAAI,WAAW,CAAC;QACnB,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;;;;;OAMG;IACK,qBAAqB,CAAC,KAAiB,EAAE,KAAa;QAC5D,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,GAAG,KAAK,CAAC;QACvC,IAAI,SAAS,IAAI,CAAC;YAAE,OAAO,CAAC,CAAC;QAE7B,iCAAiC;QACjC,MAAM,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC;QAE/B,kDAAkD;QAClD,IAAI,SAAS,IAAI,IAAI,IAAI,SAAS,IAAI,IAAI,EAAE,CAAC;YAC3C,4BAA4B;YAC5B,IAAI,GAAG,GAAG,KAAK,GAAG,CAAC,CAAC;YAEpB,OAAO,GAAG,GAAG,KAAK,CAAC,MAAM,IAAI,GAAG,GAAG,KAAK,GAAG,CAAC,EAAE,CAAC;gBAC7C,
MAAM,CAAC,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC;gBAErB,qCAAqC;gBACrC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;oBAC3B,MAAM;oBACN,GAAG,EAAE,CAAC;gBACR,CAAC;qBAAM,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;oBAClC,MAAM;oBACN,GAAG,EAAE,CAAC;gBACR,CAAC;qBAAM,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;oBAClC,MAAM;oBACN,GAAG,EAAE,CAAC;gBACR,CAAC;qBAAM,CAAC;oBACN,MAAM;gBACR,CAAC;YACH,CAAC;YAED,OAAO,IAAI,CAAC,GAAG,CAAC,GAAG,GAAG,KAAK,EAAE,SAAS,CAAC,CAAC;QAC1C,CAAC;QAED,6BAA6B;QAC7B,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,KAAK,IAAI;YAAE,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;QAC/D,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,KAAK,IAAI;YAAE,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;QAC/D,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,KAAK,IAAI;YAAE,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;QAE/D,OAAO,CAAC,CAAC;IACX,CAAC;IAED;;;;;OAKG;IACK,cAAc,CAAC,KAAiB;QACtC,8CAA8C;QAC9C,MAAM,IAAI,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC;QAClC,MAAM,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;QAE9B,6DAA6D;QAC7D,OAAO,IAAI,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,GAAG,EAAE,CAAC,CAAC;IAC7C,CAAC;IAED;;;;;;;;;;;;;OAaG;IACK,kBAAkB,CAAC,IAAY;QACrC,IAAI,CAAC,IAAI;YAAE,OAAO,CAAC,CAAC;QAEpB,MAAM,UAAU,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;QACzC,MAAM,UAAU,GAAG,aAAa,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;QAE9C,gEAAgE;QAChE,IAAI,QAAQ,GAAG,CAAC,CAAC;QAEjB,oEAAoE;QACpE,8DAA8D;QAC9D,MAAM,aAAa,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QACzC,IAAI,gBAAgB,GAAG,CAAC,CAAC;QACzB,IAAI,gBAAgB,GAAG,CAAC,CAAC;QAEzB,IAAI,aAAa,EAAE,CAAC;YAClB,KAAK,MAAM,GAAG,IAAI,aAAa,EAAE,CAAC;gBAChC,gBAAgB,IAAI,GAAG,CAAC,MAAM,CAAC;gBAC/B,+CAA+C;gBAC/C,gBAAgB,IAAI,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;YAChD,CAAC;QACH,CAAC;QAED,uDAAuD;QACvD,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CACzB,sEAAsE,CACvE,CAAC;QACF,IAAI,aAAa,GAAG,CAAC,CAAC;QACtB,IAAI,aAAa,GAAG,CAAC,CAAC;QAEtB,IAAI,QAAQ,EAAE,CAAC;YACb,aAAa,GAAG,QAAQ,CAAC,MAAM,CAAC;YAEhC,IAAI,IAAI,CAAC,MAAM,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;gBACtC,yCAAyC;gBACzC,+BAA+B;gBAC/B,oCAAoC;gBACpC,aAAa,GAAG,IAAI,CAAC,IAAI,CAAC,aAAa,GAAG,GAAG,CAAC,CAAC;YACjD,CAAC;iBAAM,CAAC;gBACN,uDAAuD;gBACvD,aAAa,GAAG,IAAI,CAAC,IAAI,CAAC,aAAa,GAAG,GAAG,CAAC,CAAC;YACjD,CAAC;QACH,CAAC;QAED,2CAA2C;QAC3C,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,IAAI,eAAe,GAAG,CAAC,CAAC;QAExB,IAAI,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC;YACxB,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;YACzC,IAAI,MAAM,EAAE,CAAC;gBACX,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC;gBAC3B,qCAAqC;gBACrC,eAAe,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,GAAG,GAAG,CAAC,CAAC;YAChD,CAAC;QACH,CAAC;QAED,6CAA6C;QAC7C,MAAM,iBAAiB,GAAG,gBAAgB,GAAG,aAAa,GAAG,UAAU,CAAC;QACxE,MAAM,cAAc,GAAG,UAAU,GAAG,iBAAiB,CAAC;QAEtD,wDAAwD;QACxD,IAAI,cAAc,GAAG,CAAC,EAAE,CAAC;YACvB,QAAQ,GAAG,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,oBAAoB,CAAC;YAE7D,4CAA4C;YAC5C,MAAM,cAAc,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;YAC1C,IAAI,cAAc,EAAE,CAAC;gBACnB,QAAQ,IAAI,cAAc,CAAC,MAAM,GAAG,GAAG,CAAC;YAC1C,CAAC;QACH,CAAC;QAED,qCAAqC;QACrC,QAAQ,IAAI,gBAAgB,CAAC;QAC7B,QAAQ,IAAI,aAAa,CAAC;QAC1B,QAAQ,IAAI,eAAe,CAAC;QAE5B,wEAAwE;QACxE,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,cAAc,GAAG,CAAC,EAAE,CAAC;YACzC,MAAM,aAAa,GAAG,CAAC,UAAU,GAAG,UAAU,CAAC,GAAG,UAAU,CAAC;YAC7D,kDAAkD;YAClD,MAAM,WAAW,GAAG,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,oBAAoB,CAAC;YACtE,QAAQ,IAAI,WAAW,GAAG,aAAa,GAAG,IAAI,CAAC;QACjD,CAAC;QAED,yDAAyD;QACzD,IAAI,IAAI,CAAC,MAAM,CAAC,IAAI,KAAK,YAAY,IAAI,cAAc,GAAG,CAAC,EAAE,CAAC;YAC5D,MAAM,WAAW,GAAG,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,oBAAoB,CAAC;YACtE,QAAQ,IAAI,WAAW,GAAG,IAAI,CAAC;QACjC,CAAC;QAED,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC;IAC3C,CAAC;CACF"}
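Finally, a sketch of how the claude_estimation path in countTokens layers onto the figures above: the base estimate uses Claude's lower 3.5 chars-per-token average, and the 1.25x safety ceiling is applied last (the import specifier is assumed, as above; the figures follow the heuristics in dist/bpe.js and are estimates only):

    import { BPETokenizer } from "@hyvmind/tiktoken-ts";

    const code = "def add(a, b):\n    return a + b\n";
    const gpt4 = new BPETokenizer("cl100k_base");
    const claude = new BPETokenizer("claude_estimation");

    // cl100k_base: 32 chars / 3.8 + 0.7 for whitespace runs ~= 9 tokens
    // claude:      32 chars / 3.5 + 0.7 ~= 10, then ceil(10 * 1.25) = 13
    console.log(gpt4.countTokens(code), claude.countTokens(code)); // 9 13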