@hyvmind/tiktoken-ts 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +557 -0
  3. package/dist/bpe.d.ts +171 -0
  4. package/dist/bpe.d.ts.map +1 -0
  5. package/dist/bpe.js +478 -0
  6. package/dist/bpe.js.map +1 -0
  7. package/dist/core/byte-pair-encoding.d.ts +49 -0
  8. package/dist/core/byte-pair-encoding.d.ts.map +1 -0
  9. package/dist/core/byte-pair-encoding.js +154 -0
  10. package/dist/core/byte-pair-encoding.js.map +1 -0
  11. package/dist/core/encoding-definitions.d.ts +95 -0
  12. package/dist/core/encoding-definitions.d.ts.map +1 -0
  13. package/dist/core/encoding-definitions.js +202 -0
  14. package/dist/core/encoding-definitions.js.map +1 -0
  15. package/dist/core/index.d.ts +12 -0
  16. package/dist/core/index.d.ts.map +1 -0
  17. package/dist/core/index.js +17 -0
  18. package/dist/core/index.js.map +1 -0
  19. package/dist/core/model-to-encoding.d.ts +36 -0
  20. package/dist/core/model-to-encoding.d.ts.map +1 -0
  21. package/dist/core/model-to-encoding.js +299 -0
  22. package/dist/core/model-to-encoding.js.map +1 -0
  23. package/dist/core/tiktoken.d.ts +126 -0
  24. package/dist/core/tiktoken.d.ts.map +1 -0
  25. package/dist/core/tiktoken.js +295 -0
  26. package/dist/core/tiktoken.js.map +1 -0
  27. package/dist/core/vocab-loader.d.ts +77 -0
  28. package/dist/core/vocab-loader.d.ts.map +1 -0
  29. package/dist/core/vocab-loader.js +176 -0
  30. package/dist/core/vocab-loader.js.map +1 -0
  31. package/dist/encodings/cl100k-base.d.ts +43 -0
  32. package/dist/encodings/cl100k-base.d.ts.map +1 -0
  33. package/dist/encodings/cl100k-base.js +142 -0
  34. package/dist/encodings/cl100k-base.js.map +1 -0
  35. package/dist/encodings/claude-estimation.d.ts +136 -0
  36. package/dist/encodings/claude-estimation.d.ts.map +1 -0
  37. package/dist/encodings/claude-estimation.js +160 -0
  38. package/dist/encodings/claude-estimation.js.map +1 -0
  39. package/dist/encodings/index.d.ts +9 -0
  40. package/dist/encodings/index.d.ts.map +1 -0
  41. package/dist/encodings/index.js +13 -0
  42. package/dist/encodings/index.js.map +1 -0
  43. package/dist/encodings/o200k-base.d.ts +58 -0
  44. package/dist/encodings/o200k-base.d.ts.map +1 -0
  45. package/dist/encodings/o200k-base.js +191 -0
  46. package/dist/encodings/o200k-base.js.map +1 -0
  47. package/dist/encodings/p50k-base.d.ts +44 -0
  48. package/dist/encodings/p50k-base.d.ts.map +1 -0
  49. package/dist/encodings/p50k-base.js +64 -0
  50. package/dist/encodings/p50k-base.js.map +1 -0
  51. package/dist/index.d.ts +61 -0
  52. package/dist/index.d.ts.map +1 -0
  53. package/dist/index.js +109 -0
  54. package/dist/index.js.map +1 -0
  55. package/dist/models.d.ts +92 -0
  56. package/dist/models.d.ts.map +1 -0
  57. package/dist/models.js +320 -0
  58. package/dist/models.js.map +1 -0
  59. package/dist/tiktoken.d.ts +198 -0
  60. package/dist/tiktoken.d.ts.map +1 -0
  61. package/dist/tiktoken.js +331 -0
  62. package/dist/tiktoken.js.map +1 -0
  63. package/dist/tokenizer.d.ts +181 -0
  64. package/dist/tokenizer.d.ts.map +1 -0
  65. package/dist/tokenizer.js +436 -0
  66. package/dist/tokenizer.js.map +1 -0
  67. package/dist/types.d.ts +127 -0
  68. package/dist/types.d.ts.map +1 -0
  69. package/dist/types.js +6 -0
  70. package/dist/types.js.map +1 -0
  71. package/dist/utils.d.ts +152 -0
  72. package/dist/utils.d.ts.map +1 -0
  73. package/dist/utils.js +244 -0
  74. package/dist/utils.js.map +1 -0
  75. package/package.json +78 -0
@@ -0,0 +1,152 @@
1
+ /**
2
+ * Tokenizer Utilities
3
+ * Helper functions for tokenization
4
+ */
5
+ /**
6
+ * Convert a string to UTF-8 bytes
7
+ *
8
+ * @param text - Input text
9
+ * @returns Uint8Array of UTF-8 bytes
10
+ */
11
+ export declare function stringToBytes(text: string): Uint8Array;
12
+ /**
13
+ * Convert UTF-8 bytes to string
14
+ *
15
+ * @param bytes - UTF-8 bytes
16
+ * @returns Decoded string
17
+ */
18
+ export declare function bytesToString(bytes: Uint8Array | number[]): string;
19
+ /**
20
+ * Convert a byte to its hexadecimal representation
21
+ *
22
+ * @param byte - Single byte value (0-255)
23
+ * @returns Two-character hex string
24
+ */
25
+ export declare function byteToHex(byte: number): string;
26
+ /**
27
+ * Convert hexadecimal string to byte
28
+ *
29
+ * @param hex - Two-character hex string
30
+ * @returns Byte value (0-255)
31
+ */
32
+ export declare function hexToByte(hex: string): number;
33
+ /**
34
+ * Check if a character is a whitespace character
35
+ *
36
+ * @param char - Single character
37
+ * @returns True if whitespace
38
+ */
39
+ export declare function isWhitespace(char: string): boolean;
40
+ /**
41
+ * Check if a character is a letter
42
+ *
43
+ * @param char - Single character
44
+ * @returns True if letter
45
+ */
46
+ export declare function isLetter(char: string): boolean;
47
+ /**
48
+ * Check if a character is a Unicode number (any \p{N} character, not only 0-9)
49
+ *
50
+ * @param char - Single character
51
+ * @returns True if digit
52
+ */
53
+ export declare function isDigit(char: string): boolean;
54
+ /**
55
+ * Check if a character is punctuation
56
+ *
57
+ * @param char - Single character
58
+ * @returns True if punctuation
59
+ */
60
+ export declare function isPunctuation(char: string): boolean;
61
+ /**
62
+ * Check if a string contains only ASCII characters
63
+ *
64
+ * @param text - Input text
65
+ * @returns True if ASCII only
66
+ */
67
+ export declare function isAscii(text: string): boolean;
68
+ /**
69
+ * Count the number of Unicode code points in a string
70
+ * Handles surrogate pairs correctly
71
+ *
72
+ * @param text - Input text
73
+ * @returns Number of code points
74
+ */
75
+ export declare function countCodePoints(text: string): number;
76
+ /**
77
+ * Split text into words using Unicode-aware boundaries
78
+ *
79
+ * @param text - Input text
80
+ * @returns Array of words and whitespace
81
+ */
82
+ export declare function splitIntoWords(text: string): string[];
83
+ /**
84
+ * Escape special regex characters in a string
85
+ *
86
+ * @param text - Input text
87
+ * @returns Escaped string safe for regex
88
+ */
89
+ export declare function escapeRegex(text: string): string;
90
+ /**
91
+ * Clamp a number between min and max values
92
+ *
93
+ * @param value - Value to clamp
94
+ * @param min - Minimum value
95
+ * @param max - Maximum value
96
+ * @returns Clamped value
97
+ */
98
+ export declare function clamp(value: number, min: number, max: number): number;
99
+ /**
100
+ * Calculate the percentage of a value
101
+ *
102
+ * @param value - Base value
103
+ * @param pct - Fraction of the value to take (0-1)
104
+ * @returns Calculated percentage value
105
+ */
106
+ export declare function percentage(value: number, pct: number): number;
107
+ /**
108
+ * Create a hash from a string (for vocabulary lookup)
109
+ * Simple FNV-1a hash for performance
110
+ *
111
+ * @param text - Input text
112
+ * @returns 32-bit hash value
113
+ */
114
+ export declare function hashString(text: string): number;
115
+ /**
116
+ * Count occurrences of a substring in text
117
+ *
118
+ * @param text - Text to search in
119
+ * @param substring - Substring to count
120
+ * @returns Number of occurrences
121
+ */
122
+ export declare function countOccurrences(text: string, substring: string): number;
123
+ /**
124
+ * Normalize whitespace in text (collapse each run of whitespace to one space and trim both ends)
125
+ *
126
+ * @param text - Input text
127
+ * @returns Text with normalized whitespace
128
+ */
129
+ export declare function normalizeWhitespace(text: string): string;
130
+ /**
131
+ * Check if text contains CJK (Chinese/Japanese/Korean) characters
132
+ *
133
+ * @param text - Input text
134
+ * @returns True if contains CJK characters
135
+ */
136
+ export declare function containsCJK(text: string): boolean;
137
+ /**
138
+ * Check if text contains emoji
139
+ *
140
+ * @param text - Input text
141
+ * @returns True if contains emoji
142
+ */
143
+ export declare function containsEmoji(text: string): boolean;
144
+ /**
145
+ * Estimate token multiplier based on text characteristics
146
+ * Used for non-English text where token count can vary significantly
147
+ *
148
+ * @param text - Input text
149
+ * @returns Multiplier to apply to base token estimate
150
+ */
151
+ export declare function getTextComplexityMultiplier(text: string): number;
152
+ //# sourceMappingURL=utils.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../src/utils.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH;;;;;GAKG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,UAAU,CAGtD;AAED;;;;;GAKG;AACH,wBAAgB,aAAa,CAAC,KAAK,EAAE,UAAU,GAAG,MAAM,EAAE,GAAG,MAAM,CAKlE;AAED;;;;;GAKG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAE9C;AAED;;;;;GAKG;AACH,wBAAgB,SAAS,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAE7C;AAED;;;;;GAKG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAElD;AAED;;;;;GAKG;AACH,wBAAgB,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAE9C;AAED;;;;;GAKG;AACH,wBAAgB,OAAO,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAE7C;AAED;;;;;GAKG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAEnD;AAED;;;;;GAKG;AACH,wBAAgB,OAAO,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAG7C;AAED;;;;;;GAMG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAGpD;AAED;;;;;GAKG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAyBrD;AAED;;;;;GAKG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEhD;AAED;;;;;;;GAOG;AACH,wBAAgB,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,MAAM,CAErE;AAED;;;;;;GAMG;AACH,wBAAgB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,MAAM,CAE7D;AAED;;;;;;GAMG;AACH,wBAAgB,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAU/C;AAED;;;;;;GAMG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,CAYxE;AAED;;;;;GAKG;AACH,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAExD;AAED;;;;;GAKG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAKjD;AAED;;;;;GAKG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAEnD;AAED;;;;;;GAMG;AACH,wBAAgB,2BAA2B,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAmBhE"}
package/dist/utils.js ADDED
@@ -0,0 +1,244 @@
1
+ /**
2
+ * Tokenizer Utilities
3
+ * Helper functions for tokenization
4
+ */
/**
 * Encode a string as UTF-8.
 *
 * @param text - Text to encode
 * @returns The UTF-8 byte sequence as a Uint8Array
 */
export function stringToBytes(text) {
  return new TextEncoder().encode(text);
}
/**
 * Decode UTF-8 bytes into a string.
 *
 * @param bytes - UTF-8 bytes, either a Uint8Array or a plain array of numbers
 * @returns Decoded string
 */
export function bytesToString(bytes) {
  // Plain number arrays are copied into a Uint8Array before decoding.
  const buffer = bytes instanceof Uint8Array ? bytes : new Uint8Array(bytes);
  // The decoder is created in non-fatal mode (fatal: false).
  return new TextDecoder("utf-8", { fatal: false }).decode(buffer);
}
/**
 * Format a byte as hexadecimal.
 *
 * @param byte - Single byte value (0-255)
 * @returns Two-character hex string for values in the 0-255 range
 */
export function byteToHex(byte) {
  const digits = byte.toString(16);
  // Left-pad so values below 0x10 still produce two characters.
  return digits.padStart(2, "0");
}
/**
 * Convert a hexadecimal string to a byte.
 *
 * @param hex - One or two hexadecimal digits (case-insensitive), e.g. "0a" or "FF"
 * @returns Byte value (0-255), or NaN when `hex` is not one or two hex digits
 */
export function hexToByte(hex) {
  // parseInt on its own is too lenient for this contract: it accepts "0x"
  // prefixes, signs, leading whitespace, trailing garbage ("1g" -> 1) and
  // values wider than one byte ("fff" -> 4095). Validate the shape first and
  // report anything else as NaN, which is what parseInt already returned for
  // unparseable input such as "" or "zz".
  if (!/^[0-9a-fA-F]{1,2}$/.test(hex)) {
    return Number.NaN;
  }
  return parseInt(hex, 16);
}
/**
 * Test whether a character is whitespace.
 *
 * The pattern is unanchored, so a longer string yields true as soon as it
 * contains any whitespace character.
 *
 * @param char - Single character
 * @returns True if whitespace
 */
export function isWhitespace(char) {
  const whitespace = /\s/;
  return whitespace.test(char);
}
/**
 * Test whether a character is a letter in any script (Unicode category L).
 *
 * @param char - Single character
 * @returns True if letter
 */
export function isLetter(char) {
  const letter = /\p{L}/u;
  return letter.test(char);
}
/**
 * Test whether a character is a Unicode number (category N).
 *
 * This is broader than the ASCII digits 0-9: any character in the Unicode
 * Number category matches, for example the vulgar fraction U+00BD.
 *
 * @param char - Single character
 * @returns True if the character is a Unicode number
 */
export function isDigit(char) {
  const number = /\p{N}/u;
  return number.test(char);
}
/**
 * Test whether a character is punctuation (Unicode category P).
 *
 * Symbols such as "$" belong to category S and therefore do not match.
 *
 * @param char - Single character
 * @returns True if punctuation
 */
export function isPunctuation(char) {
  const punctuation = /\p{P}/u;
  return punctuation.test(char);
}
/**
 * Test whether every character of a string is in the ASCII range (U+0000-U+007F).
 *
 * The empty string counts as ASCII-only.
 *
 * @param text - Input text
 * @returns True if ASCII only
 */
export function isAscii(text) {
  // Look for a single character outside the range instead of matching the whole string.
  // eslint-disable-next-line no-control-regex
  return !/[^\x00-\x7F]/.test(text);
}
/**
 * Count the Unicode code points in a string.
 *
 * Unlike `text.length`, which counts UTF-16 code units, a surrogate pair is
 * counted once.
 *
 * @param text - Input text
 * @returns Number of code points
 */
export function countCodePoints(text) {
  let total = 0;
  // String iteration yields whole code points rather than UTF-16 code units.
  for (const _ of text) {
    total += 1;
  }
  return total;
}
/**
 * Split text into alternating word and non-word segments.
 *
 * A word character is anything accepted by `isLetter` or `isDigit`, plus the
 * apostrophe. Consecutive characters of the same kind are grouped into one
 * segment, so whitespace and punctuation between words are kept as their own
 * segments and joining the result reproduces the input.
 *
 * @param text - Input text
 * @returns Array of word and non-word segments, in order
 */
export function splitIntoWords(text) {
  const pieces = [];
  let run = "";
  let runIsWord = false;
  for (const ch of text) {
    const isWordChar = isLetter(ch) || isDigit(ch) || ch === "'";
    if (isWordChar === runIsWord) {
      // Same kind as the current run: keep extending it.
      run += ch;
      continue;
    }
    // Kind changed: emit the finished run (if any) and start a new one.
    if (run) {
      pieces.push(run);
    }
    run = ch;
    runIsWord = isWordChar;
  }
  // Flush the trailing run.
  if (run) {
    pieces.push(run);
  }
  return pieces;
}
/**
 * Backslash-escape regular-expression metacharacters in a string.
 *
 * The escaped characters are: . * + ? ^ $ { } ( ) | [ ] \
 *
 * @param text - Input text
 * @returns Escaped string safe for regex
 */
export function escapeRegex(text) {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  // "$&" re-inserts the matched character after the added backslash.
  return text.replace(metacharacters, "\\$&");
}
/**
 * Restrict a number to the inclusive range [min, max].
 *
 * The lower bound is applied first and the upper bound last, so when
 * `min > max` the result is `max`.
 *
 * @param value - Value to clamp
 * @param min - Minimum value
 * @param max - Maximum value
 * @returns Clamped value
 */
export function clamp(value, min, max) {
  const raised = Math.max(value, min);
  return Math.min(raised, max);
}
/**
 * Take a fraction of a value and round the result to the nearest integer.
 *
 * @param value - Base value
 * @param pct - Fraction to apply (0-1)
 * @returns `value * pct`, rounded with Math.round
 */
export function percentage(value, pct) {
  const scaled = value * pct;
  return Math.round(scaled);
}
/**
 * Hash a string with 32-bit FNV-1a (for vocabulary lookup).
 *
 * The hash is computed over the bytes returned by `stringToBytes`, not over
 * UTF-16 code units.
 *
 * @param text - Input text
 * @returns Unsigned 32-bit hash value
 */
export function hashString(text) {
  const FNV_OFFSET_BASIS = 0x811c9dc5;
  const FNV_PRIME = 0x01000193;
  const bytes = stringToBytes(text);
  let acc = FNV_OFFSET_BASIS;
  for (let i = 0; i < bytes.length; i += 1) {
    // FNV-1a step: XOR the byte in, then multiply by the prime.
    // Math.imul keeps the product in 32-bit integer arithmetic.
    acc = Math.imul(acc ^ bytes[i], FNV_PRIME);
  }
  // The bitwise operations leave a signed value; reinterpret it as unsigned.
  return acc >>> 0;
}
/**
 * Count non-overlapping occurrences of a substring, scanning left to right.
 *
 * @param text - Text to search in
 * @param substring - Substring to count
 * @returns Number of occurrences; 0 when `substring` is empty
 */
export function countOccurrences(text, substring) {
  // An empty needle would match at every position, so it is counted as zero.
  if (!substring) {
    return 0;
  }
  const step = substring.length;
  let hits = 0;
  // Resume each search just past the previous match so matches never overlap.
  for (let at = text.indexOf(substring, 0); at !== -1; at = text.indexOf(substring, at + step)) {
    hits += 1;
  }
  return hits;
}
/**
 * Normalize whitespace in text.
 *
 * Every run of whitespace (spaces, tabs, line breaks, ...) becomes a single
 * space, and leading and trailing whitespace is removed.
 *
 * @param text - Input text
 * @returns Text with normalized whitespace
 */
export function normalizeWhitespace(text) {
  // Trim first, then re-join the whitespace-separated pieces with single spaces.
  return text.trim().split(/\s+/).join(" ");
}
/**
 * Test whether text contains at least one CJK (Chinese/Japanese/Korean) character.
 *
 * Ranges covered by the pattern:
 *  - U+4E00-U+9FFF    CJK Unified Ideographs
 *  - U+3400-U+4DBF    CJK Unified Ideographs Extension A
 *  - U+20000-U+2EBEF  CJK Unified Ideographs Extensions B-F
 *  - U+30000-U+3134F  CJK Unified Ideographs Extension G
 *  - U+3040-U+309F    Hiragana
 *  - U+30A0-U+30FF    Katakana
 *  - U+AC00-U+D7AF    Hangul Syllables
 *
 * @param text - Input text
 * @returns True if contains CJK characters
 */
export function containsCJK(text) {
  const cjkPattern = /[\u4e00-\u9fff\u3400-\u4dbf\u{20000}-\u{2a6df}\u{2a700}-\u{2b73f}\u{2b740}-\u{2b81f}\u{2b820}-\u{2ceaf}\u{2ceb0}-\u{2ebef}\u{30000}-\u{3134f}\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]/u;
  return cjkPattern.test(text);
}
/**
 * Test whether text contains emoji.
 *
 * The Unicode `Emoji` property is deliberately NOT used on its own: it also
 * covers the ASCII characters `0-9`, `#` and `*` (they are keycap bases), so
 * matching on it reports ordinary text such as "price: 100" as containing
 * emoji. Instead the pattern accepts:
 *  - any `Extended_Pictographic` character (pictographs, dingbats, symbols
 *    such as U+00A9 that have an emoji form),
 *  - any `Emoji_Presentation` character (adds regional-indicator flag letters
 *    and skin-tone modifiers),
 *  - a keycap sequence: `#`, `*` or a digit, an optional U+FE0F, then U+20E3.
 *
 * @param text - Input text
 * @returns True if contains emoji
 */
export function containsEmoji(text) {
  const emojiPattern = /\p{Extended_Pictographic}|\p{Emoji_Presentation}|[#*0-9]\uFE0F?\u20E3/u;
  return emojiPattern.test(text);
}
/**
 * Estimate a token-count multiplier from the kinds of characters in the text.
 *
 * Each factor is applied once if the corresponding kind of character is
 * present anywhere in the text, regardless of how much of it there is. The
 * factors compound, so the result ranges from 1.0 (plain ASCII) to roughly
 * 1.98 (all three apply).
 *
 * @param text - Input text
 * @returns Multiplier to apply to base token estimate
 */
export function getTextComplexityMultiplier(text) {
  const adjustments = [
    // CJK characters typically use more tokens per character.
    { applies: containsCJK(text), factor: 1.5 },
    // Emoji can use multiple tokens.
    { applies: containsEmoji(text), factor: 1.2 },
    // Non-ASCII text generally uses more tokens.
    { applies: !isAscii(text), factor: 1.1 },
  ];
  let multiplier = 1.0;
  for (const { applies, factor } of adjustments) {
    if (applies) {
      multiplier *= factor;
    }
  }
  return multiplier;
}
244
+ //# sourceMappingURL=utils.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"utils.js","sourceRoot":"","sources":["../src/utils.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH;;;;;GAKG;AACH,MAAM,UAAU,aAAa,CAAC,IAAY;IACxC,MAAM,OAAO,GAAG,IAAI,WAAW,EAAE,CAAC;IAClC,OAAO,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;AAC9B,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,aAAa,CAAC,KAA4B;IACxD,MAAM,OAAO,GAAG,IAAI,WAAW,CAAC,OAAO,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,CAAC;IAC3D,OAAO,OAAO,CAAC,MAAM,CACnB,KAAK,YAAY,UAAU,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,CAC5D,CAAC;AACJ,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,SAAS,CAAC,IAAY;IACpC,OAAO,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;AAC5C,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,SAAS,CAAC,GAAW;IACnC,OAAO,QAAQ,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;AAC3B,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACzB,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,QAAQ,CAAC,IAAY;IACnC,OAAO,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC7B,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,OAAO,CAAC,IAAY;IAClC,OAAO,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC7B,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,aAAa,CAAC,IAAY;IACxC,OAAO,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC7B,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,OAAO,CAAC,IAAY;IAClC,4CAA4C;IAC5C,OAAO,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACrC,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,eAAe,CAAC,IAAY;IAC1C,sDAAsD;IACtD,OAAO,CAAC,GAAG,IAAI,CAAC,CAAC,MAAM,CAAC;AAC1B,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,uDAAuD;IACvD,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,IAAI,OAAO,GAAG,EAAE,CAAC;IACjB,IAAI,MAAM,GAAG,KAAK,CAAC;IAEnB,KAAK,MAAM,IAAI,IAAI,IAAI,EAAE,CAAC;QACxB,MAAM,UAAU,GAAG,QAAQ,CAAC,IAAI,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,IAAI,IAAI,KAAK,GAAG,CAAC;QAEnE,IAAI,UAAU,KAAK,MAAM,EAAE,CAAC;YAC1B,IAAI,OAAO,EAAE,CAAC;gBACZ,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACzB,CAAC;YACD,OAAO,GAAG,IAAI,CAAC;YACf,MAAM,GAAG,UAAU,CAAC;QACtB,CAAC;aAAM,CAAC;YACN,OAAO,IAAI,IAAI,CAAC;QAClB,CAAC;IACH,CAAC;IAED,IAAI,OAAO,EAAE,CAAC;QACZ,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACzB,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAA
C;AAED;;;;;GAKG;AACH,MAAM,UAAU,WAAW,CAAC,IAAY;IACtC,OAAO,IAAI,CAAC,OAAO,CAAC,qBAAqB,EAAE,MAAM,CAAC,CAAC;AACrD,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,KAAK,CAAC,KAAa,EAAE,GAAW,EAAE,GAAW;IAC3D,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC;AAC7C,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,UAAU,CAAC,KAAa,EAAE,GAAW;IACnD,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC;AACjC,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,UAAU,CAAC,IAAY;IACrC,IAAI,IAAI,GAAG,UAAU,CAAC,CAAC,mBAAmB;IAC1C,MAAM,KAAK,GAAG,aAAa,CAAC,IAAI,CAAC,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,IAAI,IAAI,CAAC;QACb,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC,CAAC,YAAY;IAClD,CAAC;IAED,OAAO,IAAI,KAAK,CAAC,CAAC,CAAC,sBAAsB;AAC3C,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,gBAAgB,CAAC,IAAY,EAAE,SAAiB;IAC9D,IAAI,CAAC,SAAS;QAAE,OAAO,CAAC,CAAC;IAEzB,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,QAAQ,GAAG,CAAC,CAAC;IAEjB,OAAO,CAAC,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;QAC7D,KAAK,EAAE,CAAC;QACR,QAAQ,IAAI,SAAS,CAAC,MAAM,CAAC;IAC/B,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,mBAAmB,CAAC,IAAY;IAC9C,OAAO,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;AAC1C,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,WAAW,CAAC,IAAY;IACtC,4CAA4C;IAC5C,OAAO,wLAAwL,CAAC,IAAI,CAClM,IAAI,CACL,CAAC;AACJ,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,aAAa,CAAC,IAAY;IACxC,OAAO,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACjC,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,2BAA2B,CAAC,IAAY;IACtD,IAAI,UAAU,GAAG,GAAG,CAAC;IAErB,yDAAyD;IACzD,IAAI,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;QACtB,UAAU,IAAI,GAAG,CAAC;IACpB,CAAC;IAED,gCAAgC;IAChC,IAAI,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC;QACxB,UAAU,IAAI,GAAG,CAAC;IACpB,CAAC;IAED,4CAA4C;IAC5C,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;QACnB,UAAU,IAAI,GAAG,CAAC;IACpB,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC"}
package/package.json ADDED
@@ -0,0 +1,78 @@
1
+ {
2
+ "name": "@hyvmind/tiktoken-ts",
3
+ "version": "0.0.1",
4
+ "sideEffects": false,
5
+ "description": "A pure TypeScript implementation of OpenAI's tiktoken tokenizer, compatible with tiktoken-rs",
6
+ "author": "HyvMind",
7
+ "license": "MIT",
8
+ "type": "module",
9
+ "main": "./dist/index.js",
10
+ "module": "./dist/index.js",
11
+ "types": "./dist/index.d.ts",
12
+ "exports": {
13
+ ".": {
14
+ "import": "./dist/index.js",
15
+ "types": "./dist/index.d.ts"
16
+ },
17
+ "./encodings": {
18
+ "import": "./dist/encodings/index.js",
19
+ "types": "./dist/encodings/index.d.ts"
20
+ }
21
+ },
22
+ "files": [
23
+ "dist",
24
+ "README.md",
25
+ "LICENSE"
26
+ ],
27
+ "keywords": [
28
+ "tiktoken",
29
+ "tokenizer",
30
+ "bpe",
31
+ "openai",
32
+ "gpt",
33
+ "gpt-4",
34
+ "gpt-4o",
35
+ "claude",
36
+ "llm",
37
+ "tokens",
38
+ "encoding"
39
+ ],
40
+ "repository": {
41
+ "type": "git",
42
+ "url": "git+https://github.com/hyvmind-io/tiktoken-ts.git"
43
+ },
44
+ "bugs": {
45
+ "url": "https://github.com/hyvmind-io/tiktoken-ts/issues"
46
+ },
47
+ "homepage": "https://github.com/hyvmind-io/tiktoken-ts#readme",
48
+ "engines": {
49
+ "node": ">=18.0.0"
50
+ },
51
+ "devDependencies": {
52
+ "@eslint/js": "^9.39.2",
53
+ "@types/node": "^22.0.0",
54
+ "@typescript-eslint/eslint-plugin": "^8.0.0",
55
+ "@typescript-eslint/parser": "^8.0.0",
56
+ "@vitest/coverage-v8": "^2.0.0",
57
+ "eslint": "^9.0.0",
58
+ "eslint-config-prettier": "^9.0.0",
59
+ "prettier": "^3.0.0",
60
+ "typescript": "^5.6.0",
61
+ "typescript-eslint": "^8.54.0",
62
+ "vitest": "^2.0.0"
63
+ },
64
+ "scripts": {
65
+ "clean": "rm -rf dist",
66
+ "prebuild": "pnpm run clean",
67
+ "build": "tsc",
68
+ "dev": "tsc --watch",
69
+ "typecheck": "tsc --noEmit",
70
+ "test": "vitest run",
71
+ "test:watch": "vitest",
72
+ "test:coverage": "vitest run --coverage",
73
+ "lint": "eslint src --ext .ts",
74
+ "lint:fix": "eslint src --ext .ts --fix",
75
+ "format": "prettier --write \"src/**/*.ts\"",
76
+ "format:check": "prettier --check \"src/**/*.ts\""
77
+ }
78
+ }