@jeffreycao/copilot-api 1.9.0-beta.0 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/README.md +10 -0
  2. package/README.zh-CN.md +10 -0
  3. package/dist/{auth-DCB53u05.js → auth-C3UbfaIB.js} +6 -6
  4. package/dist/{auth-DCB53u05.js.map → auth-C3UbfaIB.js.map} +1 -1
  5. package/dist/{check-usage-iKUAIfc_.js → check-usage-CELArOu5.js} +6 -6
  6. package/dist/{check-usage-iKUAIfc_.js.map → check-usage-CELArOu5.js.map} +1 -1
  7. package/dist/{config-D3dkAXQE.js → config-BQvWqYh_.js} +3 -3
  8. package/dist/{config-D3dkAXQE.js.map → config-BQvWqYh_.js.map} +1 -1
  9. package/dist/{debug-CUx-7_jF.js → debug-DcC7ZPH0.js} +4 -4
  10. package/dist/{debug-CUx-7_jF.js.map → debug-DcC7ZPH0.js.map} +1 -1
  11. package/dist/main.js +6 -6
  12. package/dist/{paths-DvrimVju.js → paths-Cla6y5eD.js} +2 -2
  13. package/dist/{paths-DvrimVju.js.map → paths-Cla6y5eD.js.map} +1 -1
  14. package/dist/{server-BGqhE4N_.js → server-CA39Gy7x.js} +45 -1647
  15. package/dist/server-CA39Gy7x.js.map +1 -0
  16. package/dist/start-DKwPr9vl.js +268 -0
  17. package/dist/start-DKwPr9vl.js.map +1 -0
  18. package/dist/{token-BCdEIfN_.js → token-CIwAFD_6.js} +9 -5
  19. package/dist/token-CIwAFD_6.js.map +1 -0
  20. package/dist/{utils-DXuuBUT_.js → utils-DG6CB51Z.js} +4 -5
  21. package/dist/{utils-DXuuBUT_.js.map → utils-DG6CB51Z.js.map} +1 -1
  22. package/package.json +2 -1
  23. package/dist/GptEncoding-TiCgsNrQ.js +0 -887
  24. package/dist/GptEncoding-TiCgsNrQ.js.map +0 -1
  25. package/dist/chunk-BZ41Y9eH.js +0 -39
  26. package/dist/cl100k_base-kiDWemr-.js +0 -101375
  27. package/dist/cl100k_base-kiDWemr-.js.map +0 -1
  28. package/dist/dist-B3jIqeb6.js +0 -961
  29. package/dist/dist-B3jIqeb6.js.map +0 -1
  30. package/dist/dist-CSEoNgAt.js +0 -334
  31. package/dist/dist-CSEoNgAt.js.map +0 -1
  32. package/dist/o200k_base-BlrOP1Jc.js +0 -204726
  33. package/dist/o200k_base-BlrOP1Jc.js.map +0 -1
  34. package/dist/p50k_base-DA6KYT1Z.js +0 -11
  35. package/dist/p50k_base-DA6KYT1Z.js.map +0 -1
  36. package/dist/p50k_base-tS38LhIO.js +0 -50483
  37. package/dist/p50k_base-tS38LhIO.js.map +0 -1
  38. package/dist/p50k_edit-BlA1HcF4.js +0 -11
  39. package/dist/p50k_edit-BlA1HcF4.js.map +0 -1
  40. package/dist/prompt-CyV2d7YW.js +0 -850
  41. package/dist/prompt-CyV2d7YW.js.map +0 -1
  42. package/dist/r50k_base-CaCWe6-8.js +0 -50466
  43. package/dist/r50k_base-CaCWe6-8.js.map +0 -1
  44. package/dist/registry-B_2v83L7.js +0 -797
  45. package/dist/registry-B_2v83L7.js.map +0 -1
  46. package/dist/server-BGqhE4N_.js.map +0 -1
  47. package/dist/start-Cxl5pgll.js +0 -27379
  48. package/dist/start-Cxl5pgll.js.map +0 -1
  49. package/dist/token-BCdEIfN_.js.map +0 -1
@@ -1,887 +0,0 @@
1
- import { __export } from "./chunk-BZ41Y9eH.js";
2
-
3
- //#region node_modules/gpt-tokenizer/esm/constants.js
4
- const ALL_SPECIAL_TOKENS = "all"; // sentinel value that processSpecialTokens compares allowedSpecial/disallowedSpecial against
5
- const DEFAULT_MERGE_CACHE_SIZE = 1e5; // default for the mergeCacheSize option of the BytePairEncodingCore constructor
6
-
7
- //#endregion
8
- //#region node_modules/gpt-tokenizer/esm/utfUtil.js
9
- const isAscii = (codePoint) => codePoint <= 127; // true when the code point is at most 127 (0x7F)
10
- const HIGH_SURROGATE_START = 55296; // 0xD800, lower bound used by endsWithIncompleteUtfPairSurrogate
11
- const HIGH_SURROGATE_END = 56319; // 0xDBFF, upper bound used by endsWithIncompleteUtfPairSurrogate
12
/**
 * Tells whether `string` ends with a UTF-16 code unit that lies in the
 * HIGH_SURROGATE_START..HIGH_SURROGATE_END range, i.e. whether the text
 * stops after the first half of a surrogate pair.
 *
 * @param {string} string - Text to inspect.
 * @returns {boolean} `false` for an empty string, otherwise whether the
 *   last code unit is inside the high-surrogate range (bounds inclusive).
 */
function endsWithIncompleteUtfPairSurrogate(string) {
	const { length } = string;
	if (length === 0) return false;
	const tailUnit = string.charCodeAt(length - 1);
	const belowRange = tailUnit < HIGH_SURROGATE_START;
	const aboveRange = tailUnit > HIGH_SURROGATE_END;
	return !(belowRange || aboveRange);
}
17
/**
 * Checks whether a byte sequence is well-formed UTF-8.
 *
 * Rejected inputs: stray continuation bytes, lead bytes 0xC0/0xC1 and
 * anything above 0xF4, truncated sequences, overlong encodings, encoded
 * surrogates (U+D800..U+DFFF) and values above U+10FFFF.
 *
 * @param {ArrayLike<number>} bytes - Bytes to validate.
 * @returns {boolean} `true` when every sequence is valid (an empty input is valid).
 */
function isValidUTF8(bytes) {
	// Smallest code point that legitimately needs a sequence of the given width.
	const smallestForWidth = [0, 0, 0x80, 0x800, 0x10000];
	let offset = 0;
	while (offset < bytes.length) {
		const lead = bytes[offset];
		let width;
		let value;
		if (lead <= 0x7f) {
			width = 1;
			value = lead;
		} else if ((lead & 0xe0) === 0xc0) {
			// 0xC0 and 0xC1 could only start overlong two-byte forms.
			if (lead <= 0xc1) return false;
			width = 2;
			value = lead & 0x1f;
		} else if ((lead & 0xf0) === 0xe0) {
			width = 3;
			value = lead & 0x0f;
		} else if ((lead & 0xf8) === 0xf0) {
			// Lead bytes above 0xF4 would encode values beyond U+10FFFF.
			if (lead > 0xf4) return false;
			width = 4;
			value = lead & 0x07;
		} else {
			return false;
		}
		const end = offset + width;
		if (end > bytes.length) return false;
		for (let cursor = offset + 1; cursor < end; cursor++) {
			const continuation = bytes[cursor];
			if (continuation === undefined || (continuation & 0xc0) !== 0x80) return false;
			value = (value << 6) | (continuation & 0x3f);
		}
		if (width > 1 && value < smallestForWidth[width]) return false;
		if (value >= 0xd800 && value <= 0xdfff) return false;
		if (value > 0x10ffff) return false;
		offset = end;
	}
	return true;
}
53
// Non-fatal decoder; callers only reach it after isValidUTF8 has accepted the bytes.
const textDecoder = new TextDecoder("utf8", { fatal: false });
/**
 * Decodes `arr` as UTF-8 text when it passes `isValidUTF8`.
 *
 * @param {Uint8Array} arr - Candidate bytes.
 * @returns {string | undefined} The decoded text, or `undefined` when the
 *   bytes are not valid UTF-8.
 */
function tryConvertToString(arr) {
	return isValidUTF8(arr) ? textDecoder.decode(arr) : undefined;
}
58
/**
 * Lexicographic comparator for two byte arrays.
 *
 * @param {ArrayLike<number>} a - Left operand.
 * @param {ArrayLike<number>} b - Right operand.
 * @returns {number} The difference of the first pair of differing elements,
 *   or the difference in length when one array is a prefix of the other
 *   (negative: `a` sorts first, zero: equal, positive: `b` sorts first).
 */
function compareUint8Arrays(a, b) {
	const sharedLength = Math.min(a.length, b.length);
	let position = 0;
	while (position < sharedLength && a[position] === b[position]) position++;
	if (position < sharedLength) return a[position] - b[position];
	return a.length - b.length;
}
63
-
64
- //#endregion
65
- //#region node_modules/gpt-tokenizer/esm/util.js
66
/**
 * Finds the largest value stored in `map`, starting from a floor of 0.
 *
 * @param {{ forEach(callback: (value: number) => void): void }} map - Collection
 *   whose values are visited through `forEach`.
 * @returns {number} The greatest value seen, or 0 when the collection is
 *   empty or holds no value above 0.
 */
function getMaxValueFromMap(map) {
	let highest = 0;
	const keepHigher = (candidate) => {
		highest = Math.max(highest, candidate);
	};
	map.forEach(keepHigher);
	return highest;
}
73
/**
 * Backslash-escapes the regular-expression metacharacters
 * `$ ( ) * + . ? [ \ ] ^ { | }` so the text can be embedded in a pattern
 * and matched literally.
 *
 * @param {string} string - Raw text.
 * @returns {string} The text with each listed metacharacter prefixed by `\`.
 */
function escapeRegExp(string) {
	const metaCharacters = /[$()*+.?[\\\]^{|}]/g;
	return string.replace(metaCharacters, (character) => `\\${character}`);
}
76
/**
 * Builds a regular expression that matches any one of `tokens` literally.
 * The alternation is wrapped in a single capturing group and the regex
 * carries no flags.
 *
 * @param {Iterable<string>} tokens - Token strings to match.
 * @returns {RegExp} Pattern of the form `(tokenA|tokenB|...)` with each
 *   token escaped through `escapeRegExp`.
 */
function getSpecialTokenRegex(tokens) {
	const alternatives = [];
	for (const token of tokens) alternatives.push(escapeRegExp(token));
	return /* @__PURE__ */ new RegExp(`(${alternatives.join("|")})`);
}
80
-
81
- //#endregion
82
- //#region node_modules/gpt-tokenizer/esm/BytePairEncodingCore.js
83
const emptyBuffer = new Uint8Array(0);
// ignoreBOM keeps U+FEFF in the output so that decoding encoded text
// round-trips; every decode() call below is non-streaming, so this shared
// instance never carries state from one call to the next.
const decoder = new TextDecoder("utf8", { ignoreBOM: true });
/**
 * Byte-pair-encoding engine: converts text to token ranks and back using a
 * rank table, an optional special-token table and a token-split regex.
 */
var BytePairEncodingCore = class {
	/** Number of populated (non-hole) entries of `bytePairRankDecoder`. */
	mergeableBytePairRankCount;
	/**
	* an array where the index is the BPE rank,
	* and the value is the string or the array of bytes that it decodes to
	* it may contain holes if token is unused
	*/
	bytePairRankDecoder;
	/** rank -> Uint8Array for every entry of the rank table that is not a string */
	bytePairNonUtfRankDecoder = /* @__PURE__ */ new Map();
	/** `[bytes, rank]` pairs sorted with `compareUint8Arrays`, searched by `binarySearch` */
	bytePairNonUtfSortedEncoder;
	/**
	* a reverse map of the bytePairRankDecoder,
	* where the key is the string and the value is the rank
	* values that cannot be represented as a string are present in `bytePairNonUtfSortedEncoder`
	*/
	bytePairStringRankEncoder;
	tokenSplitRegex;
	specialTokensEncoder;
	specialTokensDecoder;
	/** Global (non-sticky) regex matching any known special token. */
	specialTokenPatternRegex;
	textEncoder = new TextEncoder();
	/** Insertion-ordered map used as an LRU cache: piece text -> token ranks. */
	mergeCache;
	mergeCacheSize;
	/**
	 * @param {object} options
	 * @param {Array<string | ArrayLike<number>>} options.bytePairRankDecoder - Rank table (index = rank).
	 * @param {Map<string, number>} [options.specialTokensEncoder] - Special token text -> rank.
	 * @param {RegExp} options.tokenSplitRegex - Global regex that splits text into pieces.
	 * @param {number} [options.mergeCacheSize] - Maximum cached pieces; values <= 0 disable the cache.
	 * @throws {Error} When the special tokens cannot be compiled into a regex.
	 */
	constructor({ bytePairRankDecoder, specialTokensEncoder, tokenSplitRegex, mergeCacheSize = DEFAULT_MERGE_CACHE_SIZE }) {
		this.bytePairRankDecoder = bytePairRankDecoder;
		this.bytePairStringRankEncoder = /* @__PURE__ */ new Map();
		this.mergeCacheSize = mergeCacheSize;
		if (mergeCacheSize > 0) this.mergeCache = /* @__PURE__ */ new Map();
		this.mergeableBytePairRankCount = Object.keys(bytePairRankDecoder).length;
		const binaryLookup = [];
		bytePairRankDecoder.forEach((value, rank) => {
			if (typeof value === "string") {
				this.bytePairStringRankEncoder.set(value, rank);
				return;
			}
			const byteArray = new Uint8Array(value);
			binaryLookup.push([byteArray, rank]);
			this.bytePairNonUtfRankDecoder.set(rank, byteArray);
		});
		this.bytePairNonUtfSortedEncoder = binaryLookup.sort((a, b) => compareUint8Arrays(a[0], b[0]));
		this.specialTokensEncoder = specialTokensEncoder ?? /* @__PURE__ */ new Map();
		this.specialTokensDecoder = new Map([...this.specialTokensEncoder].map(([key, value]) => [value, key]));
		this.tokenSplitRegex = tokenSplitRegex;
		const allSpecialTokensRegex = [...this.specialTokensEncoder.keys()].map((token) => escapeRegExp(token)).join("|");
		try {
			// "g" rather than "y": a sticky regex only matches exactly at
			// lastIndex, which would hide every special token that is not at
			// the very start of the searched range.
			this.specialTokenPatternRegex = new RegExp(allSpecialTokensRegex, "g");
		} catch {
			throw new Error("Invalid regular expression pattern.");
		}
	}
	/**
	 * Changes the merge-cache capacity. A positive size enables the cache
	 * (creating it when absent); any other size disables and drops it.
	 *
	 * @param {number} newSize - New maximum number of cached pieces.
	 */
	setMergeCacheSize(newSize) {
		this.mergeCacheSize = newSize;
		if (newSize > 0) this.mergeCache ??= /* @__PURE__ */ new Map();
		else this.mergeCache = void 0;
	}
	/** Empties the merge cache, if one exists. */
	clearMergeCache() {
		this.mergeCache?.clear();
	}
	/**
	 * Lazily encodes `text`, yielding one array of ranks per text piece or
	 * special token.
	 *
	 * @param {string} text - Text to encode.
	 * @param {Set<string>} [allowedSpecial] - Special tokens to emit as single ranks.
	 * @returns {Generator<number[], number>} Yields rank arrays; the return
	 *   value is the length of the last yielded array (0 when nothing was yielded).
	 * @throws {Error} When an allowed special token has no rank or a piece cannot be encoded.
	 */
	*encodeNativeGenerator(text, allowedSpecial) {
		let startIndex = 0;
		let lastTokenLength = 0;
		while (true) {
			const nextSpecialMatch = this.findNextSpecialToken(text, allowedSpecial, startIndex);
			const nextSpecialStartIndex = nextSpecialMatch?.[0];
			const endIndex = nextSpecialStartIndex ?? text.length;
			// Avoid copying the string when the whole text is one plain segment.
			const textBeforeSpecial = startIndex === 0 && endIndex === text.length ? text : text.slice(startIndex, endIndex);
			for (const [match] of textBeforeSpecial.matchAll(this.tokenSplitRegex)) {
				const token = this.getBpeRankFromString(match);
				if (token !== void 0) {
					lastTokenLength = 1;
					yield [token];
					continue;
				}
				const tokens = this.bytePairEncode(match);
				lastTokenLength = tokens.length;
				yield tokens;
			}
			if (nextSpecialStartIndex === void 0) break;
			const specialToken = nextSpecialMatch[1];
			const specialTokenValue = this.specialTokensEncoder.get(specialToken);
			if (specialTokenValue === void 0) throw new Error(`Special token "${specialToken}" is not in the special token encoder.`);
			yield [specialTokenValue];
			startIndex = nextSpecialStartIndex + specialToken.length;
			lastTokenLength = 1;
		}
		return lastTokenLength;
	}
	/**
	 * Encodes `text` into a flat array of ranks.
	 *
	 * @param {string} text - Text to encode.
	 * @param {Set<string>} [allowedSpecial] - Special tokens to emit as single ranks.
	 * @returns {number[]} Token ranks in text order.
	 * @throws {Error} When an allowed special token has no rank or a piece cannot be encoded.
	 */
	encodeNative(text, allowedSpecial) {
		let startIndex = 0;
		const tokensArray = [];
		while (true) {
			const nextSpecialMatch = this.findNextSpecialToken(text, allowedSpecial, startIndex);
			const nextSpecialStartIndex = nextSpecialMatch?.[0];
			const endIndex = nextSpecialStartIndex ?? text.length;
			const textBeforeSpecial = startIndex === 0 && endIndex === text.length ? text : text.slice(startIndex, endIndex);
			for (const [match] of textBeforeSpecial.matchAll(this.tokenSplitRegex)) {
				const token = this.getBpeRankFromString(match);
				if (token !== void 0) {
					tokensArray.push(token);
					continue;
				}
				// Element-wise append: spreading a very long piece into push()
				// can exceed the engine's argument-count limit.
				for (const rank of this.bytePairEncode(match)) tokensArray.push(rank);
			}
			if (nextSpecialStartIndex === void 0) break;
			const specialToken = nextSpecialMatch[1];
			const specialTokenValue = this.specialTokensEncoder.get(specialToken);
			if (specialTokenValue === void 0) throw new Error(`Special token "${specialToken}" is not in the special token encoder.`);
			tokensArray.push(specialTokenValue);
			startIndex = nextSpecialStartIndex + specialToken.length;
		}
		return tokensArray;
	}
	/**
	 * Counts the ranks `encodeNative` would produce without collecting them.
	 *
	 * @param {string} text - Text to measure.
	 * @param {Set<string>} [allowedSpecial] - Special tokens counted as single ranks.
	 * @returns {number} Number of tokens.
	 * @throws {Error} When an allowed special token has no rank or a piece cannot be encoded.
	 */
	countNative(text, allowedSpecial) {
		let startIndex = 0;
		let tokensCount = 0;
		while (true) {
			const nextSpecialMatch = this.findNextSpecialToken(text, allowedSpecial, startIndex);
			const nextSpecialStartIndex = nextSpecialMatch?.[0];
			const endIndex = nextSpecialStartIndex ?? text.length;
			const textBeforeSpecial = startIndex === 0 && endIndex === text.length ? text : text.slice(startIndex, endIndex);
			for (const [match] of textBeforeSpecial.matchAll(this.tokenSplitRegex)) {
				if (this.getBpeRankFromString(match) !== void 0) {
					tokensCount++;
					continue;
				}
				tokensCount += this.bytePairEncode(match).length;
			}
			if (nextSpecialStartIndex === void 0) break;
			const specialToken = nextSpecialMatch[1];
			if (this.specialTokensEncoder.get(specialToken) === void 0) throw new Error(`Special token "${specialToken}" is not in the special token encoder.`);
			tokensCount++;
			startIndex = nextSpecialStartIndex + specialToken.length;
		}
		return tokensCount;
	}
	/**
	 * Yields the decoded form (string or bytes) of each rank; ranks that
	 * decode to nothing or to an empty string are skipped.
	 *
	 * @param {Iterable<number>} tokens - Ranks to decode.
	 */
	*decodeNativeGenerator(tokens) {
		for (const token of tokens) {
			const tokenBytes = this.tryDecodeToken(token);
			if (tokenBytes) yield tokenBytes;
		}
	}
	/**
	 * Decodes ranks into a string. Consecutive byte-valued tokens are joined
	 * and decoded as one UTF-8 run; malformed or truncated byte runs decode
	 * to U+FFFD. No decoder state survives between runs or between calls.
	 *
	 * @param {Iterable<number>} tokens - Ranks to decode.
	 * @returns {string} Decoded text.
	 * @throws {Error} When a rank is in neither the rank table nor the special tokens.
	 */
	decodeNative(tokens) {
		let decoded = "";
		let pendingBytes = emptyBuffer;
		for (const token of tokens) {
			const tokenBytes = this.tryDecodeToken(token);
			if (tokenBytes === void 0) throw new Error(`Token ${token} is not in the byte pair encoder.`);
			if (typeof tokenBytes === "string") {
				if (pendingBytes !== emptyBuffer) {
					// Non-streaming decode: flushes the run completely instead of
					// parking partial bytes inside the shared decoder.
					decoded += decoder.decode(pendingBytes);
					pendingBytes = emptyBuffer;
				}
				decoded += tokenBytes;
			} else {
				const joined = new Uint8Array(pendingBytes.length + tokenBytes.length);
				joined.set(pendingBytes);
				joined.set(tokenBytes, pendingBytes.length);
				pendingBytes = joined;
			}
		}
		if (pendingBytes !== emptyBuffer) decoded += decoder.decode(pendingBytes);
		return decoded;
	}
	/**
	 * Async counterpart of `decodeNativeGenerator`.
	 *
	 * @param {AsyncIterable<number> | Iterable<number>} tokens - Ranks to decode.
	 */
	async *decodeNativeAsyncIterable(tokens) {
		for await (const token of tokens) {
			const tokenBytesOrString = this.tryDecodeToken(token);
			if (tokenBytesOrString) yield tokenBytesOrString;
		}
	}
	/**
	 * @param {string} key - Token text.
	 * @returns {number | undefined} Rank of the string token, if any.
	 */
	getBpeRankFromString(key) {
		return this.bytePairStringRankEncoder.get(key);
	}
	/**
	 * @param {string} key - Token text.
	 * @returns {number} Rank of the string token.
	 * @throws {Error} When the string is not a token.
	 */
	getBpeRankFromStringOrThrow(key) {
		const value = this.getBpeRankFromString(key);
		if (value === void 0) throw new Error(`The byte-pair encoding does not contain a value for: ${key}`);
		return value;
	}
	/**
	 * Looks up the rank of a byte sequence: through the string table when
	 * `tryConvertToString` can convert the bytes, otherwise through the sorted
	 * binary table.
	 *
	 * @param {Uint8Array} key - Token bytes.
	 * @returns {number | undefined} Rank, if the bytes form a token.
	 */
	getBpeRankFromBytes(key) {
		const keyAsString = tryConvertToString(key);
		if (keyAsString !== void 0) return this.getBpeRankFromString(keyAsString);
		const index = this.binarySearch(key);
		if (index !== -1) return this.bytePairNonUtfSortedEncoder[index][1];
	}
	/**
	 * @param {Uint8Array} key - Token bytes.
	 * @returns {number} Rank of the byte sequence.
	 * @throws {Error} When the bytes are not a token.
	 */
	getBpeRankFromBytesOrThrow(key) {
		const value = this.getBpeRankFromBytes(key);
		if (value === void 0) throw new Error(`The byte-pair encoding does not contain a value for: ${key.toString()}`);
		return value;
	}
	/**
	 * Binary search over `bytePairNonUtfSortedEncoder`, using the same
	 * comparator the table was sorted with.
	 *
	 * @param {Uint8Array} key - Bytes to find.
	 * @returns {number} Index into the sorted table, or -1 when absent.
	 */
	binarySearch(key) {
		let low = 0;
		let high = this.bytePairNonUtfSortedEncoder.length - 1;
		while (low <= high) {
			const mid = low + high >>> 1;
			const cmp = compareUint8Arrays(this.bytePairNonUtfSortedEncoder[mid][0], key);
			if (cmp === 0) return mid;
			if (cmp < 0) low = mid + 1;
			else high = mid - 1;
		}
		return -1;
	}
	/**
	 * Finds the first allowed special token at or after `startIndex`.
	 *
	 * @param {string} text - Text to search.
	 * @param {Set<string>} [allowedSpecial] - Tokens that may be reported.
	 * @param {number} startIndex - Position in `text` to start searching from.
	 * @returns {[number, string] | undefined} `[index in text, token]`, or
	 *   `undefined` when no allowed token occurs.
	 */
	findNextSpecialToken(text, allowedSpecial, startIndex) {
		// Nothing can be reported without allowed tokens, so skip the scan.
		if (!allowedSpecial || allowedSpecial.size === 0) return;
		const pattern = this.specialTokenPatternRegex;
		let searchIndex = startIndex;
		while (true) {
			pattern.lastIndex = searchIndex;
			const nextSpecialMatch = pattern.exec(text);
			if (!nextSpecialMatch) return;
			const specialToken = nextSpecialMatch[0];
			// match.index is already an absolute offset into `text`.
			if (allowedSpecial.has(specialToken)) return [nextSpecialMatch.index, specialToken];
			searchIndex = nextSpecialMatch.index + 1;
		}
	}
	/**
	 * Resolves a rank to its decoded form.
	 *
	 * @param {number} tokenRank - Rank to decode.
	 * @returns {string | Uint8Array | undefined} String token, byte token,
	 *   special-token text, or `undefined` when the rank is unknown.
	 */
	tryDecodeToken(tokenRank) {
		const value = this.bytePairRankDecoder[tokenRank];
		if (typeof value === "string") return value;
		if (typeof value === "object") {
			const fromBinary = this.bytePairNonUtfRankDecoder.get(tokenRank);
			if (fromBinary) return fromBinary;
		}
		return this.specialTokensDecoder.get(tokenRank);
	}
	/**
	 * Stores a merge result, evicting the oldest entries until the cache is
	 * below its capacity (this also trims a cache whose capacity was reduced).
	 *
	 * @param {string} key - Piece text.
	 * @param {number[]} value - Ranks produced for the piece.
	 */
	addToMergeCache(key, value) {
		const cache = this.mergeCache;
		if (!cache) return;
		while (cache.size > 0 && cache.size >= this.mergeCacheSize) {
			cache.delete(cache.keys().next().value);
		}
		cache.set(key, value);
	}
	/**
	 * Encodes one text piece that is not itself a string token.
	 *
	 * @param {string} input - Piece produced by the token-split regex.
	 * @returns {number[]} Ranks for the piece (the cached array when cached).
	 * @throws {Error} When some part of the piece has no rank.
	 */
	bytePairEncode(input) {
		if (input.length === 1 && isAscii(input.codePointAt(0))) return [this.getBpeRankFromStringOrThrow(input)];
		if (this.mergeCache?.has(input)) {
			const cached = this.mergeCache.get(input);
			// Re-insert so the entry becomes the most recently used one.
			this.mergeCache.delete(input);
			this.mergeCache.set(input, cached);
			return cached;
		}
		const inputBytes = this.textEncoder.encode(input);
		const result = this.bytePairMerge(inputBytes);
		this.addToMergeCache(input, result);
		return result;
	}
	/**
	 * Repeatedly merges the adjacent byte ranges whose concatenation has the
	 * lowest rank, then maps the remaining ranges to ranks.
	 *
	 * @param {Uint8Array} piece - UTF-8 bytes of one text piece.
	 * @returns {number[]} Ranks covering the whole piece.
	 * @throws {Error} When a final range has no rank.
	 */
	bytePairMerge(piece) {
		// starts[i] is the byte offset where part i begins; the final entry is piece.length.
		const starts = [];
		// ranks[i] is the rank of merging part i with part i + 1 (Infinity if impossible).
		const ranks = [];
		const getRank = (startIndex, pairStart = starts[startIndex], pairEnd = starts[startIndex + 2]) => {
			if (pairEnd === void 0) return Number.POSITIVE_INFINITY;
			const key = piece.subarray(pairStart, pairEnd);
			return this.getBpeRankFromBytes(key) ?? Number.POSITIVE_INFINITY;
		};
		for (let i = 0; i <= piece.length; i++) {
			starts.push(i);
			if (i < piece.length - 1) ranks.push(getRank(i, i, i + 2));
			else ranks.push(Number.POSITIVE_INFINITY);
		}
		while (starts.length > 1) {
			let lowestRank = Number.POSITIVE_INFINITY;
			let lowestPartitionIndex = -1;
			for (let i = 0; i < ranks.length - 1; i++) {
				const rank = ranks[i];
				if (rank < lowestRank) {
					lowestRank = rank;
					lowestPartitionIndex = i;
				}
			}
			if (lowestRank === Number.POSITIVE_INFINITY || lowestPartitionIndex === -1) break;
			starts.splice(lowestPartitionIndex + 1, 1);
			ranks.splice(lowestPartitionIndex, 1);
			// Only the merged part and its left neighbour have new merge candidates.
			ranks[lowestPartitionIndex] = getRank(lowestPartitionIndex);
			if (lowestPartitionIndex > 0) ranks[lowestPartitionIndex - 1] = getRank(lowestPartitionIndex - 1);
		}
		const output = [];
		for (let i = 0; i < starts.length - 1; i++) {
			output.push(this.getBpeRankFromBytesOrThrow(piece.subarray(starts[i], starts[i + 1])));
		}
		return output;
	}
};
377
-
378
- //#endregion
379
- //#region node_modules/gpt-tokenizer/esm/modelsChatEnabled.gen.js
380
/**
 * Model names for which chat parameters are generated (see `chatModelParams`).
 * Entries are grouped by model family, one group per comment.
 */
const chatEnabledModels = [
	// miscellaneous
	"chatgpt-4o-latest", "codex-mini-latest",
	"computer-use-preview", "computer-use-preview-2025-03-11",
	// gpt-3.5
	"gpt-3.5", "gpt-3.5-0301", "gpt-3.5-turbo", "gpt-3.5-turbo-0125",
	"gpt-3.5-turbo-0613", "gpt-3.5-turbo-1106", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-instruct",
	// gpt-4
	"gpt-4", "gpt-4-0125-preview", "gpt-4-0314", "gpt-4-0613",
	"gpt-4-1106-preview", "gpt-4-1106-vision-preview", "gpt-4-32k",
	"gpt-4-turbo", "gpt-4-turbo-2024-04-09", "gpt-4-turbo-preview",
	// gpt-4.1
	"gpt-4.1", "gpt-4.1-2025-04-14",
	"gpt-4.1-mini", "gpt-4.1-mini-2025-04-14",
	"gpt-4.1-nano", "gpt-4.1-nano-2025-04-14",
	// gpt-4.5
	"gpt-4.5-preview", "gpt-4.5-preview-2025-02-27",
	// gpt-4o
	"gpt-4o", "gpt-4o-2024-05-13", "gpt-4o-2024-08-06", "gpt-4o-2024-11-20",
	"gpt-4o-audio-preview", "gpt-4o-audio-preview-2024-10-01",
	"gpt-4o-audio-preview-2024-12-17", "gpt-4o-audio-preview-2025-06-03",
	"gpt-4o-mini", "gpt-4o-mini-2024-07-18",
	"gpt-4o-mini-audio-preview", "gpt-4o-mini-audio-preview-2024-12-17",
	"gpt-4o-mini-search-preview", "gpt-4o-mini-search-preview-2025-03-11",
	"gpt-4o-search-preview", "gpt-4o-search-preview-2025-03-11",
	// o1
	"o1", "o1-2024-12-17", "o1-mini", "o1-mini-2024-09-12",
	"o1-preview", "o1-preview-2024-09-12", "o1-pro", "o1-pro-2025-03-19",
	// o3
	"o3", "o3-2025-04-16", "o3-mini", "o3-mini-2025-01-31", "o3-pro", "o3-pro-2025-06-10",
	// o4
	"o4-mini", "o4-mini-2025-04-16"
];
444
-
445
- //#endregion
446
- //#region node_modules/gpt-tokenizer/esm/modelsMap.js
447
/**
 * Namespace object exposing, per encoding name, the list of model names
 * that use that encoding. The accessors are lazy, so the arrays may be
 * declared after this object.
 */
var modelsMap_exports = /* @__PURE__ */ __export({
	cl100k_base: () => cl100k_base,
	o200k_base: () => o200k_base$1,
	p50k_base: () => p50k_base,
	p50k_edit: () => p50k_edit,
	r50k_base: () => r50k_base
});
/** Models listed under the `p50k_base` encoding. */
const p50k_base = [
	"text-davinci-002", "text-davinci-003",
	"code-davinci-001", "code-davinci-002", "davinci-codex",
	"code-cushman-001", "code-cushman-002", "cushman-codex"
];
/** Models listed under the `r50k_base` encoding. */
const r50k_base = [
	"text-ada-001", "text-babbage-001", "text-curie-001", "text-davinci-001",
	"ada", "babbage", "curie", "davinci",
	"code-search-ada-code-001", "code-search-ada-text-001",
	"text-similarity-ada-001", "text-search-ada-doc-001", "text-search-ada-query-001",
	"text-similarity-babbage-001", "text-search-babbage-doc-001", "text-search-babbage-query-001",
	"code-search-babbage-code-001", "code-search-babbage-text-001",
	"text-similarity-curie-001", "text-search-curie-doc-001", "text-search-curie-query-001",
	"text-similarity-davinci-001", "text-search-davinci-doc-001", "text-search-davinci-query-001"
];
/** Models listed under the `p50k_edit` encoding. */
const p50k_edit = ["code-davinci-edit-001", "text-davinci-edit-001"];
/** Models listed under the `cl100k_base` encoding. */
const cl100k_base = [
	"gpt-3.5", "gpt-3.5-0301", "gpt-3.5-turbo", "gpt-3.5-turbo-0125",
	"gpt-3.5-turbo-0613", "gpt-3.5-turbo-1106", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-instruct",
	"gpt-4", "gpt-4-0125-preview", "gpt-4-0314", "gpt-4-0613",
	"gpt-4-1106-preview", "gpt-4-1106-vision-preview", "gpt-4-32k",
	"gpt-4-turbo", "gpt-4-turbo-2024-04-09", "gpt-4-turbo-preview",
	"text-embedding-3-large", "text-embedding-3-small", "text-embedding-ada-002",
	"babbage-002", "davinci-002"
];
/** Intentionally empty: `o200k_base` is the fallback for models not listed above. */
const o200k_base$1 = [];
517
-
518
- //#endregion
519
- //#region node_modules/gpt-tokenizer/esm/specialTokens.js
520
- const EndOfText = "<|endoftext|>"; // special-token literals; each encoding-params factory maps a subset of them to token ranks
521
- const FimPrefix = "<|fim_prefix|>";
522
- const FimMiddle = "<|fim_middle|>";
523
- const FimSuffix = "<|fim_suffix|>";
524
- const ImStart = "<|im_start|>"; // looked up by encodeChatGenerator as the chat start token
525
- const ImEnd = "<|im_end|>"; // looked up by encodeChatGenerator as the chat end token
526
- const ImSep = "<|im_sep|>"; // role separator used by gpt4params
527
- const EndOfPrompt = "<|endofprompt|>";
528
-
529
- //#endregion
530
- //#region node_modules/gpt-tokenizer/esm/mapping.js
531
const o200k_base = "o200k_base";
/** Encoding used for any model that `modelToEncodingMap` does not list. */
const DEFAULT_ENCODING = o200k_base;
/**
 * maps model names to encoding names
 * if a model is not listed, it uses the default encoding for new models
 * which is `o200k_base`
 */
const modelToEncodingMap = (() => {
	const pairs = [];
	for (const [encodingName, models] of Object.entries(modelsMap_exports)) {
		for (const modelName of models) pairs.push([modelName, encodingName]);
	}
	return Object.fromEntries(pairs);
})();
/** Chat separators applied to models whose name starts with "gpt-3.5". */
const gpt3params = {
	messageSeparator: "\n",
	roleSeparator: "\n"
};
/** Chat separators applied to every other chat-enabled model. */
const gpt4params = {
	messageSeparator: "",
	roleSeparator: ImSep
};
/** Chat-enabled model name -> separator parameters. */
const chatModelParams = Object.fromEntries(chatEnabledModels.map((modelName) => {
	const params = modelName.startsWith("gpt-3.5") ? gpt3params : gpt4params;
	return [modelName, params];
}));
548
-
549
- //#endregion
550
- //#region node_modules/gpt-tokenizer/esm/encodingParams/constants.js
551
- const R50K_TOKEN_SPLIT_REGEX = /'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+/gu; // token-split pattern used by the R50KBase, P50KBase and P50KEdit params
552
- const CL_AND_O_TOKEN_SPLIT_PATTERN = /(?:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+/giu; // token-split pattern shared by the Cl100KBase and O200KBase params
553
-
554
- //#endregion
555
- //#region node_modules/gpt-tokenizer/esm/encodingParams/cl100k_base.js
556
/**
 * Assembles the encoding parameters for `cl100k_base`.
 *
 * @param {Array} bytePairRankDecoder - Rank table, passed through unchanged.
 * @returns {{ tokenSplitRegex: RegExp, bytePairRankDecoder: Array, specialTokensEncoder: Map<string, number> }}
 *   Parameters with the cl100k special-token ranks.
 */
function Cl100KBase(bytePairRankDecoder) {
	const specialTokensEncoder = /* @__PURE__ */ new Map();
	specialTokensEncoder
		.set(EndOfText, 100257)
		.set(FimPrefix, 100258)
		.set(FimMiddle, 100259)
		.set(FimSuffix, 100260)
		.set(ImStart, 100264)
		.set(ImEnd, 100265)
		.set(ImSep, 100266)
		.set(EndOfPrompt, 100276);
	return {
		tokenSplitRegex: CL_AND_O_TOKEN_SPLIT_PATTERN,
		bytePairRankDecoder,
		specialTokensEncoder
	};
}
572
-
573
- //#endregion
574
- //#region node_modules/gpt-tokenizer/esm/encodingParams/o200k_base.js
575
/**
 * Assembles the encoding parameters for `o200k_base`.
 *
 * @param {Array} bytePairRankDecoder - Rank table, passed through unchanged.
 * @returns {{ tokenSplitRegex: RegExp, bytePairRankDecoder: Array, specialTokensEncoder: Map<string, number> }}
 *   Parameters with the o200k special-token ranks.
 */
function O200KBase(bytePairRankDecoder) {
	const specialTokensEncoder = /* @__PURE__ */ new Map();
	specialTokensEncoder
		.set(EndOfText, 199999)
		.set(FimPrefix, 200000)
		.set(FimMiddle, 200001)
		.set(FimSuffix, 200002)
		.set(ImStart, 200003)
		.set(ImEnd, 200004)
		.set(ImSep, 200005)
		.set(EndOfPrompt, 200006);
	return {
		tokenSplitRegex: CL_AND_O_TOKEN_SPLIT_PATTERN,
		bytePairRankDecoder,
		specialTokensEncoder
	};
}
591
-
592
- //#endregion
593
- //#region node_modules/gpt-tokenizer/esm/encodingParams/p50k_base.js
594
/**
 * Assembles the encoding parameters for `p50k_base`.
 *
 * @param {Array} bytePairRankDecoder - Rank table, passed through unchanged.
 * @returns {{ expectedVocabularySize: number, tokenSplitRegex: RegExp, bytePairRankDecoder: Array, specialTokensEncoder: Map<string, number> }}
 *   Parameters declaring a vocabulary of 50281 entries and a single
 *   special token (`EndOfText` -> 50256).
 */
function P50KBase(bytePairRankDecoder) {
	const specialTokensEncoder = /* @__PURE__ */ new Map();
	specialTokensEncoder.set(EndOfText, 50256);
	return {
		expectedVocabularySize: 50281,
		tokenSplitRegex: R50K_TOKEN_SPLIT_REGEX,
		bytePairRankDecoder,
		specialTokensEncoder
	};
}
602
-
603
- //#endregion
604
- //#region node_modules/gpt-tokenizer/esm/encodingParams/p50k_edit.js
605
/**
 * Assembles the encoding parameters for `p50k_edit`.
 *
 * @param {Array} bytePairRankDecoder - Rank table, passed through unchanged.
 * @returns {{ tokenSplitRegex: RegExp, bytePairRankDecoder: Array, specialTokensEncoder: Map<string, number> }}
 *   Parameters with the end-of-text and fill-in-the-middle token ranks.
 */
function P50KEdit(bytePairRankDecoder) {
	const specialTokensEncoder = /* @__PURE__ */ new Map();
	specialTokensEncoder
		.set(EndOfText, 50256)
		.set(FimPrefix, 50281)
		.set(FimMiddle, 50282)
		.set(FimSuffix, 50283);
	return {
		tokenSplitRegex: R50K_TOKEN_SPLIT_REGEX,
		bytePairRankDecoder,
		specialTokensEncoder
	};
}
617
-
618
- //#endregion
619
- //#region node_modules/gpt-tokenizer/esm/encodingParams/r50k_base.js
620
/**
 * Assembles the encoding parameters for `r50k_base`.
 *
 * @param {Array} bytePairRankDecoder - Rank table, passed through unchanged.
 * @returns {{ expectedVocabularySize: number, tokenSplitRegex: RegExp, bytePairRankDecoder: Array, specialTokensEncoder: Map<string, number> }}
 *   Parameters declaring a vocabulary of 50257 entries and a single
 *   special token (`EndOfText` -> 50256).
 */
function R50KBase(bytePairRankDecoder) {
	const specialTokensEncoder = /* @__PURE__ */ new Map();
	specialTokensEncoder.set(EndOfText, 50256);
	return {
		expectedVocabularySize: 50257,
		tokenSplitRegex: R50K_TOKEN_SPLIT_REGEX,
		bytePairRankDecoder,
		specialTokensEncoder
	};
}
628
-
629
- //#endregion
630
- //#region node_modules/gpt-tokenizer/esm/modelParams.js
631
/**
 * Loads the rank table for `encodingName` and wraps it in the matching
 * encoding parameters. The name is matched case-insensitively, but the
 * loader receives it exactly as given and is invoked before the name is
 * validated.
 *
 * @param {string} encodingName - One of r50k_base, p50k_base, p50k_edit,
 *   cl100k_base or o200k_base.
 * @param {(encodingName: string) => Array} getMergeableRanks - Rank-table loader.
 * @returns {object} Parameters produced by the encoding's factory.
 * @throws {Error} When the encoding name is not recognised.
 */
function getEncodingParams(encodingName, getMergeableRanks) {
	const mergeableBytePairRanks = getMergeableRanks(encodingName);
	const normalizedName = encodingName.toLowerCase();
	if (normalizedName === "r50k_base") return R50KBase(mergeableBytePairRanks);
	if (normalizedName === "p50k_base") return P50KBase(mergeableBytePairRanks);
	if (normalizedName === "p50k_edit") return P50KEdit(mergeableBytePairRanks);
	if (normalizedName === "cl100k_base") return Cl100KBase(mergeableBytePairRanks);
	if (normalizedName === "o200k_base") return O200KBase(mergeableBytePairRanks);
	throw new Error(`Unknown encoding name: ${encodingName}`);
}
642
-
643
- //#endregion
644
- //#region node_modules/gpt-tokenizer/esm/GptEncoding.js
645
- var GptEncoding = class GptEncoding {
646
- static EndOfPrompt = EndOfPrompt;
647
- static EndOfText = EndOfText;
648
- static FimMiddle = FimMiddle;
649
- static FimPrefix = FimPrefix;
650
- static FimSuffix = FimSuffix;
651
- modelName;
652
- modelSpec;
653
- bytePairEncodingCoreProcessor;
654
- specialTokensEncoder;
655
- specialTokensSet;
656
- allSpecialTokenRegex;
657
- defaultSpecialTokenConfig;
658
- vocabularySize;
659
- constructor({ bytePairRankDecoder: mergeableBytePairRanks, specialTokensEncoder, expectedVocabularySize, modelName, modelSpec,...rest }) {
660
- this.specialTokensEncoder = specialTokensEncoder;
661
- this.specialTokensSet = new Set(this.specialTokensEncoder.keys());
662
- this.allSpecialTokenRegex = getSpecialTokenRegex(this.specialTokensSet);
663
- this.bytePairEncodingCoreProcessor = new BytePairEncodingCore({
664
- bytePairRankDecoder: mergeableBytePairRanks,
665
- specialTokensEncoder,
666
- ...rest
667
- });
668
- this.defaultSpecialTokenConfig = this.processSpecialTokens();
669
- const maxTokenValue = Math.max(mergeableBytePairRanks.length - 1, getMaxValueFromMap(specialTokensEncoder));
670
- this.vocabularySize = this.bytePairEncodingCoreProcessor.mergeableBytePairRankCount + specialTokensEncoder.size;
671
- if (expectedVocabularySize !== void 0) {
672
- if (this.vocabularySize !== expectedVocabularySize) throw new Error("The number of mergeable tokens and special tokens must be equal to expectedVocabularySize.");
673
- if (maxTokenValue !== expectedVocabularySize - 1) throw new Error(`The model encodings are invalid. The maximum token value must be equal to expectedVocabularySize - 1. Currently ${maxTokenValue}, expected ${expectedVocabularySize - 1}`);
674
- }
675
- this.encode = this.encode.bind(this);
676
- this.decode = this.decode.bind(this);
677
- this.encodeGenerator = this.encodeGenerator.bind(this);
678
- this.decodeGenerator = this.decodeGenerator.bind(this);
679
- this.decodeAsyncGenerator = this.decodeAsyncGenerator.bind(this);
680
- this.decodeAsync = this.decodeAsync.bind(this);
681
- this.isWithinTokenLimit = this.isWithinTokenLimit.bind(this);
682
- this.encodeChat = this.encodeChat.bind(this);
683
- this.encodeChatGenerator = this.encodeChatGenerator.bind(this);
684
- this.countTokens = this.countTokens.bind(this);
685
- this.setMergeCacheSize = this.setMergeCacheSize.bind(this);
686
- this.clearMergeCache = this.clearMergeCache.bind(this);
687
- this.estimateCost = this.estimateCost.bind(this);
688
- this.modelName = modelName;
689
- this.modelSpec = modelSpec;
690
- }
691
- static getEncodingApi(encodingName, getMergeableRanks) {
692
- const modelParams = getEncodingParams(encodingName, getMergeableRanks);
693
- return new GptEncoding(modelParams);
694
- }
695
- static getEncodingApiForModel(modelName, getMergeableRanks, modelSpec) {
696
- const encodingName = modelToEncodingMap[modelName] ?? DEFAULT_ENCODING;
697
- const modelParams = getEncodingParams(encodingName, getMergeableRanks);
698
- return new GptEncoding({
699
- ...modelParams,
700
- modelName,
701
- modelSpec
702
- });
703
- }
704
- processSpecialTokens({ allowedSpecial, disallowedSpecial } = {}) {
705
- let regexPattern;
706
- if (allowedSpecial === ALL_SPECIAL_TOKENS || allowedSpecial?.has(ALL_SPECIAL_TOKENS)) {
707
- allowedSpecial = new Set(this.specialTokensSet);
708
- const allowedSpecialSet = allowedSpecial;
709
- if (disallowedSpecial === ALL_SPECIAL_TOKENS) throw new Error("allowedSpecial and disallowedSpecial cannot both be set to \"all\".");
710
- if (typeof disallowedSpecial === "object") disallowedSpecial.forEach((val) => allowedSpecialSet.delete(val));
711
- else disallowedSpecial = /* @__PURE__ */ new Set();
712
- }
713
- if (!disallowedSpecial || disallowedSpecial === ALL_SPECIAL_TOKENS || disallowedSpecial.has(ALL_SPECIAL_TOKENS)) {
714
- disallowedSpecial = new Set(this.specialTokensSet);
715
- const disallowedSpecialSet = disallowedSpecial;
716
- if (allowedSpecial?.size) {
717
- allowedSpecial.forEach((val) => disallowedSpecialSet.delete(val));
718
- disallowedSpecial.forEach((val) => allowedSpecial.delete(val));
719
- if (disallowedSpecial.size > 0) regexPattern = getSpecialTokenRegex(disallowedSpecial);
720
- } else regexPattern = this.allSpecialTokenRegex;
721
- }
722
- return {
723
- allowedSpecial,
724
- regexPattern
725
- };
726
- }
727
- encodeGenerator(lineToEncode, encodeOptions) {
728
- const specialTokenConfig = encodeOptions ? this.processSpecialTokens(encodeOptions) : this.defaultSpecialTokenConfig;
729
- if (specialTokenConfig.regexPattern) {
730
- const match = lineToEncode.match(specialTokenConfig.regexPattern);
731
- if (match !== null) throw new Error(`Disallowed special token found: ${match[0]}`);
732
- }
733
- return this.bytePairEncodingCoreProcessor.encodeNativeGenerator(lineToEncode, specialTokenConfig.allowedSpecial);
734
- }
735
- encode(lineToEncode, encodeOptions) {
736
- const specialTokenConfig = encodeOptions ? this.processSpecialTokens(encodeOptions) : this.defaultSpecialTokenConfig;
737
- if (specialTokenConfig.regexPattern) {
738
- const match = lineToEncode.match(specialTokenConfig.regexPattern);
739
- if (match !== null) throw new Error(`Disallowed special token found: ${match[0]}`);
740
- }
741
- return this.bytePairEncodingCoreProcessor.encodeNative(lineToEncode, specialTokenConfig.allowedSpecial);
742
- }
743
- /**
744
- * Progressively tokenizes an OpenAI chat.
745
- * Warning: gpt-3.5-turbo and gpt-4 chat format may change over time.
746
- * Returns tokens assuming the 'gpt-3.5-turbo-0301' / 'gpt-4-0314' format.
747
- * Based on OpenAI's guidelines: https://github.com/openai/openai-python/blob/main/chatml.md
748
- * Also mentioned in section 6 of this document: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
749
- */
750
- *encodeChatGenerator(chat, model = this.modelName) {
751
- if (!model) throw new Error("Model name must be provided either during initialization or passed in to the method.");
752
- const params = chatModelParams[model];
753
- const chatStartToken = this.specialTokensEncoder.get(ImStart);
754
- const chatEndToken = this.specialTokensEncoder.get(ImEnd);
755
- if (!params || chatStartToken === void 0 || chatEndToken === void 0) throw new Error(`Model '${model}' does not support chat.`);
756
- const allowedSpecial = new Set([ImSep]);
757
- const { messageSeparator, roleSeparator } = params;
758
- const encodedMessageSeparator = messageSeparator.length > 0 ? this.encode(messageSeparator) : [];
759
- const encodedRoleSeparator = roleSeparator.length > 0 ? this.encode(roleSeparator, { allowedSpecial }) : [];
760
- const nameCache = /* @__PURE__ */ new Map();
761
- for (const { role = "system", name = role, content } of chat) {
762
- if (content === void 0) throw new Error("Content must be defined for all messages.");
763
- yield [chatStartToken];
764
- const encodedName = nameCache.get(name) ?? this.encode(name);
765
- nameCache.set(name, encodedName);
766
- yield encodedName;
767
- if (encodedRoleSeparator.length > 0) yield encodedRoleSeparator;
768
- yield* this.encodeGenerator(content);
769
- yield [chatEndToken];
770
- yield encodedMessageSeparator;
771
- }
772
- yield [chatStartToken];
773
- yield* this.encodeGenerator("assistant");
774
- if (encodedRoleSeparator.length > 0) yield encodedRoleSeparator;
775
- }
776
- /**
777
- * Encodes a chat into a single array of tokens.
778
- * Warning: gpt-3.5-turbo and gpt-4 chat format may change over time.
779
- * Returns tokens assuming the 'gpt-3.5-turbo-0301' / 'gpt-4-0314' format.
780
- * Based on OpenAI's guidelines: https://github.com/openai/openai-python/blob/main/chatml.md
781
- * Also mentioned in section 6 of this document: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
782
- */
783
- encodeChat(chat, model = this.modelName) {
784
- return [...this.encodeChatGenerator(chat, model)].flat();
785
- }
786
- /**
787
- * @returns {false | number} false if token limit is exceeded, otherwise the number of tokens
788
- */
789
- isWithinTokenLimit(input, tokenLimit) {
790
- const tokenGenerator = typeof input === "string" ? this.encodeGenerator(input) : this.encodeChatGenerator(input);
791
- let count = 0;
792
- for (const tokens of tokenGenerator) {
793
- count += tokens.length;
794
- if (count > tokenLimit) return false;
795
- }
796
- return count;
797
- }
798
- /**
799
- * Counts the number of tokens in the input.
800
- * @returns {number} The number of tokens.
801
- */
802
- countTokens(input, encodeOptions) {
803
- if (typeof input === "string") {
804
- const specialTokenConfig = encodeOptions ? this.processSpecialTokens(encodeOptions) : this.defaultSpecialTokenConfig;
805
- if (specialTokenConfig.regexPattern) {
806
- const match = input.match(specialTokenConfig.regexPattern);
807
- if (match !== null) throw new Error(`Disallowed special token found: ${match[0]}`);
808
- }
809
- return this.bytePairEncodingCoreProcessor.countNative(input, specialTokenConfig.allowedSpecial);
810
- }
811
- const tokenGenerator = this.encodeChatGenerator(input);
812
- let count = 0;
813
- for (const tokens of tokenGenerator) count += tokens.length;
814
- return count;
815
- }
816
- setMergeCacheSize(size) {
817
- this.bytePairEncodingCoreProcessor.setMergeCacheSize(size);
818
- }
819
- clearMergeCache() {
820
- this.bytePairEncodingCoreProcessor.clearMergeCache();
821
- }
822
- decode(inputTokensToDecode) {
823
- return this.bytePairEncodingCoreProcessor.decodeNative(inputTokensToDecode);
824
- }
825
- *decodeGenerator(inputTokensToDecode) {
826
- const decodedByteGenerator = this.bytePairEncodingCoreProcessor.decodeNativeGenerator(inputTokensToDecode);
827
- let buffer = "";
828
- for (const decodedPart of decodedByteGenerator) {
829
- buffer += typeof decodedPart === "string" ? decodedPart : decoder.decode(decodedPart, { stream: true });
830
- if (buffer.length === 0 || endsWithIncompleteUtfPairSurrogate(buffer)) continue;
831
- else {
832
- yield buffer;
833
- buffer = "";
834
- }
835
- }
836
- if (buffer.length > 0) yield buffer;
837
- }
838
- async *decodeAsyncGenerator(inputTokensToDecode) {
839
- const decodedByteGenerator = this.bytePairEncodingCoreProcessor.decodeNativeAsyncIterable(inputTokensToDecode);
840
- let buffer = "";
841
- for await (const decodedPart of decodedByteGenerator) {
842
- buffer += typeof decodedPart === "string" ? decodedPart : decoder.decode(decodedPart, { stream: true });
843
- if (buffer.length === 0 || endsWithIncompleteUtfPairSurrogate(buffer)) continue;
844
- else {
845
- yield buffer;
846
- buffer = "";
847
- }
848
- }
849
- if (buffer.length > 0) yield buffer;
850
- }
851
- async decodeAsync(inputTokensToDecode) {
852
- const decodedByteGenerator = this.bytePairEncodingCoreProcessor.decodeNativeAsyncIterable(inputTokensToDecode);
853
- let buffer = "";
854
- for await (const decodedPart of decodedByteGenerator) buffer += typeof decodedPart === "string" ? decodedPart : decoder.decode(decodedPart, { stream: true });
855
- return buffer;
856
- }
857
- /**
858
- * Estimates the cost of processing a given token count using the model's pricing.
859
- *
860
- * @param tokenCount - The number of tokens to estimate cost for
861
- * @returns Cost estimate object with applicable price components (input, output, batchInput, batchOutput)
862
- */
863
- estimateCost(tokenCount, modelSpec = this.modelSpec) {
864
- if (!modelSpec) throw new Error("Model spec must be provided either during initialization or passed in to the method.");
865
- if (!modelSpec.price_data) throw new Error(`No cost information available for model: ${modelSpec.name}`);
866
- const priceDataPerMillion = modelSpec.price_data;
867
- const result = {};
868
- const millionTokens = tokenCount / 1e6;
869
- if (priceDataPerMillion.main) result.main = {
870
- input: priceDataPerMillion.main.input && priceDataPerMillion.main.input * millionTokens,
871
- output: priceDataPerMillion.main.output && priceDataPerMillion.main.output * millionTokens,
872
- cached_input: priceDataPerMillion.main.cached_input && priceDataPerMillion.main.cached_input * millionTokens,
873
- cached_output: priceDataPerMillion.main.cached_output && priceDataPerMillion.main.cached_output * millionTokens
874
- };
875
- if (priceDataPerMillion.batch) result.batch = {
876
- input: priceDataPerMillion.batch.input && priceDataPerMillion.batch.input * millionTokens,
877
- output: priceDataPerMillion.batch.output && priceDataPerMillion.batch.output * millionTokens,
878
- cached_input: priceDataPerMillion.batch.cached_input && priceDataPerMillion.batch.cached_input * millionTokens,
879
- cached_output: priceDataPerMillion.batch.cached_output && priceDataPerMillion.batch.cached_output * millionTokens
880
- };
881
- return result;
882
- }
883
- };
884
-
885
- //#endregion
886
- export { ALL_SPECIAL_TOKENS, DEFAULT_MERGE_CACHE_SIZE, EndOfPrompt, EndOfText, FimMiddle, FimPrefix, FimSuffix, GptEncoding, ImEnd, ImSep, ImStart };
887
- //# sourceMappingURL=GptEncoding-TiCgsNrQ.js.map