@zwa73/utils 1.0.8 → 1.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,8 +3,8 @@ Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.decodeTokenDavinci = exports.decodeTokenTurbo = exports.encodeTokenDavinci = exports.encodeTokenTurbo = exports.tokenNumDavinci = exports.tokenNumTurbo = exports.encodeHtmlEntities = exports.decodeHtmlEntities = void 0;
4
4
  const he = require("html-entities");
5
5
  const tiktoken_1 = require("tiktoken");
6
- const encoderTurbo = (0, tiktoken_1.get_encoding)("cl100k_base");
7
- const encoderDavinci = (0, tiktoken_1.get_encoding)("p50k_base");
6
+ let encoderTurbo = null;
7
+ let encoderDavinci = null;
8
8
  const textDecoder = new TextDecoder();
9
9
  // 定义一个对象,存储常见的HTML实体和对应的字符
10
10
  let htmlEntities = {
@@ -46,13 +46,21 @@ exports.encodeHtmlEntities = encodeHtmlEntities;
46
46
  //cl100k_base ChatGPT models, text-embedding-ada-002
47
47
  //p50k_base Code models, text-davinci-002, text-davinci-003
48
48
  //r50k_base (or gpt2) GPT-3 models like davinci
49
+ //避免在nextjs调用时出错
50
+ function initTikTokenEncoder() {
51
+ if (encoderTurbo != null && encoderDavinci != null)
52
+ return;
53
+ encoderTurbo = (0, tiktoken_1.get_encoding)("cl100k_base");
54
+ encoderDavinci = (0, tiktoken_1.get_encoding)("p50k_base");
55
+ }
49
56
  /**token长度计算器 Turbo模型
50
57
  * @param {string} str = 所要计算的消息
51
58
  * @returns {number} 整数长度结果
52
59
  */
53
60
  function tokenNumTurbo(str) {
61
+ initTikTokenEncoder();
54
62
  //return encoder.encode(str).length
55
- return encoderTurbo.encode(str).length;
63
+ return encoderTurbo?.encode(str).length;
56
64
  }
57
65
  exports.tokenNumTurbo = tokenNumTurbo;
58
66
  /**token长度计算器 Davinci模型
@@ -60,7 +68,8 @@ exports.tokenNumTurbo = tokenNumTurbo;
60
68
  * @returns {number} 整数长度结果
61
69
  */
62
70
  function tokenNumDavinci(str) {
63
- return encoderDavinci.encode(str).length;
71
+ initTikTokenEncoder();
72
+ return encoderDavinci?.encode(str).length;
64
73
  }
65
74
  exports.tokenNumDavinci = tokenNumDavinci;
66
75
  /**token编码 Turbo模型
@@ -68,7 +77,8 @@ exports.tokenNumDavinci = tokenNumDavinci;
68
77
  * @returns {Array<number>} Token数组
69
78
  */
70
79
  function encodeTokenTurbo(str) {
71
- return encoderTurbo.encode(str);
80
+ initTikTokenEncoder();
81
+ return encoderTurbo?.encode(str);
72
82
  }
73
83
  exports.encodeTokenTurbo = encodeTokenTurbo;
74
84
  /**token编码 Davinci模型
@@ -76,7 +86,8 @@ exports.encodeTokenTurbo = encodeTokenTurbo;
76
86
  * @returns {Array<number>} Token数组
77
87
  */
78
88
  function encodeTokenDavinci(str) {
79
- return encoderDavinci.encode(str);
89
+ initTikTokenEncoder();
90
+ return encoderDavinci?.encode(str);
80
91
  }
81
92
  exports.encodeTokenDavinci = encodeTokenDavinci;
82
93
  /**token解码 Turbo模型
@@ -84,7 +95,8 @@ exports.encodeTokenDavinci = encodeTokenDavinci;
84
95
  * @returns {string} 消息字符串
85
96
  */
86
97
  function decodeTokenTurbo(arr) {
87
- return textDecoder.decode(encoderTurbo.decode(arr));
98
+ initTikTokenEncoder();
99
+ return textDecoder.decode(encoderTurbo?.decode(arr));
88
100
  }
89
101
  exports.decodeTokenTurbo = decodeTokenTurbo;
90
102
  /**token解码 Davinci模型
@@ -92,6 +104,7 @@ exports.decodeTokenTurbo = decodeTokenTurbo;
92
104
  * @returns {string} 消息字符串
93
105
  */
94
106
  function decodeTokenDavinci(arr) {
95
- return textDecoder.decode(encoderDavinci.decode(arr));
107
+ initTikTokenEncoder();
108
+ return textDecoder.decode(encoderDavinci?.decode(arr));
96
109
  }
97
110
  exports.decodeTokenDavinci = decodeTokenDavinci;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@zwa73/utils",
3
- "version": "1.0.8",
3
+ "version": "1.0.9",
4
4
  "description": "my utils",
5
5
  "main": "index.js",
6
6
  "scripts": {
package/publish.bat ADDED
@@ -0,0 +1,2 @@
1
+ npm publish --access public
2
+ pause
package/src/UtilCodecs.ts CHANGED
@@ -1,7 +1,7 @@
1
1
  import * as he from 'html-entities';
2
- import {get_encoding} from 'tiktoken';
3
- const encoderTurbo = get_encoding("cl100k_base");
4
- const encoderDavinci = get_encoding("p50k_base");
2
+ import {get_encoding,Tiktoken} from 'tiktoken';
3
+ let encoderTurbo:Tiktoken|null = null;
4
+ let encoderDavinci:Tiktoken|null = null;
5
5
  const textDecoder = new TextDecoder();
6
6
 
7
7
 
@@ -48,20 +48,32 @@ export function encodeHtmlEntities(str:string) {
48
48
  //p50k_base Code models, text-davinci-002, text-davinci-003
49
49
  //r50k_base (or gpt2) GPT-3 models like davinci
50
50
 
51
+
52
+ //避免在nextjs调用时出错
53
+ function initTikTokenEncoder (){
54
+ if(encoderTurbo!=null && encoderDavinci!=null)
55
+ return;
56
+
57
+ encoderTurbo = get_encoding("cl100k_base");
58
+ encoderDavinci = get_encoding("p50k_base");
59
+ }
60
+
51
61
  /**token长度计算器 Turbo模型
52
62
  * @param {string} str = 所要计算的消息
53
63
  * @returns {number} 整数长度结果
54
64
  */
55
- export function tokenNumTurbo(str:string){
65
+ export function tokenNumTurbo(str:string):number{
66
+ initTikTokenEncoder();
56
67
  //return encoder.encode(str).length
57
- return encoderTurbo.encode(str).length;
68
+ return encoderTurbo?.encode(str).length as any as number;
58
69
  }
59
70
  /**token长度计算器 Davinci模型
60
71
  * @param {string} str = 所要计算的消息
61
72
  * @returns {number} 整数长度结果
62
73
  */
63
74
  export function tokenNumDavinci(str:string):number{
64
- return encoderDavinci.encode(str).length;
75
+ initTikTokenEncoder();
76
+ return encoderDavinci?.encode(str).length as any as number;
65
77
  }
66
78
 
67
79
  /**token编码 Turbo模型
@@ -69,26 +81,30 @@ export function tokenNumDavinci(str:string):number{
69
81
  * @returns {Array<number>} Token数组
70
82
  */
71
83
  export function encodeTokenTurbo(str:string):Uint32Array{
72
- return encoderTurbo.encode(str)
84
+ initTikTokenEncoder();
85
+ return encoderTurbo?.encode(str) as any as Uint32Array
73
86
  }
74
87
  /**token编码 Davinci模型
75
88
  * @param {string} str = 所要计算的消息
76
89
  * @returns {Array<number>} Token数组
77
90
  */
78
91
  export function encodeTokenDavinci(str:string):Uint32Array{
79
- return encoderDavinci.encode(str)
92
+ initTikTokenEncoder();
93
+ return encoderDavinci?.encode(str) as any as Uint32Array;
80
94
  }
81
95
  /**token解码 Turbo模型
82
96
  * @param {Array<number>} arr = Token数组
83
97
  * @returns {string} 消息字符串
84
98
  */
85
99
  export function decodeTokenTurbo(arr:Uint32Array):string{
86
- return textDecoder.decode(encoderTurbo.decode(arr));
100
+ initTikTokenEncoder();
101
+ return textDecoder.decode(encoderTurbo?.decode(arr));
87
102
  }
88
103
  /**token解码 Davinci模型
89
104
  * @param {Array<number>} arr = Token数组
90
105
  * @returns {string} 消息字符串
91
106
  */
92
107
  export function decodeTokenDavinci(arr:Uint32Array):string{
93
- return textDecoder.decode(encoderDavinci.decode(arr));
108
+ initTikTokenEncoder();
109
+ return textDecoder.decode(encoderDavinci?.decode(arr));
94
110
  }