@zwa73/utils 1.0.116 → 1.0.117
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/UtilCodecs.js +15 -11
- package/dist/UtilFunctions.d.ts +2 -2
- package/dist/UtilFunctions.js +5 -3
- package/package.json +1 -1
- package/src/UtilCodecs.ts +15 -12
- package/src/UtilFunctions.ts +5 -3
- package/tsconfig.compile.json +0 -5
- package/tsconfig.json +0 -22
package/dist/UtilCodecs.js
CHANGED
|
@@ -29,8 +29,12 @@ const tiktoken_1 = require("tiktoken");
|
|
|
29
29
|
/**编码/解码器 */
|
|
30
30
|
var UtilCodec;
|
|
31
31
|
(function (UtilCodec) {
|
|
32
|
-
|
|
33
|
-
|
|
32
|
+
/**gpt-4, gpt-3.5-turbo, text-embedding-ada-002, text-embedding-3-small, text-embedding-3-large
|
|
33
|
+
*/
|
|
34
|
+
let encoderCl100kBase = null;
|
|
35
|
+
/**Codex models, text-davinci-002, text-davinci-003
|
|
36
|
+
*/
|
|
37
|
+
let encoderP50kBase = null;
|
|
34
38
|
const textDecoder = new TextDecoder();
|
|
35
39
|
/**HTML实体解码 将一个字符串中的HTML实体转换为对应的字符
|
|
36
40
|
* @param str - 要转换的字符串
|
|
@@ -55,10 +59,10 @@ var UtilCodec;
|
|
|
55
59
|
//r50k_base (or gpt2) GPT-3 models like davinci
|
|
56
60
|
//避免在nextjs调用时出错
|
|
57
61
|
function initTikTokenEncoder() {
|
|
58
|
-
if (
|
|
62
|
+
if (encoderCl100kBase != null && encoderP50kBase != null)
|
|
59
63
|
return;
|
|
60
|
-
|
|
61
|
-
|
|
64
|
+
encoderCl100kBase = (0, tiktoken_1.get_encoding)("cl100k_base");
|
|
65
|
+
encoderP50kBase = (0, tiktoken_1.get_encoding)("p50k_base");
|
|
62
66
|
}
|
|
63
67
|
/**token长度计算器 Turbo模型
|
|
64
68
|
* @param str = 所要计算的消息
|
|
@@ -67,7 +71,7 @@ var UtilCodec;
|
|
|
67
71
|
function tokenNumTurbo(str) {
|
|
68
72
|
initTikTokenEncoder();
|
|
69
73
|
//return encoder.encode(str).length
|
|
70
|
-
return
|
|
74
|
+
return encoderCl100kBase?.encode(str).length;
|
|
71
75
|
}
|
|
72
76
|
UtilCodec.tokenNumTurbo = tokenNumTurbo;
|
|
73
77
|
/**token长度计算器 Davinci模型
|
|
@@ -76,7 +80,7 @@ var UtilCodec;
|
|
|
76
80
|
*/
|
|
77
81
|
function tokenNumDavinci(str) {
|
|
78
82
|
initTikTokenEncoder();
|
|
79
|
-
return
|
|
83
|
+
return encoderP50kBase?.encode(str).length;
|
|
80
84
|
}
|
|
81
85
|
UtilCodec.tokenNumDavinci = tokenNumDavinci;
|
|
82
86
|
/**token编码 Turbo模型
|
|
@@ -85,7 +89,7 @@ var UtilCodec;
|
|
|
85
89
|
*/
|
|
86
90
|
function encodeTokenTurbo(str) {
|
|
87
91
|
initTikTokenEncoder();
|
|
88
|
-
return
|
|
92
|
+
return encoderCl100kBase?.encode(str);
|
|
89
93
|
}
|
|
90
94
|
UtilCodec.encodeTokenTurbo = encodeTokenTurbo;
|
|
91
95
|
/**token编码 Davinci模型
|
|
@@ -94,7 +98,7 @@ var UtilCodec;
|
|
|
94
98
|
*/
|
|
95
99
|
function encodeTokenDavinci(str) {
|
|
96
100
|
initTikTokenEncoder();
|
|
97
|
-
return
|
|
101
|
+
return encoderP50kBase?.encode(str);
|
|
98
102
|
}
|
|
99
103
|
UtilCodec.encodeTokenDavinci = encodeTokenDavinci;
|
|
100
104
|
/**token解码 Turbo模型
|
|
@@ -105,7 +109,7 @@ var UtilCodec;
|
|
|
105
109
|
initTikTokenEncoder();
|
|
106
110
|
if (Array.isArray(arr))
|
|
107
111
|
arr = new Uint32Array(arr);
|
|
108
|
-
return textDecoder.decode(
|
|
112
|
+
return textDecoder.decode(encoderCl100kBase?.decode(arr));
|
|
109
113
|
}
|
|
110
114
|
UtilCodec.decodeTokenTurbo = decodeTokenTurbo;
|
|
111
115
|
/**token解码 Davinci模型
|
|
@@ -116,7 +120,7 @@ var UtilCodec;
|
|
|
116
120
|
initTikTokenEncoder();
|
|
117
121
|
if (Array.isArray(arr))
|
|
118
122
|
arr = new Uint32Array(arr);
|
|
119
|
-
return textDecoder.decode(
|
|
123
|
+
return textDecoder.decode(encoderP50kBase?.decode(arr));
|
|
120
124
|
}
|
|
121
125
|
UtilCodec.decodeTokenDavinci = decodeTokenDavinci;
|
|
122
126
|
//#endregion
|
package/dist/UtilFunctions.d.ts
CHANGED
|
@@ -190,7 +190,7 @@ export declare class UtilFunc {
|
|
|
190
190
|
};
|
|
191
191
|
/**移除多行字符串中每行开始的最小空格数。
|
|
192
192
|
*
|
|
193
|
-
* @param
|
|
193
|
+
* @param input - 需要处理的多行 字符串模板 或 字符串。
|
|
194
194
|
* @param values - 插入模板字符串中的值。
|
|
195
195
|
* @returns 返回处理后的字符串,每行开始的空格数已被最小化。
|
|
196
196
|
*
|
|
@@ -205,7 +205,7 @@ export declare class UtilFunc {
|
|
|
205
205
|
* // Hello,
|
|
206
206
|
* // World!
|
|
207
207
|
*/
|
|
208
|
-
static dedent(
|
|
208
|
+
static dedent(input: TemplateStringsArray | string, ...values: any[]): string;
|
|
209
209
|
/**抛出错误
|
|
210
210
|
* @param message - 错误信息
|
|
211
211
|
* @param lvl - 日志等级
|
package/dist/UtilFunctions.js
CHANGED
|
@@ -535,7 +535,7 @@ class UtilFunc {
|
|
|
535
535
|
}
|
|
536
536
|
/**移除多行字符串中每行开始的最小空格数。
|
|
537
537
|
*
|
|
538
|
-
* @param
|
|
538
|
+
* @param input - 需要处理的多行 字符串模板 或 字符串。
|
|
539
539
|
* @param values - 插入模板字符串中的值。
|
|
540
540
|
* @returns 返回处理后的字符串,每行开始的空格数已被最小化。
|
|
541
541
|
*
|
|
@@ -550,8 +550,10 @@ class UtilFunc {
|
|
|
550
550
|
* // Hello,
|
|
551
551
|
* // World!
|
|
552
552
|
*/
|
|
553
|
-
static dedent(
|
|
554
|
-
const str =
|
|
553
|
+
static dedent(input, ...values) {
|
|
554
|
+
const str = typeof input === 'string'
|
|
555
|
+
? input
|
|
556
|
+
: input.reduce((result, string, i) => result + string + (values[i] ?? ''), '');
|
|
555
557
|
const lines = str.split('\n');
|
|
556
558
|
const minIndent = Math.min(...lines.filter(line => line.trim() !== '').map(line => line.search(/\S/)));
|
|
557
559
|
return lines.map(line => line.slice(minIndent)).join('\n');
|
package/package.json
CHANGED
package/src/UtilCodecs.ts
CHANGED
|
@@ -6,9 +6,12 @@ import {get_encoding,Tiktoken} from 'tiktoken';
|
|
|
6
6
|
/**编码/解码器 */
|
|
7
7
|
export namespace UtilCodec{
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
let
|
|
9
|
+
/**gpt-4, gpt-3.5-turbo, text-embedding-ada-002, text-embedding-3-small, text-embedding-3-large
|
|
10
|
+
*/
|
|
11
|
+
let encoderCl100kBase:Tiktoken|null = null;
|
|
12
|
+
/**Codex models, text-davinci-002, text-davinci-003
|
|
13
|
+
*/
|
|
14
|
+
let encoderP50kBase:Tiktoken|null = null;
|
|
12
15
|
const textDecoder = new TextDecoder();
|
|
13
16
|
|
|
14
17
|
/**HTML实体解码 将一个字符串中的HTML实体转换为对应的字符
|
|
@@ -36,11 +39,11 @@ export function encodeHtmlEntities(str:string) {
|
|
|
36
39
|
|
|
37
40
|
//避免在nextjs调用时出错
|
|
38
41
|
function initTikTokenEncoder (){
|
|
39
|
-
if(
|
|
42
|
+
if(encoderCl100kBase!=null && encoderP50kBase!=null)
|
|
40
43
|
return;
|
|
41
44
|
|
|
42
|
-
|
|
43
|
-
|
|
45
|
+
encoderCl100kBase = get_encoding("cl100k_base");
|
|
46
|
+
encoderP50kBase = get_encoding("p50k_base");
|
|
44
47
|
}
|
|
45
48
|
|
|
46
49
|
/**token长度计算器 Turbo模型
|
|
@@ -50,7 +53,7 @@ function initTikTokenEncoder (){
|
|
|
50
53
|
export function tokenNumTurbo(str:string):number{
|
|
51
54
|
initTikTokenEncoder();
|
|
52
55
|
//return encoder.encode(str).length
|
|
53
|
-
return
|
|
56
|
+
return encoderCl100kBase?.encode(str).length as any as number;
|
|
54
57
|
}
|
|
55
58
|
/**token长度计算器 Davinci模型
|
|
56
59
|
* @param str = 所要计算的消息
|
|
@@ -58,7 +61,7 @@ export function tokenNumTurbo(str:string):number{
|
|
|
58
61
|
*/
|
|
59
62
|
export function tokenNumDavinci(str:string):number{
|
|
60
63
|
initTikTokenEncoder();
|
|
61
|
-
return
|
|
64
|
+
return encoderP50kBase?.encode(str).length as any as number;
|
|
62
65
|
}
|
|
63
66
|
|
|
64
67
|
/**token编码 Turbo模型
|
|
@@ -67,7 +70,7 @@ export function tokenNumDavinci(str:string):number{
|
|
|
67
70
|
*/
|
|
68
71
|
export function encodeTokenTurbo(str:string):Uint32Array{
|
|
69
72
|
initTikTokenEncoder();
|
|
70
|
-
return
|
|
73
|
+
return encoderCl100kBase?.encode(str) as any as Uint32Array
|
|
71
74
|
}
|
|
72
75
|
/**token编码 Davinci模型
|
|
73
76
|
* @param str = 所要计算的消息
|
|
@@ -75,7 +78,7 @@ export function encodeTokenTurbo(str:string):Uint32Array{
|
|
|
75
78
|
*/
|
|
76
79
|
export function encodeTokenDavinci(str:string):Uint32Array{
|
|
77
80
|
initTikTokenEncoder();
|
|
78
|
-
return
|
|
81
|
+
return encoderP50kBase?.encode(str) as any as Uint32Array;
|
|
79
82
|
}
|
|
80
83
|
/**token解码 Turbo模型
|
|
81
84
|
* @param arr = Token数组
|
|
@@ -84,7 +87,7 @@ export function encodeTokenDavinci(str:string):Uint32Array{
|
|
|
84
87
|
export function decodeTokenTurbo(arr:Uint32Array|number[]):string{
|
|
85
88
|
initTikTokenEncoder();
|
|
86
89
|
if(Array.isArray(arr)) arr = new Uint32Array(arr);
|
|
87
|
-
return textDecoder.decode(
|
|
90
|
+
return textDecoder.decode(encoderCl100kBase?.decode(arr));
|
|
88
91
|
}
|
|
89
92
|
/**token解码 Davinci模型
|
|
90
93
|
* @param arr = Token数组
|
|
@@ -93,7 +96,7 @@ export function decodeTokenTurbo(arr:Uint32Array|number[]):string{
|
|
|
93
96
|
export function decodeTokenDavinci(arr:Uint32Array|number[]):string{
|
|
94
97
|
initTikTokenEncoder();
|
|
95
98
|
if(Array.isArray(arr)) arr = new Uint32Array(arr);
|
|
96
|
-
return textDecoder.decode(
|
|
99
|
+
return textDecoder.decode(encoderP50kBase?.decode(arr));
|
|
97
100
|
}
|
|
98
101
|
//#endregion
|
|
99
102
|
}
|
package/src/UtilFunctions.ts
CHANGED
|
@@ -609,7 +609,7 @@ static bindTo<K extends Keyable, V, B extends {} = {}>
|
|
|
609
609
|
}
|
|
610
610
|
/**移除多行字符串中每行开始的最小空格数。
|
|
611
611
|
*
|
|
612
|
-
* @param
|
|
612
|
+
* @param input - 需要处理的多行 字符串模板 或 字符串。
|
|
613
613
|
* @param values - 插入模板字符串中的值。
|
|
614
614
|
* @returns 返回处理后的字符串,每行开始的空格数已被最小化。
|
|
615
615
|
*
|
|
@@ -624,8 +624,10 @@ static bindTo<K extends Keyable, V, B extends {} = {}>
|
|
|
624
624
|
* // Hello,
|
|
625
625
|
* // World!
|
|
626
626
|
*/
|
|
627
|
-
static dedent(
|
|
628
|
-
const str =
|
|
627
|
+
static dedent(input: TemplateStringsArray|string, ...values: any[]): string {
|
|
628
|
+
const str = typeof input === 'string'
|
|
629
|
+
? input
|
|
630
|
+
: input.reduce((result, string, i) => result + string + (values[i] ?? ''), '');
|
|
629
631
|
const lines = str.split('\n');
|
|
630
632
|
const minIndent = Math.min(
|
|
631
633
|
...lines.filter(line => line.trim() !== '').map(line => line.search(/\S/))
|
package/tsconfig.compile.json
DELETED
package/tsconfig.json
DELETED
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"compilerOptions": {
|
|
3
|
-
"allowJs": true,
|
|
4
|
-
"strict": true,
|
|
5
|
-
"target": "ES2022",
|
|
6
|
-
"module": "CommonJS",
|
|
7
|
-
"moduleResolution": "node",
|
|
8
|
-
"esModuleInterop": true,
|
|
9
|
-
"outDir": "./dist",
|
|
10
|
-
"declaration": true,
|
|
11
|
-
"baseUrl": ".",
|
|
12
|
-
"emitDecoratorMetadata": true,
|
|
13
|
-
"experimentalDecorators": true,
|
|
14
|
-
"paths": {
|
|
15
|
-
"@src/*": ["./src/*"],
|
|
16
|
-
"@/*" : ["./*"],
|
|
17
|
-
"@" : ["./src/index"]
|
|
18
|
-
}
|
|
19
|
-
},
|
|
20
|
-
"include": ["./src/**/*.ts", "./src/**/*.js","./jest/**/*.ts"],
|
|
21
|
-
"exclude": ["./node_modules/**/*"]
|
|
22
|
-
}
|