@kevisual/ai 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/dist/ai-provider-browser.d.ts +9 -1
  2. package/dist/ai-provider-browser.js +13 -1
  3. package/dist/ai-provider.d.ts +9 -1
  4. package/dist/ai-provider.js +13 -1
  5. package/package.json +9 -5
  6. package/src/modules/logger.ts +6 -0
  7. package/src/provider/chat-adapter/custom.ts +14 -0
  8. package/src/provider/chat-adapter/dashscope.ts +10 -0
  9. package/src/provider/chat-adapter/deepseek.ts +10 -0
  10. package/src/provider/chat-adapter/model-scope.ts +11 -0
  11. package/src/provider/chat-adapter/ollama.ts +47 -0
  12. package/src/provider/chat-adapter/siliconflow.ts +39 -0
  13. package/src/provider/chat-adapter/volces.ts +10 -0
  14. package/src/provider/chat.ts +67 -0
  15. package/src/provider/core/chat.ts +152 -0
  16. package/src/provider/core/index.ts +27 -0
  17. package/src/provider/core/text-regex.ts +105 -0
  18. package/src/provider/core/type.ts +29 -0
  19. package/src/provider/index.ts +5 -0
  20. package/src/provider/knowledge-adapter/knowledge-base.ts +107 -0
  21. package/src/provider/knowledge-adapter/knowledge.ts +7 -0
  22. package/src/provider/knowledge-adapter/siliconflow.ts +24 -0
  23. package/src/provider/knowledge.ts +6 -0
  24. package/src/provider/media/index.ts +1 -0
  25. package/src/provider/media/video/siliconflow.ts +37 -0
  26. package/src/provider/utils/ai-config-type.ts +52 -0
  27. package/src/provider/utils/chunk.ts +86 -0
  28. package/src/provider/utils/index.ts +2 -0
  29. package/src/provider/utils/parse-config.ts +192 -0
  30. package/src/provider/utils/token.ts +34 -0
  31. package/src/test/chunks/01-get.ts +65 -0
  32. package/src/test/encrypt/index.ts +9 -0
  33. package/src/test/func-call/curl.sh +35 -0
  34. package/src/test/func-call/demo.ts +116 -0
  35. package/src/test/model-scope/index.ts +26 -0
  36. package/src/test/ollama-knowledge.ts +37 -0
  37. package/src/test/ollama.ts +86 -0
  38. package/src/test/provider/index.ts +7 -0
  39. package/src/test/siliconflow/common.ts +15 -0
  40. package/src/test/siliconflow/get.ts +22 -0
  41. package/src/test/siliconflow/knowledge/create.ts +18 -0
  42. package/src/test/siliconflow/knowledge/qwen.md +232 -0
  43. package/src/test/siliconflow/rerank/fc.ts +28 -0
  44. package/src/test/siliconflow/rerank/index.ts +34 -0
  45. package/src/test/siliconflow/videos/index.ts +100 -0
  46. package/src/utils/json.ts +12 -0
package/src/provider/utils/token.ts
@@ -0,0 +1,34 @@
+ import { encoding_for_model, get_encoding } from 'tiktoken';
+
+
+ const MODEL_TO_ENCODING = {
+   'gpt-4': 'cl100k_base',
+   'gpt-4-turbo': 'cl100k_base',
+   'gpt-3.5-turbo': 'cl100k_base',
+   'text-embedding-ada-002': 'cl100k_base',
+   'text-davinci-002': 'p50k_base',
+   'text-davinci-003': 'p50k_base',
+ } as const;
+
+ export function numTokensFromString(text: string, model: keyof typeof MODEL_TO_ENCODING = 'gpt-3.5-turbo'): number {
+   try {
+     // Use the model-specific encoder when available
+     const encoder = encoding_for_model(model);
+     const tokens = encoder.encode(text);
+     const tokenCount = tokens.length;
+     encoder.free(); // Release the encoder
+     return tokenCount;
+   } catch (error) {
+     try {
+       // If the model-specific encoder fails, try the base encoding instead
+       const encoder = get_encoding(MODEL_TO_ENCODING[model]);
+       const tokens = encoder.encode(text);
+       const tokenCount = tokens.length;
+       encoder.free(); // Release the encoder
+       return tokenCount;
+     } catch (error) {
+       // If encoding still fails, use a rough estimate: ~0.25 tokens per character
+       return Math.ceil(text.length * 0.25);
+     }
+   }
+ }
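
For reference, a minimal usage sketch of the helper above (hedged; not part of the published diff, and the relative import path is an assumption for a caller sitting next to token.ts):

    import { numTokensFromString } from './token.ts';

    // Counts with tiktoken; falls back to Math.ceil(text.length * 0.25) if both encoders throw.
    console.log(numTokensFromString('Hello world 你好', 'gpt-4'));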
package/src/test/chunks/01-get.ts
@@ -0,0 +1,65 @@
+ import { getChunks } from '../../provider/utils/chunk.ts';
+
+ const str = 'Hello world this is a test 你好沙盒 very big';
+
+
+ const str2 = `不能直接使用 tiktoken(OpenAI的分词器)来计算 Qwen 模型的 Token 数量,因为两者的分词规则(Tokenization)和词表(Vocabulary)完全不同。
+
+ 为什么不能混用?
+ 词表不同
+
+ tiktoken 是 OpenAI 为 GPT 系列设计的(如 gpt-3.5-turbo, gpt-4),其词表针对英语和代码优化。
+
+ Qwen 使用独立训练的 BPE 词表,对中文、多语言的支持更友好,分词粒度可能不同。
+
+ 分词结果差异大
+ 同一段文本,tiktoken 和 Qwen 的分词结果可能完全不同。例如:
+
+ OpenAI (tiktoken): "你好" → ['你', '好'](2 Tokens)
+
+ Qwen: "你好" → ['你好'](1 Token,如果词表中包含该组合)
+
+ 性能问题
+ 即使强制使用 tiktoken 计算 Qwen 的 Token,结果也不准确,可能导致:
+
+ 输入超出模型上下文限制(因统计偏差)。
+
+ API 计费或本地推理时出现意外错误。
+
+ 正确方法:用 Qwen 的分词器
+ 通过 Hugging Face transformers 加载 Qwen 的原生分词器:
+
+ python
+ 复制
+ from transformers import AutoTokenizer
+
+ # 加载 Qwen 的分词器(以 Qwen-7B 为例)
+ tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-7B", trust_remote_code=True)
+
+ text = "你好,Qwen模型!"
+ tokens = tokenizer.tokenize(text) # 查看分词结果
+ token_count = len(tokenizer.encode(text, add_special_tokens=False))
+
+ print("分词结果:", tokens)
+ print("Token数量:", token_count)
+ 常见问题
+ 为什么需要 trust_remote_code=True?
+ Qwen 的分词器是自定义实现的(非 Hugging Face 原生),此参数允许从模型仓库加载运行代码。
+
+ 其他语言的 Token 计算?
+ Qwen 对非英语(如中文、日文)的分词效率较高,但仍需用其原生分词器统计。
+
+ 与 tiktoken 的速度对比?
+ tiktoken 是纯 Python 实现,速度较快;Qwen 的分词器基于 Hugging Face,可能稍慢但对齐模型需求。
+
+ 总结
+ 禁止混用:tiktoken ≠ Qwen 分词器。
+
+ 始终使用模型配套工具:Qwen 需通过 transformers 加载其官方分词器。
+
+ 中文场景特别注意:Qwen 对中文的分词更高效,直接使用可避免偏差。
+
+ 如果需要验证分词规则,可通过 tokenizer.vocab 查看词表内容(但注意词表通常较大)。`
+
+ const chunks = getChunks(str2);
+ console.log(chunks);
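
As a hedged aside (not part of the diff): the chunks could be sanity-checked against the token counter added in token.ts. Since chunk.ts itself (+86 lines) is not shown here, the string shape of each chunk below is an assumption:

    import { numTokensFromString } from '../../provider/utils/token.ts';

    // Assumption: getChunks returns an array of string chunks.
    for (const chunk of getChunks(str2)) {
      console.log(numTokensFromString(String(chunk)), String(chunk).slice(0, 40));
    }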
package/src/test/encrypt/index.ts
@@ -0,0 +1,9 @@
+ import { encryptAES, decryptAES } from '../../provider/utils/parse-config.ts';
+
+ const plainx = process.env.API_KEY;
+ const decryptKey = process.env.DECRYPT_KEY;
+ const encrypt = encryptAES(plainx, decryptKey);
+ console.log('encrypt', encrypt);
+
+ const decrypt = decryptAES(encrypt, decryptKey);
+ console.log(decrypt);
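
A hedged sketch of the round-trip property this test relies on (signatures inferred from the usage above; parse-config.ts itself runs to +192 lines in this diff and is not shown here):

    import assert from 'node:assert';
    import { encryptAES, decryptAES } from '../../provider/utils/parse-config.ts';

    // Assumption: the AES helpers are symmetric, so decrypting with the
    // same key recovers the original plaintext.
    const key = 'example-key';
    assert.strictEqual(decryptAES(encryptAES('hello', key), key), 'hello');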
package/src/test/func-call/curl.sh
@@ -0,0 +1,35 @@
+ curl --request POST \
+   --url https://api.siliconflow.cn/v1/chat/completions \
+   --header 'Authorization: Bearer sk-qbiigkzoaamuqxtwlgkugodncebkfbosemadfubjrseobpvx' \
+   --header 'Content-Type: application/json' \
+   --data '{
+     "model": "Qwen/Qwen3-14B",
+     "messages": [
+       {
+         "role": "user",
+         "content": "计算a+b的值"
+       }
+     ],
+     "stream": false,
+     "max_tokens": 512,
+     "stop": null,
+     "temperature": 0.7,
+     "top_p": 0.7,
+     "top_k": 50,
+     "frequency_penalty": 0.5,
+     "n": 1,
+     "response_format": {
+       "type": "text"
+     },
+     "tools": [
+       {
+         "type": "function",
+         "function": {
+           "description": "计算a,b,c算法的值,a=1,b=2,c=3",
+           "name": "compouted",
+           "parameters": {},
+           "strict": false
+         }
+       }
+     ]
+   }'
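
For comparison, a hedged TypeScript sketch of the same request via fetch (endpoint, model, and body fields copied from curl.sh above; the tools array is omitted for brevity, and the API key is read from the environment rather than hard-coded):

    const res = await fetch('https://api.siliconflow.cn/v1/chat/completions', {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${process.env.SILICONFLOW_API_KEY}`,
        'Content-Type': 'application/json',
      },
      // Same payload as curl.sh, minus the tools array.
      body: JSON.stringify({
        model: 'Qwen/Qwen3-14B',
        messages: [{ role: 'user', content: '计算a+b的值' }],
        stream: false,
        max_tokens: 512,
      }),
    });
    console.log(await res.json());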
package/src/test/func-call/demo.ts
@@ -0,0 +1,116 @@
+ import { SiliconFlow } from '../../provider/chat-adapter/siliconflow.ts';
+ import { Ollama } from '../../provider/chat-adapter/ollama.ts';
+ import dotenv from 'dotenv';
+
+ dotenv.config();
+ const siliconflow = new SiliconFlow({
+   apiKey: process.env.SILICONFLOW_API_KEY,
+   model: 'Qwen/Qwen3-14B',
+ });
+ const ollama = new Ollama({
+   model: 'qwen3:32b',
+   apiKey: process.env.OLLAMA_API_KEY,
+   baseURL: process.env.OLLAMA_BASE_URL,
+ });
+ const main = async () => {
+   const usage = await siliconflow.getUsageInfo();
+   console.log(usage);
+ };
+ // 1. Define the tool functions
+ const availableFunctions: Record<string, (args: any) => Promise<any>> = {
+   get_time: async (args: { location: string }) => {
+     // Simulate an API call
+     console.log('time', args);
+     return {
+       time: '2022-03-22 12:00:00',
+     };
+   },
+   get_location: async (args: { symbol: string }) => {
+     // Simulate an API call
+     console.log('location', args);
+     return {
+       city: 'Beijing',
+     };
+   },
+ };
+
+ // main();
+ const funcCall = async (model = siliconflow) => {
+   const tools = [
+     {
+       type: 'function',
+       function: {
+         name: 'get_time',
+         description: '获取当前时间',
+         parameters: {
+           type: 'object',
+           properties: {
+             place: {
+               type: 'string',
+               description: '位置',
+             },
+           },
+           required: ['place'],
+         },
+       },
+     },
+     {
+       type: 'function',
+       function: {
+         name: 'get_location',
+         description: '获取当前位置',
+         // parameters: {},
+         parameters: {},
+         strict: false,
+       },
+     },
+   ];
+   const messages: any[] = [{ role: 'user', content: '获取当前位置的当前时间' }];
+   const res = await model.chat(messages, {
+     tools: tools as any,
+   });
+   console.log(res.choices[0]);
+   const assistantMessage = res.choices[0].message;
+   const finish_reason = res.choices[0].finish_reason;
+   messages.push(assistantMessage);
+   let toolCalls = assistantMessage.tool_calls;
+   console.log('toolCalls', JSON.stringify(toolCalls));
+   let maxRetries = 3;
+   while (toolCalls && toolCalls.length > 0) {
+     // Handle each function call
+     for (const toolCall of toolCalls) {
+       const functionName = toolCall.function.name;
+       const functionArgs = JSON.parse(toolCall.function.arguments);
+       // Invoke the local function
+       const functionResponse = await availableFunctions[functionName](functionArgs);
+       // Append the result to the message history
+       messages.push({
+         role: 'tool',
+         name: functionName,
+         content: JSON.stringify(functionResponse),
+         tool_call_id: toolCall.id,
+       });
+     }
+
+     // Second call - send the tool results back to the model for the final reply
+     const secondResponse = await model.chat(messages, {
+       tools: tools as any,
+     });
+
+     const finalMessage = secondResponse.choices[0].message;
+     messages.push(finalMessage);
+     const _toolCalls = finalMessage.tool_calls;
+     console.log('toolCalls', JSON.stringify(_toolCalls), finalMessage.role);
+     toolCalls = _toolCalls ? _toolCalls : [];
+     maxRetries--;
+     if (maxRetries <= 0) {
+       break;
+     }
+
+     console.log('tool calls', toolCalls);
+   }
+
+   console.log(messages);
+ };
+
+ funcCall(ollama as any);
1
+ import { ModelScope } from '../..//provider/chat-adapter/model-scope.ts';
2
+ import { log } from '../..//logger/index.ts';
3
+ import util from 'util';
4
+ import { config } from 'dotenv';
5
+ config();
6
+
7
+ const chat = new ModelScope({
8
+ apiKey: process.env.MODEL_SCOPE_API_KEY,
9
+ model: 'Qwen/Qwen2.5-Coder-32B-Instruct',
10
+ });
11
+
12
+ // chat.chat([{ role: 'user', content: 'Hello, world! 1 + 1 equals ?' }]);
13
+ const chatMessage = [{ role: 'user', content: 'Hello, world! 1 + 1 equals ?' }];
14
+
15
+ const main = async () => {
16
+ const res = await chat.test();
17
+ log.info('test', res);
18
+ };
19
+
20
+ main();
21
+ const mainChat = async () => {
22
+ const res = await chat.chat(chatMessage as any);
23
+ log.info('chat', res);
24
+ };
25
+
26
+ // mainChat();
@@ -0,0 +1,37 @@
1
+ import { Knowledge } from '../../../../src/provider/knowledge/knowledge.ts';
2
+ import fs from 'fs';
3
+ import dotenv from 'dotenv';
4
+
5
+ dotenv.config();
6
+ const knowledge = new Knowledge({
7
+ embeddingModel: 'bge-m3:latest',
8
+ baseURL: 'https://ollama.xiongxiao.me/v1',
9
+ model: 'qwq:latest',
10
+ apiKey: process.env.OLLAMA_API_KEY,
11
+ });
12
+
13
+ const main = async () => {
14
+ const res = await knowledge.generateEmbeddingCore('Hello world this is a test 你好沙盒 very big');
15
+ fs.writeFileSync('docs/embedding.json', JSON.stringify(res, null, 2));
16
+ console.log(res);
17
+ };
18
+
19
+ main();
20
+
21
+ const main2 = async () => {
22
+ const text1 = 'Hello, world! this is a test';
23
+ const text2 = 'Hello, world! this is a test 2';
24
+ const text3 = 'Hello, world! this is a test 3';
25
+ const text4 = 'Hello, world! this is a test 4';
26
+ const text5 = 'Hello, world! this is a test 5';
27
+ const text6 = 'Hello, world! this is a test 6';
28
+ const text7 = 'Hello, world! this is a test 7';
29
+ const text8 = 'Hello, world! this is a test 8';
30
+ const text9 = 'Hello, world! this is a test 9';
31
+ const text10 = 'Hello, world! this is a test 10';
32
+ const res = await knowledge.generateEmbeddingCore([text1, text2, text3, text4, text5, text6, text7, text8, text9, text10]);
33
+ fs.writeFileSync('docs/embedding2.json', JSON.stringify(res, null, 2));
34
+ console.log(res);
35
+ };
36
+
37
+ // main2();
package/src/test/ollama.ts
@@ -0,0 +1,86 @@
+ import { Ollama } from '../../../../src/provider/chat-adapter/ollama.ts';
+ import util from 'util';
+ const chat = new Ollama({
+   baseURL: 'https://ollama.xiongxiao.me/v1',
+   apiKey: 'xiongxiao2233',
+   model: 'qwq:latest',
+ });
+
+ // chat.chat([{ role: 'user', content: 'Hello, world!' }]);
+
+ const main = async () => {
+   const res = await chat.test();
+   console.log(util.inspect(res, { depth: null, colors: true }));
+ };
+
+ // main();
+
+ const getJson = async () => {
+   const res = await chat.chat(
+     [
+       { role: 'system', content: '把发送的数据,返回给我对应的json,只处理完发送的数据。如果发送了多个,给我一个数组' },
+       // { role: 'user', content: '{"name":"John","age":30}' },
+       { role: 'user', content: 'name: 张三' },
+       { role: 'user', content: 'name: 李四, age: 18' },
+     ],
+     {
+       response_format: {
+         type: 'json_schema',
+         json_schema: {
+           name: 'user',
+           description: '用户信息',
+           schema: {
+             type: 'object',
+             // properties: {
+             //   name: { type: 'string' },
+             //   // age: { type: 'number' },
+             // },
+             // // required: ['name', 'age'],
+             // required: ['name'],
+             properties: {
+               name: { type: 'string' },
+               age: { type: 'number' },
+             },
+             required: ['name', 'age'],
+           },
+         },
+       },
+       n: 10,
+     },
+   );
+   console.log(util.inspect(res, { depth: null, colors: true }));
+ };
+
+ // getJson();
+
+ const createChat1 = async () => {
+   const res = await chat.chat(
+     [
+       { role: 'user', content: 'a=1, b=2, c=3' },
+       { role: 'user', content: 'a+b+c=?' },
+       { role: 'assistant', content: '给定的值为 \\( a = 1 \\), \\( b = 2 \\), \\( c = 3 \\)。\n' + '\n' + '因此,\\( a + b + c = 1 + 2 + 3 = 6 \\)。' },
+       { role: 'user', content: 'a+b+c+4=?' },
+     ],
+     {
+       model: 'qwen2.5:7b',
+     },
+   );
+   console.log(util.inspect(res, { depth: null, colors: true }));
+ };
+
+ // createChat1();
+
+ const getTags = async () => {
+   const res = await chat.listModels();
+   console.log(util.inspect(res, { depth: null, colors: true }));
+ };
+
+ // getTags();
+
+ const getRunModels = async () => {
+   const res = await chat.listRunModels();
+   console.log('current', new Date().toISOString());
+   console.log(util.inspect(res, { depth: null, colors: true }));
+ };
+
+ // getRunModels();
package/src/test/provider/index.ts
@@ -0,0 +1,7 @@
+ import { ProviderManager } from '../../provider/index.ts';
+ import { config } from 'dotenv';
+ config();
+ const providerConfig = { provider: 'ModelScope', model: 'Qwen/Qwen2.5-Coder-32B-Instruct', apiKey: process.env.MODEL_SCOPE_API_KEY };
+ const provider = await ProviderManager.createProvider(providerConfig);
+ const result = await provider.chat([{ role: 'user', content: '你好' }]);
+ console.log(result);
package/src/test/siliconflow/common.ts
@@ -0,0 +1,15 @@
+ import { SiliconFlow } from '../../../src/provider/chat-adapter/siliconflow.ts';
+ import { SiliconFlowKnowledge } from '../../provider/knowledge-adapter/siliconflow.ts';
+ import dotenv from 'dotenv';
+
+ dotenv.config();
+ export const siliconflow = new SiliconFlow({
+   apiKey: process.env.SILICONFLOW_API_KEY,
+   model: 'Qwen/Qwen2-7B-Instruct',
+ });
+
+ export const knowledge = new SiliconFlowKnowledge({
+   apiKey: process.env.SILICONFLOW_API_KEY,
+   model: 'Qwen/Qwen2-7B-Instruct',
+   embeddingModel: 'Pro/BAAI/bge-m3',
+ });
package/src/test/siliconflow/get.ts
@@ -0,0 +1,22 @@
+ import { SiliconFlow } from '../../provider/chat-adapter/siliconflow.ts';
+ import dotenv from 'dotenv';
+
+ dotenv.config();
+ const siliconflow = new SiliconFlow({
+   apiKey: process.env.SILICONFLOW_API_KEY,
+   model: 'Qwen/Qwen2-7B-Instruct',
+ });
+
+
+ const main = async () => {
+   const usage = await siliconflow.getUsageInfo();
+   console.log(usage);
+ };
+
+ main();
+ const mainChat = async () => {
+   const res = await siliconflow.chat([{ role: 'user', content: 'Hello, world! 1 + 1 equals ?' }]);
+   console.log(res);
+ };
+
+ // mainChat();
package/src/test/siliconflow/knowledge/create.ts
@@ -0,0 +1,18 @@
+ import { knowledge } from '../common.ts';
+ import fs from 'node:fs';
+ import path from 'node:path';
+ import { fileURLToPath } from 'url';
+ import { dirname } from 'path';
+
+ const __filename = fileURLToPath(import.meta.url);
+ const __dirname = dirname(__filename);
+ // Contains: 9184 Chinese characters, 953 full-width punctuation marks, 2493 letters, 52 digits
+ const content = fs.readFileSync(path.join(__dirname, 'qwen.md'), 'utf-8');
+ const text = 'Hello, world';
+ const main = async () => {
+   const res = await knowledge.generateEmbeddingCore([content, content]);
+   console.log(res);
+   // 8000 tokens is roughly 10,000 Chinese characters / 20,000 characters
+   console.log('speak', knowledge.getChatUsage());
+ };
+ main();