@kevisual/ai 0.0.11 → 0.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai-provider-browser.d.ts +102 -20
- package/dist/ai-provider-browser.js +2056 -7512
- package/dist/ai-provider.d.ts +102 -20
- package/dist/ai-provider.js +220 -5676
- package/package.json +16 -16
- package/src/provider/chat-adapter/kimi.ts +10 -0
- package/src/provider/chat-adapter/ollama.ts +2 -2
- package/src/provider/chat-adapter/siliconflow.ts +1 -1
- package/src/provider/chat-adapter/zhipu.ts +10 -0
- package/src/provider/chat.ts +6 -0
- package/src/provider/core/chat.ts +104 -39
- package/src/provider/core/index.ts +0 -11
- package/src/provider/core/utils/index.ts +192 -0
- package/src/provider/knowledge-adapter/siliconflow.ts +2 -2
- package/src/test/aliyun/test.ts +46 -13
- package/src/provider/utils/chunk.ts +0 -86
|
@@ -1,86 +0,0 @@
|
|
|
1
|
-
import { numTokensFromString } from './token.ts';
|
|
2
|
-
|
|
3
|
-
// Constants

/** Maximum number of tokens allowed in a single chunk. */
const CHUNK_SIZE = 512;

/** Sentinel string used to mark split points inside a piece of text. */
const MAGIC_SEPARATOR = '🦛';

/**
 * Characters after which a sentence may be split.
 *
 * Each entry appears exactly once. The list covers the ASCII marks, the
 * newline, and the full-width CJK forms (',', '。', '!', '?') so that Chinese
 * punctuation is recognised as a sentence boundary too.
 */
const DELIMITER = [',', '.', '!', '?', '\n', ',', '。', '!', '?'];

/** Separator between paragraphs: one blank line. */
const PARAGRAPH_DELIMITER = '\n\n';

/** One piece of a chunked document. */
export interface Chunk {
  /** Zero-based position of the chunk within the document. */
  chunkId: number;
  /** Text content of the chunk. */
  text: string;
  /** Number of tokens counted for `text`. */
  tokens: number;
}
|
|
14
|
-
|
|
15
|
-
/**
 * Split a piece of text into parts that each fit within a token budget.
 *
 * Text that already fits is returned unchanged as a single entry. Otherwise the
 * text is cut after every delimiter and the resulting sentences are packed
 * greedily into chunks; the token count of every candidate chunk is measured
 * with `countTokens`, so the budget is enforced in tokens rather than in
 * characters. A single sentence that is over budget on its own is cut between
 * Unicode code points, so surrogate pairs are never torn apart.
 *
 * Chunk text is copied verbatim from the input (nothing is trimmed or
 * rewritten), so concatenating the returned texts reproduces the input, except
 * that chunks consisting only of whitespace are dropped.
 *
 * The only case in which a returned chunk can exceed the budget is a single
 * code point that `countTokens` itself rates above `maxTokens`.
 *
 * @param chunk Text to split.
 * @param options Optional overrides. `maxTokens` defaults to `CHUNK_SIZE`,
 *   `delimiters` to `DELIMITER` and `countTokens` to `numTokensFromString`.
 * @returns `[text, tokenCount]` tuples in input order.
 * @throws RangeError when `maxTokens` is not a number greater than or equal to 1.
 */
function ensureChunkSize(
  chunk: string,
  options: {
    maxTokens?: number;
    delimiters?: readonly string[];
    countTokens?: (text: string) => number;
  } = {},
): Array<[string, number]> {
  // Destructuring defaults are evaluated only for omitted options.
  const {
    maxTokens = CHUNK_SIZE,
    delimiters = DELIMITER,
    countTokens = numTokensFromString,
  } = options;

  // Written as a negation so that NaN is rejected as well.
  if (!(maxTokens >= 1)) {
    throw new RangeError(`maxTokens must be >= 1, received ${maxTokens}`);
  }

  const totalTokens = countTokens(chunk);
  if (totalTokens <= maxTokens) {
    return [[chunk, totalTokens]];
  }

  const chunks: Array<[string, number]> = [];
  let pending = '';
  let pendingTokens = 0;

  // Emit the chunk being assembled (unless it is blank) and start a new one.
  const flushPending = (): void => {
    if (pending.trim()) {
      chunks.push([pending, pendingTokens]);
    }
    pending = '';
    pendingTokens = 0;
  };

  for (const sentence of splitAfterDelimiters(chunk, delimiters)) {
    if (pending !== '') {
      // Token counts are not additive, so measure the merged text itself.
      const merged = pending + sentence;
      const mergedTokens = countTokens(merged);
      if (mergedTokens <= maxTokens) {
        pending = merged;
        pendingTokens = mergedTokens;
        continue;
      }
      flushPending();
    }

    const sentenceTokens = countTokens(sentence);
    if (sentenceTokens <= maxTokens) {
      pending = sentence;
      pendingTokens = sentenceTokens;
      continue;
    }

    // The sentence alone is over budget: cut it on code point boundaries.
    const parts = splitByTokenBudget(sentence, maxTokens, countTokens);
    // Keep the last part open so that following sentences can still join it.
    const lastPart = parts.pop();
    for (const part of parts) {
      if (part[0].trim()) {
        chunks.push(part);
      }
    }
    if (lastPart !== undefined) {
      [pending, pendingTokens] = lastPart;
    }
  }

  flushPending();
  return chunks;
}

/** Cut `text` after every occurrence of a delimiter, keeping the delimiter with the preceding piece. */
function splitAfterDelimiters(text: string, delimiters: readonly string[]): string[] {
  const alternatives = delimiters
    .filter((delimiter) => delimiter.length > 0) // an empty pattern would match everywhere
    .map((delimiter) => delimiter.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
  if (alternatives.length === 0) {
    return [text];
  }

  const boundary = new RegExp(alternatives.join('|'), 'g');
  const sentences: string[] = [];
  let start = 0;
  for (let match = boundary.exec(text); match !== null; match = boundary.exec(text)) {
    const end = match.index + match[0].length;
    sentences.push(text.slice(start, end));
    start = end;
  }
  if (start < text.length) {
    sentences.push(text.slice(start));
  }
  return sentences;
}

/** Cut `text` between code points into consecutive parts that each fit within `maxTokens`. */
function splitByTokenBudget(
  text: string,
  maxTokens: number,
  countTokens: (text: string) => number,
): Array<[string, number]> {
  // Array.from iterates by code point, so a surrogate pair stays in one element.
  const codePoints = Array.from(text);
  const parts: Array<[string, number]> = [];
  let start = 0;

  while (start < codePoints.length) {
    const from = start;
    const remaining = codePoints.length - from;
    const fits = (length: number): boolean =>
      countTokens(codePoints.slice(from, from + length).join('')) <= maxTokens;

    // `lo` is the longest length known to fit; one code point is always taken
    // so that the loop makes progress.
    let lo = 1;
    let hi = Math.min(remaining, 2);
    // Double the probe until it stops fitting, so the tokenizer only ever sees
    // text of roughly chunk size instead of the whole remainder.
    while (hi < remaining && fits(hi)) {
      lo = hi;
      hi = Math.min(remaining, hi * 2);
    }
    // Binary search for the longest fitting length in [lo, hi].
    while (lo < hi) {
      const mid = lo + Math.ceil((hi - lo) / 2);
      if (fits(mid)) {
        lo = mid;
      } else {
        hi = mid - 1;
      }
    }

    const part = codePoints.slice(from, from + lo).join('');
    parts.push([part, countTokens(part)]);
    start = from + lo;
  }

  return parts;
}
|
|
56
|
-
|
|
57
|
-
/**
 * Break a document into sequentially numbered chunks.
 *
 * The text is cut into paragraphs at `PARAGRAPH_DELIMITER`; every paragraph is
 * trimmed, empty paragraphs are skipped, and each remaining paragraph is passed
 * through `ensureChunkSize`. The resulting pieces are numbered from 0 in the
 * order in which they appear in the document.
 *
 * @param text Document text to split.
 * @returns Promise resolving to the chunks in document order.
 */
export async function getChunks(text: string): Promise<Chunk[]> {
  const result: Chunk[] = [];

  for (const rawParagraph of text.split(PARAGRAPH_DELIMITER)) {
    const paragraph = rawParagraph.trim();
    if (!paragraph) {
      continue;
    }

    ensureChunkSize(paragraph).forEach(([piece, tokenCount]) => {
      // The next id always equals the number of chunks collected so far.
      result.push({ chunkId: result.length, text: piece, tokens: tokenCount });
    });
  }

  return result;
}
|