@kevisual/ai 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai-provider-browser.d.ts +9 -1
- package/dist/ai-provider-browser.js +13 -1
- package/dist/ai-provider.d.ts +9 -1
- package/dist/ai-provider.js +13 -1
- package/package.json +9 -5
- package/src/modules/logger.ts +6 -0
- package/src/provider/chat-adapter/custom.ts +14 -0
- package/src/provider/chat-adapter/dashscope.ts +10 -0
- package/src/provider/chat-adapter/deepseek.ts +10 -0
- package/src/provider/chat-adapter/model-scope.ts +11 -0
- package/src/provider/chat-adapter/ollama.ts +47 -0
- package/src/provider/chat-adapter/siliconflow.ts +39 -0
- package/src/provider/chat-adapter/volces.ts +10 -0
- package/src/provider/chat.ts +67 -0
- package/src/provider/core/chat.ts +152 -0
- package/src/provider/core/index.ts +27 -0
- package/src/provider/core/text-regex.ts +105 -0
- package/src/provider/core/type.ts +29 -0
- package/src/provider/index.ts +5 -0
- package/src/provider/knowledge-adapter/knowledge-base.ts +107 -0
- package/src/provider/knowledge-adapter/knowledge.ts +7 -0
- package/src/provider/knowledge-adapter/siliconflow.ts +24 -0
- package/src/provider/knowledge.ts +6 -0
- package/src/provider/media/index.ts +1 -0
- package/src/provider/media/video/siliconflow.ts +37 -0
- package/src/provider/utils/ai-config-type.ts +52 -0
- package/src/provider/utils/chunk.ts +86 -0
- package/src/provider/utils/index.ts +2 -0
- package/src/provider/utils/parse-config.ts +192 -0
- package/src/provider/utils/token.ts +34 -0
- package/src/test/chunks/01-get.ts +65 -0
- package/src/test/encrypt/index.ts +9 -0
- package/src/test/func-call/curl.sh +35 -0
- package/src/test/func-call/demo.ts +116 -0
- package/src/test/model-scope/index.ts +26 -0
- package/src/test/ollama-knowledge.ts +37 -0
- package/src/test/ollama.ts +86 -0
- package/src/test/provider/index.ts +7 -0
- package/src/test/siliconflow/common.ts +15 -0
- package/src/test/siliconflow/get.ts +22 -0
- package/src/test/siliconflow/knowledge/create.ts +18 -0
- package/src/test/siliconflow/knowledge/qwen.md +232 -0
- package/src/test/siliconflow/rerank/fc.ts +28 -0
- package/src/test/siliconflow/rerank/index.ts +34 -0
- package/src/test/siliconflow/videos/index.ts +100 -0
- package/src/utils/json.ts +12 -0
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
// Updated: Aug. 20, 2024
|
|
2
|
+
// Live demo: https://jina.ai/tokenizer
|
|
3
|
+
// LICENSE: Apache-2.0 (https://www.apache.org/licenses/LICENSE-2.0)
|
|
4
|
+
// COPYRIGHT: Jina AI
|
|
5
|
+
|
|
6
|
+
// Numeric limits interpolated into the quantifiers of the `textSplitter`
// pattern. Each one bounds how much text a single alternative may consume.

// Headings: marker run length, heading text, Setext underline, <hN> attributes.
const MAX_HEADING_LENGTH = 7;
const MAX_HEADING_CONTENT_LENGTH = 200;
const MAX_HEADING_UNDERLINE_LENGTH = 200;
const MAX_HTML_HEADING_ATTRIBUTES_LENGTH = 100;
// Lists (MAX_LIST_ITEM_LENGTH is also used as the per-line bound for indented code).
const MAX_LIST_ITEM_LENGTH = 200;
const MAX_NESTED_LIST_ITEMS = 6;
const MAX_LIST_INDENT_SPACES = 7;
// Block quotes.
const MAX_BLOCKQUOTE_LINE_LENGTH = 200;
const MAX_BLOCKQUOTE_LINES = 15;
// Code blocks (fenced, indented, <pre>).
const MAX_CODE_BLOCK_LENGTH = 1500;
const MAX_CODE_LANGUAGE_LENGTH = 20;
const MAX_INDENTED_CODE_LINES = 20;
// Tables (pipe tables and <table>).
const MAX_TABLE_CELL_LENGTH = 200;
const MAX_TABLE_ROWS = 20;
const MAX_HTML_TABLE_LENGTH = 2000;
// Horizontal rules: minimum number of rule characters.
const MIN_HORIZONTAL_RULE_LENGTH = 3;
// Sentences, quotes, brackets and math.
const MAX_SENTENCE_LENGTH = 400;
const MAX_QUOTED_TEXT_LENGTH = 300;
const MAX_PARENTHETICAL_CONTENT_LENGTH = 200;
const MAX_NESTED_PARENTHESES = 5;
const MAX_MATH_INLINE_LENGTH = 100;
const MAX_MATH_BLOCK_LENGTH = 500;
// Paragraphs, standalone lines and generic HTML tags.
const MAX_PARAGRAPH_LENGTH = 1000;
const MAX_STANDALONE_LINE_LENGTH = 800;
const MAX_HTML_TAG_ATTRIBUTES_LENGTH = 100;
const MAX_HTML_TAG_CONTENT_LENGTH = 1000;
const LOOKAHEAD_RANGE = 100; // Number of characters to look ahead for a sentence boundary
|
|
34
|
+
|
|
35
|
+
// Regex source fragments (strings, not RegExp objects) that are composed into
// `textSplitter`. Backslashes are doubled because the fragments are later
// passed to the RegExp constructor.

// Characters a chunk should not begin with: whitespace, `]`, `}`, `)`, `>`, `,`, `'`.
const AVOID_AT_START = `[\\s\\]})>,']`;
// Sentence-ending punctuation: `.`, `!`, `?`, `…`, three dots, U+2026,
// U+2047–U+2049, and emoji (via Unicode property escapes, which need the `u` flag).
const PUNCTUATION = `[.!?…]|\\.{3}|[\\u2026\\u2047-\\u2049]|[\\p{Emoji_Presentation}\\p{Extended_Pictographic}]`;
// A closing `'` or `''` that is immediately followed by one or two backticks.
const QUOTE_END = `(?:'(?=\`)|''(?=\`\`))`;
// Punctuation (or a quote end) that is followed by a non-space character or end of input.
const SENTENCE_END = `(?:${PUNCTUATION}(?<!${AVOID_AT_START}(?=${PUNCTUATION}))|${QUOTE_END})(?=\\S|$)`;
// A sentence end, or a zero-width position just before a line break / end of input.
const SENTENCE_BOUNDARY = `(?:${SENTENCE_END}|(?=[\\r\\n]|$))`;
// Up to LOOKAHEAD_RANGE characters that contain no sentence end, followed by one.
const LOOKAHEAD_PATTERN = `(?:(?!${SENTENCE_END}).){1,${LOOKAHEAD_RANGE}}${SENTENCE_END}`;
// Negative lookahead: do not start on punctuation that is followed by whitespace.
const NOT_PUNCTUATION_SPACE = `(?!${PUNCTUATION}\\s)`;
// Sentence template. The literal text `{MAX_LENGTH}` is a placeholder that each
// use site substitutes with a concrete limit via `.replace(/{MAX_LENGTH}/g, ...)`.
const SENTENCE_PATTERN = `${NOT_PUNCTUATION_SPACE}(?:[^\\r\\n]{1,{MAX_LENGTH}}${SENTENCE_BOUNDARY}|[^\\r\\n]{1,{MAX_LENGTH}}(?=${PUNCTUATION}|${QUOTE_END})(?:${LOOKAHEAD_PATTERN})?)${AVOID_AT_START}*`;
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
/**
 * Regular expression that splits text into chunks.
 *
 * It is a single outer capturing group wrapping an ordered alternation; the
 * first alternative that matches at a position wins. It is compiled with the
 * `g` (global), `m` (multiline) and `u` (unicode) flags.
 *
 * Because of the `g` flag the object carries `lastIndex` state between
 * `exec`/`test` calls.
 *
 * The numbered section comments below keep the numbering found in this file,
 * which is not in strictly ascending order.
 */
export const textSplitter = new RegExp(
  "(" +
    // 1. Headings (Setext-style, Markdown, and HTML-style, with length constraints)
    `(?:^(?:[#*=-]{1,${MAX_HEADING_LENGTH}}|\\w[^\\r\\n]{0,${MAX_HEADING_CONTENT_LENGTH}}\\r?\\n[-=]{2,${MAX_HEADING_UNDERLINE_LENGTH}}|<h[1-6][^>]{0,${MAX_HTML_HEADING_ATTRIBUTES_LENGTH}}>)[^\\r\\n]{1,${MAX_HEADING_CONTENT_LENGTH}}(?:</h[1-6]>)?(?:\\r?\\n|$))` +
    "|" +
    // New pattern for citations: `[<digits>]` followed by the rest of the line
    `(?:\\[[0-9]+\\][^\\r\\n]{1,${MAX_STANDALONE_LINE_LENGTH}})` +
    "|" +
    // 2. List items (bulleted, numbered, lettered, or task lists, including nested, up to three levels, with length constraints)
    `(?:(?:^|\\r?\\n)[ \\t]{0,3}(?:[-*+•]|\\d{1,3}\\.\\w\\.|\\[[ xX]\\])[ \\t]+${SENTENCE_PATTERN.replace(/{MAX_LENGTH}/g, String(MAX_LIST_ITEM_LENGTH))}` +
    `(?:(?:\\r?\\n[ \\t]{2,5}(?:[-*+•]|\\d{1,3}\\.\\w\\.|\\[[ xX]\\])[ \\t]+${SENTENCE_PATTERN.replace(/{MAX_LENGTH}/g, String(MAX_LIST_ITEM_LENGTH))}){0,${MAX_NESTED_LIST_ITEMS}}` +
    `(?:\\r?\\n[ \\t]{4,${MAX_LIST_INDENT_SPACES}}(?:[-*+•]|\\d{1,3}\\.\\w\\.|\\[[ xX]\\])[ \\t]+${SENTENCE_PATTERN.replace(/{MAX_LENGTH}/g, String(MAX_LIST_ITEM_LENGTH))}){0,${MAX_NESTED_LIST_ITEMS}})?)` +
    "|" +
    // 3. Block quotes (including nested quotes and citations, up to three levels, with length constraints)
    `(?:(?:^>(?:>|\\s{2,}){0,2}${SENTENCE_PATTERN.replace(/{MAX_LENGTH}/g, String(MAX_BLOCKQUOTE_LINE_LENGTH))}\\r?\\n?){1,${MAX_BLOCKQUOTE_LINES}})` +
    "|" +
    // 4. Code blocks (fenced, indented, or HTML pre/code tags, with length constraints)
    `(?:(?:^|\\r?\\n)(?:\`\`\`|~~~)(?:\\w{0,${MAX_CODE_LANGUAGE_LENGTH}})?\\r?\\n[\\s\\S]{0,${MAX_CODE_BLOCK_LENGTH}}?(?:\`\`\`|~~~)\\r?\\n?` +
    `|(?:(?:^|\\r?\\n)(?: {4}|\\t)[^\\r\\n]{0,${MAX_LIST_ITEM_LENGTH}}(?:\\r?\\n(?: {4}|\\t)[^\\r\\n]{0,${MAX_LIST_ITEM_LENGTH}}){0,${MAX_INDENTED_CODE_LINES}}\\r?\\n?)` +
    `|(?:<pre>(?:<code>)?[\\s\\S]{0,${MAX_CODE_BLOCK_LENGTH}}?(?:</code>)?</pre>))` +
    "|" +
    // 5. Tables (Markdown, grid tables, and HTML tables, with length constraints)
    `(?:(?:^|\\r?\\n)(?:\\|[^\\r\\n]{0,${MAX_TABLE_CELL_LENGTH}}\\|(?:\\r?\\n\\|[-:]{1,${MAX_TABLE_CELL_LENGTH}}\\|){0,1}(?:\\r?\\n\\|[^\\r\\n]{0,${MAX_TABLE_CELL_LENGTH}}\\|){0,${MAX_TABLE_ROWS}}` +
    `|<table>[\\s\\S]{0,${MAX_HTML_TABLE_LENGTH}}?</table>))` +
    "|" +
    // 6. Horizontal rules (Markdown and HTML hr tag)
    `(?:^(?:[-*_]){${MIN_HORIZONTAL_RULE_LENGTH},}\\s*$|<hr\\s*/?>)` +
    "|" +
    // 10. Standalone lines or phrases (including single-line blocks and HTML elements, with length constraints)
    `(?!${AVOID_AT_START})(?:^(?:<[a-zA-Z][^>]{0,${MAX_HTML_TAG_ATTRIBUTES_LENGTH}}>)?${SENTENCE_PATTERN.replace(/{MAX_LENGTH}/g, String(MAX_STANDALONE_LINE_LENGTH))}(?:</[a-zA-Z]+>)?(?:\\r?\\n|$))` +
    "|" +
    // 7. Sentences or phrases ending with punctuation (including ellipsis and Unicode punctuation)
    `(?!${AVOID_AT_START})${SENTENCE_PATTERN.replace(/{MAX_LENGTH}/g, String(MAX_SENTENCE_LENGTH))}` +
    "|" +
    // 8. Quoted text, parenthetical phrases, or bracketed content (with length constraints)
    "(?:" +
    `(?<!\\w)\"\"\"[^\"]{0,${MAX_QUOTED_TEXT_LENGTH}}\"\"\"(?!\\w)` +
    // NOTE(review): the opening-quote group on the next line is non-capturing, so
    // `\\1` appears to refer to the enclosing outer group rather than to the
    // opening quote character — confirm against the upstream pattern.
    `|(?<!\\w)(?:['\"\`'"])[^\\r\\n]{0,${MAX_QUOTED_TEXT_LENGTH}}\\1(?!\\w)` +
    `|(?<!\\w)\`[^\\r\\n]{0,${MAX_QUOTED_TEXT_LENGTH}}'(?!\\w)` +
    `|(?<!\\w)\`\`[^\\r\\n]{0,${MAX_QUOTED_TEXT_LENGTH}}''(?!\\w)` +
    `|\\([^\\r\\n()]{0,${MAX_PARENTHETICAL_CONTENT_LENGTH}}(?:\\([^\\r\\n()]{0,${MAX_PARENTHETICAL_CONTENT_LENGTH}}\\)[^\\r\\n()]{0,${MAX_PARENTHETICAL_CONTENT_LENGTH}}){0,${MAX_NESTED_PARENTHESES}}\\)` +
    `|\\[[^\\r\\n\\[\\]]{0,${MAX_PARENTHETICAL_CONTENT_LENGTH}}(?:\\[[^\\r\\n\\[\\]]{0,${MAX_PARENTHETICAL_CONTENT_LENGTH}}\\][^\\r\\n\\[\\]]{0,${MAX_PARENTHETICAL_CONTENT_LENGTH}}){0,${MAX_NESTED_PARENTHESES}}\\]` +
    `|\\$[^\\r\\n$]{0,${MAX_MATH_INLINE_LENGTH}}\\$` +
    `|\`[^\`\\r\\n]{0,${MAX_MATH_INLINE_LENGTH}}\`` +
    ")" +
    "|" +
    // 9. Paragraphs (with length constraints)
    `(?!${AVOID_AT_START})(?:(?:^|\\r?\\n\\r?\\n)(?:<p>)?${SENTENCE_PATTERN.replace(/{MAX_LENGTH}/g, String(MAX_PARAGRAPH_LENGTH))}(?:</p>)?(?=\\r?\\n\\r?\\n|$))` +
    "|" +
    // 11. HTML-like tags and their content (including self-closing tags and attributes, with length constraints)
    `(?:<[a-zA-Z][^>]{0,${MAX_HTML_TAG_ATTRIBUTES_LENGTH}}(?:>[\\s\\S]{0,${MAX_HTML_TAG_CONTENT_LENGTH}}?</[a-zA-Z]+>|\\s*/>))` +
    "|" +
    // 12. LaTeX-style math expressions (inline and block, with length constraints)
    `(?:(?:\\$\\$[\\s\\S]{0,${MAX_MATH_BLOCK_LENGTH}}?\\$\\$)|(?:\\$[^\\$\\r\\n]{0,${MAX_MATH_INLINE_LENGTH}}\\$))` +
    "|" +
    // 14. Fallback for any remaining content (with length constraints)
    `(?!${AVOID_AT_START})${SENTENCE_PATTERN.replace(/{MAX_LENGTH}/g, String(MAX_STANDALONE_LINE_LENGTH))}` +
    ")",
  "gmu"
);
|
|
105
|
+
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import OpenAI from 'openai';
|
|
2
|
+
|
|
3
|
+
/** A single chat message as accepted by the OpenAI chat-completions API. */
export type ChatMessage = OpenAI.Chat.Completions.ChatCompletionMessageParam;
/** Optional request parameters; every field of the OpenAI create-params type is optional here. */
export type ChatMessageOptions = Partial<OpenAI.Chat.Completions.ChatCompletionCreateParams>;
/** A complete (non-streamed) chat completion response. */
export type ChatMessageComplete = OpenAI.Chat.Completions.ChatCompletion;
// NOTE(review): this aliases the same type as ChatMessageComplete; a streamed
// chunk type may have been intended — confirm against the callers.
export type ChatMessageStream = OpenAI.Chat.Completions.ChatCompletion;

/** Optional embedding request parameters (partial OpenAI embedding create-params). */
export type EmbeddingMessage = Partial<OpenAI.Embeddings.EmbeddingCreateParams>;
/** Response type of an embedding creation request. */
export type EmbeddingMessageComplete = OpenAI.Embeddings.CreateEmbeddingResponse;
|
|
10
|
+
/** Minimal contract for a chat provider. */
export interface BaseChatInterface {
  /**
   * Sends a list of messages and resolves with the completed chat response.
   * @param messages Conversation messages to send.
   * @param options Optional request parameters.
   */
  chat(messages: ChatMessage[], options?: ChatMessageOptions): Promise<ChatMessageComplete>;
}
|
|
13
|
+
|
|
14
|
+
/** Token usage counters. */
export interface BaseChatUsageInterface {
  /**
   * Prompt tokens
   */
  prompt_tokens: number;
  /**
   * Total tokens
   */
  total_tokens: number;
  /**
   * Completion tokens
   */
  completion_tokens: number;
}
|
|
28
|
+
|
|
29
|
+
/** Async generator that yields ChatMessageComplete values and returns nothing. */
export type ChatStream = AsyncGenerator<ChatMessageComplete, void, unknown>;
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import { BaseChat, BaseChatOptions } from '../core/chat.ts';
|
|
2
|
+
import { EmbeddingMessage } from '../core/type.ts';
|
|
3
|
+
|
|
4
|
+
/**
 * Constructor options for KnowledgeBase: the BaseChatOptions payload extended
 * with embedding/splitting settings and any extra fields supplied through `T`.
 */
export type KnowledgeOptions<T = Record<string, any>> = BaseChatOptions<
  {
    embeddingModel?: string;
    splitSize?: number; // chunk size, default 2000
    splitOverlap?: number; // chunk overlap, default 200
    batchSize?: number; // batch size, default 4 (4 * 2000 = 8000)
  } & T
>;
|
|
12
|
+
/**
 * Knowledge-base construction
 * 1. Embedding generate
 * 2. retriever
 * 3. reranker
 *
 * This class provides embedding generation (single and batched) and
 * fixed-size splitting of long text with overlap.
 */
export class KnowledgeBase extends BaseChat {
  embeddingModel: string;
  splitSize: number;
  splitOverlap: number;
  batchSize: number;

  /**
   * @param options Base chat options plus `embeddingModel`, `splitSize`
   *   (default 2000), `splitOverlap` (default 200) and `batchSize` (default 4).
   */
  constructor(options: KnowledgeOptions) {
    super(options);
    this.embeddingModel = options.embeddingModel;
    this.splitSize = options.splitSize || 2000;
    this.splitOverlap = options.splitOverlap || 200;
    this.prompt_tokens = 0;
    this.total_tokens = 0;
    this.batchSize = options.batchSize || 4;
  }

  /**
   * Generates embeddings for one text or a list of texts.
   *
   * Never rejects: failures are reported through the returned object.
   * @param text A single string or an array of strings.
   * @returns `{ code: 200, data }` on success; `{ code: 413, message }` when the
   *   error message mentions 413; otherwise `{ code: error.code || 500, message }`.
   */
  async generateEmbedding(text: string | string[]) {
    try {
      const res = await this.generateEmbeddingCore(text, { model: this.embeddingModel });
      return { code: 200, data: res.data };
    } catch (error) {
      const has413 = error?.message?.includes('413');
      if (has413) {
        return {
          code: 413,
          message: '请求过大,请分割文本',
        };
      }
      return {
        code: error?.code || 500,
        message: '生成embedding失败',
      };
    }
  }

  /**
   * Embeds `textArray` sequentially in batches of `batchSize` and reports each
   * batch separately, so callers can tell which inputs a failed batch covered.
   * @returns One entry per batch; `embeddings` is undefined when the batch failed.
   */
  protected async embedInBatches(
    textArray: string[],
  ): Promise<Array<{ offset: number; size: number; embeddings?: number[][] }>> {
    // A non-positive or NaN batch size would stop the loop from advancing.
    const batchSize = Math.max(1, Math.floor(this.batchSize) || 4);
    const results: Array<{ offset: number; size: number; embeddings?: number[][] }> = [];
    for (let offset = 0; offset < textArray.length; offset += batchSize) {
      const batch = textArray.slice(offset, offset + batchSize);
      const res = await this.generateEmbedding(batch);
      results.push({
        offset,
        size: batch.length,
        embeddings: res.code === 200 ? res.data.map((item) => item.embedding) : undefined,
      });
    }
    return results;
  }

  /**
   * Generates embeddings in batches.
   *
   * Batches that fail are skipped, so the result may be shorter than
   * `textArray` and is then NOT index-aligned with it.
   * @param textArray Texts to embed.
   * @returns The embeddings of all successful batches, in input order.
   */
  async generateEmbeddingBatch(textArray: string[]) {
    const embeddings: number[][] = [];
    for (const batch of await this.embedInBatches(textArray)) {
      if (batch.embeddings) {
        embeddings.push(...batch.embeddings);
      }
    }
    return embeddings;
  }

  /**
   * Cuts `text` into windows of `splitSize` characters where consecutive
   * windows share `splitOverlap` characters.
   */
  protected sliceIntoChunks(text: string): string[] {
    const size = Number.isFinite(this.splitSize) && this.splitSize >= 1 ? Math.floor(this.splitSize) : 2000;
    // The overlap must stay below the window size, otherwise the start index
    // never moves forward and the loop would not terminate.
    const overlap = Math.min(Math.max(Math.floor(this.splitOverlap) || 0, 0), size - 1);
    const step = size - overlap;

    const chunks: string[] = [];
    for (let start = 0; start < text.length; start += step) {
      const end = Math.min(start + size, text.length);
      chunks.push(text.substring(start, end));
      // The window reached the end of the text; a further window would only
      // repeat the overlap.
      if (end >= text.length) {
        break;
      }
    }
    return chunks;
  }

  /**
   * Splits a long text into overlapping chunks and embeds each chunk.
   * @param text Text to split.
   * @returns One `{ text, embedding }` entry per chunk. `embedding` is
   *   undefined for chunks whose batch failed; entries of successful batches
   *   always stay paired with their own chunk.
   */
  async splitLongText(text: string) {
    const chunks = this.sliceIntoChunks(text);

    const embeddings: Array<number[] | undefined> = new Array(chunks.length).fill(undefined);
    for (const batch of await this.embedInBatches(chunks)) {
      batch.embeddings?.forEach((embedding, position) => {
        if (position < batch.size) {
          embeddings[batch.offset + position] = embedding;
        }
      });
    }

    return chunks.map((chunk, index) => ({
      text: chunk,
      embedding: embeddings[index],
    }));
  }
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import { KnowledgeBase, KnowledgeOptions } from './knowledge-base.ts';
|
|
2
|
+
|
|
3
|
+
/** KnowledgeBase preconfigured for the SiliconFlow API. */
export class SiliconFlowKnowledge extends KnowledgeBase {
  static BASE_URL = 'https://api.siliconflow.cn/v1';

  /**
   * @param options Knowledge options; `baseURL` falls back to BASE_URL when it
   *   is null or undefined.
   */
  constructor(options: KnowledgeOptions) {
    super(SiliconFlowKnowledge.withBaseURL(options));
  }

  /** Copies the options and fills in the default base URL when none is given. */
  private static withBaseURL(options: KnowledgeOptions) {
    const baseURL = options?.baseURL ?? SiliconFlowKnowledge.BASE_URL;
    return { ...options, baseURL };
  }

  /**
   * Posts the rerank request body to the `/rerank` endpoint.
   * @param data Rerank request payload.
   */
  async rerank(data: RerankOptions) {
    const request = { body: data };
    return this.openai.post('/rerank', request);
  }
}
|
|
15
|
+
|
|
16
|
+
/**
 * Request body sent by SiliconFlowKnowledge.rerank to the `/rerank` endpoint.
 * Field meanings below are inferred from their names — confirm against the
 * provider's API reference.
 */
export type RerankOptions = {
  model: string;
  query: string;
  documents: string[];
  top_n?: number;
  return_documents?: boolean;
  max_chunks_per_doc?: number;
  overlap_tokens?: number;
};
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import { SiliconFlowKnowledge } from './knowledge-adapter/siliconflow.ts';
|
|
2
|
+
import { KnowledgeBase, KnowledgeOptions } from './knowledge-adapter/knowledge-base.ts';
|
|
3
|
+
import { RerankOptions } from './knowledge-adapter/siliconflow.ts';
|
|
4
|
+
export { KnowledgeBase, KnowledgeOptions, RerankOptions };
|
|
5
|
+
|
|
6
|
+
export { SiliconFlowKnowledge };
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from './video/siliconflow.ts';
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { SiliconFlow } from '../../chat-adapter/siliconflow.ts';
|
|
2
|
+
/** SiliconFlow adapter with audio helpers (voice upload and speech synthesis). */
export class VideoSiliconFlow extends SiliconFlow {
  constructor(opts: any) {
    super(opts);
  }

  /**
   * Uploads a reference voice sample to `<baseURL>/uploads/audio/voice`.
   *
   * The request is sent as multipart form data. The `Content-Type` header is
   * deliberately NOT set by hand: `fetch` must generate it from the FormData
   * body so that it includes the multipart boundary.
   *
   * @param audioBase64 Audio payload appended as the `file` form field.
   * @param opts Optional overrides for the `model`, `customName` and `text`
   *   form fields; the defaults are the previously hard-coded values.
   * @returns The parsed JSON response body (also logged to the console).
   */
  async uploadAudioVoice(
    audioBase64: string | Blob | File,
    opts: { model?: string; customName?: string; text?: string } = {},
  ) {
    const pathname = 'uploads/audio/voice';
    const url = `${this.baseURL}/${pathname}`;
    const headers = {
      Authorization: `Bearer ${this.apiKey}`,
    };

    const formData = new FormData();
    formData.append('file', audioBase64);
    formData.append('model', opts.model ?? 'FunAudioLLM/CosyVoice2-0.5B');
    formData.append('customName', opts.customName ?? 'test_name');
    formData.append('text', opts.text ?? '在一无所知中, 梦里的一天结束了,一个新的轮回便会开始');

    const response = await fetch(url, {
      method: 'POST',
      headers,
      body: formData,
    });
    const res = await response.json();
    console.log('uploadAudioVoice', res);
    return res;
  }

  /**
   * Requests speech synthesis (mp3) for a fixed sample sentence.
   * @returns The response of the speech request, so callers can read the audio
   *   and observe failures.
   */
  async audioSpeech() {
    return await this.openai.audio.speech.create({
      model: 'FunAudioLLM/CosyVoice2-0.5B',
      voice: 'alloy',
      input: '在一无所知中, 梦里的一天结束了,一个新的轮回便会开始',
      response_format: 'mp3',
    });
  }
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import type { Permission } from '@kevisual/permission';
|
|
2
|
+
|
|
3
|
+
/** One model entry of an AI configuration. */
export type AIModel = {
  /**
   * Provider
   */
  provider: string;
  /**
   * Model name
   */
  model: string;
  /**
   * Model group
   */
  group: string;
  /**
   * Daily request rate limit
   */
  dayLimit?: number;
  /**
   * Total token limit
   */
  tokenLimit?: number;
};
|
|
25
|
+
/** Credentials shared by all models of one group. */
export type SecretKey = {
  /**
   * Group
   */
  group: string;
  /**
   * API key
   */
  apiKey: string;
  /**
   * Decryption key
   */
  decryptKey?: string;
};
|
|
39
|
+
|
|
40
|
+
/** Top-level AI configuration: model entries plus the per-group credentials. */
export type AIConfig = {
  title?: string;
  description?: string;
  /** Available models. */
  models: AIModel[];
  /** Credentials, matched to models through `group`. */
  secretKeys: SecretKey[];
  permission?: Permission;
  // NOTE(review): presumably rules for stripping keys/attributes from the
  // configuration before it is exposed — nothing in this file consumes it; confirm.
  filter?: {
    objectKey: string;
    type: 'array' | 'object';
    operate: 'removeAttribute' | 'remove';
    attribute: string[];
  }[];
};
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import { numTokensFromString } from './token.ts';
|
|
2
|
+
|
|
3
|
+
// Constants
const CHUNK_SIZE = 512; // maximum number of tokens per chunk
// Marker inserted after each delimiter to record a possible break position.
const MAGIC_SEPARATOR = '🦛';
// Characters after which a chunk may be broken: ASCII punctuation and newline,
// plus the CJK full stop and the full-width comma (U+FF0C), exclamation mark
// (U+FF01) and question mark (U+FF1F). The full-width forms replace entries
// that duplicated the ASCII `,`, `!` and `?`.
const DELIMITER = [',', '.', '!', '?', '\n', '\uFF0C', '。', '\uFF01', '\uFF1F'];
// Paragraphs are separated by a blank line.
const PARAGRAPH_DELIMITER = '\n\n';
|
|
8
|
+
|
|
9
|
+
/** One piece of text produced by getChunks. */
export interface Chunk {
  /** Zero-based running index of the chunk within the result of getChunks. */
  chunkId: number;
  /** The chunk text. */
  text: string;
  /** Token count of `text` as reported by numTokensFromString. */
  tokens: number;
}
|
|
14
|
+
|
|
15
|
+
/**
 * Hard-splits a piece of text that has no usable delimiter so that every
 * returned piece is at most CHUNK_SIZE tokens.
 *
 * The text is cut on code-point boundaries, so a surrogate pair is never
 * separated.
 */
function splitOversizedSentence(sentence: string): string[] {
  if (numTokensFromString(sentence) <= CHUNK_SIZE) {
    return [sentence];
  }
  const codePoints = Array.from(sentence);
  const pieces: string[] = [];
  let start = 0;
  while (start < codePoints.length) {
    let take = Math.min(CHUNK_SIZE, codePoints.length - start);
    let piece = codePoints.slice(start, start + take).join('');
    // A character can cost more than one token: shrink the window until it fits.
    while (take > 1 && numTokensFromString(piece) > CHUNK_SIZE) {
      take = Math.ceil(take / 2);
      piece = codePoints.slice(start, start + take).join('');
    }
    pieces.push(piece);
    start += take;
  }
  return pieces;
}

/**
 * Ensures that no chunk exceeds the maximum token count.
 *
 * A chunk that already fits is returned unchanged. Otherwise the text is
 * broken after each DELIMITER character and the resulting sentences are
 * packed greedily into chunks of at most CHUNK_SIZE tokens. Sentences that are
 * too long on their own are hard-split.
 *
 * The limit is enforced in tokens (not characters), and break markers are
 * resolved before any slicing, so a marker can never be cut in half or leak
 * into the output. Chunk text is not trimmed; whitespace-only chunks are
 * dropped. Any occurrence of MAGIC_SEPARATOR already present in the input is
 * removed by the split.
 *
 * @param chunk Input text block.
 * @returns Array of `[text, tokenCount]` pairs.
 */
function ensureChunkSize(chunk: string): Array<[string, number]> {
  const tokens = numTokensFromString(chunk);
  if (tokens <= CHUNK_SIZE) {
    return [[chunk, tokens]];
  }

  // Mark a break opportunity after every delimiter in a single pass.
  // Delimiters are de-duplicated and escaped for use inside a RegExp.
  const alternatives = Array.from(new Set(DELIMITER))
    .filter((delimiter) => delimiter.length > 0)
    .map((delimiter) => delimiter.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
    .join('|');
  const marked = alternatives
    ? chunk.replace(new RegExp(alternatives, 'g'), (hit) => hit + MAGIC_SEPARATOR)
    : chunk;
  const sentences = marked.split(MAGIC_SEPARATOR).filter((sentence) => sentence.length > 0);

  const chunks: Array<[string, number]> = [];
  let buffer = '';
  let bufferTokens = 0;

  // Emits the current buffer unless it only contains whitespace.
  const flush = (): void => {
    if (buffer.trim()) {
      chunks.push([buffer, bufferTokens]);
    }
    buffer = '';
    bufferTokens = 0;
  };

  for (const sentence of sentences) {
    for (const piece of splitOversizedSentence(sentence)) {
      const candidate = buffer + piece;
      const candidateTokens = numTokensFromString(candidate);
      if (buffer && candidateTokens > CHUNK_SIZE) {
        // Adding this piece would overflow: close the chunk and start a new one.
        flush();
        buffer = piece;
        bufferTokens = numTokensFromString(piece);
      } else {
        buffer = candidate;
        bufferTokens = candidateTokens;
      }
    }
  }
  flush();

  return chunks;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Splits text into chunks.
 *
 * The text is cut at blank lines into paragraphs; each paragraph is trimmed,
 * empty paragraphs are skipped, and every remaining paragraph is passed
 * through ensureChunkSize. Chunk ids are assigned sequentially from 0.
 *
 * @param text Input text.
 * @returns The resulting chunks.
 */
export async function getChunks(text: string): Promise<Chunk[]> {
  const result: Chunk[] = [];

  for (const rawParagraph of text.split(PARAGRAPH_DELIMITER)) {
    const paragraph = rawParagraph.trim();
    if (!paragraph) {
      continue;
    }
    ensureChunkSize(paragraph).forEach(([pieceText, tokens]) => {
      // The next id is always the number of chunks collected so far.
      result.push({ chunkId: result.length, text: pieceText, tokens });
    });
  }

  return result;
}
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
import AES from 'crypto-js/aes.js';
|
|
2
|
+
import Utf8 from 'crypto-js/enc-utf8.js';
|
|
3
|
+
import type { AIConfig } from './ai-config-type.js';
|
|
4
|
+
const CryptoJS = { AES, enc: { Utf8 } };
|
|
5
|
+
/**
 * Encrypts `plainText` with AES using `secretKey` as the passphrase.
 * @returns The cipher text in its string form.
 */
export function encryptAES(plainText: string, secretKey: string) {
  const cipher = CryptoJS.AES.encrypt(plainText, secretKey);
  return cipher.toString();
}
|
|
9
|
+
|
|
10
|
+
/**
 * Decrypts AES `cipherText` with `secretKey` and decodes the result as UTF-8.
 * @returns The decrypted text.
 */
export function decryptAES(cipherText: string, secretKey: string) {
  return CryptoJS.AES.decrypt(cipherText, secretKey).toString(CryptoJS.enc.Utf8);
}
|
|
15
|
+
|
|
16
|
+
// File-local model entry shape (not exported).
// NOTE(review): appears to duplicate the AIModel type of ai-config-type and is
// not referenced elsewhere in this file — confirm before removing.
type AIModel = {
  /**
   * Provider
   */
  provider: string;
  /**
   * Model name
   */
  model: string;
  /**
   * Model group
   */
  group: string;
  /**
   * Daily request rate limit
   */
  dayLimit?: number;
  /**
   * Total token limit
   */
  tokenLimit?: number;
};
|
|
38
|
+
|
|
39
|
+
// File-local credential shape (not exported).
// NOTE(review): appears to duplicate the SecretKey type of ai-config-type and
// is not referenced elsewhere in this file — confirm before removing.
type SecretKey = {
  /**
   * Group
   */
  group: string;
  /**
   * API key
   */
  apiKey: string;
  /**
   * Decryption key
   */
  decryptKey?: string;
};
|
|
53
|
+
|
|
54
|
+
/** Lookup arguments for AIConfigParser.getProvider. */
export type GetProviderOpts = {
  /** Model name to look up. */
  model: string;
  /** Group the model and its credentials belong to. */
  group: string;
  /** Optional decryption key; takes precedence over the group's own decryptKey. */
  decryptKey?: string;
};
|
|
59
|
+
/** Merged model + credential record returned by AIConfigParser.getProvider. */
export type ProviderResult = {
  provider: string;
  model: string;
  group: string;
  /** API key as stored in the configuration; getSecretKey decrypts it when a decryptKey is set. */
  apiKey: string;
  dayLimit?: number;
  tokenLimit?: number;
  baseURL?: string;
  /**
   * Decryption key
   */
  decryptKey?: string;
};
|
|
72
|
+
|
|
73
|
+
/**
 * Resolves model/credential pairs from an AIConfig and decrypts stored API keys.
 */
export class AIConfigParser {
  private config: AIConfig;
  /** Result of the most recent successful getProvider call. */
  result: ProviderResult;

  constructor(config: AIConfig) {
    this.config = config;
  }

  /**
   * Looks up a model within a group and merges it with the group's credentials.
   *
   * The merged record is also stored in `this.result`.
   * @param opts Model name, group and an optional decryption key that
   *   overrides the one configured for the group.
   * @returns The merged provider record.
   * @throws Error when the model is not found in the group, or when provider,
   *   model name, API key or group is missing from the merged record.
   */
  getProvider(opts: GetProviderOpts): ProviderResult {
    const { model, group, decryptKey } = opts;
    const modelConfig = this.config.models.find((m) => m.model === model && m.group === group);
    const groupConfig = this.config.secretKeys.find((m) => m.group === group);
    if (!modelConfig) {
      throw new Error(`在模型组 ${group} 中未找到模型 ${model}`);
    }
    const mergeConfig = {
      ...modelConfig,
      ...groupConfig,
      decryptKey: decryptKey || groupConfig?.decryptKey,
    };
    // Validate the merged configuration.
    if (!mergeConfig.provider) {
      throw new Error(`模型 ${model} 未配置提供商`);
    }
    if (!mergeConfig.model) {
      throw new Error(`模型 ${model} 未配置模型名称`);
    }
    if (!mergeConfig.apiKey) {
      throw new Error(`组 ${group} 未配置 API 密钥`);
    }
    if (!mergeConfig.group) {
      throw new Error(`组 ${group} 未配置`);
    }
    this.result = mergeConfig;
    return mergeConfig;
  }

  /**
   * Returns the usable API key for a provider record.
   *
   * Without a `decryptKey` the stored `apiKey` is returned as is. Otherwise the
   * cache (if supplied) is consulted under `<group>--<model>`, and on a miss the
   * stored key is decrypted and, when non-empty, written back to the cache.
   *
   * @param opts Optional cache accessors and an explicit provider record; when
   *   no record is passed, the result of the last getProvider call is used.
   * @returns The plain API key.
   * @throws Error when no provider record is available.
   */
  async getSecretKey(opts?: {
    getCache?: (key: string) => Promise<string>;
    setCache?: (key: string, value: string) => Promise<void>;
    providerResult?: ProviderResult;
  }) {
    const { getCache, setCache, providerResult } = opts || {};
    const target = providerResult || this.result;
    if (!target) {
      // Fail with a clear message instead of a destructuring TypeError.
      throw new Error('未获取到模型配置,请先调用 getProvider 或传入 providerResult');
    }
    const { apiKey, decryptKey, group = '', model } = target;
    const cacheKey = `${group}--${model}`;
    if (!decryptKey) {
      return apiKey;
    }
    if (getCache) {
      const cache = await getCache(cacheKey);
      if (cache) {
        return cache;
      }
    }
    const secretKey = decryptAES(apiKey, decryptKey);
    // Do not cache an empty result; getCache would treat it as a miss anyway.
    if (setCache && secretKey) {
      await setCache(cacheKey, secretKey);
    }
    return secretKey;
  }

  /**
   * Encrypts text.
   * @param plainText Text to encrypt.
   * @param secretKey Passphrase.
   * @returns The cipher text.
   */
  encrypt(plainText: string, secretKey: string) {
    return encryptAES(plainText, secretKey);
  }

  /**
   * Decrypts text.
   * @param cipherText Text to decrypt.
   * @param secretKey Passphrase.
   * @returns The decrypted text.
   */
  decrypt(cipherText: string, secretKey: string) {
    return decryptAES(cipherText, secretKey);
  }

  /**
   * Returns every model merged with the credential entry of its group.
   *
   * NOTE(review): the merged entries include `apiKey` and `decryptKey` from
   * the group entry — confirm callers do not expose this list to clients.
   * @returns One merged record per configured model.
   */
  getSelectOpts() {
    const { models = [], secretKeys = [] } = this.config;

    return models.map((model) => {
      const selectOpts = secretKeys.find((m) => m.group === model.group);
      return {
        ...model,
        ...selectOpts,
      };
    });
  }

  /**
   * Returns the configuration, by default with secrets blanked out.
   * @param keepSecret When true the configuration is returned untouched.
   * @param config Configuration to use instead of the parser's own.
   * @returns The configuration; unless `keepSecret` is set, every secret key
   *   entry has `apiKey` and `decryptKey` set to undefined.
   */
  getConfig(keepSecret?: boolean, config?: AIConfig) {
    const chatConfig = config ?? this.config;
    if (keepSecret) {
      return chatConfig;
    }
    // Strip apiKey and decryptKey from every secret entry before returning.
    const { secretKeys = [], ...rest } = chatConfig || {};
    return {
      ...rest,
      secretKeys: secretKeys.map((item) => {
        return {
          ...item,
          apiKey: undefined,
          decryptKey: undefined,
        };
      }),
    };
  }
}
|