@kevisual/ai 0.0.12 → 0.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,192 @@
1
/** Punctuation that frequently follows a URL in prose without being part of it. */
const URL_TRAILING_PUNCTUATION = ".,;:!?'";

/** Closing brackets mapped to their openers; used to drop unbalanced closers from a URL tail. */
const URL_BRACKET_PAIRS: Readonly<Record<string, string>> = { ')': '(', ']': '[', '}': '{' };

/** Escapes regular-expression metacharacters so `text` is matched literally. */
function escapeRegExp(text: string): string {
  return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/** Counts non-overlapping occurrences of `needle` in `text`. */
function countOccurrences(text: string, needle: string): number {
  return text.split(needle).length - 1;
}

/** Removes trailing sentence punctuation and unbalanced closing brackets from a URL candidate. */
function trimUrlTail(url: string): string {
  let end = url.length;
  while (end > 0) {
    const last = url.charAt(end - 1);
    if (URL_TRAILING_PUNCTUATION.includes(last)) {
      end -= 1;
      continue;
    }
    const opener = URL_BRACKET_PAIRS[last];
    if (opener !== undefined) {
      const head = url.slice(0, end);
      // Keep a closer that pairs with an opener inside the URL, e.g. ".../Foo_(bar)".
      if (countOccurrences(head, last) > countOccurrences(head, opener)) {
        end -= 1;
        continue;
      }
    }
    break;
  }
  return url.slice(0, end);
}

/**
 * Splits `text` (already trimmed) into pieces of at most `size` UTF-16 code units,
 * breaking at the last whitespace inside the window when there is one.
 */
function splitToSize(text: string, size: number): string[] {
  const pieces: string[] = [];
  let rest = text;
  while (rest.length > size) {
    let cut = size;
    while (cut > 0 && !/\s/.test(rest.charAt(cut))) {
      cut -= 1;
    }
    if (cut === 0) {
      // No whitespace in the window: hard cut, but never between the halves of a surrogate pair.
      cut = size;
      const unit = rest.charCodeAt(cut - 1);
      if (unit >= 0xd800 && unit <= 0xdbff) {
        cut = cut > 1 ? cut - 1 : cut + 1;
      }
    }
    const piece = rest.slice(0, cut).trimEnd();
    if (piece) pieces.push(piece);
    rest = rest.slice(cut).trimStart();
  }
  if (rest) pieces.push(rest);
  return pieces;
}

/**
 * Reports whether every `{` / `[` in `text` has a matching closer.
 * Double-quoted strings are skipped only while inside a bracketed structure, so
 * JSON string values may contain brackets while quotes in ordinary prose are ignored.
 */
function hasBalancedBrackets(text: string): boolean {
  const expectedClosers: string[] = [];
  let inString = false;
  for (let i = 0; i < text.length; i += 1) {
    const ch = text.charAt(i);
    if (inString) {
      if (ch === '\\') {
        i += 1; // skip the escaped character
      } else if (ch === '"' || ch === '\n') {
        // JSON strings cannot span lines; a newline means the quote was a stray one.
        inString = false;
      }
      continue;
    }
    if (ch === '"' && expectedClosers.length > 0) {
      inString = true;
    } else if (ch === '{') {
      expectedClosers.push('}');
    } else if (ch === '[') {
      expectedClosers.push(']');
    } else if ((ch === '}' || ch === ']') && expectedClosers[expectedClosers.length - 1] === ch) {
      expectedClosers.pop();
    }
  }
  return expectedClosers.length === 0;
}

/**
 * Stateless helpers for post-processing text produced by (or sent to) AI models.
 */
export class AIUtils {
  /**
   * Extracts and parses JSON from a model response.
   *
   * Candidates are tried in order: the first fenced block labelled `json`
   * (case-insensitive), the whole string, then every fenced block regardless of label.
   *
   * @param str Text that contains JSON.
   * @returns The first candidate that parses, or `null` when none does.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- public signature kept for callers
  extractJsonFromMarkdown(str: string): any | null {
    const candidates: string[] = [];

    const labelled = /```json\s*([\s\S]*?)\s*```/i.exec(str);
    if (labelled?.[1]) candidates.push(labelled[1]);

    candidates.push(str);

    for (const fence of str.matchAll(/```[\w+#.-]*\s*([\s\S]*?)\s*```/g)) {
      if (fence[1]) candidates.push(fence[1]);
    }

    for (const candidate of candidates) {
      try {
        return JSON.parse(candidate);
      } catch {
        // Not valid JSON; fall through to the next candidate.
      }
    }
    return null;
  }

  /**
   * Extracts the contents of fenced code blocks from Markdown.
   *
   * @param str Markdown text.
   * @param language Fence label to filter on (matched literally, case-insensitively,
   *   as a whole label: `c` does not match a `cpp` fence). When omitted, every block is returned.
   * @returns The trimmed contents of each matching block, or `null` when there is none.
   */
  extractCodeFromMarkdown(str: string, language?: string): string | string[] | null {
    // The label is escaped so names such as "c++" cannot break or alter the pattern;
    // the lookahead rejects longer labels that merely start with `language`.
    const label = language ? `${escapeRegExp(language)}(?![\\w+#.-])` : '[\\w+#.-]*';
    const fence = new RegExp('```' + label + '\\s*([\\s\\S]*?)\\s*```', language ? 'gi' : 'g');

    const blocks = Array.from(str.matchAll(fence), (m) => (m[1] ?? '').trim());
    return blocks.length > 0 ? blocks : null;
  }

  /**
   * Normalizes whitespace in a model response.
   *
   * @param str Raw response text.
   * @returns The text with trailing spaces/tabs removed from every line, runs of three
   *   or more newlines collapsed to two, and leading/trailing whitespace trimmed.
   */
  cleanResponse(str: string): string {
    return str
      .replace(/[ \t]+$/gm, '') // strip first so whitespace-only lines count as blank lines
      .replace(/\n{3,}/g, '\n\n')
      .trim();
  }

  /**
   * Collects tags written as `#tag`, `[tag]` or `tags: [a, b]`.
   *
   * Tag characters are Unicode letters, digits and `_`, so non-ASCII tags are supported.
   * Items inside a `tags: [...]` list may be separated by `,` `;` `,` `;` or `、`;
   * surrounding quotes are removed.
   *
   * @param str Response text.
   * @returns Unique tags in order of first appearance per pattern.
   */
  extractTags(str: string): string[] {
    const tagPatterns = [
      /#([\p{L}\p{N}_]+)/gu, // #tag
      /\[([\p{L}\p{N}_]+)\]/gu, // [tag]
      /tags?[::]\s*\[([^\]]+)\]/gi, // tags: [...]
    ];

    const tags = new Set<string>();
    for (const pattern of tagPatterns) {
      for (const match of str.matchAll(pattern)) {
        for (const piece of (match[1] ?? '').split(/[,;,;、]/)) {
          const tag = piece.trim().replace(/^["'`]+|["'`]+$/g, '').trim();
          if (tag) tags.add(tag);
        }
      }
    }
    return Array.from(tags);
  }

  /**
   * Finds `http://` and `https://` URLs in free text.
   *
   * A URL ends at whitespace, `<`, `>`, `"`, a backtick or CJK punctuation. Trailing
   * sentence punctuation and unbalanced closing brackets (as in `[text](url).`) are dropped.
   *
   * @param str Text to scan.
   * @returns The URLs in order of appearance; empty when none is found.
   */
  extractUrls(str: string): string[] {
    const candidates = str.match(/https?:\/\/[^\s<>"`,。;:!?、()【】《》「」]+/g) ?? [];
    return candidates.map(trimUrlTail).filter((url) => /^https?:\/\/[^/]/.test(url));
  }

  /**
   * Splits long text into chunks that fit a rough token budget.
   *
   * Paragraphs (separated by blank lines) are packed together, joined by a blank line,
   * while the result stays within the budget. A paragraph that is larger than the budget
   * on its own is split further, preferably at whitespace.
   *
   * @param text Source text.
   * @param maxTokens Maximum tokens per chunk, estimated as 1 token ≈ 4 characters.
   *   A value that yields a budget below one character (zero, negative, `NaN`)
   *   disables packing and splitting: each paragraph becomes its own chunk.
   * @returns Non-empty, trimmed chunks in source order.
   */
  chunkText(text: string, maxTokens: number = 1000): string[] {
    const chunkSize = Math.floor(maxTokens * 4);
    const paragraphs = text
      .split(/(?:\r?\n){2,}/)
      .map((paragraph) => paragraph.trim())
      .filter(Boolean);

    if (!(chunkSize >= 1)) return paragraphs;

    const chunks: string[] = [];
    let current = '';
    for (const paragraph of paragraphs) {
      if (paragraph.length > chunkSize) {
        if (current) {
          chunks.push(current);
          current = '';
        }
        chunks.push(...splitToSize(paragraph, chunkSize));
        continue;
      }

      // Measure the joined text so the "\n\n" separator counts against the budget.
      const joined = current ? `${current}\n\n${paragraph}` : paragraph;
      if (joined.length > chunkSize) {
        chunks.push(current);
        current = paragraph;
      } else {
        current = joined;
      }
    }

    if (current) chunks.push(current);
    return chunks;
  }

  /**
   * Removes reasoning sections from a model response.
   *
   * Handles `<thinking>…</thinking>`, `<think>…</think>` and the bracket forms
   * `[thinking]…[/thinking]` / `[think]…[/think]`, case-insensitively.
   *
   * @param str Response text.
   * @returns The text without those sections, trimmed.
   */
  removeThinkingTags(str: string): string {
    return str
      .replace(/<(think(?:ing)?)>[\s\S]*?<\/\1>/gi, '')
      .replace(/\[(think(?:ing)?)\][\s\S]*?\[\/\1\]/gi, '')
      .trim();
  }

  /**
   * Backslash-escapes characters that are special inside template-literal style prompts.
   *
   * @param str Raw text.
   * @returns The text with `\`, `` ` `` and `$` each prefixed by a backslash.
   */
  escapeForPrompt(str: string): string {
    return str.replace(/[\\`$]/g, '\\$&');
  }

  /**
   * Estimates the token count of a text.
   *
   * Heuristic: about 1.5 characters per token for CJK ideographs (U+4E00–U+9FA5)
   * and about 4 characters per token for everything else.
   *
   * @param text Text to measure.
   * @returns Estimated token count, rounded up.
   */
  estimateTokens(text: string): number {
    const chineseChars = (text.match(/[\u4e00-\u9fa5]/g) ?? []).length;
    const otherChars = text.length - chineseChars;
    return Math.ceil(chineseChars / 1.5 + otherChars / 4);
  }

  /**
   * Parses `key: value` lines (ASCII or full-width colon) into an object.
   *
   * Lines are split on LF or CRLF. Only the first colon on a line separates key from
   * value; lines without a colon, or with an empty key or value, are ignored. A later
   * occurrence of the same key overwrites the earlier one.
   *
   * @param str Response text.
   * @returns Map of trimmed keys to trimmed values.
   */
  extractKeyValuePairs(str: string): Record<string, string> {
    const result: Record<string, string> = {};

    // Split on \r?\n: `.` does not match "\r", so CRLF lines would otherwise never match.
    for (const line of str.split(/\r?\n/)) {
      const match = /^([^::]+)[::]\s*(.+)$/.exec(line);
      if (!match) continue;

      const key = (match[1] ?? '').trim();
      const value = (match[2] ?? '').trim();
      if (key) result[key] = value;
    }

    return result;
  }

  /**
   * Heuristically checks whether a response looks complete rather than truncated.
   *
   * A response is treated as truncated when, after trimming, it
   * - ends with three or more dots,
   * - contains an odd number of ``` fences (an unclosed code block), or
   * - has an unclosed `{` or `[` outside closed code fences.
   *
   * @param str Response text.
   * @returns `true` when none of the truncation signs is present.
   */
  isResponseComplete(str: string): boolean {
    const text = str.trim();

    if (/\.{3,}$/.test(text)) return false;

    const fenceCount = (text.match(/```/g) ?? []).length;
    if (fenceCount % 2 !== 0) return false;

    // Closed fences are skipped: code may legitimately contain lone brackets
    // (string literals, regexes) that would otherwise look like truncation.
    const outsideFences = text.replace(/```[\s\S]*?```/g, '');
    return hasBalancedBrackets(outsideFences);
  }
}
@@ -1,86 +0,0 @@
1
- import { numTokensFromString } from './token.ts';
2
-
3
// Chunking configuration.

/** Maximum number of tokens allowed in a single chunk. */
const CHUNK_SIZE = 512;

/** Marker inserted after each delimiter so the text can later be split at sentence boundaries. */
const MAGIC_SEPARATOR = '🦛';

/** Sentence/clause delimiters: ASCII punctuation, newline, and full-width CJK punctuation. */
const DELIMITER = [
  ',', '.', '!', '?', '\n',
  ',', '。', '!', '?',
];

/** Separator between paragraphs in the input text. */
const PARAGRAPH_DELIMITER = '\n\n';
8
-
9
/** One piece of the split text together with its position and token count. */
export interface Chunk {
  /** Zero-based position of the chunk in the output sequence. */
  chunkId: number;
  /** Text content of the chunk. */
  text: string;
  /** Token count reported for `text`. */
  tokens: number;
}
14
-
15
/**
 * Breaks a text block into smaller pieces when it exceeds CHUNK_SIZE tokens.
 *
 * A block within the limit is returned unchanged as a single entry. Otherwise every
 * delimiter is followed by MAGIC_SEPARATOR, the marked text is walked in windows of
 * CHUNK_SIZE UTF-16 code units, and each window contributes its complete sentences
 * (prefixed by the unfinished tail of the previous window).
 *
 * NOTE(review): windows are measured in code units while the limit is expressed in
 * tokens, and a piece can combine a tail with a full window — confirm that emitted
 * pieces stay within CHUNK_SIZE tokens for the tokenizer in use.
 *
 * @param chunk Text block to check.
 * @returns `[text, tokenCount]` pairs in source order.
 */
function ensureChunkSize(chunk: string): Array<[string, number]> {
  const tokens = numTokensFromString(chunk);
  if (tokens <= CHUNK_SIZE) {
    return [[chunk, tokens]];
  }

  // Mark every sentence boundary by appending the magic separator after each delimiter.
  let marked = chunk;
  for (const delimiter of DELIMITER) {
    marked = marked.split(delimiter).join(delimiter + MAGIC_SEPARATOR);
  }

  const chunks: Array<[string, number]> = [];
  let tail = '';
  let start = 0;

  while (start < marked.length) {
    let end = Math.min(start + CHUNK_SIZE, marked.length);

    // Never end a window between the halves of a surrogate pair: the separator itself
    // is a two-unit character, and cutting it would hide the boundary and leak the
    // marker into the emitted text.
    const lastUnit = marked.charCodeAt(end - 1);
    if (end < marked.length && end - start > 1 && lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      end -= 1;
    }

    // The appended space guarantees a final element even when the window ends on a separator.
    const sentences = (marked.slice(start, end) + ' ').split(MAGIC_SEPARATOR);
    const currentChunk = tail + sentences.slice(0, -1).join('');
    if (currentChunk.trim()) {
      chunks.push([currentChunk, numTokensFromString(currentChunk)]);
    }

    // The last element is an unfinished sentence; carry it into the next window.
    tail = (sentences[sentences.length - 1] ?? '').trim();
    start = end;
  }

  // Emit whatever is left once all windows are consumed.
  if (tail) {
    chunks.push([tail, numTokensFromString(tail)]);
  }

  return chunks;
}
56
-
57
/**
 * Splits text into sequentially numbered chunks.
 *
 * The text is divided on PARAGRAPH_DELIMITER; each non-empty, trimmed paragraph is
 * passed through `ensureChunkSize`, and every resulting piece becomes one chunk.
 *
 * @param text Input text.
 * @returns Chunks in source order, with `chunkId` counting up from 0.
 */
export async function getChunks(text: string): Promise<Chunk[]> {
  const result: Chunk[] = [];

  for (const rawParagraph of text.split(PARAGRAPH_DELIMITER)) {
    const paragraph = rawParagraph.trim();
    if (!paragraph) {
      continue;
    }

    for (const [pieceText, pieceTokens] of ensureChunkSize(paragraph)) {
      // The next id is always the number of chunks collected so far.
      result.push({ chunkId: result.length, text: pieceText, tokens: pieceTokens });
    }
  }

  return result;
}