npm - @kevisual/ai - Versions diffs - 0.0.12 → 0.0.13 - Mend

@kevisual/ai 0.0.12 → 0.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/dist/ai-provider-browser.d.ts +93 -13
package/dist/ai-provider-browser.js +1672 -1549
package/dist/ai-provider.d.ts +93 -13
package/dist/ai-provider.js +136 -13
package/package.json +9 -9
package/src/provider/chat-adapter/kimi.ts +10 -0
package/src/provider/chat-adapter/zhipu.ts +10 -0
package/src/provider/chat.ts +6 -0
package/src/provider/core/chat.ts +4 -26
package/src/provider/core/utils/index.ts +192 -0
package/src/provider/utils/chunk.ts +0 -86

package/src/provider/core/utils/index.ts ADDED Viewed

@@ -0,0 +1,192 @@
+export class AIUtils { // Text helpers for post-processing AI responses; the class declares no fields or constructor.
+  /**
+   * Extract and parse JSON from a Markdown code block.
+   * Uses the contents of the first ```json fenced block when one is found;
+   * otherwise the whole input string is handed to JSON.parse.
+   * @param str String containing JSON
+   * @returns The parsed value, or null when JSON.parse throws
+   */
+  extractJsonFromMarkdown(str: string): any | null {
+    // Try to extract JSON from ```json ... ```
+    const jsonRegex = /```json\s*([\s\S]*?)\s*```/;
+    const match = str.match(jsonRegex);
+    let jsonStr = match && match[1] ? match[1] : str;
+    try {
+      return JSON.parse(jsonStr);
+    } catch {
+      return null;
+    }
+  }
+  /**
+   * Extract code from Markdown fenced code blocks.
+   * With `language`, only fences opened with that language tag are matched and the
+   * fence markers are stripped from each match; without it, every fenced block is
+   * matched and its captured body is returned.
+   * NOTE(review): `language` is interpolated into the RegExp source unescaped, so
+   * values containing regex metacharacters (e.g. "c++") change the pattern.
+   * NOTE(review): both branches return an array or null; a bare string is never
+   * returned even though the signature allows it.
+   * @param str Markdown string
+   * @param language Language tag; when omitted, all code blocks are returned
+   * @returns Array of trimmed code strings, or null when nothing matched
+   */
+  extractCodeFromMarkdown(str: string, language?: string): string | string[] | null {
+    if (language) {
+      const regex = new RegExp(`\`\`\`${language}\\s*([\\s\\S]*?)\\s*\`\`\``, 'g');
+      const matches = str.match(regex);
+      if (!matches) return null;
+      return matches.map(m => m.replace(new RegExp(`\`\`\`${language}\\s*|\\s*\`\`\``, 'g'), '').trim());
+    }
+    const regex = /```[\w]*\s*([\s\S]*?)\s*```/g;
+    const matches = [...str.matchAll(regex)];
+    if (matches.length === 0) return null;
+    return matches.map(m => m[1].trim());
+  }
+  /**
+   * Clean up extra whitespace in an AI response.
+   * @param str Raw string
+   * @returns The trimmed string with blank-line runs collapsed and trailing spaces/tabs removed
+   */
+  cleanResponse(str: string): string {
+    return str
+      .trim()
+      .replace(/\n{3,}/g, '\n\n') // collapse runs of 3+ newlines into two
+      .replace(/[ \t]+$/gm, ''); // strip trailing spaces/tabs at the end of every line
+  }
+  /**
+   * Extract tags from an AI response.
+   * Each pattern's first capture group is split on "," or ";", trimmed, and
+   * collected into a Set, so duplicates are returned only once.
+   * @param str Response string
+   * @returns Array of unique tags in first-seen order
+   */
+  extractTags(str: string): string[] {
+    const tagPatterns = [
+      /#(\w+)/g, // #tag form
+      /\[(\w+)\]/g, // [tag] form
+      /tags?:\s*\[([^\]]+)\]/gi, // tags: [...] form
+    ];
+    const tags = new Set<string>();
+    for (const pattern of tagPatterns) {
+      const matches = str.matchAll(pattern);
+      for (const match of matches) {
+        if (match[1]) {
+          const extracted = match[1].split(/[,;]/).map(t => t.trim()).filter(Boolean);
+          extracted.forEach(tag => tags.add(tag));
+        }
+      }
+    }
+    return Array.from(tags);
+  }
+  /**
+   * Extract URLs from text.
+   * A URL is "http://" or "https://" followed by everything up to the next whitespace.
+   * NOTE(review): because the match runs to the next whitespace, punctuation that
+   * directly follows a URL (")", ".", ",") is included in the result.
+   * @param str Text string
+   * @returns Array of URLs (empty when none are found)
+   */
+  extractUrls(str: string): string[] {
+    const urlRegex = /(https?:\/\/[^\s]+)/g;
+    const matches = str.match(urlRegex);
+    return matches || [];
+  }
+  /**
+   * Split long text into chunks of roughly the given token count.
+   * Paragraphs (separated by two or more newlines) are accumulated until adding the
+   * next one would exceed the character budget; the accumulated chunk is then emitted.
+   * NOTE(review): a single paragraph longer than the budget is never split further,
+   * so a chunk can exceed the limit.
+   * @param text Original text
+   * @param maxTokens Maximum tokens per chunk (rough estimate: 1 token ≈ 4 characters)
+   * @returns Array of trimmed text chunks
+   */
+  chunkText(text: string, maxTokens: number = 1000): string[] {
+    const chunkSize = maxTokens * 4; // rough estimate: character budget per chunk
+    const chunks: string[] = [];
+    // Split into paragraphs on runs of two or more newlines
+    const paragraphs = text.split(/\n\n+/);
+    let currentChunk = '';
+    for (const paragraph of paragraphs) {
+      if ((currentChunk + paragraph).length > chunkSize && currentChunk) {
+        chunks.push(currentChunk.trim());
+        currentChunk = paragraph;
+      } else {
+        currentChunk += (currentChunk ? '\n\n' : '') + paragraph;
+      }
+    }
+    if (currentChunk) {
+      chunks.push(currentChunk.trim());
+    }
+    return chunks;
+  }
+  /**
+   * Remove the reasoning section (thinking tags) from an AI response.
+   * Both the <thinking>...</thinking> and [thinking]...[/thinking] forms are removed,
+   * matched case-insensitively and non-greedily.
+   * @param str Response string
+   * @returns The cleaned, trimmed string
+   */
+  removeThinkingTags(str: string): string {
+    return str
+      .replace(/<thinking>[\s\S]*?<\/thinking>/gi, '')
+      .replace(/\[thinking\][\s\S]*?\[\/thinking\]/gi, '')
+      .trim();
+  }
+  /**
+   * Escape special characters for use in an AI prompt.
+   * Prefixes every backslash, backtick and dollar sign with a backslash; backslashes
+   * are handled first so the ones added by the later steps are not doubled.
+   * @param str Raw string
+   * @returns The escaped string
+   */
+  escapeForPrompt(str: string): string {
+    return str
+      .replace(/\\/g, '\\\\')
+      .replace(/`/g, '\\`')
+      .replace(/\$/g, '\\$');
+  }
+  /**
+   * Estimate the approximate token count of a text.
+   * Characters in the range U+4E00..U+9FA5 are counted separately from all other
+   * characters (measured via `text.length`); the sum is rounded up.
+   * @param text Text
+   * @returns Estimated token count
+   */
+  estimateTokens(text: string): number {
+    // Simple estimate: Chinese ~1.5 characters/token, English ~4 characters/token
+    const chineseChars = (text.match(/[\u4e00-\u9fa5]/g) || []).length;
+    const otherChars = text.length - chineseChars;
+    return Math.ceil(chineseChars / 1.5 + otherChars / 4);
+  }
+  /**
+   * Extract structured data ("key: value" lines) from a response.
+   * Each line is split at its first ASCII or full-width colon; lines with no colon,
+   * or with nothing after the colon, are skipped. A repeated key keeps the last value.
+   * @param str Response string
+   * @returns Object mapping trimmed keys to trimmed values
+   */
+  extractKeyValuePairs(str: string): Record<string, string> {
+    const result: Record<string, string> = {};
+    const lines = str.split('\n');
+    for (const line of lines) {
+      const match = line.match(/^([^:：]+)[：:]\s*(.+)$/);
+      if (match) {
+        const key = match[1].trim();
+        const value = match[2].trim();
+        result[key] = value;
+      }
+    }
+    return result;
+  }
+  /**
+   * Check whether an AI response looks complete (truncation check).
+   * Returns false as soon as any pattern below matches the trimmed string.
+   * NOTE(review): the first three patterns only inspect how the text ends, so any
+   * response that contains a fence, "{" or "[" but ends with other text (for example
+   * a closed code block followed by a sentence) is reported as incomplete.
+   * @param str Response string
+   * @returns Whether the response is considered complete
+   */
+  isResponseComplete(str: string): boolean {
+    const incompleteSigns = [
+      /```[\w]*\s*[\s\S]*?(?<!```)$/, // unclosed code block
+      /\{[\s\S]*(?<!\})$/, // unclosed JSON
+      /\[[\s\S]*(?<!\])$/, // unclosed array
+      /\.{3,}$/, // trailing ellipsis
+    ];
+    return !incompleteSigns.some(pattern => pattern.test(str.trim()));
+  }
+}

package/src/provider/utils/chunk.ts DELETED Viewed

@@ -1,86 +0,0 @@
-import { numTokensFromString } from './token.ts';
-// Constants
-const CHUNK_SIZE = 512; // maximum number of tokens per chunk
-const MAGIC_SEPARATOR = '🦛'; // marker inserted after each delimiter and later used as the split point
-const DELIMITER = [',', '.', '!', '?', '\n', '，', '。', '！', '？']; // ASCII and full-width punctuation (plus newline) after which a split may occur
-const PARAGRAPH_DELIMITER = '\n\n'; // paragraphs are separated by a blank line
-export interface Chunk { // One piece of split text, as produced by getChunks.
-  chunkId: number; // sequential index assigned by getChunks, starting at 0
-  text: string; // the chunk's text
-  tokens: number; // token count computed by numTokensFromString for this text
-}
-/**
- * Ensure that each chunk does not exceed the maximum token count.
- * Input whose token count is within CHUNK_SIZE is returned unchanged as a single
- * entry. Otherwise MAGIC_SEPARATOR is inserted after every delimiter, the text is
- * walked in fixed-size slices, and each slice is cut at the inserted separators;
- * the unfinished remainder of a slice is carried over to the next one.
- * NOTE(review): the initial check compares a token count with CHUNK_SIZE, while the
- * loop slices by CHUNK_SIZE characters — confirm the mixed units are intended.
- * @param chunk Input text block
- * @returns Array of [text, tokenCount] pairs
- */
-function ensureChunkSize(chunk: string): Array<[string, number]> {
-  const tokens = numTokensFromString(chunk);
-  if (tokens <= CHUNK_SIZE) {
-    return [[chunk, tokens]];
-  }
-  // Append the magic separator after every delimiter
-  let processedChunk = chunk;
-  for (const delimiter of DELIMITER) {
-    // Escape regex special characters
-    const escapedDelimiter = delimiter.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
-    processedChunk = processedChunk.replace(new RegExp(escapedDelimiter, 'g'), delimiter + MAGIC_SEPARATOR);
-  }
-  const chunks: Array<[string, number]> = [];
-  let tail = '';
-  // Walk the text in slices of CHUNK_SIZE characters
-  for (let i = 0; i < processedChunk.length; i += CHUNK_SIZE) {
-    const sentences = (processedChunk.slice(i, i + CHUNK_SIZE) + ' ').split(MAGIC_SEPARATOR);
-    const currentChunk = tail + sentences.slice(0, -1).join('');
-    if (currentChunk.trim()) {
-      const tokenCount = numTokensFromString(currentChunk);
-      chunks.push([currentChunk, tokenCount]);
-    }
-    tail = sentences[sentences.length - 1].trim();
-  }
-  // Emit whatever is left in tail after the last slice
-  if (tail) {
-    const tokenCount = numTokensFromString(tail);
-    chunks.push([tail, tokenCount]);
-  }
-  return chunks;
-}
-/**
- * Split text into chunks.
- * The text is split on PARAGRAPH_DELIMITER, each paragraph is trimmed and empty
- * ones are dropped; every paragraph is then passed through ensureChunkSize and the
- * resulting pieces are numbered sequentially from 0.
- * NOTE(review): declared async, but the body contains no await.
- * @param text Input text
- * @returns Array of chunks
- */
-export async function getChunks(text: string): Promise<Chunk[]> {
-  // Split the text into paragraphs
-  const paragraphs = text
-    .split(PARAGRAPH_DELIMITER)
-    .map((p) => p.trim())
-    .filter((p) => p);
-  const chunks: Chunk[] = [];
-  let currentIndex = 0;
-  // Process each paragraph
-  for (const paragraph of paragraphs) {
-    const splittedParagraph = ensureChunkSize(paragraph);
-    for (const [text, tokens] of splittedParagraph) {
-      chunks.push({
-        chunkId: currentIndex,
-        text,
-        tokens,
-      });
-      currentIndex++;
-    }
-  }
-  return chunks;
-}