npm - koishi-plugin-best-cave - Versions diffs - 2.7.18 → 2.7.19 - Mend

koishi-plugin-best-cave 2.7.18 → 2.7.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/lib/AIManager.d.ts ADDED Viewed

@@ -0,0 +1,95 @@
+import { Context, Logger } from 'koishi';
+import { Config, CaveObject, StoredElement } from './index';
+import { FileManager } from './FileManager';
+/**
+ * @description Row model of the `cave_meta` database table, which holds the AI analysis result of one cave.
+ */
+export interface CaveMetaObject {
+    cave: number; // ID of the analyzed cave (AIManager.analyze writes `cave.id` here).
+    keywords: string[]; // Tags returned by the AI; also the input of the keyword-similarity check.
+    description: string; // Summary text returned by the AI.
+    rating: number; // AI score, clamped to the 0-100 range by AIManager.analyze.
+}
+declare module 'koishi' { // Module augmentation: adds the cave_meta table to Koishi's Tables interface.
+    interface Tables {
+        cave_meta: CaveMetaObject;
+    }
+}
+/**
+ * @class AIManager
+ * @description AI manager: the core module that connects the AI service to the cave features.
+ */
+export declare class AIManager {
+    private ctx;
+    private config;
+    private logger;
+    private fileManager;
+    private http;
+    private readonly ANALYSIS_SYSTEM_PROMPT;
+    private readonly DUPLICATE_CHECK_SYSTEM_PROMPT;
+    /**
+     * @constructor
+     * @param {Context} ctx - Koishi context object, used to reach core services such as the database and the HTTP client.
+     * @param {Config} config - Plugin configuration object.
+     * @param {Logger} logger - Logger instance.
+     * @param {FileManager} fileManager - File manager instance, used to handle media files.
+     */
+    constructor(ctx: Context, config: Config, logger: Logger, fileManager: FileManager);
+    /**
+     * @description Registers every Koishi command related to AIManager, covering AI analysis and content comparison.
+     * @param {any} cave - Instance of the main command on which the subcommands are mounted.
+     */
+    registerCommands(cave: any): void;
+    /**
+     * @description Runs an AI-driven duplicate check on newly submitted content.
+     * @param {StoredElement[]} newElements - Elements of the newly submitted content.
+     * @param {{ fileName: string; buffer: Buffer }[]} [mediaBuffers] - (Optional) media file buffers associated with the content.
+     * @returns {Promise<{ duplicate: boolean; ids?: number[] }>} An object holding the verdict and, when duplicates were found, the IDs of the duplicated caves.
+     * @throws {Error} NOTE(review): the index.js implementation in this release catches every error, logs it and resolves to `{ duplicate: false }`, so callers should not expect a rejection.
+     */
+    checkForDuplicates(newElements: StoredElement[], mediaBuffers?: {
+        fileName: string;
+        buffer: Buffer;
+    }[]): Promise<{
+        duplicate: boolean;
+        ids?: number[];
+    }>;
+    /**
+     * @description Analyzes the content of one or many caves: extracts keywords, writes a description and assigns a rating.
+     * @param {CaveObject[]} caves - Cave objects to analyze.
+     * @param {{ fileName: string; buffer: Buffer }[]} [mediaBuffers] - (Optional) preloaded media file buffers, used to avoid reading the files again.
+     * @returns {Promise<CaveMetaObject[]>} A Promise resolving to the array of `CaveMetaObject` analysis results.
+     */
+    analyze(caves: CaveObject[], mediaBuffers?: {
+        fileName: string;
+        buffer: Buffer;
+    }[]): Promise<CaveMetaObject[]>;
+    /**
+     * @description Asks the AI whether the contents of two caves are semantic duplicates or highly similar.
+     * @param {CaveObject} caveA - First cave object.
+     * @param {CaveObject} caveB - Second cave object.
+     * @returns {Promise<boolean>} true when the AI judges the contents to be duplicates, false otherwise.
+     * @throws {Error} NOTE(review): the index.js implementation in this release catches request failures, logs them and resolves to false instead of throwing.
+     * @private
+     */
+    private isContentDuplicateAI;
+    /**
+     * @description Computes the Jaccard similarity between two keyword lists.
+     * Jaccard similarity = (size of the intersection / size of the union).
+     * @param {string[]} keywordsA - First keyword list.
+     * @param {string[]} keywordsB - Second keyword list.
+     * @returns {number} Similarity score between 0 and 100.
+     * @private
+     */
+    private calculateKeywordSimilarity;
+    /**
+     * @description Wraps the low-level logic of sending a request to an OpenAI-compatible API.
+     * @template T - Type expected from the JSON found in the AI response.
+     * @param {any[]} messages - Messages sent to the AI, normally the user message(s).
+     * @param {string} systemPrompt - System-level instruction that steers the AI's behaviour.
+     * @returns {Promise<T>} A Promise resolving to the JSON object extracted and parsed from the AI response.
+     * @throws {Error} Thrown when the network request fails, when the AI returns no usable content, or when JSON parsing fails.
+     * @private
+     */
+    private requestAI;
+}

package/lib/index.d.ts ADDED Viewed

@@ -0,0 +1,66 @@
+import { Context, Schema } from 'koishi';
+import { CaveHashObject } from './HashManager';
+import { CaveMetaObject } from './AIManager';
+export declare const name = "best-cave";
+export declare const inject: string[];
+export declare const usage = "\n<div style=\"border-radius: 10px; border: 1px solid #ddd; padding: 16px; margin-bottom: 20px; box-shadow: 0 2px 5px rgba(0,0,0,0.1);\">\n  <h2 style=\"margin-top: 0; color: #4a6ee0;\">\uD83D\uDCCC \u63D2\u4EF6\u8BF4\u660E</h2>\n  <p>\uD83D\uDCD6 <strong>\u4F7F\u7528\u6587\u6863</strong>\uFF1A\u8BF7\u70B9\u51FB\u5DE6\u4E0A\u89D2\u7684 <strong>\u63D2\u4EF6\u4E3B\u9875</strong> \u67E5\u770B\u63D2\u4EF6\u4F7F\u7528\u6587\u6863</p>\n  <p>\uD83D\uDD0D <strong>\u66F4\u591A\u63D2\u4EF6</strong>\uFF1A\u53EF\u8BBF\u95EE <a href=\"https://github.com/YisRime\" style=\"color:#4a6ee0;text-decoration:none;\">\u82E1\u6DDE\u7684 GitHub</a> \u67E5\u770B\u672C\u4EBA\u7684\u6240\u6709\u63D2\u4EF6</p>\n</div>\n<div style=\"border-radius: 10px; border: 1px solid #ddd; padding: 16px; margin-bottom: 20px; box-shadow: 0 2px 5px rgba(0,0,0,0.1);\">\n  <h2 style=\"margin-top: 0; color: #e0574a;\">\u2764\uFE0F \u652F\u6301\u4E0E\u53CD\u9988</h2>\n  <p>\uD83C\uDF1F \u559C\u6B22\u8FD9\u4E2A\u63D2\u4EF6\uFF1F\u8BF7\u5728 <a href=\"https://github.com/YisRime\" style=\"color:#e0574a;text-decoration:none;\">GitHub</a> \u4E0A\u7ED9\u6211\u4E00\u4E2A Star\uFF01</p>\n  <p>\uD83D\uDC1B \u9047\u5230\u95EE\u9898\uFF1F\u8BF7\u901A\u8FC7 <strong>Issues</strong> \u63D0\u4EA4\u53CD\u9988\uFF0C\u6216\u52A0\u5165 QQ \u7FA4 <a href=\"https://qm.qq.com/q/PdLMx9Jowq\" style=\"color:#e0574a;text-decoration:none;\"><strong>855571375</strong></a> \u8FDB\u884C\u4EA4\u6D41</p>\n</div>\n";
+/**
+ * @description Data structure of a single node stored inside a merged-forward message.
+ */
+export interface ForwardNode {
+    userId: string;
+    userName: string;
+    elements: StoredElement[];
+}
+/**
+ * @description A single message element as stored in the database.
+ */
+export interface StoredElement {
+    type: 'text' | 'image' | 'video' | 'audio' | 'file' | 'at' | 'forward' | 'reply' | 'face';
+    content?: string | ForwardNode[]; // Text payload, or the nested nodes (ForwardNode[]) — presumably for 'forward' elements; TODO confirm.
+    file?: string; // File name of the media; AIManager.analyze reads it for 'image' elements.
+}
+/**
+ * @description Complete object model of the `cave` database table.
+ */
+export interface CaveObject {
+    id: number;
+    elements: StoredElement[];
+    channelId: string;
+    userId: string;
+    userName: string;
+    status: 'active' | 'delete' | 'pending' | 'preload';
+    time: Date;
+}
+declare module 'koishi' { // Module augmentation: registers the plugin's three tables on Koishi's Tables interface.
+    interface Tables {
+        cave: CaveObject;
+        cave_hash: CaveHashObject;
+        cave_meta: CaveMetaObject;
+    }
+}
+export interface Config { // Plugin configuration; described at runtime by the exported `Config` schema.
+    perChannel: boolean;
+    adminChannel: string;
+    enableName: boolean;
+    enableIO: boolean;
+    enablePend: boolean;
+    caveFormat: string;
+    enableSimilarity: boolean;
+    textThreshold: number;
+    imageThreshold: number;
+    localPath?: string; // Schema description: "文件映射路径" (file mapping path).
+    enableS3: boolean;
+    endpoint?: string;
+    region?: string;
+    accessKeyId?: string;
+    secretAccessKey?: string;
+    bucket?: string;
+    publicUrl?: string;
+    enableAI: boolean; // Enables the AI features; the schema default is false.
+    aiEndpoint?: string; // Endpoint of the AI API; the schema default is https://api.siliconflow.cn/v1.
+    aiApiKey?: string; // API key; AIManager.requestAI sends it as a Bearer token in the Authorization header.
+    aiModel?: string; // Model name placed in the request payload; the schema default is THUDM/GLM-4.1V-9B-Thinking.
+}
+export declare const Config: Schema<Config>; // Koishi schema for Config (index.js builds it with Schema.intersect).
+export declare function apply(ctx: Context, config: Config): void; // Plugin entry point - presumably called by Koishi when the plugin loads; TODO confirm.

package/lib/index.js CHANGED Viewed

@@ -1014,9 +1014,9 @@ var path3 = __toESM(require("path"));
 var AIManager = class {
   /**
    * @constructor
-   * @param {Context} ctx - Koishi 的上下文对象，提供框架核心功能。
+   * @param {Context} ctx - Koishi 的上下文对象，用于访问核心服务如数据库和 HTTP 客户端。
    * @param {Config} config - 插件的配置对象。
-   * @param {Logger} logger - 日志记录器实例，用于输出日志。
+   * @param {Logger} logger - 日志记录器实例。
    * @param {FileManager} fileManager - 文件管理器实例，用于处理媒体文件。
    */
   constructor(ctx, config, logger2, fileManager) {
@@ -1038,11 +1038,39 @@ var AIManager = class {
     __name(this, "AIManager");
   }
   http;
-  requestCount = 0;
-  rateLimitResetTime = 0;
+  ANALYSIS_SYSTEM_PROMPT = `你是一位专业的“数字人类学家”和“迷因（Meme）专家”，擅长分析解读网络社群“回声洞”（一种消息存档）中的内容。这些内容通常是笑话、网络梗、游戏截图、或有趣的引言。你的任务是分析用户提供的内容（可能包含文本和图片），并以严格的 JSON 格式返回分析结果。
+请严格遵循以下规则和格式：
+1.  **角色定位**：将自己视为熟悉网络流行文化、游戏、动漫和各类“梗”的专家。
+2.  **语言要求**：\`keywords\` 和 \`description\` 的内容必须全部为中文。
+3.  **分析与输出**：你的回复**必须且只能**是一个包裹在 \`\`\`json ... \`\`\` 代码块中的 JSON 对象，不包含任何解释性文字。该 JSON 对象必须包含以下三个键：
+    *   \`"keywords"\` (字符串数组): 提取一组全面的中文标签 (tags)，这组标签的组合应能**精准地定义和分类**该内容，便于未来搜索。不需要限制数量，但追求准确和全面，应包含具体的人名、作品名、游戏名、事件名、或网络梗的专有名词。
+    *   \`"description"\` (字符串): 用一句简洁的中文**概括内容的核心思想或解释其“梗”的来源和用法**。
+    *   \`"rating"\` (0-100的整数): 根据以下**细化评分标准**进行综合评分：
+        *   **创意与原创性 (0-10分)**：是否为原创或独特的二次创作？常见的截图或转发应酌情减分。
+        *   **趣味性与信息量 (0-40分)**：内容是否有趣、引人发笑或包含有价值的信息？
+        *   **文化价值与传播潜力 (0-30分)**：是否属于经典“梗”或具有成为新流行“梗”的潜力？
+        *   **内容质量与清晰度 (0-20分)**：对于图片，是否清晰、无过多水印或压缩痕迹？对于文本，是否排版清晰、易于阅读？**图片模糊、带有严重水印应在此项大幅扣分**。`;
+  DUPLICATE_CHECK_SYSTEM_PROMPT = `你是一位严谨的“网络文化内容查重专家”，尤其擅长识别网络梗、Copypasta（定型文）和笑话的变体。你的任务是比较用户提供的两段内容（content_a 和 content_b），判断它们在**语义上或作为“梗”的本质上是否表达了相同或高度相似的核心思想**。
+请严格遵循以下规则：
+1.  **重复的核心定义**：专注于核心含义，忽略无关紧要的格式、标点符号、错别字或语气差异。只要两段内容指向**同一个梗、同一个笑话、同一个句式模板或同一个核心事件**，就应视为重复。
+2.  **常见的重复类型包括**：
+    *   **文字变体**：用词略有不同，但表达完全相同的意思。
+    *   **句式模板应用**：使用相同的“梗”句式，即使替换了其中的主体。
+    *   **核心思想转述**：用不同的话复述了同一个意思或笑话。
+    *   **跨语言相同梗**：同一个梗的不同语言或音译版本。
+3.  **非重复的界定**：主题相似但**核心信息、笑点或结论不同**，则不应视为重复。
+4.  **严格的JSON输出**：你的回复**必须且只能**是一个包裹在 \`\`\`json ... \`\`\` 代码块中的 JSON 对象。
+5.  **唯一的输出键**：该 JSON 对象必须仅包含一个布尔类型的键 \`"duplicate"\`。如果内容重复或高度相似，值为 \`true\`，否则为 \`false\`。`;
   /**
-   * @description 注册所有与 AIManager 功能相关的 Koishi 命令。
-   * @param {any} cave - Koishi 命令实例，用于挂载子命令。
+   * @description 注册所有与 AIManager 功能相关的 Koishi 命令，包括 AI 分析和内容比较。
+   * @param {any} cave - 主命令的实例，用于挂载子命令。
    */
   registerCommands(cave) {
     cave.subcommand(".ai", "分析回声洞", { hidden: true, authority: 4 }).usage("分析尚未分析的回声洞，补全回声洞记录。").action(async ({ session }) => {
@@ -1053,15 +1081,18 @@ var AIManager = class {
         const cavesToAnalyze = allCaves.filter((cave2) => !analyzedCaveIds.has(cave2.id));
         if (cavesToAnalyze.length === 0) return "无需分析回声洞";
         await session.send(`开始分析 ${cavesToAnalyze.length} 个回声洞...`);
-        let totalSuccessCount = 0;
+        let successCount = 0;
         const batchSize = 10;
         for (let i = 0; i < cavesToAnalyze.length; i += batchSize) {
           const batch = cavesToAnalyze.slice(i, i + batchSize);
           this.logger.info(`[${i + 1}/${cavesToAnalyze.length}] 正在分析 ${batch.length} 条回声洞...`);
-          const successCountInBatch = await this.analyzeAndStore(batch);
-          totalSuccessCount += successCountInBatch;
+          const analyses = await this.analyze(batch);
+          if (analyses.length > 0) {
+            await this.ctx.database.upsert("cave_meta", analyses);
+            successCount += analyses.length;
+          }
         }
-        return `已分析 ${totalSuccessCount} 个回声洞`;
+        return `已分析 ${successCount} 个回声洞`;
       } catch (error) {
         this.logger.error("分析回声洞失败:", error);
         return `操作失败: ${error.message}`;
@@ -1082,12 +1113,8 @@ var AIManager = class {
             const meta2 = allMeta[j];
             const pairKey = [meta1.cave, meta2.cave].sort((a, b) => a - b).join("-");
             if (checkedPairs.has(pairKey)) continue;
-            const keywords1 = new Set(meta1.keywords);
-            const keywords2 = new Set(meta2.keywords);
-            const intersection = new Set([...keywords1].filter((x) => keywords2.has(x)));
-            const union = /* @__PURE__ */ new Set([...keywords1, ...keywords2]);
-            const similarity = union.size > 0 ? intersection.size / union.size : 0;
-            if (similarity * 100 >= 80) {
+            const similarity = this.calculateKeywordSimilarity(meta1.keywords, meta2.keywords);
+            if (similarity >= 80) {
               const cave1 = allCaves.get(meta1.cave);
               const cave2 = allCaves.get(meta2.cave);
               if (cave1 && cave2 && await this.isContentDuplicateAI(cave1, cave2)) foundPairs.add(`${cave1.id} & ${cave2.id}`);
@@ -1095,11 +1122,9 @@ var AIManager = class {
             }
           }
         }
-        if (foundPairs.size === 0) return "未发现高重复性的内容";
-        let report = `已发现 ${foundPairs.size} 组高重复性的内容:
-`;
-        report += [...foundPairs].join("\n");
-        return report.trim();
+        if (foundPairs.size === 0) return "检查完成，未发现高重复性的内容。";
+        return `检查完成，共发现 ${foundPairs.size} 组可能重复的内容:
+${[...foundPairs].join("\n")}`;
       } catch (error) {
         this.logger.error("检查重复性失败:", error);
         return `检查失败: ${error.message}`;
@@ -1109,143 +1134,121 @@ var AIManager = class {
   /**
    * @description Runs an AI-driven duplicate check on newly submitted content.
    * @param {StoredElement[]} newElements - Elements of the newly submitted content.
-   * @param {{ fileName: string; buffer: Buffer }[]} [mediaBuffers] - Optional array of media file buffers.
-   * @returns {Promise<{ duplicate: boolean; ids?: number[] }>} A Promise resolving to an object that says whether the content is a duplicate and, if so, lists the IDs of the duplicated caves.
+   * @param {{ fileName: string; buffer: Buffer }[]} [mediaBuffers] - (Optional) media file buffers associated with the content.
+   * @returns {Promise<{ duplicate: boolean; ids?: number[] }>} An object holding the verdict and, when duplicates were found, the IDs of the duplicated caves.
+   * @throws {Error} NOTE(review): not observable by callers as written - the try/catch in this method logs every error and resolves to `{ duplicate: false }`.
    */
   async checkForDuplicates(newElements, mediaBuffers) {
     try {
       const dummyCave = { id: 0, elements: newElements, channelId: "", userId: "", userName: "", status: "preload", time: /* @__PURE__ */ new Date() };
-      const [newAnalysis] = await this.getAnalyses([dummyCave], mediaBuffers ? new Map(mediaBuffers.map((m) => [m.fileName, m.buffer])) : void 0);
-      if (!newAnalysis?.keywords?.length) return { duplicate: false, ids: [] };
+      const [newAnalysis] = await this.analyze([dummyCave], mediaBuffers);
+      if (!newAnalysis?.keywords?.length) return { duplicate: false };
       const allMeta = await this.ctx.database.get("cave_meta", {}, { fields: ["cave", "keywords"] });
-      const newKeywordsSet = new Set(newAnalysis.keywords);
-      const similarCaveIds = allMeta.filter((meta) => {
-        if (!meta.keywords?.length) return false;
-        const existingKeywordsSet = new Set(meta.keywords);
-        const intersection = new Set([...newKeywordsSet].filter((x) => existingKeywordsSet.has(x)));
-        const union = /* @__PURE__ */ new Set([...newKeywordsSet, ...existingKeywordsSet]);
-        const similarity = union.size > 0 ? intersection.size / union.size : 0;
-        return similarity * 100 >= 80;
-      }).map((meta) => meta.cave);
-      if (similarCaveIds.length === 0) return { duplicate: false, ids: [] };
+      const similarCaveIds = allMeta.filter((meta) => this.calculateKeywordSimilarity(newAnalysis.keywords, meta.keywords) >= 80).map((meta) => meta.cave);
+      if (similarCaveIds.length === 0) return { duplicate: false };
       const potentialDuplicates = await this.ctx.database.get("cave", { id: { $in: similarCaveIds } });
-      const duplicateIds = [];
-      for (const existingCave of potentialDuplicates) if (await this.isContentDuplicateAI(dummyCave, existingCave)) duplicateIds.push(existingCave.id);
+      const comparisonPromises = potentialDuplicates.map(async (existingCave) => {
+        if (await this.isContentDuplicateAI(dummyCave, existingCave)) return existingCave.id;
+        return null;
+      });
+      const duplicateIds = (await Promise.all(comparisonPromises)).filter((id) => id !== null);
       return { duplicate: duplicateIds.length > 0, ids: duplicateIds };
     } catch (error) {
       this.logger.error("查重回声洞出错:", error);
-      return { duplicate: false, ids: [] };
+      return { duplicate: false };
     }
   }
   /**
-   * @description Runs the complete analyze-and-store pipeline for one or many caves.
-   * @param {CaveObject[]} caves - Cave objects to analyze.
-   * @param {{ fileName: string; buffer: Buffer }[]} [mediaBuffers] - Optional array of media file buffers, only used when analyzing new content.
-   * @returns {Promise<number>} A Promise resolving to the number of entries that were analyzed and stored.
+   * @description Analyzes the content of one or many caves: extracts keywords, writes a description and assigns a rating. Nothing is stored here; callers upsert the result.
+   * @param {CaveObject[]} caves - Cave objects to analyze.
+   * @param {{ fileName: string; buffer: Buffer }[]} [mediaBuffers] - (Optional) preloaded media file buffers, used to avoid reading the files again.
+   * @returns {Promise<CaveMetaObject[]>} A Promise resolving to the array of `CaveMetaObject` analysis results; caves with no text and no readable image are omitted.
    */
-  async analyzeAndStore(caves, mediaBuffers) {
+  async analyze(caves, mediaBuffers) {
     const mediaMap = mediaBuffers ? new Map(mediaBuffers.map((m) => [m.fileName, m.buffer])) : void 0;
-    const results = await this.getAnalyses(caves, mediaMap);
-    if (!results?.length) return 0;
-    const caveMetaObjects = results.map((res) => ({
-      cave: res.cave,
-      keywords: res.keywords || [],
-      description: res.description || "",
-      rating: Math.max(0, Math.min(100, res.rating || 0))
-    }));
-    await this.ctx.database.upsert("cave_meta", caveMetaObjects);
-    return caveMetaObjects.length;
+    const analysisPromises = caves.map(async (cave) => {
+      const combinedText = cave.elements.filter((el) => el.type === "text" && el.content).map((el) => el.content).join("\n");
+      const imageElements = await Promise.all(
+        cave.elements.filter((el) => el.type === "image" && el.file).map(async (el) => {
+          try {
+            const buffer = mediaMap?.get(el.file) ?? await this.fileManager.readFile(el.file);
+            const mimeType = path3.extname(el.file).toLowerCase() === ".png" ? "image/png" : "image/jpeg";
+            return {
+              type: "image_url",
+              image_url: { url: `data:${mimeType};base64,${buffer.toString("base64")}` }
+            };
+          } catch (error) {
+            this.logger.warn(`读取文件（${el.file}）失败:`, error);
+          }
+        })
+      );
+      const images = imageElements.filter(Boolean);
+      if (!combinedText.trim() && images.length === 0) return null;
+      const contentForAI = [{ type: "text", text: `请分析以下内容：
+${combinedText}` }, ...images];
+      const userMessage = { role: "user", content: contentForAI };
+      const response = await this.requestAI([userMessage], this.ANALYSIS_SYSTEM_PROMPT);
+      if (response) return {
+        cave: cave.id,
+        keywords: response.keywords || [],
+        description: response.description || "",
+        rating: Math.max(0, Math.min(100, response.rating || 0))
+      };
+      return null;
+    });
+    const results = await Promise.all(analysisPromises); // NOTE(review): requestAI is no longer wrapped in a per-cave try/catch (the removed getAnalyses had one), so a single failed request rejects the whole batch - confirm this is intended.
+    return results.filter((result) => !!result);
   }
   /**
-   * @description Asks the AI whether the contents of two caves are duplicates or highly similar.
+   * @description Asks the AI whether the contents of two caves are semantic duplicates or highly similar. Only text elements are compared.
    * @param {CaveObject} caveA - First cave object.
    * @param {CaveObject} caveB - Second cave object.
-   * @returns {Promise<boolean>} true when the contents are similar, false otherwise.
+   * @returns {Promise<boolean>} true when the AI judges the contents to be duplicates, false otherwise (including when the request fails).
+   * @throws {Error} NOTE(review): not observable by callers as written - the try/catch in this method logs the failure and resolves to false.
+   * @private
    */
   async isContentDuplicateAI(caveA, caveB) {
     try {
       const formatContent = /* @__PURE__ */ __name((elements) => elements.filter((el) => el.type === "text" && el.content).map((el) => el.content).join(" "), "formatContent");
-      const userMessage = {
-        role: "user",
-        content: JSON.stringify({
-          content_a: { id: caveA.id, text: formatContent(caveA.elements) },
-          content_b: { id: caveB.id, text: formatContent(caveB.elements) }
-        })
+      const userMessageContent = {
+        content_a: { id: caveA.id, text: formatContent(caveA.elements) },
+        content_b: { id: caveB.id, text: formatContent(caveB.elements) }
       };
-      const prompt = `你是一位内容查重专家。请判断 content_a 和 content_b 是否重复或高度相似。你的回复必须且只能是一个包裹在 \`\`\`json ... \`\`\` 代码块中的 JSON 对象，该对象仅包含一个键 "duplicate" (布尔值)。`;
-      const response = await this.requestAI([userMessage], prompt);
-      return response.duplicate || false;
+      const userMessage = { role: "user", content: JSON.stringify(userMessageContent) };
+      const response = await this.requestAI([userMessage], this.DUPLICATE_CHECK_SYSTEM_PROMPT);
+      return response?.duplicate || false;
     } catch (error) {
       this.logger.error(`比较回声洞（${caveA.id}）与（${caveB.id}）失败:`, error);
       return false;
     }
   }
   /**
-   * @description Prepares the content of each cave in a batch and requests an AI analysis for it (one request per cave), collecting the results.
-   * @param {CaveObject[]} caves - Cave objects to analyze.
-   * @param {Map<string, Buffer>} [mediaBufferMap] - Optional map from media file name to its buffer.
-   * @returns {Promise<CaveMetaObject[]>} A Promise resolving to the array of analysis results returned by the AI.
+   * @description Computes the Jaccard similarity between two keyword lists.
+   * Jaccard similarity = (size of the intersection / size of the union).
+   * @param {string[]} keywordsA - First keyword list.
+   * @param {string[]} keywordsB - Second keyword list.
+   * @returns {number} Similarity score between 0 and 100; 0 when either list is missing or empty.
+   * @private
    */
-  async getAnalyses(caves, mediaBufferMap) {
-    const results = [];
-    for (const cave of caves) {
-      try {
-        const combinedText = cave.elements.filter((el) => el.type === "text" && el.content).map((el) => el.content).join("\n");
-        const imageElements = await Promise.all(
-          cave.elements.filter((el) => el.type === "image" && el.file).map(async (el) => {
-            try {
-              const buffer = mediaBufferMap?.get(el.file) ?? await this.fileManager.readFile(el.file);
-              const mimeType = path3.extname(el.file).toLowerCase() === ".png" ? "image/png" : "image/jpeg";
-              return {
-                type: "image_url",
-                image_url: { url: `data:${mimeType};base64,${buffer.toString("base64")}` }
-              };
-            } catch (error) {
-              this.logger.warn(`读取文件（${el.file}）失败:`, error);
-              return null;
-            }
-          })
-        );
-        const validImages = imageElements.filter(Boolean);
-        if (!combinedText.trim() && validImages.length === 0) continue;
-        const contentForAI = [{ type: "text", text: combinedText }];
-        contentForAI.push(...validImages);
-        const userMessage = { role: "user", content: contentForAI };
-        const analysePrompt = `你是一位内容分析专家。请使用中文，分析我提供的内容（包含文本和可能的图片），并为其总结关键词、概括内容并评分。你的回复必须且只能是一个包裹在 \`\`\`json ... \`\`\` 代码块中的有效 JSON 对象。该对象必须包含 "keywords" (字符串数组), "description" (字符串), 和 "rating" (0-100的整数)。`;
-        const response = await this.requestAI([userMessage], analysePrompt);
-        if (response) {
-          results.push({
-            cave: cave.id,
-            keywords: response.keywords || [],
-            description: response.description || "",
-            rating: response.rating || 0
-          });
-        }
-      } catch (error) {
-        this.logger.error(`分析回声洞（${cave.id}）失败:`, error);
-      }
-    }
-    return results;
+  calculateKeywordSimilarity(keywordsA, keywordsB) {
+    if (!keywordsA?.length || !keywordsB?.length) return 0;
+    const setA = new Set(keywordsA);
+    const setB = new Set(keywordsB);
+    const intersection = new Set([...setA].filter((x) => setB.has(x)));
+    const union = /* @__PURE__ */ new Set([...setA, ...setB]);
+    return union.size > 0 ? intersection.size / union.size * 100 : 0;
   }
   /**
-   * @description 封装了向 OpenAI 兼容的 API 发送请求的底层逻辑，并稳健地解析 JSON 响应。
-   * @param {any[]} messages - 发送给 AI 的消息数组，遵循 OpenAI 格式。
-   * @param {string} systemPrompt - 系统提示词，用于指导 AI 的行为。
-   * @returns {Promise<T>} 一个 Promise，解析为从 AI 接收到的、解析后的 JSON 对象。
-   * @throws {Error} 当 AI 返回空或无效内容时抛出错误。
+   * @description 封装了向 OpenAI 兼容的 API 发送请求的底层逻辑。
+   * @template T - 期望从 AI 响应的 JSON 中解析出的数据类型。
+   * @param {any[]} messages - 发送给 AI 的消息数组，通常包含用户消息。
+   * @param {string} systemPrompt - 指导 AI 行为的系统级指令。
+   * @returns {Promise<T>} 一个 Promise，解析为从 AI 响应中提取并解析的 JSON 对象。
+   * @throws {Error} 当网络请求失败、AI 未返回有效内容或 JSON 解析失败时抛出。
+   * @private
    */
   async requestAI(messages, systemPrompt) {
-    const now = Date.now();
-    if (now > this.rateLimitResetTime) {
-      this.rateLimitResetTime = now + 6e4;
-      this.requestCount = 0;
-    }
-    if (this.requestCount >= this.config.aiRPM) {
-      const delay = this.rateLimitResetTime - now;
-      if (delay > 0) await new Promise((resolve) => setTimeout(resolve, delay));
-      this.rateLimitResetTime = Date.now() + 6e4;
-      this.requestCount = 0;
-    }
     const payload = {
       model: this.config.aiModel,
       messages: [{ role: "system", content: systemPrompt }, ...messages]
@@ -1255,26 +1258,17 @@ var AIManager = class {
       "Content-Type": "application/json",
       "Authorization": `Bearer ${this.config.aiApiKey}`
     };
-    this.requestCount++;
-    const response = await this.http.post(fullUrl, payload, { headers, timeout: 9e4 });
+    const response = await this.http.post(fullUrl, payload, { headers, timeout: 6e4 });
     const content = response?.choices?.[0]?.message?.content;
-    if (typeof content !== "string" || !content.trim()) {
-      this.logger.error("原始响应:", JSON.stringify(response, null, 2));
-      throw new Error("响应无效");
-    }
+    if (typeof content !== "string" || !content.trim()) throw new Error();
     try {
       const jsonRegex = /```json\s*([\s\S]*?)\s*```/;
       const match = content.match(jsonRegex);
-      let jsonString = "";
-      if (match && match[1]) {
-        jsonString = match[1];
-      } else {
-        jsonString = content;
-      }
+      const jsonString = match && match[1] ? match[1] : content;
       return JSON.parse(jsonString);
     } catch (error) {
-      this.logger.error("解析 JSON 失败:", error);
-      throw new Error("解析失败");
+      this.logger.error("解析 AI 响应 JSON 失败:", error, "原始响应:", JSON.stringify(response, null, 2), "内容:", content);
+      throw new error();
     }
   }
 };
@@ -1311,10 +1305,9 @@ var Config = import_koishi3.Schema.intersect([
   }).description("复核配置"),
   import_koishi3.Schema.object({
     enableAI: import_koishi3.Schema.boolean().default(false).description("启用 AI"),
-    aiEndpoint: import_koishi3.Schema.string().description("端点 (Endpoint)").role("link").default("https://generativelanguage.googleapis.com/v1beta/openai"),
+    aiEndpoint: import_koishi3.Schema.string().description("端点 (Endpoint)").role("link").default("https://api.siliconflow.cn/v1"),
     aiApiKey: import_koishi3.Schema.string().description("密钥 (Key)").role("secret"),
-    aiModel: import_koishi3.Schema.string().description("模型 (Model)").default("gemini-2.5-flash"),
-    aiRPM: import_koishi3.Schema.number().description("每分钟请求数 (RPM)").default(60)
+    aiModel: import_koishi3.Schema.string().description("模型 (Model)").default("THUDM/GLM-4.1V-9B-Thinking")
   }).description("模型配置"),
   import_koishi3.Schema.object({
     localPath: import_koishi3.Schema.string().description("文件映射路径"),
@@ -1430,7 +1423,10 @@ function apply(ctx, config) {
       if (hasMedia) finalStatus = await handleFileUploads(ctx, config, fileManager, logger, newCave, downloadedMedia, reusableIds, needsReview);
       if (finalStatus !== "preload") {
         newCave.status = finalStatus;
-        if (aiManager) await aiManager.analyzeAndStore([newCave], downloadedMedia);
+        if (aiManager) {
+          const analyses = await aiManager.analyze([newCave], downloadedMedia);
+          if (analyses.length > 0) await ctx.database.upsert("cave_meta", analyses);
+        }
         if (hashManager) {
           const allHashesToInsert = [...textHashesToStore, ...imageHashesToStore].map((h4) => ({ ...h4, cave: newCave.id }));
           if (allHashesToInsert.length > 0) await ctx.database.upsert("cave_hash", allHashesToInsert);

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "koishi-plugin-best-cave",
   "description": "功能强大、高度可定制的回声洞。支持丰富的媒体类型、内容查重、人工审核、用户昵称、数据迁移以及本地/S3 双重文件存储后端。",
-  "version": "2.7.18",
+  "version": "2.7.19",
   "contributors": [
     "Yis_Rime <yis_rime@outlook.com>"
   ],