npm - koishi-plugin-best-cave - Versions diffs - 2.2.7 → 2.2.9 - Mend

koishi-plugin-best-cave 2.2.7 → 2.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/lib/HashManager.d.ts CHANGED Viewed

@@ -7,12 +7,12 @@ import { FileManager } from './FileManager';
 export interface CaveHashObject {
     cave: number;
     hash: string;
-    type: 'simhash' | 'phash_color' | 'dhash_gray' | 'sub_phash_q1' | 'sub_phash_q2' | 'sub_phash_q3' | 'sub_phash_q4';
+    type: 'simhash' | 'phash_g' | 'phash_q1' | 'phash_q2' | 'phash_q3' | 'phash_q4';
 }
 /**
  * @class HashManager
- * @description 封装了所有与文本和图片哈希生成、相似度比较、以及相关命令的功能。
- * 实现了高精度的混合策略查重方案。
+ * @description 负责生成、存储和比较文本与图片的哈希值。
+ * 实现了基于 Simhash 的文本查重和基于 DCT 感知哈希 (pHash) 的图片查重方案。
  */
 export declare class HashManager {
     private ctx;
@@ -22,36 +22,40 @@ export declare class HashManager {
     /**
      * @constructor
      * @param ctx - Koishi 上下文，用于数据库操作。
-     * @param config - 插件配置，用于获取相似度阈值。
+     * @param config - 插件配置，用于获取相似度阈值等。
      * @param logger - 日志记录器实例。
-     * @param fileManager - 文件管理器实例，用于处理历史数据。
+     * @param fileManager - 文件管理器实例，用于读取图片文件。
      */
     constructor(ctx: Context, config: Config, logger: Logger, fileManager: FileManager);
     /**
-     * @description 注册与哈希校验相关的子命令。
+     * @description 注册与哈希功能相关的 `.hash` 和 `.check` 子命令。
      * @param cave - 主 `cave` 命令实例。
      */
     registerCommands(cave: any): void;
     /**
      * @description 检查数据库中所有回声洞，为没有哈希记录的历史数据生成哈希。
-     * @returns {Promise<string>} 一个包含操作结果的报告字符串。
+     * @returns 一个包含操作结果的报告字符串。
      */
     generateHashesForHistoricalCaves(): Promise<string>;
     /**
-     * @description 为单个回声洞对象生成所有类型的哈希。
+     * @description 为单个回声洞对象生成所有类型的哈希（文本+图片）。
      * @param cave - 回声洞对象。
-     * @returns {Promise<CaveHashObject[]>} 生成的哈希对象数组。
+     * @returns 生成的哈希对象数组。
      */
     generateAllHashesForCave(cave: Pick<CaveObject, 'id' | 'elements'>): Promise<CaveHashObject[]>;
     /**
-     * @description 为单个图片Buffer生成所有类型的哈希。
+     * @description 对数据库中所有哈希进行两两比较，找出相似度过高的内容。
+     * @returns 一个包含检查结果的报告字符串。
+     */
+    checkForSimilarCaves(): Promise<string>;
+    /**
+     * @description 为单个图片Buffer生成全局pHash和四个象限的局部pHash。
      * @param imageBuffer - 图片的Buffer数据。
-     * @returns {Promise<object>} 包含所有图片哈希的对象。
+     * @returns 包含全局哈希和四象限哈希的对象。
      */
     generateAllImageHashes(imageBuffer: Buffer): Promise<{
-        colorPHash: string;
-        dHash: string;
-        subHashes: {
+        globalHash: string;
+        quadrantHashes: {
             q1: string;
             q2: string;
             q3: string;
@@ -59,58 +63,36 @@ export declare class HashManager {
         };
     }>;
     /**
-     * @description 对回声洞进行混合策略的相似度与重复内容检查。
-     * @returns {Promise<string>} 一个包含操作结果的报告字符串。
-     */
-    checkForSimilarCaves(): Promise<string>;
-    /**
-     * @description 从单通道原始像素数据计算pHash。
-     * @param channelData - 单通道的像素值数组。
-     * @param size - 图像的边长（例如16）。
-     * @returns {string} 该通道的二进制哈希字符串。
+     * @description 执行二维离散余弦变换 (DCT-II)。
+     * @param matrix - 输入的 N x N 像素亮度矩阵。
+     * @returns DCT变换后的 N x N 系数矩阵。
      */
-    private _calculateHashFromRawChannel;
+    private _dct2D;
     /**
-     * @description 生成768位颜色感知哈希（Color pHash）。
-     * @param imageBuffer - 图片的 Buffer 数据。
-     * @returns {Promise<string>} 768位二进制哈希对应的192位十六进制字符串。
+     * @description pHash 算法核心实现。
+     * @param imageBuffer - 图片的Buffer。
+     * @param size - 期望的哈希位数 (必须是完全平方数, 如 64 或 256)。
+     * @returns 十六进制pHash字符串。
      */
-    generateColorPHash(imageBuffer: Buffer): Promise<string>;
-    /**
-     * @description 生成256位差异哈希（dHash）。
-     * @param imageBuffer - 图片的 Buffer 数据。
-     * @returns {Promise<string>} 256位二进制哈希对应的64位十六进制字符串。
-     */
-    generateDHash(imageBuffer: Buffer): Promise<string>;
-    /**
-     * @description 将图片切割为4个象限并为每个象限生成Color pHash。
-     * @param imageBuffer - 图片的 Buffer 数据。
-     * @returns {Promise<object>} 包含四个象限哈希的对象。
-     */
-    generateImageSubHashes(imageBuffer: Buffer): Promise<{
-        q1: string;
-        q2: string;
-        q3: string;
-        q4: string;
-    }>;
+    private _generatePHash;
     /**
-     * @description 计算两个十六进制哈希字符串之间的汉明距离。
-     * @param hex1 - 第一个十六进制哈希字符串。
-     * @param hex2 - 第二个十六进制哈希字符串。
-     * @returns {number} 两个哈希之间的距离。
+     * @description 计算两个十六进制哈希字符串之间的汉明距离 (不同位的数量)。
+     * @param hex1 - 第一个哈希。
+     * @param hex2 - 第二个哈希。
+     * @returns 汉明距离。
      */
     calculateHammingDistance(hex1: string, hex2: string): number;
     /**
-     * @description 根据汉明距离计算图片或文本哈希的相似度。
-     * @param hex1 - 第一个十六进制哈希字符串。
-     * @param hex2 - 第二个十六进制哈希字符串。
-     * @returns {number} 范围在0到1之间的相似度得分。
+     * @description 根据汉明距离计算相似度百分比。
+     * @param hex1 - 第一个哈希。
+     * @param hex2 - 第二个哈希。
+     * @returns 相似度 (0-100)。
      */
     calculateSimilarity(hex1: string, hex2: string): number;
     /**
-     * @description 为文本生成基于 Simhash 算法的哈希字符串。
+     * @description 为文本生成 64 位 Simhash 字符串。
      * @param text - 需要处理的文本。
-     * @returns {string} 64位二进制 Simhash 对应的16位十六进制字符串。
+     * @returns 16位十六进制 Simhash 字符串。
      */
     generateTextSimhash(text: string): string;
 }

package/lib/index.d.ts CHANGED Viewed

@@ -39,7 +39,7 @@ export interface Config {
     caveFormat: string;
     enableSimilarity: boolean;
     textThreshold: number;
-    imageThreshold: number;
+    imageWholeThreshold: number;
     localPath?: string;
     enableS3: boolean;
     endpoint?: string;

package/lib/index.js CHANGED Viewed

@@ -431,62 +431,38 @@ async function handleFileUploads(ctx, config, fileManager, logger2, reviewManage
   try {
     const downloadedMedia = [];
     const imageHashesToStore = [];
-    const existingHashes = hashManager ? await ctx.database.get("cave_hash", { type: { $ne: "simhash" } }) : [];
-    const existingColorPHashes = existingHashes.filter((h4) => h4.type === "phash_color");
-    const existingDHashes = existingHashes.filter((h4) => h4.type === "dhash_gray");
-    const existingSubHashObjects = existingHashes.filter((h4) => h4.type.startsWith("sub_phash_"));
+    const allExistingImageHashes = hashManager ? await ctx.database.get("cave_hash", { type: { $ne: "simhash" } }) : [];
+    const existingGlobalHashes = allExistingImageHashes.filter((h4) => h4.type === "phash_g");
+    const existingQuadrantHashes = allExistingImageHashes.filter((h4) => h4.type.startsWith("phash_q"));
     for (const media of mediaToToSave) {
       const buffer = Buffer.from(await ctx.http.get(media.sourceUrl, { responseType: "arraybuffer", timeout: 3e4 }));
       downloadedMedia.push({ fileName: media.fileName, buffer });
       if (hashManager && [".png", ".jpg", ".jpeg", ".webp"].includes(path2.extname(media.fileName).toLowerCase())) {
-        const { colorPHash, dHash, subHashes } = await hashManager.generateAllImageHashes(buffer);
-        let caveToDelete = null;
-        let highestCombinedSimilarity = 0;
-        const similarityScores = /* @__PURE__ */ new Map();
-        for (const existing of existingColorPHashes) {
-          const similarity = hashManager.calculateSimilarity(colorPHash, existing.hash);
-          if (similarity >= config.imageThreshold) {
-            if (!similarityScores.has(existing.cave)) similarityScores.set(existing.cave, {});
-            similarityScores.get(existing.cave).colorSim = similarity;
+        const { globalHash, quadrantHashes } = await hashManager.generateAllImageHashes(buffer);
+        for (const existing of existingGlobalHashes) {
+          const similarity = hashManager.calculateSimilarity(globalHash, existing.hash);
+          if (similarity >= config.imageWholeThreshold) {
+            await session.send(`图片与回声洞（${existing.cave}）的相似度为 ${similarity.toFixed(2)}%，超过阈值`);
+            await ctx.database.upsert("cave", [{ id: cave.id, status: "delete" }]);
+            cleanupPendingDeletions(ctx, fileManager, logger2, reusableIds);
+            return;
           }
         }
-        for (const existing of existingDHashes) {
-          const similarity = hashManager.calculateSimilarity(dHash, existing.hash);
-          if (similarity >= config.imageThreshold) {
-            if (!similarityScores.has(existing.cave)) similarityScores.set(existing.cave, {});
-            similarityScores.get(existing.cave).dSim = similarity;
-          }
-        }
-        for (const [caveId, scores] of similarityScores.entries()) {
-          if (scores.colorSim && scores.dSim) {
-            caveToDelete = caveId;
-            highestCombinedSimilarity = scores.colorSim;
-            break;
-          }
-        }
-        if (caveToDelete) {
-          await session.send(`图片与回声洞（${caveToDelete}）的相似度为 ${(highestCombinedSimilarity * 100).toFixed(2)}%，超过阈值`);
-          await ctx.database.upsert("cave", [{ id: cave.id, status: "delete" }]);
-          cleanupPendingDeletions(ctx, fileManager, logger2, reusableIds);
-          return;
-        }
         const notifiedPartialCaves = /* @__PURE__ */ new Set();
-        for (const newSubHash of Object.values(subHashes)) {
-          for (const existing of existingSubHashObjects) {
+        for (const newSubHash of Object.values(quadrantHashes)) {
+          for (const existing of existingQuadrantHashes) {
             if (notifiedPartialCaves.has(existing.cave)) continue;
-            const similarity = hashManager.calculateSimilarity(newSubHash, existing.hash);
-            if (similarity >= config.imageThreshold) {
-              await session.send(`图片局部与回声洞（${existing.cave}）的相似度为 ${(similarity * 100).toFixed(2)}%`);
+            if (newSubHash === existing.hash) {
+              await session.send(`图片局部与回声洞（${existing.cave}）存在完全相同的区块`);
               notifiedPartialCaves.add(existing.cave);
             }
           }
         }
-        imageHashesToStore.push({ hash: colorPHash, type: "phash_color" });
-        imageHashesToStore.push({ hash: dHash, type: "dhash_gray" });
-        imageHashesToStore.push({ hash: subHashes.q1, type: "sub_phash_q1" });
-        imageHashesToStore.push({ hash: subHashes.q2, type: "sub_phash_q2" });
-        imageHashesToStore.push({ hash: subHashes.q3, type: "sub_phash_q3" });
-        imageHashesToStore.push({ hash: subHashes.q4, type: "sub_phash_q4" });
+        imageHashesToStore.push({ hash: globalHash, type: "phash_g" });
+        imageHashesToStore.push({ hash: quadrantHashes.q1, type: "phash_q1" });
+        imageHashesToStore.push({ hash: quadrantHashes.q2, type: "phash_q2" });
+        imageHashesToStore.push({ hash: quadrantHashes.q3, type: "phash_q3" });
+        imageHashesToStore.push({ hash: quadrantHashes.q4, type: "phash_q4" });
       }
     }
     await Promise.all(downloadedMedia.map((item) => fileManager.saveFile(item.fileName, item.buffer)));
@@ -606,9 +582,9 @@ var HashManager = class {
   /**
    * @constructor
    * @param ctx - Koishi 上下文，用于数据库操作。
-   * @param config - 插件配置，用于获取相似度阈值。
+   * @param config - 插件配置，用于获取相似度阈值等。
    * @param logger - 日志记录器实例。
-   * @param fileManager - 文件管理器实例，用于处理历史数据。
+   * @param fileManager - 文件管理器实例，用于读取图片文件。
    */
   constructor(ctx, config, logger2, fileManager) {
     this.ctx = ctx;
@@ -627,7 +603,7 @@ var HashManager = class {
     __name(this, "HashManager");
   }
   /**
-   * @description 注册与哈希校验相关的子命令。
+   * @description 注册与哈希功能相关的 `.hash` 和 `.check` 子命令。
    * @param cave - 主 `cave` 命令实例。
    */
   registerCommands(cave) {
@@ -637,7 +613,7 @@ var HashManager = class {
         return "此指令仅限在管理群组中使用";
       }
     }, "adminCheck");
-    cave.subcommand(".hash", "校验回声洞").usage("校验所有回声洞，补全所有哈希记录。").action(async (argv) => {
+    cave.subcommand(".hash", "校验回声洞").usage("校验缺失哈希的回声洞，补全哈希记录。").action(async (argv) => {
       const checkResult = adminCheck(argv);
       if (checkResult) return checkResult;
       await argv.session.send("正在处理，请稍候...");
@@ -648,7 +624,7 @@ var HashManager = class {
         return `操作失败: ${error.message}`;
       }
     });
-    cave.subcommand(".check", "检查回声洞").usage("检查所有已存在哈希的回声洞的相似度。").action(async (argv) => {
+    cave.subcommand(".check", "检查相似度").usage("检查所有回声洞，找出相似度过高的内容。").action(async (argv) => {
       const checkResult = adminCheck(argv);
       if (checkResult) return checkResult;
       await argv.session.send("正在检查，请稍候...");
@@ -662,239 +638,160 @@ var HashManager = class {
   }
   /**
    * @description 检查数据库中所有回声洞，为没有哈希记录的历史数据生成哈希。
-   * @returns {Promise<string>} 一个包含操作结果的报告字符串。
+   * @returns 一个包含操作结果的报告字符串。
    */
   async generateHashesForHistoricalCaves() {
     const allCaves = await this.ctx.database.get("cave", { status: "active" });
-    const existingHashes = await this.ctx.database.get("cave_hash", {}, { fields: ["cave", "hash", "type"] });
+    const existingHashes = await this.ctx.database.get("cave_hash", {});
     const existingHashSet = new Set(existingHashes.map((h4) => `${h4.cave}-${h4.hash}-${h4.type}`));
-    const processedCaveIds = new Set(existingHashes.map((h4) => h4.cave));
-    const cavesToProcess = allCaves.filter((cave) => !processedCaveIds.has(cave.id));
-    const totalToProcessCount = cavesToProcess.length;
-    if (totalToProcessCount === 0) {
-      return "无需补全回声洞哈希";
-    }
-    this.logger.info(`开始补全 ${totalToProcessCount} 个回声洞的哈希...`);
+    if (allCaves.length === 0) return "无需补全回声洞哈希";
+    this.logger.info(`开始补全 ${allCaves.length} 个回声洞的哈希...`);
     let hashesToInsert = [];
-    const batchHashSet = /* @__PURE__ */ new Set();
     let processedCaveCount = 0;
     let totalHashesGenerated = 0;
     let errorCount = 0;
     const flushBatch = /* @__PURE__ */ __name(async () => {
-      const batchSize = hashesToInsert.length;
-      if (batchSize === 0) return;
+      if (hashesToInsert.length === 0) return;
       await this.ctx.database.upsert("cave_hash", hashesToInsert);
-      totalHashesGenerated += batchSize;
-      this.logger.info(`正在导入 ${batchSize} 条回声洞哈希... (已处理 ${processedCaveCount}/${totalToProcessCount})`);
+      totalHashesGenerated += hashesToInsert.length;
+      this.logger.info(`[${processedCaveCount}/${allCaves.length}] 正在导入 ${hashesToInsert.length} 条回声洞哈希...`);
       hashesToInsert = [];
-      batchHashSet.clear();
     }, "flushBatch");
-    for (const cave of cavesToProcess) {
+    for (const cave of allCaves) {
       processedCaveCount++;
       try {
         const newHashesForCave = await this.generateAllHashesForCave(cave);
         for (const hashObj of newHashesForCave) {
           const uniqueKey = `${hashObj.cave}-${hashObj.hash}-${hashObj.type}`;
-          if (!existingHashSet.has(uniqueKey) && !batchHashSet.has(uniqueKey)) {
+          if (!existingHashSet.has(uniqueKey)) {
             hashesToInsert.push(hashObj);
-            batchHashSet.add(uniqueKey);
+            existingHashSet.add(uniqueKey);
           }
         }
+        if (hashesToInsert.length >= 100) {
+          await flushBatch();
+        }
       } catch (error) {
         errorCount++;
-        this.logger.warn(`补全回声洞（${cave.id}）时发生错误: ${error.message}`);
-        continue;
-      }
-      if (hashesToInsert.length >= 100) {
-        await flushBatch();
+        this.logger.warn(`补全回声洞（${cave.id}）哈希时发生错误: ${error.message}`);
       }
     }
     await flushBatch();
-    return `已补全 ${totalToProcessCount} 个回声洞的 ${totalHashesGenerated} 条哈希（失败${errorCount} 条）`;
+    return `已补全 ${allCaves.length} 个回声洞的 ${totalHashesGenerated} 条哈希（失败 ${errorCount} 条）`;
   }
   /**
-   * @description 为单个回声洞对象生成所有类型的哈希。
+   * @description 为单个回声洞对象生成所有类型的哈希（文本+图片）。
    * @param cave - 回声洞对象。
-   * @returns {Promise<CaveHashObject[]>} 生成的哈希对象数组。
+   * @returns 生成的哈希对象数组。
    */
   async generateAllHashesForCave(cave) {
-    const allHashes = [];
+    const tempHashes = [];
+    const uniqueHashTracker = /* @__PURE__ */ new Set();
+    const addUniqueHash = /* @__PURE__ */ __name((hashObj) => {
+      const key = `${hashObj.hash}-${hashObj.type}`;
+      if (!uniqueHashTracker.has(key)) {
+        tempHashes.push(hashObj);
+        uniqueHashTracker.add(key);
+      }
+    }, "addUniqueHash");
     const combinedText = cave.elements.filter((el) => el.type === "text" && el.content).map((el) => el.content).join(" ");
     if (combinedText) {
       const textHash = this.generateTextSimhash(combinedText);
-      if (textHash) {
-        allHashes.push({ cave: cave.id, hash: textHash, type: "simhash" });
-      }
+      if (textHash) addUniqueHash({ cave: cave.id, hash: textHash, type: "simhash" });
     }
     for (const el of cave.elements.filter((el2) => el2.type === "image" && el2.file)) {
       try {
         const imageBuffer = await this.fileManager.readFile(el.file);
-        const imageHashes = await this.generateAllImageHashes(imageBuffer);
-        allHashes.push({ cave: cave.id, hash: imageHashes.colorPHash, type: "phash_color" });
-        allHashes.push({ cave: cave.id, hash: imageHashes.dHash, type: "dhash_gray" });
-        allHashes.push({ cave: cave.id, hash: imageHashes.subHashes.q1, type: "sub_phash_q1" });
-        allHashes.push({ cave: cave.id, hash: imageHashes.subHashes.q2, type: "sub_phash_q2" });
-        allHashes.push({ cave: cave.id, hash: imageHashes.subHashes.q3, type: "sub_phash_q3" });
-        allHashes.push({ cave: cave.id, hash: imageHashes.subHashes.q4, type: "sub_phash_q4" });
+        const { globalHash, quadrantHashes } = await this.generateAllImageHashes(imageBuffer);
+        addUniqueHash({ cave: cave.id, hash: globalHash, type: "phash_g" });
+        addUniqueHash({ cave: cave.id, hash: quadrantHashes.q1, type: "phash_q1" });
+        addUniqueHash({ cave: cave.id, hash: quadrantHashes.q2, type: "phash_q2" });
+        addUniqueHash({ cave: cave.id, hash: quadrantHashes.q3, type: "phash_q3" });
+        addUniqueHash({ cave: cave.id, hash: quadrantHashes.q4, type: "phash_q4" });
       } catch (e) {
-        this.logger.warn(`无法为回声洞（${cave.id}）的内容（${el.file}）生成哈希:`, e);
+        this.logger.warn(`无法为回声洞（${cave.id}）的图片（${el.file}）生成哈希:`, e);
       }
     }
-    return allHashes;
-  }
-  /**
-   * @description 为单个图片Buffer生成所有类型的哈希。
-   * @param imageBuffer - 图片的Buffer数据。
-   * @returns {Promise<object>} 包含所有图片哈希的对象。
-   */
-  async generateAllImageHashes(imageBuffer) {
-    const [colorPHash, dHash, subHashes] = await Promise.all([
-      this.generateColorPHash(imageBuffer),
-      this.generateDHash(imageBuffer),
-      this.generateImageSubHashes(imageBuffer)
-    ]);
-    return { colorPHash, dHash, subHashes };
+    return tempHashes;
   }
   /**
-   * @description 对回声洞进行混合策略的相似度与重复内容检查。
-   * @returns {Promise<string>} 一个包含操作结果的报告字符串。
+   * @description 对数据库中所有哈希进行两两比较，找出相似度过高的内容。
+   * @returns 一个包含检查结果的报告字符串。
    */
   async checkForSimilarCaves() {
     const allHashes = await this.ctx.database.get("cave_hash", {});
-    const caves = await this.ctx.database.get("cave", { status: "active" }, { fields: ["id"] });
-    const allCaveIds = caves.map((c) => c.id);
-    const hashGroups = {
-      simhash: /* @__PURE__ */ new Map(),
-      phash_color: /* @__PURE__ */ new Map(),
-      dhash_gray: /* @__PURE__ */ new Map()
-    };
-    const subHashToCaves = /* @__PURE__ */ new Map();
+    const allCaveIds = [...new Set(allHashes.map((h4) => h4.cave))];
+    const textHashes = /* @__PURE__ */ new Map();
+    const globalHashes = /* @__PURE__ */ new Map();
+    const quadrantHashes = /* @__PURE__ */ new Map();
     for (const hash of allHashes) {
-      if (hashGroups[hash.type]) {
-        if (!hashGroups[hash.type].has(hash.cave)) hashGroups[hash.type].set(hash.cave, []);
-        hashGroups[hash.type].get(hash.cave).push(hash.hash);
-      } else if (hash.type.startsWith("sub_phash_")) {
-        if (!subHashToCaves.has(hash.hash)) subHashToCaves.set(hash.hash, /* @__PURE__ */ new Set());
-        subHashToCaves.get(hash.hash).add(hash.cave);
+      if (hash.type === "simhash") {
+        textHashes.set(hash.cave, hash.hash);
+      } else if (hash.type === "phash_g") {
+        globalHashes.set(hash.cave, hash.hash);
+      } else if (hash.type.startsWith("phash_q")) {
+        if (!quadrantHashes.has(hash.cave)) quadrantHashes.set(hash.cave, /* @__PURE__ */ new Set());
+        quadrantHashes.get(hash.cave).add(hash.hash);
       }
     }
     const similarPairs = {
       text: /* @__PURE__ */ new Set(),
-      image_color: /* @__PURE__ */ new Set(),
-      image_dhash: /* @__PURE__ */ new Set()
+      global: /* @__PURE__ */ new Set(),
+      partial: /* @__PURE__ */ new Set()
     };
     for (let i = 0; i < allCaveIds.length; i++) {
       for (let j = i + 1; j < allCaveIds.length; j++) {
         const id1 = allCaveIds[i];
         const id2 = allCaveIds[j];
-        const simhash1 = hashGroups.simhash.get(id1)?.[0];
-        const simhash2 = hashGroups.simhash.get(id2)?.[0];
-        if (simhash1 && simhash2) {
-          const sim = this.calculateSimilarity(simhash1, simhash2);
-          if (sim >= this.config.textThreshold) {
-            similarPairs.text.add(`${id1} & ${id2} = ${(sim * 100).toFixed(2)}%`);
+        const pair = [id1, id2].sort((a, b) => a - b).join(" & ");
+        const text1 = textHashes.get(id1);
+        const text2 = textHashes.get(id2);
+        if (text1 && text2) {
+          const similarity = this.calculateSimilarity(text1, text2);
+          if (similarity >= this.config.textThreshold) {
+            similarPairs.text.add(`${pair} = ${similarity.toFixed(2)}%`);
           }
         }
-        const colorHashes1 = hashGroups.phash_color.get(id1) || [];
-        const colorHashes2 = hashGroups.phash_color.get(id2) || [];
-        for (const h1 of colorHashes1) {
-          for (const h22 of colorHashes2) {
-            const sim = this.calculateSimilarity(h1, h22);
-            if (sim >= this.config.imageThreshold) {
-              similarPairs.image_color.add(`${id1} & ${id2} = ${(sim * 100).toFixed(2)}%`);
-            }
+        const global1 = globalHashes.get(id1);
+        const global2 = globalHashes.get(id2);
+        if (global1 && global2) {
+          const similarity = this.calculateSimilarity(global1, global2);
+          if (similarity >= this.config.imageWholeThreshold) {
+            similarPairs.global.add(`${pair} = ${similarity.toFixed(2)}%`);
           }
         }
-        const dHashes1 = hashGroups.dhash_gray.get(id1) || [];
-        const dHashes2 = hashGroups.dhash_gray.get(id2) || [];
-        for (const h1 of dHashes1) {
-          for (const h22 of dHashes2) {
-            const sim = this.calculateSimilarity(h1, h22);
-            if (sim >= this.config.imageThreshold) {
-              similarPairs.image_dhash.add(`${id1} & ${id2} = ${(sim * 100).toFixed(2)}%`);
+        const quads1 = quadrantHashes.get(id1);
+        const quads2 = quadrantHashes.get(id2);
+        if (quads1 && quads2 && quads1.size > 0 && quads2.size > 0) {
+          let matchFound = false;
+          for (const h1 of quads1) {
+            if (quads2.has(h1)) {
+              matchFound = true;
+              break;
             }
           }
+          if (matchFound) {
+            similarPairs.partial.add(pair);
+          }
         }
       }
     }
-    const subHashDuplicates = [];
-    subHashToCaves.forEach((caves2) => {
-      if (caves2.size > 1) {
-        const sortedCaves = [...caves2].sort((a, b) => a - b).join(", ");
-        subHashDuplicates.push(`[${sortedCaves}]`);
-      }
-    });
-    const totalFindings = similarPairs.text.size + similarPairs.image_color.size + similarPairs.image_dhash.size + subHashDuplicates.length;
+    const totalFindings = similarPairs.text.size + similarPairs.global.size + similarPairs.partial.size;
     if (totalFindings === 0) return "未发现高相似度的内容";
-    let report = `已发现 ${totalFindings} 组高相似度或重复的内容:`;
-    if (similarPairs.text.size > 0) report += "\n文本近似:\n" + [...similarPairs.text].join("\n");
-    if (similarPairs.image_color.size > 0) report += "\n图片整体相似:\n" + [...similarPairs.image_color].join("\n");
-    if (similarPairs.image_dhash.size > 0) report += "\n图片结构相似:\n" + [...similarPairs.image_dhash].join("\n");
-    if (subHashDuplicates.length > 0) report += "\n图片局部重复:\n" + [...new Set(subHashDuplicates)].join("\n");
+    let report = `已发现 ${totalFindings} 组高相似度的内容:`;
+    if (similarPairs.text.size > 0) report += "\n文本内容相似:\n" + [...similarPairs.text].join("\n");
+    if (similarPairs.global.size > 0) report += "\n图片整体相似:\n" + [...similarPairs.global].join("\n");
+    if (similarPairs.partial.size > 0) report += "\n图片局部相同:\n" + [...similarPairs.partial].join("\n");
     return report.trim();
   }
   /**
-   * @description 从单通道原始像素数据计算pHash。
-   * @param channelData - 单通道的像素值数组。
-   * @param size - 图像的边长（例如16）。
-   * @returns {string} 该通道的二进制哈希字符串。
-   */
-  _calculateHashFromRawChannel(channelData, size) {
-    const totalLuminance = channelData.reduce((acc, val) => acc + val, 0);
-    const avgLuminance = totalLuminance / (size * size);
-    return channelData.map((lum) => lum > avgLuminance ? "1" : "0").join("");
-  }
-  /**
-   * @description 生成768位颜色感知哈希（Color pHash）。
-   * @param imageBuffer - 图片的 Buffer 数据。
-   * @returns {Promise<string>} 768位二进制哈希对应的192位十六进制字符串。
-   */
-  async generateColorPHash(imageBuffer) {
-    const { data, info } = await (0, import_sharp.default)(imageBuffer).resize(16, 16, { fit: "fill" }).removeAlpha().raw().toBuffer({ resolveWithObject: true });
-    const { channels } = info;
-    const r = [], g = [], b = [];
-    for (let i = 0; i < data.length; i += channels) {
-      r.push(data[i]);
-      g.push(data[i + 1]);
-      b.push(data[i + 2]);
-    }
-    const rHash = this._calculateHashFromRawChannel(r, 16);
-    const gHash = this._calculateHashFromRawChannel(g, 16);
-    const bHash = this._calculateHashFromRawChannel(b, 16);
-    const combinedHash = rHash + gHash + bHash;
-    let hex = "";
-    for (let i = 0; i < combinedHash.length; i += 4) {
-      hex += parseInt(combinedHash.substring(i, i + 4), 2).toString(16);
-    }
-    return hex.padStart(192, "0");
-  }
-  /**
-   * @description 生成256位差异哈希（dHash）。
-   * @param imageBuffer - 图片的 Buffer 数据。
-   * @returns {Promise<string>} 256位二进制哈希对应的64位十六进制字符串。
-   */
-  async generateDHash(imageBuffer) {
-    const pixels = await (0, import_sharp.default)(imageBuffer).grayscale().resize(17, 16, { fit: "fill" }).raw().toBuffer();
-    let hash = "";
-    for (let y = 0; y < 16; y++) {
-      for (let x = 0; x < 16; x++) {
-        const i = y * 17 + x;
-        hash += pixels[i] > pixels[i + 1] ? "1" : "0";
-      }
-    }
-    return BigInt("0b" + hash).toString(16).padStart(64, "0");
-  }
-  /**
-   * @description 将图片切割为4个象限并为每个象限生成Color pHash。
-   * @param imageBuffer - 图片的 Buffer 数据。
-   * @returns {Promise<object>} 包含四个象限哈希的对象。
+   * @description 为单个图片Buffer生成全局pHash和四个象限的局部pHash。
+   * @param imageBuffer - 图片的Buffer数据。
+   * @returns 包含全局哈希和四象限哈希的对象。
    */
-  async generateImageSubHashes(imageBuffer) {
+  async generateAllImageHashes(imageBuffer) {
+    const globalHash = await this._generatePHash(imageBuffer, 256);
     const { width, height } = await (0, import_sharp.default)(imageBuffer).metadata();
-    if (!width || !height || width < 16 || height < 16) {
-      const fallbackHash = await this.generateColorPHash(imageBuffer);
-      return { q1: fallbackHash, q2: fallbackHash, q3: fallbackHash, q4: fallbackHash };
-    }
     const w2 = Math.floor(width / 2), h22 = Math.floor(height / 2);
     const regions = [
       { left: 0, top: 0, width: w2, height: h22 },
@@ -904,17 +801,73 @@ var HashManager = class {
     ];
     const [q1, q2, q3, q4] = await Promise.all(
       regions.map((region) => {
-        if (region.width < 8 || region.height < 8) return this.generateColorPHash(imageBuffer);
-        return (0, import_sharp.default)(imageBuffer).extract(region).toBuffer().then((b) => this.generateColorPHash(b));
+        if (region.width < 16 || region.height < 16) return this._generatePHash(imageBuffer, 64);
+        return (0, import_sharp.default)(imageBuffer).extract(region).toBuffer().then((b) => this._generatePHash(b, 64));
       })
     );
-    return { q1, q2, q3, q4 };
+    return { globalHash, quadrantHashes: { q1, q2, q3, q4 } };
   }
   /**
-   * @description 计算两个十六进制哈希字符串之间的汉明距离。
-   * @param hex1 - 第一个十六进制哈希字符串。
-   * @param hex2 - 第二个十六进制哈希字符串。
-   * @returns {number} 两个哈希之间的距离。
+   * @description 执行二维离散余弦变换 (DCT-II)。
+   * @param matrix - 输入的 N x N 像素亮度矩阵。
+   * @returns DCT变换后的 N x N 系数矩阵。
+   */
+  _dct2D(matrix) {
+    const N = matrix.length;
+    if (N === 0) return [];
+    const cosines = Array.from(
+      { length: N },
+      (_, i) => Array.from({ length: N }, (_2, j) => Math.cos(Math.PI * (2 * i + 1) * j / (2 * N)))
+    );
+    const applyDct1D = /* @__PURE__ */ __name((input) => {
+      const output = new Array(N).fill(0);
+      const scale = Math.sqrt(2 / N);
+      for (let k = 0; k < N; k++) {
+        let sum = 0;
+        for (let n = 0; n < N; n++) {
+          sum += input[n] * cosines[n][k];
+        }
+        output[k] = scale * sum;
+      }
+      output[0] /= Math.sqrt(2);
+      return output;
+    }, "applyDct1D");
+    const tempMatrix = matrix.map((row) => applyDct1D(row));
+    const transposed = tempMatrix[0].map((_, col) => tempMatrix.map((row) => row[col]));
+    const dctResult = transposed.map((row) => applyDct1D(row));
+    return dctResult[0].map((_, col) => dctResult.map((row) => row[col]));
+  }
+  /**
+   * @description pHash 算法核心实现。
+   * @param imageBuffer - 图片的Buffer。
+   * @param size - 期望的哈希位数 (必须是完全平方数, 如 64 或 256)。
+   * @returns 十六进制pHash字符串。
+   */
+  async _generatePHash(imageBuffer, size) {
+    const dctSize = 32;
+    const hashGridSize = Math.sqrt(size);
+    if (!Number.isInteger(hashGridSize)) throw new Error("哈希位数必须是完全平方数");
+    const pixels = await (0, import_sharp.default)(imageBuffer).grayscale().resize(dctSize, dctSize, { fit: "fill" }).raw().toBuffer();
+    const matrix = [];
+    for (let y = 0; y < dctSize; y++) {
+      matrix.push(Array.from(pixels.slice(y * dctSize, (y + 1) * dctSize)));
+    }
+    const dctMatrix = this._dct2D(matrix);
+    const coefficients = [];
+    for (let y = 0; y < hashGridSize; y++) {
+      for (let x = 0; x < hashGridSize; x++) {
+        coefficients.push(dctMatrix[y][x]);
+      }
+    }
+    const median = [...coefficients.slice(1)].sort((a, b) => a - b)[Math.floor((coefficients.length - 1) / 2)];
+    const binaryHash = coefficients.map((val) => val > median ? "1" : "0").join("");
+    return BigInt("0b" + binaryHash).toString(16).padStart(size / 4, "0");
+  }
+  /**
+   * @description 计算两个十六进制哈希字符串之间的汉明距离 (不同位的数量)。
+   * @param hex1 - 第一个哈希。
+   * @param hex2 - 第二个哈希。
+   * @returns 汉明距离。
    */
   calculateHammingDistance(hex1, hex2) {
     let distance = 0;
@@ -927,26 +880,24 @@ var HashManager = class {
     return distance;
   }
   /**
-   * @description 根据汉明距离计算图片或文本哈希的相似度。
-   * @param hex1 - 第一个十六进制哈希字符串。
-   * @param hex2 - 第二个十六进制哈希字符串。
-   * @returns {number} 范围在0到1之间的相似度得分。
+   * @description 根据汉明距离计算相似度百分比。
+   * @param hex1 - 第一个哈希。
+   * @param hex2 - 第二个哈希。
+   * @returns 相似度 (0-100)。
    */
   calculateSimilarity(hex1, hex2) {
     const distance = this.calculateHammingDistance(hex1, hex2);
     const hashLength = Math.max(hex1.length, hex2.length) * 4;
-    return hashLength === 0 ? 1 : 1 - distance / hashLength;
+    return hashLength === 0 ? 100 : (1 - distance / hashLength) * 100;
   }
   /**
-   * @description 为文本生成基于 Simhash 算法的哈希字符串。
+   * @description 为文本生成 64 位 Simhash 字符串。
    * @param text - 需要处理的文本。
-   * @returns {string} 64位二进制 Simhash 对应的16位十六进制字符串。
+   * @returns 16位十六进制 Simhash 字符串。
    */
   generateTextSimhash(text) {
     const cleanText = (text || "").toLowerCase().replace(/\s+/g, "");
-    if (!cleanText) {
-      return "";
-    }
+    if (!cleanText) return "";
     const n = 2;
     const tokens = /* @__PURE__ */ new Set();
     if (cleanText.length < n) {
@@ -957,9 +908,7 @@ var HashManager = class {
       }
     }
     const tokenArray = Array.from(tokens);
-    if (tokenArray.length === 0) {
-      return "";
-    }
+    if (tokenArray.length === 0) return "";
     const vector = new Array(64).fill(0);
     tokenArray.forEach((token) => {
       const hash = crypto.createHash("md5").update(token).digest();
@@ -973,8 +922,8 @@ var HashManager = class {
 };
 function hexToBinary(hex) {
   let bin = "";
-  for (let i = 0; i < hex.length; i++) {
-    bin += parseInt(hex[i], 16).toString(2).padStart(4, "0");
+  for (const char of hex) {
+    bin += parseInt(char, 16).toString(2).padStart(4, "0");
   }
   return bin;
 }
@@ -1008,8 +957,8 @@ var Config = import_koishi3.Schema.intersect([
   import_koishi3.Schema.object({
     enableReview: import_koishi3.Schema.boolean().default(false).description("启用审核"),
     enableSimilarity: import_koishi3.Schema.boolean().default(false).description("启用查重"),
-    textThreshold: import_koishi3.Schema.number().min(0).max(1).step(0.01).default(0.9).description("文本相似度阈值"),
-    imageThreshold: import_koishi3.Schema.number().min(0).max(1).step(0.01).default(0.9).description("图片相似度阈值")
+    textThreshold: import_koishi3.Schema.number().min(0).max(100).step(0.01).default(95).description("文本相似度阈值 (%)"),
+    imageWholeThreshold: import_koishi3.Schema.number().min(0).max(100).step(0.01).default(95).description("图片相似度阈值 (%)")
   }).description("复核配置"),
   import_koishi3.Schema.object({
     localPath: import_koishi3.Schema.string().description("文件映射路径"),
@@ -1086,7 +1035,7 @@ function apply(ctx, config) {
             for (const existing of existingTextHashes) {
               const similarity = hashManager.calculateSimilarity(newSimhash, existing.hash);
               if (similarity >= config.textThreshold) {
-                return `文本与回声洞（${existing.cave}）的相似度为 ${(similarity * 100).toFixed(2)}%，超过阈值`;
+                return `文本与回声洞（${existing.cave}）的相似度为 ${similarity.toFixed(2)}%，超过阈值`;
               }
             }
             textHashesToStore.push({ hash: newSimhash, type: "simhash" });

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "koishi-plugin-best-cave",
   "description": "功能强大、高度可定制的回声洞。支持丰富的媒体类型、内容查重、人工审核、用户昵称、数据迁移以及本地/S3 双重文件存储后端。",
-  "version": "2.2.7",
+  "version": "2.2.9",
   "contributors": [
     "Yis_Rime <yis_rime@outlook.com>"
   ],