koishi-plugin-best-cave 2.2.0 → 2.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,10 +24,15 @@ export declare class HashManager {
24
24
  */
25
25
  registerCommands(cave: any): void;
26
26
  /**
27
- * @description 检查数据库中所有回声洞,为没有哈希记录的历史数据生成哈希,并在此之后对所有内容进行相似度检查。
27
+ * @description 检查数据库中所有回声洞,为没有哈希记录的历史数据生成哈希。
28
28
  * @returns {Promise<string>} 一个包含操作结果的报告字符串。
29
29
  */
30
- validateAllCaves(): Promise<string>;
30
+ generateHashesForHistoricalCaves(): Promise<string>;
31
+ /**
32
+ * @description 对所有已存在哈希的回声洞进行相似度检查。
33
+ * @returns {Promise<string>} 一个包含操作结果的报告字符串。
34
+ */
35
+ checkForSimilarCaves(): Promise<string>;
31
36
  /**
32
37
  * @description 将图片切割为4个象限并为每个象限生成pHash。
33
38
  * @param imageBuffer - 图片的 Buffer 数据。
package/lib/index.js CHANGED
@@ -597,25 +597,40 @@ var HashManager = class {
597
597
  * @param cave - 主 `cave` 命令实例。
598
598
  */
599
599
  registerCommands(cave) {
600
- cave.subcommand(".hash", "校验回声洞").usage("校验所有回声洞,为历史数据生成哈希,并检查现有内容的相似度。").action(async ({ session }) => {
600
+ const adminCheck = /* @__PURE__ */ __name(({ session }) => {
601
601
  const adminChannelId = this.config.adminChannel?.split(":")[1];
602
602
  if (session.channelId !== adminChannelId) {
603
603
  return "此指令仅限在管理群组中使用";
604
604
  }
605
- await session.send("正在处理,请稍候...");
605
+ }, "adminCheck");
606
+ cave.subcommand(".hash", "校验回声洞").usage("校验所有回声洞,补全所有哈希记录。").action(async (argv) => {
607
+ const checkResult = adminCheck(argv);
608
+ if (checkResult) return checkResult;
609
+ await argv.session.send("正在处理,请稍候...");
606
610
  try {
607
- return await this.validateAllCaves();
611
+ return await this.generateHashesForHistoricalCaves();
608
612
  } catch (error) {
609
- this.logger.error("校验哈希失败:", error);
610
- return `校验失败: ${error.message}`;
613
+ this.logger.error("生成历史哈希失败:", error);
614
+ return `操作失败: ${error.message}`;
615
+ }
616
+ });
617
+ cave.subcommand(".check", "检查回声洞").usage("检查所有已存在哈希的回声洞的相似度。").action(async (argv) => {
618
+ const checkResult = adminCheck(argv);
619
+ if (checkResult) return checkResult;
620
+ await argv.session.send("正在检查,请稍候...");
621
+ try {
622
+ return await this.checkForSimilarCaves();
623
+ } catch (error) {
624
+ this.logger.error("检查相似度失败:", error);
625
+ return `检查失败: ${error.message}`;
611
626
  }
612
627
  });
613
628
  }
614
629
  /**
615
- * @description 检查数据库中所有回声洞,为没有哈希记录的历史数据生成哈希,并在此之后对所有内容进行相似度检查。
630
+ * @description 检查数据库中所有回声洞,为没有哈希记录的历史数据生成哈希。
616
631
  * @returns {Promise<string>} 一个包含操作结果的报告字符串。
617
632
  */
618
- async validateAllCaves() {
633
+ async generateHashesForHistoricalCaves() {
619
634
  const allCaves = await this.ctx.database.get("cave", { status: "active" });
620
635
  const existingHashedCaveIds = new Set((await this.ctx.database.get("cave_hash", {}, { fields: ["cave"] })).map((h4) => h4.cave));
621
636
  let hashesToInsert = [];
@@ -625,8 +640,12 @@ var HashManager = class {
625
640
  const flushHashes = /* @__PURE__ */ __name(async () => {
626
641
  if (hashesToInsert.length > 0) {
627
642
  this.logger.info(`补全第 ${batchStartCaveCount + 1} 到 ${historicalCount} 条回声洞哈希中...`);
628
- await this.ctx.database.upsert("cave_hash", hashesToInsert);
629
- totalHashesGenerated += hashesToInsert.length;
643
+ try {
644
+ await this.ctx.database.upsert("cave_hash", hashesToInsert);
645
+ totalHashesGenerated += hashesToInsert.length;
646
+ } catch (error) {
647
+ this.logger.error(`导入哈希失败: ${error.message}`);
648
+ }
630
649
  hashesToInsert = [];
631
650
  batchStartCaveCount = historicalCount;
632
651
  }
@@ -634,26 +653,39 @@ var HashManager = class {
634
653
  for (const cave of allCaves) {
635
654
  if (existingHashedCaveIds.has(cave.id)) continue;
636
655
  historicalCount++;
656
+ const newHashesForCave = [];
637
657
  const combinedText = cave.elements.filter((el) => el.type === "text" && el.content).map((el) => el.content).join(" ");
638
- if (combinedText) {
639
- hashesToInsert.push({ cave: cave.id, hash: this.generateTextSimhash(combinedText), type: "sim" });
658
+ const textHash = this.generateTextSimhash(combinedText);
659
+ if (textHash) {
660
+ newHashesForCave.push({ cave: cave.id, hash: textHash, type: "sim" });
640
661
  }
641
662
  for (const el of cave.elements.filter((el2) => el2.type === "image" && el2.file)) {
642
663
  try {
643
664
  const imageBuffer = await this.fileManager.readFile(el.file);
644
665
  const pHash = await this.generateImagePHash(imageBuffer);
645
- hashesToInsert.push({ cave: cave.id, hash: pHash, type: "phash" });
666
+ newHashesForCave.push({ cave: cave.id, hash: pHash, type: "phash" });
646
667
  const subHashes = await this.generateImageSubHashes(imageBuffer);
647
- subHashes.forEach((subHash) => hashesToInsert.push({ cave: cave.id, hash: subHash, type: "sub" }));
668
+ subHashes.forEach((subHash) => newHashesForCave.push({ cave: cave.id, hash: subHash, type: "sub" }));
648
669
  } catch (e) {
649
670
  this.logger.warn(`无法为回声洞(${cave.id})的内容(${el.file})生成哈希:`, e);
650
671
  }
651
672
  }
673
+ const uniqueHashesMap = /* @__PURE__ */ new Map();
674
+ newHashesForCave.forEach((h4) => {
675
+ const uniqueKey = `${h4.type}-${h4.hash}`;
676
+ uniqueHashesMap.set(uniqueKey, h4);
677
+ });
678
+ hashesToInsert.push(...uniqueHashesMap.values());
652
679
  if (hashesToInsert.length >= 100) await flushHashes();
653
680
  }
654
681
  await flushHashes();
655
- const generationReport = totalHashesGenerated > 0 ? `已补全 ${historicalCount} 个回声洞的 ${totalHashesGenerated} 条哈希
656
- ` : "无需补全回声洞的哈希\n";
682
+ return totalHashesGenerated > 0 ? `已补全 ${historicalCount} 个回声洞的 ${totalHashesGenerated} 条哈希` : "无需补全回声洞哈希";
683
+ }
684
+ /**
685
+ * @description 对所有已存在哈希的回声洞进行相似度检查。
686
+ * @returns {Promise<string>} 一个包含操作结果的报告字符串。
687
+ */
688
+ async checkForSimilarCaves() {
657
689
  const allHashes = await this.ctx.database.get("cave_hash", {});
658
690
  const caveTextHashes = /* @__PURE__ */ new Map();
659
691
  const caveImagePHashes = /* @__PURE__ */ new Map();
@@ -665,7 +697,7 @@ var HashManager = class {
665
697
  caveImagePHashes.get(hash.cave).push(hash.hash);
666
698
  }
667
699
  }
668
- const caveIds = allCaves.map((c) => c.id);
700
+ const caveIds = Array.from(/* @__PURE__ */ new Set([...caveTextHashes.keys(), ...caveImagePHashes.keys()]));
669
701
  const similarPairs = /* @__PURE__ */ new Set();
670
702
  for (let i = 0; i < caveIds.length; i++) {
671
703
  for (let j = i + 1; j < caveIds.length; j++) {
@@ -676,7 +708,7 @@ var HashManager = class {
676
708
  if (textHash1 && textHash2) {
677
709
  const textSim = this.calculateSimilarity(textHash1, textHash2);
678
710
  if (textSim >= this.config.textThreshold) {
679
- similarPairs.add(`文本:(${id1},${id2}),相似度:${(textSim * 100).toFixed(2)}%`);
711
+ similarPairs.add(`文本${id1}&${id2}=${(textSim * 100).toFixed(2)}%`);
680
712
  }
681
713
  }
682
714
  const imageHashes1 = caveImagePHashes.get(id1) || [];
@@ -686,17 +718,15 @@ var HashManager = class {
686
718
  for (const imgHash2 of imageHashes2) {
687
719
  const imgSim = this.calculateSimilarity(imgHash1, imgHash2);
688
720
  if (imgSim >= this.config.imageThreshold) {
689
- similarPairs.add(`图片:(${id1},${id2}),相似度:${(imgSim * 100).toFixed(2)}%`);
721
+ similarPairs.add(`图片${id1}&${id2}=${(imgSim * 100).toFixed(2)}%`);
690
722
  }
691
723
  }
692
724
  }
693
725
  }
694
726
  }
695
727
  }
696
- const similarityReport = similarPairs.size > 0 ? `发现 ${similarPairs.size} 对高相似度内容:
728
+ return similarPairs.size > 0 ? `已发现 ${similarPairs.size} 对高相似度内容:
697
729
  ` + [...similarPairs].join("\n") : "未发现高相似度内容";
698
- return `校验完成:
699
- ${generationReport}${similarityReport}`;
700
730
  }
701
731
  /**
702
732
  * @description 将图片切割为4个象限并为每个象限生成pHash。
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "koishi-plugin-best-cave",
3
3
  "description": "功能强大、高度可定制的回声洞。支持丰富的媒体类型、内容查重、人工审核、用户昵称、数据迁移以及本地/S3 双重文件存储后端。",
4
- "version": "2.2.0",
4
+ "version": "2.2.2",
5
5
  "contributors": [
6
6
  "Yis_Rime <yis_rime@outlook.com>"
7
7
  ],