koishi-plugin-best-cave 2.2.6 → 2.2.8

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/lib/index.d.ts CHANGED
@@ -39,7 +39,8 @@ export interface Config {
39
39
  caveFormat: string;
40
40
  enableSimilarity: boolean;
41
41
  textThreshold: number;
42
- imageThreshold: number;
42
+ imageWholeThreshold: number;
43
+ imagePartThreshold: number;
43
44
  localPath?: string;
44
45
  enableS3: boolean;
45
46
  endpoint?: string;
package/lib/index.js CHANGED
@@ -445,14 +445,14 @@ async function handleFileUploads(ctx, config, fileManager, logger2, reviewManage
445
445
  const similarityScores = /* @__PURE__ */ new Map();
446
446
  for (const existing of existingColorPHashes) {
447
447
  const similarity = hashManager.calculateSimilarity(colorPHash, existing.hash);
448
- if (similarity >= config.imageThreshold) {
448
+ if (similarity >= config.imageWholeThreshold) {
449
449
  if (!similarityScores.has(existing.cave)) similarityScores.set(existing.cave, {});
450
450
  similarityScores.get(existing.cave).colorSim = similarity;
451
451
  }
452
452
  }
453
453
  for (const existing of existingDHashes) {
454
454
  const similarity = hashManager.calculateSimilarity(dHash, existing.hash);
455
- if (similarity >= config.imageThreshold) {
455
+ if (similarity >= config.imageWholeThreshold) {
456
456
  if (!similarityScores.has(existing.cave)) similarityScores.set(existing.cave, {});
457
457
  similarityScores.get(existing.cave).dSim = similarity;
458
458
  }
@@ -475,7 +475,7 @@ async function handleFileUploads(ctx, config, fileManager, logger2, reviewManage
475
475
  for (const existing of existingSubHashObjects) {
476
476
  if (notifiedPartialCaves.has(existing.cave)) continue;
477
477
  const similarity = hashManager.calculateSimilarity(newSubHash, existing.hash);
478
- if (similarity >= config.imageThreshold) {
478
+ if (similarity >= config.imagePartThreshold) {
479
479
  await session.send(`图片局部与回声洞(${existing.cave})的相似度为 ${(similarity * 100).toFixed(2)}%`);
480
480
  notifiedPartialCaves.add(existing.cave);
481
481
  }
@@ -666,26 +666,51 @@ var HashManager = class {
666
666
  */
667
667
  async generateHashesForHistoricalCaves() {
668
668
  const allCaves = await this.ctx.database.get("cave", { status: "active" });
669
- const existingHashedCaveIds = new Set((await this.ctx.database.get("cave_hash", {}, { fields: ["cave"] })).map((h4) => h4.cave));
669
+ const existingHashes = await this.ctx.database.get("cave_hash", {}, { fields: ["cave", "hash", "type"] });
670
+ const existingHashSet = new Set(existingHashes.map((h4) => `${h4.cave}-${h4.hash}-${h4.type}`));
671
+ const processedCaveIds = new Set(existingHashes.map((h4) => h4.cave));
672
+ const cavesToProcess = allCaves.filter((cave) => !processedCaveIds.has(cave.id));
673
+ const totalToProcessCount = cavesToProcess.length;
674
+ if (totalToProcessCount === 0) {
675
+ return "无需补全回声洞哈希";
676
+ }
677
+ this.logger.info(`开始补全 ${totalToProcessCount} 个回声洞的哈希...`);
670
678
  let hashesToInsert = [];
671
- let historicalCount = 0;
679
+ const batchHashSet = /* @__PURE__ */ new Set();
680
+ let processedCaveCount = 0;
672
681
  let totalHashesGenerated = 0;
673
- for (const cave of allCaves) {
674
- if (existingHashedCaveIds.has(cave.id)) continue;
675
- historicalCount++;
676
- const newHashesForCave = await this.generateAllHashesForCave(cave);
677
- hashesToInsert.push(...newHashesForCave);
682
+ let errorCount = 0;
683
+ const flushBatch = /* @__PURE__ */ __name(async () => {
684
+ const batchSize = hashesToInsert.length;
685
+ if (batchSize === 0) return;
686
+ await this.ctx.database.upsert("cave_hash", hashesToInsert);
687
+ totalHashesGenerated += batchSize;
688
+ this.logger.info(`正在导入 ${batchSize} 条回声洞哈希... (已处理 ${processedCaveCount}/${totalToProcessCount})`);
689
+ hashesToInsert = [];
690
+ batchHashSet.clear();
691
+ }, "flushBatch");
692
+ for (const cave of cavesToProcess) {
693
+ processedCaveCount++;
694
+ try {
695
+ const newHashesForCave = await this.generateAllHashesForCave(cave);
696
+ for (const hashObj of newHashesForCave) {
697
+ const uniqueKey = `${hashObj.cave}-${hashObj.hash}-${hashObj.type}`;
698
+ if (!existingHashSet.has(uniqueKey) && !batchHashSet.has(uniqueKey)) {
699
+ hashesToInsert.push(hashObj);
700
+ batchHashSet.add(uniqueKey);
701
+ }
702
+ }
703
+ } catch (error) {
704
+ errorCount++;
705
+ this.logger.warn(`补全回声洞(${cave.id})时发生错误: ${error.message}`);
706
+ continue;
707
+ }
678
708
  if (hashesToInsert.length >= 100) {
679
- await this.ctx.database.upsert("cave_hash", hashesToInsert);
680
- totalHashesGenerated += hashesToInsert.length;
681
- hashesToInsert = [];
709
+ await flushBatch();
682
710
  }
683
711
  }
684
- if (hashesToInsert.length > 0) {
685
- await this.ctx.database.upsert("cave_hash", hashesToInsert);
686
- totalHashesGenerated += hashesToInsert.length;
687
- }
688
- return totalHashesGenerated > 0 ? `已补全 ${historicalCount} 个回声洞的 ${totalHashesGenerated} 条哈希` : "无需补全回声洞哈希";
712
+ await flushBatch();
713
+ return `已补全 ${totalToProcessCount} 个回声洞的 ${totalHashesGenerated} 条哈希(失败 ${errorCount} 条)`;
689
714
  }
690
715
  /**
691
716
  * @description 为单个回声洞对象生成所有类型的哈希。
@@ -743,20 +768,21 @@ var HashManager = class {
743
768
  phash_color: /* @__PURE__ */ new Map(),
744
769
  dhash_gray: /* @__PURE__ */ new Map()
745
770
  };
746
- const subHashToCaves = /* @__PURE__ */ new Map();
771
+ const subHashGroups = /* @__PURE__ */ new Map();
747
772
  for (const hash of allHashes) {
748
773
  if (hashGroups[hash.type]) {
749
774
  if (!hashGroups[hash.type].has(hash.cave)) hashGroups[hash.type].set(hash.cave, []);
750
775
  hashGroups[hash.type].get(hash.cave).push(hash.hash);
751
776
  } else if (hash.type.startsWith("sub_phash_")) {
752
- if (!subHashToCaves.has(hash.hash)) subHashToCaves.set(hash.hash, /* @__PURE__ */ new Set());
753
- subHashToCaves.get(hash.hash).add(hash.cave);
777
+ if (!subHashGroups.has(hash.cave)) subHashGroups.set(hash.cave, []);
778
+ subHashGroups.get(hash.cave).push(hash.hash);
754
779
  }
755
780
  }
756
781
  const similarPairs = {
757
782
  text: /* @__PURE__ */ new Set(),
758
783
  image_color: /* @__PURE__ */ new Set(),
759
- image_dhash: /* @__PURE__ */ new Set()
784
+ image_dhash: /* @__PURE__ */ new Set(),
785
+ image_part: /* @__PURE__ */ new Set()
760
786
  };
761
787
  for (let i = 0; i < allCaveIds.length; i++) {
762
788
  for (let j = i + 1; j < allCaveIds.length; j++) {
@@ -775,7 +801,7 @@ var HashManager = class {
775
801
  for (const h1 of colorHashes1) {
776
802
  for (const h22 of colorHashes2) {
777
803
  const sim = this.calculateSimilarity(h1, h22);
778
- if (sim >= this.config.imageThreshold) {
804
+ if (sim >= this.config.imageWholeThreshold) {
779
805
  similarPairs.image_color.add(`${id1} & ${id2} = ${(sim * 100).toFixed(2)}%`);
780
806
  }
781
807
  }
@@ -785,27 +811,36 @@ var HashManager = class {
785
811
  for (const h1 of dHashes1) {
786
812
  for (const h22 of dHashes2) {
787
813
  const sim = this.calculateSimilarity(h1, h22);
788
- if (sim >= this.config.imageThreshold) {
814
+ if (sim >= this.config.imageWholeThreshold) {
789
815
  similarPairs.image_dhash.add(`${id1} & ${id2} = ${(sim * 100).toFixed(2)}%`);
790
816
  }
791
817
  }
792
818
  }
819
+ const subHashes1 = subHashGroups.get(id1) || [];
820
+ const subHashes2 = subHashGroups.get(id2) || [];
821
+ if (subHashes1.length > 0 && subHashes2.length > 0) {
822
+ let maxPartSim = 0;
823
+ for (const h1 of subHashes1) {
824
+ for (const h22 of subHashes2) {
825
+ const sim = this.calculateSimilarity(h1, h22);
826
+ if (sim > maxPartSim) {
827
+ maxPartSim = sim;
828
+ }
829
+ }
830
+ }
831
+ if (maxPartSim >= this.config.imagePartThreshold) {
832
+ similarPairs.image_part.add(`${id1} & ${id2} = ${(maxPartSim * 100).toFixed(2)}%`);
833
+ }
834
+ }
793
835
  }
794
836
  }
795
- const subHashDuplicates = [];
796
- subHashToCaves.forEach((caves2) => {
797
- if (caves2.size > 1) {
798
- const sortedCaves = [...caves2].sort((a, b) => a - b).join(", ");
799
- subHashDuplicates.push(`[${sortedCaves}]`);
800
- }
801
- });
802
- const totalFindings = similarPairs.text.size + similarPairs.image_color.size + similarPairs.image_dhash.size + subHashDuplicates.length;
837
+ const totalFindings = similarPairs.text.size + similarPairs.image_color.size + similarPairs.image_dhash.size + similarPairs.image_part.size;
803
838
  if (totalFindings === 0) return "未发现高相似度的内容";
804
- let report = `已发现 ${totalFindings} 组高相似度或重复的内容:`;
839
+ let report = `已发现 ${totalFindings} 组高相似度的内容:`;
805
840
  if (similarPairs.text.size > 0) report += "\n文本近似:\n" + [...similarPairs.text].join("\n");
806
- if (similarPairs.image_color.size > 0) report += "\n图片整体相似:\n" + [...similarPairs.image_color].join("\n");
841
+ if (similarPairs.image_color.size > 0) report += "\n图片颜色相似:\n" + [...similarPairs.image_color].join("\n");
807
842
  if (similarPairs.image_dhash.size > 0) report += "\n图片结构相似:\n" + [...similarPairs.image_dhash].join("\n");
808
- if (subHashDuplicates.length > 0) report += "\n图片局部重复:\n" + [...new Set(subHashDuplicates)].join("\n");
843
+ if (similarPairs.image_part.size > 0) report += "\n图片局部近似:\n" + [...similarPairs.image_part].join("\n");
809
844
  return report.trim();
810
845
  }
811
846
  /**
@@ -918,11 +953,25 @@ var HashManager = class {
918
953
  * @returns {string} 64位二进制 Simhash 对应的16位十六进制字符串。
919
954
  */
920
955
  generateTextSimhash(text) {
921
- if (!text?.trim()) return "";
922
- const tokens = text.toLowerCase().split(/[^a-z0-9\u4e00-\u9fa5]+/).filter(Boolean);
923
- if (tokens.length === 0) return "";
956
+ const cleanText = (text || "").toLowerCase().replace(/\s+/g, "");
957
+ if (!cleanText) {
958
+ return "";
959
+ }
960
+ const n = 2;
961
+ const tokens = /* @__PURE__ */ new Set();
962
+ if (cleanText.length < n) {
963
+ tokens.add(cleanText);
964
+ } else {
965
+ for (let i = 0; i <= cleanText.length - n; i++) {
966
+ tokens.add(cleanText.substring(i, i + n));
967
+ }
968
+ }
969
+ const tokenArray = Array.from(tokens);
970
+ if (tokenArray.length === 0) {
971
+ return "";
972
+ }
924
973
  const vector = new Array(64).fill(0);
925
- tokens.forEach((token) => {
974
+ tokenArray.forEach((token) => {
926
975
  const hash = crypto.createHash("md5").update(token).digest();
927
976
  for (let i = 0; i < 64; i++) {
928
977
  vector[i] += hash[Math.floor(i / 8)] >> i % 8 & 1 ? 1 : -1;
@@ -970,7 +1019,8 @@ var Config = import_koishi3.Schema.intersect([
970
1019
  enableReview: import_koishi3.Schema.boolean().default(false).description("启用审核"),
971
1020
  enableSimilarity: import_koishi3.Schema.boolean().default(false).description("启用查重"),
972
1021
  textThreshold: import_koishi3.Schema.number().min(0).max(1).step(0.01).default(0.9).description("文本相似度阈值"),
973
- imageThreshold: import_koishi3.Schema.number().min(0).max(1).step(0.01).default(0.9).description("图片相似度阈值")
1022
+ imageWholeThreshold: import_koishi3.Schema.number().min(0).max(1).step(0.01).default(0.9).description("图片整体相似度阈值"),
1023
+ imagePartThreshold: import_koishi3.Schema.number().min(0).max(1).step(0.01).default(0.95).description("图片局部相似度阈值")
974
1024
  }).description("复核配置"),
975
1025
  import_koishi3.Schema.object({
976
1026
  localPath: import_koishi3.Schema.string().description("文件映射路径"),
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "koishi-plugin-best-cave",
3
3
  "description": "功能强大、高度可定制的回声洞。支持丰富的媒体类型、内容查重、人工审核、用户昵称、数据迁移以及本地/S3 双重文件存储后端。",
4
- "version": "2.2.6",
4
+ "version": "2.2.8",
5
5
  "contributors": [
6
6
  "Yis_Rime <yis_rime@outlook.com>"
7
7
  ],