koishi-plugin-best-cave 2.2.5 → 2.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/lib/index.js +69 -26
  2. package/package.json +1 -1
package/lib/index.js CHANGED
@@ -666,26 +666,51 @@ var HashManager = class {
666
666
  */
667
667
  async generateHashesForHistoricalCaves() {
668
668
  const allCaves = await this.ctx.database.get("cave", { status: "active" });
669
- const existingHashedCaveIds = new Set((await this.ctx.database.get("cave_hash", {}, { fields: ["cave"] })).map((h4) => h4.cave));
669
+ const existingHashes = await this.ctx.database.get("cave_hash", {}, { fields: ["cave", "hash", "type"] });
670
+ const existingHashSet = new Set(existingHashes.map((h4) => `${h4.cave}-${h4.hash}-${h4.type}`));
671
+ const processedCaveIds = new Set(existingHashes.map((h4) => h4.cave));
672
+ const cavesToProcess = allCaves.filter((cave) => !processedCaveIds.has(cave.id));
673
+ const totalToProcessCount = cavesToProcess.length;
674
+ if (totalToProcessCount === 0) {
675
+ return "无需补全回声洞哈希";
676
+ }
677
+ this.logger.info(`开始补全 ${totalToProcessCount} 个回声洞的哈希...`);
670
678
  let hashesToInsert = [];
671
- let historicalCount = 0;
679
+ const batchHashSet = /* @__PURE__ */ new Set();
680
+ let processedCaveCount = 0;
672
681
  let totalHashesGenerated = 0;
673
- for (const cave of allCaves) {
674
- if (existingHashedCaveIds.has(cave.id)) continue;
675
- historicalCount++;
676
- const newHashesForCave = await this.generateAllHashesForCave(cave);
677
- hashesToInsert.push(...newHashesForCave);
682
+ let errorCount = 0;
683
+ const flushBatch = /* @__PURE__ */ __name(async () => {
684
+ const batchSize = hashesToInsert.length;
685
+ if (batchSize === 0) return;
686
+ await this.ctx.database.upsert("cave_hash", hashesToInsert);
687
+ totalHashesGenerated += batchSize;
688
+ this.logger.info(`正在导入 ${batchSize} 条回声洞哈希... (已处理 ${processedCaveCount}/${totalToProcessCount})`);
689
+ hashesToInsert = [];
690
+ batchHashSet.clear();
691
+ }, "flushBatch");
692
+ for (const cave of cavesToProcess) {
693
+ processedCaveCount++;
694
+ try {
695
+ const newHashesForCave = await this.generateAllHashesForCave(cave);
696
+ for (const hashObj of newHashesForCave) {
697
+ const uniqueKey = `${hashObj.cave}-${hashObj.hash}-${hashObj.type}`;
698
+ if (!existingHashSet.has(uniqueKey) && !batchHashSet.has(uniqueKey)) {
699
+ hashesToInsert.push(hashObj);
700
+ batchHashSet.add(uniqueKey);
701
+ }
702
+ }
703
+ } catch (error) {
704
+ errorCount++;
705
+ this.logger.warn(`补全回声洞(${cave.id})时发生错误: ${error.message}`);
706
+ continue;
707
+ }
678
708
  if (hashesToInsert.length >= 100) {
679
- await this.ctx.database.upsert("cave_hash", hashesToInsert);
680
- totalHashesGenerated += hashesToInsert.length;
681
- hashesToInsert = [];
709
+ await flushBatch();
682
710
  }
683
711
  }
684
- if (hashesToInsert.length > 0) {
685
- await this.ctx.database.upsert("cave_hash", hashesToInsert);
686
- totalHashesGenerated += hashesToInsert.length;
687
- }
688
- return totalHashesGenerated > 0 ? `已补全 ${historicalCount} 个回声洞的 ${totalHashesGenerated} 条哈希` : "无需补全回声洞哈希";
712
+ await flushBatch();
713
+ return `已补全 ${totalToProcessCount} 个回声洞的 ${totalHashesGenerated} 条哈希(失败${errorCount} 条)`;
689
714
  }
690
715
  /**
691
716
  * @description 为单个回声洞对象生成所有类型的哈希。
@@ -697,7 +722,9 @@ var HashManager = class {
697
722
  const combinedText = cave.elements.filter((el) => el.type === "text" && el.content).map((el) => el.content).join(" ");
698
723
  if (combinedText) {
699
724
  const textHash = this.generateTextSimhash(combinedText);
700
- allHashes.push({ cave: cave.id, hash: textHash, type: "simhash" });
725
+ if (textHash) {
726
+ allHashes.push({ cave: cave.id, hash: textHash, type: "simhash" });
727
+ }
701
728
  }
702
729
  for (const el of cave.elements.filter((el2) => el2.type === "image" && el2.file)) {
703
730
  try {
@@ -916,11 +943,25 @@ var HashManager = class {
916
943
  * @returns {string} 64位二进制 Simhash 对应的16位十六进制字符串。
917
944
  */
918
945
  generateTextSimhash(text) {
919
- if (!text?.trim()) return "";
920
- const tokens = text.toLowerCase().split(/[^a-z0-9\u4e00-\u9fa5]+/).filter(Boolean);
921
- if (tokens.length === 0) return "";
946
+ const cleanText = (text || "").toLowerCase().replace(/\s+/g, "");
947
+ if (!cleanText) {
948
+ return "";
949
+ }
950
+ const n = 2;
951
+ const tokens = /* @__PURE__ */ new Set();
952
+ if (cleanText.length < n) {
953
+ tokens.add(cleanText);
954
+ } else {
955
+ for (let i = 0; i <= cleanText.length - n; i++) {
956
+ tokens.add(cleanText.substring(i, i + n));
957
+ }
958
+ }
959
+ const tokenArray = Array.from(tokens);
960
+ if (tokenArray.length === 0) {
961
+ return "";
962
+ }
922
963
  const vector = new Array(64).fill(0);
923
- tokens.forEach((token) => {
964
+ tokenArray.forEach((token) => {
924
965
  const hash = crypto.createHash("md5").update(token).digest();
925
966
  for (let i = 0; i < 64; i++) {
926
967
  vector[i] += hash[Math.floor(i / 8)] >> i % 8 & 1 ? 1 : -1;
@@ -1040,14 +1081,16 @@ function apply(ctx, config) {
1040
1081
  const combinedText = finalElementsForDb.filter((el) => el.type === "text" && el.content).map((el) => el.content).join(" ");
1041
1082
  if (combinedText) {
1042
1083
  const newSimhash = hashManager.generateTextSimhash(combinedText);
1043
- const existingTextHashes = await ctx.database.get("cave_hash", { type: "simhash" });
1044
- for (const existing of existingTextHashes) {
1045
- const similarity = hashManager.calculateSimilarity(newSimhash, existing.hash);
1046
- if (similarity >= config.textThreshold) {
1047
- return `文本与回声洞(${existing.cave})的相似度为 ${(similarity * 100).toFixed(2)}%,超过阈值`;
1084
+ if (newSimhash) {
1085
+ const existingTextHashes = await ctx.database.get("cave_hash", { type: "simhash" });
1086
+ for (const existing of existingTextHashes) {
1087
+ const similarity = hashManager.calculateSimilarity(newSimhash, existing.hash);
1088
+ if (similarity >= config.textThreshold) {
1089
+ return `文本与回声洞(${existing.cave})的相似度为 ${(similarity * 100).toFixed(2)}%,超过阈值`;
1090
+ }
1048
1091
  }
1092
+ textHashesToStore.push({ hash: newSimhash, type: "simhash" });
1049
1093
  }
1050
- textHashesToStore.push({ hash: newSimhash, type: "simhash" });
1051
1094
  }
1052
1095
  }
1053
1096
  const userName = (config.enableProfile ? await profileManager.getNickname(session.userId) : null) || session.username;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "koishi-plugin-best-cave",
3
3
  "description": "功能强大、高度可定制的回声洞。支持丰富的媒体类型、内容查重、人工审核、用户昵称、数据迁移以及本地/S3 双重文件存储后端。",
4
- "version": "2.2.5",
4
+ "version": "2.2.7",
5
5
  "contributors": [
6
6
  "Yis_Rime <yis_rime@outlook.com>"
7
7
  ],