koishi-plugin-best-cave 2.2.6 → 2.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/lib/index.js +58 -19
  2. package/package.json +1 -1
package/lib/index.js CHANGED
@@ -666,26 +666,51 @@ var HashManager = class {
666
666
  */
667
667
  async generateHashesForHistoricalCaves() {
668
668
  const allCaves = await this.ctx.database.get("cave", { status: "active" });
669
- const existingHashedCaveIds = new Set((await this.ctx.database.get("cave_hash", {}, { fields: ["cave"] })).map((h4) => h4.cave));
669
+ const existingHashes = await this.ctx.database.get("cave_hash", {}, { fields: ["cave", "hash", "type"] });
670
+ const existingHashSet = new Set(existingHashes.map((h4) => `${h4.cave}-${h4.hash}-${h4.type}`));
671
+ const processedCaveIds = new Set(existingHashes.map((h4) => h4.cave));
672
+ const cavesToProcess = allCaves.filter((cave) => !processedCaveIds.has(cave.id));
673
+ const totalToProcessCount = cavesToProcess.length;
674
+ if (totalToProcessCount === 0) {
675
+ return "无需补全回声洞哈希";
676
+ }
677
+ this.logger.info(`开始补全 ${totalToProcessCount} 个回声洞的哈希...`);
670
678
  let hashesToInsert = [];
671
- let historicalCount = 0;
679
+ const batchHashSet = /* @__PURE__ */ new Set();
680
+ let processedCaveCount = 0;
672
681
  let totalHashesGenerated = 0;
673
- for (const cave of allCaves) {
674
- if (existingHashedCaveIds.has(cave.id)) continue;
675
- historicalCount++;
676
- const newHashesForCave = await this.generateAllHashesForCave(cave);
677
- hashesToInsert.push(...newHashesForCave);
682
+ let errorCount = 0;
683
+ const flushBatch = /* @__PURE__ */ __name(async () => {
684
+ const batchSize = hashesToInsert.length;
685
+ if (batchSize === 0) return;
686
+ await this.ctx.database.upsert("cave_hash", hashesToInsert);
687
+ totalHashesGenerated += batchSize;
688
+ this.logger.info(`正在导入 ${batchSize} 条回声洞哈希... (已处理 ${processedCaveCount}/${totalToProcessCount})`);
689
+ hashesToInsert = [];
690
+ batchHashSet.clear();
691
+ }, "flushBatch");
692
+ for (const cave of cavesToProcess) {
693
+ processedCaveCount++;
694
+ try {
695
+ const newHashesForCave = await this.generateAllHashesForCave(cave);
696
+ for (const hashObj of newHashesForCave) {
697
+ const uniqueKey = `${hashObj.cave}-${hashObj.hash}-${hashObj.type}`;
698
+ if (!existingHashSet.has(uniqueKey) && !batchHashSet.has(uniqueKey)) {
699
+ hashesToInsert.push(hashObj);
700
+ batchHashSet.add(uniqueKey);
701
+ }
702
+ }
703
+ } catch (error) {
704
+ errorCount++;
705
+ this.logger.warn(`补全回声洞(${cave.id})时发生错误: ${error.message}`);
706
+ continue;
707
+ }
678
708
  if (hashesToInsert.length >= 100) {
679
- await this.ctx.database.upsert("cave_hash", hashesToInsert);
680
- totalHashesGenerated += hashesToInsert.length;
681
- hashesToInsert = [];
709
+ await flushBatch();
682
710
  }
683
711
  }
684
- if (hashesToInsert.length > 0) {
685
- await this.ctx.database.upsert("cave_hash", hashesToInsert);
686
- totalHashesGenerated += hashesToInsert.length;
687
- }
688
- return totalHashesGenerated > 0 ? `已补全 ${historicalCount} 个回声洞的 ${totalHashesGenerated} 条哈希` : "无需补全回声洞哈希";
712
+ await flushBatch();
713
+ return `已补全 ${totalToProcessCount} 个回声洞的 ${totalHashesGenerated} 条哈希(失败${errorCount} 条)`;
689
714
  }
690
715
  /**
691
716
  * @description 为单个回声洞对象生成所有类型的哈希。
@@ -918,11 +943,25 @@ var HashManager = class {
918
943
  * @returns {string} 64位二进制 Simhash 对应的16位十六进制字符串。
919
944
  */
920
945
  generateTextSimhash(text) {
921
- if (!text?.trim()) return "";
922
- const tokens = text.toLowerCase().split(/[^a-z0-9\u4e00-\u9fa5]+/).filter(Boolean);
923
- if (tokens.length === 0) return "";
946
+ const cleanText = (text || "").toLowerCase().replace(/\s+/g, "");
947
+ if (!cleanText) {
948
+ return "";
949
+ }
950
+ const n = 2;
951
+ const tokens = /* @__PURE__ */ new Set();
952
+ if (cleanText.length < n) {
953
+ tokens.add(cleanText);
954
+ } else {
955
+ for (let i = 0; i <= cleanText.length - n; i++) {
956
+ tokens.add(cleanText.substring(i, i + n));
957
+ }
958
+ }
959
+ const tokenArray = Array.from(tokens);
960
+ if (tokenArray.length === 0) {
961
+ return "";
962
+ }
924
963
  const vector = new Array(64).fill(0);
925
- tokens.forEach((token) => {
964
+ tokenArray.forEach((token) => {
926
965
  const hash = crypto.createHash("md5").update(token).digest();
927
966
  for (let i = 0; i < 64; i++) {
928
967
  vector[i] += hash[Math.floor(i / 8)] >> i % 8 & 1 ? 1 : -1;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "koishi-plugin-best-cave",
3
3
  "description": "功能强大、高度可定制的回声洞。支持丰富的媒体类型、内容查重、人工审核、用户昵称、数据迁移以及本地/S3 双重文件存储后端。",
4
- "version": "2.2.6",
4
+ "version": "2.2.7",
5
5
  "contributors": [
6
6
  "Yis_Rime <yis_rime@outlook.com>"
7
7
  ],