koishi-plugin-best-cave 2.2.6 → 2.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/index.d.ts +2 -1
- package/lib/index.js +90 -40
- package/package.json +1 -1
package/lib/index.d.ts
CHANGED
|
@@ -39,7 +39,8 @@ export interface Config {
|
|
|
39
39
|
caveFormat: string;
|
|
40
40
|
enableSimilarity: boolean;
|
|
41
41
|
textThreshold: number;
|
|
42
|
-
|
|
42
|
+
imageWholeThreshold: number;
|
|
43
|
+
imagePartThreshold: number;
|
|
43
44
|
localPath?: string;
|
|
44
45
|
enableS3: boolean;
|
|
45
46
|
endpoint?: string;
|
package/lib/index.js
CHANGED
|
@@ -445,14 +445,14 @@ async function handleFileUploads(ctx, config, fileManager, logger2, reviewManage
|
|
|
445
445
|
const similarityScores = /* @__PURE__ */ new Map();
|
|
446
446
|
for (const existing of existingColorPHashes) {
|
|
447
447
|
const similarity = hashManager.calculateSimilarity(colorPHash, existing.hash);
|
|
448
|
-
if (similarity >= config.
|
|
448
|
+
if (similarity >= config.imageWholeThreshold) {
|
|
449
449
|
if (!similarityScores.has(existing.cave)) similarityScores.set(existing.cave, {});
|
|
450
450
|
similarityScores.get(existing.cave).colorSim = similarity;
|
|
451
451
|
}
|
|
452
452
|
}
|
|
453
453
|
for (const existing of existingDHashes) {
|
|
454
454
|
const similarity = hashManager.calculateSimilarity(dHash, existing.hash);
|
|
455
|
-
if (similarity >= config.
|
|
455
|
+
if (similarity >= config.imageWholeThreshold) {
|
|
456
456
|
if (!similarityScores.has(existing.cave)) similarityScores.set(existing.cave, {});
|
|
457
457
|
similarityScores.get(existing.cave).dSim = similarity;
|
|
458
458
|
}
|
|
@@ -475,7 +475,7 @@ async function handleFileUploads(ctx, config, fileManager, logger2, reviewManage
|
|
|
475
475
|
for (const existing of existingSubHashObjects) {
|
|
476
476
|
if (notifiedPartialCaves.has(existing.cave)) continue;
|
|
477
477
|
const similarity = hashManager.calculateSimilarity(newSubHash, existing.hash);
|
|
478
|
-
if (similarity >= config.
|
|
478
|
+
if (similarity >= config.imagePartThreshold) {
|
|
479
479
|
await session.send(`图片局部与回声洞(${existing.cave})的相似度为 ${(similarity * 100).toFixed(2)}%`);
|
|
480
480
|
notifiedPartialCaves.add(existing.cave);
|
|
481
481
|
}
|
|
@@ -666,26 +666,51 @@ var HashManager = class {
|
|
|
666
666
|
*/
|
|
667
667
|
async generateHashesForHistoricalCaves() {
|
|
668
668
|
const allCaves = await this.ctx.database.get("cave", { status: "active" });
|
|
669
|
-
const
|
|
669
|
+
const existingHashes = await this.ctx.database.get("cave_hash", {}, { fields: ["cave", "hash", "type"] });
|
|
670
|
+
const existingHashSet = new Set(existingHashes.map((h4) => `${h4.cave}-${h4.hash}-${h4.type}`));
|
|
671
|
+
const processedCaveIds = new Set(existingHashes.map((h4) => h4.cave));
|
|
672
|
+
const cavesToProcess = allCaves.filter((cave) => !processedCaveIds.has(cave.id));
|
|
673
|
+
const totalToProcessCount = cavesToProcess.length;
|
|
674
|
+
if (totalToProcessCount === 0) {
|
|
675
|
+
return "无需补全回声洞哈希";
|
|
676
|
+
}
|
|
677
|
+
this.logger.info(`开始补全 ${totalToProcessCount} 个回声洞的哈希...`);
|
|
670
678
|
let hashesToInsert = [];
|
|
671
|
-
|
|
679
|
+
const batchHashSet = /* @__PURE__ */ new Set();
|
|
680
|
+
let processedCaveCount = 0;
|
|
672
681
|
let totalHashesGenerated = 0;
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
682
|
+
let errorCount = 0;
|
|
683
|
+
const flushBatch = /* @__PURE__ */ __name(async () => {
|
|
684
|
+
const batchSize = hashesToInsert.length;
|
|
685
|
+
if (batchSize === 0) return;
|
|
686
|
+
await this.ctx.database.upsert("cave_hash", hashesToInsert);
|
|
687
|
+
totalHashesGenerated += batchSize;
|
|
688
|
+
this.logger.info(`正在导入 ${batchSize} 条回声洞哈希... (已处理 ${processedCaveCount}/${totalToProcessCount})`);
|
|
689
|
+
hashesToInsert = [];
|
|
690
|
+
batchHashSet.clear();
|
|
691
|
+
}, "flushBatch");
|
|
692
|
+
for (const cave of cavesToProcess) {
|
|
693
|
+
processedCaveCount++;
|
|
694
|
+
try {
|
|
695
|
+
const newHashesForCave = await this.generateAllHashesForCave(cave);
|
|
696
|
+
for (const hashObj of newHashesForCave) {
|
|
697
|
+
const uniqueKey = `${hashObj.cave}-${hashObj.hash}-${hashObj.type}`;
|
|
698
|
+
if (!existingHashSet.has(uniqueKey) && !batchHashSet.has(uniqueKey)) {
|
|
699
|
+
hashesToInsert.push(hashObj);
|
|
700
|
+
batchHashSet.add(uniqueKey);
|
|
701
|
+
}
|
|
702
|
+
}
|
|
703
|
+
} catch (error) {
|
|
704
|
+
errorCount++;
|
|
705
|
+
this.logger.warn(`补全回声洞(${cave.id})时发生错误: ${error.message}`);
|
|
706
|
+
continue;
|
|
707
|
+
}
|
|
678
708
|
if (hashesToInsert.length >= 100) {
|
|
679
|
-
await
|
|
680
|
-
totalHashesGenerated += hashesToInsert.length;
|
|
681
|
-
hashesToInsert = [];
|
|
709
|
+
await flushBatch();
|
|
682
710
|
}
|
|
683
711
|
}
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
totalHashesGenerated += hashesToInsert.length;
|
|
687
|
-
}
|
|
688
|
-
return totalHashesGenerated > 0 ? `已补全 ${historicalCount} 个回声洞的 ${totalHashesGenerated} 条哈希` : "无需补全回声洞哈希";
|
|
712
|
+
await flushBatch();
|
|
713
|
+
return `已补全 ${totalToProcessCount} 个回声洞的 ${totalHashesGenerated} 条哈希(失败 ${errorCount} 条)`;
|
|
689
714
|
}
|
|
690
715
|
/**
|
|
691
716
|
* @description 为单个回声洞对象生成所有类型的哈希。
|
|
@@ -743,20 +768,21 @@ var HashManager = class {
|
|
|
743
768
|
phash_color: /* @__PURE__ */ new Map(),
|
|
744
769
|
dhash_gray: /* @__PURE__ */ new Map()
|
|
745
770
|
};
|
|
746
|
-
const
|
|
771
|
+
const subHashGroups = /* @__PURE__ */ new Map();
|
|
747
772
|
for (const hash of allHashes) {
|
|
748
773
|
if (hashGroups[hash.type]) {
|
|
749
774
|
if (!hashGroups[hash.type].has(hash.cave)) hashGroups[hash.type].set(hash.cave, []);
|
|
750
775
|
hashGroups[hash.type].get(hash.cave).push(hash.hash);
|
|
751
776
|
} else if (hash.type.startsWith("sub_phash_")) {
|
|
752
|
-
if (!
|
|
753
|
-
|
|
777
|
+
if (!subHashGroups.has(hash.cave)) subHashGroups.set(hash.cave, []);
|
|
778
|
+
subHashGroups.get(hash.cave).push(hash.hash);
|
|
754
779
|
}
|
|
755
780
|
}
|
|
756
781
|
const similarPairs = {
|
|
757
782
|
text: /* @__PURE__ */ new Set(),
|
|
758
783
|
image_color: /* @__PURE__ */ new Set(),
|
|
759
|
-
image_dhash: /* @__PURE__ */ new Set()
|
|
784
|
+
image_dhash: /* @__PURE__ */ new Set(),
|
|
785
|
+
image_part: /* @__PURE__ */ new Set()
|
|
760
786
|
};
|
|
761
787
|
for (let i = 0; i < allCaveIds.length; i++) {
|
|
762
788
|
for (let j = i + 1; j < allCaveIds.length; j++) {
|
|
@@ -775,7 +801,7 @@ var HashManager = class {
|
|
|
775
801
|
for (const h1 of colorHashes1) {
|
|
776
802
|
for (const h22 of colorHashes2) {
|
|
777
803
|
const sim = this.calculateSimilarity(h1, h22);
|
|
778
|
-
if (sim >= this.config.
|
|
804
|
+
if (sim >= this.config.imageWholeThreshold) {
|
|
779
805
|
similarPairs.image_color.add(`${id1} & ${id2} = ${(sim * 100).toFixed(2)}%`);
|
|
780
806
|
}
|
|
781
807
|
}
|
|
@@ -785,27 +811,36 @@ var HashManager = class {
|
|
|
785
811
|
for (const h1 of dHashes1) {
|
|
786
812
|
for (const h22 of dHashes2) {
|
|
787
813
|
const sim = this.calculateSimilarity(h1, h22);
|
|
788
|
-
if (sim >= this.config.
|
|
814
|
+
if (sim >= this.config.imageWholeThreshold) {
|
|
789
815
|
similarPairs.image_dhash.add(`${id1} & ${id2} = ${(sim * 100).toFixed(2)}%`);
|
|
790
816
|
}
|
|
791
817
|
}
|
|
792
818
|
}
|
|
819
|
+
const subHashes1 = subHashGroups.get(id1) || [];
|
|
820
|
+
const subHashes2 = subHashGroups.get(id2) || [];
|
|
821
|
+
if (subHashes1.length > 0 && subHashes2.length > 0) {
|
|
822
|
+
let maxPartSim = 0;
|
|
823
|
+
for (const h1 of subHashes1) {
|
|
824
|
+
for (const h22 of subHashes2) {
|
|
825
|
+
const sim = this.calculateSimilarity(h1, h22);
|
|
826
|
+
if (sim > maxPartSim) {
|
|
827
|
+
maxPartSim = sim;
|
|
828
|
+
}
|
|
829
|
+
}
|
|
830
|
+
}
|
|
831
|
+
if (maxPartSim >= this.config.imagePartThreshold) {
|
|
832
|
+
similarPairs.image_part.add(`${id1} & ${id2} = ${(maxPartSim * 100).toFixed(2)}%`);
|
|
833
|
+
}
|
|
834
|
+
}
|
|
793
835
|
}
|
|
794
836
|
}
|
|
795
|
-
const
|
|
796
|
-
subHashToCaves.forEach((caves2) => {
|
|
797
|
-
if (caves2.size > 1) {
|
|
798
|
-
const sortedCaves = [...caves2].sort((a, b) => a - b).join(", ");
|
|
799
|
-
subHashDuplicates.push(`[${sortedCaves}]`);
|
|
800
|
-
}
|
|
801
|
-
});
|
|
802
|
-
const totalFindings = similarPairs.text.size + similarPairs.image_color.size + similarPairs.image_dhash.size + subHashDuplicates.length;
|
|
837
|
+
const totalFindings = similarPairs.text.size + similarPairs.image_color.size + similarPairs.image_dhash.size + similarPairs.image_part.size;
|
|
803
838
|
if (totalFindings === 0) return "未发现高相似度的内容";
|
|
804
|
-
let report = `已发现 ${totalFindings}
|
|
839
|
+
let report = `已发现 ${totalFindings} 组高相似度的内容:`;
|
|
805
840
|
if (similarPairs.text.size > 0) report += "\n文本近似:\n" + [...similarPairs.text].join("\n");
|
|
806
|
-
if (similarPairs.image_color.size > 0) report += "\n
|
|
841
|
+
if (similarPairs.image_color.size > 0) report += "\n图片颜色相似:\n" + [...similarPairs.image_color].join("\n");
|
|
807
842
|
if (similarPairs.image_dhash.size > 0) report += "\n图片结构相似:\n" + [...similarPairs.image_dhash].join("\n");
|
|
808
|
-
if (
|
|
843
|
+
if (similarPairs.image_part.size > 0) report += "\n图片局部近似:\n" + [...similarPairs.image_part].join("\n");
|
|
809
844
|
return report.trim();
|
|
810
845
|
}
|
|
811
846
|
/**
|
|
@@ -918,11 +953,25 @@ var HashManager = class {
|
|
|
918
953
|
* @returns {string} 64位二进制 Simhash 对应的16位十六进制字符串。
|
|
919
954
|
*/
|
|
920
955
|
generateTextSimhash(text) {
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
956
|
+
const cleanText = (text || "").toLowerCase().replace(/\s+/g, "");
|
|
957
|
+
if (!cleanText) {
|
|
958
|
+
return "";
|
|
959
|
+
}
|
|
960
|
+
const n = 2;
|
|
961
|
+
const tokens = /* @__PURE__ */ new Set();
|
|
962
|
+
if (cleanText.length < n) {
|
|
963
|
+
tokens.add(cleanText);
|
|
964
|
+
} else {
|
|
965
|
+
for (let i = 0; i <= cleanText.length - n; i++) {
|
|
966
|
+
tokens.add(cleanText.substring(i, i + n));
|
|
967
|
+
}
|
|
968
|
+
}
|
|
969
|
+
const tokenArray = Array.from(tokens);
|
|
970
|
+
if (tokenArray.length === 0) {
|
|
971
|
+
return "";
|
|
972
|
+
}
|
|
924
973
|
const vector = new Array(64).fill(0);
|
|
925
|
-
|
|
974
|
+
tokenArray.forEach((token) => {
|
|
926
975
|
const hash = crypto.createHash("md5").update(token).digest();
|
|
927
976
|
for (let i = 0; i < 64; i++) {
|
|
928
977
|
vector[i] += hash[Math.floor(i / 8)] >> i % 8 & 1 ? 1 : -1;
|
|
@@ -970,7 +1019,8 @@ var Config = import_koishi3.Schema.intersect([
|
|
|
970
1019
|
enableReview: import_koishi3.Schema.boolean().default(false).description("启用审核"),
|
|
971
1020
|
enableSimilarity: import_koishi3.Schema.boolean().default(false).description("启用查重"),
|
|
972
1021
|
textThreshold: import_koishi3.Schema.number().min(0).max(1).step(0.01).default(0.9).description("文本相似度阈值"),
|
|
973
|
-
|
|
1022
|
+
imageWholeThreshold: import_koishi3.Schema.number().min(0).max(1).step(0.01).default(0.9).description("图片整体相似度阈值"),
|
|
1023
|
+
imagePartThreshold: import_koishi3.Schema.number().min(0).max(1).step(0.01).default(0.95).description("图片局部相似度阈值")
|
|
974
1024
|
}).description("复核配置"),
|
|
975
1025
|
import_koishi3.Schema.object({
|
|
976
1026
|
localPath: import_koishi3.Schema.string().description("文件映射路径"),
|