koishi-plugin-best-cave 2.2.3 → 2.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/HashManager.d.ts +64 -16
- package/lib/Utils.d.ts +2 -16
- package/lib/index.d.ts +53 -0
- package/lib/index.js +242 -151
- package/package.json +1 -1
package/lib/HashManager.d.ts
CHANGED
|
@@ -1,9 +1,18 @@
|
|
|
1
1
|
import { Context, Logger } from 'koishi';
|
|
2
|
-
import { Config } from './index';
|
|
2
|
+
import { Config, CaveObject } from './index';
|
|
3
3
|
import { FileManager } from './FileManager';
|
|
4
|
+
/**
|
|
5
|
+
* @description Complete object model for a row of the `cave_hash` database table.
|
|
6
|
+
*/
|
|
7
|
+
export interface CaveHashObject {
|
|
8
|
+
// ID of the cave this hash was generated for.
cave: number;
|
|
9
|
+
// Hash value as a string.
hash: string;
|
|
10
|
+
// Which algorithm/region produced `hash`: text simhash, whole-image colour pHash, dHash, or one of the four quadrant pHashes.
type: 'simhash' | 'phash_color' | 'dhash_gray' | 'sub_phash_q1' | 'sub_phash_q2' | 'sub_phash_q3' | 'sub_phash_q4';
|
|
11
|
+
}
|
|
4
12
|
/**
|
|
5
13
|
* @class HashManager
|
|
6
14
|
* @description 封装了所有与文本和图片哈希生成、相似度比较、以及相关命令的功能。
|
|
15
|
+
* 实现了高精度的混合策略查重方案。
|
|
7
16
|
*/
|
|
8
17
|
export declare class HashManager {
|
|
9
18
|
private ctx;
|
|
@@ -28,41 +37,80 @@ export declare class HashManager {
|
|
|
28
37
|
* @returns {Promise<string>} 一个包含操作结果的报告字符串。
|
|
29
38
|
*/
|
|
30
39
|
generateHashesForHistoricalCaves(): Promise<string>;
|
|
40
|
+
/**
|
|
41
|
+
* @description 为单个回声洞对象生成所有类型的哈希。
|
|
42
|
+
* @param cave - 回声洞对象。
|
|
43
|
+
* @returns {Promise<CaveHashObject[]>} 生成的哈希对象数组。
|
|
44
|
+
*/
|
|
45
|
+
generateAllHashesForCave(cave: Pick<CaveObject, 'id' | 'elements'>): Promise<CaveHashObject[]>;
|
|
46
|
+
/**
|
|
47
|
+
* @description 为单个图片Buffer生成所有类型的哈希。
|
|
48
|
+
* @param imageBuffer - 图片的Buffer数据。
|
|
49
|
+
* @returns {Promise<object>} 包含所有图片哈希的对象。
|
|
50
|
+
*/
|
|
51
|
+
generateAllImageHashes(imageBuffer: Buffer): Promise<{
|
|
52
|
+
colorPHash: string;
|
|
53
|
+
dHash: string;
|
|
54
|
+
subHashes: {
|
|
55
|
+
q1: string;
|
|
56
|
+
q2: string;
|
|
57
|
+
q3: string;
|
|
58
|
+
q4: string;
|
|
59
|
+
};
|
|
60
|
+
}>;
|
|
31
61
|
/**
|
|
32
62
|
* @description 对回声洞进行混合策略的相似度与重复内容检查。
|
|
33
63
|
* @returns {Promise<string>} 一个包含操作结果的报告字符串。
|
|
34
64
|
*/
|
|
35
65
|
checkForSimilarCaves(): Promise<string>;
|
|
36
66
|
/**
|
|
37
|
-
* @description
|
|
67
|
+
* @description 从单通道原始像素数据计算pHash。
|
|
68
|
+
* @param channelData - 单通道的像素值数组。
|
|
69
|
+
* @param size - 图像的边长(例如16)。
|
|
70
|
+
* @returns {string} 该通道的二进制哈希字符串。
|
|
71
|
+
*/
|
|
72
|
+
private _calculateHashFromRawChannel;
|
|
73
|
+
/**
|
|
74
|
+
* @description 生成768位颜色感知哈希(Color pHash)。
|
|
75
|
+
* @param imageBuffer - 图片的 Buffer 数据。
|
|
76
|
+
* @returns {Promise<string>} 768位二进制哈希对应的192位十六进制字符串。
|
|
77
|
+
*/
|
|
78
|
+
generateColorPHash(imageBuffer: Buffer): Promise<string>;
|
|
79
|
+
/**
|
|
80
|
+
* @description 生成256位差异哈希(dHash)。
|
|
38
81
|
* @param imageBuffer - 图片的 Buffer 数据。
|
|
39
|
-
* @returns {Promise<
|
|
82
|
+
* @returns {Promise<string>} 256位二进制哈希对应的64位十六进制字符串。
|
|
40
83
|
*/
|
|
41
|
-
|
|
84
|
+
generateDHash(imageBuffer: Buffer): Promise<string>;
|
|
42
85
|
/**
|
|
43
|
-
* @description
|
|
44
|
-
* @param imageBuffer 图片的 Buffer 数据。
|
|
45
|
-
* @returns
|
|
86
|
+
* @description 将图片切割为4个象限并为每个象限生成Color pHash。
|
|
87
|
+
* @param imageBuffer - 图片的 Buffer 数据。
|
|
88
|
+
* @returns {Promise<object>} 包含四个象限哈希的对象。
|
|
46
89
|
*/
|
|
47
|
-
|
|
90
|
+
generateImageSubHashes(imageBuffer: Buffer): Promise<{
|
|
91
|
+
q1: string;
|
|
92
|
+
q2: string;
|
|
93
|
+
q3: string;
|
|
94
|
+
q4: string;
|
|
95
|
+
}>;
|
|
48
96
|
/**
|
|
49
|
-
* @description
|
|
50
|
-
* @param
|
|
51
|
-
* @param
|
|
97
|
+
* @description 计算两个十六进制哈希字符串之间的汉明距离。
|
|
98
|
+
* @param hex1 - 第一个十六进制哈希字符串。
|
|
99
|
+
* @param hex2 - 第二个十六进制哈希字符串。
|
|
52
100
|
* @returns {number} 两个哈希之间的距离。
|
|
53
101
|
*/
|
|
54
|
-
calculateHammingDistance(
|
|
102
|
+
calculateHammingDistance(hex1: string, hex2: string): number;
|
|
55
103
|
/**
|
|
56
104
|
* @description 根据汉明距离计算图片或文本哈希的相似度。
|
|
57
|
-
* @param
|
|
58
|
-
* @param
|
|
105
|
+
* @param hex1 - 第一个十六进制哈希字符串。
|
|
106
|
+
* @param hex2 - 第二个十六进制哈希字符串。
|
|
59
107
|
* @returns {number} 范围在0到1之间的相似度得分。
|
|
60
108
|
*/
|
|
61
|
-
calculateSimilarity(
|
|
109
|
+
calculateSimilarity(hex1: string, hex2: string): number;
|
|
62
110
|
/**
|
|
63
111
|
* @description 为文本生成基于 Simhash 算法的哈希字符串。
|
|
64
112
|
* @param text - 需要处理的文本。
|
|
65
|
-
* @returns {string} 64位二进制 Simhash
|
|
113
|
+
* @returns {string} 64位二进制 Simhash 对应的16位十六进制字符串。
|
|
66
114
|
*/
|
|
67
115
|
generateTextSimhash(text: string): string;
|
|
68
116
|
}
|
package/lib/Utils.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { Context, h, Logger, Session } from 'koishi';
|
|
2
|
-
import { CaveObject, Config, StoredElement
|
|
2
|
+
import { CaveObject, Config, StoredElement } from './index';
|
|
3
3
|
import { FileManager } from './FileManager';
|
|
4
|
-
import { HashManager } from './HashManager';
|
|
4
|
+
import { HashManager, CaveHashObject } from './HashManager';
|
|
5
5
|
import { ReviewManager } from './ReviewManager';
|
|
6
6
|
/**
|
|
7
7
|
* @description 将数据库存储的 StoredElement[] 转换为 Koishi 的 h() 元素数组。
|
|
@@ -65,20 +65,6 @@ export declare function processMessageElements(sourceElements: h[], newId: numbe
|
|
|
65
65
|
fileName: string;
|
|
66
66
|
}[];
|
|
67
67
|
}>;
|
|
68
|
-
/**
|
|
69
|
-
* @description 异步处理文件上传、查重和状态更新的后台任务。
|
|
70
|
-
* @param ctx - Koishi 上下文。
|
|
71
|
-
* @param config - 插件配置。
|
|
72
|
-
* @param fileManager - FileManager 实例,用于保存文件。
|
|
73
|
-
* @param logger - 日志记录器实例。
|
|
74
|
-
* @param reviewManager - ReviewManager 实例,用于提交审核。
|
|
75
|
-
* @param cave - 刚刚在数据库中创建的 `preload` 状态的回声洞对象。
|
|
76
|
-
* @param mediaToSave - 需要下载和处理的媒体文件列表。
|
|
77
|
-
* @param reusableIds - 可复用 ID 的内存缓存。
|
|
78
|
-
* @param session - 触发此操作的用户会话,用于发送反馈。
|
|
79
|
-
* @param hashManager - HashManager 实例,如果启用则用于哈希计算和比较。
|
|
80
|
-
* @param textHashesToStore - 已预先计算好的、待存入数据库的文本哈希对象数组。
|
|
81
|
-
*/
|
|
82
68
|
export declare function handleFileUploads(ctx: Context, config: Config, fileManager: FileManager, logger: Logger, reviewManager: ReviewManager, cave: CaveObject, mediaToToSave: {
|
|
83
69
|
sourceUrl: string;
|
|
84
70
|
fileName: string;
|
package/lib/index.d.ts
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import { Context, Schema } from 'koishi';
|
|
2
|
+
import { CaveHashObject } from './HashManager';
|
|
3
|
+
export declare const name = "best-cave";
|
|
4
|
+
export declare const inject: string[];
|
|
5
|
+
export declare const usage = "\n<div style=\"border-radius: 10px; border: 1px solid #ddd; padding: 16px; margin-bottom: 20px; box-shadow: 0 2px 5px rgba(0,0,0,0.1);\">\n <h2 style=\"margin-top: 0; color: #4a6ee0;\">\uD83D\uDCCC \u63D2\u4EF6\u8BF4\u660E</h2>\n <p>\uD83D\uDCD6 <strong>\u4F7F\u7528\u6587\u6863</strong>\uFF1A\u8BF7\u70B9\u51FB\u5DE6\u4E0A\u89D2\u7684 <strong>\u63D2\u4EF6\u4E3B\u9875</strong> \u67E5\u770B\u63D2\u4EF6\u4F7F\u7528\u6587\u6863</p>\n <p>\uD83D\uDD0D <strong>\u66F4\u591A\u63D2\u4EF6</strong>\uFF1A\u53EF\u8BBF\u95EE <a href=\"https://github.com/YisRime\" style=\"color:#4a6ee0;text-decoration:none;\">\u82E1\u6DDE\u7684 GitHub</a> \u67E5\u770B\u672C\u4EBA\u7684\u6240\u6709\u63D2\u4EF6</p>\n</div>\n<div style=\"border-radius: 10px; border: 1px solid #ddd; padding: 16px; margin-bottom: 20px; box-shadow: 0 2px 5px rgba(0,0,0,0.1);\">\n <h2 style=\"margin-top: 0; color: #e0574a;\">\u2764\uFE0F \u652F\u6301\u4E0E\u53CD\u9988</h2>\n <p>\uD83C\uDF1F \u559C\u6B22\u8FD9\u4E2A\u63D2\u4EF6\uFF1F\u8BF7\u5728 <a href=\"https://github.com/YisRime\" style=\"color:#e0574a;text-decoration:none;\">GitHub</a> \u4E0A\u7ED9\u6211\u4E00\u4E2A Star\uFF01</p>\n <p>\uD83D\uDC1B \u9047\u5230\u95EE\u9898\uFF1F\u8BF7\u901A\u8FC7 <strong>Issues</strong> \u63D0\u4EA4\u53CD\u9988\uFF0C\u6216\u52A0\u5165 QQ \u7FA4 <a href=\"https://qm.qq.com/q/PdLMx9Jowq\" style=\"color:#e0574a;text-decoration:none;\"><strong>855571375</strong></a> \u8FDB\u884C\u4EA4\u6D41</p>\n</div>\n";
|
|
6
|
+
/**
|
|
7
|
+
* @description A single message element as stored in the database.
|
|
8
|
+
*/
|
|
9
|
+
export interface StoredElement {
|
|
10
|
+
// Kind of element; selects which of the optional fields below is meaningful.
type: 'text' | 'image' | 'video' | 'audio' | 'file';
|
|
11
|
+
// Text payload; the hashing code joins the `content` of all text elements before computing the simhash.
content?: string;
|
|
12
|
+
// Stored file name; the hashing code passes it to `fileManager.readFile` for image elements.
file?: string;
|
|
13
|
+
}
|
|
14
|
+
/**
|
|
15
|
+
* @description Complete object model for a row of the `cave` database table.
|
|
16
|
+
*/
|
|
17
|
+
export interface CaveObject {
|
|
18
|
+
// Numeric cave ID; `cave_hash` rows refer to it through their `cave` field.
id: number;
|
|
19
|
+
// Message elements that make up the cave's content.
elements: StoredElement[];
|
|
20
|
+
// NOTE(review): presumably the channel the cave was submitted from - confirm against the caller.
channelId: string;
|
|
21
|
+
// NOTE(review): presumably the submitting user's ID - confirm against the caller.
userId: string;
|
|
22
|
+
// Display name stored with the cave.
userName: string;
|
|
23
|
+
// Lifecycle state; the similarity check only loads caves whose status is 'active'.
status: 'active' | 'delete' | 'pending' | 'preload';
|
|
24
|
+
// Timestamp stored with the cave.
time: Date;
|
|
25
|
+
}
|
|
26
|
+
// Module augmentation: adds the plugin's `cave` and `cave_hash` tables to koishi's `Tables` interface
// so that database calls using those table names are typed with CaveObject / CaveHashObject.
declare module 'koishi' {
|
|
27
|
+
interface Tables {
|
|
28
|
+
cave: CaveObject;
|
|
29
|
+
cave_hash: CaveHashObject;
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
// Shape of the plugin configuration object that `apply(ctx, config)` receives.
// Only fields whose use is visible in this diff are annotated; the rest are left undocumented on purpose.
export interface Config {
|
|
33
|
+
coolDown: number;
|
|
34
|
+
perChannel: boolean;
|
|
35
|
+
adminChannel: string;
|
|
36
|
+
// When true, the submitter's name is looked up through the profile manager before falling back to the session username.
enableProfile: boolean;
|
|
37
|
+
enableIO: boolean;
|
|
38
|
+
enableReview: boolean;
|
|
39
|
+
caveFormat: string;
|
|
40
|
+
// NOTE(review): presumably toggles creation of the HashManager used for duplicate detection - confirm in apply().
enableSimilarity: boolean;
|
|
41
|
+
// Similarity score (0..1) at or above which two text simhashes are treated as similar.
textThreshold: number;
|
|
42
|
+
// Similarity score (0..1) at or above which two image hashes are treated as similar.
imageThreshold: number;
|
|
43
|
+
localPath?: string;
|
|
44
|
+
enableS3: boolean;
|
|
45
|
+
endpoint?: string;
|
|
46
|
+
region?: string;
|
|
47
|
+
accessKeyId?: string;
|
|
48
|
+
secretAccessKey?: string;
|
|
49
|
+
bucket?: string;
|
|
50
|
+
publicUrl?: string;
|
|
51
|
+
}
|
|
52
|
+
export declare const Config: Schema<Config>;
|
|
53
|
+
export declare function apply(ctx: Context, config: Config): void;
|
package/lib/index.js
CHANGED
|
@@ -431,35 +431,62 @@ async function handleFileUploads(ctx, config, fileManager, logger2, reviewManage
|
|
|
431
431
|
try {
|
|
432
432
|
const downloadedMedia = [];
|
|
433
433
|
const imageHashesToStore = [];
|
|
434
|
-
const
|
|
435
|
-
const
|
|
434
|
+
const existingHashes = hashManager ? await ctx.database.get("cave_hash", { type: { $ne: "simhash" } }) : [];
|
|
435
|
+
const existingColorPHashes = existingHashes.filter((h4) => h4.type === "phash_color");
|
|
436
|
+
const existingDHashes = existingHashes.filter((h4) => h4.type === "dhash_gray");
|
|
437
|
+
const existingSubHashObjects = existingHashes.filter((h4) => h4.type.startsWith("sub_phash_"));
|
|
436
438
|
for (const media of mediaToToSave) {
|
|
437
439
|
const buffer = Buffer.from(await ctx.http.get(media.sourceUrl, { responseType: "arraybuffer", timeout: 3e4 }));
|
|
438
440
|
downloadedMedia.push({ fileName: media.fileName, buffer });
|
|
439
441
|
if (hashManager && [".png", ".jpg", ".jpeg", ".webp"].includes(path2.extname(media.fileName).toLowerCase())) {
|
|
440
|
-
const
|
|
441
|
-
|
|
442
|
-
|
|
442
|
+
const { colorPHash, dHash, subHashes } = await hashManager.generateAllImageHashes(buffer);
|
|
443
|
+
let caveToDelete = null;
|
|
444
|
+
let highestCombinedSimilarity = 0;
|
|
445
|
+
const similarityScores = /* @__PURE__ */ new Map();
|
|
446
|
+
for (const existing of existingColorPHashes) {
|
|
447
|
+
const similarity = hashManager.calculateSimilarity(colorPHash, existing.hash);
|
|
443
448
|
if (similarity >= config.imageThreshold) {
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
reusableIds.add(cave.id);
|
|
447
|
-
return;
|
|
449
|
+
if (!similarityScores.has(existing.cave)) similarityScores.set(existing.cave, {});
|
|
450
|
+
similarityScores.get(existing.cave).colorSim = similarity;
|
|
448
451
|
}
|
|
449
452
|
}
|
|
450
|
-
const
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
453
|
+
for (const existing of existingDHashes) {
|
|
454
|
+
const similarity = hashManager.calculateSimilarity(dHash, existing.hash);
|
|
455
|
+
if (similarity >= config.imageThreshold) {
|
|
456
|
+
if (!similarityScores.has(existing.cave)) similarityScores.set(existing.cave, {});
|
|
457
|
+
similarityScores.get(existing.cave).dSim = similarity;
|
|
458
|
+
}
|
|
459
|
+
}
|
|
460
|
+
for (const [caveId, scores] of similarityScores.entries()) {
|
|
461
|
+
if (scores.colorSim && scores.dSim) {
|
|
462
|
+
caveToDelete = caveId;
|
|
463
|
+
highestCombinedSimilarity = scores.colorSim;
|
|
464
|
+
break;
|
|
465
|
+
}
|
|
466
|
+
}
|
|
467
|
+
if (caveToDelete) {
|
|
468
|
+
await session.send(`图片与回声洞(${caveToDelete})的相似度为 ${(highestCombinedSimilarity * 100).toFixed(2)}%,超过阈值`);
|
|
469
|
+
await ctx.database.upsert("cave", [{ id: cave.id, status: "delete" }]);
|
|
470
|
+
cleanupPendingDeletions(ctx, fileManager, logger2, reusableIds);
|
|
471
|
+
return;
|
|
472
|
+
}
|
|
473
|
+
const notifiedPartialCaves = /* @__PURE__ */ new Set();
|
|
474
|
+
for (const newSubHash of Object.values(subHashes)) {
|
|
475
|
+
for (const existing of existingSubHashObjects) {
|
|
476
|
+
if (notifiedPartialCaves.has(existing.cave)) continue;
|
|
455
477
|
const similarity = hashManager.calculateSimilarity(newSubHash, existing.hash);
|
|
456
478
|
if (similarity >= config.imageThreshold) {
|
|
457
479
|
await session.send(`图片局部与回声洞(${existing.cave})的相似度为 ${(similarity * 100).toFixed(2)}%`);
|
|
480
|
+
notifiedPartialCaves.add(existing.cave);
|
|
458
481
|
}
|
|
459
482
|
}
|
|
460
483
|
}
|
|
461
|
-
|
|
462
|
-
imageHashesToStore.push(
|
|
484
|
+
imageHashesToStore.push({ hash: colorPHash, type: "phash_color" });
|
|
485
|
+
imageHashesToStore.push({ hash: dHash, type: "dhash_gray" });
|
|
486
|
+
imageHashesToStore.push({ hash: subHashes.q1, type: "sub_phash_q1" });
|
|
487
|
+
imageHashesToStore.push({ hash: subHashes.q2, type: "sub_phash_q2" });
|
|
488
|
+
imageHashesToStore.push({ hash: subHashes.q3, type: "sub_phash_q3" });
|
|
489
|
+
imageHashesToStore.push({ hash: subHashes.q4, type: "sub_phash_q4" });
|
|
463
490
|
}
|
|
464
491
|
}
|
|
465
492
|
await Promise.all(downloadedMedia.map((item) => fileManager.saveFile(item.fileName, item.buffer)));
|
|
@@ -643,195 +670,250 @@ var HashManager = class {
|
|
|
643
670
|
let hashesToInsert = [];
|
|
644
671
|
let historicalCount = 0;
|
|
645
672
|
let totalHashesGenerated = 0;
|
|
646
|
-
let batchStartCaveCount = 0;
|
|
647
|
-
const flushHashes = /* @__PURE__ */ __name(async () => {
|
|
648
|
-
if (hashesToInsert.length > 0) {
|
|
649
|
-
this.logger.info(`补全第 ${batchStartCaveCount + 1} 到 ${historicalCount} 条回声洞哈希中...`);
|
|
650
|
-
try {
|
|
651
|
-
await this.ctx.database.upsert("cave_hash", hashesToInsert);
|
|
652
|
-
totalHashesGenerated += hashesToInsert.length;
|
|
653
|
-
} catch (error) {
|
|
654
|
-
this.logger.error(`导入哈希失败: ${error.message}`);
|
|
655
|
-
}
|
|
656
|
-
hashesToInsert = [];
|
|
657
|
-
batchStartCaveCount = historicalCount;
|
|
658
|
-
}
|
|
659
|
-
}, "flushHashes");
|
|
660
673
|
for (const cave of allCaves) {
|
|
661
674
|
if (existingHashedCaveIds.has(cave.id)) continue;
|
|
662
675
|
historicalCount++;
|
|
663
|
-
const newHashesForCave =
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
for (const el of cave.elements.filter((el2) => el2.type === "image" && el2.file)) {
|
|
670
|
-
try {
|
|
671
|
-
const imageBuffer = await this.fileManager.readFile(el.file);
|
|
672
|
-
const pHash = await this.generateImagePHash(imageBuffer);
|
|
673
|
-
newHashesForCave.push({ cave: cave.id, hash: pHash, type: "phash" });
|
|
674
|
-
const subHashes = await this.generateImageSubHashes(imageBuffer);
|
|
675
|
-
subHashes.forEach((subHash) => newHashesForCave.push({ cave: cave.id, hash: subHash, type: "sub" }));
|
|
676
|
-
} catch (e) {
|
|
677
|
-
this.logger.warn(`无法为回声洞(${cave.id})的内容(${el.file})生成哈希:`, e);
|
|
678
|
-
}
|
|
676
|
+
const newHashesForCave = await this.generateAllHashesForCave(cave);
|
|
677
|
+
hashesToInsert.push(...newHashesForCave);
|
|
678
|
+
if (hashesToInsert.length >= 100) {
|
|
679
|
+
await this.ctx.database.upsert("cave_hash", hashesToInsert);
|
|
680
|
+
totalHashesGenerated += hashesToInsert.length;
|
|
681
|
+
hashesToInsert = [];
|
|
679
682
|
}
|
|
680
|
-
const uniqueHashesMap = /* @__PURE__ */ new Map();
|
|
681
|
-
newHashesForCave.forEach((h4) => {
|
|
682
|
-
const uniqueKey = `${h4.type}-${h4.hash}`;
|
|
683
|
-
uniqueHashesMap.set(uniqueKey, h4);
|
|
684
|
-
});
|
|
685
|
-
hashesToInsert.push(...uniqueHashesMap.values());
|
|
686
|
-
if (hashesToInsert.length >= 100) await flushHashes();
|
|
687
683
|
}
|
|
688
|
-
|
|
684
|
+
if (hashesToInsert.length > 0) {
|
|
685
|
+
await this.ctx.database.upsert("cave_hash", hashesToInsert);
|
|
686
|
+
totalHashesGenerated += hashesToInsert.length;
|
|
687
|
+
}
|
|
689
688
|
return totalHashesGenerated > 0 ? `已补全 ${historicalCount} 个回声洞的 ${totalHashesGenerated} 条哈希` : "无需补全回声洞哈希";
|
|
690
689
|
}
|
|
690
|
+
/**
|
|
691
|
+
* @description 为单个回声洞对象生成所有类型的哈希。
|
|
692
|
+
* @param cave - 回声洞对象。
|
|
693
|
+
* @returns {Promise<CaveHashObject[]>} 生成的哈希对象数组。
|
|
694
|
+
*/
|
|
695
|
+
async generateAllHashesForCave(cave) {
|
|
696
|
+
const allHashes = [];
|
|
697
|
+
const combinedText = cave.elements.filter((el) => el.type === "text" && el.content).map((el) => el.content).join(" ");
|
|
698
|
+
if (combinedText) {
|
|
699
|
+
const textHash = this.generateTextSimhash(combinedText);
|
|
700
|
+
allHashes.push({ cave: cave.id, hash: textHash, type: "simhash" });
|
|
701
|
+
}
|
|
702
|
+
for (const el of cave.elements.filter((el2) => el2.type === "image" && el2.file)) {
|
|
703
|
+
try {
|
|
704
|
+
const imageBuffer = await this.fileManager.readFile(el.file);
|
|
705
|
+
const imageHashes = await this.generateAllImageHashes(imageBuffer);
|
|
706
|
+
allHashes.push({ cave: cave.id, hash: imageHashes.colorPHash, type: "phash_color" });
|
|
707
|
+
allHashes.push({ cave: cave.id, hash: imageHashes.dHash, type: "dhash_gray" });
|
|
708
|
+
allHashes.push({ cave: cave.id, hash: imageHashes.subHashes.q1, type: "sub_phash_q1" });
|
|
709
|
+
allHashes.push({ cave: cave.id, hash: imageHashes.subHashes.q2, type: "sub_phash_q2" });
|
|
710
|
+
allHashes.push({ cave: cave.id, hash: imageHashes.subHashes.q3, type: "sub_phash_q3" });
|
|
711
|
+
allHashes.push({ cave: cave.id, hash: imageHashes.subHashes.q4, type: "sub_phash_q4" });
|
|
712
|
+
} catch (e) {
|
|
713
|
+
this.logger.warn(`无法为回声洞(${cave.id})的内容(${el.file})生成哈希:`, e);
|
|
714
|
+
}
|
|
715
|
+
}
|
|
716
|
+
return allHashes;
|
|
717
|
+
}
|
|
718
|
+
/**
|
|
719
|
+
* @description 为单个图片Buffer生成所有类型的哈希。
|
|
720
|
+
* @param imageBuffer - 图片的Buffer数据。
|
|
721
|
+
* @returns {Promise<object>} 包含所有图片哈希的对象。
|
|
722
|
+
*/
|
|
723
|
+
async generateAllImageHashes(imageBuffer) {
|
|
724
|
+
const [colorPHash, dHash, subHashes] = await Promise.all([
|
|
725
|
+
this.generateColorPHash(imageBuffer),
|
|
726
|
+
this.generateDHash(imageBuffer),
|
|
727
|
+
this.generateImageSubHashes(imageBuffer)
|
|
728
|
+
]);
|
|
729
|
+
return { colorPHash, dHash, subHashes };
|
|
730
|
+
}
|
|
691
731
|
/**
|
|
692
732
|
* @description 对回声洞进行混合策略的相似度与重复内容检查。
|
|
693
733
|
* @returns {Promise<string>} 一个包含操作结果的报告字符串。
|
|
694
734
|
*/
|
|
695
735
|
async checkForSimilarCaves() {
|
|
696
736
|
const allHashes = await this.ctx.database.get("cave_hash", {});
|
|
697
|
-
const
|
|
698
|
-
const
|
|
737
|
+
const caves = await this.ctx.database.get("cave", { status: "active" }, { fields: ["id"] });
|
|
738
|
+
const allCaveIds = caves.map((c) => c.id);
|
|
739
|
+
const hashGroups = {
|
|
740
|
+
simhash: /* @__PURE__ */ new Map(),
|
|
741
|
+
phash_color: /* @__PURE__ */ new Map(),
|
|
742
|
+
dhash_gray: /* @__PURE__ */ new Map()
|
|
743
|
+
};
|
|
699
744
|
const subHashToCaves = /* @__PURE__ */ new Map();
|
|
700
745
|
for (const hash of allHashes) {
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
caveImagePHashes.get(hash.cave).push(hash.hash);
|
|
708
|
-
break;
|
|
709
|
-
case "sub":
|
|
710
|
-
if (!subHashToCaves.has(hash.hash)) subHashToCaves.set(hash.hash, /* @__PURE__ */ new Set());
|
|
711
|
-
subHashToCaves.get(hash.hash).add(hash.cave);
|
|
712
|
-
break;
|
|
746
|
+
if (hashGroups[hash.type]) {
|
|
747
|
+
if (!hashGroups[hash.type].has(hash.cave)) hashGroups[hash.type].set(hash.cave, []);
|
|
748
|
+
hashGroups[hash.type].get(hash.cave).push(hash.hash);
|
|
749
|
+
} else if (hash.type.startsWith("sub_phash_")) {
|
|
750
|
+
if (!subHashToCaves.has(hash.hash)) subHashToCaves.set(hash.hash, /* @__PURE__ */ new Set());
|
|
751
|
+
subHashToCaves.get(hash.hash).add(hash.cave);
|
|
713
752
|
}
|
|
714
753
|
}
|
|
715
|
-
const
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
}
|
|
721
|
-
});
|
|
722
|
-
const textSimilarPairs = [];
|
|
723
|
-
const imageSimilarPairs = [];
|
|
724
|
-
const allCaveIds = Array.from(/* @__PURE__ */ new Set([...caveTextHashes.keys(), ...caveImagePHashes.keys()]));
|
|
754
|
+
const similarPairs = {
|
|
755
|
+
text: /* @__PURE__ */ new Set(),
|
|
756
|
+
image_color: /* @__PURE__ */ new Set(),
|
|
757
|
+
image_dhash: /* @__PURE__ */ new Set()
|
|
758
|
+
};
|
|
725
759
|
for (let i = 0; i < allCaveIds.length; i++) {
|
|
726
760
|
for (let j = i + 1; j < allCaveIds.length; j++) {
|
|
727
761
|
const id1 = allCaveIds[i];
|
|
728
762
|
const id2 = allCaveIds[j];
|
|
729
|
-
const
|
|
730
|
-
const
|
|
731
|
-
if (
|
|
732
|
-
const
|
|
733
|
-
if (
|
|
734
|
-
|
|
763
|
+
const simhash1 = hashGroups.simhash.get(id1)?.[0];
|
|
764
|
+
const simhash2 = hashGroups.simhash.get(id2)?.[0];
|
|
765
|
+
if (simhash1 && simhash2) {
|
|
766
|
+
const sim = this.calculateSimilarity(simhash1, simhash2);
|
|
767
|
+
if (sim >= this.config.textThreshold) {
|
|
768
|
+
similarPairs.text.add(`${id1} & ${id2} = ${(sim * 100).toFixed(2)}%`);
|
|
735
769
|
}
|
|
736
770
|
}
|
|
737
|
-
const
|
|
738
|
-
const
|
|
739
|
-
|
|
740
|
-
for (const
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
771
|
+
const colorHashes1 = hashGroups.phash_color.get(id1) || [];
|
|
772
|
+
const colorHashes2 = hashGroups.phash_color.get(id2) || [];
|
|
773
|
+
for (const h1 of colorHashes1) {
|
|
774
|
+
for (const h22 of colorHashes2) {
|
|
775
|
+
const sim = this.calculateSimilarity(h1, h22);
|
|
776
|
+
if (sim >= this.config.imageThreshold) {
|
|
777
|
+
similarPairs.image_color.add(`${id1} & ${id2} = ${(sim * 100).toFixed(2)}%`);
|
|
778
|
+
}
|
|
779
|
+
}
|
|
780
|
+
}
|
|
781
|
+
const dHashes1 = hashGroups.dhash_gray.get(id1) || [];
|
|
782
|
+
const dHashes2 = hashGroups.dhash_gray.get(id2) || [];
|
|
783
|
+
for (const h1 of dHashes1) {
|
|
784
|
+
for (const h22 of dHashes2) {
|
|
785
|
+
const sim = this.calculateSimilarity(h1, h22);
|
|
786
|
+
if (sim >= this.config.imageThreshold) {
|
|
787
|
+
similarPairs.image_dhash.add(`${id1} & ${id2} = ${(sim * 100).toFixed(2)}%`);
|
|
746
788
|
}
|
|
747
789
|
}
|
|
748
790
|
}
|
|
749
791
|
}
|
|
750
792
|
}
|
|
751
|
-
const
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
793
|
+
const subHashDuplicates = [];
|
|
794
|
+
subHashToCaves.forEach((caves2) => {
|
|
795
|
+
if (caves2.size > 1) {
|
|
796
|
+
const sortedCaves = [...caves2].sort((a, b) => a - b).join(", ");
|
|
797
|
+
subHashDuplicates.push(`[${sortedCaves}]`);
|
|
798
|
+
}
|
|
799
|
+
});
|
|
800
|
+
const totalFindings = similarPairs.text.size + similarPairs.image_color.size + similarPairs.image_dhash.size + subHashDuplicates.length;
|
|
801
|
+
if (totalFindings === 0) return "未发现高相似度的内容";
|
|
802
|
+
let report = `已发现 ${totalFindings} 组高相似度或重复的内容:`;
|
|
803
|
+
if (similarPairs.text.size > 0) report += "\n文本近似:\n" + [...similarPairs.text].join("\n");
|
|
804
|
+
if (similarPairs.image_color.size > 0) report += "\n图片整体相似:\n" + [...similarPairs.image_color].join("\n");
|
|
805
|
+
if (similarPairs.image_dhash.size > 0) report += "\n图片结构相似:\n" + [...similarPairs.image_dhash].join("\n");
|
|
806
|
+
if (subHashDuplicates.length > 0) report += "\n图片局部重复:\n" + [...new Set(subHashDuplicates)].join("\n");
|
|
807
|
+
return report.trim();
|
|
808
|
+
}
|
|
809
|
+
/**
|
|
810
|
+
* @description 从单通道原始像素数据计算pHash。
|
|
811
|
+
* @param channelData - 单通道的像素值数组。
|
|
812
|
+
* @param size - 图像的边长(例如16)。
|
|
813
|
+
* @returns {string} 该通道的二进制哈希字符串。
|
|
814
|
+
*/
|
|
815
|
+
_calculateHashFromRawChannel(channelData, size) {
|
|
816
|
+
const totalLuminance = channelData.reduce((acc, val) => acc + val, 0);
|
|
817
|
+
const avgLuminance = totalLuminance / (size * size);
|
|
818
|
+
return channelData.map((lum) => lum > avgLuminance ? "1" : "0").join("");
|
|
819
|
+
}
|
|
820
|
+
/**
|
|
821
|
+
* @description 生成768位颜色感知哈希(Color pHash)。
|
|
822
|
+
* @param imageBuffer - 图片的 Buffer 数据。
|
|
823
|
+
* @returns {Promise<string>} 768位二进制哈希对应的192位十六进制字符串。
|
|
824
|
+
*/
|
|
825
|
+
async generateColorPHash(imageBuffer) {
|
|
826
|
+
const { data, info } = await (0, import_sharp.default)(imageBuffer).resize(16, 16, { fit: "fill" }).removeAlpha().raw().toBuffer({ resolveWithObject: true });
|
|
827
|
+
const { channels } = info;
|
|
828
|
+
const r = [], g = [], b = [];
|
|
829
|
+
for (let i = 0; i < data.length; i += channels) {
|
|
830
|
+
r.push(data[i]);
|
|
831
|
+
g.push(data[i + 1]);
|
|
832
|
+
b.push(data[i + 2]);
|
|
762
833
|
}
|
|
763
|
-
|
|
764
|
-
|
|
834
|
+
const rHash = this._calculateHashFromRawChannel(r, 16);
|
|
835
|
+
const gHash = this._calculateHashFromRawChannel(g, 16);
|
|
836
|
+
const bHash = this._calculateHashFromRawChannel(b, 16);
|
|
837
|
+
const combinedHash = rHash + gHash + bHash;
|
|
838
|
+
let hex = "";
|
|
839
|
+
for (let i = 0; i < combinedHash.length; i += 4) {
|
|
840
|
+
hex += parseInt(combinedHash.substring(i, i + 4), 2).toString(16);
|
|
765
841
|
}
|
|
766
|
-
return
|
|
842
|
+
return hex.padStart(192, "0");
|
|
767
843
|
}
|
|
768
844
|
/**
|
|
769
|
-
* @description
|
|
845
|
+
* @description 生成256位差异哈希(dHash)。
|
|
770
846
|
* @param imageBuffer - 图片的 Buffer 数据。
|
|
771
|
-
* @returns {Promise<
|
|
847
|
+
* @returns {Promise<string>} 256位二进制哈希对应的64位十六进制字符串。
|
|
772
848
|
*/
|
|
773
|
-
async
|
|
774
|
-
const
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
{ left: 0, top: 0, width: Math.floor(width / 2), height: Math.floor(height / 2) },
|
|
781
|
-
{ left: Math.floor(width / 2), top: 0, width: Math.ceil(width / 2), height: Math.floor(height / 2) },
|
|
782
|
-
{ left: 0, top: Math.floor(height / 2), width: Math.floor(width / 2), height: Math.ceil(height / 2) },
|
|
783
|
-
{ left: Math.floor(width / 2), top: Math.floor(height / 2), width: Math.ceil(width / 2), height: Math.ceil(height / 2) }
|
|
784
|
-
];
|
|
785
|
-
for (const region of regions) {
|
|
786
|
-
if (region.width < 8 || region.height < 8) continue;
|
|
787
|
-
const quadrantBuffer = await (0, import_sharp.default)(imageBuffer).extract(region).toBuffer();
|
|
788
|
-
hashes.add(await this.generateImagePHash(quadrantBuffer));
|
|
849
|
+
async generateDHash(imageBuffer) {
|
|
850
|
+
const pixels = await (0, import_sharp.default)(imageBuffer).grayscale().resize(17, 16, { fit: "fill" }).raw().toBuffer();
|
|
851
|
+
let hash = "";
|
|
852
|
+
for (let y = 0; y < 16; y++) {
|
|
853
|
+
for (let x = 0; x < 16; x++) {
|
|
854
|
+
const i = y * 17 + x;
|
|
855
|
+
hash += pixels[i] > pixels[i + 1] ? "1" : "0";
|
|
789
856
|
}
|
|
790
|
-
} catch (e) {
|
|
791
|
-
this.logger.warn(`生成子哈希失败:`, e);
|
|
792
857
|
}
|
|
793
|
-
return
|
|
858
|
+
return BigInt("0b" + hash).toString(16).padStart(64, "0");
|
|
794
859
|
}
|
|
795
860
|
/**
|
|
796
|
-
* @description
|
|
797
|
-
* @param imageBuffer 图片的 Buffer 数据。
|
|
798
|
-
* @returns
|
|
861
|
+
* @description 将图片切割为4个象限并为每个象限生成Color pHash。
|
|
862
|
+
* @param imageBuffer - 图片的 Buffer 数据。
|
|
863
|
+
* @returns {Promise<object>} 包含四个象限哈希的对象。
|
|
799
864
|
*/
|
|
800
|
-
async
|
|
801
|
-
const
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
865
|
+
async generateImageSubHashes(imageBuffer) {
|
|
866
|
+
const { width, height } = await (0, import_sharp.default)(imageBuffer).metadata();
|
|
867
|
+
if (!width || !height || width < 16 || height < 16) {
|
|
868
|
+
const fallbackHash = await this.generateColorPHash(imageBuffer);
|
|
869
|
+
return { q1: fallbackHash, q2: fallbackHash, q3: fallbackHash, q4: fallbackHash };
|
|
870
|
+
}
|
|
871
|
+
const w2 = Math.floor(width / 2), h22 = Math.floor(height / 2);
|
|
872
|
+
const regions = [
|
|
873
|
+
{ left: 0, top: 0, width: w2, height: h22 },
|
|
874
|
+
{ left: w2, top: 0, width: width - w2, height: h22 },
|
|
875
|
+
{ left: 0, top: h22, width: w2, height: height - h22 },
|
|
876
|
+
{ left: w2, top: h22, width: width - w2, height: height - h22 }
|
|
877
|
+
];
|
|
878
|
+
const [q1, q2, q3, q4] = await Promise.all(
|
|
879
|
+
regions.map((region) => {
|
|
880
|
+
if (region.width < 8 || region.height < 8) return this.generateColorPHash(imageBuffer);
|
|
881
|
+
return (0, import_sharp.default)(imageBuffer).extract(region).toBuffer().then((b) => this.generateColorPHash(b));
|
|
882
|
+
})
|
|
883
|
+
);
|
|
884
|
+
return { q1, q2, q3, q4 };
|
|
805
885
|
}
|
|
806
886
|
/**
|
|
807
|
-
* @description
|
|
808
|
-
* @param
|
|
809
|
-
* @param
|
|
887
|
+
* @description 计算两个十六进制哈希字符串之间的汉明距离。
|
|
888
|
+
* @param hex1 - 第一个十六进制哈希字符串。
|
|
889
|
+
* @param hex2 - 第二个十六进制哈希字符串。
|
|
810
890
|
* @returns {number} 两个哈希之间的距离。
|
|
811
891
|
*/
|
|
812
|
-
calculateHammingDistance(
|
|
892
|
+
calculateHammingDistance(hex1, hex2) {
|
|
813
893
|
let distance = 0;
|
|
814
|
-
const
|
|
894
|
+
const bin1 = hexToBinary(hex1);
|
|
895
|
+
const bin2 = hexToBinary(hex2);
|
|
896
|
+
const len = Math.min(bin1.length, bin2.length);
|
|
815
897
|
for (let i = 0; i < len; i++) {
|
|
816
|
-
if (
|
|
898
|
+
if (bin1[i] !== bin2[i]) distance++;
|
|
817
899
|
}
|
|
818
900
|
return distance;
|
|
819
901
|
}
|
|
820
902
|
/**
|
|
821
903
|
* @description 根据汉明距离计算图片或文本哈希的相似度。
|
|
822
|
-
* @param
|
|
823
|
-
* @param
|
|
904
|
+
* @param hex1 - 第一个十六进制哈希字符串。
|
|
905
|
+
* @param hex2 - 第二个十六进制哈希字符串。
|
|
824
906
|
* @returns {number} 范围在0到1之间的相似度得分。
|
|
825
907
|
*/
|
|
826
|
-
calculateSimilarity(
|
|
827
|
-
const distance = this.calculateHammingDistance(
|
|
828
|
-
const hashLength = Math.max(
|
|
908
|
+
calculateSimilarity(hex1, hex2) {
|
|
909
|
+
const distance = this.calculateHammingDistance(hex1, hex2);
|
|
910
|
+
const hashLength = Math.max(hex1.length, hex2.length) * 4;
|
|
829
911
|
return hashLength === 0 ? 1 : 1 - distance / hashLength;
|
|
830
912
|
}
|
|
831
913
|
/**
|
|
832
914
|
* @description 为文本生成基于 Simhash 算法的哈希字符串。
|
|
833
915
|
* @param text - 需要处理的文本。
|
|
834
|
-
* @returns {string} 64位二进制 Simhash
|
|
916
|
+
* @returns {string} 64位二进制 Simhash 对应的16位十六进制字符串。
|
|
835
917
|
*/
|
|
836
918
|
generateTextSimhash(text) {
|
|
837
919
|
if (!text?.trim()) return "";
|
|
@@ -844,9 +926,18 @@ var HashManager = class {
|
|
|
844
926
|
vector[i] += hash[Math.floor(i / 8)] >> i % 8 & 1 ? 1 : -1;
|
|
845
927
|
}
|
|
846
928
|
});
|
|
847
|
-
|
|
929
|
+
const binaryHash = vector.map((v) => v > 0 ? "1" : "0").join("");
|
|
930
|
+
return BigInt("0b" + binaryHash).toString(16).padStart(16, "0");
|
|
848
931
|
}
|
|
849
932
|
};
|
|
933
|
+
/**
 * Expands a hex string into its bit string, four bits per hex digit.
 *
 * @param hex - Hex string to expand.
 * @returns {string} Concatenation of the zero-padded 4-bit group of each digit.
 */
function hexToBinary(hex) {
  return hex
    .split("")
    .map((digit) => parseInt(digit, 16).toString(2).padStart(4, "0"))
    .join("");
}
|
|
940
|
+
__name(hexToBinary, "hexToBinary");
|
|
850
941
|
|
|
851
942
|
// src/index.ts
|
|
852
943
|
var name = "best-cave";
|
|
@@ -949,14 +1040,14 @@ function apply(ctx, config) {
|
|
|
949
1040
|
const combinedText = finalElementsForDb.filter((el) => el.type === "text" && el.content).map((el) => el.content).join(" ");
|
|
950
1041
|
if (combinedText) {
|
|
951
1042
|
const newSimhash = hashManager.generateTextSimhash(combinedText);
|
|
952
|
-
const existingTextHashes = await ctx.database.get("cave_hash", { type: "
|
|
1043
|
+
const existingTextHashes = await ctx.database.get("cave_hash", { type: "simhash" });
|
|
953
1044
|
for (const existing of existingTextHashes) {
|
|
954
1045
|
const similarity = hashManager.calculateSimilarity(newSimhash, existing.hash);
|
|
955
1046
|
if (similarity >= config.textThreshold) {
|
|
956
1047
|
return `文本与回声洞(${existing.cave})的相似度为 ${(similarity * 100).toFixed(2)}%,超过阈值`;
|
|
957
1048
|
}
|
|
958
1049
|
}
|
|
959
|
-
textHashesToStore.push({ hash: newSimhash, type: "
|
|
1050
|
+
textHashesToStore.push({ hash: newSimhash, type: "simhash" });
|
|
960
1051
|
}
|
|
961
1052
|
}
|
|
962
1053
|
const userName = (config.enableProfile ? await profileManager.getNickname(session.userId) : null) || session.username;
|