koishi-plugin-message-dedup 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/config.d.ts +2 -0
- package/lib/config.js +11 -0
- package/lib/database.d.ts +16 -0
- package/lib/database.js +87 -1
- package/lib/index.js +104 -25
- package/package.json +1 -1
package/lib/config.d.ts
CHANGED
|
@@ -6,6 +6,8 @@ export interface Config {
|
|
|
6
6
|
imageSimilarityThreshold: number;
|
|
7
7
|
linkExactMatch: boolean;
|
|
8
8
|
forwardContentMaxLength: number;
|
|
9
|
+
forwardImageMatchMode: 'all' | 'majority';
|
|
10
|
+
forwardImageSimilarityThreshold: number;
|
|
9
11
|
retentionDays: number;
|
|
10
12
|
stickerDir: string;
|
|
11
13
|
sendMethod: 'koishi' | 'onebot';
|
package/lib/config.js
CHANGED
|
@@ -25,6 +25,17 @@ exports.Config = koishi_1.Schema.object({
|
|
|
25
25
|
.min(100)
|
|
26
26
|
.max(2000)
|
|
27
27
|
.description('转发消息内容摘要最大长度'),
|
|
28
|
+
forwardImageMatchMode: koishi_1.Schema.union([
|
|
29
|
+
koishi_1.Schema.const('all').description('全部匹配'),
|
|
30
|
+
koishi_1.Schema.const('majority').description('过半匹配')
|
|
31
|
+
])
|
|
32
|
+
.default('all')
|
|
33
|
+
.description('转发消息图片匹配模式:全部匹配要求所有图片相同,过半匹配只需超半数图片相同'),
|
|
34
|
+
forwardImageSimilarityThreshold: koishi_1.Schema.number()
|
|
35
|
+
.default(10)
|
|
36
|
+
.min(0)
|
|
37
|
+
.max(32)
|
|
38
|
+
.description('转发消息图片相似度阈值(百分比,0为完全相同)'),
|
|
28
39
|
retentionDays: koishi_1.Schema.number()
|
|
29
40
|
.default(7)
|
|
30
41
|
.min(1)
|
package/lib/database.d.ts
CHANGED
|
@@ -16,7 +16,23 @@ export interface DedupRecord {
|
|
|
16
16
|
originalContent: string;
|
|
17
17
|
extraInfo: string;
|
|
18
18
|
}
|
|
19
|
+
/**
|
|
20
|
+
* Extra information kept for a forwarded message; it is serialized with
* JSON.stringify into the `extraInfo` string of the saved DedupRecord.
*
* - forwardId:    `id` attribute of the forward element.
* - preview:      first 100 characters of the (length-limited) forward text.
* - textHash:     string hash of the length-limited forward text; when the
*                 forward has no text, the hash of `forwardId` is used instead.
* - imageHashes:  hashes of the images that were hashed successfully
*                 (images whose hash is null are left out).
* - imageCount:   number of images found in the forward, including failed ones.
* - failedImages: number of images whose hash is null (download/hash failure
*                 or a hash rejected as abnormal).
|
|
21
|
+
*/
|
|
22
|
+
export interface ForwardExtraInfo {
|
|
23
|
+
forwardId: string;
|
|
24
|
+
preview: string;
|
|
25
|
+
textHash: string;
|
|
26
|
+
imageHashes: string[];
|
|
27
|
+
imageCount: number;
|
|
28
|
+
failedImages: number;
|
|
29
|
+
}
|
|
19
30
|
/** Extends the context's data model with the `message_dedup` table used by this plugin. */
export declare function extendDatabase(ctx: Context): void;
|
|
20
31
|
/**
 * Looks for a stored record of the given content type in the guild that matches `contentHash`.
 * Candidates are read from `message_dedup` and ordered by ascending timestamp so the
 * earliest (original) message is preferred. For 'image' content with `imageThreshold`
 * supplied, matching is based on hash distance rather than equality.
 * NOTE(review): the tail of the implementation is not visible in this diff — confirm
 * the exact comparison and the unit of `imageThreshold` against the source.
 */
export declare function findDuplicate(ctx: Context, guildId: string, contentType: 'image' | 'link' | 'forward', contentHash: string, imageThreshold?: number): Promise<DedupRecord | null>;
|
|
21
32
|
/**
 * NOTE(review): implementation is not visible in this diff; presumably stores `record`
 * in the `message_dedup` table — confirm against the source.
 */
export declare function saveRecord(ctx: Context, record: DedupRecord): Promise<void>;
|
|
22
33
|
/**
 * Runs a database operation on records whose `timestamp` is below a cutoff time.
 * NOTE(review): only the filter is visible in this diff; presumably the cutoff is
 * `retentionDays` before now and the matching records are removed — confirm.
 */
export declare function cleanupOldRecords(ctx: Context, retentionDays: number): Promise<void>;
|
|
34
|
+
/**
|
|
35
|
+
* Checks whether a forwarded message duplicates one already stored for the guild.
|
|
36
|
+
* Matching combines the text hash (must be equal) with the image hashes:
* 'all' requires the same image count with every image matched, 'majority'
* requires more than half of the larger image count to match.
* `imageThreshold` is a percentage (it is divided by 100 before comparison).
* Resolves to the earliest matching record, or null when there is none.
|
|
37
|
+
*/
|
|
38
|
+
export declare function compareForwardMessages(ctx: Context, guildId: string, newTextHash: string, newImageHashes: string[], imageMatchMode: 'all' | 'majority', imageThreshold: number): Promise<DedupRecord | null>;
|
package/lib/database.js
CHANGED
|
@@ -4,6 +4,7 @@ exports.extendDatabase = extendDatabase;
|
|
|
4
4
|
exports.findDuplicate = findDuplicate;
|
|
5
5
|
exports.saveRecord = saveRecord;
|
|
6
6
|
exports.cleanupOldRecords = cleanupOldRecords;
|
|
7
|
+
exports.compareForwardMessages = compareForwardMessages;
|
|
7
8
|
const hash_1 = require("./hash");
|
|
8
9
|
function extendDatabase(ctx) {
|
|
9
10
|
ctx.model.extend('message_dedup', {
|
|
@@ -23,10 +24,12 @@ function extendDatabase(ctx) {
|
|
|
23
24
|
});
|
|
24
25
|
}
|
|
25
26
|
async function findDuplicate(ctx, guildId, contentType, contentHash, imageThreshold) {
|
|
26
|
-
|
|
27
|
+
let records = await ctx.database.get('message_dedup', {
|
|
27
28
|
guildId,
|
|
28
29
|
contentType
|
|
29
30
|
});
|
|
31
|
+
// 按时间戳升序排序,确保返回最早匹配的记录(原消息)
|
|
32
|
+
records = records.sort((a, b) => a.timestamp - b.timestamp);
|
|
30
33
|
if (contentType === 'image' && imageThreshold !== undefined) {
|
|
31
34
|
// 图片需要计算汉明距离
|
|
32
35
|
// compareHashes返回0-1,0表示相同
|
|
@@ -60,3 +63,86 @@ async function cleanupOldRecords(ctx, retentionDays) {
|
|
|
60
63
|
timestamp: { $lt: cutoffTime }
|
|
61
64
|
});
|
|
62
65
|
}
|
|
66
|
+
/**
|
|
67
|
+
* 比较转发消息是否重复
|
|
68
|
+
* 使用文本哈希 + 图片哈希组合匹配
|
|
69
|
+
*/
|
|
70
|
+
async function compareForwardMessages(ctx, guildId, newTextHash, newImageHashes, imageMatchMode, imageThreshold) {
|
|
71
|
+
const records = await ctx.database.get('message_dedup', {
|
|
72
|
+
guildId,
|
|
73
|
+
contentType: 'forward'
|
|
74
|
+
});
|
|
75
|
+
// 按时间戳升序排序,确保返回最早匹配的记录(原消息)
|
|
76
|
+
const sortedRecords = records.sort((a, b) => a.timestamp - b.timestamp);
|
|
77
|
+
const thresholdRatio = imageThreshold / 100;
|
|
78
|
+
for (const record of sortedRecords) {
|
|
79
|
+
let extra;
|
|
80
|
+
try {
|
|
81
|
+
extra = JSON.parse(record.extraInfo);
|
|
82
|
+
}
|
|
83
|
+
catch {
|
|
84
|
+
// 兼容旧记录格式,跳过
|
|
85
|
+
continue;
|
|
86
|
+
}
|
|
87
|
+
// 1. 文本哈希必须匹配
|
|
88
|
+
if (extra.textHash !== newTextHash)
|
|
89
|
+
continue;
|
|
90
|
+
const newCount = newImageHashes.length;
|
|
91
|
+
const oldCount = extra.imageHashes?.length || 0;
|
|
92
|
+
// 2. 无图片情况:纯文本匹配
|
|
93
|
+
if (newCount === 0 && oldCount === 0) {
|
|
94
|
+
return record;
|
|
95
|
+
}
|
|
96
|
+
// 3. 单边无图片:不匹配
|
|
97
|
+
if (newCount === 0 || oldCount === 0) {
|
|
98
|
+
continue;
|
|
99
|
+
}
|
|
100
|
+
// 4. 全部匹配模式:数量必须相同
|
|
101
|
+
if (imageMatchMode === 'all' && newCount !== oldCount) {
|
|
102
|
+
continue;
|
|
103
|
+
}
|
|
104
|
+
// 5. 计算图片匹配数
|
|
105
|
+
const matchedCount = countImageMatches(newImageHashes, extra.imageHashes, thresholdRatio);
|
|
106
|
+
const totalCount = Math.max(newCount, oldCount);
|
|
107
|
+
// 6. 根据模式判断是否匹配
|
|
108
|
+
if (imageMatchMode === 'all') {
|
|
109
|
+
if (matchedCount === newCount && newCount === oldCount) {
|
|
110
|
+
return record;
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
else { // majority
|
|
114
|
+
if (matchedCount > totalCount / 2) {
|
|
115
|
+
return record;
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
return null;
|
|
120
|
+
}
|
|
121
|
+
/**
|
|
122
|
+
* 计算两个图片哈希列表的匹配数量
|
|
123
|
+
* 使用贪心算法:每个新图片找最相似的未匹配旧图片
|
|
124
|
+
*/
|
|
125
|
+
function countImageMatches(hashes1, hashes2, threshold // 0-1 之间的阈值
|
|
126
|
+
) {
|
|
127
|
+
let matchCount = 0;
|
|
128
|
+
const used = new Set();
|
|
129
|
+
for (const h1 of hashes1) {
|
|
130
|
+
for (let i = 0; i < hashes2.length; i++) {
|
|
131
|
+
if (used.has(i))
|
|
132
|
+
continue;
|
|
133
|
+
try {
|
|
134
|
+
const distance = (0, hash_1.calculateHashDistance)(h1, hashes2[i]);
|
|
135
|
+
if (distance <= threshold) {
|
|
136
|
+
matchCount++;
|
|
137
|
+
used.add(i);
|
|
138
|
+
break;
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
catch {
|
|
142
|
+
// 哈希格式错误,跳过
|
|
143
|
+
continue;
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
return matchCount;
|
|
148
|
+
}
|
package/lib/index.js
CHANGED
|
@@ -160,6 +160,13 @@ async function processImage(imageUrl, session, username, originalContent, config
|
|
|
160
160
|
}
|
|
161
161
|
return null;
|
|
162
162
|
}
|
|
163
|
+
// 检测异常哈希(如全0或几乎全0),跳过处理避免误判
|
|
164
|
+
if (isAbnormalHash(hash)) {
|
|
165
|
+
if (config.debug) {
|
|
166
|
+
logger.warn(`异常图片哈希,跳过: ${hash}`);
|
|
167
|
+
}
|
|
168
|
+
return null;
|
|
169
|
+
}
|
|
163
170
|
if (config.debug) {
|
|
164
171
|
logger.info(`图片哈希: ${hash}`);
|
|
165
172
|
}
|
|
@@ -229,15 +236,18 @@ async function processLink(url, session, username, originalContent, config, ctx,
|
|
|
229
236
|
async function processForward(forwardElem, session, username, originalContent, config, ctx, logger) {
|
|
230
237
|
try {
|
|
231
238
|
if (config.debug) {
|
|
232
|
-
logger.info(`处理转发消息, elem: ${JSON.stringify(forwardElem, null, 2)}`);
|
|
239
|
+
logger.info(`处理转发消息, elem: ${JSON.stringify(forwardElem.attrs, null, 2)}`);
|
|
233
240
|
}
|
|
234
241
|
// 获取转发消息 ID
|
|
235
242
|
const forwardId = forwardElem.attrs?.id;
|
|
236
243
|
if (!forwardId) {
|
|
237
244
|
return null;
|
|
238
245
|
}
|
|
246
|
+
// 转发消息内容
|
|
247
|
+
let textParts = [];
|
|
248
|
+
let images = [];
|
|
249
|
+
let apiSuccess = false;
|
|
239
250
|
// 先尝试通过 OneBot API 获取转发消息内容
|
|
240
|
-
let content = '';
|
|
241
251
|
if (session.platform === 'onebot' && session.bot?.internal) {
|
|
242
252
|
const internal = session.bot.internal;
|
|
243
253
|
// 尝试多种 payload 格式
|
|
@@ -264,17 +274,49 @@ async function processForward(forwardElem, session, username, originalContent, c
|
|
|
264
274
|
if (forwardData) {
|
|
265
275
|
const messages = extractMessagesArray(forwardData);
|
|
266
276
|
if (messages && Array.isArray(messages) && messages.length > 0) {
|
|
267
|
-
|
|
268
|
-
if (node.message) {
|
|
269
|
-
return node.message
|
|
270
|
-
.filter((m) => m.type === 'text')
|
|
271
|
-
.map((m) => m.data?.text || '')
|
|
272
|
-
.join('');
|
|
273
|
-
}
|
|
274
|
-
return '';
|
|
275
|
-
}).join('\n');
|
|
277
|
+
apiSuccess = true;
|
|
276
278
|
if (config.debug) {
|
|
277
|
-
logger.info(
|
|
279
|
+
logger.info(`转发消息节点数量: ${messages.length}`);
|
|
280
|
+
}
|
|
281
|
+
// 提取文本和图片
|
|
282
|
+
for (const node of messages) {
|
|
283
|
+
const msgArray = node.message || node.content || node.data;
|
|
284
|
+
if (Array.isArray(msgArray)) {
|
|
285
|
+
for (const m of msgArray) {
|
|
286
|
+
if (m.type === 'text') {
|
|
287
|
+
textParts.push(m.data?.text || m.text || '');
|
|
288
|
+
}
|
|
289
|
+
if (m.type === 'image') {
|
|
290
|
+
const url = m.data?.url || m.data?.file || m.url || m.file || '';
|
|
291
|
+
if (url) {
|
|
292
|
+
// 下载图片并计算哈希
|
|
293
|
+
let hash = null;
|
|
294
|
+
try {
|
|
295
|
+
hash = await (0, hash_1.downloadAndHashImage)(url, ctx);
|
|
296
|
+
if (hash && isAbnormalHash(hash)) {
|
|
297
|
+
if (config.debug) {
|
|
298
|
+
logger.warn(`转发消息图片哈希异常,跳过: ${hash}`);
|
|
299
|
+
}
|
|
300
|
+
hash = null;
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
catch (err) {
|
|
304
|
+
if (config.debug) {
|
|
305
|
+
logger.warn(`转发消息图片下载失败: ${url.slice(0, 50)}...`);
|
|
306
|
+
}
|
|
307
|
+
}
|
|
308
|
+
images.push({ url, hash });
|
|
309
|
+
}
|
|
310
|
+
}
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
// 兼容字符串格式
|
|
314
|
+
if (typeof node.content === 'string') {
|
|
315
|
+
textParts.push(node.content);
|
|
316
|
+
}
|
|
317
|
+
if (typeof node.text === 'string') {
|
|
318
|
+
textParts.push(node.text);
|
|
319
|
+
}
|
|
278
320
|
}
|
|
279
321
|
break;
|
|
280
322
|
}
|
|
@@ -285,35 +327,44 @@ async function processForward(forwardElem, session, username, originalContent, c
|
|
|
285
327
|
}
|
|
286
328
|
}
|
|
287
329
|
}
|
|
288
|
-
//
|
|
289
|
-
|
|
290
|
-
const
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
const
|
|
295
|
-
const hash = (0, hash_1.calculateStringHash)(truncated);
|
|
330
|
+
// 提取有效图片哈希列表
|
|
331
|
+
const imageHashes = images.filter(img => img.hash !== null).map(img => img.hash);
|
|
332
|
+
const failedImages = images.filter(img => img.hash === null).length;
|
|
333
|
+
// 计算文本哈希
|
|
334
|
+
const textContent = textParts.join('\n').trim();
|
|
335
|
+
const textToHash = textContent.slice(0, config.forwardContentMaxLength);
|
|
336
|
+
const textHash = textContent ? (0, hash_1.calculateStringHash)(textToHash) : (0, hash_1.calculateStringHash)(forwardId);
|
|
296
337
|
if (config.debug) {
|
|
297
|
-
logger.info(
|
|
338
|
+
logger.info(`转发消息: 文本长度=${textToHash.length}, 图片=${imageHashes.length}, 失败=${failedImages}, API成功=${apiSuccess}`);
|
|
298
339
|
}
|
|
299
340
|
const guildId = session.guildId;
|
|
300
|
-
|
|
341
|
+
// 使用新的比较函数查询重复消息
|
|
342
|
+
const duplicate = await (0, database_1.compareForwardMessages)(ctx, guildId, textHash, imageHashes, config.forwardImageMatchMode, config.forwardImageSimilarityThreshold);
|
|
301
343
|
if (duplicate) {
|
|
302
344
|
if (config.debug) {
|
|
303
345
|
logger.info(`发现重复转发消息`);
|
|
304
346
|
}
|
|
305
347
|
return duplicate;
|
|
306
348
|
}
|
|
349
|
+
// 保存记录
|
|
350
|
+
const extraInfo = {
|
|
351
|
+
forwardId,
|
|
352
|
+
preview: textToHash.slice(0, 100),
|
|
353
|
+
textHash,
|
|
354
|
+
imageHashes,
|
|
355
|
+
imageCount: images.length,
|
|
356
|
+
failedImages
|
|
357
|
+
};
|
|
307
358
|
await (0, database_1.saveRecord)(ctx, {
|
|
308
359
|
guildId: session.guildId,
|
|
309
360
|
userId: session.userId,
|
|
310
361
|
username,
|
|
311
362
|
timestamp: Date.now(),
|
|
312
363
|
contentType: 'forward',
|
|
313
|
-
contentHash:
|
|
364
|
+
contentHash: textHash,
|
|
314
365
|
originalMessageId: session.messageId,
|
|
315
|
-
originalContent:
|
|
316
|
-
extraInfo: JSON.stringify(
|
|
366
|
+
originalContent: textToHash.slice(0, 100),
|
|
367
|
+
extraInfo: JSON.stringify(extraInfo)
|
|
317
368
|
});
|
|
318
369
|
return null;
|
|
319
370
|
}
|
|
@@ -435,3 +486,31 @@ function getRandomSticker(baseDir, stickerDir) {
|
|
|
435
486
|
}
|
|
436
487
|
return null;
|
|
437
488
|
}
|
|
489
|
+
/**
 * Decide whether an image hash is degenerate (almost all 0 bits or almost all 1 bits).
 * Such hashes sit within the similarity threshold of each other and cause false
 * duplicate hits, so callers skip them.
 *
 * Two encodings are recognised:
 *   - a 64-character string made only of '0'/'1' is read as 64 binary digits;
 *   - any other string made only of hex digits is read as 4 bits per character.
 * In both cases the hash is abnormal when at least 90% of its bits share one value,
 * so the same hash gets the same verdict whichever way it is encoded. (Counting
 * hex characters instead of bits would let e.g. `0000000100000001`, with 2 of 64
 * bits set, slip through.)
 *
 * @param hash Hash string to inspect.
 * @returns true when the hash is degenerate; false otherwise, including for empty
 *          strings and strings in neither format.
 */
function isAbnormalHash(hash) {
    let totalBits;
    let oneBits = 0;
    if (hash.length === 64 && /^[01]+$/.test(hash)) {
        // Binary form: one character per bit
        totalBits = 64;
        for (const bit of hash) {
            if (bit === '1') {
                oneBits++;
            }
        }
    }
    else if (/^[0-9a-fA-F]+$/.test(hash)) {
        // Hex form: count the set bits of every nibble
        totalBits = hash.length * 4;
        for (const digit of hash) {
            let nibble = parseInt(digit, 16);
            while (nibble > 0) {
                oneBits += nibble & 1;
                nibble >>= 1;
            }
        }
    }
    else {
        return false;
    }
    const zeroBits = totalBits - oneBits;
    // ">= 90% of the bits", written with integers to avoid floating-point rounding
    return oneBits * 10 >= totalBits * 9 || zeroBits * 10 >= totalBits * 9;
}
|