koishi-plugin-message-dedup 0.0.1 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -1
- package/assets/dup-1.jpg +0 -0
- package/lib/config.d.ts +2 -0
- package/lib/config.js +13 -2
- package/lib/database.d.ts +16 -0
- package/lib/database.js +87 -1
- package/lib/index.js +181 -10
- package/package.json +1 -1
package/README.md
CHANGED
package/assets/dup-1.jpg
CHANGED
|
Binary file
|
package/lib/config.d.ts
CHANGED
|
@@ -6,6 +6,8 @@ export interface Config {
|
|
|
6
6
|
imageSimilarityThreshold: number;
|
|
7
7
|
linkExactMatch: boolean;
|
|
8
8
|
forwardContentMaxLength: number;
|
|
9
|
+
forwardImageMatchMode: 'all' | 'majority';
|
|
10
|
+
forwardImageSimilarityThreshold: number;
|
|
9
11
|
retentionDays: number;
|
|
10
12
|
stickerDir: string;
|
|
11
13
|
sendMethod: 'koishi' | 'onebot';
|
package/lib/config.js
CHANGED
|
@@ -4,8 +4,8 @@ exports.Config = void 0;
|
|
|
4
4
|
const koishi_1 = require("koishi");
|
|
5
5
|
exports.Config = koishi_1.Schema.object({
|
|
6
6
|
enableImage: koishi_1.Schema.boolean()
|
|
7
|
-
.default(
|
|
8
|
-
.description('
|
|
7
|
+
.default(true)
|
|
8
|
+
.description('启用图片去重(自动排除表情包)'),
|
|
9
9
|
enableLink: koishi_1.Schema.boolean()
|
|
10
10
|
.default(true)
|
|
11
11
|
.description('启用链接去重'),
|
|
@@ -25,6 +25,17 @@ exports.Config = koishi_1.Schema.object({
|
|
|
25
25
|
.min(100)
|
|
26
26
|
.max(2000)
|
|
27
27
|
.description('转发消息内容摘要最大长度'),
|
|
28
|
+
forwardImageMatchMode: koishi_1.Schema.union([
|
|
29
|
+
koishi_1.Schema.const('all').description('全部匹配'),
|
|
30
|
+
koishi_1.Schema.const('majority').description('过半匹配')
|
|
31
|
+
])
|
|
32
|
+
.default('all')
|
|
33
|
+
.description('转发消息图片匹配模式:全部匹配要求所有图片相同,过半匹配只需超半数图片相同'),
|
|
34
|
+
forwardImageSimilarityThreshold: koishi_1.Schema.number()
|
|
35
|
+
.default(10)
|
|
36
|
+
.min(0)
|
|
37
|
+
.max(32)
|
|
38
|
+
.description('转发消息图片相似度阈值(百分比,0为完全相同)'),
|
|
28
39
|
retentionDays: koishi_1.Schema.number()
|
|
29
40
|
.default(7)
|
|
30
41
|
.min(1)
|
package/lib/database.d.ts
CHANGED
|
@@ -16,7 +16,23 @@ export interface DedupRecord {
|
|
|
16
16
|
originalContent: string;
|
|
17
17
|
extraInfo: string;
|
|
18
18
|
}
|
|
19
|
+
/**
 * Shape of the JSON payload stored in `DedupRecord.extraInfo` for records
 * whose `contentType` is `'forward'`.
 */
export interface ForwardExtraInfo {
    /** Id taken from the forward element's `id` attribute. */
    forwardId: string;
    /** First 100 characters of the text that was hashed. */
    preview: string;
    /** Hash of the truncated forward text; when no text was extracted, the hash of `forwardId` instead. */
    textHash: string;
    /** Hashes of the images that were hashed successfully (failed or abnormal hashes are excluded). */
    imageHashes: string[];
    /** Number of images found in the forward, including the ones that could not be hashed. */
    imageCount: number;
    /** Number of images without a usable hash (`imageCount - imageHashes.length`). */
    failedImages: number;
}
|
|
19
30
|
export declare function extendDatabase(ctx: Context): void;
|
|
20
31
|
export declare function findDuplicate(ctx: Context, guildId: string, contentType: 'image' | 'link' | 'forward', contentHash: string, imageThreshold?: number): Promise<DedupRecord | null>;
|
|
21
32
|
export declare function saveRecord(ctx: Context, record: DedupRecord): Promise<void>;
|
|
22
33
|
export declare function cleanupOldRecords(ctx: Context, retentionDays: number): Promise<void>;
|
|
34
|
+
/**
|
|
35
|
+
* 比较转发消息是否重复
|
|
36
|
+
* 使用文本哈希 + 图片哈希组合匹配
|
|
37
|
+
*/
|
|
38
|
+
export declare function compareForwardMessages(ctx: Context, guildId: string, newTextHash: string, newImageHashes: string[], imageMatchMode: 'all' | 'majority', imageThreshold: number): Promise<DedupRecord | null>;
|
package/lib/database.js
CHANGED
|
@@ -4,6 +4,7 @@ exports.extendDatabase = extendDatabase;
|
|
|
4
4
|
exports.findDuplicate = findDuplicate;
|
|
5
5
|
exports.saveRecord = saveRecord;
|
|
6
6
|
exports.cleanupOldRecords = cleanupOldRecords;
|
|
7
|
+
exports.compareForwardMessages = compareForwardMessages;
|
|
7
8
|
const hash_1 = require("./hash");
|
|
8
9
|
function extendDatabase(ctx) {
|
|
9
10
|
ctx.model.extend('message_dedup', {
|
|
@@ -23,10 +24,12 @@ function extendDatabase(ctx) {
|
|
|
23
24
|
});
|
|
24
25
|
}
|
|
25
26
|
async function findDuplicate(ctx, guildId, contentType, contentHash, imageThreshold) {
|
|
26
|
-
|
|
27
|
+
let records = await ctx.database.get('message_dedup', {
|
|
27
28
|
guildId,
|
|
28
29
|
contentType
|
|
29
30
|
});
|
|
31
|
+
// 按时间戳升序排序,确保返回最早匹配的记录(原消息)
|
|
32
|
+
records = records.sort((a, b) => a.timestamp - b.timestamp);
|
|
30
33
|
if (contentType === 'image' && imageThreshold !== undefined) {
|
|
31
34
|
// 图片需要计算汉明距离
|
|
32
35
|
// compareHashes返回0-1,0表示相同
|
|
@@ -60,3 +63,86 @@ async function cleanupOldRecords(ctx, retentionDays) {
|
|
|
60
63
|
timestamp: { $lt: cutoffTime }
|
|
61
64
|
});
|
|
62
65
|
}
|
|
66
|
+
/**
 * Finds the earliest stored forward record in a guild that duplicates a new
 * forwarded message, using the text hash plus the image hashes.
 *
 * Matching rules, applied per stored record (oldest first):
 *  1. The stored `textHash` must equal `newTextHash`.
 *  2. If neither side has images, the text match alone is a duplicate.
 *  3. If only one side has images, the record is not a duplicate.
 *  4. `'all'` mode: both sides must have the same number of images and every
 *     image must be paired with a similar one.
 *  5. `'majority'` mode: more than half of `max(newCount, oldCount)` images
 *     must be paired.
 *
 * Records whose `extraInfo` is not valid JSON, or does not decode to an
 * object (e.g. the literal `null`), are skipped instead of aborting the scan.
 *
 * @param ctx Koishi context; only `ctx.database.get` is used.
 * @param guildId Guild whose records are searched.
 * @param newTextHash Text hash of the incoming forward.
 * @param newImageHashes Image hashes of the incoming forward (a non-array is treated as empty).
 * @param imageMatchMode `'all'` or `'majority'`.
 * @param imageThreshold Similarity threshold in percent; divided by 100 before comparison.
 * @returns The earliest matching record, or `null` when nothing matches.
 */
async function compareForwardMessages(ctx, guildId, newTextHash, newImageHashes, imageMatchMode, imageThreshold) {
    const records = await ctx.database.get('message_dedup', {
        guildId,
        contentType: 'forward'
    });
    // Sort a copy by ascending timestamp so the earliest match (the original
    // message) wins, without mutating the array returned by the driver.
    const sortedRecords = [...records].sort((a, b) => a.timestamp - b.timestamp);
    const thresholdRatio = imageThreshold / 100;
    const newHashes = Array.isArray(newImageHashes) ? newImageHashes : [];
    const newCount = newHashes.length;
    for (const record of sortedRecords) {
        let extra;
        try {
            extra = JSON.parse(record.extraInfo);
        }
        catch {
            // Legacy or corrupt record format: skip it.
            continue;
        }
        // JSON.parse may legitimately return null or a primitive; such a
        // payload cannot describe a forward, so skip it rather than throw.
        if (extra === null || typeof extra !== 'object') {
            continue;
        }
        // 1. The text hash must match.
        if (extra.textHash !== newTextHash) {
            continue;
        }
        const oldHashes = Array.isArray(extra.imageHashes) ? extra.imageHashes : [];
        const oldCount = oldHashes.length;
        // 2. No images on either side: the text match is sufficient.
        if (newCount === 0 && oldCount === 0) {
            return record;
        }
        // 3. Images on one side only: not a duplicate.
        if (newCount === 0 || oldCount === 0) {
            continue;
        }
        // 4. 'all' mode requires identical image counts.
        if (imageMatchMode === 'all' && newCount !== oldCount) {
            continue;
        }
        // 5. Count how many images can be paired up.
        const matchedCount = countImageMatches(newHashes, oldHashes, thresholdRatio);
        // 6. Decide according to the configured mode.
        if (imageMatchMode === 'all') {
            // Counts are already known to be equal here.
            if (matchedCount === newCount) {
                return record;
            }
        }
        else { // majority
            const totalCount = Math.max(newCount, oldCount);
            if (matchedCount > totalCount / 2) {
                return record;
            }
        }
    }
    return null;
}
|
|
121
|
+
/**
 * Counts how many images from `hashes1` can be paired one-to-one with a
 * similar image from `hashes2`.
 *
 * Two hashes are "similar" when their distance is `<= threshold`. The result
 * is the size of a maximum one-to-one pairing, found with augmenting paths.
 * A plain first-fit scan can undercount: an early image may grab the only
 * partner a later image could have used, even though the early image had
 * another option.
 *
 * @param hashes1 Hashes of the new message's images.
 * @param hashes2 Hashes of the stored message's images.
 * @param threshold Maximum distance (0-1) for two hashes to count as similar.
 * @param distanceFn Optional distance function; defaults to `calculateHashDistance`
 *   from `./hash`. A pair for which it throws is treated as not similar.
 * @returns Number of paired images; 0 when either list is not an array.
 */
function countImageMatches(hashes1, hashes2, threshold, // threshold is in the 0-1 range
distanceFn = (a, b) => (0, hash_1.calculateHashDistance)(a, b)) {
    if (!Array.isArray(hashes1) || !Array.isArray(hashes2)) {
        return 0;
    }
    // For every new image, list the indices of stored images close enough to it.
    const candidates = hashes1.map((h1) => {
        const close = [];
        for (let j = 0; j < hashes2.length; j++) {
            try {
                if (distanceFn(h1, hashes2[j]) <= threshold) {
                    close.push(j);
                }
            }
            catch {
                // Malformed hash: this pair simply does not match.
            }
        }
        return close;
    });
    // ownerOf[j] is the index in hashes1 currently paired with hashes2[j], or -1.
    const ownerOf = new Array(hashes2.length).fill(-1);
    // Tries to pair image i, re-homing already paired images when that frees a slot.
    const tryAssign = (i, visited) => {
        for (const j of candidates[i]) {
            if (visited.has(j)) {
                continue;
            }
            visited.add(j);
            if (ownerOf[j] === -1 || tryAssign(ownerOf[j], visited)) {
                ownerOf[j] = i;
                return true;
            }
        }
        return false;
    };
    let matchCount = 0;
    for (let i = 0; i < hashes1.length; i++) {
        if (tryAssign(i, new Set())) {
            matchCount++;
        }
    }
    return matchCount;
}
|
package/lib/index.js
CHANGED
|
@@ -94,10 +94,18 @@ async function processMessage(session, config, ctx, logger) {
|
|
|
94
94
|
return null;
|
|
95
95
|
const username = session.author?.nickname || session.author?.username || session.username || '未知用户';
|
|
96
96
|
const originalContent = session.content || extractTextFromElements(elements) || '';
|
|
97
|
-
// 1.
|
|
97
|
+
// 1. 检查图片(排除表情包:subType=1)
|
|
98
98
|
if (config.enableImage) {
|
|
99
99
|
for (const elem of elements) {
|
|
100
100
|
if (elem.type === 'img' && elem.attrs?.src) {
|
|
101
|
+
// 表情包 subType 为 1,跳过
|
|
102
|
+
const subType = elem.attrs['sub-type'] ?? elem.attrs.subType;
|
|
103
|
+
if (subType === 1 || subType === '1') {
|
|
104
|
+
if (config.debug) {
|
|
105
|
+
logger.info('跳过表情包');
|
|
106
|
+
}
|
|
107
|
+
continue;
|
|
108
|
+
}
|
|
101
109
|
const duplicate = await processImage(elem.attrs.src, session, username, originalContent, config, ctx, logger);
|
|
102
110
|
if (duplicate)
|
|
103
111
|
return duplicate;
|
|
@@ -116,6 +124,9 @@ async function processMessage(session, config, ctx, logger) {
|
|
|
116
124
|
}
|
|
117
125
|
// 3. 检查转发消息
|
|
118
126
|
if (config.enableForward) {
|
|
127
|
+
if (config.debug) {
|
|
128
|
+
logger.info(`检查转发消息, elements types: ${elements.map(e => e.type).join(', ')}`);
|
|
129
|
+
}
|
|
119
130
|
for (const elem of elements) {
|
|
120
131
|
if (elem.type === 'forward') {
|
|
121
132
|
const duplicate = await processForward(elem, session, username, originalContent, config, ctx, logger);
|
|
@@ -149,6 +160,13 @@ async function processImage(imageUrl, session, username, originalContent, config
|
|
|
149
160
|
}
|
|
150
161
|
return null;
|
|
151
162
|
}
|
|
163
|
+
// 检测异常哈希(如全0或几乎全0),跳过处理避免误判
|
|
164
|
+
if (isAbnormalHash(hash)) {
|
|
165
|
+
if (config.debug) {
|
|
166
|
+
logger.warn(`异常图片哈希,跳过: ${hash}`);
|
|
167
|
+
}
|
|
168
|
+
return null;
|
|
169
|
+
}
|
|
152
170
|
if (config.debug) {
|
|
153
171
|
logger.info(`图片哈希: ${hash}`);
|
|
154
172
|
}
|
|
@@ -217,33 +235,136 @@ async function processLink(url, session, username, originalContent, config, ctx,
|
|
|
217
235
|
}
|
|
218
236
|
async function processForward(forwardElem, session, username, originalContent, config, ctx, logger) {
|
|
219
237
|
try {
|
|
220
|
-
|
|
221
|
-
|
|
238
|
+
if (config.debug) {
|
|
239
|
+
logger.info(`处理转发消息, elem: ${JSON.stringify(forwardElem.attrs, null, 2)}`);
|
|
240
|
+
}
|
|
241
|
+
// 获取转发消息 ID
|
|
242
|
+
const forwardId = forwardElem.attrs?.id;
|
|
243
|
+
if (!forwardId) {
|
|
222
244
|
return null;
|
|
223
245
|
}
|
|
224
|
-
|
|
225
|
-
|
|
246
|
+
// 转发消息内容
|
|
247
|
+
let textParts = [];
|
|
248
|
+
let images = [];
|
|
249
|
+
let apiSuccess = false;
|
|
250
|
+
// 先尝试通过 OneBot API 获取转发消息内容
|
|
251
|
+
if (session.platform === 'onebot' && session.bot?.internal) {
|
|
252
|
+
const internal = session.bot.internal;
|
|
253
|
+
// 尝试多种 payload 格式
|
|
254
|
+
const payloads = [
|
|
255
|
+
{ message_id: forwardId },
|
|
256
|
+
{ id: forwardId },
|
|
257
|
+
];
|
|
258
|
+
// 尝试多种 API 调用方式
|
|
259
|
+
const callApi = async (action, params) => {
|
|
260
|
+
if (typeof internal._get === 'function') {
|
|
261
|
+
return await internal._get(action, params);
|
|
262
|
+
}
|
|
263
|
+
if (typeof internal.request === 'function') {
|
|
264
|
+
return await internal.request(action, params);
|
|
265
|
+
}
|
|
266
|
+
if (typeof internal.callAction === 'function') {
|
|
267
|
+
return await internal.callAction(action, params);
|
|
268
|
+
}
|
|
269
|
+
return null;
|
|
270
|
+
};
|
|
271
|
+
for (const payload of payloads) {
|
|
272
|
+
try {
|
|
273
|
+
const forwardData = await callApi('get_forward_msg', payload);
|
|
274
|
+
if (forwardData) {
|
|
275
|
+
const messages = extractMessagesArray(forwardData);
|
|
276
|
+
if (messages && Array.isArray(messages) && messages.length > 0) {
|
|
277
|
+
apiSuccess = true;
|
|
278
|
+
if (config.debug) {
|
|
279
|
+
logger.info(`转发消息节点数量: ${messages.length}`);
|
|
280
|
+
}
|
|
281
|
+
// 提取文本和图片
|
|
282
|
+
for (const node of messages) {
|
|
283
|
+
const msgArray = node.message || node.content || node.data;
|
|
284
|
+
if (Array.isArray(msgArray)) {
|
|
285
|
+
for (const m of msgArray) {
|
|
286
|
+
if (m.type === 'text') {
|
|
287
|
+
textParts.push(m.data?.text || m.text || '');
|
|
288
|
+
}
|
|
289
|
+
if (m.type === 'image') {
|
|
290
|
+
const url = m.data?.url || m.data?.file || m.url || m.file || '';
|
|
291
|
+
if (url) {
|
|
292
|
+
// 下载图片并计算哈希
|
|
293
|
+
let hash = null;
|
|
294
|
+
try {
|
|
295
|
+
hash = await (0, hash_1.downloadAndHashImage)(url, ctx);
|
|
296
|
+
if (hash && isAbnormalHash(hash)) {
|
|
297
|
+
if (config.debug) {
|
|
298
|
+
logger.warn(`转发消息图片哈希异常,跳过: ${hash}`);
|
|
299
|
+
}
|
|
300
|
+
hash = null;
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
catch (err) {
|
|
304
|
+
if (config.debug) {
|
|
305
|
+
logger.warn(`转发消息图片下载失败: ${url.slice(0, 50)}...`);
|
|
306
|
+
}
|
|
307
|
+
}
|
|
308
|
+
images.push({ url, hash });
|
|
309
|
+
}
|
|
310
|
+
}
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
// 兼容字符串格式
|
|
314
|
+
if (typeof node.content === 'string') {
|
|
315
|
+
textParts.push(node.content);
|
|
316
|
+
}
|
|
317
|
+
if (typeof node.text === 'string') {
|
|
318
|
+
textParts.push(node.text);
|
|
319
|
+
}
|
|
320
|
+
}
|
|
321
|
+
break;
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
catch (err) {
|
|
326
|
+
// API 调用失败,继续尝试
|
|
327
|
+
}
|
|
328
|
+
}
|
|
329
|
+
}
|
|
330
|
+
// 提取有效图片哈希列表
|
|
331
|
+
const imageHashes = images.filter(img => img.hash !== null).map(img => img.hash);
|
|
332
|
+
const failedImages = images.filter(img => img.hash === null).length;
|
|
333
|
+
// 计算文本哈希
|
|
334
|
+
const textContent = textParts.join('\n').trim();
|
|
335
|
+
const textToHash = textContent.slice(0, config.forwardContentMaxLength);
|
|
336
|
+
const textHash = textContent ? (0, hash_1.calculateStringHash)(textToHash) : (0, hash_1.calculateStringHash)(forwardId);
|
|
226
337
|
if (config.debug) {
|
|
227
|
-
logger.info(
|
|
338
|
+
logger.info(`转发消息: 文本长度=${textToHash.length}, 图片=${imageHashes.length}, 失败=${failedImages}, API成功=${apiSuccess}`);
|
|
228
339
|
}
|
|
229
340
|
const guildId = session.guildId;
|
|
230
|
-
|
|
341
|
+
// 使用新的比较函数查询重复消息
|
|
342
|
+
const duplicate = await (0, database_1.compareForwardMessages)(ctx, guildId, textHash, imageHashes, config.forwardImageMatchMode, config.forwardImageSimilarityThreshold);
|
|
231
343
|
if (duplicate) {
|
|
232
344
|
if (config.debug) {
|
|
233
345
|
logger.info(`发现重复转发消息`);
|
|
234
346
|
}
|
|
235
347
|
return duplicate;
|
|
236
348
|
}
|
|
349
|
+
// 保存记录
|
|
350
|
+
const extraInfo = {
|
|
351
|
+
forwardId,
|
|
352
|
+
preview: textToHash.slice(0, 100),
|
|
353
|
+
textHash,
|
|
354
|
+
imageHashes,
|
|
355
|
+
imageCount: images.length,
|
|
356
|
+
failedImages
|
|
357
|
+
};
|
|
237
358
|
await (0, database_1.saveRecord)(ctx, {
|
|
238
359
|
guildId: session.guildId,
|
|
239
360
|
userId: session.userId,
|
|
240
361
|
username,
|
|
241
362
|
timestamp: Date.now(),
|
|
242
363
|
contentType: 'forward',
|
|
243
|
-
contentHash:
|
|
364
|
+
contentHash: textHash,
|
|
244
365
|
originalMessageId: session.messageId,
|
|
245
|
-
originalContent:
|
|
246
|
-
extraInfo: JSON.stringify(
|
|
366
|
+
originalContent: textToHash.slice(0, 100),
|
|
367
|
+
extraInfo: JSON.stringify(extraInfo)
|
|
247
368
|
});
|
|
248
369
|
return null;
|
|
249
370
|
}
|
|
@@ -269,6 +390,28 @@ function extractForwardContent(elem) {
|
|
|
269
390
|
}
|
|
270
391
|
return content;
|
|
271
392
|
}
|
|
393
|
+
/**
 * Locates the array of forward-message nodes inside an API response whose
 * exact envelope is not known in advance.
 *
 * The response itself and a fixed list of nested properties are probed in
 * priority order; the first value that is an array is returned.
 * (Modelled on the chatluna-forward-msg implementation.)
 *
 * @param data Raw response from the forward-message API call.
 * @returns The first array found, or `null` when none of the locations holds one.
 */
function extractMessagesArray(data) {
    // Property paths to probe, highest priority first; [] is `data` itself.
    const probePaths = [
        [],
        ['messages'],
        ['data'],
        ['result'],
        ['response'],
        ['data', 'messages'],
        ['response', 'messages'],
        ['envelope', 'result', 'messages'],
    ];
    for (const path of probePaths) {
        let cursor = data;
        for (const key of path) {
            if (cursor === null || cursor === undefined) {
                // Same outcome as optional chaining on a nullish value.
                cursor = undefined;
                break;
            }
            cursor = cursor[key];
        }
        if (Array.isArray(cursor)) {
            return cursor;
        }
    }
    return null;
}
|
|
272
415
|
async function sendDuplicateWarning(session, duplicate, config, ctx) {
|
|
273
416
|
const date = new Date(duplicate.timestamp);
|
|
274
417
|
const dateStr = date.toLocaleString('zh-CN', {
|
|
@@ -343,3 +486,31 @@ function getRandomSticker(baseDir, stickerDir) {
|
|
|
343
486
|
}
|
|
344
487
|
return null;
|
|
345
488
|
}
|
|
489
|
+
/**
 * Reports whether an image hash is degenerate (almost entirely one symbol).
 * Such hashes would match each other spuriously, so callers skip them.
 *
 * Two encodings are recognised:
 *  - a 64-character binary string: abnormal when at least 58 characters are
 *    '0', or at least 58 are '1';
 *  - a hexadecimal string of any length: abnormal when at most 10% of its
 *    characters differ from '0', or at most 10% differ from 'f'/'F'.
 * A 64-character binary string is also a valid hex string, so it goes
 * through both checks.
 *
 * @param hash Hash string to inspect.
 * @returns `true` when the hash is considered abnormal, otherwise `false`.
 */
function isAbnormalHash(hash) {
    const total = hash.length;
    // Number of characters in the hash matched by a global pattern.
    const tally = (pattern) => (hash.match(pattern) || []).length;
    const isBinary64 = total === 64 && /^[01]+$/.test(hash);
    if (isBinary64 && (tally(/0/g) >= 58 || tally(/1/g) >= 58)) {
        return true;
    }
    if (!/^[0-9a-fA-F]+$/.test(hash)) {
        return false;
    }
    // At most 10% of the characters may differ from the dominant symbol.
    const tolerance = total * 0.1;
    const nonZeroCount = total - tally(/0/gi);
    const nonFCount = total - tally(/f/gi);
    return nonZeroCount <= tolerance || nonFCount <= tolerance;
}
|