koishi-plugin-message-dedup 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/config.d.ts CHANGED
@@ -6,6 +6,8 @@ export interface Config {
6
6
  imageSimilarityThreshold: number;
7
7
  linkExactMatch: boolean;
8
8
  forwardContentMaxLength: number;
9
+ forwardImageMatchMode: 'all' | 'majority';
10
+ forwardImageSimilarityThreshold: number;
9
11
  retentionDays: number;
10
12
  stickerDir: string;
11
13
  sendMethod: 'koishi' | 'onebot';
package/lib/config.js CHANGED
@@ -25,6 +25,17 @@ exports.Config = koishi_1.Schema.object({
25
25
  .min(100)
26
26
  .max(2000)
27
27
  .description('转发消息内容摘要最大长度'),
28
+ forwardImageMatchMode: koishi_1.Schema.union([
29
+ koishi_1.Schema.const('all').description('全部匹配'),
30
+ koishi_1.Schema.const('majority').description('过半匹配')
31
+ ])
32
+ .default('all')
33
+ .description('转发消息图片匹配模式:全部匹配要求所有图片相同,过半匹配只需超半数图片相同'),
34
+ forwardImageSimilarityThreshold: koishi_1.Schema.number()
35
+ .default(10)
36
+ .min(0)
37
+ .max(32)
38
+ .description('转发消息图片相似度阈值(百分比,0为完全相同)'),
28
39
  retentionDays: koishi_1.Schema.number()
29
40
  .default(7)
30
41
  .min(1)
package/lib/database.d.ts CHANGED
@@ -16,7 +16,23 @@ export interface DedupRecord {
16
16
  originalContent: string;
17
17
  extraInfo: string;
18
18
  }
19
+ /**
20
+ * Extra information kept for a forward-message record (JSON-serialized into the record's extraInfo field): the forward id, a short text preview, the text hash, the hashes of the images that could be hashed, the total image count, and the number of images whose hash is missing.
21
+ */
22
+ export interface ForwardExtraInfo {
23
+ forwardId: string;
24
+ preview: string;
25
+ textHash: string;
26
+ imageHashes: string[];
27
+ imageCount: number;
28
+ failedImages: number;
29
+ }
19
30
  export declare function extendDatabase(ctx: Context): void;
20
31
  export declare function findDuplicate(ctx: Context, guildId: string, contentType: 'image' | 'link' | 'forward', contentHash: string, imageThreshold?: number): Promise<DedupRecord | null>;
21
32
  export declare function saveRecord(ctx: Context, record: DedupRecord): Promise<void>;
22
33
  export declare function cleanupOldRecords(ctx: Context, retentionDays: number): Promise<void>;
34
+ /**
35
+ * Checks whether a forward message duplicates a previously stored one; resolves to the earliest matching record, or null when nothing matches.
36
+ * Matching combines the text hash with the image hashes (image rule selected by imageMatchMode).
37
+ */
38
+ export declare function compareForwardMessages(ctx: Context, guildId: string, newTextHash: string, newImageHashes: string[], imageMatchMode: 'all' | 'majority', imageThreshold: number): Promise<DedupRecord | null>;
package/lib/database.js CHANGED
@@ -4,6 +4,7 @@ exports.extendDatabase = extendDatabase;
4
4
  exports.findDuplicate = findDuplicate;
5
5
  exports.saveRecord = saveRecord;
6
6
  exports.cleanupOldRecords = cleanupOldRecords;
7
+ exports.compareForwardMessages = compareForwardMessages;
7
8
  const hash_1 = require("./hash");
8
9
  function extendDatabase(ctx) {
9
10
  ctx.model.extend('message_dedup', {
@@ -23,10 +24,12 @@ function extendDatabase(ctx) {
23
24
  });
24
25
  }
25
26
  async function findDuplicate(ctx, guildId, contentType, contentHash, imageThreshold) {
26
- const records = await ctx.database.get('message_dedup', {
27
+ let records = await ctx.database.get('message_dedup', {
27
28
  guildId,
28
29
  contentType
29
30
  });
31
+ // 按时间戳升序排序,确保返回最早匹配的记录(原消息)
32
+ records = records.sort((a, b) => a.timestamp - b.timestamp);
30
33
  if (contentType === 'image' && imageThreshold !== undefined) {
31
34
  // 图片需要计算汉明距离
32
35
  // compareHashes返回0-1,0表示相同
@@ -60,3 +63,86 @@ async function cleanupOldRecords(ctx, retentionDays) {
60
63
  timestamp: { $lt: cutoffTime }
61
64
  });
62
65
  }
/**
 * Finds the earliest stored forward-message record that duplicates a new
 * forward message.
 *
 * A stored record matches when its text hash equals `newTextHash` and its
 * image hashes agree with `newImageHashes` under `imageMatchMode`:
 *   - 'all':      both sides hold the same number of images and every new
 *                 image is matched;
 *   - any other value (i.e. 'majority'): more than half of the larger of the
 *                 two image sets is matched.
 * When neither side has images the text hash alone decides; when only one
 * side has images the record never matches.
 *
 * @param ctx             Context whose `database.get` is queried for
 *                        'message_dedup' rows of type 'forward'.
 * @param guildId         Guild whose records are searched.
 * @param newTextHash     Text hash of the incoming forward message.
 * @param newImageHashes  Image hashes of the incoming forward message.
 * @param imageMatchMode  'all' or 'majority'.
 * @param imageThreshold  Similarity threshold; it is divided by 100 before
 *                        being compared with the image hash distance.
 * @returns The matching record with the smallest timestamp, or null.
 */
async function compareForwardMessages(ctx, guildId, newTextHash, newImageHashes, imageMatchMode, imageThreshold) {
    const records = await ctx.database.get('message_dedup', {
        guildId,
        contentType: 'forward'
    });
    // Sort a copy (ascending timestamp) so the first hit is the original
    // message and the array handed back by the database is left untouched.
    const sortedRecords = [...records].sort((a, b) => a.timestamp - b.timestamp);
    const thresholdRatio = imageThreshold / 100;
    const newHashes = Array.isArray(newImageHashes) ? newImageHashes : [];
    const newCount = newHashes.length;
    for (const record of sortedRecords) {
        let extra;
        try {
            extra = JSON.parse(record.extraInfo);
        }
        catch {
            // Record written in an older / unparsable format: skip it.
            continue;
        }
        // JSON.parse may yield null or a primitive (e.g. the text "null");
        // such a value carries no hashes and must not be dereferenced.
        if (extra === null || typeof extra !== 'object') {
            continue;
        }
        // 1. The text hash must match.
        if (extra.textHash !== newTextHash) {
            continue;
        }
        const oldHashes = Array.isArray(extra.imageHashes) ? extra.imageHashes : [];
        const oldCount = oldHashes.length;
        // 2. No images on either side: the text match is sufficient.
        if (newCount === 0 && oldCount === 0) {
            return record;
        }
        // 3. Images on one side only: not a duplicate.
        if (newCount === 0 || oldCount === 0) {
            continue;
        }
        if (imageMatchMode === 'all') {
            // 4. 'all' mode: same image count and every image matched.
            if (newCount !== oldCount) {
                continue;
            }
            if (countImageMatches(newHashes, oldHashes, thresholdRatio) === newCount) {
                return record;
            }
        }
        else {
            // 5. 'majority' mode: more than half of the larger set matched.
            const matchedCount = countImageMatches(newHashes, oldHashes, thresholdRatio);
            if (matchedCount > Math.max(newCount, oldCount) / 2) {
                return record;
            }
        }
    }
    return null;
}
/**
 * Counts how many hashes of `hashes1` can be paired with a distinct hash of
 * `hashes2` whose distance is within `threshold`.
 *
 * Greedy best-match: each hash of `hashes1`, in order, is paired with the
 * closest still-unpaired hash of `hashes2` (ties keep the lowest index).
 * Choosing the closest candidate rather than the first acceptable one keeps a
 * loosely similar image from taking the slot of an exact duplicate.
 *
 * @param hashes1    Image hashes of the new message.
 * @param hashes2    Image hashes of the stored message.
 * @param threshold  Largest accepted distance (a ratio between 0 and 1).
 * @returns Number of pairs formed; 0 when either argument is not an array.
 */
function countImageMatches(hashes1, hashes2, threshold) {
    if (!Array.isArray(hashes1) || !Array.isArray(hashes2)) {
        return 0;
    }
    let matchCount = 0;
    const used = new Set();
    for (const h1 of hashes1) {
        let bestIndex = -1;
        let bestDistance = Infinity;
        for (let i = 0; i < hashes2.length; i++) {
            if (used.has(i)) {
                continue;
            }
            let distance;
            try {
                distance = (0, hash_1.calculateHashDistance)(h1, hashes2[i]);
            }
            catch {
                // Malformed hash: this pair cannot be compared, try the next one.
                continue;
            }
            if (distance <= threshold && distance < bestDistance) {
                bestDistance = distance;
                bestIndex = i;
                if (distance === 0) {
                    // Identical hash: no better candidate can exist.
                    break;
                }
            }
        }
        if (bestIndex !== -1) {
            matchCount++;
            used.add(bestIndex);
        }
    }
    return matchCount;
}
package/lib/index.js CHANGED
@@ -160,6 +160,13 @@ async function processImage(imageUrl, session, username, originalContent, config
160
160
  }
161
161
  return null;
162
162
  }
163
+ // 检测异常哈希(如全0或几乎全0),跳过处理避免误判
164
+ if (isAbnormalHash(hash)) {
165
+ if (config.debug) {
166
+ logger.warn(`异常图片哈希,跳过: ${hash}`);
167
+ }
168
+ return null;
169
+ }
163
170
  if (config.debug) {
164
171
  logger.info(`图片哈希: ${hash}`);
165
172
  }
@@ -229,15 +236,18 @@ async function processLink(url, session, username, originalContent, config, ctx,
229
236
  async function processForward(forwardElem, session, username, originalContent, config, ctx, logger) {
230
237
  try {
231
238
  if (config.debug) {
232
- logger.info(`处理转发消息, elem: ${JSON.stringify(forwardElem, null, 2)}`);
239
+ logger.info(`处理转发消息, elem: ${JSON.stringify(forwardElem.attrs, null, 2)}`);
233
240
  }
234
241
  // 获取转发消息 ID
235
242
  const forwardId = forwardElem.attrs?.id;
236
243
  if (!forwardId) {
237
244
  return null;
238
245
  }
246
+ // 转发消息内容
247
+ let textParts = [];
248
+ let images = [];
249
+ let apiSuccess = false;
239
250
  // 先尝试通过 OneBot API 获取转发消息内容
240
- let content = '';
241
251
  if (session.platform === 'onebot' && session.bot?.internal) {
242
252
  const internal = session.bot.internal;
243
253
  // 尝试多种 payload 格式
@@ -264,17 +274,49 @@ async function processForward(forwardElem, session, username, originalContent, c
264
274
  if (forwardData) {
265
275
  const messages = extractMessagesArray(forwardData);
266
276
  if (messages && Array.isArray(messages) && messages.length > 0) {
267
- content = messages.map((node) => {
268
- if (node.message) {
269
- return node.message
270
- .filter((m) => m.type === 'text')
271
- .map((m) => m.data?.text || '')
272
- .join('');
273
- }
274
- return '';
275
- }).join('\n');
277
+ apiSuccess = true;
276
278
  if (config.debug) {
277
- logger.info(`get_forward_msg成功获取内容, 长度: ${content.length}`);
279
+ logger.info(`转发消息节点数量: ${messages.length}`);
280
+ }
281
+ // 提取文本和图片
282
+ for (const node of messages) {
283
+ const msgArray = node.message || node.content || node.data;
284
+ if (Array.isArray(msgArray)) {
285
+ for (const m of msgArray) {
286
+ if (m.type === 'text') {
287
+ textParts.push(m.data?.text || m.text || '');
288
+ }
289
+ if (m.type === 'image') {
290
+ const url = m.data?.url || m.data?.file || m.url || m.file || '';
291
+ if (url) {
292
+ // 下载图片并计算哈希
293
+ let hash = null;
294
+ try {
295
+ hash = await (0, hash_1.downloadAndHashImage)(url, ctx);
296
+ if (hash && isAbnormalHash(hash)) {
297
+ if (config.debug) {
298
+ logger.warn(`转发消息图片哈希异常,跳过: ${hash}`);
299
+ }
300
+ hash = null;
301
+ }
302
+ }
303
+ catch (err) {
304
+ if (config.debug) {
305
+ logger.warn(`转发消息图片下载失败: ${url.slice(0, 50)}...`);
306
+ }
307
+ }
308
+ images.push({ url, hash });
309
+ }
310
+ }
311
+ }
312
+ }
313
+ // 兼容字符串格式
314
+ if (typeof node.content === 'string') {
315
+ textParts.push(node.content);
316
+ }
317
+ if (typeof node.text === 'string') {
318
+ textParts.push(node.text);
319
+ }
278
320
  }
279
321
  break;
280
322
  }
@@ -285,35 +327,44 @@ async function processForward(forwardElem, session, username, originalContent, c
285
327
  }
286
328
  }
287
329
  }
288
- // 如果 API 获取失败,直接用 forwardId 作为去重标识
289
- // 同一内容的转发消息在不同用户发送时应该有相同的 ID
290
- const hashSource = content || forwardId;
291
- if (config.debug) {
292
- logger.info(`转发消息去重标识: ${hashSource.slice(0, 50)} (来源: ${content ? 'API内容' : 'forwardId'})`);
293
- }
294
- const truncated = hashSource.slice(0, config.forwardContentMaxLength);
295
- const hash = (0, hash_1.calculateStringHash)(truncated);
330
+ // 提取有效图片哈希列表
331
+ const imageHashes = images.filter(img => img.hash !== null).map(img => img.hash);
332
+ const failedImages = images.filter(img => img.hash === null).length;
333
+ // 计算文本哈希
334
+ const textContent = textParts.join('\n').trim();
335
+ const textToHash = textContent.slice(0, config.forwardContentMaxLength);
336
+ const textHash = textContent ? (0, hash_1.calculateStringHash)(textToHash) : (0, hash_1.calculateStringHash)(forwardId);
296
337
  if (config.debug) {
297
- logger.info(`转发消息哈希: ${hash}`);
338
+ logger.info(`转发消息: 文本长度=${textToHash.length}, 图片=${imageHashes.length}, 失败=${failedImages}, API成功=${apiSuccess}`);
298
339
  }
299
340
  const guildId = session.guildId;
300
- const duplicate = await (0, database_1.findDuplicate)(ctx, guildId, 'forward', hash);
341
+ // 使用新的比较函数查询重复消息
342
+ const duplicate = await (0, database_1.compareForwardMessages)(ctx, guildId, textHash, imageHashes, config.forwardImageMatchMode, config.forwardImageSimilarityThreshold);
301
343
  if (duplicate) {
302
344
  if (config.debug) {
303
345
  logger.info(`发现重复转发消息`);
304
346
  }
305
347
  return duplicate;
306
348
  }
349
+ // 保存记录
350
+ const extraInfo = {
351
+ forwardId,
352
+ preview: textToHash.slice(0, 100),
353
+ textHash,
354
+ imageHashes,
355
+ imageCount: images.length,
356
+ failedImages
357
+ };
307
358
  await (0, database_1.saveRecord)(ctx, {
308
359
  guildId: session.guildId,
309
360
  userId: session.userId,
310
361
  username,
311
362
  timestamp: Date.now(),
312
363
  contentType: 'forward',
313
- contentHash: hash,
364
+ contentHash: textHash,
314
365
  originalMessageId: session.messageId,
315
- originalContent: truncated.slice(0, 100),
316
- extraInfo: JSON.stringify({ forwardId, preview: truncated.slice(0, 100) })
366
+ originalContent: textToHash.slice(0, 100),
367
+ extraInfo: JSON.stringify(extraInfo)
317
368
  });
318
369
  return null;
319
370
  }
@@ -435,3 +486,31 @@ function getRandomSticker(baseDir, stickerDir) {
435
486
  }
436
487
  return null;
437
488
  }
/**
 * Tells whether an image hash is degenerate, i.e. at least 90% of its bits
 * share the same value (almost all 0 or almost all 1). Such hashes produce
 * false duplicate hits, so callers skip them.
 *
 * Two encodings are recognized:
 *   - a 64-character string of '0'/'1' is read as 64 individual bits;
 *   - any other string made only of hex digits is read as 4 bits per digit.
 * Both encodings are judged by the same bit-level rule, so one hash gets the
 * same verdict whichever way it is written.
 *
 * @param hash  Hash string to inspect.
 * @returns true when the hash is degenerate, empty, or not a string; false
 *          otherwise (including strings in an unrecognized format).
 */
function isAbnormalHash(hash) {
    // A missing or empty hash can never be compared meaningfully.
    if (typeof hash !== 'string' || hash.length === 0) {
        return true;
    }
    let totalBits;
    let setBits = 0;
    if (hash.length === 64 && /^[01]+$/.test(hash)) {
        // Binary form: every character is one bit.
        totalBits = 64;
        for (const ch of hash) {
            if (ch === '1') {
                setBits++;
            }
        }
    }
    else if (/^[0-9a-fA-F]+$/.test(hash)) {
        // Hex form: every digit contributes four bits.
        const nibblePopcount = [0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4];
        totalBits = hash.length * 4;
        for (const ch of hash) {
            setBits += nibblePopcount[parseInt(ch, 16)];
        }
    }
    else {
        // Unknown format: nothing to judge.
        return false;
    }
    const clearBits = totalBits - setBits;
    // Integer form of "minority bits <= 10% of all bits".
    return setBits * 10 <= totalBits || clearBits * 10 <= totalBits;
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "koishi-plugin-message-dedup",
3
- "version": "0.0.2",
3
+ "version": "0.0.3",
4
4
  "description": "消息去重插件,检测群内重复的图片、链接、聊天记录",
5
5
  "main": "lib/index.js",
6
6
  "typings": "lib/index.d.ts",