koishi-plugin-message-dedup 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,7 +2,9 @@
2
2
 
3
3
  Koishi 消息去重插件,检测群内重复的图片、链接、聊天记录。
4
4
 
5
- ![](assets/dup-1.jpg)
5
+ <p align="center">
6
+ <img src="assets/dup-1.jpg" width="200">
7
+ </p>
6
8
 
7
9
  ## 功能
8
10
 
package/assets/dup-1.jpg CHANGED
Binary file
package/lib/config.d.ts CHANGED
@@ -6,6 +6,8 @@ export interface Config {
6
6
  imageSimilarityThreshold: number;
7
7
  linkExactMatch: boolean;
8
8
  forwardContentMaxLength: number;
9
+ forwardImageMatchMode: 'all' | 'majority';
10
+ forwardImageSimilarityThreshold: number;
9
11
  retentionDays: number;
10
12
  stickerDir: string;
11
13
  sendMethod: 'koishi' | 'onebot';
package/lib/config.js CHANGED
@@ -4,8 +4,8 @@ exports.Config = void 0;
4
4
  const koishi_1 = require("koishi");
5
5
  exports.Config = koishi_1.Schema.object({
6
6
  enableImage: koishi_1.Schema.boolean()
7
- .default(false)
8
- .description('启用图片去重(默认关闭,可能误判表情包)'),
7
+ .default(true)
8
+ .description('启用图片去重(自动排除表情包)'),
9
9
  enableLink: koishi_1.Schema.boolean()
10
10
  .default(true)
11
11
  .description('启用链接去重'),
@@ -25,6 +25,17 @@ exports.Config = koishi_1.Schema.object({
25
25
  .min(100)
26
26
  .max(2000)
27
27
  .description('转发消息内容摘要最大长度'),
28
+ forwardImageMatchMode: koishi_1.Schema.union([
29
+ koishi_1.Schema.const('all').description('全部匹配'),
30
+ koishi_1.Schema.const('majority').description('过半匹配')
31
+ ])
32
+ .default('all')
33
+ .description('转发消息图片匹配模式:全部匹配要求所有图片相同,过半匹配只需超半数图片相同'),
34
+ forwardImageSimilarityThreshold: koishi_1.Schema.number()
35
+ .default(10)
36
+ .min(0)
37
+ .max(32)
38
+ .description('转发消息图片相似度阈值(百分比,0为完全相同)'),
28
39
  retentionDays: koishi_1.Schema.number()
29
40
  .default(7)
30
41
  .min(1)
package/lib/database.d.ts CHANGED
@@ -16,7 +16,23 @@ export interface DedupRecord {
16
16
  originalContent: string;
17
17
  extraInfo: string;
18
18
  }
/**
 * Extra information kept for a forwarded-message deduplication record.
 * The plugin serializes an object of this shape with `JSON.stringify` into
 * the record's `extraInfo` column.
 */
export interface ForwardExtraInfo {
    /** Id attribute of the forward element the record was created from. */
    forwardId: string;
    /** Leading slice of the (truncated) forwarded text, kept for display. */
    preview: string;
    /** Hash of the forwarded text; the forward id is hashed when there is no text. */
    textHash: string;
    /** Hashes of the forwarded images for which a usable hash was obtained. */
    imageHashes: string[];
    /** Number of images found in the forwarded message, hashed or not. */
    imageCount: number;
    /** Number of images for which no usable hash was obtained. */
    failedImages: number;
}
19
30
  export declare function extendDatabase(ctx: Context): void;
20
31
  export declare function findDuplicate(ctx: Context, guildId: string, contentType: 'image' | 'link' | 'forward', contentHash: string, imageThreshold?: number): Promise<DedupRecord | null>;
21
32
  export declare function saveRecord(ctx: Context, record: DedupRecord): Promise<void>;
22
33
  export declare function cleanupOldRecords(ctx: Context, retentionDays: number): Promise<void>;
34
+ /**
35
+ * 比较转发消息是否重复
36
+ * 使用文本哈希 + 图片哈希组合匹配
37
+ */
38
+ export declare function compareForwardMessages(ctx: Context, guildId: string, newTextHash: string, newImageHashes: string[], imageMatchMode: 'all' | 'majority', imageThreshold: number): Promise<DedupRecord | null>;
package/lib/database.js CHANGED
@@ -4,6 +4,7 @@ exports.extendDatabase = extendDatabase;
4
4
  exports.findDuplicate = findDuplicate;
5
5
  exports.saveRecord = saveRecord;
6
6
  exports.cleanupOldRecords = cleanupOldRecords;
7
+ exports.compareForwardMessages = compareForwardMessages;
7
8
  const hash_1 = require("./hash");
8
9
  function extendDatabase(ctx) {
9
10
  ctx.model.extend('message_dedup', {
@@ -23,10 +24,12 @@ function extendDatabase(ctx) {
23
24
  });
24
25
  }
25
26
  async function findDuplicate(ctx, guildId, contentType, contentHash, imageThreshold) {
26
- const records = await ctx.database.get('message_dedup', {
27
+ let records = await ctx.database.get('message_dedup', {
27
28
  guildId,
28
29
  contentType
29
30
  });
31
+ // 按时间戳升序排序,确保返回最早匹配的记录(原消息)
32
+ records = records.sort((a, b) => a.timestamp - b.timestamp);
30
33
  if (contentType === 'image' && imageThreshold !== undefined) {
31
34
  // 图片需要计算汉明距离
32
35
  // compareHashes返回0-1,0表示相同
@@ -60,3 +63,86 @@ async function cleanupOldRecords(ctx, retentionDays) {
60
63
  timestamp: { $lt: cutoffTime }
61
64
  });
62
65
  }
/**
 * Look up the earliest stored forwarded-message record in a guild that
 * duplicates a newly received forwarded message.
 *
 * A stored record is a duplicate when its `textHash` equals `newTextHash` and
 * one of the following holds:
 *  - neither side has image hashes (pure text match);
 *  - mode `'all'`: both sides have the same number of image hashes and every
 *    new hash is paired with a stored hash within the threshold;
 *  - any other mode (`'majority'`): more than half of
 *    `max(newCount, storedCount)` hashes are paired within the threshold.
 * When only one side has image hashes the record is never a duplicate.
 *
 * Records whose `extraInfo` is not a JSON object (unparseable text, `null`,
 * a number, ...) are skipped instead of aborting the lookup.
 *
 * @param ctx Koishi context; only `ctx.database.get` is used.
 * @param guildId Guild whose `'forward'` records are searched.
 * @param newTextHash Text hash of the incoming forwarded message.
 * @param newImageHashes Image hashes of the incoming forwarded message.
 * @param imageMatchMode `'all'` or `'majority'`; defaults to `'all'`, the
 *   default of the `forwardImageMatchMode` config field.
 * @param imageThreshold Similarity threshold as a percentage; it is divided
 *   by 100 before being compared with the hash distance. Defaults to 10, the
 *   default of the `forwardImageSimilarityThreshold` config field.
 * @returns The matching record with the smallest timestamp, or `null`.
 */
async function compareForwardMessages(ctx, guildId, newTextHash, newImageHashes, imageMatchMode = 'all', imageThreshold = 10) {
    const records = await ctx.database.get('message_dedup', {
        guildId,
        contentType: 'forward'
    });
    // Sort a copy (oldest first) so the array handed out by the database
    // layer is left untouched and the original message is reported.
    const sortedRecords = [...records].sort((a, b) => a.timestamp - b.timestamp);
    const thresholdRatio = imageThreshold / 100;
    const newHashes = Array.isArray(newImageHashes) ? newImageHashes : [];
    const newCount = newHashes.length;
    for (const record of sortedRecords) {
        let extra;
        try {
            extra = JSON.parse(record.extraInfo);
        }
        catch {
            // Unparseable extra info: skip the record.
            continue;
        }
        // JSON.parse may legitimately yield null or a primitive; reading a
        // property from those would throw or be meaningless.
        if (extra === null || typeof extra !== 'object')
            continue;
        // 1. The text hash must match (records without one never do).
        if (extra.textHash !== newTextHash)
            continue;
        const oldHashes = Array.isArray(extra.imageHashes) ? extra.imageHashes : [];
        const oldCount = oldHashes.length;
        // 2. No images on either side: the text match is sufficient.
        if (newCount === 0 && oldCount === 0)
            return record;
        // 3. Images on one side only: not a duplicate.
        if (newCount === 0 || oldCount === 0)
            continue;
        // 4. 'all' mode requires equal image counts before comparing hashes.
        if (imageMatchMode === 'all' && newCount !== oldCount)
            continue;
        // 5. Count how many images can be paired within the threshold.
        const matchedCount = countImageMatches(newHashes, oldHashes, thresholdRatio);
        // 6. Decide according to the mode.
        if (imageMatchMode === 'all') {
            if (matchedCount === newCount)
                return record;
        }
        else if (matchedCount > Math.max(newCount, oldCount) / 2) {
            return record;
        }
    }
    return null;
}
/**
 * Count how many image hashes of `hashes1` can be paired one-to-one with
 * image hashes of `hashes2` such that each pair's distance is within
 * `threshold`.
 *
 * The result is the size of a maximum matching (augmenting-path search), so
 * it does not depend on the order of either list. A first-fit greedy scan can
 * under-count: if the first new image grabs the only stored image that a
 * later new image could have matched, that later image is left unpaired even
 * though a complete pairing exists.
 *
 * @param hashes1 Hashes of the new images.
 * @param hashes2 Hashes of the stored images.
 * @param threshold Maximum accepted distance (a value between 0 and 1).
 * @param distanceFn Distance function for two hashes; defaults to
 *   `calculateHashDistance` from the hash module. A pair for which it throws
 *   (malformed hash) is treated as not matching.
 * @returns Number of pairs in the largest possible one-to-one pairing.
 */
function countImageMatches(hashes1, hashes2, threshold, // value between 0 and 1
distanceFn = hash_1.calculateHashDistance) {
    if (!Array.isArray(hashes1) || !Array.isArray(hashes2)) {
        return 0;
    }
    // For every new hash, the indexes of stored hashes within the threshold.
    const candidates = hashes1.map((h1) => {
        const within = [];
        hashes2.forEach((h2, j) => {
            try {
                if (distanceFn(h1, h2) <= threshold) {
                    within.push(j);
                }
            }
            catch {
                // Malformed hash: this pair cannot match.
            }
        });
        return within;
    });
    // ownerOf[j] is the index in hashes1 currently paired with hashes2[j].
    const ownerOf = new Array(hashes2.length).fill(-1);
    // Try to pair new hash i, re-routing earlier pairings when necessary.
    const tryAssign = (i, visited) => {
        for (const j of candidates[i]) {
            if (visited.has(j))
                continue;
            visited.add(j);
            if (ownerOf[j] === -1 || tryAssign(ownerOf[j], visited)) {
                ownerOf[j] = i;
                return true;
            }
        }
        return false;
    };
    let matchCount = 0;
    for (let i = 0; i < hashes1.length; i++) {
        if (tryAssign(i, new Set())) {
            matchCount++;
        }
    }
    return matchCount;
}
package/lib/index.js CHANGED
@@ -94,10 +94,18 @@ async function processMessage(session, config, ctx, logger) {
94
94
  return null;
95
95
  const username = session.author?.nickname || session.author?.username || session.username || '未知用户';
96
96
  const originalContent = session.content || extractTextFromElements(elements) || '';
97
- // 1. 检查图片
97
+ // 1. 检查图片(排除表情包:subType=1)
98
98
  if (config.enableImage) {
99
99
  for (const elem of elements) {
100
100
  if (elem.type === 'img' && elem.attrs?.src) {
101
+ // 表情包 subType 为 1,跳过
102
+ const subType = elem.attrs['sub-type'] ?? elem.attrs.subType;
103
+ if (subType === 1 || subType === '1') {
104
+ if (config.debug) {
105
+ logger.info('跳过表情包');
106
+ }
107
+ continue;
108
+ }
101
109
  const duplicate = await processImage(elem.attrs.src, session, username, originalContent, config, ctx, logger);
102
110
  if (duplicate)
103
111
  return duplicate;
@@ -116,6 +124,9 @@ async function processMessage(session, config, ctx, logger) {
116
124
  }
117
125
  // 3. 检查转发消息
118
126
  if (config.enableForward) {
127
+ if (config.debug) {
128
+ logger.info(`检查转发消息, elements types: ${elements.map(e => e.type).join(', ')}`);
129
+ }
119
130
  for (const elem of elements) {
120
131
  if (elem.type === 'forward') {
121
132
  const duplicate = await processForward(elem, session, username, originalContent, config, ctx, logger);
@@ -149,6 +160,13 @@ async function processImage(imageUrl, session, username, originalContent, config
149
160
  }
150
161
  return null;
151
162
  }
163
+ // 检测异常哈希(如全0或几乎全0),跳过处理避免误判
164
+ if (isAbnormalHash(hash)) {
165
+ if (config.debug) {
166
+ logger.warn(`异常图片哈希,跳过: ${hash}`);
167
+ }
168
+ return null;
169
+ }
152
170
  if (config.debug) {
153
171
  logger.info(`图片哈希: ${hash}`);
154
172
  }
@@ -217,33 +235,136 @@ async function processLink(url, session, username, originalContent, config, ctx,
217
235
  }
218
236
  async function processForward(forwardElem, session, username, originalContent, config, ctx, logger) {
219
237
  try {
220
- const content = extractForwardContent(forwardElem);
221
- if (!content || content.length < 10) {
238
+ if (config.debug) {
239
+ logger.info(`处理转发消息, elem: ${JSON.stringify(forwardElem.attrs, null, 2)}`);
240
+ }
241
+ // 获取转发消息 ID
242
+ const forwardId = forwardElem.attrs?.id;
243
+ if (!forwardId) {
222
244
  return null;
223
245
  }
224
- const truncated = content.slice(0, config.forwardContentMaxLength);
225
- const hash = (0, hash_1.calculateStringHash)(truncated);
246
+ // 转发消息内容
247
+ let textParts = [];
248
+ let images = [];
249
+ let apiSuccess = false;
250
+ // 先尝试通过 OneBot API 获取转发消息内容
251
+ if (session.platform === 'onebot' && session.bot?.internal) {
252
+ const internal = session.bot.internal;
253
+ // 尝试多种 payload 格式
254
+ const payloads = [
255
+ { message_id: forwardId },
256
+ { id: forwardId },
257
+ ];
258
+ // 尝试多种 API 调用方式
259
+ const callApi = async (action, params) => {
260
+ if (typeof internal._get === 'function') {
261
+ return await internal._get(action, params);
262
+ }
263
+ if (typeof internal.request === 'function') {
264
+ return await internal.request(action, params);
265
+ }
266
+ if (typeof internal.callAction === 'function') {
267
+ return await internal.callAction(action, params);
268
+ }
269
+ return null;
270
+ };
271
+ for (const payload of payloads) {
272
+ try {
273
+ const forwardData = await callApi('get_forward_msg', payload);
274
+ if (forwardData) {
275
+ const messages = extractMessagesArray(forwardData);
276
+ if (messages && Array.isArray(messages) && messages.length > 0) {
277
+ apiSuccess = true;
278
+ if (config.debug) {
279
+ logger.info(`转发消息节点数量: ${messages.length}`);
280
+ }
281
+ // 提取文本和图片
282
+ for (const node of messages) {
283
+ const msgArray = node.message || node.content || node.data;
284
+ if (Array.isArray(msgArray)) {
285
+ for (const m of msgArray) {
286
+ if (m.type === 'text') {
287
+ textParts.push(m.data?.text || m.text || '');
288
+ }
289
+ if (m.type === 'image') {
290
+ const url = m.data?.url || m.data?.file || m.url || m.file || '';
291
+ if (url) {
292
+ // 下载图片并计算哈希
293
+ let hash = null;
294
+ try {
295
+ hash = await (0, hash_1.downloadAndHashImage)(url, ctx);
296
+ if (hash && isAbnormalHash(hash)) {
297
+ if (config.debug) {
298
+ logger.warn(`转发消息图片哈希异常,跳过: ${hash}`);
299
+ }
300
+ hash = null;
301
+ }
302
+ }
303
+ catch (err) {
304
+ if (config.debug) {
305
+ logger.warn(`转发消息图片下载失败: ${url.slice(0, 50)}...`);
306
+ }
307
+ }
308
+ images.push({ url, hash });
309
+ }
310
+ }
311
+ }
312
+ }
313
+ // 兼容字符串格式
314
+ if (typeof node.content === 'string') {
315
+ textParts.push(node.content);
316
+ }
317
+ if (typeof node.text === 'string') {
318
+ textParts.push(node.text);
319
+ }
320
+ }
321
+ break;
322
+ }
323
+ }
324
+ }
325
+ catch (err) {
326
+ // API 调用失败,继续尝试
327
+ }
328
+ }
329
+ }
330
+ // 提取有效图片哈希列表
331
+ const imageHashes = images.filter(img => img.hash !== null).map(img => img.hash);
332
+ const failedImages = images.filter(img => img.hash === null).length;
333
+ // 计算文本哈希
334
+ const textContent = textParts.join('\n').trim();
335
+ const textToHash = textContent.slice(0, config.forwardContentMaxLength);
336
+ const textHash = textContent ? (0, hash_1.calculateStringHash)(textToHash) : (0, hash_1.calculateStringHash)(forwardId);
226
337
  if (config.debug) {
227
- logger.info(`转发消息哈希: ${hash}, 内容长度: ${content.length}`);
338
+ logger.info(`转发消息: 文本长度=${textToHash.length}, 图片=${imageHashes.length}, 失败=${failedImages}, API成功=${apiSuccess}`);
228
339
  }
229
340
  const guildId = session.guildId;
230
- const duplicate = await (0, database_1.findDuplicate)(ctx, guildId, 'forward', hash);
341
+ // 使用新的比较函数查询重复消息
342
+ const duplicate = await (0, database_1.compareForwardMessages)(ctx, guildId, textHash, imageHashes, config.forwardImageMatchMode, config.forwardImageSimilarityThreshold);
231
343
  if (duplicate) {
232
344
  if (config.debug) {
233
345
  logger.info(`发现重复转发消息`);
234
346
  }
235
347
  return duplicate;
236
348
  }
349
+ // 保存记录
350
+ const extraInfo = {
351
+ forwardId,
352
+ preview: textToHash.slice(0, 100),
353
+ textHash,
354
+ imageHashes,
355
+ imageCount: images.length,
356
+ failedImages
357
+ };
237
358
  await (0, database_1.saveRecord)(ctx, {
238
359
  guildId: session.guildId,
239
360
  userId: session.userId,
240
361
  username,
241
362
  timestamp: Date.now(),
242
363
  contentType: 'forward',
243
- contentHash: hash,
364
+ contentHash: textHash,
244
365
  originalMessageId: session.messageId,
245
- originalContent: truncated.slice(0, 100),
246
- extraInfo: JSON.stringify({ preview: truncated.slice(0, 100) })
366
+ originalContent: textToHash.slice(0, 100),
367
+ extraInfo: JSON.stringify(extraInfo)
247
368
  });
248
369
  return null;
249
370
  }
@@ -269,6 +390,28 @@ function extractForwardContent(elem) {
269
390
  }
270
391
  return content;
271
392
  }
/**
 * Pull the array of forwarded message nodes out of a response whose layout
 * is not fixed (modelled on the chatluna-forward-msg implementation).
 *
 * The following locations are probed in order: the value itself, `messages`,
 * `data`, `result`, `response`, `data.messages`, `response.messages` and
 * `envelope.result.messages`. The first NON-EMPTY array wins, so an empty
 * array in an early location does not hide a populated one further down.
 * If every array found is empty, the first of them is returned.
 *
 * @param data Raw response value; may be null or undefined.
 * @returns The located array, or `null` when no location holds an array.
 */
function extractMessagesArray(data) {
    const candidates = [
        data,
        data?.messages,
        data?.data,
        data?.result,
        data?.response,
        data?.data?.messages,
        data?.response?.messages,
        data?.envelope?.result?.messages,
    ];
    let firstEmpty = null;
    for (const item of candidates) {
        if (!Array.isArray(item)) {
            continue;
        }
        if (item.length > 0) {
            return item;
        }
        if (firstEmpty === null) {
            firstEmpty = item;
        }
    }
    return firstEmpty;
}
272
415
  async function sendDuplicateWarning(session, duplicate, config, ctx) {
273
416
  const date = new Date(duplicate.timestamp);
274
417
  const dateStr = date.toLocaleString('zh-CN', {
@@ -343,3 +486,31 @@ function getRandomSticker(baseDir, stickerDir) {
343
486
  }
344
487
  return null;
345
488
  }
/**
 * Tell whether an image hash is degenerate (almost entirely one symbol).
 * Such hashes compare as "similar" to each other and cause false duplicate
 * reports, so callers skip them.
 *
 * Rules:
 *  - a value that is not a non-empty string is abnormal;
 *  - a string of only `0`/`1` with at least 64 characters is read as a bit
 *    string and is abnormal when at least 90% of it is `0` or at least 90%
 *    is `1` (for 64 characters: 58 or more);
 *  - any other string of hexadecimal digits is abnormal when at least 90% of
 *    its characters are `0`, or at least 90% are `f`/`F`;
 *  - anything else is reported as normal.
 *
 * @param hash Hash string to inspect.
 * @returns `true` when the hash should be skipped.
 */
function isAbnormalHash(hash) {
    // Nothing usable to compare against: treat as abnormal so it is skipped.
    if (typeof hash !== 'string' || hash.length === 0) {
        return true;
    }
    const size = hash.length;
    const occurrences = (ch) => hash.split(ch).length - 1;
    // True when all but at most 10% of the characters are the counted symbol.
    const dominates = (count) => size - count <= size * 0.1;
    // Bit-string form. Shorter 0/1-only strings are left to the hexadecimal
    // rule below, because a short hexadecimal hash may consist of 0/1 digits.
    if (size >= 64 && /^[01]+$/.test(hash)) {
        return dominates(occurrences('0')) || dominates(occurrences('1'));
    }
    // Hexadecimal form: almost all '0' or almost all 'f'.
    if (/^[0-9a-fA-F]+$/.test(hash)) {
        const effCount = size - hash.replace(/f/gi, '').length;
        return dominates(occurrences('0')) || dominates(effCount);
    }
    return false;
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "koishi-plugin-message-dedup",
3
- "version": "0.0.1",
3
+ "version": "0.0.3",
4
4
  "description": "消息去重插件,检测群内重复的图片、链接、聊天记录",
5
5
  "main": "lib/index.js",
6
6
  "typings": "lib/index.d.ts",