@yaoyuanchao/dingtalk 1.5.0 → 1.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/monitor.ts CHANGED
@@ -1,1382 +1,1382 @@
1
- import type { DingTalkRobotMessage, ResolvedDingTalkAccount, ExtractedMessage } from "./types.js";
2
- import { sendViaSessionWebhook, sendMarkdownViaSessionWebhook, sendDingTalkRestMessage, batchGetUserInfo, downloadPicture, downloadMediaFile, cleanupOldMedia, uploadMediaFile, sendFileMessage, textToMarkdownFile, sendTypingIndicator } from "./api.js";
3
- import { getDingTalkRuntime } from "./runtime.js";
4
-
5
- // ============================================================================
6
- // Message Aggregation Buffer
7
- // ============================================================================
8
- // When users share links via DingTalk's "share link" feature, the message may
9
- // arrive as multiple separate messages (text + URL). This buffer aggregates
10
- // messages from the same sender within a short time window.
11
-
12
/**
 * Pending aggregation state for one sender inside one conversation.
 * An entry lives in `messageBuffer` until its timer fires and the collected
 * fragments are dispatched as a single message.
 */
interface BufferedMessage {
  /** Fragments collected so far, in arrival order. */
  messages: Array<{
    text: string;
    timestamp: number;
    mediaPath?: string;
    mediaType?: string;
  }>;
  /** Timer that flushes this entry; it is re-armed whenever a new fragment arrives. */
  timer: ReturnType<typeof setTimeout>;
  /** Monitor context captured when the entry was created. */
  ctx: DingTalkMonitorContext;
  /** Most recent inbound message, kept so the reply targets the latest one. */
  msg: DingTalkRobotMessage;
  /** Reply routing data built from the most recent inbound message. */
  replyTarget: any;
  sessionKey: string;
  isDm: boolean;
  senderId: string;
  senderName: string;
  conversationId: string;
}
24
-
25
/**
 * Default quiet period (ms) before buffered fragments are flushed:
 * 2 seconds, a balance between responsiveness and catching split messages.
 */
const AGGREGATION_DELAY_MS = 2000;

/** In-flight aggregation buffers, keyed by the string built in `getBufferKey`. */
const messageBuffer: Map<string, BufferedMessage> = new Map();
27
-
28
/**
 * Build the aggregation-buffer key for a message: account, conversation and
 * sender joined by ":". The sender part is `senderId`, falling back to
 * `senderStaffId` when `senderId` is empty.
 */
function getBufferKey(msg: DingTalkRobotMessage, accountId: string): string {
  const sender = msg.senderId || msg.senderStaffId;
  // Plain concatenation on purpose: a missing part must render as "undefined",
  // exactly as it would inside a template literal.
  return accountId + ":" + msg.conversationId + ":" + sender;
}
31
-
32
- // ============================================================================
33
-
34
/** Everything one monitor instance needs to run for a single DingTalk account. */
export interface DingTalkMonitorContext {
  /** Resolved account; its clientId/clientSecret are required to start the stream. */
  account: ResolvedDingTalkAccount;

  /** Host configuration, or a manager object exposing `loadConfig()`. */
  cfg: any;

  /** Aborting this signal disconnects the stream and stops periodic cleanup. */
  abortSignal: AbortSignal;

  /** Optional logger; only called through optional chaining (`log?.info?.(...)`). */
  log?: any;

  /** Optional sink for status updates such as `running` and `lastInboundAt`. */
  setStatus?: (update: Record<string, unknown>) => void;
}
41
-
42
/**
 * Start the DingTalk Stream monitor for one account.
 *
 * Runs a media cleanup immediately and then every hour, loads the
 * `dingtalk-stream` SDK, registers the robot-message callback and connects.
 * Aborting `ctx.abortSignal` stops the hourly cleanup, disconnects the client
 * and reports `running: false`.
 *
 * The hourly timer is released on every failure path, and a signal that is
 * already aborted (or aborts while starting) never leaves a live connection:
 * listeners attached to an already-aborted signal are never invoked, so the
 * `aborted` flag is checked explicitly.
 *
 * @param ctx Monitor context (account, logger, abort signal, status sink).
 * @throws Error when clientId/clientSecret are missing, when the SDK cannot be
 *   imported, or when it does not export `DWClient`. Errors raised by
 *   `client.connect()` propagate unchanged.
 */
export async function startDingTalkMonitor(ctx: DingTalkMonitorContext): Promise<void> {
  const { account, abortSignal, log, setStatus } = ctx;

  if (!account.clientId || !account.clientSecret) {
    throw new Error("DingTalk clientId/clientSecret not configured");
  }

  // An "abort" listener added after the signal fired would never run.
  if (abortSignal?.aborted) {
    log?.info?.("[dingtalk:" + account.accountId + "] Abort already requested, Stream not started");
    return;
  }

  // Clean up old media on startup, then once per hour.
  cleanupOldMedia();
  const cleanupInterval = setInterval(() => {
    cleanupOldMedia();
  }, 60 * 60 * 1000);

  let client: any;
  const onAbort = () => {
    clearInterval(cleanupInterval);
    try { client?.disconnect?.(); } catch { /* best-effort disconnect */ }
    setStatus?.({ running: false, lastStopAt: Date.now() });
  };
  abortSignal?.addEventListener("abort", onAbort, { once: true });

  try {
    let DWClient: any;
    let TOPIC_ROBOT: any;
    try {
      const mod = await import("dingtalk-stream");
      DWClient = mod.DWClient || mod.default?.DWClient || mod.default;
      TOPIC_ROBOT = mod.TOPIC_ROBOT || mod.default?.TOPIC_ROBOT || "/v1.0/im/bot/messages/get";
    } catch (err) {
      throw new Error("Failed to import dingtalk-stream SDK: " + err);
    }

    if (!DWClient) throw new Error("DWClient not found in dingtalk-stream");

    // Abort fired while the SDK was loading; onAbort has already cleaned up.
    if (abortSignal?.aborted) return;

    log?.info?.("[dingtalk:" + account.accountId + "] Starting Stream...");

    client = new DWClient({
      clientId: account.clientId,
      clientSecret: account.clientSecret,
    });

    client.registerCallbackListener(TOPIC_ROBOT, async (downstream: any) => {
      // Immediately ACK to prevent DingTalk from retrying (60s timeout).
      // SDK method is socketCallBackResponse, not socketResponse.
      try {
        client.socketCallBackResponse(downstream.headers.messageId, { status: 'SUCCESS' });
      } catch (_) { /* best-effort ACK */ }

      try {
        const data: DingTalkRobotMessage = typeof downstream.data === "string"
          ? JSON.parse(downstream.data) : downstream.data;
        setStatus?.({ lastInboundAt: Date.now() });
        await processInboundMessage(data, ctx);
      } catch (err) {
        log?.info?.("[dingtalk] Message error: " + err);
      }
      return { status: "SUCCESS", message: "OK" };
    });

    client.registerAllEventListener(() => {
      return { status: "SUCCESS", message: "OK" };
    });

    await client.connect();
  } catch (err) {
    // Startup failed: release the hourly timer and the abort hook.
    clearInterval(cleanupInterval);
    abortSignal?.removeEventListener("abort", onAbort);
    throw err;
  }

  if (abortSignal?.aborted) {
    // Abort raced with connect(): make sure the connection does not survive,
    // and do not report the monitor as running.
    try { client.disconnect?.(); } catch { /* best-effort disconnect */ }
    return;
  }

  log?.info?.("[dingtalk:" + account.accountId + "] Stream connected");
  setStatus?.({ running: true, lastStartAt: Date.now() });
}
117
-
118
/** One entry of the JSON-encoded `chatRecord` payload (forwarded chat history). */
interface ForwardedRecord {
  senderId?: string;
  senderStaffId?: string; // Non-encrypted userId (available when app is published)
  senderNick?: string;
  msgType?: string;
  content?: string;
  downloadCode?: string; // For media messages (picture, video, file)
  createAt?: number;
}

/** Fixed placeholders for forwarded record types whose body is never inlined. */
const FORWARDED_PLACEHOLDERS = new Map<string, string>([
  ['video', '[视频]'],
  ['file', '[文件]'],
  ['voice', '[语音]'],
  ['audio', '[语音]'],
]);

/**
 * Pick the display name for a forwarded record:
 * senderNick > name resolved through the API > "成员" for encrypted ids > "未知".
 */
function resolveForwardedSender(record: ForwardedRecord, names: Map<string, string>): string {
  if (record.senderNick) return record.senderNick;

  const lookupId = record.senderStaffId || record.senderId;
  const resolved = lookupId ? names.get(lookupId) : undefined;
  if (resolved) return resolved;

  // Encrypted ids (prefixed "$:") cannot be looked up.
  if (record.senderId?.startsWith('$:')) return '成员';
  return '未知';
}

/** Render the body of one forwarded record, downloading pictures when credentials allow. */
async function renderForwardedBody(
  record: ForwardedRecord,
  account: ResolvedDingTalkAccount,
  log?: any,
): Promise<string> {
  const kind = record.msgType;

  if (kind === 'text') return record.content || '[空消息]';
  if (kind === 'richText') return record.content || '[富文本消息]';
  if (kind === 'markdown') return record.content || '[Markdown消息]';

  if (kind === 'picture' || kind === 'image') {
    if (!(record.downloadCode && account.clientId && account.clientSecret)) {
      return '[图片]';
    }
    try {
      const robotCode = account.robotCode || account.clientId;
      const picture = await downloadPicture(
        account.clientId, account.clientSecret, robotCode!, record.downloadCode,
      );
      if (picture.filePath) {
        const described = `[图片: ${picture.filePath}]`;
        log?.info?.("[dingtalk] Downloaded chatRecord picture: " + picture.filePath);
        return described;
      }
      return picture.error ? `[图片下载失败: ${picture.error}]` : '[图片]';
    } catch (err) {
      log?.info?.("[dingtalk] Error downloading chatRecord picture: " + err);
      return '[图片]';
    }
  }

  const placeholder = kind === undefined ? undefined : FORWARDED_PLACEHOLDERS.get(kind);
  if (placeholder !== undefined) return placeholder;

  return record.content || `[${record.msgType || '未知'}消息]`;
}

/**
 * Parse the JSON-encoded chat record and render it as numbered lines.
 * Returns undefined when the payload is not a non-empty JSON array string;
 * parse errors propagate to the caller.
 */
async function summarizeChatRecord(
  rawJson: unknown,
  account: ResolvedDingTalkAccount,
  log?: any,
): Promise<ExtractedMessage | undefined> {
  if (!rawJson || typeof rawJson !== 'string') return undefined;

  const records = JSON.parse(rawJson) as ForwardedRecord[];
  if (!Array.isArray(records) || records.length === 0) return undefined;

  // Debug: log first record structure with all keys
  const head = records[0];
  log?.info?.("[dingtalk] chatRecord first record keys: " + Object.keys(head).join(', '));
  log?.info?.("[dingtalk] chatRecord first record: " + JSON.stringify(head));

  // Unique ids for the batch lookup: staffId preferred, encrypted senderIds skipped.
  const candidateIds = records.map((r) => {
    if (r.senderStaffId) return r.senderStaffId;
    if (r.senderId && !r.senderId.startsWith('$:')) return r.senderId;
    return null;
  });
  const lookupIds = Array.from(
    new Set(candidateIds.filter((id): id is string => !!id)),
  ).slice(0, 10); // Limit to 10 users

  log?.info?.("[dingtalk] chatRecord senderIds for lookup: " + JSON.stringify(lookupIds));

  let names = new Map<string, string>();
  if (lookupIds.length > 0 && account.clientId && account.clientSecret) {
    try {
      names = await batchGetUserInfo(account.clientId, account.clientSecret, lookupIds, 3000);
      log?.info?.("[dingtalk] Resolved " + names.size + " sender names from API");
    } catch (err) {
      log?.info?.("[dingtalk] Failed to resolve sender names: " + err);
    }
  }

  const lines = await Promise.all(records.map(async (record, idx) => {
    const sender = resolveForwardedSender(record, names);
    const body = await renderForwardedBody(record, account, log);
    const when = record.createAt ? new Date(record.createAt).toLocaleString('zh-CN') : '';
    const stamp = when ? ` (${when})` : '';
    return `[${idx + 1}] ${sender}${stamp}: ${body}`;
  }));

  const text = `[聊天记录合集 - ${records.length}条消息]\n${lines.join('\n')}`;
  log?.info?.("[dingtalk] Parsed chatRecord with " + records.length + " messages");
  return { text, messageType: 'chatRecord' };
}

/**
 * Convert a raw DingTalk robot message into a structured ExtractedMessage.
 *
 * Handles text, richText, picture, audio, video, file, link and chatRecord.
 * Any other msgtype falls back to `msg.text.content`, or to a
 * "[<msgtype>消息]" placeholder when no text is present.
 * Media types only return the download code here; the caller downloads.
 *
 * @param msg     Inbound robot message.
 * @param account Account whose credentials are used for picture/user lookups.
 * @param log     Optional logger.
 */
async function extractMessageContent(
  msg: DingTalkRobotMessage,
  account: ResolvedDingTalkAccount,
  log?: any,
): Promise<ExtractedMessage> {
  const kind = msg.msgtype || 'text';
  const payload = msg.content;

  if (kind === 'text') {
    return { text: msg.text?.content?.trim() ?? '', messageType: 'text' };
  }

  if (kind === 'richText') {
    const rich = await extractRichTextContent(msg, account, log);
    return { ...rich, messageType: 'richText' };
  }

  if (kind === 'picture') {
    return extractPictureContent(msg, log);
  }

  if (kind === 'audio') {
    // DingTalk provides speech recognition result in content.recognition
    const asrText = payload?.recognition;
    const code = payload?.downloadCode;
    log?.info?.("[dingtalk] Audio message - recognition: " + (asrText || '(none)'));
    return {
      text: asrText || '[语音消息]',
      mediaDownloadCode: code,
      mediaType: 'audio',
      messageType: 'audio',
    };
  }

  if (kind === 'video') {
    const code = payload?.downloadCode;
    log?.info?.("[dingtalk] Video message - downloadCode: " + (code || '(none)'));
    return {
      text: '[视频]',
      mediaDownloadCode: code,
      mediaType: 'video',
      messageType: 'video',
    };
  }

  if (kind === 'file') {
    const code = payload?.downloadCode;
    const fileName = payload?.fileName || '未知文件';
    log?.info?.("[dingtalk] File message - fileName: " + fileName);
    return {
      text: `[文件: ${fileName}]`,
      mediaDownloadCode: code,
      mediaType: 'file',
      mediaFileName: fileName,
      messageType: 'file',
    };
  }

  if (kind === 'link') {
    // Link card: msg.link = { title, text, messageUrl, picUrl }
    const card = msg.link || payload;
    log?.info?.("[dingtalk] link message received: " + JSON.stringify(card));

    if (!card) {
      return { text: '[链接卡片]', messageType: 'link' };
    }

    // Keep only the parts that are present, in a fixed readable order.
    const pieces = [
      card.title ? `[链接] ${card.title}` : '',
      card.text || '',
      card.messageUrl ? `链接: ${card.messageUrl}` : '',
      card.picUrl ? `配图: ${card.picUrl}` : '',
    ].filter(Boolean);

    const linkText = pieces.join('\n') || '[链接卡片]';
    log?.info?.("[dingtalk] Extracted link message: " + linkText.slice(0, 100));
    return { text: linkText, messageType: 'link' };
  }

  if (kind === 'chatRecord') {
    // content.chatRecord is a JSON string containing an array of forwarded messages
    const holder = payload || (msg as any).chatRecord;
    log?.info?.("[dingtalk] chatRecord message received");

    try {
      const summary = await summarizeChatRecord(holder?.chatRecord, account, log);
      if (summary) return summary;
    } catch (e) {
      log?.info?.("[dingtalk] Failed to parse chatRecord: " + (e instanceof Error ? e.message : String(e)));
    }

    // Fallback if structure is different or parsing failed
    log?.info?.("[dingtalk] chatRecord structure not recognized, full msg: " + JSON.stringify(msg).slice(0, 500));
    return { text: '[聊天记录合集]', messageType: 'chatRecord' };
  }

  // Unknown type: try text.content, otherwise emit a typed placeholder.
  const fallbackText = msg.text?.content?.trim() || '';
  if (!fallbackText) {
    log?.info?.("[dingtalk] Unknown msgtype: " + kind + ", no text content found");
    // Log full message structure for debugging unknown types
    log?.info?.("[dingtalk] Unknown msgtype full structure: " + JSON.stringify(msg).slice(0, 1000));
  }
  return {
    text: fallbackText || `[${kind}消息]`,
    messageType: kind,
  };
}
370
-
371
/** Return the first candidate that is a string with non-blank content, trimmed; '' if none. */
function firstTrimmedString(...candidates: unknown[]): string {
  for (const candidate of candidates) {
    if (typeof candidate === 'string') {
      const trimmed = candidate.trim();
      if (trimmed) return trimmed;
    }
  }
  return '';
}

/**
 * Extract text from a richText message, trying in order:
 *  1. `msg.text.content`
 *  2. `msg.richText` — a plain string, an object with string `text` / `content` /
 *     `richText`, or an object whose `richText` is an array of `{text}` / `{content}`
 *  3. `msg.content.richText` — an array of text and picture items; pictures are
 *     downloaded and inlined as "[图片: <path>]"
 *
 * Only string fields are trimmed. Calling `.trim()` on an array-valued
 * `richText` used to throw, which silently skipped the array form in step 2.
 *
 * @returns The extracted text ('' when nothing could be extracted). The media
 *   fields of the return type are never populated by this function.
 */
async function extractRichTextContent(
  msg: DingTalkRobotMessage,
  account: ResolvedDingTalkAccount,
  log?: any,
): Promise<{ text: string; mediaDownloadCode?: string; mediaType?: 'image' }> {
  // First try: msg.text.content (DingTalk sometimes also provides text for richText)
  let text = msg.text?.content?.trim() ?? '';

  // Second try: msg.richText in its various shapes
  if (!text && msg.richText) {
    try {
      const rt = msg.richText as any;
      const richTextStr = typeof rt === 'string' ? rt : JSON.stringify(rt);
      log?.info?.("[dingtalk] Received richText message (full): " + richTextStr);

      if (typeof rt === 'string') {
        text = rt.trim();
      } else {
        text = firstTrimmedString(rt.text, rt.content, rt.richText);

        if (!text && Array.isArray(rt.richText)) {
          const textParts: string[] = [];
          for (const item of rt.richText) {
            if (item?.text) {
              textParts.push(item.text);
            } else if (item?.content) {
              textParts.push(item.content);
            }
          }
          text = textParts.join('').trim();
        }
      }

      if (text) {
        log?.info?.("[dingtalk] Extracted from richText: " + text.slice(0, 100));
      }
    } catch (err) {
      log?.info?.("[dingtalk] Failed to parse richText: " + err);
    }
  }

  // Third try: msg.content.richText array (when msgtype === 'richText')
  if (!text) {
    const content = msg.content;
    if (content?.richText && Array.isArray(content.richText)) {
      log?.info?.("[dingtalk] RichText message - msg.content: " + JSON.stringify(content).substring(0, 200));
      const parts: string[] = [];

      for (const item of content.richText) {
        if (!item) continue;

        if (item.msgType === "text" && item.content) {
          parts.push(item.content);
          continue;
        }
        if (item.text) {
          // DingTalk sometimes sends richText items as {text: "..."} without msgType wrapper
          parts.push(item.text);
          continue;
        }

        // Any remaining item carrying a download code is treated as a picture.
        const downloadCode = item.downloadCode || item.pictureDownloadCode;
        if (!downloadCode) continue;

        if (!account.clientId || !account.clientSecret) {
          // No credentials: keep a placeholder instead of calling the API with undefined.
          parts.push("[图片]");
          continue;
        }

        try {
          const robotCode = account.robotCode || account.clientId;
          const pictureResult = await downloadPicture(
            account.clientId, account.clientSecret, robotCode, downloadCode,
          );
          if (pictureResult.filePath) {
            parts.push(`[图片: ${pictureResult.filePath}]`);
            log?.info?.("[dingtalk] Downloaded picture from richText: " + pictureResult.filePath);
          } else if (pictureResult.error) {
            parts.push(`[图片下载失败: ${pictureResult.error}]`);
          } else {
            parts.push("[图片]");
          }
        } catch (err) {
          parts.push(`[图片下载出错: ${err}]`);
          log?.warn?.("[dingtalk] Error downloading picture from richText: " + err);
        }
      }

      text = parts.join('');
      if (text) {
        log?.info?.("[dingtalk] Extracted from msg.content.richText: " + text.substring(0, 100));
      }
    }
  }

  return { text };
}
466
-
467
/**
 * Build the ExtractedMessage for a picture message.
 * The download code is taken from `msg.picture.downloadCode`, falling back to
 * `msg.content.downloadCode`; the download itself is left to the caller.
 * Without a code, a text-only placeholder is returned.
 */
function extractPictureContent(msg: DingTalkRobotMessage, log?: any): ExtractedMessage {
  log?.info?.("[dingtalk] Picture message - msg.picture: " + JSON.stringify(msg.picture));
  log?.info?.("[dingtalk] Picture message - msg.content: " + JSON.stringify(msg.content));

  const downloadCode: string | undefined =
    msg.picture?.downloadCode || msg.content?.downloadCode || undefined;

  if (!downloadCode) {
    log?.info?.("[dingtalk] Picture msgtype but no downloadCode found");
    return {
      text: '[用户发送了图片(无法获取下载码)]',
      messageType: 'picture',
    };
  }

  log?.info?.("[dingtalk] Picture detected, downloadCode: " + downloadCode);
  return {
    text: '[用户发送了图片]',
    mediaDownloadCode: downloadCode,
    mediaType: 'image',
    messageType: 'picture',
  };
}
499
-
500
/**
 * Apply DM / group access control to an inbound message.
 * Sends the "Access denied" pairing hint for denied DMs under the "pairing" policy.
 *
 * @returns true when the message may be processed further.
 */
async function checkInboundAccess(
  msg: DingTalkRobotMessage,
  account: ResolvedDingTalkAccount,
  senderId: string,
  conversationId: string,
  log?: any,
): Promise<boolean> {
  // DM access control
  if (msg.conversationType === "1") {
    const dmConfig = account.config.dm ?? {};
    if (dmConfig.enabled === false) return false;
    const dmPolicy = dmConfig.policy ?? "pairing";
    if (dmPolicy === "disabled") return false;
    if (dmPolicy !== "open") {
      const allowFrom = (dmConfig.allowFrom ?? []).map(String);
      if (!isSenderAllowed(senderId, allowFrom)) {
        log?.info?.("[dingtalk] DM denied for " + senderId);
        if (dmPolicy === "pairing" && msg.sessionWebhook) {
          await sendViaSessionWebhook(
            msg.sessionWebhook,
            "Access denied. Your staffId: " + senderId + "\nAsk admin to add you.",
          ).catch(() => {});
        }
        return false;
      }
    }
  }

  // Group access control
  if (msg.conversationType === "2") {
    const groupPolicy = account.config.groupPolicy ?? "allowlist";
    if (groupPolicy === "disabled") return false;

    if (groupPolicy === "allowlist") {
      // NOTE(review): an empty allowlist lets every group through — confirm this is intended.
      const groupAllowlist = (account.config.groupAllowlist ?? []).map(String);
      if (groupAllowlist.length > 0 && !isGroupAllowed(conversationId, groupAllowlist)) {
        log?.info?.("[dingtalk] Group not in allowlist: " + conversationId);
        return false;
      }
    }

    // Check @mention requirement
    const requireMention = account.config.requireMention !== false;
    if (requireMention && !msg.isInAtList) return false;
  }

  return true;
}

/**
 * Download the media attached to a message, if any.
 * Audio is skipped only when DingTalk actually supplied ASR text
 * (`content.recognition`); the extractor's "[语音消息]" placeholder does not count.
 */
async function downloadInboundMedia(
  msg: DingTalkRobotMessage,
  extracted: ExtractedMessage,
  account: ResolvedDingTalkAccount,
  log?: any,
): Promise<{ mediaPath?: string; mediaType?: string }> {
  // With real ASR text the .amr file is not downloaded, so the agent reads the
  // transcription instead of trying to transcribe the audio again.
  const hasAsrText = extracted.messageType === 'audio' && !!msg.content?.recognition;
  if (hasAsrText) {
    log?.info?.("[dingtalk] Audio ASR text available, skipping .amr download");
    return {};
  }

  if (!extracted.mediaDownloadCode || !account.clientId || !account.clientSecret) {
    return {};
  }

  const robotCode = account.robotCode || account.clientId;
  try {
    const result = await downloadMediaFile(
      account.clientId,
      account.clientSecret,
      robotCode,
      extracted.mediaDownloadCode,
      extracted.mediaType,
    );
    if (result.filePath) {
      log?.info?.(`[dingtalk] Downloaded ${extracted.mediaType || 'media'}: ${result.filePath}`);
      return { mediaPath: result.filePath, mediaType: result.mimeType || extracted.mediaType };
    }
    if (result.error) {
      log?.warn?.(`[dingtalk] Media download failed: ${result.error}`);
    }
  } catch (err) {
    log?.warn?.(`[dingtalk] Media download error: ${err}`);
  }
  return {};
}

/** Render the content of a quoted (replied-to) message as plain text; '' when nothing is extractable. */
async function extractQuotedContent(
  repliedMsg: any,
  account: ResolvedDingTalkAccount,
  log?: any,
): Promise<string> {
  const richItems = repliedMsg.content?.richText;

  if (richItems && Array.isArray(richItems)) {
    // richText format: array of {msgType, content} or {msgType, downloadCode}
    const parts: string[] = [];

    for (const item of richItems) {
      if (item.msgType === "text" && item.content) {
        parts.push(item.content);
        continue;
      }
      if (!(item.msgType === "picture" && item.downloadCode)) continue;

      if (!account.clientId || !account.clientSecret) {
        // No credentials: keep a placeholder instead of calling the API with undefined.
        parts.push("[图片]");
        continue;
      }

      try {
        const robotCode = account.robotCode || account.clientId;
        const pictureResult = await downloadPicture(
          account.clientId,
          account.clientSecret,
          robotCode,
          item.downloadCode,
        );

        if (pictureResult.filePath) {
          parts.push(`[图片: ${pictureResult.filePath}]`);
          log?.info?.("[dingtalk] Downloaded picture from quoted message: " + pictureResult.filePath);
        } else if (pictureResult.error) {
          parts.push(`[图片下载失败: ${pictureResult.error}]`);
        } else {
          parts.push("[图片]");
        }
      } catch (err) {
        parts.push(`[图片下载出错: ${err}]`);
        log?.warn?.("[dingtalk] Error downloading picture from quoted message: " + err);
      }
    }

    return parts.join("");
  }

  if (repliedMsg.content?.text) return repliedMsg.content.text;
  if (typeof repliedMsg.content === "string") return repliedMsg.content;
  return "";
}

/**
 * Build the "[@name ...]" prefix for a message that mentions users.
 * Falls back to a head count when names cannot be resolved.
 */
async function buildMentionPrefix(
  msg: DingTalkRobotMessage,
  account: ResolvedDingTalkAccount,
  log?: any,
): Promise<string> {
  const atUsers = msg.atUsers ?? [];
  const countFallback = `[有${atUsers.length}人被@]`;

  // Only users with a staffId can be looked up (bots have none); cap at 5 API lookups.
  const userIds = atUsers
    .filter(u => u.staffId)
    .map(u => u.staffId as string)
    .slice(0, 5);

  if (userIds.length === 0 || !account.clientId || !account.clientSecret) {
    log?.info?.("[dingtalk] No staffId or credentials, using count fallback");
    return countFallback;
  }

  try {
    // Batch query user info (3s timeout — needs token fetch + API call)
    const userInfoMap = await batchGetUserInfo(account.clientId, account.clientSecret, userIds, 3000);

    if (userInfoMap.size > 0) {
      const mentions = Array.from(userInfoMap.values()).map(name => `@${name}`).join(" ");
      log?.info?.("[dingtalk] Added user mentions: " + mentions);
      return `[${mentions}]`;
    }

    log?.info?.("[dingtalk] User info fetch failed, using count fallback");
    return countFallback;
  } catch (err) {
    log?.info?.("[dingtalk] Error fetching user info: " + err + ", using count fallback");
    return countFallback;
  }
}

/**
 * Process one inbound DingTalk robot message end to end:
 * access control, content extraction, media download, quoted-reply and
 * @mention enrichment, then aggregation buffering or immediate dispatch.
 *
 * Access control (DM policy, group allowlist, @mention requirement) runs
 * before any download or user-info lookup, so denied or ignored messages
 * cause no network work.
 *
 * @param msg Inbound robot message.
 * @param ctx Monitor context for the receiving account.
 */
async function processInboundMessage(
  msg: DingTalkRobotMessage,
  ctx: DingTalkMonitorContext,
): Promise<void> {
  const { account, log } = ctx;
  // NOTE(review): result unused here; call kept in case it fails fast on a
  // missing runtime — confirm against runtime.ts.
  getDingTalkRuntime();

  const isDm = msg.conversationType === "1";
  const senderId = msg.senderStaffId || msg.senderId;
  const senderName = msg.senderNick || "";
  const conversationId = msg.conversationId;

  // Debug: log full message structure for debugging
  if (msg.msgtype === 'richText' || msg.picture || (msg.atUsers && msg.atUsers.length > 0)) {
    log?.info?.("[dingtalk-debug] Full message structure:");
    log?.info?.("[dingtalk-debug]   msgtype: " + msg.msgtype);
    log?.info?.("[dingtalk-debug]   text: " + JSON.stringify(msg.text));
    log?.info?.("[dingtalk-debug]   richText: " + JSON.stringify(msg.richText));
    log?.info?.("[dingtalk-debug]   picture: " + JSON.stringify(msg.picture));
    log?.info?.("[dingtalk-debug]   atUsers: " + JSON.stringify(msg.atUsers));
    log?.info?.("[dingtalk-debug]   RAW MESSAGE: " + JSON.stringify(msg).substring(0, 500));
  }

  if (!(await checkInboundAccess(msg, account, senderId, conversationId, log))) {
    return;
  }

  // Extract message content using structured extractor
  const extracted = await extractMessageContent(msg, account, log);
  const { mediaPath, mediaType } = await downloadInboundMedia(msg, extracted, account, log);

  let rawBody = extracted.text;

  if (!rawBody && !mediaPath) {
    log?.info?.("[dingtalk] Empty message body after all attempts, skipping. msgtype=" + msg.msgtype);
    return;
  }

  // If we have media but no text, provide a placeholder
  if (!rawBody && mediaPath) {
    rawBody = `[${extracted.messageType}] 媒体文件已下载: ${mediaPath}`;
  }

  // Handle quoted/replied messages: extract the quoted content and prepend it
  if (msg.text && (msg.text as any).isReplyMsg) {
    log?.info?.("[dingtalk] Message is a reply, full text object: " + JSON.stringify(msg.text));

    const repliedMsg = (msg.text as any).repliedMsg;
    if (repliedMsg) {
      try {
        const quotedContent = await extractQuotedContent(repliedMsg, account, log);
        if (quotedContent) {
          rawBody = `[引用回复: "${quotedContent.trim()}"]\n${rawBody}`;
          log?.info?.("[dingtalk] Added quoted message: " + quotedContent.slice(0, 50));
        } else {
          log?.info?.("[dingtalk] Reply message found but no content extracted, repliedMsg: " + JSON.stringify(repliedMsg));
        }
      } catch (err) {
        log?.info?.("[dingtalk] Failed to extract quoted message: " + err);
      }
    } else {
      log?.info?.("[dingtalk] Message marked as reply but no repliedMsg field found");
    }
  }

  // Handle @mentions: DingTalk removes @username from text.content
  if (msg.atUsers && msg.atUsers.length > 0) {
    log?.info?.("[dingtalk] Message has @mentions: " + JSON.stringify(msg.atUsers));
    const prefix = await buildMentionPrefix(msg, account, log);
    rawBody = `${prefix} ${rawBody}`;
  }

  log?.info?.("[dingtalk] " + (isDm ? "DM" : "Group") + " from " + senderName + ": " + rawBody.slice(0, 50));

  const sessionKey = "dingtalk:" + account.accountId + ":" + (isDm ? "dm" : "group") + ":" + conversationId;

  const replyTarget = {
    sessionWebhook: msg.sessionWebhook,
    sessionWebhookExpiry: msg.sessionWebhookExpiredTime,
    conversationId,
    senderId,
    isDm,
    account,
  };

  // Aggregation is on unless explicitly disabled in the account config.
  if (account.config.messageAggregation !== false) {
    await bufferMessageForAggregation({
      msg, ctx, rawBody, replyTarget, sessionKey, isDm, senderId, senderName, conversationId,
      mediaPath, mediaType,
    });
    return; // Actual dispatch happens when timer fires
  }

  // No aggregation - dispatch immediately
  await dispatchMessage({
    ctx, msg, rawBody, replyTarget, sessionKey, isDm, senderId, senderName, conversationId,
    mediaPath, mediaType,
  });
}
747
-
748
/**
 * Buffer a message for aggregation with other messages from the same sender
 * in the same conversation.
 *
 * Each call appends the message to the sender's buffer (creating it if
 * needed) and re-arms the flush timer, so the combined message is dispatched
 * once no new message has arrived for the configured delay
 * (`messageAggregationDelayMs`, default `AGGREGATION_DELAY_MS`).
 *
 * The flush runs inside a timer callback where nobody awaits it; its promise
 * is therefore caught and logged here, otherwise a dispatch failure would
 * surface as an unhandled rejection.
 */
async function bufferMessageForAggregation(params: {
  msg: DingTalkRobotMessage;
  ctx: DingTalkMonitorContext;
  rawBody: string;
  replyTarget: any;
  sessionKey: string;
  isDm: boolean;
  senderId: string;
  senderName: string;
  conversationId: string;
  mediaPath?: string;
  mediaType?: string;
}): Promise<void> {
  const { msg, ctx, rawBody, replyTarget, sessionKey, isDm, senderId, senderName, conversationId, mediaPath, mediaType } = params;
  const { account, log } = ctx;
  const bufferKey = getBufferKey(msg, account.accountId);
  const aggregationDelayMs = account.config.messageAggregationDelayMs ?? AGGREGATION_DELAY_MS;

  // Single place that arms the flush timer and owns the flush's error handling.
  const armFlushTimer = () => setTimeout(() => {
    flushMessageBuffer(bufferKey).catch((err) => {
      log?.warn?.("[dingtalk] Failed to dispatch aggregated message: " + err);
    });
  }, aggregationDelayMs);

  const fragment = { text: rawBody, timestamp: Date.now(), mediaPath, mediaType };
  const existing = messageBuffer.get(bufferKey);

  if (existing) {
    existing.messages.push(fragment);
    // Update to latest msg for reply target (use latest sessionWebhook)
    existing.msg = msg;
    existing.replyTarget = replyTarget;

    // Restart the quiet period.
    clearTimeout(existing.timer);
    existing.timer = armFlushTimer();

    log?.info?.(`[dingtalk] Message buffered, total: ${existing.messages.length} messages`);
    return;
  }

  const newEntry: BufferedMessage = {
    messages: [fragment],
    timer: armFlushTimer(),
    ctx,
    msg,
    replyTarget,
    sessionKey,
    isDm,
    senderId,
    senderName,
    conversationId,
  };
  messageBuffer.set(bufferKey, newEntry);

  log?.info?.(`[dingtalk] Message buffered (new), waiting ${aggregationDelayMs}ms for more...`);
}
806
-
807
/**
 * Remove the buffer entry for `bufferKey` and dispatch its fragments as one
 * message: texts are joined with newlines, and the media of the most recent
 * fragment that has any is attached. Does nothing when no entry exists.
 */
async function flushMessageBuffer(bufferKey: string): Promise<void> {
  const pending = messageBuffer.get(bufferKey);
  if (!pending) return;

  // Drop the entry first so messages arriving during dispatch start a new buffer.
  messageBuffer.delete(bufferKey);

  const fragments = pending.messages;
  const combinedText = fragments.map((fragment) => fragment.text).join('\n');

  // Walk backwards to find the latest fragment carrying media.
  let mediaPath: string | undefined;
  let mediaType: string | undefined;
  for (let i = fragments.length - 1; i >= 0; i--) {
    if (fragments[i].mediaPath) {
      mediaPath = fragments[i].mediaPath;
      mediaType = fragments[i].mediaType;
      break;
    }
  }

  pending.ctx.log?.info?.(`[dingtalk] Flushing buffer: ${fragments.length} message(s) combined into ${combinedText.length} chars`);

  await dispatchMessage({
    ctx: pending.ctx,
    msg: pending.msg,
    rawBody: combinedText,
    replyTarget: pending.replyTarget,
    sessionKey: pending.sessionKey,
    isDm: pending.isDm,
    senderId: pending.senderId,
    senderName: pending.senderName,
    conversationId: pending.conversationId,
    mediaPath,
    mediaType,
  });
}
834
-
835
/**
 * Dispatch a message to the agent (after aggregation or immediately).
 *
 * Steps:
 *  1. Optionally send a recallable typing indicator (or, when that branch is
 *     not taken, the legacy non-recallable "thinking" text via sessionWebhook).
 *  2. Resolve the effective config (`cfg.loadConfig()` when `cfg` exposes it).
 *  3. Dispatch through the full SDK pipeline when all required runtime hooks
 *     exist, otherwise through the buffered block dispatcher fallback.
 *
 * The typing indicator is recalled at most once: on first delivery, on error,
 * or when the dispatch settles. Errors are logged, never rethrown.
 *
 * @param params Message, routing identifiers, reply target and optional media.
 */
async function dispatchMessage(params: {
  ctx: DingTalkMonitorContext;
  msg: DingTalkRobotMessage;
  rawBody: string;
  replyTarget: any;
  sessionKey: string;
  isDm: boolean;
  senderId: string;
  senderName: string;
  conversationId: string;
  mediaPath?: string;
  mediaType?: string;
}): Promise<void> {
  const { ctx, msg, rawBody, replyTarget, sessionKey, isDm, senderId, senderName, conversationId, mediaPath, mediaType } = params;
  const { account, cfg, log, setStatus } = ctx;
  const runtime = getDingTalkRuntime();
  const isGroup = !isDm;

  // Typing indicator cleanup function (set only when the indicator was sent successfully)
  let typingCleanup: (() => Promise<void>) | null = null;

  // Send typing indicator (recallable) if enabled and REST credentials are present
  if (account.config.typingIndicator !== false && account.clientId && account.clientSecret) {
    try {
      const typingMessage = account.config.typingIndicatorMessage || '⏳ 思考中...';
      const robotCode = account.robotCode || account.clientId;

      const result = await sendTypingIndicator({
        clientId: account.clientId,
        clientSecret: account.clientSecret,
        robotCode,
        userId: isDm ? senderId : undefined,
        conversationId: !isDm ? conversationId : undefined,
        message: typingMessage,
      });

      if (result.error) {
        log?.info?.('[dingtalk] Typing indicator failed: ' + result.error);
      } else {
        typingCleanup = result.cleanup;
        log?.info?.('[dingtalk] Typing indicator sent (will be recalled on reply)');
      }
    } catch (err) {
      log?.info?.('[dingtalk] Typing indicator error: ' + err);
    }
  }
  // Legacy: thinking feedback (opt-in, non-recallable); reached when the typing
  // indicator is disabled or the REST credentials are missing
  else if (account.config.showThinking && replyTarget.sessionWebhook) {
    try {
      await sendViaSessionWebhook(replyTarget.sessionWebhook, '正在思考...');
      log?.info?.('[dingtalk] Sent thinking indicator (legacy, non-recallable)');
    } catch (_) {
      // best-effort: a failed thinking hint must not block processing
    }
  }

  // Load actual config if cfg is a config manager
  let actualCfg = cfg;
  if (cfg && typeof cfg.loadConfig === "function") {
    try {
      actualCfg = await cfg.loadConfig();
    } catch (err) {
      log?.info?.("[dingtalk] Failed to load config: " + err);
    }
  }

  // Check if the full Clawdbot Plugin SDK pipeline is available
  const hasFullPipeline = !!(
    runtime?.channel?.routing?.resolveAgentRoute &&
    runtime?.channel?.reply?.finalizeInboundContext &&
    runtime?.channel?.reply?.createReplyDispatcherWithTyping &&
    runtime?.channel?.reply?.dispatchReplyFromConfig
  );

  // Idempotent recall of the typing indicator; never rejects
  let typingCleaned = false;
  const cleanupTyping = async () => {
    if (typingCleanup && !typingCleaned) {
      typingCleaned = true;
      try {
        await typingCleanup();
        log?.info?.('[dingtalk] Typing indicator recalled');
      } catch (err) {
        log?.info?.('[dingtalk] Failed to recall typing indicator: ' + err);
      }
    }
  };

  try {
    if (hasFullPipeline) {
      // Full SDK pipeline: route → session → envelope → dispatch
      await dispatchWithFullPipeline({
        runtime, msg, rawBody, account, cfg: actualCfg, sessionKey, isDm,
        senderId, senderName, conversationId, replyTarget,
        mediaPath, mediaType, log, setStatus,
        onFirstReply: cleanupTyping,
      });
    } else if (runtime?.channel?.reply?.dispatchReplyWithBufferedBlockDispatcher) {
      // Fallback: existing buffered block dispatcher
      const ctxPayload = {
        Body: rawBody,
        RawBody: rawBody,
        CommandBody: rawBody,
        From: "dingtalk:" + senderId,
        To: isDm ? ("dingtalk:dm:" + senderId) : ("dingtalk:group:" + conversationId),
        SessionKey: sessionKey,
        AccountId: account.accountId,
        ChatType: isDm ? "direct" : "group",
        ConversationLabel: isDm ? senderName : (msg.conversationTitle ?? conversationId),
        SenderName: senderName || undefined,
        SenderId: senderId,
        WasMentioned: isGroup ? msg.isInAtList : undefined,
        Provider: "dingtalk",
        Surface: "dingtalk",
        MessageSid: msg.msgId,
        OriginatingChannel: "dingtalk",
        OriginatingTo: "dingtalk:" + conversationId,
        MediaPath: mediaPath,
        MediaType: mediaType,
        MediaUrl: mediaPath,
      };

      // Fire-and-forget: don't await to avoid blocking SDK callback during long agent runs
      runtime.channel.reply.dispatchReplyWithBufferedBlockDispatcher({
        ctx: ctxPayload,
        cfg: actualCfg,
        dispatcherOptions: {
          deliver: async (payload: any) => {
            // Recall typing indicator on first delivery
            await cleanupTyping();

            log?.info?.("[dingtalk] Deliver payload keys: " + Object.keys(payload || {}).join(',') + " text?=" + (typeof payload?.text) + " markdown?=" + (typeof payload?.markdown));
            const textToSend = resolveDeliverText(payload, log);
            if (textToSend) {
              await deliverReply(replyTarget, textToSend, log);
              setStatus?.({ lastOutboundAt: Date.now() });
            } else {
              log?.info?.("[dingtalk] Deliver: no text resolved from payload");
            }
          },
          onError: (err: any) => {
            // Also cleanup on error
            cleanupTyping().catch(() => {});
            log?.info?.("[dingtalk] Reply error: " + err);
          },
        },
      }).catch((err) => {
        log?.info?.("[dingtalk] Dispatch failed: " + err);
      }).finally(() => {
        // Recall the indicator even when the run ended without any delivery,
        // mirroring the `finally` cleanup of the full pipeline path.
        // NOTE(review): assumes the dispatcher promise settles after the reply
        // run has finished — confirm against the SDK.
        return cleanupTyping();
      });

      // Record activity
      runtime.channel?.activity?.record?.('dingtalk', account.accountId, 'message');
    } else {
      log?.info?.("[dingtalk] Runtime dispatch not available");
      await cleanupTyping();
    }
  } catch (err) {
    await cleanupTyping();
    log?.info?.("[dingtalk] Dispatch error: " + err);
  }
}
1001
-
1002
/**
 * Dispatch using the full Clawdbot Plugin SDK pipeline.
 * Uses resolveAgentRoute → session → envelope → finalizeContext → dispatch.
 *
 * `onFirstReply` is invoked at most once from here: before the first delivery,
 * or in the `finally` after dispatch when nothing was delivered.
 *
 * @param params Runtime handle, inbound message, routing identifiers, reply
 *   target, optional media and optional logging/status hooks.
 */
async function dispatchWithFullPipeline(params: {
  runtime: any;
  msg: DingTalkRobotMessage;
  rawBody: string;
  account: ResolvedDingTalkAccount;
  cfg: any;
  sessionKey: string;
  isDm: boolean;
  senderId: string;
  senderName: string;
  conversationId: string;
  replyTarget: any;
  mediaPath?: string;
  mediaType?: string;
  log?: any;
  setStatus?: (update: Record<string, unknown>) => void;
  onFirstReply?: () => Promise<void>;
}): Promise<void> {
  const { runtime: rt, msg, rawBody, account, cfg, isDm,
    senderId, senderName, conversationId, replyTarget,
    log, setStatus, onFirstReply } = params;

  let firstReplyFired = false;

  // 1. Resolve agent route
  const route = rt.channel.routing.resolveAgentRoute({
    cfg,
    channel: 'dingtalk',
    accountId: account.accountId,
    peer: { kind: isDm ? 'dm' : 'group', id: isDm ? senderId : conversationId },
  });

  // 2. Resolve store path
  const storePath = rt.channel.session?.resolveStorePath?.(cfg?.session?.store, { agentId: route.agentId });

  // 3. Get envelope format options
  const envelopeOptions = rt.channel.reply?.resolveEnvelopeFormatOptions?.(cfg) ?? {};

  // 4. Read previous timestamp for session continuity
  const previousTimestamp = rt.channel.session?.readSessionUpdatedAt?.({ storePath, sessionKey: route.sessionKey });

  // 5. Format inbound envelope (falls back to the raw body when the hook is absent)
  const fromLabel = isDm ? `${senderName} (${senderId})` : `${msg.conversationTitle || conversationId} - ${senderName}`;
  const body = rt.channel.reply.formatInboundEnvelope?.({
    channel: 'DingTalk', from: fromLabel, timestamp: msg.createAt, body: rawBody,
    chatType: isDm ? 'direct' : 'group', sender: { name: senderName, id: senderId },
    previousTimestamp, envelope: envelopeOptions,
  }) ?? rawBody;

  // 6. Finalize inbound context (includes media info)
  const to = isDm ? `dingtalk:${senderId}` : `dingtalk:group:${conversationId}`;
  const ctx = rt.channel.reply.finalizeInboundContext({
    Body: body, RawBody: rawBody, CommandBody: rawBody, From: to, To: to,
    SessionKey: route.sessionKey, AccountId: account.accountId,
    ChatType: isDm ? 'direct' : 'group',
    ConversationLabel: fromLabel,
    GroupSubject: isDm ? undefined : (msg.conversationTitle || conversationId),
    SenderName: senderName, SenderId: senderId,
    Provider: 'dingtalk', Surface: 'dingtalk',
    MessageSid: msg.msgId, Timestamp: msg.createAt,
    MediaPath: params.mediaPath, MediaType: params.mediaType, MediaUrl: params.mediaPath,
    CommandAuthorized: true,
    OriginatingChannel: 'dingtalk', OriginatingTo: to,
  });

  // 7. Record inbound session
  if (rt.channel.session?.recordInboundSession) {
    await rt.channel.session.recordInboundSession({
      storePath, sessionKey: ctx.SessionKey || route.sessionKey, ctx,
      updateLastRoute: isDm ? { sessionKey: route.mainSessionKey, channel: 'dingtalk', to: senderId, accountId: account.accountId } : undefined,
    });
  }

  // 8. Create typing-aware dispatcher
  const { dispatcher, replyOptions, markDispatchIdle } = rt.channel.reply.createReplyDispatcherWithTyping({
    responsePrefix: '',
    deliver: async (payload: any) => {
      // Recall typing indicator on first delivery
      if (!firstReplyFired && onFirstReply) {
        firstReplyFired = true;
        await onFirstReply().catch((err) => {
          log?.info?.("[dingtalk] onFirstReply error: " + err);
        });
      }

      try {
        log?.info?.("[dingtalk] Pipeline deliver payload keys: " + Object.keys(payload || {}).join(',') + " text?=" + (typeof payload?.text) + " markdown?=" + (typeof payload?.markdown));
        const textToSend = resolveDeliverText(payload, log);
        if (!textToSend) {
          log?.info?.("[dingtalk] Pipeline deliver: no text resolved from payload");
          return { ok: true };
        }
        await deliverReply(replyTarget, textToSend, log);
        setStatus?.({ lastOutboundAt: Date.now() });
        return { ok: true };
      } catch (err: unknown) {
        // A thrown value is not guaranteed to be an Error (it may even be
        // null), so derive the message defensively instead of reading
        // `.message` directly.
        const reason = err instanceof Error ? err.message : String(err);
        log?.info?.("[dingtalk] Reply delivery failed: " + reason);
        return { ok: false, error: reason };
      }
    },
  });

  // 9. Dispatch reply from config
  try {
    await rt.channel.reply.dispatchReplyFromConfig({ ctx, cfg, dispatcher, replyOptions });
  } finally {
    markDispatchIdle();
    // Ensure typing indicator is cleaned up even if no reply was sent
    if (!firstReplyFired && onFirstReply) {
      await onFirstReply().catch(() => {});
    }
  }

  // 10. Record activity
  rt.channel?.activity?.record?.('dingtalk', account.accountId, 'message');
}
1122
-
1123
/**
 * Extract text + media URL from a deliver payload.
 *
 * Text is taken from `payload.markdown` when that field is a non-empty string
 * (it may also be a boolean flag), otherwise from `payload.text`. A non-string
 * text value is logged and coerced with `String()`. If one of `mediaUrl`,
 * `media`, `imageUrl` or `image` holds an http(s) URL string, it is appended
 * to the text as markdown image syntax.
 *
 * @param payload Deliver payload; may be null/undefined.
 * @param log Optional logger exposing `info`.
 * @returns The text to send, or `undefined` when nothing usable was found.
 */
function resolveDeliverText(payload: any, log?: any): string | undefined {
  // Callers log with `payload?.…`, so a missing payload is a possible input;
  // treat it as "nothing to send" instead of throwing on property access.
  if (payload == null) {
    return undefined;
  }

  // payload.markdown may be a boolean flag (not the actual text), so check type
  let text = (typeof payload.markdown === 'string' && payload.markdown) || payload.text;

  // Guard: ensure text is a string (platform might send unexpected types)
  if (text != null && typeof text !== 'string') {
    log?.info?.("[dingtalk] Deliver payload has non-string text type=" + typeof text + ", payload keys=" + Object.keys(payload).join(','));
    text = String(text);
  }

  const mediaUrl = payload.mediaUrl || payload.media || payload.imageUrl || payload.image;

  // Require a real http(s) scheme; a bare "http" prefix would also accept
  // strings such as "httpfoo" that are not URLs.
  if (typeof mediaUrl === 'string' && /^https?:\/\//i.test(mediaUrl)) {
    log?.info?.("[dingtalk] Deliver payload includes media URL: " + mediaUrl);
    const imageMarkdown = `![image](${mediaUrl})`;
    text = text ? `${text}\n\n${imageMarkdown}` : imageMarkdown;
  }

  return text || undefined;
}
1148
-
1149
/**
 * Split `text` into pieces of at most `limit` UTF-16 code units without
 * cutting a surrogate pair in half (a piece may be one unit longer than
 * `limit` when that is the only way to keep a pair together).
 */
function splitReplyIntoChunks(text: string, limit: number): string[] {
  if (text.length <= limit) {
    return [text];
  }
  const chunks: string[] = [];
  let start = 0;
  while (start < text.length) {
    let end = Math.min(start + limit, text.length);
    if (end < text.length) {
      const lastUnit = text.charCodeAt(end - 1);
      const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
      if (endsOnHighSurrogate) {
        // Prefer pushing the pair into the next chunk; if that would leave
        // this chunk empty, take the whole pair so the loop always advances.
        end = end - 1 > start ? end - 1 : end + 1;
      }
    }
    chunks.push(text.slice(start, end));
    start = end;
  }
  return chunks;
}

/**
 * Deliver a reply to DingTalk.
 *
 * Order of attempts:
 *  1. When `longTextMode` is `'file'` and the text exceeds `longTextThreshold`,
 *     try to send it as a file (requires clientId/clientSecret).
 *  2. Otherwise (or when the file send fails) split the text into chunks and
 *     send each one via the session webhook, retrying once after one second.
 *  3. If the webhook is unusable or keeps failing, fall back to the REST API
 *     when credentials are configured.
 *
 * Delivery failures are logged, not thrown.
 *
 * @param target Reply target; reads `account`, `sessionWebhook`,
 *   `sessionWebhookExpiry`, `isDm`, `senderId` and `conversationId`.
 * @param text Reply text.
 * @param log Optional logger exposing `info`.
 */
async function deliverReply(target: any, text: string, log?: any): Promise<void> {
  const config = target.account.config;

  // A non-positive or non-numeric limit would make the chunk loop never
  // advance, so fall back to the default in that case.
  const requestedLimit = Number(config.textChunkLimit ?? 2000);
  const chunkLimit = Number.isFinite(requestedLimit) && requestedLimit >= 1 ? Math.floor(requestedLimit) : 2000;
  const messageFormat = config.messageFormat ?? "text";
  const longTextMode = config.longTextMode ?? "chunk";
  const longTextThreshold = config.longTextThreshold ?? 4000;

  // Check if we should send as file instead of text
  if (longTextMode === 'file' && text.length > longTextThreshold) {
    log?.info?.("[dingtalk] Text exceeds threshold (" + text.length + " > " + longTextThreshold + "), sending as file");

    // Only attempt file send if we have credentials (REST API required)
    if (target.account.clientId && target.account.clientSecret) {
      const fileSent = await sendTextAsFile(target, text, log);
      if (fileSent) {
        return; // Successfully sent as file
      }
      log?.info?.("[dingtalk] File send failed, falling back to chunked text");
    } else {
      log?.info?.("[dingtalk] No credentials for file send, falling back to chunked text");
    }
  }

  // Determine if this message should use markdown format
  let isMarkdown: boolean;
  if (messageFormat === 'auto') {
    isMarkdown = detectMarkdownContent(text);
    log?.info?.("[dingtalk] Auto-detected format: " + (isMarkdown ? "markdown" : "text"));
  } else {
    // Support both "markdown" and "richtext" (they're equivalent for DingTalk)
    isMarkdown = messageFormat === "markdown" || messageFormat === "richtext";
  }

  // Convert markdown tables to text format (DingTalk doesn't support tables)
  let processedText = text;
  if (isMarkdown) {
    processedText = convertMarkdownTables(text);
    // Convert bare image URLs to markdown syntax for proper display
    processedText = convertImageUrlsToMarkdown(processedText);
  }

  const chunks = splitReplyIntoChunks(processedText, chunkLimit);
  const maxRetries = 2;

  for (const chunk of chunks) {
    let webhookSuccess = false;

    // Re-read the clock per chunk: retries sleep for a second each, so a
    // timestamp captured once up front can be stale by the time it is used.
    const webhookUsable = Boolean(target.sessionWebhook) && Date.now() < target.sessionWebhookExpiry;

    // Try sessionWebhook with retry
    if (webhookUsable) {
      for (let attempt = 1; attempt <= maxRetries; attempt++) {
        try {
          log?.info?.("[dingtalk] Using sessionWebhook (attempt " + attempt + "/" + maxRetries + "), format=" + messageFormat);
          log?.info?.("[dingtalk] Sending text (" + chunk.length + " chars): " + chunk.substring(0, 200));
          let sendResult: { ok: boolean; errcode?: number; errmsg?: string };
          if (isMarkdown) {
            sendResult = await sendMarkdownViaSessionWebhook(target.sessionWebhook, "Reply", chunk);
          } else {
            sendResult = await sendViaSessionWebhook(target.sessionWebhook, chunk);
          }
          if (!sendResult.ok) {
            throw new Error(`SessionWebhook rejected: errcode=${sendResult.errcode}, errmsg=${sendResult.errmsg}`);
          }
          log?.info?.("[dingtalk] SessionWebhook send OK (errcode=" + (sendResult.errcode ?? 0) + ")");
          webhookSuccess = true;
          break;
        } catch (err) {
          log?.info?.("[dingtalk] SessionWebhook attempt " + attempt + " failed: " + (err instanceof Error ? err.message : String(err)));
          if (attempt < maxRetries) {
            // Wait 1 second before retry
            await new Promise(resolve => setTimeout(resolve, 1000));
          }
        }
      }
    }

    if (webhookSuccess) {
      continue;
    }

    // Fallback to REST API if the webhook was unusable or failed after all retries
    if (target.account.clientId && target.account.clientSecret) {
      try {
        log?.info?.(webhookUsable
          ? "[dingtalk] SessionWebhook failed after " + maxRetries + " attempts, using REST API fallback"
          : "[dingtalk] SessionWebhook unavailable or expired, using REST API fallback");
        // The chunk is passed through unchanged, whatever the message format
        await sendDingTalkRestMessage({
          clientId: target.account.clientId,
          clientSecret: target.account.clientSecret,
          robotCode: target.account.robotCode || target.account.clientId,
          userId: target.isDm ? target.senderId : undefined,
          conversationId: !target.isDm ? target.conversationId : undefined,
          text: chunk,
        });
        log?.info?.("[dingtalk] REST API send OK");
      } catch (err) {
        log?.info?.("[dingtalk] REST API also failed: " + (err instanceof Error ? err.stack : JSON.stringify(err)));
      }
    } else {
      log?.info?.("[dingtalk] No delivery method available!");
    }
  }
}
1254
-
1255
/**
 * Send `text` to the reply target as an uploaded markdown file.
 * Used when longTextMode is 'file' and the text exceeds the threshold.
 *
 * The text is converted with `textToMarkdownFile`, uploaded with
 * `uploadMediaFile`, then sent with `sendFileMessage`. Every failure is logged
 * and reported through the return value; nothing is thrown.
 *
 * @param target Reply target; reads `account`, `isDm`, `senderId`, `conversationId`.
 * @param text Content of the file.
 * @param log Optional logger exposing `info`.
 * @returns `true` when the file message was sent, `false` otherwise.
 */
async function sendTextAsFile(target: any, text: string, log?: any): Promise<boolean> {
  try {
    // Step 1: render the text into an in-memory markdown file
    const markdownFile = textToMarkdownFile(text, "AI Response");
    const fileName = markdownFile.fileName;
    log?.info?.("[dingtalk] Converting text to file: " + fileName + " (" + markdownFile.buffer.length + " bytes)");

    // Step 2: upload it to obtain a mediaId
    const uploaded = await uploadMediaFile({
      clientId: target.account.clientId,
      clientSecret: target.account.clientSecret,
      robotCode: target.account.robotCode || target.account.clientId,
      fileBuffer: markdownFile.buffer,
      fileName: fileName,
      fileType: 'file',
    });
    if (!uploaded.mediaId) {
      log?.info?.("[dingtalk] File upload failed: " + (uploaded.error || "no mediaId returned"));
      return false;
    }
    log?.info?.("[dingtalk] File uploaded, mediaId=" + uploaded.mediaId);

    // Step 3: send the uploaded file to the user (DM) or the conversation (group)
    const delivery = await sendFileMessage({
      clientId: target.account.clientId,
      clientSecret: target.account.clientSecret,
      robotCode: target.account.robotCode || target.account.clientId,
      userId: target.isDm ? target.senderId : undefined,
      conversationId: !target.isDm ? target.conversationId : undefined,
      mediaId: uploaded.mediaId,
      fileName: fileName,
    });
    if (delivery.ok) {
      log?.info?.("[dingtalk] File sent successfully");
      return true;
    }
    log?.info?.("[dingtalk] File send failed: " + (delivery.error || "unknown error"));
    return false;
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    log?.info?.("[dingtalk] sendTextAsFile error: " + reason);
    return false;
  }
}
1305
-
1306
/**
 * Convert bare image URLs to markdown image syntax.
 *
 * Two shapes are rewritten:
 *  - `图N: https://….png`  becomes `![图N](url)`
 *  - a bare image URL at line start or after whitespace becomes `![image](url)`
 *
 * A URL only counts as an image when the extension (plus optional query
 * string) is really the end of the URL. `https://x/a.png.html` or
 * `https://a.png.example.com/` are left untouched instead of being cut after
 * `.png`. URLs already inside markdown link/image syntax are preceded by `(`,
 * not whitespace, so they are not matched.
 *
 * @param text Text that may contain image URLs.
 * @returns The text with image URLs wrapped in markdown image syntax.
 */
function convertImageUrlsToMarkdown(text: string): string {
  // Pattern 1: "图X: https://..." format (common Agent output).
  // The trailing negative lookahead rejects matches where the URL continues
  // with more path/host characters after the image extension.
  const labelledImage = /图(\d+):\s*(https?:\/\/\S+\.(?:png|jpg|jpeg|gif|webp)(?:\?\S*)?)(?!\.?[\w\-\/%])/gi;
  text = text.replace(labelledImage, (_match, num, url) => `![图${num}](${url})`);

  // Pattern 2: bare image URLs at line start or preceded by whitespace.
  // The captured leading whitespace is written back unchanged. The lookbehind
  // skips a URL that follows "](" plus whitespace, i.e. a spaced-out link target.
  const bareImage = /(?<!\]\()(^|\s)(https?:\/\/\S+\.(?:png|jpg|jpeg|gif|webp)(?:\?\S*)?)(?!\.?[\w\-\/%])/gim;
  text = text.replace(bareImage, (_match, lead, url) => `${lead}![image](${url})`);

  return text;
}
1327
-
1328
/**
 * Convert markdown tables to a fenced plain-text block.
 * DingTalk doesn't support markdown tables, so we convert them to readable text.
 *
 * A run of consecutive `|…|` rows is treated as a table only if it has at
 * least two rows and one of them is a separator row (`|---|---|`). Separator
 * rows are dropped; the cells of every other row are trimmed, empty cells are
 * removed and the rest joined with ` | `.
 *
 * Rows may end with `\n`, `\r\n` or the end of the text, so a table that is the
 * last thing in a message (no trailing newline) is converted in full.
 *
 * @param text Markdown text that may contain tables.
 * @returns The text with each recognised table replaced by a fenced block.
 */
function convertMarkdownTables(text: string): string {
  // Each row: "|…|", optional trailing blanks, then a line break or end of text.
  const tableRegex = /(?:\|.+\|[ \t]*(?:\r?\n|$))+/g;
  const isSeparatorRow = (line: string): boolean => /^[\s|:-]+$/.test(line.replace(/\|/g, ''));

  return text.replace(tableRegex, (match) => {
    const lines = match.trim().split(/\r?\n/);
    if (lines.length < 2) return match;

    // Check if it's a valid table (has separator line)
    if (!lines.some(isSeparatorRow)) return match;

    // Convert to plain text format
    let result = '\n```\n';
    for (const line of lines) {
      // Skip separator lines (|---|---|)
      if (isSeparatorRow(line)) continue;

      const cells = line.split('|').map(c => c.trim()).filter(c => c);
      result += cells.join(' | ') + '\n';
    }
    result += '```\n';
    return result;
  });
}
1357
-
1358
/**
 * Decide whether `text` contains markdown worth rendering as markdown.
 *
 * Returns true as soon as any of these appears: an ATX header, a bullet or
 * numbered list item, a blockquote, a code fence, bold text, a link, or an
 * image. Line-anchored checks apply to every line of the text.
 *
 * @param text Candidate reply text.
 * @returns `true` when at least one markdown feature is found.
 */
function detectMarkdownContent(text: string): boolean {
  const markdownSignals: RegExp[] = [
    /^#{1,6}\s/m,               // header
    /^\s*[-*+]\s/m,             // bullet list item
    /^\s*\d+\.\s/m,             // numbered list item
    /^\s*>/m,                   // blockquote
    /```/,                      // code fence
    /\*\*[^*]+\*\*/,            // bold
    /\[[^\]]+\]\([^)]+\)/,      // link
    /!\[[^\]]*\]\([^)]+\)/,     // image
  ];
  for (const signal of markdownSignals) {
    if (signal.test(text)) {
      return true;
    }
  }
  return false;
}
1365
-
1366
/**
 * Check a sender ID against an allow-list.
 *
 * A literal "*" entry allows everyone. Otherwise the sender ID and every
 * entry are trimmed and lower-cased before being compared for equality.
 *
 * @param senderId ID of the message sender.
 * @param allowFrom Allowed sender IDs (or "*").
 * @returns `true` when the sender is allowed.
 */
function isSenderAllowed(senderId: string, allowFrom: string[]): boolean {
  const wildcardPresent = allowFrom.includes("*");
  if (wildcardPresent) {
    return true;
  }
  const wanted = senderId.trim().toLowerCase();
  const normalizedEntries = allowFrom.map((candidate) => String(candidate).trim().toLowerCase());
  return normalizedEntries.includes(wanted);
}
1374
-
1375
/**
 * Check a group conversation ID against an allow-list.
 *
 * A literal "*" entry allows every group. Otherwise the conversation ID and
 * every entry are trimmed and lower-cased before being compared for equality.
 *
 * @param conversationId ID of the group conversation.
 * @param allowlist Allowed conversation IDs (or "*").
 * @returns `true` when the group is allowed.
 */
function isGroupAllowed(conversationId: string, allowlist: string[]): boolean {
  if (allowlist.includes("*")) {
    return true;
  }
  const target = conversationId.trim().toLowerCase();
  let allowed = false;
  for (let i = 0; i < allowlist.length && !allowed; i++) {
    // Skip holes in sparse arrays, as Array.prototype.some does.
    if (!(i in allowlist)) continue;
    allowed = String(allowlist[i]).trim().toLowerCase() === target;
  }
  return allowed;
}
1
+ import type { DingTalkRobotMessage, ResolvedDingTalkAccount, ExtractedMessage } from "./types.js";
2
+ import { sendViaSessionWebhook, sendMarkdownViaSessionWebhook, sendDingTalkRestMessage, batchGetUserInfo, downloadPicture, downloadMediaFile, cleanupOldMedia, uploadMediaFile, sendFileMessage, textToMarkdownFile, sendTypingIndicator } from "./api.js";
3
+ import { getDingTalkRuntime } from "./runtime.js";
4
+
5
+ // ============================================================================
6
+ // Message Aggregation Buffer
7
+ // ============================================================================
8
+ // When users share links via DingTalk's "share link" feature, the message may
9
+ // arrive as multiple separate messages (text + URL). This buffer aggregates
10
+ // messages from the same sender within a short time window.
11
+
12
/**
 * One pending aggregation entry: the messages collected so far for a buffer
 * key, plus everything needed to dispatch them once the entry is flushed.
 */
interface BufferedMessage {
  /** Collected message parts; each may carry a downloaded media path and type. */
  messages: {
    text: string;
    timestamp: number;
    mediaPath?: string;
    mediaType?: string;
  }[];
  /** Handle returned by `setTimeout` — presumably the pending flush timer; confirm at the buffering call site. */
  timer: ReturnType<typeof setTimeout>;
  /** Monitor context the messages arrived under. */
  ctx: DingTalkMonitorContext;
  /** Latest raw message, kept for the reply target. */
  msg: DingTalkRobotMessage;
  /** Reply target handed to the dispatcher unchanged. */
  replyTarget: any;
  /** Session key handed to the dispatcher. */
  sessionKey: string;
  /** True for a direct message, false for a group conversation. */
  isDm: boolean;
  /** ID of the sender. */
  senderId: string;
  /** Display name of the sender. */
  senderName: string;
  /** ID of the conversation the messages belong to. */
  conversationId: string;
}
24
+
25
/** Pending aggregation entries, keyed by the value of `getBufferKey`. */
const messageBuffer = new Map<string, BufferedMessage>();

/** Aggregation window: 2 seconds - balance between UX and catching split messages. */
const AGGREGATION_DELAY_MS = 2000;

/**
 * Build the aggregation key for a message: account, conversation and sender,
 * joined with ":". The sender part is `senderId`, or `senderStaffId` when
 * `senderId` is empty.
 *
 * @param msg Inbound robot message.
 * @param accountId ID of the account that received the message.
 * @returns Key of the form `accountId:conversationId:sender`.
 */
function getBufferKey(msg: DingTalkRobotMessage, accountId: string): string {
  const sender = msg.senderId || msg.senderStaffId;
  return accountId + ":" + msg.conversationId + ":" + sender;
}
31
+
32
+ // ============================================================================
33
+
34
/**
 * Everything `startDingTalkMonitor` needs to run one DingTalk account.
 */
export interface DingTalkMonitorContext {
  /** Resolved account; `clientId` and `clientSecret` must be set or the monitor refuses to start. */
  account: ResolvedDingTalkAccount;
  /** Configuration, or a config manager exposing an async `loadConfig()` that returns it. */
  cfg: any;
  /** Aborting this signal disconnects the stream client and stops the periodic media cleanup. */
  abortSignal: AbortSignal;
  /** Optional logger; only `info` is called, and only when present. */
  log?: any;
  /** Optional status sink; receives partial updates such as `running` or `lastInboundAt`. */
  setStatus?: (update: Record<string, unknown>) => void;
}
41
+
42
/**
 * Start the DingTalk Stream monitor for one account.
 *
 * Cleans up old media once, schedules an hourly media cleanup, loads the
 * `dingtalk-stream` SDK, registers the robot-message callback and connects.
 * Each inbound message is ACKed immediately and then handed to
 * `processInboundMessage`; processing errors are logged, not rethrown.
 *
 * Aborting `ctx.abortSignal` stops the hourly cleanup, disconnects the client
 * and reports `running: false`. If startup fails (SDK import, client setup or
 * connect), the hourly cleanup timer is stopped before the error propagates,
 * so a failed start does not leave a timer running.
 *
 * @param ctx Account, abort signal and optional logging/status hooks.
 * @throws Error when credentials are missing, the SDK cannot be loaded, or
 *   connecting fails.
 */
export async function startDingTalkMonitor(ctx: DingTalkMonitorContext): Promise<void> {
  const { account, abortSignal, log, setStatus } = ctx;

  if (!account.clientId || !account.clientSecret) {
    throw new Error("DingTalk clientId/clientSecret not configured");
  }

  // Clean up old pictures on startup
  cleanupOldMedia();

  // Schedule periodic cleanup every hour
  const cleanupInterval = setInterval(() => {
    cleanupOldMedia();
  }, 60 * 60 * 1000); // 1 hour
  const stopCleanup = () => clearInterval(cleanupInterval);

  // Clean up on abort (only if abortSignal is provided)
  if (abortSignal) {
    abortSignal.addEventListener('abort', stopCleanup, { once: true });
  }

  try {
    let DWClient: any;
    let TOPIC_ROBOT: any;
    try {
      const mod = await import("dingtalk-stream");
      DWClient = mod.DWClient || mod.default?.DWClient || mod.default;
      TOPIC_ROBOT = mod.TOPIC_ROBOT || mod.default?.TOPIC_ROBOT || "/v1.0/im/bot/messages/get";
    } catch (err) {
      throw new Error("Failed to import dingtalk-stream SDK: " + err);
    }

    if (!DWClient) throw new Error("DWClient not found in dingtalk-stream");

    log?.info?.("[dingtalk:" + account.accountId + "] Starting Stream...");

    const client = new DWClient({
      clientId: account.clientId,
      clientSecret: account.clientSecret,
    });

    client.registerCallbackListener(TOPIC_ROBOT, async (downstream: any) => {
      // Immediately ACK to prevent DingTalk from retrying (60s timeout)
      // SDK method is socketCallBackResponse, not socketResponse
      try {
        client.socketCallBackResponse(downstream.headers.messageId, { status: 'SUCCESS' });
      } catch (_) { /* best-effort ACK */ }

      try {
        const data: DingTalkRobotMessage = typeof downstream.data === "string"
          ? JSON.parse(downstream.data) : downstream.data;
        setStatus?.({ lastInboundAt: Date.now() });
        await processInboundMessage(data, ctx);
      } catch (err) {
        log?.info?.("[dingtalk] Message error: " + err);
      }
      return { status: "SUCCESS", message: "OK" };
    });

    // Acknowledge every other event without processing it
    client.registerAllEventListener(() => {
      return { status: "SUCCESS", message: "OK" };
    });

    const onAbort = () => {
      try { client.disconnect?.(); } catch {}
      setStatus?.({ running: false, lastStopAt: Date.now() });
    };
    if (abortSignal) {
      abortSignal.addEventListener("abort", onAbort, { once: true });
    }

    await client.connect();
  } catch (err) {
    // Startup failed: stop the hourly timer so it does not outlive this call.
    stopCleanup();
    if (abortSignal) {
      abortSignal.removeEventListener('abort', stopCleanup);
    }
    throw err;
  }

  log?.info?.("[dingtalk:" + account.accountId + "] Stream connected");
  setStatus?.({ running: true, lastStartAt: Date.now() });
}
117
+
118
+ /**
119
+ * Extract message content from DingTalk message into a structured format.
120
+ * Handles: text, richText, picture, audio, video, file.
121
+ */
122
+ async function extractMessageContent(
123
+ msg: DingTalkRobotMessage,
124
+ account: ResolvedDingTalkAccount,
125
+ log?: any,
126
+ ): Promise<ExtractedMessage> {
127
+ const msgtype = msg.msgtype || 'text';
128
+ const content = msg.content;
129
+
130
+ switch (msgtype) {
131
+ case 'text': {
132
+ return {
133
+ text: msg.text?.content?.trim() ?? '',
134
+ messageType: 'text',
135
+ };
136
+ }
137
+
138
+ case 'richText': {
139
+ const result = await extractRichTextContent(msg, account, log);
140
+ return { ...result, messageType: 'richText' };
141
+ }
142
+
143
+ case 'picture': {
144
+ return extractPictureContent(msg, log);
145
+ }
146
+
147
+ case 'audio': {
148
+ // DingTalk provides speech recognition result in content.recognition
149
+ const recognition = content?.recognition;
150
+ const downloadCode = content?.downloadCode;
151
+ log?.info?.("[dingtalk] Audio message - recognition: " + (recognition || '(none)'));
152
+ return {
153
+ text: recognition || '[语音消息]',
154
+ mediaDownloadCode: downloadCode,
155
+ mediaType: 'audio',
156
+ messageType: 'audio',
157
+ };
158
+ }
159
+
160
+ case 'video': {
161
+ const downloadCode = content?.downloadCode;
162
+ log?.info?.("[dingtalk] Video message - downloadCode: " + (downloadCode || '(none)'));
163
+ return {
164
+ text: '[视频]',
165
+ mediaDownloadCode: downloadCode,
166
+ mediaType: 'video',
167
+ messageType: 'video',
168
+ };
169
+ }
170
+
171
+ case 'file': {
172
+ const downloadCode = content?.downloadCode;
173
+ const fileName = content?.fileName || '未知文件';
174
+ log?.info?.("[dingtalk] File message - fileName: " + fileName);
175
+ return {
176
+ text: `[文件: ${fileName}]`,
177
+ mediaDownloadCode: downloadCode,
178
+ mediaType: 'file',
179
+ mediaFileName: fileName,
180
+ messageType: 'file',
181
+ };
182
+ }
183
+
184
+ case 'link': {
185
+ // Link card message - contains title, text, messageUrl, and optional picUrl
186
+ // Structure: msg.link = { title, text, messageUrl, picUrl }
187
+ const linkContent = msg.link || content;
188
+ log?.info?.("[dingtalk] link message received: " + JSON.stringify(linkContent));
189
+
190
+ if (linkContent) {
191
+ const title = linkContent.title || '';
192
+ const text = linkContent.text || '';
193
+ const messageUrl = linkContent.messageUrl || '';
194
+ const picUrl = linkContent.picUrl || '';
195
+
196
+ // Combine all parts into a readable format
197
+ const parts: string[] = [];
198
+ if (title) parts.push(`[链接] ${title}`);
199
+ if (text) parts.push(text);
200
+ if (messageUrl) parts.push(`链接: ${messageUrl}`);
201
+ if (picUrl) parts.push(`配图: ${picUrl}`);
202
+
203
+ const resultText = parts.join('\n') || '[链接卡片]';
204
+ log?.info?.("[dingtalk] Extracted link message: " + resultText.slice(0, 100));
205
+
206
+ return {
207
+ text: resultText,
208
+ messageType: 'link',
209
+ };
210
+ }
211
+
212
+ return {
213
+ text: '[链接卡片]',
214
+ messageType: 'link',
215
+ };
216
+ }
217
+
218
+ case 'chatRecord': {
219
+ // Chat record collection - contains multiple forwarded messages
220
+ // Structure: content.chatRecord is a JSON string containing an array of messages
221
+ const chatRecordContent = content || (msg as any).chatRecord;
222
+ log?.info?.("[dingtalk] chatRecord message received");
223
+
224
+ try {
225
+ // chatRecord is a JSON string, need to parse it
226
+ const chatRecordStr = chatRecordContent?.chatRecord;
227
+ if (chatRecordStr && typeof chatRecordStr === 'string') {
228
+ const records = JSON.parse(chatRecordStr) as Array<{
229
+ senderId?: string;
230
+ senderStaffId?: string; // Non-encrypted userId (available when app is published)
231
+ senderNick?: string;
232
+ msgType?: string;
233
+ content?: string;
234
+ downloadCode?: string; // For media messages (picture, video, file)
235
+ createAt?: number;
236
+ }>;
237
+
238
+ if (Array.isArray(records) && records.length > 0) {
239
+ // Debug: log first record structure with all keys
240
+ const firstRecord = records[0];
241
+ log?.info?.("[dingtalk] chatRecord first record keys: " + Object.keys(firstRecord).join(', '));
242
+ log?.info?.("[dingtalk] chatRecord first record: " + JSON.stringify(firstRecord));
243
+
244
+ // Collect unique userIds for batch lookup
245
+ // Prefer senderStaffId (non-encrypted) over senderId
246
+ const senderIds = [...new Set(
247
+ records
248
+ .map(r => r.senderStaffId || (r.senderId && !r.senderId.startsWith('$:') ? r.senderId : null))
249
+ .filter((id): id is string => !!id)
250
+ )].slice(0, 10); // Limit to 10 users
251
+
252
+ log?.info?.("[dingtalk] chatRecord senderIds for lookup: " + JSON.stringify(senderIds));
253
+
254
+ // Try to resolve sender names via API
255
+ let senderNameMap = new Map<string, string>();
256
+ if (senderIds.length > 0 && account.clientId && account.clientSecret) {
257
+ try {
258
+ senderNameMap = await batchGetUserInfo(account.clientId, account.clientSecret, senderIds, 3000);
259
+ log?.info?.("[dingtalk] Resolved " + senderNameMap.size + " sender names from API");
260
+ } catch (err) {
261
+ log?.info?.("[dingtalk] Failed to resolve sender names: " + err);
262
+ }
263
+ }
264
+
265
+ // Process records with async image downloads
266
+ const formattedRecords = await Promise.all(records.map(async (record, idx) => {
267
+ // Try: senderNick > API resolved name (via staffId or senderId) > fallback
268
+ let sender = record.senderNick;
269
+ if (!sender) {
270
+ // Try to get name from API lookup
271
+ const lookupId = record.senderStaffId || record.senderId;
272
+ if (lookupId) {
273
+ sender = senderNameMap.get(lookupId);
274
+ }
275
+ // Fallback for encrypted IDs
276
+ if (!sender && record.senderId?.startsWith('$:')) {
277
+ sender = '成员';
278
+ }
279
+ }
280
+ sender = sender || '未知';
281
+
282
+ // Handle different message types in chatRecord
283
+ let msgContent: string;
284
+ switch (record.msgType) {
285
+ case 'text':
286
+ msgContent = record.content || '[空消息]';
287
+ break;
288
+ case 'picture':
289
+ case 'image':
290
+ // Try to download the image
291
+ if (record.downloadCode && account.clientId && account.clientSecret) {
292
+ try {
293
+ const robotCode = account.robotCode || account.clientId;
294
+ const pictureResult = await downloadPicture(
295
+ account.clientId, account.clientSecret, robotCode!, record.downloadCode,
296
+ );
297
+ if (pictureResult.filePath) {
298
+ msgContent = `[图片: ${pictureResult.filePath}]`;
299
+ log?.info?.("[dingtalk] Downloaded chatRecord picture: " + pictureResult.filePath);
300
+ } else if (pictureResult.error) {
301
+ msgContent = `[图片下载失败: ${pictureResult.error}]`;
302
+ } else {
303
+ msgContent = '[图片]';
304
+ }
305
+ } catch (err) {
306
+ log?.info?.("[dingtalk] Error downloading chatRecord picture: " + err);
307
+ msgContent = '[图片]';
308
+ }
309
+ } else {
310
+ msgContent = '[图片]';
311
+ }
312
+ break;
313
+ case 'video':
314
+ msgContent = '[视频]';
315
+ break;
316
+ case 'file':
317
+ msgContent = '[文件]';
318
+ break;
319
+ case 'voice':
320
+ case 'audio':
321
+ msgContent = '[语音]';
322
+ break;
323
+ case 'richText':
324
+ msgContent = record.content || '[富文本消息]';
325
+ break;
326
+ case 'markdown':
327
+ msgContent = record.content || '[Markdown消息]';
328
+ break;
329
+ default:
330
+ msgContent = record.content || `[${record.msgType || '未知'}消息]`;
331
+ }
332
+ const time = record.createAt ? new Date(record.createAt).toLocaleString('zh-CN') : '';
333
+ return `[${idx + 1}] ${sender}${time ? ` (${time})` : ''}: ${msgContent}`;
334
+ }));
335
+ const text = `[聊天记录合集 - ${records.length}条消息]\n${formattedRecords.join('\n')}`;
336
+ log?.info?.("[dingtalk] Parsed chatRecord with " + records.length + " messages");
337
+ return {
338
+ text,
339
+ messageType: 'chatRecord',
340
+ };
341
+ }
342
+ }
343
+ } catch (e) {
344
+ log?.info?.("[dingtalk] Failed to parse chatRecord: " + (e instanceof Error ? e.message : String(e)));
345
+ }
346
+
347
+ // Fallback if structure is different or parsing failed
348
+ log?.info?.("[dingtalk] chatRecord structure not recognized, full msg: " + JSON.stringify(msg).slice(0, 500));
349
+ return {
350
+ text: '[聊天记录合集]',
351
+ messageType: 'chatRecord',
352
+ };
353
+ }
354
+
355
+ default: {
356
+ // Fallback: try text.content for unknown message types
357
+ const text = msg.text?.content?.trim() || '';
358
+ if (!text) {
359
+ log?.info?.("[dingtalk] Unknown msgtype: " + msgtype + ", no text content found");
360
+ // Log full message structure for debugging unknown types
361
+ log?.info?.("[dingtalk] Unknown msgtype full structure: " + JSON.stringify(msg).slice(0, 1000));
362
+ }
363
+ return {
364
+ text: text || `[${msgtype}消息]`,
365
+ messageType: msgtype,
366
+ };
367
+ }
368
+ }
369
+ }
370
+
371
/**
 * Extract plain text from a DingTalk richText message.
 *
 * Sources are tried in order; the first one that yields non-empty text wins:
 *  1. `msg.text.content`
 *  2. `msg.richText` — either a string, an object carrying a string in
 *     `text` / `content` / `richText`, or an object whose `richText` is an
 *     array of `{ text }` / `{ content }` items
 *  3. `msg.content.richText` — an array of items; text items are concatenated
 *     and picture items are downloaded and replaced by a path placeholder
 *
 * @param msg - Inbound robot message.
 * @param account - Account whose credentials are used for picture downloads.
 * @param log - Optional logger (`info` / `warn` are used when present).
 * @returns An object whose `text` may be empty. The optional media fields of
 *          the return type are never populated by this function.
 */
async function extractRichTextContent(
  msg: DingTalkRobotMessage,
  account: ResolvedDingTalkAccount,
  log?: any,
): Promise<{ text: string; mediaDownloadCode?: string; mediaType?: 'image' }> {
  // Only strings can be trimmed; calling .trim() on an array or object would
  // throw and abort the whole second pass.
  const trimmedString = (value: unknown): string =>
    typeof value === 'string' ? value.trim() : '';

  // First try: msg.text.content (DingTalk sometimes also provides text for richText)
  let text = msg.text?.content?.trim() ?? '';

  // Second try: msg.richText in its various shapes
  if (!text && msg.richText) {
    try {
      const rt = msg.richText as any;
      const richTextStr = typeof rt === 'string' ? rt : JSON.stringify(rt);
      log?.info?.("[dingtalk] Received richText message (full): " + richTextStr);

      if (typeof rt === 'string') {
        text = rt.trim();
      } else {
        text = trimmedString(rt.text)
          || trimmedString(rt.content)
          || trimmedString(rt.richText);

        if (!text && Array.isArray(rt.richText)) {
          const textParts: string[] = [];
          for (const item of rt.richText) {
            if (item?.text) {
              textParts.push(item.text);
            } else if (item?.content) {
              textParts.push(item.content);
            }
          }
          text = textParts.join('').trim();
        }
      }

      if (text) {
        log?.info?.("[dingtalk] Extracted from richText: " + text.slice(0, 100));
      }
    } catch (err) {
      log?.info?.("[dingtalk] Failed to parse richText: " + err);
    }
  }

  // Third try: msg.content.richText array (when msgtype === 'richText')
  if (!text) {
    const content = msg.content;
    if (content?.richText && Array.isArray(content.richText)) {
      log?.info?.("[dingtalk] RichText message - msg.content: " + JSON.stringify(content).substring(0, 200));
      const parts: string[] = [];

      for (const item of content.richText) {
        if (item.msgType === "text" && item.content) {
          parts.push(item.content);
          continue;
        }
        if (item.text) {
          // DingTalk sometimes sends richText items as {text: "..."} without msgType wrapper
          parts.push(item.text);
          continue;
        }

        // Any item carrying a download code is treated as a picture.
        const downloadCode = item.downloadCode || item.pictureDownloadCode;
        if (!downloadCode) {
          continue;
        }

        // Without credentials the download cannot succeed; keep a placeholder
        // so the reader still knows a picture was part of the message.
        if (!account.clientId || !account.clientSecret) {
          parts.push("[图片]");
          log?.warn?.("[dingtalk] Missing credentials, skipping picture download from richText");
          continue;
        }

        try {
          const robotCode = account.robotCode || account.clientId;
          const pictureResult = await downloadPicture(
            account.clientId, account.clientSecret, robotCode, downloadCode,
          );
          if (pictureResult.filePath) {
            parts.push(`[图片: ${pictureResult.filePath}]`);
            log?.info?.("[dingtalk] Downloaded picture from richText: " + pictureResult.filePath);
          } else if (pictureResult.error) {
            parts.push(`[图片下载失败: ${pictureResult.error}]`);
          } else {
            parts.push("[图片]");
          }
        } catch (err) {
          parts.push(`[图片下载出错: ${err}]`);
          log?.warn?.("[dingtalk] Error downloading picture from richText: " + err);
        }
      }

      text = parts.join('');
      if (text) {
        log?.info?.("[dingtalk] Extracted from msg.content.richText: " + text.substring(0, 100));
      }
    }
  }

  return { text };
}
466
+
467
/**
 * Build the extracted-message record for a picture message.
 *
 * The download code is looked up in `msg.picture.downloadCode` first and in
 * `msg.content.downloadCode` second. When one is found it is returned as
 * `mediaDownloadCode` together with `mediaType: 'image'`; otherwise only a
 * placeholder text is returned.
 *
 * @param msg - Inbound robot message with msgtype picture.
 * @param log - Optional logger.
 * @returns The extracted message, always with `messageType: 'picture'`.
 */
function extractPictureContent(msg: DingTalkRobotMessage, log?: any): ExtractedMessage {
  log?.info?.("[dingtalk] Picture message - msg.picture: " + JSON.stringify(msg.picture));
  log?.info?.("[dingtalk] Picture message - msg.content: " + JSON.stringify(msg.content));

  // Prefer the code on msg.picture, fall back to the one on msg.content.
  const code: string | undefined =
    msg.picture?.downloadCode || msg.content?.downloadCode || undefined;

  if (!code) {
    log?.info?.("[dingtalk] Picture msgtype but no downloadCode found");
    return {
      text: '[用户发送了图片(无法获取下载码)]',
      messageType: 'picture',
    };
  }

  log?.info?.("[dingtalk] Picture detected, downloadCode: " + code);
  return {
    text: '[用户发送了图片]',
    mediaDownloadCode: code,
    mediaType: 'image',
    messageType: 'picture',
  };
}
499
+
500
/**
 * Handle one inbound DingTalk robot message.
 *
 * Steps, in order:
 *  1. Apply DM / group access control. This runs before any content is
 *     fetched so that a sender who is not allowed to talk to the bot cannot
 *     trigger media downloads or user-info lookups.
 *  2. Extract text and, when a download code is present, download the media
 *     (skipped for audio messages that already carry recognised text).
 *  3. Prepend quoted-reply content and an @mention prefix to the text.
 *  4. Hand the result to the aggregation buffer, or dispatch it immediately
 *     when aggregation is disabled in the account config.
 *
 * Messages that end up with neither text nor media are dropped.
 *
 * @param msg - Raw robot message.
 * @param ctx - Monitor context; `account` and `log` are read here and the
 *              context is forwarded unchanged to the buffer / dispatcher.
 */
async function processInboundMessage(
  msg: DingTalkRobotMessage,
  ctx: DingTalkMonitorContext,
): Promise<void> {
  const { account, log } = ctx;

  const isDm = msg.conversationType === "1";
  // Everything that is not a DM is handled as a group conversation. The
  // session key below uses the same DM / non-DM split, so a message with an
  // unexpected conversationType must also pass the group access checks.
  const isGroup = !isDm;

  const senderId = msg.senderStaffId || msg.senderId;
  const senderName = msg.senderNick || "";
  const conversationId = msg.conversationId;

  // Debug: log full message structure for debugging
  if (msg.msgtype === 'richText' || msg.picture || (msg.atUsers && msg.atUsers.length > 0)) {
    log?.info?.("[dingtalk-debug] Full message structure:");
    log?.info?.("[dingtalk-debug]   msgtype: " + msg.msgtype);
    log?.info?.("[dingtalk-debug]   text: " + JSON.stringify(msg.text));
    log?.info?.("[dingtalk-debug]   richText: " + JSON.stringify(msg.richText));
    log?.info?.("[dingtalk-debug]   picture: " + JSON.stringify(msg.picture));
    log?.info?.("[dingtalk-debug]   atUsers: " + JSON.stringify(msg.atUsers));
    log?.info?.("[dingtalk-debug]   RAW MESSAGE: " + JSON.stringify(msg).substring(0, 500));
  }

  // DM access control
  if (isDm) {
    const dmConfig = account.config.dm ?? {};
    if (dmConfig.enabled === false) return;
    const dmPolicy = dmConfig.policy ?? "pairing";
    if (dmPolicy === "disabled") return;
    if (dmPolicy !== "open") {
      const allowFrom = (dmConfig.allowFrom ?? []).map(String);
      if (!isSenderAllowed(senderId, allowFrom)) {
        log?.info?.("[dingtalk] DM denied for " + senderId);
        if (dmPolicy === "pairing" && msg.sessionWebhook) {
          // Best effort: tell the sender which id the admin has to allow.
          await sendViaSessionWebhook(
            msg.sessionWebhook,
            "Access denied. Your staffId: " + senderId + "\nAsk admin to add you.",
          ).catch(() => {});
        }
        return;
      }
    }
  }

  // Group access control
  if (isGroup) {
    const groupPolicy = account.config.groupPolicy ?? "allowlist";
    if (groupPolicy === "disabled") return;

    // Check group whitelist (an empty allowlist does not block any group)
    if (groupPolicy === "allowlist") {
      const groupAllowlist = (account.config.groupAllowlist ?? []).map(String);
      if (groupAllowlist.length > 0 && !isGroupAllowed(conversationId, groupAllowlist)) {
        log?.info?.("[dingtalk] Group not in allowlist: " + conversationId);
        return;
      }
    }

    // Check @mention requirement
    const requireMention = account.config.requireMention !== false;
    if (requireMention && !msg.isInAtList) return;
  }

  // Extract message content using structured extractor
  const extracted = await extractMessageContent(msg, account, log);

  // Download media if present (picture/video/file — but skip audio when ASR text exists)
  let mediaPath: string | undefined;
  let mediaType: string | undefined;

  // For audio messages with successful ASR recognition, use the text directly
  // and skip downloading the .amr file (which would confuse the agent into
  // trying Whisper instead of reading the already-transcribed text).
  const skipMediaDownload = extracted.messageType === 'audio' && !!extracted.text;

  if (!skipMediaDownload && extracted.mediaDownloadCode && account.clientId && account.clientSecret) {
    const robotCode = account.robotCode || account.clientId;
    try {
      const result = await downloadMediaFile(
        account.clientId,
        account.clientSecret,
        robotCode,
        extracted.mediaDownloadCode,
        extracted.mediaType,
      );
      if (result.filePath) {
        mediaPath = result.filePath;
        mediaType = result.mimeType || extracted.mediaType;
        log?.info?.(`[dingtalk] Downloaded ${extracted.mediaType || 'media'}: ${result.filePath}`);
      } else if (result.error) {
        log?.warn?.(`[dingtalk] Media download failed: ${result.error}`);
      }
    } catch (err) {
      log?.warn?.(`[dingtalk] Media download error: ${err}`);
    }
  } else if (skipMediaDownload) {
    log?.info?.("[dingtalk] Audio ASR text available, skipping .amr download");
  }

  let rawBody = extracted.text;

  if (!rawBody && !mediaPath) {
    log?.info?.("[dingtalk] Empty message body after all attempts, skipping. msgtype=" + msg.msgtype);
    return;
  }

  // If we have media but no text, provide a placeholder
  if (!rawBody && mediaPath) {
    rawBody = `[${extracted.messageType}] 媒体文件已下载: ${mediaPath}`;
  }

  // Handle quoted/replied messages: extract the quoted content and prepend it
  if (msg.text && (msg.text as any).isReplyMsg) {
    log?.info?.("[dingtalk] Message is a reply, full text object: " + JSON.stringify(msg.text));

    const repliedMsg = (msg.text as any).repliedMsg;
    if (repliedMsg) {
      try {
        let quotedContent = "";

        if (repliedMsg.content?.richText && Array.isArray(repliedMsg.content.richText)) {
          // richText format: array of {msgType, content} or {msgType, downloadCode}
          const parts: string[] = [];

          for (const item of repliedMsg.content.richText) {
            if (item.msgType === "text" && item.content) {
              parts.push(item.content);
              continue;
            }
            if (!(item.msgType === "picture" && item.downloadCode)) {
              continue;
            }

            // Without credentials the download cannot succeed; keep a placeholder.
            if (!account.clientId || !account.clientSecret) {
              parts.push("[图片]");
              continue;
            }

            // Download the picture from quoted message
            try {
              const robotCode = account.robotCode || account.clientId;
              const pictureResult = await downloadPicture(
                account.clientId,
                account.clientSecret,
                robotCode,
                item.downloadCode,
              );

              if (pictureResult.filePath) {
                parts.push(`[图片: ${pictureResult.filePath}]`);
                log?.info?.("[dingtalk] Downloaded picture from quoted message: " + pictureResult.filePath);
              } else if (pictureResult.error) {
                parts.push(`[图片下载失败: ${pictureResult.error}]`);
              } else {
                parts.push("[图片]");
              }
            } catch (err) {
              parts.push(`[图片下载出错: ${err}]`);
              log?.warn?.("[dingtalk] Error downloading picture from quoted message: " + err);
            }
          }

          quotedContent = parts.join("");
        } else if (repliedMsg.content?.text) {
          quotedContent = repliedMsg.content.text;
        } else if (typeof repliedMsg.content === "string") {
          quotedContent = repliedMsg.content;
        }

        if (quotedContent) {
          rawBody = `[引用回复: "${quotedContent.trim()}"]\n${rawBody}`;
          log?.info?.("[dingtalk] Added quoted message: " + quotedContent.slice(0, 50));
        } else {
          log?.info?.("[dingtalk] Reply message found but no content extracted, repliedMsg: " + JSON.stringify(repliedMsg));
        }
      } catch (err) {
        log?.info?.("[dingtalk] Failed to extract quoted message: " + err);
      }
    } else {
      log?.info?.("[dingtalk] Message marked as reply but no repliedMsg field found");
    }
  }

  // Handle @mentions: DingTalk removes @username from text.content
  // Query user info for mentioned users (those with staffId)
  if (msg.atUsers && msg.atUsers.length > 0) {
    log?.info?.("[dingtalk] Message has @mentions: " + JSON.stringify(msg.atUsers));

    // Filter users with staffId (exclude bots which don't have staffId)
    const userIds = msg.atUsers
      .filter(u => u.staffId)
      .map(u => u.staffId as string)
      .slice(0, 5); // Limit to 5 users to avoid too many API calls

    // Default prefix only states how many users were mentioned; it is replaced
    // by the resolved names when the lookup succeeds.
    let mentionPrefix = `[有${msg.atUsers.length}人被@]`;

    if (userIds.length > 0 && account.clientId && account.clientSecret) {
      try {
        // Batch query user info (3s timeout — needs token fetch + API call)
        const userInfoMap = await batchGetUserInfo(account.clientId, account.clientSecret, userIds, 3000);

        if (userInfoMap.size > 0) {
          // Build mention list: [@张三 @李四]
          const mentions = Array.from(userInfoMap.values()).map(name => `@${name}`).join(" ");
          mentionPrefix = `[${mentions}]`;
          log?.info?.("[dingtalk] Added user mentions: " + mentions);
        } else {
          log?.info?.("[dingtalk] User info fetch failed, using count fallback");
        }
      } catch (err) {
        log?.info?.("[dingtalk] Error fetching user info: " + err + ", using count fallback");
      }
    } else {
      log?.info?.("[dingtalk] No staffId or credentials, using count fallback");
    }

    rawBody = `${mentionPrefix} ${rawBody}`;
  }

  log?.info?.("[dingtalk] " + (isDm ? "DM" : "Group") + " from " + senderName + ": " + rawBody.slice(0, 50));

  const sessionKey = "dingtalk:" + account.accountId + ":" + (isDm ? "dm" : "group") + ":" + conversationId;

  const replyTarget = {
    sessionWebhook: msg.sessionWebhook,
    sessionWebhookExpiry: msg.sessionWebhookExpiredTime,
    conversationId,
    senderId,
    isDm,
    account,
  };

  // Aggregation is on unless explicitly disabled in the account config
  const aggregationEnabled = account.config.messageAggregation !== false;

  if (aggregationEnabled) {
    // Buffer this message for aggregation
    await bufferMessageForAggregation({
      msg, ctx, rawBody, replyTarget, sessionKey, isDm, senderId, senderName, conversationId,
      mediaPath, mediaType,
    });
    return; // Actual dispatch happens when timer fires
  }

  // No aggregation - dispatch immediately
  await dispatchMessage({
    ctx, msg, rawBody, replyTarget, sessionKey, isDm, senderId, senderName, conversationId,
    mediaPath, mediaType,
  });
}
747
+
748
/**
 * Add a message to the aggregation buffer for its sender/conversation.
 *
 * If a buffer entry already exists for the key, the message is appended, the
 * entry's `msg` and `replyTarget` are replaced by the latest ones, and the
 * flush timer is restarted. Otherwise a new entry is created with a fresh
 * timer. The delay comes from `account.config.messageAggregationDelayMs`,
 * falling back to `AGGREGATION_DELAY_MS`.
 *
 * @param params - The message, its context and the already-derived routing
 *                 fields that are stored on the buffer entry.
 */
async function bufferMessageForAggregation(params: {
  msg: DingTalkRobotMessage;
  ctx: DingTalkMonitorContext;
  rawBody: string;
  replyTarget: any;
  sessionKey: string;
  isDm: boolean;
  senderId: string;
  senderName: string;
  conversationId: string;
  mediaPath?: string;
  mediaType?: string;
}): Promise<void> {
  const { msg, ctx, rawBody, replyTarget, sessionKey, isDm, senderId, senderName, conversationId, mediaPath, mediaType } = params;
  const { account, log } = ctx;
  const bufferKey = getBufferKey(msg, account.accountId);
  const aggregationDelayMs = account.config.messageAggregationDelayMs ?? AGGREGATION_DELAY_MS;

  // The flush is async and runs from a timer, so nobody awaits it; a rejection
  // has to be handled here or it would surface as an unhandled rejection.
  const scheduleFlush = (): ReturnType<typeof setTimeout> =>
    setTimeout(() => {
      flushMessageBuffer(bufferKey).catch((err: unknown) => {
        log?.warn?.("[dingtalk] Failed to flush message buffer: " + err);
      });
    }, aggregationDelayMs);

  const part = { text: rawBody, timestamp: Date.now(), mediaPath, mediaType };
  const existing = messageBuffer.get(bufferKey);

  if (existing) {
    // Add to existing buffer
    existing.messages.push(part);
    // Update to latest msg for reply target (use latest sessionWebhook)
    existing.msg = msg;
    existing.replyTarget = replyTarget;

    // Reset timer
    clearTimeout(existing.timer);
    existing.timer = scheduleFlush();

    log?.info?.(`[dingtalk] Message buffered, total: ${existing.messages.length} messages`);
    return;
  }

  // Create new buffer entry
  const newEntry: BufferedMessage = {
    messages: [part],
    timer: scheduleFlush(),
    ctx,
    msg,
    replyTarget,
    sessionKey,
    isDm,
    senderId,
    senderName,
    conversationId,
  };
  messageBuffer.set(bufferKey, newEntry);

  log?.info?.(`[dingtalk] Message buffered (new), waiting ${aggregationDelayMs}ms for more...`);
}
806
+
807
/**
 * Remove the buffer entry for `bufferKey` and dispatch its messages as one.
 *
 * The texts of all buffered messages are joined with newlines. Only one media
 * attachment is forwarded: the one from the most recent buffered message that
 * has a media path. Does nothing when no entry exists for the key.
 *
 * @param bufferKey - Key of the buffer entry to flush.
 */
async function flushMessageBuffer(bufferKey: string): Promise<void> {
  const buffered = messageBuffer.get(bufferKey);
  if (!buffered) {
    return;
  }
  messageBuffer.delete(bufferKey);

  const { log } = buffered.ctx;
  const parts = buffered.messages;

  // Join every buffered text, one per line.
  const texts: string[] = [];
  for (const part of parts) {
    texts.push(part.text);
  }
  const combinedText = texts.join('\n');

  // Walk backwards to pick the newest message that carries media.
  let mediaPath: string | undefined;
  let mediaType: string | undefined;
  for (let i = parts.length - 1; i >= 0; i--) {
    const candidate = parts[i];
    if (candidate?.mediaPath) {
      mediaPath = candidate.mediaPath;
      mediaType = candidate.mediaType;
      break;
    }
  }

  log?.info?.(`[dingtalk] Flushing buffer: ${parts.length} message(s) combined into ${combinedText.length} chars`);

  await dispatchMessage({
    ctx: buffered.ctx,
    msg: buffered.msg,
    rawBody: combinedText,
    replyTarget: buffered.replyTarget,
    sessionKey: buffered.sessionKey,
    isDm: buffered.isDm,
    senderId: buffered.senderId,
    senderName: buffered.senderName,
    conversationId: buffered.conversationId,
    mediaPath,
    mediaType,
  });
}
834
+
835
/**
 * Dispatch a message to the agent (after aggregation or immediately).
 *
 * Before dispatching, a recallable typing indicator is sent when enabled and
 * credentials are available; otherwise the legacy, non-recallable "thinking"
 * text is sent if `showThinking` is set. The typing indicator is recalled at
 * most once: on the first delivered reply, on error, or when the dispatch
 * finishes without any reply.
 *
 * Two runtime paths are supported:
 *  - the full SDK pipeline, awaited via `dispatchWithFullPipeline`;
 *  - the buffered block dispatcher, started without awaiting so long agent
 *    runs do not block the caller.
 *
 * Errors are logged, never rethrown.
 *
 * @param params - Message, context and routing fields for this dispatch.
 */
async function dispatchMessage(params: {
  ctx: DingTalkMonitorContext;
  msg: DingTalkRobotMessage;
  rawBody: string;
  replyTarget: any;
  sessionKey: string;
  isDm: boolean;
  senderId: string;
  senderName: string;
  conversationId: string;
  mediaPath?: string;
  mediaType?: string;
}): Promise<void> {
  const { ctx, msg, rawBody, replyTarget, sessionKey, isDm, senderId, senderName, conversationId, mediaPath, mediaType } = params;
  const { account, cfg, log, setStatus } = ctx;
  const runtime = getDingTalkRuntime();
  const isGroup = !isDm;

  // Typing indicator cleanup function (will be called after dispatch completes)
  let typingCleanup: (() => Promise<void>) | null = null;

  // Send typing indicator (recallable) if enabled
  // This replaces the old showThinking feature with a better UX - the indicator disappears when reply arrives
  if (account.config.typingIndicator !== false && account.clientId && account.clientSecret) {
    try {
      const typingMessage = account.config.typingIndicatorMessage || '⏳ 思考中...';
      const robotCode = account.robotCode || account.clientId;

      const result = await sendTypingIndicator({
        clientId: account.clientId,
        clientSecret: account.clientSecret,
        robotCode,
        userId: isDm ? senderId : undefined,
        conversationId: !isDm ? conversationId : undefined,
        message: typingMessage,
      });

      if (result.error) {
        log?.info?.('[dingtalk] Typing indicator failed: ' + result.error);
      } else {
        typingCleanup = result.cleanup;
        log?.info?.('[dingtalk] Typing indicator sent (will be recalled on reply)');
      }
    } catch (err) {
      log?.info?.('[dingtalk] Typing indicator error: ' + err);
    }
  }
  // Legacy: send thinking feedback (opt-in, non-recallable). Reached when the
  // typing indicator is disabled or when credentials are missing.
  else if (account.config.showThinking && replyTarget.sessionWebhook) {
    try {
      await sendViaSessionWebhook(replyTarget.sessionWebhook, '正在思考...');
      log?.info?.('[dingtalk] Sent thinking indicator (legacy, non-recallable)');
    } catch (_) {
      // fire-and-forget, don't block processing
    }
  }

  // Load actual config if cfg is a config manager
  let actualCfg = cfg;
  if (cfg && typeof cfg.loadConfig === "function") {
    try {
      actualCfg = await cfg.loadConfig();
    } catch (err) {
      log?.info?.("[dingtalk] Failed to load config: " + err);
    }
  }

  // Check if the full Clawdbot Plugin SDK pipeline is available
  const hasFullPipeline = !!(
    runtime?.channel?.routing?.resolveAgentRoute &&
    runtime?.channel?.reply?.finalizeInboundContext &&
    runtime?.channel?.reply?.createReplyDispatcherWithTyping &&
    runtime?.channel?.reply?.dispatchReplyFromConfig
  );

  // Idempotent recall of the typing indicator; never rejects.
  let typingCleaned = false;
  const cleanupTyping = async () => {
    if (typingCleanup && !typingCleaned) {
      typingCleaned = true;
      try {
        await typingCleanup();
        log?.info?.('[dingtalk] Typing indicator recalled');
      } catch (err) {
        log?.info?.('[dingtalk] Failed to recall typing indicator: ' + err);
      }
    }
  };

  try {
    if (hasFullPipeline) {
      // Full SDK pipeline: route → session → envelope → dispatch
      await dispatchWithFullPipeline({
        runtime, msg, rawBody, account, cfg: actualCfg, sessionKey, isDm,
        senderId, senderName, conversationId, replyTarget,
        mediaPath, mediaType, log, setStatus,
        onFirstReply: cleanupTyping,
      });
    } else if (runtime?.channel?.reply?.dispatchReplyWithBufferedBlockDispatcher) {
      // Fallback: existing buffered block dispatcher
      const ctxPayload = {
        Body: rawBody,
        RawBody: rawBody,
        CommandBody: rawBody,
        From: "dingtalk:" + senderId,
        To: isDm ? ("dingtalk:dm:" + senderId) : ("dingtalk:group:" + conversationId),
        SessionKey: sessionKey,
        AccountId: account.accountId,
        ChatType: isDm ? "direct" : "group",
        ConversationLabel: isDm ? senderName : (msg.conversationTitle ?? conversationId),
        SenderName: senderName || undefined,
        SenderId: senderId,
        WasMentioned: isGroup ? msg.isInAtList : undefined,
        Provider: "dingtalk",
        Surface: "dingtalk",
        MessageSid: msg.msgId,
        OriginatingChannel: "dingtalk",
        OriginatingTo: "dingtalk:" + conversationId,
        MediaPath: mediaPath,
        MediaType: mediaType,
        MediaUrl: mediaPath,
      };

      // Fire-and-forget: don't await to avoid blocking SDK callback during long agent runs
      void runtime.channel.reply.dispatchReplyWithBufferedBlockDispatcher({
        ctx: ctxPayload,
        cfg: actualCfg,
        dispatcherOptions: {
          deliver: async (payload: any) => {
            // Recall typing indicator on first delivery
            await cleanupTyping();

            log?.info?.("[dingtalk] Deliver payload keys: " + Object.keys(payload || {}).join(',') + " text?=" + (typeof payload?.text) + " markdown?=" + (typeof payload?.markdown));
            const textToSend = resolveDeliverText(payload, log);
            if (textToSend) {
              await deliverReply(replyTarget, textToSend, log);
              setStatus?.({ lastOutboundAt: Date.now() });
            } else {
              log?.info?.("[dingtalk] Deliver: no text resolved from payload");
            }
          },
          onError: (err: any) => {
            // Also cleanup on error
            cleanupTyping().catch(() => {});
            log?.info?.("[dingtalk] Reply error: " + err);
          },
        },
      }).catch((err: unknown) => {
        log?.info?.("[dingtalk] Dispatch failed: " + err);
      }).then(() => {
        // Runs after success and after a handled failure alike, so the
        // indicator is also recalled when the agent produced no reply at all.
        return cleanupTyping();
      });

      // Record activity
      runtime.channel?.activity?.record?.('dingtalk', account.accountId, 'message');
    } else {
      log?.info?.("[dingtalk] Runtime dispatch not available");
      await cleanupTyping();
    }
  } catch (err) {
    await cleanupTyping();
    log?.info?.("[dingtalk] Dispatch error: " + err);
  }
}
1001
+
1002
/**
 * Dispatch using the full Clawdbot Plugin SDK pipeline.
 * Uses resolveAgentRoute → session → envelope → finalizeContext → dispatch.
 *
 * `onFirstReply` is invoked before the first delivered payload; if the
 * dispatch ends without any delivery it is invoked once afterwards, so the
 * caller's typing indicator is always cleaned up. Delivery failures are
 * reported to the dispatcher as `{ ok: false, error }` instead of being
 * thrown. Errors from the dispatch itself propagate to the caller.
 *
 * @param params - Runtime, message, account, config and routing fields.
 *                 `params.sessionKey` is accepted for interface compatibility;
 *                 the session key actually used comes from the resolved route.
 */
async function dispatchWithFullPipeline(params: {
  runtime: any;
  msg: DingTalkRobotMessage;
  rawBody: string;
  account: ResolvedDingTalkAccount;
  cfg: any;
  sessionKey: string;
  isDm: boolean;
  senderId: string;
  senderName: string;
  conversationId: string;
  replyTarget: any;
  mediaPath?: string;
  mediaType?: string;
  log?: any;
  setStatus?: (update: Record<string, unknown>) => void;
  onFirstReply?: () => Promise<void>;
}): Promise<void> {
  const { runtime: rt, msg, rawBody, account, cfg, isDm,
    senderId, senderName, conversationId, replyTarget,
    log, setStatus, onFirstReply } = params;

  let firstReplyFired = false;

  // 1. Resolve agent route
  const route = rt.channel.routing.resolveAgentRoute({
    cfg,
    channel: 'dingtalk',
    accountId: account.accountId,
    peer: { kind: isDm ? 'dm' : 'group', id: isDm ? senderId : conversationId },
  });

  // 2. Resolve store path
  const storePath = rt.channel.session?.resolveStorePath?.(cfg?.session?.store, { agentId: route.agentId });

  // 3. Get envelope format options
  const envelopeOptions = rt.channel.reply?.resolveEnvelopeFormatOptions?.(cfg) ?? {};

  // 4. Read previous timestamp for session continuity
  const previousTimestamp = rt.channel.session?.readSessionUpdatedAt?.({ storePath, sessionKey: route.sessionKey });

  // 5. Format inbound envelope (falls back to the raw body when the runtime has no formatter)
  const fromLabel = isDm ? `${senderName} (${senderId})` : `${msg.conversationTitle || conversationId} - ${senderName}`;
  const body = rt.channel.reply.formatInboundEnvelope?.({
    channel: 'DingTalk', from: fromLabel, timestamp: msg.createAt, body: rawBody,
    chatType: isDm ? 'direct' : 'group', sender: { name: senderName, id: senderId },
    previousTimestamp, envelope: envelopeOptions,
  }) ?? rawBody;

  // 6. Finalize inbound context (includes media info)
  const to = isDm ? `dingtalk:${senderId}` : `dingtalk:group:${conversationId}`;
  const ctx = rt.channel.reply.finalizeInboundContext({
    Body: body, RawBody: rawBody, CommandBody: rawBody, From: to, To: to,
    SessionKey: route.sessionKey, AccountId: account.accountId,
    ChatType: isDm ? 'direct' : 'group',
    ConversationLabel: fromLabel,
    GroupSubject: isDm ? undefined : (msg.conversationTitle || conversationId),
    SenderName: senderName, SenderId: senderId,
    Provider: 'dingtalk', Surface: 'dingtalk',
    MessageSid: msg.msgId, Timestamp: msg.createAt,
    MediaPath: params.mediaPath, MediaType: params.mediaType, MediaUrl: params.mediaPath,
    CommandAuthorized: true,
    OriginatingChannel: 'dingtalk', OriginatingTo: to,
  });

  // 7. Record inbound session
  if (rt.channel.session?.recordInboundSession) {
    await rt.channel.session.recordInboundSession({
      storePath, sessionKey: ctx.SessionKey || route.sessionKey, ctx,
      updateLastRoute: isDm ? { sessionKey: route.mainSessionKey, channel: 'dingtalk', to: senderId, accountId: account.accountId } : undefined,
    });
  }

  // 8. Create typing-aware dispatcher
  const { dispatcher, replyOptions, markDispatchIdle } = rt.channel.reply.createReplyDispatcherWithTyping({
    responsePrefix: '',
    deliver: async (payload: any) => {
      // Recall typing indicator on first delivery
      if (!firstReplyFired && onFirstReply) {
        firstReplyFired = true;
        await onFirstReply().catch((err) => {
          log?.info?.("[dingtalk] onFirstReply error: " + err);
        });
      }

      try {
        log?.info?.("[dingtalk] Pipeline deliver payload keys: " + Object.keys(payload || {}).join(',') + " text?=" + (typeof payload?.text) + " markdown?=" + (typeof payload?.markdown));
        const textToSend = resolveDeliverText(payload, log);
        if (!textToSend) {
          log?.info?.("[dingtalk] Pipeline deliver: no text resolved from payload");
          return { ok: true };
        }
        await deliverReply(replyTarget, textToSend, log);
        setStatus?.({ lastOutboundAt: Date.now() });
        return { ok: true };
      } catch (err: unknown) {
        // Anything can be thrown, not only Error instances; normalise so the
        // log line and the returned error are never "undefined".
        const reason = err instanceof Error ? err.message : String(err);
        log?.info?.("[dingtalk] Reply delivery failed: " + reason);
        return { ok: false, error: reason };
      }
    },
  });

  // 9. Dispatch reply from config
  try {
    await rt.channel.reply.dispatchReplyFromConfig({ ctx, cfg, dispatcher, replyOptions });
  } finally {
    markDispatchIdle();
    // Ensure typing indicator is cleaned up even if no reply was sent
    if (!firstReplyFired && onFirstReply) {
      await onFirstReply().catch(() => {});
    }
  }

  // 10. Record activity
  rt.channel?.activity?.record?.('dingtalk', account.accountId, 'message');
}
1122
+
/**
 * Extract the text to deliver from a reply payload, merging in any media URL.
 *
 * The platform may send media URLs in separate fields (e.g. from the `message` tool).
 * The first field holding an http(s) URL is appended to the text as markdown image
 * syntax so DingTalk can render it.
 *
 * @param payload - Deliver payload; may be null/undefined or carry unexpected field types.
 * @param log - Optional logger; only `info` is used.
 * @returns The text to send, or `undefined` when the payload has nothing deliverable.
 */
function resolveDeliverText(payload: any, log?: any): string | undefined {
  // A missing or non-object payload has no fields to read; treat it as "nothing to send"
  // instead of throwing on the property access below.
  if (payload == null || typeof payload !== 'object') {
    return undefined;
  }

  // payload.markdown may be a boolean flag (not the actual text), so only use it
  // when it is a non-empty string.
  let text = typeof payload.markdown === 'string' && payload.markdown ? payload.markdown : payload.text;

  // Guard: ensure text is a string (platform might send unexpected types)
  if (text != null && typeof text !== 'string') {
    log?.info?.("[dingtalk] Deliver payload has non-string text type=" + typeof text + ", payload keys=" + Object.keys(payload).join(','));
    text = String(text);
  }

  // Look at every candidate field rather than the first truthy one, so a non-string
  // value (e.g. a media descriptor object) cannot hide a usable URL in a later field.
  const mediaUrl = [payload.mediaUrl, payload.media, payload.imageUrl, payload.image].find(
    (candidate): candidate is string => typeof candidate === 'string' && /^https?:\/\//i.test(candidate),
  );

  if (mediaUrl) {
    log?.info?.("[dingtalk] Deliver payload includes media URL: " + mediaUrl);
    const imageMarkdown = `![image](${mediaUrl})`;
    text = text ? `${text}\n\n${imageMarkdown}` : imageMarkdown;
  }

  return text || undefined;
}
1148
+
/**
 * Deliver a reply text to DingTalk.
 *
 * Steps, in order:
 *  1. When `longTextMode` is 'file' and the text exceeds `longTextThreshold`, try to send
 *     it as a file (needs clientId/clientSecret); on failure fall through to text delivery.
 *  2. Decide markdown vs. plain text from `messageFormat` ('auto' inspects the content).
 *  3. For markdown, rewrite tables and bare image URLs into forms DingTalk renders.
 *  4. Split into chunks of at most `textChunkLimit` UTF-16 code units.
 *  5. Send each chunk via the session webhook (up to 2 attempts, 1s apart) and fall back
 *     to the REST API when the webhook is unavailable, expired, or keeps failing.
 *
 * NOTE(review): delivery failures are only logged, never thrown, so callers cannot tell
 * whether a chunk actually reached the user — confirm this best-effort contract is intended.
 *
 * @param target - Reply target; reads `account`, `sessionWebhook`, `sessionWebhookExpiry`,
 *   `isDm`, `senderId` and `conversationId`.
 * @param text - Full reply text.
 * @param log - Optional logger; only `info` is used.
 */
async function deliverReply(target: any, text: string, log?: any): Promise<void> {
  const config = target.account.config;
  const messageFormat = config.messageFormat ?? "text";
  const longTextMode = config.longTextMode ?? "chunk";
  const longTextThreshold = config.longTextThreshold ?? 4000;

  // A non-numeric, zero or negative limit would make the chunk loop below never advance,
  // so fall back to the default in that case.
  const configuredLimit = Number(config.textChunkLimit ?? 2000);
  const chunkLimit = Number.isFinite(configuredLimit) && configuredLimit >= 1 ? Math.floor(configuredLimit) : 2000;

  const hasCredentials = Boolean(target.account.clientId && target.account.clientSecret);

  // Check if we should send as file instead of text
  if (longTextMode === 'file' && text.length > longTextThreshold) {
    log?.info?.("[dingtalk] Text exceeds threshold (" + text.length + " > " + longTextThreshold + "), sending as file");

    // Only attempt file send if we have credentials (REST API required)
    if (hasCredentials) {
      const fileSent = await sendTextAsFile(target, text, log);
      if (fileSent) {
        return; // Successfully sent as file
      }
      log?.info?.("[dingtalk] File send failed, falling back to chunked text");
    } else {
      log?.info?.("[dingtalk] No credentials for file send, falling back to chunked text");
    }
  }

  // Determine if this message should use markdown format
  let isMarkdown: boolean;
  if (messageFormat === 'auto') {
    isMarkdown = detectMarkdownContent(text);
    log?.info?.("[dingtalk] Auto-detected format: " + (isMarkdown ? "markdown" : "text"));
  } else {
    // Support both "markdown" and "richtext" (they're equivalent for DingTalk)
    isMarkdown = messageFormat === "markdown" || messageFormat === "richtext";
  }

  // Convert markdown tables to text format (DingTalk doesn't support tables)
  // and bare image URLs to markdown image syntax for proper display.
  const processedText = isMarkdown ? convertImageUrlsToMarkdown(convertMarkdownTables(text)) : text;

  const chunks: string[] = [];
  if (processedText.length <= chunkLimit) {
    chunks.push(processedText);
  } else {
    let start = 0;
    while (start < processedText.length) {
      let end = Math.min(start + chunkLimit, processedText.length);
      // Do not cut between the two halves of a surrogate pair (emoji, rare CJK):
      // if the chunk would end on a high surrogate, move the cut one unit back.
      if (end < processedText.length && end - start > 1) {
        const lastUnit = processedText.charCodeAt(end - 1);
        if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
          end -= 1;
        }
      }
      chunks.push(processedText.slice(start, end));
      start = end;
    }
  }

  const maxRetries = 2;

  for (const chunk of chunks) {
    let webhookSuccess = false;
    let webhookAttempted = false;

    // Try sessionWebhook with retry. Expiry is re-checked per chunk because earlier
    // chunks (and their retry delays) may have consumed the remaining validity window.
    if (target.sessionWebhook && Date.now() < target.sessionWebhookExpiry) {
      webhookAttempted = true;
      for (let attempt = 1; attempt <= maxRetries; attempt++) {
        try {
          log?.info?.("[dingtalk] Using sessionWebhook (attempt " + attempt + "/" + maxRetries + "), format=" + messageFormat);
          log?.info?.("[dingtalk] Sending text (" + chunk.length + " chars): " + chunk.substring(0, 200));
          const sendResult: { ok: boolean; errcode?: number; errmsg?: string } = isMarkdown
            ? await sendMarkdownViaSessionWebhook(target.sessionWebhook, "Reply", chunk)
            : await sendViaSessionWebhook(target.sessionWebhook, chunk);
          if (!sendResult.ok) {
            throw new Error(`SessionWebhook rejected: errcode=${sendResult.errcode}, errmsg=${sendResult.errmsg}`);
          }
          log?.info?.("[dingtalk] SessionWebhook send OK (errcode=" + (sendResult.errcode ?? 0) + ")");
          webhookSuccess = true;
          break;
        } catch (err) {
          log?.info?.("[dingtalk] SessionWebhook attempt " + attempt + " failed: " + (err instanceof Error ? err.message : String(err)));
          if (attempt < maxRetries) {
            // Wait 1 second before retry
            await new Promise(resolve => setTimeout(resolve, 1000));
          }
        }
      }
    }

    if (webhookSuccess) {
      continue;
    }

    // Fallback to REST API if the webhook was unusable or failed after all retries
    if (hasCredentials) {
      try {
        log?.info?.(
          webhookAttempted
            ? "[dingtalk] SessionWebhook failed after " + maxRetries + " attempts, using REST API fallback"
            : "[dingtalk] SessionWebhook unavailable or expired, using REST API fallback",
        );
        // REST API only supports text format, so the chunk is sent as-is.
        await sendDingTalkRestMessage({
          clientId: target.account.clientId,
          clientSecret: target.account.clientSecret,
          robotCode: target.account.robotCode || target.account.clientId,
          userId: target.isDm ? target.senderId : undefined,
          conversationId: !target.isDm ? target.conversationId : undefined,
          text: chunk,
        });
        log?.info?.("[dingtalk] REST API send OK");
      } catch (err) {
        log?.info?.("[dingtalk] REST API also failed: " + (err instanceof Error ? err.stack : JSON.stringify(err)));
      }
    } else {
      log?.info?.("[dingtalk] No delivery method available!");
    }
  }
}
1254
+
/**
 * Send a long reply as a markdown file attachment instead of inline text.
 * Called when longTextMode is 'file' and the text exceeds the configured threshold.
 *
 * The text is converted to a file, uploaded, and then sent to the sender (DM) or the
 * conversation (group). Any failure is logged and reported through the return value.
 *
 * @param target - Reply target; reads `account`, `isDm`, `senderId` and `conversationId`.
 * @param text - Text to place in the file.
 * @param log - Optional logger; only `info` is used.
 * @returns `true` when the file message was sent, `false` on any failure.
 */
async function sendTextAsFile(target: any, text: string, log?: any): Promise<boolean> {
  // Credentials shared by the upload call and the send call.
  const credentials = () => ({
    clientId: target.account.clientId,
    clientSecret: target.account.clientSecret,
    robotCode: target.account.robotCode || target.account.clientId,
  });

  try {
    // Build the markdown file from the text
    const { buffer: fileBuffer, fileName } = textToMarkdownFile(text, "AI Response");
    log?.info?.("[dingtalk] Converting text to file: " + fileName + " (" + fileBuffer.length + " bytes)");

    // Step 1: upload
    const uploaded = await uploadMediaFile({
      ...credentials(),
      fileBuffer,
      fileName,
      fileType: 'file',
    });
    const mediaId = uploaded.mediaId;
    if (!mediaId) {
      log?.info?.("[dingtalk] File upload failed: " + (uploaded.error || "no mediaId returned"));
      return false;
    }
    log?.info?.("[dingtalk] File uploaded, mediaId=" + mediaId);

    // Step 2: send the uploaded file to the user (DM) or the conversation (group)
    const sent = await sendFileMessage({
      ...credentials(),
      userId: target.isDm ? target.senderId : undefined,
      conversationId: !target.isDm ? target.conversationId : undefined,
      mediaId,
      fileName,
    });
    if (sent.ok) {
      log?.info?.("[dingtalk] File sent successfully");
      return true;
    }

    log?.info?.("[dingtalk] File send failed: " + (sent.error || "unknown error"));
    return false;
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    log?.info?.("[dingtalk] sendTextAsFile error: " + reason);
    return false;
  }
}
1305
+
/**
 * Convert bare image URLs to markdown image syntax.
 *
 * Two patterns are rewritten:
 *  - "图N: https://...png" (half- or full-width colon) becomes `![图N](url)`
 *  - an image URL at line start or after whitespace becomes `![image](url)`
 *
 * A URL only counts as an image when the extension ends the path: it may be followed by a
 * query string, whitespace, punctuation or end of text, but not by more path characters
 * (so "x.png.html" and "x.pngfoo" are left alone). URLs already inside markdown link or
 * image syntax are not touched because they follow "(" rather than whitespace.
 *
 * @param text - Markdown text to scan.
 * @returns The text with bare image URLs rewritten.
 */
function convertImageUrlsToMarkdown(text: string): string {
  // Pattern 1: "图X: https://..." format (common Agent output); also accepts the
  // full-width colon "：" that is usual in Chinese text.
  const labelled = text.replace(
    /图(\d+)\s*[:：]\s*(https?:\/\/[^\s]+\.(?:png|jpg|jpeg|gif|webp)(?:\?[^\s]*)?)(?![\w-]|[./][\w-])/gi,
    (_match: string, num: string, url: string) => `![图${num}](${url})`,
  );

  // Pattern 2: bare image URLs at line start or preceded by whitespace. The lookbehind
  // skips a URL written as "]( https://..." (whitespace right after the opening "](").
  return labelled.replace(
    /(?<!\]\()(^|\s)(https?:\/\/[^\s]+\.(?:png|jpg|jpeg|gif|webp)(?:\?[^\s]*)?)(?![\w-]|[./][\w-])/gim,
    (_match: string, lead: string, url: string) => `${lead}![image](${url})`,
  );
}
1327
+
/**
 * Convert markdown tables to a fenced plain-text block.
 * DingTalk doesn't support markdown tables, so each table is rewritten as
 * "cell | cell" rows inside a ``` fence.
 *
 * A table is a run of consecutive lines that start and end with "|" and contain at
 * least one separator row (cells made only of dashes with optional alignment colons).
 * Runs without a separator row are returned unchanged. Both LF and CRLF line endings
 * are handled, and the last row does not need a trailing newline.
 *
 * @param text - Markdown text that may contain tables.
 * @returns The text with every detected table replaced by a fenced block.
 */
function convertMarkdownTables(text: string): string {
  // One or more consecutive "| ... |" lines, anchored to line start. The final line may
  // end at end-of-text instead of a newline so the last row is not left behind.
  const tableBlock = /(?:^[ \t]*\|.+\|[ \t]*(?:\r?\n|$))+/gm;

  // Strip the outer pipes before splitting so empty interior cells keep their position.
  const splitCells = (row: string): string[] =>
    row.replace(/^\|/, '').replace(/\|$/, '').split('|').map((cell) => cell.trim());

  // Separator rows look like |---|:--:|; every cell must contain at least one dash.
  const isSeparatorRow = (row: string): boolean =>
    splitCells(row).every((cell) => /^:?-+:?$/.test(cell));

  return text.replace(tableBlock, (block) => {
    const rows = block
      .split(/\r?\n/)
      .map((row) => row.trim())
      .filter((row) => row !== '');

    // Not a table unless there are at least two rows and one of them is a separator.
    if (rows.length < 2 || !rows.some(isSeparatorRow)) return block;

    const renderedRows = rows
      .filter((row) => !isSeparatorRow(row))
      .map((row) => splitCells(row).join(' | ') + '\n');

    return '\n```\n' + renderedRows.join('') + '```\n';
  });
}
1357
+
/**
 * Decide whether a text contains markdown constructs worth rendering as markdown.
 *
 * @param text - Reply text to inspect.
 * @returns `true` when at least one of the markdown signals below occurs in the text.
 */
function detectMarkdownContent(text: string): boolean {
  const markdownSignals: RegExp[] = [
    /^#{1,6}\s/m, // ATX header at line start
    /^\s*[-*+]\s/m, // bullet list item
    /^\s*\d+\.\s/m, // numbered list item
    /^\s*>/m, // blockquote
    /```/, // fenced code block
    /\*\*[^*]+\*\*/, // bold
    /\[[^\]]+\]\([^)]+\)/, // link
    /!\[[^\]]*\]\([^)]+\)/, // image
  ];

  return markdownSignals.some((signal) => signal.test(text));
}
1365
+
/**
 * Check whether a sender is permitted by an allowlist.
 *
 * Entries and the sender id are compared after trimming and lower-casing. An entry of
 * "*" allows every sender. A missing allowlist or a missing/blank sender id is denied
 * (unless the wildcard is present) instead of throwing.
 *
 * @param senderId - Sender identifier to check.
 * @param allowFrom - Allowed sender ids; entries are coerced to strings.
 * @returns `true` when the sender is allowed.
 */
function isSenderAllowed(senderId: string, allowFrom: string[]): boolean {
  // No usable allowlist means nobody is allowed.
  if (!Array.isArray(allowFrom)) return false;

  const entries = allowFrom.map((entry) => (entry == null ? "" : String(entry).trim().toLowerCase()));
  if (entries.includes("*")) return true;

  const normalized = String(senderId ?? "").trim().toLowerCase();
  // A blank id must never match a blank allowlist entry.
  return normalized !== "" && entries.includes(normalized);
}
1374
+
/**
 * Check whether a group conversation is permitted by an allowlist.
 *
 * Entries and the conversation id are compared after trimming and lower-casing. An entry
 * of "*" allows every group. A missing allowlist or a missing/blank conversation id is
 * denied (unless the wildcard is present) instead of throwing.
 *
 * @param conversationId - Conversation identifier to check.
 * @param allowlist - Allowed conversation ids; entries are coerced to strings.
 * @returns `true` when the group is allowed.
 */
function isGroupAllowed(conversationId: string, allowlist: string[]): boolean {
  // No usable allowlist means no group is allowed.
  if (!Array.isArray(allowlist)) return false;

  const entries = allowlist.map((entry) => (entry == null ? "" : String(entry).trim().toLowerCase()));
  if (entries.includes("*")) return true;

  const normalized = String(conversationId ?? "").trim().toLowerCase();
  // A blank id must never match a blank allowlist entry.
  return normalized !== "" && entries.includes(normalized);
}