@yaoyuanchao/dingtalk 1.2.1 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@yaoyuanchao/dingtalk",
3
- "version": "1.2.1",
3
+ "version": "1.3.1",
4
4
  "type": "module",
5
5
  "description": "DingTalk channel plugin for Clawdbot with Stream Mode support",
6
6
  "license": "MIT",
package/src/api.ts CHANGED
@@ -7,8 +7,8 @@ import os from "node:os";
7
7
  const DINGTALK_API_BASE = "https://api.dingtalk.com/v1.0";
8
8
  const DINGTALK_OAPI_BASE = "https://oapi.dingtalk.com";
9
9
 
10
- /** Temp directory for downloaded pictures */
11
- const TEMP_DIR = path.join(os.tmpdir(), "dingtalk-pictures");
10
+ /** Temp directory for downloaded media files */
11
+ const TEMP_DIR = path.join(os.tmpdir(), "dingtalk-media");
12
12
 
13
13
  /** Cache access tokens per clientId */
14
14
  const tokenCache = new Map<string, { token: string; expiresAt: number }>();
@@ -359,8 +359,78 @@ export async function downloadPicture(
359
359
  }
360
360
  }
361
361
 
362
- /** Clean up old picture files (older than 1 hour) */
363
- export function cleanupOldPictures(): void {
362
+ /** Extension mapping for media types */
363
+ const MEDIA_EXTENSIONS: Record<string, string> = {
364
+ 'image/jpeg': '.jpg',
365
+ 'image/png': '.png',
366
+ 'image/gif': '.gif',
367
+ 'image/webp': '.webp',
368
+ 'audio/amr': '.amr',
369
+ 'audio/mpeg': '.mp3',
370
+ 'audio/mp4': '.m4a',
371
+ 'video/mp4': '.mp4',
372
+ 'application/pdf': '.pdf',
373
+ 'application/octet-stream': '.bin',
374
+ };
375
+
376
+ /** Download media file (picture/audio/video/file) from DingTalk */
377
+ export async function downloadMediaFile(
378
+ clientId: string,
379
+ clientSecret: string,
380
+ robotCode: string,
381
+ downloadCode: string,
382
+ mediaType?: string,
383
+ ): Promise<{ filePath?: string; mimeType?: string; error?: string }> {
384
+ try {
385
+ const token = await getDingTalkAccessToken(clientId, clientSecret);
386
+
387
+ const url = `${DINGTALK_API_BASE}/robot/messageFiles/download`;
388
+ const headers = { "x-acs-dingtalk-access-token": token };
389
+ const body = { downloadCode, robotCode };
390
+
391
+ const response = await jsonPost(url, body, headers);
392
+
393
+ if (response.errcode && response.errcode !== 0) {
394
+ console.warn(`[dingtalk] Media download failed: ${response.errmsg}`);
395
+ return { error: response.errmsg || "Download failed" };
396
+ }
397
+
398
+ if (response.downloadUrl) {
399
+ const mediaBuffer = await httpGetBuffer(response.downloadUrl);
400
+
401
+ if (!fs.existsSync(TEMP_DIR)) {
402
+ fs.mkdirSync(TEMP_DIR, { recursive: true });
403
+ }
404
+
405
+ // Determine file extension from content type or media type hint
406
+ const contentType = response.contentType || '';
407
+ const ext = MEDIA_EXTENSIONS[contentType]
408
+ || (mediaType === 'audio' ? '.amr' : undefined)
409
+ || (mediaType === 'video' ? '.mp4' : undefined)
410
+ || (mediaType === 'image' ? '.jpg' : undefined)
411
+ || '.bin';
412
+
413
+ const timestamp = Date.now();
414
+ const prefix = mediaType || 'media';
415
+ const filename = `${prefix}_${timestamp}${ext}`;
416
+ const filePath = path.join(TEMP_DIR, filename);
417
+
418
+ fs.writeFileSync(filePath, mediaBuffer);
419
+
420
+ console.log(`[dingtalk] Media downloaded: ${filePath} (${mediaBuffer.length} bytes, type=${contentType || mediaType || 'unknown'})`);
421
+
422
+ return { filePath, mimeType: contentType || undefined };
423
+ }
424
+
425
+ return { error: "No download URL in response" };
426
+ } catch (err) {
427
+ console.warn(`[dingtalk] Error downloading media:`, err);
428
+ return { error: String(err) };
429
+ }
430
+ }
431
+
432
+ /** Clean up old media files (older than 1 hour) */
433
+ export function cleanupOldMedia(): void {
364
434
  try {
365
435
  if (!fs.existsSync(TEMP_DIR)) return;
366
436
 
@@ -373,10 +443,13 @@ export function cleanupOldPictures(): void {
373
443
 
374
444
  if (stats.mtimeMs < oneHourAgo) {
375
445
  fs.unlinkSync(filePath);
376
- console.log(`[dingtalk] Cleaned up old picture: ${filePath}`);
446
+ console.log(`[dingtalk] Cleaned up old media: ${filePath}`);
377
447
  }
378
448
  }
379
449
  } catch (err) {
380
- console.warn(`[dingtalk] Error cleaning up pictures:`, err);
450
+ console.warn(`[dingtalk] Error cleaning up media:`, err);
381
451
  }
382
452
  }
453
+
454
+ /** @deprecated Use cleanupOldMedia() instead */
455
+ export const cleanupOldPictures = cleanupOldMedia;
package/src/channel.ts CHANGED
@@ -97,9 +97,15 @@ export const dingtalkPlugin = {
97
97
  messageFormat: {
98
98
  type: 'string',
99
99
  title: 'Message Format',
100
- enum: ['text', 'markdown'],
100
+ enum: ['text', 'markdown', 'auto'],
101
101
  default: 'text',
102
- description: 'text=plain text, markdown=DingTalk markdown (limited: no tables, use text for tables)',
102
+ description: 'text=plain text, markdown=always markdown, auto=detect markdown features in response',
103
+ },
104
+ showThinking: {
105
+ type: 'boolean',
106
+ title: 'Show Thinking Indicator',
107
+ default: false,
108
+ description: 'Send "正在思考..." feedback before AI processing begins',
103
109
  },
104
110
  },
105
111
  required: ['clientId', 'clientSecret'],
@@ -235,6 +241,16 @@ export const dingtalkPlugin = {
235
241
 
236
242
  log.info?.('[dingtalk] Starting Stream connection...');
237
243
 
244
+ // Record start activity
245
+ (runtime as any).channel?.activity?.record?.('dingtalk', account.accountId, 'start');
246
+
247
+ // Record stop activity on abort
248
+ if (signal) {
249
+ signal.addEventListener('abort', () => {
250
+ (runtime as any).channel?.activity?.record?.('dingtalk', account.accountId, 'stop');
251
+ }, { once: true });
252
+ }
253
+
238
254
  try {
239
255
  await startDingTalkMonitor({
240
256
  account,
@@ -9,8 +9,8 @@ export const groupPolicySchema = z.enum(['disabled', 'allowlist', 'open'], {
9
9
  description: 'Group chat access control policy',
10
10
  });
11
11
 
12
- export const messageFormatSchema = z.enum(['text', 'markdown', 'richtext'], {
13
- description: 'Message format for bot responses (richtext is an alias for markdown)',
12
+ export const messageFormatSchema = z.enum(['text', 'markdown', 'richtext', 'auto'], {
13
+ description: 'Message format for bot responses (richtext is an alias for markdown, auto detects markdown features)',
14
14
  });
15
15
 
16
16
  // DingTalk 配置 Schema
@@ -60,9 +60,14 @@ export const dingTalkConfigSchema = z.object({
60
60
  'Message format:\n' +
61
61
  ' - text: Plain text (recommended, supports tables)\n' +
62
62
  ' - markdown: DingTalk markdown (limited support, no tables)\n' +
63
- ' - richtext: Alias for markdown (deprecated, use markdown instead)'
63
+ ' - richtext: Alias for markdown (deprecated, use markdown instead)\n' +
64
+ ' - auto: Auto-detect markdown features in response'
64
65
  ),
65
66
 
67
+ // 思考反馈
68
+ showThinking: z.boolean().default(false)
69
+ .describe('Send "正在思考..." feedback before AI responds'),
70
+
66
71
  // 高级配置(可选)
67
72
  textChunkLimit: z.number().int().positive().default(2000).optional()
68
73
  .describe('Text chunk size limit for long messages'),
package/src/monitor.ts CHANGED
@@ -1,5 +1,5 @@
1
- import type { DingTalkRobotMessage, ResolvedDingTalkAccount } from "./types.js";
2
- import { sendViaSessionWebhook, sendMarkdownViaSessionWebhook, sendDingTalkRestMessage, batchGetUserInfo, downloadPicture, cleanupOldPictures } from "./api.js";
1
+ import type { DingTalkRobotMessage, ResolvedDingTalkAccount, ExtractedMessage } from "./types.js";
2
+ import { sendViaSessionWebhook, sendMarkdownViaSessionWebhook, sendDingTalkRestMessage, batchGetUserInfo, downloadPicture, downloadMediaFile, cleanupOldMedia } from "./api.js";
3
3
  import { getDingTalkRuntime } from "./runtime.js";
4
4
 
5
5
  export interface DingTalkMonitorContext {
@@ -18,11 +18,11 @@ export async function startDingTalkMonitor(ctx: DingTalkMonitorContext): Promise
18
18
  }
19
19
 
20
20
  // Clean up old pictures on startup
21
- cleanupOldPictures();
21
+ cleanupOldMedia();
22
22
 
23
23
  // Schedule periodic cleanup every hour
24
24
  const cleanupInterval = setInterval(() => {
25
- cleanupOldPictures();
25
+ cleanupOldMedia();
26
26
  }, 60 * 60 * 1000); // 1 hour
27
27
 
28
28
  // Clean up on abort (only if abortSignal is provided)
@@ -80,32 +80,100 @@ export async function startDingTalkMonitor(ctx: DingTalkMonitorContext): Promise
80
80
  setStatus?.({ running: true, lastStartAt: Date.now() });
81
81
  }
82
82
 
83
- async function processInboundMessage(
83
+ /**
84
+ * Extract message content from DingTalk message into a structured format.
85
+ * Handles: text, richText, picture, audio, video, file.
86
+ */
87
+ async function extractMessageContent(
84
88
  msg: DingTalkRobotMessage,
85
- ctx: DingTalkMonitorContext,
86
- ): Promise<void> {
87
- const { account, cfg, log, setStatus } = ctx;
88
- const runtime = getDingTalkRuntime();
89
+ account: ResolvedDingTalkAccount,
90
+ log?: any,
91
+ ): Promise<ExtractedMessage> {
92
+ const msgtype = msg.msgtype || 'text';
93
+ const content = msg.content;
94
+
95
+ switch (msgtype) {
96
+ case 'text': {
97
+ return {
98
+ text: msg.text?.content?.trim() ?? '',
99
+ messageType: 'text',
100
+ };
101
+ }
89
102
 
90
- const isDm = msg.conversationType === "1";
91
- const isGroup = msg.conversationType === "2";
103
+ case 'richText': {
104
+ const result = await extractRichTextContent(msg, account, log);
105
+ return { ...result, messageType: 'richText' };
106
+ }
92
107
 
93
- // Debug: log full message structure for debugging
94
- if (msg.msgtype === 'richText' || msg.picture || (msg.atUsers && msg.atUsers.length > 0)) {
95
- log?.info?.("[dingtalk-debug] Full message structure:");
96
- log?.info?.("[dingtalk-debug] msgtype: " + msg.msgtype);
97
- log?.info?.("[dingtalk-debug] text: " + JSON.stringify(msg.text));
98
- log?.info?.("[dingtalk-debug] richText: " + JSON.stringify(msg.richText));
99
- log?.info?.("[dingtalk-debug] picture: " + JSON.stringify(msg.picture));
100
- log?.info?.("[dingtalk-debug] atUsers: " + JSON.stringify(msg.atUsers));
101
- log?.info?.("[dingtalk-debug] RAW MESSAGE: " + JSON.stringify(msg).substring(0, 500));
102
- }
108
+ case 'picture': {
109
+ return extractPictureContent(msg, log);
110
+ }
111
+
112
+ case 'audio': {
113
+ // DingTalk provides speech recognition result in content.recognition
114
+ const recognition = content?.recognition;
115
+ const downloadCode = content?.downloadCode;
116
+ log?.info?.("[dingtalk] Audio message - recognition: " + (recognition || '(none)'));
117
+ return {
118
+ text: recognition || '[语音消息]',
119
+ mediaDownloadCode: downloadCode,
120
+ mediaType: 'audio',
121
+ messageType: 'audio',
122
+ };
123
+ }
124
+
125
+ case 'video': {
126
+ const downloadCode = content?.downloadCode;
127
+ log?.info?.("[dingtalk] Video message - downloadCode: " + (downloadCode || '(none)'));
128
+ return {
129
+ text: '[视频]',
130
+ mediaDownloadCode: downloadCode,
131
+ mediaType: 'video',
132
+ messageType: 'video',
133
+ };
134
+ }
135
+
136
+ case 'file': {
137
+ const downloadCode = content?.downloadCode;
138
+ const fileName = content?.fileName || '未知文件';
139
+ log?.info?.("[dingtalk] File message - fileName: " + fileName);
140
+ return {
141
+ text: `[文件: ${fileName}]`,
142
+ mediaDownloadCode: downloadCode,
143
+ mediaType: 'file',
144
+ mediaFileName: fileName,
145
+ messageType: 'file',
146
+ };
147
+ }
103
148
 
104
- // Extract message content from text or richText
105
- let rawBody = msg.text?.content?.trim() ?? "";
149
+ default: {
150
+ // Fallback: try text.content for unknown message types
151
+ const text = msg.text?.content?.trim() || '';
152
+ if (!text) {
153
+ log?.info?.("[dingtalk] Unknown msgtype: " + msgtype + ", no text content found");
154
+ }
155
+ return {
156
+ text: text || `[${msgtype}消息]`,
157
+ messageType: msgtype,
158
+ };
159
+ }
160
+ }
161
+ }
106
162
 
107
- // If text is empty, try to extract from richText
108
- if (!rawBody && msg.richText) {
163
+ /**
164
+ * Extract content from richText messages.
165
+ * Preserves all existing edge-case handling for DingTalk's varied richText formats.
166
+ */
167
+ async function extractRichTextContent(
168
+ msg: DingTalkRobotMessage,
169
+ account: ResolvedDingTalkAccount,
170
+ log?: any,
171
+ ): Promise<{ text: string; mediaDownloadCode?: string; mediaType?: 'image' }> {
172
+ // First try: msg.text.content (DingTalk sometimes also provides text for richText)
173
+ let text = msg.text?.content?.trim() ?? '';
174
+
175
+ // Second try: msg.richText as various formats
176
+ if (!text && msg.richText) {
109
177
  try {
110
178
  const richTextStr = typeof msg.richText === 'string'
111
179
  ? msg.richText
@@ -114,71 +182,55 @@ async function processInboundMessage(
114
182
 
115
183
  const rt = msg.richText as any;
116
184
 
117
- // Try multiple possible fields for text content
118
185
  if (typeof msg.richText === 'string') {
119
- // If it's a string, use it directly
120
- rawBody = msg.richText.trim();
186
+ text = msg.richText.trim();
121
187
  } else if (rt) {
122
- // Try various possible field names
123
- rawBody = rt.text?.trim()
188
+ text = rt.text?.trim()
124
189
  || rt.content?.trim()
125
190
  || rt.richText?.trim()
126
- || "";
191
+ || '';
127
192
 
128
- // If still empty, try to extract from richText array structure
129
- if (!rawBody && Array.isArray(rt.richText)) {
193
+ if (!text && Array.isArray(rt.richText)) {
130
194
  const textParts: string[] = [];
131
195
  for (const item of rt.richText) {
132
- // Handle different types of richText elements
133
196
  if (item.text) {
134
197
  textParts.push(item.text);
135
198
  } else if (item.content) {
136
199
  textParts.push(item.content);
137
200
  }
138
- // Note: @mention text should be included in item.text by DingTalk
139
201
  }
140
- rawBody = textParts.join('').trim();
202
+ text = textParts.join('').trim();
141
203
  }
142
204
  }
143
205
 
144
- if (rawBody) {
145
- log?.info?.("[dingtalk] Extracted from richText: " + rawBody.slice(0, 100));
206
+ if (text) {
207
+ log?.info?.("[dingtalk] Extracted from richText: " + text.slice(0, 100));
146
208
  }
147
209
  } catch (err) {
148
210
  log?.info?.("[dingtalk] Failed to parse richText: " + err);
149
211
  }
150
212
  }
151
213
 
152
- // Additional fallback: try to get content from text.content even for richText messages
153
- if (!rawBody && msg.text?.content) {
154
- rawBody = msg.text.content.trim();
155
- log?.info?.("[dingtalk] Using text.content as fallback: " + rawBody.slice(0, 100));
156
- }
157
-
158
- // Handle richText messages (when msgtype === 'richText', data is in msg.content.richText)
159
- if (!rawBody && msg.msgtype === 'richText') {
160
- const content = (msg as any).content;
161
- log?.info?.("[dingtalk] RichText message - msg.content: " + JSON.stringify(content).substring(0, 200));
162
-
214
+ // Third try: msg.content.richText array (when msgtype === 'richText')
215
+ if (!text) {
216
+ const content = msg.content;
163
217
  if (content?.richText && Array.isArray(content.richText)) {
218
+ log?.info?.("[dingtalk] RichText message - msg.content: " + JSON.stringify(content).substring(0, 200));
164
219
  const parts: string[] = [];
165
220
 
166
221
  for (const item of content.richText) {
167
222
  if (item.msgType === "text" && item.content) {
168
223
  parts.push(item.content);
224
+ } else if (item.text) {
225
+ // DingTalk sometimes sends richText items as {text: "..."} without msgType wrapper
226
+ parts.push(item.text);
169
227
  } else if ((item.msgType === "picture" || item.pictureDownloadCode || item.downloadCode) && (item.downloadCode || item.pictureDownloadCode)) {
170
- // Handle picture: msgType may be absent, check for downloadCode fields
171
228
  const downloadCode = item.downloadCode || item.pictureDownloadCode;
172
- // Download the picture from richText message
173
229
  try {
174
230
  const robotCode = account.robotCode || account.clientId;
175
231
  const pictureResult = await downloadPicture(
176
- account.clientId,
177
- account.clientSecret,
178
- robotCode,
179
- downloadCode,
232
+ account.clientId!, account.clientSecret!, robotCode!, downloadCode,
180
233
  );
181
-
182
234
  if (pictureResult.filePath) {
183
235
  parts.push(`[图片: ${pictureResult.filePath}]`);
184
236
  log?.info?.("[dingtalk] Downloaded picture from richText: " + pictureResult.filePath);
@@ -194,69 +246,111 @@ async function processInboundMessage(
194
246
  }
195
247
  }
196
248
 
197
- rawBody = parts.join("");
198
- if (rawBody) {
199
- log?.info?.("[dingtalk] Extracted from msg.content.richText: " + rawBody.substring(0, 100));
249
+ text = parts.join('');
250
+ if (text) {
251
+ log?.info?.("[dingtalk] Extracted from msg.content.richText: " + text.substring(0, 100));
200
252
  }
201
253
  }
202
254
  }
203
255
 
204
- // Handle picture messages
205
- if (!rawBody && msg.msgtype === 'picture') {
206
- log?.info?.("[dingtalk] Picture message - msg.picture: " + JSON.stringify(msg.picture));
207
- log?.info?.("[dingtalk] Picture message - msg.content: " + JSON.stringify((msg as any).content));
208
- log?.info?.("[dingtalk] Full msg keys: " + Object.keys(msg).join(', '));
256
+ return { text };
257
+ }
209
258
 
210
- const content = (msg as any).content;
211
- let downloadCode: string | undefined;
259
+ /**
260
+ * Extract content from picture messages, returning the download code for media pipeline.
261
+ */
262
+ function extractPictureContent(msg: DingTalkRobotMessage, log?: any): ExtractedMessage {
263
+ log?.info?.("[dingtalk] Picture message - msg.picture: " + JSON.stringify(msg.picture));
264
+ log?.info?.("[dingtalk] Picture message - msg.content: " + JSON.stringify(msg.content));
212
265
 
213
- if (msg.picture?.downloadCode) {
214
- downloadCode = msg.picture.downloadCode;
215
- } else if (content?.downloadCode) {
216
- downloadCode = content.downloadCode;
217
- }
266
+ const content = msg.content;
267
+ let downloadCode: string | undefined;
218
268
 
219
- if (downloadCode) {
220
- log?.info?.("[dingtalk] Picture detected, downloadCode: " + downloadCode);
269
+ if (msg.picture?.downloadCode) {
270
+ downloadCode = msg.picture.downloadCode;
271
+ } else if (content?.downloadCode) {
272
+ downloadCode = content.downloadCode;
273
+ }
221
274
 
222
- // Try to download the picture
223
- try {
224
- const robotCode = account.robotCode || account.clientId;
225
- const pictureResult = await downloadPicture(
226
- account.clientId,
227
- account.clientSecret,
228
- robotCode,
229
- downloadCode,
230
- );
231
-
232
- if (pictureResult.error) {
233
- rawBody = `[用户发送了图片,但下载失败: ${pictureResult.error}]`;
234
- log?.warn?.("[dingtalk] Picture download failed: " + pictureResult.error);
235
- } else if (pictureResult.filePath) {
236
- rawBody = `[用户发送了图片]\n图片已保存到: ${pictureResult.filePath}`;
237
- log?.info?.("[dingtalk] Picture downloaded successfully: " + pictureResult.filePath);
238
-
239
- // Note: If Agent supports multimodal input, we could pass the base64 or file path
240
- // For now, we just notify the agent that a picture was sent
241
- } else {
242
- rawBody = "[用户发送了图片,但无法获取下载链接]";
243
- }
244
- } catch (err) {
245
- rawBody = `[用户发送了图片,下载时出错: ${err}]`;
246
- log?.warn?.("[dingtalk] Error downloading picture: " + err);
275
+ if (downloadCode) {
276
+ log?.info?.("[dingtalk] Picture detected, downloadCode: " + downloadCode);
277
+ return {
278
+ text: '[用户发送了图片]',
279
+ mediaDownloadCode: downloadCode,
280
+ mediaType: 'image',
281
+ messageType: 'picture',
282
+ };
283
+ }
284
+
285
+ log?.info?.("[dingtalk] Picture msgtype but no downloadCode found");
286
+ return {
287
+ text: '[用户发送了图片(无法获取下载码)]',
288
+ messageType: 'picture',
289
+ };
290
+ }
291
+
292
+ async function processInboundMessage(
293
+ msg: DingTalkRobotMessage,
294
+ ctx: DingTalkMonitorContext,
295
+ ): Promise<void> {
296
+ const { account, cfg, log, setStatus } = ctx;
297
+ const runtime = getDingTalkRuntime();
298
+
299
+ const isDm = msg.conversationType === "1";
300
+ const isGroup = msg.conversationType === "2";
301
+
302
+ // Debug: log full message structure for debugging
303
+ if (msg.msgtype === 'richText' || msg.picture || (msg.atUsers && msg.atUsers.length > 0)) {
304
+ log?.info?.("[dingtalk-debug] Full message structure:");
305
+ log?.info?.("[dingtalk-debug] msgtype: " + msg.msgtype);
306
+ log?.info?.("[dingtalk-debug] text: " + JSON.stringify(msg.text));
307
+ log?.info?.("[dingtalk-debug] richText: " + JSON.stringify(msg.richText));
308
+ log?.info?.("[dingtalk-debug] picture: " + JSON.stringify(msg.picture));
309
+ log?.info?.("[dingtalk-debug] atUsers: " + JSON.stringify(msg.atUsers));
310
+ log?.info?.("[dingtalk-debug] RAW MESSAGE: " + JSON.stringify(msg).substring(0, 500));
311
+ }
312
+
313
+ // Extract message content using structured extractor
314
+ const extracted = await extractMessageContent(msg, account, log);
315
+
316
+ // Download media if present (picture/audio/video/file)
317
+ let mediaPath: string | undefined;
318
+ let mediaType: string | undefined;
319
+
320
+ if (extracted.mediaDownloadCode && account.clientId && account.clientSecret) {
321
+ const robotCode = account.robotCode || account.clientId;
322
+ try {
323
+ const result = await downloadMediaFile(
324
+ account.clientId,
325
+ account.clientSecret,
326
+ robotCode,
327
+ extracted.mediaDownloadCode,
328
+ extracted.mediaType,
329
+ );
330
+ if (result.filePath) {
331
+ mediaPath = result.filePath;
332
+ mediaType = result.mimeType || extracted.mediaType;
333
+ log?.info?.(`[dingtalk] Downloaded ${extracted.mediaType || 'media'}: ${result.filePath}`);
334
+ } else if (result.error) {
335
+ log?.warn?.(`[dingtalk] Media download failed: ${result.error}`);
247
336
  }
248
- } else {
249
- // Even if we can't get picture info, allow the message through
250
- rawBody = "[用户发送了图片(无法获取下载码)]";
251
- log?.info?.("[dingtalk] Picture msgtype but no downloadCode found");
337
+ } catch (err) {
338
+ log?.warn?.(`[dingtalk] Media download error: ${err}`);
252
339
  }
253
340
  }
254
341
 
255
- if (!rawBody) {
256
- log?.info?.("[dingtalk] Empty message body after all attempts, skipping. msgtype=" + msg.msgtype + ", hasText=" + !!msg.text + ", hasRichText=" + !!msg.richText + ", hasPicture=" + !!msg.picture);
342
+ let rawBody = extracted.text;
343
+
344
+ if (!rawBody && !mediaPath) {
345
+ log?.info?.("[dingtalk] Empty message body after all attempts, skipping. msgtype=" + msg.msgtype);
257
346
  return;
258
347
  }
259
348
 
349
+ // If we have media but no text, provide a placeholder
350
+ if (!rawBody && mediaPath) {
351
+ rawBody = `[${extracted.messageType}] 媒体文件已下载: ${mediaPath}`;
352
+ }
353
+
260
354
  // Handle quoted/replied messages: extract the quoted content and prepend it
261
355
  if (msg.text && (msg.text as any).isReplyMsg) {
262
356
  log?.info?.("[dingtalk] Message is a reply, full text object: " + JSON.stringify(msg.text));
@@ -334,8 +428,8 @@ async function processInboundMessage(
334
428
 
335
429
  if (userIds.length > 0 && account.clientId && account.clientSecret) {
336
430
  try {
337
- // Batch query user info with 500ms timeout
338
- const userInfoMap = await batchGetUserInfo(account.clientId, account.clientSecret, userIds, 500);
431
+ // Batch query user info (3s timeout — needs token fetch + API call)
432
+ const userInfoMap = await batchGetUserInfo(account.clientId, account.clientSecret, userIds, 3000);
339
433
 
340
434
  if (userInfoMap.size > 0) {
341
435
  // Build mention list: [@张三 @李四]
@@ -416,19 +510,44 @@ async function processInboundMessage(
416
510
  account,
417
511
  };
418
512
 
513
+ // Send thinking feedback (opt-in)
514
+ if (account.config.showThinking && msg.sessionWebhook) {
515
+ try {
516
+ await sendViaSessionWebhook(msg.sessionWebhook, '正在思考...');
517
+ log?.info?.('[dingtalk] Sent thinking indicator');
518
+ } catch (_) {
519
+ // fire-and-forget, don't block processing
520
+ }
521
+ }
522
+
419
523
  // Load actual config if cfg is a config manager
420
524
  let actualCfg = cfg;
421
525
  if (cfg && typeof cfg.loadConfig === "function") {
422
526
  try {
423
527
  actualCfg = await cfg.loadConfig();
424
- console.warn("[dingtalk-debug] Loaded actual config, agents.defaults.model:", JSON.stringify(actualCfg?.agents?.defaults?.model, null, 2));
425
528
  } catch (err) {
426
- console.warn("[dingtalk-debug] Failed to load config:", err);
529
+ log?.info?.("[dingtalk] Failed to load config: " + err);
427
530
  }
428
531
  }
429
532
 
533
+ // Check if the full Clawdbot Plugin SDK pipeline is available
534
+ const hasFullPipeline = !!(
535
+ runtime?.channel?.routing?.resolveAgentRoute &&
536
+ runtime?.channel?.reply?.finalizeInboundContext &&
537
+ runtime?.channel?.reply?.createReplyDispatcherWithTyping &&
538
+ runtime?.channel?.reply?.dispatchReplyFromConfig
539
+ );
540
+
430
541
  try {
431
- if (runtime?.channel?.reply?.dispatchReplyWithBufferedBlockDispatcher) {
542
+ if (hasFullPipeline) {
543
+ // Full SDK pipeline: route → session → envelope → dispatch
544
+ await dispatchWithFullPipeline({
545
+ runtime, msg, rawBody, account, cfg: actualCfg, sessionKey, isDm,
546
+ senderId, senderName, conversationId, replyTarget,
547
+ mediaPath, mediaType, log, setStatus,
548
+ });
549
+ } else if (runtime?.channel?.reply?.dispatchReplyWithBufferedBlockDispatcher) {
550
+ // Fallback: existing buffered block dispatcher
432
551
  const ctxPayload = {
433
552
  Body: rawBody,
434
553
  RawBody: rawBody,
@@ -447,6 +566,9 @@ async function processInboundMessage(
447
566
  MessageSid: msg.msgId,
448
567
  OriginatingChannel: "dingtalk",
449
568
  OriginatingTo: "dingtalk:" + conversationId,
569
+ MediaPath: mediaPath,
570
+ MediaType: mediaType,
571
+ MediaUrl: mediaPath,
450
572
  };
451
573
 
452
574
  // Fire-and-forget: don't await to avoid blocking SDK callback during long agent runs
@@ -467,6 +589,9 @@ async function processInboundMessage(
467
589
  }).catch((err) => {
468
590
  log?.info?.("[dingtalk] Dispatch failed: " + err);
469
591
  });
592
+
593
+ // Record activity
594
+ runtime.channel?.activity?.record?.('dingtalk', account.accountId, 'message');
470
595
  } else {
471
596
  log?.info?.("[dingtalk] Runtime dispatch not available");
472
597
  }
@@ -475,12 +600,122 @@ async function processInboundMessage(
475
600
  }
476
601
  }
477
602
 
603
+ /**
604
+ * Dispatch using the full Clawdbot Plugin SDK pipeline.
605
+ * Uses resolveAgentRoute → session → envelope → finalizeContext → dispatch.
606
+ */
607
+ async function dispatchWithFullPipeline(params: {
608
+ runtime: any;
609
+ msg: DingTalkRobotMessage;
610
+ rawBody: string;
611
+ account: ResolvedDingTalkAccount;
612
+ cfg: any;
613
+ sessionKey: string;
614
+ isDm: boolean;
615
+ senderId: string;
616
+ senderName: string;
617
+ conversationId: string;
618
+ replyTarget: any;
619
+ mediaPath?: string;
620
+ mediaType?: string;
621
+ log?: any;
622
+ setStatus?: (update: Record<string, unknown>) => void;
623
+ }): Promise<void> {
624
+ const { runtime: rt, msg, rawBody, account, cfg, isDm,
625
+ senderId, senderName, conversationId, replyTarget,
626
+ log, setStatus } = params;
627
+
628
+ // 1. Resolve agent route
629
+ const route = rt.channel.routing.resolveAgentRoute({
630
+ cfg,
631
+ channel: 'dingtalk',
632
+ accountId: account.accountId,
633
+ peer: { kind: isDm ? 'dm' : 'group', id: isDm ? senderId : conversationId },
634
+ });
635
+
636
+ // 2. Resolve store path
637
+ const storePath = rt.channel.session?.resolveStorePath?.(cfg?.session?.store, { agentId: route.agentId });
638
+
639
+ // 3. Get envelope format options
640
+ const envelopeOptions = rt.channel.reply?.resolveEnvelopeFormatOptions?.(cfg) ?? {};
641
+
642
+ // 4. Read previous timestamp for session continuity
643
+ const previousTimestamp = rt.channel.session?.readSessionUpdatedAt?.({ storePath, sessionKey: route.sessionKey });
644
+
645
+ // 5. Format inbound envelope
646
+ const fromLabel = isDm ? `${senderName} (${senderId})` : `${msg.conversationTitle || conversationId} - ${senderName}`;
647
+ const body = rt.channel.reply.formatInboundEnvelope?.({
648
+ channel: 'DingTalk', from: fromLabel, timestamp: msg.createAt, body: rawBody,
649
+ chatType: isDm ? 'direct' : 'group', sender: { name: senderName, id: senderId },
650
+ previousTimestamp, envelope: envelopeOptions,
651
+ }) ?? rawBody;
652
+
653
+ // 6. Finalize inbound context (includes media info)
654
+ const to = isDm ? `dingtalk:${senderId}` : `dingtalk:group:${conversationId}`;
655
+ const ctx = rt.channel.reply.finalizeInboundContext({
656
+ Body: body, RawBody: rawBody, CommandBody: rawBody, From: to, To: to,
657
+ SessionKey: route.sessionKey, AccountId: account.accountId,
658
+ ChatType: isDm ? 'direct' : 'group',
659
+ ConversationLabel: fromLabel,
660
+ GroupSubject: isDm ? undefined : (msg.conversationTitle || conversationId),
661
+ SenderName: senderName, SenderId: senderId,
662
+ Provider: 'dingtalk', Surface: 'dingtalk',
663
+ MessageSid: msg.msgId, Timestamp: msg.createAt,
664
+ MediaPath: params.mediaPath, MediaType: params.mediaType, MediaUrl: params.mediaPath,
665
+ CommandAuthorized: true,
666
+ OriginatingChannel: 'dingtalk', OriginatingTo: to,
667
+ });
668
+
669
+ // 7. Record inbound session
670
+ if (rt.channel.session?.recordInboundSession) {
671
+ await rt.channel.session.recordInboundSession({
672
+ storePath, sessionKey: ctx.SessionKey || route.sessionKey, ctx,
673
+ updateLastRoute: isDm ? { sessionKey: route.mainSessionKey, channel: 'dingtalk', to: senderId, accountId: account.accountId } : undefined,
674
+ });
675
+ }
676
+
677
+ // 8. Create typing-aware dispatcher
678
+ const { dispatcher, replyOptions, markDispatchIdle } = rt.channel.reply.createReplyDispatcherWithTyping({
679
+ responsePrefix: '',
680
+ deliver: async (payload: any) => {
681
+ try {
682
+ const textToSend = payload.markdown || payload.text;
683
+ if (!textToSend) return { ok: true };
684
+ await deliverReply(replyTarget, textToSend, log);
685
+ setStatus?.({ lastOutboundAt: Date.now() });
686
+ return { ok: true };
687
+ } catch (err: any) {
688
+ log?.info?.("[dingtalk] Reply delivery failed: " + err.message);
689
+ return { ok: false, error: err.message };
690
+ }
691
+ },
692
+ });
693
+
694
+ // 9. Dispatch reply from config
695
+ try {
696
+ await rt.channel.reply.dispatchReplyFromConfig({ ctx, cfg, dispatcher, replyOptions });
697
+ } finally {
698
+ markDispatchIdle();
699
+ }
700
+
701
+ // 10. Record activity
702
+ rt.channel?.activity?.record?.('dingtalk', account.accountId, 'message');
703
+ }
704
+
478
705
  async function deliverReply(target: any, text: string, log?: any): Promise<void> {
479
706
  const now = Date.now();
480
707
  const chunkLimit = 2000;
481
708
  const messageFormat = target.account.config.messageFormat ?? "text";
482
- // Support both "markdown" and "richtext" (they're equivalent for DingTalk)
483
- const isMarkdown = messageFormat === "markdown" || messageFormat === "richtext";
709
+
710
+ // Determine if this message should use markdown format
711
+ let isMarkdown: boolean;
712
+ if (messageFormat === 'auto') {
713
+ isMarkdown = detectMarkdownContent(text);
714
+ log?.info?.("[dingtalk] Auto-detected format: " + (isMarkdown ? "markdown" : "text"));
715
+ } else {
716
+ // Support both "markdown" and "richtext" (they're equivalent for DingTalk)
717
+ isMarkdown = messageFormat === "markdown" || messageFormat === "richtext";
718
+ }
484
719
 
485
720
  // Convert markdown tables to text format (DingTalk doesn't support tables)
486
721
  let processedText = text;
@@ -603,6 +838,14 @@ function convertMarkdownTables(text: string): string {
603
838
  });
604
839
  }
605
840
 
841
+ /**
842
+ * Detect if text contains markdown features worth rendering as markdown.
843
+ * Checks for headers, bold, code blocks, lists, blockquotes, links, and images.
844
+ */
845
+ function detectMarkdownContent(text: string): boolean {
846
+ return /^#{1,6}\s|^\s*[-*+]\s|^\s*\d+\.\s|^\s*>|```|\*\*[^*]+\*\*|\[[^\]]+\]\([^)]+\)|!\[[^\]]*\]\([^)]+\)/m.test(text);
847
+ }
848
+
606
849
  function isSenderAllowed(senderId: string, allowFrom: string[]): boolean {
607
850
  if (allowFrom.includes("*")) return true;
608
851
  const normalized = senderId.trim().toLowerCase();
package/src/types.ts CHANGED
@@ -15,12 +15,15 @@ export interface DingTalkRobotMessage {
15
15
  sessionWebhook: string;
16
16
  robotCode: string;
17
17
  msgtype: string;
18
- text?: { content: string };
18
+ text?: { content: string; isReplyMsg?: boolean; repliedMsg?: any };
19
19
  richText?: unknown;
20
20
  picture?: { downloadCode: string };
21
+ /** Generic content field used by audio/video/file message types */
22
+ content?: any;
21
23
  atUsers?: Array<{ dingtalkId: string; staffId?: string }>;
22
24
  isInAtList?: boolean;
23
25
  conversationTitle?: string;
26
+ senderPlatform?: string;
24
27
  }
25
28
 
26
29
  /** Resolved account for DingTalk */
@@ -36,6 +39,20 @@ export interface ResolvedDingTalkAccount {
36
39
  config: Record<string, any>;
37
40
  }
38
41
 
42
+ /** Extracted message content from DingTalk */
43
+ export interface ExtractedMessage {
44
+ /** Textual representation of the message */
45
+ text: string;
46
+ /** Download code for media (picture/audio/video/file) */
47
+ mediaDownloadCode?: string;
48
+ /** Media type category */
49
+ mediaType?: 'image' | 'audio' | 'video' | 'file';
50
+ /** Original file name (for file messages) */
51
+ mediaFileName?: string;
52
+ /** Original DingTalk msgtype */
53
+ messageType: string;
54
+ }
55
+
39
56
  /** DingTalk channel config shape */
40
57
  export interface DingTalkChannelConfig {
41
58
  enabled?: boolean;
@@ -51,6 +68,7 @@ export interface DingTalkChannelConfig {
51
68
  groupAllowlist?: string[];
52
69
  requireMention?: boolean;
53
70
  textChunkLimit?: number;
54
- messageFormat?: 'text' | 'markdown' | 'richtext';
71
+ messageFormat?: 'text' | 'markdown' | 'richtext' | 'auto';
72
+ showThinking?: boolean;
55
73
  [key: string]: unknown;
56
74
  }