@yaoyuanchao/dingtalk 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -69,15 +69,12 @@ This release transforms the DingTalk plugin into an official Clawdbot plugin tha
69
69
 
70
70
  ### Migration Guide
71
71
 
72
- If you're upgrading from v0.1.0:
72
+ If you're upgrading from v0.1.0, see [UPGRADE.md](./UPGRADE.md) for detailed steps.
73
73
 
74
+ Summary:
74
75
  1. **No configuration changes required** - existing configs work as-is
75
- 2. **Optional**: Try the new onboarding wizard for fresh setup
76
- 3. **Optional**: Reinstall via NPM for easier updates:
77
- ```bash
78
- clawdbot plugins uninstall dingtalk
79
- clawdbot plugins install @yaoyuanchao/dingtalk
80
- ```
76
+ 2. Backup, stop gateway, delete old plugin, install via NPM, restart
77
+ 3. **Optional**: Try the new onboarding wizard for fresh setup
81
78
 
82
79
  ## [0.1.0] - 2026-01-26
83
80
 
package/README.md CHANGED
@@ -24,6 +24,8 @@
24
24
 
25
25
  ## 快速开始
26
26
 
27
+ > **从 v0.1.0 升级?** 查看 [升级指南](./UPGRADE.md)
28
+
27
29
  ### 方式一:官方安装(推荐)
28
30
 
29
31
  ```bash
@@ -81,7 +83,7 @@ clawdbot plugins install .
81
83
  "allowFrom": ["YOUR_STAFF_ID"]
82
84
  },
83
85
  "groupPolicy": "allowlist",
84
- "groupAllowlist": ["cidlnNrtqQ4kGskU56Qni6zTg=="],
86
+ "groupAllowlist": ["YOUR_CONVERSATION_ID"],
85
87
  "requireMention": true
86
88
  }
87
89
  }
@@ -147,7 +149,7 @@ tail -f /tmp/clawdbot/clawdbot-$(date +%Y-%m-%d).log | grep dingtalk
147
149
 
148
150
  1. 在钉钉中找到机器人
149
151
  2. 发送任意消息
150
- 3. 机器人会回复:"Access denied. Your staffId: 050914185922786044 Ask admin to add you."
152
+ 3. 机器人会回复:"Access denied. Your staffId: XXXXXXXXXXXXXXXXXXXX Ask admin to add you."
151
153
  4. 将这个 staffId 添加到配置文件的 `dm.allowFrom` 数组中
152
154
  5. 重启 gateway
153
155
 
@@ -168,7 +170,7 @@ tail -f /tmp/clawdbot/clawdbot-$(date +%Y-%m-%d).log | grep "dingtalk.*Group"
168
170
  1. 临时修改配置为 `groupPolicy: "open"`
169
171
  2. 重启 gateway
170
172
  3. 在群聊中 @机器人发送消息
171
- 4. 查看日志获取 conversationId(格式类似 `cidlnNrtqQ4kGskU56Qni6zTg==`)
173
+ 4. 查看日志获取 conversationId(格式类似 `cidXXXXXXXXXXXXXXXXXX==`)
172
174
  5. 将 conversationId 添加到 `groupAllowlist` 数组
173
175
  6. 改回 `groupPolicy: "allowlist"` 并重启
174
176
 
@@ -177,8 +179,8 @@ tail -f /tmp/clawdbot/clawdbot-$(date +%Y-%m-%d).log | grep "dingtalk.*Group"
177
179
  {
178
180
  "groupPolicy": "allowlist",
179
181
  "groupAllowlist": [
180
- "cidlnNrtqQ4kGskU56Qni6zTg==",
181
- "anotherConversationId123=="
182
+ "cidXXXXXXXXXXXXXXXXXX==",
183
+ "cidYYYYYYYYYYYYYYYYYY=="
182
184
  ],
183
185
  "requireMention": true
184
186
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@yaoyuanchao/dingtalk",
3
- "version": "1.2.0",
3
+ "version": "1.3.0",
4
4
  "type": "module",
5
5
  "description": "DingTalk channel plugin for Clawdbot with Stream Mode support",
6
6
  "license": "MIT",
package/src/api.ts CHANGED
@@ -7,8 +7,8 @@ import os from "node:os";
7
7
  const DINGTALK_API_BASE = "https://api.dingtalk.com/v1.0";
8
8
  const DINGTALK_OAPI_BASE = "https://oapi.dingtalk.com";
9
9
 
10
- /** Temp directory for downloaded pictures */
11
- const TEMP_DIR = path.join(os.tmpdir(), "dingtalk-pictures");
10
+ /** Temp directory for downloaded media files */
11
+ const TEMP_DIR = path.join(os.tmpdir(), "dingtalk-media");
12
12
 
13
13
  /** Cache access tokens per clientId */
14
14
  const tokenCache = new Map<string, { token: string; expiresAt: number }>();
@@ -359,8 +359,78 @@ export async function downloadPicture(
359
359
  }
360
360
  }
361
361
 
362
- /** Clean up old picture files (older than 1 hour) */
363
- export function cleanupOldPictures(): void {
362
+ /** Extension mapping for media types */
363
+ const MEDIA_EXTENSIONS: Record<string, string> = {
364
+ 'image/jpeg': '.jpg',
365
+ 'image/png': '.png',
366
+ 'image/gif': '.gif',
367
+ 'image/webp': '.webp',
368
+ 'audio/amr': '.amr',
369
+ 'audio/mpeg': '.mp3',
370
+ 'audio/mp4': '.m4a',
371
+ 'video/mp4': '.mp4',
372
+ 'application/pdf': '.pdf',
373
+ 'application/octet-stream': '.bin',
374
+ };
375
+
376
+ /** Download media file (picture/audio/video/file) from DingTalk */
377
+ export async function downloadMediaFile(
378
+ clientId: string,
379
+ clientSecret: string,
380
+ robotCode: string,
381
+ downloadCode: string,
382
+ mediaType?: string,
383
+ ): Promise<{ filePath?: string; mimeType?: string; error?: string }> {
384
+ try {
385
+ const token = await getDingTalkAccessToken(clientId, clientSecret);
386
+
387
+ const url = `${DINGTALK_API_BASE}/robot/messageFiles/download`;
388
+ const headers = { "x-acs-dingtalk-access-token": token };
389
+ const body = { downloadCode, robotCode };
390
+
391
+ const response = await jsonPost(url, body, headers);
392
+
393
+ if (response.errcode && response.errcode !== 0) {
394
+ console.warn(`[dingtalk] Media download failed: ${response.errmsg}`);
395
+ return { error: response.errmsg || "Download failed" };
396
+ }
397
+
398
+ if (response.downloadUrl) {
399
+ const mediaBuffer = await httpGetBuffer(response.downloadUrl);
400
+
401
+ if (!fs.existsSync(TEMP_DIR)) {
402
+ fs.mkdirSync(TEMP_DIR, { recursive: true });
403
+ }
404
+
405
+ // Determine file extension from content type or media type hint
406
+ const contentType = response.contentType || '';
407
+ const ext = MEDIA_EXTENSIONS[contentType]
408
+ || (mediaType === 'audio' ? '.amr' : undefined)
409
+ || (mediaType === 'video' ? '.mp4' : undefined)
410
+ || (mediaType === 'image' ? '.jpg' : undefined)
411
+ || '.bin';
412
+
413
+ const timestamp = Date.now();
414
+ const prefix = mediaType || 'media';
415
+ const filename = `${prefix}_${timestamp}${ext}`;
416
+ const filePath = path.join(TEMP_DIR, filename);
417
+
418
+ fs.writeFileSync(filePath, mediaBuffer);
419
+
420
+ console.log(`[dingtalk] Media downloaded: ${filePath} (${mediaBuffer.length} bytes, type=${contentType || mediaType || 'unknown'})`);
421
+
422
+ return { filePath, mimeType: contentType || undefined };
423
+ }
424
+
425
+ return { error: "No download URL in response" };
426
+ } catch (err) {
427
+ console.warn(`[dingtalk] Error downloading media:`, err);
428
+ return { error: String(err) };
429
+ }
430
+ }
431
+
432
+ /** Clean up old media files (older than 1 hour) */
433
+ export function cleanupOldMedia(): void {
364
434
  try {
365
435
  if (!fs.existsSync(TEMP_DIR)) return;
366
436
 
@@ -373,10 +443,13 @@ export function cleanupOldPictures(): void {
373
443
 
374
444
  if (stats.mtimeMs < oneHourAgo) {
375
445
  fs.unlinkSync(filePath);
376
- console.log(`[dingtalk] Cleaned up old picture: ${filePath}`);
446
+ console.log(`[dingtalk] Cleaned up old media: ${filePath}`);
377
447
  }
378
448
  }
379
449
  } catch (err) {
380
- console.warn(`[dingtalk] Error cleaning up pictures:`, err);
450
+ console.warn(`[dingtalk] Error cleaning up media:`, err);
381
451
  }
382
452
  }
453
+
454
+ /** @deprecated Use cleanupOldMedia() instead */
455
+ export const cleanupOldPictures = cleanupOldMedia;
package/src/channel.ts CHANGED
@@ -97,9 +97,15 @@ export const dingtalkPlugin = {
97
97
  messageFormat: {
98
98
  type: 'string',
99
99
  title: 'Message Format',
100
- enum: ['text', 'markdown'],
100
+ enum: ['text', 'markdown', 'auto'],
101
101
  default: 'text',
102
- description: 'text=plain text, markdown=DingTalk markdown (limited: no tables, use text for tables)',
102
+ description: 'text=plain text, markdown=always markdown, auto=detect markdown features in response',
103
+ },
104
+ showThinking: {
105
+ type: 'boolean',
106
+ title: 'Show Thinking Indicator',
107
+ default: false,
108
+ description: 'Send "正在思考..." feedback before AI processing begins',
103
109
  },
104
110
  },
105
111
  required: ['clientId', 'clientSecret'],
@@ -235,6 +241,16 @@ export const dingtalkPlugin = {
235
241
 
236
242
  log.info?.('[dingtalk] Starting Stream connection...');
237
243
 
244
+ // Record start activity
245
+ (runtime as any).channel?.activity?.record?.('dingtalk', account.accountId, 'start');
246
+
247
+ // Record stop activity on abort
248
+ if (signal) {
249
+ signal.addEventListener('abort', () => {
250
+ (runtime as any).channel?.activity?.record?.('dingtalk', account.accountId, 'stop');
251
+ }, { once: true });
252
+ }
253
+
238
254
  try {
239
255
  await startDingTalkMonitor({
240
256
  account,
@@ -9,8 +9,8 @@ export const groupPolicySchema = z.enum(['disabled', 'allowlist', 'open'], {
9
9
  description: 'Group chat access control policy',
10
10
  });
11
11
 
12
- export const messageFormatSchema = z.enum(['text', 'markdown'], {
13
- description: 'Message format for bot responses',
12
+ export const messageFormatSchema = z.enum(['text', 'markdown', 'richtext', 'auto'], {
13
+ description: 'Message format for bot responses (richtext is an alias for markdown, auto detects markdown features)',
14
14
  });
15
15
 
16
16
  // DingTalk 配置 Schema
@@ -59,9 +59,15 @@ export const dingTalkConfigSchema = z.object({
59
59
  .describe(
60
60
  'Message format:\n' +
61
61
  ' - text: Plain text (recommended, supports tables)\n' +
62
- ' - markdown: DingTalk markdown (limited support, no tables)'
62
+ ' - markdown: DingTalk markdown (limited support, no tables)\n' +
63
+ ' - richtext: Alias for markdown (deprecated, use markdown instead)\n' +
64
+ ' - auto: Auto-detect markdown features in response'
63
65
  ),
64
66
 
67
+ // 思考反馈
68
+ showThinking: z.boolean().default(false)
69
+ .describe('Send "正在思考..." feedback before AI responds'),
70
+
65
71
  // 高级配置(可选)
66
72
  textChunkLimit: z.number().int().positive().default(2000).optional()
67
73
  .describe('Text chunk size limit for long messages'),
package/src/monitor.ts CHANGED
@@ -1,5 +1,5 @@
1
- import type { DingTalkRobotMessage, ResolvedDingTalkAccount } from "./types.js";
2
- import { sendViaSessionWebhook, sendMarkdownViaSessionWebhook, sendDingTalkRestMessage, batchGetUserInfo, downloadPicture, cleanupOldPictures } from "./api.js";
1
+ import type { DingTalkRobotMessage, ResolvedDingTalkAccount, ExtractedMessage } from "./types.js";
2
+ import { sendViaSessionWebhook, sendMarkdownViaSessionWebhook, sendDingTalkRestMessage, batchGetUserInfo, downloadPicture, downloadMediaFile, cleanupOldMedia } from "./api.js";
3
3
  import { getDingTalkRuntime } from "./runtime.js";
4
4
 
5
5
  export interface DingTalkMonitorContext {
@@ -18,11 +18,11 @@ export async function startDingTalkMonitor(ctx: DingTalkMonitorContext): Promise
18
18
  }
19
19
 
20
20
  // Clean up old pictures on startup
21
- cleanupOldPictures();
21
+ cleanupOldMedia();
22
22
 
23
23
  // Schedule periodic cleanup every hour
24
24
  const cleanupInterval = setInterval(() => {
25
- cleanupOldPictures();
25
+ cleanupOldMedia();
26
26
  }, 60 * 60 * 1000); // 1 hour
27
27
 
28
28
  // Clean up on abort (only if abortSignal is provided)
@@ -80,32 +80,100 @@ export async function startDingTalkMonitor(ctx: DingTalkMonitorContext): Promise
80
80
  setStatus?.({ running: true, lastStartAt: Date.now() });
81
81
  }
82
82
 
83
- async function processInboundMessage(
83
+ /**
84
+ * Extract message content from DingTalk message into a structured format.
85
+ * Handles: text, richText, picture, audio, video, file.
86
+ */
87
+ async function extractMessageContent(
84
88
  msg: DingTalkRobotMessage,
85
- ctx: DingTalkMonitorContext,
86
- ): Promise<void> {
87
- const { account, cfg, log, setStatus } = ctx;
88
- const runtime = getDingTalkRuntime();
89
+ account: ResolvedDingTalkAccount,
90
+ log?: any,
91
+ ): Promise<ExtractedMessage> {
92
+ const msgtype = msg.msgtype || 'text';
93
+ const content = msg.content;
94
+
95
+ switch (msgtype) {
96
+ case 'text': {
97
+ return {
98
+ text: msg.text?.content?.trim() ?? '',
99
+ messageType: 'text',
100
+ };
101
+ }
89
102
 
90
- const isDm = msg.conversationType === "1";
91
- const isGroup = msg.conversationType === "2";
103
+ case 'richText': {
104
+ const result = await extractRichTextContent(msg, account, log);
105
+ return { ...result, messageType: 'richText' };
106
+ }
92
107
 
93
- // Debug: log full message structure for debugging
94
- if (msg.msgtype === 'richText' || msg.picture || (msg.atUsers && msg.atUsers.length > 0)) {
95
- log?.info?.("[dingtalk-debug] Full message structure:");
96
- log?.info?.("[dingtalk-debug] msgtype: " + msg.msgtype);
97
- log?.info?.("[dingtalk-debug] text: " + JSON.stringify(msg.text));
98
- log?.info?.("[dingtalk-debug] richText: " + JSON.stringify(msg.richText));
99
- log?.info?.("[dingtalk-debug] picture: " + JSON.stringify(msg.picture));
100
- log?.info?.("[dingtalk-debug] atUsers: " + JSON.stringify(msg.atUsers));
101
- log?.info?.("[dingtalk-debug] RAW MESSAGE: " + JSON.stringify(msg).substring(0, 500));
102
- }
108
+ case 'picture': {
109
+ return extractPictureContent(msg, log);
110
+ }
111
+
112
+ case 'audio': {
113
+ // DingTalk provides speech recognition result in content.recognition
114
+ const recognition = content?.recognition;
115
+ const downloadCode = content?.downloadCode;
116
+ log?.info?.("[dingtalk] Audio message - recognition: " + (recognition || '(none)'));
117
+ return {
118
+ text: recognition || '[语音消息]',
119
+ mediaDownloadCode: downloadCode,
120
+ mediaType: 'audio',
121
+ messageType: 'audio',
122
+ };
123
+ }
124
+
125
+ case 'video': {
126
+ const downloadCode = content?.downloadCode;
127
+ log?.info?.("[dingtalk] Video message - downloadCode: " + (downloadCode || '(none)'));
128
+ return {
129
+ text: '[视频]',
130
+ mediaDownloadCode: downloadCode,
131
+ mediaType: 'video',
132
+ messageType: 'video',
133
+ };
134
+ }
135
+
136
+ case 'file': {
137
+ const downloadCode = content?.downloadCode;
138
+ const fileName = content?.fileName || '未知文件';
139
+ log?.info?.("[dingtalk] File message - fileName: " + fileName);
140
+ return {
141
+ text: `[文件: ${fileName}]`,
142
+ mediaDownloadCode: downloadCode,
143
+ mediaType: 'file',
144
+ mediaFileName: fileName,
145
+ messageType: 'file',
146
+ };
147
+ }
103
148
 
104
- // Extract message content from text or richText
105
- let rawBody = msg.text?.content?.trim() ?? "";
149
+ default: {
150
+ // Fallback: try text.content for unknown message types
151
+ const text = msg.text?.content?.trim() || '';
152
+ if (!text) {
153
+ log?.info?.("[dingtalk] Unknown msgtype: " + msgtype + ", no text content found");
154
+ }
155
+ return {
156
+ text: text || `[${msgtype}消息]`,
157
+ messageType: msgtype,
158
+ };
159
+ }
160
+ }
161
+ }
106
162
 
107
- // If text is empty, try to extract from richText
108
- if (!rawBody && msg.richText) {
163
+ /**
164
+ * Extract content from richText messages.
165
+ * Preserves all existing edge-case handling for DingTalk's varied richText formats.
166
+ */
167
+ async function extractRichTextContent(
168
+ msg: DingTalkRobotMessage,
169
+ account: ResolvedDingTalkAccount,
170
+ log?: any,
171
+ ): Promise<{ text: string; mediaDownloadCode?: string; mediaType?: 'image' }> {
172
+ // First try: msg.text.content (DingTalk sometimes also provides text for richText)
173
+ let text = msg.text?.content?.trim() ?? '';
174
+
175
+ // Second try: msg.richText as various formats
176
+ if (!text && msg.richText) {
109
177
  try {
110
178
  const richTextStr = typeof msg.richText === 'string'
111
179
  ? msg.richText
@@ -114,71 +182,52 @@ async function processInboundMessage(
114
182
 
115
183
  const rt = msg.richText as any;
116
184
 
117
- // Try multiple possible fields for text content
118
185
  if (typeof msg.richText === 'string') {
119
- // If it's a string, use it directly
120
- rawBody = msg.richText.trim();
186
+ text = msg.richText.trim();
121
187
  } else if (rt) {
122
- // Try various possible field names
123
- rawBody = rt.text?.trim()
188
+ text = rt.text?.trim()
124
189
  || rt.content?.trim()
125
190
  || rt.richText?.trim()
126
- || "";
191
+ || '';
127
192
 
128
- // If still empty, try to extract from richText array structure
129
- if (!rawBody && Array.isArray(rt.richText)) {
193
+ if (!text && Array.isArray(rt.richText)) {
130
194
  const textParts: string[] = [];
131
195
  for (const item of rt.richText) {
132
- // Handle different types of richText elements
133
196
  if (item.text) {
134
197
  textParts.push(item.text);
135
198
  } else if (item.content) {
136
199
  textParts.push(item.content);
137
200
  }
138
- // Note: @mention text should be included in item.text by DingTalk
139
201
  }
140
- rawBody = textParts.join('').trim();
202
+ text = textParts.join('').trim();
141
203
  }
142
204
  }
143
205
 
144
- if (rawBody) {
145
- log?.info?.("[dingtalk] Extracted from richText: " + rawBody.slice(0, 100));
206
+ if (text) {
207
+ log?.info?.("[dingtalk] Extracted from richText: " + text.slice(0, 100));
146
208
  }
147
209
  } catch (err) {
148
210
  log?.info?.("[dingtalk] Failed to parse richText: " + err);
149
211
  }
150
212
  }
151
213
 
152
- // Additional fallback: try to get content from text.content even for richText messages
153
- if (!rawBody && msg.text?.content) {
154
- rawBody = msg.text.content.trim();
155
- log?.info?.("[dingtalk] Using text.content as fallback: " + rawBody.slice(0, 100));
156
- }
157
-
158
- // Handle richText messages (when msgtype === 'richText', data is in msg.content.richText)
159
- if (!rawBody && msg.msgtype === 'richText') {
160
- const content = (msg as any).content;
161
- log?.info?.("[dingtalk] RichText message - msg.content: " + JSON.stringify(content).substring(0, 200));
162
-
214
+ // Third try: msg.content.richText array (when msgtype === 'richText')
215
+ if (!text) {
216
+ const content = msg.content;
163
217
  if (content?.richText && Array.isArray(content.richText)) {
218
+ log?.info?.("[dingtalk] RichText message - msg.content: " + JSON.stringify(content).substring(0, 200));
164
219
  const parts: string[] = [];
165
220
 
166
221
  for (const item of content.richText) {
167
222
  if (item.msgType === "text" && item.content) {
168
223
  parts.push(item.content);
169
224
  } else if ((item.msgType === "picture" || item.pictureDownloadCode || item.downloadCode) && (item.downloadCode || item.pictureDownloadCode)) {
170
- // Handle picture: msgType may be absent, check for downloadCode fields
171
225
  const downloadCode = item.downloadCode || item.pictureDownloadCode;
172
- // Download the picture from richText message
173
226
  try {
174
227
  const robotCode = account.robotCode || account.clientId;
175
228
  const pictureResult = await downloadPicture(
176
- account.clientId,
177
- account.clientSecret,
178
- robotCode,
179
- downloadCode,
229
+ account.clientId!, account.clientSecret!, robotCode!, downloadCode,
180
230
  );
181
-
182
231
  if (pictureResult.filePath) {
183
232
  parts.push(`[图片: ${pictureResult.filePath}]`);
184
233
  log?.info?.("[dingtalk] Downloaded picture from richText: " + pictureResult.filePath);
@@ -194,69 +243,111 @@ async function processInboundMessage(
194
243
  }
195
244
  }
196
245
 
197
- rawBody = parts.join("");
198
- if (rawBody) {
199
- log?.info?.("[dingtalk] Extracted from msg.content.richText: " + rawBody.substring(0, 100));
246
+ text = parts.join('');
247
+ if (text) {
248
+ log?.info?.("[dingtalk] Extracted from msg.content.richText: " + text.substring(0, 100));
200
249
  }
201
250
  }
202
251
  }
203
252
 
204
- // Handle picture messages
205
- if (!rawBody && msg.msgtype === 'picture') {
206
- log?.info?.("[dingtalk] Picture message - msg.picture: " + JSON.stringify(msg.picture));
207
- log?.info?.("[dingtalk] Picture message - msg.content: " + JSON.stringify((msg as any).content));
208
- log?.info?.("[dingtalk] Full msg keys: " + Object.keys(msg).join(', '));
253
+ return { text };
254
+ }
209
255
 
210
- const content = (msg as any).content;
211
- let downloadCode: string | undefined;
256
+ /**
257
+ * Extract content from picture messages, returning the download code for media pipeline.
258
+ */
259
+ function extractPictureContent(msg: DingTalkRobotMessage, log?: any): ExtractedMessage {
260
+ log?.info?.("[dingtalk] Picture message - msg.picture: " + JSON.stringify(msg.picture));
261
+ log?.info?.("[dingtalk] Picture message - msg.content: " + JSON.stringify(msg.content));
212
262
 
213
- if (msg.picture?.downloadCode) {
214
- downloadCode = msg.picture.downloadCode;
215
- } else if (content?.downloadCode) {
216
- downloadCode = content.downloadCode;
217
- }
263
+ const content = msg.content;
264
+ let downloadCode: string | undefined;
218
265
 
219
- if (downloadCode) {
220
- log?.info?.("[dingtalk] Picture detected, downloadCode: " + downloadCode);
266
+ if (msg.picture?.downloadCode) {
267
+ downloadCode = msg.picture.downloadCode;
268
+ } else if (content?.downloadCode) {
269
+ downloadCode = content.downloadCode;
270
+ }
221
271
 
222
- // Try to download the picture
223
- try {
224
- const robotCode = account.robotCode || account.clientId;
225
- const pictureResult = await downloadPicture(
226
- account.clientId,
227
- account.clientSecret,
228
- robotCode,
229
- downloadCode,
230
- );
231
-
232
- if (pictureResult.error) {
233
- rawBody = `[用户发送了图片,但下载失败: ${pictureResult.error}]`;
234
- log?.warn?.("[dingtalk] Picture download failed: " + pictureResult.error);
235
- } else if (pictureResult.filePath) {
236
- rawBody = `[用户发送了图片]\n图片已保存到: ${pictureResult.filePath}`;
237
- log?.info?.("[dingtalk] Picture downloaded successfully: " + pictureResult.filePath);
238
-
239
- // Note: If Agent supports multimodal input, we could pass the base64 or file path
240
- // For now, we just notify the agent that a picture was sent
241
- } else {
242
- rawBody = "[用户发送了图片,但无法获取下载链接]";
243
- }
244
- } catch (err) {
245
- rawBody = `[用户发送了图片,下载时出错: ${err}]`;
246
- log?.warn?.("[dingtalk] Error downloading picture: " + err);
272
+ if (downloadCode) {
273
+ log?.info?.("[dingtalk] Picture detected, downloadCode: " + downloadCode);
274
+ return {
275
+ text: '[用户发送了图片]',
276
+ mediaDownloadCode: downloadCode,
277
+ mediaType: 'image',
278
+ messageType: 'picture',
279
+ };
280
+ }
281
+
282
+ log?.info?.("[dingtalk] Picture msgtype but no downloadCode found");
283
+ return {
284
+ text: '[用户发送了图片(无法获取下载码)]',
285
+ messageType: 'picture',
286
+ };
287
+ }
288
+
289
+ async function processInboundMessage(
290
+ msg: DingTalkRobotMessage,
291
+ ctx: DingTalkMonitorContext,
292
+ ): Promise<void> {
293
+ const { account, cfg, log, setStatus } = ctx;
294
+ const runtime = getDingTalkRuntime();
295
+
296
+ const isDm = msg.conversationType === "1";
297
+ const isGroup = msg.conversationType === "2";
298
+
299
+ // Debug: log full message structure for debugging
300
+ if (msg.msgtype === 'richText' || msg.picture || (msg.atUsers && msg.atUsers.length > 0)) {
301
+ log?.info?.("[dingtalk-debug] Full message structure:");
302
+ log?.info?.("[dingtalk-debug] msgtype: " + msg.msgtype);
303
+ log?.info?.("[dingtalk-debug] text: " + JSON.stringify(msg.text));
304
+ log?.info?.("[dingtalk-debug] richText: " + JSON.stringify(msg.richText));
305
+ log?.info?.("[dingtalk-debug] picture: " + JSON.stringify(msg.picture));
306
+ log?.info?.("[dingtalk-debug] atUsers: " + JSON.stringify(msg.atUsers));
307
+ log?.info?.("[dingtalk-debug] RAW MESSAGE: " + JSON.stringify(msg).substring(0, 500));
308
+ }
309
+
310
+ // Extract message content using structured extractor
311
+ const extracted = await extractMessageContent(msg, account, log);
312
+
313
+ // Download media if present (picture/audio/video/file)
314
+ let mediaPath: string | undefined;
315
+ let mediaType: string | undefined;
316
+
317
+ if (extracted.mediaDownloadCode && account.clientId && account.clientSecret) {
318
+ const robotCode = account.robotCode || account.clientId;
319
+ try {
320
+ const result = await downloadMediaFile(
321
+ account.clientId,
322
+ account.clientSecret,
323
+ robotCode,
324
+ extracted.mediaDownloadCode,
325
+ extracted.mediaType,
326
+ );
327
+ if (result.filePath) {
328
+ mediaPath = result.filePath;
329
+ mediaType = result.mimeType || extracted.mediaType;
330
+ log?.info?.(`[dingtalk] Downloaded ${extracted.mediaType || 'media'}: ${result.filePath}`);
331
+ } else if (result.error) {
332
+ log?.warn?.(`[dingtalk] Media download failed: ${result.error}`);
247
333
  }
248
- } else {
249
- // Even if we can't get picture info, allow the message through
250
- rawBody = "[用户发送了图片(无法获取下载码)]";
251
- log?.info?.("[dingtalk] Picture msgtype but no downloadCode found");
334
+ } catch (err) {
335
+ log?.warn?.(`[dingtalk] Media download error: ${err}`);
252
336
  }
253
337
  }
254
338
 
255
- if (!rawBody) {
256
- log?.info?.("[dingtalk] Empty message body after all attempts, skipping. msgtype=" + msg.msgtype + ", hasText=" + !!msg.text + ", hasRichText=" + !!msg.richText + ", hasPicture=" + !!msg.picture);
339
+ let rawBody = extracted.text;
340
+
341
+ if (!rawBody && !mediaPath) {
342
+ log?.info?.("[dingtalk] Empty message body after all attempts, skipping. msgtype=" + msg.msgtype);
257
343
  return;
258
344
  }
259
345
 
346
+ // If we have media but no text, provide a placeholder
347
+ if (!rawBody && mediaPath) {
348
+ rawBody = `[${extracted.messageType}] 媒体文件已下载: ${mediaPath}`;
349
+ }
350
+
260
351
  // Handle quoted/replied messages: extract the quoted content and prepend it
261
352
  if (msg.text && (msg.text as any).isReplyMsg) {
262
353
  log?.info?.("[dingtalk] Message is a reply, full text object: " + JSON.stringify(msg.text));
@@ -416,19 +507,44 @@ async function processInboundMessage(
416
507
  account,
417
508
  };
418
509
 
510
+ // Send thinking feedback (opt-in)
511
+ if (account.config.showThinking && msg.sessionWebhook) {
512
+ try {
513
+ await sendViaSessionWebhook(msg.sessionWebhook, '正在思考...');
514
+ log?.info?.('[dingtalk] Sent thinking indicator');
515
+ } catch (_) {
516
+ // fire-and-forget, don't block processing
517
+ }
518
+ }
519
+
419
520
  // Load actual config if cfg is a config manager
420
521
  let actualCfg = cfg;
421
522
  if (cfg && typeof cfg.loadConfig === "function") {
422
523
  try {
423
524
  actualCfg = await cfg.loadConfig();
424
- console.warn("[dingtalk-debug] Loaded actual config, agents.defaults.model:", JSON.stringify(actualCfg?.agents?.defaults?.model, null, 2));
425
525
  } catch (err) {
426
- console.warn("[dingtalk-debug] Failed to load config:", err);
526
+ log?.info?.("[dingtalk] Failed to load config: " + err);
427
527
  }
428
528
  }
429
529
 
530
+ // Check if the full Clawdbot Plugin SDK pipeline is available
531
+ const hasFullPipeline = !!(
532
+ runtime?.channel?.routing?.resolveAgentRoute &&
533
+ runtime?.channel?.reply?.finalizeInboundContext &&
534
+ runtime?.channel?.reply?.createReplyDispatcherWithTyping &&
535
+ runtime?.channel?.reply?.dispatchReplyFromConfig
536
+ );
537
+
430
538
  try {
431
- if (runtime?.channel?.reply?.dispatchReplyWithBufferedBlockDispatcher) {
539
+ if (hasFullPipeline) {
540
+ // Full SDK pipeline: route → session → envelope → dispatch
541
+ await dispatchWithFullPipeline({
542
+ runtime, msg, rawBody, account, cfg: actualCfg, sessionKey, isDm,
543
+ senderId, senderName, conversationId, replyTarget,
544
+ mediaPath, mediaType, log, setStatus,
545
+ });
546
+ } else if (runtime?.channel?.reply?.dispatchReplyWithBufferedBlockDispatcher) {
547
+ // Fallback: existing buffered block dispatcher
432
548
  const ctxPayload = {
433
549
  Body: rawBody,
434
550
  RawBody: rawBody,
@@ -447,6 +563,9 @@ async function processInboundMessage(
447
563
  MessageSid: msg.msgId,
448
564
  OriginatingChannel: "dingtalk",
449
565
  OriginatingTo: "dingtalk:" + conversationId,
566
+ MediaPath: mediaPath,
567
+ MediaType: mediaType,
568
+ MediaUrl: mediaPath,
450
569
  };
451
570
 
452
571
  // Fire-and-forget: don't await to avoid blocking SDK callback during long agent runs
@@ -467,6 +586,9 @@ async function processInboundMessage(
467
586
  }).catch((err) => {
468
587
  log?.info?.("[dingtalk] Dispatch failed: " + err);
469
588
  });
589
+
590
+ // Record activity
591
+ runtime.channel?.activity?.record?.('dingtalk', account.accountId, 'message');
470
592
  } else {
471
593
  log?.info?.("[dingtalk] Runtime dispatch not available");
472
594
  }
@@ -475,12 +597,122 @@ async function processInboundMessage(
475
597
  }
476
598
  }
477
599
 
600
+ /**
601
+ * Dispatch using the full Clawdbot Plugin SDK pipeline.
602
+ * Uses resolveAgentRoute → session → envelope → finalizeContext → dispatch.
603
+ */
604
+ async function dispatchWithFullPipeline(params: {
605
+ runtime: any;
606
+ msg: DingTalkRobotMessage;
607
+ rawBody: string;
608
+ account: ResolvedDingTalkAccount;
609
+ cfg: any;
610
+ sessionKey: string;
611
+ isDm: boolean;
612
+ senderId: string;
613
+ senderName: string;
614
+ conversationId: string;
615
+ replyTarget: any;
616
+ mediaPath?: string;
617
+ mediaType?: string;
618
+ log?: any;
619
+ setStatus?: (update: Record<string, unknown>) => void;
620
+ }): Promise<void> {
621
+ const { runtime: rt, msg, rawBody, account, cfg, isDm,
622
+ senderId, senderName, conversationId, replyTarget,
623
+ log, setStatus } = params;
624
+
625
+ // 1. Resolve agent route
626
+ const route = rt.channel.routing.resolveAgentRoute({
627
+ cfg,
628
+ channel: 'dingtalk',
629
+ accountId: account.accountId,
630
+ peer: { kind: isDm ? 'dm' : 'group', id: isDm ? senderId : conversationId },
631
+ });
632
+
633
+ // 2. Resolve store path
634
+ const storePath = rt.channel.session?.resolveStorePath?.(cfg?.session?.store, { agentId: route.agentId });
635
+
636
+ // 3. Get envelope format options
637
+ const envelopeOptions = rt.channel.reply?.resolveEnvelopeFormatOptions?.(cfg) ?? {};
638
+
639
+ // 4. Read previous timestamp for session continuity
640
+ const previousTimestamp = rt.channel.session?.readSessionUpdatedAt?.({ storePath, sessionKey: route.sessionKey });
641
+
642
+ // 5. Format inbound envelope
643
+ const fromLabel = isDm ? `${senderName} (${senderId})` : `${msg.conversationTitle || conversationId} - ${senderName}`;
644
+ const body = rt.channel.reply.formatInboundEnvelope?.({
645
+ channel: 'DingTalk', from: fromLabel, timestamp: msg.createAt, body: rawBody,
646
+ chatType: isDm ? 'direct' : 'group', sender: { name: senderName, id: senderId },
647
+ previousTimestamp, envelope: envelopeOptions,
648
+ }) ?? rawBody;
649
+
650
+ // 6. Finalize inbound context (includes media info)
651
+ const to = isDm ? `dingtalk:${senderId}` : `dingtalk:group:${conversationId}`;
652
+ const ctx = rt.channel.reply.finalizeInboundContext({
653
+ Body: body, RawBody: rawBody, CommandBody: rawBody, From: to, To: to,
654
+ SessionKey: route.sessionKey, AccountId: account.accountId,
655
+ ChatType: isDm ? 'direct' : 'group',
656
+ ConversationLabel: fromLabel,
657
+ GroupSubject: isDm ? undefined : (msg.conversationTitle || conversationId),
658
+ SenderName: senderName, SenderId: senderId,
659
+ Provider: 'dingtalk', Surface: 'dingtalk',
660
+ MessageSid: msg.msgId, Timestamp: msg.createAt,
661
+ MediaPath: params.mediaPath, MediaType: params.mediaType, MediaUrl: params.mediaPath,
662
+ CommandAuthorized: true,
663
+ OriginatingChannel: 'dingtalk', OriginatingTo: to,
664
+ });
665
+
666
+ // 7. Record inbound session
667
+ if (rt.channel.session?.recordInboundSession) {
668
+ await rt.channel.session.recordInboundSession({
669
+ storePath, sessionKey: ctx.SessionKey || route.sessionKey, ctx,
670
+ updateLastRoute: isDm ? { sessionKey: route.mainSessionKey, channel: 'dingtalk', to: senderId, accountId: account.accountId } : undefined,
671
+ });
672
+ }
673
+
674
+ // 8. Create typing-aware dispatcher
675
+ const { dispatcher, replyOptions, markDispatchIdle } = rt.channel.reply.createReplyDispatcherWithTyping({
676
+ responsePrefix: '',
677
+ deliver: async (payload: any) => {
678
+ try {
679
+ const textToSend = payload.markdown || payload.text;
680
+ if (!textToSend) return { ok: true };
681
+ await deliverReply(replyTarget, textToSend, log);
682
+ setStatus?.({ lastOutboundAt: Date.now() });
683
+ return { ok: true };
684
+ } catch (err: any) {
685
+ log?.info?.("[dingtalk] Reply delivery failed: " + err.message);
686
+ return { ok: false, error: err.message };
687
+ }
688
+ },
689
+ });
690
+
691
+ // 9. Dispatch reply from config
692
+ try {
693
+ await rt.channel.reply.dispatchReplyFromConfig({ ctx, cfg, dispatcher, replyOptions });
694
+ } finally {
695
+ markDispatchIdle();
696
+ }
697
+
698
+ // 10. Record activity
699
+ rt.channel?.activity?.record?.('dingtalk', account.accountId, 'message');
700
+ }
701
+
478
702
  async function deliverReply(target: any, text: string, log?: any): Promise<void> {
479
703
  const now = Date.now();
480
704
  const chunkLimit = 2000;
481
705
  const messageFormat = target.account.config.messageFormat ?? "text";
482
- // Support both "markdown" and "richtext" (they're equivalent for DingTalk)
483
- const isMarkdown = messageFormat === "markdown" || messageFormat === "richtext";
706
+
707
+ // Determine if this message should use markdown format
708
+ let isMarkdown: boolean;
709
+ if (messageFormat === 'auto') {
710
+ isMarkdown = detectMarkdownContent(text);
711
+ log?.info?.("[dingtalk] Auto-detected format: " + (isMarkdown ? "markdown" : "text"));
712
+ } else {
713
+ // Support both "markdown" and "richtext" (they're equivalent for DingTalk)
714
+ isMarkdown = messageFormat === "markdown" || messageFormat === "richtext";
715
+ }
484
716
 
485
717
  // Convert markdown tables to text format (DingTalk doesn't support tables)
486
718
  let processedText = text;
@@ -603,6 +835,14 @@ function convertMarkdownTables(text: string): string {
603
835
  });
604
836
  }
605
837
 
838
+ /**
839
+ * Detect if text contains markdown features worth rendering as markdown.
840
+ * Checks for headers, bold, code blocks, lists, blockquotes, links, and images.
841
+ */
842
+ function detectMarkdownContent(text: string): boolean {
843
+ return /^#{1,6}\s|^\s*[-*+]\s|^\s*\d+\.\s|^\s*>|```|\*\*[^*]+\*\*|\[[^\]]+\]\([^)]+\)|!\[[^\]]*\]\([^)]+\)/m.test(text);
844
+ }
845
+
606
846
  function isSenderAllowed(senderId: string, allowFrom: string[]): boolean {
607
847
  if (allowFrom.includes("*")) return true;
608
848
  const normalized = senderId.trim().toLowerCase();
package/src/types.ts CHANGED
@@ -15,12 +15,15 @@ export interface DingTalkRobotMessage {
15
15
  sessionWebhook: string;
16
16
  robotCode: string;
17
17
  msgtype: string;
18
- text?: { content: string };
18
+ text?: { content: string; isReplyMsg?: boolean; repliedMsg?: any };
19
19
  richText?: unknown;
20
20
  picture?: { downloadCode: string };
21
+ /** Generic content field used by audio/video/file message types */
22
+ content?: any;
21
23
  atUsers?: Array<{ dingtalkId: string; staffId?: string }>;
22
24
  isInAtList?: boolean;
23
25
  conversationTitle?: string;
26
+ senderPlatform?: string;
24
27
  }
25
28
 
26
29
  /** Resolved account for DingTalk */
@@ -36,6 +39,20 @@ export interface ResolvedDingTalkAccount {
36
39
  config: Record<string, any>;
37
40
  }
38
41
 
42
+ /** Extracted message content from DingTalk */
43
+ export interface ExtractedMessage {
44
+ /** Textual representation of the message */
45
+ text: string;
46
+ /** Download code for media (picture/audio/video/file) */
47
+ mediaDownloadCode?: string;
48
+ /** Media type category */
49
+ mediaType?: 'image' | 'audio' | 'video' | 'file';
50
+ /** Original file name (for file messages) */
51
+ mediaFileName?: string;
52
+ /** Original DingTalk msgtype */
53
+ messageType: string;
54
+ }
55
+
39
56
  /** DingTalk channel config shape */
40
57
  export interface DingTalkChannelConfig {
41
58
  enabled?: boolean;
@@ -51,6 +68,7 @@ export interface DingTalkChannelConfig {
51
68
  groupAllowlist?: string[];
52
69
  requireMention?: boolean;
53
70
  textChunkLimit?: number;
54
- messageFormat?: 'text' | 'markdown';
71
+ messageFormat?: 'text' | 'markdown' | 'richtext' | 'auto';
72
+ showThinking?: boolean;
55
73
  [key: string]: unknown;
56
74
  }