@sunnoy/wecom 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -9,6 +9,7 @@
9
9
  - 🌊 **Streaming Output**: Built on WeCom's latest AI bot streaming mechanism for smooth typewriter-style responses.
10
10
  - 🤖 **Dynamic Agent Management**: Automatically creates isolated agents per direct message user or group chat, with independent workspaces and conversation contexts.
11
11
  - 👥 **Deep Group Chat Integration**: Supports group message parsing with @mention triggering.
12
+ - 🎤 **Voice Message Support**: Automatically handles voice messages that WeCom has transcribed to text, passing the transcription to the AI for interaction (direct messages only).
12
13
  - 🖼️ **Image Support**: Automatic base64 encoding and sending of local images (screenshots, generated images) without requiring additional configuration.
13
14
  - 🛠️ **Command Enhancement**: Built-in commands (e.g., `/new` for new sessions, `/status` for status) with allowlist configuration.
14
15
  - 🔒 **Security & Authentication**: Full support for WeCom message encryption/decryption, URL verification, and sender validation.
@@ -216,6 +217,10 @@ AI: [Takes screenshot] → Image displays properly in WeCom ✅
216
217
 
217
218
  If an image fails to process (size limit, invalid format), the text response will still be delivered and an error will be logged.
218
219
 
220
+ ### Q: Does the bot support voice messages?
221
+
222
+ **A:** Yes! Voice messages in direct chats are automatically transcribed by WeCom and processed as text. No additional configuration needed.
223
+
219
224
  ### Q: How to configure auth token for public-facing OpenClaw with WeCom callbacks?
220
225
 
221
226
  **A:** WeCom bot **does not need** OpenClaw's Gateway Auth Token.
package/README_ZH.md CHANGED
@@ -9,6 +9,7 @@
9
9
  - 🌊 **流式输出 (Streaming)**: 基于企业微信最新的 AI 机器人流式分片机制,实现流畅的打字机式回复体验。
10
10
  - 🤖 **动态 Agent 管理**: 默认按"每个私聊用户 / 每个群聊"自动创建独立 Agent。每个 Agent 拥有独立的工作区与对话上下文,实现更强的数据隔离。
11
11
  - 👥 **群聊深度集成**: 支持群聊消息解析,可通过 @提及(At-mention)精准触发机器人响应。
12
+ - 🎤 **语音消息支持**: 自动处理企业微信转录后的语音消息,转换为文本进行 AI 交互(仅限私聊)。
12
13
  - 🖼️ **图片支持**: 自动将本地图片(截图、生成的图像)进行 base64 编码并发送,无需额外配置。
13
14
  - 🛠️ **指令增强**: 内置常用指令支持(如 `/new` 开启新会话、`/status` 查看状态等),并提供指令白名单配置功能。
14
15
  - 🔒 **安全与认证**: 完整支持企业微信消息加解密、URL 验证及发送者身份校验。
@@ -216,6 +217,10 @@ AI:[执行截图] → 图片在企业微信中正常显示 ✅
216
217
 
217
218
  如果图片处理失败(超出大小限制、格式不支持等),文本回复仍会正常发送,错误信息会记录在日志中。
218
219
 
220
+ ### Q: 机器人支持语音消息吗?
221
+
222
+ **A:** 支持!私聊中的语音消息会被企业微信自动转录为文字并作为文本处理,无需额外配置。
223
+
219
224
  ### Q: OpenClaw 开放公网需要 auth token,企业微信回调如何配置?
220
225
 
221
226
  **A:** 企业微信机器人**不需要**配置 OpenClaw 的 Gateway Auth Token。
package/index.js CHANGED
@@ -805,9 +805,45 @@ async function deliverWecomReply({ payload, account, responseUrl, senderId, stre
805
805
  senderId,
806
806
  });
807
807
 
808
+ // 处理绝对路径的 MEDIA: 行(OpenClaw 会拒绝它们,所以我们需要手动处理)
809
+ const mediaRegex = /^MEDIA:\s*(.+)$/gm;
810
+ const mediaMatches = [];
811
+ let match;
812
+ while ((match = mediaRegex.exec(text)) !== null) {
813
+ const mediaPath = match[1].trim();
814
+ // 检查是否是绝对路径(以 / 开头)
815
+ if (mediaPath.startsWith("/")) {
816
+ mediaMatches.push({
817
+ fullMatch: match[0],
818
+ path: mediaPath
819
+ });
820
+ logger.debug("Detected absolute path MEDIA line", {
821
+ streamId,
822
+ mediaPath,
823
+ line: match[0]
824
+ });
825
+ }
826
+ }
827
+
828
+ // 如果检测到绝对路径的 MEDIA 行,将图片加入队列并从文本中移除
829
+ let processedText = text;
830
+ if (mediaMatches.length > 0 && streamId) {
831
+ for (const media of mediaMatches) {
832
+ const queued = streamManager.queueImage(streamId, media.path);
833
+ if (queued) {
834
+ // 从文本中移除这行
835
+ processedText = processedText.replace(media.fullMatch, "").trim();
836
+ logger.info("Queued absolute path image for stream", {
837
+ streamId,
838
+ imagePath: media.path
839
+ });
840
+ }
841
+ }
842
+ }
843
+
808
844
  // 所有消息都通过流式发送
809
- if (!text.trim()) {
810
- logger.debug("WeCom: empty block, skipping stream update");
845
+ if (!processedText.trim()) {
846
+ logger.debug("WeCom: empty block after processing, skipping stream update");
811
847
  return;
812
848
  }
813
849
 
@@ -834,10 +870,10 @@ async function deliverWecomReply({ payload, account, responseUrl, senderId, stre
834
870
  // 尝试从 activeStreams 获取
835
871
  const activeStreamId = activeStreams.get(senderId);
836
872
  if (activeStreamId && streamManager.hasStream(activeStreamId)) {
837
- appendToStream(activeStreamId, text);
873
+ appendToStream(activeStreamId, processedText);
838
874
  logger.debug("WeCom stream appended (via activeStreams)", {
839
875
  streamId: activeStreamId,
840
- contentLength: text.length,
876
+ contentLength: processedText.length,
841
877
  });
842
878
  return;
843
879
  }
@@ -850,10 +886,10 @@ async function deliverWecomReply({ payload, account, responseUrl, senderId, stre
850
886
  return;
851
887
  }
852
888
 
853
- appendToStream(streamId, text);
889
+ appendToStream(streamId, processedText);
854
890
  logger.debug("WeCom stream appended", {
855
891
  streamId,
856
- contentLength: text.length,
892
+ contentLength: processedText.length,
857
893
  to: senderId
858
894
  });
859
895
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sunnoy/wecom",
3
- "version": "1.0.0",
3
+ "version": "1.1.0",
4
4
  "description": "Enterprise WeChat AI Bot channel plugin for OpenClaw",
5
5
  "type": "module",
6
6
  "main": "index.js",
package/webhook.js CHANGED
@@ -209,6 +209,50 @@ export class WecomWebhook {
209
209
  query: { timestamp, nonce },
210
210
  };
211
211
  }
212
+ else if (msgtype === "voice") {
213
+ // Voice message (single chat only) - WeCom automatically transcribes to text
214
+ const content = data.voice?.content || "";
215
+ const msgId = data.msgid || `msg_${Date.now()}`;
216
+ const fromUser = data.from?.userid || "";
217
+ const responseUrl = data.response_url || "";
218
+ const chatType = data.chattype || "single";
219
+ const chatId = data.chatid || "";
220
+
221
+ // Check for duplicates
222
+ if (this.deduplicator.isDuplicate(msgId)) {
223
+ logger.debug("Duplicate voice message ignored", { msgId });
224
+ return null;
225
+ }
226
+
227
+ // Validate content
228
+ if (!content.trim()) {
229
+ logger.warn("Empty voice message received", { msgId, fromUser });
230
+ return null;
231
+ }
232
+
233
+ logger.info("Received voice message (auto-transcribed by WeCom)", {
234
+ fromUser,
235
+ chatType,
236
+ chatId: chatId || "(private)",
237
+ originalType: "voice",
238
+ transcribedLength: content.length,
239
+ preview: content.substring(0, 50)
240
+ });
241
+
242
+ // Treat voice as text since WeCom already transcribed it
243
+ return {
244
+ message: {
245
+ msgId,
246
+ msgType: "text",
247
+ content,
248
+ fromUser,
249
+ chatType,
250
+ chatId,
251
+ responseUrl,
252
+ },
253
+ query: { timestamp, nonce },
254
+ };
255
+ }
212
256
  else if (msgtype === "event") {
213
257
  logger.info("Received event", { event: data.event });
214
258
  return {