npm - @marshulll/openclaw-wecom - Versions diffs - 0.1.15 → 0.1.16 - Mend

@marshulll/openclaw-wecom 0.1.15 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/docs/wecom.config.full.example.json +11 -1
package/package.json +1 -1
package/wecom/src/config-schema.ts +20 -0
package/wecom/src/media-vision.ts +98 -0
package/wecom/src/types.ts +10 -0
package/wecom/src/wecom-app.ts +19 -2
package/wecom/src/wecom-bot.ts +23 -9

package/docs/wecom.config.full.example.json CHANGED Viewed

@@ -8,7 +8,17 @@
         "tempDir": "/tmp/openclaw-wecom",
         "retentionHours": 72,
         "cleanupOnStart": true,
-        "maxBytes": 10485760
+        "maxBytes": 10485760,
+        "vision": {
+          "enabled": true,
+          "baseUrl": "https://newapi.looksunlight.com/v1",
+          "apiKey": "YOUR_API_KEY",
+          "model": "gpt-4o-mini",
+          "prompt": "请描述图片内容并尽量提取可见文字。",
+          "maxTokens": 400,
+          "timeoutMs": 15000,
+          "maxBytes": 5242880
+        }
       },
       "botMediaBridge": true,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@marshulll/openclaw-wecom",
-  "version": "0.1.15",
+  "version": "0.1.16",
   "type": "module",
   "description": "OpenClaw WeCom channel plugin (intelligent bot + internal app)",
   "author": "OpenClaw",

package/wecom/src/config-schema.ts CHANGED Viewed

@@ -47,6 +47,16 @@ const accountSchema = z.object({
     retentionHours: z.number().optional(),
     cleanupOnStart: z.boolean().optional(),
     maxBytes: z.number().optional(),
+    vision: z.object({
+      enabled: z.boolean().optional(),
+      baseUrl: z.string().optional(),
+      apiKey: z.string().optional(),
+      model: z.string().optional(),
+      prompt: z.string().optional(),
+      maxTokens: z.number().optional(),
+      timeoutMs: z.number().optional(),
+      maxBytes: z.number().optional(),
+    }).optional(),
   }).optional(),
   network: z.object({
@@ -81,6 +91,16 @@ export const WecomConfigSchema = ensureJsonSchema(z.object({
     retentionHours: z.number().optional(),
     cleanupOnStart: z.boolean().optional(),
     maxBytes: z.number().optional(),
+    vision: z.object({
+      enabled: z.boolean().optional(),
+      baseUrl: z.string().optional(),
+      apiKey: z.string().optional(),
+      model: z.string().optional(),
+      prompt: z.string().optional(),
+      maxTokens: z.number().optional(),
+      timeoutMs: z.number().optional(),
+      maxBytes: z.number().optional(),
+    }).optional(),
   }).optional(),
   network: z.object({

package/wecom/src/media-vision.ts ADDED Viewed

@@ -0,0 +1,98 @@
+import type { WecomAccountConfig } from "./types.js";
+export type VisionConfig = { // Settings for the optional image-description (vision) request; every field is optional.
+  enabled?: boolean; // resolveVisionConfig returns null unless this is truthy
+  baseUrl?: string; // API root; "/chat/completions" is appended to it when the request is sent
+  apiKey?: string; // sent in the Authorization header as a Bearer token
+  model?: string; // forwarded as the "model" field of the request body
+  prompt?: string; // text part of the user message sent alongside the image
+  maxTokens?: number; // forwarded as "max_tokens"; 400 is used when unset
+  timeoutMs?: number; // the request is aborted after this many milliseconds; 15000 is used when unset
+  maxBytes?: number; // images whose buffer is longer than this are not sent; a falsy value disables the check
+};
+function resolveBaseUrl(raw?: string): string | null { // Normalizes an API root so that it ends in "/v1"; returns null when no usable value was given.
+  const value = raw?.trim();
+  if (!value) return null; // undefined, empty, or whitespace-only input
+  if (value.endsWith("/v1")) return value; // already normalized. NOTE(review): an input ending in "/v1/" fails this check and the next line turns it into ".../v1/v1" - confirm whether that is intended
+  return `${value.replace(/\/+$/, "")}/v1`;
+}
+export function resolveVisionConfig(accountConfig: WecomAccountConfig): VisionConfig | null { // Builds the effective vision settings from accountConfig.media.vision with OPENAI_* environment fallbacks; returns null when vision is disabled or no base URL / API key can be found.
+  const vision = accountConfig.media?.vision;
+  if (!vision?.enabled) return null; // the feature is opt-in
+  const baseUrl = resolveBaseUrl( // first non-empty source wins: config value, then the env vars below in the order listed
+    vision.baseUrl
+      || process.env.OPENAI_BASE_URL
+      || process.env.OPENAI_API_BASE
+      || process.env.OPENAI_ENDPOINT,
+  );
+  const apiKey = vision.apiKey || process.env.OPENAI_API_KEY || process.env.OPENAI_KEY; // same precedence: config first, then env; empty strings count as unset
+  if (!baseUrl || !apiKey) return null; // both are required to make a request
+  return {
+    enabled: true,
+    baseUrl,
+    apiKey,
+    model: vision.model || process.env.OPENAI_MODEL || "gpt-4o-mini", // literal default applies when neither config nor OPENAI_MODEL is set
+    prompt: vision.prompt
+      || "请描述图片内容并尽量提取可见文字。输出简洁中文要点。", // default prompt (Chinese): describe the image, extract visible text, answer as concise Chinese bullet points
+    maxTokens: typeof vision.maxTokens === "number" ? vision.maxTokens : 400, // any non-number value falls back to 400
+    timeoutMs: typeof vision.timeoutMs === "number" ? vision.timeoutMs : 15000, // any non-number value falls back to 15000 ms
+    maxBytes: typeof vision.maxBytes === "number" ? vision.maxBytes : undefined, // no default here; left undefined unless a number is configured
+  };
+}
+export async function describeImageWithVision(params: { // Posts the image to `${config.baseUrl}/chat/completions` and resolves to the trimmed text of the first choice, or null when the call is skipped or fails.
+  config: VisionConfig; // enabled, baseUrl and apiKey must all be set, otherwise null is returned
+  buffer: Buffer; // raw image bytes; sent base64-encoded inside a data: URL
+  mimeType: string; // media type placed in the data: URL
+}): Promise<string | null> {
+  const { config, buffer, mimeType } = params;
+  if (!config.enabled || !config.baseUrl || !config.apiKey) return null; // not configured: skip
+  if (config.maxBytes && buffer.length > config.maxBytes) { // oversized image: skip without calling the API (no check when maxBytes is 0 or unset)
+    return null;
+  }
+  const controller = new AbortController();
+  const timeout = setTimeout(() => controller.abort(), config.timeoutMs ?? 15000); // abort the fetch if it runs longer than timeoutMs (15000 ms when unset)
+  try {
+    const imageBase64 = buffer.toString("base64");
+    const payload = { // request body: a single user message carrying the prompt text and the image
+      model: config.model,
+      messages: [
+        {
+          role: "user",
+          content: [
+            { type: "text", text: config.prompt },
+            { type: "image_url", image_url: { url: `data:${mimeType};base64,${imageBase64}` } },
+          ],
+        },
+      ],
+      max_tokens: config.maxTokens ?? 400,
+    };
+    const res = await fetch(`${config.baseUrl}/chat/completions`, {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        Authorization: `Bearer ${config.apiKey}`,
+      },
+      body: JSON.stringify(payload),
+      signal: controller.signal, // ties the request to the timeout above
+    });
+    if (!res.ok) return null; // unsuccessful HTTP status: give up without reporting the error
+    const data = await res.json() as any; // response shape is not validated; the optional chaining below tolerates missing fields
+    const content = data?.choices?.[0]?.message?.content;
+    if (typeof content !== "string") return null; // only a plain string content is accepted
+    return content.trim() || null; // empty or whitespace-only text is reported as null
+  } catch { // anything thrown inside the try block (e.g. by fetch or res.json()) is reported as null
+    return null;
+  } finally {
+    clearTimeout(timeout); // always cancel the timer
+  }
+}

package/wecom/src/types.ts CHANGED Viewed

@@ -47,6 +47,16 @@ export type WecomAccountConfig = {
     retentionHours?: number;
     cleanupOnStart?: boolean;
     maxBytes?: number;
+    vision?: {
+      enabled?: boolean;
+      baseUrl?: string;
+      apiKey?: string;
+      model?: string;
+      prompt?: string;
+      maxTokens?: number;
+      timeoutMs?: number;
+      maxBytes?: number;
+    };
   };
   // Network behavior

package/wecom/src/wecom-app.ts CHANGED Viewed

@@ -10,6 +10,7 @@ import { decryptWecomEncrypted, verifyWecomSignature } from "./crypto.js";
 import { getWecomRuntime } from "./runtime.js";
 import { handleCommand } from "./commands.js";
 import { markdownToWecomText } from "./format.js";
+import { describeImageWithVision, resolveVisionConfig } from "./media-vision.js";
 import { downloadWecomMedia, fetchMediaFromUrl, sendWecomFile, sendWecomImage, sendWecomText, sendWecomVideo, sendWecomVoice, uploadWecomMedia } from "./wecom-api.js";
 const xmlParser = new XMLParser({
@@ -26,6 +27,7 @@ type MediaCacheEntry = {
   type: "image" | "voice" | "video" | "file";
   mimeType?: string;
   url?: string;
+  summary?: string;
   createdAt: number;
   size: number;
 };
@@ -467,7 +469,11 @@ async function processAppMessage(params: {
       if (cached) {
         mediaContext = { type: cached.type, path: cached.path, mimeType: cached.mimeType, url: cached.url };
         logVerbose(target, `app image cache hit: ${cached.path}`);
-        messageText = "[用户发送了一张图片]\n\n请根据图片内容回复用户。";
+        if (cached.summary) {
+          messageText = `[用户发送了一张图片]\n\n[图片识别结果]\n${cached.summary}\n\n请根据识别结果回复用户。`;
+        } else {
+          messageText = "[用户发送了一张图片]\n\n请根据图片内容回复用户。";
+        }
       } else {
         let buffer: Buffer | null = null;
         let contentType = "";
@@ -498,16 +504,27 @@ async function processAppMessage(params: {
             await writeFile(tempImagePath, buffer);
             const mimeType = contentType || "image/jpeg";
             mediaContext = { type: "image", path: tempImagePath, mimeType, url: picUrl || undefined };
+            const visionConfig = resolveVisionConfig(target.account.config);
+            const summary = visionConfig
+              ? await describeImageWithVision({ config: visionConfig, buffer, mimeType })
+              : null;
             storeCachedMedia(cacheKey, {
               path: tempImagePath,
               type: "image",
               mimeType,
               url: picUrl || undefined,
+              summary: summary ?? undefined,
               createdAt: Date.now(),
               size: buffer.length,
             });
             logVerbose(target, `app image saved (${buffer.length} bytes): ${tempImagePath}`);
-            messageText = "[用户发送了一张图片]\n\n请根据图片内容回复用户。";
+            if (summary) {
+              messageText = `[用户发送了一张图片]\n\n[图片识别结果]\n${summary}\n\n请根据识别结果回复用户。`;
+            } else {
+              messageText = "[用户发送了一张图片]\n\n请根据图片内容回复用户。";
+            }
           }
         } else {
           messageText = "[用户发送了一张图片，但下载失败]\n\n请告诉用户图片处理暂时不可用。";

package/wecom/src/wecom-bot.ts CHANGED Viewed

@@ -11,6 +11,7 @@ import type { ResolvedWecomAccount, WecomInboundMessage } from "./types.js";
 import { computeWecomMsgSignature, decryptWecomEncrypted, encryptWecomPlaintext, verifyWecomSignature } from "./crypto.js";
 import { fetchMediaFromUrl, sendWecomFile, sendWecomImage, sendWecomVideo, sendWecomVoice, uploadWecomMedia } from "./wecom-api.js";
 import { getWecomRuntime } from "./runtime.js";
+import { describeImageWithVision, resolveVisionConfig } from "./media-vision.js";
 const STREAM_TTL_MS = 10 * 60 * 1000;
 const STREAM_MAX_BYTES = 20_480;
@@ -20,7 +21,7 @@ const DEDUPE_MAX_ENTRIES = 2_000;
 const MEDIA_CACHE_MAX_ENTRIES = 200;
 const cleanupExecuted = new Set<string>();
-const mediaCache = new Map<string, { entry: InboundMedia; createdAt: number; size: number }>();
+const mediaCache = new Map<string, { entry: InboundMedia; createdAt: number; size: number; summary?: string }>();
 type StreamState = {
   streamId: string;
@@ -593,9 +594,12 @@ async function buildBotMediaMessage(params: {
     const cacheKey = buildMediaCacheKey({ url, base64 });
     const cached = await getCachedMedia(cacheKey, resolveMediaRetentionMs(target));
     if (cached) {
+      const text = msgtype === "image" && cached.summary
+        ? `[用户发送了一张图片]\n\n[图片识别结果]\n${cached.summary}\n\n请根据识别结果回复用户。`
+        : buildInboundMediaPrompt(msgtype, filename);
       return {
-        text: buildInboundMediaPrompt(msgtype, filename),
-        media: cached,
+        text,
+        media: cached.media,
       };
     }
@@ -670,9 +674,19 @@ async function buildBotMediaMessage(params: {
         mimeType: contentType || "image/jpeg",
         url,
       };
-      storeCachedMedia(cacheKey, media, buffer.length);
+      const visionConfig = resolveVisionConfig(target.account.config);
+      const summary = visionConfig
+        ? await describeImageWithVision({
+          config: visionConfig,
+          buffer,
+          mimeType: media.mimeType || "image/jpeg",
+        })
+        : null;
+      storeCachedMedia(cacheKey, media, buffer.length, summary ?? undefined);
       return {
-        text: buildInboundMediaPrompt("image"),
+        text: summary
+          ? `[用户发送了一张图片]\n\n[图片识别结果]\n${summary}\n\n请根据识别结果回复用户。`
+          : buildInboundMediaPrompt("image"),
         media,
       };
     }
@@ -812,7 +826,7 @@ function pruneMediaCache(): void {
 async function getCachedMedia(
   key: string | null,
   retentionMs?: number,
-): Promise<InboundMedia | null> {
+): Promise<{ media: InboundMedia; summary?: string } | null> {
   if (!key) return null;
   const cached = mediaCache.get(key);
   if (!cached) return null;
@@ -826,12 +840,12 @@ async function getCachedMedia(
     mediaCache.delete(key);
     return null;
   }
-  return cached.entry;
+  return { media: cached.entry, summary: cached.summary };
 }
-function storeCachedMedia(key: string | null, entry: InboundMedia, size: number): void {
+function storeCachedMedia(key: string | null, entry: InboundMedia, size: number, summary?: string): void {
   if (!key) return;
-  mediaCache.set(key, { entry, createdAt: Date.now(), size });
+  mediaCache.set(key, { entry, createdAt: Date.now(), size, summary });
   pruneMediaCache();
 }