npm - @marshulll/openclaw-wecom - Versions diffs - 0.1.21 → 0.1.23 - Mend

@marshulll/openclaw-wecom 0.1.21 → 0.1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/README.en.md +1 -0
package/README.md +1 -0
package/README.zh.md +1 -0
package/docs/wecom.config.full.example.json +6 -1
package/package.json +1 -1
package/wecom/src/config-schema.ts +12 -0
package/wecom/src/media-auto.ts +115 -14
package/wecom/src/media-vision.ts +44 -35
package/wecom/src/types.ts +6 -0
package/wecom/src/wecom-app.ts +8 -0

package/README.en.md CHANGED Viewed

@@ -97,6 +97,7 @@ Install guide: `docs/INSTALL.md`
 ## Media auto recognition (optional)
 - **Voice send/receive does NOT require API**; only auto transcription needs an OpenAI-compatible API
 - **Video recognition requires ffmpeg** (install on server, then set `media.auto.video.enabled = true`)
+- Video recognition supports **light / full** modes (default: light)
 - Small text files can be previewed automatically
 ## Send queue & operation logs (optional)

package/README.md CHANGED Viewed

@@ -99,6 +99,7 @@ openclaw gateway restart
 ## 多媒体自动识别（可选）
 - **语音收发不需要 API**，只有开启“语音自动转写”才需要 OpenAI 兼容接口
 - **视频识别需要 ffmpeg**（服务器已安装后，将 `media.auto.video.enabled` 设为 `true`）
+- 视频识别支持 **light / full** 两种模式（默认 light）
 - 文本文件可自动预览（小文件直接读入）
 ## 发送队列与操作日志（可选）

package/README.zh.md CHANGED Viewed

@@ -99,6 +99,7 @@ openclaw gateway restart
 ## 多媒体自动识别（可选）
 - **语音收发不需要 API**，只有开启“语音自动转写”才需要 OpenAI 兼容接口
 - **视频识别需要 ffmpeg**（服务器已安装后，将 `media.auto.video.enabled` 设为 `true`）
+- 视频识别支持 **light / full** 两种模式（默认 light）
 - 文本文件可自动预览（小文件直接读入）
 ## 发送队列与操作日志（可选）

package/docs/wecom.config.full.example.json CHANGED Viewed

@@ -39,7 +39,12 @@
           "video": {
             "enabled": true,
             "ffmpegPath": "ffmpeg",
-            "maxBytes": 104857600
+            "maxBytes": 104857600,
+            "mode": "light",
+            "frames": 5,
+            "intervalSec": 2,
+            "maxDurationSec": 60,
+            "maxFrames": 30
           }
         }
       },

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@marshulll/openclaw-wecom",
-  "version": "0.1.21",
+  "version": "0.1.23",
   "type": "module",
   "description": "OpenClaw WeCom channel plugin (intelligent bot + internal app)",
   "author": "OpenClaw",

package/wecom/src/config-schema.ts CHANGED Viewed

@@ -79,6 +79,12 @@ const accountSchema = z.object({
         enabled: z.boolean().optional(),
         ffmpegPath: z.string().optional(),
         maxBytes: z.number().optional(),
+        mode: z.enum(["light", "full"]).optional(),
+        frames: z.number().optional(),
+        intervalSec: z.number().optional(),
+        maxDurationSec: z.number().optional(),
+        maxFrames: z.number().optional(),
+        includeAudio: z.boolean().optional(),
       }).optional(),
     }).optional(),
   }).optional(),
@@ -155,6 +161,12 @@ export const WecomConfigSchema = ensureJsonSchema(z.object({
         enabled: z.boolean().optional(),
         ffmpegPath: z.string().optional(),
         maxBytes: z.number().optional(),
+        mode: z.enum(["light", "full"]).optional(),
+        frames: z.number().optional(),
+        intervalSec: z.number().optional(),
+        maxDurationSec: z.number().optional(),
+        maxFrames: z.number().optional(),
+        includeAudio: z.boolean().optional(),
       }).optional(),
     }).optional(),
   }).optional(),

package/wecom/src/media-auto.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { readFile, stat, writeFile, mkdtemp, rm } from "node:fs/promises";
+import { readFile, stat, writeFile, mkdtemp, rm, readdir } from "node:fs/promises";
 import { spawn } from "node:child_process";
 import { tmpdir } from "node:os";
 import { basename, extname, join } from "node:path";
@@ -49,6 +49,12 @@ export type ResolvedAutoFileConfig = {
 export type ResolvedAutoVideoConfig = {
   ffmpegPath: string;
   maxBytes?: number;
+  mode: "light" | "full";
+  frames: number;
+  intervalSec: number;
+  maxDurationSec: number;
+  maxFrames: number;
+  includeAudio: boolean;
 };
 export function resolveAutoAudioConfig(cfg: WecomAccountConfig): ResolvedAutoAudioConfig | null {
@@ -86,9 +92,29 @@ export function resolveAutoFileConfig(cfg: WecomAccountConfig): ResolvedAutoFile
 export function resolveAutoVideoConfig(cfg: WecomAccountConfig): ResolvedAutoVideoConfig | null {
   const video = cfg.media?.auto?.video;
   if (!cfg.media?.auto?.enabled || !video?.enabled) return null;
+  const mode = video.mode === "full" ? "full" : "light";
+  const maxDurationSec = typeof video.maxDurationSec === "number" && video.maxDurationSec > 0
+    ? video.maxDurationSec
+    : mode === "full" ? 120 : 60;
+  const frames = typeof video.frames === "number" && video.frames > 0
+    ? video.frames
+    : mode === "full" ? 12 : 5;
+  const intervalSec = typeof video.intervalSec === "number" && video.intervalSec > 0
+    ? video.intervalSec
+    : Math.max(1, Math.round(maxDurationSec / Math.max(frames, 1)));
+  const maxFrames = typeof video.maxFrames === "number" && video.maxFrames > 0
+    ? video.maxFrames
+    : mode === "full" ? 30 : frames;
+  const includeAudio = video.includeAudio === true;
   return {
     ffmpegPath: video.ffmpegPath?.trim() || "ffmpeg",
     maxBytes: typeof video.maxBytes === "number" && video.maxBytes > 0 ? video.maxBytes : undefined,
+    mode,
+    frames,
+    intervalSec,
+    maxDurationSec,
+    maxFrames,
+    includeAudio,
   };
 }
@@ -182,6 +208,39 @@ async function runFfmpegExtractFrame(params: {
   });
 }
+async function runFfmpegExtractFrames(params: {
+  ffmpegPath: string;
+  videoPath: string;
+  outputPattern: string;
+  fps: number;
+  maxDurationSec: number;
+}): Promise<void> {
+  await new Promise<void>((resolve, reject) => {
+    const proc = spawn(params.ffmpegPath, [
+      "-y",
+      "-i",
+      params.videoPath,
+      "-t",
+      String(params.maxDurationSec),
+      "-vf",
+      `fps=${params.fps}`,
+      "-q:v",
+      "2",
+      params.outputPattern,
+    ]);
+    proc.on("error", reject);
+    proc.on("close", (code) => {
+      if (code === 0) resolve();
+      else reject(new Error(`ffmpeg exited with code ${code ?? "unknown"}`));
+    });
+  });
+}
+function truncateText(text: string, maxChars: number): string {
+  if (text.length <= maxChars) return text;
+  return `${text.slice(0, maxChars)}…`;
+}
 export async function summarizeVideoWithVision(params: {
   cfg: ResolvedAutoVideoConfig;
   account: WecomAccountConfig;
@@ -195,19 +254,61 @@ export async function summarizeVideoWithVision(params: {
   const tempDir = await mkdtemp(join(tmpdir(), "openclaw-wecom-frame-"));
   const framePath = join(tempDir, `${basename(params.videoPath)}.jpg`);
   try {
-    await runFfmpegExtractFrame({
-      ffmpegPath: params.cfg.ffmpegPath,
-      videoPath: params.videoPath,
-      framePath,
-    });
-    const buffer = await readFile(framePath);
-    if (!buffer.length) return null;
-    const summary = await describeImageWithVision({
-      config: visionConfig,
-      buffer,
-      mimeType: "image/jpeg",
-    });
-    return summary ?? null;
+    const summaries: string[] = [];
+    if (params.cfg.mode === "light") {
+      const fps = Math.max(0.05, params.cfg.frames / Math.max(params.cfg.maxDurationSec, 1));
+      await runFfmpegExtractFrames({
+        ffmpegPath: params.cfg.ffmpegPath,
+        videoPath: params.videoPath,
+        outputPattern: join(tempDir, "frame-%03d.jpg"),
+        fps,
+        maxDurationSec: params.cfg.maxDurationSec,
+      });
+      const frames = (await readdir(tempDir))
+        .filter((name) => name.startsWith("frame-") && name.endsWith(".jpg"))
+        .sort()
+        .slice(0, params.cfg.maxFrames);
+      for (const frame of frames) {
+        const buffer = await readFile(join(tempDir, frame));
+        if (!buffer.length) continue;
+        const summary = await describeImageWithVision({
+          config: visionConfig,
+          buffer,
+          mimeType: "image/jpeg",
+        });
+        if (summary) summaries.push(summary);
+      }
+    } else {
+      const fps = Math.max(0.1, 1 / Math.max(params.cfg.intervalSec, 1));
+      await runFfmpegExtractFrames({
+        ffmpegPath: params.cfg.ffmpegPath,
+        videoPath: params.videoPath,
+        outputPattern: join(tempDir, "frame-%03d.jpg"),
+        fps,
+        maxDurationSec: params.cfg.maxDurationSec,
+      });
+      const frames = (await readdir(tempDir))
+        .filter((name) => name.startsWith("frame-") && name.endsWith(".jpg"))
+        .sort()
+        .slice(0, params.cfg.maxFrames);
+      for (const frame of frames) {
+        const buffer = await readFile(join(tempDir, frame));
+        if (!buffer.length) continue;
+        const summary = await describeImageWithVision({
+          config: visionConfig,
+          buffer,
+          mimeType: "image/jpeg",
+        });
+        if (summary) summaries.push(summary);
+      }
+    }
+    if (summaries.length === 0) return null;
+    const unique = Array.from(new Set(summaries.map((s) => s.trim()).filter(Boolean)));
+    const maxChars = 1600;
+    const lines = unique.slice(0, params.cfg.maxFrames).map((s, idx) => `${idx + 1}. ${s}`);
+    const joined = truncateText(lines.join("\n"), maxChars);
+    return `关键帧概述（${unique.length}帧）\n${joined}`;
   } catch {
     return null;
   } finally {

package/wecom/src/media-vision.ts CHANGED Viewed

@@ -56,43 +56,52 @@ export async function describeImageWithVision(params: {
     return null;
   }
-  const controller = new AbortController();
-  const timeout = setTimeout(() => controller.abort(), config.timeoutMs ?? 15000);
+  const imageBase64 = buffer.toString("base64");
+  const payload = {
+    model: config.model,
+    messages: [
+      {
+        role: "user",
+        content: [
+          { type: "text", text: config.prompt },
+          { type: "image_url", image_url: { url: `data:${mimeType};base64,${imageBase64}` } },
+        ],
+      },
+    ],
+    max_tokens: config.maxTokens ?? 400,
+  };
-  try {
-    const imageBase64 = buffer.toString("base64");
-    const payload = {
-      model: config.model,
-      messages: [
-        {
-          role: "user",
-          content: [
-            { type: "text", text: config.prompt },
-            { type: "image_url", image_url: { url: `data:${mimeType};base64,${imageBase64}` } },
-          ],
+  for (let attempt = 0; attempt < 2; attempt += 1) {
+    const controller = new AbortController();
+    const timeout = setTimeout(() => controller.abort(), config.timeoutMs ?? 15000);
+    try {
+      const res = await fetch(`${config.baseUrl}/chat/completions`, {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+          Authorization: `Bearer ${config.apiKey}`,
         },
-      ],
-      max_tokens: config.maxTokens ?? 400,
-    };
-    const res = await fetch(`${config.baseUrl}/chat/completions`, {
-      method: "POST",
-      headers: {
-        "Content-Type": "application/json",
-        Authorization: `Bearer ${config.apiKey}`,
-      },
-      body: JSON.stringify(payload),
-      signal: controller.signal,
-    });
+        body: JSON.stringify(payload),
+        signal: controller.signal,
+      });
-    if (!res.ok) return null;
-    const data = await res.json() as any;
-    const content = data?.choices?.[0]?.message?.content;
-    if (typeof content !== "string") return null;
-    return content.trim() || null;
-  } catch {
-    return null;
-  } finally {
-    clearTimeout(timeout);
+      if (!res.ok) {
+        continue;
+      }
+      const data = await res.json() as any;
+      const content = data?.choices?.[0]?.message?.content;
+      if (typeof content !== "string") {
+        continue;
+      }
+      const trimmed = content.trim();
+      if (!trimmed) continue;
+      return trimmed;
+    } catch {
+      // retry once
+    } finally {
+      clearTimeout(timeout);
+    }
   }
+  return null;
 }

package/wecom/src/types.ts CHANGED Viewed

@@ -79,6 +79,12 @@ export type WecomAccountConfig = {
         enabled?: boolean;
         ffmpegPath?: string;
         maxBytes?: number;
+        mode?: "light" | "full";
+        frames?: number;
+        intervalSec?: number;
+        maxDurationSec?: number;
+        maxFrames?: number;
+        includeAudio?: boolean;
       };
     };
   };

package/wecom/src/wecom-app.ts CHANGED Viewed

@@ -695,6 +695,14 @@ async function processAppMessage(params: {
               createdAt: Date.now(),
               size: buffer.length,
             });
+            if (visionConfig && !summary) {
+              await appendOperationLog(target, {
+                action: "vision-image-failed",
+                accountId: target.account.accountId,
+                path: tempImagePath,
+                size: buffer.length,
+              });
+            }
             logVerbose(target, `app image saved (${buffer.length} bytes): ${tempImagePath}`);
             if (summary) {
               messageText = `[用户发送了一张图片]\n\n[图片识别结果]\n${summary}\n\n请根据识别结果回复用户。`;