@marshulll/openclaw-wecom 0.1.21 → 0.1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.en.md CHANGED
@@ -97,6 +97,7 @@ Install guide: `docs/INSTALL.md`
97
97
  ## Media auto recognition (optional)
98
98
  - **Voice send/receive does NOT require API**; only auto transcription needs an OpenAI-compatible API
99
99
  - **Video recognition requires ffmpeg** (install on server, then set `media.auto.video.enabled = true`)
100
+ - Video recognition supports **light / full** modes (default: light)
100
101
  - Small text files can be previewed automatically
101
102
 
102
103
  ## Send queue & operation logs (optional)
package/README.md CHANGED
@@ -99,6 +99,7 @@ openclaw gateway restart
99
99
  ## 多媒体自动识别(可选)
100
100
  - **语音收发不需要 API**,只有开启“语音自动转写”才需要 OpenAI 兼容接口
101
101
  - **视频识别需要 ffmpeg**(服务器已安装后,将 `media.auto.video.enabled` 设为 `true`)
102
+ - 视频识别支持 **light / full** 两种模式(默认 light)
102
103
  - 文本文件可自动预览(小文件直接读入)
103
104
 
104
105
  ## 发送队列与操作日志(可选)
package/README.zh.md CHANGED
@@ -99,6 +99,7 @@ openclaw gateway restart
99
99
  ## 多媒体自动识别(可选)
100
100
  - **语音收发不需要 API**,只有开启“语音自动转写”才需要 OpenAI 兼容接口
101
101
  - **视频识别需要 ffmpeg**(服务器已安装后,将 `media.auto.video.enabled` 设为 `true`)
102
+ - 视频识别支持 **light / full** 两种模式(默认 light)
102
103
  - 文本文件可自动预览(小文件直接读入)
103
104
 
104
105
  ## 发送队列与操作日志(可选)
@@ -39,7 +39,12 @@
39
39
  "video": {
40
40
  "enabled": true,
41
41
  "ffmpegPath": "ffmpeg",
42
- "maxBytes": 104857600
42
+ "maxBytes": 104857600,
43
+ "mode": "light",
44
+ "frames": 5,
45
+ "intervalSec": 2,
46
+ "maxDurationSec": 60,
47
+ "maxFrames": 30
43
48
  }
44
49
  }
45
50
  },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@marshulll/openclaw-wecom",
3
- "version": "0.1.21",
3
+ "version": "0.1.23",
4
4
  "type": "module",
5
5
  "description": "OpenClaw WeCom channel plugin (intelligent bot + internal app)",
6
6
  "author": "OpenClaw",
@@ -79,6 +79,12 @@ const accountSchema = z.object({
79
79
  enabled: z.boolean().optional(),
80
80
  ffmpegPath: z.string().optional(),
81
81
  maxBytes: z.number().optional(),
82
+ mode: z.enum(["light", "full"]).optional(),
83
+ frames: z.number().optional(),
84
+ intervalSec: z.number().optional(),
85
+ maxDurationSec: z.number().optional(),
86
+ maxFrames: z.number().optional(),
87
+ includeAudio: z.boolean().optional(),
82
88
  }).optional(),
83
89
  }).optional(),
84
90
  }).optional(),
@@ -155,6 +161,12 @@ export const WecomConfigSchema = ensureJsonSchema(z.object({
155
161
  enabled: z.boolean().optional(),
156
162
  ffmpegPath: z.string().optional(),
157
163
  maxBytes: z.number().optional(),
164
+ mode: z.enum(["light", "full"]).optional(),
165
+ frames: z.number().optional(),
166
+ intervalSec: z.number().optional(),
167
+ maxDurationSec: z.number().optional(),
168
+ maxFrames: z.number().optional(),
169
+ includeAudio: z.boolean().optional(),
158
170
  }).optional(),
159
171
  }).optional(),
160
172
  }).optional(),
@@ -1,4 +1,4 @@
1
- import { readFile, stat, writeFile, mkdtemp, rm } from "node:fs/promises";
1
+ import { readFile, stat, writeFile, mkdtemp, rm, readdir } from "node:fs/promises";
2
2
  import { spawn } from "node:child_process";
3
3
  import { tmpdir } from "node:os";
4
4
  import { basename, extname, join } from "node:path";
@@ -49,6 +49,12 @@ export type ResolvedAutoFileConfig = {
49
49
  export type ResolvedAutoVideoConfig = {
50
50
  ffmpegPath: string;
51
51
  maxBytes?: number;
52
+ mode: "light" | "full";
53
+ frames: number;
54
+ intervalSec: number;
55
+ maxDurationSec: number;
56
+ maxFrames: number;
57
+ includeAudio: boolean;
52
58
  };
53
59
 
54
60
  export function resolveAutoAudioConfig(cfg: WecomAccountConfig): ResolvedAutoAudioConfig | null {
@@ -86,9 +92,29 @@ export function resolveAutoFileConfig(cfg: WecomAccountConfig): ResolvedAutoFile
86
92
  export function resolveAutoVideoConfig(cfg: WecomAccountConfig): ResolvedAutoVideoConfig | null {
87
93
  const video = cfg.media?.auto?.video;
88
94
  if (!cfg.media?.auto?.enabled || !video?.enabled) return null;
95
+ const mode = video.mode === "full" ? "full" : "light";
96
+ const maxDurationSec = typeof video.maxDurationSec === "number" && video.maxDurationSec > 0
97
+ ? video.maxDurationSec
98
+ : mode === "full" ? 120 : 60;
99
+ const frames = typeof video.frames === "number" && video.frames > 0
100
+ ? video.frames
101
+ : mode === "full" ? 12 : 5;
102
+ const intervalSec = typeof video.intervalSec === "number" && video.intervalSec > 0
103
+ ? video.intervalSec
104
+ : Math.max(1, Math.round(maxDurationSec / Math.max(frames, 1)));
105
+ const maxFrames = typeof video.maxFrames === "number" && video.maxFrames > 0
106
+ ? video.maxFrames
107
+ : mode === "full" ? 30 : frames;
108
+ const includeAudio = video.includeAudio === true;
89
109
  return {
90
110
  ffmpegPath: video.ffmpegPath?.trim() || "ffmpeg",
91
111
  maxBytes: typeof video.maxBytes === "number" && video.maxBytes > 0 ? video.maxBytes : undefined,
112
+ mode,
113
+ frames,
114
+ intervalSec,
115
+ maxDurationSec,
116
+ maxFrames,
117
+ includeAudio,
92
118
  };
93
119
  }
94
120
 
@@ -182,6 +208,39 @@ async function runFfmpegExtractFrame(params: {
182
208
  });
183
209
  }
184
210
 
211
+ async function runFfmpegExtractFrames(params: {
212
+ ffmpegPath: string;
213
+ videoPath: string;
214
+ outputPattern: string;
215
+ fps: number;
216
+ maxDurationSec: number;
217
+ }): Promise<void> {
218
+ await new Promise<void>((resolve, reject) => {
219
+ const proc = spawn(params.ffmpegPath, [
220
+ "-y",
221
+ "-i",
222
+ params.videoPath,
223
+ "-t",
224
+ String(params.maxDurationSec),
225
+ "-vf",
226
+ `fps=${params.fps}`,
227
+ "-q:v",
228
+ "2",
229
+ params.outputPattern,
230
+ ]);
231
+ proc.on("error", reject);
232
+ proc.on("close", (code) => {
233
+ if (code === 0) resolve();
234
+ else reject(new Error(`ffmpeg exited with code ${code ?? "unknown"}`));
235
+ });
236
+ });
237
+ }
238
+
239
+ function truncateText(text: string, maxChars: number): string {
240
+ if (text.length <= maxChars) return text;
241
+ return `${text.slice(0, maxChars)}…`;
242
+ }
243
+
185
244
  export async function summarizeVideoWithVision(params: {
186
245
  cfg: ResolvedAutoVideoConfig;
187
246
  account: WecomAccountConfig;
@@ -195,19 +254,61 @@ export async function summarizeVideoWithVision(params: {
195
254
  const tempDir = await mkdtemp(join(tmpdir(), "openclaw-wecom-frame-"));
196
255
  const framePath = join(tempDir, `${basename(params.videoPath)}.jpg`);
197
256
  try {
198
- await runFfmpegExtractFrame({
199
- ffmpegPath: params.cfg.ffmpegPath,
200
- videoPath: params.videoPath,
201
- framePath,
202
- });
203
- const buffer = await readFile(framePath);
204
- if (!buffer.length) return null;
205
- const summary = await describeImageWithVision({
206
- config: visionConfig,
207
- buffer,
208
- mimeType: "image/jpeg",
209
- });
210
- return summary ?? null;
257
+ const summaries: string[] = [];
258
+ if (params.cfg.mode === "light") {
259
+ const fps = Math.max(0.05, params.cfg.frames / Math.max(params.cfg.maxDurationSec, 1));
260
+ await runFfmpegExtractFrames({
261
+ ffmpegPath: params.cfg.ffmpegPath,
262
+ videoPath: params.videoPath,
263
+ outputPattern: join(tempDir, "frame-%03d.jpg"),
264
+ fps,
265
+ maxDurationSec: params.cfg.maxDurationSec,
266
+ });
267
+ const frames = (await readdir(tempDir))
268
+ .filter((name) => name.startsWith("frame-") && name.endsWith(".jpg"))
269
+ .sort()
270
+ .slice(0, params.cfg.maxFrames);
271
+ for (const frame of frames) {
272
+ const buffer = await readFile(join(tempDir, frame));
273
+ if (!buffer.length) continue;
274
+ const summary = await describeImageWithVision({
275
+ config: visionConfig,
276
+ buffer,
277
+ mimeType: "image/jpeg",
278
+ });
279
+ if (summary) summaries.push(summary);
280
+ }
281
+ } else {
282
+ const fps = Math.max(0.1, 1 / Math.max(params.cfg.intervalSec, 1));
283
+ await runFfmpegExtractFrames({
284
+ ffmpegPath: params.cfg.ffmpegPath,
285
+ videoPath: params.videoPath,
286
+ outputPattern: join(tempDir, "frame-%03d.jpg"),
287
+ fps,
288
+ maxDurationSec: params.cfg.maxDurationSec,
289
+ });
290
+ const frames = (await readdir(tempDir))
291
+ .filter((name) => name.startsWith("frame-") && name.endsWith(".jpg"))
292
+ .sort()
293
+ .slice(0, params.cfg.maxFrames);
294
+ for (const frame of frames) {
295
+ const buffer = await readFile(join(tempDir, frame));
296
+ if (!buffer.length) continue;
297
+ const summary = await describeImageWithVision({
298
+ config: visionConfig,
299
+ buffer,
300
+ mimeType: "image/jpeg",
301
+ });
302
+ if (summary) summaries.push(summary);
303
+ }
304
+ }
305
+
306
+ if (summaries.length === 0) return null;
307
+ const unique = Array.from(new Set(summaries.map((s) => s.trim()).filter(Boolean)));
308
+ const maxChars = 1600;
309
+ const lines = unique.slice(0, params.cfg.maxFrames).map((s, idx) => `${idx + 1}. ${s}`);
310
+ const joined = truncateText(lines.join("\n"), maxChars);
311
+ return `关键帧概述(${unique.length}帧)\n${joined}`;
211
312
  } catch {
212
313
  return null;
213
314
  } finally {
@@ -56,43 +56,52 @@ export async function describeImageWithVision(params: {
56
56
  return null;
57
57
  }
58
58
 
59
- const controller = new AbortController();
60
- const timeout = setTimeout(() => controller.abort(), config.timeoutMs ?? 15000);
59
+ const imageBase64 = buffer.toString("base64");
60
+ const payload = {
61
+ model: config.model,
62
+ messages: [
63
+ {
64
+ role: "user",
65
+ content: [
66
+ { type: "text", text: config.prompt },
67
+ { type: "image_url", image_url: { url: `data:${mimeType};base64,${imageBase64}` } },
68
+ ],
69
+ },
70
+ ],
71
+ max_tokens: config.maxTokens ?? 400,
72
+ };
61
73
 
62
- try {
63
- const imageBase64 = buffer.toString("base64");
64
- const payload = {
65
- model: config.model,
66
- messages: [
67
- {
68
- role: "user",
69
- content: [
70
- { type: "text", text: config.prompt },
71
- { type: "image_url", image_url: { url: `data:${mimeType};base64,${imageBase64}` } },
72
- ],
74
+ for (let attempt = 0; attempt < 2; attempt += 1) {
75
+ const controller = new AbortController();
76
+ const timeout = setTimeout(() => controller.abort(), config.timeoutMs ?? 15000);
77
+ try {
78
+ const res = await fetch(`${config.baseUrl}/chat/completions`, {
79
+ method: "POST",
80
+ headers: {
81
+ "Content-Type": "application/json",
82
+ Authorization: `Bearer ${config.apiKey}`,
73
83
  },
74
- ],
75
- max_tokens: config.maxTokens ?? 400,
76
- };
77
-
78
- const res = await fetch(`${config.baseUrl}/chat/completions`, {
79
- method: "POST",
80
- headers: {
81
- "Content-Type": "application/json",
82
- Authorization: `Bearer ${config.apiKey}`,
83
- },
84
- body: JSON.stringify(payload),
85
- signal: controller.signal,
86
- });
84
+ body: JSON.stringify(payload),
85
+ signal: controller.signal,
86
+ });
87
87
 
88
- if (!res.ok) return null;
89
- const data = await res.json() as any;
90
- const content = data?.choices?.[0]?.message?.content;
91
- if (typeof content !== "string") return null;
92
- return content.trim() || null;
93
- } catch {
94
- return null;
95
- } finally {
96
- clearTimeout(timeout);
88
+ if (!res.ok) {
89
+ continue;
90
+ }
91
+ const data = await res.json() as any;
92
+ const content = data?.choices?.[0]?.message?.content;
93
+ if (typeof content !== "string") {
94
+ continue;
95
+ }
96
+ const trimmed = content.trim();
97
+ if (!trimmed) continue;
98
+ return trimmed;
99
+ } catch {
100
+ // retry once
101
+ } finally {
102
+ clearTimeout(timeout);
103
+ }
97
104
  }
105
+
106
+ return null;
98
107
  }
@@ -79,6 +79,12 @@ export type WecomAccountConfig = {
79
79
  enabled?: boolean;
80
80
  ffmpegPath?: string;
81
81
  maxBytes?: number;
82
+ mode?: "light" | "full";
83
+ frames?: number;
84
+ intervalSec?: number;
85
+ maxDurationSec?: number;
86
+ maxFrames?: number;
87
+ includeAudio?: boolean;
82
88
  };
83
89
  };
84
90
  };
@@ -695,6 +695,14 @@ async function processAppMessage(params: {
695
695
  createdAt: Date.now(),
696
696
  size: buffer.length,
697
697
  });
698
+ if (visionConfig && !summary) {
699
+ await appendOperationLog(target, {
700
+ action: "vision-image-failed",
701
+ accountId: target.account.accountId,
702
+ path: tempImagePath,
703
+ size: buffer.length,
704
+ });
705
+ }
698
706
  logVerbose(target, `app image saved (${buffer.length} bytes): ${tempImagePath}`);
699
707
  if (summary) {
700
708
  messageText = `[用户发送了一张图片]\n\n[图片识别结果]\n${summary}\n\n请根据识别结果回复用户。`;