@marshulll/openclaw-wecom 0.1.22 → 0.1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.en.md CHANGED
@@ -97,6 +97,7 @@ Install guide: `docs/INSTALL.md`
97
97
  ## Media auto recognition (optional)
98
98
  - **Voice send/receive does NOT require API**; only auto transcription needs an OpenAI-compatible API
99
99
  - **Video recognition requires ffmpeg** (install on server, then set `media.auto.video.enabled = true`)
100
+ - Video recognition supports **light / full** modes (default: light)
100
101
  - Small text files can be previewed automatically
101
102
 
102
103
  ## Send queue & operation logs (optional)
package/README.md CHANGED
@@ -99,6 +99,7 @@ openclaw gateway restart
99
99
  ## 多媒体自动识别(可选)
100
100
  - **语音收发不需要 API**,只有开启“语音自动转写”才需要 OpenAI 兼容接口
101
101
  - **视频识别需要 ffmpeg**(服务器已安装后,将 `media.auto.video.enabled` 设为 `true`)
102
+ - 视频识别支持 **light / full** 两种模式(默认 light)
102
103
  - 文本文件可自动预览(小文件直接读入)
103
104
 
104
105
  ## 发送队列与操作日志(可选)
package/README.zh.md CHANGED
@@ -99,6 +99,7 @@ openclaw gateway restart
99
99
  ## 多媒体自动识别(可选)
100
100
  - **语音收发不需要 API**,只有开启“语音自动转写”才需要 OpenAI 兼容接口
101
101
  - **视频识别需要 ffmpeg**(服务器已安装后,将 `media.auto.video.enabled` 设为 `true`)
102
+ - 视频识别支持 **light / full** 两种模式(默认 light)
102
103
  - 文本文件可自动预览(小文件直接读入)
103
104
 
104
105
  ## 发送队列与操作日志(可选)
@@ -39,7 +39,12 @@
39
39
  "video": {
40
40
  "enabled": true,
41
41
  "ffmpegPath": "ffmpeg",
42
- "maxBytes": 104857600
42
+ "maxBytes": 104857600,
43
+ "mode": "light",
44
+ "frames": 5,
45
+ "intervalSec": 2,
46
+ "maxDurationSec": 60,
47
+ "maxFrames": 30
43
48
  }
44
49
  }
45
50
  },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@marshulll/openclaw-wecom",
3
- "version": "0.1.22",
3
+ "version": "0.1.23",
4
4
  "type": "module",
5
5
  "description": "OpenClaw WeCom channel plugin (intelligent bot + internal app)",
6
6
  "author": "OpenClaw",
@@ -79,6 +79,12 @@ const accountSchema = z.object({
79
79
  enabled: z.boolean().optional(),
80
80
  ffmpegPath: z.string().optional(),
81
81
  maxBytes: z.number().optional(),
82
+ mode: z.enum(["light", "full"]).optional(),
83
+ frames: z.number().optional(),
84
+ intervalSec: z.number().optional(),
85
+ maxDurationSec: z.number().optional(),
86
+ maxFrames: z.number().optional(),
87
+ includeAudio: z.boolean().optional(),
82
88
  }).optional(),
83
89
  }).optional(),
84
90
  }).optional(),
@@ -155,6 +161,12 @@ export const WecomConfigSchema = ensureJsonSchema(z.object({
155
161
  enabled: z.boolean().optional(),
156
162
  ffmpegPath: z.string().optional(),
157
163
  maxBytes: z.number().optional(),
164
+ mode: z.enum(["light", "full"]).optional(),
165
+ frames: z.number().optional(),
166
+ intervalSec: z.number().optional(),
167
+ maxDurationSec: z.number().optional(),
168
+ maxFrames: z.number().optional(),
169
+ includeAudio: z.boolean().optional(),
158
170
  }).optional(),
159
171
  }).optional(),
160
172
  }).optional(),
@@ -1,4 +1,4 @@
1
- import { readFile, stat, writeFile, mkdtemp, rm } from "node:fs/promises";
1
+ import { readFile, stat, writeFile, mkdtemp, rm, readdir } from "node:fs/promises";
2
2
  import { spawn } from "node:child_process";
3
3
  import { tmpdir } from "node:os";
4
4
  import { basename, extname, join } from "node:path";
@@ -49,6 +49,12 @@ export type ResolvedAutoFileConfig = {
/**
 * Fully-resolved settings for automatic video recognition.
 *
 * Every field except `maxBytes` always carries a concrete value once the
 * account configuration has been resolved.
 */
export type ResolvedAutoVideoConfig = {
  /** Executable name or path used to launch ffmpeg. */
  ffmpegPath: string;
  /** Optional byte limit for videos; left undefined when no positive limit is configured. */
  maxBytes?: number;
  /** Sampling strategy: "light" spreads `frames` over the clip, "full" samples every `intervalSec`. */
  mode: "light" | "full";
  /** Target number of frames; in light mode the sampling rate is `frames / maxDurationSec`. */
  frames: number;
  /** Seconds between sampled frames; drives the sampling rate in full mode. */
  intervalSec: number;
  /** Upper bound, in seconds, on how much of the video ffmpeg processes. */
  maxDurationSec: number;
  /** Cap on how many extracted frames are read and described. */
  maxFrames: number;
  /** Whether audio should be included — NOTE(review): no consumer is visible in this view; confirm usage. */
  includeAudio: boolean;
};
53
59
 
54
60
  export function resolveAutoAudioConfig(cfg: WecomAccountConfig): ResolvedAutoAudioConfig | null {
@@ -86,9 +92,29 @@ export function resolveAutoFileConfig(cfg: WecomAccountConfig): ResolvedAutoFile
/**
 * Builds the effective video-recognition settings for an account.
 *
 * Returns `null` unless both the global auto-media switch and the video
 * switch are enabled. Numeric options that are missing or not strictly
 * positive fall back to mode-dependent defaults.
 *
 * @param cfg Account configuration to read `media.auto.video` from.
 * @returns The resolved settings, or `null` when video recognition is off.
 */
export function resolveAutoVideoConfig(cfg: WecomAccountConfig): ResolvedAutoVideoConfig | null {
  const video = cfg.media?.auto?.video;
  const autoEnabled = Boolean(cfg.media?.auto?.enabled);
  if (!autoEnabled || !video?.enabled) return null;

  // Use the configured value only when it is a strictly positive number.
  const positiveOr = (value: number | undefined, fallback: number): number => {
    if (typeof value === "number" && value > 0) return value;
    return fallback;
  };

  // Anything other than an explicit "full" resolves to light mode.
  const isFull = video.mode === "full";
  const mode: ResolvedAutoVideoConfig["mode"] = isFull ? "full" : "light";

  const maxDurationSec = positiveOr(video.maxDurationSec, isFull ? 120 : 60);
  const frames = positiveOr(video.frames, isFull ? 12 : 5);
  // Default interval spreads the frame budget evenly over the duration, at least 1 second apart.
  const evenInterval = Math.max(1, Math.round(maxDurationSec / Math.max(frames, 1)));
  const intervalSec = positiveOr(video.intervalSec, evenInterval);
  // Light mode never needs more than `frames`; full mode defaults to a cap of 30.
  const maxFrames = positiveOr(video.maxFrames, isFull ? 30 : frames);

  const hasByteLimit = typeof video.maxBytes === "number" && video.maxBytes > 0;

  return {
    ffmpegPath: video.ffmpegPath?.trim() || "ffmpeg",
    maxBytes: hasByteLimit ? video.maxBytes : undefined,
    mode,
    frames,
    intervalSec,
    maxDurationSec,
    maxFrames,
    includeAudio: video.includeAudio === true,
  };
}
94
120
 
@@ -182,6 +208,39 @@ async function runFfmpegExtractFrame(params: {
182
208
  });
183
209
  }
184
210
 
/**
 * Extracts a sequence of JPEG frames from a video by running ffmpeg.
 *
 * ffmpeg is invoked with `-t maxDurationSec` and an `fps` video filter, and
 * writes its frames to `outputPattern` (a printf-style pattern such as
 * `frame-%03d.jpg`).
 *
 * @param params.ffmpegPath Executable name or path of ffmpeg.
 * @param params.videoPath Input video file.
 * @param params.outputPattern Output file pattern handed to ffmpeg.
 * @param params.fps Sampling rate passed to the `fps` filter.
 * @param params.maxDurationSec Duration limit passed to `-t`.
 * @returns A promise that resolves when ffmpeg exits with code 0.
 * @throws Rejects with the spawn error if the process cannot be started, or
 *   with an `Error` when ffmpeg exits with a non-zero (or missing) exit code.
 */
async function runFfmpegExtractFrames(params: {
  ffmpegPath: string;
  videoPath: string;
  outputPattern: string;
  fps: number;
  maxDurationSec: number;
}): Promise<void> {
  const args = [
    "-y",
    "-i",
    params.videoPath,
    "-t",
    String(params.maxDurationSec),
    "-vf",
    `fps=${params.fps}`,
    "-q:v",
    "2",
    params.outputPattern,
  ];
  await new Promise<void>((resolve, reject) => {
    // Nothing here reads ffmpeg's stdout/stderr. With the default piped stdio
    // the pipe buffer can fill up and block ffmpeg, so the promise would never
    // settle. Discard the child's streams instead.
    const proc = spawn(params.ffmpegPath, args, { stdio: "ignore" });
    proc.on("error", reject);
    proc.on("close", (code) => {
      if (code === 0) resolve();
      else reject(new Error(`ffmpeg exited with code ${code ?? "unknown"}`));
    });
  });
}
238
+
/**
 * Shortens `text` to at most `maxChars` UTF-16 code units and appends an
 * ellipsis when anything was cut off.
 *
 * The cut never lands between the two halves of a surrogate pair, so the
 * result does not end in a broken character (for example half an emoji).
 *
 * @param text Text to shorten.
 * @param maxChars Maximum number of code units to keep before the ellipsis.
 * @returns `text` unchanged when it already fits, otherwise the shortened
 *   prefix followed by "…".
 */
function truncateText(text: string, maxChars: number): string {
  if (text.length <= maxChars) return text;
  // A negative limit must not slice from the end of the string.
  let end = Math.max(0, Math.floor(maxChars));
  if (end > 0) {
    const lastUnit = text.charCodeAt(end - 1);
    // Back off by one when the last kept unit is a lone high surrogate.
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
  }
  return `${text.slice(0, end)}…`;
}
243
+
185
244
  export async function summarizeVideoWithVision(params: {
186
245
  cfg: ResolvedAutoVideoConfig;
187
246
  account: WecomAccountConfig;
@@ -195,19 +254,61 @@ export async function summarizeVideoWithVision(params: {
195
254
  const tempDir = await mkdtemp(join(tmpdir(), "openclaw-wecom-frame-"));
196
255
  const framePath = join(tempDir, `${basename(params.videoPath)}.jpg`);
197
256
  try {
198
- await runFfmpegExtractFrame({
199
- ffmpegPath: params.cfg.ffmpegPath,
200
- videoPath: params.videoPath,
201
- framePath,
202
- });
203
- const buffer = await readFile(framePath);
204
- if (!buffer.length) return null;
205
- const summary = await describeImageWithVision({
206
- config: visionConfig,
207
- buffer,
208
- mimeType: "image/jpeg",
209
- });
210
- return summary ?? null;
257
+ const summaries: string[] = [];
258
+ if (params.cfg.mode === "light") {
259
+ const fps = Math.max(0.05, params.cfg.frames / Math.max(params.cfg.maxDurationSec, 1));
260
+ await runFfmpegExtractFrames({
261
+ ffmpegPath: params.cfg.ffmpegPath,
262
+ videoPath: params.videoPath,
263
+ outputPattern: join(tempDir, "frame-%03d.jpg"),
264
+ fps,
265
+ maxDurationSec: params.cfg.maxDurationSec,
266
+ });
267
+ const frames = (await readdir(tempDir))
268
+ .filter((name) => name.startsWith("frame-") && name.endsWith(".jpg"))
269
+ .sort()
270
+ .slice(0, params.cfg.maxFrames);
271
+ for (const frame of frames) {
272
+ const buffer = await readFile(join(tempDir, frame));
273
+ if (!buffer.length) continue;
274
+ const summary = await describeImageWithVision({
275
+ config: visionConfig,
276
+ buffer,
277
+ mimeType: "image/jpeg",
278
+ });
279
+ if (summary) summaries.push(summary);
280
+ }
281
+ } else {
282
+ const fps = Math.max(0.1, 1 / Math.max(params.cfg.intervalSec, 1));
283
+ await runFfmpegExtractFrames({
284
+ ffmpegPath: params.cfg.ffmpegPath,
285
+ videoPath: params.videoPath,
286
+ outputPattern: join(tempDir, "frame-%03d.jpg"),
287
+ fps,
288
+ maxDurationSec: params.cfg.maxDurationSec,
289
+ });
290
+ const frames = (await readdir(tempDir))
291
+ .filter((name) => name.startsWith("frame-") && name.endsWith(".jpg"))
292
+ .sort()
293
+ .slice(0, params.cfg.maxFrames);
294
+ for (const frame of frames) {
295
+ const buffer = await readFile(join(tempDir, frame));
296
+ if (!buffer.length) continue;
297
+ const summary = await describeImageWithVision({
298
+ config: visionConfig,
299
+ buffer,
300
+ mimeType: "image/jpeg",
301
+ });
302
+ if (summary) summaries.push(summary);
303
+ }
304
+ }
305
+
306
+ if (summaries.length === 0) return null;
307
+ const unique = Array.from(new Set(summaries.map((s) => s.trim()).filter(Boolean)));
308
+ const maxChars = 1600;
309
+ const lines = unique.slice(0, params.cfg.maxFrames).map((s, idx) => `${idx + 1}. ${s}`);
310
+ const joined = truncateText(lines.join("\n"), maxChars);
311
+ return `关键帧概述(${unique.length}帧)\n${joined}`;
211
312
  } catch {
212
313
  return null;
213
314
  } finally {
@@ -79,6 +79,12 @@ export type WecomAccountConfig = {
79
79
  enabled?: boolean;
80
80
  ffmpegPath?: string;
81
81
  maxBytes?: number;
82
+ mode?: "light" | "full";
83
+ frames?: number;
84
+ intervalSec?: number;
85
+ maxDurationSec?: number;
86
+ maxFrames?: number;
87
+ includeAudio?: boolean;
82
88
  };
83
89
  };
84
90
  };