@marshulll/openclaw-wecom 0.1.22 → 0.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.en.md +1 -0
- package/README.md +1 -0
- package/README.zh.md +1 -0
- package/docs/wecom.config.full.example.json +6 -1
- package/package.json +1 -1
- package/wecom/src/config-schema.ts +12 -0
- package/wecom/src/media-auto.ts +115 -14
- package/wecom/src/types.ts +6 -0
package/README.en.md
CHANGED
|
@@ -97,6 +97,7 @@ Install guide: `docs/INSTALL.md`
|
|
|
97
97
|
## Media auto recognition (optional)
|
|
98
98
|
- **Voice send/receive does NOT require API**; only auto transcription needs an OpenAI-compatible API
|
|
99
99
|
- **Video recognition requires ffmpeg** (install on server, then set `media.auto.video.enabled = true`)
|
|
100
|
+
- Video recognition supports **light / full** modes (default: light)
|
|
100
101
|
- Small text files can be previewed automatically
|
|
101
102
|
|
|
102
103
|
## Send queue & operation logs (optional)
|
package/README.md
CHANGED
|
@@ -99,6 +99,7 @@ openclaw gateway restart
|
|
|
99
99
|
## 多媒体自动识别(可选)
|
|
100
100
|
- **语音收发不需要 API**,只有开启“语音自动转写”才需要 OpenAI 兼容接口
|
|
101
101
|
- **视频识别需要 ffmpeg**(服务器已安装后,将 `media.auto.video.enabled` 设为 `true`)
|
|
102
|
+
- 视频识别支持 **light / full** 两种模式(默认 light)
|
|
102
103
|
- 文本文件可自动预览(小文件直接读入)
|
|
103
104
|
|
|
104
105
|
## 发送队列与操作日志(可选)
|
package/README.zh.md
CHANGED
|
@@ -99,6 +99,7 @@ openclaw gateway restart
|
|
|
99
99
|
## 多媒体自动识别(可选)
|
|
100
100
|
- **语音收发不需要 API**,只有开启“语音自动转写”才需要 OpenAI 兼容接口
|
|
101
101
|
- **视频识别需要 ffmpeg**(服务器已安装后,将 `media.auto.video.enabled` 设为 `true`)
|
|
102
|
+
- 视频识别支持 **light / full** 两种模式(默认 light)
|
|
102
103
|
- 文本文件可自动预览(小文件直接读入)
|
|
103
104
|
|
|
104
105
|
## 发送队列与操作日志(可选)
|
package/package.json
CHANGED
package/wecom/src/config-schema.ts
CHANGED
|
@@ -79,6 +79,12 @@ const accountSchema = z.object({
|
|
|
79
79
|
enabled: z.boolean().optional(),
|
|
80
80
|
ffmpegPath: z.string().optional(),
|
|
81
81
|
maxBytes: z.number().optional(),
|
|
82
|
+
mode: z.enum(["light", "full"]).optional(),
|
|
83
|
+
frames: z.number().optional(),
|
|
84
|
+
intervalSec: z.number().optional(),
|
|
85
|
+
maxDurationSec: z.number().optional(),
|
|
86
|
+
maxFrames: z.number().optional(),
|
|
87
|
+
includeAudio: z.boolean().optional(),
|
|
82
88
|
}).optional(),
|
|
83
89
|
}).optional(),
|
|
84
90
|
}).optional(),
|
|
@@ -155,6 +161,12 @@ export const WecomConfigSchema = ensureJsonSchema(z.object({
|
|
|
155
161
|
enabled: z.boolean().optional(),
|
|
156
162
|
ffmpegPath: z.string().optional(),
|
|
157
163
|
maxBytes: z.number().optional(),
|
|
164
|
+
mode: z.enum(["light", "full"]).optional(),
|
|
165
|
+
frames: z.number().optional(),
|
|
166
|
+
intervalSec: z.number().optional(),
|
|
167
|
+
maxDurationSec: z.number().optional(),
|
|
168
|
+
maxFrames: z.number().optional(),
|
|
169
|
+
includeAudio: z.boolean().optional(),
|
|
158
170
|
}).optional(),
|
|
159
171
|
}).optional(),
|
|
160
172
|
}).optional(),
|
package/wecom/src/media-auto.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { readFile, stat, writeFile, mkdtemp, rm } from "node:fs/promises";
|
|
1
|
+
import { readFile, stat, writeFile, mkdtemp, rm, readdir } from "node:fs/promises";
|
|
2
2
|
import { spawn } from "node:child_process";
|
|
3
3
|
import { tmpdir } from "node:os";
|
|
4
4
|
import { basename, extname, join } from "node:path";
|
|
@@ -49,6 +49,12 @@ export type ResolvedAutoFileConfig = {
|
|
|
49
49
|
export type ResolvedAutoVideoConfig = {
|
|
50
50
|
ffmpegPath: string;
|
|
51
51
|
maxBytes?: number;
|
|
52
|
+
mode: "light" | "full";
|
|
53
|
+
frames: number;
|
|
54
|
+
intervalSec: number;
|
|
55
|
+
maxDurationSec: number;
|
|
56
|
+
maxFrames: number;
|
|
57
|
+
includeAudio: boolean;
|
|
52
58
|
};
|
|
53
59
|
|
|
54
60
|
export function resolveAutoAudioConfig(cfg: WecomAccountConfig): ResolvedAutoAudioConfig | null {
|
|
@@ -86,9 +92,29 @@ export function resolveAutoFileConfig(cfg: WecomAccountConfig): ResolvedAutoFile
|
|
|
86
92
|
export function resolveAutoVideoConfig(cfg: WecomAccountConfig): ResolvedAutoVideoConfig | null {
|
|
87
93
|
const video = cfg.media?.auto?.video;
|
|
88
94
|
if (!cfg.media?.auto?.enabled || !video?.enabled) return null;
|
|
95
|
+
const mode = video.mode === "full" ? "full" : "light";
|
|
96
|
+
const maxDurationSec = typeof video.maxDurationSec === "number" && video.maxDurationSec > 0
|
|
97
|
+
? video.maxDurationSec
|
|
98
|
+
: mode === "full" ? 120 : 60;
|
|
99
|
+
const frames = typeof video.frames === "number" && video.frames > 0
|
|
100
|
+
? video.frames
|
|
101
|
+
: mode === "full" ? 12 : 5;
|
|
102
|
+
const intervalSec = typeof video.intervalSec === "number" && video.intervalSec > 0
|
|
103
|
+
? video.intervalSec
|
|
104
|
+
: Math.max(1, Math.round(maxDurationSec / Math.max(frames, 1)));
|
|
105
|
+
const maxFrames = typeof video.maxFrames === "number" && video.maxFrames > 0
|
|
106
|
+
? video.maxFrames
|
|
107
|
+
: mode === "full" ? 30 : frames;
|
|
108
|
+
const includeAudio = video.includeAudio === true;
|
|
89
109
|
return {
|
|
90
110
|
ffmpegPath: video.ffmpegPath?.trim() || "ffmpeg",
|
|
91
111
|
maxBytes: typeof video.maxBytes === "number" && video.maxBytes > 0 ? video.maxBytes : undefined,
|
|
112
|
+
mode,
|
|
113
|
+
frames,
|
|
114
|
+
intervalSec,
|
|
115
|
+
maxDurationSec,
|
|
116
|
+
maxFrames,
|
|
117
|
+
includeAudio,
|
|
92
118
|
};
|
|
93
119
|
}
|
|
94
120
|
|
|
@@ -182,6 +208,39 @@ async function runFfmpegExtractFrame(params: {
|
|
|
182
208
|
});
|
|
183
209
|
}
|
|
184
210
|
|
|
211
|
+
async function runFfmpegExtractFrames(params: {
|
|
212
|
+
ffmpegPath: string;
|
|
213
|
+
videoPath: string;
|
|
214
|
+
outputPattern: string;
|
|
215
|
+
fps: number;
|
|
216
|
+
maxDurationSec: number;
|
|
217
|
+
}): Promise<void> {
|
|
218
|
+
await new Promise<void>((resolve, reject) => {
|
|
219
|
+
const proc = spawn(params.ffmpegPath, [
|
|
220
|
+
"-y",
|
|
221
|
+
"-i",
|
|
222
|
+
params.videoPath,
|
|
223
|
+
"-t",
|
|
224
|
+
String(params.maxDurationSec),
|
|
225
|
+
"-vf",
|
|
226
|
+
`fps=${params.fps}`,
|
|
227
|
+
"-q:v",
|
|
228
|
+
"2",
|
|
229
|
+
params.outputPattern,
|
|
230
|
+
]);
|
|
231
|
+
proc.on("error", reject);
|
|
232
|
+
proc.on("close", (code) => {
|
|
233
|
+
if (code === 0) resolve();
|
|
234
|
+
else reject(new Error(`ffmpeg exited with code ${code ?? "unknown"}`));
|
|
235
|
+
});
|
|
236
|
+
});
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
function truncateText(text: string, maxChars: number): string {
|
|
240
|
+
if (text.length <= maxChars) return text;
|
|
241
|
+
return `${text.slice(0, maxChars)}…`;
|
|
242
|
+
}
|
|
243
|
+
|
|
185
244
|
export async function summarizeVideoWithVision(params: {
|
|
186
245
|
cfg: ResolvedAutoVideoConfig;
|
|
187
246
|
account: WecomAccountConfig;
|
|
@@ -195,19 +254,61 @@ export async function summarizeVideoWithVision(params: {
|
|
|
195
254
|
const tempDir = await mkdtemp(join(tmpdir(), "openclaw-wecom-frame-"));
|
|
196
255
|
const framePath = join(tempDir, `${basename(params.videoPath)}.jpg`);
|
|
197
256
|
try {
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
257
|
+
const summaries: string[] = [];
|
|
258
|
+
if (params.cfg.mode === "light") {
|
|
259
|
+
const fps = Math.max(0.05, params.cfg.frames / Math.max(params.cfg.maxDurationSec, 1));
|
|
260
|
+
await runFfmpegExtractFrames({
|
|
261
|
+
ffmpegPath: params.cfg.ffmpegPath,
|
|
262
|
+
videoPath: params.videoPath,
|
|
263
|
+
outputPattern: join(tempDir, "frame-%03d.jpg"),
|
|
264
|
+
fps,
|
|
265
|
+
maxDurationSec: params.cfg.maxDurationSec,
|
|
266
|
+
});
|
|
267
|
+
const frames = (await readdir(tempDir))
|
|
268
|
+
.filter((name) => name.startsWith("frame-") && name.endsWith(".jpg"))
|
|
269
|
+
.sort()
|
|
270
|
+
.slice(0, params.cfg.maxFrames);
|
|
271
|
+
for (const frame of frames) {
|
|
272
|
+
const buffer = await readFile(join(tempDir, frame));
|
|
273
|
+
if (!buffer.length) continue;
|
|
274
|
+
const summary = await describeImageWithVision({
|
|
275
|
+
config: visionConfig,
|
|
276
|
+
buffer,
|
|
277
|
+
mimeType: "image/jpeg",
|
|
278
|
+
});
|
|
279
|
+
if (summary) summaries.push(summary);
|
|
280
|
+
}
|
|
281
|
+
} else {
|
|
282
|
+
const fps = Math.max(0.1, 1 / Math.max(params.cfg.intervalSec, 1));
|
|
283
|
+
await runFfmpegExtractFrames({
|
|
284
|
+
ffmpegPath: params.cfg.ffmpegPath,
|
|
285
|
+
videoPath: params.videoPath,
|
|
286
|
+
outputPattern: join(tempDir, "frame-%03d.jpg"),
|
|
287
|
+
fps,
|
|
288
|
+
maxDurationSec: params.cfg.maxDurationSec,
|
|
289
|
+
});
|
|
290
|
+
const frames = (await readdir(tempDir))
|
|
291
|
+
.filter((name) => name.startsWith("frame-") && name.endsWith(".jpg"))
|
|
292
|
+
.sort()
|
|
293
|
+
.slice(0, params.cfg.maxFrames);
|
|
294
|
+
for (const frame of frames) {
|
|
295
|
+
const buffer = await readFile(join(tempDir, frame));
|
|
296
|
+
if (!buffer.length) continue;
|
|
297
|
+
const summary = await describeImageWithVision({
|
|
298
|
+
config: visionConfig,
|
|
299
|
+
buffer,
|
|
300
|
+
mimeType: "image/jpeg",
|
|
301
|
+
});
|
|
302
|
+
if (summary) summaries.push(summary);
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
if (summaries.length === 0) return null;
|
|
307
|
+
const unique = Array.from(new Set(summaries.map((s) => s.trim()).filter(Boolean)));
|
|
308
|
+
const maxChars = 1600;
|
|
309
|
+
const lines = unique.slice(0, params.cfg.maxFrames).map((s, idx) => `${idx + 1}. ${s}`);
|
|
310
|
+
const joined = truncateText(lines.join("\n"), maxChars);
|
|
311
|
+
return `关键帧概述(${unique.length}帧)\n${joined}`;
|
|
211
312
|
} catch {
|
|
212
313
|
return null;
|
|
213
314
|
} finally {
|
package/wecom/src/types.ts
CHANGED
|
@@ -79,6 +79,12 @@ export type WecomAccountConfig = {
|
|
|
79
79
|
enabled?: boolean;
|
|
80
80
|
ffmpegPath?: string;
|
|
81
81
|
maxBytes?: number;
|
|
82
|
+
mode?: "light" | "full";
|
|
83
|
+
frames?: number;
|
|
84
|
+
intervalSec?: number;
|
|
85
|
+
maxDurationSec?: number;
|
|
86
|
+
maxFrames?: number;
|
|
87
|
+
includeAudio?: boolean;
|
|
82
88
|
};
|
|
83
89
|
};
|
|
84
90
|
};
|