@marshulll/openclaw-wecom 0.1.14 → 0.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/wecom.config.full.example.json +11 -1
- package/package.json +1 -1
- package/wecom/src/config-schema.ts +20 -0
- package/wecom/src/media-vision.ts +98 -0
- package/wecom/src/types.ts +10 -0
- package/wecom/src/wecom-app.ts +204 -54
- package/wecom/src/wecom-bot.ts +124 -8
|
@@ -8,7 +8,17 @@
|
|
|
8
8
|
"tempDir": "/tmp/openclaw-wecom",
|
|
9
9
|
"retentionHours": 72,
|
|
10
10
|
"cleanupOnStart": true,
|
|
11
|
-
"maxBytes": 10485760
|
|
11
|
+
"maxBytes": 10485760,
|
|
12
|
+
"vision": {
|
|
13
|
+
"enabled": true,
|
|
14
|
+
"baseUrl": "https://newapi.looksunlight.com/v1",
|
|
15
|
+
"apiKey": "YOUR_API_KEY",
|
|
16
|
+
"model": "gpt-4o-mini",
|
|
17
|
+
"prompt": "请描述图片内容并尽量提取可见文字。",
|
|
18
|
+
"maxTokens": 400,
|
|
19
|
+
"timeoutMs": 15000,
|
|
20
|
+
"maxBytes": 5242880
|
|
21
|
+
}
|
|
12
22
|
},
|
|
13
23
|
"botMediaBridge": true,
|
|
14
24
|
|
package/package.json
CHANGED
|
@@ -47,6 +47,16 @@ const accountSchema = z.object({
|
|
|
47
47
|
retentionHours: z.number().optional(),
|
|
48
48
|
cleanupOnStart: z.boolean().optional(),
|
|
49
49
|
maxBytes: z.number().optional(),
|
|
50
|
+
vision: z.object({
|
|
51
|
+
enabled: z.boolean().optional(),
|
|
52
|
+
baseUrl: z.string().optional(),
|
|
53
|
+
apiKey: z.string().optional(),
|
|
54
|
+
model: z.string().optional(),
|
|
55
|
+
prompt: z.string().optional(),
|
|
56
|
+
maxTokens: z.number().optional(),
|
|
57
|
+
timeoutMs: z.number().optional(),
|
|
58
|
+
maxBytes: z.number().optional(),
|
|
59
|
+
}).optional(),
|
|
50
60
|
}).optional(),
|
|
51
61
|
|
|
52
62
|
network: z.object({
|
|
@@ -81,6 +91,16 @@ export const WecomConfigSchema = ensureJsonSchema(z.object({
|
|
|
81
91
|
retentionHours: z.number().optional(),
|
|
82
92
|
cleanupOnStart: z.boolean().optional(),
|
|
83
93
|
maxBytes: z.number().optional(),
|
|
94
|
+
vision: z.object({
|
|
95
|
+
enabled: z.boolean().optional(),
|
|
96
|
+
baseUrl: z.string().optional(),
|
|
97
|
+
apiKey: z.string().optional(),
|
|
98
|
+
model: z.string().optional(),
|
|
99
|
+
prompt: z.string().optional(),
|
|
100
|
+
maxTokens: z.number().optional(),
|
|
101
|
+
timeoutMs: z.number().optional(),
|
|
102
|
+
maxBytes: z.number().optional(),
|
|
103
|
+
}).optional(),
|
|
84
104
|
}).optional(),
|
|
85
105
|
|
|
86
106
|
network: z.object({
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import type { WecomAccountConfig } from "./types.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Settings for the optional image-description ("vision") step.
 *
 * Every field is optional because the same shape is used both for raw user
 * configuration and for the resolved configuration handed to the HTTP call.
 */
export type VisionConfig = {
  /** Master switch; vision is skipped unless this is truthy. */
  enabled?: boolean;
  /** API base URL; `/chat/completions` is appended to it when calling. */
  baseUrl?: string;
  /** Secret sent as a `Bearer` token in the `Authorization` header. */
  apiKey?: string;
  /** Model identifier placed in the request payload. */
  model?: string;
  /** Text instruction sent alongside the image. */
  prompt?: string;
  /** Value sent as `max_tokens` in the request payload. */
  maxTokens?: number;
  /** Milliseconds before the in-flight request is aborted. */
  timeoutMs?: number;
  /** Images whose byte length exceeds this are not sent. */
  maxBytes?: number;
};
|
|
13
|
+
|
|
14
|
+
/**
 * Normalizes an API base URL so that it ends with exactly one `/v1` segment.
 *
 * @param raw - Base URL from configuration or the environment; may be
 *   undefined, blank, or carry trailing slashes.
 * @returns The normalized URL without a trailing slash, or `null` when no
 *   usable value was supplied.
 */
function resolveBaseUrl(raw?: string): string | null {
  // Trailing slashes are removed BEFORE the suffix test. Testing first made
  // "https://host/v1/" miss the `/v1` check and come out as ".../v1/v1".
  const value = raw?.trim().replace(/\/+$/, "");
  if (!value) return null;
  return value.endsWith("/v1") ? value : `${value}/v1`;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Builds the effective vision configuration for an account.
 *
 * Values come from `accountConfig.media.vision` first and fall back to the
 * `OPENAI_*` environment variables. `||` is used on purpose for the string
 * settings so that empty strings fall through to the next source.
 *
 * @param accountConfig - Account configuration that may contain `media.vision`.
 * @returns A fully populated config, or `null` when vision is disabled or no
 *   base URL / API key can be determined.
 */
export function resolveVisionConfig(accountConfig: WecomAccountConfig): VisionConfig | null {
  const vision = accountConfig.media?.vision;
  if (!vision?.enabled) return null;

  const baseUrl = resolveBaseUrl(
    vision.baseUrl
      || process.env.OPENAI_BASE_URL
      || process.env.OPENAI_API_BASE
      || process.env.OPENAI_ENDPOINT,
  );
  // Trim so stray whitespace/newlines from config or env files never end up
  // inside the Authorization header; a whitespace-only key counts as missing.
  const apiKey = (vision.apiKey || process.env.OPENAI_API_KEY || process.env.OPENAI_KEY)?.trim();
  if (!baseUrl || !apiKey) return null;

  // Only finite, strictly positive numbers are meaningful for these limits.
  // For example `timeoutMs: 0` would abort every request immediately.
  const positive = (value: unknown): number | undefined =>
    typeof value === "number" && Number.isFinite(value) && value > 0 ? value : undefined;

  return {
    enabled: true,
    baseUrl,
    apiKey,
    model: vision.model || process.env.OPENAI_MODEL || "gpt-4o-mini",
    prompt: vision.prompt
      || "请描述图片内容并尽量提取可见文字。输出简洁中文要点。",
    maxTokens: positive(vision.maxTokens) ?? 400,
    timeoutMs: positive(vision.timeoutMs) ?? 15000,
    maxBytes: positive(vision.maxBytes),
  };
}
|
|
46
|
+
|
|
47
|
+
/**
 * Asks a chat-completions endpoint to describe an image.
 *
 * The image is sent inline as a base64 `data:` URL together with the prompt.
 * This is best-effort: every failure yields `null` so that callers can carry
 * on without a description.
 *
 * @param params.config - Vision settings; `enabled`, `baseUrl` and `apiKey`
 *   must all be set, otherwise nothing is sent.
 * @param params.buffer - Raw image bytes.
 * @param params.mimeType - MIME type used in the `data:` URL.
 * @returns The trimmed description, or `null` when vision is not usable, the
 *   image is empty or larger than `config.maxBytes`, the response is not OK or
 *   has no string content, or the request fails or times out.
 */
export async function describeImageWithVision(params: {
  config: VisionConfig;
  buffer: Buffer;
  mimeType: string;
}): Promise<string | null> {
  const { config, buffer, mimeType } = params;
  if (!config.enabled || !config.baseUrl || !config.apiKey) return null;

  // An empty image cannot be described; skip the network round trip.
  if (buffer.length === 0) return null;
  if (config.maxBytes && buffer.length > config.maxBytes) {
    return null;
  }

  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), config.timeoutMs ?? 15000);

  try {
    const imageBase64 = buffer.toString("base64");
    const payload = {
      // Defaults mirror the ones already applied to maxTokens/timeoutMs, so a
      // hand-built config without model/prompt still produces a valid request.
      model: config.model || "gpt-4o-mini",
      messages: [
        {
          role: "user",
          content: [
            { type: "text", text: config.prompt || "请描述图片内容并尽量提取可见文字。输出简洁中文要点。" },
            { type: "image_url", image_url: { url: `data:${mimeType};base64,${imageBase64}` } },
          ],
        },
      ],
      max_tokens: config.maxTokens ?? 400,
    };

    // Tolerate a base URL with trailing slashes so the path never contains "//".
    const endpoint = `${config.baseUrl.replace(/\/+$/, "")}/chat/completions`;
    const res = await fetch(endpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${config.apiKey}`,
      },
      body: JSON.stringify(payload),
      signal: controller.signal,
    });

    if (!res.ok) {
      // Release the unread body instead of leaving it for garbage collection.
      await res.body?.cancel();
      return null;
    }

    const data = (await res.json()) as
      | { choices?: Array<{ message?: { content?: unknown } }> }
      | null;
    const content = data?.choices?.[0]?.message?.content;
    if (typeof content !== "string") return null;
    return content.trim() || null;
  } catch {
    // Deliberately swallowed: network errors, aborts and bad JSON all mean
    // "no description available".
    return null;
  } finally {
    clearTimeout(timeout);
  }
}
|
package/wecom/src/types.ts
CHANGED
|
@@ -47,6 +47,16 @@ export type WecomAccountConfig = {
|
|
|
47
47
|
retentionHours?: number;
|
|
48
48
|
cleanupOnStart?: boolean;
|
|
49
49
|
maxBytes?: number;
|
|
50
|
+
vision?: {
|
|
51
|
+
enabled?: boolean;
|
|
52
|
+
baseUrl?: string;
|
|
53
|
+
apiKey?: string;
|
|
54
|
+
model?: string;
|
|
55
|
+
prompt?: string;
|
|
56
|
+
maxTokens?: number;
|
|
57
|
+
timeoutMs?: number;
|
|
58
|
+
maxBytes?: number;
|
|
59
|
+
};
|
|
50
60
|
};
|
|
51
61
|
|
|
52
62
|
// Network behavior
|
package/wecom/src/wecom-app.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { IncomingMessage, ServerResponse } from "node:http";
|
|
2
|
+
import crypto from "node:crypto";
|
|
2
3
|
import { XMLParser } from "fast-xml-parser";
|
|
3
4
|
import { mkdir, readdir, rm, stat, writeFile } from "node:fs/promises";
|
|
4
5
|
import { tmpdir } from "node:os";
|
|
@@ -9,6 +10,7 @@ import { decryptWecomEncrypted, verifyWecomSignature } from "./crypto.js";
|
|
|
9
10
|
import { getWecomRuntime } from "./runtime.js";
|
|
10
11
|
import { handleCommand } from "./commands.js";
|
|
11
12
|
import { markdownToWecomText } from "./format.js";
|
|
13
|
+
import { describeImageWithVision, resolveVisionConfig } from "./media-vision.js";
|
|
12
14
|
import { downloadWecomMedia, fetchMediaFromUrl, sendWecomFile, sendWecomImage, sendWecomText, sendWecomVideo, sendWecomVoice, uploadWecomMedia } from "./wecom-api.js";
|
|
13
15
|
|
|
14
16
|
const xmlParser = new XMLParser({
|
|
@@ -18,6 +20,19 @@ const xmlParser = new XMLParser({
|
|
|
18
20
|
});
|
|
19
21
|
|
|
20
22
|
const MAX_REQUEST_BODY_SIZE = 1024 * 1024;
|
|
23
|
+
const MEDIA_CACHE_MAX_ENTRIES = 200;
|
|
24
|
+
|
|
25
|
+
/** Metadata remembered for a media file that has already been written to disk. */
type MediaCacheEntry = {
  /** Location of the saved file on the local filesystem. */
  path: string;
  /** Kind of media the file holds. */
  type: "image" | "voice" | "video" | "file";
  /** MIME type recorded when the file was saved, if any. */
  mimeType?: string;
  /** Source URL of the media, when one was available. */
  url?: string;
  /** Text summary stored with the entry (set for images that were described). */
  summary?: string;
  /** Millisecond timestamp used for expiry checks and oldest-first pruning. */
  createdAt: number;
  /** Size of the stored file in bytes. */
  size: number;
};
|
|
34
|
+
|
|
35
|
+
const mediaCache = new Map<string, MediaCacheEntry>();
|
|
21
36
|
|
|
22
37
|
function parseIncomingXml(xml: string): Record<string, any> {
|
|
23
38
|
const obj = xmlParser.parse(xml);
|
|
@@ -146,6 +161,11 @@ function resolveMediaMaxBytes(target: WecomWebhookTarget): number | undefined {
|
|
|
146
161
|
return typeof maxBytes === "number" && maxBytes > 0 ? maxBytes : undefined;
|
|
147
162
|
}
|
|
148
163
|
|
|
164
|
+
/**
 * Converts the account's `media.retentionHours` setting into milliseconds.
 *
 * @returns The retention window in ms, or `undefined` when the setting is
 *   missing, not a number, or not greater than zero.
 */
function resolveMediaRetentionMs(target: WecomWebhookTarget): number | undefined {
  const retentionHours = target.account.config.media?.retentionHours;
  if (typeof retentionHours !== "number" || !(retentionHours > 0)) {
    return undefined;
  }
  return retentionHours * 3600 * 1000;
}
|
|
168
|
+
|
|
149
169
|
function normalizeMediaType(raw?: string): "image" | "voice" | "video" | "file" | null {
|
|
150
170
|
if (!raw) return null;
|
|
151
171
|
const value = raw.toLowerCase();
|
|
@@ -164,6 +184,51 @@ function sanitizeFilename(name: string, fallback: string): string {
|
|
|
164
184
|
return finalName || fallback;
|
|
165
185
|
}
|
|
166
186
|
|
|
187
|
+
/** Returns the hex-encoded SHA-1 digest of `input`; used to derive compact cache keys. */
function hashKey(input: string): string {
  const hasher = crypto.createHash("sha1");
  hasher.update(input);
  return hasher.digest("hex");
}
|
|
190
|
+
|
|
191
|
+
/**
 * Derives the cache key for a piece of inbound media.
 *
 * A media id takes precedence and is used verbatim (`media:<id>`); otherwise
 * the URL is hashed (`url:<hash>`).
 *
 * @returns The key, or `null` when neither a media id nor a URL is present.
 */
function buildMediaCacheKey(params: { mediaId?: string; url?: string }): string | null {
  const { mediaId, url } = params;
  if (mediaId) {
    return `media:${mediaId}`;
  }
  return url ? `url:${hashKey(url)}` : null;
}
|
|
196
|
+
|
|
197
|
+
/**
 * Trims the media cache back to `MEDIA_CACHE_MAX_ENTRIES` by dropping the
 * entries with the smallest `createdAt`. Only the in-memory records are
 * removed here; nothing in this function touches the files on disk.
 */
function pruneMediaCache(): void {
  const overflow = mediaCache.size - MEDIA_CACHE_MAX_ENTRIES;
  if (overflow <= 0) return;

  const oldestFirst = Array.from(mediaCache);
  oldestFirst.sort(([, left], [, right]) => left.createdAt - right.createdAt);
  for (const [staleKey] of oldestFirst.slice(0, overflow)) {
    mediaCache.delete(staleKey);
  }
}
|
|
206
|
+
|
|
207
|
+
/**
 * Looks up a cached media entry and verifies that it is still usable.
 *
 * An entry is dropped from the cache (and `null` returned) when it is older
 * than `retentionMs` or when its file can no longer be `stat`-ed.
 *
 * @param key - Cache key; `null` always misses.
 * @param retentionMs - Maximum entry age in ms; a falsy value disables the age check.
 * @returns The cached entry, or `null` on a miss.
 */
async function getCachedMedia(
  key: string | null,
  retentionMs?: number,
): Promise<MediaCacheEntry | null> {
  if (!key) return null;

  const hit = mediaCache.get(key);
  if (!hit) return null;

  const expired = retentionMs ? Date.now() - hit.createdAt > retentionMs : false;
  if (!expired) {
    try {
      await stat(hit.path);
      return hit;
    } catch {
      // File is gone; fall through and evict the stale record.
    }
  }

  mediaCache.delete(key);
  return null;
}
|
|
226
|
+
|
|
227
|
+
/**
 * Records `entry` under `key` and then prunes the cache.
 * A `null` key makes this a no-op.
 */
function storeCachedMedia(key: string | null, entry: MediaCacheEntry): void {
  if (key) {
    mediaCache.set(key, entry);
    pruneMediaCache();
  }
}
|
|
167
232
|
|
|
168
233
|
async function startAgentForApp(params: {
|
|
169
234
|
target: WecomWebhookTarget;
|
|
@@ -174,6 +239,7 @@ async function startAgentForApp(params: {
|
|
|
174
239
|
media?: {
|
|
175
240
|
type: "image" | "voice" | "video" | "file";
|
|
176
241
|
path: string;
|
|
242
|
+
mimeType?: string;
|
|
177
243
|
url?: string;
|
|
178
244
|
} | null;
|
|
179
245
|
}): Promise<void> {
|
|
@@ -229,6 +295,9 @@ async function startAgentForApp(params: {
|
|
|
229
295
|
if (media?.path) {
|
|
230
296
|
ctxPayload.MediaPath = media.path;
|
|
231
297
|
ctxPayload.MediaType = media.type;
|
|
298
|
+
if (media.mimeType) {
|
|
299
|
+
(ctxPayload as any).MediaMimeType = media.mimeType;
|
|
300
|
+
}
|
|
232
301
|
if (media.url) {
|
|
233
302
|
ctxPayload.MediaUrl = media.url;
|
|
234
303
|
}
|
|
@@ -331,7 +400,8 @@ async function processAppMessage(params: {
|
|
|
331
400
|
if (!fromUser) return;
|
|
332
401
|
|
|
333
402
|
let messageText = "";
|
|
334
|
-
|
|
403
|
+
const retentionMs = resolveMediaRetentionMs(target);
|
|
404
|
+
let mediaContext: { type: "image" | "voice" | "video" | "file"; path: string; mimeType?: string; url?: string } | null = null;
|
|
335
405
|
|
|
336
406
|
if (msgType === "text") {
|
|
337
407
|
messageText = String(msgObj?.Content ?? "");
|
|
@@ -345,24 +415,40 @@ async function processAppMessage(params: {
|
|
|
345
415
|
const mediaId = String(msgObj?.MediaId ?? "");
|
|
346
416
|
if (mediaId) {
|
|
347
417
|
try {
|
|
348
|
-
const
|
|
349
|
-
const
|
|
350
|
-
if (
|
|
351
|
-
|
|
418
|
+
const cacheKey = buildMediaCacheKey({ mediaId });
|
|
419
|
+
const cached = await getCachedMedia(cacheKey, retentionMs);
|
|
420
|
+
if (cached) {
|
|
421
|
+
mediaContext = { type: cached.type, path: cached.path, mimeType: cached.mimeType, url: cached.url };
|
|
422
|
+
logVerbose(target, `app voice cache hit: ${cached.path}`);
|
|
423
|
+
messageText = "[用户发送了一条语音消息]\n\n请根据语音内容回复用户。";
|
|
352
424
|
} else {
|
|
353
|
-
const
|
|
354
|
-
const
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
target
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
425
|
+
const media = await downloadWecomMedia({ account: target.account, mediaId });
|
|
426
|
+
const maxBytes = resolveMediaMaxBytes(target);
|
|
427
|
+
if (maxBytes && media.buffer.length > maxBytes) {
|
|
428
|
+
messageText = "[语音消息过大,未处理]\n\n请发送更短的语音消息。";
|
|
429
|
+
} else {
|
|
430
|
+
const ext = resolveExtFromContentType(media.contentType, "amr");
|
|
431
|
+
const tempDir = resolveMediaTempDir(target);
|
|
432
|
+
await mkdir(tempDir, { recursive: true });
|
|
433
|
+
await cleanupMediaDir(
|
|
434
|
+
tempDir,
|
|
435
|
+
target.account.config.media?.retentionHours,
|
|
436
|
+
target.account.config.media?.cleanupOnStart,
|
|
437
|
+
);
|
|
438
|
+
const tempVoicePath = join(tempDir, `voice-${Date.now()}-${Math.random().toString(36).slice(2)}.${ext}`);
|
|
439
|
+
await writeFile(tempVoicePath, media.buffer);
|
|
440
|
+
const mimeType = media.contentType || "audio/amr";
|
|
441
|
+
mediaContext = { type: "voice", path: tempVoicePath, mimeType };
|
|
442
|
+
storeCachedMedia(cacheKey, {
|
|
443
|
+
path: tempVoicePath,
|
|
444
|
+
type: "voice",
|
|
445
|
+
mimeType,
|
|
446
|
+
createdAt: Date.now(),
|
|
447
|
+
size: media.buffer.length,
|
|
448
|
+
});
|
|
449
|
+
logVerbose(target, `app voice saved (${media.buffer.length} bytes): ${tempVoicePath}`);
|
|
450
|
+
messageText = "[用户发送了一条语音消息]\n\n请根据语音内容回复用户。";
|
|
451
|
+
}
|
|
366
452
|
}
|
|
367
453
|
} catch (err) {
|
|
368
454
|
target.runtime.error?.(`wecom app voice download failed: ${String(err)}`);
|
|
@@ -378,39 +464,71 @@ async function processAppMessage(params: {
|
|
|
378
464
|
const mediaId = String(msgObj?.MediaId ?? "");
|
|
379
465
|
const picUrl = String(msgObj?.PicUrl ?? "");
|
|
380
466
|
try {
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
if (
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
const media = await fetchMediaFromUrl(picUrl, target.account);
|
|
389
|
-
buffer = media.buffer;
|
|
390
|
-
contentType = media.contentType;
|
|
391
|
-
}
|
|
392
|
-
|
|
393
|
-
if (buffer) {
|
|
394
|
-
const maxBytes = resolveMediaMaxBytes(target);
|
|
395
|
-
if (maxBytes && buffer.length > maxBytes) {
|
|
396
|
-
messageText = "[图片过大,未处理]\n\n请发送更小的图片。";
|
|
467
|
+
const cacheKey = buildMediaCacheKey({ mediaId, url: picUrl });
|
|
468
|
+
const cached = await getCachedMedia(cacheKey, retentionMs);
|
|
469
|
+
if (cached) {
|
|
470
|
+
mediaContext = { type: cached.type, path: cached.path, mimeType: cached.mimeType, url: cached.url };
|
|
471
|
+
logVerbose(target, `app image cache hit: ${cached.path}`);
|
|
472
|
+
if (cached.summary) {
|
|
473
|
+
messageText = `[用户发送了一张图片]\n\n[图片识别结果]\n${cached.summary}\n\n请根据识别结果回复用户。`;
|
|
397
474
|
} else {
|
|
398
|
-
|
|
399
|
-
const tempDir = resolveMediaTempDir(target);
|
|
400
|
-
await mkdir(tempDir, { recursive: true });
|
|
401
|
-
await cleanupMediaDir(
|
|
402
|
-
tempDir,
|
|
403
|
-
target.account.config.media?.retentionHours,
|
|
404
|
-
target.account.config.media?.cleanupOnStart,
|
|
405
|
-
);
|
|
406
|
-
const tempImagePath = join(tempDir, `image-${Date.now()}-${Math.random().toString(36).slice(2)}.${ext}`);
|
|
407
|
-
await writeFile(tempImagePath, buffer);
|
|
408
|
-
mediaContext = { type: "image", path: tempImagePath, url: picUrl || undefined };
|
|
409
|
-
logVerbose(target, `app image saved (${buffer.length} bytes): ${tempImagePath}`);
|
|
410
|
-
messageText = `[用户发送了一张图片,已保存到: ${tempImagePath}]\n\n请根据图片内容回复用户。`;
|
|
475
|
+
messageText = "[用户发送了一张图片]\n\n请根据图片内容回复用户。";
|
|
411
476
|
}
|
|
412
477
|
} else {
|
|
413
|
-
|
|
478
|
+
let buffer: Buffer | null = null;
|
|
479
|
+
let contentType = "";
|
|
480
|
+
if (mediaId) {
|
|
481
|
+
const media = await downloadWecomMedia({ account: target.account, mediaId });
|
|
482
|
+
buffer = media.buffer;
|
|
483
|
+
contentType = media.contentType;
|
|
484
|
+
} else if (picUrl) {
|
|
485
|
+
const media = await fetchMediaFromUrl(picUrl, target.account);
|
|
486
|
+
buffer = media.buffer;
|
|
487
|
+
contentType = media.contentType;
|
|
488
|
+
}
|
|
489
|
+
|
|
490
|
+
if (buffer) {
|
|
491
|
+
const maxBytes = resolveMediaMaxBytes(target);
|
|
492
|
+
if (maxBytes && buffer.length > maxBytes) {
|
|
493
|
+
messageText = "[图片过大,未处理]\n\n请发送更小的图片。";
|
|
494
|
+
} else {
|
|
495
|
+
const ext = resolveExtFromContentType(contentType, "jpg");
|
|
496
|
+
const tempDir = resolveMediaTempDir(target);
|
|
497
|
+
await mkdir(tempDir, { recursive: true });
|
|
498
|
+
await cleanupMediaDir(
|
|
499
|
+
tempDir,
|
|
500
|
+
target.account.config.media?.retentionHours,
|
|
501
|
+
target.account.config.media?.cleanupOnStart,
|
|
502
|
+
);
|
|
503
|
+
const tempImagePath = join(tempDir, `image-${Date.now()}-${Math.random().toString(36).slice(2)}.${ext}`);
|
|
504
|
+
await writeFile(tempImagePath, buffer);
|
|
505
|
+
const mimeType = contentType || "image/jpeg";
|
|
506
|
+
mediaContext = { type: "image", path: tempImagePath, mimeType, url: picUrl || undefined };
|
|
507
|
+
|
|
508
|
+
const visionConfig = resolveVisionConfig(target.account.config);
|
|
509
|
+
const summary = visionConfig
|
|
510
|
+
? await describeImageWithVision({ config: visionConfig, buffer, mimeType })
|
|
511
|
+
: null;
|
|
512
|
+
|
|
513
|
+
storeCachedMedia(cacheKey, {
|
|
514
|
+
path: tempImagePath,
|
|
515
|
+
type: "image",
|
|
516
|
+
mimeType,
|
|
517
|
+
url: picUrl || undefined,
|
|
518
|
+
summary: summary ?? undefined,
|
|
519
|
+
createdAt: Date.now(),
|
|
520
|
+
size: buffer.length,
|
|
521
|
+
});
|
|
522
|
+
logVerbose(target, `app image saved (${buffer.length} bytes): ${tempImagePath}`);
|
|
523
|
+
if (summary) {
|
|
524
|
+
messageText = `[用户发送了一张图片]\n\n[图片识别结果]\n${summary}\n\n请根据识别结果回复用户。`;
|
|
525
|
+
} else {
|
|
526
|
+
messageText = "[用户发送了一张图片]\n\n请根据图片内容回复用户。";
|
|
527
|
+
}
|
|
528
|
+
}
|
|
529
|
+
} else {
|
|
530
|
+
messageText = "[用户发送了一张图片,但下载失败]\n\n请告诉用户图片处理暂时不可用。";
|
|
531
|
+
}
|
|
414
532
|
}
|
|
415
533
|
} catch (err) {
|
|
416
534
|
target.runtime.error?.(`wecom app image download failed: ${String(err)}`);
|
|
@@ -429,7 +547,14 @@ async function processAppMessage(params: {
|
|
|
429
547
|
const mediaId = String(msgObj?.MediaId ?? "");
|
|
430
548
|
if (mediaId) {
|
|
431
549
|
try {
|
|
432
|
-
const
|
|
550
|
+
const cacheKey = buildMediaCacheKey({ mediaId });
|
|
551
|
+
const cached = await getCachedMedia(cacheKey, retentionMs);
|
|
552
|
+
if (cached) {
|
|
553
|
+
mediaContext = { type: cached.type, path: cached.path, mimeType: cached.mimeType, url: cached.url };
|
|
554
|
+
logVerbose(target, `app video cache hit: ${cached.path}`);
|
|
555
|
+
messageText = "[用户发送了一个视频文件]\n\n请根据视频内容回复用户。";
|
|
556
|
+
} else {
|
|
557
|
+
const media = await downloadWecomMedia({ account: target.account, mediaId });
|
|
433
558
|
const maxBytes = resolveMediaMaxBytes(target);
|
|
434
559
|
if (maxBytes && media.buffer.length > maxBytes) {
|
|
435
560
|
messageText = "[视频过大,未处理]\n\n请发送更小的视频。";
|
|
@@ -444,9 +569,18 @@ async function processAppMessage(params: {
|
|
|
444
569
|
);
|
|
445
570
|
const tempVideoPath = join(tempDir, `video-${Date.now()}-${Math.random().toString(36).slice(2)}.${ext}`);
|
|
446
571
|
await writeFile(tempVideoPath, media.buffer);
|
|
447
|
-
|
|
572
|
+
const mimeType = media.contentType || "video/mp4";
|
|
573
|
+
mediaContext = { type: "video", path: tempVideoPath, mimeType };
|
|
574
|
+
storeCachedMedia(cacheKey, {
|
|
575
|
+
path: tempVideoPath,
|
|
576
|
+
type: "video",
|
|
577
|
+
mimeType,
|
|
578
|
+
createdAt: Date.now(),
|
|
579
|
+
size: media.buffer.length,
|
|
580
|
+
});
|
|
448
581
|
logVerbose(target, `app video saved (${media.buffer.length} bytes): ${tempVideoPath}`);
|
|
449
|
-
messageText =
|
|
582
|
+
messageText = "[用户发送了一个视频文件]\n\n请根据视频内容回复用户。";
|
|
583
|
+
}
|
|
450
584
|
}
|
|
451
585
|
} catch (err) {
|
|
452
586
|
target.runtime.error?.(`wecom app video download failed: ${String(err)}`);
|
|
@@ -460,7 +594,14 @@ async function processAppMessage(params: {
|
|
|
460
594
|
const fileName = String(msgObj?.FileName ?? "");
|
|
461
595
|
if (mediaId) {
|
|
462
596
|
try {
|
|
463
|
-
const
|
|
597
|
+
const cacheKey = buildMediaCacheKey({ mediaId });
|
|
598
|
+
const cached = await getCachedMedia(cacheKey, retentionMs);
|
|
599
|
+
if (cached) {
|
|
600
|
+
mediaContext = { type: cached.type, path: cached.path, mimeType: cached.mimeType, url: cached.url };
|
|
601
|
+
logVerbose(target, `app file cache hit: ${cached.path}`);
|
|
602
|
+
messageText = `[用户发送了一个文件: ${fileName || "未知文件"}]\n\n请根据文件内容回复用户。`;
|
|
603
|
+
} else {
|
|
604
|
+
const media = await downloadWecomMedia({ account: target.account, mediaId });
|
|
464
605
|
const maxBytes = resolveMediaMaxBytes(target);
|
|
465
606
|
if (maxBytes && media.buffer.length > maxBytes) {
|
|
466
607
|
messageText = "[文件过大,未处理]\n\n请发送更小的文件。";
|
|
@@ -476,9 +617,18 @@ async function processAppMessage(params: {
|
|
|
476
617
|
const safeName = sanitizeFilename(fileName, `file-${Date.now()}.${ext}`);
|
|
477
618
|
const tempFilePath = join(tempDir, safeName);
|
|
478
619
|
await writeFile(tempFilePath, media.buffer);
|
|
479
|
-
|
|
620
|
+
const mimeType = media.contentType || "application/octet-stream";
|
|
621
|
+
mediaContext = { type: "file", path: tempFilePath, mimeType };
|
|
622
|
+
storeCachedMedia(cacheKey, {
|
|
623
|
+
path: tempFilePath,
|
|
624
|
+
type: "file",
|
|
625
|
+
mimeType,
|
|
626
|
+
createdAt: Date.now(),
|
|
627
|
+
size: media.buffer.length,
|
|
628
|
+
});
|
|
480
629
|
logVerbose(target, `app file saved (${media.buffer.length} bytes): ${tempFilePath}`);
|
|
481
|
-
messageText = `[用户发送了一个文件: ${safeName}
|
|
630
|
+
messageText = `[用户发送了一个文件: ${safeName}]\n\n请根据文件内容回复用户。`;
|
|
631
|
+
}
|
|
482
632
|
}
|
|
483
633
|
} catch (err) {
|
|
484
634
|
target.runtime.error?.(`wecom app file download failed: ${String(err)}`);
|
package/wecom/src/wecom-bot.ts
CHANGED
|
@@ -11,14 +11,17 @@ import type { ResolvedWecomAccount, WecomInboundMessage } from "./types.js";
|
|
|
11
11
|
import { computeWecomMsgSignature, decryptWecomEncrypted, encryptWecomPlaintext, verifyWecomSignature } from "./crypto.js";
|
|
12
12
|
import { fetchMediaFromUrl, sendWecomFile, sendWecomImage, sendWecomVideo, sendWecomVoice, uploadWecomMedia } from "./wecom-api.js";
|
|
13
13
|
import { getWecomRuntime } from "./runtime.js";
|
|
14
|
+
import { describeImageWithVision, resolveVisionConfig } from "./media-vision.js";
|
|
14
15
|
|
|
15
16
|
const STREAM_TTL_MS = 10 * 60 * 1000;
|
|
16
17
|
const STREAM_MAX_BYTES = 20_480;
|
|
17
18
|
const STREAM_MAX_ENTRIES = 500;
|
|
18
19
|
const DEDUPE_TTL_MS = 2 * 60 * 1000;
|
|
19
20
|
const DEDUPE_MAX_ENTRIES = 2_000;
|
|
21
|
+
const MEDIA_CACHE_MAX_ENTRIES = 200;
|
|
20
22
|
|
|
21
23
|
const cleanupExecuted = new Set<string>();
|
|
24
|
+
const mediaCache = new Map<string, { entry: InboundMedia; createdAt: number; size: number; summary?: string }>();
|
|
22
25
|
|
|
23
26
|
type StreamState = {
|
|
24
27
|
streamId: string;
|
|
@@ -34,6 +37,7 @@ type StreamState = {
|
|
|
34
37
|
/** Description of a media file received from a user and saved locally. */
type InboundMedia = {
  /** Location of the saved file on the local filesystem. */
  path: string;
  /** Media kind label (for example "image", "voice", "video" or "file"). */
  type: string;
  /** MIME type of the saved file, when known. */
  mimeType?: string;
  /** Source URL of the media, when one was available. */
  url?: string;
};
|
|
39
43
|
|
|
@@ -382,6 +386,9 @@ async function startAgentForStream(params: {
|
|
|
382
386
|
if (inbound.media) {
|
|
383
387
|
ctxPayload.MediaPath = inbound.media.path;
|
|
384
388
|
ctxPayload.MediaType = inbound.media.type;
|
|
389
|
+
if (inbound.media.mimeType) {
|
|
390
|
+
(ctxPayload as any).MediaMimeType = inbound.media.mimeType;
|
|
391
|
+
}
|
|
385
392
|
if (inbound.media.url) {
|
|
386
393
|
ctxPayload.MediaUrl = inbound.media.url;
|
|
387
394
|
}
|
|
@@ -584,6 +591,18 @@ async function buildBotMediaMessage(params: {
|
|
|
584
591
|
if (!url && !base64) return { text: fallbackLabel };
|
|
585
592
|
|
|
586
593
|
try {
|
|
594
|
+
const cacheKey = buildMediaCacheKey({ url, base64 });
|
|
595
|
+
const cached = await getCachedMedia(cacheKey, resolveMediaRetentionMs(target));
|
|
596
|
+
if (cached) {
|
|
597
|
+
const text = msgtype === "image" && cached.summary
|
|
598
|
+
? `[用户发送了一张图片]\n\n[图片识别结果]\n${cached.summary}\n\n请根据识别结果回复用户。`
|
|
599
|
+
: buildInboundMediaPrompt(msgtype, filename);
|
|
600
|
+
return {
|
|
601
|
+
text,
|
|
602
|
+
media: cached.media,
|
|
603
|
+
};
|
|
604
|
+
}
|
|
605
|
+
|
|
587
606
|
let buffer: Buffer | null = null;
|
|
588
607
|
let contentType = "";
|
|
589
608
|
if (base64) {
|
|
@@ -629,9 +648,16 @@ async function buildBotMediaMessage(params: {
|
|
|
629
648
|
const safeName = sanitizeFilename(filename || "", `file-${Date.now()}.${ext}`);
|
|
630
649
|
const tempFilePath = join(tempDir, safeName);
|
|
631
650
|
await writeFile(tempFilePath, buffer);
|
|
651
|
+
const media: InboundMedia = {
|
|
652
|
+
path: tempFilePath,
|
|
653
|
+
type: "file",
|
|
654
|
+
mimeType: contentType || "application/octet-stream",
|
|
655
|
+
url,
|
|
656
|
+
};
|
|
657
|
+
storeCachedMedia(cacheKey, media, buffer.length);
|
|
632
658
|
return {
|
|
633
|
-
text:
|
|
634
|
-
media
|
|
659
|
+
text: buildInboundMediaPrompt("file", safeName),
|
|
660
|
+
media,
|
|
635
661
|
};
|
|
636
662
|
}
|
|
637
663
|
|
|
@@ -642,21 +668,52 @@ async function buildBotMediaMessage(params: {
|
|
|
642
668
|
await writeFile(tempPath, buffer);
|
|
643
669
|
|
|
644
670
|
if (msgtype === "image") {
|
|
671
|
+
const media: InboundMedia = {
|
|
672
|
+
path: tempPath,
|
|
673
|
+
type: "image",
|
|
674
|
+
mimeType: contentType || "image/jpeg",
|
|
675
|
+
url,
|
|
676
|
+
};
|
|
677
|
+
const visionConfig = resolveVisionConfig(target.account.config);
|
|
678
|
+
const summary = visionConfig
|
|
679
|
+
? await describeImageWithVision({
|
|
680
|
+
config: visionConfig,
|
|
681
|
+
buffer,
|
|
682
|
+
mimeType: media.mimeType || "image/jpeg",
|
|
683
|
+
})
|
|
684
|
+
: null;
|
|
685
|
+
storeCachedMedia(cacheKey, media, buffer.length, summary ?? undefined);
|
|
645
686
|
return {
|
|
646
|
-
text:
|
|
647
|
-
|
|
687
|
+
text: summary
|
|
688
|
+
? `[用户发送了一张图片]\n\n[图片识别结果]\n${summary}\n\n请根据识别结果回复用户。`
|
|
689
|
+
: buildInboundMediaPrompt("image"),
|
|
690
|
+
media,
|
|
648
691
|
};
|
|
649
692
|
}
|
|
650
693
|
if (msgtype === "voice") {
|
|
694
|
+
const media: InboundMedia = {
|
|
695
|
+
path: tempPath,
|
|
696
|
+
type: "voice",
|
|
697
|
+
mimeType: contentType || "audio/amr",
|
|
698
|
+
url,
|
|
699
|
+
};
|
|
700
|
+
storeCachedMedia(cacheKey, media, buffer.length);
|
|
651
701
|
return {
|
|
652
|
-
text:
|
|
653
|
-
media
|
|
702
|
+
text: buildInboundMediaPrompt("voice"),
|
|
703
|
+
media,
|
|
654
704
|
};
|
|
655
705
|
}
|
|
656
706
|
if (msgtype === "video") {
|
|
707
|
+
const media: InboundMedia = {
|
|
708
|
+
path: tempPath,
|
|
709
|
+
type: "video",
|
|
710
|
+
mimeType: contentType || "video/mp4",
|
|
711
|
+
url,
|
|
712
|
+
};
|
|
713
|
+
storeCachedMedia(cacheKey, media, buffer.length);
|
|
657
714
|
return {
|
|
658
|
-
text:
|
|
659
|
-
media
|
|
715
|
+
text: buildInboundMediaPrompt("video"),
|
|
716
|
+
media,
|
|
660
717
|
};
|
|
661
718
|
}
|
|
662
719
|
return { text: fallbackLabel };
|
|
@@ -741,6 +798,65 @@ function mediaSentLabel(type: string): string {
|
|
|
741
798
|
return "[已发送媒体]";
|
|
742
799
|
}
|
|
743
800
|
|
|
801
|
+
/**
 * Reads `media.retentionHours` from the account configuration and expresses it
 * in milliseconds.
 *
 * @returns The retention window in ms, or `undefined` unless the setting is a
 *   number greater than zero.
 */
function resolveMediaRetentionMs(target: WecomWebhookTarget): number | undefined {
  const configured = target.account.config.media?.retentionHours;
  const usable = typeof configured === "number" && configured > 0;
  if (!usable) return undefined;
  return configured * 3600 * 1000;
}
|
|
805
|
+
|
|
806
|
+
/**
 * Returns the hex-encoded SHA-1 digest of `input`, used to build cache keys.
 *
 * NOTE(review): relies on `crypto` being the `node:crypto` module. The visible
 * import changes for this file do not show that import — confirm it exists.
 */
function hashCacheKey(input: string): string {
  const sha1 = crypto.createHash("sha1");
  return sha1.update(input).digest("hex");
}
|
|
809
|
+
|
|
810
|
+
/**
 * Derives the cache key for bot media delivered by URL or as inline base64.
 *
 * The URL wins when both are present. Either value is hashed, giving
 * `url:<hash>` or `b64:<hash>`.
 *
 * @returns The key, or `null` when neither value is present.
 */
function buildMediaCacheKey(params: { url?: string; base64?: string }): string | null {
  const { url, base64 } = params;
  if (url) {
    return `url:${hashCacheKey(url)}`;
  }
  return base64 ? `b64:${hashCacheKey(base64)}` : null;
}
|
|
815
|
+
|
|
816
|
+
/**
 * Keeps the media cache at or below `MEDIA_CACHE_MAX_ENTRIES` by removing the
 * records with the smallest `createdAt` first. Only the in-memory records are
 * dropped here.
 */
function pruneMediaCache(): void {
  let surplus = mediaCache.size - MEDIA_CACHE_MAX_ENTRIES;
  if (surplus <= 0) return;

  const byAge = Array.from(mediaCache.entries());
  byAge.sort((first, second) => first[1].createdAt - second[1].createdAt);
  for (const [victim] of byAge) {
    if (surplus <= 0) break;
    mediaCache.delete(victim);
    surplus -= 1;
  }
}
|
|
825
|
+
|
|
826
|
+
/**
 * Looks up cached bot media and verifies that it is still usable.
 *
 * The record is evicted (and `null` returned) when it is older than
 * `retentionMs` or when its file can no longer be `stat`-ed.
 *
 * NOTE(review): relies on `stat` from `node:fs/promises` being imported in
 * this file; the visible import changes do not show it — confirm.
 *
 * @param key - Cache key; `null` always misses.
 * @param retentionMs - Maximum record age in ms; a falsy value disables the age check.
 * @returns The cached media plus its optional summary, or `null` on a miss.
 */
async function getCachedMedia(
  key: string | null,
  retentionMs?: number,
): Promise<{ media: InboundMedia; summary?: string } | null> {
  if (!key) return null;

  const record = mediaCache.get(key);
  if (!record) return null;

  const tooOld = retentionMs ? Date.now() - record.createdAt > retentionMs : false;
  if (!tooOld) {
    try {
      await stat(record.entry.path);
      return { media: record.entry, summary: record.summary };
    } catch {
      // File is gone; fall through and evict the stale record.
    }
  }

  mediaCache.delete(key);
  return null;
}
|
|
845
|
+
|
|
846
|
+
/**
 * Records saved media under `key`, stamped with the current time, and then
 * prunes the cache. A `null` key makes this a no-op.
 *
 * @param key - Cache key, or `null` to skip caching.
 * @param entry - The saved media description.
 * @param size - Byte size recorded with the entry.
 * @param summary - Optional text summary stored with the entry.
 */
function storeCachedMedia(key: string | null, entry: InboundMedia, size: number, summary?: string): void {
  if (key) {
    const record = { entry, createdAt: Date.now(), size, summary };
    mediaCache.set(key, record);
    pruneMediaCache();
  }
}
|
|
851
|
+
|
|
852
|
+
/**
 * Builds the text prompt that accompanies inbound media.
 *
 * @param msgtype - Kind of media received.
 * @param filename - File name to mention; only used for the file prompt.
 * @returns A fixed prompt for image/voice/video, otherwise the file prompt
 *   (including the file name when one is given).
 */
function buildInboundMediaPrompt(msgtype: "image" | "voice" | "video" | "file", filename?: string): string {
  switch (msgtype) {
    case "image":
      return "[用户发送了一张图片]\n\n请根据图片内容回复用户。";
    case "voice":
      return "[用户发送了一条语音消息]\n\n请根据语音内容回复用户。";
    case "video":
      return "[用户发送了一个视频文件]\n\n请根据视频内容回复用户。";
    default: {
      const label = filename ? `用户发送了一个文件: ${filename}` : "用户发送了一个文件";
      return `[${label}]\n\n请根据文件内容回复用户。`;
    }
  }
}
|
|
859
|
+
|
|
744
860
|
/** Reports whether the account's `mode` is "bot" or "both". */
function shouldHandleBot(account: ResolvedWecomAccount): boolean {
  switch (account.mode) {
    case "bot":
    case "both":
      return true;
    default:
      return false;
  }
}
|