@marshulll/openclaw-wecom 0.1.15 → 0.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/wecom.config.full.example.json +11 -1
- package/package.json +1 -1
- package/wecom/src/config-schema.ts +20 -0
- package/wecom/src/media-vision.ts +98 -0
- package/wecom/src/types.ts +10 -0
- package/wecom/src/wecom-app.ts +19 -2
- package/wecom/src/wecom-bot.ts +23 -9
|
@@ -8,7 +8,17 @@
|
|
|
8
8
|
"tempDir": "/tmp/openclaw-wecom",
|
|
9
9
|
"retentionHours": 72,
|
|
10
10
|
"cleanupOnStart": true,
|
|
11
|
-
"maxBytes": 10485760
|
|
11
|
+
"maxBytes": 10485760,
|
|
12
|
+
"vision": {
|
|
13
|
+
"enabled": true,
|
|
14
|
+
"baseUrl": "https://newapi.looksunlight.com/v1",
|
|
15
|
+
"apiKey": "YOUR_API_KEY",
|
|
16
|
+
"model": "gpt-4o-mini",
|
|
17
|
+
"prompt": "请描述图片内容并尽量提取可见文字。",
|
|
18
|
+
"maxTokens": 400,
|
|
19
|
+
"timeoutMs": 15000,
|
|
20
|
+
"maxBytes": 5242880
|
|
21
|
+
}
|
|
12
22
|
},
|
|
13
23
|
"botMediaBridge": true,
|
|
14
24
|
|
package/package.json
CHANGED
|
@@ -47,6 +47,16 @@ const accountSchema = z.object({
|
|
|
47
47
|
retentionHours: z.number().optional(),
|
|
48
48
|
cleanupOnStart: z.boolean().optional(),
|
|
49
49
|
maxBytes: z.number().optional(),
|
|
50
|
+
vision: z.object({
|
|
51
|
+
enabled: z.boolean().optional(),
|
|
52
|
+
baseUrl: z.string().optional(),
|
|
53
|
+
apiKey: z.string().optional(),
|
|
54
|
+
model: z.string().optional(),
|
|
55
|
+
prompt: z.string().optional(),
|
|
56
|
+
maxTokens: z.number().optional(),
|
|
57
|
+
timeoutMs: z.number().optional(),
|
|
58
|
+
maxBytes: z.number().optional(),
|
|
59
|
+
}).optional(),
|
|
50
60
|
}).optional(),
|
|
51
61
|
|
|
52
62
|
network: z.object({
|
|
@@ -81,6 +91,16 @@ export const WecomConfigSchema = ensureJsonSchema(z.object({
|
|
|
81
91
|
retentionHours: z.number().optional(),
|
|
82
92
|
cleanupOnStart: z.boolean().optional(),
|
|
83
93
|
maxBytes: z.number().optional(),
|
|
94
|
+
vision: z.object({
|
|
95
|
+
enabled: z.boolean().optional(),
|
|
96
|
+
baseUrl: z.string().optional(),
|
|
97
|
+
apiKey: z.string().optional(),
|
|
98
|
+
model: z.string().optional(),
|
|
99
|
+
prompt: z.string().optional(),
|
|
100
|
+
maxTokens: z.number().optional(),
|
|
101
|
+
timeoutMs: z.number().optional(),
|
|
102
|
+
maxBytes: z.number().optional(),
|
|
103
|
+
}).optional(),
|
|
84
104
|
}).optional(),
|
|
85
105
|
|
|
86
106
|
network: z.object({
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import type { WecomAccountConfig } from "./types.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Settings for the optional image-description ("vision") step.
 *
 * Every field is optional here; `resolveVisionConfig` fills in defaults
 * before the value is handed to `describeImageWithVision`.
 */
export type VisionConfig = Partial<{
  /** Master switch; a falsy value disables the vision step. */
  enabled: boolean;
  /** URL prefix that `/chat/completions` is appended to when sending the request. */
  baseUrl: string;
  /** Credential sent in the `Authorization: Bearer …` request header. */
  apiKey: string;
  /** Value sent as the `model` field of the request body. */
  model: string;
  /** Text instruction sent alongside the image in the user message. */
  prompt: string;
  /** Value sent as `max_tokens` in the request body. */
  maxTokens: number;
  /** Milliseconds after which the in-flight request is aborted. */
  timeoutMs: number;
  /** When set to a non-zero value, larger images are skipped instead of being sent. */
  maxBytes: number;
}>;
|
|
13
|
+
|
|
14
|
+
/**
 * Normalizes a user- or environment-supplied API base URL so that it ends
 * with exactly one `/v1` segment and has no trailing slash.
 *
 * @param raw - Base URL as configured; may be undefined, blank, or carry
 *   trailing slashes.
 * @returns The normalized base URL, or `null` when nothing usable was given.
 */
function resolveBaseUrl(raw?: string): string | null {
  // Strip trailing slashes BEFORE testing for the "/v1" suffix; otherwise an
  // input such as "https://host/v1/" fails the suffix test and is turned into
  // "https://host/v1/v1".
  const value = raw?.trim().replace(/\/+$/, "");
  if (!value) return null;
  return value.endsWith("/v1") ? value : `${value}/v1`;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Builds the effective vision settings for an account.
 *
 * Values come from `accountConfig.media.vision` first, then from the
 * `OPENAI_*` environment variables, then from built-in defaults.
 *
 * @param accountConfig - Account configuration that may carry `media.vision`.
 * @returns A config with `enabled: true`, or `null` when vision is not
 *   enabled or no base URL / API key could be determined.
 */
export function resolveVisionConfig(accountConfig: WecomAccountConfig): VisionConfig | null {
  const settings = accountConfig.media?.vision;
  if (!settings || !settings.enabled) return null;

  const env = process.env;

  // `||` (not `??`) so that empty strings fall through to the next source.
  const rawBaseUrl =
    settings.baseUrl || env.OPENAI_BASE_URL || env.OPENAI_API_BASE || env.OPENAI_ENDPOINT;
  const baseUrl = resolveBaseUrl(rawBaseUrl);
  const apiKey = settings.apiKey || env.OPENAI_API_KEY || env.OPENAI_KEY;
  if (!(baseUrl && apiKey)) return null;

  const resolved: VisionConfig = { enabled: true, baseUrl, apiKey };
  resolved.model = settings.model || env.OPENAI_MODEL || "gpt-4o-mini";
  resolved.prompt = settings.prompt || "请描述图片内容并尽量提取可见文字。输出简洁中文要点。";

  // Numeric options: any number (including 0) is taken as-is; anything else
  // falls back to the default. maxBytes has no default, so it stays undefined.
  resolved.maxTokens = typeof settings.maxTokens === "number" ? settings.maxTokens : 400;
  resolved.timeoutMs = typeof settings.timeoutMs === "number" ? settings.timeoutMs : 15000;
  resolved.maxBytes = typeof settings.maxBytes === "number" ? settings.maxBytes : undefined;
  return resolved;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Asks the configured chat-completions endpoint to describe an image.
 *
 * The image is sent inline as a base64 `data:` URL together with the
 * configured prompt. This is best-effort: every failure path (disabled or
 * incomplete config, oversized image, non-2xx response, unexpected response
 * shape, network error, timeout) yields `null` instead of throwing.
 *
 * @param params.config - Vision settings; `enabled`, `baseUrl` and `apiKey` must all be truthy.
 * @param params.buffer - Raw image bytes.
 * @param params.mimeType - MIME type placed in the `data:` URL.
 * @returns The trimmed text of the first choice, or `null` when unavailable or empty.
 */
export async function describeImageWithVision(params: {
  config: VisionConfig;
  buffer: Buffer;
  mimeType: string;
}): Promise<string | null> {
  const { config: vision, buffer: image, mimeType } = params;

  const usable = Boolean(vision.enabled && vision.baseUrl && vision.apiKey);
  if (!usable) return null;

  // A missing or zero limit means "no vision-specific size cap".
  const byteLimit = vision.maxBytes;
  if (byteLimit && image.length > byteLimit) return null;

  const aborter = new AbortController();
  const timer = setTimeout(() => aborter.abort(), vision.timeoutMs ?? 15000);

  try {
    const dataUrl = `data:${mimeType};base64,${image.toString("base64")}`;
    const requestBody = JSON.stringify({
      model: vision.model,
      messages: [
        {
          role: "user",
          content: [
            { type: "text", text: vision.prompt },
            { type: "image_url", image_url: { url: dataUrl } },
          ],
        },
      ],
      max_tokens: vision.maxTokens ?? 400,
    });

    const response = await fetch(`${vision.baseUrl}/chat/completions`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${vision.apiKey}`,
      },
      body: requestBody,
      signal: aborter.signal,
    });
    if (!response.ok) return null;

    const parsed = (await response.json()) as
      | { choices?: Array<{ message?: { content?: unknown } }> }
      | null
      | undefined;
    const answer = parsed?.choices?.[0]?.message?.content;
    return typeof answer === "string" ? answer.trim() || null : null;
  } catch {
    // Deliberately swallowed: callers fall back to a generic image prompt.
    return null;
  } finally {
    // Always release the timer so it cannot fire after the request settles.
    clearTimeout(timer);
  }
}
|
package/wecom/src/types.ts
CHANGED
|
@@ -47,6 +47,16 @@ export type WecomAccountConfig = {
|
|
|
47
47
|
retentionHours?: number;
|
|
48
48
|
cleanupOnStart?: boolean;
|
|
49
49
|
maxBytes?: number;
|
|
50
|
+
vision?: {
|
|
51
|
+
enabled?: boolean;
|
|
52
|
+
baseUrl?: string;
|
|
53
|
+
apiKey?: string;
|
|
54
|
+
model?: string;
|
|
55
|
+
prompt?: string;
|
|
56
|
+
maxTokens?: number;
|
|
57
|
+
timeoutMs?: number;
|
|
58
|
+
maxBytes?: number;
|
|
59
|
+
};
|
|
50
60
|
};
|
|
51
61
|
|
|
52
62
|
// Network behavior
|
package/wecom/src/wecom-app.ts
CHANGED
|
@@ -10,6 +10,7 @@ import { decryptWecomEncrypted, verifyWecomSignature } from "./crypto.js";
|
|
|
10
10
|
import { getWecomRuntime } from "./runtime.js";
|
|
11
11
|
import { handleCommand } from "./commands.js";
|
|
12
12
|
import { markdownToWecomText } from "./format.js";
|
|
13
|
+
import { describeImageWithVision, resolveVisionConfig } from "./media-vision.js";
|
|
13
14
|
import { downloadWecomMedia, fetchMediaFromUrl, sendWecomFile, sendWecomImage, sendWecomText, sendWecomVideo, sendWecomVoice, uploadWecomMedia } from "./wecom-api.js";
|
|
14
15
|
|
|
15
16
|
const xmlParser = new XMLParser({
|
|
@@ -26,6 +27,7 @@ type MediaCacheEntry = {
|
|
|
26
27
|
type: "image" | "voice" | "video" | "file";
|
|
27
28
|
mimeType?: string;
|
|
28
29
|
url?: string;
|
|
30
|
+
summary?: string;
|
|
29
31
|
createdAt: number;
|
|
30
32
|
size: number;
|
|
31
33
|
};
|
|
@@ -467,7 +469,11 @@ async function processAppMessage(params: {
|
|
|
467
469
|
if (cached) {
|
|
468
470
|
mediaContext = { type: cached.type, path: cached.path, mimeType: cached.mimeType, url: cached.url };
|
|
469
471
|
logVerbose(target, `app image cache hit: ${cached.path}`);
|
|
470
|
-
|
|
472
|
+
if (cached.summary) {
|
|
473
|
+
messageText = `[用户发送了一张图片]\n\n[图片识别结果]\n${cached.summary}\n\n请根据识别结果回复用户。`;
|
|
474
|
+
} else {
|
|
475
|
+
messageText = "[用户发送了一张图片]\n\n请根据图片内容回复用户。";
|
|
476
|
+
}
|
|
471
477
|
} else {
|
|
472
478
|
let buffer: Buffer | null = null;
|
|
473
479
|
let contentType = "";
|
|
@@ -498,16 +504,27 @@ async function processAppMessage(params: {
|
|
|
498
504
|
await writeFile(tempImagePath, buffer);
|
|
499
505
|
const mimeType = contentType || "image/jpeg";
|
|
500
506
|
mediaContext = { type: "image", path: tempImagePath, mimeType, url: picUrl || undefined };
|
|
507
|
+
|
|
508
|
+
const visionConfig = resolveVisionConfig(target.account.config);
|
|
509
|
+
const summary = visionConfig
|
|
510
|
+
? await describeImageWithVision({ config: visionConfig, buffer, mimeType })
|
|
511
|
+
: null;
|
|
512
|
+
|
|
501
513
|
storeCachedMedia(cacheKey, {
|
|
502
514
|
path: tempImagePath,
|
|
503
515
|
type: "image",
|
|
504
516
|
mimeType,
|
|
505
517
|
url: picUrl || undefined,
|
|
518
|
+
summary: summary ?? undefined,
|
|
506
519
|
createdAt: Date.now(),
|
|
507
520
|
size: buffer.length,
|
|
508
521
|
});
|
|
509
522
|
logVerbose(target, `app image saved (${buffer.length} bytes): ${tempImagePath}`);
|
|
510
|
-
|
|
523
|
+
if (summary) {
|
|
524
|
+
messageText = `[用户发送了一张图片]\n\n[图片识别结果]\n${summary}\n\n请根据识别结果回复用户。`;
|
|
525
|
+
} else {
|
|
526
|
+
messageText = "[用户发送了一张图片]\n\n请根据图片内容回复用户。";
|
|
527
|
+
}
|
|
511
528
|
}
|
|
512
529
|
} else {
|
|
513
530
|
messageText = "[用户发送了一张图片,但下载失败]\n\n请告诉用户图片处理暂时不可用。";
|
package/wecom/src/wecom-bot.ts
CHANGED
|
@@ -11,6 +11,7 @@ import type { ResolvedWecomAccount, WecomInboundMessage } from "./types.js";
|
|
|
11
11
|
import { computeWecomMsgSignature, decryptWecomEncrypted, encryptWecomPlaintext, verifyWecomSignature } from "./crypto.js";
|
|
12
12
|
import { fetchMediaFromUrl, sendWecomFile, sendWecomImage, sendWecomVideo, sendWecomVoice, uploadWecomMedia } from "./wecom-api.js";
|
|
13
13
|
import { getWecomRuntime } from "./runtime.js";
|
|
14
|
+
import { describeImageWithVision, resolveVisionConfig } from "./media-vision.js";
|
|
14
15
|
|
|
15
16
|
const STREAM_TTL_MS = 10 * 60 * 1000;
|
|
16
17
|
const STREAM_MAX_BYTES = 20_480;
|
|
@@ -20,7 +21,7 @@ const DEDUPE_MAX_ENTRIES = 2_000;
|
|
|
20
21
|
const MEDIA_CACHE_MAX_ENTRIES = 200;
|
|
21
22
|
|
|
22
23
|
const cleanupExecuted = new Set<string>();
|
|
23
|
-
const mediaCache = new Map<string, { entry: InboundMedia; createdAt: number; size: number }>();
|
|
24
|
+
const mediaCache = new Map<string, { entry: InboundMedia; createdAt: number; size: number; summary?: string }>();
|
|
24
25
|
|
|
25
26
|
type StreamState = {
|
|
26
27
|
streamId: string;
|
|
@@ -593,9 +594,12 @@ async function buildBotMediaMessage(params: {
|
|
|
593
594
|
const cacheKey = buildMediaCacheKey({ url, base64 });
|
|
594
595
|
const cached = await getCachedMedia(cacheKey, resolveMediaRetentionMs(target));
|
|
595
596
|
if (cached) {
|
|
597
|
+
const text = msgtype === "image" && cached.summary
|
|
598
|
+
? `[用户发送了一张图片]\n\n[图片识别结果]\n${cached.summary}\n\n请根据识别结果回复用户。`
|
|
599
|
+
: buildInboundMediaPrompt(msgtype, filename);
|
|
596
600
|
return {
|
|
597
|
-
text
|
|
598
|
-
media: cached,
|
|
601
|
+
text,
|
|
602
|
+
media: cached.media,
|
|
599
603
|
};
|
|
600
604
|
}
|
|
601
605
|
|
|
@@ -670,9 +674,19 @@ async function buildBotMediaMessage(params: {
|
|
|
670
674
|
mimeType: contentType || "image/jpeg",
|
|
671
675
|
url,
|
|
672
676
|
};
|
|
673
|
-
|
|
677
|
+
const visionConfig = resolveVisionConfig(target.account.config);
|
|
678
|
+
const summary = visionConfig
|
|
679
|
+
? await describeImageWithVision({
|
|
680
|
+
config: visionConfig,
|
|
681
|
+
buffer,
|
|
682
|
+
mimeType: media.mimeType || "image/jpeg",
|
|
683
|
+
})
|
|
684
|
+
: null;
|
|
685
|
+
storeCachedMedia(cacheKey, media, buffer.length, summary ?? undefined);
|
|
674
686
|
return {
|
|
675
|
-
text:
|
|
687
|
+
text: summary
|
|
688
|
+
? `[用户发送了一张图片]\n\n[图片识别结果]\n${summary}\n\n请根据识别结果回复用户。`
|
|
689
|
+
: buildInboundMediaPrompt("image"),
|
|
676
690
|
media,
|
|
677
691
|
};
|
|
678
692
|
}
|
|
@@ -812,7 +826,7 @@ function pruneMediaCache(): void {
|
|
|
812
826
|
async function getCachedMedia(
|
|
813
827
|
key: string | null,
|
|
814
828
|
retentionMs?: number,
|
|
815
|
-
): Promise<InboundMedia | null> {
|
|
829
|
+
): Promise<{ media: InboundMedia; summary?: string } | null> {
|
|
816
830
|
if (!key) return null;
|
|
817
831
|
const cached = mediaCache.get(key);
|
|
818
832
|
if (!cached) return null;
|
|
@@ -826,12 +840,12 @@ async function getCachedMedia(
|
|
|
826
840
|
mediaCache.delete(key);
|
|
827
841
|
return null;
|
|
828
842
|
}
|
|
829
|
-
return cached.entry;
|
|
843
|
+
return { media: cached.entry, summary: cached.summary };
|
|
830
844
|
}
|
|
831
845
|
|
|
832
|
-
function storeCachedMedia(key: string | null, entry: InboundMedia, size: number): void {
|
|
846
|
+
function storeCachedMedia(key: string | null, entry: InboundMedia, size: number, summary?: string): void {
|
|
833
847
|
if (!key) return;
|
|
834
|
-
mediaCache.set(key, { entry, createdAt: Date.now(), size });
|
|
848
|
+
mediaCache.set(key, { entry, createdAt: Date.now(), size, summary });
|
|
835
849
|
pruneMediaCache();
|
|
836
850
|
}
|
|
837
851
|
|