skyloom 1.22.0 → 1.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -0
- package/dist/gateway/channels/feishu.d.ts.map +1 -1
- package/dist/gateway/channels/feishu.js +53 -0
- package/dist/gateway/channels/feishu.js.map +1 -1
- package/dist/gateway/channels/qq.d.ts.map +1 -1
- package/dist/gateway/channels/qq.js +45 -0
- package/dist/gateway/channels/qq.js.map +1 -1
- package/dist/gateway/channels/wecom.d.ts.map +1 -1
- package/dist/gateway/channels/wecom.js +41 -0
- package/dist/gateway/channels/wecom.js.map +1 -1
- package/dist/gateway/gateway.d.ts.map +1 -1
- package/dist/gateway/gateway.js +79 -9
- package/dist/gateway/gateway.js.map +1 -1
- package/dist/gateway/helpers.d.ts +23 -0
- package/dist/gateway/helpers.d.ts.map +1 -1
- package/dist/gateway/helpers.js +90 -0
- package/dist/gateway/helpers.js.map +1 -1
- package/dist/gateway/types.d.ts +39 -0
- package/dist/gateway/types.d.ts.map +1 -1
- package/dist/gateway/types.js +25 -0
- package/dist/gateway/types.js.map +1 -1
- package/dist/gateway/vision.d.ts +23 -0
- package/dist/gateway/vision.d.ts.map +1 -0
- package/dist/gateway/vision.js +77 -0
- package/dist/gateway/vision.js.map +1 -0
- package/package.json +1 -1
- package/src/gateway/channels/feishu.ts +49 -2
- package/src/gateway/channels/qq.ts +43 -2
- package/src/gateway/channels/wecom.ts +47 -2
- package/src/gateway/gateway.ts +77 -8
- package/src/gateway/helpers.ts +60 -0
- package/src/gateway/types.ts +58 -0
- package/src/gateway/vision.ts +78 -0
- package/tests/gateway.test.ts +84 -1
package/src/gateway/gateway.ts
CHANGED
|
@@ -16,8 +16,11 @@ import { createServer, IncomingMessage, ServerResponse } from 'http';
|
|
|
16
16
|
import { getLogger } from '../core/logger';
|
|
17
17
|
import { createSystemContext } from '../core/factory';
|
|
18
18
|
import { buildAdapters } from './registry';
|
|
19
|
-
import { describeMedia } from './types';
|
|
19
|
+
import { describeMedia, parseReply } from './types';
|
|
20
|
+
import { isSendableSrc } from './helpers';
|
|
21
|
+
import { describeImages } from './vision';
|
|
20
22
|
import type { ChannelAdapter, InboundMessage, RawRequest } from './types';
|
|
23
|
+
import type { LoadedMedia } from './helpers';
|
|
21
24
|
|
|
22
25
|
const log = getLogger('gateway');
|
|
23
26
|
|
|
@@ -29,11 +32,44 @@ async function readBody(req: IncomingMessage): Promise<Buffer> {
|
|
|
29
32
|
}
|
|
30
33
|
|
|
31
34
|
/** Run an agent turn for an inbound message and collect the final text reply. */
|
|
32
|
-
/** Build the agent prompt
|
|
33
|
-
function buildPrompt(msg: InboundMessage): string {
|
|
35
|
+
/**
 * Assemble the prompt handed to the agent for one inbound message.
 *
 * Sections are emitted in a fixed order and joined by a blank line:
 *   1. the user's text (when non-empty),
 *   2. a parenthesised description of attached media (when describeMedia
 *      returns a non-empty string),
 *   3. the vision result (when provided and non-empty),
 *   4. a hint on how to reference outbound media (when `canSendMedia`).
 *
 * @param msg          Inbound message; `text` and `media` are read.
 * @param canSendMedia Whether the adapter can deliver images/files.
 * @param visionText   Optional image description produced by vision.
 * @returns The joined prompt, or `msg.text` when no section was produced.
 */
function buildPrompt(msg: InboundMessage, canSendMedia: boolean, visionText?: string | null): string {
  const mediaDesc = describeMedia(msg.media);

  // Each entry is either the finished section or a falsy placeholder that
  // gets filtered out below; order here is the order in the prompt.
  // NOTE(review): the media hint literal contains two consecutive spaces after
  // "Markdown 图片" — an inline example may be missing there; confirm against
  // the upstream source before changing it.
  const candidates: Array<string | false | null | undefined> = [
    msg.text,
    mediaDesc && `(用户发送了媒体: ${mediaDesc})`,
    visionText && `(图片内容识别: ${visionText})`,
    canSendMedia && '(若需回发图片或文件,在回复中用 Markdown 图片  或 [[file:路径或URL]] 表示,路径可为本地文件或 http(s) 链接。)',
  ];

  const sections = candidates.filter((section): section is string => Boolean(section));
  return sections.join('\n\n') || msg.text;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Fetch the images attached to an inbound message and ask the vision helper
 * to describe them.
 *
 * Resolves to null (no vision text) when any of the following holds:
 *   - the channel config sets `vision: false`;
 *   - no vision model is configured (channel `visionModel`, then
 *     `llm.vision_model`, then `llm.visionModel`);
 *   - the message carries no image attachments;
 *   - the adapter does not implement `fetchMedia`;
 *   - none of the images could be downloaded.
 *
 * At most the first four images are downloaded, one after another. A failed
 * download is logged and skipped so the remaining images are still tried.
 */
async function visionForMessage(
  ctx: ReturnType<typeof createSystemContext>,
  adapter: ChannelAdapter,
  msg: InboundMessage,
): Promise<string | null> {
  const channelCfg = ((ctx.config as any).channels || {})[adapter.id] || {};
  const llmCfg = (ctx.config as any).llm || {};

  if (channelCfg.vision === false) return null;

  // Vision is opt-in: without an explicitly configured model nothing runs.
  const model = channelCfg.visionModel || llmCfg.vision_model || llmCfg.visionModel;
  if (!model) return null;

  const imageAttachments = (msg.media || []).filter((attachment) => attachment.kind === 'image');
  if (imageAttachments.length === 0 || !adapter.fetchMedia) return null;

  const downloaded: LoadedMedia[] = [];
  for (const attachment of imageAttachments.slice(0, 4)) {
    try {
      const fetched = await adapter.fetchMedia(attachment, msg);
      if (!fetched) continue;
      downloaded.push({
        data: fetched.data,
        filename: attachment.filename || 'image',
        contentType: fetched.contentType,
      });
    } catch (err) {
      log.warn('vision_fetch_failed', { channel: adapter.id, error: String(err) });
    }
  }

  return downloaded.length > 0 ? describeImages(downloaded, { model }) : null;
}
|
|
38
74
|
|
|
39
75
|
/** Resolve the agent for a channel message. */
|
|
@@ -52,16 +88,18 @@ async function dispatch(
|
|
|
52
88
|
const agent = resolveAgent(ctx, adapter);
|
|
53
89
|
if (!agent) throw new Error('no agent available');
|
|
54
90
|
await agent.init();
|
|
55
|
-
const
|
|
91
|
+
const visionText = await visionForMessage(ctx, adapter, msg);
|
|
92
|
+
const prompt = buildPrompt(msg, !!adapter.sendMedia, visionText);
|
|
56
93
|
|
|
57
94
|
// Streaming path: stream content chunks straight to the adapter (e.g. a Feishu
|
|
58
95
|
// card patched as text arrives). Falls back to collect-then-send otherwise.
|
|
59
96
|
const cfgStreaming = ((ctx.config as any).channels || {})[adapter.id]?.streaming !== false;
|
|
60
97
|
if (adapter.sendStreaming && cfgStreaming) {
|
|
98
|
+
let full = '';
|
|
61
99
|
async function* contentChunks(): AsyncGenerator<string> {
|
|
62
100
|
try {
|
|
63
101
|
for await (const ev of agent.chatStream(prompt)) {
|
|
64
|
-
if ((ev as any).type === 'content')
|
|
102
|
+
if ((ev as any).type === 'content') { const t = (ev as any).text as string; full += t; yield t; }
|
|
65
103
|
}
|
|
66
104
|
} catch (e) {
|
|
67
105
|
log.warn('gateway_agent_failed', { channel: adapter.id, error: String(e) });
|
|
@@ -69,6 +107,8 @@ async function dispatch(
|
|
|
69
107
|
}
|
|
70
108
|
}
|
|
71
109
|
await adapter.sendStreaming(msg.replyTo, contentChunks());
|
|
110
|
+
// After streaming the text, deliver any media the agent referenced.
|
|
111
|
+
await deliverMedia(adapter, msg, full);
|
|
72
112
|
return;
|
|
73
113
|
}
|
|
74
114
|
|
|
@@ -81,7 +121,36 @@ async function dispatch(
|
|
|
81
121
|
log.warn('gateway_agent_failed', { channel: adapter.id, error: String(e) });
|
|
82
122
|
text = `[出错了] ${String(e)}`;
|
|
83
123
|
}
|
|
84
|
-
|
|
124
|
+
// Non-streaming: split out media so the text message is clean.
|
|
125
|
+
if (adapter.sendMedia) {
|
|
126
|
+
const parsed = parseReply(text);
|
|
127
|
+
await adapter.send(msg.replyTo, parsed.text || '(无回复)');
|
|
128
|
+
await deliverMedia(adapter, msg, text, parsed.media);
|
|
129
|
+
} else {
|
|
130
|
+
await adapter.send(msg.replyTo, text.trim() || '(无回复)');
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
/**
 * Best-effort delivery of the media items an agent reply refers to.
 *
 * Does nothing when the adapter has no `sendMedia`. Otherwise the items come
 * from `pre` when supplied, or from parsing `fullText` with parseReply, and
 * are sent one at a time in list order. An item whose source fails
 * isSendableSrc is logged and skipped; a send that throws is logged and does
 * not prevent the remaining items from being attempted.
 *
 * @param adapter  Channel adapter used to send.
 * @param msg      Inbound message; its `replyTo` is the send target.
 * @param fullText Complete agent reply, parsed only when `pre` is absent.
 * @param pre      Media already extracted by the caller, if any.
 */
async function deliverMedia(
  adapter: ChannelAdapter,
  msg: InboundMessage,
  fullText: string,
  pre?: ReturnType<typeof parseReply>['media'],
): Promise<void> {
  if (!adapter.sendMedia) return;

  const items = pre ?? parseReply(fullText).media;
  for (const item of items) {
    if (isSendableSrc(item.src)) {
      try {
        await adapter.sendMedia(msg.replyTo, item);
      } catch (err) {
        log.warn('gateway_send_media_failed', { channel: adapter.id, src: item.src, error: String(err) });
      }
    } else {
      log.warn('gateway_media_unsendable', { channel: adapter.id, src: item.src });
    }
  }
}
|
|
86
155
|
|
|
87
156
|
export interface GatewayOptions {
|
package/src/gateway/helpers.ts
CHANGED
|
@@ -5,6 +5,8 @@
|
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
7
|
import axios from 'axios';
|
|
8
|
+
import * as fs from 'fs';
|
|
9
|
+
import * as path from 'path';
|
|
8
10
|
|
|
9
11
|
/**
|
|
10
12
|
* Resolve a secret/config value. Accepts a literal string, or an env-ref object
|
|
@@ -58,6 +60,64 @@ export async function getJson(
|
|
|
58
60
|
return res.data;
|
|
59
61
|
}
|
|
60
62
|
|
|
63
|
+
/**
 * Binary content held in memory together with the name it should be uploaded
 * under.
 */
export interface LoadedMedia {
  /** Raw bytes of the media. */
  data: Buffer;
  /** Filename to present when uploading. */
  filename: string;
  /** MIME type, when one is known. */
  contentType?: string;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Derive an upload filename from the last path segment of an http(s) URL.
 * Percent-encoding is decoded so non-ASCII names stay readable; a segment with
 * malformed encoding is returned as-is, and an empty segment becomes 'file'.
 */
function filenameFromUrl(src: string): string {
  const rawName = path.basename(new URL(src).pathname);
  if (!rawName) return 'file';
  try {
    // basename again: decoding may reveal an encoded path separator (%2F).
    return path.basename(decodeURIComponent(rawName)) || 'file';
  } catch {
    return rawName; // decodeURIComponent throws URIError on malformed input
  }
}

/**
 * Load media bytes from a local filesystem path or an http(s) URL.
 *
 * Remote URLs are fetched with a 30 s timeout and a 30 MiB response cap so a
 * huge download is not pulled into memory. Local paths are read in full
 * without blocking the event loop; no size cap is applied to local files.
 *
 * NOTE(review): this function reads whatever path or URL it is given. If
 * callers pass sources taken from model output, consider restricting local
 * paths to an allowed directory and blocking internal network addresses.
 *
 * @param src Local path, or a URL starting with http:// or https://
 *            (scheme matched case-insensitively).
 * @returns The bytes, a filename (URL's last path segment, or the path's
 *          basename), and for remote sources the response content type when
 *          it is a string.
 * @throws When the request fails, returns a non-2xx status or exceeds the
 *         cap, or when the local file cannot be read (e.g. ENOENT).
 */
export async function loadMedia(src: string): Promise<LoadedMedia> {
  if (/^https?:\/\//i.test(src)) {
    const res = await axios.get(src, {
      responseType: 'arraybuffer',
      timeout: 30000,
      maxContentLength: 30 * 1024 * 1024,
      validateStatus: (s) => s >= 200 && s < 300,
    });
    const ct = res.headers['content-type'];
    return {
      data: Buffer.from(res.data),
      filename: filenameFromUrl(src),
      contentType: typeof ct === 'string' ? ct : undefined,
    };
  }

  // Async read: a synchronous read here would stall every other request
  // while a large file is loaded. Failures still surface as a rejection.
  const data = await fs.promises.readFile(src);
  return { data, filename: path.basename(src) };
}
|
|
94
|
+
|
|
95
|
+
/**
 * Decide whether a media source can be sent.
 *
 * @param src Candidate source string.
 * @returns true for anything starting with http:// or https:// (scheme
 *          matched case-insensitively), or for a path that currently resolves
 *          to a regular file; false otherwise, including when the filesystem
 *          lookup throws.
 */
export function isSendableSrc(src: string): boolean {
  const looksRemote = /^https?:\/\//i.test(src);
  if (looksRemote) return true;

  try {
    // statSync throws for a missing path, which lands in the catch below.
    return fs.statSync(src).isFile();
  } catch {
    return false;
  }
}
|
|
100
|
+
|
|
101
|
+
/**
 * POST a multipart/form-data body built from `fields` and return the response
 * data. Relies on the global FormData and Blob (Node 18+).
 *
 * @param url    Endpoint to post to.
 * @param fields Form parts keyed by field name. A string becomes a plain
 *               text part; an object becomes a file part with the given
 *               bytes, filename and (when present) content type.
 * @param opts   Optional extra request headers and a timeout in milliseconds
 *               (defaults to 30000).
 * @returns The response data as provided by axios.
 * @throws When the request fails or the status is outside 200-299.
 */
export async function postMultipart(
  url: string,
  fields: Record<string, string | { data: Buffer; filename: string; contentType?: string }>,
  opts?: { headers?: Record<string, string>; timeoutMs?: number },
): Promise<any> {
  const body = new FormData();
  Object.entries(fields).forEach(([name, value]) => {
    if (typeof value !== 'string') {
      const blobOptions = value.contentType ? { type: value.contentType } : undefined;
      body.append(name, new Blob([value.data], blobOptions), value.filename);
    } else {
      body.append(name, value);
    }
  });

  const headers = { ...(opts?.headers || {}) };
  const timeout = opts?.timeoutMs ?? 30000;
  const response = await axios.post(url, body, {
    headers,
    timeout,
    maxBodyLength: Infinity,
    validateStatus: (s) => s >= 200 && s < 300,
  });
  return response.data;
}
|
|
120
|
+
|
|
61
121
|
/**
|
|
62
122
|
* A small token cache: fetch an access token via `fetcher`, cache it until it
|
|
63
123
|
* is near expiry, and refresh transparently. Channels (Feishu/WeCom) all need
|
package/src/gateway/types.ts
CHANGED
|
@@ -43,6 +43,49 @@ export interface MediaAttachment {
|
|
|
43
43
|
url?: string;
|
|
44
44
|
}
|
|
45
45
|
|
|
46
|
+
/** An outbound media item the agent wants to send (parsed from its reply). */
export interface OutboundMedia {
  kind: 'image' | 'file';
  /** Local filesystem path or http(s) URL to the binary. */
  src: string;
  /** Optional caption / alt text. */
  alt?: string;
}

/** The result of splitting an agent reply into plain text + outbound media. */
export interface ParsedReply {
  text: string;
  media: OutboundMedia[];
}

/**
 * Matches either media form in one pass so items are collected in the order
 * they appear in the reply:
 *   groups 1-3: [[image|file:src]] or [[image|file:src|alt]]
 *   groups 4-5: ![alt](src) or ![alt](src "title")
 */
const MEDIA_REFERENCE_RE =
  /\[\[(image|file):([^\]|]+)(?:\|([^\]]*))?\]\]|!\[([^\]]*)\]\(([^)\s]+)(?:\s+"[^"]*")?\)/gi;

/**
 * Parse media directives out of an agent's reply so channels can upload+send
 * them. Recognized forms (all stripped from the returned text):
 *   - Markdown image:  ![alt](src)  or  ![alt](src "title")
 *   - Explicit image:  [[image:src]]  or  [[image:src|alt]]
 *   - Explicit file:   [[file:src]]   or  [[file:src|alt]]
 *
 * Every match is extracted and removed from the text. This function does not
 * check that `src` exists or is reachable; callers decide whether an item can
 * actually be sent.
 *
 * @param reply Raw agent reply.
 * @returns `media` in the order the references appear in `reply`, and `text`
 *          with the references removed, runs of three or more newlines
 *          collapsed to two, and surrounding whitespace trimmed.
 */
export function parseReply(reply: string): ParsedReply {
  const media: OutboundMedia[] = [];

  const stripped = reply.replace(
    MEDIA_REFERENCE_RE,
    (_match, kind?: string, directiveSrc?: string, directiveAlt?: string, markdownAlt?: string, markdownSrc?: string) => {
      if (kind !== undefined) {
        // [[image:...]] / [[file:...]] — the keyword is matched case-insensitively.
        media.push({
          kind: kind.toLowerCase() as 'image' | 'file',
          src: String(directiveSrc).trim(),
          alt: directiveAlt ? directiveAlt.trim() : undefined,
        });
      } else {
        media.push({
          kind: 'image',
          src: String(markdownSrc).trim(),
          alt: markdownAlt ? markdownAlt.trim() : undefined,
        });
      }
      return '';
    },
  );

  return { text: stripped.replace(/\n{3,}/g, '\n\n').trim(), media };
}
|
|
88
|
+
|
|
46
89
|
/** Render a media list into a compact, model-readable description line. */
|
|
47
90
|
export function describeMedia(media: MediaAttachment[] | undefined): string {
|
|
48
91
|
if (!media || media.length === 0) return '';
|
|
@@ -119,6 +162,21 @@ export interface ChannelAdapter {
|
|
|
119
162
|
* should throttle their own updates and tolerate an empty/aborted stream.
|
|
120
163
|
*/
|
|
121
164
|
sendStreaming?(target: ReplyTarget, chunks: AsyncIterable<string>): Promise<void>;
|
|
165
|
+
|
|
166
|
+
/**
|
|
167
|
+
* Optional: upload and send an image or file. When an adapter implements this,
|
|
168
|
+
* the gateway extracts media directives from the agent's reply (parseReply)
|
|
169
|
+
* and delivers them after the text. Adapters without it simply keep the
|
|
170
|
+
* media reference in the text.
|
|
171
|
+
*/
|
|
172
|
+
sendMedia?(target: ReplyTarget, item: OutboundMedia): Promise<void>;
|
|
173
|
+
|
|
174
|
+
/**
|
|
175
|
+
* Optional: download an inbound media attachment's bytes so the gateway can
|
|
176
|
+
* run vision over an image. `att` is one entry from InboundMessage.media.
|
|
177
|
+
* Returns the binary or null if it can't be fetched.
|
|
178
|
+
*/
|
|
179
|
+
fetchMedia?(att: MediaAttachment, msg: InboundMessage): Promise<{ data: Buffer; contentType?: string } | null>;
|
|
122
180
|
}
|
|
123
181
|
|
|
124
182
|
/** Factory signature: build an adapter from its config block (or null if disabled/misconfigured). */
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vision describe — turn an inbound image into a text description so the agent
|
|
3
|
+
* can "see" what the user sent, without rewiring the core text-only LLM loop.
|
|
4
|
+
*
|
|
5
|
+
* Self-contained on purpose: a single OpenAI-compatible chat/completions call
|
|
6
|
+
* with an image_url (base64 data URL) content block. The model + key are
|
|
7
|
+
* resolved from config.channels.<id>.visionModel / config.llm.vision_model
|
|
8
|
+
* (default gpt-4o-mini), falling back to env keys the same way the rest of
|
|
9
|
+
* Skyloom does. If no key/model is available, vision is skipped silently and the
|
|
10
|
+
* gateway just uses the media description line.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import axios from 'axios';
|
|
14
|
+
import { getLogger } from '../core/logger';
|
|
15
|
+
import type { LoadedMedia } from './helpers';
|
|
16
|
+
|
|
17
|
+
const log = getLogger('gateway-vision');
|
|
18
|
+
|
|
19
|
+
/**
 * Pick the API base URL for a model by looking for a provider keyword in its
 * id (case-insensitive). Rules are checked top to bottom and the first match
 * wins; anything unmatched goes to the OpenAI endpoint.
 */
function baseUrlFor(model: string): string {
  const id = model.toLowerCase();
  const rules: Array<{ keywords: string[]; url: string }> = [
    // Anthropic is listed for completeness; describeImages returns before
    // making a request for such models.
    { keywords: ['claude'], url: 'https://api.anthropic.com/v1' },
    { keywords: ['gemini'], url: 'https://generativelanguage.googleapis.com/v1beta/openai' },
    { keywords: ['grok', 'xai'], url: 'https://api.x.ai/v1' },
    { keywords: ['qwen', 'dashscope'], url: 'https://dashscope.aliyuncs.com/compatible-mode/v1' },
  ];

  for (const rule of rules) {
    if (rule.keywords.some((keyword) => id.includes(keyword))) return rule.url;
  }
  return 'https://api.openai.com/v1';
}
|
|
28
|
+
|
|
29
|
+
/**
 * Look up an API key for the vision model in `env`.
 *
 * The provider is guessed from keywords in the model id (case-insensitive),
 * which selects the environment variable names to try, in order. The first
 * non-empty value is returned; undefined when none is set.
 */
function keyFor(model: string, env: NodeJS.ProcessEnv): string | undefined {
  const id = model.toLowerCase();

  let names: string[];
  if (id.includes('gemini')) {
    names = ['GEMINI_API_KEY', 'GOOGLE_API_KEY'];
  } else if (id.includes('grok') || id.includes('xai')) {
    names = ['XAI_API_KEY'];
  } else if (id.includes('qwen') || id.includes('dashscope')) {
    names = ['DASHSCOPE_API_KEY', 'QWEN_API_KEY'];
  } else {
    names = ['OPENAI_API_KEY'];
  }

  return names.map((name) => env[name]).find((value) => Boolean(value));
}
|
|
39
|
+
|
|
40
|
+
/** Optional settings accepted by describeImages. */
export interface VisionOptions {
  /** Model id to call. */
  model?: string;
  /** Environment to read API keys from. */
  env?: NodeJS.ProcessEnv;
  /** Instruction text sent alongside the images. */
  prompt?: string;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Ask a vision-capable chat model to describe up to four images.
 *
 * Sends one chat/completions request whose single user message holds the
 * instruction text followed by each image as a base64 data URL (images with
 * no content type are labelled image/png).
 *
 * @param images Images to describe; only the first four are sent.
 * @param opts   Model id (default 'gpt-4o-mini'), environment to read the API
 *               key from (default process.env), and instruction text.
 * @returns The trimmed description, or null when there are no images, the
 *          model id contains "claude", no API key is found, the request
 *          fails, or the response has no non-empty text.
 */
export async function describeImages(images: LoadedMedia[], opts: VisionOptions = {}): Promise<string | null> {
  if (images.length === 0) return null;

  const env = opts.env || process.env;
  const model = opts.model || 'gpt-4o-mini';

  // Anthropic models are not called through this request shape, so bail out.
  if (model.toLowerCase().includes('claude')) return null;

  const apiKey = keyFor(model, env);
  if (!apiKey) return null;

  const instruction = opts.prompt || '请用中文简洁描述这些图片的内容(关键物体、文字、场景);如果含可读文字请转写出来。';
  const imageParts = images.slice(0, 4).map((img) => {
    const mime = img.contentType || 'image/png';
    return { type: 'image_url', image_url: { url: `data:${mime};base64,${img.data.toString('base64')}` } };
  });
  const content: any[] = [{ type: 'text', text: instruction }, ...imageParts];

  try {
    const response = await axios.post(
      `${baseUrlFor(model)}/chat/completions`,
      { model, messages: [{ role: 'user', content }], max_tokens: 500, temperature: 0.2 },
      {
        headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${apiKey}` },
        timeout: 30000,
        validateStatus: (s) => s >= 200 && s < 300,
      },
    );
    const reply = response.data?.choices?.[0]?.message?.content;
    if (typeof reply !== 'string') return null;
    const description = reply.trim();
    return description ? description : null;
  } catch (err) {
    // Only the first 160 characters of the error are logged.
    log.warn('vision_describe_failed', { model, error: String(err).slice(0, 160) });
    return null;
  }
}
|
package/tests/gateway.test.ts
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import { describe, it, expect } from "vitest";
|
|
2
2
|
import * as crypto from "crypto";
|
|
3
3
|
import { resolveSecret, TokenCache } from "../src/gateway/helpers";
|
|
4
|
-
import { describeMedia } from "../src/gateway/types";
|
|
4
|
+
import { describeMedia, parseReply } from "../src/gateway/types";
|
|
5
|
+
import { isSendableSrc } from "../src/gateway/helpers";
|
|
6
|
+
import { describeImages } from "../src/gateway/vision";
|
|
5
7
|
import { buildAdapters, SUPPORTED_CHANNELS } from "../src/gateway/registry";
|
|
6
8
|
import { decryptFeishu, createFeishuAdapter } from "../src/gateway/channels/feishu";
|
|
7
9
|
import { wecomSignature, decryptWecom, createWecomAdapter } from "../src/gateway/channels/wecom";
|
|
@@ -81,6 +83,87 @@ describe("gateway · media", () => {
|
|
|
81
83
|
});
|
|
82
84
|
});
|
|
83
85
|
|
|
86
|
+
describe("gateway · parseReply (outbound media)", () => {
|
|
87
|
+
it("extracts a markdown image and strips it from the text", () => {
|
|
88
|
+
const r = parseReply("看这张图  好看吧");
|
|
89
|
+
expect(r.media).toEqual([{ kind: "image", src: "https://x.com/cat.png", alt: "猫" }]);
|
|
90
|
+
expect(r.text).toContain("看这张图");
|
|
91
|
+
expect(r.text).not.toContain("![");
|
|
92
|
+
});
|
|
93
|
+
|
|
94
|
+
it("extracts [[image:...]] and [[file:...|alt]] directives", () => {
|
|
95
|
+
const r = parseReply("结果:\n[[image:/tmp/out.png]]\n[[file:/tmp/report.pdf|季度报告]]");
|
|
96
|
+
expect(r.media).toEqual([
|
|
97
|
+
{ kind: "image", src: "/tmp/out.png", alt: undefined },
|
|
98
|
+
{ kind: "file", src: "/tmp/report.pdf", alt: "季度报告" },
|
|
99
|
+
]);
|
|
100
|
+
expect(r.text).toBe("结果:");
|
|
101
|
+
});
|
|
102
|
+
|
|
103
|
+
it("leaves text without media untouched", () => {
|
|
104
|
+
const r = parseReply("就是一段普通文字");
|
|
105
|
+
expect(r.media).toHaveLength(0);
|
|
106
|
+
expect(r.text).toBe("就是一段普通文字");
|
|
107
|
+
});
|
|
108
|
+
|
|
109
|
+
it("handles multiple images in one reply", () => {
|
|
110
|
+
const r = parseReply(" 和 ");
|
|
111
|
+
expect(r.media.map((m) => m.src)).toEqual(["http://h/1.png", "http://h/2.png"]);
|
|
112
|
+
});
|
|
113
|
+
});
|
|
114
|
+
|
|
115
|
+
describe("gateway · isSendableSrc", () => {
|
|
116
|
+
it("accepts http(s) URLs, rejects bare non-existent paths", () => {
|
|
117
|
+
expect(isSendableSrc("https://x.com/a.png")).toBe(true);
|
|
118
|
+
expect(isSendableSrc("http://x.com/a.png")).toBe(true);
|
|
119
|
+
expect(isSendableSrc("/no/such/file/xyz.png")).toBe(false);
|
|
120
|
+
expect(isSendableSrc("not a path")).toBe(false);
|
|
121
|
+
});
|
|
122
|
+
});
|
|
123
|
+
|
|
124
|
+
describe("gateway · sendMedia capability", () => {
|
|
125
|
+
it("all three adapters expose sendMedia", () => {
|
|
126
|
+
const f = createFeishuAdapter({ appId: "a", appSecret: "s" }, {})!;
|
|
127
|
+
const w = createWecomAdapter({ corpId: "c", corpSecret: "s", token: "t", encodingAesKey: "k".repeat(43), agentId: 1 }, {})!;
|
|
128
|
+
const q = createQQAdapter({ appId: "1", secret: "supersecretseedvalue" }, {})!;
|
|
129
|
+
expect(typeof f.sendMedia).toBe("function");
|
|
130
|
+
expect(typeof w.sendMedia).toBe("function");
|
|
131
|
+
expect(typeof q.sendMedia).toBe("function");
|
|
132
|
+
});
|
|
133
|
+
|
|
134
|
+
it("qq sendMedia rejects a non-URL source", async () => {
|
|
135
|
+
const q = createQQAdapter({ appId: "1", secret: "supersecretseedvalue" }, {})!;
|
|
136
|
+
await expect(q.sendMedia!({ channel: "qq", kind: "group", groupOpenid: "g" }, { kind: "image", src: "/local/file.png" }))
|
|
137
|
+
.rejects.toThrow(/http\(s\) URL/);
|
|
138
|
+
});
|
|
139
|
+
});
|
|
140
|
+
|
|
141
|
+
describe("gateway · vision (multimodal read)", () => {
|
|
142
|
+
it("describeImages returns null with no images", async () => {
|
|
143
|
+
expect(await describeImages([], { model: "gpt-4o-mini", env: {} })).toBeNull();
|
|
144
|
+
});
|
|
145
|
+
it("returns null when no API key is available (skips silently)", async () => {
|
|
146
|
+
const img = { data: Buffer.from("x"), filename: "a.png", contentType: "image/png" };
|
|
147
|
+
expect(await describeImages([img], { model: "gpt-4o-mini", env: {} })).toBeNull();
|
|
148
|
+
});
|
|
149
|
+
it("skips Anthropic models (not OpenAI-chat-shaped here)", async () => {
|
|
150
|
+
const img = { data: Buffer.from("x"), filename: "a.png" };
|
|
151
|
+
expect(await describeImages([img], { model: "claude-sonnet-4-6", env: { ANTHROPIC_API_KEY: "k" } })).toBeNull();
|
|
152
|
+
});
|
|
153
|
+
it("all three adapters expose fetchMedia", () => {
|
|
154
|
+
const f = createFeishuAdapter({ appId: "a", appSecret: "s" }, {})!;
|
|
155
|
+
const w = createWecomAdapter({ corpId: "c", corpSecret: "s", token: "t", encodingAesKey: "k".repeat(43), agentId: 1 }, {})!;
|
|
156
|
+
const q = createQQAdapter({ appId: "1", secret: "supersecretseedvalue" }, {})!;
|
|
157
|
+
expect(typeof f.fetchMedia).toBe("function");
|
|
158
|
+
expect(typeof w.fetchMedia).toBe("function");
|
|
159
|
+
expect(typeof q.fetchMedia).toBe("function");
|
|
160
|
+
});
|
|
161
|
+
it("qq fetchMedia returns null without a url", async () => {
|
|
162
|
+
const q = createQQAdapter({ appId: "1", secret: "supersecretseedvalue" }, {})!;
|
|
163
|
+
expect(await q.fetchMedia!({ kind: "image" } as any, {} as any)).toBeNull();
|
|
164
|
+
});
|
|
165
|
+
});
|
|
166
|
+
|
|
84
167
|
describe("gateway · registry", () => {
|
|
85
168
|
it("lists the three supported channels", () => {
|
|
86
169
|
expect(SUPPORTED_CHANNELS.sort()).toEqual(["feishu", "qq", "wecom"]);
|