skyloom 1.21.0 → 1.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,9 +14,10 @@
14
14
  */
15
15
 
16
16
  import * as crypto from 'crypto';
17
+ import axios from 'axios';
17
18
  import { getLogger } from '../../core/logger';
18
- import { resolveSecret, postJson, TokenCache } from '../helpers';
19
- import type { ChannelAdapter, RawRequest, ReplyTarget, WebhookOutcome } from '../types';
19
+ import { resolveSecret, postJson, postMultipart, loadMedia, TokenCache } from '../helpers';
20
+ import type { ChannelAdapter, InboundMessage, MediaAttachment, OutboundMedia, RawRequest, ReplyTarget, WebhookOutcome } from '../types';
20
21
 
21
22
  const log = getLogger('channel-feishu');
22
23
 
@@ -43,6 +44,10 @@ export function createFeishuAdapter(cfg: any, env: NodeJS.ProcessEnv): ChannelAd
43
44
  const encryptKey = resolveSecret(cfg.encryptKey, env, 'FEISHU_ENCRYPT_KEY');
44
45
  const verificationToken = resolveSecret(cfg.verificationToken, env, 'FEISHU_VERIFICATION_TOKEN');
45
46
  const base = cfg.domain === 'lark' ? 'https://open.larksuite.com' : 'https://open.feishu.cn';
47
+ // 'card' replies render as an interactive card (supports streaming patches);
48
+ // 'raw' forces plain text; 'auto' (default) uses a card so streaming works.
49
+ const renderMode: 'auto' | 'raw' | 'card' = cfg.renderMode || 'auto';
50
+ const useCard = renderMode === 'card' || renderMode === 'auto';
46
51
 
47
52
  const tokenCache = new TokenCache(async () => {
48
53
  const data = await postJson(`${base}/open-apis/auth/v3/tenant_access_token/internal`, {
@@ -52,6 +57,36 @@ export function createFeishuAdapter(cfg: any, env: NodeJS.ProcessEnv): ChannelAd
52
57
  return { token: data.tenant_access_token, expiresInSec: data.expire ?? 7200 };
53
58
  });
54
59
 
60
+ const authHeader = async () => ({ Authorization: `Bearer ${await tokenCache.get()}` });
61
+ const onTokenError = (code: number) => { if (code === 99991663 || code === 99991661) tokenCache.invalidate(); };
62
+
63
+ /** A minimal interactive card carrying a single markdown body. */
64
+ const cardContent = (text: string): string => JSON.stringify({
65
+ config: { wide_screen_mode: true, update_multi: true },
66
+ elements: [{ tag: 'markdown', content: text || ' ' }],
67
+ });
68
+
69
+ /** Create a card message in a chat; returns its message_id for later patches. */
70
+ const createCard = async (chatId: string, text: string): Promise<string | null> => {
71
+ const data = await postJson(
72
+ `${base}/open-apis/im/v1/messages?receive_id_type=chat_id`,
73
+ { receive_id: chatId, msg_type: 'interactive', content: cardContent(text) },
74
+ { headers: await authHeader() },
75
+ );
76
+ if (data.code !== 0) { onTokenError(data.code); throw new Error(`feishu card create ${data.code}: ${data.msg}`); }
77
+ return data.data?.message_id || null;
78
+ };
79
+
80
+ /** Patch an existing card message with new content. */
81
+ const patchCard = async (messageId: string, text: string): Promise<void> => {
82
+ const data = await postJson(
83
+ `${base}/open-apis/im/v1/messages/${messageId}`,
84
+ { content: cardContent(text) },
85
+ { headers: await authHeader() },
86
+ ).catch((e) => ({ code: -1, msg: String(e) }));
87
+ if (data && data.code !== 0) onTokenError(data.code);
88
+ };
89
+
55
90
  // De-dupe redelivered events (Feishu retries on slow ack).
56
91
  const seen = new Set<string>();
57
92
  const remember = (id: string): boolean => {
@@ -103,12 +138,45 @@ export function createFeishuAdapter(cfg: any, env: NodeJS.ProcessEnv): ChannelAd
103
138
  const chatId = message.chat_id as string;
104
139
  const msgType = message.message_type as string;
105
140
  let text = '';
106
- if (msgType === 'text') {
107
- try { text = JSON.parse(message.content || '{}').text || ''; } catch { text = ''; }
108
- // Strip @mentions like "@_user_1 ".
109
- text = text.replace(/@_user_\d+/g, '').trim();
110
- } else {
111
- text = `[${msgType} 消息]`;
141
+ const media: MediaAttachment[] = [];
142
+ let content: any = {};
143
+ try { content = JSON.parse(message.content || '{}'); } catch { /* ignore */ }
144
+ switch (msgType) {
145
+ case 'text':
146
+ text = (content.text || '').replace(/@_user_\d+/g, '').trim(); // strip @mentions
147
+ break;
148
+ case 'image':
149
+ media.push({ kind: 'image', ref: content.image_key });
150
+ break;
151
+ case 'audio':
152
+ media.push({ kind: 'audio', ref: content.file_key });
153
+ break;
154
+ case 'media': // short video
155
+ media.push({ kind: 'video', ref: content.file_key, filename: content.file_name });
156
+ break;
157
+ case 'file':
158
+ media.push({ kind: 'file', ref: content.file_key, filename: content.file_name });
159
+ break;
160
+ case 'sticker':
161
+ media.push({ kind: 'sticker', ref: content.file_key });
162
+ break;
163
+ case 'post': { // rich text: pull plain text + embedded images
164
+ const blocks = content?.content;
165
+ if (Array.isArray(blocks)) {
166
+ for (const row of blocks) {
167
+ for (const el of row || []) {
168
+ if (el?.tag === 'text' && el.text) text += el.text;
169
+ else if (el?.tag === 'a' && el.text) text += el.text;
170
+ else if (el?.tag === 'img' && el.image_key) media.push({ kind: 'image', ref: el.image_key });
171
+ }
172
+ text += '\n';
173
+ }
174
+ }
175
+ text = text.trim();
176
+ break;
177
+ }
178
+ default:
179
+ text = `[${msgType} 消息]`;
112
180
  }
113
181
  const senderId = payload.event?.sender?.sender_id?.open_id || payload.event?.sender?.sender_id?.user_id || 'unknown';
114
182
 
@@ -118,6 +186,7 @@ export function createFeishuAdapter(cfg: any, env: NodeJS.ProcessEnv): ChannelAd
118
186
  conversationId: chatId || senderId,
119
187
  userId: senderId,
120
188
  text,
189
+ media: media.length ? media : undefined,
121
190
  replyTo: { channel: 'feishu', chatId },
122
191
  raw: payload,
123
192
  },
@@ -127,16 +196,94 @@ export function createFeishuAdapter(cfg: any, env: NodeJS.ProcessEnv): ChannelAd
127
196
  async send(target: ReplyTarget, text: string): Promise<void> {
128
197
  const chatId = target.chatId as string;
129
198
  if (!chatId) return;
130
- const token = await tokenCache.get();
199
+ if (useCard) { await createCard(chatId, text); return; }
131
200
  const data = await postJson(
132
201
  `${base}/open-apis/im/v1/messages?receive_id_type=chat_id`,
133
202
  { receive_id: chatId, msg_type: 'text', content: JSON.stringify({ text }) },
134
- { headers: { Authorization: `Bearer ${token}` } },
203
+ { headers: await authHeader() },
135
204
  );
136
- if (data.code !== 0) {
137
- if (data.code === 99991663 || data.code === 99991661) tokenCache.invalidate(); // token expired
138
- throw new Error(`feishu send error ${data.code}: ${data.msg}`);
205
+ if (data.code !== 0) { onTokenError(data.code); throw new Error(`feishu send error ${data.code}: ${data.msg}`); }
206
+ },
207
+
208
+ // Streaming reply: post a placeholder card, then patch it as text arrives —
209
+ // throttled (≥600ms apart) to stay well under Feishu's update rate limit.
210
+ async sendStreaming(target: ReplyTarget, chunks: AsyncIterable<string>): Promise<void> {
211
+ const chatId = target.chatId as string;
212
+ if (!chatId) return;
213
+ if (!useCard) { // plain-text mode can't patch; collect then send once
214
+ let all = '';
215
+ for await (const c of chunks) all += c;
216
+ await this.send(target, all.trim() || '(无回复)');
217
+ return;
139
218
  }
219
+ let messageId: string | null = null;
220
+ let acc = '';
221
+ let lastPatch = 0;
222
+ let dirty = false;
223
+ const MIN_INTERVAL = 600;
224
+ try {
225
+ messageId = await createCard(chatId, '思考中…');
226
+ } catch (e) { log.warn('feishu_card_create_failed', { error: String(e) }); return; }
227
+ if (!messageId) return;
228
+
229
+ for await (const chunk of chunks) {
230
+ acc += chunk;
231
+ dirty = true;
232
+ const now = Date.now();
233
+ if (now - lastPatch >= MIN_INTERVAL) {
234
+ lastPatch = now;
235
+ dirty = false;
236
+ await patchCard(messageId, acc);
237
+ }
238
+ }
239
+ // Final flush so the last tokens always land.
240
+ if (dirty || acc) await patchCard(messageId, acc.trim() || '(无回复)');
241
+ },
242
+
243
+ async sendMedia(target: ReplyTarget, item: OutboundMedia): Promise<void> {
244
+ const chatId = target.chatId as string;
245
+ if (!chatId) return;
246
+ const loaded = await loadMedia(item.src);
247
+ const headers = await authHeader();
248
+
249
+ if (item.kind === 'image') {
250
+ const up = await postMultipart(`${base}/open-apis/im/v1/images`, {
251
+ image_type: 'message',
252
+ image: { data: loaded.data, filename: loaded.filename || 'image', contentType: loaded.contentType || 'image/png' },
253
+ }, { headers });
254
+ if (up.code !== 0) { onTokenError(up.code); throw new Error(`feishu image upload ${up.code}: ${up.msg}`); }
255
+ const imageKey = up.data?.image_key;
256
+ const send = await postJson(`${base}/open-apis/im/v1/messages?receive_id_type=chat_id`,
257
+ { receive_id: chatId, msg_type: 'image', content: JSON.stringify({ image_key: imageKey }) },
258
+ { headers });
259
+ if (send.code !== 0) { onTokenError(send.code); throw new Error(`feishu image send ${send.code}: ${send.msg}`); }
260
+ return;
261
+ }
262
+
263
+ // file: upload to im/v1/files then send a file message
264
+ const up = await postMultipart(`${base}/open-apis/im/v1/files`, {
265
+ file_type: 'stream',
266
+ file_name: loaded.filename || 'file',
267
+ file: { data: loaded.data, filename: loaded.filename || 'file', contentType: loaded.contentType || 'application/octet-stream' },
268
+ }, { headers });
269
+ if (up.code !== 0) { onTokenError(up.code); throw new Error(`feishu file upload ${up.code}: ${up.msg}`); }
270
+ const fileKey = up.data?.file_key;
271
+ const send = await postJson(`${base}/open-apis/im/v1/messages?receive_id_type=chat_id`,
272
+ { receive_id: chatId, msg_type: 'file', content: JSON.stringify({ file_key: fileKey }) },
273
+ { headers });
274
+ if (send.code !== 0) { onTokenError(send.code); throw new Error(`feishu file send ${send.code}: ${send.msg}`); }
275
+ },
276
+
277
+ async fetchMedia(att: MediaAttachment, msg: InboundMessage): Promise<{ data: Buffer; contentType?: string } | null> {
278
+ const messageId = (msg.raw as any)?.event?.message?.message_id;
279
+ if (!messageId || !att.ref) return null;
280
+ const token = await tokenCache.get();
281
+ const res = await axios.get(
282
+ `${base}/open-apis/im/v1/messages/${messageId}/resources/${att.ref}?type=${att.kind === 'image' ? 'image' : 'file'}`,
283
+ { headers: { Authorization: `Bearer ${token}` }, responseType: 'arraybuffer', timeout: 30000, validateStatus: (s) => s >= 200 && s < 300 },
284
+ );
285
+ const ct = res.headers['content-type'];
286
+ return { data: Buffer.from(res.data), contentType: typeof ct === 'string' ? ct : undefined };
140
287
  },
141
288
  };
142
289
  }
@@ -17,8 +17,8 @@
17
17
 
18
18
  import * as crypto from 'crypto';
19
19
  import { getLogger } from '../../core/logger';
20
- import { resolveSecret, postJson, TokenCache } from '../helpers';
21
- import type { ChannelAdapter, RawRequest, ReplyTarget, WebhookOutcome } from '../types';
20
+ import { resolveSecret, postJson, loadMedia, TokenCache } from '../helpers';
21
+ import type { ChannelAdapter, MediaAttachment, OutboundMedia, RawRequest, ReplyTarget, WebhookOutcome } from '../types';
22
22
 
23
23
  const log = getLogger('channel-qq');
24
24
 
@@ -102,7 +102,17 @@ export function createQQAdapter(cfg: any, env: NodeJS.ProcessEnv): ChannelAdapte
102
102
  else if (t === 'C2C_MESSAGE_CREATE') replyTo = { channel: 'qq', kind: 'c2c', userOpenid: d.author?.user_openid, msgId };
103
103
  else if (t === 'AT_MESSAGE_CREATE' || t === 'MESSAGE_CREATE') replyTo = { channel: 'qq', kind: 'channel', channelId: d.channel_id, msgId };
104
104
 
105
- if (!replyTo || !content) return { response: { status: 200, body: '' } };
105
+ // QQ delivers images/files as an attachments array on the event.
106
+ const media: MediaAttachment[] = [];
107
+ for (const att of (Array.isArray(d.attachments) ? d.attachments : [])) {
108
+ const ct = String(att?.content_type || '');
109
+ const kind: MediaAttachment['kind'] = ct.startsWith('image') ? 'image'
110
+ : ct.startsWith('audio') || ct.startsWith('voice') ? 'audio'
111
+ : ct.startsWith('video') ? 'video' : 'file';
112
+ media.push({ kind, ref: att?.id, filename: att?.filename, mimeType: att?.content_type, url: att?.url });
113
+ }
114
+
115
+ if (!replyTo || (!content && media.length === 0)) return { response: { status: 200, body: '' } };
106
116
 
107
117
  const userId = d.author?.user_openid || d.author?.id || d.author?.member_openid || 'unknown';
108
118
  return {
@@ -112,6 +122,7 @@ export function createQQAdapter(cfg: any, env: NodeJS.ProcessEnv): ChannelAdapte
112
122
  conversationId: (replyTo.groupOpenid as string) || (replyTo.channelId as string) || (userId as string),
113
123
  userId,
114
124
  text: content,
125
+ media: media.length ? media : undefined,
115
126
  replyTo,
116
127
  raw: payload,
117
128
  },
@@ -136,5 +147,46 @@ export function createQQAdapter(cfg: any, env: NodeJS.ProcessEnv): ChannelAdapte
136
147
  throw new Error(`qq send error: ${e?.response?.status || ''} ${String(e?.message || e).slice(0, 120)}`);
137
148
  }
138
149
  },
150
+
151
+ // QQ's v2 rich-media flow takes a URL (the platform fetches it): POST
152
+ // /files → file_info, then send msg_type:7 referencing that file_info.
153
+ // Group/C2C only; raw local bytes aren't supported, so the src must be a URL.
154
+ async sendMedia(target: ReplyTarget, item: OutboundMedia): Promise<void> {
155
+ if (!/^https?:\/\//i.test(item.src)) {
156
+ throw new Error('qq sendMedia requires an http(s) URL (platform fetches it)');
157
+ }
158
+ const base = target.kind === 'group'
159
+ ? `https://api.sgroup.qq.com/v2/groups/${target.groupOpenid}`
160
+ : target.kind === 'c2c'
161
+ ? `https://api.sgroup.qq.com/v2/users/${target.userOpenid}`
162
+ : null;
163
+ if (!base) throw new Error('qq sendMedia unsupported for channel target');
164
+ const headers = { ...(await authHeaders()), 'Content-Type': 'application/json' };
165
+ // file_type: 1=image 2=video 3=audio 4=file
166
+ const fileType = item.kind === 'image' ? 1 : 4;
167
+ let fileInfo: string;
168
+ try {
169
+ const up = await postJson(`${base}/files`, { file_type: fileType, url: item.src, srv_send_msg: false }, { headers });
170
+ fileInfo = up.file_info;
171
+ } catch (e: any) {
172
+ if (e?.response?.status === 401) tokenCache.invalidate();
173
+ throw new Error(`qq file upload error: ${e?.response?.status || ''} ${String(e?.message || e).slice(0, 120)}`);
174
+ }
175
+ const payload: any = { msg_type: 7, media: { file_info: fileInfo } };
176
+ if (target.msgId) payload.msg_id = target.msgId;
177
+ await postJson(`${base}/messages`, payload, { headers });
178
+ },
179
+
180
+ async fetchMedia(att: MediaAttachment): Promise<{ data: Buffer; contentType?: string } | null> {
181
+ // QQ delivers attachments with a direct URL — just download it.
182
+ if (!att.url) return null;
183
+ try {
184
+ const loaded = await loadMedia(att.url);
185
+ return { data: loaded.data, contentType: loaded.contentType || att.mimeType };
186
+ } catch (e) {
187
+ log.warn('qq_media_fetch_failed', { error: String(e) });
188
+ return null;
189
+ }
190
+ },
139
191
  };
140
192
  }
@@ -16,9 +16,10 @@
16
16
  */
17
17
 
18
18
  import * as crypto from 'crypto';
19
+ import axios from 'axios';
19
20
  import { getLogger } from '../../core/logger';
20
- import { resolveSecret, postJson, getJson, TokenCache } from '../helpers';
21
- import type { ChannelAdapter, RawRequest, ReplyTarget, WebhookOutcome } from '../types';
21
+ import { resolveSecret, postJson, getJson, postMultipart, loadMedia, TokenCache } from '../helpers';
22
+ import type { ChannelAdapter, MediaAttachment, OutboundMedia, RawRequest, ReplyTarget, WebhookOutcome } from '../types';
22
23
 
23
24
  const log = getLogger('channel-wecom');
24
25
 
@@ -108,17 +109,26 @@ export function createWecomAdapter(cfg: any, env: NodeJS.ProcessEnv): ChannelAda
108
109
  const msgType = xmlField(inner, 'MsgType');
109
110
  const fromUser = xmlField(inner, 'FromUserName');
110
111
  let text = '';
111
- if (msgType === 'text') text = xmlField(inner, 'Content').trim();
112
- else text = `[${msgType} 消息]`;
112
+ const media: MediaAttachment[] = [];
113
+ switch (msgType) {
114
+ case 'text': text = xmlField(inner, 'Content').trim(); break;
115
+ case 'image': media.push({ kind: 'image', ref: xmlField(inner, 'MediaId'), url: xmlField(inner, 'PicUrl') || undefined }); break;
116
+ case 'voice': media.push({ kind: 'audio', ref: xmlField(inner, 'MediaId'), filename: xmlField(inner, 'MediaId') + '.' + (xmlField(inner, 'Format') || 'amr') }); break;
117
+ case 'video': media.push({ kind: 'video', ref: xmlField(inner, 'MediaId') }); break;
118
+ case 'file': media.push({ kind: 'file', ref: xmlField(inner, 'MediaId'), filename: xmlField(inner, 'FileName') || undefined }); break;
119
+ case 'location': text = `[位置] ${xmlField(inner, 'Label')} (${xmlField(inner, 'Location_X')},${xmlField(inner, 'Location_Y')})`; break;
120
+ default: text = `[${msgType} 消息]`;
121
+ }
113
122
 
114
123
  // Ack the callback immediately (empty 200); reply is pushed via the API.
115
124
  return {
116
125
  response: { status: 200, body: '' },
117
- message: text ? {
126
+ message: (text || media.length) ? {
118
127
  channel: 'wecom',
119
128
  conversationId: fromUser,
120
129
  userId: fromUser,
121
130
  text,
131
+ media: media.length ? media : undefined,
122
132
  replyTo: { channel: 'wecom', toUser: fromUser },
123
133
  raw: inner,
124
134
  } : undefined,
@@ -138,5 +148,49 @@ export function createWecomAdapter(cfg: any, env: NodeJS.ProcessEnv): ChannelAda
138
148
  throw new Error(`wecom send error ${data.errcode}: ${data.errmsg}`);
139
149
  }
140
150
  },
151
+
152
+ async sendMedia(target: ReplyTarget, item: OutboundMedia): Promise<void> {
153
+ const toUser = target.toUser as string;
154
+ if (!toUser || !agentId) return;
155
+ const loaded = await loadMedia(item.src);
156
+ const accessToken = await tokenCache.get();
157
+ const type = item.kind === 'image' ? 'image' : 'file';
158
+ // Upload to the temporary-media store (valid 3 days), then push by media_id.
159
+ const up = await postMultipart(
160
+ `https://qyapi.weixin.qq.com/cgi-bin/media/upload?access_token=${encodeURIComponent(accessToken)}&type=${type}`,
161
+ { media: { data: loaded.data, filename: loaded.filename || (type === 'image' ? 'image.png' : 'file'), contentType: loaded.contentType } },
162
+ );
163
+ if (up.errcode && up.errcode !== 0) {
164
+ if (up.errcode === 42001 || up.errcode === 40014) tokenCache.invalidate();
165
+ throw new Error(`wecom media upload ${up.errcode}: ${up.errmsg}`);
166
+ }
167
+ const mediaId = up.media_id;
168
+ const body: any = { touser: toUser, msgtype: type, agentid: Number(agentId) };
169
+ body[type] = { media_id: mediaId };
170
+ const send = await postJson(
171
+ `https://qyapi.weixin.qq.com/cgi-bin/message/send?access_token=${encodeURIComponent(accessToken)}`,
172
+ body,
173
+ );
174
+ if (send.errcode !== 0) {
175
+ if (send.errcode === 42001 || send.errcode === 40014) tokenCache.invalidate();
176
+ throw new Error(`wecom media send ${send.errcode}: ${send.errmsg}`);
177
+ }
178
+ },
179
+
180
+ async fetchMedia(att: MediaAttachment): Promise<{ data: Buffer; contentType?: string } | null> {
181
+ if (!att.ref) return null;
182
+ const accessToken = await tokenCache.get();
183
+ const res = await axios.get(
184
+ `https://qyapi.weixin.qq.com/cgi-bin/media/get?access_token=${encodeURIComponent(accessToken)}&media_id=${encodeURIComponent(att.ref)}`,
185
+ { responseType: 'arraybuffer', timeout: 30000, validateStatus: (s) => s >= 200 && s < 300 },
186
+ );
187
+ // An error comes back as JSON, not the binary — detect and bail.
188
+ const ct = res.headers['content-type'];
189
+ if (typeof ct === 'string' && ct.includes('application/json')) {
190
+ log.warn('wecom_media_get_failed', { body: Buffer.from(res.data).toString('utf8').slice(0, 120) });
191
+ return null;
192
+ }
193
+ return { data: Buffer.from(res.data), contentType: typeof ct === 'string' ? ct : undefined };
194
+ },
141
195
  };
142
196
  }
@@ -16,7 +16,11 @@ import { createServer, IncomingMessage, ServerResponse } from 'http';
16
16
  import { getLogger } from '../core/logger';
17
17
  import { createSystemContext } from '../core/factory';
18
18
  import { buildAdapters } from './registry';
19
+ import { describeMedia, parseReply } from './types';
20
+ import { isSendableSrc } from './helpers';
21
+ import { describeImages } from './vision';
19
22
  import type { ChannelAdapter, InboundMessage, RawRequest } from './types';
23
+ import type { LoadedMedia } from './helpers';
20
24
 
21
25
  const log = getLogger('gateway');
22
26
 
@@ -28,27 +32,125 @@ async function readBody(req: IncomingMessage): Promise<Buffer> {
28
32
  }
29
33
 
30
34
  /** Run an agent turn for an inbound message and collect the final text reply. */
31
- async function runAgent(
35
+ /** Build the agent prompt: text + media description + any vision result. */
36
+ function buildPrompt(msg: InboundMessage, canSendMedia: boolean, visionText?: string | null): string {
37
+ const parts: string[] = [];
38
+ const mediaDesc = describeMedia(msg.media);
39
+ if (msg.text) parts.push(msg.text);
40
+ if (mediaDesc) parts.push(`(用户发送了媒体: ${mediaDesc})`);
41
+ if (visionText) parts.push(`(图片内容识别: ${visionText})`);
42
+ if (canSendMedia) {
43
+ parts.push('(若需回发图片或文件,在回复中用 Markdown 图片 ![说明](路径或URL) 或 [[file:路径或URL]] 表示,路径可为本地文件或 http(s) 链接。)');
44
+ }
45
+ return parts.join('\n\n') || msg.text;
46
+ }
47
+
48
+ /** Download inbound images and run vision over them. Returns null if disabled. */
49
+ async function visionForMessage(
32
50
  ctx: ReturnType<typeof createSystemContext>,
33
51
  adapter: ChannelAdapter,
34
52
  msg: InboundMessage,
35
- ): Promise<string> {
53
+ ): Promise<string | null> {
54
+ const chCfg = ((ctx.config as any).channels || {})[adapter.id] || {};
55
+ const llmCfg = (ctx.config as any).llm || {};
56
+ if (chCfg.vision === false) return null;
57
+ const model = chCfg.visionModel || llmCfg.vision_model || llmCfg.visionModel;
58
+ if (!model) return null; // vision is opt-in: requires a configured model
59
+ const images = (msg.media || []).filter((m) => m.kind === 'image');
60
+ if (!images.length || !adapter.fetchMedia) return null;
61
+
62
+ const loaded: LoadedMedia[] = [];
63
+ for (const att of images.slice(0, 4)) {
64
+ try {
65
+ const got = await adapter.fetchMedia(att, msg);
66
+ if (got) loaded.push({ data: got.data, filename: att.filename || 'image', contentType: got.contentType });
67
+ } catch (e) {
68
+ log.warn('vision_fetch_failed', { channel: adapter.id, error: String(e) });
69
+ }
70
+ }
71
+ if (!loaded.length) return null;
72
+ return describeImages(loaded, { model });
73
+ }
74
+
75
+ /** Resolve the agent for a channel message. */
76
+ function resolveAgent(ctx: ReturnType<typeof createSystemContext>, adapter: ChannelAdapter) {
36
77
  const cfgChannels = (ctx.config as any).channels || {};
37
78
  const agentName = cfgChannels[adapter.id]?.agent || adapter.defaultAgent || 'fair';
38
- const agent = ctx.agentMap.get(agentName) || ctx.agentMap.get('fair') || [...ctx.agentMap.values()][0];
39
- if (!agent) throw new Error('no agent available');
79
+ return ctx.agentMap.get(agentName) || ctx.agentMap.get('fair') || [...ctx.agentMap.values()][0];
80
+ }
40
81
 
82
+ /** Dispatch one inbound message to its agent and deliver the reply. */
83
+ async function dispatch(
84
+ ctx: ReturnType<typeof createSystemContext>,
85
+ adapter: ChannelAdapter,
86
+ msg: InboundMessage,
87
+ ): Promise<void> {
88
+ const agent = resolveAgent(ctx, adapter);
89
+ if (!agent) throw new Error('no agent available');
41
90
  await agent.init();
91
+ const visionText = await visionForMessage(ctx, adapter, msg);
92
+ const prompt = buildPrompt(msg, !!adapter.sendMedia, visionText);
93
+
94
+ // Streaming path: stream content chunks straight to the adapter (e.g. a Feishu
95
+ // card patched as text arrives). Falls back to collect-then-send otherwise.
96
+ const cfgStreaming = ((ctx.config as any).channels || {})[adapter.id]?.streaming !== false;
97
+ if (adapter.sendStreaming && cfgStreaming) {
98
+ let full = '';
99
+ async function* contentChunks(): AsyncGenerator<string> {
100
+ try {
101
+ for await (const ev of agent.chatStream(prompt)) {
102
+ if ((ev as any).type === 'content') { const t = (ev as any).text as string; full += t; yield t; }
103
+ }
104
+ } catch (e) {
105
+ log.warn('gateway_agent_failed', { channel: adapter.id, error: String(e) });
106
+ yield `\n[出错了] ${String(e)}`;
107
+ }
108
+ }
109
+ await adapter.sendStreaming(msg.replyTo, contentChunks());
110
+ // After streaming the text, deliver any media the agent referenced.
111
+ await deliverMedia(adapter, msg, full);
112
+ return;
113
+ }
114
+
42
115
  let text = '';
43
116
  try {
44
- for await (const ev of agent.chatStream(msg.text)) {
117
+ for await (const ev of agent.chatStream(prompt)) {
45
118
  if ((ev as any).type === 'content') text += (ev as any).text;
46
119
  }
47
120
  } catch (e) {
48
121
  log.warn('gateway_agent_failed', { channel: adapter.id, error: String(e) });
49
- return `[出错了] ${String(e)}`;
122
+ text = `[出错了] ${String(e)}`;
123
+ }
124
+ // Non-streaming: split out media so the text message is clean.
125
+ if (adapter.sendMedia) {
126
+ const parsed = parseReply(text);
127
+ await adapter.send(msg.replyTo, parsed.text || '(无回复)');
128
+ await deliverMedia(adapter, msg, text, parsed.media);
129
+ } else {
130
+ await adapter.send(msg.replyTo, text.trim() || '(无回复)');
131
+ }
132
+ }
133
+
134
+ /** Upload+send any media the agent referenced in its reply. Best-effort. */
135
+ async function deliverMedia(
136
+ adapter: ChannelAdapter,
137
+ msg: InboundMessage,
138
+ fullText: string,
139
+ pre?: ReturnType<typeof parseReply>['media'],
140
+ ): Promise<void> {
141
+ if (!adapter.sendMedia) return;
142
+ const media = pre ?? parseReply(fullText).media;
143
+ for (const item of media) {
144
+ if (!isSendableSrc(item.src)) {
145
+ log.warn('gateway_media_unsendable', { channel: adapter.id, src: item.src });
146
+ continue;
147
+ }
148
+ try {
149
+ await adapter.sendMedia(msg.replyTo, item);
150
+ } catch (e) {
151
+ log.warn('gateway_send_media_failed', { channel: adapter.id, src: item.src, error: String(e) });
152
+ }
50
153
  }
51
- return text.trim() || '(无回复)';
52
154
  }
53
155
 
54
156
  export interface GatewayOptions {
@@ -114,14 +216,8 @@ export async function startGateway(opts: GatewayOptions = {}): Promise<void> {
114
216
  // Route to an agent and deliver the reply asynchronously (after the ack).
115
217
  if (outcome.message) {
116
218
  const msg = outcome.message;
117
- void (async () => {
118
- try {
119
- const reply = await runAgent(ctx, adapter, msg);
120
- await adapter.send(msg.replyTo, reply);
121
- } catch (e) {
122
- log.warn('gateway_dispatch_failed', { channel: adapter.id, error: String(e) });
123
- }
124
- })();
219
+ void dispatch(ctx, adapter, msg).catch((e) =>
220
+ log.warn('gateway_dispatch_failed', { channel: adapter.id, error: String(e) }));
125
221
  }
126
222
  } catch (e) {
127
223
  log.warn('gateway_request_error', { error: String(e) });
@@ -5,6 +5,8 @@
5
5
  */
6
6
 
7
7
  import axios from 'axios';
8
+ import * as fs from 'fs';
9
+ import * as path from 'path';
8
10
 
9
11
  /**
10
12
  * Resolve a secret/config value. Accepts a literal string, or an env-ref object
@@ -58,6 +60,64 @@ export async function getJson(
58
60
  return res.data;
59
61
  }
60
62
 
63
+ /** A loaded binary plus its filename, ready to upload. */
64
+ export interface LoadedMedia {
65
+ data: Buffer;
66
+ filename: string;
67
+ contentType?: string;
68
+ }
69
+
70
+ /**
71
+ * Load media bytes from a local filesystem path or an http(s) URL. Local paths
72
+ * are read directly; remote URLs are fetched (capped at 30 MiB to avoid
73
+ * pulling something huge into memory). Throws if the source can't be loaded.
74
+ */
75
+ export async function loadMedia(src: string): Promise<LoadedMedia> {
76
+ if (/^https?:\/\//i.test(src)) {
77
+ const res = await axios.get(src, {
78
+ responseType: 'arraybuffer',
79
+ timeout: 30000,
80
+ maxContentLength: 30 * 1024 * 1024,
81
+ validateStatus: (s) => s >= 200 && s < 300,
82
+ });
83
+ const urlName = path.basename(new URL(src).pathname) || 'file';
84
+ const ct = res.headers['content-type'];
85
+ return {
86
+ data: Buffer.from(res.data),
87
+ filename: urlName,
88
+ contentType: typeof ct === 'string' ? ct : undefined,
89
+ };
90
+ }
91
+ const data = fs.readFileSync(src); // throws ENOENT if missing — caller handles
92
+ return { data, filename: path.basename(src) };
93
+ }
94
+
95
+ /** Is this a sendable media source (http(s) URL or an existing local file)? */
96
+ export function isSendableSrc(src: string): boolean {
97
+ if (/^https?:\/\//i.test(src)) return true;
98
+ try { return fs.existsSync(src) && fs.statSync(src).isFile(); } catch { return false; }
99
+ }
100
+
101
+ /** POST multipart/form-data (Node 18+ FormData/Blob), return parsed JSON. */
102
+ export async function postMultipart(
103
+ url: string,
104
+ fields: Record<string, string | { data: Buffer; filename: string; contentType?: string }>,
105
+ opts?: { headers?: Record<string, string>; timeoutMs?: number },
106
+ ): Promise<any> {
107
+ const form = new FormData();
108
+ for (const [k, v] of Object.entries(fields)) {
109
+ if (typeof v === 'string') form.append(k, v);
110
+ else form.append(k, new Blob([v.data], v.contentType ? { type: v.contentType } : undefined), v.filename);
111
+ }
112
+ const res = await axios.post(url, form, {
113
+ headers: { ...(opts?.headers || {}) },
114
+ timeout: opts?.timeoutMs ?? 30000,
115
+ maxBodyLength: Infinity,
116
+ validateStatus: (s) => s >= 200 && s < 300,
117
+ });
118
+ return res.data;
119
+ }
120
+
61
121
  /**
62
122
  * A small token cache: fetch an access token via `fetcher`, cache it until it
63
123
  * is near expiry, and refresh transparently. Channels (Feishu/WeCom) all need