@mininglamp-oss/cc-channel-octo 1.0.1-dev.0ac574a

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/CHANGELOG.md +361 -0
  2. package/LICENSE +191 -0
  3. package/README.md +577 -0
  4. package/config.bot.example.json +15 -0
  5. package/config.example.json +33 -0
  6. package/dist/agent-bridge.d.ts +91 -0
  7. package/dist/agent-bridge.js +397 -0
  8. package/dist/agent-bridge.js.map +1 -0
  9. package/dist/cli.d.ts +109 -0
  10. package/dist/cli.js +467 -0
  11. package/dist/cli.js.map +1 -0
  12. package/dist/commands.d.ts +57 -0
  13. package/dist/commands.js +121 -0
  14. package/dist/commands.js.map +1 -0
  15. package/dist/config.d.ts +294 -0
  16. package/dist/config.js +344 -0
  17. package/dist/config.js.map +1 -0
  18. package/dist/configure.d.ts +11 -0
  19. package/dist/configure.js +106 -0
  20. package/dist/configure.js.map +1 -0
  21. package/dist/cron-evaluator.d.ts +53 -0
  22. package/dist/cron-evaluator.js +191 -0
  23. package/dist/cron-evaluator.js.map +1 -0
  24. package/dist/cron-fire-marker.d.ts +24 -0
  25. package/dist/cron-fire-marker.js +25 -0
  26. package/dist/cron-fire-marker.js.map +1 -0
  27. package/dist/cron-scheduler.d.ts +46 -0
  28. package/dist/cron-scheduler.js +114 -0
  29. package/dist/cron-scheduler.js.map +1 -0
  30. package/dist/cron-store.d.ts +62 -0
  31. package/dist/cron-store.js +63 -0
  32. package/dist/cron-store.js.map +1 -0
  33. package/dist/cron-tool.d.ts +44 -0
  34. package/dist/cron-tool.js +151 -0
  35. package/dist/cron-tool.js.map +1 -0
  36. package/dist/cwd-resolver.d.ts +72 -0
  37. package/dist/cwd-resolver.js +166 -0
  38. package/dist/cwd-resolver.js.map +1 -0
  39. package/dist/db-adapter.d.ts +21 -0
  40. package/dist/db-adapter.js +64 -0
  41. package/dist/db-adapter.js.map +1 -0
  42. package/dist/file-inline-wrap.d.ts +94 -0
  43. package/dist/file-inline-wrap.js +243 -0
  44. package/dist/file-inline-wrap.js.map +1 -0
  45. package/dist/gateway.d.ts +105 -0
  46. package/dist/gateway.js +425 -0
  47. package/dist/gateway.js.map +1 -0
  48. package/dist/group-config.d.ts +41 -0
  49. package/dist/group-config.js +104 -0
  50. package/dist/group-config.js.map +1 -0
  51. package/dist/group-context.d.ts +81 -0
  52. package/dist/group-context.js +466 -0
  53. package/dist/group-context.js.map +1 -0
  54. package/dist/inbound.d.ts +136 -0
  55. package/dist/inbound.js +667 -0
  56. package/dist/inbound.js.map +1 -0
  57. package/dist/index.d.ts +65 -0
  58. package/dist/index.js +1026 -0
  59. package/dist/index.js.map +1 -0
  60. package/dist/media-inbound.d.ts +38 -0
  61. package/dist/media-inbound.js +131 -0
  62. package/dist/media-inbound.js.map +1 -0
  63. package/dist/mention-utils.d.ts +108 -0
  64. package/dist/mention-utils.js +199 -0
  65. package/dist/mention-utils.js.map +1 -0
  66. package/dist/octo/api.d.ts +148 -0
  67. package/dist/octo/api.js +320 -0
  68. package/dist/octo/api.js.map +1 -0
  69. package/dist/octo/socket.d.ts +102 -0
  70. package/dist/octo/socket.js +793 -0
  71. package/dist/octo/socket.js.map +1 -0
  72. package/dist/octo/types.d.ts +126 -0
  73. package/dist/octo/types.js +35 -0
  74. package/dist/octo/types.js.map +1 -0
  75. package/dist/prompt-safety.d.ts +78 -0
  76. package/dist/prompt-safety.js +148 -0
  77. package/dist/prompt-safety.js.map +1 -0
  78. package/dist/session-router.d.ts +144 -0
  79. package/dist/session-router.js +490 -0
  80. package/dist/session-router.js.map +1 -0
  81. package/dist/session-store.d.ts +89 -0
  82. package/dist/session-store.js +297 -0
  83. package/dist/session-store.js.map +1 -0
  84. package/dist/skill-linker.d.ts +31 -0
  85. package/dist/skill-linker.js +160 -0
  86. package/dist/skill-linker.js.map +1 -0
  87. package/dist/stream-relay.d.ts +42 -0
  88. package/dist/stream-relay.js +243 -0
  89. package/dist/stream-relay.js.map +1 -0
  90. package/dist/url-policy.d.ts +103 -0
  91. package/dist/url-policy.js +290 -0
  92. package/dist/url-policy.js.map +1 -0
  93. package/package.json +79 -0
@@ -0,0 +1,667 @@
1
+ /**
2
+ * Inbound message resolver — converts BotMessage payload into LLM-friendly text.
3
+ *
4
+ * Each MessageType is rendered as either:
5
+ * - plain text (for Text)
6
+ * - text + media URL marker (for Image/GIF/Voice/Video/File)
7
+ * - structured placeholder (for Location/Card)
8
+ * - recursively expanded (for MultipleForward)
9
+ * - text + image URLs (for RichText)
10
+ *
11
+ * For text-extension files (.py/.ts/.md/.json etc.) the contents are inlined
12
+ * up to a small byte budget (G2) so the agent can actually answer questions
13
+ * about the file rather than just see its URL.
14
+ */
15
+ import { createWriteStream, statSync } from 'node:fs';
16
+ import { mkdir, unlink, readdir, stat } from 'node:fs/promises';
17
+ import { join } from 'node:path';
18
+ import { randomUUID } from 'node:crypto';
19
+ import { MessageType, RICH_TEXT_BLOCK_IMAGE, RICH_TEXT_BLOCK_TEXT, RICH_TEXT_IMAGE_PLACEHOLDER } from './octo/types.js';
20
+ import { truncateUtf8ByBytes } from './file-inline-wrap.js';
21
+ import { assertPublicUrl, fetchWithRedirectGuard } from './url-policy.js';
22
+ import { sanitizeDisplayName, sanitizePromptBody } from './prompt-safety.js';
/**
 * Same-host check used to decide whether credentials may be sent to a URL.
 *
 * Fail-closed: if either argument cannot be parsed as an absolute URL the
 * result is `false`.
 *
 * @param {string} url - URL about to be requested.
 * @param {string} apiUrl - Bot API base URL whose host is trusted.
 * @returns {boolean} `true` only when both parse and their `host` values
 *   (hostname plus any explicit port) match case-insensitively.
 */
export function isSameHost(url, apiUrl) {
    let candidateHost;
    let trustedHost;
    try {
        candidateHost = new URL(url).host;
        trustedHost = new URL(apiUrl).host;
    }
    catch {
        return false;
    }
    return candidateHost.toLowerCase() === trustedHost.toLowerCase();
}
36
+ // ─── Configuration ─────────────────────────────────────────────────────────
/** File extensions (lower-case, no dot) treated as text and eligible for inlining. */
export const TEXT_FILE_EXTENSIONS = new Set([
    // documents / data
    'txt', 'md', 'csv', 'json', 'xml', 'yaml', 'yml',
    // logs and script-like sources
    'log', 'py', 'js', 'ts', 'tsx', 'jsx', 'mjs', 'cjs',
    // compiled / general-purpose languages
    'go', 'java', 'rs', 'c', 'h', 'cpp', 'hpp', 'cs', 'rb', 'php',
    // web markup and styles
    'html', 'htm', 'css', 'scss', 'sass', 'less',
    // shells
    'sh', 'bash', 'zsh', 'fish', 'ps1',
    // configuration
    'toml', 'ini', 'conf', 'cfg', 'env',
    // query / schema languages
    'sql', 'graphql', 'gql', 'proto',
]);
/** Largest text file (in bytes) that is inlined into the prompt: 20 KiB. */
export const INLINE_FILE_MAX_BYTES = 20_480;
// ─── RichText / MultipleForward input budgets ───────────────────────────────
//
// Per-payload caps applied while parsing. They exist so that one hostile
// payload cannot consume the whole prompt budget or exhaust memory/CPU
// during parsing.
/** Upper bound on the number of blocks read from one RichText payload. */
export const RICH_TEXT_MAX_BLOCKS = 50;
/** Upper bound on the number of image URLs taken from one RichText payload. */
export const RICH_TEXT_MAX_MEDIA_URLS = 20;
/** Upper bound on rendered text from one RichText payload: 32 KiB. */
export const RICH_TEXT_MAX_OUTPUT_BYTES = 32_768;
/** Deepest MultipleForward nesting level that is still expanded. */
export const MULTIPLE_FORWARD_MAX_DEPTH = 3;
/** Upper bound on inner messages rendered at each MultipleForward level. */
export const MULTIPLE_FORWARD_MAX_MESSAGES = 50;
/** Upper bound on the rendered transcript of one MultipleForward payload: 8 KiB. */
export const MULTIPLE_FORWARD_MAX_OUTPUT_BYTES = 8_192;
/** Hard cap on bytes downloaded for any text file (inline or temp): 5 MiB. */
const MAX_FILE_DOWNLOAD_BYTES = 5_242_880;
/** Time limit for a file download, in milliseconds. */
const FILE_DOWNLOAD_TIMEOUT_MS = 30_000;
/** Directory receiving downloads that are too large to inline. */
const TEMP_DIR = join('/tmp', 'cc-channel-octo', 'inbound-files');
73
+ // ─── URL helpers ───────────────────────────────────────────────────────────
/**
 * Turn a payload-supplied media reference into a fetchable URL, or reject it.
 *
 * The reference comes from an untrusted message payload, so every rejection
 * path returns `undefined` (fail-closed) instead of throwing:
 *   - anything that is not a non-empty string (objects, numbers, arrays, ...)
 *   - backslashes anywhere in the value
 *   - scheme-relative URLs (`//host/path`)
 *   - absolute http(s) URLs whose host is neither the `apiUrl` host nor
 *     `cdnHost`, or that target the `apiUrl` host with a different protocol
 *   - relative paths containing `.`/`..` segments, a leading `/`, an encoded
 *     slash (`%2f`/`%2F`), or that no longer sit under `/file/` once the
 *     WHATWG URL parser has normalized them (covers `%2e%2e` spellings)
 *
 * @param {unknown} relUrl - Relative storage path (optionally prefixed with
 *   `file/` or `file/preview/`) or an absolute http(s) URL.
 * @param {string} [apiUrl] - Bot API base URL; required for any result.
 * @param {string} [cdnHost] - Additional host allowed for absolute URLs.
 * @returns {string | undefined} The absolute URL unchanged, or
 *   `<apiUrl>/file/<path>` for relative input; `undefined` when rejected.
 */
export function buildMediaUrl(relUrl, apiUrl, cdnHost) {
    // Untrusted input: a truthy non-string would otherwise throw on the string
    // methods below and take the whole message resolution down with it.
    if (typeof relUrl !== 'string' || relUrl === '')
        return undefined;
    // Backslashes are not valid in URL paths and are a traversal vector once
    // something downstream normalizes them.
    if (relUrl.includes('\\'))
        return undefined;
    // Scheme-relative URLs would resolve against an arbitrary host.
    if (relUrl.startsWith('//'))
        return undefined;
    if (relUrl.startsWith('http://') || relUrl.startsWith('https://')) {
        // Absolute URL: allowed only for the API host or the configured CDN host.
        if (!apiUrl)
            return undefined;
        try {
            const target = new URL(relUrl);
            const targetHost = target.host.toLowerCase();
            const base = new URL(apiUrl);
            const apiHost = base.host.toLowerCase();
            const allowedHosts = new Set([apiHost]);
            if (cdnHost)
                allowedHosts.add(cdnHost.toLowerCase());
            if (!allowedHosts.has(targetHost))
                return undefined;
            // Protocol must match apiUrl when targeting the API host itself; the
            // CDN host is allowed to use either http or https.
            if (targetHost === apiHost && target.protocol !== base.protocol)
                return undefined;
            // Defensive re-check of the parsed scheme.
            if (target.protocol !== 'http:' && target.protocol !== 'https:')
                return undefined;
            return relUrl;
        }
        catch {
            return undefined;
        }
    }
    // Relative path: drop an optional `file/preview/` or `file/` prefix.
    let storagePath = relUrl;
    if (storagePath.startsWith('file/preview/')) {
        storagePath = storagePath.substring('file/preview/'.length);
    }
    else if (storagePath.startsWith('file/')) {
        storagePath = storagePath.substring('file/'.length);
    }
    // Cheap literal dot-segment check, kept as defense-in-depth in front of the
    // normalization check further down.
    for (const seg of storagePath.split('/')) {
        if (seg === '..' || seg === '.')
            return undefined;
    }
    if (storagePath.startsWith('/'))
        return undefined;
    // The URL parser keeps `%2F` literal inside a path segment, so the sandbox
    // check below would accept it; a server or proxy that decodes it to `/`
    // could then resolve dot-segments itself. Reject encoded slashes outright.
    if (storagePath.includes('%2f') || storagePath.includes('%2F')) {
        return undefined;
    }
    const baseUrl = apiUrl?.replace(/\/+$/, '') ?? '';
    const candidate = `${baseUrl}/file/${storagePath}`;
    // Sandbox check on the normalized form: percent-encoded dot-segments are
    // collapsed by the parser, so an escape from `/file/` shows up here. A
    // missing/invalid apiUrl makes `new URL` throw and is rejected as well.
    try {
        const normalized = new URL(candidate);
        if (!normalized.pathname.startsWith('/file/'))
            return undefined;
    }
    catch {
        return undefined;
    }
    return candidate;
}
188
+ // ─── RichText (type=14) expansion ─────────────────────────────────────────
/**
 * Coerce RichText `content` into a bounded list of block objects.
 *
 * - array  → its truthy object entries, limited to the first
 *            RICH_TEXT_MAX_BLOCKS of them
 * - non-empty string → a single text block wrapping the string
 * - anything else    → an empty list
 *
 * @param {unknown} content - Raw `content` field of a RichText payload.
 * @returns {object[]} Blocks to render.
 */
function normalizeRichTextBlocks(content) {
    if (Array.isArray(content)) {
        const objectBlocks = content.filter((block) => Boolean(block) && typeof block === 'object');
        return objectBlocks.slice(0, RICH_TEXT_MAX_BLOCKS);
    }
    const isNonEmptyString = typeof content === 'string' && content.length > 0;
    return isNonEmptyString ? [{ type: RICH_TEXT_BLOCK_TEXT, text: content }] : [];
}
/**
 * Limit a string to `maxBytes` via `truncateUtf8ByBytes` and append `marker`
 * when something was cut off.
 *
 * @param {string} input - Text to limit.
 * @param {number} maxBytes - Byte budget handed to `truncateUtf8ByBytes`.
 * @param {string} marker - Suffix appended only when truncation happened.
 * @returns {{ text: string, truncated: boolean }} The (possibly shortened)
 *   text and whether it was shortened.
 */
function truncateByBytes(input, maxBytes, marker) {
    const result = truncateUtf8ByBytes(input, maxBytes);
    const shortened = result.truncated;
    if (result.wasTruncated) {
        return { text: shortened + marker, truncated: true };
    }
    return { text: shortened, truncated: false };
}
/**
 * Convert a user-supplied coordinate into a finite number.
 *
 * Only numbers and non-blank numeric strings are accepted. Every other type
 * (null, undefined, booleans, objects) is rejected up front, so coercions
 * such as `Number(null) === 0` can never produce a coordinate, and the result
 * is always a number or `null` — never the caller's raw string.
 *
 * @param {unknown} v - Raw latitude/longitude value from a payload.
 * @returns {number | null} The finite number, or `null` when not usable.
 */
function toFiniteCoord(v) {
    let candidate;
    switch (typeof v) {
        case 'number':
            candidate = v;
            break;
        case 'string':
            if (v.trim() === '')
                return null;
            candidate = Number(v);
            break;
        default:
            return null;
    }
    return Number.isFinite(candidate) ? candidate : null;
}
/**
 * Flatten RichText blocks into one plain string.
 *
 * Image blocks contribute RICH_TEXT_IMAGE_PLACEHOLDER; every other block
 * contributes its `text` when that is a string and nothing otherwise (a
 * non-string `text` would stringify as "[object Object]").
 *
 * @param {object[]} blocks - Normalized RichText blocks.
 * @returns {string} Concatenated text in block order.
 */
function buildRichTextPlain(blocks) {
    return blocks.reduce((acc, block) => {
        if (block.type === RICH_TEXT_BLOCK_IMAGE) {
            return acc + RICH_TEXT_IMAGE_PLACEHOLDER;
        }
        return typeof block.text === 'string' ? acc + block.text : acc;
    }, '');
}
/**
 * Expand a RichText payload into `{ text, mediaUrls }`.
 *
 * - `mediaUrls`: the `url` of each image block, passed through
 *   `buildMediaUrl`; rejected URLs are dropped and collection stops once
 *   RICH_TEXT_MAX_MEDIA_URLS entries have been gathered.
 * - `text`: the payload's top-level `plain` when it is a non-blank string,
 *   otherwise the text assembled from the blocks; limited to
 *   RICH_TEXT_MAX_OUTPUT_BYTES with a trailing truncation marker.
 *
 * @param {object} payload - RichText message payload.
 * @param {string} [apiUrl] - Bot API base URL used to resolve image URLs.
 * @param {string} [cdnHost] - Extra host allowed for absolute image URLs.
 * @returns {{ text: string, mediaUrls: string[] }}
 */
export function resolveRichTextContent(payload, apiUrl, cdnHost) {
    const blocks = normalizeRichTextBlocks(payload?.content);
    const mediaUrls = [];
    for (const block of blocks) {
        // Only string URLs are forwarded, so a malformed `{url: {}}` is skipped.
        const isImageWithUrl = block.type === RICH_TEXT_BLOCK_IMAGE
            && typeof block.url === 'string'
            && Boolean(block.url);
        if (!isImageWithUrl) {
            continue;
        }
        const resolved = buildMediaUrl(block.url, apiUrl, cdnHost);
        if (resolved) {
            mediaUrls.push(resolved);
        }
        if (mediaUrls.length >= RICH_TEXT_MAX_MEDIA_URLS) {
            break;
        }
    }
    let rawText = '';
    if (typeof payload?.plain === 'string') {
        rawText = payload.plain;
    }
    if (rawText.trim() === '') {
        rawText = buildRichTextPlain(blocks);
    }
    const limited = truncateByBytes(rawText, RICH_TEXT_MAX_OUTPUT_BYTES, '\n[RichText truncated]');
    return { text: limited.text, mediaUrls };
}
275
+ // ─── Inner message rendering (MultipleForward children) ──────────────────
/**
 * Render one message nested inside a MultipleForward payload as text.
 *
 * Every field read here comes from an untrusted forwarded payload, so the
 * string-typed fields (`url`, `content`, `name`) are used only when they
 * really are strings; anything else is treated as absent. This matches the
 * `typeof` guards applied to top-level payloads in `resolveContent` and
 * keeps non-strings away from the URL builder and the sanitizers.
 *
 * @param {object | null | undefined} payload - Inner message payload.
 * @param {string} [apiUrl] - Bot API base URL used to resolve media URLs.
 * @param {string} [cdnHost] - Extra host allowed for absolute media URLs.
 * @returns {string} Text for the message; `''` when there is no payload.
 */
function resolveInnerMessageText(payload, apiUrl, cdnHost) {
    if (!payload)
        return '';
    const fullUrl = typeof payload.url === 'string'
        ? buildMediaUrl(payload.url, apiUrl, cdnHost)
        : undefined;
    const content = typeof payload.content === 'string' ? payload.content : undefined;
    // Label on its own line, followed by the URL when one was accepted.
    const withUrl = (label) => (fullUrl ? `${label}\n${fullUrl}` : label);
    switch (payload.type) {
        case MessageType.Text:
            return content ?? '';
        case MessageType.Image:
            return withUrl('[图片]');
        case MessageType.GIF:
            return withUrl('[GIF]');
        case MessageType.Voice:
            return withUrl('[语音]');
        case MessageType.Video:
            return withUrl('[视频]');
        case MessageType.Location:
            return '[位置信息]';
        case MessageType.Card:
            return '[名片]';
        case MessageType.File: {
            // SECURITY: the file name is user-controlled and ends up inside a
            // `[文件: …]` label, so it goes through sanitizeDisplayName first.
            const safeName = typeof payload.name === 'string' && payload.name
                ? sanitizeDisplayName(payload.name)
                : '';
            return withUrl(safeName ? `[文件: ${safeName}]` : '[文件]');
        }
        case MessageType.MultipleForward:
            // Nested forwards are expanded by the caller, not here.
            return '[合并转发]';
        case MessageType.RichText: {
            const rt = resolveRichTextContent(payload, apiUrl, cdnHost);
            return rt.text || '[图文消息]';
        }
        default:
            return content ?? '[消息]';
    }
}
/**
 * Expand a MultipleForward payload into a readable transcript.
 *
 * Three caps bound the work done for one payload:
 *   - nesting depth: at `_depth >= MULTIPLE_FORWARD_MAX_DEPTH` a single
 *     placeholder line is returned instead of recursing
 *   - messages per level: only the first MULTIPLE_FORWARD_MAX_MESSAGES are
 *     rendered; the number skipped is reported on a trailing line
 *   - output size: the assembled transcript is limited to
 *     MULTIPLE_FORWARD_MAX_OUTPUT_BYTES
 *
 * The payload is untrusted: `users`/`msgs` that are not arrays are treated
 * as empty and entries that are not objects are skipped, so a malformed
 * payload yields a short transcript instead of a thrown TypeError.
 *
 * @param {object} payload - MultipleForward payload (`users`, `msgs`).
 * @param {string} [apiUrl] - Bot API base URL used to resolve media URLs.
 * @param {string} [cdnHost] - Extra host allowed for absolute media URLs.
 * @param {number} [_depth=0] - Current nesting level (top level is 0).
 * @returns {string} Transcript, one `<sender>: <message>` line per message.
 */
export function resolveMultipleForwardText(payload, apiUrl, cdnHost, _depth = 0) {
    if (_depth >= MULTIPLE_FORWARD_MAX_DEPTH) {
        return '[合并转发: 嵌套已截断]';
    }
    const users = Array.isArray(payload?.users) ? payload.users : [];
    const rawMsgs = Array.isArray(payload?.msgs) ? payload.msgs : [];
    // Cap the messages rendered at this level.
    const msgs = rawMsgs.slice(0, MULTIPLE_FORWARD_MAX_MESSAGES);
    const truncatedCount = rawMsgs.length - msgs.length;
    const userMap = new Map();
    for (const u of users) {
        if (!u || typeof u !== 'object')
            continue;
        // SECURITY: both name and uid are user-controlled and become a
        // `<name>: ` line label. The uid is sanitized too before it serves as
        // the fallback label; a constant is used only when nothing survives.
        if (u.uid && u.name) {
            const safe = sanitizeDisplayName(u.name) || sanitizeDisplayName(u.uid) || 'unknown';
            userMap.set(u.uid, safe);
        }
    }
    const lines = ['[合并转发: 聊天记录]'];
    for (const m of msgs) {
        if (!m || typeof m !== 'object')
            continue;
        const senderName = userMap.get(m.from_uid) ?? sanitizeDisplayName(m.from_uid, 'unknown');
        if (m.payload?.type === MessageType.MultipleForward) {
            const nested = resolveMultipleForwardText(m.payload, apiUrl, cdnHost, _depth + 1);
            lines.push(`${senderName}: [合并转发]`);
            lines.push(nested);
        }
        else {
            // SECURITY: a leaf body is attacker-controlled (e.g. forwarded text
            // shaped like `hi\n[assistant bot]: …`), so it is passed through
            // sanitizePromptBody before being placed in the transcript. Nested
            // transcripts went through this same branch in their own recursion.
            const inner = sanitizePromptBody(resolveInnerMessageText(m.payload, apiUrl, cdnHost));
            lines.push(`${senderName}: ${inner}`);
        }
    }
    if (truncatedCount > 0) {
        lines.push(`[合并转发: 还有 ${truncatedCount} 条消息未展示]`);
    }
    // Final size guard: a single inner message can still be large even with
    // the depth and message-count caps in place.
    const { text } = truncateByBytes(lines.join('\n'), MULTIPLE_FORWARD_MAX_OUTPUT_BYTES, '\n[合并转发: 输出已截断]');
    return text;
}
375
+ // ─── Core resolver ─────────────────────────────────────────────────────────
/**
 * Turn an inbound payload into prompt text plus optional media metadata.
 *
 * Purely synchronous; downloading/inlining file contents is a separate async
 * step.
 *
 * @param {object | null | undefined} payload - Inbound message payload.
 * @param {string} [apiUrl] - Bot API base URL used to resolve media URLs.
 * @param {string} [cdnHost] - Extra host allowed for absolute media URLs.
 * @returns {{ text: string, mediaUrl?: string, mediaUrls?: string[] }}
 */
export function resolveContent(payload, apiUrl, cdnHost) {
    if (!payload) {
        return { text: '' };
    }
    // Shared shape of the media types: label, then the URL on its own line
    // when buildMediaUrl accepted it.
    const renderMedia = (label) => {
        const mediaUrl = buildMediaUrl(payload.url, apiUrl, cdnHost);
        return { text: mediaUrl ? `${label}\n${mediaUrl}` : label, mediaUrl };
    };
    const { type } = payload;
    if (type === MessageType.Text) {
        return { text: payload.content ?? '' };
    }
    if (type === MessageType.Image) {
        return renderMedia('[图片]');
    }
    if (type === MessageType.GIF) {
        return renderMedia('[GIF]');
    }
    if (type === MessageType.Voice) {
        // The model only receives the URL as a marker; no transcription here.
        return renderMedia('[语音消息]');
    }
    if (type === MessageType.Video) {
        return renderMedia('[视频]');
    }
    if (type === MessageType.File) {
        const url = buildMediaUrl(payload.url, apiUrl, cdnHost);
        // SECURITY: the file name is user-controlled; it is sanitized before it
        // is placed inside the `[文件: …]` label.
        let fileName = '未知文件';
        if (typeof payload.name === 'string') {
            fileName = sanitizeDisplayName(payload.name, '未知文件');
        }
        const label = `[文件: ${fileName}]`;
        return { text: url ? `${label}\n${url}` : label, mediaUrl: url };
    }
    if (type === MessageType.Location) {
        // SECURITY: coordinates are user-controlled; only finite numbers (or
        // numeric strings) are rendered, everything else yields the bare label.
        const lat = toFiniteCoord(payload.latitude ?? payload.lat);
        const lng = toFiniteCoord(payload.longitude ?? payload.lng ?? payload.lon);
        if (lat === null || lng === null) {
            return { text: '[位置信息]' };
        }
        return { text: `[位置信息: ${lat},${lng}]` };
    }
    if (type === MessageType.Card) {
        // SECURITY: name and uid are user-controlled and both are sanitized
        // before they are placed inside the `[名片: …]` label.
        let cardName = '未知';
        if (typeof payload.name === 'string') {
            cardName = sanitizeDisplayName(payload.name, '未知');
        }
        let cardUid = '';
        if (typeof payload.uid === 'string') {
            cardUid = sanitizeDisplayName(payload.uid);
        }
        return {
            text: cardUid ? `[名片: ${cardName} (${cardUid})]` : `[名片: ${cardName}]`,
        };
    }
    if (type === MessageType.MultipleForward) {
        return { text: resolveMultipleForwardText(payload, apiUrl, cdnHost) };
    }
    if (type === MessageType.RichText) {
        const rt = resolveRichTextContent(payload, apiUrl, cdnHost);
        const result = { text: rt.text };
        if (rt.mediaUrls.length > 0) {
            result.mediaUrl = rt.mediaUrls[0];
            result.mediaUrls = rt.mediaUrls;
        }
        return result;
    }
    return { text: payload.content ?? payload.url ?? '[消息]' };
}
/**
 * Placeholder text for a historical (API-backfilled) message of a given type.
 *
 * @param {unknown} type - MessageType value of the historical message.
 * @param {string} [name] - File name, used only for File messages.
 * @returns {string} The placeholder; `''` for Text and unrecognized types.
 */
export function resolveHistoricalMessagePlaceholder(type, name) {
    // Checked in order with strict equality; the first match wins.
    const placeholders = [
        [MessageType.Image, () => '[图片]'],
        [MessageType.GIF, () => '[GIF]'],
        [MessageType.Voice, () => '[语音消息]'],
        [MessageType.Video, () => '[视频]'],
        [MessageType.File, () => `[文件: ${name ? sanitizeDisplayName(name, '未知文件') : '未知文件'}]`],
        [MessageType.Location, () => '[位置信息]'],
        [MessageType.Card, () => '[名片]'],
        [MessageType.MultipleForward, () => '[合并转发]'],
        [MessageType.RichText, () => '[图文消息]'],
    ];
    for (const [candidate, render] of placeholders) {
        if (type === candidate) {
            return render();
        }
    }
    return '';
}
489
+ // ─── File inlining (G2) ────────────────────────────────────────────────────
/**
 * Delete entries in TEMP_DIR whose modification time is more than one hour
 * old. Best effort: a failure to list the directory ends the sweep quietly,
 * and a failure on one entry does not stop the others.
 *
 * @returns {Promise<void>}
 */
async function cleanupOldTempFiles() {
    const ONE_HOUR_MS = 60 * 60 * 1000;
    let names;
    try {
        names = await readdir(TEMP_DIR);
    }
    catch {
        /* best effort */
        return;
    }
    const expiry = Date.now() - ONE_HOUR_MS;
    for (const name of names) {
        try {
            const target = join(TEMP_DIR, name);
            const { mtimeMs } = await stat(target);
            if (mtimeMs < expiry) {
                await unlink(target);
            }
        }
        catch {
            /* best effort */
        }
    }
}
/**
 * Lower-cased text after the last `.` of a file name, or `''` when the name
 * has no dot or ends with one.
 *
 * @param {string} name - File name or last URL path segment.
 * @returns {string}
 */
function extensionFromName(name) {
    const dot = name.lastIndexOf('.');
    if (dot < 0 || dot === name.length - 1)
        return '';
    return name.slice(dot + 1).toLowerCase();
}
/**
 * Determine a file's extension (lower-case, without the dot).
 *
 * The last path segment of `url` is tried first and wins when it carries an
 * extension of at most 8 characters. Otherwise — including when `url` cannot
 * be parsed or its last segment has no extension at all — the extension of
 * `fallbackName` is used.
 *
 * Only the last path segment is inspected, so an extension-less URL such as
 * `/file/x` or a dot inside a directory name (`/v1.2/blob`) no longer hides
 * the extension carried by `fallbackName`.
 *
 * @param {string} url - Download URL.
 * @param {string} [fallbackName] - File name reported alongside the URL.
 * @returns {string} The extension, or `''` when none can be determined.
 */
function extractExtension(url, fallbackName) {
    try {
        const lastSegment = new URL(url).pathname.split('/').pop() ?? '';
        const ext = extensionFromName(lastSegment);
        if (ext && ext.length <= 8)
            return ext;
    }
    catch {
        /* fall through to the file name */
    }
    return typeof fallbackName === 'string' ? extensionFromName(fallbackName) : '';
}
/**
 * Try to make a file attachment readable by the agent.
 *
 * @param {object} params
 * @param {string} params.url - Download URL of the file.
 * @param {string} params.botToken - Token sent as `Authorization: Bearer …`,
 *   but only on hops whose host equals the `apiUrl` host.
 * @param {string} params.apiUrl - Bot API base URL.
 * @param {string} [params.filename] - Display name; also the extension fallback.
 * @param {number} [params.knownSize] - Size in bytes, when already known.
 * @returns {Promise<{ inlined?: string, tempPath?: string, description?: string }>}
 *   - `{ inlined }` — text-extension file within INLINE_FILE_MAX_BYTES
 *   - `{ tempPath, description }` — text-extension file above the inline cap,
 *     written to TEMP_DIR
 *   - `{ description }` — not a text extension, over the download cap,
 *     refused by the URL policy, or the download/write failed
 *
 * Failures are reported through `description`; this function does not reject.
 */
export async function tryResolveFile(params) {
    const { url, botToken, apiUrl, filename, knownSize } = params;
    const ext = extractExtension(url, filename);
    if (!TEXT_FILE_EXTENSIONS.has(ext)) {
        // Not text: describe it, with the size when it is known.
        return {
            description: knownSize != null
                ? `[文件: ${filename} (${formatBytes(knownSize)})]`
                : `[文件: ${filename}]`,
        };
    }
    // No point downloading something already known to exceed the hard cap.
    if (knownSize != null && knownSize > MAX_FILE_DOWNLOAD_BYTES) {
        return { description: `[文件: ${filename} (${formatBytes(knownSize)}) - 超过下载上限 ${formatBytes(MAX_FILE_DOWNLOAD_BYTES)}]` };
    }
    // URL policy check before any request is made.
    try {
        await assertPublicUrl(url);
    }
    catch (err) {
        return { description: `[文件: ${filename} - 拒绝下载: ${String(err)}]` };
    }
    const signal = AbortSignal.timeout(FILE_DOWNLOAD_TIMEOUT_MS);
    try {
        // The init callback is evaluated with the URL of the current hop, so the
        // Authorization header is attached only while that hop stays on the
        // apiUrl host.
        const resp = await fetchWithRedirectGuard(url, (currentUrl) => {
            const headers = {};
            if (isSameHost(currentUrl, apiUrl)) {
                headers.Authorization = `Bearer ${botToken}`;
            }
            return { headers, signal };
        });
        if (!resp.ok) {
            return { description: `[文件: ${filename} - 下载失败 HTTP ${resp.status}]` };
        }
        const body = resp.body;
        if (!body) {
            return { description: `[文件: ${filename} - 响应无内容]` };
        }
        const reader = body.getReader();
        // cancel() returns a promise; swallow both a synchronous throw and an
        // asynchronous rejection so abandoning the body never surfaces an error.
        const abandonBody = () => {
            try {
                Promise.resolve(reader.cancel()).catch(() => { });
            }
            catch { /* ignore */ }
        };
        // Phase 1: buffer in memory until the body ends or the inline cap is hit.
        const inlineChunks = [];
        let inlineBytes = 0;
        let exceededInline = false;
        while (true) {
            const { done, value } = await reader.read();
            if (done)
                break;
            inlineBytes += value.byteLength;
            inlineChunks.push(value);
            if (inlineBytes > INLINE_FILE_MAX_BYTES) {
                exceededInline = true;
                if (inlineBytes > MAX_FILE_DOWNLOAD_BYTES) {
                    abandonBody();
                    return { description: `[文件: ${filename} (${formatBytes(inlineBytes)}) - 超过下载上限]` };
                }
                // The rest of the body is streamed to disk below.
                break;
            }
        }
        if (!exceededInline) {
            const buf = Buffer.concat(inlineChunks.map((c) => Buffer.from(c)));
            return { inlined: buf.toString('utf-8') };
        }
        // Phase 2: too large to inline — spill what was buffered, then the rest
        // of the body, into a temp file.
        await mkdir(TEMP_DIR, { recursive: true });
        cleanupOldTempFiles().catch(() => { });
        const safeName = String(filename ?? '').replace(/[^a-zA-Z0-9._-]/g, '_') || 'file';
        const tempPath = join(TEMP_DIR, `${randomUUID()}-${safeName}`);
        const ws = createWriteStream(tempPath);
        // Listen for 'error' from the moment the stream exists: an 'error' event
        // with no listener is thrown as an uncaught exception. The same promise
        // lets every wait below give up as soon as the stream has failed.
        const streamFailed = new Promise((_resolve, reject) => {
            ws.on('error', reject);
        });
        streamFailed.catch(() => { });
        const untilEvent = (event) => Promise.race([
            new Promise((resolve) => {
                ws.once(event, resolve);
            }),
            streamFailed,
        ]);
        // Honor backpressure: wait for 'drain' when write() reports a full buffer.
        const writeChunk = async (chunk) => {
            if (!ws.write(chunk))
                await untilEvent('drain');
        };
        try {
            for (const chunk of inlineChunks) {
                await writeChunk(chunk);
            }
            let totalBytes = inlineBytes;
            while (true) {
                const { done, value } = await reader.read();
                if (done)
                    break;
                totalBytes += value.byteLength;
                if (totalBytes > MAX_FILE_DOWNLOAD_BYTES) {
                    abandonBody();
                    ws.destroy();
                    await unlink(tempPath).catch(() => { });
                    return { description: `[文件: ${filename} (${formatBytes(totalBytes)}) - 超过下载上限]` };
                }
                await writeChunk(value);
            }
            ws.end();
            await untilEvent('finish');
            const { size } = await stat(tempPath);
            return { tempPath, description: `[文件: ${filename} (${formatBytes(size)}) - 已下载到 ${tempPath}]` };
        }
        catch (err) {
            abandonBody();
            ws.destroy();
            await unlink(tempPath).catch(() => { });
            return { description: `[文件: ${filename} - 下载错误: ${String(err)}]` };
        }
    }
    catch (err) {
        return { description: `[文件: ${filename} - ${String(err).includes('TimeoutError') ? '下载超时' : '网络错误'}]` };
    }
}
/**
 * Format a byte count for display using 1024-based units.
 *
 * Below 1024 the value is printed as-is with a `B` suffix; larger values are
 * scaled to KB, MB or GB with one decimal place. GB is the largest unit.
 *
 * @param {number} n - Byte count.
 * @returns {string} e.g. `512B`, `1.5KB`, `5.0MB`, `2.0GB`.
 */
function formatBytes(n) {
    const KIB = 1024;
    if (n < KIB) {
        return `${n}B`;
    }
    // [exclusive upper bound, divisor, suffix], smallest unit first.
    const scaledUnits = [
        [KIB * KIB, KIB, 'KB'],
        [KIB * KIB * KIB, KIB * KIB, 'MB'],
    ];
    for (const [upperBound, divisor, suffix] of scaledUnits) {
        if (n < upperBound) {
            return `${(n / divisor).toFixed(1)}${suffix}`;
        }
    }
    return `${(n / (KIB * KIB * KIB)).toFixed(1)}GB`;
}
667
+ //# sourceMappingURL=inbound.js.map