@mininglamp-oss/cc-channel-octo 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +349 -0
- package/LICENSE +191 -0
- package/README.md +577 -0
- package/config.bot.example.json +15 -0
- package/config.example.json +33 -0
- package/dist/agent-bridge.d.ts +79 -0
- package/dist/agent-bridge.js +392 -0
- package/dist/agent-bridge.js.map +1 -0
- package/dist/commands.d.ts +57 -0
- package/dist/commands.js +121 -0
- package/dist/commands.js.map +1 -0
- package/dist/config.d.ts +278 -0
- package/dist/config.js +330 -0
- package/dist/config.js.map +1 -0
- package/dist/cron-evaluator.d.ts +53 -0
- package/dist/cron-evaluator.js +191 -0
- package/dist/cron-evaluator.js.map +1 -0
- package/dist/cron-fire-marker.d.ts +24 -0
- package/dist/cron-fire-marker.js +25 -0
- package/dist/cron-fire-marker.js.map +1 -0
- package/dist/cron-scheduler.d.ts +46 -0
- package/dist/cron-scheduler.js +114 -0
- package/dist/cron-scheduler.js.map +1 -0
- package/dist/cron-store.d.ts +62 -0
- package/dist/cron-store.js +63 -0
- package/dist/cron-store.js.map +1 -0
- package/dist/cron-tool.d.ts +44 -0
- package/dist/cron-tool.js +151 -0
- package/dist/cron-tool.js.map +1 -0
- package/dist/cwd-resolver.d.ts +72 -0
- package/dist/cwd-resolver.js +166 -0
- package/dist/cwd-resolver.js.map +1 -0
- package/dist/db-adapter.d.ts +21 -0
- package/dist/db-adapter.js +64 -0
- package/dist/db-adapter.js.map +1 -0
- package/dist/file-inline-wrap.d.ts +94 -0
- package/dist/file-inline-wrap.js +243 -0
- package/dist/file-inline-wrap.js.map +1 -0
- package/dist/gateway.d.ts +100 -0
- package/dist/gateway.js +420 -0
- package/dist/gateway.js.map +1 -0
- package/dist/group-config.d.ts +41 -0
- package/dist/group-config.js +104 -0
- package/dist/group-config.js.map +1 -0
- package/dist/group-context.d.ts +64 -0
- package/dist/group-context.js +396 -0
- package/dist/group-context.js.map +1 -0
- package/dist/inbound.d.ts +136 -0
- package/dist/inbound.js +667 -0
- package/dist/inbound.js.map +1 -0
- package/dist/index.d.ts +33 -0
- package/dist/index.js +922 -0
- package/dist/index.js.map +1 -0
- package/dist/media-inbound.d.ts +38 -0
- package/dist/media-inbound.js +131 -0
- package/dist/media-inbound.js.map +1 -0
- package/dist/mention-utils.d.ts +99 -0
- package/dist/mention-utils.js +185 -0
- package/dist/mention-utils.js.map +1 -0
- package/dist/octo/api.d.ts +148 -0
- package/dist/octo/api.js +320 -0
- package/dist/octo/api.js.map +1 -0
- package/dist/octo/socket.d.ts +102 -0
- package/dist/octo/socket.js +793 -0
- package/dist/octo/socket.js.map +1 -0
- package/dist/octo/types.d.ts +126 -0
- package/dist/octo/types.js +35 -0
- package/dist/octo/types.js.map +1 -0
- package/dist/prompt-safety.d.ts +78 -0
- package/dist/prompt-safety.js +148 -0
- package/dist/prompt-safety.js.map +1 -0
- package/dist/session-router.d.ts +127 -0
- package/dist/session-router.js +432 -0
- package/dist/session-router.js.map +1 -0
- package/dist/session-store.d.ts +89 -0
- package/dist/session-store.js +297 -0
- package/dist/session-store.js.map +1 -0
- package/dist/skill-linker.d.ts +31 -0
- package/dist/skill-linker.js +160 -0
- package/dist/skill-linker.js.map +1 -0
- package/dist/stream-relay.d.ts +42 -0
- package/dist/stream-relay.js +243 -0
- package/dist/stream-relay.js.map +1 -0
- package/dist/url-policy.d.ts +103 -0
- package/dist/url-policy.js +290 -0
- package/dist/url-policy.js.map +1 -0
- package/package.json +79 -0
package/dist/inbound.js
ADDED
|
@@ -0,0 +1,667 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Inbound message resolver — converts BotMessage payload into LLM-friendly text.
|
|
3
|
+
*
|
|
4
|
+
* Each MessageType is rendered as either:
|
|
5
|
+
* - plain text (for Text)
|
|
6
|
+
* - text + media URL marker (for Image/GIF/Voice/Video/File)
|
|
7
|
+
* - structured placeholder (for Location/Card)
|
|
8
|
+
* - recursively expanded (for MultipleForward)
|
|
9
|
+
* - text + image URLs (for RichText)
|
|
10
|
+
*
|
|
11
|
+
* For text-extension files (.py/.ts/.md/.json etc.) the contents are inlined
|
|
12
|
+
* up to a small byte budget (G2) so the agent can actually answer questions
|
|
13
|
+
* about the file rather than just see its URL.
|
|
14
|
+
*/
|
|
15
|
+
import { createWriteStream, statSync } from 'node:fs';
|
|
16
|
+
import { mkdir, unlink, readdir, stat } from 'node:fs/promises';
|
|
17
|
+
import { join } from 'node:path';
|
|
18
|
+
import { randomUUID } from 'node:crypto';
|
|
19
|
+
import { MessageType, RICH_TEXT_BLOCK_IMAGE, RICH_TEXT_BLOCK_TEXT, RICH_TEXT_IMAGE_PLACEHOLDER } from './octo/types.js';
|
|
20
|
+
import { truncateUtf8ByBytes } from './file-inline-wrap.js';
|
|
21
|
+
import { assertPublicUrl, fetchWithRedirectGuard } from './url-policy.js';
|
|
22
|
+
import { sanitizeDisplayName, sanitizePromptBody } from './prompt-safety.js';
|
|
23
|
+
/**
 * Credential-scoping helper: report whether two URLs address the same host.
 *
 * Both arguments are parsed with the WHATWG `URL` constructor and their `host`
 * components (hostname plus any non-default port) are compared
 * case-insensitively. The check fails closed: if either argument cannot be
 * parsed, the result is `false`.
 *
 * @param {string} url - URL about to be requested.
 * @param {string} apiUrl - Bot API base URL to compare against.
 * @returns {boolean} `true` only when both URLs parse and their hosts match.
 */
export function isSameHost(url, apiUrl) {
    let requestedHost;
    let apiHost;
    try {
        requestedHost = new URL(url).host;
        apiHost = new URL(apiUrl).host;
    }
    catch {
        // Missing or malformed input is never treated as "same host".
        return false;
    }
    return requestedHost.toLowerCase() === apiHost.toLowerCase();
}
|
|
36
|
+
// ─── Configuration ─────────────────────────────────────────────────────────
|
|
37
|
+
/**
 * Lower-case file extensions (without the leading dot) that are treated as
 * text-like and are therefore candidates for inlining.
 */
export const TEXT_FILE_EXTENSIONS = new Set([
    // Plain text, data and markup.
    'txt md csv json xml yaml yml',
    // Logs and scripting / JavaScript-family sources.
    'log py js ts tsx jsx mjs cjs',
    // Compiled and other general-purpose languages.
    'go java rs c h cpp hpp cs rb php',
    // Web documents and stylesheets.
    'html htm css scss sass less',
    // Shell scripts.
    'sh bash zsh fish ps1',
    // Configuration files.
    'toml ini conf cfg env',
    // Query and schema languages.
    'sql graphql gql proto',
].flatMap((group) => group.split(' ')));
/** Largest text file, in bytes (20 KiB), that is inlined into the LLM prompt (G2). */
export const INLINE_FILE_MAX_BYTES = 20_480;
|
|
49
|
+
// ─── RichText / MultipleForward input budgets (C1 / Stage 6) ─────────────────────
|
|
50
|
+
//
|
|
51
|
+
// These caps apply per-payload at parse time. They are independent of — and
|
|
52
|
+
// strictly tighter than — the system-prompt-wide 100 KiB cap in agent-bridge
|
|
53
|
+
// (D1, PR#39). Goal: stop a single malicious payload from spending the
|
|
54
|
+
// entire system-prompt budget or triggering OOM during parsing.
|
|
55
|
+
/** Upper bound on the number of blocks taken from one RichText payload. */
export const RICH_TEXT_MAX_BLOCKS = 50;
/** Upper bound on the number of image URLs collected from one RichText payload. */
export const RICH_TEXT_MAX_MEDIA_URLS = 20;
/** Byte cap (32 KiB) on the text rendered from one RichText payload; stated to match the Text gate. */
export const RICH_TEXT_MAX_OUTPUT_BYTES = 32_768;
/** Deepest MultipleForward nesting level that is still expanded. */
export const MULTIPLE_FORWARD_MAX_DEPTH = 3;
/** Number of inner messages rendered at each MultipleForward level. */
export const MULTIPLE_FORWARD_MAX_MESSAGES = 50;
/** Byte cap (8 KiB) on the transcript rendered from one MultipleForward payload. */
export const MULTIPLE_FORWARD_MAX_OUTPUT_BYTES = 8_192;
/** Hard ceiling (5 MiB) on bytes downloaded for any text file, inline or temp. */
const MAX_FILE_DOWNLOAD_BYTES = 5_242_880;
/** Time budget, in milliseconds, for a single file download. */
const FILE_DOWNLOAD_TIMEOUT_MS = 30 * 1000;
/** Directory that receives downloads too large to inline. */
const TEMP_DIR = join('/tmp', 'cc-channel-octo', 'inbound-files');
|
|
73
|
+
// ─── URL helpers ───────────────────────────────────────────────────────────
|
|
74
|
+
/**
 * Resolve a payload-supplied media reference into an absolute, fetchable URL.
 *
 * Two input shapes are accepted:
 *   - Absolute `http://` / `https://` URLs are returned unchanged, but only
 *     when their host equals the `apiUrl` host or the optional `cdnHost`.
 *     A URL on the API host must additionally use the same protocol as
 *     `apiUrl`.
 *   - Anything else is treated as a storage path. An optional leading
 *     `file/preview/` or `file/` prefix is stripped and the remainder is
 *     appended to `<apiUrl>/file/`.
 *
 * This function is the chokepoint that keeps an attacker-controlled
 * `payload.url` from later being fetched with the bot's Authorization header.
 * It rejects (returns `undefined` for):
 *   - non-string or empty input,
 *   - any backslash,
 *   - scheme-relative URLs (`//host/...`),
 *   - absolute URLs on a host that is not allowed,
 *   - storage paths that start with `/`, contain a literal `.` / `..`
 *     segment, or contain an encoded slash (`%2f` / `%2F`),
 *   - storage paths whose WHATWG-normalized pathname no longer starts with
 *     `/file/` (this catches percent-encoded dot-segments such as `%2e%2e`).
 *
 * @param {unknown} relUrl - Untrusted media reference taken from a message payload.
 * @param {string} [apiUrl] - Bot API base URL; required for every accepted result.
 * @param {string} [cdnHost] - Extra allowed host for absolute URLs; compared
 *   against the URL's `host` (hostname plus any non-default port).
 * @returns {string | undefined} The URL to use, or `undefined` when rejected.
 */
export function buildMediaUrl(relUrl, apiUrl, cdnHost) {
    // Payload fields are untrusted: a number, object or array here must be
    // rejected quietly instead of throwing from the string methods below.
    if (typeof relUrl !== 'string' || relUrl === '')
        return undefined;
    // Backslashes are not valid in URL paths and are a known traversal vector
    // once something downstream normalizes them.
    if (relUrl.includes('\\'))
        return undefined;
    // Scheme-relative URLs (`//attacker.com/path`) would switch hosts.
    if (relUrl.startsWith('//'))
        return undefined;
    if (relUrl.startsWith('http://') || relUrl.startsWith('https://')) {
        // Without an API base there is nothing to validate the host against.
        if (!apiUrl)
            return undefined;
        try {
            const target = new URL(relUrl);
            const targetHost = target.host.toLowerCase();
            const base = new URL(apiUrl);
            const apiHost = base.host.toLowerCase();
            const allowedHosts = new Set([apiHost]);
            if (cdnHost)
                allowedHosts.add(cdnHost.toLowerCase());
            if (!allowedHosts.has(targetHost))
                return undefined;
            // Protocol-downgrade guard applies to the API host only; a CDN
            // host may use either http or https.
            if (targetHost === apiHost && target.protocol !== base.protocol)
                return undefined;
            if (target.protocol !== 'http:' && target.protocol !== 'https:')
                return undefined;
            return relUrl;
        }
        catch {
            return undefined;
        }
    }
    // Storage path: drop an optional `file/preview/` or `file/` prefix.
    let storagePath = relUrl;
    if (storagePath.startsWith('file/preview/')) {
        storagePath = storagePath.substring('file/preview/'.length);
    }
    else if (storagePath.startsWith('file/')) {
        storagePath = storagePath.substring('file/'.length);
    }
    // Cheap literal pre-check, kept as defense-in-depth ahead of the
    // normalized check further down.
    for (const segment of storagePath.split('/')) {
        if (segment === '..' || segment === '.')
            return undefined;
    }
    if (storagePath.startsWith('/'))
        return undefined;
    // The WHATWG parser leaves `%2F` undecoded, so the normalized check below
    // would accept `..%2f..%2finternal`. Some servers and proxies decode it
    // to `/` and then resolve dot-segments, so encoded slashes are refused.
    if (storagePath.includes('%2f') || storagePath.includes('%2F')) {
        return undefined;
    }
    const baseUrl = apiUrl?.replace(/\/+$/, '') ?? '';
    const candidate = `${baseUrl}/file/${storagePath}`;
    // Sandbox check on the normalized form: percent-encoded dot-segments
    // (`%2e%2e`, `%2E.`, ...) are collapsed by the URL parser, so an escape
    // from `/file/` shows up as a pathname with a different prefix. A missing
    // or unparsable base makes `new URL` throw, which also rejects.
    try {
        const normalized = new URL(candidate);
        if (!normalized.pathname.startsWith('/file/'))
            return undefined;
    }
    catch {
        return undefined;
    }
    return candidate;
}
|
|
188
|
+
// ─── RichText (type=14) expansion ─────────────────────────────────────────
|
|
189
|
+
/**
 * Normalize the `content` field of a RichText payload into an array of blocks.
 *
 * - Array input: keeps the first `RICH_TEXT_MAX_BLOCKS` entries that are
 *   non-null objects, in order.
 * - Non-empty string input: wrapped as a single text block.
 * - Anything else: an empty array.
 *
 * @param {unknown} content - Untrusted `payload.content`.
 * @returns {object[]} At most `RICH_TEXT_MAX_BLOCKS` block objects.
 */
function normalizeRichTextBlocks(content) {
    if (Array.isArray(content)) {
        // Stop scanning as soon as the cap is reached. Filtering the whole
        // array first would still walk every element of an oversized payload,
        // which is exactly the work the cap is meant to bound.
        const blocks = [];
        for (const candidate of content) {
            if (blocks.length >= RICH_TEXT_MAX_BLOCKS)
                break;
            if (!!candidate && typeof candidate === 'object')
                blocks.push(candidate);
        }
        return blocks;
    }
    if (typeof content === 'string' && content) {
        return [{ type: RICH_TEXT_BLOCK_TEXT, text: content }];
    }
    return [];
}
|
|
202
|
+
/**
 * Cap a string at `maxBytes` UTF-8 bytes and flag whether it was shortened.
 *
 * The byte-level cut is delegated to `truncateUtf8ByBytes`; this wrapper only
 * appends `marker` when that helper reports truncation. The marker is added
 * after the cut, so the returned text can exceed `maxBytes` by the marker's
 * length.
 *
 * @param {string} input - Text to cap.
 * @param {number} maxBytes - Byte budget passed through to the helper.
 * @param {string} marker - Suffix appended only when truncation happened.
 * @returns {{ text: string, truncated: boolean }}
 */
function truncateByBytes(input, maxBytes, marker) {
    const outcome = truncateUtf8ByBytes(input, maxBytes);
    if (!outcome.wasTruncated) {
        return { text: outcome.truncated, truncated: false };
    }
    return { text: outcome.truncated + marker, truncated: true };
}
|
|
213
|
+
/**
 * Convert an untrusted coordinate value to a finite number.
 *
 * Only two input kinds are considered: a number, or a string that is not
 * blank. Strings are converted with `Number()`. Every other type (null,
 * undefined, boolean, object, ...) yields `null`, so `Number(null) === 0`
 * can never surface as a bogus coordinate and non-numeric text can never
 * reach the rendered label.
 *
 * @param {unknown} v - Candidate latitude/longitude value.
 * @returns {number | null} The finite number, or `null` when unusable.
 */
function toFiniteCoord(v) {
    let candidate;
    switch (typeof v) {
        case 'number':
            candidate = v;
            break;
        case 'string':
            // Blank strings would convert to 0; refuse them up front.
            if (v.trim() === '')
                return null;
            candidate = Number(v);
            break;
        default:
            return null;
    }
    return Number.isFinite(candidate) ? candidate : null;
}
|
|
228
|
+
/**
 * Assemble plain text from RichText blocks, in block order.
 *
 * Image blocks contribute `RICH_TEXT_IMAGE_PLACEHOLDER`. Every other block
 * contributes its `text` when that is a string and nothing otherwise, so a
 * malformed `text` can never render as "[object Object]".
 *
 * @param {Iterable<object>} blocks - Normalized RichText blocks.
 * @returns {string} Concatenated text with no separators added.
 */
function buildRichTextPlain(blocks) {
    const pieceFor = (block) => {
        if (block.type === RICH_TEXT_BLOCK_IMAGE) {
            return RICH_TEXT_IMAGE_PLACEHOLDER;
        }
        // Text blocks and blocks of unknown type are handled alike: only a
        // string `text` is emitted.
        return typeof block.text === 'string' ? block.text : '';
    };
    let rendered = '';
    for (const block of blocks) {
        rendered += pieceFor(block);
    }
    return rendered;
}
|
|
244
|
+
/**
 * Expand a RichText (type=14) payload into `{ text, mediaUrls }`.
 *
 * - `mediaUrls`: for each image block with a non-empty string `url`, the URL
 *   is passed through `buildMediaUrl`; accepted results are collected, and
 *   collection stops once `RICH_TEXT_MAX_MEDIA_URLS` have been gathered.
 * - `text`: the top-level `plain` field when it is a string with
 *   non-whitespace content, otherwise text assembled from the blocks. The
 *   result is capped at `RICH_TEXT_MAX_OUTPUT_BYTES` with a truncation marker.
 *
 * @param {object | null | undefined} payload - Untrusted RichText payload.
 * @param {string} [apiUrl] - Bot API base URL, forwarded to `buildMediaUrl`.
 * @param {string} [cdnHost] - Allowed CDN host, forwarded to `buildMediaUrl`.
 * @returns {{ text: string, mediaUrls: string[] }}
 */
export function resolveRichTextContent(payload, apiUrl, cdnHost) {
    const blocks = normalizeRichTextBlocks(payload?.content);
    const mediaUrls = [];
    for (const block of blocks) {
        // Only string URLs are forwarded, so a malformed `{ url: {} }` block
        // never reaches buildMediaUrl.
        const isImageWithUrl = block.type === RICH_TEXT_BLOCK_IMAGE
            && typeof block.url === 'string'
            && block.url !== '';
        if (!isImageWithUrl) {
            continue;
        }
        const resolved = buildMediaUrl(block.url, apiUrl, cdnHost);
        if (resolved) {
            mediaUrls.push(resolved);
        }
        if (mediaUrls.length >= RICH_TEXT_MAX_MEDIA_URLS) {
            break;
        }
    }
    let rawText;
    if (typeof payload?.plain === 'string' && payload.plain.trim() !== '') {
        // A usable top-level `plain` wins over the block-derived text.
        rawText = payload.plain;
    }
    else {
        rawText = buildRichTextPlain(blocks);
    }
    const capped = truncateByBytes(rawText, RICH_TEXT_MAX_OUTPUT_BYTES, '\n[RichText truncated]');
    return { text: capped.text, mediaUrls };
}
|
|
275
|
+
// ─── Inner message rendering (MultipleForward children) ──────────────────
|
|
276
|
+
/**
 * Render one message nested inside a MultipleForward payload as a single
 * text fragment.
 *
 * Media types become a bracketed label, followed on the next line by the
 * resolved media URL when `buildMediaUrl` accepts it. Location, Card and
 * nested MultipleForward become fixed placeholders. RichText is expanded via
 * `resolveRichTextContent`.
 *
 * Every payload field read here is sender-controlled, so `url`, `name` and
 * `content` are used only when they are strings. This matches the type checks
 * already applied in `resolveContent` and `resolveRichTextContent`, and keeps
 * a malformed payload from throwing or from returning a non-string body.
 *
 * @param {object | null | undefined} payload - Inner message payload.
 * @param {string} [apiUrl] - Bot API base URL, forwarded to `buildMediaUrl`.
 * @param {string} [cdnHost] - Allowed CDN host, forwarded to `buildMediaUrl`.
 * @returns {string} Text for this message; `''` when there is no payload.
 */
function resolveInnerMessageText(payload, apiUrl, cdnHost) {
    if (!payload)
        return '';
    const fullUrl = typeof payload.url === 'string'
        ? buildMediaUrl(payload.url, apiUrl, cdnHost)
        : undefined;
    // Label alone, or label plus URL on the following line.
    const withUrl = (label) => (fullUrl ? `${label}\n${fullUrl}` : label);
    switch (payload.type) {
        case MessageType.Text:
            return typeof payload.content === 'string' ? payload.content : '';
        case MessageType.Image:
            return withUrl('[图片]');
        case MessageType.GIF:
            return withUrl('[GIF]');
        case MessageType.Voice:
            return withUrl('[语音]');
        case MessageType.Video:
            return withUrl('[视频]');
        case MessageType.Location:
            return '[位置信息]';
        case MessageType.Card:
            return '[名片]';
        case MessageType.File: {
            // SECURITY: the file name lands inside a `[文件: …]` label, so it is
            // sanitized to keep it from forging a section marker or role
            // label (prompt injection).
            const safeName = typeof payload.name === 'string' && payload.name
                ? sanitizeDisplayName(payload.name)
                : '';
            return withUrl(safeName ? `[文件: ${safeName}]` : '[文件]');
        }
        case MessageType.MultipleForward:
            return '[合并转发]';
        case MessageType.RichText: {
            const rt = resolveRichTextContent(payload, apiUrl, cdnHost);
            return rt.text || '[图文消息]';
        }
        default:
            return typeof payload.content === 'string' ? payload.content : '[消息]';
    }
}
|
|
313
|
+
/**
 * Expand a MultipleForward payload into a readable transcript, one
 * `<sender>: <body>` line per inner message under a header line.
 *
 * Three caps bound the work done on nested or oversized payloads:
 *   - depth:    at `_depth >= MULTIPLE_FORWARD_MAX_DEPTH` a single placeholder
 *               line is returned instead of recursing;
 *   - messages: only the first `MULTIPLE_FORWARD_MAX_MESSAGES` entries per
 *               level are rendered, and a trailer line reports how many were
 *               left out;
 *   - output:   the assembled transcript is truncated to
 *               `MULTIPLE_FORWARD_MAX_OUTPUT_BYTES`.
 *
 * The whole payload is sender-controlled. `users` and `msgs` are used only
 * when they are arrays, and entries that are null or carry non-string ids or
 * names are tolerated, so a malformed forward cannot make this function throw.
 *
 * @param {object | null | undefined} payload - MultipleForward payload.
 * @param {string} [apiUrl] - Bot API base URL, forwarded to media resolution.
 * @param {string} [cdnHost] - Allowed CDN host, forwarded to media resolution.
 * @param {number} [_depth=0] - Internal hop counter; the top level is 0.
 * @returns {string} The rendered transcript.
 */
export function resolveMultipleForwardText(payload, apiUrl, cdnHost, _depth = 0) {
    if (_depth >= MULTIPLE_FORWARD_MAX_DEPTH) {
        return '[合并转发: 嵌套已截断]';
    }
    const users = Array.isArray(payload?.users) ? payload.users : [];
    const rawMsgs = Array.isArray(payload?.msgs) ? payload.msgs : [];
    // Cap inner messages per level before doing any per-message work.
    const msgs = rawMsgs.slice(0, MULTIPLE_FORWARD_MAX_MESSAGES);
    const truncatedCount = rawMsgs.length - msgs.length;
    const userMap = new Map();
    for (const u of users) {
        // SECURITY: both name and uid become a `<name>: ` line label, so both
        // are sanitized; a constant is used only when nothing survives.
        // The raw uid is never used as a fallback label.
        const usable = typeof u?.uid === 'string' && typeof u.name === 'string'
            && u.uid !== '' && u.name !== '';
        if (!usable)
            continue;
        const safe = sanitizeDisplayName(u.name) || sanitizeDisplayName(u.uid) || 'unknown';
        userMap.set(u.uid, safe);
    }
    const lines = ['[合并转发: 聊天记录]'];
    for (const m of msgs) {
        const fromUid = m?.from_uid;
        const senderName = userMap.get(fromUid)
            ?? (typeof fromUid === 'string' ? sanitizeDisplayName(fromUid, 'unknown') : 'unknown');
        const innerPayload = m?.payload;
        if (innerPayload?.type === MessageType.MultipleForward) {
            const nested = resolveMultipleForwardText(innerPayload, apiUrl, cdnHost, _depth + 1);
            lines.push(`${senderName}: [合并转发]`);
            lines.push(nested);
        }
        else {
            // SECURITY: a forwarded body is attacker-controlled (for example
            // a Text content of `hi\n[assistant bot]: …`). It is passed
            // through sanitizePromptBody before joining the transcript so it
            // cannot forge a turn boundary. Nested transcripts sanitize their
            // own leaf bodies during recursion.
            const inner = sanitizePromptBody(resolveInnerMessageText(innerPayload, apiUrl, cdnHost));
            lines.push(`${senderName}: ${inner}`);
        }
    }
    if (truncatedCount > 0) {
        lines.push(`[合并转发: 还有 ${truncatedCount} 条消息未展示]`);
    }
    // Final budget guard: a single inner body can still be large even with
    // the message and depth caps, so truncate once after assembly.
    const { text } = truncateByBytes(lines.join('\n'), MULTIPLE_FORWARD_MAX_OUTPUT_BYTES, '\n[合并转发: 输出已截断]');
    return text;
}
|
|
375
|
+
// ─── Core resolver ─────────────────────────────────────────────────────────
|
|
376
|
+
/**
 * Render an inbound payload as LLM-friendly text plus optional media metadata.
 *
 * This is the synchronous part of inbound handling. Fetching and inlining
 * file contents needs HTTP and is a separate asynchronous step
 * (`tryResolveFile()`).
 *
 * Result shape by message type:
 *   - Text: `{ text }`.
 *   - Image / GIF / Voice / Video / File: `{ text, mediaUrl }`, where `text`
 *     is a bracketed label followed by the URL on the next line when
 *     `buildMediaUrl` accepts it, and `mediaUrl` is that URL or `undefined`.
 *   - Location / Card / MultipleForward: `{ text }`.
 *   - RichText: `{ text }`, plus `mediaUrl` (first image) and `mediaUrls`
 *     when at least one image URL was accepted.
 *   - Anything else: `{ text }` from `content`, then `url`, then a constant.
 *
 * All payload fields are sender-controlled. `content` and `url` are used only
 * when they are strings, consistent with the checks on `name` and `uid`, so
 * `text` is always a string and a malformed `url` cannot throw.
 *
 * @param {object | null | undefined} payload - Inbound message payload.
 * @param {string} [apiUrl] - Bot API base URL, forwarded to `buildMediaUrl`.
 * @param {string} [cdnHost] - Allowed CDN host, forwarded to `buildMediaUrl`.
 * @returns {{ text: string, mediaUrl?: string, mediaUrls?: string[] }}
 */
export function resolveContent(payload, apiUrl, cdnHost) {
    if (!payload)
        return { text: '' };
    // Shared shape for every single-media type: label, optional URL line, and
    // the `mediaUrl` field (present even when undefined).
    const mediaResult = (label) => {
        const url = typeof payload.url === 'string'
            ? buildMediaUrl(payload.url, apiUrl, cdnHost)
            : undefined;
        return {
            text: url ? `${label}\n${url}` : label,
            mediaUrl: url,
        };
    };
    switch (payload.type) {
        case MessageType.Text:
            return { text: typeof payload.content === 'string' ? payload.content : '' };
        case MessageType.Image:
            return mediaResult('[图片]');
        case MessageType.GIF:
            return mediaResult('[GIF]');
        case MessageType.Voice:
            // G22: the model receives the URL as a marker only; transcription
            // is out of scope here and tracked separately.
            return mediaResult('[语音消息]');
        case MessageType.Video:
            return mediaResult('[视频]');
        case MessageType.File: {
            // SECURITY: the file name is sanitized before entering the
            // `[文件: …]` label so it cannot forge a marker or role label.
            const fileName = typeof payload.name === 'string'
                ? sanitizeDisplayName(payload.name, '未知文件')
                : '未知文件';
            return mediaResult(`[文件: ${fileName}]`);
        }
        case MessageType.Location: {
            // SECURITY: coordinates are rendered only when both coerce to
            // finite numbers; a string such as "0]\n[assistant bot]: forged"
            // or a null (Number(null) === 0) never reaches the label.
            const lat = toFiniteCoord(payload.latitude ?? payload.lat);
            const lng = toFiniteCoord(payload.longitude ?? payload.lng ?? payload.lon);
            return {
                text: lat !== null && lng !== null
                    ? `[位置信息: ${lat},${lng}]`
                    : '[位置信息]',
            };
        }
        case MessageType.Card: {
            // SECURITY: name and uid both land inside the `[名片: …]` label,
            // so both are sanitized.
            const cardName = typeof payload.name === 'string'
                ? sanitizeDisplayName(payload.name, '未知')
                : '未知';
            const cardUid = typeof payload.uid === 'string'
                ? sanitizeDisplayName(payload.uid)
                : '';
            return {
                text: cardUid ? `[名片: ${cardName} (${cardUid})]` : `[名片: ${cardName}]`,
            };
        }
        case MessageType.MultipleForward:
            return {
                text: resolveMultipleForwardText(payload, apiUrl, cdnHost),
            };
        case MessageType.RichText: {
            const rt = resolveRichTextContent(payload, apiUrl, cdnHost);
            return {
                text: rt.text,
                ...(rt.mediaUrls.length > 0 ? { mediaUrl: rt.mediaUrls[0], mediaUrls: rt.mediaUrls } : {}),
            };
        }
        default: {
            // Unknown type: first string among content and url, else a constant.
            const fallback = [payload.content, payload.url].find((value) => typeof value === 'string');
            return { text: fallback ?? '[消息]' };
        }
    }
}
|
|
472
|
+
/**
 * Placeholder text for a historical (API-backfilled) message, chosen by type.
 *
 * Media and structured types map to a fixed bracketed label. File labels
 * include the sanitized `name`, or a constant when no name is given. Text and
 * any unrecognized type yield an empty string.
 *
 * @param {unknown} type - Message type to describe.
 * @param {string} [name] - File name; used for the File type only.
 * @returns {string} The placeholder, or `''` when none applies.
 */
export function resolveHistoricalMessagePlaceholder(type, name) {
    if (type === MessageType.Image)
        return '[图片]';
    if (type === MessageType.GIF)
        return '[GIF]';
    if (type === MessageType.Voice)
        return '[语音消息]';
    if (type === MessageType.Video)
        return '[视频]';
    if (type === MessageType.File) {
        const shownName = name ? sanitizeDisplayName(name, '未知文件') : '未知文件';
        return `[文件: ${shownName}]`;
    }
    if (type === MessageType.Location)
        return '[位置信息]';
    if (type === MessageType.Card)
        return '[名片]';
    if (type === MessageType.MultipleForward)
        return '[合并转发]';
    if (type === MessageType.RichText)
        return '[图文消息]';
    // Text and every unknown type have no placeholder.
    return '';
}
|
|
489
|
+
// ─── File inlining (G2) ────────────────────────────────────────────────────
|
|
490
|
+
/**
 * Best-effort removal of entries in `TEMP_DIR` whose modification time is
 * more than one hour old.
 *
 * Entries are processed one at a time. Every failure — the directory cannot
 * be listed, or an entry cannot be stat'ed or unlinked — is ignored on
 * purpose, so this never rejects because of a file-system error.
 *
 * @returns {Promise<void>}
 */
async function cleanupOldTempFiles() {
    let names;
    try {
        names = await readdir(TEMP_DIR);
    }
    catch {
        // Nothing to clean, or the directory is unreadable: give up quietly.
        return;
    }
    const oldestKept = Date.now() - 60 * 60 * 1000;
    const removeIfStale = async (name) => {
        const target = join(TEMP_DIR, name);
        const details = await stat(target);
        if (details.mtimeMs < oldestKept) {
            await unlink(target);
        }
    };
    for (const name of names) {
        try {
            await removeIfStale(name);
        }
        catch {
            /* best effort: skip this entry and continue with the next one */
        }
    }
}
|
|
511
|
+
/**
 * Determine a lower-case file extension (without the dot) for a download.
 *
 * The extension is taken from the last path segment of `url`. It is used
 * only when that segment contains a dot and the extension is 1–8 characters
 * long. Otherwise — including when `url` cannot be parsed — the extension of
 * `fallbackName` is used.
 *
 * Only the final path segment is inspected, so a dot in a directory name
 * (for example `/file/v1.2/abc`) is not mistaken for an extension and does
 * not block the filename fallback. A name with no dot has no extension and
 * yields `''`.
 *
 * @param {string} url - Download URL.
 * @param {string} [fallbackName] - Original file name, consulted when the
 *   URL gives no usable extension.
 * @returns {string} The extension, or `''` when none can be determined.
 */
function extractExtension(url, fallbackName) {
    // Text after the last dot, lower-cased; '' when there is no dot at all.
    const extensionOf = (name) => {
        if (typeof name !== 'string')
            return '';
        const dotIndex = name.lastIndexOf('.');
        return dotIndex === -1 ? '' : name.slice(dotIndex + 1).toLowerCase();
    };
    try {
        const lastSegment = new URL(url).pathname.split('/').pop() ?? '';
        const ext = extensionOf(lastSegment);
        if (ext && ext.length <= 8)
            return ext;
    }
    catch {
        /* unparsable URL: fall through to the file name */
    }
    return extensionOf(fallbackName);
}
|
|
523
|
+
/**
 * Attempt to inline a text file's contents, or download it to a temp path.
 *
 * Returns:
 *   - `{ inlined: text }` when the file is text-extension and under the inline cap
 *   - `{ tempPath, description }` when the file is text-extension but exceeds the
 *     inline cap (downloaded to disk so the agent can read it)
 *   - `{ description }` when the file isn't text, the URL is refused, the file
 *     exceeds the download cap, or the download failed
 *
 * Download and disk failures are reported through `description` rather than thrown.
 *
 * @param {object} params
 * @param {string} params.url - URL the file is fetched from.
 * @param {string} params.botToken - Sent as a Bearer token only on hops whose host matches `apiUrl`.
 * @param {string} params.apiUrl - Host reference used to decide whether a hop may receive the token.
 * @param {string} params.filename - Used for the extension fallback, the messages and the temp file name.
 * @param {number} [params.knownSize] - Size in bytes when known; lets oversized files be skipped without a request.
 * @returns {Promise<{inlined?: string, tempPath?: string, description?: string}>}
 */
export async function tryResolveFile(params) {
    const { url, botToken, apiUrl, filename, knownSize } = params;
    const ext = extractExtension(url, filename);
    if (!TEXT_FILE_EXTENSIONS.has(ext)) {
        // Non-text — surface size info if known
        return {
            description: knownSize != null
                ? `[文件: ${filename} (${formatBytes(knownSize)})]`
                : `[文件: ${filename}]`,
        };
    }
    // Skip download if known to exceed hard cap
    if (knownSize != null && knownSize > MAX_FILE_DOWNLOAD_BYTES) {
        return { description: `[文件: ${filename} (${formatBytes(knownSize)}) - 超过下载上限 ${formatBytes(MAX_FILE_DOWNLOAD_BYTES)}]` };
    }
    // S1: SSRF defense — reject private/loopback/link-local addresses.
    try {
        await assertPublicUrl(url);
    }
    catch (err) {
        return { description: `[文件: ${filename} - 拒绝下载: ${String(err)}]` };
    }
    const signal = AbortSignal.timeout(FILE_DOWNLOAD_TIMEOUT_MS);
    try {
        // S1/S2: the init is recomputed for every redirect hop, so the
        // Authorization header is dropped as soon as the current hop's host
        // differs from the apiUrl host (a same-host URL that 302s elsewhere
        // must not leak the token).
        const resp = await fetchWithRedirectGuard(url, (currentUrl) => {
            const headers = {};
            if (isSameHost(currentUrl, apiUrl)) {
                headers.Authorization = `Bearer ${botToken}`;
            }
            return { headers, signal };
        });
        if (!resp.ok) {
            return { description: `[文件: ${filename} - 下载失败 HTTP ${resp.status}]` };
        }
        const body = resp.body;
        if (!body) {
            return { description: `[文件: ${filename} - 响应无内容]` };
        }
        const reader = body.getReader();
        // reader.cancel() returns a promise: a plain try/catch would leave its
        // rejection floating, so attach a handler as well.
        const cancelReader = () => {
            try {
                reader.cancel().catch(() => { });
            }
            catch { /* ignore */ }
        };
        // Inline path: buffer until the inline cap is crossed, then hand the
        // buffered chunks over to the temp-file path below.
        const inlineChunks = [];
        let inlineBytes = 0;
        let exceededInline = false;
        while (true) {
            const { done, value } = await reader.read();
            if (done)
                break;
            inlineBytes += value.byteLength;
            inlineChunks.push(value);
            if (inlineBytes <= INLINE_FILE_MAX_BYTES)
                continue;
            if (inlineBytes > MAX_FILE_DOWNLOAD_BYTES) {
                cancelReader();
                return { description: `[文件: ${filename} (${formatBytes(inlineBytes)}) - 超过下载上限]` };
            }
            exceededInline = true;
            break;
        }
        if (!exceededInline) {
            const buf = Buffer.concat(inlineChunks.map((c) => Buffer.from(c)));
            return { inlined: buf.toString('utf-8') };
        }
        // Drain remaining body into the temp file
        await mkdir(TEMP_DIR, { recursive: true });
        cleanupOldTempFiles().catch(() => { });
        const safeName = filename.replace(/[^a-zA-Z0-9._-]/g, '_') || 'file';
        const tempPath = join(TEMP_DIR, `${randomUUID()}-${safeName}`);
        const ws = createWriteStream(tempPath);
        // Listen for stream errors from the moment the stream exists. Without a
        // listener an early failure (open error, disk full) is an unhandled
        // 'error' event, and a pending 'drain'/'finish' wait would never settle.
        const wsFailed = new Promise((_resolve, reject) => ws.on('error', reject));
        wsFailed.catch(() => { }); // observed via the races below
        const waitFor = (event) => Promise.race([
            new Promise((resolve) => ws.once(event, resolve)),
            wsFailed,
        ]);
        const writeChunk = async (chunk) => {
            // Respect backpressure: wait for 'drain' when the buffer is full.
            if (!ws.write(chunk))
                await waitFor('drain');
        };
        try {
            for (const chunk of inlineChunks) {
                await writeChunk(chunk);
            }
            let totalBytes = inlineBytes;
            while (true) {
                const { done, value } = await reader.read();
                if (done)
                    break;
                totalBytes += value.byteLength;
                if (totalBytes > MAX_FILE_DOWNLOAD_BYTES) {
                    cancelReader();
                    ws.destroy();
                    await unlink(tempPath).catch(() => { });
                    return { description: `[文件: ${filename} (${formatBytes(totalBytes)}) - 超过下载上限]` };
                }
                await writeChunk(value);
            }
            const flushed = waitFor('finish');
            ws.end();
            await flushed;
            const sizeInfo = statSync(tempPath).size;
            return { tempPath, description: `[文件: ${filename} (${formatBytes(sizeInfo)}) - 已下载到 ${tempPath}]` };
        }
        catch (err) {
            // Stop the transfer too; otherwise the body keeps downloading after
            // the disk write has already failed.
            cancelReader();
            ws.destroy();
            await unlink(tempPath).catch(() => { });
            return { description: `[文件: ${filename} - 下载错误: ${String(err)}]` };
        }
    }
    catch (err) {
        return { description: `[文件: ${filename} - ${String(err).includes('TimeoutError') ? '下载超时' : '网络错误'}]` };
    }
}
|
|
658
|
+
/**
 * Render a byte count as a short human-readable string.
 *
 * Values below 1024 are printed verbatim with a `B` suffix; larger values are
 * scaled by powers of 1024 and printed with one decimal as `KB`, `MB` or `GB`.
 * `GB` is the largest unit, so anything at or above 1024^3 stays in gigabytes.
 *
 * @param {number} n - Size in bytes.
 * @returns {string} For example `512B`, `1.5KB`, `5.0MB`, `2.0GB`.
 */
function formatBytes(n) {
    const KIB = 1024;
    if (n < KIB) {
        return `${n}B`;
    }
    // [exclusive upper bound, divisor, suffix] for the intermediate units.
    const scaledUnits = [
        [KIB ** 2, KIB, 'KB'],
        [KIB ** 3, KIB ** 2, 'MB'],
    ];
    for (const [limit, divisor, suffix] of scaledUnits) {
        if (n < limit) {
            return `${(n / divisor).toFixed(1)}${suffix}`;
        }
    }
    return `${(n / KIB ** 3).toFixed(1)}GB`;
}
|
|
667
|
+
//# sourceMappingURL=inbound.js.map
|