protocol-proxy 2.8.3 → 2.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/server.js CHANGED
@@ -242,8 +242,150 @@ async function init() {
242
242
  : 'primary_fallback';
243
243
  }
244
244
 
245
+ // ==================== Token 估算与会话压缩 ====================
246
+
247
/**
 * Roughly estimate how many tokens one chat message occupies.
 *
 * The estimate is deliberately pessimistic: every two characters count as one
 * token (CJK text costs roughly 1-2 tokens per character) and each message
 * carries a fixed overhead of 4, so compression is triggered early rather
 * than letting the context overflow.
 *
 * @param {object} msg - Chat message; `content`, `reasoning_content` and
 *   `tool_calls` are taken into account.
 * @returns {number} Estimated token count. A missing or non-object message
 *   yields the bare overhead (4) instead of throwing.
 */
function estimateMessageTokens(msg) {
  const MESSAGE_OVERHEAD = 4;
  if (msg === null || typeof msg !== 'object') return MESSAGE_OVERHEAD;

  // Strings count by length, anything else by the length of its JSON form.
  const len = (s) => (typeof s === 'string' ? s.length : (JSON.stringify(s || '') ?? '').length);

  // Multimodal blocks: prefer the textual payload over the serialized wrapper.
  const blockLen = (block) => {
    if (typeof block === 'string') return block.length;
    if (block?.text) return len(block.text);
    if (block?.content) return len(block.content);
    return len(block); // fallback: size of the serialized block
  };

  const { content } = msg;
  let chars = 0;
  if (typeof content === 'string') {
    chars += content.length;
  } else if (Array.isArray(content)) {
    for (const block of content) chars += blockLen(block);
  } else if (content !== null && content !== undefined) {
    // A single block object used to be ignored, which under-estimated the
    // message; count it like one array element.
    chars += blockLen(content);
  }

  if (msg.reasoning_content) chars += len(msg.reasoning_content);

  if (Array.isArray(msg.tool_calls)) {
    for (const tc of msg.tool_calls) {
      chars += len(tc?.function?.name || '') + len(tc?.function?.arguments || '');
    }
  }

  return Math.ceil(chars / 2) + MESSAGE_OVERHEAD;
}
269
+
270
/**
 * Estimate the token footprint of a whole conversation by adding up the
 * per-message estimates.
 *
 * @param {object[]} messages - Chat messages in order.
 * @returns {number} Sum of `estimateMessageTokens` over all messages (0 for an empty list).
 */
function estimateConversationTokens(messages) {
  let total = 0;
  messages.forEach((message) => {
    total += estimateMessageTokens(message);
  });
  return total;
}
273
+
274
/**
 * Summarize the older part of a conversation so it can be dropped from the
 * message list.
 *
 * The last 6 messages are kept verbatim (the cut is moved back so that an
 * assistant `tool_calls` message is never separated from its `tool` replies).
 * Everything before the cut is condensed into a summary: first by asking the
 * model behind `proxyUrl`, and if that fails, by a heuristic built from
 * message statistics. The function does not mutate `conv`; the caller applies
 * the returned values.
 *
 * @param {object} conv - Conversation; `messages` and `compressionSummary` are read.
 * @param {number} maxContext - Context budget in estimated tokens; used to cap
 *   how much of the old transcript is sent to the summarizer.
 * @param {string} proxyUrl - Chat-completions endpoint used for summarizing.
 * @param {object} proxyHeaders - Headers sent with the summarize request.
 * @param {string} [defaultModel] - Model name; falls back to 'gpt-4o'.
 * @returns {Promise<{messages: object[], summary: string, removedCount: number, newTokens: number} | null>}
 *   `null` when there is nothing old enough to compress.
 */
async function compressConversation(conv, maxContext, proxyUrl, proxyHeaders, defaultModel) {
  const PRESERVE_RECENT = 6;
  const PER_MESSAGE_CHARS = 600;
  const MAX_TRANSCRIPT_CHARS = 24000;

  const messages = Array.isArray(conv?.messages) ? conv.messages : [];
  // Summary produced by an earlier compression, carried forward into the new one.
  const existingSummary = conv?.compressionSummary || '';

  // Clamp at 0: a negative index would make slice() count from the END and
  // compress conversations that are shorter than the preserve window.
  let keepFrom = Math.max(0, messages.length - PRESERVE_RECENT);
  // Do not split an assistant(tool_calls) message from its tool results.
  if (keepFrom > 0 && messages[keepFrom]?.role === 'tool') {
    let j = keepFrom - 1;
    while (j > 0 && messages[j]?.role === 'tool') j--;
    if (messages[j]?.role === 'assistant' && messages[j]?.tool_calls) keepFrom = j;
  }

  const oldMessages = messages.slice(0, keepFrom);
  const recentMessages = messages.slice(keepFrom);
  if (oldMessages.length === 0) return null;

  // Plain text of a message body (string, or the text parts of a block array).
  const textOf = (content) => {
    if (typeof content === 'string') return content;
    if (!Array.isArray(content)) return '';
    return content
      .map((block) => (typeof block === 'string' ? block : (typeof block?.text === 'string' ? block.text : '')))
      .filter(Boolean)
      .join('\n');
  };
  const clip = (text, limit) => (text.length > limit ? `${text.slice(0, limit)}…` : text);
  const countByRole = (role) => oldMessages.filter((m) => m?.role === role).length;

  // Heuristic statistics: used in the prompt and as the base of the fallback summary.
  const toolNames = [...new Set(
    oldMessages
      .flatMap((m) => (Array.isArray(m?.tool_calls) ? m.tool_calls : []))
      .map((tc) => tc?.function?.name)
      .filter(Boolean),
  )];
  const stats = [
    `- 范围: ${oldMessages.length} 条旧消息 (user=${countByRole('user')}, assistant=${countByRole('assistant')}, tool=${countByRole('tool')})`,
    toolNames.length > 0 ? `- 使用的工具: ${toolNames.join(', ')}` : null,
    existingSummary ? `- 之前的摘要:\n${existingSummary}` : null,
  ].filter(Boolean).join('\n');

  const lastUserTexts = oldMessages
    .filter((m) => m?.role === 'user')
    .map((m) => textOf(m.content))
    .filter(Boolean)
    .slice(-3);

  // The summarizer can only summarize what it is shown: render the old
  // messages as a clipped transcript, keeping the newest lines that fit.
  const transcriptBudget = Number.isFinite(maxContext) && maxContext > 0
    ? Math.min(MAX_TRANSCRIPT_CHARS, Math.floor(maxContext / 2))
    : MAX_TRANSCRIPT_CHARS;
  const transcriptLines = oldMessages
    .map((m) => {
      const parts = [];
      const text = textOf(m?.content);
      if (text) parts.push(clip(text, PER_MESSAGE_CHARS));
      for (const tc of (Array.isArray(m?.tool_calls) ? m.tool_calls : [])) {
        parts.push(`[tool_call ${tc?.function?.name || '?'}(${clip(String(tc?.function?.arguments || ''), 200)})]`);
      }
      return parts.length > 0 ? `[${m?.role}] ${parts.join(' ')}` : '';
    })
    .filter(Boolean);
  const keptLines = [];
  let usedChars = 0;
  for (let i = transcriptLines.length - 1; i >= 0; i--) {
    if (usedChars + transcriptLines[i].length > transcriptBudget) break;
    keptLines.unshift(transcriptLines[i]);
    usedChars += transcriptLines[i].length + 1;
  }

  const compressPrompt = [
    '请将以下对话历史压缩为简洁的摘要。保留所有关键信息:用户的问题意图、发现的问题、工具调用的关键结果、得出的结论和建议。',
    '',
    '对话统计:',
    stats,
    '',
    '最近的用户问题:',
    lastUserTexts.map((text, i) => `${i + 1}. ${text.slice(0, 200)}`).join('\n'),
    '',
    '对话内容(按时间顺序,过长部分已截断):',
    keptLines.join('\n'),
    '',
    '请用中文输出摘要,格式:',
    '1. 用户的主要目标/问题',
    '2. 已完成的调查/操作',
    '3. 关键发现和结论',
    '4. 未完成的工作(如有)',
    '',
    '摘要控制在 500 字以内。',
  ].join('\n');

  // Ask the model for the summary; any failure falls through to the heuristic.
  let summary;
  try {
    const res = await fetch(proxyUrl, {
      method: 'POST',
      headers: proxyHeaders,
      signal: AbortSignal.timeout(60000),
      body: JSON.stringify({
        model: defaultModel || 'gpt-4o',
        messages: [
          { role: 'system', content: '你是一个对话摘要助手。简洁准确地总结对话要点。' },
          { role: 'user', content: compressPrompt },
        ],
        max_tokens: 1024,
        stream: false,
      }),
    });
    if (res.ok) {
      const data = await res.json();
      const text = data?.choices?.[0]?.message?.content;
      summary = typeof text === 'string' ? text : '';
    } else {
      logger.log(`[compress] LLM 摘要失败: HTTP ${res.status}`);
    }
  } catch (err) {
    logger.log(`[compress] LLM 摘要失败: ${err.message}`);
  }

  // Heuristic fallback: statistics, the last user questions and the tail of
  // the last assistant answer.
  if (!summary) {
    const lastAssistant = oldMessages.filter((m) => m?.role === 'assistant' && m.content).pop();
    summary = stats +
      (lastUserTexts.length ? '\n- 最近用户问题:\n' + lastUserTexts.map((q, i) => `  ${i + 1}. ${q.slice(0, 100)}`).join('\n') : '') +
      '\n- 最近内容: ' + textOf(lastAssistant?.content).slice(0, 300);
    logger.log('[compress] 使用启发式降级摘要');
  }

  // The rebuilt list holds only the preserved recent messages; it contains no
  // system message.
  const newMessages = [...recentMessages];
  const newTokens = estimateConversationTokens(newMessages);
  return { messages: newMessages, summary, removedCount: oldMessages.length, newTokens };
}
377
+
245
378
  // ==================== 助手工具定义与执行器 ====================
246
379
 
380
// Upper bound, in UTF-16 code units, for one tool result — keeps a single
// tool output from flooding the LLM context.
const MAX_TOOL_OUTPUT = 16384;

/**
 * Cap the size of a tool result.
 *
 * @param {*} obj - Tool result; strings are measured as-is, everything else
 *   by its JSON serialization.
 * @returns {*} `obj` itself when it fits (or cannot be measured because it
 *   serializes to `undefined`); otherwise
 *   `{ _truncated, _original_bytes, _preview }` where `_preview` is the first
 *   MAX_TOOL_OUTPUT characters followed by a truncation notice and
 *   `_original_bytes` is the UTF-8 size of the full output.
 */
function truncateOutput(obj) {
  const str = typeof obj === 'string' ? obj : JSON.stringify(obj);
  // JSON.stringify(undefined) (or of a function) yields undefined — nothing to cut.
  if (typeof str !== 'string' || str.length <= MAX_TOOL_OUTPUT) return obj;

  // Do not end the preview on a lone high surrogate (half of an emoji etc.).
  let end = MAX_TOOL_OUTPUT;
  const lastUnit = str.charCodeAt(end - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;

  return {
    _truncated: true,
    _original_bytes: Buffer.byteLength(str, 'utf8'),
    _preview: `${str.slice(0, end)}\n... [截断,原始输出 ${str.length} 字符]`,
  };
}
388
+
247
389
  const TOOL_DEFINITIONS = [
248
390
  {
249
391
  type: 'function',
@@ -438,6 +580,40 @@ async function init() {
438
580
  },
439
581
  },
440
582
  },
583
+ {
584
+ type: 'function',
585
+ function: {
586
+ name: 'edit_file',
587
+ description: '精确替换文件中的字符串。比 write_file 更安全,只替换匹配的内容,不会覆盖整个文件。',
588
+ parameters: {
589
+ type: 'object',
590
+ properties: {
591
+ path: { type: 'string', description: '文件路径' },
592
+ old_string: { type: 'string', description: '要被替换的原始字符串(必须精确匹配)' },
593
+ new_string: { type: 'string', description: '替换后的新字符串' },
594
+ replace_all: { type: 'boolean', description: '是否替换所有匹配项,默认 false(只替换第一个)' },
595
+ },
596
+ required: ['path', 'old_string', 'new_string'],
597
+ },
598
+ },
599
+ },
600
+ {
601
+ type: 'function',
602
+ function: {
603
+ name: 'grep_search',
604
+ description: '在文件内容中搜索正则表达式模式。用于查找代码、日志关键字等。',
605
+ parameters: {
606
+ type: 'object',
607
+ properties: {
608
+ pattern: { type: 'string', description: '正则表达式模式' },
609
+ path: { type: 'string', description: '搜索目录或文件路径,默认当前工作目录' },
610
+ glob: { type: 'string', description: '文件名过滤,如 "*.js" 或 "*.log"' },
611
+ max_results: { type: 'number', description: '最大返回匹配数,默认 50' },
612
+ },
613
+ required: ['pattern'],
614
+ },
615
+ },
616
+ },
441
617
  ];
442
618
 
443
619
  const TOOL_HANDLERS = {
@@ -524,6 +700,21 @@ async function init() {
524
700
  read_file: async (args) => {
525
701
  const filePath = path.resolve(args.path);
526
702
  try {
703
+ // 二进制检测:检查前 8KB 是否含 NUL 字节
704
+ const stat = await fs.promises.stat(filePath);
705
+ const peekSize = Math.min(8192, stat.size);
706
+ if (peekSize > 0) {
707
+ const fd = await fs.promises.open(filePath, 'r');
708
+ try {
709
+ const buf = Buffer.alloc(peekSize);
710
+ await fd.read(buf, 0, peekSize, 0);
711
+ if (buf.includes(0)) {
712
+ return { error: `二进制文件,无法以文本方式读取 (${filePath}, ${stat.size} bytes)` };
713
+ }
714
+ } finally {
715
+ await fd.close();
716
+ }
717
+ }
527
718
  const content = await fs.promises.readFile(filePath, 'utf8');
528
719
  const lines = content.split('\n');
529
720
  const offset = Math.max(0, parseInt(args.offset) || 0);
@@ -607,6 +798,74 @@ async function init() {
607
798
  });
608
799
  });
609
800
  },
801
+
802
+ edit_file: async (args) => {
803
+ const filePath = path.resolve(args.path);
804
+ try {
805
+ const content = await fs.promises.readFile(filePath, 'utf8');
806
+ const { old_string, new_string } = args;
807
+ if (old_string === new_string) return { error: 'old_string 和 new_string 不能相同' };
808
+ if (!content.includes(old_string)) return { error: `文件中未找到匹配的字符串` };
809
+ const replaceAll = !!args.replace_all;
810
+ const newContent = replaceAll
811
+ ? content.split(old_string).join(new_string)
812
+ : content.replace(old_string, new_string);
813
+ const count = replaceAll
814
+ ? content.split(old_string).length - 1
815
+ : 1;
816
+ await fs.promises.writeFile(filePath, newContent, 'utf8');
817
+ return { success: true, path: filePath, replacements: count };
818
+ } catch (err) {
819
+ return { error: err.message };
820
+ }
821
+ },
822
+
823
+ grep_search: async (args) => {
824
+ const root = path.resolve(args.path || '.');
825
+ const pattern = args.pattern;
826
+ const maxResults = Math.min(Math.max(1, parseInt(args.max_results) || 50), 200);
827
+ const globFilter = args.glob || '';
828
+ try {
829
+ const regex = new RegExp(pattern, 'gi');
830
+ const results = [];
831
+ const walk = async (dir) => {
832
+ if (results.length >= maxResults) return;
833
+ const entries = await fs.promises.readdir(dir, { withFileTypes: true });
834
+ for (const e of entries) {
835
+ if (results.length >= maxResults) break;
836
+ const fullPath = path.join(dir, e.name);
837
+ if (e.isDirectory()) {
838
+ if (['node_modules', '.git', 'dist', 'build', '.next'].includes(e.name)) continue;
839
+ await walk(fullPath);
840
+ } else if (e.isFile()) {
841
+ if (globFilter) {
842
+ const ext = '.' + e.name.split('.').pop();
843
+ if (!globFilter.includes(ext) && !globFilter.includes(e.name) && !globFilter.includes('*')) continue;
844
+ }
845
+ try {
846
+ const content = await fs.promises.readFile(fullPath, 'utf8');
847
+ const lines = content.split('\n');
848
+ for (let i = 0; i < lines.length; i++) {
849
+ if (results.length >= maxResults) break;
850
+ if (regex.test(lines[i])) {
851
+ results.push({
852
+ file: path.relative(root, fullPath),
853
+ line: i + 1,
854
+ content: lines[i].trim().slice(0, 300),
855
+ });
856
+ regex.lastIndex = 0;
857
+ }
858
+ }
859
+ } catch {}
860
+ }
861
+ }
862
+ };
863
+ await walk(root);
864
+ return { pattern, matches: results, total: results.length };
865
+ } catch (err) {
866
+ return { error: err.message };
867
+ }
868
+ },
610
869
  };
611
870
 
612
871
  async function startProxyWithProvider(proxy) {
@@ -1392,48 +1651,211 @@ async function init() {
1392
1651
 
1393
1652
  // ==================== 智控助手 Tool Calling API ====================
1394
1653
 
1654
+ const conversationStore = require('./lib/conversation-store');
1655
+ conversationStore.init();
1656
+
1657
+ // Per-conversation concurrency lock: a Set holding the ids of conversations that are currently streaming
1658
+ const activeStreams = new Set();
1659
+
1395
1660
  function sendSSE(res, event, data) {
1396
1661
  res.write(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`);
1397
1662
  }
1398
1663
 
1664
// ---- Conversation management API ----

// List the stored conversations.
app.get('/api/assistant/conversations', (_req, res) => {
  const conversations = conversationStore.list();
  res.json({ conversations });
});

// Delete one conversation; 404 when the id is unknown.
app.delete('/api/assistant/conversations/:id', (req, res) => {
  const found = conversationStore.get(req.params.id);
  if (found) {
    conversationStore.remove(req.params.id);
    res.json({ success: true });
  } else {
    res.status(404).json({ error: '会话不存在' });
  }
});

// Message history of one conversation (used to restore it in the UI).
app.get('/api/assistant/conversations/:id/messages', (req, res) => {
  const found = conversationStore.get(req.params.id);
  if (!found) {
    res.status(404).json({ error: '会话不存在' });
    return;
  }
  // System messages are filtered out of the returned history.
  const history = found.messages || [];
  const visible = history.filter((entry) => entry.role !== 'system');
  res.json({
    id: found.id,
    proxyId: found.proxyId,
    messages: visible,
    compressionSummary: found.compressionSummary || null,
  });
});

// Providers with their model lists, plus the proxy's default model
// (feeds the cascading provider/model selector).
app.get('/api/assistant/proxy-providers/:proxyId', (req, res) => {
  const proxy = configStore.getProxyById(req.params.proxyId);
  if (!proxy) {
    res.status(404).json({ error: '代理不存在' });
    return;
  }
  const providers = [];
  for (const provider of configStore.getProviders()) {
    providers.push({
      id: provider.id,
      name: provider.name,
      protocol: provider.protocol,
      models: provider.models || [],
    });
  }
  res.json({ providers, defaultModel: proxy.defaultModel || '' });
});
1698
+
1699
/**
 * Build the assistant's system prompt.
 *
 * The prompt (Chinese) introduces the assistant, embeds the current time,
 * lists the available tools in two groups (system queries, file/command
 * tools), and states the usage rules and responsibilities.
 *
 * @returns {string} The complete system prompt text.
 */
+ function buildSystemPrompt() {
1700
// Current time rendered with the zh-CN locale and hour12 disabled.
+ const now = new Date().toLocaleString('zh-CN', { hour12: false });
1701
+ return `你是 Protocol Proxy 的智能助手,专门帮助管理员监控和排障。当前时间:${now}
1702
+
1703
+ 你有以下工具可以调用:
1704
+
1705
+ 系统查询:
1706
+ - get_system_status: 获取系统概览(代理运行状态、供应商数量、运行时长)
1707
+ - get_providers / get_provider: 获取供应商列表或详情
1708
+ - get_proxies / get_proxy: 获取代理列表或详情
1709
+ - get_usage_stats: 查询用量统计(支持按时间范围、代理筛选)
1710
+ - get_recent_requests: 获取最近请求日志
1711
+ - get_system_logs: 获取系统日志
1712
+ - get_key_health: 获取 API Key 健康检查结果
1713
+ - get_settings: 获取系统设置项
1714
+ - get_config_history: 获取配置快照历史
1715
+
1716
+ 文件与命令:
1717
+ - read_file: 读取任意文件内容(支持指定行范围,自动检测二进制文件)
1718
+ - write_file: 写入文件(会覆盖已有内容)
1719
+ - edit_file: 精确替换文件中的字符串(比 write_file 更安全,只替换匹配内容)
1720
+ - list_directory: 列出目录内容
1721
+ - search_files: 按文件名 glob 模式搜索文件
1722
+ - grep_search: 按正则表达式搜索文件内容(用于查找代码、日志关键字等)
1723
+ - execute_command: 执行 shell 命令
1724
+
1725
+ 规则:
1726
+ - 当用户询问系统状态、代理、供应商、日志、用量等运维相关问题时,调用工具获取实时数据后再回答
1727
+ - 当用户需要查看或修改文件、执行命令时,使用对应的文件和命令工具
1728
+ - 当用户只是打招呼、闲聊、或询问与系统无关的问题时,直接回答,不要调用工具
1729
+ - 不要凭空猜测系统状态,需要数据时必须调用工具
1730
+ - 执行写操作或危险命令前,先告知用户将要做什么
1731
+
1732
+ 你的职责:
1733
+ 1. 回答关于代理配置和运行状态的问题
1734
+ 2. 分析日志,指出异常和可能原因
1735
+ 3. 根据数据给出优化建议(负载均衡、模型选择、故障切换策略)
1736
+ 4. 用自然语言解释技术问题
1737
+ 5. 如果发现问题,给出具体的修复步骤
1738
+
1739
+ 请用中文回答,保持专业且易懂。`;
1740
+ }
1741
+
1399
1742
  app.post('/api/assistant/chat', async (req, res) => {
1400
- const { proxyId, messages } = req.body;
1401
- if (!proxyId || !Array.isArray(messages)) {
1402
- return res.status(400).json({ error: '需要 proxyId 和 messages' });
1743
+ const { proxyId, conversationId, message, compress, providerId, model } = req.body;
1744
+ if (!proxyId || (!compress && !message)) {
1745
+ return res.status(400).json({ error: '需要 proxyId 和 message' });
1403
1746
  }
1404
1747
 
1405
1748
  const proxy = configStore.getProxyById(proxyId);
1406
1749
  if (!proxy) return res.status(404).json({ error: '代理不存在' });
1407
1750
  if (!resolveTarget(proxy)) return res.status(500).json({ error: '代理目标未配置' });
1408
1751
 
1752
+ // 查找或创建对话
1753
+ const settings = configStore.getSettings();
1754
+ let convId = conversationId;
1755
+ let conv;
1756
+ if (convId) {
1757
+ conv = conversationStore.get(convId);
1758
+ }
1759
+ if (!conv && compress) {
1760
+ return res.status(404).json({ error: '会话不存在,无法压缩' });
1761
+ }
1762
+ if (!conv) {
1763
+ const maxConvs = parseInt(settings.maxConversations) || 0;
1764
+ conv = conversationStore.create(proxyId, maxConvs);
1765
+ convId = conv.id;
1766
+ }
1767
+
1768
+ // 并发锁:同一会话正在 streaming 时拒绝新请求
1769
+ if (activeStreams.has(convId)) {
1770
+ return res.status(429).json({ error: '该会话正在处理中,请稍后再试' });
1771
+ }
1772
+ activeStreams.add(convId);
1773
+ conversationStore.touch(conv);
1774
+
1775
+ // 追加用户消息到对话历史(压缩请求不追加空消息)
1776
+ if (!compress && message) {
1777
+ conv.messages.push({ role: 'user', content: message });
1778
+ conversationStore.touch(conv);
1779
+ }
1780
+
1409
1781
  const proxyUrl = `http://localhost:${proxy.port}/v1/chat/completions`;
1410
1782
  const proxyHeaders = { 'Content-Type': 'application/json' };
1411
1783
  if (proxy.requireAuth && proxy.authToken) {
1412
1784
  proxyHeaders['Authorization'] = `Bearer ${proxy.authToken}`;
1413
1785
  }
1786
+ if (providerId) proxyHeaders['x-pp-provider-id'] = providerId;
1787
+ if (model) proxyHeaders['x-pp-model'] = model;
1788
+ // 若供应商不在代理候选池中,传递完整供应商配置供代理动态构建临时候选
1789
+ if (providerId) {
1790
+ const target = resolveTarget(proxy);
1791
+ const inPool = target?.providerPool?.some(c => c.providerId === providerId);
1792
+ if (!inPool) {
1793
+ const provider = configStore.getProviderById(providerId);
1794
+ if (provider) {
1795
+ proxyHeaders['x-pp-provider-url'] = provider.url;
1796
+ proxyHeaders['x-pp-provider-protocol'] = provider.protocol;
1797
+ const enabledKeys = (provider.apiKeys || []).filter(k => k.enabled !== false).map(k => k.key);
1798
+ if (enabledKeys.length > 0) proxyHeaders['x-pp-provider-keys'] = JSON.stringify(enabledKeys);
1799
+ }
1800
+ }
1801
+ }
1414
1802
 
1415
1803
  // SSE 响应头
1416
1804
  res.setHeader('Content-Type', 'text/event-stream');
1417
1805
  res.setHeader('Cache-Control', 'no-cache');
1418
1806
  res.setHeader('Connection', 'keep-alive');
1419
1807
 
1420
- // 发送 SSE 的辅助函数,忽略写入错误
1421
1808
  function safeSSE(event, data) {
1422
1809
  try { sendSSE(res, event, data); } catch {}
1423
1810
  }
1811
+ const MAX_CONTEXT = Math.max(10000, parseInt(settings.maxContext) || 200000);
1812
+ const MAX_TOOL_ROUNDS = Math.max(1, Math.min(100, parseInt(settings.maxRounds) || 10));
1813
+
1814
+ // 手动压缩请求
1815
+ if (compress) {
1816
+ logger.log(`[assistant] 压缩请求 — ${conv.messages.length} messages`);
1817
+ safeSSE('compressing', {});
1818
+ const result = await compressConversation(conv, MAX_CONTEXT, proxyUrl, proxyHeaders, proxy.defaultModel);
1819
+ if (result) {
1820
+ conv.messages = result.messages;
1821
+ conv.compressionSummary = result.summary;
1822
+ conversationStore.touch(conv);
1823
+ safeSSE('compressed', { summary: result.summary, removedCount: result.removedCount, tokens: result.newTokens, maxTokens: MAX_CONTEXT, messages: conv.messages.length });
1824
+ logger.log(`[assistant] 压缩完成 — 移除 ${result.removedCount} 条`);
1825
+ } else {
1826
+ safeSSE('compressed', { summary: null, removedCount: 0, tokens: estimateConversationTokens(conv.messages), maxTokens: MAX_CONTEXT, messages: conv.messages.length });
1827
+ }
1828
+ safeSSE('done', {});
1829
+ res.end();
1830
+ return;
1831
+ }
1424
1832
 
1425
- const MAX_TOOL_ROUNDS = 10;
1426
- const conversationMessages = [...messages];
1833
+ // 发送 conversationId 给前端
1834
+ safeSSE('conversation', { id: convId });
1427
1835
 
1428
1836
  try {
1429
- for (let round = 0; round < MAX_TOOL_ROUNDS; round++) {
1430
- logger.log(`[assistant] round ${round} — messages: ${conversationMessages.length}`);
1431
- for (let i = 0; i < conversationMessages.length; i++) {
1432
- const m = conversationMessages[i];
1433
- logger.log(`[assistant] msg[${i}]: role=${m.role}, hasReasoning=${!!m.reasoning_content}, hasToolCalls=${!!m.tool_calls}, contentLen=${(m.content || '').length}`);
1837
+ // 请求级别缓存 system prompt(避免每轮重建导致 prompt cache 失效)
1838
+ const systemPrompt = buildSystemPrompt();
1839
+ const buildMessages = () => {
1840
+ const msgs = [{ role: 'system', content: systemPrompt }];
1841
+ if (conv.compressionSummary) {
1842
+ msgs.push({ role: 'system', content: `[压缩摘要]\n${conv.compressionSummary}\n\n---\n以上是之前对话的压缩摘要。最近的消息保留原文。请继续对话,不要复述摘要内容。` });
1434
1843
  }
1844
+ msgs.push(...conv.messages);
1845
+ return msgs;
1846
+ };
1847
+
1848
+ let currentTokens = estimateConversationTokens(buildMessages());
1849
+ const sendContext = () => {
1850
+ const pct = Math.round(currentTokens / MAX_CONTEXT * 1000) / 10;
1851
+ safeSSE('context', { tokens: currentTokens, maxTokens: MAX_CONTEXT, percent: pct, messages: conv.messages.length });
1852
+ };
1853
+ sendContext();
1854
+
1855
+ for (let round = 0; round < MAX_TOOL_ROUNDS; round++) {
1856
+ const messages = buildMessages();
1857
+ logger.log(`[assistant] round ${round} — ${messages.length} messages, ~${currentTokens} tokens`);
1435
1858
 
1436
- // 调用本地代理
1437
1859
  let fetchRes;
1438
1860
  try {
1439
1861
  fetchRes = await fetch(proxyUrl, {
@@ -1442,7 +1864,7 @@ async function init() {
1442
1864
  signal: AbortSignal.timeout(300000),
1443
1865
  body: JSON.stringify({
1444
1866
  model: proxy.defaultModel || 'gpt-4o',
1445
- messages: conversationMessages,
1867
+ messages,
1446
1868
  stream: true,
1447
1869
  tools: TOOL_DEFINITIONS,
1448
1870
  tool_choice: 'auto',
@@ -1472,37 +1894,24 @@ async function init() {
1472
1894
  while (true) {
1473
1895
  const { done, value } = await reader.read();
1474
1896
  if (done) break;
1475
-
1476
1897
  buffer += decoder.decode(value, { stream: true });
1477
1898
  const lines = buffer.split('\n');
1478
1899
  buffer = lines.pop();
1479
-
1480
1900
  for (const line of lines) {
1481
1901
  const trimmed = line.trim();
1482
1902
  if (!trimmed || !trimmed.startsWith('data: ')) continue;
1483
1903
  const payload = trimmed.slice(6);
1484
1904
  if (payload === '[DONE]') continue;
1485
-
1486
1905
  try {
1487
1906
  const data = JSON.parse(payload);
1488
1907
  const delta = data.choices?.[0]?.delta;
1489
1908
  if (!delta) continue;
1490
-
1491
- if (delta.content) {
1492
- fullContent += delta.content;
1493
- safeSSE('content', { delta: delta.content });
1494
- }
1495
-
1496
- if (delta.reasoning_content) {
1497
- reasoningContent += delta.reasoning_content;
1498
- }
1499
-
1909
+ if (delta.content) { fullContent += delta.content; safeSSE('content', { delta: delta.content }); }
1910
+ if (delta.reasoning_content) reasoningContent += delta.reasoning_content;
1500
1911
  if (delta.tool_calls) {
1501
1912
  for (const tc of delta.tool_calls) {
1502
1913
  const idx = tc.index;
1503
- if (!toolCallAccumulator[idx]) {
1504
- toolCallAccumulator[idx] = { id: '', name: '', arguments: '' };
1505
- }
1914
+ if (!toolCallAccumulator[idx]) toolCallAccumulator[idx] = { id: '', name: '', arguments: '' };
1506
1915
  if (tc.id) toolCallAccumulator[idx].id = tc.id;
1507
1916
  if (tc.function?.name) toolCallAccumulator[idx].name = tc.function.name;
1508
1917
  if (tc.function?.arguments) toolCallAccumulator[idx].arguments += tc.function.arguments;
@@ -1513,9 +1922,17 @@ async function init() {
1513
1922
  }
1514
1923
 
1515
1924
  const toolCalls = Object.values(toolCallAccumulator).filter(tc => tc.id && tc.name);
1516
- logger.log(`[assistant] round ${round} done — content: ${fullContent.length} chars, tool_calls: ${toolCalls.length}`);
1925
+ logger.log(`[assistant] round ${round} done — ${fullContent.length} chars, ${toolCalls.length} tool calls`);
1517
1926
 
1518
1927
  if (toolCalls.length === 0) {
1928
+ // 最终回复,追加到对话历史(跳过空响应避免 null content 污染历史)
1929
+ if (fullContent || reasoningContent) {
1930
+ const assistantMsg = { role: 'assistant', content: fullContent || null };
1931
+ if (reasoningContent) assistantMsg.reasoning_content = reasoningContent;
1932
+ conv.messages.push(assistantMsg);
1933
+ }
1934
+ currentTokens = estimateConversationTokens(buildMessages());
1935
+ sendContext();
1519
1936
  safeSSE('done', { reasoning_content: reasoningContent || undefined });
1520
1937
  break;
1521
1938
  }
@@ -1525,52 +1942,122 @@ async function init() {
1525
1942
  reasoning_content: reasoningContent || undefined,
1526
1943
  calls: toolCalls.map(tc => {
1527
1944
  let args = {};
1528
- try { args = JSON.parse(tc.arguments); } catch {}
1945
+ try { args = JSON.parse(tc.arguments); } catch (e) {
1946
+ logger.log(`[assistant] tool_calls args parse error (${tc.name}): ${e.message}, raw: ${(tc.arguments || '').slice(0, 200)}`);
1947
+ args = { _raw: tc.arguments, _parseError: true };
1948
+ }
1529
1949
  return { id: tc.id, name: tc.name, arguments: args };
1530
1950
  }),
1531
1951
  });
1532
1952
 
1533
- // 追加 assistant 消息到对话历史
1953
+ // 追加 assistant(tool_calls) 到对话历史
1534
1954
  const assistantMsg = {
1535
1955
  role: 'assistant',
1536
1956
  content: fullContent || null,
1537
- tool_calls: toolCalls.map(tc => ({
1538
- id: tc.id,
1539
- type: 'function',
1540
- function: { name: tc.name, arguments: tc.arguments },
1541
- })),
1957
+ tool_calls: toolCalls.map(tc => ({ id: tc.id, type: 'function', function: { name: tc.name, arguments: tc.arguments } })),
1542
1958
  };
1543
1959
  if (reasoningContent) assistantMsg.reasoning_content = reasoningContent;
1544
- conversationMessages.push(assistantMsg);
1960
+ conv.messages.push(assistantMsg);
1545
1961
 
1546
1962
  // 执行工具
1547
1963
  for (const tc of toolCalls) {
1548
1964
  let args = {};
1549
- try { args = JSON.parse(tc.arguments); } catch {}
1550
- logger.log(`[assistant] EXEC tool: ${tc.name}(${JSON.stringify(args)})`);
1965
+ let argsParseError = false;
1966
+ try { args = JSON.parse(tc.arguments); } catch (e) {
1967
+ logger.log(`[assistant] tool args parse error (${tc.name}): ${e.message}`);
1968
+ argsParseError = true;
1969
+ }
1970
+ logger.log(`[assistant] EXEC tool: ${tc.name}`);
1551
1971
  let result;
1552
- try {
1972
+ let isError = false;
1973
+ if (argsParseError) {
1974
+ result = { error: `工具 ${tc.name} 的参数 JSON 解析失败,原始内容: ${(tc.arguments || '').slice(0, 200)}` };
1975
+ isError = true;
1976
+ } else try {
1553
1977
  result = await TOOL_HANDLERS[tc.name]?.(args) || { error: `未知工具: ${tc.name}` };
1978
+ if (result && result.error) isError = true;
1554
1979
  } catch (err) {
1555
1980
  logger.log(`[assistant] tool ${tc.name} error: ${err.message}`);
1556
1981
  result = { error: err.message };
1982
+ isError = true;
1557
1983
  }
1558
-
1984
+ result = truncateOutput(result);
1559
1985
  const resultStr = JSON.stringify(result);
1560
- logger.log(`[assistant] tool ${tc.name} done: ${resultStr.length} chars`);
1561
- safeSSE('tool_result', { tool_call_id: tc.id, name: tc.name, result });
1986
+ logger.log(`[assistant] tool ${tc.name} done: ${resultStr.length} chars${isError ? ' (error)' : ''}`);
1987
+ safeSSE('tool_result', { tool_call_id: tc.id, name: tc.name, result, is_error: isError });
1988
+ conv.messages.push({ role: 'tool', tool_call_id: tc.id, content: isError ? `[ERROR] ${resultStr}` : resultStr });
1989
+ }
1562
1990
 
1563
- conversationMessages.push({
1564
- role: 'tool',
1565
- tool_call_id: tc.id,
1566
- content: resultStr,
1567
- });
1991
+ // token 检查 + 压缩
1992
+ currentTokens = estimateConversationTokens(buildMessages());
1993
+ sendContext();
1994
+ if (currentTokens >= MAX_CONTEXT * 0.8) {
1995
+ logger.log(`[assistant] 上下文 ${Math.round(currentTokens / MAX_CONTEXT * 100)}%,自动压缩`);
1996
+ safeSSE('compressing', {});
1997
+ const compResult = await compressConversation(conv, MAX_CONTEXT, proxyUrl, proxyHeaders, proxy.defaultModel);
1998
+ if (compResult) {
1999
+ conv.messages = compResult.messages;
2000
+ conv.compressionSummary = compResult.summary;
2001
+ conversationStore.touch(conv);
2002
+ currentTokens = compResult.newTokens;
2003
+ safeSSE('compressed', { summary: compResult.summary, removedCount: compResult.removedCount, tokens: currentTokens, maxTokens: MAX_CONTEXT, messages: conv.messages.length });
2004
+ sendContext();
2005
+ logger.log(`[assistant] 压缩完成 — 移除 ${compResult.removedCount} 条`);
2006
+ }
1568
2007
  }
1569
- // 继续下一轮
1570
2008
  }
1571
2009
 
1572
- // 循环正常结束(达到最大轮次)
1573
- safeSSE('done', {});
2010
+ // 达到最大轮次 → 总结回复
2011
+ logger.log(`[assistant] max rounds reached, requesting summary`);
2012
+ try {
2013
+ const summaryRes = await fetch(proxyUrl, {
2014
+ method: 'POST',
2015
+ headers: proxyHeaders,
2016
+ signal: AbortSignal.timeout(120000),
2017
+ body: JSON.stringify({
2018
+ model: proxy.defaultModel || 'gpt-4o',
2019
+ messages: [
2020
+ ...buildMessages(),
2021
+ { role: 'system', content: '你已达到最大工具调用轮次限制(' + MAX_TOOL_ROUNDS + ' 轮),无法继续调用工具。请基于已获取的信息给出回复,并明确告知用户:由于达到工具调用轮次上限,信息获取可能不完整或操作被迫中断。如果还有未完成的工作,请说明并建议用户重新提问以继续。' },
2022
+ ],
2023
+ stream: true,
2024
+ }),
2025
+ });
2026
+ if (summaryRes.ok) {
2027
+ const sr = summaryRes.body.getReader();
2028
+ const sd = new TextDecoder();
2029
+ let sb = '';
2030
+ let summaryContent = '';
2031
+ let summaryReasoning = '';
2032
+ while (true) {
2033
+ const { done: finished, value: v } = await sr.read();
2034
+ if (finished) break;
2035
+ sb += sd.decode(v, { stream: true });
2036
+ const lines = sb.split('\n');
2037
+ sb = lines.pop();
2038
+ for (const line of lines) {
2039
+ const t = line.trim();
2040
+ if (!t || !t.startsWith('data: ') || t === 'data: [DONE]') continue;
2041
+ try {
2042
+ const chunk = JSON.parse(t.slice(6));
2043
+ const delta = chunk.choices?.[0]?.delta;
2044
+ if (!delta) continue;
2045
+ if (delta.content) { summaryContent += delta.content; safeSSE('content', { delta: delta.content }); }
2046
+ if (delta.reasoning_content) summaryReasoning += delta.reasoning_content;
2047
+ } catch {}
2048
+ }
2049
+ }
2050
+ // 追加总结到对话历史
2051
+ const summaryMsg = { role: 'assistant', content: summaryContent || null };
2052
+ if (summaryReasoning) summaryMsg.reasoning_content = summaryReasoning;
2053
+ conv.messages.push(summaryMsg);
2054
+ safeSSE('done', { reasoning_content: summaryReasoning || undefined });
2055
+ } else {
2056
+ safeSSE('done', {});
2057
+ }
2058
+ } catch {
2059
+ safeSSE('done', {});
2060
+ }
1574
2061
  } catch (err) {
1575
2062
  logger.log(`[assistant] error: ${err.message}`);
1576
2063
  if (!res.headersSent) {
@@ -1579,6 +2066,8 @@ async function init() {
1579
2066
  safeSSE('error', { message: err.message });
1580
2067
  }
1581
2068
  } finally {
2069
+ activeStreams.delete(convId);
2070
+ conversationStore.touch(conv); // 保存最终对话状态
1582
2071
  res.end();
1583
2072
  }
1584
2073
  });