@templmf/temp-solf-lmf 0.0.43 → 0.0.45
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ai-gateway/.env +42 -0
- package/ai-gateway/README.md +295 -0
- package/ai-gateway/package-lock.json +1370 -0
- package/ai-gateway/package.json +18 -0
- package/ai-gateway/src/index.js +132 -0
- package/ai-gateway/src/middleware/auth.js +45 -0
- package/ai-gateway/src/middleware/rateLimit.js +87 -0
- package/ai-gateway/src/routes/chat.js +657 -0
- package/ai-gateway/src/skills/detector.js +145 -0
- package/ai-gateway/src/skills/html.md +18 -0
- package/ai-gateway/src/skills/markdown.md +18 -0
- package/ai-gateway/src/skills/react.md +27 -0
- package/ai-gateway/src/skills/registry.js +441 -0
- package/ai-gateway/src/skills/skill-creator/LICENSE.txt +202 -0
- package/ai-gateway/src/skills/skill-creator/SKILL.md +485 -0
- package/ai-gateway/src/skills/skill-creator/agents/analyzer.md +274 -0
- package/ai-gateway/src/skills/skill-creator/agents/comparator.md +202 -0
- package/ai-gateway/src/skills/skill-creator/agents/grader.md +223 -0
- package/ai-gateway/src/skills/skill-creator/assets/eval_review.html +146 -0
- package/ai-gateway/src/skills/skill-creator/eval-viewer/generate_review.py +471 -0
- package/ai-gateway/src/skills/skill-creator/eval-viewer/viewer.html +1325 -0
- package/ai-gateway/src/skills/skill-creator/references/schemas.md +430 -0
- package/ai-gateway/src/skills/skill-creator/scripts/__init__.py +0 -0
- package/ai-gateway/src/skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
- package/ai-gateway/src/skills/skill-creator/scripts/generate_report.py +326 -0
- package/ai-gateway/src/skills/skill-creator/scripts/improve_description.py +247 -0
- package/ai-gateway/src/skills/skill-creator/scripts/package_skill.py +136 -0
- package/ai-gateway/src/skills/skill-creator/scripts/quick_validate.py +103 -0
- package/ai-gateway/src/skills/skill-creator/scripts/run_eval.py +310 -0
- package/ai-gateway/src/skills/skill-creator/scripts/run_loop.py +328 -0
- package/ai-gateway/src/skills/skill-creator/scripts/utils.py +47 -0
- package/ai-gateway/src/skills/skill-creator/skill-creator.skill +0 -0
- package/ai-gateway/src/skills/ticket.md +36 -0
- package/ai-gateway/src/skills/vue.md +31 -0
- package/ai-gateway/src/utils/logger.js +21 -0
- package/ai-gateway/src/utils/retry.js +90 -0
- package/ai-gateway/src/utils/sessionManager.js +159 -0
- package/ai-gateway/src/utils/structuredResponse.js +144 -0
- package/ai-gateway/src/utils/toolAdapter.js +151 -0
- package/package.json +1 -1
- package//345/216/213/347/274/251/345/220/216/347/232/204/346/226/207/344/273/266.7z +0 -0
- package/skill-mcp/README.md +0 -74
- package/skill-mcp/index.ts +0 -336
- package/skill-mcp/package (1).json +0 -19
- package/skill-mcp/tsconfig.json +0 -16
- package//347/247/273/345/212/250/345/272/224/347/224/250/345/217/260/350/264/246/347/247/273/344/272/244/346/270/205/345/215/225.xlsx +0 -0
|
@@ -0,0 +1,657 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* POST /v1/chat/completions
|
|
3
|
+
*
|
|
4
|
+
* 完全兼容 OpenAI Chat Completions 格式的核心接口。
|
|
5
|
+
* 集成:Skill 自动加载 / 结构化响应 / Tool Use 适配 /
|
|
6
|
+
* 会话上下文管理 / 重试 / Streaming SSE
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { Router } from "express";
|
|
10
|
+
import OpenAI from "openai";
|
|
11
|
+
import { v4 as uuidv4 } from "uuid";
|
|
12
|
+
|
|
13
|
+
import {
|
|
14
|
+
detectSkills,
|
|
15
|
+
detectSkillsByKeyword,
|
|
16
|
+
detectSkillsWithAI,
|
|
17
|
+
buildSkillPrompt
|
|
18
|
+
} from "../skills/registry.js";
|
|
19
|
+
import {
|
|
20
|
+
getModelCapabilities,
|
|
21
|
+
adaptTools,
|
|
22
|
+
stripThinkingBlock,
|
|
23
|
+
parseToolFallbackResponse
|
|
24
|
+
} from "../utils/toolAdapter.js";
|
|
25
|
+
import {
|
|
26
|
+
buildResponseFormatPrompt,
|
|
27
|
+
parseStructuredResponse,
|
|
28
|
+
wrapStructuredChoice
|
|
29
|
+
} from "../utils/structuredResponse.js";
|
|
30
|
+
import {
|
|
31
|
+
createSession,
|
|
32
|
+
getSession,
|
|
33
|
+
getSessionMessages,
|
|
34
|
+
appendMessages,
|
|
35
|
+
updateSessionMetadata,
|
|
36
|
+
compressIfNeeded
|
|
37
|
+
} from "../utils/sessionManager.js";
|
|
38
|
+
import {
|
|
39
|
+
withRetry,
|
|
40
|
+
buildJsonCorrectionMessages
|
|
41
|
+
} from "../utils/retry.js";
|
|
42
|
+
import { logger } from "../utils/logger.js";
|
|
43
|
+
|
|
44
|
+
export const chatRouter = Router();

// ─────────────────────────────────────────────────────
// Upstream model client (internal Qwen service)
// ─────────────────────────────────────────────────────
const upstreamClient = new OpenAI({
  apiKey: process.env.UPSTREAM_API_KEY || "none",
  baseURL: process.env.UPSTREAM_BASE_URL || "http://localhost:8000/v1",
  // Fix: always pass an explicit radix to parseInt.
  timeout: Number.parseInt(process.env.UPSTREAM_TIMEOUT_MS || "120000", 10),
  maxRetries: 0 // retries are handled by withRetry, not by the SDK
});
|
55
|
+
|
|
56
|
+
// ─────────────────────────────────────────────────────
|
|
57
|
+
// POST /v1/chat/completions
|
|
58
|
+
// ─────────────────────────────────────────────────────
|
|
59
|
+
chatRouter.post("/", async (req, res) => {
  const requestId = uuidv4();
  const startTime = Date.now();

  // ── 1. Parse request body ─────────────────────────
  const {
    model,
    messages: clientMessages,
    system,
    tools,
    tool_choice,
    stream = false,
    max_tokens,
    temperature,
    top_p,
    session_id,
    response_format,
    x_skill_override,
    x_keep_thinking = false
  } = req.body;

  if (!clientMessages || !Array.isArray(clientMessages) || clientMessages.length === 0) {
    return res.status(400).json({
      error: {
        message: "messages is required and must be a non-empty array",
        type: "invalid_request_error"
      }
    });
  }

  const resolvedModel = model || process.env.DEFAULT_MODEL || "qwen2.5-72b-instruct";
  const caps = getModelCapabilities(resolvedModel);

  // ── 2. Session management ─────────────────────────
  let resolvedSessionId = session_id;
  let historyMessages = [];

  if (resolvedSessionId) {
    const session = getSession(resolvedSessionId);
    if (session) {
      historyMessages = getSessionMessages(resolvedSessionId);
    } else {
      // Unknown or expired session: transparently start a fresh one.
      resolvedSessionId = createSession(req.clientId);
      logger.info("Session expired, created new", { requestId, newSessionId: resolvedSessionId });
    }
  } else {
    resolvedSessionId = createSession(req.clientId);
  }

  const allMessages = [...historyMessages, ...clientMessages];
  // Fix: explicit radix for parseInt.
  const { messages: finalMessages, compressed } = compressIfNeeded(
    allMessages,
    Number.parseInt(process.env.MAX_CONTEXT_TOKENS || "6000", 10)
  );

  // ── 3. Skill detection ────────────────────────────
  // skillDetectMethod is the single canonical variable for the detection
  // mode ("override" | "ai" | "keyword" | "none") used through the request.
  let skillNames = [];
  let skillDetectMethod = "none";

  if (x_skill_override) {
    skillNames = Array.isArray(x_skill_override) ? x_skill_override : [x_skill_override];
    skillDetectMethod = "override";
  } else if (process.env.USE_AI_SKILL_DETECTION === "true") {
    // AI semantic detection. Fix: if this auxiliary AI call fails, fall back
    // to keyword matching instead of failing the whole chat request.
    try {
      const aiResult = await detectSkillsWithAI(finalMessages, upstreamClient, resolvedModel);
      skillNames = aiResult.names;
      skillDetectMethod = skillNames.length > 0 ? "ai" : "none";
    } catch (detectErr) {
      logger.warn("AI skill detection failed, falling back to keyword", {
        requestId,
        error: detectErr.message
      });
      skillNames = detectSkills(finalMessages);
      skillDetectMethod = skillNames.length > 0 ? "keyword" : "none";
    }
  } else {
    // Keyword matching (default; supports combination rules).
    skillNames = detectSkills(finalMessages);
    skillDetectMethod = skillNames.length > 0 ? "keyword" : "none";
  }

  const skillPrompt = buildSkillPrompt(skillNames);

  // ── 4. Structured response-format prompt ──────────
  const formatPrompt = buildResponseFormatPrompt(response_format);

  // ── 5. Assemble the system prompt ─────────────────
  const baseSystem = system || "你是一个智能助手,擅长理解用户需求并提供精准、实用的回答。";
  const finalSystem = `${baseSystem}${skillPrompt}${formatPrompt}`;

  // ── 6. Tool-use adaptation ────────────────────────
  // Models without native tool support get a prose fallback prompt instead.
  const { tools: adaptedTools, toolFallbackPrompt } = adaptTools(tools, resolvedModel);
  const systemWithTools = toolFallbackPrompt
    ? finalSystem + toolFallbackPrompt
    : finalSystem;

  // ── 7. Upstream request payload ───────────────────
  const upstreamPayload = {
    model: resolvedModel,
    messages: [
      { role: "system", content: systemWithTools },
      ...finalMessages
    ],
    stream,
    ...(adaptedTools?.length && { tools: adaptedTools }),
    ...(tool_choice && adaptedTools?.length && { tool_choice }),
    ...(max_tokens && { max_tokens }),
    ...(temperature !== undefined && { temperature }),
    ...(top_p !== undefined && { top_p }),
    ...(caps.requiresThinkingBlock && {
      extra_body: { enable_thinking: true }
    })
  };

  logger.info("Chat request", {
    requestId,
    clientId: req.clientId,
    sessionId: resolvedSessionId,
    model: resolvedModel,
    historyCount: historyMessages.length,
    skillsDetected: skillNames,
    skillDetectMethod,
    stream,
    responseFormat: response_format?.type || response_format || null,
    contextCompressed: compressed
  });

  // ── 8. Dispatch ───────────────────────────────────
  if (stream) {
    return handleStream(res, upstreamClient, upstreamPayload, {
      requestId, resolvedModel, resolvedSessionId,
      skillNames, skillDetectMethod, clientMessages, x_keep_thinking, caps
    });
  }

  return handleNonStream(res, upstreamClient, upstreamPayload, {
    requestId, resolvedModel, resolvedSessionId,
    skillNames, skillDetectMethod, clientMessages, finalMessages,
    response_format, x_keep_thinking, caps, startTime
  });
});
|
|
193
|
+
|
|
194
|
+
// ─────────────────────────────────────────────────────
|
|
195
|
+
// 非流式响应处理
|
|
196
|
+
// ─────────────────────────────────────────────────────
|
|
197
|
+
/**
 * Handle a non-streaming chat completion.
 *
 * Pipeline: upstream call (with retry) → strip <think> blocks →
 * tool-fallback parsing → structured-output parsing (with one JSON
 * correction round-trip on failure) → session persistence → response
 * envelope with `_gateway` metadata.
 *
 * @param {object} res     Express response.
 * @param {object} client  OpenAI-compatible upstream client.
 * @param {object} payload Upstream request body; payload.messages[0] is the system message.
 * @param {object} ctx     Per-request context assembled by the route handler.
 */
async function handleNonStream(res, client, payload, ctx) {
  const {
    requestId, resolvedModel, resolvedSessionId,
    skillNames, skillDetectMethod, clientMessages, finalMessages,
    response_format, x_keep_thinking, caps, startTime
  } = ctx;

  // response_format may be a bare string or an object with a `type` field.
  const responseFormatType = response_format?.type || response_format;
  const isJsonFormat = ["json_object", "json_schema", "ticket_form"].includes(responseFormatType);

  try {
    let completion = await withRetry(
      () => client.chat.completions.create(payload),
      { requestId, label: "upstream_call", maxRetries: 2 }
    );

    // Strip <think> blocks from every choice (kept when x_keep_thinking is true).
    // Note: this mutates choice.message.content in place on the completion object.
    let processedChoices = completion.choices.map(choice => {
      if (choice.message?.content) {
        choice.message.content = stripThinkingBlock(
          choice.message.content, x_keep_thinking
        );
      }
      return choice;
    });

    // Tool-fallback parsing: when no native tools were sent but the system
    // prompt carries the prose tool instructions, recover tool calls from text.
    if (!payload.tools && systemWithToolsCheck(payload)) {
      processedChoices = processedChoices.map(choice => {
        const fallback = parseToolFallbackResponse(choice.message?.content || "");
        return fallback ? { ...choice, message: fallback } : choice;
      });
    }

    // Structured-output parsing; on failure, do one correction round-trip.
    if (response_format && isJsonFormat) {
      const firstParseResult = parseStructuredResponse(
        processedChoices[0]?.message?.content || "",
        response_format
      );

      if (!firstParseResult.success) {
        logger.warn("Structured parse failed, retrying with correction", { requestId });

        const correctedMessages = buildJsonCorrectionMessages(
          finalMessages,
          processedChoices[0]?.message?.content || "",
          response_format.schema
        );

        // Keep the original system message, replace the conversation tail.
        const correctionPayload = {
          ...payload,
          messages: [payload.messages[0], ...correctedMessages]
        };

        const corrected = await withRetry(
          () => client.chat.completions.create(correctionPayload),
          { requestId, label: "json_correction", maxRetries: 1 }
        );

        completion = corrected;
        processedChoices = corrected.choices.map(choice => {
          if (choice.message?.content) {
            choice.message.content = stripThinkingBlock(
              choice.message.content, x_keep_thinking
            );
          }
          return choice;
        });
      }

      // Re-parse the (possibly corrected) choices and attach the parsed payload.
      processedChoices = processedChoices.map(choice => {
        const result = parseStructuredResponse(
          choice.message?.content || "", response_format
        );
        return wrapStructuredChoice(choice, result, response_format);
      });

      // Persist ticket drafts so later turns can continue the same form.
      if (responseFormatType === "ticket_form") {
        const parsed = processedChoices[0]?.message?.parsed;
        if (parsed?.form_draft) {
          updateSessionMetadata(resolvedSessionId, { ticket_draft: parsed.form_draft });
        }
      }
    }

    // Persist the exchange into the session (first choice only).
    appendMessages(resolvedSessionId, [
      ...clientMessages,
      {
        role: "assistant",
        content: processedChoices[0]?.message?.content || ""
      }
    ]);

    const matchedMcps = extractMcpMatches(processedChoices[0]?.message?.tool_calls);

    // OpenAI-compatible body, plus gateway-specific metadata under `_gateway`.
    const response = {
      ...completion,
      choices: processedChoices,
      _gateway: {
        request_id: requestId,
        session_id: resolvedSessionId,
        matched: buildMatchedField(skillNames, skillDetectMethod, matchedMcps),
        skills_loaded: skillNames,
        model_caps: {
          toolSupport: caps.toolSupport,
          requiresThinkingBlock: caps.requiresThinkingBlock
        },
        latency_ms: Date.now() - startTime
      }
    };

    logger.info("Chat completed", {
      requestId,
      sessionId: resolvedSessionId,
      model: resolvedModel,
      skillsLoaded: skillNames,
      latencyMs: Date.now() - startTime,
      promptTokens: completion.usage?.prompt_tokens,
      completionTokens: completion.usage?.completion_tokens
    });

    res.json(response);

  } catch (err) {
    handleUpstreamError(err, res, requestId);
  }
}
|
|
326
|
+
|
|
327
|
+
// ─────────────────────────────────────────────────────
|
|
328
|
+
// 流式响应处理(SSE)
|
|
329
|
+
// ─────────────────────────────────────────────────────
|
|
330
|
+
/**
 * Handle a streaming chat completion over SSE.
 *
 * Gateway metadata is surfaced in X-Gateway-* headers and in a leading
 * `type: "session"` chunk before upstream chunks are relayed. Tool-call
 * fragments are accumulated locally so the completed turn can be inspected
 * after the stream ends.
 *
 * @param {object} res     Express response (SSE).
 * @param {object} client  OpenAI-compatible upstream client.
 * @param {object} payload Upstream request body with stream: true.
 * @param {object} ctx     Per-request context assembled by the route handler.
 */
async function handleStream(res, client, payload, ctx) {
  const {
    requestId, resolvedModel, resolvedSessionId,
    skillNames, skillDetectMethod, clientMessages, x_keep_thinking
  } = ctx;

  res.setHeader("Content-Type", "text/event-stream");
  res.setHeader("Cache-Control", "no-cache");
  res.setHeader("Connection", "keep-alive");
  res.setHeader("X-Gateway-Request-Id", requestId);
  res.setHeader("X-Gateway-Session-Id", resolvedSessionId);
  res.setHeader("X-Gateway-Skills", skillNames.join(","));
  res.setHeader("X-Gateway-Skill-Detect", skillDetectMethod);
  res.flushHeaders();

  // First event: push session / skill-match info before any model output.
  res.write(`data: ${JSON.stringify({
    id: requestId,
    object: "chat.completion.chunk",
    type: "session",
    session_id: resolvedSessionId,
    matched: buildMatchedField(skillNames, skillDetectMethod, []),
    skills_loaded: skillNames
  })}\n\n`);

  let fullContent = "";
  let insideThinkBlock = false;
  const toolCallsAcc = {};

  try {
    const streamResponse = await withRetry(
      () => client.chat.completions.create(payload),
      { requestId, label: "stream_request", maxRetries: 1 }
    );

    for await (const chunk of streamResponse) {
      const delta = chunk.choices?.[0]?.delta || {};

      // Accumulate tool_call fragments (id arrives once, name/arguments
      // arrive incrementally, keyed by index).
      if (delta.tool_calls) {
        for (const tc of delta.tool_calls) {
          const idx = tc.index ?? 0;
          if (!toolCallsAcc[idx]) {
            toolCallsAcc[idx] = {
              id: tc.id || "",
              type: "function",
              function: { name: "", arguments: "" }
            };
          }
          if (tc.id) toolCallsAcc[idx].id = tc.id;
          if (tc.function?.name) toolCallsAcc[idx].function.name += tc.function.name;
          if (tc.function?.arguments) toolCallsAcc[idx].function.arguments += tc.function.arguments;
        }
      }

      // Filter text deltas inside <think>…</think> unless the caller opted in.
      // NOTE(review): a chunk that mixes visible text with a <think>/</think>
      // marker is dropped wholesale — assumes upstream emits the markers in
      // their own deltas; confirm against the upstream tokenizer behavior.
      const textDelta = delta.content || "";
      if (textDelta && !x_keep_thinking) {
        if (textDelta.includes("<think>")) insideThinkBlock = true;
        if (insideThinkBlock) {
          if (textDelta.includes("</think>")) insideThinkBlock = false;
          continue;
        }
      }

      fullContent += textDelta;
      res.write(`data: ${JSON.stringify(chunk)}\n\n`);
    }

    const cleanContent = stripThinkingBlock(fullContent, x_keep_thinking);
    const toolCallsList = Object.values(toolCallsAcc).filter(tc => tc.function?.name);

    // tool_calls are executed and tracked by the client, which owns the
    // message history for those turns. The gateway session stores only plain
    // text replies, never tool_calls, so the next turn's history cannot
    // contain dangling tool_calls with no matching tool response.
    if (toolCallsList.length === 0) {
      appendMessages(resolvedSessionId, [
        ...clientMessages,
        { role: "assistant", content: cleanContent || "" }
      ]);
    }
    // When tool_calls are present, the session is intentionally not updated.

    res.write("data: [DONE]\n\n");
    res.end();

    logger.info("Stream completed", {
      requestId,
      sessionId: resolvedSessionId,
      model: resolvedModel,
      skillsLoaded: skillNames,
      toolCallCount: toolCallsList.length
    });

  } catch (err) {
    handleUpstreamStreamError(err, res, requestId);
  }
}
|
|
428
|
+
|
|
429
|
+
// ─────────────────────────────────────────────────────
|
|
430
|
+
// 辅助:判断是否启用了 tool fallback prompt
|
|
431
|
+
// ─────────────────────────────────────────────────────
|
|
432
|
+
/**
 * Check whether the payload's system message carries the prose tool-fallback
 * instructions (both required markers present), meaning tool calls must be
 * recovered from plain text instead of native tool_calls.
 *
 * @param {object} payload Upstream request body; messages[0] is the system message.
 * @returns {boolean} true when the fallback prompt markers are present.
 */
function systemWithToolsCheck(payload) {
  const sys = payload.messages?.[0]?.content || "";
  return ["tool_call", "工具名"].every((marker) => sys.includes(marker));
}
|
|
436
|
+
|
|
437
|
+
// ─────────────────────────────────────────────────────
|
|
438
|
+
// 上游错误翻译
|
|
439
|
+
// 把上游原始错误转成对用户友好的结构化信息
|
|
440
|
+
// ─────────────────────────────────────────────────────
|
|
441
|
+
/**
 * Translate a raw upstream error into a user-friendly, structured error.
 *
 * Categories are checked in priority order: quota → auth → model-not-found →
 * rate limit → timeout → upstream unreachable → unknown (normalized shape).
 *
 * @param {Error|object} err Raw error from the upstream SDK / transport.
 * @returns {{status: number, message: string, type: string, code: (string|null), hint: (string|null)}}
 */
function translateUpstreamError(err) {
  const raw = err.message || "";
  const status = err.status || err.statusCode || 500;
  const code = err.code || null;

  // true when the raw message contains any of the given substrings
  const rawHas = (...needles) => needles.some((needle) => raw.includes(needle));
  // hint keeps the raw upstream message for troubleshooting
  const shape = (httpStatus, message, type, errorCode, hint = raw) => ({
    status: httpStatus,
    message,
    type,
    code: errorCode,
    hint
  });

  // Quota / free tier exhausted
  if (
    status === 403 ||
    rawHas("free tier", "has been exhausted", "quota", "insufficient_quota") ||
    code === "insufficient_quota"
  ) {
    return shape(
      403,
      "上游模型配额已耗尽(免费额度用完)。请在模型管理控制台关闭「仅免费层」模式,或充值后重试。",
      "quota_exceeded",
      "upstream_quota_exhausted"
    );
  }

  // Authentication failure
  if (
    status === 401 ||
    rawHas("Incorrect API key", "invalid_api_key") ||
    code === "invalid_api_key"
  ) {
    return shape(
      401,
      "上游 API Key 无效或已过期,请检查 UPSTREAM_API_KEY 配置。",
      "auth_error",
      "upstream_auth_failed"
    );
  }

  // Model missing / not deployed
  if (
    status === 404 ||
    (rawHas("model") && rawHas("not found")) ||
    rawHas("does not exist") ||
    code === "model_not_found"
  ) {
    return shape(
      404,
      `上游模型不存在或未部署,请检查 DEFAULT_MODEL / model 参数配置。原始信息:${raw}`,
      "model_not_found",
      "upstream_model_not_found"
    );
  }

  // Rate limited
  if (
    status === 429 ||
    rawHas("rate limit", "too many requests") ||
    code === "rate_limit_exceeded"
  ) {
    return shape(
      429,
      "上游服务限流,请求过于频繁,请稍后重试。",
      "rate_limit_error",
      "upstream_rate_limit"
    );
  }

  // Upstream timeout
  if (
    status === 408 || status === 504 ||
    rawHas("timeout", "timed out") ||
    err.code === "ECONNABORTED"
  ) {
    return shape(
      504,
      "上游模型响应超时,请稍后重试或减少输入长度。",
      "timeout_error",
      "upstream_timeout"
    );
  }

  // Upstream unreachable / 5xx
  if (
    (status >= 500 && status < 600) ||
    err.code === "ECONNREFUSED" ||
    err.code === "ENOTFOUND"
  ) {
    return shape(
      502,
      `上游服务暂时不可用(${status}),请检查 UPSTREAM_BASE_URL 或稍后重试。`,
      "upstream_error",
      "upstream_unavailable"
    );
  }

  // Unknown — pass through, but guarantee a uniform shape
  return shape(
    status < 100 ? 500 : status,
    raw || "未知错误,请稍后重试。",
    err.type || "api_error",
    code,
    null
  );
}
|
|
552
|
+
|
|
553
|
+
// ─────────────────────────────────────────────────────
|
|
554
|
+
// 上游错误统一处理(非流式)
|
|
555
|
+
// ─────────────────────────────────────────────────────
|
|
556
|
+
/**
 * Unified upstream-error handling for non-streaming requests.
 * Translates the raw error, logs it, and replies with an OpenAI-style JSON
 * error envelope; the raw upstream detail is only exposed outside production.
 *
 * @param {Error|object} err  Raw upstream error.
 * @param {object} res        Express response.
 * @param {string} requestId  Gateway request id for correlation.
 */
function handleUpstreamError(err, res, requestId) {
  const { status, message, type, code, hint } = translateUpstreamError(err);

  logger.error("Upstream error", {
    requestId,
    status,
    type,
    code,
    rawError: hint || err.message
  });

  const errorBody = { message, type, code, request_id: requestId };
  if (hint && process.env.NODE_ENV !== "production") {
    errorBody.upstream_detail = hint;
  }

  res.status(status).json({ error: errorBody });
}
|
|
579
|
+
|
|
580
|
+
// ─────────────────────────────────────────────────────
|
|
581
|
+
// 上游错误统一处理(流式 SSE)
|
|
582
|
+
// SSE 已经发了 headers,只能通过 data 帧传递错误
|
|
583
|
+
// ─────────────────────────────────────────────────────
|
|
584
|
+
/**
 * Unified upstream-error handling for streaming (SSE) requests.
 * The SSE headers are already flushed, so the error can only be delivered
 * as a data frame before closing the stream.
 *
 * @param {Error|object} err  Raw upstream error.
 * @param {object} res        Express response (SSE, headers already sent).
 * @param {string} requestId  Gateway request id for correlation.
 */
function handleUpstreamStreamError(err, res, requestId) {
  const { status, message, type, code, hint } = translateUpstreamError(err);

  logger.error("Stream upstream error", {
    requestId,
    status,
    type,
    code,
    rawError: hint || err.message
  });

  const errorBody = { message, type, code, request_id: requestId };
  if (hint && process.env.NODE_ENV !== "production") {
    errorBody.upstream_detail = hint;
  }

  res.write(`data: ${JSON.stringify({ error: errorBody })}\n\n`);
  res.end();
}
|
|
608
|
+
|
|
609
|
+
// ─────────────────────────────────────────────────────
|
|
610
|
+
// 从 tool_calls 里提取命中的 MCP server 信息
|
|
611
|
+
// mcp__<server-name>__<tool-name> 格式
|
|
612
|
+
// ─────────────────────────────────────────────────────
|
|
613
|
+
/**
 * Extract matched MCP server info from a list of tool calls.
 * Tool names follow the `mcp__<server-name>__<tool-name>` convention;
 * names that do not match are ignored.
 *
 * @param {Array|null|undefined} toolCalls Tool calls from the assistant message.
 * @returns {Array<{server: string, tools_called: string[]}>} grouped by server.
 */
function extractMcpMatches(toolCalls) {
  if (!toolCalls || toolCalls.length === 0) return [];

  // The server segment may contain single underscores; `__` is the separator.
  const MCP_NAME_PATTERN = /^mcp__([^_]+(?:_[^_]+)*)__(.+)$/;
  const byServer = new Map();

  for (const call of toolCalls) {
    const rawName = call?.function?.name || call?.name || "";
    const parsed = MCP_NAME_PATTERN.exec(rawName);
    if (!parsed) continue;

    const [, server, tool] = parsed;
    if (!byServer.has(server)) {
      byServer.set(server, { server, tools_called: [] });
    }
    byServer.get(server).tools_called.push(tool);
  }

  return [...byServer.values()];
}
|
|
632
|
+
|
|
633
|
+
// ─────────────────────────────────────────────────────
|
|
634
|
+
// 构建 matched 字段
|
|
635
|
+
// 只在有命中时填充,未命中时返回 null,流程照常继续
|
|
636
|
+
// ─────────────────────────────────────────────────────
|
|
637
|
+
/**
 * Build the `matched` field for the gateway response.
 * Only populated when something was actually matched; returns null otherwise
 * so the normal flow is unaffected.
 *
 * @param {string[]} skillNames   Skills loaded for this request.
 * @param {string} detectMethod   "keyword" | "ai" | "override".
 * @param {Array<{server: string, tools_called: string[]}>} matchedMcps MCP hits.
 * @returns {object|null} matched summary, or null when nothing matched.
 */
function buildMatchedField(skillNames, detectMethod, matchedMcps) {
  const matched = {};

  if (skillNames?.length) {
    matched.skills = skillNames.map((name) => ({
      name,
      detection_method: detectMethod
    }));
  }

  if (matchedMcps?.length) {
    matched.mcps = matchedMcps.map(({ server, tools_called }) => ({
      server,
      tools_called
    }));
  }

  return Object.keys(matched).length > 0 ? matched : null;
}
|