@pedrofariasx/qwenproxy 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +13 -0
- package/README.md +292 -0
- package/bin/qwenproxy.mjs +11 -0
- package/package.json +56 -0
- package/src/api/models.ts +183 -0
- package/src/api/server.ts +126 -0
- package/src/cache/memory-cache.ts +186 -0
- package/src/core/account-manager.ts +132 -0
- package/src/core/accounts.ts +78 -0
- package/src/core/config.ts +91 -0
- package/src/core/database.ts +92 -0
- package/src/core/logger.ts +96 -0
- package/src/core/metrics.ts +169 -0
- package/src/core/model-registry.ts +30 -0
- package/src/core/stream-registry.ts +40 -0
- package/src/core/watchdog.ts +130 -0
- package/src/index.ts +7 -0
- package/src/linter/extraction-engine.ts +165 -0
- package/src/linter/index.ts +258 -0
- package/src/linter/repair-normalize.ts +245 -0
- package/src/linter/safety-gate.ts +219 -0
- package/src/linter/streaming-state-machine.ts +252 -0
- package/src/linter/structural-parser.ts +352 -0
- package/src/linter/types.ts +74 -0
- package/src/login.ts +228 -0
- package/src/routes/chat.ts +801 -0
- package/src/routes/upload.ts +700 -0
- package/src/services/playwright.ts +778 -0
- package/src/services/qwen.ts +500 -0
- package/src/tests/advanced.test.ts +227 -0
- package/src/tests/agenticStress.test.ts +360 -0
- package/src/tests/concurrency.test.ts +103 -0
- package/src/tests/concurrentChat.test.ts +71 -0
- package/src/tests/delta.test.ts +63 -0
- package/src/tests/index.test.ts +356 -0
- package/src/tests/jsonFix.test.ts +98 -0
- package/src/tests/linter.test.ts +151 -0
- package/src/tests/parallel.test.ts +42 -0
- package/src/tests/parser.test.ts +89 -0
- package/src/tests/rotation.test.ts +45 -0
- package/src/tests/streamingOptimizations.test.ts +328 -0
- package/src/tests/structureVerification.test.ts +176 -0
- package/src/tools/ast.ts +15 -0
- package/src/tools/coercion.ts +67 -0
- package/src/tools/confidence.ts +48 -0
- package/src/tools/detector.ts +40 -0
- package/src/tools/executor.ts +236 -0
- package/src/tools/parser.ts +446 -0
- package/src/tools/pipeline.ts +122 -0
- package/src/tools/registry-runtime.ts +34 -0
- package/src/tools/registry.ts +142 -0
- package/src/tools/repair.ts +42 -0
- package/src/tools/schema.ts +285 -0
- package/src/tools/types.ts +104 -0
- package/src/tools/validator.ts +33 -0
- package/src/utils/context-truncation.ts +61 -0
- package/src/utils/json.ts +114 -0
- package/src/utils/qwen-stream-parser.ts +286 -0
- package/src/utils/types.ts +101 -0
|
@@ -0,0 +1,801 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* File: chat.ts
|
|
3
|
+
* Project: qwenproxy
|
|
4
|
+
* Author: Pedro Farias
|
|
5
|
+
* Created: 2026-05-09
|
|
6
|
+
*
|
|
7
|
+
* Last Modified: Sat May 09 2026
|
|
8
|
+
* Modified By: Pedro Farias
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { Context } from 'hono';
|
|
12
|
+
import { stream as honoStream } from 'hono/streaming';
|
|
13
|
+
import { v4 as uuidv4 } from 'uuid';
|
|
14
|
+
import { createQwenStream, updateSessionParent } from '../services/qwen.ts';
|
|
15
|
+
import { OpenAIRequest, ChoiceDelta, Message } from '../utils/types.ts';
|
|
16
|
+
import { registry } from '../tools/registry.ts';
|
|
17
|
+
import type { FunctionToolDefinition } from '../tools/types.ts';
|
|
18
|
+
import { robustParseJSON } from '../utils/json.ts';
|
|
19
|
+
import { StreamingToolParser } from '../tools/parser.ts';
|
|
20
|
+
import { QwenStreamParser, ParsedChunkResult } from '../utils/qwen-stream-parser.ts';
|
|
21
|
+
import { RetryableQwenStreamError } from '../services/qwen.ts';
|
|
22
|
+
import { getModelContextWindow } from '../core/model-registry.js'
|
|
23
|
+
import { truncateMessages, estimateTokenCount } from '../utils/context-truncation.ts';
|
|
24
|
+
import { getNextAccount, getNextAvailableAccount, markAccountRateLimited, getAccountCooldownInfo } from '../core/account-manager.ts';
|
|
25
|
+
import { registerStream, removeStream, getStream } from '../core/stream-registry.ts';
|
|
26
|
+
import { metrics } from '../core/metrics.js'
|
|
27
|
+
|
|
28
|
+
/**
 * Legacy cleanup hook that is retained only so existing callers keep working.
 *
 * The function is intentionally a no-op: it performs no work and returns nothing.
 */
export function cleanupAllAccountMutexes(): void {
  return;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Outcome of comparing the previously accumulated answer text with a new chunk.
 */
export interface DeltaResult {
  /** Portion of the new chunk that has not been seen before. */
  delta: string;
  /** Accumulated content to pass as `oldStr` on the next comparison. */
  matchedContent: string;
}

/**
 * Works out which part of `newStr` is new relative to `oldStr`.
 *
 * `newStr` is handled in one of two ways:
 * - as a cumulative snapshot when it shares a long enough prefix with `oldStr`;
 *   only the text after that shared prefix is returned as the delta, and
 *   `matchedContent` becomes `newStr`;
 * - as a standalone piece otherwise; the whole of `newStr` is the delta and
 *   `matchedContent` becomes `oldStr + newStr`.
 *
 * @param oldStr Content accumulated so far (may be empty).
 * @param newStr Content carried by the latest chunk.
 * @returns The unseen text plus the accumulated content to keep for the next call.
 */
export function getIncrementalDelta(oldStr: string, newStr: string): DeltaResult {
  // Nothing accumulated yet: the whole chunk is new.
  if (!oldStr) {
    return { delta: newStr, matchedContent: newStr };
  }
  // Identical snapshot: nothing new to emit.
  if (newStr === oldStr) {
    return { delta: '', matchedContent: oldStr };
  }

  // Fast path: the new snapshot simply extends the old one, so only the
  // appended suffix is new. This must hold regardless of how long oldStr is
  // or how short the suffix is; otherwise already-emitted text would be
  // emitted a second time and matchedContent would contain it twice.
  if (newStr.startsWith(oldStr)) {
    return { delta: newStr.slice(oldStr.length), matchedContent: newStr };
  }

  // Fallback: measure the shared prefix, looking at no more than the first
  // 2000 characters so the comparison cost stays bounded.
  const scanWindow = Math.min(2000, oldStr.length);
  const maxLen = Math.min(scanWindow, newStr.length);

  // Coarse pass: skip ahead in 64-character segments while they are equal.
  const segmentLen = 64;
  let commonPrefixLen = 0;
  while (
    commonPrefixLen + segmentLen <= maxLen &&
    oldStr.slice(commonPrefixLen, commonPrefixLen + segmentLen) ===
      newStr.slice(commonPrefixLen, commonPrefixLen + segmentLen)
  ) {
    commonPrefixLen += segmentLen;
  }

  // Fine pass: advance character by character inside the first differing segment.
  while (commonPrefixLen < maxLen && oldStr[commonPrefixLen] === newStr[commonPrefixLen]) {
    commonPrefixLen++;
  }

  // A shared prefix of at least 4 characters (or all of a shorter oldStr)
  // means newStr is a snapshot that overlaps the old content.
  const threshold = Math.min(scanWindow, 4);
  if (commonPrefixLen >= threshold) {
    return { delta: newStr.substring(commonPrefixLen), matchedContent: newStr };
  }

  // No meaningful overlap: newStr is a standalone piece to append.
  return { delta: newStr, matchedContent: oldStr + newStr };
}
|
|
80
|
+
|
|
81
|
+
/**
 * Inspects a leftover upstream body and turns it into an error descriptor when
 * it is not an SSE data line.
 *
 * @param raw Text to inspect; surrounding whitespace is ignored.
 * @returns `null` when the text is empty, starts with `data: `, or is JSON that
 *   carries neither `success === false` nor a truthy `error`; otherwise a
 *   message and the HTTP status to report (429 for `RateLimited`, 404 for
 *   `Not_Found`, 502 for everything else, including non-JSON bodies).
 */
function parseQwenErrorPayload(raw: string): { message: string; status: number } | null {
  const body = raw.trim();
  if (body.length === 0 || body.startsWith('data: ')) {
    return null;
  }

  try {
    const parsed = JSON.parse(body);
    if (!parsed) {
      return null;
    }

    if (parsed.success === false) {
      const info = parsed.data;
      const code = info?.code || parsed.code || 'UpstreamError';
      const details = info?.details || parsed.message || 'Qwen returned an error';

      // Optional hint taken from `data.num`, appended after the details.
      let waitHint = '';
      if (info?.num !== undefined) {
        waitHint = ` Wait about ${info.num} hour(s) before trying again.`;
      }

      let status = 502;
      if (code === 'RateLimited') {
        status = 429;
      } else if (code === 'Not_Found') {
        status = 404;
      }

      return { message: `Qwen upstream error: ${code}: ${details}.${waitHint}`, status };
    }

    const failure = parsed.error;
    if (failure) {
      const reason =
        typeof failure === 'string' ? failure : failure.message || JSON.stringify(failure);
      return { message: `Qwen upstream error: ${reason}`, status: 502 };
    }

    return null;
  } catch {
    // Anything that fails to parse (or to format) is reported as an explicit
    // bad gateway rather than being dropped silently.
    return { message: `Qwen upstream returned non-SSE response: ${body.slice(0, 300)}`, status: 502 };
  }
}
|
|
106
|
+
|
|
107
|
+
export async function chatCompletions(c: Context) {
|
|
108
|
+
try {
|
|
109
|
+
const body: OpenAIRequest = await c.req.json();
|
|
110
|
+
const isStream = body.stream ?? false;
|
|
111
|
+
|
|
112
|
+
// Extract the prompt
|
|
113
|
+
let prompt = '';
|
|
114
|
+
const messages = body.messages || [];
|
|
115
|
+
let systemPrompt = '';
|
|
116
|
+
const pendingMultimodal: Array<Array<{ type: string; text?: string; image_url?: { url: string }; video_url?: { url: string }; audio_url?: { url: string }; file_url?: { url: string } }>> = [];
|
|
117
|
+
|
|
118
|
+
for (let i = 0; i < messages.length; i++) {
|
|
119
|
+
const msg = messages[i];
|
|
120
|
+
let contentStr = '';
|
|
121
|
+
if (Array.isArray(msg.content)) {
|
|
122
|
+
// Handle multimodal content (text + images + videos + audio + files)
|
|
123
|
+
const multimodalParts = msg.content.filter(
|
|
124
|
+
(p: any) =>
|
|
125
|
+
(p.type === "image_url" && p.image_url?.url) ||
|
|
126
|
+
(p.type === "video_url" && p.video_url?.url) ||
|
|
127
|
+
(p.type === "audio_url" && p.audio_url?.url) ||
|
|
128
|
+
(p.type === "file_url" && p.file_url?.url),
|
|
129
|
+
);
|
|
130
|
+
|
|
131
|
+
if (multimodalParts.length > 0) {
|
|
132
|
+
// Defer processing to after account selection to reuse cached headers
|
|
133
|
+
pendingMultimodal.push(multimodalParts);
|
|
134
|
+
// Extract text parts for prompt building
|
|
135
|
+
contentStr = msg.content
|
|
136
|
+
.filter((p: any) => p.type === "text")
|
|
137
|
+
.map((p: any) => p.text)
|
|
138
|
+
.join("\n");
|
|
139
|
+
} else {
|
|
140
|
+
// No multimodal parts, just extract text
|
|
141
|
+
contentStr = msg.content
|
|
142
|
+
.filter((p: any) => p.type === "text")
|
|
143
|
+
.map((p: any) => p.text)
|
|
144
|
+
.join("\n");
|
|
145
|
+
}
|
|
146
|
+
} else if (typeof msg.content === 'object' && msg.content !== null) {
|
|
147
|
+
contentStr = JSON.stringify(msg.content);
|
|
148
|
+
} else {
|
|
149
|
+
contentStr = msg.content || '';
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
if (msg.role === 'system') {
|
|
153
|
+
systemPrompt += (contentStr || '') + '\n\n';
|
|
154
|
+
} else if (msg.role === 'user') {
|
|
155
|
+
prompt += `User: ${contentStr || ''}\n\n`;
|
|
156
|
+
} else if (msg.role === 'assistant') {
|
|
157
|
+
let assistantContent = contentStr || '';
|
|
158
|
+
const reasoning = (msg as any).reasoning_content;
|
|
159
|
+
if (reasoning) {
|
|
160
|
+
assistantContent = `<think>\n${reasoning}\n</think>\n${assistantContent}`;
|
|
161
|
+
}
|
|
162
|
+
if (msg.tool_calls && Array.isArray(msg.tool_calls)) {
|
|
163
|
+
for (const tc of msg.tool_calls) {
|
|
164
|
+
const args = tc.function?.arguments;
|
|
165
|
+
let parsedArgs: any = {};
|
|
166
|
+
if (typeof args === 'string') {
|
|
167
|
+
try { parsedArgs = JSON.parse(args); } catch { parsedArgs = {}; }
|
|
168
|
+
} else if (args && typeof args === 'object') {
|
|
169
|
+
parsedArgs = args;
|
|
170
|
+
}
|
|
171
|
+
const payload = { name: tc.function?.name, arguments: parsedArgs };
|
|
172
|
+
const toolCallStr = `\n<tool_call>\n${JSON.stringify(payload)}\n</tool_call>`;
|
|
173
|
+
assistantContent = assistantContent ? assistantContent + toolCallStr : toolCallStr.trim();
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
prompt += `Assistant: ${assistantContent.trim()}\n\n`;
|
|
177
|
+
} else if (msg.role === 'tool' || msg.role === 'function') {
|
|
178
|
+
let toolName = msg.name;
|
|
179
|
+
if (!toolName && msg.tool_call_id) {
|
|
180
|
+
// Look up tool name in history by tool_call_id
|
|
181
|
+
for (let j = i - 1; j >= 0; j--) {
|
|
182
|
+
const prevMsg = messages[j];
|
|
183
|
+
if (prevMsg.role === 'assistant' && prevMsg.tool_calls) {
|
|
184
|
+
const call = prevMsg.tool_calls.find(tc => tc.id === msg.tool_call_id);
|
|
185
|
+
if (call) {
|
|
186
|
+
toolName = call.function?.name;
|
|
187
|
+
break;
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
prompt += `Tool Response (${toolName || 'tool'}): ${contentStr || ''}\n\n`;
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
// Inject tools instructions
|
|
197
|
+
const bodyAny = body as any;
|
|
198
|
+
if (bodyAny.tools && Array.isArray(bodyAny.tools) && bodyAny.tools.length > 0) {
|
|
199
|
+
// Better formatting for tools
|
|
200
|
+
const formattedTools = bodyAny.tools.map((t: any) => {
|
|
201
|
+
if (t.type === 'function') {
|
|
202
|
+
return {
|
|
203
|
+
name: t.function.name,
|
|
204
|
+
description: t.function.description || '',
|
|
205
|
+
parameters: t.function.parameters
|
|
206
|
+
};
|
|
207
|
+
}
|
|
208
|
+
return t;
|
|
209
|
+
});
|
|
210
|
+
const toolsJson = JSON.stringify(formattedTools, null, 2);
|
|
211
|
+
|
|
212
|
+
systemPrompt += `\n\n# TOOLS AVAILABLE\nYou have access to the following tools:\n${toolsJson}\n\n# TOOL CALLING FORMAT (MANDATORY)\nTo use a tool, you MUST output a JSON object wrapped EXACTLY in these tags:\n<tool_call>\n{"name": "tool_name", "arguments": {"param_name": "value"}}\n</tool_call>\n\nEXAMPLE OF MULTIPLE TOOL CALLS:\n<tool_call>\n{"name": "read_file", "arguments": {"path": "file1.txt"}}\n</tool_call>\n<tool_call>\n{"name": "read_file", "arguments": {"path": "file2.txt"}}\n</tool_call>\n\nCRITICAL RULES:\n1. ONLY use the tags above for tool calling. NEVER output raw JSON without tags.\n2. You can call multiple tools by outputting multiple <tool_call> blocks consecutively.\n3. Do NOT output any other text (explanations, chat, etc.) after your <tool_call> blocks. Wait for the user to provide the tool response.\n4. The JSON inside the tags MUST be valid and include ALL required braces and the "arguments" field.\n5. If you need to use a tool, do it IMMEDIATELY without preamble.\n\n`;
|
|
213
|
+
|
|
214
|
+
if (bodyAny.tool_choice && typeof bodyAny.tool_choice === 'object' && bodyAny.tool_choice.function) {
|
|
215
|
+
const forcedTool = bodyAny.tool_choice.function.name;
|
|
216
|
+
systemPrompt += `CRITICAL: You MUST call the tool "${forcedTool}" in this response.\n\n`;
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
const modelId = body.model.replace('-no-thinking', '');
|
|
221
|
+
const modelContextWindow = getModelContextWindow(modelId)
|
|
222
|
+
const estimatedTokens = estimateTokenCount(systemPrompt + prompt);
|
|
223
|
+
|
|
224
|
+
let finalPrompt: string;
|
|
225
|
+
if (estimatedTokens > modelContextWindow - 1000) {
|
|
226
|
+
const truncated = truncateMessages(messages, modelContextWindow, systemPrompt);
|
|
227
|
+
finalPrompt = truncated.map(m => `${m.role === 'user' ? 'User' : m.role === 'assistant' ? 'Assistant' : m.role}: ${m.content}`).join('\n\n');
|
|
228
|
+
} else {
|
|
229
|
+
finalPrompt = systemPrompt ? `${systemPrompt}\n${prompt}` : prompt;
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
const isThinkingModel = !body.model.includes('no-thinking');
|
|
233
|
+
|
|
234
|
+
// A session is new if it doesn't have any assistant messages yet.
|
|
235
|
+
// This handles cases where the first request has [System, User] messages.
|
|
236
|
+
const isNewSession = !messages.some(m => m.role === 'assistant');
|
|
237
|
+
|
|
238
|
+
// Account selection with fallback on rate-limit/failure
|
|
239
|
+
let account = getNextAccount();
|
|
240
|
+
let triedAccountIds = new Set<string>();
|
|
241
|
+
let lastError: any = null;
|
|
242
|
+
|
|
243
|
+
let stream: ReadableStream | undefined;
|
|
244
|
+
let uiSessionId = '';
|
|
245
|
+
const completionId = 'chatcmpl-' + uuidv4();
|
|
246
|
+
|
|
247
|
+
while (account) {
|
|
248
|
+
const accountId = account.id;
|
|
249
|
+
const accountEmail = account.email;
|
|
250
|
+
|
|
251
|
+
if (triedAccountIds.has(accountId)) {
|
|
252
|
+
account = getNextAvailableAccount(accountId);
|
|
253
|
+
continue;
|
|
254
|
+
}
|
|
255
|
+
triedAccountIds.add(accountId);
|
|
256
|
+
|
|
257
|
+
const cooldownInfo = getAccountCooldownInfo(accountId);
|
|
258
|
+
if (cooldownInfo && accountId !== 'global') {
|
|
259
|
+
console.log(`[Chat] Skipping account ${accountEmail} (${accountId}) — on cooldown for ${Math.round(cooldownInfo.remainingMs / 1000)}s (${cooldownInfo.reason})`);
|
|
260
|
+
account = getNextAvailableAccount(accountId);
|
|
261
|
+
continue;
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
console.log(`[Chat] Routing request to account: ${accountEmail} (${accountId})`);
|
|
265
|
+
|
|
266
|
+
let retries = 3;
|
|
267
|
+
let retryDelay = 500;
|
|
268
|
+
let success = false;
|
|
269
|
+
|
|
270
|
+
while (retries > 0) {
|
|
271
|
+
try {
|
|
272
|
+
const result = await createQwenStream(
|
|
273
|
+
finalPrompt,
|
|
274
|
+
isThinkingModel,
|
|
275
|
+
body.model,
|
|
276
|
+
null, // Always force new chat for concurrency isolation
|
|
277
|
+
accountId === 'global' ? undefined : accountId,
|
|
278
|
+
undefined,
|
|
279
|
+
pendingMultimodal.length > 0 ? pendingMultimodal : undefined
|
|
280
|
+
);
|
|
281
|
+
stream = result.stream;
|
|
282
|
+
uiSessionId = result.uiSessionId;
|
|
283
|
+
registerStream(completionId, {
|
|
284
|
+
abortController: result.controller,
|
|
285
|
+
accountId: result.accountId,
|
|
286
|
+
uiSessionId: result.uiSessionId,
|
|
287
|
+
targetResponseId: '',
|
|
288
|
+
headers: result.headers,
|
|
289
|
+
});
|
|
290
|
+
success = true;
|
|
291
|
+
break;
|
|
292
|
+
} catch (err: any) {
|
|
293
|
+
retries--;
|
|
294
|
+
|
|
295
|
+
if (err.upstreamCode === 'RateLimited' || err.upstreamStatus === 429) {
|
|
296
|
+
const hourHint = err.message?.match(/Wait about (\d+) hour/);
|
|
297
|
+
const cooldownMs = hourHint ? parseInt(hourHint[1]) * 60 * 60 * 1000 : undefined;
|
|
298
|
+
markAccountRateLimited(accountId, cooldownMs, 'RateLimited');
|
|
299
|
+
console.warn(`[Chat] Account ${accountEmail} (${accountId}) rate-limited. Marked for cooldown.`);
|
|
300
|
+
lastError = err;
|
|
301
|
+
break;
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
if (retries === 0) {
|
|
305
|
+
if (err.upstreamStatus && err.upstreamStatus >= 500) {
|
|
306
|
+
markAccountRateLimited(accountId, undefined, 'ServerError');
|
|
307
|
+
console.warn(`[Chat] Account ${accountEmail} (${accountId}) returned server error. Marked for cooldown.`);
|
|
308
|
+
}
|
|
309
|
+
lastError = err;
|
|
310
|
+
break;
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
let useDelay = retryDelay;
|
|
314
|
+
if (err instanceof RetryableQwenStreamError && err.retryAfterMs !== undefined) {
|
|
315
|
+
useDelay = err.retryAfterMs;
|
|
316
|
+
}
|
|
317
|
+
const isRetryable = err instanceof RetryableQwenStreamError || err.message?.includes('in progress') || err.message?.includes('Bad_Request');
|
|
318
|
+
if (!isRetryable) {
|
|
319
|
+
lastError = err;
|
|
320
|
+
break;
|
|
321
|
+
}
|
|
322
|
+
console.warn(`[Chat] Qwen request failed for ${accountEmail}, retrying in ${useDelay}ms... (${retries} left)`);
|
|
323
|
+
await new Promise(r => setTimeout(r, useDelay));
|
|
324
|
+
retryDelay = Math.min(retryDelay * 2, 5000);
|
|
325
|
+
}
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
if (success) {
|
|
329
|
+
break;
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
account = getNextAvailableAccount(accountId);
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
if (!stream) {
|
|
336
|
+
removeStream(completionId);
|
|
337
|
+
throw lastError || new Error('All accounts failed');
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
if (!isStream) {
|
|
341
|
+
const reader = stream!.getReader();
|
|
342
|
+
const decoder = new TextDecoder();
|
|
343
|
+
|
|
344
|
+
const toolCallsOut: any[] = [];
|
|
345
|
+
let buffer = '';
|
|
346
|
+
const hasTools = Array.isArray(bodyAny.tools) && bodyAny.tools.length > 0;
|
|
347
|
+
|
|
348
|
+
const qwenParser = new QwenStreamParser(uiSessionId, {
|
|
349
|
+
tools: hasTools ? bodyAny.tools : [],
|
|
350
|
+
onThinking: (content: string) => {
|
|
351
|
+
// Accumulate reasoning content (handled via parser state)
|
|
352
|
+
},
|
|
353
|
+
onToolCall: (tc) => {
|
|
354
|
+
toolCallsOut.push({
|
|
355
|
+
id: tc.id,
|
|
356
|
+
type: 'function',
|
|
357
|
+
function: {
|
|
358
|
+
name: tc.name,
|
|
359
|
+
arguments: JSON.stringify(tc.arguments)
|
|
360
|
+
}
|
|
361
|
+
});
|
|
362
|
+
},
|
|
363
|
+
});
|
|
364
|
+
|
|
365
|
+
while (true) {
|
|
366
|
+
const { done, value } = await reader.read();
|
|
367
|
+
if (done) break;
|
|
368
|
+
|
|
369
|
+
buffer += decoder.decode(value, { stream: true });
|
|
370
|
+
const lines = buffer.split('\n');
|
|
371
|
+
buffer = lines.pop() || '';
|
|
372
|
+
|
|
373
|
+
for (const line of lines) {
|
|
374
|
+
const trimmed = line.trim();
|
|
375
|
+
if (!trimmed || !trimmed.startsWith('data: ')) continue;
|
|
376
|
+
|
|
377
|
+
const dataStr = trimmed.slice(6);
|
|
378
|
+
if (dataStr === '[DONE]') continue;
|
|
379
|
+
|
|
380
|
+
qwenParser.parseLine(dataStr);
|
|
381
|
+
}
|
|
382
|
+
}
|
|
383
|
+
|
|
384
|
+
const upstreamError = parseQwenErrorPayload(buffer);
|
|
385
|
+
if (upstreamError) {
|
|
386
|
+
removeStream(completionId);
|
|
387
|
+
return c.json({ error: { message: upstreamError.message } }, upstreamError.status as any);
|
|
388
|
+
}
|
|
389
|
+
|
|
390
|
+
const { text: remainingText, toolCalls: remainingToolCalls } = qwenParser.flush();
|
|
391
|
+
const parserState = qwenParser.state;
|
|
392
|
+
let finalContent = parserState.lastFullContent;
|
|
393
|
+
if (remainingText) {
|
|
394
|
+
finalContent += remainingText;
|
|
395
|
+
}
|
|
396
|
+
for (const tc of remainingToolCalls) {
|
|
397
|
+
toolCallsOut.push({
|
|
398
|
+
id: tc.id,
|
|
399
|
+
type: 'function',
|
|
400
|
+
function: {
|
|
401
|
+
name: tc.name,
|
|
402
|
+
arguments: JSON.stringify(tc.arguments)
|
|
403
|
+
}
|
|
404
|
+
});
|
|
405
|
+
}
|
|
406
|
+
|
|
407
|
+
const usage = {
|
|
408
|
+
prompt_tokens: parserState.promptTokens,
|
|
409
|
+
completion_tokens: parserState.completionTokens,
|
|
410
|
+
total_tokens: parserState.promptTokens + parserState.completionTokens,
|
|
411
|
+
prompt_tokens_details: { cached_tokens: 0 }
|
|
412
|
+
};
|
|
413
|
+
const message: any = { role: 'assistant', content: toolCallsOut.length ? null : finalContent };
|
|
414
|
+
if (parserState.reasoningBuffer) message.reasoning_content = parserState.reasoningBuffer;
|
|
415
|
+
if (toolCallsOut.length) toolCallsOut.forEach((tc, idx) => tc.index = idx);
|
|
416
|
+
if (toolCallsOut.length) message.tool_calls = toolCallsOut;
|
|
417
|
+
|
|
418
|
+
removeStream(completionId);
|
|
419
|
+
return c.json({
|
|
420
|
+
id: completionId,
|
|
421
|
+
object: 'chat.completion',
|
|
422
|
+
created: Math.floor(Date.now() / 1000),
|
|
423
|
+
model: body.model,
|
|
424
|
+
choices: [{
|
|
425
|
+
index: 0,
|
|
426
|
+
message,
|
|
427
|
+
logprobs: null,
|
|
428
|
+
finish_reason: toolCallsOut.length ? 'tool_calls' : 'stop'
|
|
429
|
+
}],
|
|
430
|
+
usage
|
|
431
|
+
});
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
// Disable Nagle's algorithm to transmit small chunks immediately without buffering delay
|
|
435
|
+
const socket = (c.env as any)?.incoming?.socket || (c.req.raw as any).socket;
|
|
436
|
+
if (socket && typeof socket.setNoDelay === 'function') {
|
|
437
|
+
socket.setNoDelay(true);
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
c.header('Content-Type', 'text/event-stream');
|
|
441
|
+
c.header('Cache-Control', 'no-cache, no-transform');
|
|
442
|
+
c.header('Connection', 'keep-alive');
|
|
443
|
+
c.header('X-Accel-Buffering', 'no');
|
|
444
|
+
|
|
445
|
+
return honoStream(c, async (streamWriter: any) => {
|
|
446
|
+
let heartbeatInterval: any;
|
|
447
|
+
try {
|
|
448
|
+
// Send heartbeat to prevent Cloudflare 524 timeout
|
|
449
|
+
await streamWriter.write(': heartbeat\n\n');
|
|
450
|
+
|
|
451
|
+
// Set up a periodic heartbeat to keep the connection alive during long thinking phases
|
|
452
|
+
heartbeatInterval = setInterval(async () => {
|
|
453
|
+
try {
|
|
454
|
+
await streamWriter.write(': keep-alive\n\n');
|
|
455
|
+
} catch (e) {
|
|
456
|
+
clearInterval(heartbeatInterval);
|
|
457
|
+
}
|
|
458
|
+
}, 15000); // Every 15 seconds
|
|
459
|
+
|
|
460
|
+
// Optimized: fire-and-forget write (Hono's streamWriter has internal buffering)
|
|
461
|
+
const writeEvent = (data: any) => {
|
|
462
|
+
streamWriter.write(`data: ${JSON.stringify(data)}\n\n`);
|
|
463
|
+
};
|
|
464
|
+
|
|
465
|
+
const makeChoice = (delta: any, finishReason: string | null = null) => ({
|
|
466
|
+
index: 0,
|
|
467
|
+
delta,
|
|
468
|
+
logprobs: null,
|
|
469
|
+
finish_reason: finishReason
|
|
470
|
+
});
|
|
471
|
+
|
|
472
|
+
// Pre-compute timestamp once before the stream loop
|
|
473
|
+
const createdTimestamp = Math.floor(Date.now() / 1000);
|
|
474
|
+
|
|
475
|
+
// Send initial chunk
|
|
476
|
+
writeEvent({
|
|
477
|
+
id: completionId,
|
|
478
|
+
object: 'chat.completion.chunk',
|
|
479
|
+
created: createdTimestamp,
|
|
480
|
+
model: body.model,
|
|
481
|
+
choices: [makeChoice({ role: 'assistant', content: '' })]
|
|
482
|
+
});
|
|
483
|
+
|
|
484
|
+
const reader = stream.getReader();
|
|
485
|
+
const decoder = new TextDecoder();
|
|
486
|
+
|
|
487
|
+
let reasoningBuffer = '';
|
|
488
|
+
let lastFullContent = '';
|
|
489
|
+
let targetResponseId: string | null = null;
|
|
490
|
+
let targetResponseIdSet = false;
|
|
491
|
+
let currentThoughtIndex = 0;
|
|
492
|
+
const hasTools = Array.isArray(bodyAny.tools) && bodyAny.tools.length > 0;
|
|
493
|
+
const toolParser = hasTools ? new StreamingToolParser(bodyAny.tools) : null;
|
|
494
|
+
|
|
495
|
+
let buffer = '';
|
|
496
|
+
let completionTokens = 0;
|
|
497
|
+
let promptTokens = Math.ceil(finalPrompt.length / 3.5);
|
|
498
|
+
|
|
499
|
+
// Real-time flush: send each event immediately to minimize latency
|
|
500
|
+
let chunkCount = 0;
|
|
501
|
+
while (true) {
|
|
502
|
+
const { done, value } = await reader.read();
|
|
503
|
+
if (done) break;
|
|
504
|
+
|
|
505
|
+
buffer += decoder.decode(value, { stream: true });
|
|
506
|
+
|
|
507
|
+
let startIdx = 0;
|
|
508
|
+
let newlineIdx: number;
|
|
509
|
+
while ((newlineIdx = buffer.indexOf('\n', startIdx)) !== -1) {
|
|
510
|
+
const line = buffer.slice(startIdx, newlineIdx);
|
|
511
|
+
startIdx = newlineIdx + 1;
|
|
512
|
+
|
|
513
|
+
const trimmed = line.trim();
|
|
514
|
+
if (!trimmed || !trimmed.startsWith('data: ')) continue;
|
|
515
|
+
|
|
516
|
+
const dataStr = trimmed.slice(6);
|
|
517
|
+
if (dataStr === '[DONE]') {
|
|
518
|
+
streamWriter.write('data: [DONE]\n\n');
|
|
519
|
+
continue;
|
|
520
|
+
}
|
|
521
|
+
|
|
522
|
+
try {
|
|
523
|
+
const chunk = JSON.parse(dataStr);
|
|
524
|
+
|
|
525
|
+
// Extract response_id for session tracking and target filtering
|
|
526
|
+
if (chunk['response.created'] && chunk['response.created'].response_id) {
|
|
527
|
+
if (!targetResponseId) {
|
|
528
|
+
targetResponseId = chunk['response.created'].response_id;
|
|
529
|
+
targetResponseIdSet = true;
|
|
530
|
+
}
|
|
531
|
+
updateSessionParent(uiSessionId, chunk['response.created'].response_id);
|
|
532
|
+
} else if (chunk.response_id && !targetResponseIdSet) {
|
|
533
|
+
targetResponseId = chunk.response_id;
|
|
534
|
+
targetResponseIdSet = true;
|
|
535
|
+
updateSessionParent(uiSessionId, chunk.response_id);
|
|
536
|
+
}
|
|
537
|
+
|
|
538
|
+
if (chunk.usage) {
|
|
539
|
+
if (chunk.usage.output_tokens) completionTokens = chunk.usage.output_tokens;
|
|
540
|
+
if (chunk.usage.input_tokens) promptTokens = chunk.usage.input_tokens;
|
|
541
|
+
}
|
|
542
|
+
|
|
543
|
+
let vStr = '';
|
|
544
|
+
let foundStr = false;
|
|
545
|
+
let isThinkingChunk = false;
|
|
546
|
+
|
|
547
|
+
if (chunk.choices && chunk.choices[0] && chunk.choices[0].delta &&
|
|
548
|
+
(!targetResponseIdSet || chunk.response_id === targetResponseId)) {
|
|
549
|
+
const delta = chunk.choices[0].delta;
|
|
550
|
+
|
|
551
|
+
if (delta.phase === 'thinking_summary') {
|
|
552
|
+
isThinkingChunk = true;
|
|
553
|
+
if (delta.extra && delta.extra.summary_thought && delta.extra.summary_thought.content) {
|
|
554
|
+
const thoughts = delta.extra.summary_thought.content;
|
|
555
|
+
if (thoughts.length > currentThoughtIndex) {
|
|
556
|
+
vStr = thoughts.slice(currentThoughtIndex).join('\n');
|
|
557
|
+
currentThoughtIndex = thoughts.length;
|
|
558
|
+
foundStr = true;
|
|
559
|
+
}
|
|
560
|
+
}
|
|
561
|
+
} else if (delta.phase === 'answer') {
|
|
562
|
+
isThinkingChunk = false;
|
|
563
|
+
if (delta.content !== undefined) {
|
|
564
|
+
const newContent = delta.content || '';
|
|
565
|
+
const result = getIncrementalDelta(lastFullContent, newContent);
|
|
566
|
+
vStr = result.delta;
|
|
567
|
+
if (vStr) {
|
|
568
|
+
lastFullContent = result.matchedContent;
|
|
569
|
+
foundStr = true;
|
|
570
|
+
}
|
|
571
|
+
}
|
|
572
|
+
}
|
|
573
|
+
}
|
|
574
|
+
|
|
575
|
+
if (foundStr && vStr !== '') {
|
|
576
|
+
if (vStr === 'FINISHED') continue;
|
|
577
|
+
|
|
578
|
+
if (isThinkingChunk) {
|
|
579
|
+
reasoningBuffer += vStr;
|
|
580
|
+
streamWriter.write(`data: ${JSON.stringify({
|
|
581
|
+
id: completionId,
|
|
582
|
+
object: 'chat.completion.chunk',
|
|
583
|
+
created: createdTimestamp,
|
|
584
|
+
model: body.model,
|
|
585
|
+
choices: [makeChoice({ reasoning_content: vStr })]
|
|
586
|
+
})}\n\n`);
|
|
587
|
+
} else {
|
|
588
|
+
if (hasTools && toolParser) {
|
|
589
|
+
const { text, toolCalls } = toolParser.feed(vStr);
|
|
590
|
+
if (text) {
|
|
591
|
+
streamWriter.write(`data: ${JSON.stringify({
|
|
592
|
+
id: completionId,
|
|
593
|
+
object: 'chat.completion.chunk',
|
|
594
|
+
created: createdTimestamp,
|
|
595
|
+
model: body.model,
|
|
596
|
+
choices: [makeChoice({ content: text })]
|
|
597
|
+
})}\n\n`);
|
|
598
|
+
}
|
|
599
|
+
for (const tc of toolCalls) {
|
|
600
|
+
streamWriter.write(`data: ${JSON.stringify({
|
|
601
|
+
id: completionId,
|
|
602
|
+
object: 'chat.completion.chunk',
|
|
603
|
+
created: createdTimestamp,
|
|
604
|
+
model: body.model,
|
|
605
|
+
choices: [makeChoice({
|
|
606
|
+
tool_calls: [{
|
|
607
|
+
index: toolParser.getEmittedToolCallCount() - toolCalls.length + toolCalls.indexOf(tc),
|
|
608
|
+
id: tc.id,
|
|
609
|
+
type: 'function',
|
|
610
|
+
function: {
|
|
611
|
+
name: tc.name,
|
|
612
|
+
arguments: JSON.stringify(tc.arguments)
|
|
613
|
+
}
|
|
614
|
+
}]
|
|
615
|
+
})]
|
|
616
|
+
})}\n\n`);
|
|
617
|
+
}
|
|
618
|
+
} else {
|
|
619
|
+
if (vStr) {
|
|
620
|
+
streamWriter.write(`data: ${JSON.stringify({
|
|
621
|
+
id: completionId,
|
|
622
|
+
object: 'chat.completion.chunk',
|
|
623
|
+
created: createdTimestamp,
|
|
624
|
+
model: body.model,
|
|
625
|
+
choices: [makeChoice({ content: vStr })]
|
|
626
|
+
})}\n\n`);
|
|
627
|
+
}
|
|
628
|
+
}
|
|
629
|
+
}
|
|
630
|
+
}
|
|
631
|
+
} catch (e) {
|
|
632
|
+
// parse error, ignore partial chunk
|
|
633
|
+
}
|
|
634
|
+
}
|
|
635
|
+
|
|
636
|
+
// Trim processed portion from buffer
|
|
637
|
+
if (startIdx > 0) {
|
|
638
|
+
buffer = buffer.slice(startIdx);
|
|
639
|
+
}
|
|
640
|
+
|
|
641
|
+
// Periodic yielding to prevent event loop starvation
|
|
642
|
+
chunkCount++;
|
|
643
|
+
if (chunkCount % 100 === 0) {
|
|
644
|
+
await new Promise(r => setImmediate(r));
|
|
645
|
+
}
|
|
646
|
+
}
|
|
647
|
+
|
|
648
|
+
const upstreamError = parseQwenErrorPayload(buffer);
|
|
649
|
+
if (upstreamError) {
|
|
650
|
+
writeEvent({
|
|
651
|
+
id: completionId,
|
|
652
|
+
object: 'chat.completion.chunk',
|
|
653
|
+
created: createdTimestamp,
|
|
654
|
+
model: body.model,
|
|
655
|
+
choices: [makeChoice({ content: upstreamError.message })]
|
|
656
|
+
});
|
|
657
|
+
writeEvent({
|
|
658
|
+
id: completionId,
|
|
659
|
+
object: 'chat.completion.chunk',
|
|
660
|
+
created: createdTimestamp,
|
|
661
|
+
model: body.model,
|
|
662
|
+
choices: [makeChoice({}, 'stop')]
|
|
663
|
+
});
|
|
664
|
+
streamWriter.write('data: [DONE]\n\n');
|
|
665
|
+
return;
|
|
666
|
+
}
|
|
667
|
+
|
|
668
|
+
if (toolParser) {
|
|
669
|
+
const flushResult = toolParser.flush();
|
|
670
|
+
|
|
671
|
+
if (flushResult.text) {
|
|
672
|
+
writeEvent({
|
|
673
|
+
id: completionId,
|
|
674
|
+
object: 'chat.completion.chunk',
|
|
675
|
+
created: createdTimestamp,
|
|
676
|
+
model: body.model,
|
|
677
|
+
choices: [makeChoice({ content: flushResult.text })]
|
|
678
|
+
});
|
|
679
|
+
}
|
|
680
|
+
for (const tc of flushResult.toolCalls) {
|
|
681
|
+
const idx = toolParser.getEmittedToolCallCount() - flushResult.toolCalls.length + flushResult.toolCalls.indexOf(tc);
|
|
682
|
+
writeEvent({
|
|
683
|
+
id: completionId,
|
|
684
|
+
object: 'chat.completion.chunk',
|
|
685
|
+
created: createdTimestamp,
|
|
686
|
+
model: body.model,
|
|
687
|
+
choices: [makeChoice({
|
|
688
|
+
tool_calls: [{
|
|
689
|
+
index: idx,
|
|
690
|
+
id: tc.id,
|
|
691
|
+
type: 'function',
|
|
692
|
+
function: {
|
|
693
|
+
name: tc.name,
|
|
694
|
+
arguments: JSON.stringify(tc.arguments)
|
|
695
|
+
}
|
|
696
|
+
}]
|
|
697
|
+
})]
|
|
698
|
+
});
|
|
699
|
+
}
|
|
700
|
+
}
|
|
701
|
+
|
|
702
|
+
const usage = {
|
|
703
|
+
prompt_tokens: promptTokens,
|
|
704
|
+
completion_tokens: completionTokens,
|
|
705
|
+
total_tokens: promptTokens + completionTokens,
|
|
706
|
+
prompt_tokens_details: { cached_tokens: 0 }
|
|
707
|
+
};
|
|
708
|
+
|
|
709
|
+
const finalFinishReason = toolParser && toolParser.getEmittedToolCallCount() > 0 ? 'tool_calls' : 'stop';
|
|
710
|
+
|
|
711
|
+
writeEvent({
|
|
712
|
+
id: completionId,
|
|
713
|
+
object: 'chat.completion.chunk',
|
|
714
|
+
created: createdTimestamp,
|
|
715
|
+
model: body.model,
|
|
716
|
+
choices: [makeChoice({}, finalFinishReason)],
|
|
717
|
+
...(body.stream_options?.include_usage ? {} : { usage })
|
|
718
|
+
});
|
|
719
|
+
|
|
720
|
+
if (body.stream_options?.include_usage) {
|
|
721
|
+
writeEvent({
|
|
722
|
+
id: completionId,
|
|
723
|
+
object: 'chat.completion.chunk',
|
|
724
|
+
created: createdTimestamp,
|
|
725
|
+
model: body.model,
|
|
726
|
+
choices: [],
|
|
727
|
+
usage
|
|
728
|
+
});
|
|
729
|
+
}
|
|
730
|
+
streamWriter.write('data: [DONE]\n\n');
|
|
731
|
+
|
|
732
|
+
} finally {
|
|
733
|
+
clearInterval(heartbeatInterval);
|
|
734
|
+
removeStream(completionId);
|
|
735
|
+
}
|
|
736
|
+
});
|
|
737
|
+
} catch (err: any) {
|
|
738
|
+
console.error('Error in chatCompletions:', err)
|
|
739
|
+
const status = err.upstreamStatus || 500
|
|
740
|
+
if (status >= 500) {
|
|
741
|
+
metrics.increment('requests.errors')
|
|
742
|
+
}
|
|
743
|
+
return c.json({ error: { message: err.message } }, status)
|
|
744
|
+
}
|
|
745
|
+
}
|
|
746
|
+
|
|
747
|
+
/**
 * Stops an in-flight generation on the upstream Qwen chat service.
 *
 * Expects a JSON body of the form `{ chat_id, response_id }`. The handler
 * looks up the locally registered stream for `chat_id`, forwards a stop
 * request upstream using the headers captured on that stream, and — only
 * when the upstream call succeeds — aborts the local stream and removes it
 * from the registry.
 *
 * Responses:
 * - 400 when the body is not valid JSON, when `chat_id` / `response_id` are
 *   missing or not strings, or when `response_id` does not match the
 *   stream's `targetResponseId`.
 * - 404 when no stream is registered for `chat_id`.
 * - the upstream status code when the upstream stop request is rejected.
 * - 500 on any unexpected failure.
 * - 200 `{ success: true }` once the stream has been stopped.
 *
 * @param c - Request context used to read the JSON body and build the response.
 */
export async function chatCompletionsStop(c: Context) {
  try {
    // A malformed body is a client error; report it as 400 rather than
    // letting it fall through to the generic 500 handler below.
    let body: unknown;
    try {
      body = await c.req.json();
    } catch {
      return c.json({ error: 'Invalid JSON body' }, 400);
    }

    // `body` may legitimately parse to `null` or a primitive; fall back to an
    // empty object so destructuring never throws.
    const { chat_id, response_id } = (body ?? {}) as {
      chat_id?: unknown;
      response_id?: unknown;
    };

    // Both identifiers end up in a URL and a header, so insist on non-empty strings.
    if (
      typeof chat_id !== 'string' ||
      typeof response_id !== 'string' ||
      !chat_id ||
      !response_id
    ) {
      return c.json({ error: 'chat_id and response_id are required' }, 400);
    }

    const stream = getStream(chat_id);
    if (!stream) {
      return c.json({ error: 'Stream not found' }, 404);
    }

    // When the stream is bound to a specific response, refuse to stop a different one.
    if (stream.targetResponseId && stream.targetResponseId !== response_id) {
      return c.json({ error: 'response_id mismatch' }, 400);
    }

    // `chat_id` originates from the request body: escape it before embedding
    // it in the query string and in the Referer path.
    const encodedChatId = encodeURIComponent(chat_id);

    const stopResponse = await fetch(
      `https://chat.qwen.ai/api/v2/chat/completions/stop?chat_id=${encodedChatId}`,
      {
        method: 'POST',
        headers: {
          'Accept': 'application/json, text/plain, */*',
          'Accept-Language': 'pt-BR,pt;q=0.9',
          'Content-Type': 'application/json',
          'Cookie': stream.headers.cookie,
          'Origin': 'https://chat.qwen.ai',
          'Referer': `https://chat.qwen.ai/c/${encodedChatId}`,
          'Sec-Fetch-Dest': 'empty',
          'Sec-Fetch-Mode': 'cors',
          'Sec-Fetch-Site': 'same-origin',
          'User-Agent': stream.headers['user-agent'],
          'X-Request-Id': uuidv4(),
          'bx-ua': stream.headers['bx-ua'],
          'bx-umidtoken': stream.headers['bx-umidtoken'],
          'bx-v': stream.headers['bx-v'],
        },
        body: JSON.stringify({ chat_id, response_id }),
      },
    );

    if (!stopResponse.ok) {
      const errorText = await stopResponse.text();
      console.error(`[Stop] Failed to stop generation for chat_id=${chat_id}: ${stopResponse.status} ${errorText}`);
      // Mirror the upstream status to the caller; the cast is needed because
      // the fetch status is a plain number.
      return c.json({ error: 'Failed to stop generation' }, stopResponse.status as any);
    }

    // Upstream acknowledged the stop: tear down the local stream as well.
    stream.abortController.abort();
    removeStream(chat_id);

    console.log(`[Stop] Generation stopped for chat_id=${chat_id}`);
    return c.json({ success: true });
  } catch (err: unknown) {
    console.error('Error in chatCompletionsStop:', err);
    const message = err instanceof Error ? err.message : String(err);
    return c.json({ error: message }, 500);
  }
}
|