@pedrofariasx/qwenproxy 1.6.4 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pedrofariasx/qwenproxy",
3
- "version": "1.6.4",
3
+ "version": "1.7.0",
4
4
  "description": "Local OpenAI-compatible proxy API that routes requests to Qwen (chat.qwen.ai) via Playwright browser automation.",
5
5
  "main": "index.js",
6
6
  "scripts": {
@@ -22,7 +22,7 @@
22
22
  "@hono/node-server": "^2.0.3",
23
23
  "ajv": "^8.20.0",
24
24
  "ali-oss": "^6.23.0",
25
- "better-sqlite3": "^12.10.0",
25
+ "better-sqlite3": "^12.10.1",
26
26
  "dotenv": "^17.4.2",
27
27
  "hono": "^4.12.21",
28
28
  "playwright": "^1.60.0",
@@ -38,7 +38,6 @@
38
38
  "@types/ali-oss": "^6.23.3",
39
39
  "@types/better-sqlite3": "^7.6.13",
40
40
  "@types/node": "^25.9.1",
41
- "@types/uuid": "^11.0.0",
42
41
  "semantic-release": "^25.0.3",
43
42
  "typescript": "^6.0.3"
44
43
  },
@@ -13,6 +13,7 @@ const envSchema = z.object({
13
13
  HTTP_TIMEOUT: z.string().default('30000'),
14
14
  HEADERS_TIMEOUT: z.string().default('60000'),
15
15
  CHAT_TIMEOUT: z.string().default('120000'),
16
+ STREAM_IDLE_TIMEOUT: z.string().default('180000'),
16
17
  CACHE_TTL: z.string().default('3600'),
17
18
  RESPONSE_TTL: z.string().default('1800'),
18
19
  METRICS_INTERVAL: z.string().default('10000'),
@@ -62,6 +63,7 @@ export const config = {
62
63
  http: parseInt(env.HTTP_TIMEOUT),
63
64
  headers: parseInt(env.HEADERS_TIMEOUT),
64
65
  chat: parseInt(env.CHAT_TIMEOUT),
66
+ streamIdle: parseInt(env.STREAM_IDLE_TIMEOUT),
65
67
  },
66
68
  cache: {
67
69
  defaultTTL: parseInt(env.CACHE_TTL),
@@ -128,14 +128,217 @@ function parseQwenErrorPayload(raw: string): { message: string; status: number }
128
128
  return { message: `Qwen upstream error: ${msg}`, status: 502 };
129
129
  }
130
130
  } catch {
131
- // Non-SSE, non-JSON upstream body. Keep this as an explicit bad gateway
132
- // instead of silently returning an empty assistant message.
133
131
  return { message: `Qwen upstream returned non-SSE response: ${text.slice(0, 300)}`, status: 502 };
134
132
  }
135
133
 
136
134
  return null;
137
135
  }
138
136
 
137
/**
 * Returns the object that carries a tool's `name`, `description` and `parameters`.
 *
 * For the nested shape (`{ type: 'function', function: {...} }`) this is the inner
 * `function` object. Every other value is returned unchanged, including flat
 * definitions that set `type: 'function'` but keep their fields at the top level;
 * previously those resolved to `undefined` and lost their name and parameters.
 *
 * @param tool Tool definition in either shape; may be null or undefined.
 * @returns The nested `function` object when present, otherwise `tool` itself.
 */
function getToolFunction(tool: FunctionToolDefinition | any): any {
  const nested = tool?.type === 'function' ? tool.function : undefined;
  // Only unwrap when there is an actual object to unwrap.
  return nested && typeof nested === 'object' ? nested : tool;
}
140
+
141
/**
 * Reads the tool's name from the object returned by `getToolFunction`.
 *
 * @returns The name, or an empty string when it is missing or falsy.
 */
function getToolName(tool: FunctionToolDefinition | any): string {
  const definition = getToolFunction(tool);
  const name = definition?.name;
  return name ? name : '';
}
144
+
145
/**
 * Reads the tool's description from the object returned by `getToolFunction`.
 *
 * @returns The description, or an empty string when it is missing or falsy.
 */
function getToolDescription(tool: FunctionToolDefinition | any): string {
  const definition = getToolFunction(tool);
  const description = definition?.description;
  return description ? description : '';
}
148
+
149
/**
 * Reads `parameters.properties` from the object returned by `getToolFunction`.
 *
 * @returns The properties map, or an empty object when it is missing or falsy.
 */
function getToolParameters(tool: FunctionToolDefinition | any): Record<string, any> {
  const definition = getToolFunction(tool);
  const properties = definition?.parameters?.properties;
  return properties ? properties : {};
}
152
+
153
/**
 * Collects the names listed in the tool's `parameters.required`.
 *
 * Tool schemas come from the request body, so `required` is validated before use:
 * a non-array value yields an empty set instead of throwing (for example
 * `new Set(true)`) or splitting a string into characters, and non-string entries
 * are dropped.
 *
 * @returns The set of required parameter names; empty when none are declared.
 */
function getRequiredParams(tool: FunctionToolDefinition | any): Set<string> {
  const required: unknown = getToolFunction(tool)?.parameters?.required;
  if (!Array.isArray(required)) return new Set<string>();
  return new Set<string>(required.filter((entry): entry is string => typeof entry === 'string'));
}
156
+
157
/**
 * Collapses every run of whitespace into a single space, trims the ends, and
 * cuts the result to `maxChars` characters followed by `...` when it is longer.
 *
 * @param text Text to compact.
 * @param maxChars Maximum length kept before the ellipsis (default 180).
 */
function compactPromptText(text: string, maxChars = 180): string {
  const singleLine = text.replace(/\s+/g, ' ').trim();
  const fits = singleLine.length <= maxChars;
  return fits ? singleLine : `${singleLine.slice(0, maxChars)}...`;
}
162
+
163
/**
 * Extracts the tool name from an object-form `tool_choice`
 * (`{ function: { name } }`).
 *
 * String choices, null/undefined, and objects without a usable name yield an
 * empty string. The value comes straight from the request body, so the name is
 * returned only when it is a string; a non-string name used to be returned as-is
 * despite the declared `string` return type.
 *
 * @param toolChoice Raw `tool_choice` value from the request.
 * @returns The forced tool name, or '' when none is specified.
 */
function getForcedToolName(toolChoice: any): string {
  if (toolChoice === null || typeof toolChoice !== 'object') return '';
  const name: unknown = toolChoice.function?.name;
  return typeof name === 'string' ? name : '';
}
169
+
170
/**
 * Lower-cases `text` and returns the distinct runs of `[a-z0-9_./-]` characters
 * that are at least three characters long.
 */
function tokenizeForToolScoring(text: string): Set<string> {
  const words = text.toLowerCase().match(/[a-z0-9_./-]+/g) || [];
  return new Set<string>(words.filter(word => word.length >= 3));
}
177
+
178
/**
 * Scores how relevant a tool is to the given context text. The score is the sum of:
 *  - 100 when the tool's name equals a non-empty `forcedToolName`;
 *  - 35 when the name is in `recentToolNames`;
 *  - 20 for each `_ . / -`-separated part of the name (3+ characters) that is a context token;
 *  - 2 for each context token contained in the lower-cased "name description params" text;
 *  - 3 for each parameter name whose lower-cased form is a context token.
 */
function scoreToolForContext(tool: FunctionToolDefinition, contextText: string, forcedToolName: string, recentToolNames: Set<string>): number {
  const toolName = getToolName(tool);
  const toolDescription = getToolDescription(tool);
  const paramNames = Object.keys(getToolParameters(tool));
  const contextTokens = tokenizeForToolScoring(contextText);
  const haystack = `${toolName} ${toolDescription} ${paramNames.join(' ')}`.toLowerCase();

  const forcedBonus = forcedToolName && toolName === forcedToolName ? 100 : 0;
  const recentBonus = recentToolNames.has(toolName) ? 35 : 0;

  const matchingNameParts = toolName
    .toLowerCase()
    .split(/[_./-]+/)
    .filter(part => part.length >= 3 && contextTokens.has(part));

  const mentionedTokens = [...contextTokens].filter(token => haystack.includes(token));
  const matchingParams = paramNames.filter(param => contextTokens.has(param.toLowerCase()));

  return forcedBonus
    + recentBonus
    + matchingNameParts.length * 20
    + mentionedTokens.length * 2
    + matchingParams.length * 3;
}
204
+
205
/**
 * Collects tool names referenced in the last 12 messages: the function names of
 * assistant `tool_calls`, and the `name` of `tool` / `function` role messages.
 */
function getRecentToolNames(messages: Message[]): Set<string> {
  const names = new Set<string>();

  messages.slice(-12).forEach(message => {
    const isAssistantWithCalls = message.role === 'assistant' && Array.isArray(message.tool_calls);
    if (isAssistantWithCalls) {
      for (const call of message.tool_calls) {
        const calledName = call?.function?.name;
        if (calledName) names.add(calledName);
      }
    }

    const isToolResult = message.role === 'tool' || message.role === 'function';
    if (isToolResult && message.name) {
      names.add(message.name);
    }
  });

  return names;
}
222
+
223
/**
 * Picks at most `maxTools` tools for the prompt.
 *
 * When the list already fits it is returned as-is. Otherwise tools are scored
 * with `scoreToolForContext`; those with a positive score (or matching the
 * forced name) are ordered by descending score, ties broken by name, and the
 * top `maxTools` are returned. If nothing qualifies, the first `maxTools` tools
 * are returned.
 */
function selectCandidateTools(
  tools: FunctionToolDefinition[],
  contextText: string,
  forcedToolName = '',
  recentToolNames: Set<string> = new Set(),
  maxTools = 12
): FunctionToolDefinition[] {
  if (tools.length <= maxTools) return tools;

  const ranked: Array<{ tool: FunctionToolDefinition; score: number }> = [];
  for (const tool of tools) {
    const score = scoreToolForContext(tool, contextText, forcedToolName, recentToolNames);
    if (score > 0 || (forcedToolName && getToolName(tool) === forcedToolName)) {
      ranked.push({ tool, score });
    }
  }

  if (ranked.length === 0) {
    return tools.slice(0, maxTools);
  }

  ranked.sort((left, right) => {
    const byScore = right.score - left.score;
    return byScore || getToolName(left.tool).localeCompare(getToolName(right.tool));
  });

  return ranked.slice(0, maxTools).map(candidate => candidate.tool);
}
243
+
244
/**
 * Renders one line per tool as `name(param[?]: type, ...) - description`,
 * under a `[COMPACT TOOL MANIFEST]` header.
 *
 * Optional parameters get a `?` suffix, a missing schema type is shown as `any`,
 * the description is compacted to 140 characters, and the tool matching
 * `forcedToolName` is suffixed with ` [required]`. Returns '' for an empty list.
 */
function buildCompactToolManifest(tools: FunctionToolDefinition[], forcedToolName = ''): string {
  if (tools.length === 0) return '';

  const describeTool = (tool: FunctionToolDefinition): string => {
    const toolName = getToolName(tool);
    const summary = compactPromptText(getToolDescription(tool), 140);
    const properties = getToolParameters(tool);
    const requiredNames = getRequiredParams(tool);

    const paramParts: string[] = [];
    for (const [paramName, schema] of Object.entries(properties)) {
      const optionalMark = requiredNames.has(paramName) ? '' : '?';
      const typeName = schema?.type || 'any';
      paramParts.push(`${paramName}${optionalMark}: ${typeName}`);
    }

    const summarySuffix = summary ? ` - ${summary}` : '';
    const requiredSuffix = forcedToolName && toolName === forcedToolName ? ' [required]' : '';
    return `${toolName}(${paramParts.join(', ')})${summarySuffix}${requiredSuffix}`;
  };

  const manifestLines = tools.map(tool => describeTool(tool));
  return `[COMPACT TOOL MANIFEST]\n${manifestLines.join('\n')}`;
}
266
+
267
// Builds the "[TOOL CALL CONTRACT - MUST FOLLOW]" prompt section.
// - tools: their names (via getToolName, empty names dropped) are listed
//   comma-separated, or "none" when no names remain.
// - forcedToolName: when non-empty, rule 4 says this turn strongly expects that
//   tool; otherwise rule 4 says to call a tool only when an external action is needed.
// - parallelToolCalls: when true, rule 5 allows multiple tool call blocks only on
//   explicit user request; otherwise rule 5 limits output to one block.
// Returns the section as a single multi-line string.
// NOTE(review): this listing shows the code with leading whitespace stripped, so
// the exact indentation inside the template literal cannot be confirmed here.
+ function buildToolCallContract(
268
+ tools: FunctionToolDefinition[],
269
+ forcedToolName = '',
270
+ parallelToolCalls = true
271
+ ): string {
272
+ const names = tools.map(getToolName).filter(Boolean);
273
+ const toolList = names.length > 0 ? names.join(', ') : 'none';
274
+ const forcedLine = forcedToolName
275
+ ? `This turn strongly expects the tool "${forcedToolName}". If you call a tool, prefer this exact name.`
276
+ : 'Only call a tool when the user request requires an external action.';
277
+ const parallelLine = parallelToolCalls
278
+ ? 'You may emit multiple tool call blocks only when the user explicitly asks for multiple independent actions.'
279
+ : 'Emit at most one tool call block.';
280
+
281
+ return `[TOOL CALL CONTRACT - MUST FOLLOW]
282
+ Available tool names: ${toolList}
283
+ Format:
284
+
285
+ <tool_call>
286
+ {"name": "tool_name", "arguments": {"param_name": "value"}}
287
+ </tool_call>
288
+
289
+ Rules:
290
+ 1. Use exact tool names from the list above or the full TOOLS AVAILABLE section.
291
+ 2. Do not invent, guess, rename, or approximate tool names.
292
+ 3. Do not output raw JSON as a tool call.
293
+ 4. ${forcedLine}
294
+ 5. ${parallelLine}
295
+ 6. If no tool is needed, do not emit any tool call block.`;
296
+ }
297
+
298
/**
 * Normalizes tool-call arguments into a plain object.
 *
 * A string is parsed as JSON and accepted only if it yields a non-array object;
 * a non-array object is returned as-is; everything else (invalid JSON, arrays,
 * primitives, null) becomes `{}`.
 */
function parseToolArguments(value: unknown): Record<string, unknown> {
  const isPlainObject = (candidate: unknown): boolean =>
    Boolean(candidate) && typeof candidate === 'object' && !Array.isArray(candidate);

  if (typeof value !== 'string') {
    return isPlainObject(value) ? (value as Record<string, unknown>) : {};
  }

  let decoded: unknown;
  try {
    decoded = JSON.parse(value);
  } catch {
    return {};
  }
  return isPlainObject(decoded) ? (decoded as Record<string, unknown>) : {};
}
312
+
313
/**
 * Cheap textual check for a tool call emitted as bare JSON: the trimmed text
 * must start with `{` or `[` and contain both a quoted `name` key and a quoted
 * `arguments` key (single or double quotes).
 */
function looksLikeUnwrappedToolCall(text: string): boolean {
  const candidate = text.trim();
  const startsLikeJson = candidate.startsWith('{') || candidate.startsWith('[');
  if (!startsLikeJson) return false;

  const hasNameKey = /["']name["']\s*:/.test(candidate);
  const hasArgumentsKey = /["']arguments["']\s*:/.test(candidate);
  return hasNameKey && hasArgumentsKey;
}
318
+
319
/**
 * Extracts tool calls from text that is bare JSON (a single object or an array)
 * rather than wrapped in `<tool_call>` tags.
 *
 * Returns [] when the text does not pass `looksLikeUnwrappedToolCall` or cannot
 * be parsed. For each object entry the name is taken from `name`,
 * `function.name`, `tool_name` or `tool`; entries without a string name are
 * skipped. The id falls back to `tool_call_id`, then to a generated
 * `call_<uuid>`; arguments are taken from the first truthy of `arguments`,
 * `function.arguments`, `args`, `parameters`, `input`.
 */
function parseUnwrappedToolCalls(text: string): Array<{ id: string; name: string; arguments: Record<string, unknown> }> {
  if (!looksLikeUnwrappedToolCall(text)) return [];

  try {
    const decoded = robustParseJSON(text);
    const entries: any[] = Array.isArray(decoded) ? decoded : [decoded];

    return entries.flatMap((entry: any) => {
      if (!entry || typeof entry !== 'object') return [];

      const toolName = entry.name || entry.function?.name || entry.tool_name || entry.tool;
      if (!toolName || typeof toolName !== 'string') return [];

      const rawArguments = entry.arguments || entry.function?.arguments || entry.args || entry.parameters || entry.input || {};
      return [{
        id: entry.id || entry.tool_call_id || `call_${crypto.randomUUID()}`,
        name: toolName,
        arguments: parseToolArguments(rawArguments),
      }];
    });
  } catch {
    return [];
  }
}
341
+
139
342
  export async function chatCompletions(c: Context) {
140
343
  try {
141
344
  const body: OpenAIRequest = await c.req.json();
@@ -250,6 +453,11 @@ export async function chatCompletions(c: Context) {
250
453
  const modelContextWindow = getModelContextWindow(modelId)
251
454
  const estimatedTokens = estimateTokenCount(systemPrompt + prompt, modelId);
252
455
  const hasTools = Array.isArray(bodyAny.tools) && bodyAny.tools.length > 0;
456
+ const forcedToolName = getForcedToolName(bodyAny.tool_choice);
457
+ const parallelToolCalls = bodyAny.parallel_tool_calls !== false;
458
+ const toolContextText = `${systemPrompt}\n${prompt}`;
459
+ const recentToolNames = hasTools ? getRecentToolNames(messages) : new Set<string>();
460
+ const candidateTools = hasTools ? selectCandidateTools(bodyAny.tools, toolContextText, forcedToolName, recentToolNames) : [];
253
461
 
254
462
  let finalPrompt: string;
255
463
  if (estimatedTokens > modelContextWindow - 1000) {
@@ -260,9 +468,11 @@ export async function chatCompletions(c: Context) {
260
468
  finalPrompt = systemPrompt ? `${systemPrompt}\n${prompt}` : prompt;
261
469
  }
262
470
 
263
- // Reforço de instrução de tool call para contextos longos (mitiga "Lost in the Middle")
264
- if (hasTools && estimatedTokens > 15000) {
265
- finalPrompt += '\n\n[CRITICAL REMINDER: You MUST use the exact <tool_call> JSON format specified in the system instructions. Do not hallucinate tool names or output raw JSON without the tags.]';
471
+ if (hasTools) {
472
+ const compactManifest = buildCompactToolManifest(candidateTools, forcedToolName);
473
+ const toolContract = buildToolCallContract(candidateTools, forcedToolName, parallelToolCalls);
474
+ finalPrompt += `\n\n${toolContract}`;
475
+ if (compactManifest) finalPrompt += `\n\n${compactManifest}`;
266
476
  }
267
477
 
268
478
  const isThinkingModel = !body.model.includes('no-thinking');
@@ -498,6 +708,20 @@ export async function chatCompletions(c: Context) {
498
708
  });
499
709
  }
500
710
 
711
+ if (hasTools && toolCallsOut.length === 0) {
712
+ for (const tc of parseUnwrappedToolCalls(finalContent)) {
713
+ toolCallsOut.push({
714
+ id: tc.id,
715
+ type: 'function',
716
+ function: {
717
+ name: tc.name,
718
+ arguments: JSON.stringify(tc.arguments)
719
+ }
720
+ });
721
+ }
722
+ if (toolCallsOut.length > 0) finalContent = '';
723
+ }
724
+
501
725
  const usage = {
502
726
  prompt_tokens: parserState.promptTokens,
503
727
  completion_tokens: parserState.completionTokens,
@@ -687,7 +911,32 @@ export async function chatCompletions(c: Context) {
687
911
  if (hasTools && toolParser) {
688
912
  const { text, toolCalls } = toolParser.feed(vStr);
689
913
  if (text) {
690
- fastWriteContent(text);
914
+ if (hasTools && toolParser && looksLikeUnwrappedToolCall(text)) {
915
+ const unwrappedToolCalls = parseUnwrappedToolCalls(text);
916
+ const baseIndex = toolParser.getEmittedToolCallCount();
917
+ for (let idx = 0; idx < unwrappedToolCalls.length; idx++) {
918
+ const tc = unwrappedToolCalls[idx];
919
+ streamWriter.write(`data: ${JSON.stringify({
920
+ id: completionId,
921
+ object: 'chat.completion.chunk',
922
+ created: createdTimestamp,
923
+ model: body.model,
924
+ choices: [makeChoice({
925
+ tool_calls: [{
926
+ index: baseIndex + idx,
927
+ id: tc.id,
928
+ type: 'function',
929
+ function: {
930
+ name: tc.name,
931
+ arguments: JSON.stringify(tc.arguments)
932
+ }
933
+ }]
934
+ })]
935
+ })}\n\n`);
936
+ }
937
+ } else {
938
+ fastWriteContent(text);
939
+ }
691
940
  }
692
941
  for (const tc of toolCalls) {
693
942
  streamWriter.write(`data: ${JSON.stringify({
@@ -753,13 +1002,38 @@ export async function chatCompletions(c: Context) {
753
1002
  const flushResult = toolParser.flush();
754
1003
 
755
1004
  if (flushResult.text) {
756
- writeEvent({
757
- id: completionId,
758
- object: 'chat.completion.chunk',
759
- created: createdTimestamp,
760
- model: body.model,
761
- choices: [makeChoice({ content: flushResult.text })]
762
- });
1005
+ if (hasTools && toolParser && looksLikeUnwrappedToolCall(flushResult.text)) {
1006
+ const unwrappedToolCalls = parseUnwrappedToolCalls(flushResult.text);
1007
+ const baseIndex = toolParser.getEmittedToolCallCount();
1008
+ for (let idx = 0; idx < unwrappedToolCalls.length; idx++) {
1009
+ const tc = unwrappedToolCalls[idx];
1010
+ writeEvent({
1011
+ id: completionId,
1012
+ object: 'chat.completion.chunk',
1013
+ created: createdTimestamp,
1014
+ model: body.model,
1015
+ choices: [makeChoice({
1016
+ tool_calls: [{
1017
+ index: baseIndex + idx,
1018
+ id: tc.id,
1019
+ type: 'function',
1020
+ function: {
1021
+ name: tc.name,
1022
+ arguments: JSON.stringify(tc.arguments)
1023
+ }
1024
+ }]
1025
+ })]
1026
+ });
1027
+ }
1028
+ } else {
1029
+ writeEvent({
1030
+ id: completionId,
1031
+ object: 'chat.completion.chunk',
1032
+ created: createdTimestamp,
1033
+ model: body.model,
1034
+ choices: [makeChoice({ content: flushResult.text })]
1035
+ });
1036
+ }
763
1037
  }
764
1038
  for (const tc of flushResult.toolCalls) {
765
1039
  const idx = toolParser.getEmittedToolCallCount() - flushResult.toolCalls.length + flushResult.toolCalls.indexOf(tc);
@@ -771,8 +771,6 @@ async function _getQwenHeadersInternal(forceNew = false, accountId?: string): Pr
771
771
 
772
772
  console.log(`[Playwright] Setting up route interception for ${cacheKey}...`);
773
773
  const routeHandler = async (route: any, request: any) => {
774
- clearTimeout(timeout);
775
-
776
774
  const reqHeaders = request.headers();
777
775
  let uiSessionId = '';
778
776
  let uiParentMessageId: string | null = null;
@@ -806,6 +804,8 @@ async function _getQwenHeadersInternal(forceNew = false, accountId?: string): Pr
806
804
  return;
807
805
  }
808
806
 
807
+ clearTimeout(timeout);
808
+
809
809
  console.log(`[Playwright] Successfully intercepted headers for ${cacheKey}.`);
810
810
  cache.currentHeaders = extractedHeaders;
811
811
  cache.cachedQwenHeaders = { headers: extractedHeaders, chatSessionId: uiSessionId, parentMessageId: uiParentMessageId };
@@ -1114,15 +1114,18 @@ export async function browserStreamFetch(
1114
1114
  const enc = new TextEncoder();
1115
1115
 
1116
1116
  let metaResolve!: (value: { status: number; statusText: string; contentType: string; headers: Record<string, string> }) => void;
1117
- const metaPromise = new Promise<{ status: number; statusText: string; contentType: string; headers: Record<string, string> }>((resolve) => {
1117
+ let metaReject!: (reason: Error) => void;
1118
+ const metaPromise = new Promise<{ status: number; statusText: string; contentType: string; headers: Record<string, string> }>((resolve, reject) => {
1118
1119
  metaResolve = resolve;
1120
+ metaReject = reject;
1119
1121
  });
1120
1122
 
1123
+ const metaTimeoutMs = options.timeoutMs || config.timeouts.chat;
1121
1124
  const metaTimeout = setTimeout(() => {
1122
1125
  streamCallbacks.delete(reqId);
1123
1126
  abortControllers.delete(reqId);
1124
- metaResolve({ status: 0, statusText: 'Timeout', contentType: '', headers: {} });
1125
- }, options.timeoutMs || config.timeouts.chat);
1127
+ metaReject(new Error(`Browser stream fetch timed out waiting for response metadata after ${metaTimeoutMs}ms`));
1128
+ }, metaTimeoutMs);
1126
1129
 
1127
1130
  streamCallbacks.set(reqId, {
1128
1131
  onMeta: (meta) => {
@@ -1131,13 +1134,20 @@ export async function browserStreamFetch(
1131
1134
  },
1132
1135
  onChunk: () => {},
1133
1136
  onEnd: () => {},
1134
- onError: () => {},
1137
+ onError: (msg: string) => {
1138
+ clearTimeout(metaTimeout);
1139
+ metaReject(new Error(msg));
1140
+ },
1135
1141
  onBody: () => {},
1136
1142
  });
1137
1143
 
1138
1144
  let abortFn = () => {};
1139
1145
  let bodyResolve!: (value: string) => void;
1140
- const bodyPromise = new Promise<string>((resolve) => { bodyResolve = resolve; });
1146
+ let bodyReject!: (reason: Error) => void;
1147
+ const bodyPromise = new Promise<string>((resolve, reject) => {
1148
+ bodyResolve = resolve;
1149
+ bodyReject = reject;
1150
+ });
1141
1151
 
1142
1152
  const stream = new ReadableStream<Uint8Array>({
1143
1153
  start(controller) {
@@ -1148,11 +1158,13 @@ export async function browserStreamFetch(
1148
1158
  };
1149
1159
  cb.onEnd = () => {
1150
1160
  try { controller.close(); } catch {}
1161
+ bodyResolve('');
1151
1162
  streamCallbacks.delete(reqId);
1152
1163
  abortControllers.delete(reqId);
1153
1164
  };
1154
1165
  cb.onError = (msg: string) => {
1155
1166
  try { controller.error(new Error(msg)); } catch {}
1167
+ bodyReject(new Error(msg));
1156
1168
  streamCallbacks.delete(reqId);
1157
1169
  abortControllers.delete(reqId);
1158
1170
  };
@@ -1166,7 +1178,7 @@ export async function browserStreamFetch(
1166
1178
  const controller = new AbortController();
1167
1179
  (window as any).__abortControllers = (window as any).__abortControllers || {};
1168
1180
  (window as any).__abortControllers[reqId] = controller;
1169
- const timeoutId = setTimeout(() => controller.abort(), options.timeoutMs || 130000);
1181
+ const timeoutId = setTimeout(() => controller.abort(), options.timeoutMs || config.timeouts.chat);
1170
1182
  try {
1171
1183
  const resp = await fetch(url, {
1172
1184
  method: options.method || 'POST',
@@ -10,6 +10,69 @@ const TIMEOUT_PER_MB = 30000;
10
10
 
11
11
  const sleep = (ms: number) => new Promise(r => setTimeout(r, ms));
12
12
 
13
/**
 * Wraps `stream` so that it fails when no upstream chunk arrives within
 * `idleTimeoutMs`. The timer starts when the wrapper is created and restarts
 * after every chunk.
 *
 * On idle timeout, in order: `controller` is aborted, the returned stream is
 * errored with "<label> idle timeout after <ms>ms without upstream data",
 * `onTimeout` is called, the upstream is cancelled, and `onDone` is called.
 *
 * Fixes relative to the previous implementation:
 *  - The downstream controller is captured in `start()`, so the timeout error
 *    actually reaches the consumer. Before, the variable was never assigned
 *    because the `pull` parameter shadowed it.
 *  - Upstream cancellation goes through the reader. `stream.cancel()` on a
 *    stream locked by its own reader always rejects, so the upstream was never
 *    cancelled.
 *  - `onDone` runs exactly once, whichever of end, error, cancel or timeout
 *    happens first.
 *
 * @param stream Upstream byte stream; it is locked by this wrapper.
 * @param controller Abort controller to abort when the idle timeout fires.
 * @param idleTimeoutMs Maximum time between chunks, in milliseconds.
 * @param label Prefix for the timeout error message.
 * @param onTimeout Called once if the idle timeout fires.
 * @param onDone Called once when the wrapper finishes for any reason.
 * @returns A stream that relays the upstream chunks.
 */
function addIdleTimeoutToStream(
  stream: ReadableStream<Uint8Array>,
  controller: AbortController,
  idleTimeoutMs: number,
  label: string,
  onTimeout?: () => void,
  onDone?: () => void,
): ReadableStream<Uint8Array> {
  let idleTimer: ReturnType<typeof setTimeout> | undefined;
  let reader: ReadableStreamDefaultReader<Uint8Array> | undefined;
  let downstream: ReadableStreamDefaultController<Uint8Array> | undefined;
  let finished = false;

  const clearIdleTimer = () => {
    if (idleTimer !== undefined) {
      clearTimeout(idleTimer);
      idleTimer = undefined;
    }
  };

  // Marks the wrapper as finished exactly once, then settles the downstream side.
  const finish = (settleDownstream: () => void) => {
    if (finished) return;
    finished = true;
    clearIdleTimer();
    try {
      onDone?.();
    } finally {
      settleDownstream();
    }
  };

  const handleIdleTimeout = () => {
    if (finished) return;
    finished = true;
    idleTimer = undefined;
    const message = `${label} idle timeout after ${idleTimeoutMs}ms without upstream data`;
    controller.abort();
    downstream?.error(new Error(message));
    onTimeout?.();
    // Cancel through the reader: the upstream is locked, so stream.cancel() would reject.
    reader?.cancel(message).catch(() => {});
    onDone?.();
  };

  const resetIdleTimer = () => {
    clearIdleTimer();
    idleTimer = setTimeout(handleIdleTimeout, idleTimeoutMs);
  };

  return new ReadableStream<Uint8Array>({
    start(streamController) {
      downstream = streamController;
      reader = stream.getReader();
      resetIdleTimer();
    },
    async pull(streamController) {
      try {
        if (!reader) throw new Error('Stream reader was not initialized');
        const { done, value } = await reader.read();
        // A timeout or cancel may have settled the wrapper while the read was pending.
        if (finished) return;
        if (done) {
          finish(() => streamController.close());
          return;
        }
        resetIdleTimer();
        streamController.enqueue(value);
      } catch (err) {
        finish(() => streamController.error(err));
      }
    },
    cancel(reason) {
      finish(() => {});
      return reader ? reader.cancel(reason) : stream.cancel(reason);
    },
  });
}
75
+
13
76
  function getClientHintsHeaders(): Record<string, string> {
14
77
  return {
15
78
  'sec-ch-ua': CHROME_CLIENT_HINTS,
@@ -83,6 +146,8 @@ interface WarmPoolEntry {
83
146
 
84
147
  const warmPool: Map<string, WarmPoolEntry[]> = new Map();
85
148
 
149
+ const inFlightWarmChats = new Set<string>();
150
+
86
151
  const refillPromises: Map<string, Promise<void>> = new Map();
87
152
 
88
153
  const WARM_POOL_SIZE = 10;
@@ -97,6 +162,22 @@ function cleanupStalePool(accountId: string) {
97
162
  if (filtered.length !== pool.length) warmPool.set(accountId, filtered);
98
163
  }
99
164
 
165
/** Builds the `accountId:chatId` key used to track a warm chat. */
function warmChatKey(accountId: string, chatId: string): string {
  const separator = ':';
  return `${accountId}${separator}${chatId}`;
}
168
+
169
/** Adds the chat's key to the `inFlightWarmChats` set. */
function markWarmChatInFlight(accountId: string, chatId: string): void {
  const key = warmChatKey(accountId, chatId);
  inFlightWarmChats.add(key);
}
172
+
173
/** Removes the chat's key from the `inFlightWarmChats` set. */
function releaseWarmChat(accountId: string, chatId: string): void {
  const key = warmChatKey(accountId, chatId);
  inFlightWarmChats.delete(key);
}
176
+
177
/** Reports whether the chat's key is currently in the `inFlightWarmChats` set. */
function isWarmChatInFlight(accountId: string, chatId: string): boolean {
  const key = warmChatKey(accountId, chatId);
  return inFlightWarmChats.has(key);
}
180
+
100
181
  async function getBasicQwenHeaders(accountId?: string): Promise<Record<string, string>> {
101
182
  const { cookie, userAgent, bxV, bxUa, bxUmidtoken } = await getBasicHeaders(accountId);
102
183
  return {
@@ -289,6 +370,7 @@ async function refillPoolForAccount(accountId: string) {
289
370
  for (const chatId of unusedChats) {
290
371
  if (reused >= need) break;
291
372
  if (existingIds.has(chatId)) continue;
373
+ if (isWarmChatInFlight(accountId, chatId)) continue;
292
374
  pool.push({ chatId, headers, accountId, timestamp: Date.now() });
293
375
  existingIds.add(chatId);
294
376
  reused++;
@@ -348,7 +430,9 @@ export async function getWarmedChat(accountId?: string) {
348
430
  await refillPromises.get(key);
349
431
  }
350
432
  if (pool.length === 0) throw new Error(`Warm pool empty after retry for ${key}`);
351
- return pool.shift()!;
433
+ const entry = pool.shift()!;
434
+ markWarmChatInFlight(key, entry.chatId);
435
+ return entry;
352
436
  }
353
437
 
354
438
  export async function warmAllPools(accountIds: string[]) {
@@ -591,6 +675,34 @@ export async function createQwenStream(
591
675
  ): Promise<{ stream: ReadableStream, headers: Record<string, string>, uiSessionId: string, controller: AbortController, accountId: string }> {
592
676
  let chatId: string;
593
677
  let chatHeaders: Record<string, string>;
678
+ let leasedChat: WarmPoolEntry | undefined;
679
+ let leasedChatReleased = false;
680
+
681
+ const releaseLeasedChat = () => {
682
+ if (leasedChatReleased || !leasedChat) return;
683
+ leasedChatReleased = true;
684
+ releaseWarmChat(leasedChat.accountId, leasedChat.chatId);
685
+ };
686
+
687
+ const wrapLeasedStream = (
688
+ stream: ReadableStream<Uint8Array>,
689
+ controller: AbortController,
690
+ timeoutMs: number,
691
+ label: string,
692
+ onTimeout?: () => void,
693
+ ) => {
694
+ return addIdleTimeoutToStream(
695
+ stream,
696
+ controller,
697
+ timeoutMs,
698
+ label,
699
+ onTimeout,
700
+ () => {
701
+ onTimeout?.();
702
+ releaseLeasedChat();
703
+ },
704
+ );
705
+ };
594
706
 
595
707
  if (accountId === 'guest') {
596
708
  chatHeaders = await getGuestHeaders();
@@ -642,9 +754,8 @@ export async function createQwenStream(
642
754
  if (!chatId) throw new Error(`Unexpected guest chat response: ${JSON.stringify(json).slice(0, 200)}`);
643
755
  }
644
756
  } else {
645
- let chatEntry: WarmPoolEntry;
646
757
  try {
647
- chatEntry = await getWarmedChat(accountId);
758
+ leasedChat = await getWarmedChat(accountId);
648
759
  } catch (err: any) {
649
760
  if (err.message?.includes('chat is in progress') || err.message?.includes('The chat is in progress')) {
650
761
  const retryAfterMs = 2000 + Math.floor(Math.random() * 2000);
@@ -652,8 +763,8 @@ export async function createQwenStream(
652
763
  }
653
764
  throw err;
654
765
  }
655
- chatId = chatEntry.chatId;
656
- chatHeaders = chatEntry.headers;
766
+ chatId = leasedChat.chatId;
767
+ chatHeaders = leasedChat.headers;
657
768
  }
658
769
 
659
770
  const actualParentId: string | null = null;
@@ -692,7 +803,8 @@ export async function createQwenStream(
692
803
  }
693
804
  }
694
805
 
695
- const timestamp = Math.floor(Date.now() / 1000);
806
+ try {
807
+ const timestamp = Math.floor(Date.now() / 1000);
696
808
  const fid = crypto.randomUUID();
697
809
  const model = modelId.replace('-no-thinking', '');
698
810
 
@@ -766,7 +878,7 @@ export async function createQwenStream(
766
878
 
767
879
  if (browserResult.contentType.includes('text/event-stream') && browserResult.status < 400) {
768
880
  const controller = new AbortController();
769
- return { stream: browserResult.stream, headers: chatHeaders, uiSessionId: chatId, controller, accountId: accountId || 'guest' };
881
+ return { stream: wrapLeasedStream(browserResult.stream, controller, timeoutMs, `Qwen browser stream ${chatId}`, browserResult.abort), headers: chatHeaders, uiSessionId: chatId, controller, accountId: accountId || 'guest' };
770
882
  }
771
883
 
772
884
  if (browserResult.body) {
@@ -784,7 +896,7 @@ export async function createQwenStream(
784
896
  });
785
897
  if (retryResult.contentType.includes('text/event-stream') && retryResult.status < 400) {
786
898
  const controller = new AbortController();
787
- return { stream: retryResult.stream, headers: freshHeaders, uiSessionId: chatId, controller, accountId: accountId || 'guest' };
899
+ return { stream: wrapLeasedStream(retryResult.stream, controller, timeoutMs, `Qwen browser stream ${chatId}`, retryResult.abort), headers: freshHeaders, uiSessionId: chatId, controller, accountId: accountId || 'guest' };
788
900
  }
789
901
  if (retryResult.body && (retryResult.body.includes('FAIL_SYS_USER_VALIDATE') || retryResult.body.includes('_____tmd_____'))) {
790
902
  throw new QwenUpstreamError('Qwen TMD challenge persists after header refresh.', 'FAIL_SYS_USER_VALIDATE', 403);
@@ -872,7 +984,7 @@ export async function createQwenStream(
872
984
 
873
985
  const retryContentType = retryResponse.headers.get('content-type') || '';
874
986
  if (retryResponse.ok && retryContentType.includes('text/event-stream') && retryResponse.body) {
875
- return { stream: retryResponse.body, headers: freshHeaders, uiSessionId: chatId, controller: retryController, accountId: accountId || 'guest' };
987
+ return { stream: wrapLeasedStream(retryResponse.body, retryController, timeoutMs, `Qwen stream ${chatId}`), headers: freshHeaders, uiSessionId: chatId, controller: retryController, accountId: accountId || 'guest' };
876
988
  }
877
989
 
878
990
  const retryPeek = await retryResponse.clone().text().catch(() => '');
@@ -881,7 +993,7 @@ export async function createQwenStream(
881
993
  }
882
994
 
883
995
  if (retryResponse.ok && retryResponse.body) {
884
- return { stream: retryResponse.body, headers: freshHeaders, uiSessionId: chatId, controller: retryController, accountId: accountId || 'guest' };
996
+ return { stream: wrapLeasedStream(retryResponse.body, retryController, timeoutMs, `Qwen stream ${chatId}`), headers: freshHeaders, uiSessionId: chatId, controller: retryController, accountId: accountId || 'guest' };
885
997
  }
886
998
  } catch (retryErr) {
887
999
  if (retryErr instanceof QwenUpstreamError) throw retryErr;
@@ -904,7 +1016,11 @@ export async function createQwenStream(
904
1016
  throw new Error(`Failed to fetch from Qwen: ${response.status} ${response.statusText} - ${errText}`);
905
1017
  }
906
1018
 
907
- return { stream: response.body, headers: chatHeaders, uiSessionId: chatId, controller, accountId: accountId || 'guest' };
1019
+ return { stream: wrapLeasedStream(response.body, controller, timeoutMs, `Qwen stream ${chatId}`), headers: chatHeaders, uiSessionId: chatId, controller, accountId: accountId || 'guest' };
1020
+ } catch (err) {
1021
+ releaseLeasedChat();
1022
+ throw err;
1023
+ }
908
1024
  }
909
1025
 
910
1026
  function handleErrorBody(peekText: string, status: number): never {
@@ -135,6 +135,27 @@ test('truncateMessages: handles empty messages array', () => {
135
135
  assert.strictEqual(result.length, 0);
136
136
  });
137
137
 
138
// An assistant tool call and its tool result sit before an oversized user message.
// After truncation, the call name, its arguments and the result text must still
// appear somewhere in the returned messages, under an "[Earlier tool memory]" marker.
test('truncateMessages: preserves earlier tool memory when truncating history', () => {
  const readFileCall = {
    id: 'call_1',
    type: 'function',
    function: { name: 'read_file', arguments: JSON.stringify({ path: '/tmp/a.txt' }) },
  };
  const messages = [
    { role: 'assistant', content: 'I will inspect the file.', tool_calls: [readFileCall] },
    { role: 'tool', name: 'read_file', content: 'old tool result that should be summarized' },
    { role: 'user', content: 'x'.repeat(5000) },
  ];

  const result = truncateMessages(messages, 1000);

  const expectedFragments = ['[Earlier tool memory]', 'read_file', '/tmp/a.txt', 'old tool result'];
  for (const fragment of expectedFragments) {
    assert.ok(result.some(m => m.content.includes(fragment)));
  }
});
158
+
138
159
  test('truncateMessages: handles empty messages with system prompt fallback', () => {
139
160
  const result = truncateMessages([], 5, 'fallback');
140
161
  assert.strictEqual(result.length, 1);
@@ -30,6 +30,60 @@ function truncateSemantically(content: string, maxChars: number): string {
30
30
  return truncated + '... [Truncated]';
31
31
  }
32
32
 
33
// Upper bound on the number of lines in a tool-memory summary.
const TOOL_MEMORY_MAX_ITEMS = 24;
// Default character budget for a single summarized item.
const TOOL_MEMORY_ITEM_MAX_CHARS = 180;

/**
 * Collapses whitespace runs to single spaces, trims, and cuts the text to
 * `maxChars` characters followed by `... [truncated]` when it is longer.
 */
function summarizeContent(content: string, maxChars = TOOL_MEMORY_ITEM_MAX_CHARS): string {
  const singleLine = content.replace(/\s+/g, ' ').trim();
  return singleLine.length <= maxChars
    ? singleLine
    : `${singleLine.slice(0, maxChars)}... [truncated]`;
}
41
+
42
/**
 * Renders tool-call arguments as a compact string of at most 220 characters
 * (plus the truncation marker added by `summarizeContent`).
 *
 * `JSON.stringify` can fail in two ways: it throws (circular structures,
 * BigInt) or it returns `undefined` (for `undefined`, functions and symbols).
 * Both cases are handled explicitly by falling back to `String(args)`. The
 * `undefined` case used to reach the fallback only through an incidental
 * TypeError raised inside `summarizeContent`.
 */
function stringifyToolArgs(args: unknown): string {
  let serialized: string | undefined;
  try {
    serialized = JSON.stringify(args);
  } catch {
    serialized = undefined;
  }
  return summarizeContent(serialized ?? String(args), 220);
}
49
+
50
/**
 * Summarizes the tool activity in `messages` as a newline-separated list:
 *  - one `- call <id>: <name>(<args>)` line per assistant tool call;
 *  - one `- <name> response: <content>` line per `tool` / `function` message.
 *
 * Stops as soon as `TOOL_MEMORY_MAX_ITEMS` lines have been collected. Returns ''
 * when there is no tool activity.
 *
 * Null or undefined entries in `tool_calls` or in array content no longer throw:
 * `call.id` and `c.text` are now read with the same optional chaining as the
 * neighbouring accesses.
 */
function buildToolMemory(messages: Array<{ role: string; content: string | null | any[] | Record<string, unknown>; tool_calls?: any[]; name?: string; tool_call_id?: string }>): string {
  const lines: string[] = [];

  for (const msg of messages) {
    if (msg.role === 'assistant' && Array.isArray(msg.tool_calls)) {
      for (const call of msg.tool_calls) {
        const name = call?.function?.name || call?.name || 'unknown_tool';
        const rawArgs = call?.function?.arguments;
        let args: unknown = {};
        if (typeof rawArgs === 'string') {
          try {
            args = JSON.parse(rawArgs);
          } catch {
            // Keep unparseable argument text verbatim so it still shows up in the summary.
            args = rawArgs;
          }
        } else if (rawArgs !== undefined) {
          args = rawArgs;
        }
        lines.push(`- call ${call?.id || 'unknown'}: ${name}(${stringifyToolArgs(args)})`);
        if (lines.length >= TOOL_MEMORY_MAX_ITEMS) return lines.join('\n');
      }
    }

    if (msg.role === 'tool' || msg.role === 'function') {
      const contentStr = Array.isArray(msg.content)
        ? msg.content.map((c: any) => c?.text || JSON.stringify(c)).join('\n')
        : typeof msg.content === 'object' && msg.content !== null
          ? JSON.stringify(msg.content)
          : msg.content || '';
      const toolName = msg.name || msg.tool_call_id || 'tool';
      lines.push(`- ${toolName} response: ${summarizeContent(contentStr)}`);
      if (lines.length >= TOOL_MEMORY_MAX_ITEMS) return lines.join('\n');
    }
  }

  return lines.join('\n');
}
86
+
33
87
  export function truncateMessages(
34
88
  messages: Array<{ role: string; content: string | null | any[] | Record<string, unknown> }>,
35
89
  maxContextLength: number,
@@ -46,6 +100,7 @@ export function truncateMessages(
46
100
 
47
101
  const result: Array<{ role: string; content: string }> = [];
48
102
  let usedTokens = 0;
103
+ let droppedToolMemory = '';
49
104
 
50
105
  const normalizedMessages = messages.map(msg => {
51
106
  let contentStr = '';
@@ -56,7 +111,7 @@ export function truncateMessages(
56
111
  } else {
57
112
  contentStr = msg.content || '';
58
113
  }
59
- return { role: msg.role, content: contentStr };
114
+ return { role: msg.role, content: contentStr, tool_calls: (msg as any).tool_calls, name: (msg as any).name, tool_call_id: (msg as any).tool_call_id };
60
115
  });
61
116
 
62
117
  for (let i = normalizedMessages.length - 1; i >= 0; i--) {
@@ -73,6 +128,7 @@ export function truncateMessages(
73
128
  const truncatedContent = truncateSemantically(msg.content, maxChars);
74
129
  result.push({ role: msg.role, content: `[Truncated] ${truncatedContent}` });
75
130
  }
131
+ droppedToolMemory = buildToolMemory(normalizedMessages.slice(0, i));
76
132
  break;
77
133
  }
78
134
  }
@@ -84,6 +140,7 @@ export function truncateMessages(
84
140
  result.push({ role: lastMsg.role, content: `[Truncated] ${truncatedContent}` });
85
141
  }
86
142
 
87
- result.reverse();
88
- return result;
143
+ const truncated = result.reverse();
144
+ if (!droppedToolMemory) return truncated;
145
+ return [{ role: 'user', content: `[Earlier tool memory]\n${droppedToolMemory}` }, ...truncated];
89
146
  }