npm - @pedrofariasx/qwenproxy - Versions diffs - 1.6.4 → 1.7.0 - Mend

@pedrofariasx/qwenproxy 1.6.4 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/package.json +2 -3
package/src/core/config.ts +2 -0
package/src/routes/chat.ts +287 -13
package/src/services/playwright.ts +20 -8
package/src/services/qwen.ts +127 -11
package/src/tests/contextTruncation.test.ts +21 -0
package/src/utils/context-truncation.ts +60 -3

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@pedrofariasx/qwenproxy",
-  "version": "1.6.4",
+  "version": "1.7.0",
   "description": "Local OpenAI-compatible proxy API that routes requests to Qwen (chat.qwen.ai) via Playwright browser automation.",
   "main": "index.js",
   "scripts": {
@@ -22,7 +22,7 @@
     "@hono/node-server": "^2.0.3",
     "ajv": "^8.20.0",
     "ali-oss": "^6.23.0",
-    "better-sqlite3": "^12.10.0",
+    "better-sqlite3": "^12.10.1",
     "dotenv": "^17.4.2",
     "hono": "^4.12.21",
     "playwright": "^1.60.0",
@@ -38,7 +38,6 @@
     "@types/ali-oss": "^6.23.3",
     "@types/better-sqlite3": "^7.6.13",
     "@types/node": "^25.9.1",
-    "@types/uuid": "^11.0.0",
     "semantic-release": "^25.0.3",
     "typescript": "^6.0.3"
   },

package/src/core/config.ts CHANGED

@@ -13,6 +13,7 @@ const envSchema = z.object({
   HTTP_TIMEOUT: z.string().default('30000'),
   HEADERS_TIMEOUT: z.string().default('60000'),
   CHAT_TIMEOUT: z.string().default('120000'),
+  STREAM_IDLE_TIMEOUT: z.string().default('180000'),
   CACHE_TTL: z.string().default('3600'),
   RESPONSE_TTL: z.string().default('1800'),
   METRICS_INTERVAL: z.string().default('10000'),
@@ -62,6 +63,7 @@ export const config = {
     http: parseInt(env.HTTP_TIMEOUT),
     headers: parseInt(env.HEADERS_TIMEOUT),
     chat: parseInt(env.CHAT_TIMEOUT),
+    streamIdle: parseInt(env.STREAM_IDLE_TIMEOUT),
   },
   cache: {
     defaultTTL: parseInt(env.CACHE_TTL),

package/src/routes/chat.ts CHANGED

@@ -128,14 +128,217 @@ function parseQwenErrorPayload(raw: string): { message: string; status: number }
       return { message: `Qwen upstream error: ${msg}`, status: 502 };
     }
   } catch {
-    // Non-SSE, non-JSON upstream body. Keep this as an explicit bad gateway
-    // instead of silently returning an empty assistant message.
     return { message: `Qwen upstream returned non-SSE response: ${text.slice(0, 300)}`, status: 502 };
   }
   return null;
 }
+function getToolFunction(tool: FunctionToolDefinition | any): any {
+  return tool?.type === 'function' ? tool.function : tool;
+}
+function getToolName(tool: FunctionToolDefinition | any): string {
+  return getToolFunction(tool)?.name || '';
+}
+function getToolDescription(tool: FunctionToolDefinition | any): string {
+  return getToolFunction(tool)?.description || '';
+}
+function getToolParameters(tool: FunctionToolDefinition | any): Record<string, any> {
+  return getToolFunction(tool)?.parameters?.properties || {};
+}
+function getRequiredParams(tool: FunctionToolDefinition | any): Set<string> {
+  return new Set(getToolFunction(tool)?.parameters?.required || []);
+}
+function compactPromptText(text: string, maxChars = 180): string {
+  const compact = text.replace(/\s+/g, ' ').trim();
+  if (compact.length <= maxChars) return compact;
+  return `${compact.slice(0, maxChars)}...`;
+}
+function getForcedToolName(toolChoice: any): string {
+  if (toolChoice && typeof toolChoice === 'object' && toolChoice.function?.name) {
+    return toolChoice.function.name;
+  }
+  return '';
+}
+function tokenizeForToolScoring(text: string): Set<string> {
+  const tokens = new Set<string>();
+  for (const token of text.toLowerCase().match(/[a-z0-9_./-]+/g) || []) {
+    if (token.length >= 3) tokens.add(token);
+  }
+  return tokens;
+}
+function scoreToolForContext(tool: FunctionToolDefinition, contextText: string, forcedToolName: string, recentToolNames: Set<string>): number {
+  const name = getToolName(tool);
+  const description = getToolDescription(tool);
+  const params = Object.keys(getToolParameters(tool));
+  const tokens = tokenizeForToolScoring(contextText);
+  let score = 0;
+  if (forcedToolName && name === forcedToolName) score += 100;
+  if (recentToolNames.has(name)) score += 35;
+  const nameParts = name.toLowerCase().split(/[_./-]+/).filter(Boolean);
+  for (const part of nameParts) {
+    if (part.length >= 3 && tokens.has(part)) score += 20;
+  }
+  const toolText = `${name} ${description} ${params.join(' ')}`.toLowerCase();
+  for (const token of tokens) {
+    if (toolText.includes(token)) score += 2;
+  }
+  for (const param of params) {
+    if (tokens.has(param.toLowerCase())) score += 3;
+  }
+  return score;
+}
+function getRecentToolNames(messages: Message[]): Set<string> {
+  const recentToolNames = new Set<string>();
+  const recentMessages = messages.slice(-12);
+  for (const msg of recentMessages) {
+    if (msg.role === 'assistant' && Array.isArray(msg.tool_calls)) {
+      for (const call of msg.tool_calls) {
+        if (call?.function?.name) recentToolNames.add(call.function.name);
+      }
+    }
+    if ((msg.role === 'tool' || msg.role === 'function') && msg.name) {
+      recentToolNames.add(msg.name);
+    }
+  }
+  return recentToolNames;
+}
+function selectCandidateTools(
+  tools: FunctionToolDefinition[],
+  contextText: string,
+  forcedToolName = '',
+  recentToolNames: Set<string> = new Set(),
+  maxTools = 12
+): FunctionToolDefinition[] {
+  if (tools.length <= maxTools) return tools;
+  const scored = tools
+    .map(tool => ({ tool, score: scoreToolForContext(tool, contextText, forcedToolName, recentToolNames) }))
+    .filter(entry => entry.score > 0 || (forcedToolName && getToolName(entry.tool) === forcedToolName))
+    .sort((a, b) => b.score - a.score || getToolName(a.tool).localeCompare(getToolName(b.tool)));
+  if (scored.length === 0) {
+    return tools.slice(0, maxTools);
+  }
+  return scored.slice(0, maxTools).map(entry => entry.tool);
+}
+function buildCompactToolManifest(tools: FunctionToolDefinition[], forcedToolName = ''): string {
+  if (tools.length === 0) return '';
+  const lines = tools.map(tool => {
+    const name = getToolName(tool);
+    const description = compactPromptText(getToolDescription(tool), 140);
+    const params = getToolParameters(tool);
+    const required = getRequiredParams(tool);
+    const signature = Object.entries(params)
+      .map(([paramName, schema]: [string, any]) => {
+        const optional = required.has(paramName) ? '' : '?';
+        const type = schema?.type || 'any';
+        return `${paramName}${optional}: ${type}`;
+      })
+      .join(', ');
+    const marker = forcedToolName && name === forcedToolName ? ' [required]' : '';
+    return `${name}(${signature})${description ? ` - ${description}` : ''}${marker}`;
+  });
+  return `[COMPACT TOOL MANIFEST]\n${lines.join('\n')}`;
+}
+function buildToolCallContract(
+  tools: FunctionToolDefinition[],
+  forcedToolName = '',
+  parallelToolCalls = true
+): string {
+  const names = tools.map(getToolName).filter(Boolean);
+  const toolList = names.length > 0 ? names.join(', ') : 'none';
+  const forcedLine = forcedToolName
+    ? `This turn strongly expects the tool "${forcedToolName}". If you call a tool, prefer this exact name.`
+    : 'Only call a tool when the user request requires an external action.';
+  const parallelLine = parallelToolCalls
+    ? 'You may emit multiple tool call blocks only when the user explicitly asks for multiple independent actions.'
+    : 'Emit at most one tool call block.';
+  return `[TOOL CALL CONTRACT - MUST FOLLOW]
+Available tool names: ${toolList}
+Format:
+<tool_call>
+{"name": "tool_name", "arguments": {"param_name": "value"}}
+</tool_call>
+Rules:
+1. Use exact tool names from the list above or the full TOOLS AVAILABLE section.
+2. Do not invent, guess, rename, or approximate tool names.
+3. Do not output raw JSON as a tool call.
+4. ${forcedLine}
+5. ${parallelLine}
+6. If no tool is needed, do not emit any tool call block.`;
+}
+function parseToolArguments(value: unknown): Record<string, unknown> {
+  if (typeof value === 'string') {
+    try {
+      const parsed = JSON.parse(value);
+      return parsed && typeof parsed === 'object' && !Array.isArray(parsed) ? parsed : {};
+    } catch {
+      return {};
+    }
+  }
+  if (value && typeof value === 'object' && !Array.isArray(value)) {
+    return value as Record<string, unknown>;
+  }
+  return {};
+}
+function looksLikeUnwrappedToolCall(text: string): boolean {
+  const trimmed = text.trim();
+  if (!trimmed.startsWith('{') && !trimmed.startsWith('[')) return false;
+  return /["']name["']\s*:/.test(trimmed) && /["']arguments["']\s*:/.test(trimmed);
+}
+function parseUnwrappedToolCalls(text: string): Array<{ id: string; name: string; arguments: Record<string, unknown> }> {
+  if (!looksLikeUnwrappedToolCall(text)) return [];
+  try {
+    const parsed = robustParseJSON(text);
+    const items = Array.isArray(parsed) ? parsed : [parsed];
+    return items
+      .filter(item => item && typeof item === 'object')
+      .map((item: any) => {
+        const name = item.name || item.function?.name || item.tool_name || item.tool;
+        if (!name || typeof name !== 'string') return null;
+        return {
+          id: item.id || item.tool_call_id || `call_${crypto.randomUUID()}`,
+          name,
+          arguments: parseToolArguments(item.arguments || item.function?.arguments || item.args || item.parameters || item.input || {}),
+        };
+      })
+      .filter((item: any): item is { id: string; name: string; arguments: Record<string, unknown> } => item !== null);
+  } catch {
+    return [];
+  }
+}
 export async function chatCompletions(c: Context) {
   try {
     const body: OpenAIRequest = await c.req.json();
@@ -250,6 +453,11 @@ export async function chatCompletions(c: Context) {
     const modelContextWindow = getModelContextWindow(modelId)
     const estimatedTokens = estimateTokenCount(systemPrompt + prompt, modelId);
     const hasTools = Array.isArray(bodyAny.tools) && bodyAny.tools.length > 0;
+    const forcedToolName = getForcedToolName(bodyAny.tool_choice);
+    const parallelToolCalls = bodyAny.parallel_tool_calls !== false;
+    const toolContextText = `${systemPrompt}\n${prompt}`;
+    const recentToolNames = hasTools ? getRecentToolNames(messages) : new Set<string>();
+    const candidateTools = hasTools ? selectCandidateTools(bodyAny.tools, toolContextText, forcedToolName, recentToolNames) : [];
     let finalPrompt: string;
     if (estimatedTokens > modelContextWindow - 1000) {
@@ -260,9 +468,11 @@ export async function chatCompletions(c: Context) {
       finalPrompt = systemPrompt ? `${systemPrompt}\n${prompt}` : prompt;
     }
-    // Reforço de instrução de tool call para contextos longos (mitiga "Lost in the Middle")
-    if (hasTools && estimatedTokens > 15000) {
-      finalPrompt += '\n\n[CRITICAL REMINDER: You MUST use the exact <tool_call> JSON format specified in the system instructions. Do not hallucinate tool names or output raw JSON without the tags.]';
+    if (hasTools) {
+      const compactManifest = buildCompactToolManifest(candidateTools, forcedToolName);
+      const toolContract = buildToolCallContract(candidateTools, forcedToolName, parallelToolCalls);
+      finalPrompt += `\n\n${toolContract}`;
+      if (compactManifest) finalPrompt += `\n\n${compactManifest}`;
     }
     const isThinkingModel = !body.model.includes('no-thinking');
@@ -498,6 +708,20 @@ export async function chatCompletions(c: Context) {
         });
       }
+      if (hasTools && toolCallsOut.length === 0) {
+        for (const tc of parseUnwrappedToolCalls(finalContent)) {
+          toolCallsOut.push({
+            id: tc.id,
+            type: 'function',
+            function: {
+              name: tc.name,
+              arguments: JSON.stringify(tc.arguments)
+            }
+          });
+        }
+        if (toolCallsOut.length > 0) finalContent = '';
+      }
       const usage = {
         prompt_tokens: parserState.promptTokens,
         completion_tokens: parserState.completionTokens,
@@ -687,7 +911,32 @@ export async function chatCompletions(c: Context) {
                   if (hasTools && toolParser) {
                     const { text, toolCalls } = toolParser.feed(vStr);
                     if (text) {
-                      fastWriteContent(text);
+                      if (hasTools && toolParser && looksLikeUnwrappedToolCall(text)) {
+                        const unwrappedToolCalls = parseUnwrappedToolCalls(text);
+                        const baseIndex = toolParser.getEmittedToolCallCount();
+                        for (let idx = 0; idx < unwrappedToolCalls.length; idx++) {
+                          const tc = unwrappedToolCalls[idx];
+                          streamWriter.write(`data: ${JSON.stringify({
+                            id: completionId,
+                            object: 'chat.completion.chunk',
+                            created: createdTimestamp,
+                            model: body.model,
+                            choices: [makeChoice({
+                              tool_calls: [{
+                                index: baseIndex + idx,
+                                id: tc.id,
+                                type: 'function',
+                                function: {
+                                  name: tc.name,
+                                  arguments: JSON.stringify(tc.arguments)
+                                }
+                              }]
+                            })]
+                          })}\n\n`);
+                        }
+                      } else {
+                        fastWriteContent(text);
+                      }
                     }
                     for (const tc of toolCalls) {
                       streamWriter.write(`data: ${JSON.stringify({
@@ -753,13 +1002,38 @@ export async function chatCompletions(c: Context) {
           const flushResult = toolParser.flush();
           if (flushResult.text) {
-            writeEvent({
-              id: completionId,
-              object: 'chat.completion.chunk',
-              created: createdTimestamp,
-              model: body.model,
-              choices: [makeChoice({ content: flushResult.text })]
-            });
+            if (hasTools && toolParser && looksLikeUnwrappedToolCall(flushResult.text)) {
+              const unwrappedToolCalls = parseUnwrappedToolCalls(flushResult.text);
+              const baseIndex = toolParser.getEmittedToolCallCount();
+              for (let idx = 0; idx < unwrappedToolCalls.length; idx++) {
+                const tc = unwrappedToolCalls[idx];
+                writeEvent({
+                  id: completionId,
+                  object: 'chat.completion.chunk',
+                  created: createdTimestamp,
+                  model: body.model,
+                  choices: [makeChoice({
+                    tool_calls: [{
+                      index: baseIndex + idx,
+                      id: tc.id,
+                      type: 'function',
+                      function: {
+                        name: tc.name,
+                        arguments: JSON.stringify(tc.arguments)
+                      }
+                    }]
+                  })]
+                });
+              }
+            } else {
+              writeEvent({
+                id: completionId,
+                object: 'chat.completion.chunk',
+                created: createdTimestamp,
+                model: body.model,
+                choices: [makeChoice({ content: flushResult.text })]
+              });
+            }
           }
           for (const tc of flushResult.toolCalls) {
             const idx = toolParser.getEmittedToolCallCount() - flushResult.toolCalls.length + flushResult.toolCalls.indexOf(tc);

package/src/services/playwright.ts CHANGED

@@ -771,8 +771,6 @@ async function _getQwenHeadersInternal(forceNew = false, accountId?: string): Pr
     console.log(`[Playwright] Setting up route interception for ${cacheKey}...`);
     const routeHandler = async (route: any, request: any) => {
-      clearTimeout(timeout);
       const reqHeaders = request.headers();
       let uiSessionId = '';
       let uiParentMessageId: string | null = null;
@@ -806,6 +804,8 @@ async function _getQwenHeadersInternal(forceNew = false, accountId?: string): Pr
         return;
       }
+      clearTimeout(timeout);
       console.log(`[Playwright] Successfully intercepted headers for ${cacheKey}.`);
       cache.currentHeaders = extractedHeaders;
       cache.cachedQwenHeaders = { headers: extractedHeaders, chatSessionId: uiSessionId, parentMessageId: uiParentMessageId };
@@ -1114,15 +1114,18 @@ export async function browserStreamFetch(
   const enc = new TextEncoder();
   let metaResolve!: (value: { status: number; statusText: string; contentType: string; headers: Record<string, string> }) => void;
-  const metaPromise = new Promise<{ status: number; statusText: string; contentType: string; headers: Record<string, string> }>((resolve) => {
+  let metaReject!: (reason: Error) => void;
+  const metaPromise = new Promise<{ status: number; statusText: string; contentType: string; headers: Record<string, string> }>((resolve, reject) => {
     metaResolve = resolve;
+    metaReject = reject;
   });
+  const metaTimeoutMs = options.timeoutMs || config.timeouts.chat;
   const metaTimeout = setTimeout(() => {
     streamCallbacks.delete(reqId);
     abortControllers.delete(reqId);
-    metaResolve({ status: 0, statusText: 'Timeout', contentType: '', headers: {} });
-  }, options.timeoutMs || config.timeouts.chat);
+    metaReject(new Error(`Browser stream fetch timed out waiting for response metadata after ${metaTimeoutMs}ms`));
+  }, metaTimeoutMs);
   streamCallbacks.set(reqId, {
     onMeta: (meta) => {
@@ -1131,13 +1134,20 @@ export async function browserStreamFetch(
     },
     onChunk: () => {},
     onEnd: () => {},
-    onError: () => {},
+    onError: (msg: string) => {
+      clearTimeout(metaTimeout);
+      metaReject(new Error(msg));
+    },
     onBody: () => {},
   });
   let abortFn = () => {};
   let bodyResolve!: (value: string) => void;
-  const bodyPromise = new Promise<string>((resolve) => { bodyResolve = resolve; });
+  let bodyReject!: (reason: Error) => void;
+  const bodyPromise = new Promise<string>((resolve, reject) => {
+    bodyResolve = resolve;
+    bodyReject = reject;
+  });
   const stream = new ReadableStream<Uint8Array>({
     start(controller) {
@@ -1148,11 +1158,13 @@ export async function browserStreamFetch(
       };
       cb.onEnd = () => {
         try { controller.close(); } catch {}
+        bodyResolve('');
         streamCallbacks.delete(reqId);
         abortControllers.delete(reqId);
       };
       cb.onError = (msg: string) => {
         try { controller.error(new Error(msg)); } catch {}
+        bodyReject(new Error(msg));
         streamCallbacks.delete(reqId);
         abortControllers.delete(reqId);
       };
@@ -1166,7 +1178,7 @@ export async function browserStreamFetch(
         const controller = new AbortController();
         (window as any).__abortControllers = (window as any).__abortControllers || {};
         (window as any).__abortControllers[reqId] = controller;
-        const timeoutId = setTimeout(() => controller.abort(), options.timeoutMs || 130000);
+        const timeoutId = setTimeout(() => controller.abort(), options.timeoutMs || config.timeouts.chat);
         try {
           const resp = await fetch(url, {
             method: options.method || 'POST',

package/src/services/qwen.ts CHANGED

@@ -10,6 +10,69 @@ const TIMEOUT_PER_MB = 30000;
 const sleep = (ms: number) => new Promise(r => setTimeout(r, ms));
+function addIdleTimeoutToStream(
+  stream: ReadableStream<Uint8Array>,
+  controller: AbortController,
+  idleTimeoutMs: number,
+  label: string,
+  onTimeout?: () => void,
+  onDone?: () => void,
+): ReadableStream<Uint8Array> {
+  let idleTimer: ReturnType<typeof setTimeout> | undefined;
+  let reader: ReadableStreamDefaultReader<Uint8Array> | undefined;
+  let streamController: ReadableStreamDefaultController<Uint8Array> | undefined;
+  const clearIdleTimer = () => {
+    if (idleTimer) {
+      clearTimeout(idleTimer);
+      idleTimer = undefined;
+    }
+  };
+  const resetIdleTimer = () => {
+    clearIdleTimer();
+    idleTimer = setTimeout(() => {
+      const message = `${label} idle timeout after ${idleTimeoutMs}ms without upstream data`;
+      const timeoutError = new Error(message);
+      clearIdleTimer();
+      controller.abort();
+      streamController?.error(timeoutError);
+      onTimeout?.();
+      try { stream.cancel(message).catch(() => {}); } catch {}
+    }, idleTimeoutMs);
+  };
+  return new ReadableStream<Uint8Array>({
+    start() {
+      reader = stream.getReader();
+      resetIdleTimer();
+    },
+    async pull(streamController) {
+      try {
+        if (!reader) throw new Error('Stream reader was not initialized');
+        const { done, value } = await reader.read();
+        if (done) {
+          clearIdleTimer();
+          onDone?.();
+          streamController.close();
+          return;
+        }
+        resetIdleTimer();
+        streamController.enqueue(value);
+      } catch (err) {
+        clearIdleTimer();
+        onDone?.();
+        streamController.error(err);
+      }
+    },
+    cancel(reason) {
+      clearIdleTimer();
+      onDone?.();
+      return stream.cancel(reason);
+    },
+  });
+}
 function getClientHintsHeaders(): Record<string, string> {
   return {
     'sec-ch-ua': CHROME_CLIENT_HINTS,
@@ -83,6 +146,8 @@ interface WarmPoolEntry {
 const warmPool: Map<string, WarmPoolEntry[]> = new Map();
+const inFlightWarmChats = new Set<string>();
 const refillPromises: Map<string, Promise<void>> = new Map();
 const WARM_POOL_SIZE = 10;
@@ -97,6 +162,22 @@ function cleanupStalePool(accountId: string) {
   if (filtered.length !== pool.length) warmPool.set(accountId, filtered);
 }
+function warmChatKey(accountId: string, chatId: string) {
+  return `${accountId}:${chatId}`;
+}
+function markWarmChatInFlight(accountId: string, chatId: string) {
+  inFlightWarmChats.add(warmChatKey(accountId, chatId));
+}
+function releaseWarmChat(accountId: string, chatId: string) {
+  inFlightWarmChats.delete(warmChatKey(accountId, chatId));
+}
+function isWarmChatInFlight(accountId: string, chatId: string) {
+  return inFlightWarmChats.has(warmChatKey(accountId, chatId));
+}
 async function getBasicQwenHeaders(accountId?: string): Promise<Record<string, string>> {
   const { cookie, userAgent, bxV, bxUa, bxUmidtoken } = await getBasicHeaders(accountId);
   return {
@@ -289,6 +370,7 @@ async function refillPoolForAccount(accountId: string) {
     for (const chatId of unusedChats) {
       if (reused >= need) break;
       if (existingIds.has(chatId)) continue;
+      if (isWarmChatInFlight(accountId, chatId)) continue;
       pool.push({ chatId, headers, accountId, timestamp: Date.now() });
       existingIds.add(chatId);
       reused++;
@@ -348,7 +430,9 @@ export async function getWarmedChat(accountId?: string) {
     await refillPromises.get(key);
   }
   if (pool.length === 0) throw new Error(`Warm pool empty after retry for ${key}`);
-  return pool.shift()!;
+  const entry = pool.shift()!;
+  markWarmChatInFlight(key, entry.chatId);
+  return entry;
 }
 export async function warmAllPools(accountIds: string[]) {
@@ -591,6 +675,34 @@ export async function createQwenStream(
 ): Promise<{ stream: ReadableStream, headers: Record<string, string>, uiSessionId: string, controller: AbortController, accountId: string }> {
   let chatId: string;
   let chatHeaders: Record<string, string>;
+  let leasedChat: WarmPoolEntry | undefined;
+  let leasedChatReleased = false;
+  const releaseLeasedChat = () => {
+    if (leasedChatReleased || !leasedChat) return;
+    leasedChatReleased = true;
+    releaseWarmChat(leasedChat.accountId, leasedChat.chatId);
+  };
+  const wrapLeasedStream = (
+    stream: ReadableStream<Uint8Array>,
+    controller: AbortController,
+    timeoutMs: number,
+    label: string,
+    onTimeout?: () => void,
+  ) => {
+    return addIdleTimeoutToStream(
+      stream,
+      controller,
+      timeoutMs,
+      label,
+      onTimeout,
+      () => {
+        onTimeout?.();
+        releaseLeasedChat();
+      },
+    );
+  };
   if (accountId === 'guest') {
     chatHeaders = await getGuestHeaders();
@@ -642,9 +754,8 @@ export async function createQwenStream(
       if (!chatId) throw new Error(`Unexpected guest chat response: ${JSON.stringify(json).slice(0, 200)}`);
     }
   } else {
-    let chatEntry: WarmPoolEntry;
     try {
-      chatEntry = await getWarmedChat(accountId);
+      leasedChat = await getWarmedChat(accountId);
     } catch (err: any) {
       if (err.message?.includes('chat is in progress') || err.message?.includes('The chat is in progress')) {
         const retryAfterMs = 2000 + Math.floor(Math.random() * 2000);
@@ -652,8 +763,8 @@ export async function createQwenStream(
       }
       throw err;
     }
-    chatId = chatEntry.chatId;
-    chatHeaders = chatEntry.headers;
+    chatId = leasedChat.chatId;
+    chatHeaders = leasedChat.headers;
   }
   const actualParentId: string | null = null;
@@ -692,7 +803,8 @@ export async function createQwenStream(
     }
   }
-  const timestamp = Math.floor(Date.now() / 1000);
+  try {
+    const timestamp = Math.floor(Date.now() / 1000);
   const fid = crypto.randomUUID();
   const model = modelId.replace('-no-thinking', '');
@@ -766,7 +878,7 @@ export async function createQwenStream(
       if (browserResult.contentType.includes('text/event-stream') && browserResult.status < 400) {
         const controller = new AbortController();
-        return { stream: browserResult.stream, headers: chatHeaders, uiSessionId: chatId, controller, accountId: accountId || 'guest' };
+        return { stream: wrapLeasedStream(browserResult.stream, controller, timeoutMs, `Qwen browser stream ${chatId}`, browserResult.abort), headers: chatHeaders, uiSessionId: chatId, controller, accountId: accountId || 'guest' };
       }
       if (browserResult.body) {
@@ -784,7 +896,7 @@ export async function createQwenStream(
             });
             if (retryResult.contentType.includes('text/event-stream') && retryResult.status < 400) {
               const controller = new AbortController();
-              return { stream: retryResult.stream, headers: freshHeaders, uiSessionId: chatId, controller, accountId: accountId || 'guest' };
+              return { stream: wrapLeasedStream(retryResult.stream, controller, timeoutMs, `Qwen browser stream ${chatId}`, retryResult.abort), headers: freshHeaders, uiSessionId: chatId, controller, accountId: accountId || 'guest' };
             }
             if (retryResult.body && (retryResult.body.includes('FAIL_SYS_USER_VALIDATE') || retryResult.body.includes('_____tmd_____'))) {
               throw new QwenUpstreamError('Qwen TMD challenge persists after header refresh.', 'FAIL_SYS_USER_VALIDATE', 403);
@@ -872,7 +984,7 @@ export async function createQwenStream(
         const retryContentType = retryResponse.headers.get('content-type') || '';
         if (retryResponse.ok && retryContentType.includes('text/event-stream') && retryResponse.body) {
-          return { stream: retryResponse.body, headers: freshHeaders, uiSessionId: chatId, controller: retryController, accountId: accountId || 'guest' };
+          return { stream: wrapLeasedStream(retryResponse.body, retryController, timeoutMs, `Qwen stream ${chatId}`), headers: freshHeaders, uiSessionId: chatId, controller: retryController, accountId: accountId || 'guest' };
         }
         const retryPeek = await retryResponse.clone().text().catch(() => '');
@@ -881,7 +993,7 @@ export async function createQwenStream(
         }
         if (retryResponse.ok && retryResponse.body) {
-          return { stream: retryResponse.body, headers: freshHeaders, uiSessionId: chatId, controller: retryController, accountId: accountId || 'guest' };
+          return { stream: wrapLeasedStream(retryResponse.body, retryController, timeoutMs, `Qwen stream ${chatId}`), headers: freshHeaders, uiSessionId: chatId, controller: retryController, accountId: accountId || 'guest' };
         }
       } catch (retryErr) {
         if (retryErr instanceof QwenUpstreamError) throw retryErr;
@@ -904,7 +1016,11 @@ export async function createQwenStream(
     throw new Error(`Failed to fetch from Qwen: ${response.status} ${response.statusText} - ${errText}`);
   }
-  return { stream: response.body, headers: chatHeaders, uiSessionId: chatId, controller, accountId: accountId || 'guest' };
+  return { stream: wrapLeasedStream(response.body, controller, timeoutMs, `Qwen stream ${chatId}`), headers: chatHeaders, uiSessionId: chatId, controller, accountId: accountId || 'guest' };
+  } catch (err) {
+    releaseLeasedChat();
+    throw err;
+  }
 }
 function handleErrorBody(peekText: string, status: number): never {

package/src/tests/contextTruncation.test.ts CHANGED

@@ -135,6 +135,27 @@ test('truncateMessages: handles empty messages array', () => {
   assert.strictEqual(result.length, 0);
 });
+test('truncateMessages: preserves earlier tool memory when truncating history', () => {
+  const messages = [
+    {
+      role: 'assistant',
+      content: 'I will inspect the file.',
+      tool_calls: [{ id: 'call_1', type: 'function', function: { name: 'read_file', arguments: JSON.stringify({ path: '/tmp/a.txt' }) } }],
+    },
+    {
+      role: 'tool',
+      name: 'read_file',
+      content: 'old tool result that should be summarized',
+    },
+    { role: 'user', content: 'x'.repeat(5000) },
+  ];
+  const result = truncateMessages(messages, 1000);
+  assert.ok(result.some(m => m.content.includes('[Earlier tool memory]')));
+  assert.ok(result.some(m => m.content.includes('read_file')));
+  assert.ok(result.some(m => m.content.includes('/tmp/a.txt')));
+  assert.ok(result.some(m => m.content.includes('old tool result')));
+});
 test('truncateMessages: handles empty messages with system prompt fallback', () => {
   const result = truncateMessages([], 5, 'fallback');
   assert.strictEqual(result.length, 1);

package/src/utils/context-truncation.ts CHANGED

@@ -30,6 +30,60 @@ function truncateSemantically(content: string, maxChars: number): string {
   return truncated + '... [Truncated]';
 }
+const TOOL_MEMORY_MAX_ITEMS = 24;
+const TOOL_MEMORY_ITEM_MAX_CHARS = 180;
+function summarizeContent(content: string, maxChars = TOOL_MEMORY_ITEM_MAX_CHARS): string {
+  const compact = content.replace(/\s+/g, ' ').trim();
+  if (compact.length <= maxChars) return compact;
+  return `${compact.slice(0, maxChars)}... [truncated]`;
+}
+function stringifyToolArgs(args: unknown): string {
+  try {
+    return summarizeContent(JSON.stringify(args), 220);
+  } catch {
+    return summarizeContent(String(args), 220);
+  }
+}
+function buildToolMemory(messages: Array<{ role: string; content: string | null | any[] | Record<string, unknown>; tool_calls?: any[]; name?: string; tool_call_id?: string }>): string {
+  const lines: string[] = [];
+  for (const msg of messages) {
+    if (msg.role === 'assistant' && Array.isArray(msg.tool_calls)) {
+      for (const call of msg.tool_calls) {
+        const name = call?.function?.name || call?.name || 'unknown_tool';
+        let args: unknown = {};
+        if (typeof call?.function?.arguments === 'string') {
+          try {
+            args = JSON.parse(call.function.arguments);
+          } catch {
+            args = call.function.arguments;
+          }
+        } else if (call?.function?.arguments !== undefined) {
+          args = call.function.arguments;
+        }
+        lines.push(`- call ${call.id || 'unknown'}: ${name}(${stringifyToolArgs(args)})`);
+        if (lines.length >= TOOL_MEMORY_MAX_ITEMS) return lines.join('\n');
+      }
+    }
+    if (msg.role === 'tool' || msg.role === 'function') {
+      const contentStr = Array.isArray(msg.content)
+        ? msg.content.map((c: any) => c.text || JSON.stringify(c)).join('\n')
+        : typeof msg.content === 'object' && msg.content !== null
+          ? JSON.stringify(msg.content)
+          : msg.content || '';
+      const toolName = msg.name || msg.tool_call_id || 'tool';
+      lines.push(`- ${toolName} response: ${summarizeContent(contentStr)}`);
+      if (lines.length >= TOOL_MEMORY_MAX_ITEMS) return lines.join('\n');
+    }
+  }
+  return lines.join('\n');
+}
 export function truncateMessages(
   messages: Array<{ role: string; content: string | null | any[] | Record<string, unknown> }>,
   maxContextLength: number,
@@ -46,6 +100,7 @@ export function truncateMessages(
   const result: Array<{ role: string; content: string }> = [];
   let usedTokens = 0;
+  let droppedToolMemory = '';
   const normalizedMessages = messages.map(msg => {
     let contentStr = '';
@@ -56,7 +111,7 @@ export function truncateMessages(
     } else {
       contentStr = msg.content || '';
     }
-    return { role: msg.role, content: contentStr };
+    return { role: msg.role, content: contentStr, tool_calls: (msg as any).tool_calls, name: (msg as any).name, tool_call_id: (msg as any).tool_call_id };
   });
   for (let i = normalizedMessages.length - 1; i >= 0; i--) {
@@ -73,6 +128,7 @@ export function truncateMessages(
         const truncatedContent = truncateSemantically(msg.content, maxChars);
         result.push({ role: msg.role, content: `[Truncated] ${truncatedContent}` });
       }
+      droppedToolMemory = buildToolMemory(normalizedMessages.slice(0, i));
       break;
     }
   }
@@ -84,6 +140,7 @@ export function truncateMessages(
     result.push({ role: lastMsg.role, content: `[Truncated] ${truncatedContent}` });
   }
-  result.reverse();
-  return result;
+  const truncated = result.reverse();
+  if (!droppedToolMemory) return truncated;
+  return [{ role: 'user', content: `[Earlier tool memory]\n${droppedToolMemory}` }, ...truncated];
 }