npm - @pedrofariasx/qwenproxy - Versions diffs - 1.2.0 → 1.2.2 - Mend

@pedrofariasx/qwenproxy 1.2.0 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29)

package/README.md +3 -13
package/package.json +1 -1
package/src/api/server.ts +0 -2
package/src/cache/memory-cache.ts +52 -18
package/src/routes/chat.ts +132 -79
package/src/routes/upload.ts +4 -4
package/src/services/playwright.ts +1 -0
package/src/services/qwen.ts +36 -15
package/src/tools/parser.ts +10 -13
package/src/utils/context-truncation.ts +36 -10
package/src/linter/extraction-engine.ts +0 -165
package/src/linter/index.ts +0 -258
package/src/linter/repair-normalize.ts +0 -245
package/src/linter/safety-gate.ts +0 -219
package/src/linter/streaming-state-machine.ts +0 -252
package/src/linter/structural-parser.ts +0 -352
package/src/linter/types.ts +0 -74
package/src/tests/linter.test.ts +0 -151
package/src/tests/parallel.test.ts +0 -42
package/src/tests/structureVerification.test.ts +0 -176
package/src/tools/ast.ts +0 -15
package/src/tools/coercion.ts +0 -67
package/src/tools/confidence.ts +0 -48
package/src/tools/detector.ts +0 -40
package/src/tools/executor.ts +0 -236
package/src/tools/pipeline.ts +0 -122
package/src/tools/registry-runtime.ts +0 -34
package/src/tools/repair.ts +0 -42
package/src/tools/validator.ts +0 -33

package/README.md CHANGED Viewed

@@ -39,7 +39,7 @@ graph TD
     Playwright --> Browser2[Browser - Conta 2]
     Playwright --> BrowserN[Browser - Conta N]
     Handler --> QwenAPI[chat.qwen.ai]
-    Handler --> Tools[Tool Executor]
+    Handler --> Tools[Tool Parser]
     subgraph "Persistência"
         Accounts
@@ -233,24 +233,14 @@ qwenproxy/
 │   │   ├── model-registry.ts    # Registro de modelos e context windows
 │   │   ├── stream-registry.ts   # Tracking de streams ativos
 │   │   └── watchdog.ts          # Health monitoring
-│   ├── linter/
-│   │   ├── bar.ts               # Facade
-│   │   ├── extraction-engine.ts # Extraction engine
-│   │   ├── foo.ts               # Exports
-│   │   ├── index.ts             # Main public API
-│   │   ├── repair-normalize.ts  # Repair and normalize
-│   │   ├── safety-gate.ts       # Safety gate
-│   │   ├── streaming-state-machine.ts # Streaming state machine
-│   │   ├── structural-parser.ts # Structural parser
-│   │   └── types.ts             # Types
 │   ├── routes/
-│   │   └── chat.ts              # Handler /v1/chat/completions
+│   │   ├── chat.ts              # Handler /v1/chat/completions
+│   │   └── upload.ts            # Handler /v1/upload (multimodal)
 │   ├── services/
 │   │   ├── playwright.ts        # Automação de navegador
 │   │   └── qwen.ts              # Integração com API do Qwen
 │   ├── tests/                   # Testes automatizados
 │   ├── tools/
-│   │   ├── executor.ts          # Execução de ferramentas
 │   │   ├── parser.ts            # Parser de <tool_call> tags
 │   │   ├── registry.ts          # Registro de tools
 │   │   ├── schema.ts            # Validação JSON Schema

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@pedrofariasx/qwenproxy",
-  "version": "1.2.0",
+  "version": "1.2.2",
   "description": "Local OpenAI-compatible proxy API that routes requests to Qwen (chat.qwen.ai) via Playwright browser automation.",
   "main": "index.js",
   "scripts": {

package/src/api/server.ts CHANGED Viewed

@@ -111,8 +111,6 @@ export async function startServer(): Promise<void> {
     await cache.close()
     const { closePlaywright } = await import('../services/playwright.js')
     await closePlaywright()
-    const { cleanupAllAccountMutexes } = await import('../routes/chat.js')
-    cleanupAllAccountMutexes()
     const { closeDatabase } = await import('../core/database.ts')
     closeDatabase()
     server?.close()

package/src/cache/memory-cache.ts CHANGED Viewed

@@ -19,16 +19,34 @@ export class MemoryCache {
   private defaultTTL: number
   private prefix: string
   private cleanupInterval: NodeJS.Timeout | null
+  private maxEntries: number
+  private totalBytes: number
-  constructor(options?: { prefix?: string; defaultTTL?: number }) {
+  constructor(options?: { prefix?: string; defaultTTL?: number; maxEntries?: number }) {
     this.prefix = options?.prefix || 'qwenproxy:'
     this.defaultTTL = options?.defaultTTL || config.cache.defaultTTL
+    this.maxEntries = options?.maxEntries || 10000
     this.store = new Map()
+    this.totalBytes = 0
     this.cleanupInterval = null
     this.startCleanup()
   }
+  private entryByteSize(key: string, value: any): number {
+    return Buffer.byteLength(key) + Buffer.byteLength(JSON.stringify(value))
+  }
+  private evictLRU(): void {
+    const oldest = this.store.keys().next()
+    if (!oldest.done) {
+      const evicted = this.store.get(oldest.value)
+      if (evicted) this.totalBytes -= this.entryByteSize(oldest.value, evicted.value)
+      this.store.delete(oldest.value)
+      metrics.increment('cache.evicted')
+    }
+  }
   private startCleanup(): void {
     this.cleanupInterval = setInterval(() => {
       const now = Date.now()
@@ -46,16 +64,28 @@ export class MemoryCache {
   async set<T>(key: CacheKey, value: T, ttl?: number): Promise<void> {
     const serialized = JSON.stringify(value)
+    const valueBytes = Buffer.byteLength(serialized)
     const effectiveTTL = ttl || this.defaultTTL
     const fullKey = this.prefix + key
+    const entrySize = Buffer.byteLength(fullKey) + valueBytes
+    if (this.store.has(fullKey)) {
+      const oldEntry = this.store.get(fullKey)
+      if (oldEntry) this.totalBytes -= this.entryByteSize(fullKey, oldEntry.value)
+    } else {
+      while (this.store.size >= this.maxEntries) {
+        this.evictLRU()
+      }
+    }
     this.store.set(fullKey, {
       value,
       expiresAt: Date.now() + (effectiveTTL * 1000)
     })
+    this.totalBytes += entrySize
     metrics.increment('cache.set')
-    metrics.histogram('cache.value.size', Buffer.byteLength(serialized))
+    metrics.histogram('cache.value.size', valueBytes)
   }
   async get<T>(key: CacheKey): Promise<T | null> {
@@ -66,26 +96,39 @@ export class MemoryCache {
     metrics.histogram('cache.get.latency', Date.now() - start)
     if (!entry || entry.expiresAt <= Date.now()) {
-      if (entry) this.store.delete(fullKey)
+      if (entry) {
+        this.totalBytes -= this.entryByteSize(fullKey, entry.value)
+        this.store.delete(fullKey)
+      }
       metrics.increment('cache.miss')
       return null
     }
+    this.store.delete(fullKey)
+    this.store.set(fullKey, entry)
     metrics.increment('cache.hit')
     return entry.value as T
   }
   async delete(key: CacheKey): Promise<void> {
     const fullKey = this.prefix + key
-    this.store.delete(fullKey)
-    metrics.increment('cache.deleted')
+    const entry = this.store.get(fullKey)
+    if (entry) {
+      this.totalBytes -= this.entryByteSize(fullKey, entry.value)
+      this.store.delete(fullKey)
+      metrics.increment('cache.deleted')
+    }
   }
   async exists(key: CacheKey): Promise<boolean> {
     const fullKey = this.prefix + key
     const entry = this.store.get(fullKey)
     if (!entry || entry.expiresAt <= Date.now()) {
-      if (entry) this.store.delete(fullKey)
+      if (entry) {
+        this.totalBytes -= this.entryByteSize(fullKey, entry.value)
+        this.store.delete(fullKey)
+      }
       return false
     }
     return true
@@ -157,20 +200,10 @@ export class MemoryCache {
     keysCount?: number
     memoryUsage?: string
   }> {
-    const now = Date.now()
-    let validKeys = 0
-    let totalBytes = 0
-    for (const [key, entry] of this.store.entries()) {
-      if (entry.expiresAt > now) {
-        validKeys++
-        totalBytes += Buffer.byteLength(JSON.stringify(entry.value)) + Buffer.byteLength(key)
-      }
-    }
     return {
       connected: true,
-      keysCount: validKeys,
-      memoryUsage: `${(totalBytes / 1024).toFixed(2)}KB`
+      keysCount: this.store.size,
+      memoryUsage: `${(this.totalBytes / 1024).toFixed(2)}KB`
     }
   }
@@ -180,6 +213,7 @@ export class MemoryCache {
       this.cleanupInterval = null
     }
     this.store.clear()
+    this.totalBytes = 0
   }
 }

package/src/routes/chat.ts CHANGED Viewed

@@ -10,7 +10,7 @@
 import { Context } from 'hono';
 import { stream as honoStream } from 'hono/streaming';
-import { v4 as uuidv4 } from 'uuid';
+import crypto from 'crypto';
 import { createQwenStream, updateSessionParent } from '../services/qwen.ts';
 import { OpenAIRequest, ChoiceDelta, Message } from '../utils/types.ts';
 import { registry } from '../tools/registry.ts';
@@ -25,33 +25,71 @@ import { getNextAccount, getNextAvailableAccount, markAccountRateLimited, getAcc
 import { registerStream, removeStream, getStream } from '../core/stream-registry.ts';
 import { metrics } from '../core/metrics.js'
-export function cleanupAllAccountMutexes(): void {
-  // No-op - kept for backward compatibility
-}
 export interface DeltaResult {
   delta: string;
   matchedContent: string;
+  contentLength: number;
+  contentSuffix: string;
 }
-export function getIncrementalDelta(oldStr: string, newStr: string): DeltaResult {
+export function getIncrementalDelta(oldStr: string, newStr: string, prevLength: number = 0, prevSuffix: string = ''): DeltaResult {
   if (!oldStr) {
-    return { delta: newStr, matchedContent: newStr };
+    return {
+      delta: newStr,
+      matchedContent: newStr,
+      contentLength: newStr.length,
+      contentSuffix: newStr.slice(-64)
+    };
   }
   if (newStr === oldStr) {
-    return { delta: '', matchedContent: oldStr };
+    return { delta: '', matchedContent: oldStr, contentLength: prevLength, contentSuffix: prevSuffix };
   }
-  // Fast path: incremental SSE streams append to oldStr most of the time
+  // Ultra-fast path: use length tracking to avoid O(n) startsWith on large strings
+  if (newStr.length > prevLength && prevLength > 0) {
+    const delta = newStr.slice(prevLength);
+    const checkLen = Math.min(64, prevLength);
+    const expectedSuffix = prevSuffix.slice(-checkLen);
+    const actualSuffix = newStr.slice(prevLength - checkLen, prevLength);
+    if (expectedSuffix === actualSuffix) {
+      if (delta.length <= 4 && oldStr.length > 2000) {
+        return {
+          delta: newStr,
+          matchedContent: oldStr + newStr,
+          contentLength: newStr.length,
+          contentSuffix: newStr.slice(-64)
+        };
+      }
+      return {
+        delta,
+        matchedContent: newStr,
+        contentLength: newStr.length,
+        contentSuffix: newStr.slice(-64)
+      };
+    }
+  }
+  // Fallback: startsWith check for edge cases
   if (newStr.startsWith(oldStr)) {
     const delta = newStr.slice(oldStr.length);
     if (delta.length <= 4 && oldStr.length > 2000) {
-      return { delta: newStr, matchedContent: oldStr + newStr };
+      return {
+        delta: newStr,
+        matchedContent: oldStr + newStr,
+        contentLength: newStr.length,
+        contentSuffix: newStr.slice(-64)
+      };
     }
-    return { delta, matchedContent: newStr };
+    return {
+      delta,
+      matchedContent: newStr,
+      contentLength: newStr.length,
+      contentSuffix: newStr.slice(-64)
+    };
   }
-  // Fallback: segment-based prefix matching
+  // Segment-based prefix matching (rare path)
   const scanWindow = Math.min(2000, oldStr.length);
   const maxLen = Math.min(scanWindow, newStr.length);
@@ -65,17 +103,27 @@ export function getIncrementalDelta(oldStr: string, newStr: string): DeltaResult
     commonPrefixLen += segmentLen;
   }
-  // Fine-grained scan within the mismatching segment
   while (commonPrefixLen < maxLen && oldStr[commonPrefixLen] === newStr[commonPrefixLen]) {
     commonPrefixLen++;
   }
   const threshold = Math.min(scanWindow, 4);
   if (commonPrefixLen >= threshold) {
-    return { delta: newStr.substring(commonPrefixLen), matchedContent: newStr };
+    return {
+      delta: newStr.substring(commonPrefixLen),
+      matchedContent: newStr,
+      contentLength: newStr.length,
+      contentSuffix: newStr.slice(-64)
+    };
   }
-  return { delta: newStr, matchedContent: oldStr + newStr };
+  const combined = oldStr + newStr;
+  return {
+    delta: newStr,
+    matchedContent: combined,
+    contentLength: combined.length,
+    contentSuffix: combined.slice(-64)
+  };
 }
 function parseQwenErrorPayload(raw: string): { message: string; status: number } | null {
@@ -119,29 +167,26 @@ export async function chatCompletions(c: Context) {
       const msg = messages[i];
       let contentStr = '';
       if (Array.isArray(msg.content)) {
-        // Handle multimodal content (text + images + videos + audio + files)
-        const multimodalParts = msg.content.filter(
-          (p: any) =>
+        // Single-pass: extract text and multimodal parts in one iteration
+        const textParts: string[] = [];
+        const multimodalParts: Array<{ type: string; text?: string; image_url?: { url: string }; video_url?: { url: string }; audio_url?: { url: string }; file_url?: { url: string } }> = [];
+        for (const p of msg.content as any[]) {
+          if (p.type === "text" && p.text) {
+            textParts.push(p.text);
+          } else if (
             (p.type === "image_url" && p.image_url?.url) ||
             (p.type === "video_url" && p.video_url?.url) ||
             (p.type === "audio_url" && p.audio_url?.url) ||
-            (p.type === "file_url" && p.file_url?.url),
-        );
+            (p.type === "file_url" && p.file_url?.url)
+          ) {
+            multimodalParts.push(p);
+          }
+        }
+        contentStr = textParts.join("\n");
         if (multimodalParts.length > 0) {
-          // Defer processing to after account selection to reuse cached headers
           pendingMultimodal.push(multimodalParts);
-          // Extract text parts for prompt building
-          contentStr = msg.content
-            .filter((p: any) => p.type === "text")
-            .map((p: any) => p.text)
-            .join("\n");
-        } else {
-          // No multimodal parts, just extract text
-          contentStr = msg.content
-            .filter((p: any) => p.type === "text")
-            .map((p: any) => p.text)
-            .join("\n");
         }
       } else if (typeof msg.content === 'object' && msg.content !== null) {
         contentStr = JSON.stringify(msg.content);
@@ -209,7 +254,7 @@ export async function chatCompletions(c: Context) {
       });
       const toolsJson = JSON.stringify(formattedTools, null, 2);
-      systemPrompt += `\n\n# TOOLS AVAILABLE\nYou have access to the following tools:\n${toolsJson}\n\n# TOOL CALLING FORMAT (MANDATORY)\nTo use a tool, you MUST output a JSON object wrapped EXACTLY in these tags:\n<tool_call>\n{"name": "tool_name", "arguments": {"param_name": "value"}}\n</tool_call>\n\nEXAMPLE OF MULTIPLE TOOL CALLS:\n<tool_call>\n{"name": "read_file", "arguments": {"path": "file1.txt"}}\n</tool_call>\n<tool_call>\n{"name": "read_file", "arguments": {"path": "file2.txt"}}\n</tool_call>\n\nCRITICAL RULES:\n1. ONLY use the tags above for tool calling. NEVER output raw JSON without tags.\n2. You can call multiple tools by outputting multiple <tool_call> blocks consecutively.\n3. Do NOT output any other text (explanations, chat, etc.) after your <tool_call> blocks. Wait for the user to provide the tool response.\n4. The JSON inside the tags MUST be valid and include ALL required braces and the "arguments" field.\n5. If you need to use a tool, do it IMMEDIATELY without preamble.\n\n`;
+      systemPrompt += `\n\n# TOOLS AVAILABLE\nYou have access to the following tools:\n${toolsJson}\n\n# TOOL CALLING FORMAT (MANDATORY)\nTo use a tool, you MUST output a JSON object wrapped EXACTLY in <tool_call> tags:\n\n<tool_call>\n{"name": "tool_name", "arguments": {"param_name": "value"}}\n</tool_call>\n\nEXAMPLE OF MULTIPLE TOOL CALLS:\n<tool_call>\n{"name": "read_file", "arguments": {"path": "file1.txt"}}\n</tool_call>\n<tool_call>\n{"name": "read_file", "arguments": {"path": "file2.txt"}}\n</tool_call>\n\nCRITICAL RULES:\n1. ONLY use the tags above for tool calling. NEVER output raw JSON without tags.\n2. You can call multiple tools by outputting multiple <tool_call> blocks consecutively.\n3. Do NOT output any other text (explanations, chat, etc.) after your <tool_call> blocks. Wait for the user to provide the tool response.\n4. The JSON inside the tags MUST be valid and include ALL required braces and the "arguments" field.\n5. If you need to use a tool, do it IMMEDIATELY without preamble.\n6. NEVER invent, guess, or hallucinate tool names. You MUST ONLY use the exact tool names provided in the 'TOOLS AVAILABLE' list above. Calling an unlisted tool will result in a hard execution error.\n\n`;
       if (bodyAny.tool_choice && typeof bodyAny.tool_choice === 'object' && bodyAny.tool_choice.function) {
         const forcedTool = bodyAny.tool_choice.function.name;
@@ -220,15 +265,22 @@ export async function chatCompletions(c: Context) {
     const modelId = body.model.replace('-no-thinking', '');
     const modelContextWindow = getModelContextWindow(modelId)
     const estimatedTokens = estimateTokenCount(systemPrompt + prompt);
+    const hasTools = Array.isArray(bodyAny.tools) && bodyAny.tools.length > 0;
     let finalPrompt: string;
     if (estimatedTokens > modelContextWindow - 1000) {
       const truncated = truncateMessages(messages, modelContextWindow, systemPrompt);
-      finalPrompt = truncated.map(m => `${m.role === 'user' ? 'User' : m.role === 'assistant' ? 'Assistant' : m.role}: ${m.content}`).join('\n\n');
+      const truncatedBody = truncated.map(m => `${m.role === 'user' ? 'User' : m.role === 'assistant' ? 'Assistant' : m.role}: ${m.content}`).join('\n\n');
+      finalPrompt = systemPrompt ? `${systemPrompt}\n\n${truncatedBody}` : truncatedBody;
     } else {
       finalPrompt = systemPrompt ? `${systemPrompt}\n${prompt}` : prompt;
     }
+    // Reforço de instrução de tool call para contextos longos (mitiga "Lost in the Middle")
+    if (hasTools && estimatedTokens > 15000) {
+      finalPrompt += '\n\n[CRITICAL REMINDER: You MUST use the exact <tool_call> JSON format specified in the system instructions. Do not hallucinate tool names or output raw JSON without the tags.]';
+    }
     const isThinkingModel = !body.model.includes('no-thinking');
     // A session is new if it doesn't have any assistant messages yet.
@@ -242,7 +294,7 @@ export async function chatCompletions(c: Context) {
     let stream: ReadableStream | undefined;
     let uiSessionId = '';
-    const completionId = 'chatcmpl-' + uuidv4();
+    const completionId = 'chatcmpl-' + crypto.randomUUID();
     while (account) {
       const accountId = account.id;
@@ -469,10 +521,30 @@ export async function chatCompletions(c: Context) {
           finish_reason: finishReason
         });
-        // Pre-compute timestamp once before the stream loop
         const createdTimestamp = Math.floor(Date.now() / 1000);
-        // Send initial chunk
+        const fastWriteContent = (content: string) => {
+          const chunk = JSON.stringify({
+            id: completionId,
+            object: 'chat.completion.chunk',
+            created: createdTimestamp,
+            model: body.model,
+            choices: [makeChoice({ content })]
+          });
+          streamWriter.write(`data: ${chunk}\n\n`);
+        };
+        const fastWriteReasoning = (content: string) => {
+          const chunk = JSON.stringify({
+            id: completionId,
+            object: 'chat.completion.chunk',
+            created: createdTimestamp,
+            model: body.model,
+            choices: [makeChoice({ reasoning_content: content })]
+          });
+          streamWriter.write(`data: ${chunk}\n\n`);
+        };
         writeEvent({
           id: completionId,
           object: 'chat.completion.chunk',
@@ -486,6 +558,8 @@ export async function chatCompletions(c: Context) {
         let reasoningBuffer = '';
         let lastFullContent = '';
+        let contentLength = 0;
+        let contentSuffix = '';
         let targetResponseId: string | null = null;
         let targetResponseIdSet = false;
         let currentThoughtIndex = 0;
@@ -493,27 +567,27 @@ export async function chatCompletions(c: Context) {
         const toolParser = hasTools ? new StreamingToolParser(bodyAny.tools) : null;
         let buffer = '';
+        let bufferOffset = 0;
         let completionTokens = 0;
         let promptTokens = Math.ceil(finalPrompt.length / 3.5);
-        // Real-time flush: send each event immediately to minimize latency
-        let chunkCount = 0;
         while (true) {
           const { done, value } = await reader.read();
           if (done) break;
           buffer += decoder.decode(value, { stream: true });
-           let startIdx = 0;
-           let newlineIdx: number;
-           while ((newlineIdx = buffer.indexOf('\n', startIdx)) !== -1) {
-             const line = buffer.slice(startIdx, newlineIdx);
-             startIdx = newlineIdx + 1;
+          while (bufferOffset < buffer.length) {
+            const newlineIdx = buffer.indexOf('\n', bufferOffset);
+            if (newlineIdx === -1) break;
+            const line = buffer.slice(bufferOffset, newlineIdx);
+            bufferOffset = newlineIdx + 1;
-             const trimmed = line.trim();
-             if (!trimmed || !trimmed.startsWith('data: ')) continue;
+            const trimmed = line.trim();
+            if (!trimmed || !trimmed.startsWith('data: ')) continue;
-             const dataStr = trimmed.slice(6);
+            const dataStr = trimmed.slice(6);
              if (dataStr === '[DONE]') {
                streamWriter.write('data: [DONE]\n\n');
                continue;
@@ -562,10 +636,12 @@ export async function chatCompletions(c: Context) {
                   isThinkingChunk = false;
                   if (delta.content !== undefined) {
                     const newContent = delta.content || '';
-                    const result = getIncrementalDelta(lastFullContent, newContent);
+                    const result = getIncrementalDelta(lastFullContent, newContent, contentLength, contentSuffix);
                     vStr = result.delta;
                     if (vStr) {
                       lastFullContent = result.matchedContent;
+                      contentLength = result.contentLength;
+                      contentSuffix = result.contentSuffix;
                       foundStr = true;
                     }
                   }
@@ -577,24 +653,12 @@ export async function chatCompletions(c: Context) {
                 if (isThinkingChunk) {
                   reasoningBuffer += vStr;
-                  streamWriter.write(`data: ${JSON.stringify({
-                    id: completionId,
-                    object: 'chat.completion.chunk',
-                    created: createdTimestamp,
-                    model: body.model,
-                    choices: [makeChoice({ reasoning_content: vStr })]
-                  })}\n\n`);
+                  fastWriteReasoning(vStr);
                 } else {
                   if (hasTools && toolParser) {
                     const { text, toolCalls } = toolParser.feed(vStr);
                     if (text) {
-                      streamWriter.write(`data: ${JSON.stringify({
-                        id: completionId,
-                        object: 'chat.completion.chunk',
-                        created: createdTimestamp,
-                        model: body.model,
-                        choices: [makeChoice({ content: text })]
-                      })}\n\n`);
+                      fastWriteContent(text);
                     }
                     for (const tc of toolCalls) {
                       streamWriter.write(`data: ${JSON.stringify({
@@ -617,13 +681,7 @@ export async function chatCompletions(c: Context) {
                     }
                   } else {
                     if (vStr) {
-                      streamWriter.write(`data: ${JSON.stringify({
-                        id: completionId,
-                        object: 'chat.completion.chunk',
-                        created: createdTimestamp,
-                        model: body.model,
-                        choices: [makeChoice({ content: vStr })]
-                      })}\n\n`);
+                      fastWriteContent(vStr);
                     }
                   }
                 }
@@ -633,16 +691,11 @@ export async function chatCompletions(c: Context) {
             }
           }
-          // Trim processed portion from buffer
-          if (startIdx > 0) {
-            buffer = buffer.slice(startIdx);
+          if (bufferOffset > 0) {
+            buffer = buffer.slice(bufferOffset);
+            bufferOffset = 0;
           }
-          // Periodic yielding to prevent event loop starvation
-          chunkCount++;
-          if (chunkCount % 100 === 0) {
-            await new Promise(r => setImmediate(r));
-          }
         }
         const upstreamError = parseQwenErrorPayload(buffer);
@@ -775,7 +828,7 @@ export async function chatCompletionsStop(c: Context) {
         'Sec-Fetch-Mode': 'cors',
         'Sec-Fetch-Site': 'same-origin',
         'User-Agent': stream.headers['user-agent'],
-        'X-Request-Id': uuidv4(),
+        'X-Request-Id': crypto.randomUUID(),
         'bx-ua': stream.headers['bx-ua'],
         'bx-umidtoken': stream.headers['bx-umidtoken'],
         'bx-v': stream.headers['bx-v'],

package/src/routes/upload.ts CHANGED Viewed

@@ -6,7 +6,7 @@
 import { Context } from "hono";
 import { getQwenHeaders } from "../services/playwright.ts";
-import { v4 as uuidv4 } from "uuid";
+import crypto from "crypto";
 interface STSResponse {
   success: boolean;
@@ -46,7 +46,7 @@ async function getSTSToken(
           Origin: "https://chat.qwen.ai",
           Referer: "https://chat.qwen.ai/",
           "User-Agent": headers["user-agent"],
-          "X-Request-Id": uuidv4(),
+          "X-Request-Id": crypto.randomUUID(),
           "bx-ua": headers["bx-ua"],
           "bx-umidtoken": headers["bx-umidtoken"],
           "bx-v": headers["bx-v"],
@@ -723,11 +723,11 @@ export async function processImagesForQwen(
           greenNet: "success",
           size: fileSize,
           error: "",
-          itemId: uuidv4(),
+          itemId: crypto.randomUUID(),
           file_type: typeInfo.mime,
           showType: typeInfo.showType,
           file_class: typeInfo.fileClass,
-          uploadTaskId: uuidv4(),
+          uploadTaskId: crypto.randomUUID(),
         });
       }
     }

package/src/services/playwright.ts CHANGED Viewed

@@ -29,6 +29,7 @@ interface AccountHeaderCache {
 }
 const accountHeaderCaches = new Map<string, AccountHeaderCache>();
+const cachedUserAgents = new Map<string, string>();
 function getAccountHeaderCache(accountId: string): AccountHeaderCache {
   let cache = accountHeaderCaches.get(accountId);