@pedrofariasx/qwenproxy 1.2.0 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -39,7 +39,7 @@ graph TD
39
39
  Playwright --> Browser2[Browser - Conta 2]
40
40
  Playwright --> BrowserN[Browser - Conta N]
41
41
  Handler --> QwenAPI[chat.qwen.ai]
42
- Handler --> Tools[Tool Executor]
42
+ Handler --> Tools[Tool Parser]
43
43
 
44
44
  subgraph "Persistência"
45
45
  Accounts
@@ -233,24 +233,14 @@ qwenproxy/
233
233
  │ │ ├── model-registry.ts # Registro de modelos e context windows
234
234
  │ │ ├── stream-registry.ts # Tracking de streams ativos
235
235
  │ │ └── watchdog.ts # Health monitoring
236
- │ ├── linter/
237
- │ │ ├── bar.ts # Facade
238
- │ │ ├── extraction-engine.ts # Extraction engine
239
- │ │ ├── foo.ts # Exports
240
- │ │ ├── index.ts # Main public API
241
- │ │ ├── repair-normalize.ts # Repair and normalize
242
- │ │ ├── safety-gate.ts # Safety gate
243
- │ │ ├── streaming-state-machine.ts # Streaming state machine
244
- │ │ ├── structural-parser.ts # Structural parser
245
- │ │ └── types.ts # Types
246
236
  │ ├── routes/
247
- │ │ └── chat.ts # Handler /v1/chat/completions
237
+ │ │ ├── chat.ts # Handler /v1/chat/completions
238
+ │ │ └── upload.ts # Handler /v1/upload (multimodal)
248
239
  │ ├── services/
249
240
  │ │ ├── playwright.ts # Automação de navegador
250
241
  │ │ └── qwen.ts # Integração com API do Qwen
251
242
  │ ├── tests/ # Testes automatizados
252
243
  │ ├── tools/
253
- │ │ ├── executor.ts # Execução de ferramentas
254
244
  │ │ ├── parser.ts # Parser de <tool_call> tags
255
245
  │ │ ├── registry.ts # Registro de tools
256
246
  │ │ ├── schema.ts # Validação JSON Schema
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pedrofariasx/qwenproxy",
3
- "version": "1.2.0",
3
+ "version": "1.2.2",
4
4
  "description": "Local OpenAI-compatible proxy API that routes requests to Qwen (chat.qwen.ai) via Playwright browser automation.",
5
5
  "main": "index.js",
6
6
  "scripts": {
package/src/api/server.ts CHANGED
@@ -111,8 +111,6 @@ export async function startServer(): Promise<void> {
111
111
  await cache.close()
112
112
  const { closePlaywright } = await import('../services/playwright.js')
113
113
  await closePlaywright()
114
- const { cleanupAllAccountMutexes } = await import('../routes/chat.js')
115
- cleanupAllAccountMutexes()
116
114
  const { closeDatabase } = await import('../core/database.ts')
117
115
  closeDatabase()
118
116
  server?.close()
@@ -19,16 +19,34 @@ export class MemoryCache {
19
19
  private defaultTTL: number
20
20
  private prefix: string
21
21
  private cleanupInterval: NodeJS.Timeout | null
22
+ private maxEntries: number
23
+ private totalBytes: number
22
24
 
23
- constructor(options?: { prefix?: string; defaultTTL?: number }) {
25
+ constructor(options?: { prefix?: string; defaultTTL?: number; maxEntries?: number }) {
24
26
  this.prefix = options?.prefix || 'qwenproxy:'
25
27
  this.defaultTTL = options?.defaultTTL || config.cache.defaultTTL
28
+ this.maxEntries = options?.maxEntries || 10000
26
29
  this.store = new Map()
30
+ this.totalBytes = 0
27
31
  this.cleanupInterval = null
28
32
 
29
33
  this.startCleanup()
30
34
  }
31
35
 
36
+ private entryByteSize(key: string, value: any): number {
37
+ return Buffer.byteLength(key) + Buffer.byteLength(JSON.stringify(value))
38
+ }
39
+
40
+ private evictLRU(): void {
41
+ const oldest = this.store.keys().next()
42
+ if (!oldest.done) {
43
+ const evicted = this.store.get(oldest.value)
44
+ if (evicted) this.totalBytes -= this.entryByteSize(oldest.value, evicted.value)
45
+ this.store.delete(oldest.value)
46
+ metrics.increment('cache.evicted')
47
+ }
48
+ }
49
+
32
50
  private startCleanup(): void {
33
51
  this.cleanupInterval = setInterval(() => {
34
52
  const now = Date.now()
@@ -46,16 +64,28 @@ export class MemoryCache {
46
64
 
47
65
  async set<T>(key: CacheKey, value: T, ttl?: number): Promise<void> {
48
66
  const serialized = JSON.stringify(value)
67
+ const valueBytes = Buffer.byteLength(serialized)
49
68
  const effectiveTTL = ttl || this.defaultTTL
50
69
  const fullKey = this.prefix + key
70
+ const entrySize = Buffer.byteLength(fullKey) + valueBytes
71
+
72
+ if (this.store.has(fullKey)) {
73
+ const oldEntry = this.store.get(fullKey)
74
+ if (oldEntry) this.totalBytes -= this.entryByteSize(fullKey, oldEntry.value)
75
+ } else {
76
+ while (this.store.size >= this.maxEntries) {
77
+ this.evictLRU()
78
+ }
79
+ }
51
80
 
52
81
  this.store.set(fullKey, {
53
82
  value,
54
83
  expiresAt: Date.now() + (effectiveTTL * 1000)
55
84
  })
85
+ this.totalBytes += entrySize
56
86
 
57
87
  metrics.increment('cache.set')
58
- metrics.histogram('cache.value.size', Buffer.byteLength(serialized))
88
+ metrics.histogram('cache.value.size', valueBytes)
59
89
  }
60
90
 
61
91
  async get<T>(key: CacheKey): Promise<T | null> {
@@ -66,26 +96,39 @@ export class MemoryCache {
66
96
  metrics.histogram('cache.get.latency', Date.now() - start)
67
97
 
68
98
  if (!entry || entry.expiresAt <= Date.now()) {
69
- if (entry) this.store.delete(fullKey)
99
+ if (entry) {
100
+ this.totalBytes -= this.entryByteSize(fullKey, entry.value)
101
+ this.store.delete(fullKey)
102
+ }
70
103
  metrics.increment('cache.miss')
71
104
  return null
72
105
  }
73
106
 
107
+ this.store.delete(fullKey)
108
+ this.store.set(fullKey, entry)
109
+
74
110
  metrics.increment('cache.hit')
75
111
  return entry.value as T
76
112
  }
77
113
 
78
114
  async delete(key: CacheKey): Promise<void> {
79
115
  const fullKey = this.prefix + key
80
- this.store.delete(fullKey)
81
- metrics.increment('cache.deleted')
116
+ const entry = this.store.get(fullKey)
117
+ if (entry) {
118
+ this.totalBytes -= this.entryByteSize(fullKey, entry.value)
119
+ this.store.delete(fullKey)
120
+ metrics.increment('cache.deleted')
121
+ }
82
122
  }
83
123
 
84
124
  async exists(key: CacheKey): Promise<boolean> {
85
125
  const fullKey = this.prefix + key
86
126
  const entry = this.store.get(fullKey)
87
127
  if (!entry || entry.expiresAt <= Date.now()) {
88
- if (entry) this.store.delete(fullKey)
128
+ if (entry) {
129
+ this.totalBytes -= this.entryByteSize(fullKey, entry.value)
130
+ this.store.delete(fullKey)
131
+ }
89
132
  return false
90
133
  }
91
134
  return true
@@ -157,20 +200,10 @@ export class MemoryCache {
157
200
  keysCount?: number
158
201
  memoryUsage?: string
159
202
  }> {
160
- const now = Date.now()
161
- let validKeys = 0
162
- let totalBytes = 0
163
- for (const [key, entry] of this.store.entries()) {
164
- if (entry.expiresAt > now) {
165
- validKeys++
166
- totalBytes += Buffer.byteLength(JSON.stringify(entry.value)) + Buffer.byteLength(key)
167
- }
168
- }
169
-
170
203
  return {
171
204
  connected: true,
172
- keysCount: validKeys,
173
- memoryUsage: `${(totalBytes / 1024).toFixed(2)}KB`
205
+ keysCount: this.store.size,
206
+ memoryUsage: `${(this.totalBytes / 1024).toFixed(2)}KB`
174
207
  }
175
208
  }
176
209
 
@@ -180,6 +213,7 @@ export class MemoryCache {
180
213
  this.cleanupInterval = null
181
214
  }
182
215
  this.store.clear()
216
+ this.totalBytes = 0
183
217
  }
184
218
  }
185
219
 
@@ -10,7 +10,7 @@
10
10
 
11
11
  import { Context } from 'hono';
12
12
  import { stream as honoStream } from 'hono/streaming';
13
- import { v4 as uuidv4 } from 'uuid';
13
+ import crypto from 'crypto';
14
14
  import { createQwenStream, updateSessionParent } from '../services/qwen.ts';
15
15
  import { OpenAIRequest, ChoiceDelta, Message } from '../utils/types.ts';
16
16
  import { registry } from '../tools/registry.ts';
@@ -25,33 +25,71 @@ import { getNextAccount, getNextAvailableAccount, markAccountRateLimited, getAcc
25
25
  import { registerStream, removeStream, getStream } from '../core/stream-registry.ts';
26
26
  import { metrics } from '../core/metrics.js'
27
27
 
28
- export function cleanupAllAccountMutexes(): void {
29
- // No-op - kept for backward compatibility
30
- }
31
-
32
28
  export interface DeltaResult {
33
29
  delta: string;
34
30
  matchedContent: string;
31
+ contentLength: number;
32
+ contentSuffix: string;
35
33
  }
36
34
 
37
- export function getIncrementalDelta(oldStr: string, newStr: string): DeltaResult {
35
+ export function getIncrementalDelta(oldStr: string, newStr: string, prevLength: number = 0, prevSuffix: string = ''): DeltaResult {
38
36
  if (!oldStr) {
39
- return { delta: newStr, matchedContent: newStr };
37
+ return {
38
+ delta: newStr,
39
+ matchedContent: newStr,
40
+ contentLength: newStr.length,
41
+ contentSuffix: newStr.slice(-64)
42
+ };
40
43
  }
41
44
  if (newStr === oldStr) {
42
- return { delta: '', matchedContent: oldStr };
45
+ return { delta: '', matchedContent: oldStr, contentLength: prevLength, contentSuffix: prevSuffix };
43
46
  }
44
47
 
45
- // Fast path: incremental SSE streams append to oldStr most of the time
48
+ // Ultra-fast path: use length tracking to avoid O(n) startsWith on large strings
49
+ if (newStr.length > prevLength && prevLength > 0) {
50
+ const delta = newStr.slice(prevLength);
51
+ const checkLen = Math.min(64, prevLength);
52
+ const expectedSuffix = prevSuffix.slice(-checkLen);
53
+ const actualSuffix = newStr.slice(prevLength - checkLen, prevLength);
54
+
55
+ if (expectedSuffix === actualSuffix) {
56
+ if (delta.length <= 4 && oldStr.length > 2000) {
57
+ return {
58
+ delta: newStr,
59
+ matchedContent: oldStr + newStr,
60
+ contentLength: newStr.length,
61
+ contentSuffix: newStr.slice(-64)
62
+ };
63
+ }
64
+ return {
65
+ delta,
66
+ matchedContent: newStr,
67
+ contentLength: newStr.length,
68
+ contentSuffix: newStr.slice(-64)
69
+ };
70
+ }
71
+ }
72
+
73
+ // Fallback: startsWith check for edge cases
46
74
  if (newStr.startsWith(oldStr)) {
47
75
  const delta = newStr.slice(oldStr.length);
48
76
  if (delta.length <= 4 && oldStr.length > 2000) {
49
- return { delta: newStr, matchedContent: oldStr + newStr };
77
+ return {
78
+ delta: newStr,
79
+ matchedContent: oldStr + newStr,
80
+ contentLength: newStr.length,
81
+ contentSuffix: newStr.slice(-64)
82
+ };
50
83
  }
51
- return { delta, matchedContent: newStr };
84
+ return {
85
+ delta,
86
+ matchedContent: newStr,
87
+ contentLength: newStr.length,
88
+ contentSuffix: newStr.slice(-64)
89
+ };
52
90
  }
53
91
 
54
- // Fallback: segment-based prefix matching
92
+ // Segment-based prefix matching (rare path)
55
93
  const scanWindow = Math.min(2000, oldStr.length);
56
94
  const maxLen = Math.min(scanWindow, newStr.length);
57
95
 
@@ -65,17 +103,27 @@ export function getIncrementalDelta(oldStr: string, newStr: string): DeltaResult
65
103
  commonPrefixLen += segmentLen;
66
104
  }
67
105
 
68
- // Fine-grained scan within the mismatching segment
69
106
  while (commonPrefixLen < maxLen && oldStr[commonPrefixLen] === newStr[commonPrefixLen]) {
70
107
  commonPrefixLen++;
71
108
  }
72
109
 
73
110
  const threshold = Math.min(scanWindow, 4);
74
111
  if (commonPrefixLen >= threshold) {
75
- return { delta: newStr.substring(commonPrefixLen), matchedContent: newStr };
112
+ return {
113
+ delta: newStr.substring(commonPrefixLen),
114
+ matchedContent: newStr,
115
+ contentLength: newStr.length,
116
+ contentSuffix: newStr.slice(-64)
117
+ };
76
118
  }
77
119
 
78
- return { delta: newStr, matchedContent: oldStr + newStr };
120
+ const combined = oldStr + newStr;
121
+ return {
122
+ delta: newStr,
123
+ matchedContent: combined,
124
+ contentLength: combined.length,
125
+ contentSuffix: combined.slice(-64)
126
+ };
79
127
  }
80
128
 
81
129
  function parseQwenErrorPayload(raw: string): { message: string; status: number } | null {
@@ -119,29 +167,26 @@ export async function chatCompletions(c: Context) {
119
167
  const msg = messages[i];
120
168
  let contentStr = '';
121
169
  if (Array.isArray(msg.content)) {
122
- // Handle multimodal content (text + images + videos + audio + files)
123
- const multimodalParts = msg.content.filter(
124
- (p: any) =>
170
+ // Single-pass: extract text and multimodal parts in one iteration
171
+ const textParts: string[] = [];
172
+ const multimodalParts: Array<{ type: string; text?: string; image_url?: { url: string }; video_url?: { url: string }; audio_url?: { url: string }; file_url?: { url: string } }> = [];
173
+
174
+ for (const p of msg.content as any[]) {
175
+ if (p.type === "text" && p.text) {
176
+ textParts.push(p.text);
177
+ } else if (
125
178
  (p.type === "image_url" && p.image_url?.url) ||
126
179
  (p.type === "video_url" && p.video_url?.url) ||
127
180
  (p.type === "audio_url" && p.audio_url?.url) ||
128
- (p.type === "file_url" && p.file_url?.url),
129
- );
130
-
181
+ (p.type === "file_url" && p.file_url?.url)
182
+ ) {
183
+ multimodalParts.push(p);
184
+ }
185
+ }
186
+
187
+ contentStr = textParts.join("\n");
131
188
  if (multimodalParts.length > 0) {
132
- // Defer processing to after account selection to reuse cached headers
133
189
  pendingMultimodal.push(multimodalParts);
134
- // Extract text parts for prompt building
135
- contentStr = msg.content
136
- .filter((p: any) => p.type === "text")
137
- .map((p: any) => p.text)
138
- .join("\n");
139
- } else {
140
- // No multimodal parts, just extract text
141
- contentStr = msg.content
142
- .filter((p: any) => p.type === "text")
143
- .map((p: any) => p.text)
144
- .join("\n");
145
190
  }
146
191
  } else if (typeof msg.content === 'object' && msg.content !== null) {
147
192
  contentStr = JSON.stringify(msg.content);
@@ -209,7 +254,7 @@ export async function chatCompletions(c: Context) {
209
254
  });
210
255
  const toolsJson = JSON.stringify(formattedTools, null, 2);
211
256
 
212
- systemPrompt += `\n\n# TOOLS AVAILABLE\nYou have access to the following tools:\n${toolsJson}\n\n# TOOL CALLING FORMAT (MANDATORY)\nTo use a tool, you MUST output a JSON object wrapped EXACTLY in these tags:\n<tool_call>\n{"name": "tool_name", "arguments": {"param_name": "value"}}\n</tool_call>\n\nEXAMPLE OF MULTIPLE TOOL CALLS:\n<tool_call>\n{"name": "read_file", "arguments": {"path": "file1.txt"}}\n</tool_call>\n<tool_call>\n{"name": "read_file", "arguments": {"path": "file2.txt"}}\n</tool_call>\n\nCRITICAL RULES:\n1. ONLY use the tags above for tool calling. NEVER output raw JSON without tags.\n2. You can call multiple tools by outputting multiple <tool_call> blocks consecutively.\n3. Do NOT output any other text (explanations, chat, etc.) after your <tool_call> blocks. Wait for the user to provide the tool response.\n4. The JSON inside the tags MUST be valid and include ALL required braces and the "arguments" field.\n5. If you need to use a tool, do it IMMEDIATELY without preamble.\n\n`;
257
+ systemPrompt += `\n\n# TOOLS AVAILABLE\nYou have access to the following tools:\n${toolsJson}\n\n# TOOL CALLING FORMAT (MANDATORY)\nTo use a tool, you MUST output a JSON object wrapped EXACTLY in <tool_call> tags:\n\n<tool_call>\n{"name": "tool_name", "arguments": {"param_name": "value"}}\n</tool_call>\n\nEXAMPLE OF MULTIPLE TOOL CALLS:\n<tool_call>\n{"name": "read_file", "arguments": {"path": "file1.txt"}}\n</tool_call>\n<tool_call>\n{"name": "read_file", "arguments": {"path": "file2.txt"}}\n</tool_call>\n\nCRITICAL RULES:\n1. ONLY use the tags above for tool calling. NEVER output raw JSON without tags.\n2. You can call multiple tools by outputting multiple <tool_call> blocks consecutively.\n3. Do NOT output any other text (explanations, chat, etc.) after your <tool_call> blocks. Wait for the user to provide the tool response.\n4. The JSON inside the tags MUST be valid and include ALL required braces and the "arguments" field.\n5. If you need to use a tool, do it IMMEDIATELY without preamble.\n6. NEVER invent, guess, or hallucinate tool names. You MUST ONLY use the exact tool names provided in the 'TOOLS AVAILABLE' list above. Calling an unlisted tool will result in a hard execution error.\n\n`;
213
258
 
214
259
  if (bodyAny.tool_choice && typeof bodyAny.tool_choice === 'object' && bodyAny.tool_choice.function) {
215
260
  const forcedTool = bodyAny.tool_choice.function.name;
@@ -220,15 +265,22 @@ export async function chatCompletions(c: Context) {
220
265
  const modelId = body.model.replace('-no-thinking', '');
221
266
  const modelContextWindow = getModelContextWindow(modelId)
222
267
  const estimatedTokens = estimateTokenCount(systemPrompt + prompt);
268
+ const hasTools = Array.isArray(bodyAny.tools) && bodyAny.tools.length > 0;
223
269
 
224
270
  let finalPrompt: string;
225
271
  if (estimatedTokens > modelContextWindow - 1000) {
226
272
  const truncated = truncateMessages(messages, modelContextWindow, systemPrompt);
227
- finalPrompt = truncated.map(m => `${m.role === 'user' ? 'User' : m.role === 'assistant' ? 'Assistant' : m.role}: ${m.content}`).join('\n\n');
273
+ const truncatedBody = truncated.map(m => `${m.role === 'user' ? 'User' : m.role === 'assistant' ? 'Assistant' : m.role}: ${m.content}`).join('\n\n');
274
+ finalPrompt = systemPrompt ? `${systemPrompt}\n\n${truncatedBody}` : truncatedBody;
228
275
  } else {
229
276
  finalPrompt = systemPrompt ? `${systemPrompt}\n${prompt}` : prompt;
230
277
  }
231
278
 
279
+ // Reforço de instrução de tool call para contextos longos (mitiga "Lost in the Middle")
280
+ if (hasTools && estimatedTokens > 15000) {
281
+ finalPrompt += '\n\n[CRITICAL REMINDER: You MUST use the exact <tool_call> JSON format specified in the system instructions. Do not hallucinate tool names or output raw JSON without the tags.]';
282
+ }
283
+
232
284
  const isThinkingModel = !body.model.includes('no-thinking');
233
285
 
234
286
  // A session is new if it doesn't have any assistant messages yet.
@@ -242,7 +294,7 @@ export async function chatCompletions(c: Context) {
242
294
 
243
295
  let stream: ReadableStream | undefined;
244
296
  let uiSessionId = '';
245
- const completionId = 'chatcmpl-' + uuidv4();
297
+ const completionId = 'chatcmpl-' + crypto.randomUUID();
246
298
 
247
299
  while (account) {
248
300
  const accountId = account.id;
@@ -469,10 +521,30 @@ export async function chatCompletions(c: Context) {
469
521
  finish_reason: finishReason
470
522
  });
471
523
 
472
- // Pre-compute timestamp once before the stream loop
473
524
  const createdTimestamp = Math.floor(Date.now() / 1000);
474
525
 
475
- // Send initial chunk
526
+ const fastWriteContent = (content: string) => {
527
+ const chunk = JSON.stringify({
528
+ id: completionId,
529
+ object: 'chat.completion.chunk',
530
+ created: createdTimestamp,
531
+ model: body.model,
532
+ choices: [makeChoice({ content })]
533
+ });
534
+ streamWriter.write(`data: ${chunk}\n\n`);
535
+ };
536
+
537
+ const fastWriteReasoning = (content: string) => {
538
+ const chunk = JSON.stringify({
539
+ id: completionId,
540
+ object: 'chat.completion.chunk',
541
+ created: createdTimestamp,
542
+ model: body.model,
543
+ choices: [makeChoice({ reasoning_content: content })]
544
+ });
545
+ streamWriter.write(`data: ${chunk}\n\n`);
546
+ };
547
+
476
548
  writeEvent({
477
549
  id: completionId,
478
550
  object: 'chat.completion.chunk',
@@ -486,6 +558,8 @@ export async function chatCompletions(c: Context) {
486
558
 
487
559
  let reasoningBuffer = '';
488
560
  let lastFullContent = '';
561
+ let contentLength = 0;
562
+ let contentSuffix = '';
489
563
  let targetResponseId: string | null = null;
490
564
  let targetResponseIdSet = false;
491
565
  let currentThoughtIndex = 0;
@@ -493,27 +567,27 @@ export async function chatCompletions(c: Context) {
493
567
  const toolParser = hasTools ? new StreamingToolParser(bodyAny.tools) : null;
494
568
 
495
569
  let buffer = '';
570
+ let bufferOffset = 0;
496
571
  let completionTokens = 0;
497
572
  let promptTokens = Math.ceil(finalPrompt.length / 3.5);
498
573
 
499
- // Real-time flush: send each event immediately to minimize latency
500
- let chunkCount = 0;
501
574
  while (true) {
502
575
  const { done, value } = await reader.read();
503
576
  if (done) break;
504
577
 
505
578
  buffer += decoder.decode(value, { stream: true });
506
579
 
507
- let startIdx = 0;
508
- let newlineIdx: number;
509
- while ((newlineIdx = buffer.indexOf('\n', startIdx)) !== -1) {
510
- const line = buffer.slice(startIdx, newlineIdx);
511
- startIdx = newlineIdx + 1;
580
+ while (bufferOffset < buffer.length) {
581
+ const newlineIdx = buffer.indexOf('\n', bufferOffset);
582
+ if (newlineIdx === -1) break;
583
+
584
+ const line = buffer.slice(bufferOffset, newlineIdx);
585
+ bufferOffset = newlineIdx + 1;
512
586
 
513
- const trimmed = line.trim();
514
- if (!trimmed || !trimmed.startsWith('data: ')) continue;
587
+ const trimmed = line.trim();
588
+ if (!trimmed || !trimmed.startsWith('data: ')) continue;
515
589
 
516
- const dataStr = trimmed.slice(6);
590
+ const dataStr = trimmed.slice(6);
517
591
  if (dataStr === '[DONE]') {
518
592
  streamWriter.write('data: [DONE]\n\n');
519
593
  continue;
@@ -562,10 +636,12 @@ export async function chatCompletions(c: Context) {
562
636
  isThinkingChunk = false;
563
637
  if (delta.content !== undefined) {
564
638
  const newContent = delta.content || '';
565
- const result = getIncrementalDelta(lastFullContent, newContent);
639
+ const result = getIncrementalDelta(lastFullContent, newContent, contentLength, contentSuffix);
566
640
  vStr = result.delta;
567
641
  if (vStr) {
568
642
  lastFullContent = result.matchedContent;
643
+ contentLength = result.contentLength;
644
+ contentSuffix = result.contentSuffix;
569
645
  foundStr = true;
570
646
  }
571
647
  }
@@ -577,24 +653,12 @@ export async function chatCompletions(c: Context) {
577
653
 
578
654
  if (isThinkingChunk) {
579
655
  reasoningBuffer += vStr;
580
- streamWriter.write(`data: ${JSON.stringify({
581
- id: completionId,
582
- object: 'chat.completion.chunk',
583
- created: createdTimestamp,
584
- model: body.model,
585
- choices: [makeChoice({ reasoning_content: vStr })]
586
- })}\n\n`);
656
+ fastWriteReasoning(vStr);
587
657
  } else {
588
658
  if (hasTools && toolParser) {
589
659
  const { text, toolCalls } = toolParser.feed(vStr);
590
660
  if (text) {
591
- streamWriter.write(`data: ${JSON.stringify({
592
- id: completionId,
593
- object: 'chat.completion.chunk',
594
- created: createdTimestamp,
595
- model: body.model,
596
- choices: [makeChoice({ content: text })]
597
- })}\n\n`);
661
+ fastWriteContent(text);
598
662
  }
599
663
  for (const tc of toolCalls) {
600
664
  streamWriter.write(`data: ${JSON.stringify({
@@ -617,13 +681,7 @@ export async function chatCompletions(c: Context) {
617
681
  }
618
682
  } else {
619
683
  if (vStr) {
620
- streamWriter.write(`data: ${JSON.stringify({
621
- id: completionId,
622
- object: 'chat.completion.chunk',
623
- created: createdTimestamp,
624
- model: body.model,
625
- choices: [makeChoice({ content: vStr })]
626
- })}\n\n`);
684
+ fastWriteContent(vStr);
627
685
  }
628
686
  }
629
687
  }
@@ -633,16 +691,11 @@ export async function chatCompletions(c: Context) {
633
691
  }
634
692
  }
635
693
 
636
- // Trim processed portion from buffer
637
- if (startIdx > 0) {
638
- buffer = buffer.slice(startIdx);
694
+ if (bufferOffset > 0) {
695
+ buffer = buffer.slice(bufferOffset);
696
+ bufferOffset = 0;
639
697
  }
640
698
 
641
- // Periodic yielding to prevent event loop starvation
642
- chunkCount++;
643
- if (chunkCount % 100 === 0) {
644
- await new Promise(r => setImmediate(r));
645
- }
646
699
  }
647
700
 
648
701
  const upstreamError = parseQwenErrorPayload(buffer);
@@ -775,7 +828,7 @@ export async function chatCompletionsStop(c: Context) {
775
828
  'Sec-Fetch-Mode': 'cors',
776
829
  'Sec-Fetch-Site': 'same-origin',
777
830
  'User-Agent': stream.headers['user-agent'],
778
- 'X-Request-Id': uuidv4(),
831
+ 'X-Request-Id': crypto.randomUUID(),
779
832
  'bx-ua': stream.headers['bx-ua'],
780
833
  'bx-umidtoken': stream.headers['bx-umidtoken'],
781
834
  'bx-v': stream.headers['bx-v'],
@@ -6,7 +6,7 @@
6
6
 
7
7
  import { Context } from "hono";
8
8
  import { getQwenHeaders } from "../services/playwright.ts";
9
- import { v4 as uuidv4 } from "uuid";
9
+ import crypto from "crypto";
10
10
 
11
11
  interface STSResponse {
12
12
  success: boolean;
@@ -46,7 +46,7 @@ async function getSTSToken(
46
46
  Origin: "https://chat.qwen.ai",
47
47
  Referer: "https://chat.qwen.ai/",
48
48
  "User-Agent": headers["user-agent"],
49
- "X-Request-Id": uuidv4(),
49
+ "X-Request-Id": crypto.randomUUID(),
50
50
  "bx-ua": headers["bx-ua"],
51
51
  "bx-umidtoken": headers["bx-umidtoken"],
52
52
  "bx-v": headers["bx-v"],
@@ -723,11 +723,11 @@ export async function processImagesForQwen(
723
723
  greenNet: "success",
724
724
  size: fileSize,
725
725
  error: "",
726
- itemId: uuidv4(),
726
+ itemId: crypto.randomUUID(),
727
727
  file_type: typeInfo.mime,
728
728
  showType: typeInfo.showType,
729
729
  file_class: typeInfo.fileClass,
730
- uploadTaskId: uuidv4(),
730
+ uploadTaskId: crypto.randomUUID(),
731
731
  });
732
732
  }
733
733
  }
@@ -29,6 +29,7 @@ interface AccountHeaderCache {
29
29
  }
30
30
 
31
31
  const accountHeaderCaches = new Map<string, AccountHeaderCache>();
32
+ const cachedUserAgents = new Map<string, string>();
32
33
 
33
34
  function getAccountHeaderCache(accountId: string): AccountHeaderCache {
34
35
  let cache = accountHeaderCaches.get(accountId);