@pedrofariasx/qwenproxy 1.2.1 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -39,7 +39,7 @@ graph TD
39
39
  Playwright --> Browser2[Browser - Conta 2]
40
40
  Playwright --> BrowserN[Browser - Conta N]
41
41
  Handler --> QwenAPI[chat.qwen.ai]
42
- Handler --> Tools[Tool Executor]
42
+ Handler --> Tools[Tool Parser]
43
43
 
44
44
  subgraph "Persistência"
45
45
  Accounts
@@ -233,24 +233,14 @@ qwenproxy/
233
233
  │ │ ├── model-registry.ts # Registro de modelos e context windows
234
234
  │ │ ├── stream-registry.ts # Tracking de streams ativos
235
235
  │ │ └── watchdog.ts # Health monitoring
236
- │ ├── linter/
237
- │ │ ├── bar.ts # Facade
238
- │ │ ├── extraction-engine.ts # Extraction engine
239
- │ │ ├── foo.ts # Exports
240
- │ │ ├── index.ts # Main public API
241
- │ │ ├── repair-normalize.ts # Repair and normalize
242
- │ │ ├── safety-gate.ts # Safety gate
243
- │ │ ├── streaming-state-machine.ts # Streaming state machine
244
- │ │ ├── structural-parser.ts # Structural parser
245
- │ │ └── types.ts # Types
246
236
  │ ├── routes/
247
- │ │ └── chat.ts # Handler /v1/chat/completions
237
+ │ │ ├── chat.ts # Handler /v1/chat/completions
238
+ │ │ └── upload.ts # Handler /v1/upload (multimodal)
248
239
  │ ├── services/
249
240
  │ │ ├── playwright.ts # Automação de navegador
250
241
  │ │ └── qwen.ts # Integração com API do Qwen
251
242
  │ ├── tests/ # Testes automatizados
252
243
  │ ├── tools/
253
- │ │ ├── executor.ts # Execução de ferramentas
254
244
  │ │ ├── parser.ts # Parser de <tool_call> tags
255
245
  │ │ ├── registry.ts # Registro de tools
256
246
  │ │ ├── schema.ts # Validação JSON Schema
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pedrofariasx/qwenproxy",
3
- "version": "1.2.1",
3
+ "version": "1.2.2",
4
4
  "description": "Local OpenAI-compatible proxy API that routes requests to Qwen (chat.qwen.ai) via Playwright browser automation.",
5
5
  "main": "index.js",
6
6
  "scripts": {
package/src/api/server.ts CHANGED
@@ -111,8 +111,6 @@ export async function startServer(): Promise<void> {
111
111
  await cache.close()
112
112
  const { closePlaywright } = await import('../services/playwright.js')
113
113
  await closePlaywright()
114
- const { cleanupAllAccountMutexes } = await import('../routes/chat.js')
115
- cleanupAllAccountMutexes()
116
114
  const { closeDatabase } = await import('../core/database.ts')
117
115
  closeDatabase()
118
116
  server?.close()
@@ -64,9 +64,10 @@ export class MemoryCache {
64
64
 
65
65
  async set<T>(key: CacheKey, value: T, ttl?: number): Promise<void> {
66
66
  const serialized = JSON.stringify(value)
67
+ const valueBytes = Buffer.byteLength(serialized)
67
68
  const effectiveTTL = ttl || this.defaultTTL
68
69
  const fullKey = this.prefix + key
69
- const entrySize = this.entryByteSize(fullKey, value)
70
+ const entrySize = Buffer.byteLength(fullKey) + valueBytes
70
71
 
71
72
  if (this.store.has(fullKey)) {
72
73
  const oldEntry = this.store.get(fullKey)
@@ -84,7 +85,7 @@ export class MemoryCache {
84
85
  this.totalBytes += entrySize
85
86
 
86
87
  metrics.increment('cache.set')
87
- metrics.histogram('cache.value.size', Buffer.byteLength(serialized))
88
+ metrics.histogram('cache.value.size', valueBytes)
88
89
  }
89
90
 
90
91
  async get<T>(key: CacheKey): Promise<T | null> {
@@ -10,7 +10,7 @@
10
10
 
11
11
  import { Context } from 'hono';
12
12
  import { stream as honoStream } from 'hono/streaming';
13
- import { v4 as uuidv4 } from 'uuid';
13
+ import crypto from 'crypto';
14
14
  import { createQwenStream, updateSessionParent } from '../services/qwen.ts';
15
15
  import { OpenAIRequest, ChoiceDelta, Message } from '../utils/types.ts';
16
16
  import { registry } from '../tools/registry.ts';
@@ -25,33 +25,71 @@ import { getNextAccount, getNextAvailableAccount, markAccountRateLimited, getAcc
25
25
  import { registerStream, removeStream, getStream } from '../core/stream-registry.ts';
26
26
  import { metrics } from '../core/metrics.js'
27
27
 
28
- export function cleanupAllAccountMutexes(): void {
29
- // No-op - kept for backward compatibility
30
- }
31
-
32
28
  export interface DeltaResult {
33
29
  delta: string;
34
30
  matchedContent: string;
31
+ contentLength: number;
32
+ contentSuffix: string;
35
33
  }
36
34
 
37
- export function getIncrementalDelta(oldStr: string, newStr: string): DeltaResult {
35
+ export function getIncrementalDelta(oldStr: string, newStr: string, prevLength: number = 0, prevSuffix: string = ''): DeltaResult {
38
36
  if (!oldStr) {
39
- return { delta: newStr, matchedContent: newStr };
37
+ return {
38
+ delta: newStr,
39
+ matchedContent: newStr,
40
+ contentLength: newStr.length,
41
+ contentSuffix: newStr.slice(-64)
42
+ };
40
43
  }
41
44
  if (newStr === oldStr) {
42
- return { delta: '', matchedContent: oldStr };
45
+ return { delta: '', matchedContent: oldStr, contentLength: prevLength, contentSuffix: prevSuffix };
46
+ }
47
+
48
+ // Ultra-fast path: use length tracking to avoid O(n) startsWith on large strings
49
+ if (newStr.length > prevLength && prevLength > 0) {
50
+ const delta = newStr.slice(prevLength);
51
+ const checkLen = Math.min(64, prevLength);
52
+ const expectedSuffix = prevSuffix.slice(-checkLen);
53
+ const actualSuffix = newStr.slice(prevLength - checkLen, prevLength);
54
+
55
+ if (expectedSuffix === actualSuffix) {
56
+ if (delta.length <= 4 && oldStr.length > 2000) {
57
+ return {
58
+ delta: newStr,
59
+ matchedContent: oldStr + newStr,
60
+ contentLength: newStr.length,
61
+ contentSuffix: newStr.slice(-64)
62
+ };
63
+ }
64
+ return {
65
+ delta,
66
+ matchedContent: newStr,
67
+ contentLength: newStr.length,
68
+ contentSuffix: newStr.slice(-64)
69
+ };
70
+ }
43
71
  }
44
72
 
45
- // Fast path: incremental SSE streams append to oldStr most of the time
73
+ // Fallback: startsWith check for edge cases
46
74
  if (newStr.startsWith(oldStr)) {
47
75
  const delta = newStr.slice(oldStr.length);
48
76
  if (delta.length <= 4 && oldStr.length > 2000) {
49
- return { delta: newStr, matchedContent: oldStr + newStr };
77
+ return {
78
+ delta: newStr,
79
+ matchedContent: oldStr + newStr,
80
+ contentLength: newStr.length,
81
+ contentSuffix: newStr.slice(-64)
82
+ };
50
83
  }
51
- return { delta, matchedContent: newStr };
84
+ return {
85
+ delta,
86
+ matchedContent: newStr,
87
+ contentLength: newStr.length,
88
+ contentSuffix: newStr.slice(-64)
89
+ };
52
90
  }
53
91
 
54
- // Fallback: segment-based prefix matching
92
+ // Segment-based prefix matching (rare path)
55
93
  const scanWindow = Math.min(2000, oldStr.length);
56
94
  const maxLen = Math.min(scanWindow, newStr.length);
57
95
 
@@ -65,17 +103,27 @@ export function getIncrementalDelta(oldStr: string, newStr: string): DeltaResult
65
103
  commonPrefixLen += segmentLen;
66
104
  }
67
105
 
68
- // Fine-grained scan within the mismatching segment
69
106
  while (commonPrefixLen < maxLen && oldStr[commonPrefixLen] === newStr[commonPrefixLen]) {
70
107
  commonPrefixLen++;
71
108
  }
72
109
 
73
110
  const threshold = Math.min(scanWindow, 4);
74
111
  if (commonPrefixLen >= threshold) {
75
- return { delta: newStr.substring(commonPrefixLen), matchedContent: newStr };
112
+ return {
113
+ delta: newStr.substring(commonPrefixLen),
114
+ matchedContent: newStr,
115
+ contentLength: newStr.length,
116
+ contentSuffix: newStr.slice(-64)
117
+ };
76
118
  }
77
119
 
78
- return { delta: newStr, matchedContent: oldStr + newStr };
120
+ const combined = oldStr + newStr;
121
+ return {
122
+ delta: newStr,
123
+ matchedContent: combined,
124
+ contentLength: combined.length,
125
+ contentSuffix: combined.slice(-64)
126
+ };
79
127
  }
80
128
 
81
129
  function parseQwenErrorPayload(raw: string): { message: string; status: number } | null {
@@ -119,29 +167,26 @@ export async function chatCompletions(c: Context) {
119
167
  const msg = messages[i];
120
168
  let contentStr = '';
121
169
  if (Array.isArray(msg.content)) {
122
- // Handle multimodal content (text + images + videos + audio + files)
123
- const multimodalParts = msg.content.filter(
124
- (p: any) =>
170
+ // Single-pass: extract text and multimodal parts in one iteration
171
+ const textParts: string[] = [];
172
+ const multimodalParts: Array<{ type: string; text?: string; image_url?: { url: string }; video_url?: { url: string }; audio_url?: { url: string }; file_url?: { url: string } }> = [];
173
+
174
+ for (const p of msg.content as any[]) {
175
+ if (p.type === "text" && p.text) {
176
+ textParts.push(p.text);
177
+ } else if (
125
178
  (p.type === "image_url" && p.image_url?.url) ||
126
179
  (p.type === "video_url" && p.video_url?.url) ||
127
180
  (p.type === "audio_url" && p.audio_url?.url) ||
128
- (p.type === "file_url" && p.file_url?.url),
129
- );
130
-
181
+ (p.type === "file_url" && p.file_url?.url)
182
+ ) {
183
+ multimodalParts.push(p);
184
+ }
185
+ }
186
+
187
+ contentStr = textParts.join("\n");
131
188
  if (multimodalParts.length > 0) {
132
- // Defer processing to after account selection to reuse cached headers
133
189
  pendingMultimodal.push(multimodalParts);
134
- // Extract text parts for prompt building
135
- contentStr = msg.content
136
- .filter((p: any) => p.type === "text")
137
- .map((p: any) => p.text)
138
- .join("\n");
139
- } else {
140
- // No multimodal parts, just extract text
141
- contentStr = msg.content
142
- .filter((p: any) => p.type === "text")
143
- .map((p: any) => p.text)
144
- .join("\n");
145
190
  }
146
191
  } else if (typeof msg.content === 'object' && msg.content !== null) {
147
192
  contentStr = JSON.stringify(msg.content);
@@ -249,7 +294,7 @@ export async function chatCompletions(c: Context) {
249
294
 
250
295
  let stream: ReadableStream | undefined;
251
296
  let uiSessionId = '';
252
- const completionId = 'chatcmpl-' + uuidv4();
297
+ const completionId = 'chatcmpl-' + crypto.randomUUID();
253
298
 
254
299
  while (account) {
255
300
  const accountId = account.id;
@@ -476,10 +521,30 @@ export async function chatCompletions(c: Context) {
476
521
  finish_reason: finishReason
477
522
  });
478
523
 
479
- // Pre-compute timestamp once before the stream loop
480
524
  const createdTimestamp = Math.floor(Date.now() / 1000);
481
525
 
482
- // Send initial chunk
526
+ const fastWriteContent = (content: string) => {
527
+ const chunk = JSON.stringify({
528
+ id: completionId,
529
+ object: 'chat.completion.chunk',
530
+ created: createdTimestamp,
531
+ model: body.model,
532
+ choices: [makeChoice({ content })]
533
+ });
534
+ streamWriter.write(`data: ${chunk}\n\n`);
535
+ };
536
+
537
+ const fastWriteReasoning = (content: string) => {
538
+ const chunk = JSON.stringify({
539
+ id: completionId,
540
+ object: 'chat.completion.chunk',
541
+ created: createdTimestamp,
542
+ model: body.model,
543
+ choices: [makeChoice({ reasoning_content: content })]
544
+ });
545
+ streamWriter.write(`data: ${chunk}\n\n`);
546
+ };
547
+
483
548
  writeEvent({
484
549
  id: completionId,
485
550
  object: 'chat.completion.chunk',
@@ -493,6 +558,8 @@ export async function chatCompletions(c: Context) {
493
558
 
494
559
  let reasoningBuffer = '';
495
560
  let lastFullContent = '';
561
+ let contentLength = 0;
562
+ let contentSuffix = '';
496
563
  let targetResponseId: string | null = null;
497
564
  let targetResponseIdSet = false;
498
565
  let currentThoughtIndex = 0;
@@ -500,27 +567,27 @@ export async function chatCompletions(c: Context) {
500
567
  const toolParser = hasTools ? new StreamingToolParser(bodyAny.tools) : null;
501
568
 
502
569
  let buffer = '';
570
+ let bufferOffset = 0;
503
571
  let completionTokens = 0;
504
572
  let promptTokens = Math.ceil(finalPrompt.length / 3.5);
505
573
 
506
- // Real-time flush: send each event immediately to minimize latency
507
- let chunkCount = 0;
508
574
  while (true) {
509
575
  const { done, value } = await reader.read();
510
576
  if (done) break;
511
577
 
512
578
  buffer += decoder.decode(value, { stream: true });
513
579
 
514
- let startIdx = 0;
515
- let newlineIdx: number;
516
- while ((newlineIdx = buffer.indexOf('\n', startIdx)) !== -1) {
517
- const line = buffer.slice(startIdx, newlineIdx);
518
- startIdx = newlineIdx + 1;
580
+ while (bufferOffset < buffer.length) {
581
+ const newlineIdx = buffer.indexOf('\n', bufferOffset);
582
+ if (newlineIdx === -1) break;
583
+
584
+ const line = buffer.slice(bufferOffset, newlineIdx);
585
+ bufferOffset = newlineIdx + 1;
519
586
 
520
- const trimmed = line.trim();
521
- if (!trimmed || !trimmed.startsWith('data: ')) continue;
587
+ const trimmed = line.trim();
588
+ if (!trimmed || !trimmed.startsWith('data: ')) continue;
522
589
 
523
- const dataStr = trimmed.slice(6);
590
+ const dataStr = trimmed.slice(6);
524
591
  if (dataStr === '[DONE]') {
525
592
  streamWriter.write('data: [DONE]\n\n');
526
593
  continue;
@@ -569,10 +636,12 @@ export async function chatCompletions(c: Context) {
569
636
  isThinkingChunk = false;
570
637
  if (delta.content !== undefined) {
571
638
  const newContent = delta.content || '';
572
- const result = getIncrementalDelta(lastFullContent, newContent);
639
+ const result = getIncrementalDelta(lastFullContent, newContent, contentLength, contentSuffix);
573
640
  vStr = result.delta;
574
641
  if (vStr) {
575
642
  lastFullContent = result.matchedContent;
643
+ contentLength = result.contentLength;
644
+ contentSuffix = result.contentSuffix;
576
645
  foundStr = true;
577
646
  }
578
647
  }
@@ -584,24 +653,12 @@ export async function chatCompletions(c: Context) {
584
653
 
585
654
  if (isThinkingChunk) {
586
655
  reasoningBuffer += vStr;
587
- streamWriter.write(`data: ${JSON.stringify({
588
- id: completionId,
589
- object: 'chat.completion.chunk',
590
- created: createdTimestamp,
591
- model: body.model,
592
- choices: [makeChoice({ reasoning_content: vStr })]
593
- })}\n\n`);
656
+ fastWriteReasoning(vStr);
594
657
  } else {
595
658
  if (hasTools && toolParser) {
596
659
  const { text, toolCalls } = toolParser.feed(vStr);
597
660
  if (text) {
598
- streamWriter.write(`data: ${JSON.stringify({
599
- id: completionId,
600
- object: 'chat.completion.chunk',
601
- created: createdTimestamp,
602
- model: body.model,
603
- choices: [makeChoice({ content: text })]
604
- })}\n\n`);
661
+ fastWriteContent(text);
605
662
  }
606
663
  for (const tc of toolCalls) {
607
664
  streamWriter.write(`data: ${JSON.stringify({
@@ -624,13 +681,7 @@ export async function chatCompletions(c: Context) {
624
681
  }
625
682
  } else {
626
683
  if (vStr) {
627
- streamWriter.write(`data: ${JSON.stringify({
628
- id: completionId,
629
- object: 'chat.completion.chunk',
630
- created: createdTimestamp,
631
- model: body.model,
632
- choices: [makeChoice({ content: vStr })]
633
- })}\n\n`);
684
+ fastWriteContent(vStr);
634
685
  }
635
686
  }
636
687
  }
@@ -640,16 +691,11 @@ export async function chatCompletions(c: Context) {
640
691
  }
641
692
  }
642
693
 
643
- // Trim processed portion from buffer
644
- if (startIdx > 0) {
645
- buffer = buffer.slice(startIdx);
694
+ if (bufferOffset > 0) {
695
+ buffer = buffer.slice(bufferOffset);
696
+ bufferOffset = 0;
646
697
  }
647
698
 
648
- // Periodic yielding to prevent event loop starvation
649
- chunkCount++;
650
- if (chunkCount % 100 === 0) {
651
- await new Promise(r => setTimeout(r, 0));
652
- }
653
699
  }
654
700
 
655
701
  const upstreamError = parseQwenErrorPayload(buffer);
@@ -782,7 +828,7 @@ export async function chatCompletionsStop(c: Context) {
782
828
  'Sec-Fetch-Mode': 'cors',
783
829
  'Sec-Fetch-Site': 'same-origin',
784
830
  'User-Agent': stream.headers['user-agent'],
785
- 'X-Request-Id': uuidv4(),
831
+ 'X-Request-Id': crypto.randomUUID(),
786
832
  'bx-ua': stream.headers['bx-ua'],
787
833
  'bx-umidtoken': stream.headers['bx-umidtoken'],
788
834
  'bx-v': stream.headers['bx-v'],
@@ -6,7 +6,7 @@
6
6
 
7
7
  import { Context } from "hono";
8
8
  import { getQwenHeaders } from "../services/playwright.ts";
9
- import { v4 as uuidv4 } from "uuid";
9
+ import crypto from "crypto";
10
10
 
11
11
  interface STSResponse {
12
12
  success: boolean;
@@ -46,7 +46,7 @@ async function getSTSToken(
46
46
  Origin: "https://chat.qwen.ai",
47
47
  Referer: "https://chat.qwen.ai/",
48
48
  "User-Agent": headers["user-agent"],
49
- "X-Request-Id": uuidv4(),
49
+ "X-Request-Id": crypto.randomUUID(),
50
50
  "bx-ua": headers["bx-ua"],
51
51
  "bx-umidtoken": headers["bx-umidtoken"],
52
52
  "bx-v": headers["bx-v"],
@@ -723,11 +723,11 @@ export async function processImagesForQwen(
723
723
  greenNet: "success",
724
724
  size: fileSize,
725
725
  error: "",
726
- itemId: uuidv4(),
726
+ itemId: crypto.randomUUID(),
727
727
  file_type: typeInfo.mime,
728
728
  showType: typeInfo.showType,
729
729
  file_class: typeInfo.fileClass,
730
- uploadTaskId: uuidv4(),
730
+ uploadTaskId: crypto.randomUUID(),
731
731
  });
732
732
  }
733
733
  }
@@ -29,6 +29,7 @@ interface AccountHeaderCache {
29
29
  }
30
30
 
31
31
  const accountHeaderCaches = new Map<string, AccountHeaderCache>();
32
+ const cachedUserAgents = new Map<string, string>();
32
33
 
33
34
  function getAccountHeaderCache(accountId: string): AccountHeaderCache {
34
35
  let cache = accountHeaderCaches.get(accountId);
@@ -1,5 +1,7 @@
1
1
  import { getQwenHeaders, getBasicHeaders } from './playwright.ts';
2
- import { v4 as uuidv4 } from 'uuid';
2
+ import crypto from 'crypto';
3
+
4
+ const CACHED_TIMEZONE = new Date().toString().split(' (')[0];
3
5
 
4
6
  export class RetryableQwenStreamError extends Error {
5
7
  readonly retryAfterMs: number;
@@ -82,7 +84,6 @@ function cleanupStalePool(accountId: string) {
82
84
  }
83
85
 
84
86
  async function getBasicQwenHeaders(accountId?: string): Promise<Record<string, string>> {
85
- const { getBasicHeaders } = await import('./playwright.ts');
86
87
  const { cookie, userAgent, bxV } = await getBasicHeaders(accountId);
87
88
  return {
88
89
  cookie,
@@ -104,7 +105,7 @@ async function createRealQwenChat(header: Record<string, string>): Promise<strin
104
105
  origin: 'https://chat.qwen.ai',
105
106
  referer: 'https://chat.qwen.ai/c/new-chat',
106
107
  'user-agent': header['user-agent'],
107
- 'x-request-id': uuidv4(),
108
+ 'x-request-id': crypto.randomUUID(),
108
109
  'bx-v': header['bx-v'],
109
110
  },
110
111
  body: JSON.stringify({
@@ -131,18 +132,26 @@ async function refillPoolForAccount(accountId: string) {
131
132
  if (!pool) { pool = []; warmPool.set(accountId, pool); }
132
133
  cleanupStalePool(accountId);
133
134
  const need = Math.max(0, WARM_POOL_SIZE - pool.length);
134
-
135
+ if (need === 0) return;
136
+
137
+ let headers: Record<string, string>;
138
+ try {
139
+ headers = await getBasicQwenHeaders(accountId === 'global' ? undefined : accountId);
140
+ } catch (err) {
141
+ console.error(`[WarmPool] header fetch failed for ${accountId}:`, (err as Error).message);
142
+ return;
143
+ }
144
+
135
145
  const creationPromises = Array.from({ length: need }, async () => {
136
146
  try {
137
- const headers = await getBasicQwenHeaders(accountId === 'global' ? undefined : accountId);
138
147
  const chatId = await createRealQwenChat(headers);
139
148
  return { chatId, headers, accountId, timestamp: Date.now() };
140
149
  } catch (err) {
141
- console.error(`[WarmPool] refill failed for ${accountId}:`, (err as Error).message);
150
+ console.error(`[WarmPool] chat creation failed for ${accountId}:`, (err as Error).message);
142
151
  return null;
143
152
  }
144
153
  });
145
-
154
+
146
155
  const results = await Promise.all(creationPromises);
147
156
  for (const entry of results) {
148
157
  if (entry) pool.push(entry);
@@ -252,7 +261,7 @@ export async function disableNativeTools(accountId?: string): Promise<void> {
252
261
  'origin': 'https://chat.qwen.ai',
253
262
  'referer': 'https://chat.qwen.ai/',
254
263
  'user-agent': headers['user-agent'],
255
- 'x-request-id': uuidv4(),
264
+ 'x-request-id': crypto.randomUUID(),
256
265
  'bx-ua': headers['bx-ua'],
257
266
  'bx-umidtoken': headers['bx-umidtoken'],
258
267
  'bx-v': headers['bx-v']
@@ -291,9 +300,9 @@ export async function fetchQwenModels(accountId?: string): Promise<any[]> {
291
300
  'cookie': cookie,
292
301
  'referer': 'https://chat.qwen.ai/',
293
302
  'user-agent': userAgent,
294
- 'x-request-id': uuidv4(),
303
+ 'x-request-id': crypto.randomUUID(),
295
304
  'bx-v': bxV,
296
- 'timezone': new Date().toString(),
305
+ 'timezone': CACHED_TIMEZONE,
297
306
  'source': 'web'
298
307
  }
299
308
  });
@@ -397,7 +406,7 @@ export async function createQwenStream(
397
406
  }
398
407
 
399
408
  const timestamp = Math.floor(Date.now() / 1000);
400
- const fid = uuidv4();
409
+ const fid = crypto.randomUUID();
401
410
  const model = modelId.replace('-no-thinking', '');
402
411
 
403
412
  const payload: QwenPayload = {
@@ -456,10 +465,10 @@ export async function createQwenStream(
456
465
  'sec-fetch-dest': 'empty',
457
466
  'sec-fetch-mode': 'cors',
458
467
  'sec-fetch-site': 'same-origin',
459
- 'timezone': new Date().toString().split(' (')[0],
468
+ 'timezone': CACHED_TIMEZONE,
460
469
  'user-agent': chatHeaders['user-agent'],
461
470
  'x-accel-buffering': 'no',
462
- 'x-request-id': uuidv4(),
471
+ 'x-request-id': crypto.randomUUID(),
463
472
  'bx-v': chatHeaders['bx-v'],
464
473
  },
465
474
  body: JSON.stringify(payload),
@@ -5,7 +5,7 @@
5
5
  * Supports both JSON and Hermes-style XML <parameter> formats.
6
6
  */
7
7
 
8
- import { v4 as uuidv4 } from 'uuid';
8
+ import crypto from 'crypto';
9
9
  import { robustParseJSON } from '../utils/json.js';
10
10
  import { logger } from '../core/logger.js';
11
11
  import type { ParsedToolCall } from './types';
@@ -183,6 +183,11 @@ export class StreamingToolParser {
183
183
 
184
184
  while (this.buffer.length > 0) {
185
185
  if (!this.insideTool) {
186
+ if (this.buffer.indexOf('<') === -1) {
187
+ if (this.emittedToolCallCount === 0) result.text += this.buffer;
188
+ this.buffer = '';
189
+ break;
190
+ }
186
191
  const match = this.buffer.match(TOOL_OPEN_RE);
187
192
  if (match && match.index !== undefined) {
188
193
  // Text before the tool call tag
@@ -276,14 +281,6 @@ export class StreamingToolParser {
276
281
  return this.insideTool;
277
282
  }
278
283
 
279
- /**
280
- * Get any lead-in text that was captured before tool calls.
281
- * Useful for fallback content when tool calls fail to parse.
282
- */
283
- getPendingLeadIn(): string {
284
- return this.pendingLeadIn;
285
- }
286
-
287
284
  // ─── Internal Methods ──────────────────────────────────────────────────────
288
285
 
289
286
  private processToolContent(content: string, result: ParserResult): void {
@@ -302,7 +299,7 @@ export class StreamingToolParser {
302
299
  const xmlParsed = parseXmlParameterToolCall(t, this.currentOpenTag, this.tools);
303
300
  if (xmlParsed) {
304
301
  result.toolCalls.push({
305
- id: `call_${uuidv4()}`,
302
+ id: `call_${crypto.randomUUID()}`,
306
303
  name: xmlParsed.name,
307
304
  arguments: xmlParsed.arguments,
308
305
  });
@@ -365,7 +362,7 @@ export class StreamingToolParser {
365
362
  const xmlParsed = parseXmlParameterToolCall(block, this.currentOpenTag, this.tools);
366
363
  if (xmlParsed) {
367
364
  return {
368
- id: `call_${uuidv4()}`,
365
+ id: `call_${crypto.randomUUID()}`,
369
366
  name: xmlParsed.name,
370
367
  arguments: xmlParsed.arguments,
371
368
  };
@@ -375,7 +372,7 @@ export class StreamingToolParser {
375
372
  const recovered = parseRecoverableXmlToolCall(block, this.currentOpenTag, this.tools);
376
373
  if (recovered) {
377
374
  return {
378
- id: `call_${uuidv4()}`,
375
+ id: `call_${crypto.randomUUID()}`,
379
376
  name: recovered.name,
380
377
  arguments: recovered.arguments,
381
378
  };
@@ -438,7 +435,7 @@ export class StreamingToolParser {
438
435
  if (typeof args !== 'object' || args === null) args = {};
439
436
 
440
437
  return {
441
- id: parsed.id || parsed.tool_call_id || `call_${uuidv4()}`,
438
+ id: parsed.id || parsed.tool_call_id || `call_${crypto.randomUUID()}`,
442
439
  name,
443
440
  arguments: args,
444
441
  };
@@ -1,8 +1,3 @@
1
- export interface TruncatedMessage {
2
- role: string;
3
- content: string;
4
- }
5
-
6
1
  export function estimateTokenCount(text: string): number {
7
2
  // Divisor conservador (2.5) para evitar estouro silencioso do context window.
8
3
  // Tokenizers modernos (como o do Qwen) usam ~1.5 a 2.5 caracteres por token