@pedrofariasx/qwenproxy 1.2.0 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -13
- package/package.json +1 -1
- package/src/api/server.ts +0 -2
- package/src/cache/memory-cache.ts +52 -18
- package/src/routes/chat.ts +132 -79
- package/src/routes/upload.ts +4 -4
- package/src/services/playwright.ts +1 -0
- package/src/services/qwen.ts +36 -15
- package/src/tools/parser.ts +10 -13
- package/src/utils/context-truncation.ts +36 -10
- package/src/linter/extraction-engine.ts +0 -165
- package/src/linter/index.ts +0 -258
- package/src/linter/repair-normalize.ts +0 -245
- package/src/linter/safety-gate.ts +0 -219
- package/src/linter/streaming-state-machine.ts +0 -252
- package/src/linter/structural-parser.ts +0 -352
- package/src/linter/types.ts +0 -74
- package/src/tests/linter.test.ts +0 -151
- package/src/tests/parallel.test.ts +0 -42
- package/src/tests/structureVerification.test.ts +0 -176
- package/src/tools/ast.ts +0 -15
- package/src/tools/coercion.ts +0 -67
- package/src/tools/confidence.ts +0 -48
- package/src/tools/detector.ts +0 -40
- package/src/tools/executor.ts +0 -236
- package/src/tools/pipeline.ts +0 -122
- package/src/tools/registry-runtime.ts +0 -34
- package/src/tools/repair.ts +0 -42
- package/src/tools/validator.ts +0 -33
package/README.md
CHANGED
|
@@ -39,7 +39,7 @@ graph TD
|
|
|
39
39
|
Playwright --> Browser2[Browser - Conta 2]
|
|
40
40
|
Playwright --> BrowserN[Browser - Conta N]
|
|
41
41
|
Handler --> QwenAPI[chat.qwen.ai]
|
|
42
|
-
Handler --> Tools[Tool
|
|
42
|
+
Handler --> Tools[Tool Parser]
|
|
43
43
|
|
|
44
44
|
subgraph "Persistência"
|
|
45
45
|
Accounts
|
|
@@ -233,24 +233,14 @@ qwenproxy/
|
|
|
233
233
|
│ │ ├── model-registry.ts # Registro de modelos e context windows
|
|
234
234
|
│ │ ├── stream-registry.ts # Tracking de streams ativos
|
|
235
235
|
│ │ └── watchdog.ts # Health monitoring
|
|
236
|
-
│ ├── linter/
|
|
237
|
-
│ │ ├── bar.ts # Facade
|
|
238
|
-
│ │ ├── extraction-engine.ts # Extraction engine
|
|
239
|
-
│ │ ├── foo.ts # Exports
|
|
240
|
-
│ │ ├── index.ts # Main public API
|
|
241
|
-
│ │ ├── repair-normalize.ts # Repair and normalize
|
|
242
|
-
│ │ ├── safety-gate.ts # Safety gate
|
|
243
|
-
│ │ ├── streaming-state-machine.ts # Streaming state machine
|
|
244
|
-
│ │ ├── structural-parser.ts # Structural parser
|
|
245
|
-
│ │ └── types.ts # Types
|
|
246
236
|
│ ├── routes/
|
|
247
|
-
│ │
|
|
237
|
+
│ │ ├── chat.ts # Handler /v1/chat/completions
|
|
238
|
+
│ │ └── upload.ts # Handler /v1/upload (multimodal)
|
|
248
239
|
│ ├── services/
|
|
249
240
|
│ │ ├── playwright.ts # Automação de navegador
|
|
250
241
|
│ │ └── qwen.ts # Integração com API do Qwen
|
|
251
242
|
│ ├── tests/ # Testes automatizados
|
|
252
243
|
│ ├── tools/
|
|
253
|
-
│ │ ├── executor.ts # Execução de ferramentas
|
|
254
244
|
│ │ ├── parser.ts # Parser de <tool_call> tags
|
|
255
245
|
│ │ ├── registry.ts # Registro de tools
|
|
256
246
|
│ │ ├── schema.ts # Validação JSON Schema
|
package/package.json
CHANGED
package/src/api/server.ts
CHANGED
|
@@ -111,8 +111,6 @@ export async function startServer(): Promise<void> {
|
|
|
111
111
|
await cache.close()
|
|
112
112
|
const { closePlaywright } = await import('../services/playwright.js')
|
|
113
113
|
await closePlaywright()
|
|
114
|
-
const { cleanupAllAccountMutexes } = await import('../routes/chat.js')
|
|
115
|
-
cleanupAllAccountMutexes()
|
|
116
114
|
const { closeDatabase } = await import('../core/database.ts')
|
|
117
115
|
closeDatabase()
|
|
118
116
|
server?.close()
|
package/src/cache/memory-cache.ts
CHANGED
|
@@ -19,16 +19,34 @@ export class MemoryCache {
|
|
|
19
19
|
private defaultTTL: number
|
|
20
20
|
private prefix: string
|
|
21
21
|
private cleanupInterval: NodeJS.Timeout | null
|
|
22
|
+
private maxEntries: number
|
|
23
|
+
private totalBytes: number
|
|
22
24
|
|
|
23
|
-
constructor(options?: { prefix?: string; defaultTTL?: number }) {
|
|
25
|
+
constructor(options?: { prefix?: string; defaultTTL?: number; maxEntries?: number }) {
|
|
24
26
|
this.prefix = options?.prefix || 'qwenproxy:'
|
|
25
27
|
this.defaultTTL = options?.defaultTTL || config.cache.defaultTTL
|
|
28
|
+
this.maxEntries = options?.maxEntries || 10000
|
|
26
29
|
this.store = new Map()
|
|
30
|
+
this.totalBytes = 0
|
|
27
31
|
this.cleanupInterval = null
|
|
28
32
|
|
|
29
33
|
this.startCleanup()
|
|
30
34
|
}
|
|
31
35
|
|
|
36
|
+
private entryByteSize(key: string, value: any): number {
|
|
37
|
+
return Buffer.byteLength(key) + Buffer.byteLength(JSON.stringify(value))
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
private evictLRU(): void {
|
|
41
|
+
const oldest = this.store.keys().next()
|
|
42
|
+
if (!oldest.done) {
|
|
43
|
+
const evicted = this.store.get(oldest.value)
|
|
44
|
+
if (evicted) this.totalBytes -= this.entryByteSize(oldest.value, evicted.value)
|
|
45
|
+
this.store.delete(oldest.value)
|
|
46
|
+
metrics.increment('cache.evicted')
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
|
|
32
50
|
private startCleanup(): void {
|
|
33
51
|
this.cleanupInterval = setInterval(() => {
|
|
34
52
|
const now = Date.now()
|
|
@@ -46,16 +64,28 @@ export class MemoryCache {
|
|
|
46
64
|
|
|
47
65
|
async set<T>(key: CacheKey, value: T, ttl?: number): Promise<void> {
|
|
48
66
|
const serialized = JSON.stringify(value)
|
|
67
|
+
const valueBytes = Buffer.byteLength(serialized)
|
|
49
68
|
const effectiveTTL = ttl || this.defaultTTL
|
|
50
69
|
const fullKey = this.prefix + key
|
|
70
|
+
const entrySize = Buffer.byteLength(fullKey) + valueBytes
|
|
71
|
+
|
|
72
|
+
if (this.store.has(fullKey)) {
|
|
73
|
+
const oldEntry = this.store.get(fullKey)
|
|
74
|
+
if (oldEntry) this.totalBytes -= this.entryByteSize(fullKey, oldEntry.value)
|
|
75
|
+
} else {
|
|
76
|
+
while (this.store.size >= this.maxEntries) {
|
|
77
|
+
this.evictLRU()
|
|
78
|
+
}
|
|
79
|
+
}
|
|
51
80
|
|
|
52
81
|
this.store.set(fullKey, {
|
|
53
82
|
value,
|
|
54
83
|
expiresAt: Date.now() + (effectiveTTL * 1000)
|
|
55
84
|
})
|
|
85
|
+
this.totalBytes += entrySize
|
|
56
86
|
|
|
57
87
|
metrics.increment('cache.set')
|
|
58
|
-
metrics.histogram('cache.value.size',
|
|
88
|
+
metrics.histogram('cache.value.size', valueBytes)
|
|
59
89
|
}
|
|
60
90
|
|
|
61
91
|
async get<T>(key: CacheKey): Promise<T | null> {
|
|
@@ -66,26 +96,39 @@ export class MemoryCache {
|
|
|
66
96
|
metrics.histogram('cache.get.latency', Date.now() - start)
|
|
67
97
|
|
|
68
98
|
if (!entry || entry.expiresAt <= Date.now()) {
|
|
69
|
-
if (entry)
|
|
99
|
+
if (entry) {
|
|
100
|
+
this.totalBytes -= this.entryByteSize(fullKey, entry.value)
|
|
101
|
+
this.store.delete(fullKey)
|
|
102
|
+
}
|
|
70
103
|
metrics.increment('cache.miss')
|
|
71
104
|
return null
|
|
72
105
|
}
|
|
73
106
|
|
|
107
|
+
this.store.delete(fullKey)
|
|
108
|
+
this.store.set(fullKey, entry)
|
|
109
|
+
|
|
74
110
|
metrics.increment('cache.hit')
|
|
75
111
|
return entry.value as T
|
|
76
112
|
}
|
|
77
113
|
|
|
78
114
|
async delete(key: CacheKey): Promise<void> {
|
|
79
115
|
const fullKey = this.prefix + key
|
|
80
|
-
this.store.
|
|
81
|
-
|
|
116
|
+
const entry = this.store.get(fullKey)
|
|
117
|
+
if (entry) {
|
|
118
|
+
this.totalBytes -= this.entryByteSize(fullKey, entry.value)
|
|
119
|
+
this.store.delete(fullKey)
|
|
120
|
+
metrics.increment('cache.deleted')
|
|
121
|
+
}
|
|
82
122
|
}
|
|
83
123
|
|
|
84
124
|
async exists(key: CacheKey): Promise<boolean> {
|
|
85
125
|
const fullKey = this.prefix + key
|
|
86
126
|
const entry = this.store.get(fullKey)
|
|
87
127
|
if (!entry || entry.expiresAt <= Date.now()) {
|
|
88
|
-
if (entry)
|
|
128
|
+
if (entry) {
|
|
129
|
+
this.totalBytes -= this.entryByteSize(fullKey, entry.value)
|
|
130
|
+
this.store.delete(fullKey)
|
|
131
|
+
}
|
|
89
132
|
return false
|
|
90
133
|
}
|
|
91
134
|
return true
|
|
@@ -157,20 +200,10 @@ export class MemoryCache {
|
|
|
157
200
|
keysCount?: number
|
|
158
201
|
memoryUsage?: string
|
|
159
202
|
}> {
|
|
160
|
-
const now = Date.now()
|
|
161
|
-
let validKeys = 0
|
|
162
|
-
let totalBytes = 0
|
|
163
|
-
for (const [key, entry] of this.store.entries()) {
|
|
164
|
-
if (entry.expiresAt > now) {
|
|
165
|
-
validKeys++
|
|
166
|
-
totalBytes += Buffer.byteLength(JSON.stringify(entry.value)) + Buffer.byteLength(key)
|
|
167
|
-
}
|
|
168
|
-
}
|
|
169
|
-
|
|
170
203
|
return {
|
|
171
204
|
connected: true,
|
|
172
|
-
keysCount:
|
|
173
|
-
memoryUsage: `${(totalBytes / 1024).toFixed(2)}KB`
|
|
205
|
+
keysCount: this.store.size,
|
|
206
|
+
memoryUsage: `${(this.totalBytes / 1024).toFixed(2)}KB`
|
|
174
207
|
}
|
|
175
208
|
}
|
|
176
209
|
|
|
@@ -180,6 +213,7 @@ export class MemoryCache {
|
|
|
180
213
|
this.cleanupInterval = null
|
|
181
214
|
}
|
|
182
215
|
this.store.clear()
|
|
216
|
+
this.totalBytes = 0
|
|
183
217
|
}
|
|
184
218
|
}
|
|
185
219
|
|
package/src/routes/chat.ts
CHANGED
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
|
|
11
11
|
import { Context } from 'hono';
|
|
12
12
|
import { stream as honoStream } from 'hono/streaming';
|
|
13
|
-
import
|
|
13
|
+
import crypto from 'crypto';
|
|
14
14
|
import { createQwenStream, updateSessionParent } from '../services/qwen.ts';
|
|
15
15
|
import { OpenAIRequest, ChoiceDelta, Message } from '../utils/types.ts';
|
|
16
16
|
import { registry } from '../tools/registry.ts';
|
|
@@ -25,33 +25,71 @@ import { getNextAccount, getNextAvailableAccount, markAccountRateLimited, getAcc
|
|
|
25
25
|
import { registerStream, removeStream, getStream } from '../core/stream-registry.ts';
|
|
26
26
|
import { metrics } from '../core/metrics.js'
|
|
27
27
|
|
|
28
|
-
export function cleanupAllAccountMutexes(): void {
|
|
29
|
-
// No-op - kept for backward compatibility
|
|
30
|
-
}
|
|
31
|
-
|
|
32
28
|
export interface DeltaResult {
|
|
33
29
|
delta: string;
|
|
34
30
|
matchedContent: string;
|
|
31
|
+
contentLength: number;
|
|
32
|
+
contentSuffix: string;
|
|
35
33
|
}
|
|
36
34
|
|
|
37
|
-
export function getIncrementalDelta(oldStr: string, newStr: string): DeltaResult {
|
|
35
|
+
export function getIncrementalDelta(oldStr: string, newStr: string, prevLength: number = 0, prevSuffix: string = ''): DeltaResult {
|
|
38
36
|
if (!oldStr) {
|
|
39
|
-
return {
|
|
37
|
+
return {
|
|
38
|
+
delta: newStr,
|
|
39
|
+
matchedContent: newStr,
|
|
40
|
+
contentLength: newStr.length,
|
|
41
|
+
contentSuffix: newStr.slice(-64)
|
|
42
|
+
};
|
|
40
43
|
}
|
|
41
44
|
if (newStr === oldStr) {
|
|
42
|
-
return { delta: '', matchedContent: oldStr };
|
|
45
|
+
return { delta: '', matchedContent: oldStr, contentLength: prevLength, contentSuffix: prevSuffix };
|
|
43
46
|
}
|
|
44
47
|
|
|
45
|
-
//
|
|
48
|
+
// Ultra-fast path: use length tracking to avoid O(n) startsWith on large strings
|
|
49
|
+
if (newStr.length > prevLength && prevLength > 0) {
|
|
50
|
+
const delta = newStr.slice(prevLength);
|
|
51
|
+
const checkLen = Math.min(64, prevLength);
|
|
52
|
+
const expectedSuffix = prevSuffix.slice(-checkLen);
|
|
53
|
+
const actualSuffix = newStr.slice(prevLength - checkLen, prevLength);
|
|
54
|
+
|
|
55
|
+
if (expectedSuffix === actualSuffix) {
|
|
56
|
+
if (delta.length <= 4 && oldStr.length > 2000) {
|
|
57
|
+
return {
|
|
58
|
+
delta: newStr,
|
|
59
|
+
matchedContent: oldStr + newStr,
|
|
60
|
+
contentLength: newStr.length,
|
|
61
|
+
contentSuffix: newStr.slice(-64)
|
|
62
|
+
};
|
|
63
|
+
}
|
|
64
|
+
return {
|
|
65
|
+
delta,
|
|
66
|
+
matchedContent: newStr,
|
|
67
|
+
contentLength: newStr.length,
|
|
68
|
+
contentSuffix: newStr.slice(-64)
|
|
69
|
+
};
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
// Fallback: startsWith check for edge cases
|
|
46
74
|
if (newStr.startsWith(oldStr)) {
|
|
47
75
|
const delta = newStr.slice(oldStr.length);
|
|
48
76
|
if (delta.length <= 4 && oldStr.length > 2000) {
|
|
49
|
-
return {
|
|
77
|
+
return {
|
|
78
|
+
delta: newStr,
|
|
79
|
+
matchedContent: oldStr + newStr,
|
|
80
|
+
contentLength: newStr.length,
|
|
81
|
+
contentSuffix: newStr.slice(-64)
|
|
82
|
+
};
|
|
50
83
|
}
|
|
51
|
-
return {
|
|
84
|
+
return {
|
|
85
|
+
delta,
|
|
86
|
+
matchedContent: newStr,
|
|
87
|
+
contentLength: newStr.length,
|
|
88
|
+
contentSuffix: newStr.slice(-64)
|
|
89
|
+
};
|
|
52
90
|
}
|
|
53
91
|
|
|
54
|
-
//
|
|
92
|
+
// Segment-based prefix matching (rare path)
|
|
55
93
|
const scanWindow = Math.min(2000, oldStr.length);
|
|
56
94
|
const maxLen = Math.min(scanWindow, newStr.length);
|
|
57
95
|
|
|
@@ -65,17 +103,27 @@ export function getIncrementalDelta(oldStr: string, newStr: string): DeltaResult
|
|
|
65
103
|
commonPrefixLen += segmentLen;
|
|
66
104
|
}
|
|
67
105
|
|
|
68
|
-
// Fine-grained scan within the mismatching segment
|
|
69
106
|
while (commonPrefixLen < maxLen && oldStr[commonPrefixLen] === newStr[commonPrefixLen]) {
|
|
70
107
|
commonPrefixLen++;
|
|
71
108
|
}
|
|
72
109
|
|
|
73
110
|
const threshold = Math.min(scanWindow, 4);
|
|
74
111
|
if (commonPrefixLen >= threshold) {
|
|
75
|
-
return {
|
|
112
|
+
return {
|
|
113
|
+
delta: newStr.substring(commonPrefixLen),
|
|
114
|
+
matchedContent: newStr,
|
|
115
|
+
contentLength: newStr.length,
|
|
116
|
+
contentSuffix: newStr.slice(-64)
|
|
117
|
+
};
|
|
76
118
|
}
|
|
77
119
|
|
|
78
|
-
|
|
120
|
+
const combined = oldStr + newStr;
|
|
121
|
+
return {
|
|
122
|
+
delta: newStr,
|
|
123
|
+
matchedContent: combined,
|
|
124
|
+
contentLength: combined.length,
|
|
125
|
+
contentSuffix: combined.slice(-64)
|
|
126
|
+
};
|
|
79
127
|
}
|
|
80
128
|
|
|
81
129
|
function parseQwenErrorPayload(raw: string): { message: string; status: number } | null {
|
|
@@ -119,29 +167,26 @@ export async function chatCompletions(c: Context) {
|
|
|
119
167
|
const msg = messages[i];
|
|
120
168
|
let contentStr = '';
|
|
121
169
|
if (Array.isArray(msg.content)) {
|
|
122
|
-
//
|
|
123
|
-
const
|
|
124
|
-
|
|
170
|
+
// Single-pass: extract text and multimodal parts in one iteration
|
|
171
|
+
const textParts: string[] = [];
|
|
172
|
+
const multimodalParts: Array<{ type: string; text?: string; image_url?: { url: string }; video_url?: { url: string }; audio_url?: { url: string }; file_url?: { url: string } }> = [];
|
|
173
|
+
|
|
174
|
+
for (const p of msg.content as any[]) {
|
|
175
|
+
if (p.type === "text" && p.text) {
|
|
176
|
+
textParts.push(p.text);
|
|
177
|
+
} else if (
|
|
125
178
|
(p.type === "image_url" && p.image_url?.url) ||
|
|
126
179
|
(p.type === "video_url" && p.video_url?.url) ||
|
|
127
180
|
(p.type === "audio_url" && p.audio_url?.url) ||
|
|
128
|
-
(p.type === "file_url" && p.file_url?.url)
|
|
129
|
-
|
|
130
|
-
|
|
181
|
+
(p.type === "file_url" && p.file_url?.url)
|
|
182
|
+
) {
|
|
183
|
+
multimodalParts.push(p);
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
contentStr = textParts.join("\n");
|
|
131
188
|
if (multimodalParts.length > 0) {
|
|
132
|
-
// Defer processing to after account selection to reuse cached headers
|
|
133
189
|
pendingMultimodal.push(multimodalParts);
|
|
134
|
-
// Extract text parts for prompt building
|
|
135
|
-
contentStr = msg.content
|
|
136
|
-
.filter((p: any) => p.type === "text")
|
|
137
|
-
.map((p: any) => p.text)
|
|
138
|
-
.join("\n");
|
|
139
|
-
} else {
|
|
140
|
-
// No multimodal parts, just extract text
|
|
141
|
-
contentStr = msg.content
|
|
142
|
-
.filter((p: any) => p.type === "text")
|
|
143
|
-
.map((p: any) => p.text)
|
|
144
|
-
.join("\n");
|
|
145
190
|
}
|
|
146
191
|
} else if (typeof msg.content === 'object' && msg.content !== null) {
|
|
147
192
|
contentStr = JSON.stringify(msg.content);
|
|
@@ -209,7 +254,7 @@ export async function chatCompletions(c: Context) {
|
|
|
209
254
|
});
|
|
210
255
|
const toolsJson = JSON.stringify(formattedTools, null, 2);
|
|
211
256
|
|
|
212
|
-
systemPrompt += `\n\n# TOOLS AVAILABLE\nYou have access to the following tools:\n${toolsJson}\n\n# TOOL CALLING FORMAT (MANDATORY)\nTo use a tool, you MUST output a JSON object wrapped EXACTLY in
|
|
257
|
+
systemPrompt += `\n\n# TOOLS AVAILABLE\nYou have access to the following tools:\n${toolsJson}\n\n# TOOL CALLING FORMAT (MANDATORY)\nTo use a tool, you MUST output a JSON object wrapped EXACTLY in <tool_call> tags:\n\n<tool_call>\n{"name": "tool_name", "arguments": {"param_name": "value"}}\n</tool_call>\n\nEXAMPLE OF MULTIPLE TOOL CALLS:\n<tool_call>\n{"name": "read_file", "arguments": {"path": "file1.txt"}}\n</tool_call>\n<tool_call>\n{"name": "read_file", "arguments": {"path": "file2.txt"}}\n</tool_call>\n\nCRITICAL RULES:\n1. ONLY use the tags above for tool calling. NEVER output raw JSON without tags.\n2. You can call multiple tools by outputting multiple <tool_call> blocks consecutively.\n3. Do NOT output any other text (explanations, chat, etc.) after your <tool_call> blocks. Wait for the user to provide the tool response.\n4. The JSON inside the tags MUST be valid and include ALL required braces and the "arguments" field.\n5. If you need to use a tool, do it IMMEDIATELY without preamble.\n6. NEVER invent, guess, or hallucinate tool names. You MUST ONLY use the exact tool names provided in the 'TOOLS AVAILABLE' list above. Calling an unlisted tool will result in a hard execution error.\n\n`;
|
|
213
258
|
|
|
214
259
|
if (bodyAny.tool_choice && typeof bodyAny.tool_choice === 'object' && bodyAny.tool_choice.function) {
|
|
215
260
|
const forcedTool = bodyAny.tool_choice.function.name;
|
|
@@ -220,15 +265,22 @@ export async function chatCompletions(c: Context) {
|
|
|
220
265
|
const modelId = body.model.replace('-no-thinking', '');
|
|
221
266
|
const modelContextWindow = getModelContextWindow(modelId)
|
|
222
267
|
const estimatedTokens = estimateTokenCount(systemPrompt + prompt);
|
|
268
|
+
const hasTools = Array.isArray(bodyAny.tools) && bodyAny.tools.length > 0;
|
|
223
269
|
|
|
224
270
|
let finalPrompt: string;
|
|
225
271
|
if (estimatedTokens > modelContextWindow - 1000) {
|
|
226
272
|
const truncated = truncateMessages(messages, modelContextWindow, systemPrompt);
|
|
227
|
-
|
|
273
|
+
const truncatedBody = truncated.map(m => `${m.role === 'user' ? 'User' : m.role === 'assistant' ? 'Assistant' : m.role}: ${m.content}`).join('\n\n');
|
|
274
|
+
finalPrompt = systemPrompt ? `${systemPrompt}\n\n${truncatedBody}` : truncatedBody;
|
|
228
275
|
} else {
|
|
229
276
|
finalPrompt = systemPrompt ? `${systemPrompt}\n${prompt}` : prompt;
|
|
230
277
|
}
|
|
231
278
|
|
|
279
|
+
// Reforço de instrução de tool call para contextos longos (mitiga "Lost in the Middle")
|
|
280
|
+
if (hasTools && estimatedTokens > 15000) {
|
|
281
|
+
finalPrompt += '\n\n[CRITICAL REMINDER: You MUST use the exact <tool_call> JSON format specified in the system instructions. Do not hallucinate tool names or output raw JSON without the tags.]';
|
|
282
|
+
}
|
|
283
|
+
|
|
232
284
|
const isThinkingModel = !body.model.includes('no-thinking');
|
|
233
285
|
|
|
234
286
|
// A session is new if it doesn't have any assistant messages yet.
|
|
@@ -242,7 +294,7 @@ export async function chatCompletions(c: Context) {
|
|
|
242
294
|
|
|
243
295
|
let stream: ReadableStream | undefined;
|
|
244
296
|
let uiSessionId = '';
|
|
245
|
-
const completionId = 'chatcmpl-' +
|
|
297
|
+
const completionId = 'chatcmpl-' + crypto.randomUUID();
|
|
246
298
|
|
|
247
299
|
while (account) {
|
|
248
300
|
const accountId = account.id;
|
|
@@ -469,10 +521,30 @@ export async function chatCompletions(c: Context) {
|
|
|
469
521
|
finish_reason: finishReason
|
|
470
522
|
});
|
|
471
523
|
|
|
472
|
-
// Pre-compute timestamp once before the stream loop
|
|
473
524
|
const createdTimestamp = Math.floor(Date.now() / 1000);
|
|
474
525
|
|
|
475
|
-
|
|
526
|
+
const fastWriteContent = (content: string) => {
|
|
527
|
+
const chunk = JSON.stringify({
|
|
528
|
+
id: completionId,
|
|
529
|
+
object: 'chat.completion.chunk',
|
|
530
|
+
created: createdTimestamp,
|
|
531
|
+
model: body.model,
|
|
532
|
+
choices: [makeChoice({ content })]
|
|
533
|
+
});
|
|
534
|
+
streamWriter.write(`data: ${chunk}\n\n`);
|
|
535
|
+
};
|
|
536
|
+
|
|
537
|
+
const fastWriteReasoning = (content: string) => {
|
|
538
|
+
const chunk = JSON.stringify({
|
|
539
|
+
id: completionId,
|
|
540
|
+
object: 'chat.completion.chunk',
|
|
541
|
+
created: createdTimestamp,
|
|
542
|
+
model: body.model,
|
|
543
|
+
choices: [makeChoice({ reasoning_content: content })]
|
|
544
|
+
});
|
|
545
|
+
streamWriter.write(`data: ${chunk}\n\n`);
|
|
546
|
+
};
|
|
547
|
+
|
|
476
548
|
writeEvent({
|
|
477
549
|
id: completionId,
|
|
478
550
|
object: 'chat.completion.chunk',
|
|
@@ -486,6 +558,8 @@ export async function chatCompletions(c: Context) {
|
|
|
486
558
|
|
|
487
559
|
let reasoningBuffer = '';
|
|
488
560
|
let lastFullContent = '';
|
|
561
|
+
let contentLength = 0;
|
|
562
|
+
let contentSuffix = '';
|
|
489
563
|
let targetResponseId: string | null = null;
|
|
490
564
|
let targetResponseIdSet = false;
|
|
491
565
|
let currentThoughtIndex = 0;
|
|
@@ -493,27 +567,27 @@ export async function chatCompletions(c: Context) {
|
|
|
493
567
|
const toolParser = hasTools ? new StreamingToolParser(bodyAny.tools) : null;
|
|
494
568
|
|
|
495
569
|
let buffer = '';
|
|
570
|
+
let bufferOffset = 0;
|
|
496
571
|
let completionTokens = 0;
|
|
497
572
|
let promptTokens = Math.ceil(finalPrompt.length / 3.5);
|
|
498
573
|
|
|
499
|
-
// Real-time flush: send each event immediately to minimize latency
|
|
500
|
-
let chunkCount = 0;
|
|
501
574
|
while (true) {
|
|
502
575
|
const { done, value } = await reader.read();
|
|
503
576
|
if (done) break;
|
|
504
577
|
|
|
505
578
|
buffer += decoder.decode(value, { stream: true });
|
|
506
579
|
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
580
|
+
while (bufferOffset < buffer.length) {
|
|
581
|
+
const newlineIdx = buffer.indexOf('\n', bufferOffset);
|
|
582
|
+
if (newlineIdx === -1) break;
|
|
583
|
+
|
|
584
|
+
const line = buffer.slice(bufferOffset, newlineIdx);
|
|
585
|
+
bufferOffset = newlineIdx + 1;
|
|
512
586
|
|
|
513
|
-
|
|
514
|
-
|
|
587
|
+
const trimmed = line.trim();
|
|
588
|
+
if (!trimmed || !trimmed.startsWith('data: ')) continue;
|
|
515
589
|
|
|
516
|
-
|
|
590
|
+
const dataStr = trimmed.slice(6);
|
|
517
591
|
if (dataStr === '[DONE]') {
|
|
518
592
|
streamWriter.write('data: [DONE]\n\n');
|
|
519
593
|
continue;
|
|
@@ -562,10 +636,12 @@ export async function chatCompletions(c: Context) {
|
|
|
562
636
|
isThinkingChunk = false;
|
|
563
637
|
if (delta.content !== undefined) {
|
|
564
638
|
const newContent = delta.content || '';
|
|
565
|
-
const result = getIncrementalDelta(lastFullContent, newContent);
|
|
639
|
+
const result = getIncrementalDelta(lastFullContent, newContent, contentLength, contentSuffix);
|
|
566
640
|
vStr = result.delta;
|
|
567
641
|
if (vStr) {
|
|
568
642
|
lastFullContent = result.matchedContent;
|
|
643
|
+
contentLength = result.contentLength;
|
|
644
|
+
contentSuffix = result.contentSuffix;
|
|
569
645
|
foundStr = true;
|
|
570
646
|
}
|
|
571
647
|
}
|
|
@@ -577,24 +653,12 @@ export async function chatCompletions(c: Context) {
|
|
|
577
653
|
|
|
578
654
|
if (isThinkingChunk) {
|
|
579
655
|
reasoningBuffer += vStr;
|
|
580
|
-
|
|
581
|
-
id: completionId,
|
|
582
|
-
object: 'chat.completion.chunk',
|
|
583
|
-
created: createdTimestamp,
|
|
584
|
-
model: body.model,
|
|
585
|
-
choices: [makeChoice({ reasoning_content: vStr })]
|
|
586
|
-
})}\n\n`);
|
|
656
|
+
fastWriteReasoning(vStr);
|
|
587
657
|
} else {
|
|
588
658
|
if (hasTools && toolParser) {
|
|
589
659
|
const { text, toolCalls } = toolParser.feed(vStr);
|
|
590
660
|
if (text) {
|
|
591
|
-
|
|
592
|
-
id: completionId,
|
|
593
|
-
object: 'chat.completion.chunk',
|
|
594
|
-
created: createdTimestamp,
|
|
595
|
-
model: body.model,
|
|
596
|
-
choices: [makeChoice({ content: text })]
|
|
597
|
-
})}\n\n`);
|
|
661
|
+
fastWriteContent(text);
|
|
598
662
|
}
|
|
599
663
|
for (const tc of toolCalls) {
|
|
600
664
|
streamWriter.write(`data: ${JSON.stringify({
|
|
@@ -617,13 +681,7 @@ export async function chatCompletions(c: Context) {
|
|
|
617
681
|
}
|
|
618
682
|
} else {
|
|
619
683
|
if (vStr) {
|
|
620
|
-
|
|
621
|
-
id: completionId,
|
|
622
|
-
object: 'chat.completion.chunk',
|
|
623
|
-
created: createdTimestamp,
|
|
624
|
-
model: body.model,
|
|
625
|
-
choices: [makeChoice({ content: vStr })]
|
|
626
|
-
})}\n\n`);
|
|
684
|
+
fastWriteContent(vStr);
|
|
627
685
|
}
|
|
628
686
|
}
|
|
629
687
|
}
|
|
@@ -633,16 +691,11 @@ export async function chatCompletions(c: Context) {
|
|
|
633
691
|
}
|
|
634
692
|
}
|
|
635
693
|
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
694
|
+
if (bufferOffset > 0) {
|
|
695
|
+
buffer = buffer.slice(bufferOffset);
|
|
696
|
+
bufferOffset = 0;
|
|
639
697
|
}
|
|
640
698
|
|
|
641
|
-
// Periodic yielding to prevent event loop starvation
|
|
642
|
-
chunkCount++;
|
|
643
|
-
if (chunkCount % 100 === 0) {
|
|
644
|
-
await new Promise(r => setImmediate(r));
|
|
645
|
-
}
|
|
646
699
|
}
|
|
647
700
|
|
|
648
701
|
const upstreamError = parseQwenErrorPayload(buffer);
|
|
@@ -775,7 +828,7 @@ export async function chatCompletionsStop(c: Context) {
|
|
|
775
828
|
'Sec-Fetch-Mode': 'cors',
|
|
776
829
|
'Sec-Fetch-Site': 'same-origin',
|
|
777
830
|
'User-Agent': stream.headers['user-agent'],
|
|
778
|
-
'X-Request-Id':
|
|
831
|
+
'X-Request-Id': crypto.randomUUID(),
|
|
779
832
|
'bx-ua': stream.headers['bx-ua'],
|
|
780
833
|
'bx-umidtoken': stream.headers['bx-umidtoken'],
|
|
781
834
|
'bx-v': stream.headers['bx-v'],
|
package/src/routes/upload.ts
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
|
|
7
7
|
import { Context } from "hono";
|
|
8
8
|
import { getQwenHeaders } from "../services/playwright.ts";
|
|
9
|
-
import
|
|
9
|
+
import crypto from "crypto";
|
|
10
10
|
|
|
11
11
|
interface STSResponse {
|
|
12
12
|
success: boolean;
|
|
@@ -46,7 +46,7 @@ async function getSTSToken(
|
|
|
46
46
|
Origin: "https://chat.qwen.ai",
|
|
47
47
|
Referer: "https://chat.qwen.ai/",
|
|
48
48
|
"User-Agent": headers["user-agent"],
|
|
49
|
-
"X-Request-Id":
|
|
49
|
+
"X-Request-Id": crypto.randomUUID(),
|
|
50
50
|
"bx-ua": headers["bx-ua"],
|
|
51
51
|
"bx-umidtoken": headers["bx-umidtoken"],
|
|
52
52
|
"bx-v": headers["bx-v"],
|
|
@@ -723,11 +723,11 @@ export async function processImagesForQwen(
|
|
|
723
723
|
greenNet: "success",
|
|
724
724
|
size: fileSize,
|
|
725
725
|
error: "",
|
|
726
|
-
itemId:
|
|
726
|
+
itemId: crypto.randomUUID(),
|
|
727
727
|
file_type: typeInfo.mime,
|
|
728
728
|
showType: typeInfo.showType,
|
|
729
729
|
file_class: typeInfo.fileClass,
|
|
730
|
-
uploadTaskId:
|
|
730
|
+
uploadTaskId: crypto.randomUUID(),
|
|
731
731
|
});
|
|
732
732
|
}
|
|
733
733
|
}
|
package/src/services/playwright.ts
CHANGED
|
@@ -29,6 +29,7 @@ interface AccountHeaderCache {
|
|
|
29
29
|
}
|
|
30
30
|
|
|
31
31
|
const accountHeaderCaches = new Map<string, AccountHeaderCache>();
|
|
32
|
+
const cachedUserAgents = new Map<string, string>();
|
|
32
33
|
|
|
33
34
|
function getAccountHeaderCache(accountId: string): AccountHeaderCache {
|
|
34
35
|
let cache = accountHeaderCaches.get(accountId);
|