@pedrofariasx/qwenproxy 1.2.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cache/memory-cache.ts +50 -17
- package/src/routes/chat.ts +10 -3
- package/src/services/qwen.ts +16 -4
- package/src/utils/context-truncation.ts +37 -6
package/package.json
CHANGED
|
@@ -19,16 +19,34 @@ export class MemoryCache {
|
|
|
19
19
|
private defaultTTL: number
|
|
20
20
|
private prefix: string
|
|
21
21
|
private cleanupInterval: NodeJS.Timeout | null
|
|
22
|
+
private maxEntries: number
|
|
23
|
+
private totalBytes: number
|
|
22
24
|
|
|
23
|
-
constructor(options?: { prefix?: string; defaultTTL?: number }) {
|
|
25
|
+
/**
 * Builds an empty cache and starts the periodic cleanup timer.
 *
 * @param options.prefix - Prepended to every key; falls back to 'qwenproxy:'.
 * @param options.defaultTTL - TTL used when a write supplies none; falls back
 *   to `config.cache.defaultTTL`.
 * @param options.maxEntries - Upper bound on stored entries; falls back to
 *   10000 when omitted, zero, negative or NaN.
 */
constructor(options?: { prefix?: string; defaultTTL?: number; maxEntries?: number }) {
  this.prefix = options?.prefix || 'qwenproxy:'
  this.defaultTTL = options?.defaultTTL || config.cache.defaultTTL

  // A non-positive cap must never reach the eviction loop
  // (`while (this.store.size >= this.maxEntries)`): on an empty store the
  // condition stays true and evictLRU() has nothing to remove, so it would
  // spin forever. `||` alone only filtered 0/NaN, not negative numbers.
  const requestedMax = options?.maxEntries
  this.maxEntries = requestedMax != null && requestedMax > 0 ? requestedMax : 10000

  this.store = new Map()
  this.totalBytes = 0
  this.cleanupInterval = null

  this.startCleanup()
}
|
|
31
35
|
|
|
36
|
+
/**
 * Approximates the footprint of one cache entry: the UTF-8 byte length of
 * the key plus the UTF-8 byte length of the value's JSON serialization.
 */
private entryByteSize(key: string, value: any): number {
  const keyBytes = Buffer.byteLength(key)
  const valueBytes = Buffer.byteLength(JSON.stringify(value))
  return keyBytes + valueBytes
}
|
|
39
|
+
|
|
40
|
+
/**
 * Drops the entry at the front of the store's iteration order, subtracts its
 * size from the running byte total and records a `cache.evicted` metric.
 * Does nothing when the store is empty.
 */
private evictLRU(): void {
  const head = this.store.keys().next()
  if (head.done) {
    return
  }

  const victimKey = head.value
  const victim = this.store.get(victimKey)
  if (victim) {
    this.totalBytes -= this.entryByteSize(victimKey, victim.value)
  }
  this.store.delete(victimKey)
  metrics.increment('cache.evicted')
}
|
|
49
|
+
|
|
32
50
|
private startCleanup(): void {
|
|
33
51
|
this.cleanupInterval = setInterval(() => {
|
|
34
52
|
const now = Date.now()
|
|
@@ -48,11 +66,22 @@ export class MemoryCache {
|
|
|
48
66
|
const serialized = JSON.stringify(value)
|
|
49
67
|
const effectiveTTL = ttl || this.defaultTTL
|
|
50
68
|
const fullKey = this.prefix + key
|
|
69
|
+
const entrySize = this.entryByteSize(fullKey, value)
|
|
70
|
+
|
|
71
|
+
if (this.store.has(fullKey)) {
|
|
72
|
+
const oldEntry = this.store.get(fullKey)
|
|
73
|
+
if (oldEntry) this.totalBytes -= this.entryByteSize(fullKey, oldEntry.value)
|
|
74
|
+
} else {
|
|
75
|
+
while (this.store.size >= this.maxEntries) {
|
|
76
|
+
this.evictLRU()
|
|
77
|
+
}
|
|
78
|
+
}
|
|
51
79
|
|
|
52
80
|
this.store.set(fullKey, {
|
|
53
81
|
value,
|
|
54
82
|
expiresAt: Date.now() + (effectiveTTL * 1000)
|
|
55
83
|
})
|
|
84
|
+
this.totalBytes += entrySize
|
|
56
85
|
|
|
57
86
|
metrics.increment('cache.set')
|
|
58
87
|
metrics.histogram('cache.value.size', Buffer.byteLength(serialized))
|
|
@@ -66,26 +95,39 @@ export class MemoryCache {
|
|
|
66
95
|
metrics.histogram('cache.get.latency', Date.now() - start)
|
|
67
96
|
|
|
68
97
|
if (!entry || entry.expiresAt <= Date.now()) {
|
|
69
|
-
if (entry)
|
|
98
|
+
if (entry) {
|
|
99
|
+
this.totalBytes -= this.entryByteSize(fullKey, entry.value)
|
|
100
|
+
this.store.delete(fullKey)
|
|
101
|
+
}
|
|
70
102
|
metrics.increment('cache.miss')
|
|
71
103
|
return null
|
|
72
104
|
}
|
|
73
105
|
|
|
106
|
+
this.store.delete(fullKey)
|
|
107
|
+
this.store.set(fullKey, entry)
|
|
108
|
+
|
|
74
109
|
metrics.increment('cache.hit')
|
|
75
110
|
return entry.value as T
|
|
76
111
|
}
|
|
77
112
|
|
|
78
113
|
async delete(key: CacheKey): Promise<void> {
|
|
79
114
|
const fullKey = this.prefix + key
|
|
80
|
-
this.store.
|
|
81
|
-
|
|
115
|
+
const entry = this.store.get(fullKey)
|
|
116
|
+
if (entry) {
|
|
117
|
+
this.totalBytes -= this.entryByteSize(fullKey, entry.value)
|
|
118
|
+
this.store.delete(fullKey)
|
|
119
|
+
metrics.increment('cache.deleted')
|
|
120
|
+
}
|
|
82
121
|
}
|
|
83
122
|
|
|
84
123
|
async exists(key: CacheKey): Promise<boolean> {
|
|
85
124
|
const fullKey = this.prefix + key
|
|
86
125
|
const entry = this.store.get(fullKey)
|
|
87
126
|
if (!entry || entry.expiresAt <= Date.now()) {
|
|
88
|
-
if (entry)
|
|
127
|
+
if (entry) {
|
|
128
|
+
this.totalBytes -= this.entryByteSize(fullKey, entry.value)
|
|
129
|
+
this.store.delete(fullKey)
|
|
130
|
+
}
|
|
89
131
|
return false
|
|
90
132
|
}
|
|
91
133
|
return true
|
|
@@ -157,20 +199,10 @@ export class MemoryCache {
|
|
|
157
199
|
keysCount?: number
|
|
158
200
|
memoryUsage?: string
|
|
159
201
|
}> {
|
|
160
|
-
const now = Date.now()
|
|
161
|
-
let validKeys = 0
|
|
162
|
-
let totalBytes = 0
|
|
163
|
-
for (const [key, entry] of this.store.entries()) {
|
|
164
|
-
if (entry.expiresAt > now) {
|
|
165
|
-
validKeys++
|
|
166
|
-
totalBytes += Buffer.byteLength(JSON.stringify(entry.value)) + Buffer.byteLength(key)
|
|
167
|
-
}
|
|
168
|
-
}
|
|
169
|
-
|
|
170
202
|
return {
|
|
171
203
|
connected: true,
|
|
172
|
-
keysCount:
|
|
173
|
-
memoryUsage: `${(totalBytes / 1024).toFixed(2)}KB`
|
|
204
|
+
keysCount: this.store.size,
|
|
205
|
+
memoryUsage: `${(this.totalBytes / 1024).toFixed(2)}KB`
|
|
174
206
|
}
|
|
175
207
|
}
|
|
176
208
|
|
|
@@ -180,6 +212,7 @@ export class MemoryCache {
|
|
|
180
212
|
this.cleanupInterval = null
|
|
181
213
|
}
|
|
182
214
|
this.store.clear()
|
|
215
|
+
this.totalBytes = 0
|
|
183
216
|
}
|
|
184
217
|
}
|
|
185
218
|
|
package/src/routes/chat.ts
CHANGED
|
@@ -209,7 +209,7 @@ export async function chatCompletions(c: Context) {
|
|
|
209
209
|
});
|
|
210
210
|
const toolsJson = JSON.stringify(formattedTools, null, 2);
|
|
211
211
|
|
|
212
|
-
systemPrompt += `\n\n# TOOLS AVAILABLE\nYou have access to the following tools:\n${toolsJson}\n\n# TOOL CALLING FORMAT (MANDATORY)\nTo use a tool, you MUST output a JSON object wrapped EXACTLY in
|
|
212
|
+
systemPrompt += `\n\n# TOOLS AVAILABLE\nYou have access to the following tools:\n${toolsJson}\n\n# TOOL CALLING FORMAT (MANDATORY)\nTo use a tool, you MUST output a JSON object wrapped EXACTLY in <tool_call> tags:\n\n<tool_call>\n{"name": "tool_name", "arguments": {"param_name": "value"}}\n</tool_call>\n\nEXAMPLE OF MULTIPLE TOOL CALLS:\n<tool_call>\n{"name": "read_file", "arguments": {"path": "file1.txt"}}\n</tool_call>\n<tool_call>\n{"name": "read_file", "arguments": {"path": "file2.txt"}}\n</tool_call>\n\nCRITICAL RULES:\n1. ONLY use the tags above for tool calling. NEVER output raw JSON without tags.\n2. You can call multiple tools by outputting multiple <tool_call> blocks consecutively.\n3. Do NOT output any other text (explanations, chat, etc.) after your <tool_call> blocks. Wait for the user to provide the tool response.\n4. The JSON inside the tags MUST be valid and include ALL required braces and the "arguments" field.\n5. If you need to use a tool, do it IMMEDIATELY without preamble.\n6. NEVER invent, guess, or hallucinate tool names. You MUST ONLY use the exact tool names provided in the 'TOOLS AVAILABLE' list above. Calling an unlisted tool will result in a hard execution error.\n\n`;
|
|
213
213
|
|
|
214
214
|
if (bodyAny.tool_choice && typeof bodyAny.tool_choice === 'object' && bodyAny.tool_choice.function) {
|
|
215
215
|
const forcedTool = bodyAny.tool_choice.function.name;
|
|
@@ -220,15 +220,22 @@ export async function chatCompletions(c: Context) {
|
|
|
220
220
|
const modelId = body.model.replace('-no-thinking', '');
|
|
221
221
|
const modelContextWindow = getModelContextWindow(modelId)
|
|
222
222
|
const estimatedTokens = estimateTokenCount(systemPrompt + prompt);
|
|
223
|
+
const hasTools = Array.isArray(bodyAny.tools) && bodyAny.tools.length > 0;
|
|
223
224
|
|
|
224
225
|
let finalPrompt: string;
|
|
225
226
|
if (estimatedTokens > modelContextWindow - 1000) {
|
|
226
227
|
const truncated = truncateMessages(messages, modelContextWindow, systemPrompt);
|
|
227
|
-
|
|
228
|
+
const truncatedBody = truncated.map(m => `${m.role === 'user' ? 'User' : m.role === 'assistant' ? 'Assistant' : m.role}: ${m.content}`).join('\n\n');
|
|
229
|
+
finalPrompt = systemPrompt ? `${systemPrompt}\n\n${truncatedBody}` : truncatedBody;
|
|
228
230
|
} else {
|
|
229
231
|
finalPrompt = systemPrompt ? `${systemPrompt}\n${prompt}` : prompt;
|
|
230
232
|
}
|
|
231
233
|
|
|
234
|
+
// Reforço de instrução de tool call para contextos longos (mitiga "Lost in the Middle")
|
|
235
|
+
if (hasTools && estimatedTokens > 15000) {
|
|
236
|
+
finalPrompt += '\n\n[CRITICAL REMINDER: You MUST use the exact <tool_call> JSON format specified in the system instructions. Do not hallucinate tool names or output raw JSON without the tags.]';
|
|
237
|
+
}
|
|
238
|
+
|
|
232
239
|
const isThinkingModel = !body.model.includes('no-thinking');
|
|
233
240
|
|
|
234
241
|
// A session is new if it doesn't have any assistant messages yet.
|
|
@@ -641,7 +648,7 @@ export async function chatCompletions(c: Context) {
|
|
|
641
648
|
// Periodic yielding to prevent event loop starvation
|
|
642
649
|
chunkCount++;
|
|
643
650
|
if (chunkCount % 100 === 0) {
|
|
644
|
-
await new Promise(r =>
|
|
651
|
+
await new Promise(r => setTimeout(r, 0));
|
|
645
652
|
}
|
|
646
653
|
}
|
|
647
654
|
|
package/src/services/qwen.ts
CHANGED
|
@@ -66,6 +66,9 @@ interface WarmPoolEntry {
|
|
|
66
66
|
const warmPool: Map<string, WarmPoolEntry[]> = (globalThis as any)._warmPool || new Map();
|
|
67
67
|
(globalThis as any)._warmPool = warmPool;
|
|
68
68
|
|
|
69
|
+
const refillPromises: Map<string, Promise<void>> = (globalThis as any)._refillPromises || new Map();
|
|
70
|
+
(globalThis as any)._refillPromises = refillPromises;
|
|
71
|
+
|
|
69
72
|
const WARM_POOL_SIZE = 5;
|
|
70
73
|
const WARM_POOL_TTL_MS = 10 * 60 * 1000;
|
|
71
74
|
|
|
@@ -128,15 +131,21 @@ async function refillPoolForAccount(accountId: string) {
|
|
|
128
131
|
if (!pool) { pool = []; warmPool.set(accountId, pool); }
|
|
129
132
|
cleanupStalePool(accountId);
|
|
130
133
|
const need = Math.max(0, WARM_POOL_SIZE - pool.length);
|
|
131
|
-
|
|
134
|
+
|
|
135
|
+
const creationPromises = Array.from({ length: need }, async () => {
|
|
132
136
|
try {
|
|
133
137
|
const headers = await getBasicQwenHeaders(accountId === 'global' ? undefined : accountId);
|
|
134
138
|
const chatId = await createRealQwenChat(headers);
|
|
135
|
-
|
|
139
|
+
return { chatId, headers, accountId, timestamp: Date.now() };
|
|
136
140
|
} catch (err) {
|
|
137
141
|
console.error(`[WarmPool] refill failed for ${accountId}:`, (err as Error).message);
|
|
138
|
-
|
|
142
|
+
return null;
|
|
139
143
|
}
|
|
144
|
+
});
|
|
145
|
+
|
|
146
|
+
const results = await Promise.all(creationPromises);
|
|
147
|
+
for (const entry of results) {
|
|
148
|
+
if (entry) pool.push(entry);
|
|
140
149
|
}
|
|
141
150
|
}
|
|
142
151
|
|
|
@@ -146,7 +155,10 @@ export async function getWarmedChat(accountId?: string) {
|
|
|
146
155
|
if (!pool) { pool = []; warmPool.set(key, pool); }
|
|
147
156
|
cleanupStalePool(key);
|
|
148
157
|
if (pool.length === 0) {
|
|
149
|
-
|
|
158
|
+
if (!refillPromises.has(key)) {
|
|
159
|
+
refillPromises.set(key, refillPoolForAccount(key).finally(() => refillPromises.delete(key)));
|
|
160
|
+
}
|
|
161
|
+
await refillPromises.get(key);
|
|
150
162
|
}
|
|
151
163
|
if (pool.length === 0) throw new Error(`Warm pool empty for ${key}`);
|
|
152
164
|
return pool.shift()!;
|
|
@@ -4,7 +4,35 @@ export interface TruncatedMessage {
|
|
|
4
4
|
}
|
|
5
5
|
|
|
6
6
|
/**
 * Roughly estimates how many tokens `text` will occupy.
 *
 * Uses a deliberately conservative ratio of 2.5 characters per token so the
 * estimate errs on the high side and the context window is not silently
 * overrun. Mixed text (Portuguese, code, special characters) is assumed to
 * tokenize at roughly 1.5 to 2.5 characters per token.
 *
 * @param text - Text to measure; a missing or non-string value counts as empty.
 * @returns The estimated token count, rounded up (0 for empty input).
 */
export function estimateTokenCount(text: string): number {
  const CHARS_PER_TOKEN = 2.5;

  // Loosely typed request bodies can carry `content: null`; treat that as
  // empty text instead of throwing on `.length`.
  if (typeof text !== 'string' || text.length === 0) {
    return 0;
  }

  return Math.ceil(text.length / CHARS_PER_TOKEN);
}
|
|
12
|
+
|
|
13
|
+
/**
 * Shortens `content` to roughly `maxChars` UTF-16 code units, preferring to
 * cut at a natural boundary, and appends a marker saying the text was cut.
 *
 * Boundary preference, in order:
 *  1. JSON-looking text (starts with `{` or `[`): cut after the last `}` or
 *     `]` if that lies in the final 30% of the window.
 *  2. The last newline, if it lies in the final 20% of the window.
 *  3. The last space, if it lies in the final 10% of the window.
 *  4. Otherwise a hard cut at the window edge.
 *
 * @param content - Text to shorten.
 * @param maxChars - Window size in UTF-16 code units; non-positive or NaN
 *   values are treated as 0.
 * @returns `content` unchanged when it already fits, otherwise the shortened
 *   text followed by a truncation marker (the marker is not counted against
 *   `maxChars`).
 */
function truncateSemantically(content: string, maxChars: number): string {
  // A negative limit would make `slice(0, limit)` count from the END of the
  // string and keep almost everything, so clamp it to zero.
  const limit = maxChars > 0 ? maxChars : 0;
  if (content.length <= limit) return content;

  let truncated = content.slice(0, limit);

  // Do not leave half of a surrogate pair at the cut: a lone high surrogate
  // is not valid text and is replaced with U+FFFD when encoded as UTF-8.
  const lastUnit = truncated.charCodeAt(truncated.length - 1);
  const nextUnit = content.charCodeAt(truncated.length);
  const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
  const nextIsLowSurrogate = nextUnit >= 0xdc00 && nextUnit <= 0xdfff;
  if (endsOnHighSurrogate && nextIsLowSurrogate) {
    truncated = truncated.slice(0, -1);
  }

  const head = truncated.trimStart();
  if (head.startsWith('{') || head.startsWith('[')) {
    const lastBrace = Math.max(truncated.lastIndexOf('}'), truncated.lastIndexOf(']'));
    if (lastBrace > limit * 0.7) {
      return truncated.slice(0, lastBrace + 1) + ' /* truncated */';
    }
  }

  const lastNewline = truncated.lastIndexOf('\n');
  if (lastNewline > limit * 0.8) {
    return truncated.slice(0, lastNewline) + '\n[Truncated]';
  }

  const lastSpace = truncated.lastIndexOf(' ');
  if (lastSpace > limit * 0.9) {
    return truncated.slice(0, lastSpace) + '... [Truncated]';
  }

  return truncated + '... [Truncated]';
}
|
|
9
37
|
|
|
10
38
|
export function truncateMessages(
|
|
@@ -39,13 +67,14 @@ export function truncateMessages(
|
|
|
39
67
|
const msgTokens = estimateTokenCount(msg.content);
|
|
40
68
|
|
|
41
69
|
if (usedTokens + msgTokens <= availableTokens) {
|
|
42
|
-
result.
|
|
70
|
+
result.push(msg);
|
|
43
71
|
usedTokens += msgTokens;
|
|
44
72
|
} else {
|
|
45
73
|
const remainingTokens = availableTokens - usedTokens;
|
|
46
74
|
if (remainingTokens > 100) {
|
|
47
|
-
const
|
|
48
|
-
|
|
75
|
+
const maxChars = Math.floor(remainingTokens * 2.5);
|
|
76
|
+
const truncatedContent = truncateSemantically(msg.content, maxChars);
|
|
77
|
+
result.push({ role: msg.role, content: `[Truncated] ${truncatedContent}` });
|
|
49
78
|
}
|
|
50
79
|
break;
|
|
51
80
|
}
|
|
@@ -53,9 +82,11 @@ export function truncateMessages(
|
|
|
53
82
|
|
|
54
83
|
if (result.length === 0 && normalizedMessages.length > 0) {
|
|
55
84
|
const lastMsg = normalizedMessages[normalizedMessages.length - 1];
|
|
56
|
-
const
|
|
57
|
-
|
|
85
|
+
const maxChars = Math.max(200, Math.floor(availableTokens * 2.5));
|
|
86
|
+
const truncatedContent = truncateSemantically(lastMsg.content, maxChars);
|
|
87
|
+
result.push({ role: lastMsg.role, content: `[Truncated] ${truncatedContent}` });
|
|
58
88
|
}
|
|
59
89
|
|
|
90
|
+
result.reverse();
|
|
60
91
|
return result;
|
|
61
92
|
}
|