@pedrofariasx/qwenproxy 1.2.0 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -13
- package/package.json +1 -1
- package/src/api/server.ts +0 -2
- package/src/cache/memory-cache.ts +52 -18
- package/src/routes/chat.ts +132 -79
- package/src/routes/upload.ts +4 -4
- package/src/services/playwright.ts +1 -0
- package/src/services/qwen.ts +36 -15
- package/src/tools/parser.ts +10 -13
- package/src/utils/context-truncation.ts +36 -10
- package/src/linter/extraction-engine.ts +0 -165
- package/src/linter/index.ts +0 -258
- package/src/linter/repair-normalize.ts +0 -245
- package/src/linter/safety-gate.ts +0 -219
- package/src/linter/streaming-state-machine.ts +0 -252
- package/src/linter/structural-parser.ts +0 -352
- package/src/linter/types.ts +0 -74
- package/src/tests/linter.test.ts +0 -151
- package/src/tests/parallel.test.ts +0 -42
- package/src/tests/structureVerification.test.ts +0 -176
- package/src/tools/ast.ts +0 -15
- package/src/tools/coercion.ts +0 -67
- package/src/tools/confidence.ts +0 -48
- package/src/tools/detector.ts +0 -40
- package/src/tools/executor.ts +0 -236
- package/src/tools/pipeline.ts +0 -122
- package/src/tools/registry-runtime.ts +0 -34
- package/src/tools/repair.ts +0 -42
- package/src/tools/validator.ts +0 -33
package/src/services/qwen.ts
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import { getQwenHeaders, getBasicHeaders } from './playwright.ts';
|
|
2
|
-
import
|
|
2
|
+
import crypto from 'crypto';
|
|
3
|
+
|
|
4
|
+
const CACHED_TIMEZONE = new Date().toString().split(' (')[0];
|
|
3
5
|
|
|
4
6
|
export class RetryableQwenStreamError extends Error {
|
|
5
7
|
readonly retryAfterMs: number;
|
|
@@ -66,6 +68,9 @@ interface WarmPoolEntry {
|
|
|
66
68
|
const warmPool: Map<string, WarmPoolEntry[]> = (globalThis as any)._warmPool || new Map();
|
|
67
69
|
(globalThis as any)._warmPool = warmPool;
|
|
68
70
|
|
|
71
|
+
const refillPromises: Map<string, Promise<void>> = (globalThis as any)._refillPromises || new Map();
|
|
72
|
+
(globalThis as any)._refillPromises = refillPromises;
|
|
73
|
+
|
|
69
74
|
const WARM_POOL_SIZE = 5;
|
|
70
75
|
const WARM_POOL_TTL_MS = 10 * 60 * 1000;
|
|
71
76
|
|
|
@@ -79,7 +84,6 @@ function cleanupStalePool(accountId: string) {
|
|
|
79
84
|
}
|
|
80
85
|
|
|
81
86
|
async function getBasicQwenHeaders(accountId?: string): Promise<Record<string, string>> {
|
|
82
|
-
const { getBasicHeaders } = await import('./playwright.ts');
|
|
83
87
|
const { cookie, userAgent, bxV } = await getBasicHeaders(accountId);
|
|
84
88
|
return {
|
|
85
89
|
cookie,
|
|
@@ -101,7 +105,7 @@ async function createRealQwenChat(header: Record<string, string>): Promise<strin
|
|
|
101
105
|
origin: 'https://chat.qwen.ai',
|
|
102
106
|
referer: 'https://chat.qwen.ai/c/new-chat',
|
|
103
107
|
'user-agent': header['user-agent'],
|
|
104
|
-
'x-request-id':
|
|
108
|
+
'x-request-id': crypto.randomUUID(),
|
|
105
109
|
'bx-v': header['bx-v'],
|
|
106
110
|
},
|
|
107
111
|
body: JSON.stringify({
|
|
@@ -128,15 +132,29 @@ async function refillPoolForAccount(accountId: string) {
|
|
|
128
132
|
if (!pool) { pool = []; warmPool.set(accountId, pool); }
|
|
129
133
|
cleanupStalePool(accountId);
|
|
130
134
|
const need = Math.max(0, WARM_POOL_SIZE - pool.length);
|
|
131
|
-
|
|
135
|
+
if (need === 0) return;
|
|
136
|
+
|
|
137
|
+
let headers: Record<string, string>;
|
|
138
|
+
try {
|
|
139
|
+
headers = await getBasicQwenHeaders(accountId === 'global' ? undefined : accountId);
|
|
140
|
+
} catch (err) {
|
|
141
|
+
console.error(`[WarmPool] header fetch failed for ${accountId}:`, (err as Error).message);
|
|
142
|
+
return;
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
const creationPromises = Array.from({ length: need }, async () => {
|
|
132
146
|
try {
|
|
133
|
-
const headers = await getBasicQwenHeaders(accountId === 'global' ? undefined : accountId);
|
|
134
147
|
const chatId = await createRealQwenChat(headers);
|
|
135
|
-
|
|
148
|
+
return { chatId, headers, accountId, timestamp: Date.now() };
|
|
136
149
|
} catch (err) {
|
|
137
|
-
console.error(`[WarmPool]
|
|
138
|
-
|
|
150
|
+
console.error(`[WarmPool] chat creation failed for ${accountId}:`, (err as Error).message);
|
|
151
|
+
return null;
|
|
139
152
|
}
|
|
153
|
+
});
|
|
154
|
+
|
|
155
|
+
const results = await Promise.all(creationPromises);
|
|
156
|
+
for (const entry of results) {
|
|
157
|
+
if (entry) pool.push(entry);
|
|
140
158
|
}
|
|
141
159
|
}
|
|
142
160
|
|
|
@@ -146,7 +164,10 @@ export async function getWarmedChat(accountId?: string) {
|
|
|
146
164
|
if (!pool) { pool = []; warmPool.set(key, pool); }
|
|
147
165
|
cleanupStalePool(key);
|
|
148
166
|
if (pool.length === 0) {
|
|
149
|
-
|
|
167
|
+
if (!refillPromises.has(key)) {
|
|
168
|
+
refillPromises.set(key, refillPoolForAccount(key).finally(() => refillPromises.delete(key)));
|
|
169
|
+
}
|
|
170
|
+
await refillPromises.get(key);
|
|
150
171
|
}
|
|
151
172
|
if (pool.length === 0) throw new Error(`Warm pool empty for ${key}`);
|
|
152
173
|
return pool.shift()!;
|
|
@@ -240,7 +261,7 @@ export async function disableNativeTools(accountId?: string): Promise<void> {
|
|
|
240
261
|
'origin': 'https://chat.qwen.ai',
|
|
241
262
|
'referer': 'https://chat.qwen.ai/',
|
|
242
263
|
'user-agent': headers['user-agent'],
|
|
243
|
-
'x-request-id':
|
|
264
|
+
'x-request-id': crypto.randomUUID(),
|
|
244
265
|
'bx-ua': headers['bx-ua'],
|
|
245
266
|
'bx-umidtoken': headers['bx-umidtoken'],
|
|
246
267
|
'bx-v': headers['bx-v']
|
|
@@ -279,9 +300,9 @@ export async function fetchQwenModels(accountId?: string): Promise<any[]> {
|
|
|
279
300
|
'cookie': cookie,
|
|
280
301
|
'referer': 'https://chat.qwen.ai/',
|
|
281
302
|
'user-agent': userAgent,
|
|
282
|
-
'x-request-id':
|
|
303
|
+
'x-request-id': crypto.randomUUID(),
|
|
283
304
|
'bx-v': bxV,
|
|
284
|
-
'timezone':
|
|
305
|
+
'timezone': CACHED_TIMEZONE,
|
|
285
306
|
'source': 'web'
|
|
286
307
|
}
|
|
287
308
|
});
|
|
@@ -385,7 +406,7 @@ export async function createQwenStream(
|
|
|
385
406
|
}
|
|
386
407
|
|
|
387
408
|
const timestamp = Math.floor(Date.now() / 1000);
|
|
388
|
-
const fid =
|
|
409
|
+
const fid = crypto.randomUUID();
|
|
389
410
|
const model = modelId.replace('-no-thinking', '');
|
|
390
411
|
|
|
391
412
|
const payload: QwenPayload = {
|
|
@@ -444,10 +465,10 @@ export async function createQwenStream(
|
|
|
444
465
|
'sec-fetch-dest': 'empty',
|
|
445
466
|
'sec-fetch-mode': 'cors',
|
|
446
467
|
'sec-fetch-site': 'same-origin',
|
|
447
|
-
'timezone':
|
|
468
|
+
'timezone': CACHED_TIMEZONE,
|
|
448
469
|
'user-agent': chatHeaders['user-agent'],
|
|
449
470
|
'x-accel-buffering': 'no',
|
|
450
|
-
'x-request-id':
|
|
471
|
+
'x-request-id': crypto.randomUUID(),
|
|
451
472
|
'bx-v': chatHeaders['bx-v'],
|
|
452
473
|
},
|
|
453
474
|
body: JSON.stringify(payload),
|
package/src/tools/parser.ts
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* Supports both JSON and Hermes-style XML <parameter> formats.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
import
|
|
8
|
+
import crypto from 'crypto';
|
|
9
9
|
import { robustParseJSON } from '../utils/json.js';
|
|
10
10
|
import { logger } from '../core/logger.js';
|
|
11
11
|
import type { ParsedToolCall } from './types';
|
|
@@ -183,6 +183,11 @@ export class StreamingToolParser {
|
|
|
183
183
|
|
|
184
184
|
while (this.buffer.length > 0) {
|
|
185
185
|
if (!this.insideTool) {
|
|
186
|
+
if (this.buffer.indexOf('<') === -1) {
|
|
187
|
+
if (this.emittedToolCallCount === 0) result.text += this.buffer;
|
|
188
|
+
this.buffer = '';
|
|
189
|
+
break;
|
|
190
|
+
}
|
|
186
191
|
const match = this.buffer.match(TOOL_OPEN_RE);
|
|
187
192
|
if (match && match.index !== undefined) {
|
|
188
193
|
// Text before the tool call tag
|
|
@@ -276,14 +281,6 @@ export class StreamingToolParser {
|
|
|
276
281
|
return this.insideTool;
|
|
277
282
|
}
|
|
278
283
|
|
|
279
|
-
/**
|
|
280
|
-
* Get any lead-in text that was captured before tool calls.
|
|
281
|
-
* Useful for fallback content when tool calls fail to parse.
|
|
282
|
-
*/
|
|
283
|
-
getPendingLeadIn(): string {
|
|
284
|
-
return this.pendingLeadIn;
|
|
285
|
-
}
|
|
286
|
-
|
|
287
284
|
// ─── Internal Methods ──────────────────────────────────────────────────────
|
|
288
285
|
|
|
289
286
|
private processToolContent(content: string, result: ParserResult): void {
|
|
@@ -302,7 +299,7 @@ export class StreamingToolParser {
|
|
|
302
299
|
const xmlParsed = parseXmlParameterToolCall(t, this.currentOpenTag, this.tools);
|
|
303
300
|
if (xmlParsed) {
|
|
304
301
|
result.toolCalls.push({
|
|
305
|
-
id: `call_${
|
|
302
|
+
id: `call_${crypto.randomUUID()}`,
|
|
306
303
|
name: xmlParsed.name,
|
|
307
304
|
arguments: xmlParsed.arguments,
|
|
308
305
|
});
|
|
@@ -365,7 +362,7 @@ export class StreamingToolParser {
|
|
|
365
362
|
const xmlParsed = parseXmlParameterToolCall(block, this.currentOpenTag, this.tools);
|
|
366
363
|
if (xmlParsed) {
|
|
367
364
|
return {
|
|
368
|
-
id: `call_${
|
|
365
|
+
id: `call_${crypto.randomUUID()}`,
|
|
369
366
|
name: xmlParsed.name,
|
|
370
367
|
arguments: xmlParsed.arguments,
|
|
371
368
|
};
|
|
@@ -375,7 +372,7 @@ export class StreamingToolParser {
|
|
|
375
372
|
const recovered = parseRecoverableXmlToolCall(block, this.currentOpenTag, this.tools);
|
|
376
373
|
if (recovered) {
|
|
377
374
|
return {
|
|
378
|
-
id: `call_${
|
|
375
|
+
id: `call_${crypto.randomUUID()}`,
|
|
379
376
|
name: recovered.name,
|
|
380
377
|
arguments: recovered.arguments,
|
|
381
378
|
};
|
|
@@ -438,7 +435,7 @@ export class StreamingToolParser {
|
|
|
438
435
|
if (typeof args !== 'object' || args === null) args = {};
|
|
439
436
|
|
|
440
437
|
return {
|
|
441
|
-
id: parsed.id || parsed.tool_call_id || `call_${
|
|
438
|
+
id: parsed.id || parsed.tool_call_id || `call_${crypto.randomUUID()}`,
|
|
442
439
|
name,
|
|
443
440
|
arguments: args,
|
|
444
441
|
};
|
|
@@ -1,10 +1,33 @@
|
|
|
1
|
-
export
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
export function estimateTokenCount(text: string): number {
|
|
2
|
+
// Divisor conservador (2.5) para evitar estouro silencioso do context window.
|
|
3
|
+
// Tokenizers modernos (como o do Qwen) usam ~1.5 a 2.5 caracteres por token
|
|
4
|
+
// para textos mistos (português, código, caracteres especiais).
|
|
5
|
+
return Math.ceil(text.length / 2.5);
|
|
4
6
|
}
|
|
5
7
|
|
|
6
|
-
|
|
7
|
-
|
|
8
|
+
function truncateSemantically(content: string, maxChars: number): string {
|
|
9
|
+
if (content.length <= maxChars) return content;
|
|
10
|
+
|
|
11
|
+
const truncated = content.slice(0, maxChars);
|
|
12
|
+
|
|
13
|
+
if (truncated.trimStart().startsWith('{') || truncated.trimStart().startsWith('[')) {
|
|
14
|
+
const lastBrace = Math.max(truncated.lastIndexOf('}'), truncated.lastIndexOf(']'));
|
|
15
|
+
if (lastBrace > maxChars * 0.7) {
|
|
16
|
+
return truncated.slice(0, lastBrace + 1) + ' /* truncated */';
|
|
17
|
+
}
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
const lastNewline = truncated.lastIndexOf('\n');
|
|
21
|
+
if (lastNewline > maxChars * 0.8) {
|
|
22
|
+
return truncated.slice(0, lastNewline) + '\n[Truncated]';
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
const lastSpace = truncated.lastIndexOf(' ');
|
|
26
|
+
if (lastSpace > maxChars * 0.9) {
|
|
27
|
+
return truncated.slice(0, lastSpace) + '... [Truncated]';
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
return truncated + '... [Truncated]';
|
|
8
31
|
}
|
|
9
32
|
|
|
10
33
|
export function truncateMessages(
|
|
@@ -39,13 +62,14 @@ export function truncateMessages(
|
|
|
39
62
|
const msgTokens = estimateTokenCount(msg.content);
|
|
40
63
|
|
|
41
64
|
if (usedTokens + msgTokens <= availableTokens) {
|
|
42
|
-
result.
|
|
65
|
+
result.push(msg);
|
|
43
66
|
usedTokens += msgTokens;
|
|
44
67
|
} else {
|
|
45
68
|
const remainingTokens = availableTokens - usedTokens;
|
|
46
69
|
if (remainingTokens > 100) {
|
|
47
|
-
const
|
|
48
|
-
|
|
70
|
+
const maxChars = Math.floor(remainingTokens * 2.5);
|
|
71
|
+
const truncatedContent = truncateSemantically(msg.content, maxChars);
|
|
72
|
+
result.push({ role: msg.role, content: `[Truncated] ${truncatedContent}` });
|
|
49
73
|
}
|
|
50
74
|
break;
|
|
51
75
|
}
|
|
@@ -53,9 +77,11 @@ export function truncateMessages(
|
|
|
53
77
|
|
|
54
78
|
if (result.length === 0 && normalizedMessages.length > 0) {
|
|
55
79
|
const lastMsg = normalizedMessages[normalizedMessages.length - 1];
|
|
56
|
-
const
|
|
57
|
-
|
|
80
|
+
const maxChars = Math.max(200, Math.floor(availableTokens * 2.5));
|
|
81
|
+
const truncatedContent = truncateSemantically(lastMsg.content, maxChars);
|
|
82
|
+
result.push({ role: lastMsg.role, content: `[Truncated] ${truncatedContent}` });
|
|
58
83
|
}
|
|
59
84
|
|
|
85
|
+
result.reverse();
|
|
60
86
|
return result;
|
|
61
87
|
}
|
|
@@ -1,165 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Layer 3: Tool Extraction Engine (Multi-Format)
|
|
3
|
-
*/
|
|
4
|
-
|
|
5
|
-
import type { ToolCallSource, RawToolCandidate, SecurityViolation } from './types'
|
|
6
|
-
import { StructuralParser } from './structural-parser'
|
|
7
|
-
|
|
8
|
-
export interface ExtractionResult {
|
|
9
|
-
candidates: RawToolCandidate[]
|
|
10
|
-
sourceHint: ToolCallSource
|
|
11
|
-
extractionErrors: string[]
|
|
12
|
-
}
|
|
13
|
-
|
|
14
|
-
export class ToolExtractionEngine {
|
|
15
|
-
private parser: StructuralParser
|
|
16
|
-
|
|
17
|
-
constructor() {
|
|
18
|
-
this.parser = new StructuralParser()
|
|
19
|
-
}
|
|
20
|
-
|
|
21
|
-
extract(input: string): ExtractionResult {
|
|
22
|
-
const candidates: RawToolCandidate[] = []
|
|
23
|
-
const errors: string[] = []
|
|
24
|
-
let sourceHint: ToolCallSource = this.detectSourceHint(input)
|
|
25
|
-
|
|
26
|
-
const jsonCandidates = this.extractJsonObjects(input)
|
|
27
|
-
for (const candidate of jsonCandidates) {
|
|
28
|
-
const parsed = this.tryParseJson(candidate.raw)
|
|
29
|
-
if (parsed) {
|
|
30
|
-
candidates.push({
|
|
31
|
-
source: candidate.sourceHint,
|
|
32
|
-
raw: parsed,
|
|
33
|
-
rawString: candidate.raw,
|
|
34
|
-
confidence: this.calculateJsonConfidence(parsed, candidate.sourceHint),
|
|
35
|
-
})
|
|
36
|
-
}
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
if (candidates.length === 0) {
|
|
40
|
-
const reactCandidates = this.extractReAct(input)
|
|
41
|
-
candidates.push(
|
|
42
|
-
...reactCandidates.map(c => ({
|
|
43
|
-
source: 'react' as ToolCallSource,
|
|
44
|
-
raw: c.raw,
|
|
45
|
-
rawString: c.rawString,
|
|
46
|
-
confidence: 0.8,
|
|
47
|
-
}))
|
|
48
|
-
)
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
if (candidates.length === 0) {
|
|
52
|
-
const gemini = this.extractGemini(input)
|
|
53
|
-
if (gemini) {
|
|
54
|
-
candidates.push({
|
|
55
|
-
source: 'gemini' as ToolCallSource,
|
|
56
|
-
raw: gemini,
|
|
57
|
-
rawString: JSON.stringify(gemini),
|
|
58
|
-
confidence: 0.85,
|
|
59
|
-
})
|
|
60
|
-
}
|
|
61
|
-
}
|
|
62
|
-
|
|
63
|
-
return { candidates, sourceHint, extractionErrors: errors }
|
|
64
|
-
}
|
|
65
|
-
|
|
66
|
-
private extractJsonObjects(text: string): Array<{ raw: string; sourceHint: ToolCallSource }> {
|
|
67
|
-
const results: Array<{ raw: string; sourceHint: ToolCallSource }> = []
|
|
68
|
-
const markupCleaned = this.stripMarkup(text)
|
|
69
|
-
const jsonSpans = StructuralParser.extractJsonFromText(markupCleaned)
|
|
70
|
-
|
|
71
|
-
for (const span of jsonSpans) {
|
|
72
|
-
const sourceHint = this.detectSourceHint(span)
|
|
73
|
-
results.push({ raw: span, sourceHint })
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
if (results.length === 0) {
|
|
77
|
-
const funcMatch = markupCleaned.match(/call_function\s*\(\s*'(\w+)'\s*,\s*(\{[\s\S]*\})\s*\)/i)
|
|
78
|
-
if (funcMatch) {
|
|
79
|
-
results.push({ raw: `{"name":"${funcMatch[1]}","arguments":${funcMatch[2]}}`, sourceHint: 'openai' })
|
|
80
|
-
}
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
return results
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
private extractReAct(text: string): Array<{ raw: Record<string, unknown>; rawString: string }> {
|
|
87
|
-
const results: Array<{ raw: Record<string, unknown>; rawString: string }> = []
|
|
88
|
-
const patterns = [
|
|
89
|
-
/Action:\s*(\w+)\s*\n?\s*Action Input:\s*(\{[\s\S]*?)(?=\n\s*\n|\n\s*Observation|\n\s*Final Answer|$)/i,
|
|
90
|
-
/Action:\s*(\w+)\s+Action Input:\s*(\{[\s\S]*)/i,
|
|
91
|
-
/\*\*Action\*\*:\s*(\w+)\s*\n?\s*\*\*Action Input\*\*:\s*(\{[\s\S]*?)(?=\n\s*\n|\n\s*Observation|\n\s*Final Answer|$)/i,
|
|
92
|
-
]
|
|
93
|
-
|
|
94
|
-
for (const pattern of patterns) {
|
|
95
|
-
const match = text.match(pattern)
|
|
96
|
-
if (match) {
|
|
97
|
-
let actionInputStr = match[2].trim()
|
|
98
|
-
actionInputStr = actionInputStr.replace(/\n\s*Observation.*$/is, '').trim()
|
|
99
|
-
results.push({
|
|
100
|
-
raw: { name: match[1], arguments: this.safeParse(actionInputStr) || {} },
|
|
101
|
-
rawString: match[0],
|
|
102
|
-
})
|
|
103
|
-
break
|
|
104
|
-
}
|
|
105
|
-
}
|
|
106
|
-
|
|
107
|
-
return results
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
private extractGemini(text: string): Record<string, unknown> | null {
|
|
111
|
-
const patterns = [
|
|
112
|
-
/functionCall\s*[:=]\s*\{\s*name\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*args\s*[:=]\s*(\{[\s\S]*\})\s*\}/i,
|
|
113
|
-
/"functionCall"\s*:\s*\{\s*"name"\s*:\s*"([^"]+)"\s*,\s*"args"\s*:\s*(\{[\s\S]*?\})\s*\}/i,
|
|
114
|
-
]
|
|
115
|
-
for (const pattern of patterns) {
|
|
116
|
-
const match = text.match(pattern)
|
|
117
|
-
if (match) return { name: match[1], arguments: this.safeParse(match[2]) || {} }
|
|
118
|
-
}
|
|
119
|
-
return null
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
private stripMarkup(text: string): string {
|
|
123
|
-
return text
|
|
124
|
-
.replace(/```json\s*/gi, '')
|
|
125
|
-
.replace(/```\s*/gi, '')
|
|
126
|
-
.replace(/<function_calls>/gi, '')
|
|
127
|
-
.replace(/<\/function_calls>/gi, '')
|
|
128
|
-
.trim()
|
|
129
|
-
}
|
|
130
|
-
|
|
131
|
-
private detectSourceHint(text: string): ToolCallSource {
|
|
132
|
-
const t = this.stripMarkup(text)
|
|
133
|
-
if (/\btool_use\b/.test(t) || /type:\s*['"]?tool_use/i.test(t)) return 'claude'
|
|
134
|
-
if (/\bfunctionCall\b/.test(t) || /"functionCall"/.test(t)) return 'gemini'
|
|
135
|
-
if (/\barguments\b/.test(t) && /\bname\b/.test(t)) return 'openai'
|
|
136
|
-
if (/Action:\s*\w+\s*\n?Action Input:/i.test(t)) return 'react'
|
|
137
|
-
return 'unknown'
|
|
138
|
-
}
|
|
139
|
-
|
|
140
|
-
private tryParseJson(raw: string): Record<string, unknown> | null {
|
|
141
|
-
try { return JSON.parse(raw) } catch {
|
|
142
|
-
const result = this.parser.parse(raw, 'unknown')
|
|
143
|
-
return (result.ast?.value as Record<string, unknown>) ?? null
|
|
144
|
-
}
|
|
145
|
-
}
|
|
146
|
-
|
|
147
|
-
private safeParse(str: string): Record<string, unknown> | null {
|
|
148
|
-
try { return JSON.parse(str) } catch {
|
|
149
|
-
const result = this.parser.parse(str, 'unknown')
|
|
150
|
-
return (result.ast?.value as Record<string, unknown>) ?? null
|
|
151
|
-
}
|
|
152
|
-
}
|
|
153
|
-
|
|
154
|
-
private calculateJsonConfidence(parsed: Record<string, unknown>, source: ToolCallSource): number {
|
|
155
|
-
let conf = 0.5
|
|
156
|
-
if (parsed.name) conf += 0.15
|
|
157
|
-
if (parsed.arguments && typeof parsed.arguments === 'object') conf += 0.15
|
|
158
|
-
if (parsed.input && typeof parsed.input === 'object') conf += 0.1
|
|
159
|
-
if (parsed.tool) conf += 0.1
|
|
160
|
-
if (parsed.args && typeof parsed.args === 'object') conf += 0.1
|
|
161
|
-
if (typeof parsed.arguments === 'string') conf -= 0.2
|
|
162
|
-
if (typeof parsed.input === 'string') conf -= 0.2
|
|
163
|
-
return Math.min(0.95, Math.max(0.3, conf))
|
|
164
|
-
}
|
|
165
|
-
}
|
package/src/linter/index.ts
DELETED
|
@@ -1,258 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* UltraToolCallLinter v1.0
|
|
3
|
-
* Main public API: composable 5-layer pipeline
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
import type {
|
|
7
|
-
CanonicalToolCall,
|
|
8
|
-
ParserState,
|
|
9
|
-
RawToolCandidate,
|
|
10
|
-
ParseResult,
|
|
11
|
-
ToolCallSource,
|
|
12
|
-
SecurityViolation,
|
|
13
|
-
ToolDefinition,
|
|
14
|
-
ToolRegistry,
|
|
15
|
-
} from './types'
|
|
16
|
-
|
|
17
|
-
import { StreamingStateMachine } from './streaming-state-machine'
|
|
18
|
-
import { StructuralParser } from './structural-parser'
|
|
19
|
-
import { ToolExtractionEngine } from './extraction-engine'
|
|
20
|
-
import { GrammarRepairEngine, NormalizationEngine } from './repair-normalize'
|
|
21
|
-
import { SafetyGate } from './safety-gate'
|
|
22
|
-
|
|
23
|
-
export interface LinterConfig {
|
|
24
|
-
registry?: ToolRegistry
|
|
25
|
-
strictMode?: boolean
|
|
26
|
-
enableSecurityGate?: boolean
|
|
27
|
-
maxRecoveryAttempts?: number
|
|
28
|
-
minConfidenceThreshold?: number
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
export class UltraToolCallLinter {
|
|
32
|
-
private readonly streaming = new StreamingStateMachine()
|
|
33
|
-
private readonly extractor = new ToolExtractionEngine()
|
|
34
|
-
private readonly repairer = new GrammarRepairEngine()
|
|
35
|
-
private readonly normalizer = new NormalizationEngine()
|
|
36
|
-
private readonly gate = new SafetyGate()
|
|
37
|
-
|
|
38
|
-
private registry: ToolRegistry = {}
|
|
39
|
-
private strictMode: boolean = false
|
|
40
|
-
private enableSecurityGate: boolean = true
|
|
41
|
-
private maxRecoveryAttempts: number = 3
|
|
42
|
-
private minConfidenceThreshold: number = 0.3
|
|
43
|
-
|
|
44
|
-
constructor(config: LinterConfig = {}) {
|
|
45
|
-
if (config.registry) this.registry = config.registry
|
|
46
|
-
if (config.strictMode !== undefined) this.strictMode = config.strictMode
|
|
47
|
-
if (config.enableSecurityGate !== undefined) this.enableSecurityGate = config.enableSecurityGate
|
|
48
|
-
if (config.maxRecoveryAttempts !== undefined) this.maxRecoveryAttempts = config.maxRecoveryAttempts
|
|
49
|
-
if (config.minConfidenceThreshold !== undefined) this.minConfidenceThreshold = config.minConfidenceThreshold
|
|
50
|
-
|
|
51
|
-
if (this.registry) this.gate.registerRegistry(this.registry)
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
setRegistry(registry: ToolRegistry): void {
|
|
55
|
-
this.registry = registry
|
|
56
|
-
this.gate.registerRegistry(registry)
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
registerTool(name: string, def: ToolDefinition): void {
|
|
60
|
-
this.gate.registerTool(name, def)
|
|
61
|
-
}
|
|
62
|
-
|
|
63
|
-
push(chunk: string): void {
|
|
64
|
-
this.streaming.push(chunk)
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
parse(): ParseResult {
|
|
68
|
-
const buffer = this.streaming.getBuffer()
|
|
69
|
-
const errors: string[] = []
|
|
70
|
-
|
|
71
|
-
const { candidates, extractionErrors } = this.extractor.extract(buffer)
|
|
72
|
-
errors.push(...extractionErrors)
|
|
73
|
-
|
|
74
|
-
const toolCalls: CanonicalToolCall[] = []
|
|
75
|
-
let maxConfidence = 0
|
|
76
|
-
|
|
77
|
-
for (const candidate of candidates) {
|
|
78
|
-
const result = this.processCandidate(candidate, errors)
|
|
79
|
-
const tc: CanonicalToolCall = {
|
|
80
|
-
tool: result.tool,
|
|
81
|
-
input: result.input,
|
|
82
|
-
meta: result.meta ?? { source: candidate.source, confidence: 0, repaired: false },
|
|
83
|
-
}
|
|
84
|
-
if (tc.meta) {
|
|
85
|
-
tc.meta.confidence = result.meta?.confidence ?? 0
|
|
86
|
-
tc.meta.repaired = result.meta?.repaired ?? false
|
|
87
|
-
} else {
|
|
88
|
-
tc.meta = { source: candidate.source, confidence: result.meta?.confidence ?? 0, repaired: result.meta?.repaired ?? false }
|
|
89
|
-
}
|
|
90
|
-
toolCalls.push(tc)
|
|
91
|
-
maxConfidence = Math.max(maxConfidence, tc.meta.confidence)
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
this.streaming.reset()
|
|
95
|
-
|
|
96
|
-
return {
|
|
97
|
-
text: buffer,
|
|
98
|
-
toolCalls,
|
|
99
|
-
errors,
|
|
100
|
-
confidence: maxConfidence,
|
|
101
|
-
}
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
parseText(text: string): ParseResult {
|
|
105
|
-
this.streaming.reset()
|
|
106
|
-
this.streaming.push(text)
|
|
107
|
-
return this.parse()
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
parseObject(name: string, argumentsObj: Record<string, unknown>): ParseResult {
|
|
111
|
-
const candidate: RawToolCandidate = {
|
|
112
|
-
source: 'openai',
|
|
113
|
-
raw: { name, arguments: argumentsObj },
|
|
114
|
-
rawString: JSON.stringify({ name, arguments: argumentsObj }),
|
|
115
|
-
confidence: 1,
|
|
116
|
-
}
|
|
117
|
-
|
|
118
|
-
const errors: string[] = []
|
|
119
|
-
const processingResult = this.processCandidate(candidate, errors)
|
|
120
|
-
|
|
121
|
-
return {
|
|
122
|
-
text: '',
|
|
123
|
-
toolCalls: [processingResult],
|
|
124
|
-
errors,
|
|
125
|
-
confidence: processingResult.meta?.confidence ?? 0,
|
|
126
|
-
}
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
repair(input: string): string {
|
|
130
|
-
const result = this.repairer.repair(input)
|
|
131
|
-
if (!result.repaired) {
|
|
132
|
-
const structural = new StructuralParser()
|
|
133
|
-
const parsed = structural.parse(input, 'unknown')
|
|
134
|
-
if (parsed.ast?.value) return JSON.stringify(parsed.ast.value, null, 0)
|
|
135
|
-
}
|
|
136
|
-
return JSON.stringify(result.value ?? {}, null, 0)
|
|
137
|
-
}
|
|
138
|
-
|
|
139
|
-
extract(input: string): RawToolCandidate[] {
|
|
140
|
-
this.streaming.reset()
|
|
141
|
-
const { candidates } = this.extractor.extract(input)
|
|
142
|
-
return candidates
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
reset(): void {
|
|
146
|
-
this.streaming.reset()
|
|
147
|
-
}
|
|
148
|
-
|
|
149
|
-
getState(): ParserState {
|
|
150
|
-
return this.streaming.getState()
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
public processCandidate(
|
|
154
|
-
candidate: RawToolCandidate,
|
|
155
|
-
errors: string[]
|
|
156
|
-
): CanonicalToolCall {
|
|
157
|
-
let confidence = candidate.confidence
|
|
158
|
-
let repaired = false
|
|
159
|
-
let currentRaw = { ...candidate.raw } as Record<string, unknown>
|
|
160
|
-
|
|
161
|
-
const args = currentRaw.arguments ?? currentRaw.input ?? currentRaw.args
|
|
162
|
-
|
|
163
|
-
if (typeof args === 'string' && args.trim().length > 0) {
|
|
164
|
-
const repairResult = this.repairer.repair(args)
|
|
165
|
-
if (repairResult.repaired) {
|
|
166
|
-
currentRaw = { ...currentRaw, arguments: repairResult.value }
|
|
167
|
-
repaired = true
|
|
168
|
-
confidence = Math.min(confidence + repairResult.confidence * 0.1, 0.95)
|
|
169
|
-
this.recoveryLog('string-args-repaired', candidate, repairResult.strategy)
|
|
170
|
-
}
|
|
171
|
-
}
|
|
172
|
-
|
|
173
|
-
if (!currentRaw.tool && !currentRaw.name && !currentRaw.functionCall && !currentRaw.type) {
|
|
174
|
-
const repairResult = this.repairer.repair(candidate.rawString)
|
|
175
|
-
if (repairResult.repaired) {
|
|
176
|
-
currentRaw = repairResult.value as Record<string, unknown>
|
|
177
|
-
repaired = true
|
|
178
|
-
confidence = Math.min(confidence + 0.05, 0.9)
|
|
179
|
-
this.recoveryLog('synthetic-construction', candidate, repairResult.strategy)
|
|
180
|
-
}
|
|
181
|
-
}
|
|
182
|
-
|
|
183
|
-
const { toolCall, warnings } = this.normalizer.normalize(
|
|
184
|
-
{ source: candidate.source, raw: currentRaw, rawString: candidate.rawString, confidence },
|
|
185
|
-
repaired
|
|
186
|
-
)
|
|
187
|
-
|
|
188
|
-
if (warnings.length > 0) errors.push(...warnings)
|
|
189
|
-
|
|
190
|
-
if (!toolCall.meta) {
|
|
191
|
-
toolCall.meta = { source: candidate.source, confidence, repaired }
|
|
192
|
-
} else {
|
|
193
|
-
toolCall.meta.confidence = confidence
|
|
194
|
-
toolCall.meta.repaired = repaired
|
|
195
|
-
}
|
|
196
|
-
|
|
197
|
-
if (this.enableSecurityGate) {
|
|
198
|
-
const report = this.gate.validate(toolCall, candidate.source)
|
|
199
|
-
toolCall.meta.confidence = Math.min(toolCall.meta.confidence, report.confidence)
|
|
200
|
-
|
|
201
|
-
if (!report.isValid && !this.strictMode) {
|
|
202
|
-
const recoveryResult = this.attemptRecovery(candidate, report.violations, toolCall, errors)
|
|
203
|
-
if (recoveryResult) return recoveryResult
|
|
204
|
-
}
|
|
205
|
-
|
|
206
|
-
errors.push(...report.warnings)
|
|
207
|
-
}
|
|
208
|
-
|
|
209
|
-
return toolCall
|
|
210
|
-
}
|
|
211
|
-
|
|
212
|
-
private attemptRecovery(
|
|
213
|
-
candidate: RawToolCandidate,
|
|
214
|
-
violations: SecurityViolation[],
|
|
215
|
-
failedCall: CanonicalToolCall,
|
|
216
|
-
errors: string[]
|
|
217
|
-
): CanonicalToolCall | null {
|
|
218
|
-
let attempt = 0
|
|
219
|
-
const currentCall: CanonicalToolCall = {
|
|
220
|
-
tool: failedCall.tool,
|
|
221
|
-
input: { ...failedCall.input },
|
|
222
|
-
meta: failedCall.meta ? { ...failedCall.meta } : undefined,
|
|
223
|
-
}
|
|
224
|
-
|
|
225
|
-
while (attempt < this.maxRecoveryAttempts) {
|
|
226
|
-
for (const v of violations) {
|
|
227
|
-
if (v.field === 'input') {
|
|
228
|
-
currentCall.input = {}
|
|
229
|
-
errors.push(`Recovered: cleared entire input`)
|
|
230
|
-
break
|
|
231
|
-
}
|
|
232
|
-
if (v.field in currentCall.input) {
|
|
233
|
-
delete currentCall.input[v.field]
|
|
234
|
-
errors.push(`Recovered: removed invalid field "${v.field}"`)
|
|
235
|
-
}
|
|
236
|
-
}
|
|
237
|
-
|
|
238
|
-
const report = this.gate.validate(currentCall, candidate.source)
|
|
239
|
-
if (report.isValid) {
|
|
240
|
-
if (currentCall.meta) {
|
|
241
|
-
currentCall.meta.repaired = true
|
|
242
|
-
currentCall.meta.confidence = Math.max(0.3, report.confidence)
|
|
243
|
-
}
|
|
244
|
-
return currentCall
|
|
245
|
-
}
|
|
246
|
-
|
|
247
|
-
attempt++
|
|
248
|
-
}
|
|
249
|
-
|
|
250
|
-
return null
|
|
251
|
-
}
|
|
252
|
-
|
|
253
|
-
private recoveryLog(event: string, candidate: RawToolCandidate, strategy: string): void {
|
|
254
|
-
if (process.env.DEBUG_ULTRA_LINTER === 'true') {
|
|
255
|
-
console.debug(`[UltraToolCallLinter] recovery ${event}: strategy=${strategy}, source=${candidate.source}`)
|
|
256
|
-
}
|
|
257
|
-
}
|
|
258
|
-
}
|