@yeaft/webchat-agent 0.1.399 → 0.1.408
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/crew/role-query.js +10 -6
- package/package.json +3 -1
- package/sdk/query.js +3 -1
- package/unify/cli.js +537 -0
- package/unify/config.js +256 -0
- package/unify/debug-trace.js +398 -0
- package/unify/engine.js +319 -0
- package/unify/index.js +21 -0
- package/unify/init.js +147 -0
- package/unify/llm/adapter.js +186 -0
- package/unify/llm/anthropic.js +322 -0
- package/unify/llm/chat-completions.js +315 -0
- package/unify/models.js +167 -0
- package/unify/prompts.js +61 -0
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* anthropic.js — Anthropic Messages API adapter
|
|
3
|
+
*
|
|
4
|
+
* POST /v1/messages with SSE streaming.
|
|
5
|
+
* tool_use.input is already a parsed object (no JSON.parse needed).
|
|
6
|
+
* Tool definitions use `input_schema` (not `parameters`).
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import {
|
|
10
|
+
LLMAdapter,
|
|
11
|
+
LLMRateLimitError,
|
|
12
|
+
LLMAuthError,
|
|
13
|
+
LLMContextError,
|
|
14
|
+
LLMServerError,
|
|
15
|
+
LLMAbortError,
|
|
16
|
+
} from './adapter.js';
|
|
17
|
+
|
|
18
|
+
// Default Anthropic API origin; callers may override via the constructor's `baseUrl`.
const DEFAULT_BASE_URL = 'https://api.anthropic.com';
// Pinned `anthropic-version` request-header value for the Messages API.
const API_VERSION = '2023-06-01';
|
|
20
|
+
|
|
21
|
+
/**
 * AnthropicAdapter — Talks to the Anthropic Messages API.
 *
 * POST /v1/messages with SSE streaming.
 * tool_use.input is already a parsed object (no JSON.parse needed).
 * Tool definitions use `input_schema` (not `parameters`).
 */
export class AnthropicAdapter extends LLMAdapter {
  #apiKey;
  #baseUrl;

  /**
   * @param {{ apiKey: string, baseUrl?: string }} config
   */
  constructor({ apiKey, baseUrl = DEFAULT_BASE_URL }) {
    super({ apiKey, baseUrl });
    this.#apiKey = apiKey;
    this.#baseUrl = baseUrl;
  }

  /**
   * Translate UnifiedToolDef[] → Anthropic tool format.
   * Anthropic expects the JSON schema under `input_schema`, not `parameters`.
   * @param {import('./adapter.js').UnifiedToolDef[]} tools
   * @returns {object[]|undefined} undefined when there are no tools
   */
  #translateTools(tools) {
    if (!tools || tools.length === 0) return undefined;
    return tools.map(t => ({
      name: t.name,
      description: t.description,
      input_schema: t.parameters,
    }));
  }

  /**
   * Translate UnifiedMessage[] → Anthropic message format.
   * System messages are skipped here — Anthropic takes `system` as a
   * top-level request field, not as a message.
   * @param {import('./adapter.js').UnifiedMessage[]} messages
   * @returns {object[]}
   */
  #translateMessages(messages) {
    const result = [];
    for (const msg of messages) {
      if (msg.role === 'system') continue; // system goes separately
      if (msg.role === 'user') {
        result.push({ role: 'user', content: msg.content });
      } else if (msg.role === 'assistant') {
        const content = [];
        if (msg.content) {
          content.push({ type: 'text', text: msg.content });
        }
        if (msg.toolCalls) {
          for (const tc of msg.toolCalls) {
            content.push({
              type: 'tool_use',
              id: tc.id,
              name: tc.name,
              input: tc.input, // already an object for Anthropic
            });
          }
        }
        // The Messages API rejects an assistant message whose content array
        // is empty, so drop a turn with neither text nor tool calls.
        if (content.length > 0) {
          result.push({ role: 'assistant', content });
        }
      } else if (msg.role === 'tool') {
        // Anthropic requires all tool_results from the same turn in a single
        // user message. Merge consecutive tool messages into one.
        const toolResult = {
          type: 'tool_result',
          tool_use_id: msg.toolCallId,
          content: msg.content,
          is_error: msg.isError || false,
        };
        const prev = result[result.length - 1];
        if (prev && prev.role === 'user' && Array.isArray(prev.content) &&
            prev.content.length > 0 && prev.content[0].type === 'tool_result') {
          // Append to existing tool_result user message
          prev.content.push(toolResult);
        } else {
          result.push({
            role: 'user',
            content: [toolResult],
          });
        }
      }
    }
    return result;
  }

  /**
   * Classify HTTP errors into our typed errors.
   * @param {number} status
   * @param {string} body
   * @param {Headers} [headers] — response headers; used to honor `retry-after`
   */
  #classifyError(status, body, headers) {
    if (status === 401 || status === 403) {
      return new LLMAuthError(`Anthropic auth error: ${body}`, status);
    }
    if (status === 429) {
      // Honor the server-provided retry-after (seconds) when present and numeric.
      const raw = headers?.get?.('retry-after');
      const retryAfter = raw != null && Number.isFinite(Number(raw)) ? Number(raw) : null;
      return new LLMRateLimitError(`Anthropic rate limit: ${body}`, status, retryAfter);
    }
    if (status === 529) {
      // 529 = Anthropic "overloaded"; treated as retryable like a rate limit.
      return new LLMRateLimitError(`Anthropic overloaded: ${body}`, status);
    }
    if (body.includes('prompt is too long') || body.includes('max_tokens')) {
      return new LLMContextError(`Anthropic context error: ${body}`);
    }
    if (status >= 500) {
      return new LLMServerError(`Anthropic server error: ${body}`, status);
    }
    return new Error(`Anthropic API error ${status}: ${body}`);
  }

  /**
   * Stream a Messages API response as unified events.
   * Aborts (pre-flight or mid-stream) always surface as LLMAbortError.
   * @param {{ model: string, system: string, messages: import('./adapter.js').UnifiedMessage[], tools?: import('./adapter.js').UnifiedToolDef[], maxTokens?: number, signal?: AbortSignal }} params
   * @returns {AsyncGenerator<import('./adapter.js').StreamEvent>}
   */
  async *stream({ model, system, messages, tools, maxTokens = 16384, signal }) {
    if (signal?.aborted) throw new LLMAbortError();

    const body = {
      model,
      max_tokens: maxTokens,
      system,
      messages: this.#translateMessages(messages),
      stream: true,
    };

    const translatedTools = this.#translateTools(tools);
    if (translatedTools) body.tools = translatedTools;

    let response;
    try {
      response = await fetch(`${this.#baseUrl}/v1/messages`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'x-api-key': this.#apiKey,
          'anthropic-version': API_VERSION,
        },
        body: JSON.stringify(body),
        signal,
      });
    } catch (err) {
      // Normalize fetch cancellation to the adapter's typed abort error,
      // matching the pre-flight signal check above.
      if (err?.name === 'AbortError') throw new LLMAbortError();
      throw err;
    }

    if (!response.ok) {
      const errorBody = await response.text();
      throw this.#classifyError(response.status, errorBody, response.headers);
    }

    // Parse SSE stream
    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    let buffer = '';
    // In-flight tool_use block: Anthropic streams the input as JSON fragments
    // (input_json_delta) that we accumulate until content_block_stop.
    let currentToolCallId = null;
    let currentToolName = null;
    let currentToolInput = '';

    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split('\n');
        buffer = lines.pop() || ''; // Keep incomplete line

        for (const line of lines) {
          if (!line.startsWith('data: ')) continue;
          const data = line.slice(6).trim();
          if (data === '[DONE]') continue;

          let event;
          try {
            event = JSON.parse(data);
          } catch {
            continue; // skip malformed SSE payloads
          }

          const type = event.type;

          if (type === 'content_block_start') {
            const block = event.content_block;
            if (block?.type === 'tool_use') {
              currentToolCallId = block.id;
              currentToolName = block.name;
              currentToolInput = '';
            }
          } else if (type === 'content_block_delta') {
            const delta = event.delta;
            if (delta?.type === 'text_delta') {
              yield { type: 'text_delta', text: delta.text };
            } else if (delta?.type === 'thinking_delta') {
              yield { type: 'thinking_delta', text: delta.thinking };
            } else if (delta?.type === 'input_json_delta') {
              currentToolInput += delta.partial_json;
            }
          } else if (type === 'content_block_stop') {
            if (currentToolCallId) {
              let parsedInput = {};
              try {
                parsedInput = currentToolInput ? JSON.parse(currentToolInput) : {};
              } catch {
                parsedInput = {}; // malformed accumulated JSON → empty args
              }
              yield {
                type: 'tool_call',
                id: currentToolCallId,
                name: currentToolName,
                input: parsedInput,
              };
              currentToolCallId = null;
              currentToolName = null;
              currentToolInput = '';
            }
          } else if (type === 'message_delta') {
            const stopReason = event.delta?.stop_reason;
            if (stopReason) {
              yield {
                type: 'stop',
                stopReason: this.#mapStopReason(stopReason),
              };
            }
            // Usage from message_delta (output-token side only)
            if (event.usage) {
              yield {
                type: 'usage',
                inputTokens: 0, // Only in message_start
                outputTokens: event.usage.output_tokens || 0,
              };
            }
          } else if (type === 'message_start') {
            // Usage from message_start
            if (event.message?.usage) {
              yield {
                type: 'usage',
                inputTokens: event.message.usage.input_tokens || 0,
                outputTokens: event.message.usage.output_tokens || 0,
                cacheReadTokens: event.message.usage.cache_read_input_tokens || 0,
                cacheWriteTokens: event.message.usage.cache_creation_input_tokens || 0,
              };
            }
          } else if (type === 'error') {
            yield {
              type: 'error',
              error: new Error(event.error?.message || 'Unknown streaming error'),
              retryable: event.error?.type === 'overloaded_error',
            };
          }
        }
      }
    } catch (err) {
      // An abort mid-stream otherwise surfaces as a raw AbortError DOMException
      // from reader.read(); normalize it to the adapter's typed error.
      if (err?.name === 'AbortError') throw new LLMAbortError();
      throw err;
    } finally {
      reader.releaseLock();
    }
  }

  /**
   * Non-streaming call for side queries.
   * @param {{ model: string, system: string, messages: import('./adapter.js').UnifiedMessage[], maxTokens?: number, signal?: AbortSignal }} params
   * @returns {Promise<{ text: string, usage: { inputTokens: number, outputTokens: number } }>}
   */
  async call({ model, system, messages, maxTokens = 4096, signal }) {
    if (signal?.aborted) throw new LLMAbortError();

    const body = {
      model,
      max_tokens: maxTokens,
      system,
      messages: this.#translateMessages(messages),
    };

    let response;
    try {
      response = await fetch(`${this.#baseUrl}/v1/messages`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'x-api-key': this.#apiKey,
          'anthropic-version': API_VERSION,
        },
        body: JSON.stringify(body),
        signal,
      });
    } catch (err) {
      // Normalize fetch cancellation to the adapter's typed abort error.
      if (err?.name === 'AbortError') throw new LLMAbortError();
      throw err;
    }

    if (!response.ok) {
      const errorBody = await response.text();
      throw this.#classifyError(response.status, errorBody, response.headers);
    }

    const result = await response.json();
    // Concatenate all text blocks; non-text blocks (e.g. tool_use) are ignored.
    const text = result.content
      ?.filter(b => b.type === 'text')
      .map(b => b.text)
      .join('') || '';

    return {
      text,
      usage: {
        inputTokens: result.usage?.input_tokens || 0,
        outputTokens: result.usage?.output_tokens || 0,
      },
    };
  }

  /**
   * Map Anthropic stop_reason to unified format.
   * Unknown reasons default to 'end_turn'.
   * @param {string} reason
   * @returns {'end_turn' | 'tool_use' | 'max_tokens'}
   */
  #mapStopReason(reason) {
    switch (reason) {
      case 'end_turn': return 'end_turn';
      case 'tool_use': return 'tool_use';
      case 'max_tokens': return 'max_tokens';
      default: return 'end_turn';
    }
  }
}
|
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* chat-completions.js — OpenAI Chat Completions API adapter
|
|
3
|
+
*
|
|
4
|
+
* Covers ALL OpenAI-compatible backends via baseUrl:
|
|
5
|
+
* - https://api.openai.com/v1 → OpenAI direct
|
|
6
|
+
* - https://api.deepseek.com → DeepSeek
|
|
7
|
+
* - http://localhost:6628/v1 → CopilotProxy
|
|
8
|
+
* - Azure, Ollama, LMStudio, etc.
|
|
9
|
+
*
|
|
10
|
+
* Key translation responsibilities:
|
|
11
|
+
* Request: UnifiedToolDef → { type: "function", function: { name, description, parameters } }
|
|
12
|
+
* Response: delta.tool_calls[i].function.arguments (JSON string) → accumulate → JSON.parse → UnifiedToolCall
|
|
13
|
+
* Result: UnifiedToolResult → { role: "tool", tool_call_id, content }
|
|
14
|
+
* Finish: "tool_calls" → "tool_use", "stop" → "end_turn", "length" → "max_tokens"
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import {
|
|
18
|
+
LLMAdapter,
|
|
19
|
+
LLMRateLimitError,
|
|
20
|
+
LLMAuthError,
|
|
21
|
+
LLMContextError,
|
|
22
|
+
LLMServerError,
|
|
23
|
+
LLMAbortError,
|
|
24
|
+
} from './adapter.js';
|
|
25
|
+
|
|
26
|
+
/**
 * ChatCompletionsAdapter — Talks to OpenAI Chat Completions API and compatibles.
 *
 * Covers any OpenAI-compatible backend via `baseUrl` (OpenAI, DeepSeek,
 * local proxies, Azure, Ollama, LMStudio, ...). Translates unified tool
 * defs/messages into the Chat Completions wire format and accumulates
 * streamed tool-call argument fragments back into parsed objects.
 */
export class ChatCompletionsAdapter extends LLMAdapter {
  #apiKey;
  #baseUrl;

  /**
   * @param {{ apiKey: string, baseUrl: string }} config
   */
  constructor({ apiKey, baseUrl }) {
    super({ apiKey, baseUrl });
    this.#apiKey = apiKey;
    this.#baseUrl = baseUrl.replace(/\/+$/, ''); // strip trailing slash
  }

  /**
   * Translate UnifiedToolDef[] → Chat Completions tool format
   * ({ type: "function", function: { name, description, parameters } }).
   * @param {import('./adapter.js').UnifiedToolDef[]} tools
   * @returns {object[]|undefined} undefined when there are no tools
   */
  #translateTools(tools) {
    if (!tools || tools.length === 0) return undefined;
    return tools.map(t => ({
      type: 'function',
      function: {
        name: t.name,
        description: t.description,
        parameters: t.parameters,
      },
    }));
  }

  /**
   * Translate UnifiedMessage[] → Chat Completions message format.
   * The system prompt becomes the first message.
   * @param {string} system — System prompt
   * @param {import('./adapter.js').UnifiedMessage[]} messages
   * @returns {object[]}
   */
  #translateMessages(system, messages) {
    const result = [];

    // System message first
    if (system) {
      result.push({ role: 'system', content: system });
    }

    for (const msg of messages) {
      if (msg.role === 'system') {
        result.push({ role: 'system', content: msg.content });
      } else if (msg.role === 'user') {
        result.push({ role: 'user', content: msg.content });
      } else if (msg.role === 'assistant') {
        const entry = { role: 'assistant' };
        // Some OpenAI-compatible APIs require `content: null` when tool_calls are present
        entry.content = msg.content || null;
        if (msg.toolCalls && msg.toolCalls.length > 0) {
          entry.tool_calls = msg.toolCalls.map(tc => ({
            id: tc.id,
            type: 'function',
            function: {
              name: tc.name,
              // Chat Completions carries tool arguments as a JSON string.
              arguments: JSON.stringify(tc.input),
            },
          }));
        }
        result.push(entry);
      } else if (msg.role === 'tool') {
        result.push({
          role: 'tool',
          tool_call_id: msg.toolCallId,
          content: msg.content,
        });
      }
    }
    return result;
  }

  /**
   * Classify HTTP errors.
   * @param {number} status
   * @param {string} body
   * @param {Headers} [headers] — response headers; used to honor `retry-after`
   */
  #classifyError(status, body, headers) {
    if (status === 401 || status === 403) {
      return new LLMAuthError(`Auth error: ${body}`, status);
    }
    if (status === 429) {
      // Honor the server-provided retry-after (seconds) when present and numeric,
      // consistent with the Anthropic adapter.
      const raw = headers?.get?.('retry-after');
      const retryAfter = raw != null && Number.isFinite(Number(raw)) ? Number(raw) : null;
      return new LLMRateLimitError(`Rate limit: ${body}`, status, retryAfter);
    }
    if (status === 529) {
      return new LLMRateLimitError(`Overloaded: ${body}`, status);
    }
    if (status === 413 || body.includes('context_length_exceeded') || body.includes('maximum context length')) {
      return new LLMContextError(`Context too long: ${body}`);
    }
    if (status >= 500) {
      return new LLMServerError(`Server error: ${body}`, status);
    }
    return new Error(`API error ${status}: ${body}`);
  }

  /**
   * Map Chat Completions finish_reason → unified stop reason.
   * Unknown/absent reasons default to 'end_turn'.
   * @param {string|null} reason
   * @returns {'end_turn' | 'tool_use' | 'max_tokens'}
   */
  #mapFinishReason(reason) {
    switch (reason) {
      case 'tool_calls': return 'tool_use';
      case 'stop': return 'end_turn';
      case 'length': return 'max_tokens';
      default: return 'end_turn';
    }
  }

  /**
   * Stream a Chat Completions response as unified events.
   * Aborts (pre-flight or mid-stream) always surface as LLMAbortError.
   * @param {{ model: string, system: string, messages: import('./adapter.js').UnifiedMessage[], tools?: import('./adapter.js').UnifiedToolDef[], maxTokens?: number, signal?: AbortSignal }} params
   * @returns {AsyncGenerator<import('./adapter.js').StreamEvent>}
   */
  async *stream({ model, system, messages, tools, maxTokens = 16384, signal }) {
    if (signal?.aborted) throw new LLMAbortError();

    const body = {
      model,
      messages: this.#translateMessages(system, messages),
      max_tokens: maxTokens,
      stream: true,
      stream_options: { include_usage: true },
    };

    const translatedTools = this.#translateTools(tools);
    if (translatedTools) body.tools = translatedTools;

    let response;
    try {
      response = await fetch(`${this.#baseUrl}/chat/completions`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${this.#apiKey}`,
        },
        body: JSON.stringify(body),
        signal,
      });
    } catch (err) {
      // Normalize fetch cancellation to the adapter's typed abort error,
      // matching the pre-flight signal check above.
      if (err?.name === 'AbortError') throw new LLMAbortError();
      throw err;
    }

    if (!response.ok) {
      const errorBody = await response.text();
      throw this.#classifyError(response.status, errorBody, response.headers);
    }

    // Parse SSE stream
    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    let buffer = '';

    // Tool call accumulation — Chat Completions sends tool args as fragments
    // keyed by index within the delta.tool_calls array
    /** @type {Map<number, { id: string, name: string, arguments: string }>} */
    const toolCallAccum = new Map();

    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split('\n');
        buffer = lines.pop() || ''; // Keep incomplete line

        for (const line of lines) {
          if (!line.startsWith('data: ')) continue;
          const data = line.slice(6).trim();
          if (data === '[DONE]') continue;

          let chunk;
          try {
            chunk = JSON.parse(data);
          } catch {
            continue; // skip malformed SSE payloads
          }

          // Usage (from stream_options: include_usage)
          if (chunk.usage) {
            yield {
              type: 'usage',
              inputTokens: chunk.usage.prompt_tokens || 0,
              outputTokens: chunk.usage.completion_tokens || 0,
              cacheReadTokens: chunk.usage.prompt_tokens_details?.cached_tokens || 0,
              cacheWriteTokens: 0, // not reported by Chat Completions
            };
          }

          const choice = chunk.choices?.[0];
          if (!choice) continue;

          const delta = choice.delta;
          if (!delta) continue;

          // Text content
          if (delta.content) {
            yield { type: 'text_delta', text: delta.content };
          }

          // Tool calls (streamed as fragments)
          if (delta.tool_calls) {
            for (const tc of delta.tool_calls) {
              const idx = tc.index;
              if (!toolCallAccum.has(idx)) {
                toolCallAccum.set(idx, {
                  id: tc.id || '',
                  name: tc.function?.name || '',
                  arguments: '',
                });
              }
              const accum = toolCallAccum.get(idx);
              // id/name may arrive in any fragment; arguments always append.
              if (tc.id) accum.id = tc.id;
              if (tc.function?.name) accum.name = tc.function.name;
              if (tc.function?.arguments) accum.arguments += tc.function.arguments;
            }
          }

          // Finish reason
          if (choice.finish_reason) {
            // Emit accumulated tool calls before stop
            for (const [, accum] of toolCallAccum) {
              let parsedInput = {};
              try {
                parsedInput = accum.arguments ? JSON.parse(accum.arguments) : {};
              } catch {
                parsedInput = {}; // malformed accumulated JSON → empty args
              }
              yield {
                type: 'tool_call',
                id: accum.id,
                name: accum.name,
                input: parsedInput,
              };
            }
            toolCallAccum.clear();

            yield {
              type: 'stop',
              stopReason: this.#mapFinishReason(choice.finish_reason),
            };
          }
        }
      }
    } catch (err) {
      // An abort mid-stream otherwise surfaces as a raw AbortError DOMException
      // from reader.read(); normalize it to the adapter's typed error.
      if (err?.name === 'AbortError') throw new LLMAbortError();
      throw err;
    } finally {
      reader.releaseLock();
    }
  }

  /**
   * Non-streaming call for side queries.
   * @param {{ model: string, system: string, messages: import('./adapter.js').UnifiedMessage[], maxTokens?: number, signal?: AbortSignal }} params
   * @returns {Promise<{ text: string, usage: { inputTokens: number, outputTokens: number } }>}
   */
  async call({ model, system, messages, maxTokens = 4096, signal }) {
    if (signal?.aborted) throw new LLMAbortError();

    const body = {
      model,
      messages: this.#translateMessages(system, messages),
      max_tokens: maxTokens,
    };

    let response;
    try {
      response = await fetch(`${this.#baseUrl}/chat/completions`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${this.#apiKey}`,
        },
        body: JSON.stringify(body),
        signal,
      });
    } catch (err) {
      // Normalize fetch cancellation to the adapter's typed abort error.
      if (err?.name === 'AbortError') throw new LLMAbortError();
      throw err;
    }

    if (!response.ok) {
      const errorBody = await response.text();
      throw this.#classifyError(response.status, errorBody, response.headers);
    }

    const result = await response.json();
    const text = result.choices?.[0]?.message?.content || '';

    return {
      text,
      usage: {
        inputTokens: result.usage?.prompt_tokens || 0,
        outputTokens: result.usage?.completion_tokens || 0,
      },
    };
  }
}
|