@yeaft/webchat-agent 0.1.399 → 0.1.409
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/crew/role-query.js +10 -6
- package/package.json +3 -1
- package/sdk/query.js +3 -1
- package/unify/cli.js +735 -0
- package/unify/config.js +269 -0
- package/unify/conversation/persist.js +436 -0
- package/unify/conversation/search.js +65 -0
- package/unify/debug-trace.js +398 -0
- package/unify/engine.js +511 -0
- package/unify/index.js +27 -0
- package/unify/init.js +147 -0
- package/unify/llm/adapter.js +186 -0
- package/unify/llm/anthropic.js +322 -0
- package/unify/llm/chat-completions.js +315 -0
- package/unify/memory/consolidate.js +187 -0
- package/unify/memory/extract.js +97 -0
- package/unify/memory/recall.js +243 -0
- package/unify/memory/store.js +507 -0
- package/unify/models.js +167 -0
- package/unify/prompts.js +109 -0
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* adapter.js — LLM Adapter base class, unified types, and factory
|
|
3
|
+
*
|
|
4
|
+
* Design decision (2026-04-10): Only two adapters needed:
|
|
5
|
+
* 1. AnthropicAdapter — Anthropic Messages API
|
|
6
|
+
* 2. ChatCompletionsAdapter — OpenAI Chat Completions API (covers GPT, DeepSeek, CopilotProxy, etc.)
|
|
7
|
+
*
|
|
8
|
+
* The engine sees only unified types — it never knows which API is underneath.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
// ─── Unified Types ─────────────────────────────────────────────
|
|
12
|
+
|
|
13
|
+
/**
|
|
14
|
+
* @typedef {Object} UnifiedToolDef
|
|
15
|
+
* @property {string} name
|
|
16
|
+
* @property {string} description
|
|
17
|
+
* @property {object} parameters — JSON Schema
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* @typedef {Object} UnifiedToolCall
|
|
22
|
+
* @property {string} id
|
|
23
|
+
* @property {string} name
|
|
24
|
+
* @property {object} input — Parsed object (not JSON string)
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
/**
|
|
28
|
+
* @typedef {Object} UnifiedToolResult
|
|
29
|
+
* @property {string} toolCallId
|
|
30
|
+
* @property {string} output
|
|
31
|
+
* @property {boolean} [isError]
|
|
32
|
+
*/
|
|
33
|
+
|
|
34
|
+
// ─── Unified Event Stream ──────────────────────────────────────
|
|
35
|
+
|
|
36
|
+
/**
|
|
37
|
+
* @typedef {{ type: 'text_delta', text: string }} TextDeltaEvent
|
|
38
|
+
* @typedef {{ type: 'thinking_delta', text: string }} ThinkingDeltaEvent
|
|
39
|
+
* @typedef {{ type: 'tool_call', id: string, name: string, input: object }} ToolCallEvent
|
|
40
|
+
* @typedef {{ type: 'usage', inputTokens: number, outputTokens: number, cacheReadTokens?: number, cacheWriteTokens?: number }} UsageEvent
|
|
41
|
+
* @typedef {{ type: 'stop', stopReason: 'end_turn' | 'tool_use' | 'max_tokens' }} StopEvent
|
|
42
|
+
* @typedef {{ type: 'error', error: Error, retryable: boolean }} ErrorEvent
|
|
43
|
+
*
|
|
44
|
+
* @typedef {TextDeltaEvent | ThinkingDeltaEvent | ToolCallEvent | UsageEvent | StopEvent | ErrorEvent} StreamEvent
|
|
45
|
+
*/
|
|
46
|
+
|
|
47
|
+
// ─── Unified Message Types ─────────────────────────────────────
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* @typedef {{ role: 'system', content: string }} SystemMessage
|
|
51
|
+
* @typedef {{ role: 'user', content: string }} UserMessage
|
|
52
|
+
* @typedef {{ role: 'assistant', content: string, toolCalls?: UnifiedToolCall[] }} AssistantMessage
|
|
53
|
+
* @typedef {{ role: 'tool', toolCallId: string, content: string, isError?: boolean }} ToolMessage
|
|
54
|
+
*
|
|
55
|
+
* @typedef {SystemMessage | UserMessage | AssistantMessage | ToolMessage} UnifiedMessage
|
|
56
|
+
*/
|
|
57
|
+
|
|
58
|
+
// ─── Error Types ───────────────────────────────────────────────
|
|
59
|
+
|
|
60
|
+
/**
 * Rate limit error (429, 529) — retryable with backoff.
 * When the server supplied a delay hint, it is exposed as retryAfterMs.
 */
export class LLMRateLimitError extends Error {
  /**
   * @param {string} message
   * @param {number} statusCode — HTTP status that triggered the error
   * @param {number | null} [retryAfterMs] — suggested wait before retrying, if known
   */
  constructor(message, statusCode, retryAfterMs = null) {
    super(message);
    this.name = 'LLMRateLimitError';
    Object.assign(this, { statusCode, retryAfterMs });
  }
}
|
|
69
|
+
|
|
70
|
+
/**
 * Authentication error (401, 403) — need to re-authenticate.
 * Retrying without new credentials will not help.
 */
export class LLMAuthError extends Error {
  /**
   * @param {string} message
   * @param {number} statusCode — the HTTP status (401 or 403)
   */
  constructor(message, statusCode) {
    super(message);
    this.name = 'LLMAuthError';
    Object.assign(this, { statusCode });
  }
}
|
|
78
|
+
|
|
79
|
+
/**
 * Context too long error (413 or API-specific) — the conversation no
 * longer fits the model's window; the caller should compact and retry.
 */
export class LLMContextError extends Error {
  /** @param {string} message */
  constructor(message) {
    super(message);
    this.name = 'LLMContextError';
  }
}
|
|
86
|
+
|
|
87
|
+
/**
 * Server error (500, 502, 503) — transient upstream failure, retryable.
 */
export class LLMServerError extends Error {
  /**
   * @param {string} message
   * @param {number} statusCode — the 5xx HTTP status received
   */
  constructor(message, statusCode) {
    super(message);
    this.name = 'LLMServerError';
    Object.assign(this, { statusCode });
  }
}
|
|
95
|
+
|
|
96
|
+
/**
 * Abort error — the request's AbortSignal fired.
 *
 * Generalized: callers may now pass a more specific message (e.g. which
 * phase was aborted); the default remains 'Request aborted' so existing
 * call sites are unchanged.
 */
export class LLMAbortError extends Error {
  /** @param {string} [message] — defaults to 'Request aborted' */
  constructor(message = 'Request aborted') {
    super(message);
    this.name = 'LLMAbortError';
  }
}
|
|
103
|
+
|
|
104
|
+
// ─── Base Class ────────────────────────────────────────────────
|
|
105
|
+
|
|
106
|
+
/**
 * LLMAdapter — Abstract base class for LLM API adapters.
 *
 * Concrete adapters override stream() and call(), converting between
 * the unified message/tool/event types and their wire format. The base
 * class only stores configuration and rejects un-overridden methods.
 */
export class LLMAdapter {
  /**
   * @param {object} [config] — Adapter-specific configuration (kept as-is)
   */
  constructor(config = {}) {
    this.config = config;
  }

  /**
   * Stream a model response with tool support (the query loop call).
   * Must be overridden; the base implementation always throws.
   *
   * @param {{ model: string, system: string, messages: UnifiedMessage[], tools?: UnifiedToolDef[], maxTokens?: number, signal?: AbortSignal }} _params
   * @returns {AsyncGenerator<StreamEvent>}
   * @throws {Error} always, in the base class
   */
  async *stream(_params) {
    throw new Error('stream() must be implemented by subclass');
  }

  /**
   * Make a single model call without tools (for side queries like summarization).
   * Must be overridden; the base implementation always throws.
   *
   * @param {{ model: string, system: string, messages: UnifiedMessage[], maxTokens?: number, signal?: AbortSignal }} _params
   * @returns {Promise<{ text: string, usage: { inputTokens: number, outputTokens: number } }>}
   * @throws {Error} always, in the base class
   */
  async call(_params) {
    throw new Error('call() must be implemented by subclass');
  }
}
|
|
140
|
+
|
|
141
|
+
// ─── Factory ───────────────────────────────────────────────────
|
|
142
|
+
|
|
143
|
+
/**
 * Create an LLM adapter based on configuration.
 *
 * Selection order: an explicit `config.adapter` wins; otherwise the first
 * credential present picks the backend (apiKey → Anthropic,
 * openaiApiKey → OpenAI-compatible, proxyUrl → CopilotProxy).
 *
 * @param {object} config — From loadConfig()
 * @returns {Promise<LLMAdapter>}
 * @throws {Error} when no backend is configured or a required key is missing
 */
export async function createLLMAdapter(config) {
  const { apiKey, openaiApiKey, proxyUrl, baseUrl } = config;
  // 'chat-completions' is an accepted alias for 'openai'.
  const kind = config.adapter === 'chat-completions' ? 'openai' : config.adapter;

  if (kind === 'anthropic' || (!kind && apiKey)) {
    if (!apiKey) {
      throw new Error('Anthropic adapter requires YEAFT_API_KEY');
    }
    const { AnthropicAdapter } = await import('./anthropic.js');
    // Pass undefined (not '') so AnthropicAdapter applies its own default URL.
    return new AnthropicAdapter({ apiKey, baseUrl: baseUrl || undefined });
  }

  if (kind === 'openai' || (!kind && openaiApiKey)) {
    if (!openaiApiKey && !apiKey) {
      throw new Error('OpenAI adapter requires YEAFT_OPENAI_API_KEY (or YEAFT_API_KEY as fallback)');
    }
    const { ChatCompletionsAdapter } = await import('./chat-completions.js');
    return new ChatCompletionsAdapter({
      apiKey: openaiApiKey || apiKey,
      baseUrl: baseUrl || 'https://api.openai.com/v1',
    });
  }

  if (kind === 'proxy' || (!kind && proxyUrl)) {
    const { ChatCompletionsAdapter } = await import('./chat-completions.js');
    // CopilotProxy performs authentication itself; the key is a placeholder.
    return new ChatCompletionsAdapter({
      apiKey: 'proxy',
      baseUrl: `${proxyUrl}/v1`,
    });
  }

  throw new Error(
    'No LLM adapter configured. Set YEAFT_API_KEY (Anthropic), YEAFT_OPENAI_API_KEY (OpenAI), or YEAFT_PROXY_URL (CopilotProxy).',
  );
}
|
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* anthropic.js — Anthropic Messages API adapter
|
|
3
|
+
*
|
|
4
|
+
* POST /v1/messages with SSE streaming.
|
|
5
|
+
* tool_use.input is already a parsed object (no JSON.parse needed).
|
|
6
|
+
* Tool definitions use `input_schema` (not `parameters`).
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import {
|
|
10
|
+
LLMAdapter,
|
|
11
|
+
LLMRateLimitError,
|
|
12
|
+
LLMAuthError,
|
|
13
|
+
LLMContextError,
|
|
14
|
+
LLMServerError,
|
|
15
|
+
LLMAbortError,
|
|
16
|
+
} from './adapter.js';
|
|
17
|
+
|
|
18
|
+
const DEFAULT_BASE_URL = 'https://api.anthropic.com';
|
|
19
|
+
const API_VERSION = '2023-06-01';
|
|
20
|
+
|
|
21
|
+
/**
 * AnthropicAdapter — Talks to the Anthropic Messages API.
 *
 * Translates unified messages/tools into the Messages API wire format,
 * parses the SSE response stream into unified StreamEvents, and maps
 * HTTP failures onto the typed errors from adapter.js.
 *
 * Fixes vs. previous revision:
 *  - rate-limit errors now carry retryAfterMs parsed from the
 *    Retry-After response header (it was always null before);
 *  - the SSE loop flushes the TextDecoder and processes a final line
 *    that arrives without a trailing newline (previously dropped).
 */
export class AnthropicAdapter extends LLMAdapter {
  #apiKey;
  #baseUrl;

  /**
   * @param {{ apiKey: string, baseUrl?: string }} options
   */
  constructor({ apiKey, baseUrl = DEFAULT_BASE_URL }) {
    super({ apiKey, baseUrl });
    this.#apiKey = apiKey;
    this.#baseUrl = baseUrl;
  }

  /** Request headers shared by stream() and call(). */
  #headers() {
    return {
      'Content-Type': 'application/json',
      'x-api-key': this.#apiKey,
      'anthropic-version': API_VERSION,
    };
  }

  /**
   * Translate UnifiedToolDef[] → Anthropic tool format.
   * Anthropic names the JSON Schema field `input_schema`, not `parameters`.
   * @param {import('./adapter.js').UnifiedToolDef[]} tools
   * @returns {object[] | undefined} undefined when there are no tools
   */
  #translateTools(tools) {
    if (!tools || tools.length === 0) return undefined;
    return tools.map(t => ({
      name: t.name,
      description: t.description,
      input_schema: t.parameters,
    }));
  }

  /**
   * Translate UnifiedMessage[] → Anthropic message format.
   *
   * System messages are skipped (they travel in the top-level `system`
   * field). Consecutive tool messages are merged into a single user
   * message, since Anthropic requires all tool_results from the same
   * turn together.
   *
   * @param {import('./adapter.js').UnifiedMessage[]} messages
   * @returns {object[]}
   */
  #translateMessages(messages) {
    const result = [];
    for (const msg of messages) {
      if (msg.role === 'system') continue; // system goes separately
      if (msg.role === 'user') {
        result.push({ role: 'user', content: msg.content });
      } else if (msg.role === 'assistant') {
        const content = [];
        if (msg.content) {
          content.push({ type: 'text', text: msg.content });
        }
        if (msg.toolCalls) {
          for (const tc of msg.toolCalls) {
            content.push({
              type: 'tool_use',
              id: tc.id,
              name: tc.name,
              input: tc.input,
            });
          }
        }
        result.push({ role: 'assistant', content });
      } else if (msg.role === 'tool') {
        const toolResult = {
          type: 'tool_result',
          tool_use_id: msg.toolCallId,
          content: msg.content,
          is_error: msg.isError || false,
        };
        const prev = result[result.length - 1];
        const prevHoldsToolResults = prev && prev.role === 'user' &&
          Array.isArray(prev.content) && prev.content.length > 0 &&
          prev.content[0].type === 'tool_result';
        if (prevHoldsToolResults) {
          // Append to the existing tool_result user message.
          prev.content.push(toolResult);
        } else {
          result.push({ role: 'user', content: [toolResult] });
        }
      }
    }
    return result;
  }

  /**
   * Extract a retry delay in milliseconds from the Retry-After header.
   * Only the delta-seconds form is handled; HTTP-date values yield null.
   * @param {Response} response
   * @returns {number | null}
   */
  #retryAfterMs(response) {
    const raw = response.headers?.get?.('retry-after');
    if (!raw) return null;
    const seconds = Number(raw);
    return Number.isFinite(seconds) ? Math.max(0, seconds) * 1000 : null;
  }

  /**
   * Classify HTTP errors into our typed errors.
   * @param {number} status
   * @param {string} body — raw response body text
   * @param {number | null} [retryAfterMs] — parsed Retry-After, if any
   * @returns {Error}
   */
  #classifyError(status, body, retryAfterMs = null) {
    if (status === 401 || status === 403) {
      return new LLMAuthError(`Anthropic auth error: ${body}`, status);
    }
    if (status === 429) {
      return new LLMRateLimitError(`Anthropic rate limit: ${body}`, status, retryAfterMs);
    }
    if (status === 529) {
      return new LLMRateLimitError(`Anthropic overloaded: ${body}`, status, retryAfterMs);
    }
    // NOTE(review): substring match is coarse — a validation error that merely
    // mentions "max_tokens" would also be classified as a context error.
    if (body.includes('prompt is too long') || body.includes('max_tokens')) {
      return new LLMContextError(`Anthropic context error: ${body}`);
    }
    if (status >= 500) {
      return new LLMServerError(`Anthropic server error: ${body}`, status);
    }
    return new Error(`Anthropic API error ${status}: ${body}`);
  }

  /**
   * Convert one SSE line into zero or more unified StreamEvents,
   * accumulating partial tool-call JSON in `state` across lines.
   * @param {string} line — a single line of the SSE stream
   * @param {{ toolId: string | null, toolName: string | null, toolJson: string }} state
   * @returns {import('./adapter.js').StreamEvent[]}
   */
  #sseLineToEvents(line, state) {
    const events = [];
    if (!line.startsWith('data: ')) return events;
    const data = line.slice(6).trim();
    if (data === '[DONE]') return events;

    let event;
    try {
      event = JSON.parse(data);
    } catch {
      return events; // ignore malformed frames
    }

    const type = event.type;
    if (type === 'content_block_start') {
      const block = event.content_block;
      if (block?.type === 'tool_use') {
        // Start buffering a tool call; its input arrives as JSON fragments.
        state.toolId = block.id;
        state.toolName = block.name;
        state.toolJson = '';
      }
    } else if (type === 'content_block_delta') {
      const delta = event.delta;
      if (delta?.type === 'text_delta') {
        events.push({ type: 'text_delta', text: delta.text });
      } else if (delta?.type === 'thinking_delta') {
        events.push({ type: 'thinking_delta', text: delta.thinking });
      } else if (delta?.type === 'input_json_delta') {
        state.toolJson += delta.partial_json;
      }
    } else if (type === 'content_block_stop') {
      if (state.toolId) {
        let parsedInput = {};
        try {
          parsedInput = state.toolJson ? JSON.parse(state.toolJson) : {};
        } catch {
          parsedInput = {}; // tolerate truncated/invalid tool JSON
        }
        events.push({
          type: 'tool_call',
          id: state.toolId,
          name: state.toolName,
          input: parsedInput,
        });
        state.toolId = null;
        state.toolName = null;
        state.toolJson = '';
      }
    } else if (type === 'message_delta') {
      const stopReason = event.delta?.stop_reason;
      if (stopReason) {
        events.push({ type: 'stop', stopReason: this.#mapStopReason(stopReason) });
      }
      if (event.usage) {
        // message_delta carries only output tokens; input is in message_start.
        events.push({
          type: 'usage',
          inputTokens: 0,
          outputTokens: event.usage.output_tokens || 0,
        });
      }
    } else if (type === 'message_start') {
      const usage = event.message?.usage;
      if (usage) {
        events.push({
          type: 'usage',
          inputTokens: usage.input_tokens || 0,
          outputTokens: usage.output_tokens || 0,
          cacheReadTokens: usage.cache_read_input_tokens || 0,
          cacheWriteTokens: usage.cache_creation_input_tokens || 0,
        });
      }
    } else if (type === 'error') {
      events.push({
        type: 'error',
        error: new Error(event.error?.message || 'Unknown streaming error'),
        retryable: event.error?.type === 'overloaded_error',
      });
    }
    return events;
  }

  /**
   * Stream a model response with tool support (the query loop call).
   *
   * @param {{ model: string, system: string, messages: import('./adapter.js').UnifiedMessage[], tools?: import('./adapter.js').UnifiedToolDef[], maxTokens?: number, signal?: AbortSignal }} params
   * @returns {AsyncGenerator<import('./adapter.js').StreamEvent>}
   * @throws {LLMAbortError} when the signal is already aborted
   * @throws {Error} typed errors from #classifyError on non-2xx responses
   */
  async *stream({ model, system, messages, tools, maxTokens = 16384, signal }) {
    if (signal?.aborted) throw new LLMAbortError();

    const body = {
      model,
      max_tokens: maxTokens,
      system,
      messages: this.#translateMessages(messages),
      stream: true,
    };
    const translatedTools = this.#translateTools(tools);
    if (translatedTools) body.tools = translatedTools;

    const response = await fetch(`${this.#baseUrl}/v1/messages`, {
      method: 'POST',
      headers: this.#headers(),
      body: JSON.stringify(body),
      signal,
    });

    if (!response.ok) {
      const errorBody = await response.text();
      // Fix: propagate Retry-After so rate-limit errors carry retryAfterMs.
      throw this.#classifyError(response.status, errorBody, this.#retryAfterMs(response));
    }

    // Parse the SSE stream line by line.
    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    let buffer = '';
    const state = { toolId: null, toolName: null, toolJson: '' };

    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split('\n');
        buffer = lines.pop() || ''; // keep the incomplete trailing line
        for (const line of lines) {
          yield* this.#sseLineToEvents(line, state);
        }
      }
      // Fix: flush any bytes the decoder held back and process a final
      // line that arrived without a trailing newline.
      buffer += decoder.decode();
      if (buffer) {
        for (const line of buffer.split('\n')) {
          yield* this.#sseLineToEvents(line, state);
        }
      }
    } finally {
      reader.releaseLock();
    }
  }

  /**
   * Non-streaming call for side queries (e.g. summarization).
   *
   * @param {{ model: string, system: string, messages: import('./adapter.js').UnifiedMessage[], maxTokens?: number, signal?: AbortSignal }} params
   * @returns {Promise<{ text: string, usage: { inputTokens: number, outputTokens: number } }>}
   * @throws {LLMAbortError} when the signal is already aborted
   * @throws {Error} typed errors from #classifyError on non-2xx responses
   */
  async call({ model, system, messages, maxTokens = 4096, signal }) {
    if (signal?.aborted) throw new LLMAbortError();

    const body = {
      model,
      max_tokens: maxTokens,
      system,
      messages: this.#translateMessages(messages),
    };

    const response = await fetch(`${this.#baseUrl}/v1/messages`, {
      method: 'POST',
      headers: this.#headers(),
      body: JSON.stringify(body),
      signal,
    });

    if (!response.ok) {
      const errorBody = await response.text();
      throw this.#classifyError(response.status, errorBody, this.#retryAfterMs(response));
    }

    const result = await response.json();
    // Concatenate only the text blocks; tool_use blocks are not expected here.
    const text = result.content
      ?.filter(b => b.type === 'text')
      .map(b => b.text)
      .join('') || '';

    return {
      text,
      usage: {
        inputTokens: result.usage?.input_tokens || 0,
        outputTokens: result.usage?.output_tokens || 0,
      },
    };
  }

  /**
   * Map Anthropic stop_reason to the unified vocabulary; unknown values
   * fall back to 'end_turn'.
   * @param {string} reason
   * @returns {'end_turn' | 'tool_use' | 'max_tokens'}
   */
  #mapStopReason(reason) {
    switch (reason) {
      case 'end_turn': return 'end_turn';
      case 'tool_use': return 'tool_use';
      case 'max_tokens': return 'max_tokens';
      default: return 'end_turn';
    }
  }
}
|