universal-llm-client 4.5.0 → 4.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. package/CHANGELOG.md +12 -0
  2. package/README.md +2 -0
  3. package/dist/ai-model.d.ts +0 -1
  4. package/dist/ai-model.js +0 -1
  5. package/dist/auditor.d.ts +0 -1
  6. package/dist/auditor.js +0 -1
  7. package/dist/client.d.ts +0 -1
  8. package/dist/client.js +0 -1
  9. package/dist/gemma-channel.d.ts +0 -1
  10. package/dist/gemma-channel.js +0 -1
  11. package/dist/gemma-diffusion.d.ts +0 -1
  12. package/dist/gemma-diffusion.js +0 -1
  13. package/dist/http.d.ts +0 -1
  14. package/dist/http.js +0 -1
  15. package/dist/index.d.ts +0 -1
  16. package/dist/index.js +0 -1
  17. package/dist/interfaces.d.ts +0 -1
  18. package/dist/interfaces.js +0 -1
  19. package/dist/mcp.d.ts +0 -1
  20. package/dist/mcp.js +0 -1
  21. package/dist/providers/anthropic.d.ts +0 -1
  22. package/dist/providers/anthropic.js +0 -1
  23. package/dist/providers/google.d.ts +0 -1
  24. package/dist/providers/google.js +0 -1
  25. package/dist/providers/index.d.ts +0 -1
  26. package/dist/providers/index.js +0 -1
  27. package/dist/providers/ollama.d.ts +0 -1
  28. package/dist/providers/ollama.js +0 -1
  29. package/dist/providers/openai.d.ts +2 -1
  30. package/dist/providers/openai.js +303 -74
  31. package/dist/router.d.ts +0 -1
  32. package/dist/router.js +0 -1
  33. package/dist/stream-decoder.d.ts +0 -1
  34. package/dist/stream-decoder.js +0 -1
  35. package/dist/structured-output.d.ts +0 -1
  36. package/dist/structured-output.js +0 -1
  37. package/dist/thinking.d.ts +0 -1
  38. package/dist/thinking.js +0 -1
  39. package/dist/tools.d.ts +0 -1
  40. package/dist/tools.js +0 -1
  41. package/dist/zod-adapter.d.ts +0 -1
  42. package/dist/zod-adapter.js +0 -1
  43. package/package.json +1 -2
  44. package/dist/ai-model.d.ts.map +0 -1
  45. package/dist/ai-model.js.map +0 -1
  46. package/dist/auditor.d.ts.map +0 -1
  47. package/dist/auditor.js.map +0 -1
  48. package/dist/client.d.ts.map +0 -1
  49. package/dist/client.js.map +0 -1
  50. package/dist/gemma-channel.d.ts.map +0 -1
  51. package/dist/gemma-channel.js.map +0 -1
  52. package/dist/gemma-diffusion.d.ts.map +0 -1
  53. package/dist/gemma-diffusion.js.map +0 -1
  54. package/dist/http.d.ts.map +0 -1
  55. package/dist/http.js.map +0 -1
  56. package/dist/index.d.ts.map +0 -1
  57. package/dist/index.js.map +0 -1
  58. package/dist/interfaces.d.ts.map +0 -1
  59. package/dist/interfaces.js.map +0 -1
  60. package/dist/mcp.d.ts.map +0 -1
  61. package/dist/mcp.js.map +0 -1
  62. package/dist/providers/anthropic.d.ts.map +0 -1
  63. package/dist/providers/anthropic.js.map +0 -1
  64. package/dist/providers/google.d.ts.map +0 -1
  65. package/dist/providers/google.js.map +0 -1
  66. package/dist/providers/index.d.ts.map +0 -1
  67. package/dist/providers/index.js.map +0 -1
  68. package/dist/providers/ollama.d.ts.map +0 -1
  69. package/dist/providers/ollama.js.map +0 -1
  70. package/dist/providers/openai.d.ts.map +0 -1
  71. package/dist/providers/openai.js.map +0 -1
  72. package/dist/router.d.ts.map +0 -1
  73. package/dist/router.js.map +0 -1
  74. package/dist/stream-decoder.d.ts.map +0 -1
  75. package/dist/stream-decoder.js.map +0 -1
  76. package/dist/structured-output.d.ts.map +0 -1
  77. package/dist/structured-output.js.map +0 -1
  78. package/dist/thinking.d.ts.map +0 -1
  79. package/dist/thinking.js.map +0 -1
  80. package/dist/tools.d.ts.map +0 -1
  81. package/dist/tools.js.map +0 -1
  82. package/dist/zod-adapter.d.ts.map +0 -1
  83. package/dist/zod-adapter.js.map +0 -1
  84. package/src/ai-model.ts +0 -400
  85. package/src/auditor.ts +0 -213
  86. package/src/client.ts +0 -402
  87. package/src/debug/debug-google-streaming.ts +0 -97
  88. package/src/debug/debug-tool-execution.ts +0 -86
  89. package/src/debug/test-lmstudio-tools.ts +0 -155
  90. package/src/demos/README.md +0 -47
  91. package/src/demos/basic/universal-llm-examples.ts +0 -161
  92. package/src/demos/diffusion-gemma/.env +0 -29
  93. package/src/demos/diffusion-gemma/.env.example +0 -27
  94. package/src/demos/diffusion-gemma/CLAUDE.md +0 -95
  95. package/src/demos/diffusion-gemma/README.md +0 -59
  96. package/src/demos/diffusion-gemma/canvas.ts +0 -1606
  97. package/src/demos/diffusion-gemma/docker-compose.yml +0 -29
  98. package/src/demos/diffusion-gemma/probe-stream.ts +0 -51
  99. package/src/demos/diffusion-gemma/probe-tools.ts +0 -55
  100. package/src/demos/diffusion-gemma/server.ts +0 -1205
  101. package/src/demos/diffusion-gemma/start-vllm.sh +0 -98
  102. package/src/demos/mcp/astrid-memory-demo.ts +0 -295
  103. package/src/demos/mcp/astrid-persona-memory.ts +0 -357
  104. package/src/demos/mcp/mcp-mongodb-demo.ts +0 -275
  105. package/src/demos/mcp/simple-astrid-memory.ts +0 -148
  106. package/src/demos/mcp/simple-mcp-demo.ts +0 -68
  107. package/src/demos/mcp/working-mcp-demo.ts +0 -62
  108. package/src/demos/model-alias-demo.ts +0 -0
  109. package/src/demos/tools/RAG_MEMORY_INTEGRATION.md +0 -267
  110. package/src/demos/tools/astrid-memory-demo.ts +0 -270
  111. package/src/demos/tools/astrid-production-memory-clean.ts +0 -785
  112. package/src/demos/tools/astrid-production-memory.ts +0 -558
  113. package/src/demos/tools/basic-translation-test.ts +0 -66
  114. package/src/demos/tools/chromadb-similarity-tuning.ts +0 -390
  115. package/src/demos/tools/clean-multilingual-conversation.ts +0 -209
  116. package/src/demos/tools/clean-translation-test.ts +0 -119
  117. package/src/demos/tools/clean-universal-multilingual-test.ts +0 -131
  118. package/src/demos/tools/complete-rag-demo.ts +0 -369
  119. package/src/demos/tools/complete-tool-demo.ts +0 -132
  120. package/src/demos/tools/demo-tool-calling.ts +0 -124
  121. package/src/demos/tools/dynamic-language-switching-test.ts +0 -251
  122. package/src/demos/tools/hybrid-thinking-test.ts +0 -154
  123. package/src/demos/tools/memory-integration-test.ts +0 -420
  124. package/src/demos/tools/multilingual-memory-system.ts +0 -802
  125. package/src/demos/tools/ondemand-translation-demo.ts +0 -655
  126. package/src/demos/tools/production-tool-demo.ts +0 -245
  127. package/src/demos/tools/revolutionary-multilingual-test.ts +0 -151
  128. package/src/demos/tools/rigorous-language-analysis.ts +0 -218
  129. package/src/demos/tools/test-universal-memory-system.ts +0 -126
  130. package/src/demos/tools/translation-integration-guide.ts +0 -346
  131. package/src/demos/tools/universal-memory-system.ts +0 -560
  132. package/src/gemma-channel.ts +0 -47
  133. package/src/gemma-diffusion.ts +0 -167
  134. package/src/http.ts +0 -261
  135. package/src/index.ts +0 -180
  136. package/src/interfaces.ts +0 -843
  137. package/src/mcp.ts +0 -345
  138. package/src/providers/anthropic.ts +0 -796
  139. package/src/providers/google.ts +0 -840
  140. package/src/providers/index.ts +0 -8
  141. package/src/providers/ollama.ts +0 -503
  142. package/src/providers/openai.ts +0 -587
  143. package/src/router.ts +0 -785
  144. package/src/stream-decoder.ts +0 -535
  145. package/src/structured-output.ts +0 -759
  146. package/src/test-scripts/test-advanced-tools.ts +0 -310
  147. package/src/test-scripts/test-google-deep-research.ts +0 -33
  148. package/src/test-scripts/test-google-streaming-enhanced.ts +0 -147
  149. package/src/test-scripts/test-google-streaming.ts +0 -63
  150. package/src/test-scripts/test-google-system-prompt-comprehensive.ts +0 -189
  151. package/src/test-scripts/test-google-thinking.ts +0 -46
  152. package/src/test-scripts/test-mcp-config.ts +0 -28
  153. package/src/test-scripts/test-mcp-connection.ts +0 -29
  154. package/src/test-scripts/test-system-message-positions.ts +0 -163
  155. package/src/test-scripts/test-system-prompt-improvement-demo.ts +0 -83
  156. package/src/test-scripts/test-tool-calling.ts +0 -231
  157. package/src/test-scripts/test-vllm-qwen36.ts +0 -256
  158. package/src/tests/ai-model.test.ts +0 -1614
  159. package/src/tests/auditor.test.ts +0 -224
  160. package/src/tests/gemma-diffusion.test.ts +0 -115
  161. package/src/tests/http.test.ts +0 -200
  162. package/src/tests/interfaces.test.ts +0 -117
  163. package/src/tests/providers/anthropic.test.ts +0 -118
  164. package/src/tests/providers/google.test.ts +0 -841
  165. package/src/tests/providers/ollama.test.ts +0 -1034
  166. package/src/tests/providers/openai.test.ts +0 -1511
  167. package/src/tests/router.test.ts +0 -254
  168. package/src/tests/stream-decoder.test.ts +0 -263
  169. package/src/tests/structured-output.test.ts +0 -1450
  170. package/src/tests/thinking.test.ts +0 -65
  171. package/src/tests/tools.test.ts +0 -175
  172. package/src/thinking.ts +0 -73
  173. package/src/tools.ts +0 -246
  174. package/src/zod-adapter.ts +0 -72
@@ -1,796 +0,0 @@
1
- /**
2
- * Universal LLM Client v3 — Anthropic Messages API Provider
3
- *
4
- * Implements BaseLLMClient for Anthropic's Messages API (Claude).
5
- * Uses the custom Anthropic protocol — NOT OpenAI-compatible.
6
- *
7
- * Key differences from OpenAI:
8
- * - Endpoint: POST /v1/messages (not /chat/completions)
9
- * - Auth: x-api-key header (not Authorization: Bearer)
10
- * - System prompt: top-level `system` field, not a message
11
- * - Messages: content is always an array of content blocks
12
- * - Tool calls: `tool_use` content blocks (not tool_calls array)
13
- * - Tool results: `tool_result` content blocks in user messages
14
- * - Streaming: content_block_start/delta/stop events with typed deltas
15
- */
16
-
17
- import { BaseLLMClient } from '../client.js';
18
- import { resolveThinking, anthropicThinkingBudget } from '../thinking.js';
19
- import { httpRequest, httpStream, parseSSE } from '../http.js';
20
- import { StandardChatDecoder } from '../stream-decoder.js';
21
- import type {
22
- LLMClientOptions,
23
- LLMChatMessage,
24
- LLMChatResponse,
25
- LLMToolCall,
26
- LLMToolDefinition,
27
- ChatOptions,
28
- TokenUsageInfo,
29
- ModelMetadata,
30
- LLMContentPart,
31
- LLMMessageContent,
32
- } from '../interfaces.js';
33
- import type { DecodedEvent } from '../stream-decoder.js';
34
- import type { Auditor } from '../auditor.js';
35
-
36
// ============================================================================
// Anthropic-Specific Types
// ============================================================================

/** Plain-text content block. */
interface AnthropicTextBlock {
    readonly type: 'text';
    readonly text: string;
}

/** Image content block, sourced either from inline base64 data or from a URL. */
interface AnthropicImageBlock {
    readonly type: 'image';
    readonly source: {
        readonly type: 'base64' | 'url';
        readonly media_type?: string;
        readonly data?: string;
        readonly url?: string;
    };
}

/** Tool invocation emitted by the assistant. */
interface AnthropicToolUseBlock {
    readonly type: 'tool_use';
    readonly id: string;
    readonly name: string;
    readonly input: Record<string, unknown>;
}

/**
 * Result of a tool invocation, sent back inside a user message.
 * The Messages API links it to the originating `tool_use` block through
 * `tool_use_id` (not the OpenAI-style `tool_call_id`).
 */
interface AnthropicToolResultBlock {
    readonly type: 'tool_result';
    readonly tool_use_id: string;
    readonly content: string | AnthropicTextBlock[];
}

/** Extended-thinking content block. */
interface AnthropicThinkingBlock {
    readonly type: 'thinking';
    readonly thinking: string;
    readonly signature: string;
}

/** Every content block shape this provider reads or writes. */
type AnthropicContentBlock =
    | AnthropicTextBlock
    | AnthropicImageBlock
    | AnthropicToolUseBlock
    | AnthropicToolResultBlock
    | AnthropicThinkingBlock;

/** Anthropic message format */
interface AnthropicMessage {
    readonly role: 'user' | 'assistant';
    readonly content: string | AnthropicContentBlock[];
}

/** Anthropic tool definition (uses input_schema, not parameters) */
interface AnthropicToolDef {
    readonly name: string;
    readonly description: string;
    readonly input_schema: {
        readonly type: 'object';
        readonly properties?: Record<string, unknown>;
        readonly required?: string[];
    };
}

/** Anthropic request body */
interface AnthropicRequest {
    readonly model: string;
    readonly messages: AnthropicMessage[];
    readonly max_tokens: number;
    readonly system?: string | Array<{
        type: 'text';
        text: string;
        cache_control?: { type: 'ephemeral' };
    }>;
    readonly tools?: AnthropicToolDef[];
    readonly tool_choice?: {
        readonly type: 'auto' | 'any' | 'tool' | 'none';
        readonly name?: string;
    };
    readonly stream?: boolean;
    readonly temperature?: number;
    readonly thinking?:
        | { readonly type: 'enabled'; readonly budget_tokens: number }
        | { readonly type: 'disabled' };
}

/** Anthropic non-streaming response */
interface AnthropicResponse {
    readonly id: string;
    readonly type: 'message';
    readonly role: 'assistant';
    readonly content: AnthropicContentBlock[];
    readonly model: string;
    readonly stop_reason: 'end_turn' | 'max_tokens' | 'stop_sequence' | 'tool_use' | null;
    readonly usage: {
        readonly input_tokens: number;
        readonly output_tokens: number;
    };
}

/** Anthropic model from models list */
interface AnthropicModelInfo {
    readonly id: string;
    readonly display_name: string;
    readonly created_at: string;
    readonly type: 'model';
}

// ============================================================================
// Streaming Event Types
// ============================================================================

interface StreamMessageStart {
    readonly type: 'message_start';
    readonly message: AnthropicResponse;
}

interface StreamContentBlockStart {
    readonly type: 'content_block_start';
    readonly index: number;
    readonly content_block: AnthropicContentBlock;
}

interface StreamContentBlockDelta {
    readonly type: 'content_block_delta';
    readonly index: number;
    readonly delta:
        | { readonly type: 'text_delta'; readonly text: string }
        | { readonly type: 'input_json_delta'; readonly partial_json: string }
        | { readonly type: 'thinking_delta'; readonly thinking: string }
        | { readonly type: 'signature_delta'; readonly signature: string };
}

interface StreamContentBlockStop {
    readonly type: 'content_block_stop';
    readonly index: number;
}

interface StreamMessageDelta {
    readonly type: 'message_delta';
    readonly delta: {
        readonly stop_reason: string | null;
        readonly stop_sequence?: string | null;
    };
    readonly usage: {
        readonly output_tokens: number;
    };
}

interface StreamMessageStop {
    readonly type: 'message_stop';
}

type AnthropicStreamEvent =
    | StreamMessageStart
    | StreamContentBlockStart
    | StreamContentBlockDelta
    | StreamContentBlockStop
    | StreamMessageDelta
    | StreamMessageStop
    | { readonly type: 'ping' }
    | { readonly type: 'error'; readonly error: { readonly type: string; readonly message: string } };

/** Mutable accumulator for one content block while its deltas stream in. */
interface StreamedBlock {
    type: string;
    text: string;
    toolId?: string;
    toolName?: string;
    inputJson?: string;
    thinking?: string;
    signature?: string;
}

/**
 * Decode one SSE `data` payload into a stream event.
 *
 * @returns the decoded event, or `undefined` when the payload is not a JSON
 *          object (such payloads are skipped by the caller).
 */
function parseAnthropicStreamEvent(data: string): AnthropicStreamEvent | undefined {
    try {
        const parsed: unknown = JSON.parse(data);
        if (typeof parsed !== 'object' || parsed === null) return undefined;
        return parsed as AnthropicStreamEvent;
    } catch {
        // Unparseable SSE data is skipped rather than aborting the stream.
        return undefined;
    }
}

/**
 * Build the canonical tool call for a fully streamed `tool_use` block.
 *
 * @returns `undefined` when the block is not a tool call or lacks an id/name.
 */
function toolCallFromStreamedBlock(block: StreamedBlock): LLMToolCall | undefined {
    if (block.type !== 'tool_use' || !block.toolId || !block.toolName) {
        return undefined;
    }
    return {
        id: block.toolId,
        type: 'function',
        function: {
            name: block.toolName,
            // `inputJson` starts as '' and stays '' when no input_json_delta
            // carried any text; `||` (not `??`) turns that into valid JSON.
            arguments: block.inputJson || '{}',
        },
    };
}

// ============================================================================
// Anthropic Client
// ============================================================================

/**
 * BaseLLMClient implementation that talks to Anthropic's Messages API.
 *
 * Translates the library's canonical messages, tools and options into the
 * Anthropic request shape, and Anthropic responses / stream events back into
 * canonical responses and decoded events.
 */
export class AnthropicClient extends BaseLLMClient {
    private readonly baseUrl: string;

    /**
     * @param options Client options; `url` defaults to `https://api.anthropic.com`
     *                and has trailing slashes removed.
     * @param auditor Optional auditor forwarded to the base client.
     */
    constructor(options: LLMClientOptions, auditor?: Auditor) {
        const url = (options.url || 'https://api.anthropic.com').replace(/\/+$/, '');
        super({ ...options, url }, auditor);
        this.baseUrl = url;
    }

    // ========================================================================
    // Headers
    // ========================================================================

    /** Headers for every request: JSON content type, API version, and `x-api-key` when configured. */
    private buildAnthropicHeaders(): Record<string, string> {
        const headers: Record<string, string> = {
            'Content-Type': 'application/json',
            'anthropic-version': '2023-06-01',
        };
        if (this.options.apiKey) {
            headers['x-api-key'] = this.options.apiKey;
        }
        return headers;
    }

    // ========================================================================
    // Chat (non-streaming)
    // ========================================================================

    /**
     * Send a single non-streaming request to `/v1/messages`.
     *
     * Records a `request` audit event before the call and a `response` event
     * (with duration and usage) after it. Errors from the HTTP helper propagate.
     */
    override async chat(
        messages: LLMChatMessage[],
        options?: ChatOptions,
    ): Promise<LLMChatResponse> {
        const url = `${this.baseUrl}/v1/messages`;
        const body = this.buildRequestBody(messages, options, false);

        const start = Date.now();
        this.auditor.record({
            timestamp: start,
            type: 'request',
            provider: 'anthropic',
            model: this.options.model,
        });

        const response = await httpRequest<AnthropicResponse>(url, {
            method: 'POST',
            headers: this.buildAnthropicHeaders(),
            body,
            timeout: this.options.timeout ?? 60000,
        });

        const result = this.parseAnthropicResponse(response.data);

        this.auditor.record({
            timestamp: Date.now(),
            type: 'response',
            provider: 'anthropic',
            model: this.options.model,
            duration: Date.now() - start,
            usage: result.usage,
        });

        return result;
    }

    // ========================================================================
    // Streaming
    // ========================================================================

    /**
     * Stream a response from `/v1/messages`.
     *
     * Yields `text` and `thinking` events as deltas arrive and one `tool_call`
     * event per completed `tool_use` block; returns the assembled response
     * once the stream ends.
     *
     * @throws Error when the stream carries an `error` event.
     */
    override async *chatStream(
        messages: LLMChatMessage[],
        options?: ChatOptions,
    ): AsyncGenerator<DecodedEvent, LLMChatResponse | void, unknown> {
        const url = `${this.baseUrl}/v1/messages`;
        const body = this.buildRequestBody(messages, options, true);

        const start = Date.now();
        this.auditor.record({
            timestamp: start,
            type: 'stream_start',
            provider: 'anthropic',
            model: this.options.model,
        });

        const decoder = new StandardChatDecoder(() => {});

        // Content blocks accumulated by their stream index.
        const contentBlocks = new Map<number, StreamedBlock>();

        let usage: TokenUsageInfo | undefined;
        let inputTokens = 0;

        const stream = httpStream(url, {
            method: 'POST',
            headers: this.buildAnthropicHeaders(),
            body,
            timeout: this.options.timeout ?? 120000,
        });

        for await (const { data } of parseSSE(stream)) {
            // Only JSON decoding is best-effort. Event handling runs outside
            // any try/catch so errors thrown into the generator at a `yield`
            // are not swallowed.
            const event = parseAnthropicStreamEvent(data);
            if (!event) continue;

            switch (event.type) {
                case 'message_start': {
                    // Input token count arrives here; output count arrives in message_delta.
                    inputTokens = event.message?.usage?.input_tokens ?? 0;
                    break;
                }

                case 'content_block_start': {
                    const opened = event.content_block;
                    if (opened?.type === 'text') {
                        contentBlocks.set(event.index, { type: 'text', text: '' });
                    } else if (opened?.type === 'tool_use') {
                        contentBlocks.set(event.index, {
                            type: 'tool_use',
                            text: '',
                            toolId: opened.id,
                            toolName: opened.name,
                            inputJson: '',
                        });
                    } else if (opened?.type === 'thinking') {
                        contentBlocks.set(event.index, { type: 'thinking', text: '', thinking: '' });
                    }
                    break;
                }

                case 'content_block_delta': {
                    const block = contentBlocks.get(event.index);
                    const delta = event.delta;
                    // Deltas for unknown blocks, or events without a delta, are ignored.
                    if (!block || !delta) break;

                    if (delta.type === 'text_delta') {
                        block.text += delta.text;
                        decoder.push(delta.text);
                        yield { type: 'text', content: delta.text };
                    } else if (delta.type === 'input_json_delta') {
                        block.inputJson = (block.inputJson ?? '') + delta.partial_json;
                    } else if (delta.type === 'thinking_delta') {
                        block.thinking = (block.thinking ?? '') + delta.thinking;
                        decoder.pushReasoning(delta.thinking);
                        yield { type: 'thinking', content: delta.thinking };
                    } else if (delta.type === 'signature_delta') {
                        block.signature = delta.signature;
                    }
                    break;
                }

                case 'content_block_stop': {
                    // A finished tool_use block becomes one tool_call event.
                    const block = contentBlocks.get(event.index);
                    const call = block && toolCallFromStreamedBlock(block);
                    if (call) {
                        yield { type: 'tool_call', calls: [call] };
                    }
                    break;
                }

                case 'message_delta': {
                    const outputTokens = event.usage?.output_tokens ?? 0;
                    usage = {
                        inputTokens,
                        outputTokens,
                        totalTokens: inputTokens + outputTokens,
                    };
                    break;
                }

                case 'error': {
                    const detail = event.error;
                    throw new Error(`Anthropic stream error: ${detail?.type} — ${detail?.message}`);
                }
            }
        }

        decoder.flush();

        this.auditor.record({
            timestamp: Date.now(),
            type: 'stream_end',
            provider: 'anthropic',
            model: this.options.model,
            duration: Date.now() - start,
            usage,
        });

        // Final tool calls, rebuilt from the accumulated content blocks.
        const toolCalls: LLMToolCall[] = [];
        for (const block of contentBlocks.values()) {
            const call = toolCallFromStreamedBlock(block);
            if (call) toolCalls.push(call);
        }

        return {
            message: {
                role: 'assistant',
                content: decoder.getCleanContent(),
                tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
            },
            reasoning: decoder.getReasoning(),
            usage,
            provider: 'anthropic',
        };
    }

    // ========================================================================
    // Embeddings (not supported by Anthropic)
    // ========================================================================

    /** Always rejects: this provider has no embeddings support. */
    override async embed(_text: string): Promise<number[]> {
        throw new Error('Anthropic does not support embeddings. Use a different provider.');
    }

    // ========================================================================
    // Model Discovery
    // ========================================================================

    /**
     * List model ids from `/v1/models`.
     * On any failure, returns a fixed list of well-known Claude models instead.
     */
    override async getModels(): Promise<string[]> {
        const url = `${this.baseUrl}/v1/models`;
        try {
            const response = await httpRequest<{
                data: AnthropicModelInfo[];
            }>(url, {
                headers: this.buildAnthropicHeaders(),
                timeout: 5000,
            });
            return response.data.data.map(m => m.id);
        } catch {
            // Fallback: return well-known Claude models
            return [
                'claude-sonnet-4-20250514',
                'claude-haiku-4-20250514',
                'claude-opus-4-20250514',
            ];
        }
    }

    /**
     * Describe a model without calling the API.
     *
     * @param _modelName Model to describe; defaults to the configured model.
     * @returns Metadata with a 200K context length. `thinking` is listed only
     *          for names matching a known Claude family substring.
     */
    override async getModelInfo(_modelName?: string): Promise<ModelMetadata> {
        const model = _modelName ?? this.options.model;

        const knownFamilies = ['claude-4', 'claude-opus', 'claude-sonnet', 'claude-haiku'];
        const supportsThinking = knownFamilies.some(family => model.includes(family));

        return {
            model,
            contextLength: 200_000,
            capabilities: supportsThinking
                ? ['tools', 'vision', 'thinking']
                : ['tools', 'vision'],
        };
    }

    // ========================================================================
    // Internal: Request Building
    // ========================================================================

    /**
     * Build the Messages API request body.
     *
     * - `system` messages are joined with blank lines into the top-level
     *   `system` field (as a cacheable block when `enablePromptCaching` is set).
     * - Tools come from `options.tools`, else from the tool registry.
     * - `tool_choice` is only sent together with tools.
     * - With thinking enabled, `temperature` is omitted and `max_tokens` is
     *   raised to leave room beyond the thinking budget.
     */
    private buildRequestBody(
        messages: LLMChatMessage[],
        options: ChatOptions | undefined,
        stream: boolean,
    ): AnthropicRequest {
        // Extract system prompt from messages
        const systemMessages = messages.filter(m => m.role === 'system');
        const nonSystemMessages = messages.filter(m => m.role !== 'system');

        const systemPrompt = systemMessages.length > 0
            ? systemMessages
                .map(m => typeof m.content === 'string' ? m.content : this.extractText(m.content))
                .join('\n\n')
            : undefined;

        // Prompt caching support (Anthropic-specific, high impact for long system prompts / RAG)
        let system: AnthropicRequest['system'] = systemPrompt;
        if (options?.enablePromptCaching && systemPrompt) {
            system = [
                {
                    type: 'text',
                    text: systemPrompt,
                    cache_control: { type: 'ephemeral' },
                },
            ];
        }

        // Convert tools from OpenAI format to Anthropic format
        const tools = options?.tools ?? (
            Object.keys(this.toolRegistry).length > 0 ? this.getToolDefinitions() : undefined
        );
        const anthropicTools = tools?.map(t => this.convertToolDef(t));
        const toolsToSend = anthropicTools && anthropicTools.length > 0 ? anthropicTools : undefined;

        // Map tool_choice. 'none' maps to Anthropic's own `none` option, so
        // the model cannot call tools; mapping it to `auto` would allow them.
        let toolChoice: AnthropicRequest['tool_choice'];
        if (options?.toolChoice === 'required') {
            toolChoice = { type: 'any' };
        } else if (options?.toolChoice === 'none') {
            toolChoice = { type: 'none' };
        } else if (options?.toolChoice === 'auto') {
            toolChoice = { type: 'auto' };
        }

        // Unified thinking flag → Anthropic extended thinking. Per-call overrides
        // model config; the level sets `budget_tokens` (kept < max_tokens). The
        // API forbids a custom temperature while thinking is enabled, so it is
        // omitted in that case (the required default of 1 applies).
        const thinking = resolveThinking(options?.thinking, this.options.thinking);
        const thinkingOn = thinking?.enabled === true;
        const requestedMax = options?.maxTokens ?? 4096;
        // Extended thinking requires budget_tokens >= 1024 AND < max_tokens, so when
        // thinking is on we bump max_tokens to guarantee headroom for the answer.
        const budget = thinkingOn ? anthropicThinkingBudget(thinking?.level, requestedMax) : 0;
        const maxTokens = thinkingOn ? Math.max(requestedMax, budget + 1024) : requestedMax;

        const sampling = thinkingOn
            ? { thinking: { type: 'enabled' as const, budget_tokens: budget } }
            : (options?.temperature !== undefined ? { temperature: options.temperature } : {});

        const body: AnthropicRequest = {
            model: this.options.model,
            messages: this.convertMessages(nonSystemMessages),
            max_tokens: maxTokens,
            ...(system ? { system } : {}),
            ...(toolsToSend ? { tools: toolsToSend } : {}),
            // tool_choice only has meaning when tools accompany the request.
            ...(toolsToSend && toolChoice ? { tool_choice: toolChoice } : {}),
            ...(stream ? { stream: true } : {}),
            ...sampling,
        };

        return body;
    }

    // ========================================================================
    // Internal: Message Conversion
    // ========================================================================

    /**
     * Convert our canonical LLMChatMessage[] to Anthropic's message format.
     * Key conversions:
     * - 'tool' role messages → user messages made of tool_result blocks
     *   (consecutive tool messages share one user message)
     * - assistant messages with tool_calls → assistant message with tool_use blocks
     * - multimodal content → Anthropic image blocks
     *
     * Assistant messages with neither text nor tool calls are dropped; roles
     * other than assistant/tool/user are ignored.
     */
    private convertMessages(messages: LLMChatMessage[]): AnthropicMessage[] {
        const result: AnthropicMessage[] = [];

        for (let i = 0; i < messages.length; i++) {
            const msg = messages[i]!;

            if (msg.role === 'assistant') {
                const blocks: AnthropicContentBlock[] = [];

                const text = typeof msg.content === 'string'
                    ? msg.content
                    : this.extractText(msg.content);
                if (text) {
                    blocks.push({ type: 'text', text });
                }

                for (const tc of msg.tool_calls ?? []) {
                    let input: Record<string, unknown> = {};
                    try {
                        input = JSON.parse(tc.function.arguments);
                    } catch {
                        // Keep empty object if parse fails
                    }
                    blocks.push({
                        type: 'tool_use',
                        id: tc.id,
                        name: tc.function.name,
                        input,
                    });
                }

                if (blocks.length > 0) {
                    result.push({ role: 'assistant', content: blocks });
                }
            } else if (msg.role === 'tool') {
                // Anthropic needs tool results inside user messages; gather the
                // whole run of consecutive tool messages into one of them.
                const toolResults: AnthropicContentBlock[] = [this.buildToolResultBlock(msg)];
                while (i + 1 < messages.length && messages[i + 1]!.role === 'tool') {
                    i++;
                    toolResults.push(this.buildToolResultBlock(messages[i]!));
                }

                result.push({ role: 'user', content: toolResults });
            } else if (msg.role === 'user') {
                result.push({ role: 'user', content: this.convertUserContent(msg.content) });
            }
        }

        // Anthropic requires alternating user/assistant messages.
        // Merge consecutive same-role messages if needed.
        return this.ensureAlternating(result);
    }

    /**
     * Turn a canonical 'tool' message into a `tool_result` block.
     * The canonical `tool_call_id` is sent as `tool_use_id` ('' when absent).
     */
    private buildToolResultBlock(msg: LLMChatMessage): AnthropicToolResultBlock {
        return {
            type: 'tool_result',
            tool_use_id: msg.tool_call_id ?? '',
            content: typeof msg.content === 'string'
                ? msg.content
                : this.extractText(msg.content),
        };
    }

    /**
     * Convert user message content (string or multimodal) to Anthropic blocks.
     *
     * Audio parts and malformed `data:` image URIs are dropped with a debug
     * log. If nothing remains, a single empty text block is returned.
     */
    private convertUserContent(content: LLMMessageContent): AnthropicContentBlock[] {
        if (typeof content === 'string') {
            return [{ type: 'text', text: content }];
        }

        const blocks: AnthropicContentBlock[] = [];
        for (const part of content as LLMContentPart[]) {
            if (part.type === 'text') {
                blocks.push({ type: 'text', text: part.text });
            } else if (part.type === 'audio') {
                // Anthropic does not yet support audio input — skip silently
                this.debugLog('[Anthropic] Audio content dropped — not supported');
            } else if (part.type === 'image_url') {
                const url = part.image_url.url;
                if (url.startsWith('data:')) {
                    // Extract base64 data from data URI
                    const match = url.match(/^data:([^;]+);base64,(.+)$/);
                    if (match) {
                        blocks.push({
                            type: 'image',
                            source: {
                                type: 'base64',
                                media_type: match[1],
                                data: match[2],
                            },
                        });
                    } else {
                        this.debugLog('[Anthropic] Image data URI dropped — not base64-encoded');
                    }
                } else {
                    // URL-based image
                    blocks.push({
                        type: 'image',
                        source: {
                            type: 'url',
                            url,
                        },
                    });
                }
            }
        }
        return blocks.length > 0 ? blocks : [{ type: 'text', text: '' }];
    }

    /**
     * Ensure messages alternate between user and assistant roles.
     * Anthropic requires strict alternation. Merge consecutive same-role messages
     * by concatenating their content blocks (string content becomes a text block).
     */
    private ensureAlternating(messages: AnthropicMessage[]): AnthropicMessage[] {
        if (messages.length <= 1) return messages;

        const asBlocks = (content: AnthropicMessage['content']): AnthropicContentBlock[] =>
            Array.isArray(content) ? content : [{ type: 'text' as const, text: content }];

        const merged: AnthropicMessage[] = [messages[0]!];

        for (let i = 1; i < messages.length; i++) {
            const current = messages[i]!;
            const last = merged[merged.length - 1]!;

            if (current.role === last.role) {
                merged[merged.length - 1] = {
                    role: current.role,
                    content: [...asBlocks(last.content), ...asBlocks(current.content)],
                };
            } else {
                merged.push(current);
            }
        }

        return merged;
    }

    // ========================================================================
    // Internal: Response Parsing
    // ========================================================================

    /**
     * Parse Anthropic's response format into our canonical LLMChatResponse.
     *
     * Text blocks are concatenated into the message content, `tool_use`
     * blocks become tool calls (input re-serialised as JSON), and thinking
     * blocks are concatenated into `reasoning`.
     */
    private parseAnthropicResponse(data: AnthropicResponse): LLMChatResponse {
        let textContent = '';
        let reasoning: string | undefined;
        const toolCalls: LLMToolCall[] = [];

        for (const block of data.content) {
            if (block.type === 'text') {
                textContent += block.text;
            } else if (block.type === 'tool_use') {
                toolCalls.push({
                    id: block.id,
                    type: 'function',
                    function: {
                        name: block.name,
                        arguments: JSON.stringify(block.input),
                    },
                });
            } else if (block.type === 'thinking') {
                reasoning = (reasoning ?? '') + block.thinking;
            }
        }

        const usage: TokenUsageInfo = {
            inputTokens: data.usage.input_tokens,
            outputTokens: data.usage.output_tokens,
            totalTokens: data.usage.input_tokens + data.usage.output_tokens,
        };

        return {
            message: {
                role: 'assistant',
                content: textContent,
                tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
            },
            reasoning,
            usage,
            provider: 'anthropic',
        };
    }

    // ========================================================================
    // Internal: Helpers
    // ========================================================================

    /** Convert OpenAI-format tool definition to Anthropic format */
    private convertToolDef(tool: LLMToolDefinition): AnthropicToolDef {
        return {
            name: tool.function.name,
            description: tool.function.description,
            input_schema: {
                type: 'object',
                properties: tool.function.parameters.properties,
                required: tool.function.parameters.required,
            },
        };
    }

    /** Extract text from multimodal content: text parts concatenated, all other parts ignored. */
    private extractText(content: LLMMessageContent): string {
        if (typeof content === 'string') return content;
        return (content as LLMContentPart[])
            .filter((p): p is { type: 'text'; text: string } => p.type === 'text')
            .map(p => p.text)
            .join('');
    }
}