@llumiverse/drivers 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/lib/cjs/anthropic/index.js +64 -0
  2. package/lib/cjs/anthropic/index.js.map +1 -0
  3. package/lib/cjs/index.js +1 -0
  4. package/lib/cjs/index.js.map +1 -1
  5. package/lib/cjs/openai/index.js +12 -6
  6. package/lib/cjs/openai/index.js.map +1 -1
  7. package/lib/cjs/shared/claude-messages.js +737 -0
  8. package/lib/cjs/shared/claude-messages.js.map +1 -0
  9. package/lib/cjs/vertexai/index.js.map +1 -1
  10. package/lib/cjs/vertexai/models/claude.js +27 -872
  11. package/lib/cjs/vertexai/models/claude.js.map +1 -1
  12. package/lib/cjs/vertexai/models/gemini.js +18 -12
  13. package/lib/cjs/vertexai/models/gemini.js.map +1 -1
  14. package/lib/esm/anthropic/index.js +57 -0
  15. package/lib/esm/anthropic/index.js.map +1 -0
  16. package/lib/esm/index.js +1 -0
  17. package/lib/esm/index.js.map +1 -1
  18. package/lib/esm/openai/index.js +12 -7
  19. package/lib/esm/openai/index.js.map +1 -1
  20. package/lib/esm/shared/claude-messages.js +716 -0
  21. package/lib/esm/shared/claude-messages.js.map +1 -0
  22. package/lib/esm/vertexai/index.js.map +1 -1
  23. package/lib/esm/vertexai/models/claude.js +27 -865
  24. package/lib/esm/vertexai/models/claude.js.map +1 -1
  25. package/lib/esm/vertexai/models/gemini.js +18 -12
  26. package/lib/esm/vertexai/models/gemini.js.map +1 -1
  27. package/lib/types/anthropic/index.d.ts +21 -0
  28. package/lib/types/anthropic/index.d.ts.map +1 -0
  29. package/lib/types/index.d.ts +1 -0
  30. package/lib/types/index.d.ts.map +1 -1
  31. package/lib/types/openai/index.d.ts +1 -0
  32. package/lib/types/openai/index.d.ts.map +1 -1
  33. package/lib/types/shared/claude-messages.d.ts +75 -0
  34. package/lib/types/shared/claude-messages.d.ts.map +1 -0
  35. package/lib/types/vertexai/index.d.ts +4 -4
  36. package/lib/types/vertexai/index.d.ts.map +1 -1
  37. package/lib/types/vertexai/models/claude.d.ts +3 -106
  38. package/lib/types/vertexai/models/claude.d.ts.map +1 -1
  39. package/lib/types/vertexai/models/gemini.d.ts +1 -1
  40. package/lib/types/vertexai/models/gemini.d.ts.map +1 -1
  41. package/package.json +7 -6
  42. package/src/anthropic/index.ts +104 -0
  43. package/src/index.ts +1 -0
  44. package/src/openai/index.ts +13 -8
  45. package/src/shared/claude-messages.ts +879 -0
  46. package/src/vertexai/index.ts +18 -19
  47. package/src/vertexai/models/claude-error-handling.test.ts +3 -3
  48. package/src/vertexai/models/claude.ts +44 -1016
  49. package/src/vertexai/models/gemini.ts +27 -14
@@ -1,36 +1,20 @@
1
1
  import {
2
- APIConnectionError,
3
- APIConnectionTimeoutError,
4
- APIError,
5
- AuthenticationError,
6
- BadRequestError,
7
- ConflictError,
8
- InternalServerError,
9
- NotFoundError,
10
- PermissionDeniedError,
11
- RateLimitError,
12
- UnprocessableEntityError,
13
- } from '@anthropic-ai/sdk/error';
14
- import type { ContentBlock, ContentBlockParam, DocumentBlockParam, ImageBlockParam, Message, MessageParam, TextBlockParam, ToolResultBlockParam } from "@anthropic-ai/sdk/resources/index.js";
15
- import type { MessageStreamParams } from "@anthropic-ai/sdk/resources/index.mjs";
16
- import type { MessageCreateParamsBase, MessageCreateParamsNonStreaming, RawMessageStreamEvent } from "@anthropic-ai/sdk/resources/messages.js";
17
- import {
18
- type AIModel, type Completion, type CompletionChunkObject, type ExecutionOptions, type ExecutionTokenUsage,
19
- getConversationMeta,
20
- getMaxTokensLimitVertexAi,
21
- incrementConversationTurn,
22
- type JSONObject,
23
- LlumiverseError, type LlumiverseErrorContext,
2
+ type AIModel, type Completion, type CompletionChunkObject, type ExecutionOptions, type LlumiverseError,
3
+ type LlumiverseErrorContext,
24
4
  ModelType,
25
- PromptRole, type PromptSegment, readStreamAsBase64, readStreamAsString, type StatelessExecutionOptions,
26
- stripBase64ImagesFromConversation,
27
- stripHeartbeatsFromConversation,
28
- type ToolUse,
29
- truncateLargeTextInConversation,
5
+ type PromptSegment,
30
6
  type VertexAIClaudeOptions,
31
7
  } from "@llumiverse/core";
32
- import { asyncMap } from "@llumiverse/core/async";
33
- import { resolveClaudeThinking } from "../../shared/claude-thinking.js";
8
+ import type { ClaudePrompt } from "../../shared/claude-messages.js";
9
+ import {
10
+ executeClaudeCompletion,
11
+ formatAnthropicLlumiverseError,
12
+ formatClaudePrompt,
13
+ isClaudeErrorRetryable,
14
+ streamClaudeCompletion,
15
+ } from "../../shared/claude-messages.js";
16
+
17
+
34
18
  import type { VertexAIDriver } from "../index.js";
35
19
  import type { ModelDefinition } from "../models.js";
36
20
 
@@ -45,146 +29,18 @@ export const NON_GLOBAL_ANTHROPIC_MODELS = [
45
29
  "claude-3",
46
30
  ];
47
31
 
48
- interface ClaudePrompt {
49
- messages: MessageParam[];
50
- system?: TextBlockParam[];
51
- }
52
-
53
- interface AnthropicUsageLike {
54
- input_tokens: number;
55
- output_tokens: number;
56
- cache_read_input_tokens?: number | null;
57
- cache_creation_input_tokens?: number | null;
58
- }
59
-
60
- function anthropicUsageToTokenUsage(usage: AnthropicUsageLike): ExecutionTokenUsage {
61
- const cacheRead = usage.cache_read_input_tokens ?? 0;
62
- const cacheWrite = usage.cache_creation_input_tokens ?? 0;
63
- return {
64
- prompt_new: usage.input_tokens,
65
- prompt: usage.input_tokens + cacheRead + cacheWrite,
66
- result: usage.output_tokens,
67
- total: usage.input_tokens + usage.output_tokens + cacheRead + cacheWrite,
68
- prompt_cached: usage.cache_read_input_tokens ?? undefined,
69
- prompt_cache_write: usage.cache_creation_input_tokens ?? undefined,
70
- };
71
- }
72
-
73
- function claudeFinishReason(reason: string | undefined) {
74
- if (!reason) return undefined;
75
- switch (reason) {
76
- case 'end_turn': return "stop";
77
- case 'max_tokens': return "length";
78
- default: return reason; //stop_sequence
79
- }
80
- }
81
-
82
- export function collectTools(content: ContentBlock[]): ToolUse[] | undefined {
83
- const out: ToolUse[] = [];
84
-
85
- for (const block of content) {
86
- if (block.type === "tool_use") {
87
- out.push({
88
- id: block.id,
89
- tool_name: block.name,
90
- tool_input: block.input as JSONObject,
91
- });
92
- }
93
- }
94
-
95
- return out.length > 0 ? out : undefined;
96
- }
97
-
98
- function collectAllTextContent(content: ContentBlock[], includeThoughts: boolean = false) {
99
- const textParts: string[] = [];
100
-
101
- // First pass: collect thinking blocks
102
- if (includeThoughts) {
103
- for (const block of content) {
104
- if (block.type === 'thinking' && block.thinking) {
105
- textParts.push(block.thinking);
106
- } else if (block.type === 'redacted_thinking' && block.data) {
107
- textParts.push(`[Redacted thinking: ${block.data}]`);
108
- }
109
- }
110
- if (textParts.length > 0) {
111
- textParts.push(''); // Create a new line after thinking blocks
112
- }
113
- }
114
-
115
- // Second pass: collect text blocks
116
- for (const block of content) {
117
- if (block.type === 'text' && block.text) {
118
- textParts.push(block.text);
119
- }
120
- }
121
-
122
- return textParts.join('\n');
123
- }
124
-
125
- //Used to get a max_token value when not specified in the model options. Claude requires it to be set.
126
- function maxToken(option: StatelessExecutionOptions): number {
127
- const modelOptions = option.model_options as VertexAIClaudeOptions | undefined;
128
- if (modelOptions && typeof modelOptions.max_tokens === "number") {
129
- return modelOptions.max_tokens;
130
- } else {
131
- let maxSupportedTokens = getMaxTokensLimitVertexAi(option.model);
132
- // Fallback to the default max tokens limit for the model
133
- if (option.model.includes('claude-3-7-sonnet') && (modelOptions?.thinking_budget_tokens ?? 0) < 48000) {
134
- maxSupportedTokens = 64000; // Claude 3.7 can go up to 128k with a beta header, but when no max tokens is specified, we default to 64k.
135
- }
136
- return maxSupportedTokens;
137
- }
138
- }
139
-
140
- // Type-safe overloads for collectFileBlocks
141
- async function collectFileBlocks(segment: PromptSegment, restrictedTypes: true): Promise<Array<TextBlockParam | ImageBlockParam>>;
142
- async function collectFileBlocks(segment: PromptSegment, restrictedTypes?: false): Promise<ContentBlockParam[]>;
143
- async function collectFileBlocks(segment: PromptSegment, restrictedTypes: boolean = false): Promise<ContentBlockParam[]> {
144
- const contentBlocks: ContentBlockParam[] = [];
145
-
146
- for (const file of segment.files || []) {
147
- if (file.mime_type?.startsWith("image/")) {
148
- const allowedTypes = ["image/png", "image/jpeg", "image/gif", "image/webp"];
149
- if (!allowedTypes.includes(file.mime_type)) {
150
- throw new Error(`Unsupported image type: ${file.mime_type}`);
151
- }
152
- const mimeType = String(file.mime_type) as "image/png" | "image/jpeg" | "image/gif" | "image/webp";
153
-
154
- contentBlocks.push({
155
- type: 'image',
156
- source: {
157
- type: 'base64',
158
- data: await readStreamAsBase64(await file.getStream()),
159
- media_type: mimeType
160
- }
161
- } satisfies ImageBlockParam);
162
- } else if (!restrictedTypes) {
163
- if (file.mime_type === "application/pdf") {
164
- contentBlocks.push({
165
- title: file.name,
166
- type: 'document',
167
- source: {
168
- type: 'base64',
169
- data: await readStreamAsBase64(await file.getStream()),
170
- media_type: 'application/pdf'
171
- }
172
- } satisfies DocumentBlockParam);
173
- } else if (file.mime_type?.startsWith("text/")) {
174
- contentBlocks.push({
175
- title: file.name,
176
- type: 'document',
177
- source: {
178
- type: 'text',
179
- data: await readStreamAsString(await file.getStream()),
180
- media_type: 'text/plain'
181
- }
182
- } satisfies DocumentBlockParam);
183
- }
184
- }
32
+ /**
33
+ * Parse a VertexAI model path (e.g. "locations/us-east5/claude-3-5-sonnet") into
34
+ * its region and model name components.
35
+ */
36
+ function resolveVertexAIModelPath(options: ExecutionOptions): { modelName: string; region: string | undefined; options: ExecutionOptions } {
37
+ const splits = options.model.split("/");
38
+ let region: string | undefined;
39
+ if (splits[0] === "locations" && splits.length >= 2) {
40
+ region = splits[1];
185
41
  }
186
-
187
- return contentBlocks;
42
+ const modelName = splits[splits.length - 1];
43
+ return { modelName, region, options: { ...options, model: modelName } };
188
44
  }
189
45
 
190
46
  export class ClaudeModelDefinition implements ModelDefinition<ClaudePrompt> {
@@ -202,874 +58,46 @@ export class ClaudeModelDefinition implements ModelDefinition<ClaudePrompt> {
202
58
  }
203
59
 
204
60
  async createPrompt(_driver: VertexAIDriver, segments: PromptSegment[], options: ExecutionOptions): Promise<ClaudePrompt> {
205
- // Convert the prompt to the format expected by the Claude API
206
- let system: TextBlockParam[] | undefined = segments
207
- .filter(segment => segment.role === PromptRole.system)
208
- .map(segment => ({
209
- text: segment.content,
210
- type: 'text'
211
- }));
212
-
213
- if (options.result_schema) {
214
- let schemaText: string = '';
215
- if (options.tools && options.tools.length > 0) {
216
- schemaText = "When not calling tools, the answer must be a JSON object using the following JSON Schema:\n" + JSON.stringify(options.result_schema);
217
- } else {
218
- schemaText = "The answer must be a JSON object using the following JSON Schema:\n" + JSON.stringify(options.result_schema);
219
- }
220
-
221
- const schemaSegments: TextBlockParam = {
222
- text: schemaText,
223
- type: 'text'
224
- }
225
- system.push(schemaSegments);
226
- }
227
-
228
- let messages: MessageParam[] = [];
229
- const safetyMessages: MessageParam[] = [];
230
- for (const segment of segments) {
231
- if (segment.role === PromptRole.system) {
232
- continue;
233
- }
234
-
235
- if (segment.role === PromptRole.tool) {
236
- if (!segment.tool_use_id) {
237
- throw new Error("Tool prompt segment must have a tool use ID");
238
- }
239
-
240
- // Build content blocks for tool results (restricted types)
241
- const contentBlocks: Array<TextBlockParam | ImageBlockParam> = [];
242
-
243
- if (segment.content) {
244
- contentBlocks.push({
245
- type: 'text',
246
- text: segment.content
247
- } satisfies TextBlockParam);
248
- }
249
-
250
- // Collect file blocks with type safety
251
- const fileBlocks = await collectFileBlocks(segment, true);
252
- contentBlocks.push(...fileBlocks);
253
-
254
- messages.push({
255
- role: 'user',
256
- content: [{
257
- type: 'tool_result',
258
- tool_use_id: segment.tool_use_id,
259
- content: contentBlocks,
260
- } satisfies ToolResultBlockParam]
261
- });
262
-
263
- } else {
264
- // Build content blocks for regular messages (all types allowed)
265
- const contentBlocks: ContentBlockParam[] = [];
266
-
267
- if (segment.content) {
268
- contentBlocks.push({
269
- type: 'text',
270
- text: segment.content
271
- } satisfies TextBlockParam);
272
- }
273
-
274
- // Collect file blocks without restrictions
275
- const fileBlocks = await collectFileBlocks(segment, false);
276
- contentBlocks.push(...fileBlocks);
277
-
278
- if (contentBlocks.length === 0) {
279
- continue; // skip empty segments
280
- }
281
-
282
- const messageParam: MessageParam = {
283
- role: segment.role === PromptRole.assistant ? 'assistant' : 'user',
284
- content: contentBlocks
285
- };
286
-
287
- if (segment.role === PromptRole.safety) {
288
- safetyMessages.push(messageParam);
289
- } else {
290
- messages.push(messageParam);
291
- }
292
- }
293
- }
294
-
295
- messages = messages.concat(safetyMessages);
296
-
297
- if (system && system.length === 0) {
298
- system = undefined; // If system is empty, set to undefined
299
- }
300
-
301
- return {
302
- messages: messages,
303
- system: system
304
- }
61
+ return formatClaudePrompt(segments, options);
305
62
  }
306
63
 
307
64
  async requestTextCompletion(driver: VertexAIDriver, prompt: ClaudePrompt, options: ExecutionOptions): Promise<Completion> {
308
- const splits = options.model.split("/");
309
- let region: string | undefined = undefined;
310
- if (splits[0] === "locations" && splits.length >= 2) {
311
- region = splits[1];
312
- }
313
- const modelName = splits[splits.length - 1];
314
- options = { ...options, model: modelName };
315
-
65
+ const { region, options: resolvedOptions } = resolveVertexAIModelPath(options);
316
66
  const client = await driver.getAnthropicClient(region);
317
- const model_options = options.model_options as VertexAIClaudeOptions | undefined;
318
-
67
+ const model_options = resolvedOptions.model_options as VertexAIClaudeOptions | undefined;
319
68
  if (model_options?._option_id !== undefined &&
320
69
  model_options?._option_id !== "vertexai-claude" &&
321
- model_options?._option_id !== "text-fallback"
322
- ) {
323
- driver.logger.debug({ options: options.model_options }, "Unexpected option id");
70
+ model_options?._option_id !== "text-fallback") {
71
+ driver.logger.debug({ options: resolvedOptions.model_options }, "Unexpected option id");
324
72
  }
325
-
326
- let conversation = updateConversation(options.conversation as ClaudePrompt, prompt);
327
-
328
- const { payload, requestOptions } = getClaudePayload(options, conversation);
329
- // disable streaming, the create function is overloaded so payload type matters.
330
- const nonStreamingPayload: MessageCreateParamsNonStreaming = { ...payload, stream: false };
331
-
332
- const result = await client.messages.create(nonStreamingPayload, requestOptions) satisfies Message;
333
-
334
- // Use the new function to collect text content, including thinking if enabled
335
- const includeThoughts = model_options?.include_thoughts ?? false;
336
- const text = collectAllTextContent(result.content, includeThoughts);
337
- const tool_use = collectTools(result.content);
338
-
339
- conversation = updateConversation(conversation, createPromptFromResponse(result));
340
-
341
- // Increment turn counter and apply stripping (same pattern as other drivers)
342
- conversation = incrementConversationTurn(conversation) as ClaudePrompt;
343
- const currentTurn = getConversationMeta(conversation).turnNumber;
344
- const stripOptions = {
345
- keepForTurns: options.stripImagesAfterTurns ?? Infinity,
346
- currentTurn,
347
- textMaxTokens: options.stripTextMaxTokens,
348
- };
349
- let processedConversation = stripBase64ImagesFromConversation(conversation, stripOptions);
350
- processedConversation = truncateLargeTextInConversation(processedConversation, stripOptions);
351
- processedConversation = stripHeartbeatsFromConversation(processedConversation, {
352
- keepForTurns: options.stripHeartbeatsAfterTurns ?? 1,
353
- currentTurn,
354
- });
355
-
356
- return {
357
- result: text ? [{ type: "text", value: text }] : [{ type: "text", value: '' }],
358
- tool_use,
359
- token_usage: anthropicUsageToTokenUsage(result.usage),
360
- // make sure we set finish_reason to the correct value (claude is normally setting this by itself)
361
- finish_reason: tool_use ? "tool_use" : claudeFinishReason(result?.stop_reason ?? ''),
362
- conversation: processedConversation
363
- } satisfies Completion;
73
+ return executeClaudeCompletion(client, prompt, resolvedOptions);
364
74
  }
365
75
 
366
76
  async requestTextCompletionStream(driver: VertexAIDriver, prompt: ClaudePrompt, options: ExecutionOptions): Promise<AsyncIterable<CompletionChunkObject>> {
367
- const splits = options.model.split("/");
368
- let region: string | undefined = undefined;
369
- if (splits[0] === "locations" && splits.length >= 2) {
370
- region = splits[1];
371
- }
372
- const modelName = splits[splits.length - 1];
373
- options = { ...options, model: modelName };
374
-
77
+ const { region, options: resolvedOptions } = resolveVertexAIModelPath(options);
375
78
  const client = await driver.getAnthropicClient(region);
376
- const model_options = options.model_options as VertexAIClaudeOptions | undefined;
377
-
378
- if ((model_options?._option_id !== undefined &&
79
+ const model_options = resolvedOptions.model_options as VertexAIClaudeOptions | undefined;
80
+ if (model_options?._option_id !== undefined &&
379
81
  model_options?._option_id !== "vertexai-claude" &&
380
- model_options?._option_id !== "text-fallback")
381
- ) {
382
- driver.logger.debug({ options: options.model_options }, "Unexpected option id");
383
- }
384
-
385
- // Include conversation history (same as non-streaming)
386
- const conversation = updateConversation(options.conversation as ClaudePrompt, prompt);
387
-
388
- const { payload, requestOptions } = getClaudePayload(options, conversation);
389
- const streamingPayload: MessageStreamParams = { ...payload, stream: true };
390
-
391
- const response_stream = await client.messages.stream(streamingPayload, requestOptions);
392
-
393
- // Track current tool use being built from streaming
394
- let currentToolUse: { id: string; name: string; inputJson: string } | null = null;
395
- // Deferred spacing after a thinking block — emitted only when real text follows,
396
- // so it doesn't leak into the output when a tool call comes after thinking.
397
- let pendingSpacing = false;
398
-
399
- const stream = asyncMap(response_stream, async (streamEvent: RawMessageStreamEvent) => {
400
- switch (streamEvent.type) {
401
- case "message_start":
402
- return {
403
- result: [{ type: "text", value: '' }],
404
- token_usage: anthropicUsageToTokenUsage(streamEvent.message.usage as AnthropicUsageLike),
405
- } satisfies CompletionChunkObject;
406
- case "message_delta":
407
- return {
408
- result: [{ type: "text", value: '' }],
409
- token_usage: {
410
- result: streamEvent.usage.output_tokens
411
- },
412
- finish_reason: claudeFinishReason(streamEvent.delta.stop_reason ?? undefined),
413
- } satisfies CompletionChunkObject;
414
- case "content_block_start":
415
- // Handle tool_use blocks
416
- if (streamEvent.content_block.type === "tool_use") {
417
- currentToolUse = {
418
- id: streamEvent.content_block.id,
419
- name: streamEvent.content_block.name,
420
- inputJson: ''
421
- };
422
- return {
423
- result: [],
424
- tool_use: [{
425
- id: streamEvent.content_block.id,
426
- tool_name: streamEvent.content_block.name,
427
- tool_input: '' as any // Will be accumulated via input_json_delta
428
- }]
429
- } satisfies CompletionChunkObject;
430
- }
431
- // Handle redacted thinking blocks
432
- if (streamEvent.content_block.type === "redacted_thinking" && model_options?.include_thoughts) {
433
- return {
434
- result: [{ type: "text", value: `[Redacted thinking: ${streamEvent.content_block.data}]` }]
435
- } satisfies CompletionChunkObject;
436
- }
437
- break;
438
- case "content_block_delta":
439
- // Handle different delta types
440
- switch (streamEvent.delta.type) {
441
- case "text_delta": {
442
- const prefix = pendingSpacing ? '\n\n' : '';
443
- pendingSpacing = false;
444
- return {
445
- result: streamEvent.delta.text ? [{ type: "text", value: prefix + streamEvent.delta.text }] : []
446
- } satisfies CompletionChunkObject;
447
- }
448
- case "input_json_delta":
449
- // Accumulate tool input JSON
450
- if (currentToolUse && streamEvent.delta.partial_json) {
451
- return {
452
- result: [],
453
- tool_use: [{
454
- id: currentToolUse.id,
455
- tool_name: '', // Name already sent in content_block_start
456
- tool_input: streamEvent.delta.partial_json as any
457
- }]
458
- } satisfies CompletionChunkObject;
459
- }
460
- break;
461
- case "thinking_delta":
462
- if (model_options?.include_thoughts) {
463
- return {
464
- result: streamEvent.delta.thinking ? [{ type: "text", value: streamEvent.delta.thinking }] : [],
465
- } satisfies CompletionChunkObject;
466
- }
467
- break;
468
- case "signature_delta":
469
- // End of thinking block — defer spacing until real text follows,
470
- // so it doesn't leak when a tool call comes next.
471
- if (model_options?.include_thoughts) {
472
- pendingSpacing = true;
473
- }
474
- break;
475
- }
476
- break;
477
- case "content_block_stop":
478
- // Reset tool use tracking; spacing is handled via pendingSpacing
479
- if (currentToolUse) {
480
- currentToolUse = null;
481
- // Tool call followed thinking — discard any pending spacing so it doesn't leak
482
- pendingSpacing = false;
483
- }
484
- break;
485
- }
486
-
487
- // Default case for all other event types
488
- return {
489
- result: []
490
- } satisfies CompletionChunkObject;
491
- });
492
-
493
- return stream;
494
- }
495
-
496
- /**
497
- * Format Anthropic API errors into LlumiverseError with proper status codes and retryability.
498
- *
499
- * Anthropic API errors have a specific structure:
500
- * - APIError.status: HTTP status code (400, 401, 403, 404, 409, 422, 429, 500+)
501
- * - APIError.error: Nested error object with type and message
502
- * - APIError.requestID: Request ID for support (can be null)
503
- *
504
- * Common error types:
505
- * - BadRequestError (400): Invalid request parameters
506
- * - AuthenticationError (401): Authentication required
507
- * - PermissionDeniedError (403): Insufficient permissions
508
- * - NotFoundError (404): Resource not found
509
- * - ConflictError (409): Resource conflict
510
- * - UnprocessableEntityError (422): Validation error
511
- * - RateLimitError (429): Rate limit exceeded
512
- * - InternalServerError (500+): Server-side errors
513
- * - APIConnectionError: Connection issues (no status code)
514
- * - APIConnectionTimeoutError: Request timeout (no status code)
515
- *
516
- * @see https://docs.anthropic.com/en/api/errors
517
- */
518
- formatLlumiverseError(
519
- _driver: VertexAIDriver,
520
- error: unknown,
521
- context: LlumiverseErrorContext
522
- ): LlumiverseError {
523
- // Check if it's an Anthropic API error
524
- const isAnthropicError = this.isAnthropicApiError(error);
525
-
526
- if (!isAnthropicError) {
527
- // Not an Anthropic API error, use default handling
528
- throw error;
82
+ model_options?._option_id !== "text-fallback") {
83
+ driver.logger.debug({ options: resolvedOptions.model_options }, "Unexpected option id");
529
84
  }
530
-
531
- const apiError = error as APIError;
532
- const httpStatusCode = apiError.status;
533
-
534
- // Extract error message and nested error details
535
- let message = apiError.message || String(error);
536
-
537
- // Extract error type from nested error object if available
538
- let errorType: string | undefined;
539
- if (apiError.error && typeof apiError.error === 'object') {
540
- const nestedError = apiError.error as any;
541
- if (nestedError.error && typeof nestedError.error === 'object') {
542
- errorType = nestedError.error.type;
543
- // Use the nested error message if it's more specific
544
- if (nestedError.error.message) {
545
- message = nestedError.error.message;
546
- }
547
- }
548
- }
549
-
550
- // Build user-facing message with status code
551
- let userMessage = message;
552
-
553
- // Include status code in message (for end-user visibility)
554
- if (httpStatusCode) {
555
- userMessage = `[${httpStatusCode}] ${userMessage}`;
556
- }
557
-
558
- // Include error type if available
559
- if (errorType && errorType !== 'error') {
560
- userMessage = `${errorType}: ${userMessage}`;
561
- }
562
-
563
- // Add request ID if available (useful for Anthropic support)
564
- if (apiError.requestID) {
565
- userMessage += ` (Request ID: ${apiError.requestID})`;
566
- }
567
-
568
- // Determine retryability based on Anthropic error types
569
- const retryable = this.isClaudeErrorRetryable(error, httpStatusCode, errorType);
570
-
571
- // Use the error constructor name as the error name
572
- const errorName = error.constructor?.name || 'AnthropicError';
573
-
574
- return new LlumiverseError(
575
- `[${context.provider}] ${userMessage}`,
576
- retryable,
577
- context,
578
- error,
579
- httpStatusCode,
580
- errorName
581
- );
582
- }
583
-
584
- /**
585
- * Type guard to check if error is an Anthropic API error.
586
- */
587
- private isAnthropicApiError(error: unknown): error is APIError {
588
- return (
589
- error !== null &&
590
- typeof error === 'object' &&
591
- error instanceof APIError
592
- );
85
+ return streamClaudeCompletion(client, prompt, resolvedOptions);
593
86
  }
594
87
 
595
- /**
596
- * Determine if an Anthropic API error is retryable.
597
- *
598
- * Retryable errors:
599
- * - RateLimitError (429): Rate limit exceeded, retry with backoff
600
- * - InternalServerError (500+): Server-side errors
601
- * - APIConnectionTimeoutError: Request timeout
602
- * - 408 (Request Timeout): Request timeout
603
- * - 529 (Overloaded): Service overloaded
604
- *
605
- * Non-retryable errors:
606
- * - BadRequestError (400): Invalid request parameters
607
- * - AuthenticationError (401): Authentication failure
608
- * - PermissionDeniedError (403): Insufficient permissions
609
- * - NotFoundError (404): Resource not found
610
- * - ConflictError (409): Resource conflict
611
- * - UnprocessableEntityError (422): Validation error
612
- * - Other 4xx client errors
613
- * - invalid_request_error: Invalid request structure
614
- *
615
- * @param error - The error object
616
- * @param httpStatusCode - The HTTP status code if available
617
- * @param errorType - The nested error type if available
618
- * @returns True if retryable, false if not retryable, undefined if unknown
619
- */
620
- private isClaudeErrorRetryable(
88
+ isClaudeErrorRetryable(
621
89
  error: unknown,
622
90
  httpStatusCode: number | undefined,
623
91
  errorType: string | undefined
624
92
  ): boolean | undefined {
625
- // Check specific Anthropic error types by class
626
- if (error instanceof RateLimitError) return true;
627
- if (error instanceof InternalServerError) return true;
628
- if (error instanceof APIConnectionTimeoutError) return true;
629
-
630
- // Non-retryable by error type
631
- if (error instanceof BadRequestError) return false;
632
- if (error instanceof AuthenticationError) return false;
633
- if (error instanceof PermissionDeniedError) return false;
634
- if (error instanceof NotFoundError) return false;
635
- if (error instanceof ConflictError) return false;
636
- if (error instanceof UnprocessableEntityError) return false;
637
-
638
- // Check nested error type
639
- if (errorType === 'invalid_request_error') return false;
640
-
641
- // Use HTTP status code
642
- if (httpStatusCode !== undefined) {
643
- if (httpStatusCode === 429) return true; // Rate limit
644
- if (httpStatusCode === 408) return true; // Request timeout
645
- if (httpStatusCode === 529) return true; // Overloaded
646
- if (httpStatusCode >= 500 && httpStatusCode < 600) return true; // Server errors
647
- if (httpStatusCode >= 400 && httpStatusCode < 500) return false; // Client errors
648
- }
649
-
650
- // Connection errors without status codes
651
- if (error instanceof APIConnectionError && !(error instanceof APIConnectionTimeoutError)) {
652
- // Generic connection errors might be retryable (network issues)
653
- return true;
654
- }
655
-
656
- // Unknown error type - let consumer decide retry strategy
657
- return undefined;
658
- }
659
- }
660
-
661
- function createPromptFromResponse(response: Message): ClaudePrompt {
662
- return {
663
- messages: [{
664
- role: response.role,
665
- content: response.content,
666
- }],
667
- system: undefined
668
- }
669
- }
670
-
671
- /**
672
- * Merge consecutive user messages in the conversation.
673
- * This is required because Anthropic's API expects all tool_result blocks
674
- * from a single assistant turn to be in one user message.
675
- * When multiple tool results are added as separate user messages,
676
- * we need to merge them before sending to the API.
677
- */
678
- export function mergeConsecutiveUserMessages(messages: MessageParam[]): MessageParam[] {
679
- if (messages.length === 0) return [];
680
-
681
- // Check if any merging is needed
682
- const needsMerging = messages.some((msg, i) =>
683
- i < messages.length - 1 &&
684
- msg.role === 'user' &&
685
- messages[i + 1].role === 'user'
686
- );
687
-
688
- if (!needsMerging) {
689
- return messages;
690
- }
691
-
692
- const result: MessageParam[] = [];
693
- let i = 0;
694
-
695
- while (i < messages.length) {
696
- const current = messages[i];
697
-
698
- if (current.role === 'user') {
699
- // Collect all consecutive user messages
700
- const mergedContent: MessageParam['content'] = [];
701
-
702
- while (i < messages.length && messages[i].role === 'user') {
703
- const userMsg = messages[i];
704
- if (Array.isArray(userMsg.content)) {
705
- mergedContent.push(...userMsg.content);
706
- } else if (typeof userMsg.content === 'string') {
707
- mergedContent.push({ type: 'text', text: userMsg.content });
708
- }
709
- i++;
710
- }
711
-
712
- result.push({
713
- role: 'user',
714
- content: mergedContent
715
- });
716
- } else {
717
- result.push(current);
718
- i++;
719
- }
720
- }
721
-
722
- return result;
723
- }
724
-
725
/**
 * Build the next conversation state by appending a prompt to an existing conversation.
 *
 * System blocks and messages of `prompt` are appended after those of `conversation`;
 * the combined messages are then sanitized and consecutive user messages are merged.
 *
 * @param conversation The conversation so far; `null`/`undefined` starts from empty.
 * @param prompt The prompt whose system blocks and messages are appended.
 * @returns A new prompt object; `system` is `undefined` when there are no system blocks.
 */
export function updateConversation(conversation: ClaudePrompt | undefined | null, prompt: ClaudePrompt): ClaudePrompt {
    const previousSystem = conversation?.system || [];
    const previousMessages = conversation?.messages || [];

    const system = [...previousSystem, ...(prompt.system || [])];
    const allMessages = [...previousMessages, ...(prompt.messages || [])];

    // The order below is deliberate: sanitize BEFORE merging. An empty assistant message
    // (e.g. left by interrupted streaming) sitting between two tool-result user messages
    // is a false separator. Merging first would see those user messages as non-consecutive,
    // and fixOrphanedToolUse would inject a synthetic result into the first one; once the
    // empty assistant message is later removed, the second user message is left with an
    // orphaned tool_result that Vertex AI rejects:
    // "unexpected tool_use_id found in tool_result blocks".
    const messages = mergeConsecutiveUserMessages(sanitizeMessages(allMessages));

    if (system.length === 0) {
        // An empty system list is reported as undefined.
        return { messages, system: undefined };
    }
    return { messages, system };
}
748
-
749
/**
 * Drop empty text from a list of messages.
 *
 * The Claude API refuses empty text content blocks ("text content blocks must be non-empty"),
 * which can be left behind when streaming is interrupted.
 *
 * - String content: the message is kept only when the string is not blank.
 * - Array content: blank text blocks are removed; every other block type is kept as-is.
 * - A message whose content array ends up empty is removed entirely.
 *
 * @param messages Messages to clean; the array and its messages are not modified.
 * @returns A new array; messages with array content are shallow copies with filtered content.
 */
export function sanitizeMessages(messages: MessageParam[]): MessageParam[] {
    const kept: MessageParam[] = [];

    for (const msg of messages) {
        const { content } = msg;

        if (typeof content === 'string') {
            if (content.trim().length > 0) {
                kept.push(msg);
            }
            continue;
        }

        // Non-text blocks (tool_use, tool_result, image, ...) always survive;
        // text blocks survive only when they hold something besides whitespace.
        const nonEmptyBlocks = content.filter(block =>
            block.type !== 'text' || Boolean(block.text && block.text.trim().length > 0)
        );

        if (nonEmptyBlocks.length === 0) {
            continue;
        }
        kept.push({ ...msg, content: nonEmptyBlocks });
    }

    return kept;
}
789
-
790
/**
 * Fix orphaned tool_use blocks in the conversation.
 * @exported for testing
 *
 * When an agent is stopped mid-tool-execution, the assistant message contains tool_use blocks
 * but no corresponding tool_result was added. The Anthropic API requires that every tool_use
 * must be followed by a tool_result in the next user message.
 *
 * For every assistant message that is directly followed by a user message, each tool_use
 * without a matching tool_result gets a synthetic "interrupted" tool_result prepended to
 * that user message (plain-string user content is converted to a text block first).
 * An assistant message that is last, or that is followed by a non-user message, is left
 * alone: the next turn is expected to provide the tool_results.
 *
 * The input array and the messages it holds are never modified; patched messages are
 * fresh objects placed in the returned array.
 *
 * @param messages Conversation messages in order.
 * @returns The same array when it holds fewer than two messages, otherwise a new array.
 */
export function fixOrphanedToolUse(messages: MessageParam[]): MessageParam[] {
    if (messages.length < 2) return messages;

    // Builds the placeholder result for a tool call that never ran.
    const interrupted = (block: { id: string; name: string }): ToolResultBlockParam => ({
        type: 'tool_result',
        tool_use_id: block.id,
        content: `[Tool interrupted: The user stopped the operation before "${block.name}" could execute.]`
    });

    // Shallow copy: replacements below land in `result`, never in the caller's array.
    const result = messages.slice();

    // The last message has no successor to patch, so the loop stops one short.
    for (let i = 0; i + 1 < result.length; i++) {
        const current = result[i];
        if (current.role !== 'assistant' || !Array.isArray(current.content)) continue;

        const toolUseBlocks = current.content.filter(
            (block): block is ContentBlockParam & { type: 'tool_use'; id: string; name: string } =>
                block.type === 'tool_use'
        );
        if (toolUseBlocks.length === 0) continue;

        const nextMessage = result[i + 1];
        if (nextMessage.role !== 'user') continue;

        if (Array.isArray(nextMessage.content)) {
            // Only tool calls with no tool_result in the next message need a placeholder.
            const answeredIds = new Set(
                nextMessage.content
                    .filter((block): block is ToolResultBlockParam => block.type === 'tool_result')
                    .map(block => block.tool_use_id)
            );
            const orphaned = toolUseBlocks.filter(block => !answeredIds.has(block.id));

            if (orphaned.length > 0) {
                // Synthetic results go first, ahead of the user's own blocks.
                result[i + 1] = {
                    ...nextMessage,
                    content: [...orphaned.map(interrupted), ...nextMessage.content]
                };
            }
        } else {
            // Plain-text user message: it cannot contain any tool_result, so every
            // tool call is orphaned. Convert the text to a block and prepend the results.
            const textContent: TextBlockParam = {
                type: 'text',
                text: typeof nextMessage.content === 'string' ? nextMessage.content : ''
            };
            result[i + 1] = {
                role: 'user',
                content: [...toolUseBlocks.map(interrupted), textContent]
            };
        }
    }

    return result;
}
877
-
878
/** Extra per-request settings returned alongside the Claude payload. */
interface RequestOptions {
    /** Additional HTTP headers, keyed by header name. */
    headers?: { [name: string]: string };
}

/** One entry of the `tools` array of a Claude message-create request. */
type ClaudeTool = NonNullable<MessageCreateParamsBase['tools']>[number];
883
-
884
/**
 * Return the messages with `cache_control` removed from every content block.
 *
 * Messages with plain-string content are passed through unchanged (same object);
 * messages with array content are shallow-copied with each block stripped.
 */
function stripClaudeCacheControlFromMessages(messages: MessageParam[]): MessageParam[] {
    const stripped: MessageParam[] = [];

    for (const msg of messages) {
        if (typeof msg.content === 'string') {
            // Nothing to strip: a string carries no cache_control.
            stripped.push(msg);
            continue;
        }

        const blocks = msg.content.map(block => stripClaudeCacheControlFromBlock(block));
        stripped.push({ ...msg, content: blocks });
    }

    return stripped;
}
896
-
897
/**
 * Return a shallow copy of a content block without its `cache_control` property.
 * The block passed in is left untouched.
 */
function stripClaudeCacheControlFromBlock<T extends ContentBlockParam>(block: T): T {
    const copy: T = { ...block };
    // Removing the key from the copy is a no-op when the block never had one.
    Reflect.deleteProperty(copy, 'cache_control');
    return copy;
}
902
-
903
/**
 * Return shallow copies of the system blocks with `cache_control` removed.
 *
 * @param system System text blocks, if any.
 * @returns A new array of copies, or `undefined` when no system blocks were given.
 */
function stripClaudeCacheControlFromSystem(system?: TextBlockParam[]): TextBlockParam[] | undefined {
    if (system == null) {
        return undefined;
    }

    return system.map(block => {
        const copy: TextBlockParam = { ...block };
        Reflect.deleteProperty(copy, 'cache_control');
        return copy;
    });
}
909
-
910
/**
 * Return shallow copies of the tool definitions with `cache_control` removed.
 *
 * @param tools Tool definitions, if any.
 * @returns A new array of copies, or `undefined` when no tools were given.
 */
function stripClaudeCacheControlFromTools(tools?: MessageCreateParamsBase['tools']): MessageCreateParamsBase['tools'] | undefined {
    if (tools == null) {
        return undefined;
    }

    return tools.map(tool => {
        const copy: ClaudeTool = { ...tool };
        Reflect.deleteProperty(copy, 'cache_control');
        return copy;
    });
}
917
-
918
- function getClaudePayload(options: ExecutionOptions, prompt: ClaudePrompt): { payload: MessageCreateParamsBase, requestOptions: RequestOptions | undefined } {
919
- const modelName = options.model; // Model name is already extracted in the calling methods
920
- const model_options = options.model_options as VertexAIClaudeOptions;
921
-
922
- // Add beta header for Claude 3.7 models to enable 128k output tokens
923
- let requestOptions: RequestOptions | undefined = undefined;
924
- if (modelName.includes('claude-3-7-sonnet') &&
925
- ((model_options?.max_tokens ?? 0) > 64000 || (model_options?.thinking_budget_tokens ?? 0) > 64000)) {
926
- requestOptions = {
927
- headers: {
928
- 'anthropic-beta': 'output-128k-2025-02-19'
929
- }
930
- };
931
- }
932
-
933
- // Fix orphaned tool_use blocks (can occur when agent is stopped mid-tool-execution)
934
- const fixedMessages = fixOrphanedToolUse(prompt.messages);
935
- // Sanitize messages to remove empty text blocks (can occur from interrupted streaming)
936
- let sanitizedMessages = sanitizeMessages(fixedMessages);
937
-
938
- // Validate tools have input_schema.type set to 'object' as required by the Anthropic SDK
939
- if (options.tools) {
940
- for (const tool of options.tools) {
941
- if (tool.input_schema.type !== 'object') {
942
- throw new Error(`Tool "${tool.name}" has invalid input_schema.type: expected "object", got "${tool.input_schema.type}"`);
943
- }
944
- }
945
- }
946
-
947
- // When no tools are provided but conversation contains tool_use/tool_result blocks
948
- // (e.g. checkpoint summary calls), convert tool blocks to text to avoid API errors
949
- const hasTools = options.tools && options.tools.length > 0;
950
- if (!hasTools && claudeMessagesContainToolBlocks(sanitizedMessages)) {
951
- sanitizedMessages = convertClaudeToolBlocksToText(sanitizedMessages);
952
- }
953
-
954
- sanitizedMessages = stripClaudeCacheControlFromMessages(sanitizedMessages);
955
- const sanitizedSystem = stripClaudeCacheControlFromSystem(prompt.system);
956
- const sanitizedTools = hasTools
957
- ? stripClaudeCacheControlFromTools(options.tools as MessageCreateParamsBase['tools'])
958
- : undefined;
959
-
960
- // Prompt caching: use three breakpoints so stable system prompt, tool definitions,
961
- // and the conversation history prefix can all be reused across calls.
962
- const cacheEnabled = model_options?.cache_enabled === true;
963
- if (cacheEnabled) {
964
- const cacheTtl = model_options?.cache_ttl;
965
- const cacheControl = { type: 'ephemeral' as const, ...(cacheTtl && { ttl: cacheTtl }) };
966
-
967
- if (sanitizedSystem && sanitizedSystem.length > 0) {
968
- const lastSystemBlock = sanitizedSystem[sanitizedSystem.length - 1] as TextBlockParam & { cache_control?: unknown };
969
- lastSystemBlock.cache_control = cacheControl;
970
- }
971
-
972
- if (sanitizedTools && sanitizedTools.length > 0) {
973
- const lastTool = sanitizedTools[sanitizedTools.length - 1] as ClaudeTool & { cache_control?: unknown };
974
- lastTool.cache_control = cacheControl;
975
- }
976
-
977
- if (sanitizedMessages.length >= 4) {
978
- const pivotMsg = sanitizedMessages[sanitizedMessages.length - 2];
979
- if (Array.isArray(pivotMsg.content) && pivotMsg.content.length > 0) {
980
- const lastBlock = pivotMsg.content[pivotMsg.content.length - 1];
981
- if (typeof lastBlock === 'object' && lastBlock !== null &&
982
- 'type' in lastBlock && lastBlock.type !== 'thinking' && lastBlock.type !== 'redacted_thinking') {
983
- (lastBlock as TextBlockParam).cache_control = cacheControl;
984
- }
985
- }
986
- }
93
+ return isClaudeErrorRetryable(error, httpStatusCode, errorType);
987
94
  }
988
95
 
989
- // Resolve thinking, effort, and sampling restriction using shared Claude helper
990
- const { thinking, outputConfig, hasSamplingRestriction } = resolveClaudeThinking(modelName, model_options);
991
-
992
- const payload = {
993
- messages: sanitizedMessages,
994
- system: sanitizedSystem,
995
- tools: sanitizedTools,
996
- temperature: hasSamplingRestriction ? undefined : model_options?.temperature,
997
- model: modelName,
998
- max_tokens: maxToken(options),
999
- top_p: hasSamplingRestriction ? undefined : (model_options?.temperature != null ? undefined : model_options?.top_p),
1000
- top_k: hasSamplingRestriction ? undefined : model_options?.top_k,
1001
- stop_sequences: model_options?.stop_sequence,
1002
- thinking,
1003
- ...(outputConfig && { output_config: outputConfig }),
1004
- };
1005
-
1006
- return { payload, requestOptions };
1007
- }
1008
-
1009
- /**
1010
- * Checks whether any Claude message contains tool_use or tool_result content blocks.
1011
- */
1012
- export function claudeMessagesContainToolBlocks(messages: MessageParam[]): boolean {
1013
- for (const msg of messages) {
1014
- if (!Array.isArray(msg.content)) continue;
1015
- for (const block of msg.content) {
1016
- if (typeof block === 'object' && block !== null && 'type' in block) {
1017
- if (block.type === 'tool_use' || block.type === 'tool_result') return true;
1018
- }
1019
- }
96
+ formatLlumiverseError(
97
+ _driver: VertexAIDriver,
98
+ error: unknown,
99
+ context: LlumiverseErrorContext
100
+ ): LlumiverseError {
101
+ return formatAnthropicLlumiverseError(error, context);
1020
102
  }
1021
- return false;
1022
- }
1023
-
1024
/**
 * Replace tool_use and tool_result blocks with plain text blocks.
 *
 * The tool call information is preserved as text while the structured blocks, which
 * require tools to be defined in the API request, are removed.
 *
 * - tool_use becomes `[Tool call: name(input-json)]`
 * - tool_result becomes `[Tool result: text]`
 * - JSON input and each result text are cut to 500 characters followed by `...`
 * - messages without tool blocks (or with string content) are returned as the same object
 *
 * @param messages Messages to convert; they are not modified.
 * @returns A new array with tool-bearing messages replaced by converted shallow copies.
 */
export function convertClaudeToolBlocksToText(messages: MessageParam[]): MessageParam[] {
    // Cut long strings to 500 characters and mark the cut.
    const clip = (value: string): string =>
        value.length > 500 ? value.substring(0, 500) + '...' : value;

    const asText = (text: string): TextBlockParam => ({ type: 'text', text });

    return messages.map(msg => {
        const { content } = msg;
        if (!Array.isArray(content)) return msg;

        const containsToolBlock = content.some(block =>
            typeof block === 'object' && block !== null && 'type' in block &&
            (block.type === 'tool_use' || block.type === 'tool_result')
        );
        if (!containsToolBlock) return msg;

        const rewritten = content.map(block => {
            if (typeof block === 'string') {
                return block;
            }

            switch (block.type) {
                case 'tool_use': {
                    const inputJson = block.input ? JSON.stringify(block.input) : '';
                    return asText(`[Tool call: ${block.name}(${clip(inputJson)})]`);
                }
                case 'tool_result': {
                    let summary = 'No content';
                    if (typeof block.content === 'string') {
                        summary = clip(block.content);
                    } else if (Array.isArray(block.content)) {
                        // Only text parts are kept; each one is clipped on its own.
                        summary = block.content
                            .filter((part): part is { type: 'text'; text: string } => part.type === 'text')
                            .map(part => clip(part.text))
                            .join('\n') || 'No text content';
                    }
                    return asText(`[Tool result: ${summary}]`);
                }
                default:
                    // Any other block type passes through as the same object.
                    return block;
            }
        });

        return { ...msg, content: rewritten as MessageParam['content'] };
    });
}