@llumiverse/drivers 1.1.1-dev.20260505.151157Z → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/lib/cjs/anthropic/index.js +64 -0
  2. package/lib/cjs/anthropic/index.js.map +1 -0
  3. package/lib/cjs/index.js +1 -0
  4. package/lib/cjs/index.js.map +1 -1
  5. package/lib/cjs/openai/index.js +12 -6
  6. package/lib/cjs/openai/index.js.map +1 -1
  7. package/lib/cjs/shared/claude-messages.js +737 -0
  8. package/lib/cjs/shared/claude-messages.js.map +1 -0
  9. package/lib/cjs/vertexai/index.js.map +1 -1
  10. package/lib/cjs/vertexai/models/claude.js +27 -872
  11. package/lib/cjs/vertexai/models/claude.js.map +1 -1
  12. package/lib/cjs/vertexai/models/gemini.js +18 -12
  13. package/lib/cjs/vertexai/models/gemini.js.map +1 -1
  14. package/lib/esm/anthropic/index.js +57 -0
  15. package/lib/esm/anthropic/index.js.map +1 -0
  16. package/lib/esm/index.js +1 -0
  17. package/lib/esm/index.js.map +1 -1
  18. package/lib/esm/openai/index.js +12 -7
  19. package/lib/esm/openai/index.js.map +1 -1
  20. package/lib/esm/shared/claude-messages.js +716 -0
  21. package/lib/esm/shared/claude-messages.js.map +1 -0
  22. package/lib/esm/vertexai/index.js.map +1 -1
  23. package/lib/esm/vertexai/models/claude.js +27 -865
  24. package/lib/esm/vertexai/models/claude.js.map +1 -1
  25. package/lib/esm/vertexai/models/gemini.js +18 -12
  26. package/lib/esm/vertexai/models/gemini.js.map +1 -1
  27. package/lib/types/anthropic/index.d.ts +21 -0
  28. package/lib/types/anthropic/index.d.ts.map +1 -0
  29. package/lib/types/index.d.ts +1 -0
  30. package/lib/types/index.d.ts.map +1 -1
  31. package/lib/types/openai/index.d.ts +1 -0
  32. package/lib/types/openai/index.d.ts.map +1 -1
  33. package/lib/types/shared/claude-messages.d.ts +75 -0
  34. package/lib/types/shared/claude-messages.d.ts.map +1 -0
  35. package/lib/types/vertexai/index.d.ts +4 -4
  36. package/lib/types/vertexai/index.d.ts.map +1 -1
  37. package/lib/types/vertexai/models/claude.d.ts +3 -106
  38. package/lib/types/vertexai/models/claude.d.ts.map +1 -1
  39. package/lib/types/vertexai/models/gemini.d.ts +1 -1
  40. package/lib/types/vertexai/models/gemini.d.ts.map +1 -1
  41. package/package.json +7 -6
  42. package/src/anthropic/index.ts +104 -0
  43. package/src/index.ts +1 -0
  44. package/src/openai/index.ts +13 -8
  45. package/src/shared/claude-messages.ts +879 -0
  46. package/src/vertexai/index.ts +18 -19
  47. package/src/vertexai/models/claude-error-handling.test.ts +3 -3
  48. package/src/vertexai/models/claude.ts +44 -1016
  49. package/src/vertexai/models/gemini.ts +27 -14
@@ -1,7 +1,5 @@
1
- import { APIConnectionError, APIConnectionTimeoutError, APIError, AuthenticationError, BadRequestError, ConflictError, InternalServerError, NotFoundError, PermissionDeniedError, RateLimitError, UnprocessableEntityError, } from '@anthropic-ai/sdk/error';
2
- import { getConversationMeta, getMaxTokensLimitVertexAi, incrementConversationTurn, LlumiverseError, ModelType, PromptRole, readStreamAsBase64, readStreamAsString, stripBase64ImagesFromConversation, stripHeartbeatsFromConversation, truncateLargeTextInConversation, } from "@llumiverse/core";
3
- import { asyncMap } from "@llumiverse/core/async";
4
- import { resolveClaudeThinking } from "../../shared/claude-thinking.js";
1
+ import { ModelType, } from "@llumiverse/core";
2
+ import { executeClaudeCompletion, formatAnthropicLlumiverseError, formatClaudePrompt, isClaudeErrorRetryable, streamClaudeCompletion, } from "../../shared/claude-messages.js";
5
3
  export const ANTHROPIC_REGIONS = {
6
4
  us: "us-east5",
7
5
  europe: "europe-west1",
@@ -11,123 +9,18 @@ export const NON_GLOBAL_ANTHROPIC_MODELS = [
11
9
  "claude-3-5",
12
10
  "claude-3",
13
11
  ];
14
- function anthropicUsageToTokenUsage(usage) {
15
- const cacheRead = usage.cache_read_input_tokens ?? 0;
16
- const cacheWrite = usage.cache_creation_input_tokens ?? 0;
17
- return {
18
- prompt_new: usage.input_tokens,
19
- prompt: usage.input_tokens + cacheRead + cacheWrite,
20
- result: usage.output_tokens,
21
- total: usage.input_tokens + usage.output_tokens + cacheRead + cacheWrite,
22
- prompt_cached: usage.cache_read_input_tokens ?? undefined,
23
- prompt_cache_write: usage.cache_creation_input_tokens ?? undefined,
24
- };
25
- }
26
- function claudeFinishReason(reason) {
27
- if (!reason)
28
- return undefined;
29
- switch (reason) {
30
- case 'end_turn': return "stop";
31
- case 'max_tokens': return "length";
32
- default: return reason; //stop_sequence
33
- }
34
- }
35
- export function collectTools(content) {
36
- const out = [];
37
- for (const block of content) {
38
- if (block.type === "tool_use") {
39
- out.push({
40
- id: block.id,
41
- tool_name: block.name,
42
- tool_input: block.input,
43
- });
44
- }
45
- }
46
- return out.length > 0 ? out : undefined;
47
- }
48
- function collectAllTextContent(content, includeThoughts = false) {
49
- const textParts = [];
50
- // First pass: collect thinking blocks
51
- if (includeThoughts) {
52
- for (const block of content) {
53
- if (block.type === 'thinking' && block.thinking) {
54
- textParts.push(block.thinking);
55
- }
56
- else if (block.type === 'redacted_thinking' && block.data) {
57
- textParts.push(`[Redacted thinking: ${block.data}]`);
58
- }
59
- }
60
- if (textParts.length > 0) {
61
- textParts.push(''); // Create a new line after thinking blocks
62
- }
63
- }
64
- // Second pass: collect text blocks
65
- for (const block of content) {
66
- if (block.type === 'text' && block.text) {
67
- textParts.push(block.text);
68
- }
69
- }
70
- return textParts.join('\n');
71
- }
72
- //Used to get a max_token value when not specified in the model options. Claude requires it to be set.
73
- function maxToken(option) {
74
- const modelOptions = option.model_options;
75
- if (modelOptions && typeof modelOptions.max_tokens === "number") {
76
- return modelOptions.max_tokens;
77
- }
78
- else {
79
- let maxSupportedTokens = getMaxTokensLimitVertexAi(option.model);
80
- // Fallback to the default max tokens limit for the model
81
- if (option.model.includes('claude-3-7-sonnet') && (modelOptions?.thinking_budget_tokens ?? 0) < 48000) {
82
- maxSupportedTokens = 64000; // Claude 3.7 can go up to 128k with a beta header, but when no max tokens is specified, we default to 64k.
83
- }
84
- return maxSupportedTokens;
85
- }
86
- }
87
- async function collectFileBlocks(segment, restrictedTypes = false) {
88
- const contentBlocks = [];
89
- for (const file of segment.files || []) {
90
- if (file.mime_type?.startsWith("image/")) {
91
- const allowedTypes = ["image/png", "image/jpeg", "image/gif", "image/webp"];
92
- if (!allowedTypes.includes(file.mime_type)) {
93
- throw new Error(`Unsupported image type: ${file.mime_type}`);
94
- }
95
- const mimeType = String(file.mime_type);
96
- contentBlocks.push({
97
- type: 'image',
98
- source: {
99
- type: 'base64',
100
- data: await readStreamAsBase64(await file.getStream()),
101
- media_type: mimeType
102
- }
103
- });
104
- }
105
- else if (!restrictedTypes) {
106
- if (file.mime_type === "application/pdf") {
107
- contentBlocks.push({
108
- title: file.name,
109
- type: 'document',
110
- source: {
111
- type: 'base64',
112
- data: await readStreamAsBase64(await file.getStream()),
113
- media_type: 'application/pdf'
114
- }
115
- });
116
- }
117
- else if (file.mime_type?.startsWith("text/")) {
118
- contentBlocks.push({
119
- title: file.name,
120
- type: 'document',
121
- source: {
122
- type: 'text',
123
- data: await readStreamAsString(await file.getStream()),
124
- media_type: 'text/plain'
125
- }
126
- });
127
- }
128
- }
12
+ /**
13
+ * Parse a VertexAI model path (e.g. "locations/us-east5/claude-3-5-sonnet") into
14
+ * its region and model name components.
15
+ */
16
+ function resolveVertexAIModelPath(options) {
17
+ const splits = options.model.split("/");
18
+ let region;
19
+ if (splits[0] === "locations" && splits.length >= 2) {
20
+ region = splits[1];
129
21
  }
130
- return contentBlocks;
22
+ const modelName = splits[splits.length - 1];
23
+ return { modelName, region, options: { ...options, model: modelName } };
131
24
  }
132
25
  export class ClaudeModelDefinition {
133
26
  model;
@@ -141,766 +34,35 @@ export class ClaudeModelDefinition {
141
34
  };
142
35
  }
143
36
  async createPrompt(_driver, segments, options) {
144
- // Convert the prompt to the format expected by the Claude API
145
- let system = segments
146
- .filter(segment => segment.role === PromptRole.system)
147
- .map(segment => ({
148
- text: segment.content,
149
- type: 'text'
150
- }));
151
- if (options.result_schema) {
152
- let schemaText = '';
153
- if (options.tools && options.tools.length > 0) {
154
- schemaText = "When not calling tools, the answer must be a JSON object using the following JSON Schema:\n" + JSON.stringify(options.result_schema);
155
- }
156
- else {
157
- schemaText = "The answer must be a JSON object using the following JSON Schema:\n" + JSON.stringify(options.result_schema);
158
- }
159
- const schemaSegments = {
160
- text: schemaText,
161
- type: 'text'
162
- };
163
- system.push(schemaSegments);
164
- }
165
- let messages = [];
166
- const safetyMessages = [];
167
- for (const segment of segments) {
168
- if (segment.role === PromptRole.system) {
169
- continue;
170
- }
171
- if (segment.role === PromptRole.tool) {
172
- if (!segment.tool_use_id) {
173
- throw new Error("Tool prompt segment must have a tool use ID");
174
- }
175
- // Build content blocks for tool results (restricted types)
176
- const contentBlocks = [];
177
- if (segment.content) {
178
- contentBlocks.push({
179
- type: 'text',
180
- text: segment.content
181
- });
182
- }
183
- // Collect file blocks with type safety
184
- const fileBlocks = await collectFileBlocks(segment, true);
185
- contentBlocks.push(...fileBlocks);
186
- messages.push({
187
- role: 'user',
188
- content: [{
189
- type: 'tool_result',
190
- tool_use_id: segment.tool_use_id,
191
- content: contentBlocks,
192
- }]
193
- });
194
- }
195
- else {
196
- // Build content blocks for regular messages (all types allowed)
197
- const contentBlocks = [];
198
- if (segment.content) {
199
- contentBlocks.push({
200
- type: 'text',
201
- text: segment.content
202
- });
203
- }
204
- // Collect file blocks without restrictions
205
- const fileBlocks = await collectFileBlocks(segment, false);
206
- contentBlocks.push(...fileBlocks);
207
- if (contentBlocks.length === 0) {
208
- continue; // skip empty segments
209
- }
210
- const messageParam = {
211
- role: segment.role === PromptRole.assistant ? 'assistant' : 'user',
212
- content: contentBlocks
213
- };
214
- if (segment.role === PromptRole.safety) {
215
- safetyMessages.push(messageParam);
216
- }
217
- else {
218
- messages.push(messageParam);
219
- }
220
- }
221
- }
222
- messages = messages.concat(safetyMessages);
223
- if (system && system.length === 0) {
224
- system = undefined; // If system is empty, set to undefined
225
- }
226
- return {
227
- messages: messages,
228
- system: system
229
- };
37
+ return formatClaudePrompt(segments, options);
230
38
  }
231
39
  async requestTextCompletion(driver, prompt, options) {
232
- const splits = options.model.split("/");
233
- let region = undefined;
234
- if (splits[0] === "locations" && splits.length >= 2) {
235
- region = splits[1];
236
- }
237
- const modelName = splits[splits.length - 1];
238
- options = { ...options, model: modelName };
40
+ const { region, options: resolvedOptions } = resolveVertexAIModelPath(options);
239
41
  const client = await driver.getAnthropicClient(region);
240
- const model_options = options.model_options;
42
+ const model_options = resolvedOptions.model_options;
241
43
  if (model_options?._option_id !== undefined &&
242
44
  model_options?._option_id !== "vertexai-claude" &&
243
45
  model_options?._option_id !== "text-fallback") {
244
- driver.logger.debug({ options: options.model_options }, "Unexpected option id");
46
+ driver.logger.debug({ options: resolvedOptions.model_options }, "Unexpected option id");
245
47
  }
246
- let conversation = updateConversation(options.conversation, prompt);
247
- const { payload, requestOptions } = getClaudePayload(options, conversation);
248
- // disable streaming, the create function is overloaded so payload type matters.
249
- const nonStreamingPayload = { ...payload, stream: false };
250
- const result = await client.messages.create(nonStreamingPayload, requestOptions);
251
- // Use the new function to collect text content, including thinking if enabled
252
- const includeThoughts = model_options?.include_thoughts ?? false;
253
- const text = collectAllTextContent(result.content, includeThoughts);
254
- const tool_use = collectTools(result.content);
255
- conversation = updateConversation(conversation, createPromptFromResponse(result));
256
- // Increment turn counter and apply stripping (same pattern as other drivers)
257
- conversation = incrementConversationTurn(conversation);
258
- const currentTurn = getConversationMeta(conversation).turnNumber;
259
- const stripOptions = {
260
- keepForTurns: options.stripImagesAfterTurns ?? Infinity,
261
- currentTurn,
262
- textMaxTokens: options.stripTextMaxTokens,
263
- };
264
- let processedConversation = stripBase64ImagesFromConversation(conversation, stripOptions);
265
- processedConversation = truncateLargeTextInConversation(processedConversation, stripOptions);
266
- processedConversation = stripHeartbeatsFromConversation(processedConversation, {
267
- keepForTurns: options.stripHeartbeatsAfterTurns ?? 1,
268
- currentTurn,
269
- });
270
- return {
271
- result: text ? [{ type: "text", value: text }] : [{ type: "text", value: '' }],
272
- tool_use,
273
- token_usage: anthropicUsageToTokenUsage(result.usage),
274
- // make sure we set finish_reason to the correct value (claude is normally setting this by itself)
275
- finish_reason: tool_use ? "tool_use" : claudeFinishReason(result?.stop_reason ?? ''),
276
- conversation: processedConversation
277
- };
48
+ return executeClaudeCompletion(client, prompt, resolvedOptions);
278
49
  }
279
50
  async requestTextCompletionStream(driver, prompt, options) {
280
- const splits = options.model.split("/");
281
- let region = undefined;
282
- if (splits[0] === "locations" && splits.length >= 2) {
283
- region = splits[1];
284
- }
285
- const modelName = splits[splits.length - 1];
286
- options = { ...options, model: modelName };
51
+ const { region, options: resolvedOptions } = resolveVertexAIModelPath(options);
287
52
  const client = await driver.getAnthropicClient(region);
288
- const model_options = options.model_options;
289
- if ((model_options?._option_id !== undefined &&
53
+ const model_options = resolvedOptions.model_options;
54
+ if (model_options?._option_id !== undefined &&
290
55
  model_options?._option_id !== "vertexai-claude" &&
291
- model_options?._option_id !== "text-fallback")) {
292
- driver.logger.debug({ options: options.model_options }, "Unexpected option id");
293
- }
294
- // Include conversation history (same as non-streaming)
295
- const conversation = updateConversation(options.conversation, prompt);
296
- const { payload, requestOptions } = getClaudePayload(options, conversation);
297
- const streamingPayload = { ...payload, stream: true };
298
- const response_stream = await client.messages.stream(streamingPayload, requestOptions);
299
- // Track current tool use being built from streaming
300
- let currentToolUse = null;
301
- // Deferred spacing after a thinking block — emitted only when real text follows,
302
- // so it doesn't leak into the output when a tool call comes after thinking.
303
- let pendingSpacing = false;
304
- const stream = asyncMap(response_stream, async (streamEvent) => {
305
- switch (streamEvent.type) {
306
- case "message_start":
307
- return {
308
- result: [{ type: "text", value: '' }],
309
- token_usage: anthropicUsageToTokenUsage(streamEvent.message.usage),
310
- };
311
- case "message_delta":
312
- return {
313
- result: [{ type: "text", value: '' }],
314
- token_usage: {
315
- result: streamEvent.usage.output_tokens
316
- },
317
- finish_reason: claudeFinishReason(streamEvent.delta.stop_reason ?? undefined),
318
- };
319
- case "content_block_start":
320
- // Handle tool_use blocks
321
- if (streamEvent.content_block.type === "tool_use") {
322
- currentToolUse = {
323
- id: streamEvent.content_block.id,
324
- name: streamEvent.content_block.name,
325
- inputJson: ''
326
- };
327
- return {
328
- result: [],
329
- tool_use: [{
330
- id: streamEvent.content_block.id,
331
- tool_name: streamEvent.content_block.name,
332
- tool_input: '' // Will be accumulated via input_json_delta
333
- }]
334
- };
335
- }
336
- // Handle redacted thinking blocks
337
- if (streamEvent.content_block.type === "redacted_thinking" && model_options?.include_thoughts) {
338
- return {
339
- result: [{ type: "text", value: `[Redacted thinking: ${streamEvent.content_block.data}]` }]
340
- };
341
- }
342
- break;
343
- case "content_block_delta":
344
- // Handle different delta types
345
- switch (streamEvent.delta.type) {
346
- case "text_delta": {
347
- const prefix = pendingSpacing ? '\n\n' : '';
348
- pendingSpacing = false;
349
- return {
350
- result: streamEvent.delta.text ? [{ type: "text", value: prefix + streamEvent.delta.text }] : []
351
- };
352
- }
353
- case "input_json_delta":
354
- // Accumulate tool input JSON
355
- if (currentToolUse && streamEvent.delta.partial_json) {
356
- return {
357
- result: [],
358
- tool_use: [{
359
- id: currentToolUse.id,
360
- tool_name: '', // Name already sent in content_block_start
361
- tool_input: streamEvent.delta.partial_json
362
- }]
363
- };
364
- }
365
- break;
366
- case "thinking_delta":
367
- if (model_options?.include_thoughts) {
368
- return {
369
- result: streamEvent.delta.thinking ? [{ type: "text", value: streamEvent.delta.thinking }] : [],
370
- };
371
- }
372
- break;
373
- case "signature_delta":
374
- // End of thinking block — defer spacing until real text follows,
375
- // so it doesn't leak when a tool call comes next.
376
- if (model_options?.include_thoughts) {
377
- pendingSpacing = true;
378
- }
379
- break;
380
- }
381
- break;
382
- case "content_block_stop":
383
- // Reset tool use tracking; spacing is handled via pendingSpacing
384
- if (currentToolUse) {
385
- currentToolUse = null;
386
- // Tool call followed thinking — discard any pending spacing so it doesn't leak
387
- pendingSpacing = false;
388
- }
389
- break;
390
- }
391
- // Default case for all other event types
392
- return {
393
- result: []
394
- };
395
- });
396
- return stream;
397
- }
398
- /**
399
- * Format Anthropic API errors into LlumiverseError with proper status codes and retryability.
400
- *
401
- * Anthropic API errors have a specific structure:
402
- * - APIError.status: HTTP status code (400, 401, 403, 404, 409, 422, 429, 500+)
403
- * - APIError.error: Nested error object with type and message
404
- * - APIError.requestID: Request ID for support (can be null)
405
- *
406
- * Common error types:
407
- * - BadRequestError (400): Invalid request parameters
408
- * - AuthenticationError (401): Authentication required
409
- * - PermissionDeniedError (403): Insufficient permissions
410
- * - NotFoundError (404): Resource not found
411
- * - ConflictError (409): Resource conflict
412
- * - UnprocessableEntityError (422): Validation error
413
- * - RateLimitError (429): Rate limit exceeded
414
- * - InternalServerError (500+): Server-side errors
415
- * - APIConnectionError: Connection issues (no status code)
416
- * - APIConnectionTimeoutError: Request timeout (no status code)
417
- *
418
- * @see https://docs.anthropic.com/en/api/errors
419
- */
420
- formatLlumiverseError(_driver, error, context) {
421
- // Check if it's an Anthropic API error
422
- const isAnthropicError = this.isAnthropicApiError(error);
423
- if (!isAnthropicError) {
424
- // Not an Anthropic API error, use default handling
425
- throw error;
426
- }
427
- const apiError = error;
428
- const httpStatusCode = apiError.status;
429
- // Extract error message and nested error details
430
- let message = apiError.message || String(error);
431
- // Extract error type from nested error object if available
432
- let errorType;
433
- if (apiError.error && typeof apiError.error === 'object') {
434
- const nestedError = apiError.error;
435
- if (nestedError.error && typeof nestedError.error === 'object') {
436
- errorType = nestedError.error.type;
437
- // Use the nested error message if it's more specific
438
- if (nestedError.error.message) {
439
- message = nestedError.error.message;
440
- }
441
- }
442
- }
443
- // Build user-facing message with status code
444
- let userMessage = message;
445
- // Include status code in message (for end-user visibility)
446
- if (httpStatusCode) {
447
- userMessage = `[${httpStatusCode}] ${userMessage}`;
448
- }
449
- // Include error type if available
450
- if (errorType && errorType !== 'error') {
451
- userMessage = `${errorType}: ${userMessage}`;
452
- }
453
- // Add request ID if available (useful for Anthropic support)
454
- if (apiError.requestID) {
455
- userMessage += ` (Request ID: ${apiError.requestID})`;
56
+ model_options?._option_id !== "text-fallback") {
57
+ driver.logger.debug({ options: resolvedOptions.model_options }, "Unexpected option id");
456
58
  }
457
- // Determine retryability based on Anthropic error types
458
- const retryable = this.isClaudeErrorRetryable(error, httpStatusCode, errorType);
459
- // Use the error constructor name as the error name
460
- const errorName = error.constructor?.name || 'AnthropicError';
461
- return new LlumiverseError(`[${context.provider}] ${userMessage}`, retryable, context, error, httpStatusCode, errorName);
462
- }
463
- /**
464
- * Type guard to check if error is an Anthropic API error.
465
- */
466
- isAnthropicApiError(error) {
467
- return (error !== null &&
468
- typeof error === 'object' &&
469
- error instanceof APIError);
59
+ return streamClaudeCompletion(client, prompt, resolvedOptions);
470
60
  }
471
- /**
472
- * Determine if an Anthropic API error is retryable.
473
- *
474
- * Retryable errors:
475
- * - RateLimitError (429): Rate limit exceeded, retry with backoff
476
- * - InternalServerError (500+): Server-side errors
477
- * - APIConnectionTimeoutError: Request timeout
478
- * - 408 (Request Timeout): Request timeout
479
- * - 529 (Overloaded): Service overloaded
480
- *
481
- * Non-retryable errors:
482
- * - BadRequestError (400): Invalid request parameters
483
- * - AuthenticationError (401): Authentication failure
484
- * - PermissionDeniedError (403): Insufficient permissions
485
- * - NotFoundError (404): Resource not found
486
- * - ConflictError (409): Resource conflict
487
- * - UnprocessableEntityError (422): Validation error
488
- * - Other 4xx client errors
489
- * - invalid_request_error: Invalid request structure
490
- *
491
- * @param error - The error object
492
- * @param httpStatusCode - The HTTP status code if available
493
- * @param errorType - The nested error type if available
494
- * @returns True if retryable, false if not retryable, undefined if unknown
495
- */
496
61
  isClaudeErrorRetryable(error, httpStatusCode, errorType) {
497
- // Check specific Anthropic error types by class
498
- if (error instanceof RateLimitError)
499
- return true;
500
- if (error instanceof InternalServerError)
501
- return true;
502
- if (error instanceof APIConnectionTimeoutError)
503
- return true;
504
- // Non-retryable by error type
505
- if (error instanceof BadRequestError)
506
- return false;
507
- if (error instanceof AuthenticationError)
508
- return false;
509
- if (error instanceof PermissionDeniedError)
510
- return false;
511
- if (error instanceof NotFoundError)
512
- return false;
513
- if (error instanceof ConflictError)
514
- return false;
515
- if (error instanceof UnprocessableEntityError)
516
- return false;
517
- // Check nested error type
518
- if (errorType === 'invalid_request_error')
519
- return false;
520
- // Use HTTP status code
521
- if (httpStatusCode !== undefined) {
522
- if (httpStatusCode === 429)
523
- return true; // Rate limit
524
- if (httpStatusCode === 408)
525
- return true; // Request timeout
526
- if (httpStatusCode === 529)
527
- return true; // Overloaded
528
- if (httpStatusCode >= 500 && httpStatusCode < 600)
529
- return true; // Server errors
530
- if (httpStatusCode >= 400 && httpStatusCode < 500)
531
- return false; // Client errors
532
- }
533
- // Connection errors without status codes
534
- if (error instanceof APIConnectionError && !(error instanceof APIConnectionTimeoutError)) {
535
- // Generic connection errors might be retryable (network issues)
536
- return true;
537
- }
538
- // Unknown error type - let consumer decide retry strategy
539
- return undefined;
540
- }
541
- }
542
- function createPromptFromResponse(response) {
543
- return {
544
- messages: [{
545
- role: response.role,
546
- content: response.content,
547
- }],
548
- system: undefined
549
- };
550
- }
551
- /**
552
- * Merge consecutive user messages in the conversation.
553
- * This is required because Anthropic's API expects all tool_result blocks
554
- * from a single assistant turn to be in one user message.
555
- * When multiple tool results are added as separate user messages,
556
- * we need to merge them before sending to the API.
557
- */
558
- export function mergeConsecutiveUserMessages(messages) {
559
- if (messages.length === 0)
560
- return [];
561
- // Check if any merging is needed
562
- const needsMerging = messages.some((msg, i) => i < messages.length - 1 &&
563
- msg.role === 'user' &&
564
- messages[i + 1].role === 'user');
565
- if (!needsMerging) {
566
- return messages;
567
- }
568
- const result = [];
569
- let i = 0;
570
- while (i < messages.length) {
571
- const current = messages[i];
572
- if (current.role === 'user') {
573
- // Collect all consecutive user messages
574
- const mergedContent = [];
575
- while (i < messages.length && messages[i].role === 'user') {
576
- const userMsg = messages[i];
577
- if (Array.isArray(userMsg.content)) {
578
- mergedContent.push(...userMsg.content);
579
- }
580
- else if (typeof userMsg.content === 'string') {
581
- mergedContent.push({ type: 'text', text: userMsg.content });
582
- }
583
- i++;
584
- }
585
- result.push({
586
- role: 'user',
587
- content: mergedContent
588
- });
589
- }
590
- else {
591
- result.push(current);
592
- i++;
593
- }
594
- }
595
- return result;
596
- }
597
- /**
598
- * Update the conversation messages
599
- * @param prompt
600
- * @param response
601
- * @returns
602
- */
603
- export function updateConversation(conversation, prompt) {
604
- const baseSystemMessages = conversation?.system || [];
605
- const baseMessages = conversation?.messages || [];
606
- const system = baseSystemMessages.concat(prompt.system || []);
607
- // Sanitize first, then merge. Order matters: an empty assistant message (e.g. from interrupted
608
- // streaming) between two tool-result user messages acts as a false separator. If merge runs
609
- // first, those messages look non-consecutive and fixOrphanedToolUse injects a synthetic result
610
- // into the first one; when sanitize later removes the empty assistant, the second user message
611
- // ends up with an orphaned tool_result that Vertex AI rejects:
612
- // "unexpected tool_use_id found in tool_result blocks".
613
- const combined = sanitizeMessages(baseMessages.concat(prompt.messages || []));
614
- const mergedMessages = mergeConsecutiveUserMessages(combined);
615
- return {
616
- messages: mergedMessages,
617
- system: system.length > 0 ? system : undefined // If system is empty, set to undefined
618
- };
619
- }
620
- /**
621
- * Sanitize messages by removing empty text blocks.
622
- * Claude API rejects messages with empty text content blocks ("text content blocks must be non-empty").
623
- * This handles cases where streaming was interrupted and left empty text blocks.
624
- *
625
- * - Filters out empty text blocks from each message's content
626
- * - Removes messages entirely if they have no content after filtering
627
- */
628
- export function sanitizeMessages(messages) {
629
- const result = [];
630
- for (const message of messages) {
631
- if (typeof message.content === 'string') {
632
- // String content - keep only if non-empty
633
- if (message.content.trim()) {
634
- result.push(message);
635
- }
636
- continue;
637
- }
638
- // Array content - filter out empty text blocks
639
- const filteredContent = message.content.filter(block => {
640
- if (block.type === 'text') {
641
- return block.text && block.text.trim().length > 0;
642
- }
643
- // Keep all non-text blocks (tool_use, tool_result, image, etc.)
644
- return true;
645
- });
646
- // Only include message if it has content after filtering
647
- if (filteredContent.length > 0) {
648
- result.push({
649
- ...message,
650
- content: filteredContent
651
- });
652
- }
653
- }
654
- return result;
655
- }
656
- /**
657
- * Fix orphaned tool_use blocks in the conversation.
658
- * @exported for testing
659
- *
660
- * When an agent is stopped mid-tool-execution, the assistant message contains tool_use blocks
661
- * but no corresponding tool_result was added. The Anthropic API requires that every tool_use
662
- * must be followed by a tool_result in the next user message.
663
- *
664
- * This function detects such cases and injects synthetic tool_result blocks indicating
665
- * the tools were interrupted, allowing the conversation to continue.
666
- */
667
- export function fixOrphanedToolUse(messages) {
668
- if (messages.length < 2)
669
- return messages;
670
- const result = [];
671
- for (let i = 0; i < messages.length; i++) {
672
- const current = messages[i];
673
- result.push(current);
674
- // Check if this is an assistant message with tool_use blocks
675
- if (current.role === 'assistant' && Array.isArray(current.content)) {
676
- const toolUseBlocks = current.content.filter((block) => block.type === 'tool_use');
677
- if (toolUseBlocks.length > 0) {
678
- // Check if the next message is a user message with matching tool_results
679
- const nextMessage = messages[i + 1];
680
- if (nextMessage && nextMessage.role === 'user' && Array.isArray(nextMessage.content)) {
681
- // Get tool_result IDs from the next message
682
- const toolResultIds = new Set(nextMessage.content
683
- .filter((block) => block.type === 'tool_result')
684
- .map(block => block.tool_use_id));
685
- // Find orphaned tool_use blocks (no matching tool_result)
686
- const orphanedToolUse = toolUseBlocks.filter(block => !toolResultIds.has(block.id));
687
- if (orphanedToolUse.length > 0) {
688
- // Inject synthetic tool_results for orphaned tool_use
689
- const syntheticResults = orphanedToolUse.map(block => ({
690
- type: 'tool_result',
691
- tool_use_id: block.id,
692
- content: `[Tool interrupted: The user stopped the operation before "${block.name}" could execute.]`
693
- }));
694
- // Prepend synthetic results to the next user message
695
- const updatedNextMessage = {
696
- ...nextMessage,
697
- content: [...syntheticResults, ...nextMessage.content]
698
- };
699
- // Replace the next message in our iteration
700
- messages[i + 1] = updatedNextMessage;
701
- }
702
- }
703
- else if (nextMessage && nextMessage.role === 'user') {
704
- // Next message is a user message but not array content (plain text)
705
- // We need to convert it and add tool_results
706
- const syntheticResults = toolUseBlocks.map(block => ({
707
- type: 'tool_result',
708
- tool_use_id: block.id,
709
- content: `[Tool interrupted: The user stopped the operation before "${block.name}" could execute.]`
710
- }));
711
- const textContent = typeof nextMessage.content === 'string'
712
- ? { type: 'text', text: nextMessage.content }
713
- : { type: 'text', text: '' };
714
- const updatedNextMessage = {
715
- role: 'user',
716
- content: [...syntheticResults, textContent]
717
- };
718
- messages[i + 1] = updatedNextMessage;
719
- }
720
- // Note: If there's no nextMessage, we leave the conversation as-is.
721
- // The tool_use blocks are expected to be there - the next turn will provide tool_results.
722
- }
723
- }
724
- }
725
- return result;
726
- }
727
- function stripClaudeCacheControlFromMessages(messages) {
728
- return messages.map(message => {
729
- if (typeof message.content === 'string') {
730
- return message;
731
- }
732
- return {
733
- ...message,
734
- content: message.content.map(block => stripClaudeCacheControlFromBlock(block)),
735
- };
736
- });
737
- }
738
- function stripClaudeCacheControlFromBlock(block) {
739
- const cloned = { ...block };
740
- delete cloned.cache_control;
741
- return cloned;
742
- }
743
- function stripClaudeCacheControlFromSystem(system) {
744
- return system?.map(block => {
745
- const { cache_control: _cacheControl, ...rest } = block;
746
- return rest;
747
- });
748
- }
749
- function stripClaudeCacheControlFromTools(tools) {
750
- return tools?.map(tool => {
751
- const cloned = { ...tool };
752
- delete cloned.cache_control;
753
- return cloned;
754
- });
755
- }
756
- function getClaudePayload(options, prompt) {
757
- const modelName = options.model; // Model name is already extracted in the calling methods
758
- const model_options = options.model_options;
759
- // Add beta header for Claude 3.7 models to enable 128k output tokens
760
- let requestOptions = undefined;
761
- if (modelName.includes('claude-3-7-sonnet') &&
762
- ((model_options?.max_tokens ?? 0) > 64000 || (model_options?.thinking_budget_tokens ?? 0) > 64000)) {
763
- requestOptions = {
764
- headers: {
765
- 'anthropic-beta': 'output-128k-2025-02-19'
766
- }
767
- };
768
- }
769
- // Fix orphaned tool_use blocks (can occur when agent is stopped mid-tool-execution)
770
- const fixedMessages = fixOrphanedToolUse(prompt.messages);
771
- // Sanitize messages to remove empty text blocks (can occur from interrupted streaming)
772
- let sanitizedMessages = sanitizeMessages(fixedMessages);
773
- // Validate tools have input_schema.type set to 'object' as required by the Anthropic SDK
774
- if (options.tools) {
775
- for (const tool of options.tools) {
776
- if (tool.input_schema.type !== 'object') {
777
- throw new Error(`Tool "${tool.name}" has invalid input_schema.type: expected "object", got "${tool.input_schema.type}"`);
778
- }
779
- }
780
- }
781
- // When no tools are provided but conversation contains tool_use/tool_result blocks
782
- // (e.g. checkpoint summary calls), convert tool blocks to text to avoid API errors
783
- const hasTools = options.tools && options.tools.length > 0;
784
- if (!hasTools && claudeMessagesContainToolBlocks(sanitizedMessages)) {
785
- sanitizedMessages = convertClaudeToolBlocksToText(sanitizedMessages);
786
- }
787
- sanitizedMessages = stripClaudeCacheControlFromMessages(sanitizedMessages);
788
- const sanitizedSystem = stripClaudeCacheControlFromSystem(prompt.system);
789
- const sanitizedTools = hasTools
790
- ? stripClaudeCacheControlFromTools(options.tools)
791
- : undefined;
792
- // Prompt caching: use three breakpoints so stable system prompt, tool definitions,
793
- // and the conversation history prefix can all be reused across calls.
794
- const cacheEnabled = model_options?.cache_enabled === true;
795
- if (cacheEnabled) {
796
- const cacheTtl = model_options?.cache_ttl;
797
- const cacheControl = { type: 'ephemeral', ...(cacheTtl && { ttl: cacheTtl }) };
798
- if (sanitizedSystem && sanitizedSystem.length > 0) {
799
- const lastSystemBlock = sanitizedSystem[sanitizedSystem.length - 1];
800
- lastSystemBlock.cache_control = cacheControl;
801
- }
802
- if (sanitizedTools && sanitizedTools.length > 0) {
803
- const lastTool = sanitizedTools[sanitizedTools.length - 1];
804
- lastTool.cache_control = cacheControl;
805
- }
806
- if (sanitizedMessages.length >= 4) {
807
- const pivotMsg = sanitizedMessages[sanitizedMessages.length - 2];
808
- if (Array.isArray(pivotMsg.content) && pivotMsg.content.length > 0) {
809
- const lastBlock = pivotMsg.content[pivotMsg.content.length - 1];
810
- if (typeof lastBlock === 'object' && lastBlock !== null &&
811
- 'type' in lastBlock && lastBlock.type !== 'thinking' && lastBlock.type !== 'redacted_thinking') {
812
- lastBlock.cache_control = cacheControl;
813
- }
814
- }
815
- }
62
+ return isClaudeErrorRetryable(error, httpStatusCode, errorType);
816
63
  }
817
- // Resolve thinking, effort, and sampling restriction using shared Claude helper
818
- const { thinking, outputConfig, hasSamplingRestriction } = resolveClaudeThinking(modelName, model_options);
819
- const payload = {
820
- messages: sanitizedMessages,
821
- system: sanitizedSystem,
822
- tools: sanitizedTools,
823
- temperature: hasSamplingRestriction ? undefined : model_options?.temperature,
824
- model: modelName,
825
- max_tokens: maxToken(options),
826
- top_p: hasSamplingRestriction ? undefined : (model_options?.temperature != null ? undefined : model_options?.top_p),
827
- top_k: hasSamplingRestriction ? undefined : model_options?.top_k,
828
- stop_sequences: model_options?.stop_sequence,
829
- thinking,
830
- ...(outputConfig && { output_config: outputConfig }),
831
- };
832
- return { payload, requestOptions };
833
- }
834
- /**
835
- * Checks whether any Claude message contains tool_use or tool_result content blocks.
836
- */
837
- export function claudeMessagesContainToolBlocks(messages) {
838
- for (const msg of messages) {
839
- if (!Array.isArray(msg.content))
840
- continue;
841
- for (const block of msg.content) {
842
- if (typeof block === 'object' && block !== null && 'type' in block) {
843
- if (block.type === 'tool_use' || block.type === 'tool_result')
844
- return true;
845
- }
846
- }
64
+ formatLlumiverseError(_driver, error, context) {
65
+ return formatAnthropicLlumiverseError(error, context);
847
66
  }
848
- return false;
849
- }
850
- /**
851
- * Converts tool_use and tool_result blocks to text in Claude messages.
852
- * Preserves tool call information while removing structured blocks that
853
- * require tools to be defined in the API request.
854
- */
855
- export function convertClaudeToolBlocksToText(messages) {
856
- return messages.map(msg => {
857
- if (!Array.isArray(msg.content))
858
- return msg;
859
- let hasToolBlocks = false;
860
- for (const block of msg.content) {
861
- if (typeof block === 'object' && block !== null && 'type' in block &&
862
- (block.type === 'tool_use' || block.type === 'tool_result')) {
863
- hasToolBlocks = true;
864
- break;
865
- }
866
- }
867
- if (!hasToolBlocks)
868
- return msg;
869
- const newContent = [];
870
- for (const block of msg.content) {
871
- if (typeof block === 'string') {
872
- newContent.push(block);
873
- continue;
874
- }
875
- if (block.type === 'tool_use') {
876
- const inputStr = block.input ? JSON.stringify(block.input) : '';
877
- const truncated = inputStr.length > 500 ? inputStr.substring(0, 500) + '...' : inputStr;
878
- newContent.push({
879
- type: 'text',
880
- text: `[Tool call: ${block.name}(${truncated})]`,
881
- });
882
- }
883
- else if (block.type === 'tool_result') {
884
- let resultStr = 'No content';
885
- if (typeof block.content === 'string') {
886
- resultStr = block.content.length > 500 ? block.content.substring(0, 500) + '...' : block.content;
887
- }
888
- else if (Array.isArray(block.content)) {
889
- const texts = block.content
890
- .filter((c) => c.type === 'text')
891
- .map(c => c.text.length > 500 ? c.text.substring(0, 500) + '...' : c.text);
892
- resultStr = texts.join('\n') || 'No text content';
893
- }
894
- newContent.push({
895
- type: 'text',
896
- text: `[Tool result: ${resultStr}]`,
897
- });
898
- }
899
- else {
900
- newContent.push(block);
901
- }
902
- }
903
- return { ...msg, content: newContent };
904
- });
905
67
  }
906
68
  //# sourceMappingURL=claude.js.map