@llumiverse/drivers 1.0.0-dev.20260224.234313Z → 1.0.0-dev.20260331.080752Z

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/lib/cjs/bedrock/converse.js +86 -12
  2. package/lib/cjs/bedrock/converse.js.map +1 -1
  3. package/lib/cjs/bedrock/index.js +208 -1
  4. package/lib/cjs/bedrock/index.js.map +1 -1
  5. package/lib/cjs/groq/index.js +7 -4
  6. package/lib/cjs/groq/index.js.map +1 -1
  7. package/lib/cjs/openai/index.js +457 -26
  8. package/lib/cjs/openai/index.js.map +1 -1
  9. package/lib/cjs/openai/openai_compatible.js +1 -0
  10. package/lib/cjs/openai/openai_compatible.js.map +1 -1
  11. package/lib/cjs/vertexai/index.js +42 -0
  12. package/lib/cjs/vertexai/index.js.map +1 -1
  13. package/lib/cjs/vertexai/models/claude.js +230 -2
  14. package/lib/cjs/vertexai/models/claude.js.map +1 -1
  15. package/lib/cjs/vertexai/models/gemini.js +261 -41
  16. package/lib/cjs/vertexai/models/gemini.js.map +1 -1
  17. package/lib/cjs/vertexai/models.js +1 -1
  18. package/lib/cjs/vertexai/models.js.map +1 -1
  19. package/lib/esm/bedrock/converse.js +80 -6
  20. package/lib/esm/bedrock/converse.js.map +1 -1
  21. package/lib/esm/bedrock/index.js +207 -2
  22. package/lib/esm/bedrock/index.js.map +1 -1
  23. package/lib/esm/groq/index.js +7 -4
  24. package/lib/esm/groq/index.js.map +1 -1
  25. package/lib/esm/openai/index.js +456 -27
  26. package/lib/esm/openai/index.js.map +1 -1
  27. package/lib/esm/openai/openai_compatible.js +1 -0
  28. package/lib/esm/openai/openai_compatible.js.map +1 -1
  29. package/lib/esm/vertexai/index.js +43 -1
  30. package/lib/esm/vertexai/index.js.map +1 -1
  31. package/lib/esm/vertexai/models/claude.js +229 -3
  32. package/lib/esm/vertexai/models/claude.js.map +1 -1
  33. package/lib/esm/vertexai/models/gemini.js +262 -43
  34. package/lib/esm/vertexai/models/gemini.js.map +1 -1
  35. package/lib/esm/vertexai/models.js +1 -1
  36. package/lib/esm/vertexai/models.js.map +1 -1
  37. package/lib/types/bedrock/converse.d.ts +1 -2
  38. package/lib/types/bedrock/converse.d.ts.map +1 -1
  39. package/lib/types/bedrock/index.d.ts +53 -1
  40. package/lib/types/bedrock/index.d.ts.map +1 -1
  41. package/lib/types/openai/index.d.ts +96 -1
  42. package/lib/types/openai/index.d.ts.map +1 -1
  43. package/lib/types/openai/openai_compatible.d.ts +5 -0
  44. package/lib/types/openai/openai_compatible.d.ts.map +1 -1
  45. package/lib/types/openai/openai_format.d.ts +1 -1
  46. package/lib/types/vertexai/index.d.ts +11 -1
  47. package/lib/types/vertexai/index.d.ts.map +1 -1
  48. package/lib/types/vertexai/models/claude.d.ts +64 -1
  49. package/lib/types/vertexai/models/claude.d.ts.map +1 -1
  50. package/lib/types/vertexai/models/gemini.d.ts +61 -1
  51. package/lib/types/vertexai/models/gemini.d.ts.map +1 -1
  52. package/lib/types/vertexai/models.d.ts +6 -1
  53. package/lib/types/vertexai/models.d.ts.map +1 -1
  54. package/package.json +9 -9
  55. package/src/bedrock/converse.ts +85 -10
  56. package/src/bedrock/error-handling.test.ts +352 -0
  57. package/src/bedrock/index.ts +225 -1
  58. package/src/groq/index.ts +9 -4
  59. package/src/openai/error-handling.test.ts +567 -0
  60. package/src/openai/index.ts +505 -29
  61. package/src/openai/openai_compatible.ts +7 -0
  62. package/src/openai/openai_format.ts +1 -1
  63. package/src/vertexai/index.ts +56 -5
  64. package/src/vertexai/models/claude-error-handling.test.ts +432 -0
  65. package/src/vertexai/models/claude.ts +273 -7
  66. package/src/vertexai/models/gemini-error-handling.test.ts +353 -0
  67. package/src/vertexai/models/gemini.ts +304 -48
  68. package/src/vertexai/models.ts +7 -2
@@ -1,17 +1,24 @@
1
+ import type { ApiError } from "@google/genai";
1
2
  import {
2
3
  Content, FinishReason, FunctionCallingConfigMode, FunctionDeclaration, GenerateContentConfig, GenerateContentParameters,
3
4
  GenerateContentResponseUsageMetadata,
4
- HarmBlockThreshold, HarmCategory, Modality, Part, SafetySetting, Schema, ThinkingConfig, Tool, Type
5
+ HarmBlockThreshold, HarmCategory, Modality, Part,
6
+ ProminentPeople,
7
+ SafetySetting, Schema, ThinkingConfig,
8
+ ThinkingLevel,
9
+ Tool, Type
5
10
  } from "@google/genai";
6
11
  import {
7
12
  AIModel, Completion, CompletionChunkObject, CompletionResult, ExecutionOptions,
8
13
  ExecutionTokenUsage,
9
14
  getConversationMeta,
10
- getMaxTokensLimitVertexAi,
15
+ getGeminiModelVersion,
11
16
  incrementConversationTurn,
12
- JSONObject, JSONSchema, ModelType, PromptOptions, PromptRole,
17
+ isGeminiModelVersionGte,
18
+ JSONObject, JSONSchema, LlumiverseError, LlumiverseErrorContext, ModelType, PromptOptions, PromptRole,
13
19
  PromptSegment, readStreamAsBase64, StatelessExecutionOptions,
14
20
  stripBase64ImagesFromConversation,
21
+ stripHeartbeatsFromConversation,
15
22
  ToolDefinition, ToolUse,
16
23
  truncateLargeTextInConversation,
17
24
  unwrapConversationArray,
@@ -53,15 +60,36 @@ const geminiSafetySettings: SafetySetting[] = [
53
60
  }
54
61
  ];
55
62
 
63
+ // We do the mapping here rather than in common to avoid bringing the SDK into the common package.
64
+ function getProminentPeopleOption(prominentPeople?: "PROMINENT_PEOPLE_UNSPECIFIED" | "ALLOW_PROMINENT_PEOPLE" | "BLOCK_PROMINENT_PEOPLE") {
65
+ switch (prominentPeople) {
66
+ case "ALLOW_PROMINENT_PEOPLE":
67
+ return ProminentPeople.ALLOW_PROMINENT_PEOPLE;
68
+ case "BLOCK_PROMINENT_PEOPLE":
69
+ return ProminentPeople.BLOCK_PROMINENT_PEOPLE;
70
+ case "PROMINENT_PEOPLE_UNSPECIFIED":
71
+ return ProminentPeople.PROMINENT_PEOPLE_UNSPECIFIED;
72
+ default:
73
+ return undefined;
74
+ }
75
+ }
76
+
56
77
  function getGeminiPayload(options: ExecutionOptions, prompt: GenerateContentPrompt): GenerateContentParameters {
57
78
  const model_options = options.model_options as VertexAIGeminiOptions | undefined;
58
79
  const tools = getToolDefinitions(options.tools);
59
80
 
60
- const useStructuredOutput = supportsStructuredOutput(options) && !tools;
81
+ // When no tools are provided but conversation contains functionCall/functionResponse parts
82
+ // (e.g. checkpoint summary calls), convert them to text to avoid API errors
83
+ if (!tools && prompt.contents) {
84
+ const hasToolParts = prompt.contents.some(c =>
85
+ c.parts?.some(p => p.functionCall || p.functionResponse)
86
+ );
87
+ if (hasToolParts) {
88
+ prompt.contents = convertGeminiFunctionPartsToText(prompt.contents);
89
+ }
90
+ }
61
91
 
62
- const thinkingConfigNeeded = model_options?.include_thoughts
63
- || model_options?.thinking_budget_tokens
64
- || options.model.includes("gemini-2.5");
92
+ const useStructuredOutput = supportsStructuredOutput(options) && !tools;
65
93
 
66
94
  const configNanoBanana: GenerateContentConfig = {
67
95
  systemInstruction: prompt.system,
@@ -71,10 +99,16 @@ function getGeminiPayload(options: ExecutionOptions, prompt: GenerateContentProm
71
99
  //Model options
72
100
  temperature: model_options?.temperature,
73
101
  topP: model_options?.top_p,
74
- maxOutputTokens: geminiMaxTokens(options),
102
+ maxOutputTokens: model_options?.max_tokens,
75
103
  stopSequences: model_options?.stop_sequence,
104
+ thinkingConfig: geminiThinkingConfig(options),
76
105
  imageConfig: {
106
+ imageSize: model_options?.image_size,
77
107
  aspectRatio: model_options?.image_aspect_ratio,
108
+ personGeneration: model_options?.person_generation,
109
+ prominentPeople: getProminentPeopleOption(model_options?.prominent_people),
110
+ outputMimeType: model_options?.output_mime_type,
111
+ outputCompressionQuality: model_options?.output_compression_quality,
78
112
  }
79
113
  }
80
114
 
@@ -95,12 +129,12 @@ function getGeminiPayload(options: ExecutionOptions, prompt: GenerateContentProm
95
129
  temperature: model_options?.temperature,
96
130
  topP: model_options?.top_p,
97
131
  topK: model_options?.top_k,
98
- maxOutputTokens: geminiMaxTokens(options),
132
+ maxOutputTokens: model_options?.max_tokens,
99
133
  stopSequences: model_options?.stop_sequence,
100
134
  presencePenalty: model_options?.presence_penalty,
101
135
  frequencyPenalty: model_options?.frequency_penalty,
102
136
  seed: model_options?.seed,
103
- thinkingConfig: thinkingConfigNeeded ? geminiThinkingConfig(options) : undefined,
137
+ thinkingConfig: geminiThinkingConfig(options),
104
138
  }
105
139
 
106
140
  return {
@@ -435,7 +469,11 @@ function removeEmptyJSONArray(array: any[], schema: JSONSchema): any[] {
435
469
  return cleanedArray.filter(item => !isEmpty(item));
436
470
  }
437
471
 
438
- function collectTextParts(content: Content): CompletionResult[] {
472
+ /**
473
+ * Collect all parts (text and images) from content in order.
474
+ * This preserves the original ordering of text and image parts.
475
+ */
476
+ function extractCompletionResults(content: Content): CompletionResult[] {
439
477
  const results: CompletionResult[] = [];
440
478
  const parts = content.parts;
441
479
  if (parts) {
@@ -445,18 +483,7 @@ function collectTextParts(content: Content): CompletionResult[] {
445
483
  type: "text",
446
484
  value: part.text
447
485
  });
448
- }
449
- }
450
- }
451
- return results;
452
- }
453
-
454
- function collectInlineDataParts(content: Content): CompletionResult[] {
455
- const results: CompletionResult[] = [];
456
- const parts = content.parts;
457
- if (parts) {
458
- for (const part of parts) {
459
- if (part.inlineData) {
486
+ } else if (part.inlineData) {
460
487
  const base64ImageBytes: string = part.inlineData.data ?? "";
461
488
  const mimeType = part.inlineData.mimeType ?? "image/png";
462
489
  const imageUrl = `data:${mimeType};base64,${base64ImageBytes}`;
@@ -533,25 +560,16 @@ const recoverableToolCallReasons = [
533
560
  'UNEXPECTED_TOOL_CALL', // Model called an undeclared tool
534
561
  ]
535
562
 
536
- function geminiMaxTokens(option: StatelessExecutionOptions) {
537
- const model_options = option.model_options as VertexAIGeminiOptions | undefined;
538
- if (model_options?.max_tokens) {
539
- return model_options.max_tokens;
540
- }
541
- if (option.model.includes("gemini-2.5")) {
542
- return getMaxTokensLimitVertexAi(option.model);
543
- }
544
- return undefined;
545
- }
546
563
 
547
564
  function geminiThinkingBudget(option: StatelessExecutionOptions) {
548
565
  const model_options = option.model_options as VertexAIGeminiOptions | undefined;
566
+ // If thinking_budget_tokens is explicitly set in model options, use it directly
549
567
  if (model_options?.thinking_budget_tokens) {
550
568
  return model_options.thinking_budget_tokens;
551
569
  }
552
570
  // Set minimum thinking level by default.
553
571
  // Docs: https://ai.google.dev/gemini-api/docs/thinking#set-budget
554
- if (option.model.includes("gemini-2.5")) {
572
+ if (getGeminiModelVersion(option.model) == '2.5') {
555
573
  if (option.model.includes("pro")) {
556
574
  return 128;
557
575
  }
@@ -562,16 +580,32 @@ function geminiThinkingBudget(option: StatelessExecutionOptions) {
562
580
 
563
581
  function geminiThinkingConfig(option: StatelessExecutionOptions): ThinkingConfig | undefined {
564
582
  const model_options = option.model_options as VertexAIGeminiOptions | undefined;
583
+
584
+ // If thinking options are explicitly set in model options, use them directly
565
585
  const include_thoughts = model_options?.include_thoughts ?? false;
566
- if (model_options?.thinking_budget_tokens) {
567
- return { includeThoughts: include_thoughts, thinkingBudget: model_options.thinking_budget_tokens };
586
+ if (model_options?.thinking_budget_tokens || model_options?.thinking_level) {
587
+ return {
588
+ includeThoughts: include_thoughts,
589
+ thinkingBudget: model_options.thinking_budget_tokens,
590
+ thinkingLevel: model_options.thinking_level,
591
+ };
568
592
  }
569
593
 
570
- // Set minimum thinking level by default.
594
+ // Set a low thinking level by default.
571
595
  // Docs: https://ai.google.dev/gemini-api/docs/thinking#set-budget
572
- if (option.model.includes("gemini-2.5") || option.model.includes("gemini-3")) {
596
+ // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/thinking
597
+ if (isGeminiModelVersionGte(option.model, '3.0')) {
598
+ return {
599
+ includeThoughts: include_thoughts,
600
+ thinkingLevel: ThinkingLevel.LOW
601
+ };
602
+ }
603
+ if (isGeminiModelVersionGte(option.model, '2.5')) {
573
604
  const thinking_budget_tokens = geminiThinkingBudget(option) ?? 0;
574
- return { includeThoughts: include_thoughts, thinkingBudget: thinking_budget_tokens };
605
+ return {
606
+ includeThoughts: include_thoughts,
607
+ thinkingBudget: thinking_budget_tokens
608
+ };
575
609
  }
576
610
  }
577
611
 
@@ -667,7 +701,7 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
667
701
  // File content handling
668
702
  if (msg.files) {
669
703
  for (const f of msg.files) {
670
- let fileUrl = await f.getURL();
704
+ const fileUrl = await f.getURL();
671
705
  const isGsUrl = fileUrl.startsWith('gs://') || fileUrl.startsWith('https://storage.googleapis.com/');
672
706
 
673
707
  if (isGsUrl) {
@@ -680,7 +714,7 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
680
714
  } else {
681
715
  // Inline data handling
682
716
  const stream = await f.getStream();
683
- const data = await readStreamAsBase64(stream);
717
+ const data = await readStreamAsBase64(stream);
684
718
  parts.push({
685
719
  inlineData: {
686
720
  data,
@@ -776,6 +810,15 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
776
810
  const modelName = splits[splits.length - 1];
777
811
  options = { ...options, model: modelName };
778
812
 
813
+ // Restore system instruction from stored conversation on resume.
814
+ // The stored _llumiverse_system contains the complete system (interaction prompt + schema)
815
+ // from the initial call. Always prefer it over the prompt's system, which on resume only
816
+ // contains the schema instruction (no interaction system segments are present on resume).
817
+ const existingSystem = extractSystemFromConversation(options.conversation);
818
+ if (existingSystem) {
819
+ prompt.system = existingSystem;
820
+ }
821
+
779
822
  let conversation = updateConversation(options.conversation, prompt.contents);
780
823
  prompt.contents = conversation;
781
824
 
@@ -822,9 +865,8 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
822
865
 
823
866
  // We clean the content before validation, so we can update the conversation.
824
867
  const cleanedContent = cleanEmptyFieldsContent(content, options.result_schema);
825
- const textResults = collectTextParts(cleanedContent);
826
- const imageResults = collectInlineDataParts(cleanedContent);
827
- result = [...textResults, ...imageResults];
868
+ // Collect all parts in order (text and images)
869
+ result = extractCompletionResults(cleanedContent);
828
870
  conversation = updateConversation(conversation, [cleanedContent]);
829
871
  }
830
872
  }
@@ -850,12 +892,21 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
850
892
  // Truncate large text content if configured
851
893
  processedConversation = truncateLargeTextInConversation(processedConversation, stripOptions);
852
894
 
895
+ // Strip old heartbeat status messages
896
+ processedConversation = stripHeartbeatsFromConversation(processedConversation, {
897
+ keepForTurns: options.stripHeartbeatsAfterTurns ?? 1,
898
+ currentTurn,
899
+ });
900
+
901
+ // Preserve system instruction in conversation for multi-turn support
902
+ const finalConversation = storeSystemInConversation(processedConversation, prompt.system);
903
+
853
904
  return {
854
905
  result: result && result.length > 0 ? result : [{ type: "text" as const, value: '' }],
855
906
  token_usage: token_usage,
856
907
  finish_reason: finish_reason,
857
908
  original_response: options.include_original_response ? response : undefined,
858
- conversation: processedConversation,
909
+ conversation: finalConversation,
859
910
  tool_use
860
911
  } satisfies Completion;
861
912
  }
@@ -869,6 +920,15 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
869
920
  const modelName = splits[splits.length - 1];
870
921
  options = { ...options, model: modelName };
871
922
 
923
+ // Restore system instruction from stored conversation on resume.
924
+ // The stored _llumiverse_system contains the complete system (interaction prompt + schema)
925
+ // from the initial call. Always prefer it over the prompt's system, which on resume only
926
+ // contains the schema instruction (no interaction system segments are present on resume).
927
+ const existingSystem = extractSystemFromConversation(options.conversation);
928
+ if (existingSystem) {
929
+ prompt.system = existingSystem;
930
+ }
931
+
872
932
  // Include conversation history in prompt contents (same as non-streaming)
873
933
  const conversation = updateConversation(options.conversation, prompt.contents);
874
934
  prompt.contents = conversation;
@@ -901,9 +961,8 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
901
961
  + `content: ${JSON.stringify(candidate.content, null, 2)}, safety: ${JSON.stringify(candidate.safetyRatings, null, 2)}`);
902
962
  }
903
963
  if (candidate.content?.role === 'model') {
904
- const textResults = collectTextParts(candidate.content);
905
- const imageResults = collectInlineDataParts(candidate.content);
906
- const combinedResults = [...textResults, ...imageResults];
964
+ // Collect all parts in order (text and images)
965
+ const combinedResults = extractCompletionResults(candidate.content);
907
966
  tool_use = collectToolUseParts(candidate.content);
908
967
  if (tool_use) {
909
968
  finish_reason = "tool_use";
@@ -933,9 +992,176 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
933
992
  return stream;
934
993
  }
935
994
 
995
+ /**
996
+ * Format Google API errors into LlumiverseError with proper status codes and retryability.
997
+ *
998
+ * Google API errors follow AIP-193 standard:
999
+ * - ApiError.status: HTTP status code
1000
+ * - ApiError.message: Error message
1001
+ *
1002
+ * Common error codes:
1003
+ * - 400 (INVALID_ARGUMENT): Invalid request parameters
1004
+ * - 401 (UNAUTHENTICATED): Authentication required
1005
+ * - 403 (PERMISSION_DENIED): Insufficient permissions
1006
+ * - 404 (NOT_FOUND): Resource not found
1007
+ * - 429 (RESOURCE_EXHAUSTED): Rate limit/quota exceeded
1008
+ * - 500 (INTERNAL): Internal server error
1009
+ * - 503 (UNAVAILABLE): Service temporarily unavailable
1010
+ * - 504 (DEADLINE_EXCEEDED): Request timeout
1011
+ *
1012
+ * @see https://google.aip.dev/193
1013
+ * @see https://docs.cloud.google.com/vertex-ai/generative-ai/docs/model-reference/api-errors
1014
+ */
1015
+ formatLlumiverseError(
1016
+ _driver: VertexAIDriver,
1017
+ error: unknown,
1018
+ context: LlumiverseErrorContext
1019
+ ): LlumiverseError {
1020
+ // Check if it's a Google API error with status code
1021
+ const isApiError = this.isGoogleApiError(error);
1022
+
1023
+ if (!isApiError) {
1024
+ // Not a Google API error, use default handling
1025
+ // This will be called by the driver's default formatLlumiverseError
1026
+ throw error;
1027
+ }
1028
+
1029
+ const apiError = error as ApiError;
1030
+ const httpStatusCode = apiError.status;
1031
+
1032
+ // Extract error message
1033
+ const message = apiError.message || String(error);
1034
+
1035
+ // Build user-facing message with status code
1036
+ let userMessage = message;
1037
+
1038
+ // Include status code in message (for end-user visibility)
1039
+ if (httpStatusCode) {
1040
+ userMessage = `[${httpStatusCode}] ${userMessage}`;
1041
+ }
1042
+
1043
+ // Determine retryability based on Google error codes
1044
+ const retryable = this.isGeminiErrorRetryable(httpStatusCode);
1045
+
1046
+ // Extract error name/type from message if present
1047
+ const errorName = this.extractErrorName(message);
1048
+
1049
+ return new LlumiverseError(
1050
+ `[${context.provider}] ${userMessage}`,
1051
+ retryable,
1052
+ context,
1053
+ error,
1054
+ httpStatusCode,
1055
+ errorName
1056
+ );
1057
+ }
1058
+
1059
+ /**
1060
+ * Type guard to check if error is a Google API error.
1061
+ */
1062
+ private isGoogleApiError(error: unknown): error is ApiError {
1063
+ return (
1064
+ error !== null &&
1065
+ typeof error === 'object' &&
1066
+ 'status' in error &&
1067
+ typeof (error as any).status === 'number' &&
1068
+ 'message' in error
1069
+ );
1070
+ }
1071
+
1072
+ /**
1073
+ * Determine if a Google API error is retryable based on HTTP status code.
1074
+ *
1075
+ * Retryable errors (per Google AIP-194):
1076
+ * - 408 (REQUEST_TIMEOUT): Request timeout
1077
+ * - 429 (RESOURCE_EXHAUSTED): Rate limit exceeded, quota exhausted
1078
+ * - 500 (INTERNAL): Internal server error
1079
+ * - 502 (BAD_GATEWAY): Bad gateway
1080
+ * - 503 (UNAVAILABLE): Service temporarily unavailable
1081
+ * - 504 (DEADLINE_EXCEEDED): Gateway timeout
1082
+ *
1083
+ * Non-retryable errors:
1084
+ * - 400 (INVALID_ARGUMENT): Invalid request parameters
1085
+ * - 401 (UNAUTHENTICATED): Authentication required
1086
+ * - 403 (PERMISSION_DENIED): Insufficient permissions
1087
+ * - 404 (NOT_FOUND): Resource not found
1088
+ * - 409 (CONFLICT): Resource conflict
1089
+ * - Other 4xx client errors
1090
+ *
1091
+ * @param httpStatusCode - The HTTP status code from the API error
1092
+ * @returns True if retryable, false if not retryable, undefined if unknown
1093
+ */
1094
+ private isGeminiErrorRetryable(httpStatusCode: number): boolean | undefined {
1095
+ // Retryable status codes
1096
+ if (httpStatusCode === 408) return true; // Request timeout
1097
+ if (httpStatusCode === 429) return true; // Rate limit/quota
1098
+ if (httpStatusCode === 502) return true; // Bad gateway
1099
+ if (httpStatusCode === 503) return true; // Service unavailable
1100
+ if (httpStatusCode === 504) return true; // Gateway timeout
1101
+ if (httpStatusCode >= 500 && httpStatusCode < 600) return true; // Other 5xx server errors
1102
+
1103
+ // Non-retryable 4xx client errors
1104
+ if (httpStatusCode >= 400 && httpStatusCode < 500) return false;
1105
+
1106
+ // Unknown status codes - let consumer decide retry strategy
1107
+ return undefined;
1108
+ }
1109
+
1110
+ /**
1111
+ * Extract error type name from error message.
1112
+ * Google errors often include the error type in the message.
1113
+ * Examples: "INVALID_ARGUMENT", "RESOURCE_EXHAUSTED", "PERMISSION_DENIED"
1114
+ */
1115
+ private extractErrorName(message: string): string | undefined {
1116
+ // Common Google error patterns
1117
+ const patterns = [
1118
+ /^([A-Z_]+):/, // "ERROR_NAME: message"
1119
+ /\[([A-Z_]+)\]/, // "[ERROR_NAME] message"
1120
+ /^(\w+Error):/, // "ErrorTypeError: message"
1121
+ ];
1122
+
1123
+ for (const pattern of patterns) {
1124
+ const match = message.match(pattern);
1125
+ if (match) {
1126
+ return match[1];
1127
+ }
1128
+ }
1129
+
1130
+ return undefined;
1131
+ }
1132
+
936
1133
  }
937
1134
 
938
1135
 
1136
+ /**
1137
+ * Converts functionCall and functionResponse parts to text parts in Gemini Content[].
1138
+ * Preserves tool call information while removing structured parts that require
1139
+ * tools/toolConfig to be defined in the API request.
1140
+ */
1141
+ export function convertGeminiFunctionPartsToText(contents: Content[]): Content[] {
1142
+ return contents.map(content => {
1143
+ if (!content.parts) return content;
1144
+ const hasFunctionParts = content.parts.some(p => p.functionCall || p.functionResponse);
1145
+ if (!hasFunctionParts) return content;
1146
+
1147
+ const newParts = content.parts.map(part => {
1148
+ if (part.functionCall) {
1149
+ const argsStr = part.functionCall.args ? JSON.stringify(part.functionCall.args) : '';
1150
+ const truncated = argsStr.length > 500 ? argsStr.substring(0, 500) + '...' : argsStr;
1151
+ return { text: `[Tool call: ${part.functionCall.name}(${truncated})]` };
1152
+ }
1153
+ if (part.functionResponse) {
1154
+ const respStr = part.functionResponse.response
1155
+ ? JSON.stringify(part.functionResponse.response) : 'No response';
1156
+ const truncated = respStr.length > 500 ? respStr.substring(0, 500) + '...' : respStr;
1157
+ return { text: `[Tool result for ${part.functionResponse.name}: ${truncated}]` };
1158
+ }
1159
+ return part;
1160
+ });
1161
+ return { ...content, parts: newParts };
1162
+ });
1163
+ }
1164
+
939
1165
  function getToolDefinitions(tools: ToolDefinition[] | undefined | null): Tool | undefined {
940
1166
  if (!tools || tools.length === 0) {
941
1167
  return undefined;
@@ -979,6 +1205,36 @@ function updateConversation(conversation: unknown, prompt: Content[]): Content[]
979
1205
  return convArray.concat(prompt);
980
1206
  }
981
1207
 
1208
+ const SYSTEM_KEY = '_llumiverse_system';
1209
+
1210
+ /**
1211
+ * Extract the stored system instruction from a Gemini conversation object.
1212
+ * Returns undefined if no system was stored.
1213
+ */
1214
+ function extractSystemFromConversation(conversation: unknown): Content | undefined {
1215
+ if (typeof conversation === 'object' && conversation !== null) {
1216
+ const c = conversation as Record<string, unknown>;
1217
+ if (c[SYSTEM_KEY] && typeof c[SYSTEM_KEY] === 'object') {
1218
+ return c[SYSTEM_KEY] as Content;
1219
+ }
1220
+ }
1221
+ return undefined;
1222
+ }
1223
+
1224
+ /**
1225
+ * Store the system instruction in the Gemini conversation wrapper object.
1226
+ * The conversation is already wrapped by incrementConversationTurn into
1227
+ * { _arrayConversation: Content[], _llumiverse_meta: {...} }.
1228
+ * We add _llumiverse_system alongside these fields.
1229
+ */
1230
+ function storeSystemInConversation(conversation: unknown, system: Content | undefined): unknown {
1231
+ if (!system) return conversation;
1232
+ if (typeof conversation === 'object' && conversation !== null) {
1233
+ return { ...conversation as object, [SYSTEM_KEY]: system };
1234
+ }
1235
+ return conversation;
1236
+ }
1237
+
982
1238
  /**
983
1239
  *
984
1240
  * Gemini supports JSON output in the response. so we test if the response is a valid JSON object. otherwise we treat the response as a string.
@@ -1,7 +1,7 @@
1
- import { AIModel, Completion, PromptSegment, ExecutionOptions, CompletionChunkObject } from "@llumiverse/core";
1
+ import { AIModel, Completion, CompletionChunkObject, ExecutionOptions, LlumiverseError, LlumiverseErrorContext, PromptSegment } from "@llumiverse/core";
2
2
  import { VertexAIDriver, trimModelName } from "./index.js";
3
- import { GeminiModelDefinition } from "./models/gemini.js";
4
3
  import { ClaudeModelDefinition } from "./models/claude.js";
4
+ import { GeminiModelDefinition } from "./models/gemini.js";
5
5
  import { LLamaModelDefinition } from "./models/llama.js";
6
6
 
7
7
  export interface ModelDefinition<PromptT = any> {
@@ -11,6 +11,11 @@ export interface ModelDefinition<PromptT = any> {
11
11
  requestTextCompletion: (driver: VertexAIDriver, prompt: PromptT, options: ExecutionOptions) => Promise<Completion>;
12
12
  requestTextCompletionStream: (driver: VertexAIDriver, prompt: PromptT, options: ExecutionOptions) => Promise<AsyncIterable<CompletionChunkObject>>;
13
13
  preValidationProcessing?(result: Completion, options: ExecutionOptions): { result: Completion, options: ExecutionOptions };
14
+ /**
15
+ * Format provider-specific errors into standardized LlumiverseError.
16
+ * Optional - if not provided, VertexAIDriver will use default error handling.
17
+ */
18
+ formatLlumiverseError?(driver: VertexAIDriver, error: unknown, context: LlumiverseErrorContext): LlumiverseError;
14
19
  }
15
20
 
16
21
  export function getModelDefinition(model: string): ModelDefinition {