@llumiverse/drivers 1.0.0-dev.20260202.145450Z → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (240) hide show
  1. package/lib/cjs/adobe/firefly.js +120 -0
  2. package/lib/cjs/adobe/firefly.js.map +1 -0
  3. package/lib/cjs/azure/azure_foundry.js +432 -0
  4. package/lib/cjs/azure/azure_foundry.js.map +1 -0
  5. package/lib/cjs/bedrock/converse.js +359 -0
  6. package/lib/cjs/bedrock/converse.js.map +1 -0
  7. package/lib/cjs/bedrock/index.js +1441 -0
  8. package/lib/cjs/bedrock/index.js.map +1 -0
  9. package/lib/cjs/bedrock/nova-image-payload.js +207 -0
  10. package/lib/cjs/bedrock/nova-image-payload.js.map +1 -0
  11. package/lib/cjs/bedrock/payloads.js +3 -0
  12. package/lib/cjs/bedrock/payloads.js.map +1 -0
  13. package/lib/cjs/bedrock/s3.js +107 -0
  14. package/lib/cjs/bedrock/s3.js.map +1 -0
  15. package/lib/cjs/bedrock/twelvelabs.js +87 -0
  16. package/lib/cjs/bedrock/twelvelabs.js.map +1 -0
  17. package/lib/cjs/groq/index.js +326 -0
  18. package/lib/cjs/groq/index.js.map +1 -0
  19. package/lib/cjs/huggingface_ie.js +201 -0
  20. package/lib/cjs/huggingface_ie.js.map +1 -0
  21. package/lib/cjs/index.js +31 -0
  22. package/lib/cjs/index.js.map +1 -0
  23. package/lib/cjs/mistral/index.js +176 -0
  24. package/lib/cjs/mistral/index.js.map +1 -0
  25. package/lib/cjs/mistral/types.js +83 -0
  26. package/lib/cjs/mistral/types.js.map +1 -0
  27. package/lib/cjs/openai/azure_openai.js +72 -0
  28. package/lib/cjs/openai/azure_openai.js.map +1 -0
  29. package/lib/cjs/openai/index.js +1100 -0
  30. package/lib/cjs/openai/index.js.map +1 -0
  31. package/lib/cjs/openai/openai.js +21 -0
  32. package/lib/cjs/openai/openai.js.map +1 -0
  33. package/lib/cjs/openai/openai_compatible.js +63 -0
  34. package/lib/cjs/openai/openai_compatible.js.map +1 -0
  35. package/lib/cjs/openai/openai_format.js +131 -0
  36. package/lib/cjs/openai/openai_format.js.map +1 -0
  37. package/lib/cjs/package.json +3 -0
  38. package/lib/cjs/replicate.js +275 -0
  39. package/lib/cjs/replicate.js.map +1 -0
  40. package/lib/cjs/test-driver/TestErrorCompletionStream.js +20 -0
  41. package/lib/cjs/test-driver/TestErrorCompletionStream.js.map +1 -0
  42. package/lib/cjs/test-driver/TestValidationErrorCompletionStream.js +24 -0
  43. package/lib/cjs/test-driver/TestValidationErrorCompletionStream.js.map +1 -0
  44. package/lib/cjs/test-driver/index.js +109 -0
  45. package/lib/cjs/test-driver/index.js.map +1 -0
  46. package/lib/cjs/test-driver/utils.js +30 -0
  47. package/lib/cjs/test-driver/utils.js.map +1 -0
  48. package/lib/cjs/togetherai/index.js +126 -0
  49. package/lib/cjs/togetherai/index.js.map +1 -0
  50. package/lib/cjs/togetherai/interfaces.js +3 -0
  51. package/lib/cjs/togetherai/interfaces.js.map +1 -0
  52. package/lib/cjs/vertexai/debug.js +12 -0
  53. package/lib/cjs/vertexai/debug.js.map +1 -0
  54. package/lib/cjs/vertexai/embeddings/embeddings-image.js +27 -0
  55. package/lib/cjs/vertexai/embeddings/embeddings-image.js.map +1 -0
  56. package/lib/cjs/vertexai/embeddings/embeddings-text.js +23 -0
  57. package/lib/cjs/vertexai/embeddings/embeddings-text.js.map +1 -0
  58. package/lib/cjs/vertexai/index.js +635 -0
  59. package/lib/cjs/vertexai/index.js.map +1 -0
  60. package/lib/cjs/vertexai/models/claude.js +842 -0
  61. package/lib/cjs/vertexai/models/claude.js.map +1 -0
  62. package/lib/cjs/vertexai/models/gemini.js +1110 -0
  63. package/lib/cjs/vertexai/models/gemini.js.map +1 -0
  64. package/lib/cjs/vertexai/models/imagen.js +303 -0
  65. package/lib/cjs/vertexai/models/imagen.js.map +1 -0
  66. package/lib/cjs/vertexai/models/llama.js +183 -0
  67. package/lib/cjs/vertexai/models/llama.js.map +1 -0
  68. package/lib/cjs/vertexai/models.js +35 -0
  69. package/lib/cjs/vertexai/models.js.map +1 -0
  70. package/lib/cjs/watsonx/index.js +161 -0
  71. package/lib/cjs/watsonx/index.js.map +1 -0
  72. package/lib/cjs/watsonx/interfaces.js +3 -0
  73. package/lib/cjs/watsonx/interfaces.js.map +1 -0
  74. package/lib/cjs/xai/index.js +65 -0
  75. package/lib/cjs/xai/index.js.map +1 -0
  76. package/lib/esm/adobe/firefly.js +116 -0
  77. package/lib/esm/adobe/firefly.js.map +1 -0
  78. package/lib/esm/azure/azure_foundry.js +426 -0
  79. package/lib/esm/azure/azure_foundry.js.map +1 -0
  80. package/lib/esm/bedrock/converse.js +352 -0
  81. package/lib/esm/bedrock/converse.js.map +1 -0
  82. package/lib/esm/bedrock/index.js +1434 -0
  83. package/lib/esm/bedrock/index.js.map +1 -0
  84. package/lib/esm/bedrock/nova-image-payload.js +203 -0
  85. package/lib/esm/bedrock/nova-image-payload.js.map +1 -0
  86. package/lib/esm/bedrock/payloads.js +2 -0
  87. package/lib/esm/bedrock/payloads.js.map +1 -0
  88. package/lib/esm/bedrock/s3.js +99 -0
  89. package/lib/esm/bedrock/s3.js.map +1 -0
  90. package/lib/esm/bedrock/twelvelabs.js +84 -0
  91. package/lib/esm/bedrock/twelvelabs.js.map +1 -0
  92. package/lib/esm/groq/index.js +319 -0
  93. package/lib/esm/groq/index.js.map +1 -0
  94. package/lib/esm/huggingface_ie.js +197 -0
  95. package/lib/esm/huggingface_ie.js.map +1 -0
  96. package/lib/esm/index.js +15 -0
  97. package/lib/esm/index.js.map +1 -0
  98. package/lib/esm/mistral/index.js +172 -0
  99. package/lib/esm/mistral/index.js.map +1 -0
  100. package/lib/esm/mistral/types.js +80 -0
  101. package/lib/esm/mistral/types.js.map +1 -0
  102. package/lib/esm/openai/azure_openai.js +68 -0
  103. package/lib/esm/openai/azure_openai.js.map +1 -0
  104. package/lib/esm/openai/index.js +1093 -0
  105. package/lib/esm/openai/index.js.map +1 -0
  106. package/lib/esm/openai/openai.js +14 -0
  107. package/lib/esm/openai/openai.js.map +1 -0
  108. package/lib/esm/openai/openai_compatible.js +56 -0
  109. package/lib/esm/openai/openai_compatible.js.map +1 -0
  110. package/lib/esm/openai/openai_format.js +127 -0
  111. package/lib/esm/openai/openai_format.js.map +1 -0
  112. package/lib/esm/replicate.js +268 -0
  113. package/lib/esm/replicate.js.map +1 -0
  114. package/lib/esm/test-driver/TestErrorCompletionStream.js +16 -0
  115. package/lib/esm/test-driver/TestErrorCompletionStream.js.map +1 -0
  116. package/lib/esm/test-driver/TestValidationErrorCompletionStream.js +20 -0
  117. package/lib/esm/test-driver/TestValidationErrorCompletionStream.js.map +1 -0
  118. package/lib/esm/test-driver/index.js +91 -0
  119. package/lib/esm/test-driver/index.js.map +1 -0
  120. package/lib/esm/test-driver/utils.js +25 -0
  121. package/lib/esm/test-driver/utils.js.map +1 -0
  122. package/lib/esm/togetherai/index.js +122 -0
  123. package/lib/esm/togetherai/index.js.map +1 -0
  124. package/lib/esm/togetherai/interfaces.js +2 -0
  125. package/lib/esm/togetherai/interfaces.js.map +1 -0
  126. package/lib/esm/vertexai/debug.js +6 -0
  127. package/lib/esm/vertexai/debug.js.map +1 -0
  128. package/lib/esm/vertexai/embeddings/embeddings-image.js +24 -0
  129. package/lib/esm/vertexai/embeddings/embeddings-image.js.map +1 -0
  130. package/lib/esm/vertexai/embeddings/embeddings-text.js +20 -0
  131. package/lib/esm/vertexai/embeddings/embeddings-text.js.map +1 -0
  132. package/lib/esm/vertexai/index.js +630 -0
  133. package/lib/esm/vertexai/index.js.map +1 -0
  134. package/lib/esm/vertexai/models/claude.js +833 -0
  135. package/lib/esm/vertexai/models/claude.js.map +1 -0
  136. package/lib/esm/vertexai/models/gemini.js +1104 -0
  137. package/lib/esm/vertexai/models/gemini.js.map +1 -0
  138. package/lib/esm/vertexai/models/imagen.js +299 -0
  139. package/lib/esm/vertexai/models/imagen.js.map +1 -0
  140. package/lib/esm/vertexai/models/llama.js +179 -0
  141. package/lib/esm/vertexai/models/llama.js.map +1 -0
  142. package/lib/esm/vertexai/models.js +32 -0
  143. package/lib/esm/vertexai/models.js.map +1 -0
  144. package/lib/esm/watsonx/index.js +157 -0
  145. package/lib/esm/watsonx/index.js.map +1 -0
  146. package/lib/esm/watsonx/interfaces.js +2 -0
  147. package/lib/esm/watsonx/interfaces.js.map +1 -0
  148. package/lib/esm/xai/index.js +58 -0
  149. package/lib/esm/xai/index.js.map +1 -0
  150. package/lib/types/adobe/firefly.d.ts +30 -0
  151. package/lib/types/adobe/firefly.d.ts.map +1 -0
  152. package/lib/types/azure/azure_foundry.d.ts +52 -0
  153. package/lib/types/azure/azure_foundry.d.ts.map +1 -0
  154. package/lib/types/bedrock/converse.d.ts +8 -0
  155. package/lib/types/bedrock/converse.d.ts.map +1 -0
  156. package/lib/types/bedrock/index.d.ts +135 -0
  157. package/lib/types/bedrock/index.d.ts.map +1 -0
  158. package/lib/types/bedrock/nova-image-payload.d.ts +74 -0
  159. package/lib/types/bedrock/nova-image-payload.d.ts.map +1 -0
  160. package/lib/types/bedrock/payloads.d.ts +12 -0
  161. package/lib/types/bedrock/payloads.d.ts.map +1 -0
  162. package/lib/types/bedrock/s3.d.ts +23 -0
  163. package/lib/types/bedrock/s3.d.ts.map +1 -0
  164. package/lib/types/bedrock/twelvelabs.d.ts +50 -0
  165. package/lib/types/bedrock/twelvelabs.d.ts.map +1 -0
  166. package/lib/types/groq/index.d.ts +27 -0
  167. package/lib/types/groq/index.d.ts.map +1 -0
  168. package/lib/types/huggingface_ie.d.ts +35 -0
  169. package/lib/types/huggingface_ie.d.ts.map +1 -0
  170. package/lib/types/index.d.ts +15 -0
  171. package/lib/types/index.d.ts.map +1 -0
  172. package/lib/types/mistral/index.d.ts +25 -0
  173. package/lib/types/mistral/index.d.ts.map +1 -0
  174. package/lib/types/mistral/types.d.ts +127 -0
  175. package/lib/types/mistral/types.d.ts.map +1 -0
  176. package/lib/types/openai/azure_openai.d.ts +25 -0
  177. package/lib/types/openai/azure_openai.d.ts.map +1 -0
  178. package/lib/types/openai/index.d.ts +126 -0
  179. package/lib/types/openai/index.d.ts.map +1 -0
  180. package/lib/types/openai/openai.d.ts +15 -0
  181. package/lib/types/openai/openai.d.ts.map +1 -0
  182. package/lib/types/openai/openai_compatible.d.ts +31 -0
  183. package/lib/types/openai/openai_compatible.d.ts.map +1 -0
  184. package/lib/types/openai/openai_format.d.ts +21 -0
  185. package/lib/types/openai/openai_format.d.ts.map +1 -0
  186. package/lib/types/replicate.d.ts +48 -0
  187. package/lib/types/replicate.d.ts.map +1 -0
  188. package/lib/types/test-driver/TestErrorCompletionStream.d.ts +9 -0
  189. package/lib/types/test-driver/TestErrorCompletionStream.d.ts.map +1 -0
  190. package/lib/types/test-driver/TestValidationErrorCompletionStream.d.ts +9 -0
  191. package/lib/types/test-driver/TestValidationErrorCompletionStream.d.ts.map +1 -0
  192. package/lib/types/test-driver/index.d.ts +24 -0
  193. package/lib/types/test-driver/index.d.ts.map +1 -0
  194. package/lib/types/test-driver/utils.d.ts +5 -0
  195. package/lib/types/test-driver/utils.d.ts.map +1 -0
  196. package/lib/types/togetherai/index.d.ts +23 -0
  197. package/lib/types/togetherai/index.d.ts.map +1 -0
  198. package/lib/types/togetherai/interfaces.d.ts +96 -0
  199. package/lib/types/togetherai/interfaces.d.ts.map +1 -0
  200. package/lib/types/vertexai/debug.d.ts +2 -0
  201. package/lib/types/vertexai/debug.d.ts.map +1 -0
  202. package/lib/types/vertexai/embeddings/embeddings-image.d.ts +11 -0
  203. package/lib/types/vertexai/embeddings/embeddings-image.d.ts.map +1 -0
  204. package/lib/types/vertexai/embeddings/embeddings-text.d.ts +10 -0
  205. package/lib/types/vertexai/embeddings/embeddings-text.d.ts.map +1 -0
  206. package/lib/types/vertexai/index.d.ts +79 -0
  207. package/lib/types/vertexai/index.d.ts.map +1 -0
  208. package/lib/types/vertexai/models/claude.d.ts +103 -0
  209. package/lib/types/vertexai/models/claude.d.ts.map +1 -0
  210. package/lib/types/vertexai/models/gemini.d.ts +78 -0
  211. package/lib/types/vertexai/models/gemini.d.ts.map +1 -0
  212. package/lib/types/vertexai/models/imagen.d.ts +75 -0
  213. package/lib/types/vertexai/models/imagen.d.ts.map +1 -0
  214. package/lib/types/vertexai/models/llama.d.ts +20 -0
  215. package/lib/types/vertexai/models/llama.d.ts.map +1 -0
  216. package/lib/types/vertexai/models.d.ts +20 -0
  217. package/lib/types/vertexai/models.d.ts.map +1 -0
  218. package/lib/types/watsonx/index.d.ts +27 -0
  219. package/lib/types/watsonx/index.d.ts.map +1 -0
  220. package/lib/types/watsonx/interfaces.d.ts +65 -0
  221. package/lib/types/watsonx/interfaces.d.ts.map +1 -0
  222. package/lib/types/xai/index.d.ts +18 -0
  223. package/lib/types/xai/index.d.ts.map +1 -0
  224. package/package.json +18 -18
  225. package/src/bedrock/converse.ts +85 -10
  226. package/src/bedrock/error-handling.test.ts +352 -0
  227. package/src/bedrock/index.ts +293 -16
  228. package/src/groq/index.ts +9 -4
  229. package/src/mistral/index.ts +25 -22
  230. package/src/mistral/types.ts +0 -5
  231. package/src/openai/error-handling.test.ts +567 -0
  232. package/src/openai/index.ts +513 -33
  233. package/src/openai/openai_compatible.ts +7 -0
  234. package/src/openai/openai_format.ts +1 -1
  235. package/src/vertexai/index.ts +61 -13
  236. package/src/vertexai/models/claude-error-handling.test.ts +432 -0
  237. package/src/vertexai/models/claude.ts +287 -10
  238. package/src/vertexai/models/gemini-error-handling.test.ts +353 -0
  239. package/src/vertexai/models/gemini.ts +329 -52
  240. package/src/vertexai/models.ts +7 -2
@@ -1,17 +1,24 @@
1
+ import type { ApiError } from "@google/genai";
1
2
  import {
2
3
  Content, FinishReason, FunctionCallingConfigMode, FunctionDeclaration, GenerateContentConfig, GenerateContentParameters,
3
4
  GenerateContentResponseUsageMetadata,
4
- HarmBlockThreshold, HarmCategory, Modality, Part, SafetySetting, Schema, ThinkingConfig, Tool, Type
5
+ HarmBlockThreshold, HarmCategory, Modality, Part,
6
+ ProminentPeople,
7
+ SafetySetting, Schema, ThinkingConfig,
8
+ ThinkingLevel,
9
+ Tool, Type
5
10
  } from "@google/genai";
6
11
  import {
7
12
  AIModel, Completion, CompletionChunkObject, CompletionResult, ExecutionOptions,
8
13
  ExecutionTokenUsage,
9
14
  getConversationMeta,
10
- getMaxTokensLimitVertexAi,
15
+ getGeminiModelVersion,
11
16
  incrementConversationTurn,
12
- JSONObject, JSONSchema, ModelType, PromptOptions, PromptRole,
17
+ isGeminiModelVersionGte,
18
+ JSONObject, JSONSchema, LlumiverseError, LlumiverseErrorContext, ModelType, PromptOptions, PromptRole,
13
19
  PromptSegment, readStreamAsBase64, StatelessExecutionOptions,
14
20
  stripBase64ImagesFromConversation,
21
+ stripHeartbeatsFromConversation,
15
22
  ToolDefinition, ToolUse,
16
23
  truncateLargeTextInConversation,
17
24
  unwrapConversationArray,
@@ -53,15 +60,36 @@ const geminiSafetySettings: SafetySetting[] = [
53
60
  }
54
61
  ];
55
62
 
63
+ // We do the mapping here rather than in common to avoid bringing the SDK into the common package.
64
+ function getProminentPeopleOption(prominentPeople?: "PROMINENT_PEOPLE_UNSPECIFIED" | "ALLOW_PROMINENT_PEOPLE" | "BLOCK_PROMINENT_PEOPLE") {
65
+ switch (prominentPeople) {
66
+ case "ALLOW_PROMINENT_PEOPLE":
67
+ return ProminentPeople.ALLOW_PROMINENT_PEOPLE;
68
+ case "BLOCK_PROMINENT_PEOPLE":
69
+ return ProminentPeople.BLOCK_PROMINENT_PEOPLE;
70
+ case "PROMINENT_PEOPLE_UNSPECIFIED":
71
+ return ProminentPeople.PROMINENT_PEOPLE_UNSPECIFIED;
72
+ default:
73
+ return undefined;
74
+ }
75
+ }
76
+
56
77
  function getGeminiPayload(options: ExecutionOptions, prompt: GenerateContentPrompt): GenerateContentParameters {
57
78
  const model_options = options.model_options as VertexAIGeminiOptions | undefined;
58
79
  const tools = getToolDefinitions(options.tools);
59
80
 
60
- const useStructuredOutput = supportsStructuredOutput(options) && !tools;
81
+ // When no tools are provided but conversation contains functionCall/functionResponse parts
82
+ // (e.g. checkpoint summary calls), convert them to text to avoid API errors
83
+ if (!tools && prompt.contents) {
84
+ const hasToolParts = prompt.contents.some(c =>
85
+ c.parts?.some(p => p.functionCall || p.functionResponse)
86
+ );
87
+ if (hasToolParts) {
88
+ prompt.contents = convertGeminiFunctionPartsToText(prompt.contents);
89
+ }
90
+ }
61
91
 
62
- const thinkingConfigNeeded = model_options?.include_thoughts
63
- || model_options?.thinking_budget_tokens
64
- || options.model.includes("gemini-2.5");
92
+ const useStructuredOutput = supportsStructuredOutput(options) && !tools;
65
93
 
66
94
  const configNanoBanana: GenerateContentConfig = {
67
95
  systemInstruction: prompt.system,
@@ -71,10 +99,16 @@ function getGeminiPayload(options: ExecutionOptions, prompt: GenerateContentProm
71
99
  //Model options
72
100
  temperature: model_options?.temperature,
73
101
  topP: model_options?.top_p,
74
- maxOutputTokens: geminiMaxTokens(options),
102
+ maxOutputTokens: model_options?.max_tokens,
75
103
  stopSequences: model_options?.stop_sequence,
104
+ thinkingConfig: geminiThinkingConfig(options),
76
105
  imageConfig: {
106
+ imageSize: model_options?.image_size,
77
107
  aspectRatio: model_options?.image_aspect_ratio,
108
+ personGeneration: model_options?.person_generation,
109
+ prominentPeople: getProminentPeopleOption(model_options?.prominent_people),
110
+ outputMimeType: model_options?.output_mime_type,
111
+ outputCompressionQuality: model_options?.output_compression_quality,
78
112
  }
79
113
  }
80
114
 
@@ -95,12 +129,12 @@ function getGeminiPayload(options: ExecutionOptions, prompt: GenerateContentProm
95
129
  temperature: model_options?.temperature,
96
130
  topP: model_options?.top_p,
97
131
  topK: model_options?.top_k,
98
- maxOutputTokens: geminiMaxTokens(options),
132
+ maxOutputTokens: model_options?.max_tokens,
99
133
  stopSequences: model_options?.stop_sequence,
100
134
  presencePenalty: model_options?.presence_penalty,
101
135
  frequencyPenalty: model_options?.frequency_penalty,
102
136
  seed: model_options?.seed,
103
- thinkingConfig: thinkingConfigNeeded ? geminiThinkingConfig(options) : undefined,
137
+ thinkingConfig: geminiThinkingConfig(options),
104
138
  }
105
139
 
106
140
  return {
@@ -435,7 +469,11 @@ function removeEmptyJSONArray(array: any[], schema: JSONSchema): any[] {
435
469
  return cleanedArray.filter(item => !isEmpty(item));
436
470
  }
437
471
 
438
- function collectTextParts(content: Content): CompletionResult[] {
472
+ /**
473
+ * Collect all parts (text and images) from content in order.
474
+ * This preserves the original ordering of text and image parts.
475
+ */
476
+ function extractCompletionResults(content: Content): CompletionResult[] {
439
477
  const results: CompletionResult[] = [];
440
478
  const parts = content.parts;
441
479
  if (parts) {
@@ -445,18 +483,7 @@ function collectTextParts(content: Content): CompletionResult[] {
445
483
  type: "text",
446
484
  value: part.text
447
485
  });
448
- }
449
- }
450
- }
451
- return results;
452
- }
453
-
454
- function collectInlineDataParts(content: Content): CompletionResult[] {
455
- const results: CompletionResult[] = [];
456
- const parts = content.parts;
457
- if (parts) {
458
- for (const part of parts) {
459
- if (part.inlineData) {
486
+ } else if (part.inlineData) {
460
487
  const base64ImageBytes: string = part.inlineData.data ?? "";
461
488
  const mimeType = part.inlineData.mimeType ?? "image/png";
462
489
  const imageUrl = `data:${mimeType};base64,${base64ImageBytes}`;
@@ -526,27 +553,23 @@ const supportedFinishReasons: FinishReason[] = [
526
553
  FinishReason.FINISH_REASON_UNSPECIFIED,
527
554
  ]
528
555
 
529
- function geminiMaxTokens(option: StatelessExecutionOptions) {
530
- const model_options = option.model_options as VertexAIGeminiOptions | undefined;
531
- if (model_options?.max_tokens) {
532
- return model_options.max_tokens;
533
- }
534
- if (option.model.includes("gemini-2.5")) {
535
- const maxSupportedTokens = getMaxTokensLimitVertexAi(option.model);
536
- const thinkingBudget = geminiThinkingBudget(option) ?? 0;
537
- return Math.min(maxSupportedTokens, 16000 + thinkingBudget);
538
- }
539
- return undefined;
540
- }
556
+ // Finish reasons that indicate tool call issues but should be recovered gracefully
557
+ // instead of throwing an error. The tool_use is still extracted and returned
558
+ // so the workflow can generate a proper toolError response.
559
+ const recoverableToolCallReasons = [
560
+ 'UNEXPECTED_TOOL_CALL', // Model called an undeclared tool
561
+ ]
562
+
541
563
 
542
564
  function geminiThinkingBudget(option: StatelessExecutionOptions) {
543
565
  const model_options = option.model_options as VertexAIGeminiOptions | undefined;
566
+ // If thinking_budget_tokens is explicitly set in model options, use it directly
544
567
  if (model_options?.thinking_budget_tokens) {
545
568
  return model_options.thinking_budget_tokens;
546
569
  }
547
570
  // Set minimum thinking level by default.
548
571
  // Docs: https://ai.google.dev/gemini-api/docs/thinking#set-budget
549
- if (option.model.includes("gemini-2.5")) {
572
+ if (getGeminiModelVersion(option.model) == '2.5') {
550
573
  if (option.model.includes("pro")) {
551
574
  return 128;
552
575
  }
@@ -557,16 +580,32 @@ function geminiThinkingBudget(option: StatelessExecutionOptions) {
557
580
 
558
581
  function geminiThinkingConfig(option: StatelessExecutionOptions): ThinkingConfig | undefined {
559
582
  const model_options = option.model_options as VertexAIGeminiOptions | undefined;
583
+
584
+ // If thinking options are explicitly set in model options, use them directly
560
585
  const include_thoughts = model_options?.include_thoughts ?? false;
561
- if (model_options?.thinking_budget_tokens) {
562
- return { includeThoughts: include_thoughts, thinkingBudget: model_options.thinking_budget_tokens };
586
+ if (model_options?.thinking_budget_tokens || model_options?.thinking_level) {
587
+ return {
588
+ includeThoughts: include_thoughts,
589
+ thinkingBudget: model_options.thinking_budget_tokens,
590
+ thinkingLevel: model_options.thinking_level,
591
+ };
563
592
  }
564
593
 
565
- // Set minimum thinking level by default.
594
+ // Set a low thinking level by default.
566
595
  // Docs: https://ai.google.dev/gemini-api/docs/thinking#set-budget
567
- if (option.model.includes("gemini-2.5") || option.model.includes("gemini-3")) {
596
+ // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/thinking
597
+ if (isGeminiModelVersionGte(option.model, '3.0')) {
598
+ return {
599
+ includeThoughts: include_thoughts,
600
+ thinkingLevel: ThinkingLevel.LOW
601
+ };
602
+ }
603
+ if (isGeminiModelVersionGte(option.model, '2.5')) {
568
604
  const thinking_budget_tokens = geminiThinkingBudget(option) ?? 0;
569
- return { includeThoughts: include_thoughts, thinkingBudget: thinking_budget_tokens };
605
+ return {
606
+ includeThoughts: include_thoughts,
607
+ thinkingBudget: thinking_budget_tokens
608
+ };
570
609
  }
571
610
  }
572
611
 
@@ -662,7 +701,7 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
662
701
  // File content handling
663
702
  if (msg.files) {
664
703
  for (const f of msg.files) {
665
- let fileUrl = await f.getURL();
704
+ const fileUrl = await f.getURL();
666
705
  const isGsUrl = fileUrl.startsWith('gs://') || fileUrl.startsWith('https://storage.googleapis.com/');
667
706
 
668
707
  if (isGsUrl) {
@@ -675,7 +714,7 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
675
714
  } else {
676
715
  // Inline data handling
677
716
  const stream = await f.getStream();
678
- const data = await readStreamAsBase64(stream);
717
+ const data = await readStreamAsBase64(stream);
679
718
  parts.push({
680
719
  inlineData: {
681
720
  data,
@@ -771,6 +810,15 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
771
810
  const modelName = splits[splits.length - 1];
772
811
  options = { ...options, model: modelName };
773
812
 
813
+ // Restore system instruction from stored conversation on resume.
814
+ // The stored _llumiverse_system contains the complete system (interaction prompt + schema)
815
+ // from the initial call. Always prefer it over the prompt's system, which on resume only
816
+ // contains the schema instruction (no interaction system segments are present on resume).
817
+ const existingSystem = extractSystemFromConversation(options.conversation);
818
+ if (existingSystem) {
819
+ prompt.system = existingSystem;
820
+ }
821
+
774
822
  let conversation = updateConversation(options.conversation, prompt.contents);
775
823
  prompt.contents = conversation;
776
824
 
@@ -797,7 +845,9 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
797
845
  }
798
846
  const content = candidate.content;
799
847
 
800
- if (candidate.finishReason && !supportedFinishReasons.includes(candidate.finishReason)) {
848
+ // Check for unsupported finish reasons, but allow recoverable tool call issues
849
+ const isRecoverableToolCall = recoverableToolCallReasons.includes(candidate.finishReason as string);
850
+ if (candidate.finishReason && !supportedFinishReasons.includes(candidate.finishReason) && !isRecoverableToolCall) {
801
851
  throw new Error(`Unsupported finish reason: ${candidate.finishReason}, `
802
852
  + `finish message: ${candidate.finishMessage}, `
803
853
  + `content: ${JSON.stringify(content, null, 2)}, safety: ${JSON.stringify(candidate.safetyRatings, null, 2)}`);
@@ -806,11 +856,17 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
806
856
  if (content) {
807
857
  tool_use = collectToolUseParts(content);
808
858
 
859
+ // For recoverable tool call issues, log warning but continue processing
860
+ // The workflow will handle the invalid tool call gracefully
861
+ if (isRecoverableToolCall && tool_use && tool_use.length > 0) {
862
+ console.warn(`[Gemini] Recoverable tool call issue (${candidate.finishReason}): ` +
863
+ `Model tried to call undeclared tool(s): ${tool_use.map(t => t.tool_name).join(', ')}`);
864
+ }
865
+
809
866
  // We clean the content before validation, so we can update the conversation.
810
867
  const cleanedContent = cleanEmptyFieldsContent(content, options.result_schema);
811
- const textResults = collectTextParts(cleanedContent);
812
- const imageResults = collectInlineDataParts(cleanedContent);
813
- result = [...textResults, ...imageResults];
868
+ // Collect all parts in order (text and images)
869
+ result = extractCompletionResults(cleanedContent);
814
870
  conversation = updateConversation(conversation, [cleanedContent]);
815
871
  }
816
872
  }
@@ -836,12 +892,21 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
836
892
  // Truncate large text content if configured
837
893
  processedConversation = truncateLargeTextInConversation(processedConversation, stripOptions);
838
894
 
895
+ // Strip old heartbeat status messages
896
+ processedConversation = stripHeartbeatsFromConversation(processedConversation, {
897
+ keepForTurns: options.stripHeartbeatsAfterTurns ?? 1,
898
+ currentTurn,
899
+ });
900
+
901
+ // Preserve system instruction in conversation for multi-turn support
902
+ const finalConversation = storeSystemInConversation(processedConversation, prompt.system);
903
+
839
904
  return {
840
905
  result: result && result.length > 0 ? result : [{ type: "text" as const, value: '' }],
841
906
  token_usage: token_usage,
842
907
  finish_reason: finish_reason,
843
908
  original_response: options.include_original_response ? response : undefined,
844
- conversation: processedConversation,
909
+ conversation: finalConversation,
845
910
  tool_use
846
911
  } satisfies Completion;
847
912
  }
@@ -855,6 +920,15 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
855
920
  const modelName = splits[splits.length - 1];
856
921
  options = { ...options, model: modelName };
857
922
 
923
+ // Restore system instruction from stored conversation on resume.
924
+ // The stored _llumiverse_system contains the complete system (interaction prompt + schema)
925
+ // from the initial call. Always prefer it over the prompt's system, which on resume only
926
+ // contains the schema instruction (no interaction system segments are present on resume).
927
+ const existingSystem = extractSystemFromConversation(options.conversation);
928
+ if (existingSystem) {
929
+ prompt.system = existingSystem;
930
+ }
931
+
858
932
  // Include conversation history in prompt contents (same as non-streaming)
859
933
  const conversation = updateConversation(options.conversation, prompt.contents);
860
934
  prompt.contents = conversation;
@@ -879,18 +953,24 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
879
953
  case FinishReason.STOP: finish_reason = "stop"; break;
880
954
  default: finish_reason = candidate.finishReason;
881
955
  }
882
- if (candidate.finishReason && !supportedFinishReasons.includes(candidate.finishReason)) {
956
+ // Check for unsupported finish reasons, but allow recoverable tool call issues
957
+ const isRecoverableToolCall = recoverableToolCallReasons.includes(candidate.finishReason as string);
958
+ if (candidate.finishReason && !supportedFinishReasons.includes(candidate.finishReason) && !isRecoverableToolCall) {
883
959
  throw new Error(`Unsupported finish reason: ${candidate.finishReason}, `
884
960
  + `finish message: ${candidate.finishMessage}, `
885
961
  + `content: ${JSON.stringify(candidate.content, null, 2)}, safety: ${JSON.stringify(candidate.safetyRatings, null, 2)}`);
886
962
  }
887
963
  if (candidate.content?.role === 'model') {
888
- const textResults = collectTextParts(candidate.content);
889
- const imageResults = collectInlineDataParts(candidate.content);
890
- const combinedResults = [...textResults, ...imageResults];
964
+ // Collect all parts in order (text and images)
965
+ const combinedResults = extractCompletionResults(candidate.content);
891
966
  tool_use = collectToolUseParts(candidate.content);
892
967
  if (tool_use) {
893
968
  finish_reason = "tool_use";
969
+ // Log warning for recoverable tool call issues
970
+ if (isRecoverableToolCall) {
971
+ console.warn(`[Gemini] Recoverable tool call issue (${candidate.finishReason}): ` +
972
+ `Model tried to call undeclared tool(s): ${tool_use.map(t => t.tool_name).join(', ')}`);
973
+ }
894
974
  }
895
975
  return {
896
976
  result: combinedResults.length > 0 ? combinedResults : [],
@@ -912,9 +992,176 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
912
992
  return stream;
913
993
  }
914
994
 
995
/**
 * Convert an error raised by the Google API client into a LlumiverseError.
 *
 * An error is recognised as a Google API error when it is an object that
 * carries a numeric `status` (used as the HTTP status code) and a `message`.
 * The resulting error message has the shape
 * `[<provider>] [<status>] <message>`; the `[<status>]` part is omitted
 * when the status is falsy (e.g. 0).
 *
 * Status codes commonly returned by the service:
 * - 400 (INVALID_ARGUMENT): invalid request parameters
 * - 401 (UNAUTHENTICATED): authentication required
 * - 403 (PERMISSION_DENIED): insufficient permissions
 * - 404 (NOT_FOUND): resource not found
 * - 429 (RESOURCE_EXHAUSTED): rate limit / quota exceeded
 * - 500 (INTERNAL): internal server error
 * - 503 (UNAVAILABLE): service temporarily unavailable
 * - 504 (DEADLINE_EXCEEDED): request timeout
 *
 * @param _driver - The calling driver (unused here).
 * @param error - The value that was thrown.
 * @param context - Error context; its `provider` is used as message prefix
 *                  and the whole object is forwarded to the LlumiverseError.
 * @returns A LlumiverseError carrying retryability, the HTTP status code
 *          and, when one can be parsed from the message, an error name.
 * @throws Rethrows `error` unchanged when it does not look like a Google
 *         API error — presumably so the caller can fall back to its default
 *         error formatting (confirm against the driver).
 * @see https://google.aip.dev/193
 * @see https://docs.cloud.google.com/vertex-ai/generative-ai/docs/model-reference/api-errors
 */
formatLlumiverseError(
    _driver: VertexAIDriver,
    error: unknown,
    context: LlumiverseErrorContext
): LlumiverseError {
    // Anything that is not shaped like a Google API error is handed back as-is.
    if (!this.isGoogleApiError(error)) {
        throw error;
    }

    const status = error.status;
    const rawMessage = error.message || String(error);

    // Surface the status code to end users by prefixing it to the message.
    const displayMessage = status ? `[${status}] ${rawMessage}` : rawMessage;

    const canRetry = this.isGeminiErrorRetryable(status);
    const parsedName = this.extractErrorName(rawMessage);

    return new LlumiverseError(
        `[${context.provider}] ${displayMessage}`,
        canRetry,
        context,
        error,
        status,
        parsedName
    );
}
1058
+
1059
/**
 * Type guard: reports whether `error` is shaped like a Google API error,
 * i.e. a non-null object exposing a numeric `status` and a `message` property.
 *
 * @param error - Any thrown value.
 * @returns True when the value has the expected shape.
 */
private isGoogleApiError(error: unknown): error is ApiError {
    if (error === null || typeof error !== 'object') {
        return false;
    }
    if (!('status' in error)) {
        return false;
    }
    if (typeof (error as { status: unknown }).status !== 'number') {
        return false;
    }
    return 'message' in error;
}
1071
+
1072
/**
 * Classify an HTTP status code returned by the Google API as retryable or not.
 *
 * Policy implemented here:
 * - retryable: 408 (request timeout), 429 (rate limit / quota exhausted)
 *   and every 5xx server error (500, 502, 503, 504, ...)
 * - not retryable: every other 4xx client error (400, 401, 403, 404, 409, ...)
 * - unknown: anything outside the 4xx/5xx ranges
 *
 * @param httpStatusCode - The HTTP status code from the API error
 * @returns True if retryable, false if not retryable, undefined when the
 *          code is outside 400-599 so the consumer can decide
 */
private isGeminiErrorRetryable(httpStatusCode: number): boolean | undefined {
    const isServerError = httpStatusCode >= 500 && httpStatusCode < 600;
    const isTransientClientError = httpStatusCode === 408 || httpStatusCode === 429;
    if (isServerError || isTransientClientError) {
        return true;
    }

    const isClientError = httpStatusCode >= 400 && httpStatusCode < 500;
    return isClientError ? false : undefined;
}
1109
+
1110
/**
 * Try to pull an error type name out of an error message.
 *
 * Three layouts are recognised, tried in this order; the first one that
 * matches wins:
 *   1. `ERROR_NAME: message`   (upper-case letters and underscores at the start)
 *   2. `[ERROR_NAME] message`  (bracketed, anywhere in the message)
 *   3. `SomethingError: message`
 *
 * @param message - The raw error message.
 * @returns The captured name, or undefined when no layout matches.
 */
private extractErrorName(message: string): string | undefined {
    const nameMatchers: RegExp[] = [
        /^([A-Z_]+):/,
        /\[([A-Z_]+)\]/,
        /^(\w+Error):/,
    ];

    const firstHit = nameMatchers
        .map(matcher => matcher.exec(message))
        .find(result => result !== null);

    return firstHit ? firstHit[1] : undefined;
}
1132
+
915
1133
  }
916
1134
 
917
1135
 
1136
/** Maximum number of UTF-16 code units of serialized args/response kept in the text form. */
const TOOL_TEXT_MAX_LENGTH = 500;

/**
 * Shorten `text` to at most `maxLength` UTF-16 code units and append `...`
 * when something was cut. If the cut would fall between the two halves of a
 * surrogate pair, the dangling high surrogate is dropped as well so the
 * result never ends in a lone surrogate.
 */
function truncateToolText(text: string, maxLength: number = TOOL_TEXT_MAX_LENGTH): string {
    if (text.length <= maxLength) {
        return text;
    }
    let end = maxLength;
    const lastKept = text.charCodeAt(end - 1);
    // 0xD800-0xDBFF is the high-surrogate range: its partner sits at `end` and would be cut off.
    if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
        end -= 1;
    }
    return text.substring(0, end) + '...';
}

/**
 * Converts functionCall and functionResponse parts to text parts in Gemini Content[].
 * Preserves tool call information while removing structured parts that require
 * tools/toolConfig to be defined in the API request.
 *
 * - A `functionCall` part becomes `{ text: "[Tool call: <name>(<json args>)]" }`;
 *   missing args render as an empty argument list.
 * - A `functionResponse` part becomes
 *   `{ text: "[Tool result for <name>: <json response>]" }`; a missing response
 *   renders as `No response`.
 * - The serialized args/response are truncated to 500 characters followed by
 *   `...`, without splitting a surrogate pair.
 * - All other parts are passed through untouched, and a content entry that has
 *   no parts or no function parts is returned by reference.
 *
 * @param contents - Conversation contents to sanitize; not mutated.
 * @returns A new array with function parts replaced by text parts.
 */
export function convertGeminiFunctionPartsToText(contents: Content[]): Content[] {
    return contents.map(content => {
        const parts = content.parts;
        if (!parts || !parts.some(p => p.functionCall || p.functionResponse)) {
            return content;
        }

        const textualParts = parts.map(part => {
            if (part.functionCall) {
                const serializedArgs = part.functionCall.args ? JSON.stringify(part.functionCall.args) : '';
                return { text: `[Tool call: ${part.functionCall.name}(${truncateToolText(serializedArgs)})]` };
            }
            if (part.functionResponse) {
                const serializedResponse = part.functionResponse.response
                    ? JSON.stringify(part.functionResponse.response)
                    : 'No response';
                return { text: `[Tool result for ${part.functionResponse.name}: ${truncateToolText(serializedResponse)}]` };
            }
            return part;
        });

        return { ...content, parts: textualParts };
    });
}
1164
+
918
1165
  function getToolDefinitions(tools: ToolDefinition[] | undefined | null): Tool | undefined {
919
1166
  if (!tools || tools.length === 0) {
920
1167
  return undefined;
@@ -958,6 +1205,36 @@ function updateConversation(conversation: unknown, prompt: Content[]): Content[]
958
1205
  return convArray.concat(prompt);
959
1206
  }
960
1207
 
1208
/** Property name under which the system instruction is kept on a conversation object. */
const SYSTEM_KEY = '_llumiverse_system';

/**
 * Read back the system instruction previously attached to a conversation.
 *
 * @param conversation - Opaque conversation value.
 * @returns The value stored under `_llumiverse_system` when the conversation
 *          is a non-null object and that value is itself a non-null object;
 *          undefined in every other case.
 */
function extractSystemFromConversation(conversation: unknown): Content | undefined {
    if (conversation === null || typeof conversation !== 'object') {
        return undefined;
    }
    const stored = (conversation as Record<string, unknown>)[SYSTEM_KEY];
    if (stored && typeof stored === 'object') {
        return stored as Content;
    }
    return undefined;
}

/**
 * Attach the system instruction to a conversation object.
 *
 * Returns a shallow copy of `conversation` with `_llumiverse_system` set to
 * `system`. The conversation is returned unchanged when `system` is falsy or
 * when `conversation` is not a non-null object.
 *
 * NOTE(review): callers are expected to pass the wrapper object produced by
 * incrementConversationTurn ({ _arrayConversation, _llumiverse_meta }) —
 * confirm against the call site.
 */
function storeSystemInConversation(conversation: unknown, system: Content | undefined): unknown {
    const canAttach = Boolean(system) && typeof conversation === 'object' && conversation !== null;
    if (!canAttach) {
        return conversation;
    }
    return { ...(conversation as object), [SYSTEM_KEY]: system };
}
1237
+
961
1238
  /**
962
1239
  *
963
1240
  * Gemini supports JSON output in the response, so we test whether the response is a valid JSON object; otherwise we treat the response as a string.
@@ -1,7 +1,7 @@
1
- import { AIModel, Completion, PromptSegment, ExecutionOptions, CompletionChunkObject } from "@llumiverse/core";
1
+ import { AIModel, Completion, CompletionChunkObject, ExecutionOptions, LlumiverseError, LlumiverseErrorContext, PromptSegment } from "@llumiverse/core";
2
2
  import { VertexAIDriver, trimModelName } from "./index.js";
3
- import { GeminiModelDefinition } from "./models/gemini.js";
4
3
  import { ClaudeModelDefinition } from "./models/claude.js";
4
+ import { GeminiModelDefinition } from "./models/gemini.js";
5
5
  import { LLamaModelDefinition } from "./models/llama.js";
6
6
 
7
7
  export interface ModelDefinition<PromptT = any> {
@@ -11,6 +11,11 @@ export interface ModelDefinition<PromptT = any> {
11
11
  requestTextCompletion: (driver: VertexAIDriver, prompt: PromptT, options: ExecutionOptions) => Promise<Completion>;
12
12
  requestTextCompletionStream: (driver: VertexAIDriver, prompt: PromptT, options: ExecutionOptions) => Promise<AsyncIterable<CompletionChunkObject>>;
13
13
  preValidationProcessing?(result: Completion, options: ExecutionOptions): { result: Completion, options: ExecutionOptions };
14
+ /**
15
+ * Format provider-specific errors into standardized LlumiverseError.
16
+ * Optional - if not provided, VertexAIDriver will use default error handling.
17
+ */
18
+ formatLlumiverseError?(driver: VertexAIDriver, error: unknown, context: LlumiverseErrorContext): LlumiverseError;
14
19
  }
15
20
 
16
21
  export function getModelDefinition(model: string): ModelDefinition {