@llumiverse/drivers 1.0.0-dev.20260224.234313Z → 1.0.0-dev.20260331.080752Z

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/lib/cjs/bedrock/converse.js +86 -12
  2. package/lib/cjs/bedrock/converse.js.map +1 -1
  3. package/lib/cjs/bedrock/index.js +208 -1
  4. package/lib/cjs/bedrock/index.js.map +1 -1
  5. package/lib/cjs/groq/index.js +7 -4
  6. package/lib/cjs/groq/index.js.map +1 -1
  7. package/lib/cjs/openai/index.js +457 -26
  8. package/lib/cjs/openai/index.js.map +1 -1
  9. package/lib/cjs/openai/openai_compatible.js +1 -0
  10. package/lib/cjs/openai/openai_compatible.js.map +1 -1
  11. package/lib/cjs/vertexai/index.js +42 -0
  12. package/lib/cjs/vertexai/index.js.map +1 -1
  13. package/lib/cjs/vertexai/models/claude.js +230 -2
  14. package/lib/cjs/vertexai/models/claude.js.map +1 -1
  15. package/lib/cjs/vertexai/models/gemini.js +261 -41
  16. package/lib/cjs/vertexai/models/gemini.js.map +1 -1
  17. package/lib/cjs/vertexai/models.js +1 -1
  18. package/lib/cjs/vertexai/models.js.map +1 -1
  19. package/lib/esm/bedrock/converse.js +80 -6
  20. package/lib/esm/bedrock/converse.js.map +1 -1
  21. package/lib/esm/bedrock/index.js +207 -2
  22. package/lib/esm/bedrock/index.js.map +1 -1
  23. package/lib/esm/groq/index.js +7 -4
  24. package/lib/esm/groq/index.js.map +1 -1
  25. package/lib/esm/openai/index.js +456 -27
  26. package/lib/esm/openai/index.js.map +1 -1
  27. package/lib/esm/openai/openai_compatible.js +1 -0
  28. package/lib/esm/openai/openai_compatible.js.map +1 -1
  29. package/lib/esm/vertexai/index.js +43 -1
  30. package/lib/esm/vertexai/index.js.map +1 -1
  31. package/lib/esm/vertexai/models/claude.js +229 -3
  32. package/lib/esm/vertexai/models/claude.js.map +1 -1
  33. package/lib/esm/vertexai/models/gemini.js +262 -43
  34. package/lib/esm/vertexai/models/gemini.js.map +1 -1
  35. package/lib/esm/vertexai/models.js +1 -1
  36. package/lib/esm/vertexai/models.js.map +1 -1
  37. package/lib/types/bedrock/converse.d.ts +1 -2
  38. package/lib/types/bedrock/converse.d.ts.map +1 -1
  39. package/lib/types/bedrock/index.d.ts +53 -1
  40. package/lib/types/bedrock/index.d.ts.map +1 -1
  41. package/lib/types/openai/index.d.ts +96 -1
  42. package/lib/types/openai/index.d.ts.map +1 -1
  43. package/lib/types/openai/openai_compatible.d.ts +5 -0
  44. package/lib/types/openai/openai_compatible.d.ts.map +1 -1
  45. package/lib/types/openai/openai_format.d.ts +1 -1
  46. package/lib/types/vertexai/index.d.ts +11 -1
  47. package/lib/types/vertexai/index.d.ts.map +1 -1
  48. package/lib/types/vertexai/models/claude.d.ts +64 -1
  49. package/lib/types/vertexai/models/claude.d.ts.map +1 -1
  50. package/lib/types/vertexai/models/gemini.d.ts +61 -1
  51. package/lib/types/vertexai/models/gemini.d.ts.map +1 -1
  52. package/lib/types/vertexai/models.d.ts +6 -1
  53. package/lib/types/vertexai/models.d.ts.map +1 -1
  54. package/package.json +9 -9
  55. package/src/bedrock/converse.ts +85 -10
  56. package/src/bedrock/error-handling.test.ts +352 -0
  57. package/src/bedrock/index.ts +225 -1
  58. package/src/groq/index.ts +9 -4
  59. package/src/openai/error-handling.test.ts +567 -0
  60. package/src/openai/index.ts +505 -29
  61. package/src/openai/openai_compatible.ts +7 -0
  62. package/src/openai/openai_format.ts +1 -1
  63. package/src/vertexai/index.ts +56 -5
  64. package/src/vertexai/models/claude-error-handling.test.ts +432 -0
  65. package/src/vertexai/models/claude.ts +273 -7
  66. package/src/vertexai/models/gemini-error-handling.test.ts +353 -0
  67. package/src/vertexai/models/gemini.ts +304 -48
  68. package/src/vertexai/models.ts +7 -2
@@ -1,17 +1,24 @@
1
+ import type { ApiError } from "@google/genai";
1
2
  import {
2
3
  Content, FinishReason, FunctionCallingConfigMode, FunctionDeclaration, GenerateContentConfig, GenerateContentParameters,
3
4
  GenerateContentResponseUsageMetadata,
4
- HarmBlockThreshold, HarmCategory, Modality, Part, SafetySetting, Schema, ThinkingConfig, Tool, Type
5
+ HarmBlockThreshold, HarmCategory, Modality, Part,
6
+ ProminentPeople,
7
+ SafetySetting, Schema, ThinkingConfig,
8
+ ThinkingLevel,
9
+ Tool, Type
5
10
  } from "@google/genai";
6
11
  import {
7
12
  AIModel, Completion, CompletionChunkObject, CompletionResult, ExecutionOptions,
8
13
  ExecutionTokenUsage,
9
14
  getConversationMeta,
10
- getMaxTokensLimitVertexAi,
15
+ getGeminiModelVersion,
11
16
  incrementConversationTurn,
12
- JSONObject, JSONSchema, ModelType, PromptOptions, PromptRole,
17
+ isGeminiModelVersionGte,
18
+ JSONObject, JSONSchema, LlumiverseError, LlumiverseErrorContext, ModelType, PromptOptions, PromptRole,
13
19
  PromptSegment, readStreamAsBase64, StatelessExecutionOptions,
14
20
  stripBase64ImagesFromConversation,
21
+ stripHeartbeatsFromConversation,
15
22
  ToolDefinition, ToolUse,
16
23
  truncateLargeTextInConversation,
17
24
  unwrapConversationArray,
@@ -53,15 +60,36 @@ const geminiSafetySettings: SafetySetting[] = [
53
60
  }
54
61
  ];
55
62
 
63
+ // We do the mapping here rather than in common to avoid bringing the SDK into the common package.
64
+ function getProminentPeopleOption(prominentPeople?: "PROMINENT_PEOPLE_UNSPECIFIED" | "ALLOW_PROMINENT_PEOPLE" | "BLOCK_PROMINENT_PEOPLE") {
65
+ switch (prominentPeople) {
66
+ case "ALLOW_PROMINENT_PEOPLE":
67
+ return ProminentPeople.ALLOW_PROMINENT_PEOPLE;
68
+ case "BLOCK_PROMINENT_PEOPLE":
69
+ return ProminentPeople.BLOCK_PROMINENT_PEOPLE;
70
+ case "PROMINENT_PEOPLE_UNSPECIFIED":
71
+ return ProminentPeople.PROMINENT_PEOPLE_UNSPECIFIED;
72
+ default:
73
+ return undefined;
74
+ }
75
+ }
76
+
56
77
  function getGeminiPayload(options: ExecutionOptions, prompt: GenerateContentPrompt): GenerateContentParameters {
57
78
  const model_options = options.model_options as VertexAIGeminiOptions | undefined;
58
79
  const tools = getToolDefinitions(options.tools);
59
80
 
60
- const useStructuredOutput = supportsStructuredOutput(options) && !tools;
81
+ // When no tools are provided but conversation contains functionCall/functionResponse parts
82
+ // (e.g. checkpoint summary calls), convert them to text to avoid API errors
83
+ if (!tools && prompt.contents) {
84
+ const hasToolParts = prompt.contents.some(c =>
85
+ c.parts?.some(p => p.functionCall || p.functionResponse)
86
+ );
87
+ if (hasToolParts) {
88
+ prompt.contents = convertGeminiFunctionPartsToText(prompt.contents);
89
+ }
90
+ }
61
91
 
62
- const thinkingConfigNeeded = model_options?.include_thoughts
63
- || model_options?.thinking_budget_tokens
64
- || options.model.includes("gemini-2.5");
92
+ const useStructuredOutput = supportsStructuredOutput(options) && !tools;
65
93
 
66
94
  const configNanoBanana: GenerateContentConfig = {
67
95
  systemInstruction: prompt.system,
@@ -71,10 +99,16 @@ function getGeminiPayload(options: ExecutionOptions, prompt: GenerateContentProm
71
99
  //Model options
72
100
  temperature: model_options?.temperature,
73
101
  topP: model_options?.top_p,
74
- maxOutputTokens: geminiMaxTokens(options),
102
+ maxOutputTokens: model_options?.max_tokens,
75
103
  stopSequences: model_options?.stop_sequence,
104
+ thinkingConfig: geminiThinkingConfig(options),
76
105
  imageConfig: {
106
+ imageSize: model_options?.image_size,
77
107
  aspectRatio: model_options?.image_aspect_ratio,
108
+ personGeneration: model_options?.person_generation,
109
+ prominentPeople: getProminentPeopleOption(model_options?.prominent_people),
110
+ outputMimeType: model_options?.output_mime_type,
111
+ outputCompressionQuality: model_options?.output_compression_quality,
78
112
  }
79
113
  }
80
114
 
@@ -95,12 +129,12 @@ function getGeminiPayload(options: ExecutionOptions, prompt: GenerateContentProm
95
129
  temperature: model_options?.temperature,
96
130
  topP: model_options?.top_p,
97
131
  topK: model_options?.top_k,
98
- maxOutputTokens: geminiMaxTokens(options),
132
+ maxOutputTokens: model_options?.max_tokens,
99
133
  stopSequences: model_options?.stop_sequence,
100
134
  presencePenalty: model_options?.presence_penalty,
101
135
  frequencyPenalty: model_options?.frequency_penalty,
102
136
  seed: model_options?.seed,
103
- thinkingConfig: thinkingConfigNeeded ? geminiThinkingConfig(options) : undefined,
137
+ thinkingConfig: geminiThinkingConfig(options),
104
138
  }
105
139
 
106
140
  return {
@@ -435,7 +469,11 @@ function removeEmptyJSONArray(array: any[], schema: JSONSchema): any[] {
435
469
  return cleanedArray.filter(item => !isEmpty(item));
436
470
  }
437
471
 
438
- function collectTextParts(content: Content): CompletionResult[] {
472
+ /**
473
+ * Collect all parts (text and images) from content in order.
474
+ * This preserves the original ordering of text and image parts.
475
+ */
476
+ function extractCompletionResults(content: Content): CompletionResult[] {
439
477
  const results: CompletionResult[] = [];
440
478
  const parts = content.parts;
441
479
  if (parts) {
@@ -445,18 +483,7 @@ function collectTextParts(content: Content): CompletionResult[] {
445
483
  type: "text",
446
484
  value: part.text
447
485
  });
448
- }
449
- }
450
- }
451
- return results;
452
- }
453
-
454
- function collectInlineDataParts(content: Content): CompletionResult[] {
455
- const results: CompletionResult[] = [];
456
- const parts = content.parts;
457
- if (parts) {
458
- for (const part of parts) {
459
- if (part.inlineData) {
486
+ } else if (part.inlineData) {
460
487
  const base64ImageBytes: string = part.inlineData.data ?? "";
461
488
  const mimeType = part.inlineData.mimeType ?? "image/png";
462
489
  const imageUrl = `data:${mimeType};base64,${base64ImageBytes}`;
@@ -533,25 +560,16 @@ const recoverableToolCallReasons = [
533
560
  'UNEXPECTED_TOOL_CALL', // Model called an undeclared tool
534
561
  ]
535
562
 
536
- function geminiMaxTokens(option: StatelessExecutionOptions) {
537
- const model_options = option.model_options as VertexAIGeminiOptions | undefined;
538
- if (model_options?.max_tokens) {
539
- return model_options.max_tokens;
540
- }
541
- if (option.model.includes("gemini-2.5")) {
542
- return getMaxTokensLimitVertexAi(option.model);
543
- }
544
- return undefined;
545
- }
546
563
 
547
564
  function geminiThinkingBudget(option: StatelessExecutionOptions) {
548
565
  const model_options = option.model_options as VertexAIGeminiOptions | undefined;
566
+ // If thinking_budget_tokens is explicitly set in model options, use it directly
549
567
  if (model_options?.thinking_budget_tokens) {
550
568
  return model_options.thinking_budget_tokens;
551
569
  }
552
570
  // Set minimum thinking level by default.
553
571
  // Docs: https://ai.google.dev/gemini-api/docs/thinking#set-budget
554
- if (option.model.includes("gemini-2.5")) {
572
+ if (getGeminiModelVersion(option.model) == '2.5') {
555
573
  if (option.model.includes("pro")) {
556
574
  return 128;
557
575
  }
@@ -562,16 +580,32 @@ function geminiThinkingBudget(option: StatelessExecutionOptions) {
562
580
 
563
581
  function geminiThinkingConfig(option: StatelessExecutionOptions): ThinkingConfig | undefined {
564
582
  const model_options = option.model_options as VertexAIGeminiOptions | undefined;
583
+
584
+ // If thinking options are explicitly set in model options, use them directly
565
585
  const include_thoughts = model_options?.include_thoughts ?? false;
566
- if (model_options?.thinking_budget_tokens) {
567
- return { includeThoughts: include_thoughts, thinkingBudget: model_options.thinking_budget_tokens };
586
+ if (model_options?.thinking_budget_tokens || model_options?.thinking_level) {
587
+ return {
588
+ includeThoughts: include_thoughts,
589
+ thinkingBudget: model_options.thinking_budget_tokens,
590
+ thinkingLevel: model_options.thinking_level,
591
+ };
568
592
  }
569
593
 
570
- // Set minimum thinking level by default.
594
+ // Set a low thinking level by default.
571
595
  // Docs: https://ai.google.dev/gemini-api/docs/thinking#set-budget
572
- if (option.model.includes("gemini-2.5") || option.model.includes("gemini-3")) {
596
+ // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/thinking
597
+ if (isGeminiModelVersionGte(option.model, '3.0')) {
598
+ return {
599
+ includeThoughts: include_thoughts,
600
+ thinkingLevel: ThinkingLevel.LOW
601
+ };
602
+ }
603
+ if (isGeminiModelVersionGte(option.model, '2.5')) {
573
604
  const thinking_budget_tokens = geminiThinkingBudget(option) ?? 0;
574
- return { includeThoughts: include_thoughts, thinkingBudget: thinking_budget_tokens };
605
+ return {
606
+ includeThoughts: include_thoughts,
607
+ thinkingBudget: thinking_budget_tokens
608
+ };
575
609
  }
576
610
  }
577
611
 
@@ -667,7 +701,7 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
667
701
  // File content handling
668
702
  if (msg.files) {
669
703
  for (const f of msg.files) {
670
- let fileUrl = await f.getURL();
704
+ const fileUrl = await f.getURL();
671
705
  const isGsUrl = fileUrl.startsWith('gs://') || fileUrl.startsWith('https://storage.googleapis.com/');
672
706
 
673
707
  if (isGsUrl) {
@@ -680,7 +714,7 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
680
714
  } else {
681
715
  // Inline data handling
682
716
  const stream = await f.getStream();
683
- const data = await readStreamAsBase64(stream);
717
+ const data = await readStreamAsBase64(stream);
684
718
  parts.push({
685
719
  inlineData: {
686
720
  data,
@@ -776,6 +810,15 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
776
810
  const modelName = splits[splits.length - 1];
777
811
  options = { ...options, model: modelName };
778
812
 
813
+ // Restore system instruction from stored conversation on resume.
814
+ // The stored _llumiverse_system contains the complete system (interaction prompt + schema)
815
+ // from the initial call. Always prefer it over the prompt's system, which on resume only
816
+ // contains the schema instruction (no interaction system segments are present on resume).
817
+ const existingSystem = extractSystemFromConversation(options.conversation);
818
+ if (existingSystem) {
819
+ prompt.system = existingSystem;
820
+ }
821
+
779
822
  let conversation = updateConversation(options.conversation, prompt.contents);
780
823
  prompt.contents = conversation;
781
824
 
@@ -822,9 +865,8 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
822
865
 
823
866
  // We clean the content before validation, so we can update the conversation.
824
867
  const cleanedContent = cleanEmptyFieldsContent(content, options.result_schema);
825
- const textResults = collectTextParts(cleanedContent);
826
- const imageResults = collectInlineDataParts(cleanedContent);
827
- result = [...textResults, ...imageResults];
868
+ // Collect all parts in order (text and images)
869
+ result = extractCompletionResults(cleanedContent);
828
870
  conversation = updateConversation(conversation, [cleanedContent]);
829
871
  }
830
872
  }
@@ -850,12 +892,21 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
850
892
  // Truncate large text content if configured
851
893
  processedConversation = truncateLargeTextInConversation(processedConversation, stripOptions);
852
894
 
895
+ // Strip old heartbeat status messages
896
+ processedConversation = stripHeartbeatsFromConversation(processedConversation, {
897
+ keepForTurns: options.stripHeartbeatsAfterTurns ?? 1,
898
+ currentTurn,
899
+ });
900
+
901
+ // Preserve system instruction in conversation for multi-turn support
902
+ const finalConversation = storeSystemInConversation(processedConversation, prompt.system);
903
+
853
904
  return {
854
905
  result: result && result.length > 0 ? result : [{ type: "text" as const, value: '' }],
855
906
  token_usage: token_usage,
856
907
  finish_reason: finish_reason,
857
908
  original_response: options.include_original_response ? response : undefined,
858
- conversation: processedConversation,
909
+ conversation: finalConversation,
859
910
  tool_use
860
911
  } satisfies Completion;
861
912
  }
@@ -869,6 +920,15 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
869
920
  const modelName = splits[splits.length - 1];
870
921
  options = { ...options, model: modelName };
871
922
 
923
+ // Restore system instruction from stored conversation on resume.
924
+ // The stored _llumiverse_system contains the complete system (interaction prompt + schema)
925
+ // from the initial call. Always prefer it over the prompt's system, which on resume only
926
+ // contains the schema instruction (no interaction system segments are present on resume).
927
+ const existingSystem = extractSystemFromConversation(options.conversation);
928
+ if (existingSystem) {
929
+ prompt.system = existingSystem;
930
+ }
931
+
872
932
  // Include conversation history in prompt contents (same as non-streaming)
873
933
  const conversation = updateConversation(options.conversation, prompt.contents);
874
934
  prompt.contents = conversation;
@@ -901,9 +961,8 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
901
961
  + `content: ${JSON.stringify(candidate.content, null, 2)}, safety: ${JSON.stringify(candidate.safetyRatings, null, 2)}`);
902
962
  }
903
963
  if (candidate.content?.role === 'model') {
904
- const textResults = collectTextParts(candidate.content);
905
- const imageResults = collectInlineDataParts(candidate.content);
906
- const combinedResults = [...textResults, ...imageResults];
964
+ // Collect all parts in order (text and images)
965
+ const combinedResults = extractCompletionResults(candidate.content);
907
966
  tool_use = collectToolUseParts(candidate.content);
908
967
  if (tool_use) {
909
968
  finish_reason = "tool_use";
@@ -933,9 +992,176 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
933
992
  return stream;
934
993
  }
935
994
 
995
+ /**
996
+ * Format Google API errors into LlumiverseError with proper status codes and retryability.
997
+ *
998
+ * Google API errors follow AIP-193 standard:
999
+ * - ApiError.status: HTTP status code
1000
+ * - ApiError.message: Error message
1001
+ *
1002
+ * Common error codes:
1003
+ * - 400 (INVALID_ARGUMENT): Invalid request parameters
1004
+ * - 401 (UNAUTHENTICATED): Authentication required
1005
+ * - 403 (PERMISSION_DENIED): Insufficient permissions
1006
+ * - 404 (NOT_FOUND): Resource not found
1007
+ * - 429 (RESOURCE_EXHAUSTED): Rate limit/quota exceeded
1008
+ * - 500 (INTERNAL): Internal server error
1009
+ * - 503 (UNAVAILABLE): Service temporarily unavailable
1010
+ * - 504 (DEADLINE_EXCEEDED): Request timeout
1011
+ *
1012
+ * @see https://google.aip.dev/193
1013
+ * @see https://docs.cloud.google.com/vertex-ai/generative-ai/docs/model-reference/api-errors
1014
+ */
1015
+ formatLlumiverseError(
1016
+ _driver: VertexAIDriver,
1017
+ error: unknown,
1018
+ context: LlumiverseErrorContext
1019
+ ): LlumiverseError {
1020
+ // Check if it's a Google API error with status code
1021
+ const isApiError = this.isGoogleApiError(error);
1022
+
1023
+ if (!isApiError) {
1024
+ // Not a Google API error, use default handling
1025
+ // This will be called by the driver's default formatLlumiverseError
1026
+ throw error;
1027
+ }
1028
+
1029
+ const apiError = error as ApiError;
1030
+ const httpStatusCode = apiError.status;
1031
+
1032
+ // Extract error message
1033
+ const message = apiError.message || String(error);
1034
+
1035
+ // Build user-facing message with status code
1036
+ let userMessage = message;
1037
+
1038
+ // Include status code in message (for end-user visibility)
1039
+ if (httpStatusCode) {
1040
+ userMessage = `[${httpStatusCode}] ${userMessage}`;
1041
+ }
1042
+
1043
+ // Determine retryability based on Google error codes
1044
+ const retryable = this.isGeminiErrorRetryable(httpStatusCode);
1045
+
1046
+ // Extract error name/type from message if present
1047
+ const errorName = this.extractErrorName(message);
1048
+
1049
+ return new LlumiverseError(
1050
+ `[${context.provider}] ${userMessage}`,
1051
+ retryable,
1052
+ context,
1053
+ error,
1054
+ httpStatusCode,
1055
+ errorName
1056
+ );
1057
+ }
1058
+
1059
+ /**
1060
+ * Type guard to check if error is a Google API error.
1061
+ */
1062
+ private isGoogleApiError(error: unknown): error is ApiError {
1063
+ return (
1064
+ error !== null &&
1065
+ typeof error === 'object' &&
1066
+ 'status' in error &&
1067
+ typeof (error as any).status === 'number' &&
1068
+ 'message' in error
1069
+ );
1070
+ }
1071
+
1072
+ /**
1073
+ * Determine if a Google API error is retryable based on HTTP status code.
1074
+ *
1075
+ * Retryable errors (per Google AIP-194):
1076
+ * - 408 (REQUEST_TIMEOUT): Request timeout
1077
+ * - 429 (RESOURCE_EXHAUSTED): Rate limit exceeded, quota exhausted
1078
+ * - 500 (INTERNAL): Internal server error
1079
+ * - 502 (BAD_GATEWAY): Bad gateway
1080
+ * - 503 (UNAVAILABLE): Service temporarily unavailable
1081
+ * - 504 (DEADLINE_EXCEEDED): Gateway timeout
1082
+ *
1083
+ * Non-retryable errors:
1084
+ * - 400 (INVALID_ARGUMENT): Invalid request parameters
1085
+ * - 401 (UNAUTHENTICATED): Authentication required
1086
+ * - 403 (PERMISSION_DENIED): Insufficient permissions
1087
+ * - 404 (NOT_FOUND): Resource not found
1088
+ * - 409 (CONFLICT): Resource conflict
1089
+ * - Other 4xx client errors
1090
+ *
1091
+ * @param httpStatusCode - The HTTP status code from the API error
1092
+ * @returns True if retryable, false if not retryable, undefined if unknown
1093
+ */
1094
+ private isGeminiErrorRetryable(httpStatusCode: number): boolean | undefined {
1095
+ // Retryable status codes
1096
+ if (httpStatusCode === 408) return true; // Request timeout
1097
+ if (httpStatusCode === 429) return true; // Rate limit/quota
1098
+ if (httpStatusCode === 502) return true; // Bad gateway
1099
+ if (httpStatusCode === 503) return true; // Service unavailable
1100
+ if (httpStatusCode === 504) return true; // Gateway timeout
1101
+ if (httpStatusCode >= 500 && httpStatusCode < 600) return true; // Other 5xx server errors
1102
+
1103
+ // Non-retryable 4xx client errors
1104
+ if (httpStatusCode >= 400 && httpStatusCode < 500) return false;
1105
+
1106
+ // Unknown status codes - let consumer decide retry strategy
1107
+ return undefined;
1108
+ }
1109
+
1110
+ /**
1111
+ * Extract error type name from error message.
1112
+ * Google errors often include the error type in the message.
1113
+ * Examples: "INVALID_ARGUMENT", "RESOURCE_EXHAUSTED", "PERMISSION_DENIED"
1114
+ */
1115
+ private extractErrorName(message: string): string | undefined {
1116
+ // Common Google error patterns
1117
+ const patterns = [
1118
+ /^([A-Z_]+):/, // "ERROR_NAME: message"
1119
+ /\[([A-Z_]+)\]/, // "[ERROR_NAME] message"
1120
+ /^(\w+Error):/, // "ErrorTypeError: message"
1121
+ ];
1122
+
1123
+ for (const pattern of patterns) {
1124
+ const match = message.match(pattern);
1125
+ if (match) {
1126
+ return match[1];
1127
+ }
1128
+ }
1129
+
1130
+ return undefined;
1131
+ }
1132
+
936
1133
  }
937
1134
 
938
1135
 
1136
+ /**
1137
+ * Converts functionCall and functionResponse parts to text parts in Gemini Content[].
1138
+ * Preserves tool call information while removing structured parts that require
1139
+ * tools/toolConfig to be defined in the API request.
1140
+ */
1141
+ export function convertGeminiFunctionPartsToText(contents: Content[]): Content[] {
1142
+ return contents.map(content => {
1143
+ if (!content.parts) return content;
1144
+ const hasFunctionParts = content.parts.some(p => p.functionCall || p.functionResponse);
1145
+ if (!hasFunctionParts) return content;
1146
+
1147
+ const newParts = content.parts.map(part => {
1148
+ if (part.functionCall) {
1149
+ const argsStr = part.functionCall.args ? JSON.stringify(part.functionCall.args) : '';
1150
+ const truncated = argsStr.length > 500 ? argsStr.substring(0, 500) + '...' : argsStr;
1151
+ return { text: `[Tool call: ${part.functionCall.name}(${truncated})]` };
1152
+ }
1153
+ if (part.functionResponse) {
1154
+ const respStr = part.functionResponse.response
1155
+ ? JSON.stringify(part.functionResponse.response) : 'No response';
1156
+ const truncated = respStr.length > 500 ? respStr.substring(0, 500) + '...' : respStr;
1157
+ return { text: `[Tool result for ${part.functionResponse.name}: ${truncated}]` };
1158
+ }
1159
+ return part;
1160
+ });
1161
+ return { ...content, parts: newParts };
1162
+ });
1163
+ }
1164
+
939
1165
  function getToolDefinitions(tools: ToolDefinition[] | undefined | null): Tool | undefined {
940
1166
  if (!tools || tools.length === 0) {
941
1167
  return undefined;
@@ -979,6 +1205,36 @@ function updateConversation(conversation: unknown, prompt: Content[]): Content[]
979
1205
  return convArray.concat(prompt);
980
1206
  }
981
1207
 
1208
+ const SYSTEM_KEY = '_llumiverse_system';
1209
+
1210
+ /**
1211
+ * Extract the stored system instruction from a Gemini conversation object.
1212
+ * Returns undefined if no system was stored.
1213
+ */
1214
+ function extractSystemFromConversation(conversation: unknown): Content | undefined {
1215
+ if (typeof conversation === 'object' && conversation !== null) {
1216
+ const c = conversation as Record<string, unknown>;
1217
+ if (c[SYSTEM_KEY] && typeof c[SYSTEM_KEY] === 'object') {
1218
+ return c[SYSTEM_KEY] as Content;
1219
+ }
1220
+ }
1221
+ return undefined;
1222
+ }
1223
+
1224
+ /**
1225
+ * Store the system instruction in the Gemini conversation wrapper object.
1226
+ * The conversation is already wrapped by incrementConversationTurn into
1227
+ * { _arrayConversation: Content[], _llumiverse_meta: {...} }.
1228
+ * We add _llumiverse_system alongside these fields.
1229
+ */
1230
+ function storeSystemInConversation(conversation: unknown, system: Content | undefined): unknown {
1231
+ if (!system) return conversation;
1232
+ if (typeof conversation === 'object' && conversation !== null) {
1233
+ return { ...conversation as object, [SYSTEM_KEY]: system };
1234
+ }
1235
+ return conversation;
1236
+ }
1237
+
982
1238
  /**
983
1239
  *
984
1240
  * Gemini supports JSON output in the response. so we test if the response is a valid JSON object. otherwise we treat the response as a string.
@@ -1,7 +1,7 @@
1
- import { AIModel, Completion, PromptSegment, ExecutionOptions, CompletionChunkObject } from "@llumiverse/core";
1
+ import { AIModel, Completion, CompletionChunkObject, ExecutionOptions, LlumiverseError, LlumiverseErrorContext, PromptSegment } from "@llumiverse/core";
2
2
  import { VertexAIDriver, trimModelName } from "./index.js";
3
- import { GeminiModelDefinition } from "./models/gemini.js";
4
3
  import { ClaudeModelDefinition } from "./models/claude.js";
4
+ import { GeminiModelDefinition } from "./models/gemini.js";
5
5
  import { LLamaModelDefinition } from "./models/llama.js";
6
6
 
7
7
  export interface ModelDefinition<PromptT = any> {
@@ -11,6 +11,11 @@ export interface ModelDefinition<PromptT = any> {
11
11
  requestTextCompletion: (driver: VertexAIDriver, prompt: PromptT, options: ExecutionOptions) => Promise<Completion>;
12
12
  requestTextCompletionStream: (driver: VertexAIDriver, prompt: PromptT, options: ExecutionOptions) => Promise<AsyncIterable<CompletionChunkObject>>;
13
13
  preValidationProcessing?(result: Completion, options: ExecutionOptions): { result: Completion, options: ExecutionOptions };
14
+ /**
15
+ * Format provider-specific errors into standardized LlumiverseError.
16
+ * Optional - if not provided, VertexAIDriver will use default error handling.
17
+ */
18
+ formatLlumiverseError?(driver: VertexAIDriver, error: unknown, context: LlumiverseErrorContext): LlumiverseError;
14
19
  }
15
20
 
16
21
  export function getModelDefinition(model: string): ModelDefinition {