npm - @llumiverse/drivers - Versions diffs - 0.18.0 → 0.19.0 - Mend

@llumiverse/drivers 0.18.0 → 0.19.0

This diff shows the changes between publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (85)

package/lib/cjs/bedrock/index.js +19 -22
package/lib/cjs/bedrock/index.js.map +1 -1
package/lib/cjs/huggingface_ie.js +1 -1
package/lib/cjs/huggingface_ie.js.map +1 -1
package/lib/cjs/mistral/index.js +1 -1
package/lib/cjs/mistral/index.js.map +1 -1
package/lib/cjs/openai/index.js +10 -14
package/lib/cjs/openai/index.js.map +1 -1
package/lib/cjs/togetherai/index.js +1 -1
package/lib/cjs/togetherai/index.js.map +1 -1
package/lib/cjs/vertexai/index.js +81 -18
package/lib/cjs/vertexai/index.js.map +1 -1
package/lib/cjs/vertexai/models/claude.js +46 -66
package/lib/cjs/vertexai/models/claude.js.map +1 -1
package/lib/cjs/vertexai/models/gemini.js +413 -80
package/lib/cjs/vertexai/models/gemini.js.map +1 -1
package/lib/cjs/vertexai/models/llama.js +182 -0
package/lib/cjs/vertexai/models/llama.js.map +1 -0
package/lib/cjs/vertexai/models.js +4 -0
package/lib/cjs/vertexai/models.js.map +1 -1
package/lib/cjs/watsonx/index.js +1 -1
package/lib/cjs/watsonx/index.js.map +1 -1
package/lib/cjs/xai/index.js +1 -1
package/lib/cjs/xai/index.js.map +1 -1
package/lib/esm/bedrock/index.js +19 -22
package/lib/esm/bedrock/index.js.map +1 -1
package/lib/esm/huggingface_ie.js +1 -1
package/lib/esm/huggingface_ie.js.map +1 -1
package/lib/esm/mistral/index.js +1 -1
package/lib/esm/mistral/index.js.map +1 -1
package/lib/esm/openai/index.js +12 -16
package/lib/esm/openai/index.js.map +1 -1
package/lib/esm/togetherai/index.js +1 -1
package/lib/esm/togetherai/index.js.map +1 -1
package/lib/esm/vertexai/index.js +81 -18
package/lib/esm/vertexai/index.js.map +1 -1
package/lib/esm/vertexai/models/claude.js +46 -66
package/lib/esm/vertexai/models/claude.js.map +1 -1
package/lib/esm/vertexai/models/gemini.js +409 -76
package/lib/esm/vertexai/models/gemini.js.map +1 -1
package/lib/esm/vertexai/models/llama.js +178 -0
package/lib/esm/vertexai/models/llama.js.map +1 -0
package/lib/esm/vertexai/models.js +4 -0
package/lib/esm/vertexai/models.js.map +1 -1
package/lib/esm/watsonx/index.js +1 -1
package/lib/esm/watsonx/index.js.map +1 -1
package/lib/esm/xai/index.js +1 -1
package/lib/esm/xai/index.js.map +1 -1
package/lib/types/bedrock/index.d.ts.map +1 -1
package/lib/types/groq/index.d.ts +1 -1
package/lib/types/groq/index.d.ts.map +1 -1
package/lib/types/huggingface_ie.d.ts +1 -1
package/lib/types/huggingface_ie.d.ts.map +1 -1
package/lib/types/mistral/index.d.ts +1 -1
package/lib/types/mistral/index.d.ts.map +1 -1
package/lib/types/openai/index.d.ts.map +1 -1
package/lib/types/togetherai/index.d.ts +1 -1
package/lib/types/togetherai/index.d.ts.map +1 -1
package/lib/types/vertexai/index.d.ts +17 -7
package/lib/types/vertexai/index.d.ts.map +1 -1
package/lib/types/vertexai/models/claude.d.ts.map +1 -1
package/lib/types/vertexai/models/gemini.d.ts +9 -6
package/lib/types/vertexai/models/gemini.d.ts.map +1 -1
package/lib/types/vertexai/models/llama.d.ts +20 -0
package/lib/types/vertexai/models/llama.d.ts.map +1 -0
package/lib/types/vertexai/models.d.ts +6 -2
package/lib/types/vertexai/models.d.ts.map +1 -1
package/lib/types/watsonx/index.d.ts +1 -1
package/lib/types/watsonx/index.d.ts.map +1 -1
package/lib/types/xai/index.d.ts +1 -1
package/lib/types/xai/index.d.ts.map +1 -1
package/package.json +16 -16
package/src/bedrock/index.ts +19 -22
package/src/groq/index.ts +1 -1
package/src/huggingface_ie.ts +1 -1
package/src/mistral/index.ts +1 -1
package/src/openai/index.ts +12 -16
package/src/togetherai/index.ts +1 -1
package/src/vertexai/index.ts +95 -22
package/src/vertexai/models/claude.ts +54 -69
package/src/vertexai/models/gemini.ts +473 -93
package/src/vertexai/models/llama.ts +261 -0
package/src/vertexai/models.ts +6 -2
package/src/watsonx/index.ts +1 -1
package/src/xai/index.ts +1 -1

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@llumiverse/drivers",
-  "version": "0.18.0",
+  "version": "0.19.0",
   "type": "module",
   "description": "LLM driver implementations. Currently supported are: openai, huggingface, bedrock, replicate.",
   "files": [
@@ -48,29 +48,29 @@
     "vitest": "^3.0.9"
   },
   "dependencies": {
-    "@anthropic-ai/sdk": "^0.39.0",
-    "@anthropic-ai/vertex-sdk": "^0.7.0",
-    "@aws-sdk/client-bedrock": "^3.787.0",
-    "@aws-sdk/client-bedrock-runtime": "^3.787.0",
-    "@aws-sdk/client-s3": "^3.787.0",
-    "@aws-sdk/credential-providers": "^3.787.0",
-    "@aws-sdk/lib-storage": "^3.787.0",
-    "@aws-sdk/types": "^3.775.0",
-    "@azure/identity": "^4.9.1",
+    "@anthropic-ai/sdk": "^0.52.0",
+    "@anthropic-ai/vertex-sdk": "^0.11.4",
+    "@aws-sdk/client-bedrock": "^3.816.0",
+    "@aws-sdk/client-bedrock-runtime": "^3.816.0",
+    "@aws-sdk/client-s3": "^3.816.0",
+    "@aws-sdk/credential-providers": "^3.816.0",
+    "@aws-sdk/lib-storage": "^3.816.0",
+    "@aws-sdk/types": "^3.804.0",
+    "@azure/identity": "^4.10.0",
     "@azure/openai": "2.0.0",
     "@google-cloud/aiplatform": "^3.35.0",
-    "@google-cloud/vertexai": "^1.10.0",
+    "@google/genai": "^1.0.0",
     "@huggingface/inference": "2.6.7",
-    "api-fetch-client": "^0.13.0",
+    "@vertesia/api-fetch-client": "^0.60.0",
     "eventsource": "^4.0.0",
     "google-auth-library": "^9.14.0",
-    "groq-sdk": "^0.19.0",
+    "groq-sdk": "^0.22.0",
     "mnemonist": "^0.40.0",
     "node-web-stream-adapters": "^0.2.0",
-    "openai": "^4.98.0",
+    "openai": "^4.103.0",
     "replicate": "^1.0.1",
-    "@llumiverse/common": "0.18.0",
-    "@llumiverse/core": "0.18.0"
+    "@llumiverse/common": "0.19.0",
+    "@llumiverse/core": "0.19.0"
   },
   "ts_dual_module": {
     "outDir": "lib"

package/src/bedrock/index.ts CHANGED Viewed

@@ -295,7 +295,7 @@ export class BedrockDriver extends AbstractDriver<BedrockDriverOptions, BedrockP
     }
     preparePayload(prompt: ConverseRequest, options: ExecutionOptions) {
-        const model_options = options.model_options as TextFallbackOptions;
+        const model_options: TextFallbackOptions = options.model_options as TextFallbackOptions ?? { _option_id: "text-fallback" };
         let additionalField = {};
@@ -305,7 +305,7 @@ export class BedrockDriver extends AbstractDriver<BedrockDriverOptions, BedrockP
             }
             //Titan models also exists but does not support any additional options
             if (options.model.includes("nova")) {
-                additionalField = { inferenceConfig: { topK: model_options?.top_k } };
+                additionalField = { inferenceConfig: { topK: model_options.top_k } };
             }
         } else if (options.model.includes("claude")) {
             if (options.result_schema) {
@@ -313,18 +313,15 @@ export class BedrockDriver extends AbstractDriver<BedrockDriverOptions, BedrockP
             }
             if (options.model.includes("claude-3-7")) {
                 const thinking_options = options.model_options as BedrockClaudeOptions;
-                const thinking = thinking_options?.thinking_mode ?? false;
-                if (!model_options?.max_tokens) {
-                    model_options.max_tokens = thinking ? 128000 : 8192;
-                }
+                const thinking = thinking_options.thinking_mode ?? false;
                 additionalField = {
                     ...additionalField,
                     reasoning_config: {
                         type: thinking ? "enabled" : "disabled",
-                        budget_tokens: thinking_options?.thinking_budget_tokens,
+                        budget_tokens: thinking_options.thinking_budget_tokens,
                     }
                 };
-                if (thinking && (thinking_options?.thinking_budget_tokens ?? 0) > 64000) {
+                if (thinking && (thinking_options.thinking_budget_tokens ?? 0) > 64000) {
                     additionalField = {
                         ...additionalField,
                         anthorpic_beta: ["output-128k-2025-02-19"]
@@ -332,16 +329,16 @@ export class BedrockDriver extends AbstractDriver<BedrockDriverOptions, BedrockP
                 }
             }
             //Needs max_tokens to be set
-            if (!model_options?.max_tokens) {
+            if (!model_options.max_tokens) {
                 model_options.max_tokens = getMaxTokensLimit(options.model, model_options);
             }
-            additionalField = { ...additionalField, top_k: model_options?.top_k };
+            additionalField = { ...additionalField, top_k: model_options.top_k };
         } else if (options.model.includes("meta")) {
             //LLaMA models support no additional options
         } else if (options.model.includes("mistral")) {
             //7B instruct and 8x7B instruct
             if (options.model.includes("7b")) {
-                additionalField = { top_k: model_options?.top_k };
+                additionalField = { top_k: model_options.top_k };
                 //Does not support system messages
                 if (prompt.system && prompt.system?.length != 0) {
                     prompt.messages?.push(converseSystemToMessages(prompt.system));
@@ -360,8 +357,8 @@ export class BedrockDriver extends AbstractDriver<BedrockDriverOptions, BedrockP
             //Jurassic 2 models do.
             if (options.model.includes("j2")) {
                 additionalField = {
-                    presencePenalty: { scale: model_options?.presence_penalty },
-                    frequencyPenalty: { scale: model_options?.frequency_penalty },
+                    presencePenalty: { scale: model_options.presence_penalty },
+                    frequencyPenalty: { scale: model_options.frequency_penalty },
                 };
                 //Does not support system messages
                 if (prompt.system && prompt.system?.length != 0) {
@@ -375,13 +372,13 @@ export class BedrockDriver extends AbstractDriver<BedrockDriverOptions, BedrockP
             //Command R and R plus
             if (options.model.includes("cohere.command-r")) {
                 additionalField = {
-                    k: model_options?.top_k,
-                    frequency_penalty: model_options?.frequency_penalty,
-                    presence_penalty: model_options?.presence_penalty,
+                    k: model_options.top_k,
+                    frequency_penalty: model_options.frequency_penalty,
+                    presence_penalty: model_options.presence_penalty,
                 };
             } else {
                 // Command non-R
-                additionalField = { k: model_options?.top_k };
+                additionalField = { k: model_options.top_k };
                 //Does not support system messages
                 if (prompt.system && prompt.system?.length != 0) {
                     prompt.messages?.push(converseSystemToMessages(prompt.system));
@@ -404,7 +401,7 @@ export class BedrockDriver extends AbstractDriver<BedrockDriverOptions, BedrockP
         //If last message is "```json", add corresponding ``` as a stop sequence.
         if (prompt.messages && prompt.messages.length > 0) {
             if (prompt.messages[prompt.messages.length - 1].content?.[0].text === "```json") {
-                let stopSeq = model_options?.stop_sequence;
+                let stopSeq = model_options.stop_sequence;
                 if (!stopSeq) {
                     model_options.stop_sequence = ["```"];
                 } else if (!stopSeq.includes("```")) {
@@ -421,10 +418,10 @@ export class BedrockDriver extends AbstractDriver<BedrockDriverOptions, BedrockP
             system: prompt.system,
             modelId: options.model,
             inferenceConfig: {
-                maxTokens: model_options?.max_tokens,
-                temperature: model_options?.temperature,
-                topP: model_options?.top_p,
-                stopSequences: model_options?.stop_sequence,
+                maxTokens: model_options.max_tokens,
+                temperature: model_options.temperature,
+                topP: model_options.top_p,
+                stopSequences: model_options.stop_sequence,
             } satisfies InferenceConfiguration,
             additionalModelRequestFields: {
                 ...additionalField,

package/src/groq/index.ts CHANGED Viewed

@@ -35,7 +35,7 @@ export class GroqDriver extends AbstractDriver<GroqDriverOptions, OpenAITextMess
     //     }
     // }
-    getResponseFormat(_options: ExecutionOptions): Groq.Chat.Completions.CompletionCreateParams.ResponseFormat | undefined {
+    getResponseFormat(_options: ExecutionOptions): undefined {
         //TODO: when forcing json_object type the streaming is not supported.
         // either implement canStream as above or comment the code below:
         // const responseFormatJson: Groq.Chat.Completions.CompletionCreateParams.ResponseFormat = {

package/src/huggingface_ie.ts CHANGED Viewed

@@ -14,7 +14,7 @@ import {
     TextFallbackOptions,
 } from "@llumiverse/core";
 import { transformAsyncIterator } from "@llumiverse/core/async";
-import { FetchClient } from "api-fetch-client";
+import { FetchClient } from "@vertesia/api-fetch-client";
 export interface HuggingFaceIEDriverOptions extends DriverOptions {
     apiKey: string;

package/src/mistral/index.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 import { AIModel, AbstractDriver, Completion, CompletionChunk, DriverOptions, EmbeddingsOptions, EmbeddingsResult, ExecutionOptions, PromptSegment, TextFallbackOptions } from "@llumiverse/core";
 import { transformSSEStream } from "@llumiverse/core/async";
 import { OpenAITextMessage, formatOpenAILikeTextPrompt, getJSONSafetyNotice } from "@llumiverse/core/formatters";
-import { FetchClient } from "api-fetch-client";
+import { FetchClient } from "@vertesia/api-fetch-client";
 import { ChatCompletionResponse, CompletionRequestParams, ListModelsResponse, ResponseFormat } from "./types.js";
 //TODO retry on 429

package/src/openai/index.ts CHANGED Viewed

@@ -19,9 +19,10 @@ import {
     TrainingPromptOptions,
     getModelCapabilities,
     modelModalitiesToArray,
+    supportsToolUse,
 } from "@llumiverse/core";
 import { asyncMap } from "@llumiverse/core/async";
-import { formatOpenAILikeMultimodalPrompt, noStructuredOutputModels } from "@llumiverse/core/formatters";
+import { formatOpenAILikeMultimodalPrompt } from "@llumiverse/core/formatters";
 import OpenAI, { AzureOpenAI } from "openai";
 import { Stream } from "openai/streaming";
@@ -87,7 +88,7 @@ export abstract class BaseOpenAIDriver extends AbstractDriver<
         }
         const toolDefs = getToolDefinitions(options.tools);
-        const useTools: boolean = toolDefs ? supportsTools(options.model) : false;
+        const useTools: boolean = toolDefs ? supportsToolUse(options.model, "openai", true) : false;
         const mapFn = (chunk: OpenAI.Chat.Completions.ChatCompletionChunk) => {
             let result = undefined
@@ -167,7 +168,7 @@ export abstract class BaseOpenAIDriver extends AbstractDriver<
         insert_image_detail(prompt, model_options?.image_detail ?? "auto");
         const toolDefs = getToolDefinitions(options.tools);
-        const useTools: boolean = toolDefs ? supportsTools(options.model) : false;
+        const useTools: boolean = toolDefs ? supportsToolUse(options.model, "openai") : false;
         let conversation = updateConversation(options.conversation as OpenAIMessageBlock[], prompt);
@@ -289,7 +290,8 @@ export abstract class BaseOpenAIDriver extends AbstractDriver<
         //Some of these use the completions API instead of the chat completions API.
         //Others are for non-text input modalities. Therefore common to both.
-        const wordBlacklist = ["embed", "whisper", "transcribe", "audio", "moderation", "tts", "realtime", "dall-e", "babbage", "davinci"];
+        const wordBlacklist = ["embed", "whisper", "transcribe", "audio", "moderation", "tts",
+            "realtime", "dall-e", "babbage", "davinci", "codex", "o1-pro"];
         if (this.provider === "azure_openai") {
             //Azure OpenAI has additional information about the models
@@ -415,20 +417,14 @@ function convertRoles(messages: OpenAIMessageBlock[], model: string): OpenAIMess
     return messages
 }
-function supportsTools(model: string): boolean {
-    const list_check = !noStructuredOutputModels.some((m) => model.includes(m));
-    if (!list_check && model.includes("gpt-4o") && !model.includes("gpt-4o-2024-05-13")) {
-        return true;
-    }
-    return list_check
-}
+//Structured output support is typically aligned with tool use support
+//Not true for realtime models, which do not support structured output, but do support tool use.
 function supportsSchema(model: string): boolean {
-    const list_check = !noStructuredOutputModels.some((m) => model.includes(m));
-    if (!list_check && model.includes("gpt-4o") && !model.includes("gpt-4o-2024-05-13")) {
-        return true;
+    const realtimeModel = model.includes("realtime");
+    if (realtimeModel) {
+        return false;
     }
-    return list_check
+    return supportsToolUse(model, "openai");
 }
 function getToolDefinitions(tools: ToolDefinition[] | undefined | null): OpenAI.ChatCompletionTool[] | undefined {

package/src/togetherai/index.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 import { AIModel, AbstractDriver, Completion, CompletionChunk, DriverOptions, EmbeddingsResult, ExecutionOptions, TextFallbackOptions } from "@llumiverse/core";
 import { transformSSEStream } from "@llumiverse/core/async";
-import { FetchClient } from "api-fetch-client";
+import { FetchClient } from "@vertesia/api-fetch-client";
 import { TextCompletion, TogetherModelInfo } from "./interfaces.js";
 interface TogetherAIDriverOptions extends DriverOptions {

package/src/vertexai/index.ts CHANGED Viewed

@@ -1,9 +1,8 @@
-import { GenerateContentRequest, VertexAI } from "@google-cloud/vertexai";
 import {
     AIModel,
     AbstractDriver,
     Completion,
-    CompletionChunkObject,
+    CompletionChunk,
     DriverOptions,
     EmbeddingsResult,
     ExecutionOptions,
@@ -14,7 +13,7 @@ import {
     getModelCapabilities,
     modelModalitiesToArray,
 } from "@llumiverse/core";
-import { FetchClient } from "api-fetch-client";
+import { FetchClient } from "@vertesia/api-fetch-client";
 import { GoogleAuth, GoogleAuthOptions } from "google-auth-library";
 import { JSONClient } from "google-auth-library/build/src/auth/googleauth.js";
 import { TextEmbeddingsOptions, getEmbeddingsForText } from "./embeddings/embeddings-text.js";
@@ -24,6 +23,7 @@ import { getEmbeddingsForImages } from "./embeddings/embeddings-image.js";
 import { v1beta1 } from "@google-cloud/aiplatform";
 import { AnthropicVertex } from "@anthropic-ai/vertex-sdk";
 import { ImagenModelDefinition, ImagenPrompt } from "./models/imagen.js";
+import { GoogleGenAI, Content, Tool } from "@google/genai";
 export interface VertexAIDriverOptions extends DriverOptions {
     project: string;
@@ -31,8 +31,14 @@ export interface VertexAIDriverOptions extends DriverOptions {
     googleAuthOptions?: GoogleAuthOptions;
 }
+export interface GenerateContentPrompt {
+    contents: Content[];
+    system?: string;
+    tools?: Tool[];
+}
 //General Prompt type for VertexAI
-export type VertexAIPrompt = GenerateContentRequest | ImagenPrompt;
+export type VertexAIPrompt = ImagenPrompt | GenerateContentPrompt;
 export function trimModelName(model: string) {
     const i = model.lastIndexOf("@");
@@ -46,8 +52,9 @@ export class VertexAIDriver extends AbstractDriver<VertexAIDriverOptions, Vertex
     aiplatform: v1beta1.ModelServiceClient | undefined;
     anthropicClient: AnthropicVertex | undefined;
     fetchClient: FetchClient | undefined;
+    googleGenAI: GoogleGenAI | undefined;
+    llamaClient: FetchClient & { region?: string } | undefined;
     modelGarden: v1beta1.ModelGardenServiceClient | undefined;
-    vertexai: VertexAI | undefined;
     authClient: JSONClient | GoogleAuth<JSONClient>;
@@ -57,12 +64,28 @@ export class VertexAIDriver extends AbstractDriver<VertexAIDriverOptions, Vertex
         this.aiplatform = undefined;
         this.anthropicClient = undefined;
         this.fetchClient = undefined
+        this.googleGenAI = undefined;
         this.modelGarden = undefined;
-        this.vertexai = undefined;
+        this.llamaClient = undefined;
         this.authClient = options.googleAuthOptions?.authClient ?? new GoogleAuth(options.googleAuthOptions);
     }
+    public getGoogleGenAIClient(): GoogleGenAI {
+        //Lazy initialisation
+        if (!this.googleGenAI) {
+            this.googleGenAI = new GoogleGenAI({
+                project: this.options.project,
+                location: this.options.region,
+                vertexai: true,
+                googleAuthOptions: {
+                    authClient: this.authClient as JSONClient,
+                }
+            });
+        }
+        return this.googleGenAI;
+    }
     public getFetchClient(): FetchClient {
         //Lazy initialisation
         if (!this.fetchClient) {
@@ -78,6 +101,24 @@ export class VertexAIDriver extends AbstractDriver<VertexAIDriverOptions, Vertex
         return this.fetchClient;
     }
+    public getLLamaClient(region: string = "us-central1"): FetchClient {
+        //Lazy initialisation
+        if (!this.llamaClient || this.llamaClient["region"] !== region) {
+            this.llamaClient = createFetchClient({
+                region: region,
+                project: this.options.project,
+                apiVersion: "v1beta1",
+            }).withAuthCallback(async () => {
+                const accessTokenResponse = await this.authClient.getAccessToken();
+                const token = typeof accessTokenResponse === 'string' ? accessTokenResponse : accessTokenResponse?.token;
+                return `Bearer ${token}`;
+            });
+            // Store the region for potential client reuse
+            this.llamaClient["region"] = region;
+        }
+        return this.llamaClient;
+    }
     public getAnthropicClient(): AnthropicVertex {
         //Lazy initialisation
         if (!this.anthropicClient) {
@@ -89,18 +130,6 @@ export class VertexAIDriver extends AbstractDriver<VertexAIDriverOptions, Vertex
         return this.anthropicClient;
     }
-    public getVertexAIClient(): VertexAI {
-        //Lazy initialisation
-        if (!this.vertexai) {
-            this.vertexai = new VertexAI({
-                project: this.options.project,
-                location: this.options.region,
-                googleAuthOptions: this.options.googleAuthOptions,
-            });
-        }
-        return this.vertexai;
-    }
     public getAIPlatformClient(): v1beta1.ModelServiceClient {
         //Lazy initialisation
         if (!this.aiplatform) {
@@ -125,6 +154,18 @@ export class VertexAIDriver extends AbstractDriver<VertexAIDriverOptions, Vertex
         return this.modelGarden;
     }
+    validateResult(result: Completion, options: ExecutionOptions) {
+        // Optionally preprocess the result before validation
+        const modelDef = getModelDefinition(options.model);
+        if (typeof modelDef.preValidationProcessing === "function") {
+            const processed = modelDef.preValidationProcessing(result, options);
+            result = processed.result;
+            options = processed.options;
+        }
+        super.validateResult(result, options);
+    }
     protected canStream(options: ExecutionOptions): Promise<boolean> {
         if (options.output_modality == Modalities.image) {
             return Promise.resolve(false);
@@ -145,7 +186,7 @@ export class VertexAIDriver extends AbstractDriver<VertexAIDriverOptions, Vertex
     async requestTextCompletionStream(
         prompt: VertexAIPrompt,
         options: ExecutionOptions,
-    ): Promise<AsyncIterable<CompletionChunkObject>> {
+    ): Promise<AsyncIterable<CompletionChunk>> {
         return getModelDefinition(options.model).requestTextCompletionStream(this, prompt, options);
     }
@@ -178,14 +219,31 @@ export class VertexAIDriver extends AbstractDriver<VertexAIDriverOptions, Vertex
         );
         //Model Garden Publisher models - Pretrained models
-        const publishers = ["google", "anthropic"];
-        const supportedModels = { google: ["gemini", "imagen"], anthropic: ["claude"] };
+        const publishers = ["google", "anthropic", "meta"];
+        // Meta "maas" models are LLama Models-As-A-Service. Non-maas models are not pre-deployed.
+        const supportedModels = { google: ["gemini", "imagen"], anthropic: ["claude"], meta: ["maas"] };
+        // Additional models not in the listings, but we want to include
+        // TODO: Remove once the models are available in the listing API, or no longer needed
+        const additionalModels = {
+            google: ["imagen-3.0-fast-generate-001"],
+            anthropic: [],
+            meta: [
+                "llama-4-maverick-17b-128e-instruct-maas",
+                "llama-4-scout-17b-16e-instruct-maas",
+                "llama-3.3-70b-instruct-maas",
+                "llama-3.2-90b-vision-instruct-maas",
+                "llama-3.1-405b-instruct-maas",
+                "llama-3.1-70b-instruct-maas",
+                "llama-3.1-8b-instruct-maas",
+            ],
+        }
         //Used to exclude retired models that are still in the listing API but not available for use.
         //Or models we do not support yet
         const unsupportedModelsByPublisher = {
             google: ["gemini-pro", "gemini-ultra"],
             anthropic: [],
+            meta: [],
         };
         for (const publisher of publishers) {
@@ -228,13 +286,28 @@ export class VertexAIDriver extends AbstractDriver<VertexAIDriverOptions, Vertex
                     tool_support: modelCapability.tool_support,
                 } satisfies AIModel<string>;
             }));
+            // Add additional models that are not in the listing
+            for (const additionalModel of additionalModels[publisher as keyof typeof additionalModels]) {
+                const publisherModelName = `publishers/${publisher}/models/${additionalModel}`;
+                const modelCapability = getModelCapabilities(additionalModel, "vertexai");
+                models.push({
+                    id: publisherModelName,
+                    name: additionalModel,
+                    provider: 'vertexai',
+                    owner: publisher,
+                    input_modalities: modelModalitiesToArray(modelCapability.input),
+                    output_modalities: modelModalitiesToArray(modelCapability.output),
+                    tool_support: modelCapability.tool_support,
+                } satisfies AIModel<string>);
+            }
         }
         //Remove duplicates
         const uniqueModels = Array.from(new Set(models.map(a => a.id)))
             .map(id => {
                 return models.find(a => a.id === id) ?? {} as AIModel<string>;
-            });
+            }).sort((a, b) => a.id.localeCompare(b.id));
         return uniqueModels;
     }

package/src/vertexai/models/claude.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import * as AnthropicAPI from '@anthropic-ai/sdk';
-import { ContentBlock, ContentBlockParam, Message, TextBlockParam } from "@anthropic-ai/sdk/resources/index.js";
+import { ContentBlock, ContentBlockParam, ImageBlockParam, Message, TextBlockParam } from "@anthropic-ai/sdk/resources/index.js";
 import {
     AIModel, Completion, CompletionChunkObject, ExecutionOptions, JSONObject, ModelType,
     PromptOptions, PromptRole, PromptSegment, readStreamAsBase64, ToolUse, VertexAIClaudeOptions
@@ -15,23 +15,6 @@ interface ClaudePrompt {
     system: TextBlockParam[];
 }
-function getFullModelName(model: string): string {
-    if (model.includes("claude-3-5-sonnet-v2")) {
-        return "claude-3-5-sonnet-v2@20241022"
-    } else if (model.includes("claude-3-5-sonnet")) {
-        return "claude-3-5-sonnet@20240620"
-    } else if (model.includes("claude-3-5-haiku")) {
-        return "claude-3-5-haiku@20241022"
-    } else if (model.includes("claude-3-opus")) {
-        return "claude-3-opus@20240229"
-    } else if (model.includes("claude-3-sonnet")) {
-        return "claude-3-sonnet@20240229"
-    } else if (model.includes("claude-3-haiku")) {
-        return "claude-3-haiku@20240307"
-    }
-    return model;
-}
 function claudeFinishReason(reason: string | undefined) {
     if (!reason) return undefined;
     switch (reason) {
@@ -63,6 +46,36 @@ function maxToken(max_tokens: number | undefined, model: string): number {
     }
 }
+async function collectImageBlocks(segment: PromptSegment, contentBlocks: ContentBlockParam[]) {
+    for (const file of segment.files || []) {
+        if (file.mime_type?.startsWith("image/")) {
+            const allowedTypes = ["image/png", "image/jpeg", "image/gif", "image/webp"];
+            if (!allowedTypes.includes(file.mime_type)) {
+                throw new Error(`Unsupported image type: ${file.mime_type}`);
+            }
+            const mimeType = String(file.mime_type) as "image/png" | "image/jpeg" | "image/gif" | "image/webp";
+            contentBlocks.push({
+                type: 'image',
+                source: {
+                    type: 'base64',
+                    data: await readStreamAsBase64(await file.getStream()),
+                    media_type: mimeType
+                }
+            });
+        } else if (file.mime_type?.startsWith("text/")) {
+            contentBlocks.push({
+                source: {
+                    type: 'text',
+                    data: await readStreamAsBase64(await file.getStream()),
+                    media_type: 'text/plain'
+                },
+                type: 'document'
+            });
+        }
+    }
+}
 export class ClaudeModelDefinition implements ModelDefinition<ClaudePrompt> {
     model: AIModel
@@ -111,60 +124,38 @@ export class ClaudeModelDefinition implements ModelDefinition<ClaudePrompt> {
                 if (!segment.tool_use_id) {
                     throw new Error("Tool prompt segment must have a tool_use_id");
                 }
+                const imageBlocks: ImageBlockParam[] = [];
+                await collectImageBlocks(segment, imageBlocks);
                 messages.push({
                     role: 'user',
-                    content: [
-                        {
-                            type: 'tool_result',
-                            tool_use_id: segment.tool_use_id,
-                            content: segment.content || undefined
-                        }
-                    ]
+                    content: [{
+                        type: 'tool_result',
+                        tool_use_id: segment.tool_use_id,
+                        content: [{
+                            type: 'text',
+                            text: segment.content || ''
+                        }, ...imageBlocks]
+                    }]
                 });
             } else {
                 const contentBlocks: ContentBlockParam[] = [];
-                for (const file of segment.files || []) {
-                    if (file.mime_type?.startsWith("image/")) {
-                        const allowedTypes = ["image/png", "image/jpeg", "image/gif", "image/webp"];
-                        if (!allowedTypes.includes(file.mime_type)) {
-                            throw new Error(`Unsupported image type: ${file.mime_type}`);
-                        }
-                        contentBlocks.push({
-                            type: 'image',
-                            source: {
-                                type: 'base64',
-                                data: await readStreamAsBase64(await file.getStream()),
-                                media_type: file.mime_type as any
-                            }
-                        });
-                    } else if (file.mime_type?.startsWith("text/")) {
-                        contentBlocks.push({
-                            source: {
-                                type: 'text',
-                                data: await readStreamAsBase64(await file.getStream()),
-                                media_type: 'text/plain'
-                            },
-                            type : 'document'
-                        });
-                    }
-                }
+                collectImageBlocks(segment, contentBlocks);
                 if (segment.content) {
                     contentBlocks.push({
                         type: 'text',
                         text: segment.content
                     });
                 }
                 messages.push({
                     role: segment.role === PromptRole.assistant ? 'assistant' : 'user',
                     content: contentBlocks
                 });
             }
         }
         const system = systemSegments.concat(safetySegments);
         return {
@@ -253,23 +244,17 @@ export class ClaudeModelDefinition implements ModelDefinition<ClaudePrompt> {
                 }
         });
-        //Streaming does not give information on the input tokens,
-        //So we use a separate call to get the input tokens.
-        //Non-critical and model name sensitive so we put it in a try catch block
-        let count_tokens = { input_tokens: 0 };
-        try {
-            count_tokens = await client.messages.countTokens({
-                ...prompt,  // messages, system
-                model: getFullModelName(modelName),
-            });
-        } catch (e) {
-            driver.logger.warn("Failed to get token count for model " + modelName);
-        }
         const stream = asyncMap(response_stream, async (item: any) => {
+            if (item.type == "message_start") {
+                return {
+                    result: '',
+                    token_usage: { prompt: item?.message?.usage?.input_tokens, result: item?.message?.usage?.output_tokens },
+                    finish_reason: undefined,
+                }
+            }
             return {
                 result: item?.delta?.text ?? '',
-                token_usage: { prompt: count_tokens.input_tokens, result: item?.usage?.output_tokens },
+                token_usage: { result: item?.usage?.output_tokens },
                 finish_reason: claudeFinishReason(item?.delta?.stop_reason ?? ''),
             }
         });