npm - browser-use - Versions diffs - 0.2.0 → 0.3.0 - Mend

browser-use 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (259)

package/README.md +295 -686
package/dist/actor/element.d.ts +19 -0
package/dist/actor/element.js +46 -0
package/dist/actor/index.d.ts +4 -0
package/dist/actor/index.js +4 -0
package/dist/actor/mouse.d.ts +19 -0
package/dist/actor/mouse.js +39 -0
package/dist/actor/page.d.ts +29 -0
package/dist/actor/page.js +88 -0
package/dist/actor/utils.d.ts +4 -0
package/dist/actor/utils.js +35 -0
package/dist/agent/cloud-events.d.ts +18 -0
package/dist/agent/cloud-events.js +65 -2
package/dist/agent/gif.d.ts +1 -0
package/dist/agent/gif.js +24 -2
package/dist/agent/judge.d.ts +17 -0
package/dist/agent/judge.js +197 -0
package/dist/agent/message-manager/service.d.ts +12 -4
package/dist/agent/message-manager/service.js +205 -39
package/dist/agent/message-manager/utils.js +0 -1
package/dist/agent/message-manager/views.d.ts +4 -0
package/dist/agent/message-manager/views.js +11 -7
package/dist/agent/prompts.d.ts +24 -3
package/dist/agent/prompts.js +274 -59
package/dist/agent/service.d.ts +99 -41
package/dist/agent/service.js +2266 -472
package/dist/agent/variable-detector.d.ts +12 -0
package/dist/agent/variable-detector.js +211 -0
package/dist/agent/views.d.ts +237 -18
package/dist/agent/views.js +446 -33
package/dist/browser/cloud/cloud.d.ts +20 -0
package/dist/browser/cloud/cloud.js +129 -0
package/dist/browser/cloud/index.d.ts +2 -0
package/dist/browser/cloud/index.js +2 -0
package/dist/browser/cloud/views.d.ts +41 -0
package/dist/browser/cloud/views.js +35 -0
package/dist/browser/events.d.ts +345 -0
package/dist/browser/events.js +566 -0
package/dist/browser/extensions.js +17 -17
package/dist/browser/index.d.ts +4 -0
package/dist/browser/index.js +4 -0
package/dist/browser/profile.d.ts +8 -2
package/dist/browser/profile.js +79 -12
package/dist/browser/session-manager.d.ts +85 -0
package/dist/browser/session-manager.js +208 -0
package/dist/browser/session.d.ts +100 -8
package/dist/browser/session.js +1097 -58
package/dist/browser/types.d.ts +0 -2
package/dist/browser/views.d.ts +39 -0
package/dist/browser/views.js +32 -0
package/dist/browser/watchdogs/aboutblank-watchdog.d.ts +12 -0
package/dist/browser/watchdogs/aboutblank-watchdog.js +131 -0
package/dist/browser/watchdogs/base.d.ts +21 -0
package/dist/browser/watchdogs/base.js +81 -0
package/dist/browser/watchdogs/cdp-session-watchdog.d.ts +14 -0
package/dist/browser/watchdogs/cdp-session-watchdog.js +177 -0
package/dist/browser/watchdogs/crash-watchdog.d.ts +38 -0
package/dist/browser/watchdogs/crash-watchdog.js +296 -0
package/dist/browser/watchdogs/default-action-watchdog.d.ts +49 -0
package/dist/browser/watchdogs/default-action-watchdog.js +212 -0
package/dist/browser/watchdogs/dom-watchdog.d.ts +8 -0
package/dist/browser/watchdogs/dom-watchdog.js +31 -0
package/dist/browser/watchdogs/downloads-watchdog.d.ts +77 -0
package/dist/browser/watchdogs/downloads-watchdog.js +409 -0
package/dist/browser/watchdogs/har-recording-watchdog.d.ts +19 -0
package/dist/browser/watchdogs/har-recording-watchdog.js +317 -0
package/dist/browser/watchdogs/index.d.ts +15 -0
package/dist/browser/watchdogs/index.js +15 -0
package/dist/browser/watchdogs/local-browser-watchdog.d.ts +10 -0
package/dist/browser/watchdogs/local-browser-watchdog.js +32 -0
package/dist/browser/watchdogs/permissions-watchdog.d.ts +8 -0
package/dist/browser/watchdogs/permissions-watchdog.js +73 -0
package/dist/browser/watchdogs/popups-watchdog.d.ts +13 -0
package/dist/browser/watchdogs/popups-watchdog.js +77 -0
package/dist/browser/watchdogs/recording-watchdog.d.ts +27 -0
package/dist/browser/watchdogs/recording-watchdog.js +249 -0
package/dist/browser/watchdogs/screenshot-watchdog.d.ts +6 -0
package/dist/browser/watchdogs/screenshot-watchdog.js +13 -0
package/dist/browser/watchdogs/security-watchdog.d.ts +10 -0
package/dist/browser/watchdogs/security-watchdog.js +84 -0
package/dist/browser/watchdogs/storage-state-watchdog.d.ts +24 -0
package/dist/browser/watchdogs/storage-state-watchdog.js +288 -0
package/dist/cli.d.ts +7 -2
package/dist/cli.js +182 -25
package/dist/code-use/formatting.d.ts +3 -0
package/dist/code-use/formatting.js +18 -0
package/dist/code-use/index.d.ts +6 -0
package/dist/code-use/index.js +6 -0
package/dist/code-use/namespace.d.ts +5 -0
package/dist/code-use/namespace.js +81 -0
package/dist/code-use/notebook-export.d.ts +3 -0
package/dist/code-use/notebook-export.js +56 -0
package/dist/code-use/service.d.ts +24 -0
package/dist/code-use/service.js +104 -0
package/dist/code-use/utils.d.ts +4 -0
package/dist/code-use/utils.js +98 -0
package/dist/code-use/views.d.ts +108 -0
package/dist/code-use/views.js +165 -0
package/dist/config.d.ts +13 -0
package/dist/config.js +69 -3
package/dist/controller/registry/service.d.ts +10 -1
package/dist/controller/registry/service.js +266 -10
package/dist/controller/registry/views.d.ts +4 -1
package/dist/controller/registry/views.js +25 -2
package/dist/controller/service.d.ts +10 -1
package/dist/controller/service.js +1807 -268
package/dist/controller/views.d.ts +78 -155
package/dist/controller/views.js +61 -12
package/dist/dom/history-tree-processor/service.d.ts +5 -0
package/dist/dom/history-tree-processor/service.js +169 -14
package/dist/dom/history-tree-processor/view.d.ts +7 -1
package/dist/dom/history-tree-processor/view.js +10 -1
package/dist/dom/markdown-extractor.d.ts +37 -0
package/dist/dom/markdown-extractor.js +345 -0
package/dist/dom/service.d.ts +3 -1
package/dist/dom/service.js +76 -0
package/dist/dom/views.d.ts +1 -0
package/dist/dom/views.js +45 -0
package/dist/event-bus.d.ts +107 -7
package/dist/event-bus.js +313 -10
package/dist/exceptions.d.ts +0 -3
package/dist/exceptions.js +0 -7
package/dist/filesystem/file-system.d.ts +18 -0
package/dist/filesystem/file-system.js +503 -42
package/dist/index.d.ts +7 -0
package/dist/index.js +6 -0
package/dist/integrations/gmail/actions.d.ts +3 -3
package/dist/integrations/gmail/actions.js +4 -4
package/dist/llm/anthropic/chat.d.ts +18 -1
package/dist/llm/anthropic/chat.js +123 -55
package/dist/llm/anthropic/serializer.d.ts +2 -0
package/dist/llm/anthropic/serializer.js +81 -9
package/dist/llm/aws/chat-anthropic.d.ts +17 -0
package/dist/llm/aws/chat-anthropic.js +126 -26
package/dist/llm/aws/chat-bedrock.d.ts +28 -1
package/dist/llm/aws/chat-bedrock.js +161 -34
package/dist/llm/aws/serializer.d.ts +13 -1
package/dist/llm/aws/serializer.js +56 -17
package/dist/llm/azure/chat.d.ts +53 -2
package/dist/llm/azure/chat.js +366 -54
package/dist/llm/base.d.ts +2 -0
package/dist/llm/browser-use/chat.d.ts +40 -0
package/dist/llm/browser-use/chat.js +305 -0
package/dist/llm/browser-use/index.d.ts +1 -0
package/dist/llm/browser-use/index.js +1 -0
package/dist/llm/cerebras/chat.d.ts +39 -0
package/dist/llm/cerebras/chat.js +178 -0
package/dist/llm/cerebras/index.d.ts +2 -0
package/dist/llm/cerebras/index.js +2 -0
package/dist/llm/cerebras/serializer.d.ts +7 -0
package/dist/llm/cerebras/serializer.js +82 -0
package/dist/llm/deepseek/chat.d.ts +19 -2
package/dist/llm/deepseek/chat.js +138 -25
package/dist/llm/google/chat.d.ts +46 -2
package/dist/llm/google/chat.js +267 -64
package/dist/llm/google/serializer.d.ts +9 -1
package/dist/llm/google/serializer.js +141 -34
package/dist/llm/groq/chat.d.ts +21 -2
package/dist/llm/groq/chat.js +125 -26
package/dist/llm/groq/parser.js +3 -1
package/dist/llm/mistral/chat.d.ts +43 -0
package/dist/llm/mistral/chat.js +154 -0
package/dist/llm/mistral/index.d.ts +2 -0
package/dist/llm/mistral/index.js +2 -0
package/dist/llm/mistral/schema.d.ts +8 -0
package/dist/llm/mistral/schema.js +27 -0
package/dist/llm/models.d.ts +2 -0
package/dist/llm/models.js +317 -0
package/dist/llm/ollama/chat.d.ts +13 -1
package/dist/llm/ollama/chat.js +110 -19
package/dist/llm/ollama/serializer.d.ts +1 -0
package/dist/llm/ollama/serializer.js +34 -12
package/dist/llm/openai/chat.d.ts +16 -0
package/dist/llm/openai/chat.js +94 -44
package/dist/llm/openai/like.d.ts +5 -3
package/dist/llm/openai/like.js +7 -3
package/dist/llm/openai/responses-serializer.d.ts +18 -0
package/dist/llm/openai/responses-serializer.js +72 -0
package/dist/llm/openrouter/chat.d.ts +28 -2
package/dist/llm/openrouter/chat.js +115 -29
package/dist/llm/schema.d.ts +11 -1
package/dist/llm/schema.js +81 -1
package/dist/llm/vercel/chat.d.ts +50 -0
package/dist/llm/vercel/chat.js +276 -0
package/dist/llm/vercel/index.d.ts +1 -0
package/dist/llm/vercel/index.js +1 -0
package/dist/llm/vercel/serializer.d.ts +5 -0
package/dist/llm/vercel/serializer.js +7 -0
package/dist/llm/views.d.ts +2 -1
package/dist/llm/views.js +3 -1
package/dist/logging-config.d.ts +2 -0
package/dist/logging-config.js +82 -29
package/dist/mcp/client.d.ts +10 -5
package/dist/mcp/client.js +14 -9
package/dist/mcp/controller.d.ts +42 -3
package/dist/mcp/controller.js +56 -31
package/dist/mcp/server.d.ts +14 -0
package/dist/mcp/server.js +255 -52
package/dist/observability.js +10 -4
package/dist/sandbox/index.d.ts +2 -0
package/dist/sandbox/index.js +2 -0
package/dist/sandbox/sandbox.d.ts +19 -0
package/dist/sandbox/sandbox.js +140 -0
package/dist/sandbox/views.d.ts +67 -0
package/dist/sandbox/views.js +121 -0
package/dist/skill-cli/index.d.ts +3 -0
package/dist/skill-cli/index.js +3 -0
package/dist/skill-cli/protocol.d.ts +30 -0
package/dist/skill-cli/protocol.js +48 -0
package/dist/skill-cli/server.d.ts +11 -0
package/dist/skill-cli/server.js +85 -0
package/dist/skill-cli/sessions.d.ts +24 -0
package/dist/skill-cli/sessions.js +47 -0
package/dist/skills/index.d.ts +3 -0
package/dist/skills/index.js +3 -0
package/dist/skills/service.d.ts +27 -0
package/dist/skills/service.js +266 -0
package/dist/skills/utils.d.ts +6 -0
package/dist/skills/utils.js +53 -0
package/dist/skills/views.d.ts +40 -0
package/dist/skills/views.js +10 -0
package/dist/sync/auth.js +8 -3
package/dist/sync/service.d.ts +6 -6
package/dist/sync/service.js +54 -89
package/dist/telemetry/views.d.ts +20 -6
package/dist/telemetry/views.js +23 -5
package/dist/tokens/custom-pricing.d.ts +2 -0
package/dist/tokens/custom-pricing.js +22 -0
package/dist/tokens/index.d.ts +2 -0
package/dist/tokens/index.js +2 -0
package/dist/tokens/mappings.d.ts +1 -0
package/dist/tokens/mappings.js +3 -0
package/dist/tokens/service.js +27 -8
package/dist/tools/extraction/index.d.ts +2 -0
package/dist/tools/extraction/index.js +2 -0
package/dist/tools/extraction/schema-utils.d.ts +6 -0
package/dist/tools/extraction/schema-utils.js +237 -0
package/dist/tools/extraction/views.d.ts +7 -0
package/dist/tools/index.d.ts +5 -0
package/dist/tools/index.js +5 -0
package/dist/tools/registry/index.d.ts +2 -0
package/dist/tools/registry/index.js +2 -0
package/dist/tools/registry/service.d.ts +1 -0
package/dist/tools/registry/service.js +1 -0
package/dist/tools/registry/views.d.ts +1 -0
package/dist/tools/registry/views.js +1 -0
package/dist/tools/service.d.ts +2 -0
package/dist/tools/service.js +1 -0
package/dist/tools/utils.d.ts +2 -0
package/dist/tools/utils.js +57 -0
package/dist/tools/views.d.ts +1 -0
package/dist/tools/views.js +1 -0
package/dist/utils.d.ts +10 -1
package/dist/utils.js +70 -3
package/package.json +87 -26
package/dist/dom/playground/process-dom.js +0 -5
package/dist/dom/playground/test-accessibility.d.ts +0 -44
package/dist/dom/playground/test-accessibility.js +0 -111
/package/dist/{dom/playground/process-dom.d.ts → tools/extraction/views.js} +0 -0

package/dist/llm/deepseek/chat.js CHANGED Viewed

@@ -1,15 +1,30 @@
 import OpenAI from 'openai';
+import { ModelProviderError, ModelRateLimitError } from '../exceptions.js';
+import { SchemaOptimizer, zodSchemaToJsonSchema } from '../schema.js';
 import { ChatInvokeCompletion } from '../views.js';
 import { DeepSeekMessageSerializer } from './serializer.js';
 export class ChatDeepSeek {
     model;
     provider = 'deepseek';
     client;
-    constructor(model = 'deepseek-chat') {
+    temperature;
+    maxTokens;
+    topP;
+    seed;
+    constructor(options = {}) {
+        const normalizedOptions = typeof options === 'string' ? { model: options } : options;
+        const { model = 'deepseek-chat', apiKey = process.env.DEEPSEEK_API_KEY, baseURL = 'https://api.deepseek.com/v1', timeout = null, clientParams = null, temperature = null, maxTokens = null, topP = null, seed = null, maxRetries = 10, } = normalizedOptions;
         this.model = model;
+        this.temperature = temperature;
+        this.maxTokens = maxTokens;
+        this.topP = topP;
+        this.seed = seed;
         this.client = new OpenAI({
-            apiKey: process.env.DEEPSEEK_API_KEY,
-            baseURL: 'https://api.deepseek.com',
+            apiKey,
+            baseURL,
+            ...(timeout !== null ? { timeout } : {}),
+            maxRetries,
+            ...(clientParams ?? {}),
         });
     }
     get name() {
@@ -18,34 +33,132 @@ export class ChatDeepSeek {
     get model_name() {
         return this.model;
     }
+    getUsage(response) {
+        if (!response.usage) {
+            return null;
+        }
+        return {
+            prompt_tokens: response.usage.prompt_tokens,
+            prompt_cached_tokens: response.usage.prompt_tokens_details?.cached_tokens ?? null,
+            prompt_cache_creation_tokens: null,
+            prompt_image_tokens: null,
+            completion_tokens: response.usage.completion_tokens,
+            total_tokens: response.usage.total_tokens,
+        };
+    }
     async ainvoke(messages, output_format, options = {}) {
         const serializer = new DeepSeekMessageSerializer();
         const deepseekMessages = serializer.serialize(messages);
-        let responseFormat = undefined;
-        if (output_format && 'schema' in output_format && output_format.schema) {
-            // DeepSeek supports json_object
-            responseFormat = { type: 'json_object' };
+        const modelParams = {};
+        if (this.temperature !== null) {
+            modelParams.temperature = this.temperature;
+        }
+        if (this.maxTokens !== null) {
+            modelParams.max_tokens = this.maxTokens;
         }
-        const response = await this.client.chat.completions.create({
-            model: this.model,
-            messages: deepseekMessages,
-            response_format: responseFormat,
-        }, options.signal ? { signal: options.signal } : undefined);
-        const content = response.choices[0].message.content || '';
-        let completion = content;
-        if (output_format) {
-            try {
-                completion = output_format.parse(JSON.parse(content));
+        if (this.topP !== null) {
+            modelParams.top_p = this.topP;
+        }
+        if (this.seed !== null) {
+            modelParams.seed = this.seed;
+        }
+        const zodSchemaCandidate = (() => {
+            const output = output_format;
+            if (output &&
+                typeof output === 'object' &&
+                typeof output.safeParse === 'function' &&
+                typeof output.parse === 'function') {
+                return output;
+            }
+            if (output &&
+                typeof output === 'object' &&
+                output.schema &&
+                typeof output.schema.safeParse === 'function' &&
+                typeof output.schema.parse === 'function') {
+                return output.schema;
             }
-            catch (e) {
-                console.error('Failed to parse completion', e);
-                throw e;
+            return null;
+        })();
+        try {
+            if (output_format && zodSchemaCandidate) {
+                const rawSchema = zodSchemaToJsonSchema(zodSchemaCandidate, {
+                    name: 'response',
+                    target: 'jsonSchema7',
+                });
+                const optimizedSchema = SchemaOptimizer.createOptimizedJsonSchema(rawSchema);
+                delete optimizedSchema.title;
+                const response = await this.client.chat.completions.create({
+                    model: this.model,
+                    messages: deepseekMessages,
+                    tools: [
+                        {
+                            type: 'function',
+                            function: {
+                                name: 'response',
+                                description: 'Return a JSON object of type response',
+                                parameters: optimizedSchema,
+                            },
+                        },
+                    ],
+                    tool_choice: {
+                        type: 'function',
+                        function: { name: 'response' },
+                    },
+                    ...modelParams,
+                }, options.signal ? { signal: options.signal } : undefined);
+                const usage = this.getUsage(response);
+                const stopReason = response.choices[0].finish_reason ?? null;
+                const toolCalls = response.choices[0].message.tool_calls;
+                if (!toolCalls?.length) {
+                    throw new ModelProviderError('Expected tool_calls in response but got none', 502, this.model);
+                }
+                const rawArguments = toolCalls[0]?.function?.arguments;
+                const parsedArguments = typeof rawArguments === 'string'
+                    ? JSON.parse(rawArguments)
+                    : rawArguments;
+                const output = output_format;
+                const completion = output &&
+                    typeof output === 'object' &&
+                    output.schema &&
+                    typeof output.schema.parse === 'function'
+                    ? output.schema.parse(parsedArguments)
+                    : output.parse(parsedArguments);
+                return new ChatInvokeCompletion(completion, usage, null, null, stopReason);
             }
+            const responseFormat = output_format ? { type: 'json_object' } : undefined;
+            const response = await this.client.chat.completions.create({
+                model: this.model,
+                messages: deepseekMessages,
+                response_format: responseFormat,
+                ...modelParams,
+            }, options.signal ? { signal: options.signal } : undefined);
+            const content = response.choices[0].message.content || '';
+            const usage = this.getUsage(response);
+            const stopReason = response.choices[0].finish_reason ?? null;
+            let completion = content;
+            if (output_format) {
+                const parsedJson = JSON.parse(content);
+                const output = output_format;
+                if (output &&
+                    typeof output === 'object' &&
+                    output.schema &&
+                    typeof output.schema.parse === 'function') {
+                    completion = output.schema.parse(parsedJson);
+                }
+                else {
+                    completion = output_format.parse(parsedJson);
+                }
+            }
+            return new ChatInvokeCompletion(completion, usage, null, null, stopReason);
+        }
+        catch (error) {
+            if (error?.status === 429) {
+                throw new ModelRateLimitError(error?.message ?? 'Rate limit exceeded', 429, this.model);
+            }
+            if (error?.status >= 500) {
+                throw new ModelProviderError(error?.message ?? 'Server error', error.status, this.model);
+            }
+            throw new ModelProviderError(error?.message ?? String(error), error?.status ?? 500, this.model);
         }
-        return new ChatInvokeCompletion(completion, {
-            prompt_tokens: response.usage?.prompt_tokens ?? 0,
-            completion_tokens: response.usage?.completion_tokens ?? 0,
-            total_tokens: response.usage?.total_tokens ?? 0,
-        });
     }
 }

package/dist/llm/google/chat.d.ts CHANGED Viewed

@@ -1,18 +1,62 @@
 import type { BaseChatModel, ChatInvokeOptions } from '../base.js';
 import { ChatInvokeCompletion } from '../views.js';
-import { type Message } from '../messages.js';
+import type { Message } from '../messages.js';
+export interface ChatGoogleOptions {
+    model?: string;
+    apiKey?: string;
+    apiVersion?: string;
+    baseUrl?: string;
+    vertexai?: boolean;
+    vertexAi?: boolean;
+    project?: string;
+    location?: string;
+    httpOptions?: Record<string, unknown>;
+    googleAuthOptions?: Record<string, unknown>;
+    credentials?: Record<string, unknown>;
+    temperature?: number | null;
+    topP?: number | null;
+    seed?: number | null;
+    thinkingBudget?: number | null;
+    thinkingLevel?: 'minimal' | 'low' | 'medium' | 'high' | null;
+    maxOutputTokens?: number | null;
+    config?: Record<string, unknown> | null;
+    includeSystemInUser?: boolean;
+    supportsStructuredOutput?: boolean;
+    maxRetries?: number;
+    retryableStatusCodes?: number[];
+    retryBaseDelay?: number;
+    retryMaxDelay?: number;
+}
 export declare class ChatGoogle implements BaseChatModel {
     model: string;
     provider: string;
     private client;
-    constructor(model?: string);
+    private temperature;
+    private topP;
+    private seed;
+    private thinkingBudget;
+    private thinkingLevel;
+    private maxOutputTokens;
+    private config;
+    private includeSystemInUser;
+    private supportsStructuredOutput;
+    private maxRetries;
+    private retryableStatusCodes;
+    private retryBaseDelay;
+    private retryMaxDelay;
+    constructor(options?: string | ChatGoogleOptions);
     get name(): string;
     get model_name(): string;
+    private getUsage;
     /**
      * Clean up JSON schema for Google's format
      * Google API has specific requirements for responseSchema
      */
     private _cleanSchemaForGoogle;
+    private _parseStructuredJson;
+    private _extractStatusCode;
+    private _toModelProviderError;
+    private _sleep;
     ainvoke(messages: Message[], output_format?: undefined, options?: ChatInvokeOptions): Promise<ChatInvokeCompletion<string>>;
     ainvoke<T>(messages: Message[], output_format: {
         parse: (input: string) => T;

package/dist/llm/google/chat.js CHANGED Viewed

@@ -1,21 +1,62 @@
 import { GoogleGenAI } from '@google/genai';
-import { zodToJsonSchema } from 'zod-to-json-schema';
+import { ModelProviderError } from '../exceptions.js';
 import { ChatInvokeCompletion } from '../views.js';
-import { SystemMessage } from '../messages.js';
+import { SchemaOptimizer, zodSchemaToJsonSchema } from '../schema.js';
 import { GoogleMessageSerializer } from './serializer.js';
 export class ChatGoogle {
     model;
     provider = 'google';
     client;
-    constructor(model = 'gemini-2.5-flash') {
+    temperature;
+    topP;
+    seed;
+    thinkingBudget;
+    thinkingLevel;
+    maxOutputTokens;
+    config;
+    includeSystemInUser;
+    supportsStructuredOutput;
+    maxRetries;
+    retryableStatusCodes;
+    retryBaseDelay;
+    retryMaxDelay;
+    constructor(options = {}) {
+        const normalizedOptions = typeof options === 'string' ? { model: options } : options;
+        const { model = 'gemini-2.5-flash', apiKey = process.env.GOOGLE_API_KEY, apiVersion = process.env.GOOGLE_API_VERSION, baseUrl = process.env.GOOGLE_API_BASE_URL, vertexai, vertexAi, project, location, httpOptions, googleAuthOptions, credentials, temperature = 0.5, topP = null, seed = null, thinkingBudget = null, thinkingLevel = null, maxOutputTokens = 8096, config = null, includeSystemInUser = false, supportsStructuredOutput = true, maxRetries = 5, retryableStatusCodes = [429, 500, 502, 503, 504], retryBaseDelay = 1.0, retryMaxDelay = 60.0, } = normalizedOptions;
         this.model = model;
-        const apiVersion = process.env.GOOGLE_API_VERSION || 'v1';
-        const baseUrl = process.env.GOOGLE_API_BASE_URL;
-        this.client = new GoogleGenAI({
-            apiKey: process.env.GOOGLE_API_KEY || '',
+        this.temperature = temperature;
+        this.topP = topP;
+        this.seed = seed;
+        this.thinkingBudget = thinkingBudget;
+        this.thinkingLevel = thinkingLevel;
+        this.maxOutputTokens = maxOutputTokens;
+        this.config = config ? { ...config } : null;
+        this.includeSystemInUser = includeSystemInUser;
+        this.supportsStructuredOutput = supportsStructuredOutput;
+        this.maxRetries = Math.max(1, maxRetries);
+        this.retryableStatusCodes = [...retryableStatusCodes];
+        this.retryBaseDelay = retryBaseDelay;
+        this.retryMaxDelay = retryMaxDelay;
+        const resolvedGoogleAuthOptions = credentials == null
+            ? googleAuthOptions
+            : {
+                ...(googleAuthOptions ?? {}),
+                credentials,
+            };
+        const resolvedVertexAi = vertexai ?? vertexAi;
+        const clientOptions = {
+            ...(apiKey != null ? { apiKey } : {}),
             ...(baseUrl ? { baseUrl } : {}),
             ...(apiVersion ? { apiVersion } : {}),
-        });
+            ...(resolvedVertexAi != null ? { vertexai: resolvedVertexAi } : {}),
+            ...(project ? { project } : {}),
+            ...(location ? { location } : {}),
+            ...(httpOptions ? { httpOptions } : {}),
+            ...(resolvedGoogleAuthOptions
+                ? { googleAuthOptions: resolvedGoogleAuthOptions }
+                : {}),
+        };
+        this.client = new GoogleGenAI(clientOptions);
     }
     get name() {
         return this.model;
@@ -23,6 +64,33 @@ export class ChatGoogle {
     get model_name() {
         return this.model;
     }
+    getUsage(result) {
+        const usage = result?.usageMetadata;
+        if (!usage) {
+            return null;
+        }
+        let imageTokens = 0;
+        const promptTokenDetails = Array.isArray(usage.promptTokensDetails)
+            ? usage.promptTokensDetails
+            : [];
+        for (const detail of promptTokenDetails) {
+            if (String(detail?.modality ?? '').toUpperCase() === 'IMAGE') {
+                imageTokens += Number(detail?.tokenCount ?? 0) || 0;
+            }
+        }
+        const completionTokens = (Number(usage.candidatesTokenCount ?? 0) || 0) +
+            (Number(usage.thoughtsTokenCount ?? 0) || 0);
+        return {
+            prompt_tokens: Number(usage.promptTokenCount ?? 0) || 0,
+            prompt_cached_tokens: usage.cachedContentTokenCount == null
+                ? null
+                : Number(usage.cachedContentTokenCount),
+            prompt_cache_creation_tokens: null,
+            prompt_image_tokens: imageTokens,
+            completion_tokens: completionTokens,
+            total_tokens: Number(usage.totalTokenCount ?? 0) || 0,
+        };
+    }
     /**
      * Clean up JSON schema for Google's format
      * Google API has specific requirements for responseSchema
@@ -43,6 +111,10 @@ export class ChatGoogle {
             if (key === 'properties' && typeof value === 'object') {
                 cleaned.properties = {};
                 for (const [propKey, propValue] of Object.entries(value)) {
+                    // Align python: hide programmatic extraction schema field from LLM JSON schema.
+                    if (propKey === 'output_schema') {
+                        continue;
+                    }
                     cleaned.properties[propKey] = this._cleanSchemaForGoogle(propValue);
                 }
             }
@@ -56,42 +128,175 @@ export class ChatGoogle {
                 cleaned[key] = value;
             }
         }
+        const schemaType = String(cleaned.type ?? '').toUpperCase();
+        if (schemaType === 'OBJECT' &&
+            cleaned.properties &&
+            typeof cleaned.properties === 'object' &&
+            !Array.isArray(cleaned.properties) &&
+            Object.keys(cleaned.properties).length === 0) {
+            cleaned.properties = {
+                _placeholder: { type: 'string' },
+            };
+        }
+        if (Array.isArray(cleaned.required) &&
+            cleaned.properties &&
+            typeof cleaned.properties === 'object' &&
+            !Array.isArray(cleaned.properties)) {
+            const validKeys = new Set(Object.keys(cleaned.properties));
+            cleaned.required = cleaned.required.filter((name) => typeof name === 'string' && validKeys.has(name));
+        }
         return cleaned;
     }
+    _parseStructuredJson(text) {
+        let jsonText = String(text ?? '').trim();
+        const fencedMatch = jsonText.match(/```(?:json)?\s*([\s\S]*?)```/i);
+        if (fencedMatch && fencedMatch[1]) {
+            jsonText = fencedMatch[1].trim();
+        }
+        const firstBrace = jsonText.indexOf('{');
+        const lastBrace = jsonText.lastIndexOf('}');
+        if (firstBrace === -1 || lastBrace === -1 || lastBrace <= firstBrace) {
+            throw new Error(`Expected JSON response but got plain text: "${jsonText.slice(0, 50)}..."`);
+        }
+        return JSON.parse(jsonText.slice(firstBrace, lastBrace + 1));
+    }
+    _extractStatusCode(error) {
+        const directStatus = Number(error?.status ??
+            error?.statusCode ??
+            error?.response?.status ??
+            error?.response?.statusCode);
+        if (Number.isFinite(directStatus)) {
+            return directStatus;
+        }
+        const message = String(error?.message ?? error ?? '').toLowerCase();
+        if (/(rate limit|resource exhausted|quota exceeded|too many requests|429)/.test(message)) {
+            return 429;
+        }
+        if (/(service unavailable|internal server error|bad gateway|503|502|500)/.test(message)) {
+            return 503;
+        }
+        if (/(forbidden|403)/.test(message)) {
+            return 403;
+        }
+        if (/(timeout|timed out|cancelled|canceled)/.test(message)) {
+            return 504;
+        }
+        return null;
+    }
+    _toModelProviderError(error) {
+        if (error instanceof ModelProviderError) {
+            return error;
+        }
+        return new ModelProviderError(error?.message ?? String(error), this._extractStatusCode(error) ?? 502, this.model);
+    }
+    async _sleep(ms) {
+        await new Promise((resolve) => setTimeout(resolve, ms));
+    }
     async ainvoke(messages, output_format, options = {}) {
         const serializer = new GoogleMessageSerializer();
-        const contents = serializer.serialize(messages);
-        const systemMessage = messages.find((msg) => msg instanceof SystemMessage);
-        const systemInstruction = systemMessage ? systemMessage.text : undefined;
-        let tools = undefined;
-        let toolConfig = undefined;
-        // For Google, we need to be more explicit about JSON output
-        // The generationConfig with responseSchema helps enforce JSON structure
-        const generationConfig = {
-            responseMimeType: 'application/json',
-        };
+        const { contents, systemInstruction } = serializer.serializeWithSystem(messages, this.includeSystemInUser);
+        const generationConfig = this.config ? { ...this.config } : {};
+        if (this.temperature !== null) {
+            generationConfig.temperature = this.temperature;
+        }
+        if (this.topP !== null) {
+            generationConfig.topP = this.topP;
+        }
+        if (this.seed !== null) {
+            generationConfig.seed = this.seed;
+        }
+        const isGemini3Pro = this.model.includes('gemini-3-pro');
+        const isGemini3Flash = this.model.includes('gemini-3-flash');
+        if (isGemini3Pro) {
+            let level = this.thinkingLevel ?? 'low';
+            if (level === 'minimal' || level === 'medium') {
+                level = 'low';
+            }
+            generationConfig.thinkingConfig = {
+                thinkingLevel: level.toUpperCase(),
+            };
+        }
+        else if (isGemini3Flash) {
+            if (this.thinkingLevel !== null) {
+                generationConfig.thinkingConfig = {
+                    thinkingLevel: this.thinkingLevel.toUpperCase(),
+                };
+            }
+            else {
+                generationConfig.thinkingConfig = {
+                    thinkingBudget: this.thinkingBudget === null ? -1 : this.thinkingBudget,
+                };
+            }
+        }
+        else {
+            let budget = this.thinkingBudget;
+            if (budget === null &&
+                (this.model.includes('gemini-2.5') ||
+                    this.model.includes('gemini-flash'))) {
+                budget = -1;
+            }
+            if (budget !== null) {
+                generationConfig.thinkingConfig = { thinkingBudget: budget };
+            }
+        }
+        if (this.maxOutputTokens !== null) {
+            generationConfig.maxOutputTokens = this.maxOutputTokens;
+        }
         // Try to get schema from output_format
-        const schemaForJson = output_format &&
-            'schema' in output_format &&
-            output_format.schema
-            ? output_format.schema
-            : null;
+        const schemaForJson = (() => {
+            const output = output_format;
+            if (output &&
+                typeof output === 'object' &&
+                typeof output.safeParse === 'function' &&
+                typeof output.parse === 'function') {
+                return output;
+            }
+            if (output &&
+                typeof output === 'object' &&
+                output.schema &&
+                typeof output.schema.safeParse === 'function' &&
+                typeof output.schema.parse === 'function') {
+                return output.schema;
+            }
+            return null;
+        })();
+        let cleanSchemaForJson = null;
         if (schemaForJson) {
             try {
-                const jsonSchema = zodToJsonSchema(schemaForJson);
-                // Clean up the schema for Google's format
-                const cleanSchema = this._cleanSchemaForGoogle(jsonSchema);
-                generationConfig.responseSchema = cleanSchema;
+                const jsonSchema = zodSchemaToJsonSchema(schemaForJson);
+                const optimizedSchema = SchemaOptimizer.createGeminiOptimizedSchema(jsonSchema);
+                cleanSchemaForJson = this._cleanSchemaForGoogle(optimizedSchema);
             }
-            catch (e) {
-                console.warn('Failed to set responseSchema', e);
+            catch {
+                cleanSchemaForJson = null;
+            }
+        }
+        if (cleanSchemaForJson && this.supportsStructuredOutput) {
+            generationConfig.responseMimeType = 'application/json';
+            generationConfig.responseSchema = cleanSchemaForJson;
+        }
+        const requestContents = contents.map((entry) => ({
+            ...entry,
+            parts: Array.isArray(entry?.parts)
+                ? entry.parts.map((part) => ({ ...part }))
+                : entry?.parts,
+        }));
+        if (output_format && cleanSchemaForJson && !this.supportsStructuredOutput) {
+            const jsonInstruction = '\n\nPlease respond with a valid JSON object that matches this schema: ' +
+                JSON.stringify(cleanSchemaForJson);
+            for (let i = requestContents.length - 1; i >= 0; i -= 1) {
+                const content = requestContents[i];
+                if (content?.role === 'user' && Array.isArray(content?.parts)) {
+                    content.parts = [...content.parts, { text: jsonInstruction }];
+                    break;
+                }
             }
         }
         const request = {
             model: this.model,
-            contents,
+            contents: requestContents,
         };
-        if (systemInstruction) {
+        if (systemInstruction && !this.includeSystemInUser) {
             request.systemInstruction = {
                 role: 'system',
                 parts: [{ text: systemInstruction }],
@@ -100,47 +305,45 @@ export class ChatGoogle {
         if (Object.keys(generationConfig).length > 0) {
             request.generationConfig = generationConfig;
         }
-        const result = await this.client.models.generateContent(request, options.signal ? { signal: options.signal } : undefined);
-        // Extract text from first candidate
-        const candidate = result.candidates?.[0];
-        const textParts = candidate?.content?.parts?.filter((p) => p.text) || [];
-        const text = textParts.map((p) => p.text).join('');
-        let completion = text;
-        if (output_format) {
+        for (let attempt = 0; attempt < this.maxRetries; attempt += 1) {
             try {
+                const result = await this.client.models.generateContent(request, options.signal ? { signal: options.signal } : undefined);
+                const candidate = result.candidates?.[0];
+                const textParts = candidate?.content?.parts?.filter((p) => p.text) || [];
+                const text = textParts.map((p) => p.text).join('');
+                let completion = text;
+                const stopReason = result?.candidates?.[0]?.finishReason ?? null;
                 let parsed = text;
-                if (generationConfig.responseMimeType === 'application/json') {
-                    let jsonText = text.trim();
-                    // Handle markdown code fences like ```json ... ```
-                    const fencedMatch = jsonText.match(/```(?:json)?\s*([\s\S]*?)```/i);
-                    if (fencedMatch && fencedMatch[1]) {
-                        jsonText = fencedMatch[1].trim();
-                    }
-                    // Try to extract JSON object from text
-                    const firstBrace = jsonText.indexOf('{');
-                    const lastBrace = jsonText.lastIndexOf('}');
-                    if (firstBrace !== -1 && lastBrace !== -1 && lastBrace > firstBrace) {
-                        jsonText = jsonText.slice(firstBrace, lastBrace + 1);
+                if (output_format && schemaForJson) {
+                    parsed = this._parseStructuredJson(text);
+                }
+                if (output_format) {
+                    const output = output_format;
+                    if (schemaForJson &&
+                        output &&
+                        typeof output === 'object' &&
+                        output.schema &&
+                        typeof output.schema.parse === 'function') {
+                        completion = output.schema.parse(parsed);
                     }
                     else {
-                        // If no JSON object found, the model returned plain text
-                        // Try to wrap it in a minimal valid structure
-                        console.warn('Google LLM returned plain text instead of JSON. Raw response:', text.slice(0, 200));
-                        throw new Error(`Expected JSON response but got plain text: "${text.slice(0, 50)}..."`);
+                        completion = output.parse(parsed);
                     }
-                    parsed = JSON.parse(jsonText);
                 }
-                completion = output_format.parse(parsed);
+                return new ChatInvokeCompletion(completion, this.getUsage(result), null, null, stopReason);
             }
-            catch (e) {
-                console.error('Failed to parse completion', e);
-                throw e;
+            catch (error) {
+                const providerError = this._toModelProviderError(error);
+                const shouldRetry = this.retryableStatusCodes.includes(providerError.statusCode) &&
+                    attempt < this.maxRetries - 1;
+                if (!shouldRetry) {
+                    throw providerError;
+                }
+                const delaySeconds = Math.min(this.retryBaseDelay * 2 ** attempt, this.retryMaxDelay);
+                const jitter = Math.random() * delaySeconds * 0.1;
+                await this._sleep((delaySeconds + jitter) * 1000);
             }
         }
-        return new ChatInvokeCompletion(completion, {
-            prompt_tokens: result.usageMetadata?.promptTokenCount ?? 0,
-            completion_tokens: result.usageMetadata?.candidatesTokenCount ?? 0,
-            total_tokens: result.usageMetadata?.totalTokenCount ?? 0,
-        });
+        throw new ModelProviderError('Retry loop completed without response', 500, this.model);
     }
 }

package/dist/llm/google/serializer.d.ts CHANGED Viewed

@@ -1,6 +1,14 @@
 import type { Content } from '@google/genai';
 import { type Message } from '../messages.js';
+export interface SerializedGoogleMessages {
+    contents: Content[];
+    systemInstruction: string | null;
+}
 export declare class GoogleMessageSerializer {
     serialize(messages: Message[]): Content[];
-    private serializeMessage;
+    serializeWithSystem(messages: Message[], includeSystemInUser?: boolean): SerializedGoogleMessages;
+    private serializeUserMessage;
+    private serializeAssistantMessage;
+    private serializeImagePart;
+    private extractMessageText;
 }