browser-use 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259) hide show
  1. package/README.md +295 -686
  2. package/dist/actor/element.d.ts +19 -0
  3. package/dist/actor/element.js +46 -0
  4. package/dist/actor/index.d.ts +4 -0
  5. package/dist/actor/index.js +4 -0
  6. package/dist/actor/mouse.d.ts +19 -0
  7. package/dist/actor/mouse.js +39 -0
  8. package/dist/actor/page.d.ts +29 -0
  9. package/dist/actor/page.js +88 -0
  10. package/dist/actor/utils.d.ts +4 -0
  11. package/dist/actor/utils.js +35 -0
  12. package/dist/agent/cloud-events.d.ts +18 -0
  13. package/dist/agent/cloud-events.js +65 -2
  14. package/dist/agent/gif.d.ts +1 -0
  15. package/dist/agent/gif.js +24 -2
  16. package/dist/agent/judge.d.ts +17 -0
  17. package/dist/agent/judge.js +197 -0
  18. package/dist/agent/message-manager/service.d.ts +12 -4
  19. package/dist/agent/message-manager/service.js +205 -39
  20. package/dist/agent/message-manager/utils.js +0 -1
  21. package/dist/agent/message-manager/views.d.ts +4 -0
  22. package/dist/agent/message-manager/views.js +11 -7
  23. package/dist/agent/prompts.d.ts +24 -3
  24. package/dist/agent/prompts.js +274 -59
  25. package/dist/agent/service.d.ts +99 -41
  26. package/dist/agent/service.js +2266 -472
  27. package/dist/agent/variable-detector.d.ts +12 -0
  28. package/dist/agent/variable-detector.js +211 -0
  29. package/dist/agent/views.d.ts +237 -18
  30. package/dist/agent/views.js +446 -33
  31. package/dist/browser/cloud/cloud.d.ts +20 -0
  32. package/dist/browser/cloud/cloud.js +129 -0
  33. package/dist/browser/cloud/index.d.ts +2 -0
  34. package/dist/browser/cloud/index.js +2 -0
  35. package/dist/browser/cloud/views.d.ts +41 -0
  36. package/dist/browser/cloud/views.js +35 -0
  37. package/dist/browser/events.d.ts +345 -0
  38. package/dist/browser/events.js +566 -0
  39. package/dist/browser/extensions.js +17 -17
  40. package/dist/browser/index.d.ts +4 -0
  41. package/dist/browser/index.js +4 -0
  42. package/dist/browser/profile.d.ts +8 -2
  43. package/dist/browser/profile.js +79 -12
  44. package/dist/browser/session-manager.d.ts +85 -0
  45. package/dist/browser/session-manager.js +208 -0
  46. package/dist/browser/session.d.ts +100 -8
  47. package/dist/browser/session.js +1097 -58
  48. package/dist/browser/types.d.ts +0 -2
  49. package/dist/browser/views.d.ts +39 -0
  50. package/dist/browser/views.js +32 -0
  51. package/dist/browser/watchdogs/aboutblank-watchdog.d.ts +12 -0
  52. package/dist/browser/watchdogs/aboutblank-watchdog.js +131 -0
  53. package/dist/browser/watchdogs/base.d.ts +21 -0
  54. package/dist/browser/watchdogs/base.js +81 -0
  55. package/dist/browser/watchdogs/cdp-session-watchdog.d.ts +14 -0
  56. package/dist/browser/watchdogs/cdp-session-watchdog.js +177 -0
  57. package/dist/browser/watchdogs/crash-watchdog.d.ts +38 -0
  58. package/dist/browser/watchdogs/crash-watchdog.js +296 -0
  59. package/dist/browser/watchdogs/default-action-watchdog.d.ts +49 -0
  60. package/dist/browser/watchdogs/default-action-watchdog.js +212 -0
  61. package/dist/browser/watchdogs/dom-watchdog.d.ts +8 -0
  62. package/dist/browser/watchdogs/dom-watchdog.js +31 -0
  63. package/dist/browser/watchdogs/downloads-watchdog.d.ts +77 -0
  64. package/dist/browser/watchdogs/downloads-watchdog.js +409 -0
  65. package/dist/browser/watchdogs/har-recording-watchdog.d.ts +19 -0
  66. package/dist/browser/watchdogs/har-recording-watchdog.js +317 -0
  67. package/dist/browser/watchdogs/index.d.ts +15 -0
  68. package/dist/browser/watchdogs/index.js +15 -0
  69. package/dist/browser/watchdogs/local-browser-watchdog.d.ts +10 -0
  70. package/dist/browser/watchdogs/local-browser-watchdog.js +32 -0
  71. package/dist/browser/watchdogs/permissions-watchdog.d.ts +8 -0
  72. package/dist/browser/watchdogs/permissions-watchdog.js +73 -0
  73. package/dist/browser/watchdogs/popups-watchdog.d.ts +13 -0
  74. package/dist/browser/watchdogs/popups-watchdog.js +77 -0
  75. package/dist/browser/watchdogs/recording-watchdog.d.ts +27 -0
  76. package/dist/browser/watchdogs/recording-watchdog.js +249 -0
  77. package/dist/browser/watchdogs/screenshot-watchdog.d.ts +6 -0
  78. package/dist/browser/watchdogs/screenshot-watchdog.js +13 -0
  79. package/dist/browser/watchdogs/security-watchdog.d.ts +10 -0
  80. package/dist/browser/watchdogs/security-watchdog.js +84 -0
  81. package/dist/browser/watchdogs/storage-state-watchdog.d.ts +24 -0
  82. package/dist/browser/watchdogs/storage-state-watchdog.js +288 -0
  83. package/dist/cli.d.ts +7 -2
  84. package/dist/cli.js +182 -25
  85. package/dist/code-use/formatting.d.ts +3 -0
  86. package/dist/code-use/formatting.js +18 -0
  87. package/dist/code-use/index.d.ts +6 -0
  88. package/dist/code-use/index.js +6 -0
  89. package/dist/code-use/namespace.d.ts +5 -0
  90. package/dist/code-use/namespace.js +81 -0
  91. package/dist/code-use/notebook-export.d.ts +3 -0
  92. package/dist/code-use/notebook-export.js +56 -0
  93. package/dist/code-use/service.d.ts +24 -0
  94. package/dist/code-use/service.js +104 -0
  95. package/dist/code-use/utils.d.ts +4 -0
  96. package/dist/code-use/utils.js +98 -0
  97. package/dist/code-use/views.d.ts +108 -0
  98. package/dist/code-use/views.js +165 -0
  99. package/dist/config.d.ts +13 -0
  100. package/dist/config.js +69 -3
  101. package/dist/controller/registry/service.d.ts +10 -1
  102. package/dist/controller/registry/service.js +266 -10
  103. package/dist/controller/registry/views.d.ts +4 -1
  104. package/dist/controller/registry/views.js +25 -2
  105. package/dist/controller/service.d.ts +10 -1
  106. package/dist/controller/service.js +1807 -268
  107. package/dist/controller/views.d.ts +78 -155
  108. package/dist/controller/views.js +61 -12
  109. package/dist/dom/history-tree-processor/service.d.ts +5 -0
  110. package/dist/dom/history-tree-processor/service.js +169 -14
  111. package/dist/dom/history-tree-processor/view.d.ts +7 -1
  112. package/dist/dom/history-tree-processor/view.js +10 -1
  113. package/dist/dom/markdown-extractor.d.ts +37 -0
  114. package/dist/dom/markdown-extractor.js +345 -0
  115. package/dist/dom/service.d.ts +3 -1
  116. package/dist/dom/service.js +76 -0
  117. package/dist/dom/views.d.ts +1 -0
  118. package/dist/dom/views.js +45 -0
  119. package/dist/event-bus.d.ts +107 -7
  120. package/dist/event-bus.js +313 -10
  121. package/dist/exceptions.d.ts +0 -3
  122. package/dist/exceptions.js +0 -7
  123. package/dist/filesystem/file-system.d.ts +18 -0
  124. package/dist/filesystem/file-system.js +503 -42
  125. package/dist/index.d.ts +7 -0
  126. package/dist/index.js +6 -0
  127. package/dist/integrations/gmail/actions.d.ts +3 -3
  128. package/dist/integrations/gmail/actions.js +4 -4
  129. package/dist/llm/anthropic/chat.d.ts +18 -1
  130. package/dist/llm/anthropic/chat.js +123 -55
  131. package/dist/llm/anthropic/serializer.d.ts +2 -0
  132. package/dist/llm/anthropic/serializer.js +81 -9
  133. package/dist/llm/aws/chat-anthropic.d.ts +17 -0
  134. package/dist/llm/aws/chat-anthropic.js +126 -26
  135. package/dist/llm/aws/chat-bedrock.d.ts +28 -1
  136. package/dist/llm/aws/chat-bedrock.js +161 -34
  137. package/dist/llm/aws/serializer.d.ts +13 -1
  138. package/dist/llm/aws/serializer.js +56 -17
  139. package/dist/llm/azure/chat.d.ts +53 -2
  140. package/dist/llm/azure/chat.js +366 -54
  141. package/dist/llm/base.d.ts +2 -0
  142. package/dist/llm/browser-use/chat.d.ts +40 -0
  143. package/dist/llm/browser-use/chat.js +305 -0
  144. package/dist/llm/browser-use/index.d.ts +1 -0
  145. package/dist/llm/browser-use/index.js +1 -0
  146. package/dist/llm/cerebras/chat.d.ts +39 -0
  147. package/dist/llm/cerebras/chat.js +178 -0
  148. package/dist/llm/cerebras/index.d.ts +2 -0
  149. package/dist/llm/cerebras/index.js +2 -0
  150. package/dist/llm/cerebras/serializer.d.ts +7 -0
  151. package/dist/llm/cerebras/serializer.js +82 -0
  152. package/dist/llm/deepseek/chat.d.ts +19 -2
  153. package/dist/llm/deepseek/chat.js +138 -25
  154. package/dist/llm/google/chat.d.ts +46 -2
  155. package/dist/llm/google/chat.js +267 -64
  156. package/dist/llm/google/serializer.d.ts +9 -1
  157. package/dist/llm/google/serializer.js +141 -34
  158. package/dist/llm/groq/chat.d.ts +21 -2
  159. package/dist/llm/groq/chat.js +125 -26
  160. package/dist/llm/groq/parser.js +3 -1
  161. package/dist/llm/mistral/chat.d.ts +43 -0
  162. package/dist/llm/mistral/chat.js +154 -0
  163. package/dist/llm/mistral/index.d.ts +2 -0
  164. package/dist/llm/mistral/index.js +2 -0
  165. package/dist/llm/mistral/schema.d.ts +8 -0
  166. package/dist/llm/mistral/schema.js +27 -0
  167. package/dist/llm/models.d.ts +2 -0
  168. package/dist/llm/models.js +317 -0
  169. package/dist/llm/ollama/chat.d.ts +13 -1
  170. package/dist/llm/ollama/chat.js +110 -19
  171. package/dist/llm/ollama/serializer.d.ts +1 -0
  172. package/dist/llm/ollama/serializer.js +34 -12
  173. package/dist/llm/openai/chat.d.ts +16 -0
  174. package/dist/llm/openai/chat.js +94 -44
  175. package/dist/llm/openai/like.d.ts +5 -3
  176. package/dist/llm/openai/like.js +7 -3
  177. package/dist/llm/openai/responses-serializer.d.ts +18 -0
  178. package/dist/llm/openai/responses-serializer.js +72 -0
  179. package/dist/llm/openrouter/chat.d.ts +28 -2
  180. package/dist/llm/openrouter/chat.js +115 -29
  181. package/dist/llm/schema.d.ts +11 -1
  182. package/dist/llm/schema.js +81 -1
  183. package/dist/llm/vercel/chat.d.ts +50 -0
  184. package/dist/llm/vercel/chat.js +276 -0
  185. package/dist/llm/vercel/index.d.ts +1 -0
  186. package/dist/llm/vercel/index.js +1 -0
  187. package/dist/llm/vercel/serializer.d.ts +5 -0
  188. package/dist/llm/vercel/serializer.js +7 -0
  189. package/dist/llm/views.d.ts +2 -1
  190. package/dist/llm/views.js +3 -1
  191. package/dist/logging-config.d.ts +2 -0
  192. package/dist/logging-config.js +82 -29
  193. package/dist/mcp/client.d.ts +10 -5
  194. package/dist/mcp/client.js +14 -9
  195. package/dist/mcp/controller.d.ts +42 -3
  196. package/dist/mcp/controller.js +56 -31
  197. package/dist/mcp/server.d.ts +14 -0
  198. package/dist/mcp/server.js +255 -52
  199. package/dist/observability.js +10 -4
  200. package/dist/sandbox/index.d.ts +2 -0
  201. package/dist/sandbox/index.js +2 -0
  202. package/dist/sandbox/sandbox.d.ts +19 -0
  203. package/dist/sandbox/sandbox.js +140 -0
  204. package/dist/sandbox/views.d.ts +67 -0
  205. package/dist/sandbox/views.js +121 -0
  206. package/dist/skill-cli/index.d.ts +3 -0
  207. package/dist/skill-cli/index.js +3 -0
  208. package/dist/skill-cli/protocol.d.ts +30 -0
  209. package/dist/skill-cli/protocol.js +48 -0
  210. package/dist/skill-cli/server.d.ts +11 -0
  211. package/dist/skill-cli/server.js +85 -0
  212. package/dist/skill-cli/sessions.d.ts +24 -0
  213. package/dist/skill-cli/sessions.js +47 -0
  214. package/dist/skills/index.d.ts +3 -0
  215. package/dist/skills/index.js +3 -0
  216. package/dist/skills/service.d.ts +27 -0
  217. package/dist/skills/service.js +266 -0
  218. package/dist/skills/utils.d.ts +6 -0
  219. package/dist/skills/utils.js +53 -0
  220. package/dist/skills/views.d.ts +40 -0
  221. package/dist/skills/views.js +10 -0
  222. package/dist/sync/auth.js +8 -3
  223. package/dist/sync/service.d.ts +6 -6
  224. package/dist/sync/service.js +54 -89
  225. package/dist/telemetry/views.d.ts +20 -6
  226. package/dist/telemetry/views.js +23 -5
  227. package/dist/tokens/custom-pricing.d.ts +2 -0
  228. package/dist/tokens/custom-pricing.js +22 -0
  229. package/dist/tokens/index.d.ts +2 -0
  230. package/dist/tokens/index.js +2 -0
  231. package/dist/tokens/mappings.d.ts +1 -0
  232. package/dist/tokens/mappings.js +3 -0
  233. package/dist/tokens/service.js +27 -8
  234. package/dist/tools/extraction/index.d.ts +2 -0
  235. package/dist/tools/extraction/index.js +2 -0
  236. package/dist/tools/extraction/schema-utils.d.ts +6 -0
  237. package/dist/tools/extraction/schema-utils.js +237 -0
  238. package/dist/tools/extraction/views.d.ts +7 -0
  239. package/dist/tools/index.d.ts +5 -0
  240. package/dist/tools/index.js +5 -0
  241. package/dist/tools/registry/index.d.ts +2 -0
  242. package/dist/tools/registry/index.js +2 -0
  243. package/dist/tools/registry/service.d.ts +1 -0
  244. package/dist/tools/registry/service.js +1 -0
  245. package/dist/tools/registry/views.d.ts +1 -0
  246. package/dist/tools/registry/views.js +1 -0
  247. package/dist/tools/service.d.ts +2 -0
  248. package/dist/tools/service.js +1 -0
  249. package/dist/tools/utils.d.ts +2 -0
  250. package/dist/tools/utils.js +57 -0
  251. package/dist/tools/views.d.ts +1 -0
  252. package/dist/tools/views.js +1 -0
  253. package/dist/utils.d.ts +10 -1
  254. package/dist/utils.js +70 -3
  255. package/package.json +87 -26
  256. package/dist/dom/playground/process-dom.js +0 -5
  257. package/dist/dom/playground/test-accessibility.d.ts +0 -44
  258. package/dist/dom/playground/test-accessibility.js +0 -111
  259. /package/dist/{dom/playground/process-dom.d.ts → tools/extraction/views.js} +0 -0
@@ -1,15 +1,30 @@
1
1
  import OpenAI from 'openai';
2
+ import { ModelProviderError, ModelRateLimitError } from '../exceptions.js';
3
+ import { SchemaOptimizer, zodSchemaToJsonSchema } from '../schema.js';
2
4
  import { ChatInvokeCompletion } from '../views.js';
3
5
  import { DeepSeekMessageSerializer } from './serializer.js';
4
6
  export class ChatDeepSeek {
5
7
  model;
6
8
  provider = 'deepseek';
7
9
  client;
8
- constructor(model = 'deepseek-chat') {
10
+ temperature;
11
+ maxTokens;
12
+ topP;
13
+ seed;
14
+ constructor(options = {}) {
15
+ const normalizedOptions = typeof options === 'string' ? { model: options } : options;
16
+ const { model = 'deepseek-chat', apiKey = process.env.DEEPSEEK_API_KEY, baseURL = 'https://api.deepseek.com/v1', timeout = null, clientParams = null, temperature = null, maxTokens = null, topP = null, seed = null, maxRetries = 10, } = normalizedOptions;
9
17
  this.model = model;
18
+ this.temperature = temperature;
19
+ this.maxTokens = maxTokens;
20
+ this.topP = topP;
21
+ this.seed = seed;
10
22
  this.client = new OpenAI({
11
- apiKey: process.env.DEEPSEEK_API_KEY,
12
- baseURL: 'https://api.deepseek.com',
23
+ apiKey,
24
+ baseURL,
25
+ ...(timeout !== null ? { timeout } : {}),
26
+ maxRetries,
27
+ ...(clientParams ?? {}),
13
28
  });
14
29
  }
15
30
  get name() {
@@ -18,34 +33,132 @@ export class ChatDeepSeek {
18
33
  get model_name() {
19
34
  return this.model;
20
35
  }
36
+ getUsage(response) {
37
+ if (!response.usage) {
38
+ return null;
39
+ }
40
+ return {
41
+ prompt_tokens: response.usage.prompt_tokens,
42
+ prompt_cached_tokens: response.usage.prompt_tokens_details?.cached_tokens ?? null,
43
+ prompt_cache_creation_tokens: null,
44
+ prompt_image_tokens: null,
45
+ completion_tokens: response.usage.completion_tokens,
46
+ total_tokens: response.usage.total_tokens,
47
+ };
48
+ }
21
49
  async ainvoke(messages, output_format, options = {}) {
22
50
  const serializer = new DeepSeekMessageSerializer();
23
51
  const deepseekMessages = serializer.serialize(messages);
24
- let responseFormat = undefined;
25
- if (output_format && 'schema' in output_format && output_format.schema) {
26
- // DeepSeek supports json_object
27
- responseFormat = { type: 'json_object' };
52
+ const modelParams = {};
53
+ if (this.temperature !== null) {
54
+ modelParams.temperature = this.temperature;
55
+ }
56
+ if (this.maxTokens !== null) {
57
+ modelParams.max_tokens = this.maxTokens;
28
58
  }
29
- const response = await this.client.chat.completions.create({
30
- model: this.model,
31
- messages: deepseekMessages,
32
- response_format: responseFormat,
33
- }, options.signal ? { signal: options.signal } : undefined);
34
- const content = response.choices[0].message.content || '';
35
- let completion = content;
36
- if (output_format) {
37
- try {
38
- completion = output_format.parse(JSON.parse(content));
59
+ if (this.topP !== null) {
60
+ modelParams.top_p = this.topP;
61
+ }
62
+ if (this.seed !== null) {
63
+ modelParams.seed = this.seed;
64
+ }
65
+ const zodSchemaCandidate = (() => {
66
+ const output = output_format;
67
+ if (output &&
68
+ typeof output === 'object' &&
69
+ typeof output.safeParse === 'function' &&
70
+ typeof output.parse === 'function') {
71
+ return output;
72
+ }
73
+ if (output &&
74
+ typeof output === 'object' &&
75
+ output.schema &&
76
+ typeof output.schema.safeParse === 'function' &&
77
+ typeof output.schema.parse === 'function') {
78
+ return output.schema;
39
79
  }
40
- catch (e) {
41
- console.error('Failed to parse completion', e);
42
- throw e;
80
+ return null;
81
+ })();
82
+ try {
83
+ if (output_format && zodSchemaCandidate) {
84
+ const rawSchema = zodSchemaToJsonSchema(zodSchemaCandidate, {
85
+ name: 'response',
86
+ target: 'jsonSchema7',
87
+ });
88
+ const optimizedSchema = SchemaOptimizer.createOptimizedJsonSchema(rawSchema);
89
+ delete optimizedSchema.title;
90
+ const response = await this.client.chat.completions.create({
91
+ model: this.model,
92
+ messages: deepseekMessages,
93
+ tools: [
94
+ {
95
+ type: 'function',
96
+ function: {
97
+ name: 'response',
98
+ description: 'Return a JSON object of type response',
99
+ parameters: optimizedSchema,
100
+ },
101
+ },
102
+ ],
103
+ tool_choice: {
104
+ type: 'function',
105
+ function: { name: 'response' },
106
+ },
107
+ ...modelParams,
108
+ }, options.signal ? { signal: options.signal } : undefined);
109
+ const usage = this.getUsage(response);
110
+ const stopReason = response.choices[0].finish_reason ?? null;
111
+ const toolCalls = response.choices[0].message.tool_calls;
112
+ if (!toolCalls?.length) {
113
+ throw new ModelProviderError('Expected tool_calls in response but got none', 502, this.model);
114
+ }
115
+ const rawArguments = toolCalls[0]?.function?.arguments;
116
+ const parsedArguments = typeof rawArguments === 'string'
117
+ ? JSON.parse(rawArguments)
118
+ : rawArguments;
119
+ const output = output_format;
120
+ const completion = output &&
121
+ typeof output === 'object' &&
122
+ output.schema &&
123
+ typeof output.schema.parse === 'function'
124
+ ? output.schema.parse(parsedArguments)
125
+ : output.parse(parsedArguments);
126
+ return new ChatInvokeCompletion(completion, usage, null, null, stopReason);
43
127
  }
128
+ const responseFormat = output_format ? { type: 'json_object' } : undefined;
129
+ const response = await this.client.chat.completions.create({
130
+ model: this.model,
131
+ messages: deepseekMessages,
132
+ response_format: responseFormat,
133
+ ...modelParams,
134
+ }, options.signal ? { signal: options.signal } : undefined);
135
+ const content = response.choices[0].message.content || '';
136
+ const usage = this.getUsage(response);
137
+ const stopReason = response.choices[0].finish_reason ?? null;
138
+ let completion = content;
139
+ if (output_format) {
140
+ const parsedJson = JSON.parse(content);
141
+ const output = output_format;
142
+ if (output &&
143
+ typeof output === 'object' &&
144
+ output.schema &&
145
+ typeof output.schema.parse === 'function') {
146
+ completion = output.schema.parse(parsedJson);
147
+ }
148
+ else {
149
+ completion = output_format.parse(parsedJson);
150
+ }
151
+ }
152
+ return new ChatInvokeCompletion(completion, usage, null, null, stopReason);
153
+ }
154
+ catch (error) {
155
+ if (error?.status === 429) {
156
+ throw new ModelRateLimitError(error?.message ?? 'Rate limit exceeded', 429, this.model);
157
+ }
158
+ if (error?.status >= 500) {
159
+ throw new ModelProviderError(error?.message ?? 'Server error', error.status, this.model);
160
+ }
161
+ throw new ModelProviderError(error?.message ?? String(error), error?.status ?? 500, this.model);
44
162
  }
45
- return new ChatInvokeCompletion(completion, {
46
- prompt_tokens: response.usage?.prompt_tokens ?? 0,
47
- completion_tokens: response.usage?.completion_tokens ?? 0,
48
- total_tokens: response.usage?.total_tokens ?? 0,
49
- });
50
163
  }
51
164
  }
@@ -1,18 +1,62 @@
1
1
  import type { BaseChatModel, ChatInvokeOptions } from '../base.js';
2
2
  import { ChatInvokeCompletion } from '../views.js';
3
- import { type Message } from '../messages.js';
3
+ import type { Message } from '../messages.js';
4
+ export interface ChatGoogleOptions {
5
+ model?: string;
6
+ apiKey?: string;
7
+ apiVersion?: string;
8
+ baseUrl?: string;
9
+ vertexai?: boolean;
10
+ vertexAi?: boolean;
11
+ project?: string;
12
+ location?: string;
13
+ httpOptions?: Record<string, unknown>;
14
+ googleAuthOptions?: Record<string, unknown>;
15
+ credentials?: Record<string, unknown>;
16
+ temperature?: number | null;
17
+ topP?: number | null;
18
+ seed?: number | null;
19
+ thinkingBudget?: number | null;
20
+ thinkingLevel?: 'minimal' | 'low' | 'medium' | 'high' | null;
21
+ maxOutputTokens?: number | null;
22
+ config?: Record<string, unknown> | null;
23
+ includeSystemInUser?: boolean;
24
+ supportsStructuredOutput?: boolean;
25
+ maxRetries?: number;
26
+ retryableStatusCodes?: number[];
27
+ retryBaseDelay?: number;
28
+ retryMaxDelay?: number;
29
+ }
4
30
  export declare class ChatGoogle implements BaseChatModel {
5
31
  model: string;
6
32
  provider: string;
7
33
  private client;
8
- constructor(model?: string);
34
+ private temperature;
35
+ private topP;
36
+ private seed;
37
+ private thinkingBudget;
38
+ private thinkingLevel;
39
+ private maxOutputTokens;
40
+ private config;
41
+ private includeSystemInUser;
42
+ private supportsStructuredOutput;
43
+ private maxRetries;
44
+ private retryableStatusCodes;
45
+ private retryBaseDelay;
46
+ private retryMaxDelay;
47
+ constructor(options?: string | ChatGoogleOptions);
9
48
  get name(): string;
10
49
  get model_name(): string;
50
+ private getUsage;
11
51
  /**
12
52
  * Clean up JSON schema for Google's format
13
53
  * Google API has specific requirements for responseSchema
14
54
  */
15
55
  private _cleanSchemaForGoogle;
56
+ private _parseStructuredJson;
57
+ private _extractStatusCode;
58
+ private _toModelProviderError;
59
+ private _sleep;
16
60
  ainvoke(messages: Message[], output_format?: undefined, options?: ChatInvokeOptions): Promise<ChatInvokeCompletion<string>>;
17
61
  ainvoke<T>(messages: Message[], output_format: {
18
62
  parse: (input: string) => T;
@@ -1,21 +1,62 @@
1
1
  import { GoogleGenAI } from '@google/genai';
2
- import { zodToJsonSchema } from 'zod-to-json-schema';
2
+ import { ModelProviderError } from '../exceptions.js';
3
3
  import { ChatInvokeCompletion } from '../views.js';
4
- import { SystemMessage } from '../messages.js';
4
+ import { SchemaOptimizer, zodSchemaToJsonSchema } from '../schema.js';
5
5
  import { GoogleMessageSerializer } from './serializer.js';
6
6
  export class ChatGoogle {
7
7
  model;
8
8
  provider = 'google';
9
9
  client;
10
- constructor(model = 'gemini-2.5-flash') {
10
+ temperature;
11
+ topP;
12
+ seed;
13
+ thinkingBudget;
14
+ thinkingLevel;
15
+ maxOutputTokens;
16
+ config;
17
+ includeSystemInUser;
18
+ supportsStructuredOutput;
19
+ maxRetries;
20
+ retryableStatusCodes;
21
+ retryBaseDelay;
22
+ retryMaxDelay;
23
+ constructor(options = {}) {
24
+ const normalizedOptions = typeof options === 'string' ? { model: options } : options;
25
+ const { model = 'gemini-2.5-flash', apiKey = process.env.GOOGLE_API_KEY, apiVersion = process.env.GOOGLE_API_VERSION, baseUrl = process.env.GOOGLE_API_BASE_URL, vertexai, vertexAi, project, location, httpOptions, googleAuthOptions, credentials, temperature = 0.5, topP = null, seed = null, thinkingBudget = null, thinkingLevel = null, maxOutputTokens = 8096, config = null, includeSystemInUser = false, supportsStructuredOutput = true, maxRetries = 5, retryableStatusCodes = [429, 500, 502, 503, 504], retryBaseDelay = 1.0, retryMaxDelay = 60.0, } = normalizedOptions;
11
26
  this.model = model;
12
- const apiVersion = process.env.GOOGLE_API_VERSION || 'v1';
13
- const baseUrl = process.env.GOOGLE_API_BASE_URL;
14
- this.client = new GoogleGenAI({
15
- apiKey: process.env.GOOGLE_API_KEY || '',
27
+ this.temperature = temperature;
28
+ this.topP = topP;
29
+ this.seed = seed;
30
+ this.thinkingBudget = thinkingBudget;
31
+ this.thinkingLevel = thinkingLevel;
32
+ this.maxOutputTokens = maxOutputTokens;
33
+ this.config = config ? { ...config } : null;
34
+ this.includeSystemInUser = includeSystemInUser;
35
+ this.supportsStructuredOutput = supportsStructuredOutput;
36
+ this.maxRetries = Math.max(1, maxRetries);
37
+ this.retryableStatusCodes = [...retryableStatusCodes];
38
+ this.retryBaseDelay = retryBaseDelay;
39
+ this.retryMaxDelay = retryMaxDelay;
40
+ const resolvedGoogleAuthOptions = credentials == null
41
+ ? googleAuthOptions
42
+ : {
43
+ ...(googleAuthOptions ?? {}),
44
+ credentials,
45
+ };
46
+ const resolvedVertexAi = vertexai ?? vertexAi;
47
+ const clientOptions = {
48
+ ...(apiKey != null ? { apiKey } : {}),
16
49
  ...(baseUrl ? { baseUrl } : {}),
17
50
  ...(apiVersion ? { apiVersion } : {}),
18
- });
51
+ ...(resolvedVertexAi != null ? { vertexai: resolvedVertexAi } : {}),
52
+ ...(project ? { project } : {}),
53
+ ...(location ? { location } : {}),
54
+ ...(httpOptions ? { httpOptions } : {}),
55
+ ...(resolvedGoogleAuthOptions
56
+ ? { googleAuthOptions: resolvedGoogleAuthOptions }
57
+ : {}),
58
+ };
59
+ this.client = new GoogleGenAI(clientOptions);
19
60
  }
20
61
  get name() {
21
62
  return this.model;
@@ -23,6 +64,33 @@ export class ChatGoogle {
23
64
  get model_name() {
24
65
  return this.model;
25
66
  }
67
+ getUsage(result) {
68
+ const usage = result?.usageMetadata;
69
+ if (!usage) {
70
+ return null;
71
+ }
72
+ let imageTokens = 0;
73
+ const promptTokenDetails = Array.isArray(usage.promptTokensDetails)
74
+ ? usage.promptTokensDetails
75
+ : [];
76
+ for (const detail of promptTokenDetails) {
77
+ if (String(detail?.modality ?? '').toUpperCase() === 'IMAGE') {
78
+ imageTokens += Number(detail?.tokenCount ?? 0) || 0;
79
+ }
80
+ }
81
+ const completionTokens = (Number(usage.candidatesTokenCount ?? 0) || 0) +
82
+ (Number(usage.thoughtsTokenCount ?? 0) || 0);
83
+ return {
84
+ prompt_tokens: Number(usage.promptTokenCount ?? 0) || 0,
85
+ prompt_cached_tokens: usage.cachedContentTokenCount == null
86
+ ? null
87
+ : Number(usage.cachedContentTokenCount),
88
+ prompt_cache_creation_tokens: null,
89
+ prompt_image_tokens: imageTokens,
90
+ completion_tokens: completionTokens,
91
+ total_tokens: Number(usage.totalTokenCount ?? 0) || 0,
92
+ };
93
+ }
26
94
  /**
27
95
  * Clean up JSON schema for Google's format
28
96
  * Google API has specific requirements for responseSchema
@@ -43,6 +111,10 @@ export class ChatGoogle {
43
111
  if (key === 'properties' && typeof value === 'object') {
44
112
  cleaned.properties = {};
45
113
  for (const [propKey, propValue] of Object.entries(value)) {
114
+ // Align python: hide programmatic extraction schema field from LLM JSON schema.
115
+ if (propKey === 'output_schema') {
116
+ continue;
117
+ }
46
118
  cleaned.properties[propKey] = this._cleanSchemaForGoogle(propValue);
47
119
  }
48
120
  }
@@ -56,42 +128,175 @@ export class ChatGoogle {
56
128
  cleaned[key] = value;
57
129
  }
58
130
  }
131
+ const schemaType = String(cleaned.type ?? '').toUpperCase();
132
+ if (schemaType === 'OBJECT' &&
133
+ cleaned.properties &&
134
+ typeof cleaned.properties === 'object' &&
135
+ !Array.isArray(cleaned.properties) &&
136
+ Object.keys(cleaned.properties).length === 0) {
137
+ cleaned.properties = {
138
+ _placeholder: { type: 'string' },
139
+ };
140
+ }
141
+ if (Array.isArray(cleaned.required) &&
142
+ cleaned.properties &&
143
+ typeof cleaned.properties === 'object' &&
144
+ !Array.isArray(cleaned.properties)) {
145
+ const validKeys = new Set(Object.keys(cleaned.properties));
146
+ cleaned.required = cleaned.required.filter((name) => typeof name === 'string' && validKeys.has(name));
147
+ }
59
148
  return cleaned;
60
149
  }
150
+ _parseStructuredJson(text) {
151
+ let jsonText = String(text ?? '').trim();
152
+ const fencedMatch = jsonText.match(/```(?:json)?\s*([\s\S]*?)```/i);
153
+ if (fencedMatch && fencedMatch[1]) {
154
+ jsonText = fencedMatch[1].trim();
155
+ }
156
+ const firstBrace = jsonText.indexOf('{');
157
+ const lastBrace = jsonText.lastIndexOf('}');
158
+ if (firstBrace === -1 || lastBrace === -1 || lastBrace <= firstBrace) {
159
+ throw new Error(`Expected JSON response but got plain text: "${jsonText.slice(0, 50)}..."`);
160
+ }
161
+ return JSON.parse(jsonText.slice(firstBrace, lastBrace + 1));
162
+ }
163
+ _extractStatusCode(error) {
164
+ const directStatus = Number(error?.status ??
165
+ error?.statusCode ??
166
+ error?.response?.status ??
167
+ error?.response?.statusCode);
168
+ if (Number.isFinite(directStatus)) {
169
+ return directStatus;
170
+ }
171
+ const message = String(error?.message ?? error ?? '').toLowerCase();
172
+ if (/(rate limit|resource exhausted|quota exceeded|too many requests|429)/.test(message)) {
173
+ return 429;
174
+ }
175
+ if (/(service unavailable|internal server error|bad gateway|503|502|500)/.test(message)) {
176
+ return 503;
177
+ }
178
+ if (/(forbidden|403)/.test(message)) {
179
+ return 403;
180
+ }
181
+ if (/(timeout|timed out|cancelled|canceled)/.test(message)) {
182
+ return 504;
183
+ }
184
+ return null;
185
+ }
186
+ _toModelProviderError(error) {
187
+ if (error instanceof ModelProviderError) {
188
+ return error;
189
+ }
190
+ return new ModelProviderError(error?.message ?? String(error), this._extractStatusCode(error) ?? 502, this.model);
191
+ }
192
+ async _sleep(ms) {
193
+ await new Promise((resolve) => setTimeout(resolve, ms));
194
+ }
61
195
  async ainvoke(messages, output_format, options = {}) {
62
196
  const serializer = new GoogleMessageSerializer();
63
- const contents = serializer.serialize(messages);
64
- const systemMessage = messages.find((msg) => msg instanceof SystemMessage);
65
- const systemInstruction = systemMessage ? systemMessage.text : undefined;
66
- let tools = undefined;
67
- let toolConfig = undefined;
68
- // For Google, we need to be more explicit about JSON output
69
- // The generationConfig with responseSchema helps enforce JSON structure
70
- const generationConfig = {
71
- responseMimeType: 'application/json',
72
- };
197
+ const { contents, systemInstruction } = serializer.serializeWithSystem(messages, this.includeSystemInUser);
198
+ const generationConfig = this.config ? { ...this.config } : {};
199
+ if (this.temperature !== null) {
200
+ generationConfig.temperature = this.temperature;
201
+ }
202
+ if (this.topP !== null) {
203
+ generationConfig.topP = this.topP;
204
+ }
205
+ if (this.seed !== null) {
206
+ generationConfig.seed = this.seed;
207
+ }
208
+ const isGemini3Pro = this.model.includes('gemini-3-pro');
209
+ const isGemini3Flash = this.model.includes('gemini-3-flash');
210
+ if (isGemini3Pro) {
211
+ let level = this.thinkingLevel ?? 'low';
212
+ if (level === 'minimal' || level === 'medium') {
213
+ level = 'low';
214
+ }
215
+ generationConfig.thinkingConfig = {
216
+ thinkingLevel: level.toUpperCase(),
217
+ };
218
+ }
219
+ else if (isGemini3Flash) {
220
+ if (this.thinkingLevel !== null) {
221
+ generationConfig.thinkingConfig = {
222
+ thinkingLevel: this.thinkingLevel.toUpperCase(),
223
+ };
224
+ }
225
+ else {
226
+ generationConfig.thinkingConfig = {
227
+ thinkingBudget: this.thinkingBudget === null ? -1 : this.thinkingBudget,
228
+ };
229
+ }
230
+ }
231
+ else {
232
+ let budget = this.thinkingBudget;
233
+ if (budget === null &&
234
+ (this.model.includes('gemini-2.5') ||
235
+ this.model.includes('gemini-flash'))) {
236
+ budget = -1;
237
+ }
238
+ if (budget !== null) {
239
+ generationConfig.thinkingConfig = { thinkingBudget: budget };
240
+ }
241
+ }
242
+ if (this.maxOutputTokens !== null) {
243
+ generationConfig.maxOutputTokens = this.maxOutputTokens;
244
+ }
73
245
  // Try to get schema from output_format
74
- const schemaForJson = output_format &&
75
- 'schema' in output_format &&
76
- output_format.schema
77
- ? output_format.schema
78
- : null;
246
+ const schemaForJson = (() => {
247
+ const output = output_format;
248
+ if (output &&
249
+ typeof output === 'object' &&
250
+ typeof output.safeParse === 'function' &&
251
+ typeof output.parse === 'function') {
252
+ return output;
253
+ }
254
+ if (output &&
255
+ typeof output === 'object' &&
256
+ output.schema &&
257
+ typeof output.schema.safeParse === 'function' &&
258
+ typeof output.schema.parse === 'function') {
259
+ return output.schema;
260
+ }
261
+ return null;
262
+ })();
263
+ let cleanSchemaForJson = null;
79
264
  if (schemaForJson) {
80
265
  try {
81
- const jsonSchema = zodToJsonSchema(schemaForJson);
82
- // Clean up the schema for Google's format
83
- const cleanSchema = this._cleanSchemaForGoogle(jsonSchema);
84
- generationConfig.responseSchema = cleanSchema;
266
+ const jsonSchema = zodSchemaToJsonSchema(schemaForJson);
267
+ const optimizedSchema = SchemaOptimizer.createGeminiOptimizedSchema(jsonSchema);
268
+ cleanSchemaForJson = this._cleanSchemaForGoogle(optimizedSchema);
85
269
  }
86
- catch (e) {
87
- console.warn('Failed to set responseSchema', e);
270
+ catch {
271
+ cleanSchemaForJson = null;
272
+ }
273
+ }
274
+ if (cleanSchemaForJson && this.supportsStructuredOutput) {
275
+ generationConfig.responseMimeType = 'application/json';
276
+ generationConfig.responseSchema = cleanSchemaForJson;
277
+ }
278
+ const requestContents = contents.map((entry) => ({
279
+ ...entry,
280
+ parts: Array.isArray(entry?.parts)
281
+ ? entry.parts.map((part) => ({ ...part }))
282
+ : entry?.parts,
283
+ }));
284
+ if (output_format && cleanSchemaForJson && !this.supportsStructuredOutput) {
285
+ const jsonInstruction = '\n\nPlease respond with a valid JSON object that matches this schema: ' +
286
+ JSON.stringify(cleanSchemaForJson);
287
+ for (let i = requestContents.length - 1; i >= 0; i -= 1) {
288
+ const content = requestContents[i];
289
+ if (content?.role === 'user' && Array.isArray(content?.parts)) {
290
+ content.parts = [...content.parts, { text: jsonInstruction }];
291
+ break;
292
+ }
88
293
  }
89
294
  }
90
295
  const request = {
91
296
  model: this.model,
92
- contents,
297
+ contents: requestContents,
93
298
  };
94
- if (systemInstruction) {
299
+ if (systemInstruction && !this.includeSystemInUser) {
95
300
  request.systemInstruction = {
96
301
  role: 'system',
97
302
  parts: [{ text: systemInstruction }],
@@ -100,47 +305,45 @@ export class ChatGoogle {
100
305
  if (Object.keys(generationConfig).length > 0) {
101
306
  request.generationConfig = generationConfig;
102
307
  }
103
- const result = await this.client.models.generateContent(request, options.signal ? { signal: options.signal } : undefined);
104
- // Extract text from first candidate
105
- const candidate = result.candidates?.[0];
106
- const textParts = candidate?.content?.parts?.filter((p) => p.text) || [];
107
- const text = textParts.map((p) => p.text).join('');
108
- let completion = text;
109
- if (output_format) {
308
+ for (let attempt = 0; attempt < this.maxRetries; attempt += 1) {
110
309
  try {
310
+ const result = await this.client.models.generateContent(request, options.signal ? { signal: options.signal } : undefined);
311
+ const candidate = result.candidates?.[0];
312
+ const textParts = candidate?.content?.parts?.filter((p) => p.text) || [];
313
+ const text = textParts.map((p) => p.text).join('');
314
+ let completion = text;
315
+ const stopReason = result?.candidates?.[0]?.finishReason ?? null;
111
316
  let parsed = text;
112
- if (generationConfig.responseMimeType === 'application/json') {
113
- let jsonText = text.trim();
114
- // Handle markdown code fences like ```json ... ```
115
- const fencedMatch = jsonText.match(/```(?:json)?\s*([\s\S]*?)```/i);
116
- if (fencedMatch && fencedMatch[1]) {
117
- jsonText = fencedMatch[1].trim();
118
- }
119
- // Try to extract JSON object from text
120
- const firstBrace = jsonText.indexOf('{');
121
- const lastBrace = jsonText.lastIndexOf('}');
122
- if (firstBrace !== -1 && lastBrace !== -1 && lastBrace > firstBrace) {
123
- jsonText = jsonText.slice(firstBrace, lastBrace + 1);
317
+ if (output_format && schemaForJson) {
318
+ parsed = this._parseStructuredJson(text);
319
+ }
320
+ if (output_format) {
321
+ const output = output_format;
322
+ if (schemaForJson &&
323
+ output &&
324
+ typeof output === 'object' &&
325
+ output.schema &&
326
+ typeof output.schema.parse === 'function') {
327
+ completion = output.schema.parse(parsed);
124
328
  }
125
329
  else {
126
- // If no JSON object found, the model returned plain text
127
- // Try to wrap it in a minimal valid structure
128
- console.warn('Google LLM returned plain text instead of JSON. Raw response:', text.slice(0, 200));
129
- throw new Error(`Expected JSON response but got plain text: "${text.slice(0, 50)}..."`);
330
+ completion = output.parse(parsed);
130
331
  }
131
- parsed = JSON.parse(jsonText);
132
332
  }
133
- completion = output_format.parse(parsed);
333
+ return new ChatInvokeCompletion(completion, this.getUsage(result), null, null, stopReason);
134
334
  }
135
- catch (e) {
136
- console.error('Failed to parse completion', e);
137
- throw e;
335
+ catch (error) {
336
+ const providerError = this._toModelProviderError(error);
337
+ const shouldRetry = this.retryableStatusCodes.includes(providerError.statusCode) &&
338
+ attempt < this.maxRetries - 1;
339
+ if (!shouldRetry) {
340
+ throw providerError;
341
+ }
342
+ const delaySeconds = Math.min(this.retryBaseDelay * 2 ** attempt, this.retryMaxDelay);
343
+ const jitter = Math.random() * delaySeconds * 0.1;
344
+ await this._sleep((delaySeconds + jitter) * 1000);
138
345
  }
139
346
  }
140
- return new ChatInvokeCompletion(completion, {
141
- prompt_tokens: result.usageMetadata?.promptTokenCount ?? 0,
142
- completion_tokens: result.usageMetadata?.candidatesTokenCount ?? 0,
143
- total_tokens: result.usageMetadata?.totalTokenCount ?? 0,
144
- });
347
+ throw new ModelProviderError('Retry loop completed without response', 500, this.model);
145
348
  }
146
349
  }
@@ -1,6 +1,14 @@
1
1
  import type { Content } from '@google/genai';
2
2
  import { type Message } from '../messages.js';
3
+ export interface SerializedGoogleMessages {
4
+ contents: Content[];
5
+ systemInstruction: string | null;
6
+ }
3
7
  export declare class GoogleMessageSerializer {
4
8
  serialize(messages: Message[]): Content[];
5
- private serializeMessage;
9
+ serializeWithSystem(messages: Message[], includeSystemInUser?: boolean): SerializedGoogleMessages;
10
+ private serializeUserMessage;
11
+ private serializeAssistantMessage;
12
+ private serializeImagePart;
13
+ private extractMessageText;
6
14
  }