browser-use 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (258) hide show
  1. package/README.md +301 -636
  2. package/dist/actor/element.d.ts +19 -0
  3. package/dist/actor/element.js +46 -0
  4. package/dist/actor/index.d.ts +4 -0
  5. package/dist/actor/index.js +4 -0
  6. package/dist/actor/mouse.d.ts +19 -0
  7. package/dist/actor/mouse.js +39 -0
  8. package/dist/actor/page.d.ts +29 -0
  9. package/dist/actor/page.js +88 -0
  10. package/dist/actor/utils.d.ts +4 -0
  11. package/dist/actor/utils.js +35 -0
  12. package/dist/agent/cloud-events.d.ts +18 -0
  13. package/dist/agent/cloud-events.js +65 -2
  14. package/dist/agent/gif.d.ts +1 -0
  15. package/dist/agent/gif.js +24 -2
  16. package/dist/agent/judge.d.ts +17 -0
  17. package/dist/agent/judge.js +197 -0
  18. package/dist/agent/message-manager/service.d.ts +12 -4
  19. package/dist/agent/message-manager/service.js +205 -39
  20. package/dist/agent/message-manager/utils.js +0 -1
  21. package/dist/agent/message-manager/views.d.ts +4 -0
  22. package/dist/agent/message-manager/views.js +11 -7
  23. package/dist/agent/prompts.d.ts +24 -3
  24. package/dist/agent/prompts.js +274 -59
  25. package/dist/agent/service.d.ts +99 -40
  26. package/dist/agent/service.js +2282 -474
  27. package/dist/agent/variable-detector.d.ts +12 -0
  28. package/dist/agent/variable-detector.js +211 -0
  29. package/dist/agent/views.d.ts +237 -17
  30. package/dist/agent/views.js +446 -32
  31. package/dist/browser/cloud/cloud.d.ts +20 -0
  32. package/dist/browser/cloud/cloud.js +129 -0
  33. package/dist/browser/cloud/index.d.ts +2 -0
  34. package/dist/browser/cloud/index.js +2 -0
  35. package/dist/browser/cloud/views.d.ts +41 -0
  36. package/dist/browser/cloud/views.js +35 -0
  37. package/dist/browser/events.d.ts +345 -0
  38. package/dist/browser/events.js +566 -0
  39. package/dist/browser/extensions.js +17 -17
  40. package/dist/browser/index.d.ts +4 -0
  41. package/dist/browser/index.js +4 -0
  42. package/dist/browser/profile.d.ts +8 -2
  43. package/dist/browser/profile.js +79 -12
  44. package/dist/browser/session-manager.d.ts +85 -0
  45. package/dist/browser/session-manager.js +208 -0
  46. package/dist/browser/session.d.ts +100 -8
  47. package/dist/browser/session.js +1102 -63
  48. package/dist/browser/types.d.ts +0 -2
  49. package/dist/browser/views.d.ts +39 -0
  50. package/dist/browser/views.js +32 -0
  51. package/dist/browser/watchdogs/aboutblank-watchdog.d.ts +12 -0
  52. package/dist/browser/watchdogs/aboutblank-watchdog.js +131 -0
  53. package/dist/browser/watchdogs/base.d.ts +21 -0
  54. package/dist/browser/watchdogs/base.js +81 -0
  55. package/dist/browser/watchdogs/cdp-session-watchdog.d.ts +14 -0
  56. package/dist/browser/watchdogs/cdp-session-watchdog.js +177 -0
  57. package/dist/browser/watchdogs/crash-watchdog.d.ts +38 -0
  58. package/dist/browser/watchdogs/crash-watchdog.js +296 -0
  59. package/dist/browser/watchdogs/default-action-watchdog.d.ts +49 -0
  60. package/dist/browser/watchdogs/default-action-watchdog.js +212 -0
  61. package/dist/browser/watchdogs/dom-watchdog.d.ts +8 -0
  62. package/dist/browser/watchdogs/dom-watchdog.js +31 -0
  63. package/dist/browser/watchdogs/downloads-watchdog.d.ts +77 -0
  64. package/dist/browser/watchdogs/downloads-watchdog.js +409 -0
  65. package/dist/browser/watchdogs/har-recording-watchdog.d.ts +19 -0
  66. package/dist/browser/watchdogs/har-recording-watchdog.js +317 -0
  67. package/dist/browser/watchdogs/index.d.ts +15 -0
  68. package/dist/browser/watchdogs/index.js +15 -0
  69. package/dist/browser/watchdogs/local-browser-watchdog.d.ts +10 -0
  70. package/dist/browser/watchdogs/local-browser-watchdog.js +32 -0
  71. package/dist/browser/watchdogs/permissions-watchdog.d.ts +8 -0
  72. package/dist/browser/watchdogs/permissions-watchdog.js +73 -0
  73. package/dist/browser/watchdogs/popups-watchdog.d.ts +13 -0
  74. package/dist/browser/watchdogs/popups-watchdog.js +77 -0
  75. package/dist/browser/watchdogs/recording-watchdog.d.ts +27 -0
  76. package/dist/browser/watchdogs/recording-watchdog.js +249 -0
  77. package/dist/browser/watchdogs/screenshot-watchdog.d.ts +6 -0
  78. package/dist/browser/watchdogs/screenshot-watchdog.js +13 -0
  79. package/dist/browser/watchdogs/security-watchdog.d.ts +10 -0
  80. package/dist/browser/watchdogs/security-watchdog.js +84 -0
  81. package/dist/browser/watchdogs/storage-state-watchdog.d.ts +24 -0
  82. package/dist/browser/watchdogs/storage-state-watchdog.js +288 -0
  83. package/dist/cli.d.ts +41 -0
  84. package/dist/cli.js +820 -10
  85. package/dist/code-use/formatting.d.ts +3 -0
  86. package/dist/code-use/formatting.js +18 -0
  87. package/dist/code-use/index.d.ts +6 -0
  88. package/dist/code-use/index.js +6 -0
  89. package/dist/code-use/namespace.d.ts +5 -0
  90. package/dist/code-use/namespace.js +81 -0
  91. package/dist/code-use/notebook-export.d.ts +3 -0
  92. package/dist/code-use/notebook-export.js +56 -0
  93. package/dist/code-use/service.d.ts +24 -0
  94. package/dist/code-use/service.js +104 -0
  95. package/dist/code-use/utils.d.ts +4 -0
  96. package/dist/code-use/utils.js +98 -0
  97. package/dist/code-use/views.d.ts +108 -0
  98. package/dist/code-use/views.js +165 -0
  99. package/dist/config.d.ts +13 -0
  100. package/dist/config.js +69 -3
  101. package/dist/controller/registry/service.d.ts +10 -1
  102. package/dist/controller/registry/service.js +266 -10
  103. package/dist/controller/registry/views.d.ts +4 -1
  104. package/dist/controller/registry/views.js +25 -2
  105. package/dist/controller/service.d.ts +10 -1
  106. package/dist/controller/service.js +1849 -288
  107. package/dist/controller/views.d.ts +78 -155
  108. package/dist/controller/views.js +61 -12
  109. package/dist/dom/history-tree-processor/service.d.ts +5 -0
  110. package/dist/dom/history-tree-processor/service.js +169 -14
  111. package/dist/dom/history-tree-processor/view.d.ts +7 -1
  112. package/dist/dom/history-tree-processor/view.js +10 -1
  113. package/dist/dom/markdown-extractor.d.ts +37 -0
  114. package/dist/dom/markdown-extractor.js +345 -0
  115. package/dist/dom/service.d.ts +3 -1
  116. package/dist/dom/service.js +76 -0
  117. package/dist/dom/views.d.ts +1 -0
  118. package/dist/dom/views.js +45 -0
  119. package/dist/event-bus.d.ts +107 -7
  120. package/dist/event-bus.js +313 -10
  121. package/dist/filesystem/file-system.d.ts +18 -0
  122. package/dist/filesystem/file-system.js +530 -42
  123. package/dist/index.d.ts +7 -0
  124. package/dist/index.js +6 -0
  125. package/dist/integrations/gmail/actions.d.ts +3 -3
  126. package/dist/integrations/gmail/actions.js +5 -5
  127. package/dist/llm/anthropic/chat.d.ts +18 -1
  128. package/dist/llm/anthropic/chat.js +123 -55
  129. package/dist/llm/anthropic/serializer.d.ts +2 -0
  130. package/dist/llm/anthropic/serializer.js +81 -9
  131. package/dist/llm/aws/chat-anthropic.d.ts +17 -0
  132. package/dist/llm/aws/chat-anthropic.js +129 -40
  133. package/dist/llm/aws/chat-bedrock.d.ts +28 -1
  134. package/dist/llm/aws/chat-bedrock.js +161 -34
  135. package/dist/llm/aws/serializer.d.ts +13 -1
  136. package/dist/llm/aws/serializer.js +56 -17
  137. package/dist/llm/azure/chat.d.ts +53 -2
  138. package/dist/llm/azure/chat.js +366 -53
  139. package/dist/llm/base.d.ts +2 -0
  140. package/dist/llm/browser-use/chat.d.ts +40 -0
  141. package/dist/llm/browser-use/chat.js +305 -0
  142. package/dist/llm/browser-use/index.d.ts +1 -0
  143. package/dist/llm/browser-use/index.js +1 -0
  144. package/dist/llm/cerebras/chat.d.ts +39 -0
  145. package/dist/llm/cerebras/chat.js +178 -0
  146. package/dist/llm/cerebras/index.d.ts +2 -0
  147. package/dist/llm/cerebras/index.js +2 -0
  148. package/dist/llm/cerebras/serializer.d.ts +7 -0
  149. package/dist/llm/cerebras/serializer.js +82 -0
  150. package/dist/llm/deepseek/chat.d.ts +19 -2
  151. package/dist/llm/deepseek/chat.js +138 -25
  152. package/dist/llm/google/chat.d.ts +46 -2
  153. package/dist/llm/google/chat.js +268 -63
  154. package/dist/llm/google/serializer.d.ts +9 -1
  155. package/dist/llm/google/serializer.js +141 -34
  156. package/dist/llm/groq/chat.d.ts +21 -2
  157. package/dist/llm/groq/chat.js +125 -26
  158. package/dist/llm/groq/parser.js +3 -1
  159. package/dist/llm/messages.d.ts +4 -4
  160. package/dist/llm/mistral/chat.d.ts +43 -0
  161. package/dist/llm/mistral/chat.js +154 -0
  162. package/dist/llm/mistral/index.d.ts +2 -0
  163. package/dist/llm/mistral/index.js +2 -0
  164. package/dist/llm/mistral/schema.d.ts +8 -0
  165. package/dist/llm/mistral/schema.js +27 -0
  166. package/dist/llm/models.d.ts +2 -0
  167. package/dist/llm/models.js +317 -0
  168. package/dist/llm/ollama/chat.d.ts +13 -1
  169. package/dist/llm/ollama/chat.js +110 -19
  170. package/dist/llm/ollama/serializer.d.ts +1 -0
  171. package/dist/llm/ollama/serializer.js +34 -12
  172. package/dist/llm/openai/chat.d.ts +16 -0
  173. package/dist/llm/openai/chat.js +94 -44
  174. package/dist/llm/openai/like.d.ts +5 -3
  175. package/dist/llm/openai/like.js +7 -3
  176. package/dist/llm/openai/responses-serializer.d.ts +18 -0
  177. package/dist/llm/openai/responses-serializer.js +72 -0
  178. package/dist/llm/openrouter/chat.d.ts +28 -2
  179. package/dist/llm/openrouter/chat.js +115 -29
  180. package/dist/llm/schema.d.ts +11 -1
  181. package/dist/llm/schema.js +81 -1
  182. package/dist/llm/vercel/chat.d.ts +50 -0
  183. package/dist/llm/vercel/chat.js +276 -0
  184. package/dist/llm/vercel/index.d.ts +1 -0
  185. package/dist/llm/vercel/index.js +1 -0
  186. package/dist/llm/vercel/serializer.d.ts +5 -0
  187. package/dist/llm/vercel/serializer.js +7 -0
  188. package/dist/llm/views.d.ts +2 -1
  189. package/dist/llm/views.js +3 -1
  190. package/dist/logging-config.d.ts +2 -0
  191. package/dist/logging-config.js +82 -29
  192. package/dist/mcp/client.d.ts +10 -5
  193. package/dist/mcp/client.js +21 -15
  194. package/dist/mcp/controller.d.ts +42 -3
  195. package/dist/mcp/controller.js +56 -31
  196. package/dist/mcp/server.d.ts +14 -0
  197. package/dist/mcp/server.js +257 -51
  198. package/dist/observability.js +10 -4
  199. package/dist/sandbox/index.d.ts +2 -0
  200. package/dist/sandbox/index.js +2 -0
  201. package/dist/sandbox/sandbox.d.ts +19 -0
  202. package/dist/sandbox/sandbox.js +140 -0
  203. package/dist/sandbox/views.d.ts +67 -0
  204. package/dist/sandbox/views.js +121 -0
  205. package/dist/skill-cli/index.d.ts +3 -0
  206. package/dist/skill-cli/index.js +3 -0
  207. package/dist/skill-cli/protocol.d.ts +30 -0
  208. package/dist/skill-cli/protocol.js +48 -0
  209. package/dist/skill-cli/server.d.ts +11 -0
  210. package/dist/skill-cli/server.js +85 -0
  211. package/dist/skill-cli/sessions.d.ts +24 -0
  212. package/dist/skill-cli/sessions.js +47 -0
  213. package/dist/skills/index.d.ts +3 -0
  214. package/dist/skills/index.js +3 -0
  215. package/dist/skills/service.d.ts +27 -0
  216. package/dist/skills/service.js +266 -0
  217. package/dist/skills/utils.d.ts +6 -0
  218. package/dist/skills/utils.js +53 -0
  219. package/dist/skills/views.d.ts +40 -0
  220. package/dist/skills/views.js +10 -0
  221. package/dist/sync/auth.js +8 -3
  222. package/dist/sync/service.d.ts +6 -6
  223. package/dist/sync/service.js +54 -89
  224. package/dist/telemetry/views.d.ts +20 -6
  225. package/dist/telemetry/views.js +23 -5
  226. package/dist/tokens/custom-pricing.d.ts +2 -0
  227. package/dist/tokens/custom-pricing.js +22 -0
  228. package/dist/tokens/index.d.ts +2 -0
  229. package/dist/tokens/index.js +2 -0
  230. package/dist/tokens/mappings.d.ts +1 -0
  231. package/dist/tokens/mappings.js +3 -0
  232. package/dist/tokens/service.js +30 -12
  233. package/dist/tools/extraction/index.d.ts +2 -0
  234. package/dist/tools/extraction/index.js +2 -0
  235. package/dist/tools/extraction/schema-utils.d.ts +6 -0
  236. package/dist/tools/extraction/schema-utils.js +237 -0
  237. package/dist/tools/extraction/views.d.ts +7 -0
  238. package/dist/tools/index.d.ts +5 -0
  239. package/dist/tools/index.js +5 -0
  240. package/dist/tools/registry/index.d.ts +2 -0
  241. package/dist/tools/registry/index.js +2 -0
  242. package/dist/tools/registry/service.d.ts +1 -0
  243. package/dist/tools/registry/service.js +1 -0
  244. package/dist/tools/registry/views.d.ts +1 -0
  245. package/dist/tools/registry/views.js +1 -0
  246. package/dist/tools/service.d.ts +2 -0
  247. package/dist/tools/service.js +1 -0
  248. package/dist/tools/utils.d.ts +2 -0
  249. package/dist/tools/utils.js +57 -0
  250. package/dist/tools/views.d.ts +1 -0
  251. package/dist/tools/views.js +1 -0
  252. package/dist/utils.d.ts +10 -1
  253. package/dist/utils.js +70 -3
  254. package/package.json +265 -28
  255. package/dist/dom/playground/process-dom.js +0 -5
  256. package/dist/dom/playground/test-accessibility.d.ts +0 -44
  257. package/dist/dom/playground/test-accessibility.js +0 -111
  258. /package/dist/{dom/playground/process-dom.d.ts → tools/extraction/views.js} +0 -0
@@ -1,15 +1,30 @@
1
1
  import OpenAI from 'openai';
2
+ import { ModelProviderError, ModelRateLimitError } from '../exceptions.js';
3
+ import { SchemaOptimizer, zodSchemaToJsonSchema } from '../schema.js';
2
4
  import { ChatInvokeCompletion } from '../views.js';
3
5
  import { DeepSeekMessageSerializer } from './serializer.js';
4
6
  export class ChatDeepSeek {
5
7
  model;
6
8
  provider = 'deepseek';
7
9
  client;
8
- constructor(model = 'deepseek-chat') {
10
+ temperature;
11
+ maxTokens;
12
+ topP;
13
+ seed;
14
+ constructor(options = {}) {
15
+ const normalizedOptions = typeof options === 'string' ? { model: options } : options;
16
+ const { model = 'deepseek-chat', apiKey = process.env.DEEPSEEK_API_KEY, baseURL = 'https://api.deepseek.com/v1', timeout = null, clientParams = null, temperature = null, maxTokens = null, topP = null, seed = null, maxRetries = 10, } = normalizedOptions;
9
17
  this.model = model;
18
+ this.temperature = temperature;
19
+ this.maxTokens = maxTokens;
20
+ this.topP = topP;
21
+ this.seed = seed;
10
22
  this.client = new OpenAI({
11
- apiKey: process.env.DEEPSEEK_API_KEY,
12
- baseURL: 'https://api.deepseek.com',
23
+ apiKey,
24
+ baseURL,
25
+ ...(timeout !== null ? { timeout } : {}),
26
+ maxRetries,
27
+ ...(clientParams ?? {}),
13
28
  });
14
29
  }
15
30
  get name() {
@@ -18,34 +33,132 @@ export class ChatDeepSeek {
18
33
  get model_name() {
19
34
  return this.model;
20
35
  }
36
+ getUsage(response) {
37
+ if (!response.usage) {
38
+ return null;
39
+ }
40
+ return {
41
+ prompt_tokens: response.usage.prompt_tokens,
42
+ prompt_cached_tokens: response.usage.prompt_tokens_details?.cached_tokens ?? null,
43
+ prompt_cache_creation_tokens: null,
44
+ prompt_image_tokens: null,
45
+ completion_tokens: response.usage.completion_tokens,
46
+ total_tokens: response.usage.total_tokens,
47
+ };
48
+ }
21
49
  async ainvoke(messages, output_format, options = {}) {
22
50
  const serializer = new DeepSeekMessageSerializer();
23
51
  const deepseekMessages = serializer.serialize(messages);
24
- let responseFormat = undefined;
25
- if (output_format && 'schema' in output_format && output_format.schema) {
26
- // DeepSeek supports json_object
27
- responseFormat = { type: 'json_object' };
52
+ const modelParams = {};
53
+ if (this.temperature !== null) {
54
+ modelParams.temperature = this.temperature;
55
+ }
56
+ if (this.maxTokens !== null) {
57
+ modelParams.max_tokens = this.maxTokens;
28
58
  }
29
- const response = await this.client.chat.completions.create({
30
- model: this.model,
31
- messages: deepseekMessages,
32
- response_format: responseFormat,
33
- }, options.signal ? { signal: options.signal } : undefined);
34
- const content = response.choices[0].message.content || '';
35
- let completion = content;
36
- if (output_format) {
37
- try {
38
- completion = output_format.parse(JSON.parse(content));
59
+ if (this.topP !== null) {
60
+ modelParams.top_p = this.topP;
61
+ }
62
+ if (this.seed !== null) {
63
+ modelParams.seed = this.seed;
64
+ }
65
+ const zodSchemaCandidate = (() => {
66
+ const output = output_format;
67
+ if (output &&
68
+ typeof output === 'object' &&
69
+ typeof output.safeParse === 'function' &&
70
+ typeof output.parse === 'function') {
71
+ return output;
72
+ }
73
+ if (output &&
74
+ typeof output === 'object' &&
75
+ output.schema &&
76
+ typeof output.schema.safeParse === 'function' &&
77
+ typeof output.schema.parse === 'function') {
78
+ return output.schema;
39
79
  }
40
- catch (e) {
41
- console.error('Failed to parse completion', e);
42
- throw e;
80
+ return null;
81
+ })();
82
+ try {
83
+ if (output_format && zodSchemaCandidate) {
84
+ const rawSchema = zodSchemaToJsonSchema(zodSchemaCandidate, {
85
+ name: 'response',
86
+ target: 'jsonSchema7',
87
+ });
88
+ const optimizedSchema = SchemaOptimizer.createOptimizedJsonSchema(rawSchema);
89
+ delete optimizedSchema.title;
90
+ const response = await this.client.chat.completions.create({
91
+ model: this.model,
92
+ messages: deepseekMessages,
93
+ tools: [
94
+ {
95
+ type: 'function',
96
+ function: {
97
+ name: 'response',
98
+ description: 'Return a JSON object of type response',
99
+ parameters: optimizedSchema,
100
+ },
101
+ },
102
+ ],
103
+ tool_choice: {
104
+ type: 'function',
105
+ function: { name: 'response' },
106
+ },
107
+ ...modelParams,
108
+ }, options.signal ? { signal: options.signal } : undefined);
109
+ const usage = this.getUsage(response);
110
+ const stopReason = response.choices[0].finish_reason ?? null;
111
+ const toolCalls = response.choices[0].message.tool_calls;
112
+ if (!toolCalls?.length) {
113
+ throw new ModelProviderError('Expected tool_calls in response but got none', 502, this.model);
114
+ }
115
+ const rawArguments = toolCalls[0]?.function?.arguments;
116
+ const parsedArguments = typeof rawArguments === 'string'
117
+ ? JSON.parse(rawArguments)
118
+ : rawArguments;
119
+ const output = output_format;
120
+ const completion = output &&
121
+ typeof output === 'object' &&
122
+ output.schema &&
123
+ typeof output.schema.parse === 'function'
124
+ ? output.schema.parse(parsedArguments)
125
+ : output.parse(parsedArguments);
126
+ return new ChatInvokeCompletion(completion, usage, null, null, stopReason);
43
127
  }
128
+ const responseFormat = output_format ? { type: 'json_object' } : undefined;
129
+ const response = await this.client.chat.completions.create({
130
+ model: this.model,
131
+ messages: deepseekMessages,
132
+ response_format: responseFormat,
133
+ ...modelParams,
134
+ }, options.signal ? { signal: options.signal } : undefined);
135
+ const content = response.choices[0].message.content || '';
136
+ const usage = this.getUsage(response);
137
+ const stopReason = response.choices[0].finish_reason ?? null;
138
+ let completion = content;
139
+ if (output_format) {
140
+ const parsedJson = JSON.parse(content);
141
+ const output = output_format;
142
+ if (output &&
143
+ typeof output === 'object' &&
144
+ output.schema &&
145
+ typeof output.schema.parse === 'function') {
146
+ completion = output.schema.parse(parsedJson);
147
+ }
148
+ else {
149
+ completion = output_format.parse(parsedJson);
150
+ }
151
+ }
152
+ return new ChatInvokeCompletion(completion, usage, null, null, stopReason);
153
+ }
154
+ catch (error) {
155
+ if (error?.status === 429) {
156
+ throw new ModelRateLimitError(error?.message ?? 'Rate limit exceeded', 429, this.model);
157
+ }
158
+ if (error?.status >= 500) {
159
+ throw new ModelProviderError(error?.message ?? 'Server error', error.status, this.model);
160
+ }
161
+ throw new ModelProviderError(error?.message ?? String(error), error?.status ?? 500, this.model);
44
162
  }
45
- return new ChatInvokeCompletion(completion, {
46
- prompt_tokens: response.usage?.prompt_tokens ?? 0,
47
- completion_tokens: response.usage?.completion_tokens ?? 0,
48
- total_tokens: response.usage?.total_tokens ?? 0,
49
- });
50
163
  }
51
164
  }
@@ -1,18 +1,62 @@
1
1
  import type { BaseChatModel, ChatInvokeOptions } from '../base.js';
2
2
  import { ChatInvokeCompletion } from '../views.js';
3
- import { type Message } from '../messages.js';
3
+ import type { Message } from '../messages.js';
4
+ export interface ChatGoogleOptions {
5
+ model?: string;
6
+ apiKey?: string;
7
+ apiVersion?: string;
8
+ baseUrl?: string;
9
+ vertexai?: boolean;
10
+ vertexAi?: boolean;
11
+ project?: string;
12
+ location?: string;
13
+ httpOptions?: Record<string, unknown>;
14
+ googleAuthOptions?: Record<string, unknown>;
15
+ credentials?: Record<string, unknown>;
16
+ temperature?: number | null;
17
+ topP?: number | null;
18
+ seed?: number | null;
19
+ thinkingBudget?: number | null;
20
+ thinkingLevel?: 'minimal' | 'low' | 'medium' | 'high' | null;
21
+ maxOutputTokens?: number | null;
22
+ config?: Record<string, unknown> | null;
23
+ includeSystemInUser?: boolean;
24
+ supportsStructuredOutput?: boolean;
25
+ maxRetries?: number;
26
+ retryableStatusCodes?: number[];
27
+ retryBaseDelay?: number;
28
+ retryMaxDelay?: number;
29
+ }
4
30
  export declare class ChatGoogle implements BaseChatModel {
5
31
  model: string;
6
32
  provider: string;
7
33
  private client;
8
- constructor(model?: string);
34
+ private temperature;
35
+ private topP;
36
+ private seed;
37
+ private thinkingBudget;
38
+ private thinkingLevel;
39
+ private maxOutputTokens;
40
+ private config;
41
+ private includeSystemInUser;
42
+ private supportsStructuredOutput;
43
+ private maxRetries;
44
+ private retryableStatusCodes;
45
+ private retryBaseDelay;
46
+ private retryMaxDelay;
47
+ constructor(options?: string | ChatGoogleOptions);
9
48
  get name(): string;
10
49
  get model_name(): string;
50
+ private getUsage;
11
51
  /**
12
52
  * Clean up JSON schema for Google's format
13
53
  * Google API has specific requirements for responseSchema
14
54
  */
15
55
  private _cleanSchemaForGoogle;
56
+ private _parseStructuredJson;
57
+ private _extractStatusCode;
58
+ private _toModelProviderError;
59
+ private _sleep;
16
60
  ainvoke(messages: Message[], output_format?: undefined, options?: ChatInvokeOptions): Promise<ChatInvokeCompletion<string>>;
17
61
  ainvoke<T>(messages: Message[], output_format: {
18
62
  parse: (input: string) => T;
@@ -1,21 +1,62 @@
1
- import { GoogleGenAI, } from '@google/genai';
2
- import { zodToJsonSchema } from 'zod-to-json-schema';
1
+ import { GoogleGenAI } from '@google/genai';
2
+ import { ModelProviderError } from '../exceptions.js';
3
3
  import { ChatInvokeCompletion } from '../views.js';
4
- import { SystemMessage } from '../messages.js';
4
+ import { SchemaOptimizer, zodSchemaToJsonSchema } from '../schema.js';
5
5
  import { GoogleMessageSerializer } from './serializer.js';
6
6
  export class ChatGoogle {
7
7
  model;
8
8
  provider = 'google';
9
9
  client;
10
- constructor(model = 'gemini-2.5-flash') {
10
+ temperature;
11
+ topP;
12
+ seed;
13
+ thinkingBudget;
14
+ thinkingLevel;
15
+ maxOutputTokens;
16
+ config;
17
+ includeSystemInUser;
18
+ supportsStructuredOutput;
19
+ maxRetries;
20
+ retryableStatusCodes;
21
+ retryBaseDelay;
22
+ retryMaxDelay;
23
+ constructor(options = {}) {
24
+ const normalizedOptions = typeof options === 'string' ? { model: options } : options;
25
+ const { model = 'gemini-2.5-flash', apiKey = process.env.GOOGLE_API_KEY, apiVersion = process.env.GOOGLE_API_VERSION, baseUrl = process.env.GOOGLE_API_BASE_URL, vertexai, vertexAi, project, location, httpOptions, googleAuthOptions, credentials, temperature = 0.5, topP = null, seed = null, thinkingBudget = null, thinkingLevel = null, maxOutputTokens = 8096, config = null, includeSystemInUser = false, supportsStructuredOutput = true, maxRetries = 5, retryableStatusCodes = [429, 500, 502, 503, 504], retryBaseDelay = 1.0, retryMaxDelay = 60.0, } = normalizedOptions;
11
26
  this.model = model;
12
- const apiVersion = process.env.GOOGLE_API_VERSION || 'v1';
13
- const baseUrl = process.env.GOOGLE_API_BASE_URL;
14
- this.client = new GoogleGenAI({
15
- apiKey: process.env.GOOGLE_API_KEY || '',
27
+ this.temperature = temperature;
28
+ this.topP = topP;
29
+ this.seed = seed;
30
+ this.thinkingBudget = thinkingBudget;
31
+ this.thinkingLevel = thinkingLevel;
32
+ this.maxOutputTokens = maxOutputTokens;
33
+ this.config = config ? { ...config } : null;
34
+ this.includeSystemInUser = includeSystemInUser;
35
+ this.supportsStructuredOutput = supportsStructuredOutput;
36
+ this.maxRetries = Math.max(1, maxRetries);
37
+ this.retryableStatusCodes = [...retryableStatusCodes];
38
+ this.retryBaseDelay = retryBaseDelay;
39
+ this.retryMaxDelay = retryMaxDelay;
40
+ const resolvedGoogleAuthOptions = credentials == null
41
+ ? googleAuthOptions
42
+ : {
43
+ ...(googleAuthOptions ?? {}),
44
+ credentials,
45
+ };
46
+ const resolvedVertexAi = vertexai ?? vertexAi;
47
+ const clientOptions = {
48
+ ...(apiKey != null ? { apiKey } : {}),
16
49
  ...(baseUrl ? { baseUrl } : {}),
17
50
  ...(apiVersion ? { apiVersion } : {}),
18
- });
51
+ ...(resolvedVertexAi != null ? { vertexai: resolvedVertexAi } : {}),
52
+ ...(project ? { project } : {}),
53
+ ...(location ? { location } : {}),
54
+ ...(httpOptions ? { httpOptions } : {}),
55
+ ...(resolvedGoogleAuthOptions
56
+ ? { googleAuthOptions: resolvedGoogleAuthOptions }
57
+ : {}),
58
+ };
59
+ this.client = new GoogleGenAI(clientOptions);
19
60
  }
20
61
  get name() {
21
62
  return this.model;
@@ -23,6 +64,33 @@ export class ChatGoogle {
23
64
  get model_name() {
24
65
  return this.model;
25
66
  }
67
+ getUsage(result) {
68
+ const usage = result?.usageMetadata;
69
+ if (!usage) {
70
+ return null;
71
+ }
72
+ let imageTokens = 0;
73
+ const promptTokenDetails = Array.isArray(usage.promptTokensDetails)
74
+ ? usage.promptTokensDetails
75
+ : [];
76
+ for (const detail of promptTokenDetails) {
77
+ if (String(detail?.modality ?? '').toUpperCase() === 'IMAGE') {
78
+ imageTokens += Number(detail?.tokenCount ?? 0) || 0;
79
+ }
80
+ }
81
+ const completionTokens = (Number(usage.candidatesTokenCount ?? 0) || 0) +
82
+ (Number(usage.thoughtsTokenCount ?? 0) || 0);
83
+ return {
84
+ prompt_tokens: Number(usage.promptTokenCount ?? 0) || 0,
85
+ prompt_cached_tokens: usage.cachedContentTokenCount == null
86
+ ? null
87
+ : Number(usage.cachedContentTokenCount),
88
+ prompt_cache_creation_tokens: null,
89
+ prompt_image_tokens: imageTokens,
90
+ completion_tokens: completionTokens,
91
+ total_tokens: Number(usage.totalTokenCount ?? 0) || 0,
92
+ };
93
+ }
26
94
  /**
27
95
  * Clean up JSON schema for Google's format
28
96
  * Google API has specific requirements for responseSchema
@@ -43,6 +111,10 @@ export class ChatGoogle {
43
111
  if (key === 'properties' && typeof value === 'object') {
44
112
  cleaned.properties = {};
45
113
  for (const [propKey, propValue] of Object.entries(value)) {
114
+ // Align python: hide programmatic extraction schema field from LLM JSON schema.
115
+ if (propKey === 'output_schema') {
116
+ continue;
117
+ }
46
118
  cleaned.properties[propKey] = this._cleanSchemaForGoogle(propValue);
47
119
  }
48
120
  }
@@ -56,40 +128,175 @@ export class ChatGoogle {
56
128
  cleaned[key] = value;
57
129
  }
58
130
  }
131
+ const schemaType = String(cleaned.type ?? '').toUpperCase();
132
+ if (schemaType === 'OBJECT' &&
133
+ cleaned.properties &&
134
+ typeof cleaned.properties === 'object' &&
135
+ !Array.isArray(cleaned.properties) &&
136
+ Object.keys(cleaned.properties).length === 0) {
137
+ cleaned.properties = {
138
+ _placeholder: { type: 'string' },
139
+ };
140
+ }
141
+ if (Array.isArray(cleaned.required) &&
142
+ cleaned.properties &&
143
+ typeof cleaned.properties === 'object' &&
144
+ !Array.isArray(cleaned.properties)) {
145
+ const validKeys = new Set(Object.keys(cleaned.properties));
146
+ cleaned.required = cleaned.required.filter((name) => typeof name === 'string' && validKeys.has(name));
147
+ }
59
148
  return cleaned;
60
149
  }
150
+ _parseStructuredJson(text) {
151
+ let jsonText = String(text ?? '').trim();
152
+ const fencedMatch = jsonText.match(/```(?:json)?\s*([\s\S]*?)```/i);
153
+ if (fencedMatch && fencedMatch[1]) {
154
+ jsonText = fencedMatch[1].trim();
155
+ }
156
+ const firstBrace = jsonText.indexOf('{');
157
+ const lastBrace = jsonText.lastIndexOf('}');
158
+ if (firstBrace === -1 || lastBrace === -1 || lastBrace <= firstBrace) {
159
+ throw new Error(`Expected JSON response but got plain text: "${jsonText.slice(0, 50)}..."`);
160
+ }
161
+ return JSON.parse(jsonText.slice(firstBrace, lastBrace + 1));
162
+ }
163
+ _extractStatusCode(error) {
164
+ const directStatus = Number(error?.status ??
165
+ error?.statusCode ??
166
+ error?.response?.status ??
167
+ error?.response?.statusCode);
168
+ if (Number.isFinite(directStatus)) {
169
+ return directStatus;
170
+ }
171
+ const message = String(error?.message ?? error ?? '').toLowerCase();
172
+ if (/(rate limit|resource exhausted|quota exceeded|too many requests|429)/.test(message)) {
173
+ return 429;
174
+ }
175
+ if (/(service unavailable|internal server error|bad gateway|503|502|500)/.test(message)) {
176
+ return 503;
177
+ }
178
+ if (/(forbidden|403)/.test(message)) {
179
+ return 403;
180
+ }
181
+ if (/(timeout|timed out|cancelled|canceled)/.test(message)) {
182
+ return 504;
183
+ }
184
+ return null;
185
+ }
186
+ _toModelProviderError(error) {
187
+ if (error instanceof ModelProviderError) {
188
+ return error;
189
+ }
190
+ return new ModelProviderError(error?.message ?? String(error), this._extractStatusCode(error) ?? 502, this.model);
191
+ }
192
+ async _sleep(ms) {
193
+ await new Promise((resolve) => setTimeout(resolve, ms));
194
+ }
61
195
  async ainvoke(messages, output_format, options = {}) {
62
196
  const serializer = new GoogleMessageSerializer();
63
- const contents = serializer.serialize(messages);
64
- const systemMessage = messages.find((msg) => msg instanceof SystemMessage);
65
- const systemInstruction = systemMessage ? systemMessage.text : undefined;
66
- let tools = undefined;
67
- let toolConfig = undefined;
68
- // For Google, we need to be more explicit about JSON output
69
- // The generationConfig with responseSchema helps enforce JSON structure
70
- const generationConfig = {
71
- responseMimeType: 'application/json',
72
- };
197
+ const { contents, systemInstruction } = serializer.serializeWithSystem(messages, this.includeSystemInUser);
198
+ const generationConfig = this.config ? { ...this.config } : {};
199
+ if (this.temperature !== null) {
200
+ generationConfig.temperature = this.temperature;
201
+ }
202
+ if (this.topP !== null) {
203
+ generationConfig.topP = this.topP;
204
+ }
205
+ if (this.seed !== null) {
206
+ generationConfig.seed = this.seed;
207
+ }
208
+ const isGemini3Pro = this.model.includes('gemini-3-pro');
209
+ const isGemini3Flash = this.model.includes('gemini-3-flash');
210
+ if (isGemini3Pro) {
211
+ let level = this.thinkingLevel ?? 'low';
212
+ if (level === 'minimal' || level === 'medium') {
213
+ level = 'low';
214
+ }
215
+ generationConfig.thinkingConfig = {
216
+ thinkingLevel: level.toUpperCase(),
217
+ };
218
+ }
219
+ else if (isGemini3Flash) {
220
+ if (this.thinkingLevel !== null) {
221
+ generationConfig.thinkingConfig = {
222
+ thinkingLevel: this.thinkingLevel.toUpperCase(),
223
+ };
224
+ }
225
+ else {
226
+ generationConfig.thinkingConfig = {
227
+ thinkingBudget: this.thinkingBudget === null ? -1 : this.thinkingBudget,
228
+ };
229
+ }
230
+ }
231
+ else {
232
+ let budget = this.thinkingBudget;
233
+ if (budget === null &&
234
+ (this.model.includes('gemini-2.5') ||
235
+ this.model.includes('gemini-flash'))) {
236
+ budget = -1;
237
+ }
238
+ if (budget !== null) {
239
+ generationConfig.thinkingConfig = { thinkingBudget: budget };
240
+ }
241
+ }
242
+ if (this.maxOutputTokens !== null) {
243
+ generationConfig.maxOutputTokens = this.maxOutputTokens;
244
+ }
73
245
  // Try to get schema from output_format
74
- const schemaForJson = output_format && 'schema' in output_format && output_format.schema
75
- ? output_format.schema
76
- : null;
246
+ const schemaForJson = (() => {
247
+ const output = output_format;
248
+ if (output &&
249
+ typeof output === 'object' &&
250
+ typeof output.safeParse === 'function' &&
251
+ typeof output.parse === 'function') {
252
+ return output;
253
+ }
254
+ if (output &&
255
+ typeof output === 'object' &&
256
+ output.schema &&
257
+ typeof output.schema.safeParse === 'function' &&
258
+ typeof output.schema.parse === 'function') {
259
+ return output.schema;
260
+ }
261
+ return null;
262
+ })();
263
+ let cleanSchemaForJson = null;
77
264
  if (schemaForJson) {
78
265
  try {
79
- const jsonSchema = zodToJsonSchema(schemaForJson);
80
- // Clean up the schema for Google's format
81
- const cleanSchema = this._cleanSchemaForGoogle(jsonSchema);
82
- generationConfig.responseSchema = cleanSchema;
266
+ const jsonSchema = zodSchemaToJsonSchema(schemaForJson);
267
+ const optimizedSchema = SchemaOptimizer.createGeminiOptimizedSchema(jsonSchema);
268
+ cleanSchemaForJson = this._cleanSchemaForGoogle(optimizedSchema);
83
269
  }
84
- catch (e) {
85
- console.warn('Failed to set responseSchema', e);
270
+ catch {
271
+ cleanSchemaForJson = null;
272
+ }
273
+ }
274
+ if (cleanSchemaForJson && this.supportsStructuredOutput) {
275
+ generationConfig.responseMimeType = 'application/json';
276
+ generationConfig.responseSchema = cleanSchemaForJson;
277
+ }
278
+ const requestContents = contents.map((entry) => ({
279
+ ...entry,
280
+ parts: Array.isArray(entry?.parts)
281
+ ? entry.parts.map((part) => ({ ...part }))
282
+ : entry?.parts,
283
+ }));
284
+ if (output_format && cleanSchemaForJson && !this.supportsStructuredOutput) {
285
+ const jsonInstruction = '\n\nPlease respond with a valid JSON object that matches this schema: ' +
286
+ JSON.stringify(cleanSchemaForJson);
287
+ for (let i = requestContents.length - 1; i >= 0; i -= 1) {
288
+ const content = requestContents[i];
289
+ if (content?.role === 'user' && Array.isArray(content?.parts)) {
290
+ content.parts = [...content.parts, { text: jsonInstruction }];
291
+ break;
292
+ }
86
293
  }
87
294
  }
88
295
  const request = {
89
296
  model: this.model,
90
- contents,
297
+ contents: requestContents,
91
298
  };
92
- if (systemInstruction) {
299
+ if (systemInstruction && !this.includeSystemInUser) {
93
300
  request.systemInstruction = {
94
301
  role: 'system',
95
302
  parts: [{ text: systemInstruction }],
@@ -98,47 +305,45 @@ export class ChatGoogle {
98
305
  if (Object.keys(generationConfig).length > 0) {
99
306
  request.generationConfig = generationConfig;
100
307
  }
101
- const result = await this.client.models.generateContent(request, options.signal ? { signal: options.signal } : undefined);
102
- // Extract text from first candidate
103
- const candidate = result.candidates?.[0];
104
- const textParts = candidate?.content?.parts?.filter((p) => p.text) || [];
105
- const text = textParts.map((p) => p.text).join('');
106
- let completion = text;
107
- if (output_format) {
308
+ for (let attempt = 0; attempt < this.maxRetries; attempt += 1) {
108
309
  try {
310
+ const result = await this.client.models.generateContent(request, options.signal ? { signal: options.signal } : undefined);
311
+ const candidate = result.candidates?.[0];
312
+ const textParts = candidate?.content?.parts?.filter((p) => p.text) || [];
313
+ const text = textParts.map((p) => p.text).join('');
314
+ let completion = text;
315
+ const stopReason = result?.candidates?.[0]?.finishReason ?? null;
109
316
  let parsed = text;
110
- if (generationConfig.responseMimeType === 'application/json') {
111
- let jsonText = text.trim();
112
- // Handle markdown code fences like ```json ... ```
113
- const fencedMatch = jsonText.match(/```(?:json)?\s*([\s\S]*?)```/i);
114
- if (fencedMatch && fencedMatch[1]) {
115
- jsonText = fencedMatch[1].trim();
116
- }
117
- // Try to extract JSON object from text
118
- const firstBrace = jsonText.indexOf('{');
119
- const lastBrace = jsonText.lastIndexOf('}');
120
- if (firstBrace !== -1 && lastBrace !== -1 && lastBrace > firstBrace) {
121
- jsonText = jsonText.slice(firstBrace, lastBrace + 1);
317
+ if (output_format && schemaForJson) {
318
+ parsed = this._parseStructuredJson(text);
319
+ }
320
+ if (output_format) {
321
+ const output = output_format;
322
+ if (schemaForJson &&
323
+ output &&
324
+ typeof output === 'object' &&
325
+ output.schema &&
326
+ typeof output.schema.parse === 'function') {
327
+ completion = output.schema.parse(parsed);
122
328
  }
123
329
  else {
124
- // If no JSON object found, the model returned plain text
125
- // Try to wrap it in a minimal valid structure
126
- console.warn('Google LLM returned plain text instead of JSON. Raw response:', text.slice(0, 200));
127
- throw new Error(`Expected JSON response but got plain text: "${text.slice(0, 50)}..."`);
330
+ completion = output.parse(parsed);
128
331
  }
129
- parsed = JSON.parse(jsonText);
130
332
  }
131
- completion = output_format.parse(parsed);
333
+ return new ChatInvokeCompletion(completion, this.getUsage(result), null, null, stopReason);
132
334
  }
133
- catch (e) {
134
- console.error('Failed to parse completion', e);
135
- throw e;
335
+ catch (error) {
336
+ const providerError = this._toModelProviderError(error);
337
+ const shouldRetry = this.retryableStatusCodes.includes(providerError.statusCode) &&
338
+ attempt < this.maxRetries - 1;
339
+ if (!shouldRetry) {
340
+ throw providerError;
341
+ }
342
+ const delaySeconds = Math.min(this.retryBaseDelay * 2 ** attempt, this.retryMaxDelay);
343
+ const jitter = Math.random() * delaySeconds * 0.1;
344
+ await this._sleep((delaySeconds + jitter) * 1000);
136
345
  }
137
346
  }
138
- return new ChatInvokeCompletion(completion, {
139
- prompt_tokens: result.usageMetadata?.promptTokenCount ?? 0,
140
- completion_tokens: result.usageMetadata?.candidatesTokenCount ?? 0,
141
- total_tokens: result.usageMetadata?.totalTokenCount ?? 0,
142
- });
347
+ throw new ModelProviderError('Retry loop completed without response', 500, this.model);
143
348
  }
144
349
  }
@@ -1,6 +1,14 @@
1
1
  import type { Content } from '@google/genai';
2
2
  import { type Message } from '../messages.js';
3
+ export interface SerializedGoogleMessages {
4
+ contents: Content[];
5
+ systemInstruction: string | null;
6
+ }
3
7
  export declare class GoogleMessageSerializer {
4
8
  serialize(messages: Message[]): Content[];
5
- private serializeMessage;
9
+ serializeWithSystem(messages: Message[], includeSystemInUser?: boolean): SerializedGoogleMessages;
10
+ private serializeUserMessage;
11
+ private serializeAssistantMessage;
12
+ private serializeImagePart;
13
+ private extractMessageText;
6
14
  }