@browserbasehq/orca 3.0.7-alpha-1 → 3.0.8-google-cua-1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -179,7 +179,7 @@ var __forAwait = (obj, it, method) => (it = obj[__knownSymbol("asyncIterator")])
179
179
  var STAGEHAND_VERSION;
180
180
  var init_version = __esm({
181
181
  "lib/version.ts"() {
182
- STAGEHAND_VERSION = "3.0.7-alpha-1";
182
+ STAGEHAND_VERSION = "3.0.8-google-cua-1";
183
183
  }
184
184
  });
185
185
 
@@ -201,7 +201,7 @@ var init_sdkErrors = __esm({
201
201
  `
202
202
  Hey! We're sorry you ran into an error.
203
203
  Stagehand version: ${STAGEHAND_VERSION}
204
- If you need help, please open a Github issue or reach out to us on Slack: https://stagehand.dev/slack
204
+ If you need help, please open a Github issue or reach out to us on Discord: https://stagehand.dev/discord
205
205
 
206
206
  Full error:
207
207
  ${error.message}`
@@ -26278,6 +26278,16 @@ var init_page = __esm({
26278
26278
  yield this.waitForMainLoadState(state, timeoutMs != null ? timeoutMs : 15e3);
26279
26279
  });
26280
26280
  }
26281
+ /**
26282
+ * Wait for a specified amount of time.
26283
+ *
26284
+ * @param ms The number of milliseconds to wait.
26285
+ */
26286
+ waitForTimeout(ms) {
26287
+ return __async(this, null, function* () {
26288
+ return new Promise((resolve3) => setTimeout(resolve3, ms));
26289
+ });
26290
+ }
26281
26291
  evaluate(pageFunctionOrExpression, arg) {
26282
26292
  return __async(this, null, function* () {
26283
26293
  var _a4;
@@ -26803,6 +26813,7 @@ var init_page = __esm({
26803
26813
  // Modifier keys
26804
26814
  case "cmd":
26805
26815
  case "command":
26816
+ case "controlormeta":
26806
26817
  return this.isMacOS() ? "Meta" : "Control";
26807
26818
  case "win":
26808
26819
  case "windows":
@@ -27145,6 +27156,7 @@ __export(v3_exports, {
27145
27156
  providerEnvVarMap: () => providerEnvVarMap,
27146
27157
  toGeminiSchema: () => toGeminiSchema,
27147
27158
  toJsonSchema: () => toJsonSchema,
27159
+ tool: () => import_ai24.tool,
27148
27160
  transformSchema: () => transformSchema,
27149
27161
  trimTrailingTextNode: () => trimTrailingTextNode,
27150
27162
  validateZodSchema: () => validateZodSchema
@@ -27718,17 +27730,20 @@ var providerEnvVarMap = {
27718
27730
  xai: "XAI_API_KEY",
27719
27731
  google_legacy: "GOOGLE_API_KEY"
27720
27732
  };
27733
+ var providersWithoutApiKey = /* @__PURE__ */ new Set(["ollama"]);
27721
27734
  function loadApiKeyFromEnv(provider, logger) {
27722
27735
  if (!provider) {
27723
27736
  return void 0;
27724
27737
  }
27725
27738
  const envVarName = providerEnvVarMap[provider];
27726
27739
  if (!envVarName) {
27727
- logger({
27728
- category: "init",
27729
- message: `No known environment variable for provider '${provider}'`,
27730
- level: 0
27731
- });
27740
+ if (!providersWithoutApiKey.has(provider)) {
27741
+ logger({
27742
+ category: "init",
27743
+ message: `No known environment variable for provider '${provider}'`,
27744
+ level: 0
27745
+ });
27746
+ }
27732
27747
  return void 0;
27733
27748
  }
27734
27749
  const apiKeyFromEnv = Array.isArray(envVarName) ? envVarName.map((name) => process.env[name]).find((key) => key && key.length > 0) : process.env[envVarName];
@@ -28117,7 +28132,7 @@ var ActCache = class {
28117
28132
  };
28118
28133
  });
28119
28134
  }
28120
- tryReplay(context, page, timeout) {
28135
+ tryReplay(context, page, timeout, llmClientOverride) {
28121
28136
  return __async(this, null, function* () {
28122
28137
  var _a4;
28123
28138
  if (!this.enabled) return null;
@@ -28170,7 +28185,13 @@ var ActCache = class {
28170
28185
  }
28171
28186
  }
28172
28187
  });
28173
- return yield this.replayCachedActions(context, entry, page, timeout);
28188
+ return yield this.replayCachedActions(
28189
+ context,
28190
+ entry,
28191
+ page,
28192
+ timeout,
28193
+ llmClientOverride
28194
+ );
28174
28195
  });
28175
28196
  }
28176
28197
  store(context, result) {
@@ -28220,12 +28241,13 @@ var ActCache = class {
28220
28241
  });
28221
28242
  return (0, import_crypto.createHash)("sha256").update(payload).digest("hex");
28222
28243
  }
28223
- replayCachedActions(context, entry, page, timeout) {
28244
+ replayCachedActions(context, entry, page, timeout, llmClientOverride) {
28224
28245
  return __async(this, null, function* () {
28225
28246
  const handler = this.getActHandler();
28226
28247
  if (!handler) {
28227
28248
  throw new StagehandNotInitializedError("act()");
28228
28249
  }
28250
+ const effectiveClient = llmClientOverride != null ? llmClientOverride : this.getDefaultLlmClient();
28229
28251
  const execute = () => __async(this, null, function* () {
28230
28252
  var _a4, _b, _c;
28231
28253
  const actionResults = [];
@@ -28234,7 +28256,7 @@ var ActCache = class {
28234
28256
  action,
28235
28257
  page,
28236
28258
  this.domSettleTimeoutMs,
28237
- this.getDefaultLlmClient(),
28259
+ effectiveClient,
28238
28260
  void 0,
28239
28261
  context.variables
28240
28262
  );
@@ -28472,7 +28494,7 @@ var AgentCache = class {
28472
28494
  };
28473
28495
  });
28474
28496
  }
28475
- tryReplay(context) {
28497
+ tryReplay(context, llmClientOverride) {
28476
28498
  return __async(this, null, function* () {
28477
28499
  if (!this.enabled) return null;
28478
28500
  const {
@@ -28505,7 +28527,7 @@ var AgentCache = class {
28505
28527
  url: { value: context.startUrl, type: "string" }
28506
28528
  }
28507
28529
  });
28508
- return yield this.replayAgentCacheEntry(context, entry);
28530
+ return yield this.replayAgentCacheEntry(context, entry, llmClientOverride);
28509
28531
  });
28510
28532
  }
28511
28533
  /**
@@ -28524,9 +28546,9 @@ var AgentCache = class {
28524
28546
  * and await `result` even when the response comes from cache, maintaining
28525
28547
  * API consistency regardless of whether the result was cached or live.
28526
28548
  */
28527
- tryReplayAsStream(context) {
28549
+ tryReplayAsStream(context, llmClientOverride) {
28528
28550
  return __async(this, null, function* () {
28529
- const result = yield this.tryReplay(context);
28551
+ const result = yield this.tryReplay(context, llmClientOverride);
28530
28552
  if (!result) return null;
28531
28553
  return this.createCachedStreamResult(result);
28532
28554
  });
@@ -28752,17 +28774,23 @@ var AgentCache = class {
28752
28774
  }
28753
28775
  return value;
28754
28776
  }
28755
- replayAgentCacheEntry(context, entry) {
28777
+ replayAgentCacheEntry(context, entry, llmClientOverride) {
28756
28778
  return __async(this, null, function* () {
28757
28779
  var _a4, _b, _c;
28758
28780
  const ctx = this.getContext();
28759
28781
  const handler = this.getActHandler();
28760
28782
  if (!ctx || !handler) return null;
28783
+ const effectiveClient = llmClientOverride != null ? llmClientOverride : this.getDefaultLlmClient();
28761
28784
  try {
28762
28785
  const updatedSteps = [];
28763
28786
  let stepsChanged = false;
28764
28787
  for (const step of (_a4 = entry.steps) != null ? _a4 : []) {
28765
- const replayedStep = (_b = yield this.executeAgentReplayStep(step, ctx, handler)) != null ? _b : step;
28788
+ const replayedStep = (_b = yield this.executeAgentReplayStep(
28789
+ step,
28790
+ ctx,
28791
+ handler,
28792
+ effectiveClient
28793
+ )) != null ? _b : step;
28766
28794
  stepsChanged || (stepsChanged = replayedStep !== step);
28767
28795
  updatedSteps.push(replayedStep);
28768
28796
  }
@@ -28795,20 +28823,22 @@ var AgentCache = class {
28795
28823
  }
28796
28824
  });
28797
28825
  }
28798
- executeAgentReplayStep(step, ctx, handler) {
28826
+ executeAgentReplayStep(step, ctx, handler, llmClient) {
28799
28827
  return __async(this, null, function* () {
28800
28828
  switch (step.type) {
28801
28829
  case "act":
28802
28830
  return yield this.replayAgentActStep(
28803
28831
  step,
28804
28832
  ctx,
28805
- handler
28833
+ handler,
28834
+ llmClient
28806
28835
  );
28807
28836
  case "fillForm":
28808
28837
  return yield this.replayAgentFillFormStep(
28809
28838
  step,
28810
28839
  ctx,
28811
- handler
28840
+ handler,
28841
+ llmClient
28812
28842
  );
28813
28843
  case "goto":
28814
28844
  yield this.replayAgentGotoStep(step, ctx);
@@ -28840,7 +28870,7 @@ var AgentCache = class {
28840
28870
  }
28841
28871
  });
28842
28872
  }
28843
- replayAgentActStep(step, ctx, handler) {
28873
+ replayAgentActStep(step, ctx, handler, llmClient) {
28844
28874
  return __async(this, null, function* () {
28845
28875
  const actions = Array.isArray(step.actions) ? step.actions : [];
28846
28876
  if (actions.length > 0) {
@@ -28851,7 +28881,7 @@ var AgentCache = class {
28851
28881
  action,
28852
28882
  page,
28853
28883
  this.domSettleTimeoutMs,
28854
- this.getDefaultLlmClient()
28884
+ llmClient
28855
28885
  );
28856
28886
  if (result.success && Array.isArray(result.actions)) {
28857
28887
  updatedActions.push(...cloneForCache(result.actions));
@@ -28868,7 +28898,7 @@ var AgentCache = class {
28868
28898
  return step;
28869
28899
  });
28870
28900
  }
28871
- replayAgentFillFormStep(step, ctx, handler) {
28901
+ replayAgentFillFormStep(step, ctx, handler, llmClient) {
28872
28902
  return __async(this, null, function* () {
28873
28903
  var _a4;
28874
28904
  const actions = Array.isArray(step.actions) && step.actions.length > 0 ? step.actions : (_a4 = step.observeResults) != null ? _a4 : [];
@@ -28882,7 +28912,7 @@ var AgentCache = class {
28882
28912
  action,
28883
28913
  page,
28884
28914
  this.domSettleTimeoutMs,
28885
- this.getDefaultLlmClient()
28915
+ llmClient
28886
28916
  );
28887
28917
  if (result.success && Array.isArray(result.actions)) {
28888
28918
  updatedActions.push(...cloneForCache(result.actions));
@@ -31115,7 +31145,25 @@ var screenshotTool = (v3) => (0, import_ai3.tool)({
31115
31145
  // lib/v3/agent/tools/wait.ts
31116
31146
  var import_ai4 = require("ai");
31117
31147
  var import_zod8 = require("zod");
31118
- var waitTool = (v3) => (0, import_ai4.tool)({
31148
+
31149
+ // lib/v3/agent/utils/screenshotHandler.ts
31150
+ var DEFAULT_DELAY_MS = 500;
31151
+ function waitAndCaptureScreenshot(_0) {
31152
+ return __async(this, arguments, function* (page, delayMs = DEFAULT_DELAY_MS) {
31153
+ if (delayMs > 0) {
31154
+ yield page.waitForTimeout(delayMs);
31155
+ }
31156
+ try {
31157
+ const buffer = yield page.screenshot({ fullPage: false });
31158
+ return buffer.toString("base64");
31159
+ } catch (e2) {
31160
+ return void 0;
31161
+ }
31162
+ });
31163
+ }
31164
+
31165
+ // lib/v3/agent/tools/wait.ts
31166
+ var waitTool = (v3, mode) => (0, import_ai4.tool)({
31119
31167
  description: "Wait for a specified time",
31120
31168
  inputSchema: import_zod8.z.object({
31121
31169
  timeMs: import_zod8.z.number().describe("Time in milliseconds")
@@ -31136,8 +31184,32 @@ var waitTool = (v3) => (0, import_ai4.tool)({
31136
31184
  if (timeMs > 0) {
31137
31185
  v3.recordAgentReplayStep({ type: "wait", timeMs });
31138
31186
  }
31187
+ if (mode === "hybrid") {
31188
+ const page = yield v3.context.awaitActivePage();
31189
+ const screenshotBase64 = yield waitAndCaptureScreenshot(page, 0);
31190
+ return { success: true, waited: timeMs, screenshotBase64 };
31191
+ }
31139
31192
  return { success: true, waited: timeMs };
31140
- })
31193
+ }),
31194
+ toModelOutput: (result) => {
31195
+ const content = [
31196
+ {
31197
+ type: "text",
31198
+ text: JSON.stringify({
31199
+ success: result.success,
31200
+ waited: result.waited
31201
+ })
31202
+ }
31203
+ ];
31204
+ if (result.screenshotBase64) {
31205
+ content.push({
31206
+ type: "media",
31207
+ mediaType: "image/png",
31208
+ data: result.screenshotBase64
31209
+ });
31210
+ }
31211
+ return { type: "content", value: content };
31212
+ }
31141
31213
  });
31142
31214
 
31143
31215
  // lib/v3/agent/tools/navback.ts
@@ -31164,26 +31236,12 @@ var navBackTool = (v3) => (0, import_ai5.tool)({
31164
31236
  })
31165
31237
  });
31166
31238
 
31167
- // lib/v3/agent/tools/close.ts
31239
+ // lib/v3/agent/tools/ariaTree.ts
31168
31240
  var import_ai6 = require("ai");
31169
31241
  var import_zod10 = require("zod");
31170
- var closeTool = () => (0, import_ai6.tool)({
31171
- description: "Complete the task and close",
31172
- inputSchema: import_zod10.z.object({
31173
- reasoning: import_zod10.z.string().describe("Summary of what was accomplished"),
31174
- taskComplete: import_zod10.z.boolean().describe("Whether the task was completed successfully")
31175
- }),
31176
- execute: (_0) => __async(null, [_0], function* ({ reasoning, taskComplete }) {
31177
- return { success: true, reasoning, taskComplete };
31178
- })
31179
- });
31180
-
31181
- // lib/v3/agent/tools/ariaTree.ts
31182
- var import_ai7 = require("ai");
31183
- var import_zod11 = require("zod");
31184
- var ariaTreeTool = (v3) => (0, import_ai7.tool)({
31242
+ var ariaTreeTool = (v3) => (0, import_ai6.tool)({
31185
31243
  description: "gets the accessibility (ARIA) hybrid tree text for the current page. use this to understand structure and content.",
31186
- inputSchema: import_zod11.z.object({}),
31244
+ inputSchema: import_zod10.z.object({}),
31187
31245
  execute: () => __async(null, null, function* () {
31188
31246
  v3.logger({
31189
31247
  category: "agent",
@@ -31210,18 +31268,18 @@ ${result.content}` }]
31210
31268
  });
31211
31269
 
31212
31270
  // lib/v3/agent/tools/fillform.ts
31213
- var import_ai8 = require("ai");
31214
- var import_zod12 = require("zod");
31215
- var fillFormTool = (v3, executionModel) => (0, import_ai8.tool)({
31271
+ var import_ai7 = require("ai");
31272
+ var import_zod11 = require("zod");
31273
+ var fillFormTool = (v3, executionModel) => (0, import_ai7.tool)({
31216
31274
  description: `\u{1F4DD} FORM FILL - MULTI-FIELD INPUT TOOL
31217
31275
  For any form with 2+ inputs/textareas. Faster than individual typing.`,
31218
- inputSchema: import_zod12.z.object({
31219
- fields: import_zod12.z.array(
31220
- import_zod12.z.object({
31221
- action: import_zod12.z.string().describe(
31276
+ inputSchema: import_zod11.z.object({
31277
+ fields: import_zod11.z.array(
31278
+ import_zod11.z.object({
31279
+ action: import_zod11.z.string().describe(
31222
31280
  'Description of typing action, e.g. "type foo into the email field"'
31223
31281
  ),
31224
- value: import_zod12.z.string().describe("Text to type into the target")
31282
+ value: import_zod11.z.string().describe("Text to type into the target")
31225
31283
  })
31226
31284
  ).min(1, "Provide at least one field to fill")
31227
31285
  }),
@@ -31264,8 +31322,8 @@ For any form with 2+ inputs/textareas. Faster than individual typing.`,
31264
31322
  });
31265
31323
 
31266
31324
  // lib/v3/agent/tools/scroll.ts
31267
- var import_ai9 = require("ai");
31268
- var import_zod13 = require("zod");
31325
+ var import_ai8 = require("ai");
31326
+ var import_zod12 = require("zod");
31269
31327
 
31270
31328
  // lib/v3/agent/utils/coordinateNormalization.ts
31271
31329
  var DEFAULT_VIEWPORT = { width: 1288, height: 711 };
@@ -31289,11 +31347,11 @@ function processCoordinates(x2, y, provider) {
31289
31347
  }
31290
31348
 
31291
31349
  // lib/v3/agent/tools/scroll.ts
31292
- var scrollTool = (v3) => (0, import_ai9.tool)({
31350
+ var scrollTool = (v3) => (0, import_ai8.tool)({
31293
31351
  description: "Scroll the page up or down by a percentage of the viewport height. Default is 80%, and what should be typically used for general page scrolling",
31294
- inputSchema: import_zod13.z.object({
31295
- direction: import_zod13.z.enum(["up", "down"]),
31296
- percentage: import_zod13.z.number().min(1).max(200).optional()
31352
+ inputSchema: import_zod12.z.object({
31353
+ direction: import_zod12.z.enum(["up", "down"]),
31354
+ percentage: import_zod12.z.number().min(1).max(200).optional()
31297
31355
  }),
31298
31356
  execute: (_0) => __async(null, [_0], function* ({ direction, percentage = 80 }) {
31299
31357
  v3.logger({
@@ -31327,16 +31385,20 @@ var scrollTool = (v3) => (0, import_ai9.tool)({
31327
31385
  };
31328
31386
  })
31329
31387
  });
31330
- var scrollVisionTool = (v3, provider) => (0, import_ai9.tool)({
31388
+ var scrollVisionTool = (v3, provider) => (0, import_ai8.tool)({
31331
31389
  description: `Scroll the page up or down. For general page scrolling, no coordinates needed. Only provide coordinates when scrolling inside a nested scrollable element (e.g., a dropdown menu, modal with overflow, or scrollable sidebar). Default is 80%, and what should be typically used for general page scrolling`,
31332
- inputSchema: import_zod13.z.object({
31333
- direction: import_zod13.z.enum(["up", "down"]),
31334
- coordinates: import_zod13.z.array(import_zod13.z.number()).optional().describe(
31390
+ inputSchema: import_zod12.z.object({
31391
+ direction: import_zod12.z.enum(["up", "down"]),
31392
+ coordinates: import_zod12.z.array(import_zod12.z.number()).optional().describe(
31335
31393
  "Only use coordinates for scrolling inside a nested scrollable element - provide (x, y) within that element"
31336
31394
  ),
31337
- percentage: import_zod13.z.number().min(1).max(200).optional()
31395
+ percentage: import_zod12.z.number().min(1).max(200).optional()
31338
31396
  }),
31339
- execute: (_0) => __async(null, [_0], function* ({ direction, coordinates, percentage = 80 }) {
31397
+ execute: (_0) => __async(null, [_0], function* ({
31398
+ direction,
31399
+ coordinates,
31400
+ percentage = 80
31401
+ }) {
31340
31402
  const page = yield v3.context.awaitActivePage();
31341
31403
  const { w, h: h2 } = yield page.mainFrame().evaluate("({ w: window.innerWidth, h: window.innerHeight })");
31342
31404
  let cx;
@@ -31372,6 +31434,7 @@ var scrollVisionTool = (v3, provider) => (0, import_ai9.tool)({
31372
31434
  const scrollDistance = Math.round(h2 * percentage / 100);
31373
31435
  const deltaY = direction === "up" ? -scrollDistance : scrollDistance;
31374
31436
  yield page.scroll(cx, cy, 0, deltaY);
31437
+ const screenshotBase64 = yield waitAndCaptureScreenshot(page, 100);
31375
31438
  v3.recordAgentReplayStep({
31376
31439
  type: "scroll",
31377
31440
  deltaX: 0,
@@ -31381,29 +31444,50 @@ var scrollVisionTool = (v3, provider) => (0, import_ai9.tool)({
31381
31444
  return {
31382
31445
  success: true,
31383
31446
  message: coordinates ? `Scrolled ${percentage}% ${direction} at (${cx}, ${cy})` : `Scrolled ${percentage}% ${direction}`,
31384
- scrolledPixels: scrollDistance
31447
+ scrolledPixels: scrollDistance,
31448
+ screenshotBase64
31385
31449
  };
31386
- })
31450
+ }),
31451
+ toModelOutput: (result) => {
31452
+ const content = [
31453
+ {
31454
+ type: "text",
31455
+ text: JSON.stringify({
31456
+ success: result.success,
31457
+ message: result.message,
31458
+ scrolledPixels: result.scrolledPixels
31459
+ })
31460
+ }
31461
+ ];
31462
+ if (result.screenshotBase64) {
31463
+ content.push({
31464
+ type: "media",
31465
+ mediaType: "image/png",
31466
+ data: result.screenshotBase64
31467
+ });
31468
+ }
31469
+ return { type: "content", value: content };
31470
+ }
31387
31471
  });
31388
31472
 
31389
31473
  // lib/v3/agent/tools/extract.ts
31390
- var import_ai10 = require("ai");
31391
- var import_zod14 = require("zod");
31474
+ var import_ai9 = require("ai");
31475
+ var import_zod13 = require("zod");
31392
31476
  function evaluateZodSchema(schemaStr, logger) {
31393
31477
  var _a4;
31394
31478
  try {
31395
31479
  const fn = new Function("z", `return ${schemaStr}`);
31396
- return fn(import_zod14.z);
31480
+ return fn(import_zod13.z);
31397
31481
  } catch (e2) {
31398
31482
  logger == null ? void 0 : logger({
31399
31483
  category: "agent",
31400
31484
  message: `Failed to evaluate schema: ${(_a4 = e2 == null ? void 0 : e2.message) != null ? _a4 : String(e2)}`,
31401
31485
  level: 0
31402
31486
  });
31403
- return import_zod14.z.any();
31487
+ return import_zod13.z.any();
31404
31488
  }
31405
31489
  }
31406
- var extractTool = (v3, executionModel, logger) => (0, import_ai10.tool)({
31490
+ var extractTool = (v3, executionModel, logger) => (0, import_ai9.tool)({
31407
31491
  description: `Extract structured data from the current page based on a provided schema.
31408
31492
 
31409
31493
  USAGE GUIDELINES:
@@ -31422,9 +31506,9 @@ var extractTool = (v3, executionModel, logger) => (0, import_ai10.tool)({
31422
31506
  3. Extract arrays:
31423
31507
  instruction: "extract all product names and prices"
31424
31508
  schema: "z.object({ products: z.array(z.object({ name: z.string(), price: z.number() })) })"`,
31425
- inputSchema: import_zod14.z.object({
31426
- instruction: import_zod14.z.string(),
31427
- schema: import_zod14.z.string().optional().describe("Zod schema as code, e.g. z.object({ title: z.string() })")
31509
+ inputSchema: import_zod13.z.object({
31510
+ instruction: import_zod13.z.string(),
31511
+ schema: import_zod13.z.string().optional().describe("Zod schema as code, e.g. z.object({ title: z.string() })")
31428
31512
  }),
31429
31513
  execute: (_0) => __async(null, [_0], function* ({ instruction, schema }) {
31430
31514
  var _a4;
@@ -31439,8 +31523,8 @@ var extractTool = (v3, executionModel, logger) => (0, import_ai10.tool)({
31439
31523
  });
31440
31524
 
31441
31525
  // lib/v3/agent/tools/click.ts
31442
- var import_ai11 = require("ai");
31443
- var import_zod15 = require("zod");
31526
+ var import_ai10 = require("ai");
31527
+ var import_zod14 = require("zod");
31444
31528
 
31445
31529
  // lib/v3/agent/utils/xpath.ts
31446
31530
  function ensureXPath(value) {
@@ -31451,16 +31535,13 @@ function ensureXPath(value) {
31451
31535
  }
31452
31536
 
31453
31537
  // lib/v3/agent/tools/click.ts
31454
- function waitForTimeout(ms) {
31455
- return new Promise((resolve3) => setTimeout(resolve3, ms));
31456
- }
31457
- var clickTool = (v3, provider) => (0, import_ai11.tool)({
31538
+ var clickTool = (v3, provider) => (0, import_ai10.tool)({
31458
31539
  description: "Click on an element using its coordinates (this is the most reliable way to click on an element, always use this over act, unless the element is not visible in the screenshot, but shown in ariaTree)",
31459
- inputSchema: import_zod15.z.object({
31460
- describe: import_zod15.z.string().describe(
31540
+ inputSchema: import_zod14.z.object({
31541
+ describe: import_zod14.z.string().describe(
31461
31542
  "Describe the element to click on in a short, specific phrase that mentions the element type and a good visual description"
31462
31543
  ),
31463
- coordinates: import_zod15.z.array(import_zod15.z.number()).describe("The (x, y) coordinates to click on")
31544
+ coordinates: import_zod14.z.array(import_zod14.z.number()).describe("The (x, y) coordinates to click on")
31464
31545
  }),
31465
31546
  execute: (_0) => __async(null, [_0], function* ({ describe, coordinates }) {
31466
31547
  try {
@@ -31476,36 +31557,38 @@ var clickTool = (v3, provider) => (0, import_ai11.tool)({
31476
31557
  level: 1,
31477
31558
  auxiliary: {
31478
31559
  arguments: {
31479
- value: JSON.stringify({ describe, coordinates, processed }),
31480
- type: "string"
31560
+ value: JSON.stringify({ describe }),
31561
+ type: "object"
31481
31562
  }
31482
31563
  }
31483
31564
  });
31565
+ const shouldCollectXpath = v3.isAgentReplayActive();
31484
31566
  const xpath = yield page.click(processed.x, processed.y, {
31485
- returnXpath: true
31567
+ returnXpath: shouldCollectXpath
31486
31568
  });
31487
- if (isGoogleProvider(provider)) {
31488
- yield waitForTimeout(1e3);
31489
- }
31490
- const normalizedXpath = ensureXPath(xpath);
31491
- if (normalizedXpath) {
31492
- const action = {
31493
- selector: normalizedXpath,
31494
- description: describe,
31495
- method: "click",
31496
- arguments: []
31497
- };
31498
- v3.recordAgentReplayStep({
31499
- type: "act",
31500
- instruction: describe,
31501
- actions: [action],
31502
- actionDescription: describe
31503
- });
31569
+ const screenshotBase64 = yield waitAndCaptureScreenshot(page);
31570
+ if (shouldCollectXpath) {
31571
+ const normalizedXpath = ensureXPath(xpath);
31572
+ if (normalizedXpath) {
31573
+ const action = {
31574
+ selector: normalizedXpath,
31575
+ description: describe,
31576
+ method: "click",
31577
+ arguments: []
31578
+ };
31579
+ v3.recordAgentReplayStep({
31580
+ type: "act",
31581
+ instruction: describe,
31582
+ actions: [action],
31583
+ actionDescription: describe
31584
+ });
31585
+ }
31504
31586
  }
31505
31587
  return {
31506
31588
  success: true,
31507
31589
  describe,
31508
- coordinates: [processed.x, processed.y]
31590
+ coordinates: [processed.x, processed.y],
31591
+ screenshotBase64
31509
31592
  };
31510
31593
  } catch (error) {
31511
31594
  return {
@@ -31513,25 +31596,60 @@ var clickTool = (v3, provider) => (0, import_ai11.tool)({
31513
31596
  error: `Error clicking: ${error.message}`
31514
31597
  };
31515
31598
  }
31516
- })
31599
+ }),
31600
+ toModelOutput: (result) => {
31601
+ if (result.success) {
31602
+ const content = [
31603
+ {
31604
+ type: "text",
31605
+ text: JSON.stringify({
31606
+ success: result.success,
31607
+ describe: result.describe,
31608
+ coordinates: result.coordinates
31609
+ })
31610
+ }
31611
+ ];
31612
+ if (result.screenshotBase64) {
31613
+ content.push({
31614
+ type: "media",
31615
+ mediaType: "image/png",
31616
+ data: result.screenshotBase64
31617
+ });
31618
+ }
31619
+ return { type: "content", value: content };
31620
+ }
31621
+ return {
31622
+ type: "content",
31623
+ value: [
31624
+ {
31625
+ type: "text",
31626
+ text: JSON.stringify({
31627
+ success: result.success,
31628
+ error: result.error
31629
+ })
31630
+ }
31631
+ ]
31632
+ };
31633
+ }
31517
31634
  });
31518
31635
 
31519
31636
  // lib/v3/agent/tools/type.ts
31520
- var import_ai12 = require("ai");
31521
- var import_zod16 = require("zod");
31522
- function waitForTimeout2(ms) {
31523
- return new Promise((resolve3) => setTimeout(resolve3, ms));
31524
- }
31525
- var typeTool = (v3, provider) => (0, import_ai12.tool)({
31637
+ var import_ai11 = require("ai");
31638
+ var import_zod15 = require("zod");
31639
+ var typeTool = (v3, provider) => (0, import_ai11.tool)({
31526
31640
  description: "Type text into an element using its coordinates. This will click the element and then type the text into it (this is the most reliable way to type into an element, always use this over act, unless the element is not visible in the screenshot, but shown in ariaTree)",
31527
- inputSchema: import_zod16.z.object({
31528
- describe: import_zod16.z.string().describe(
31641
+ inputSchema: import_zod15.z.object({
31642
+ describe: import_zod15.z.string().describe(
31529
31643
  "Describe the element to type into in a short, specific phrase that mentions the element type and a good visual description"
31530
31644
  ),
31531
- text: import_zod16.z.string().describe("The text to type into the element"),
31532
- coordinates: import_zod16.z.array(import_zod16.z.number()).describe("The (x, y) coordinates to type into the element")
31645
+ text: import_zod15.z.string().describe("The text to type into the element"),
31646
+ coordinates: import_zod15.z.array(import_zod15.z.number()).describe("The (x, y) coordinates to type into the element")
31533
31647
  }),
31534
- execute: (_0) => __async(null, [_0], function* ({ describe, coordinates, text }) {
31648
+ execute: (_0) => __async(null, [_0], function* ({
31649
+ describe,
31650
+ coordinates,
31651
+ text
31652
+ }) {
31535
31653
  try {
31536
31654
  const page = yield v3.context.awaitActivePage();
31537
31655
  const processed = processCoordinates(
@@ -31545,54 +31663,98 @@ var typeTool = (v3, provider) => (0, import_ai12.tool)({
31545
31663
  level: 1,
31546
31664
  auxiliary: {
31547
31665
  arguments: {
31548
- value: JSON.stringify({ describe, coordinates, processed, text }),
31549
- type: "string"
31666
+ value: JSON.stringify({ describe, text }),
31667
+ type: "object"
31550
31668
  }
31551
31669
  }
31552
31670
  });
31671
+ const shouldCollectXpath = v3.isAgentReplayActive();
31553
31672
  const xpath = yield page.click(processed.x, processed.y, {
31554
- returnXpath: true
31673
+ returnXpath: shouldCollectXpath
31555
31674
  });
31556
- if (isGoogleProvider(provider)) {
31557
- yield waitForTimeout2(1e3);
31558
- }
31559
31675
  yield page.type(text);
31560
- const normalizedXpath = ensureXPath(xpath);
31561
- if (normalizedXpath) {
31562
- const action = {
31563
- selector: normalizedXpath,
31564
- description: describe,
31565
- method: "type",
31566
- arguments: [text]
31567
- };
31568
- v3.recordAgentReplayStep({
31569
- type: "act",
31570
- instruction: describe,
31571
- actions: [action],
31572
- actionDescription: describe
31573
- });
31676
+ const screenshotBase64 = yield waitAndCaptureScreenshot(page);
31677
+ if (shouldCollectXpath) {
31678
+ const normalizedXpath = ensureXPath(xpath);
31679
+ if (normalizedXpath) {
31680
+ const action = {
31681
+ selector: normalizedXpath,
31682
+ description: describe,
31683
+ method: "type",
31684
+ arguments: [text]
31685
+ };
31686
+ v3.recordAgentReplayStep({
31687
+ type: "act",
31688
+ instruction: describe,
31689
+ actions: [action],
31690
+ actionDescription: describe
31691
+ });
31692
+ }
31574
31693
  }
31575
- return { success: true, describe, text };
31694
+ return {
31695
+ success: true,
31696
+ describe,
31697
+ text,
31698
+ screenshotBase64
31699
+ };
31576
31700
  } catch (error) {
31577
31701
  return {
31578
31702
  success: false,
31579
31703
  error: `Error typing: ${error.message}`
31580
31704
  };
31581
31705
  }
31582
- })
31706
+ }),
31707
+ toModelOutput: (result) => {
31708
+ if (result.success) {
31709
+ const content = [
31710
+ {
31711
+ type: "text",
31712
+ text: JSON.stringify({
31713
+ success: result.success,
31714
+ describe: result.describe,
31715
+ text: result.text
31716
+ })
31717
+ }
31718
+ ];
31719
+ if (result.screenshotBase64) {
31720
+ content.push({
31721
+ type: "media",
31722
+ mediaType: "image/png",
31723
+ data: result.screenshotBase64
31724
+ });
31725
+ }
31726
+ return { type: "content", value: content };
31727
+ }
31728
+ return {
31729
+ type: "content",
31730
+ value: [
31731
+ {
31732
+ type: "text",
31733
+ text: JSON.stringify({
31734
+ success: result.success,
31735
+ error: result.error
31736
+ })
31737
+ }
31738
+ ]
31739
+ };
31740
+ }
31583
31741
  });
31584
31742
 
31585
31743
  // lib/v3/agent/tools/dragAndDrop.ts
31586
- var import_ai13 = require("ai");
31587
- var import_zod17 = require("zod");
31588
- var dragAndDropTool = (v3, provider) => (0, import_ai13.tool)({
31744
+ var import_ai12 = require("ai");
31745
+ var import_zod16 = require("zod");
31746
+ var dragAndDropTool = (v3, provider) => (0, import_ai12.tool)({
31589
31747
  description: "Drag and drop an element using its coordinates (this is the most reliable way to drag and drop an element, always use this over act, unless the element is not visible in the screenshot, but shown in ariaTree)",
31590
- inputSchema: import_zod17.z.object({
31591
- describe: import_zod17.z.string().describe("Describe the element to drag and drop"),
31592
- startCoordinates: import_zod17.z.array(import_zod17.z.number()).describe("The (x, y) coordinates to start the drag and drop from"),
31593
- endCoordinates: import_zod17.z.array(import_zod17.z.number()).describe("The (x, y) coordinates to end the drag and drop at")
31748
+ inputSchema: import_zod16.z.object({
31749
+ describe: import_zod16.z.string().describe("Describe the element to drag and drop"),
31750
+ startCoordinates: import_zod16.z.array(import_zod16.z.number()).describe("The (x, y) coordinates to start the drag and drop from"),
31751
+ endCoordinates: import_zod16.z.array(import_zod16.z.number()).describe("The (x, y) coordinates to end the drag and drop at")
31594
31752
  }),
31595
- execute: (_0) => __async(null, [_0], function* ({ describe, startCoordinates, endCoordinates }) {
31753
+ execute: (_0) => __async(null, [_0], function* ({
31754
+ describe,
31755
+ startCoordinates,
31756
+ endCoordinates
31757
+ }) {
31596
31758
  try {
31597
31759
  const page = yield v3.context.awaitActivePage();
31598
31760
  const processedStart = processCoordinates(
@@ -31612,60 +31774,97 @@ var dragAndDropTool = (v3, provider) => (0, import_ai13.tool)({
31612
31774
  auxiliary: {
31613
31775
  arguments: {
31614
31776
  value: JSON.stringify({
31615
- describe,
31616
- startCoordinates,
31617
- endCoordinates,
31618
- processedStart,
31619
- processedEnd
31777
+ describe
31620
31778
  }),
31621
- type: "string"
31779
+ type: "object"
31622
31780
  }
31623
31781
  }
31624
31782
  });
31783
+ const shouldCollectXpath = v3.isAgentReplayActive();
31625
31784
  const [fromXpath, toXpath] = yield page.dragAndDrop(
31626
31785
  processedStart.x,
31627
31786
  processedStart.y,
31628
31787
  processedEnd.x,
31629
31788
  processedEnd.y,
31630
- { returnXpath: true }
31789
+ { returnXpath: shouldCollectXpath }
31631
31790
  );
31632
- const normalizedFrom = ensureXPath(fromXpath);
31633
- const normalizedTo = ensureXPath(toXpath);
31634
- if (normalizedFrom && normalizedTo) {
31635
- const action = {
31636
- selector: normalizedFrom,
31637
- description: describe,
31638
- method: "dragAndDrop",
31639
- arguments: [normalizedTo]
31640
- };
31641
- v3.recordAgentReplayStep({
31642
- type: "act",
31643
- instruction: describe,
31644
- actions: [action],
31645
- actionDescription: describe
31646
- });
31791
+ const screenshotBase64 = yield waitAndCaptureScreenshot(page);
31792
+ if (shouldCollectXpath) {
31793
+ const normalizedFrom = ensureXPath(fromXpath);
31794
+ const normalizedTo = ensureXPath(toXpath);
31795
+ if (normalizedFrom && normalizedTo) {
31796
+ const action = {
31797
+ selector: normalizedFrom,
31798
+ description: describe,
31799
+ method: "dragAndDrop",
31800
+ arguments: [normalizedTo]
31801
+ };
31802
+ v3.recordAgentReplayStep({
31803
+ type: "act",
31804
+ instruction: describe,
31805
+ actions: [action],
31806
+ actionDescription: describe
31807
+ });
31808
+ }
31647
31809
  }
31648
- return { success: true, describe };
31810
+ return {
31811
+ success: true,
31812
+ describe,
31813
+ screenshotBase64
31814
+ };
31649
31815
  } catch (error) {
31650
31816
  return {
31651
31817
  success: false,
31652
31818
  error: `Error dragging: ${error.message}`
31653
31819
  };
31654
31820
  }
31655
- })
31821
+ }),
31822
+ toModelOutput: (result) => {
31823
+ if (result.success) {
31824
+ const content = [
31825
+ {
31826
+ type: "text",
31827
+ text: JSON.stringify({
31828
+ success: result.success,
31829
+ describe: result.describe
31830
+ })
31831
+ }
31832
+ ];
31833
+ if (result.screenshotBase64) {
31834
+ content.push({
31835
+ type: "media",
31836
+ mediaType: "image/png",
31837
+ data: result.screenshotBase64
31838
+ });
31839
+ }
31840
+ return { type: "content", value: content };
31841
+ }
31842
+ return {
31843
+ type: "content",
31844
+ value: [
31845
+ {
31846
+ type: "text",
31847
+ text: JSON.stringify({
31848
+ success: result.success,
31849
+ error: result.error
31850
+ })
31851
+ }
31852
+ ]
31853
+ };
31854
+ }
31656
31855
  });
31657
31856
 
31658
31857
  // lib/v3/agent/tools/clickAndHold.ts
31659
- var import_ai14 = require("ai");
31660
- var import_zod18 = require("zod");
31661
- var clickAndHoldTool = (v3, provider) => (0, import_ai14.tool)({
31858
+ var import_ai13 = require("ai");
31859
+ var import_zod17 = require("zod");
31860
+ var clickAndHoldTool = (v3, provider) => (0, import_ai13.tool)({
31662
31861
  description: "Click and hold on an element using its coordinates",
31663
- inputSchema: import_zod18.z.object({
31664
- describe: import_zod18.z.string().describe(
31862
+ inputSchema: import_zod17.z.object({
31863
+ describe: import_zod17.z.string().describe(
31665
31864
  "Describe the element to click on in a short, specific phrase that mentions the element type and a good visual description"
31666
31865
  ),
31667
- duration: import_zod18.z.number().describe("The duration to hold the element in milliseconds"),
31668
- coordinates: import_zod18.z.array(import_zod18.z.number()).describe("The (x, y) coordinates to click on")
31866
+ duration: import_zod17.z.number().describe("The duration to hold the element in milliseconds"),
31867
+ coordinates: import_zod17.z.array(import_zod17.z.number()).describe("The (x, y) coordinates to click on")
31669
31868
  }),
31670
31869
  execute: (_0) => __async(null, [_0], function* ({ describe, coordinates, duration }) {
31671
31870
  try {
@@ -31683,35 +31882,36 @@ var clickAndHoldTool = (v3, provider) => (0, import_ai14.tool)({
31683
31882
  arguments: {
31684
31883
  value: JSON.stringify({
31685
31884
  describe,
31686
- coordinates,
31687
- processed,
31688
31885
  duration
31689
31886
  }),
31690
- type: "string"
31887
+ type: "object"
31691
31888
  }
31692
31889
  }
31693
31890
  });
31891
+ const shouldCollectXpath = v3.isAgentReplayActive();
31694
31892
  const [xpath] = yield page.dragAndDrop(
31695
31893
  processed.x,
31696
31894
  processed.y,
31697
31895
  processed.x,
31698
31896
  processed.y,
31699
- { delay: duration, returnXpath: true }
31897
+ { delay: duration, returnXpath: shouldCollectXpath }
31700
31898
  );
31701
- const normalizedXpath = ensureXPath(xpath);
31702
- if (normalizedXpath) {
31703
- const action = {
31704
- selector: normalizedXpath,
31705
- description: describe,
31706
- method: "clickAndHold",
31707
- arguments: [String(duration)]
31708
- };
31709
- v3.recordAgentReplayStep({
31710
- type: "act",
31711
- instruction: describe,
31712
- actions: [action],
31713
- actionDescription: describe
31714
- });
31899
+ if (shouldCollectXpath) {
31900
+ const normalizedXpath = ensureXPath(xpath);
31901
+ if (normalizedXpath) {
31902
+ const action = {
31903
+ selector: normalizedXpath,
31904
+ description: describe,
31905
+ method: "clickAndHold",
31906
+ arguments: [String(duration)]
31907
+ };
31908
+ v3.recordAgentReplayStep({
31909
+ type: "act",
31910
+ instruction: describe,
31911
+ actions: [action],
31912
+ actionDescription: describe
31913
+ });
31914
+ }
31715
31915
  }
31716
31916
  return { success: true, describe };
31717
31917
  } catch (error) {
@@ -31724,20 +31924,20 @@ var clickAndHoldTool = (v3, provider) => (0, import_ai14.tool)({
31724
31924
  });
31725
31925
 
31726
31926
  // lib/v3/agent/tools/keys.ts
31727
- var import_ai15 = require("ai");
31728
- var import_zod19 = require("zod");
31729
- var keysTool = (v3) => (0, import_ai15.tool)({
31927
+ var import_ai14 = require("ai");
31928
+ var import_zod18 = require("zod");
31929
+ var keysTool = (v3) => (0, import_ai14.tool)({
31730
31930
  description: `Send keyboard input to the page without targeting a specific element. Unlike the type tool which clicks then types into coordinates, this sends keystrokes directly to wherever focus currently is.
31731
31931
 
31732
31932
  Use method="type" to enter text into the currently focused element. Preferred when: input is already focused, text needs to flow across multiple fields (e.g., verification codes)
31733
31933
 
31734
31934
  Use method="press" for navigation keys (Enter, Tab, Escape, Backspace, arrows) and keyboard shortcuts (Cmd+A, Ctrl+C, Shift+Tab).`,
31735
- inputSchema: import_zod19.z.object({
31736
- method: import_zod19.z.enum(["press", "type"]),
31737
- value: import_zod19.z.string().describe(
31935
+ inputSchema: import_zod18.z.object({
31936
+ method: import_zod18.z.enum(["press", "type"]),
31937
+ value: import_zod18.z.string().describe(
31738
31938
  "The text to type, or the key/combo to press (Enter, Tab, Cmd+A)"
31739
31939
  ),
31740
- repeat: import_zod19.z.number().optional()
31940
+ repeat: import_zod18.z.number().optional()
31741
31941
  }),
31742
31942
  execute: (_0) => __async(null, [_0], function* ({ method, value, repeat }) {
31743
31943
  try {
@@ -31749,7 +31949,7 @@ Use method="press" for navigation keys (Enter, Tab, Escape, Backspace, arrows) a
31749
31949
  auxiliary: {
31750
31950
  arguments: {
31751
31951
  value: JSON.stringify({ method, value, repeat }),
31752
- type: "string"
31952
+ type: "object"
31753
31953
  }
31754
31954
  }
31755
31955
  });
@@ -31784,9 +31984,9 @@ Use method="press" for navigation keys (Enter, Tab, Escape, Backspace, arrows) a
31784
31984
  });
31785
31985
 
31786
31986
  // lib/v3/agent/tools/fillFormVision.ts
31787
- var import_ai16 = require("ai");
31788
- var import_zod20 = require("zod");
31789
- var fillFormVisionTool = (v3, provider) => (0, import_ai16.tool)({
31987
+ var import_ai15 = require("ai");
31988
+ var import_zod19 = require("zod");
31989
+ var fillFormVisionTool = (v3, provider) => (0, import_ai15.tool)({
31790
31990
  description: `FORM FILL - SPECIALIZED MULTI-FIELD INPUT TOOL
31791
31991
 
31792
31992
  CRITICAL: Use this for ANY form with 2+ input fields (text inputs, textareas, etc.)
@@ -31804,16 +32004,16 @@ MANDATORY USE CASES (always use fillFormVision for these):
31804
32004
  - Checkout forms: address, payment info fields
31805
32005
  - Profile updates: multiple user data fields
31806
32006
  - Search filters: multiple criteria inputs`,
31807
- inputSchema: import_zod20.z.object({
31808
- fields: import_zod20.z.array(
31809
- import_zod20.z.object({
31810
- action: import_zod20.z.string().describe(
32007
+ inputSchema: import_zod19.z.object({
32008
+ fields: import_zod19.z.array(
32009
+ import_zod19.z.object({
32010
+ action: import_zod19.z.string().describe(
31811
32011
  "Description of the typing action, e.g. 'type foo into the bar field'"
31812
32012
  ),
31813
- value: import_zod20.z.string().describe("Text to type into the target field"),
31814
- coordinates: import_zod20.z.object({
31815
- x: import_zod20.z.number(),
31816
- y: import_zod20.z.number()
32013
+ value: import_zod19.z.string().describe("Text to type into the target field"),
32014
+ coordinates: import_zod19.z.object({
32015
+ x: import_zod19.z.number(),
32016
+ y: import_zod19.z.number()
31817
32017
  }).describe("Coordinates of the target field")
31818
32018
  })
31819
32019
  ).min(2, "Provide at least two fields to fill")
@@ -31838,32 +32038,36 @@ MANDATORY USE CASES (always use fillFormVision for these):
31838
32038
  auxiliary: {
31839
32039
  arguments: {
31840
32040
  value: JSON.stringify({ fields, processedFields }),
31841
- type: "string"
32041
+ type: "object"
31842
32042
  }
31843
32043
  }
31844
32044
  });
32045
+ const shouldCollectXpath = v3.isAgentReplayActive();
31845
32046
  const actions = [];
31846
32047
  for (const field of processedFields) {
31847
32048
  const xpath = yield page.click(
31848
32049
  field.coordinates.x,
31849
32050
  field.coordinates.y,
31850
32051
  {
31851
- returnXpath: true
32052
+ returnXpath: shouldCollectXpath
31852
32053
  }
31853
32054
  );
31854
32055
  yield page.type(field.value);
31855
- const normalizedXpath = ensureXPath(xpath);
31856
- if (normalizedXpath) {
31857
- actions.push({
31858
- selector: normalizedXpath,
31859
- description: field.action,
31860
- method: "type",
31861
- arguments: [field.value]
31862
- });
32056
+ if (shouldCollectXpath) {
32057
+ const normalizedXpath = ensureXPath(xpath);
32058
+ if (normalizedXpath) {
32059
+ actions.push({
32060
+ selector: normalizedXpath,
32061
+ description: field.action,
32062
+ method: "type",
32063
+ arguments: [field.value]
32064
+ });
32065
+ }
31863
32066
  }
31864
32067
  yield new Promise((resolve3) => setTimeout(resolve3, 100));
31865
32068
  }
31866
- if (actions.length > 0) {
32069
+ const screenshotBase64 = yield waitAndCaptureScreenshot(page, 100);
32070
+ if (shouldCollectXpath && actions.length > 0) {
31867
32071
  v3.recordAgentReplayStep({
31868
32072
  type: "act",
31869
32073
  instruction: `Fill ${fields.length} form fields`,
@@ -31873,7 +32077,8 @@ MANDATORY USE CASES (always use fillFormVision for these):
31873
32077
  }
31874
32078
  return {
31875
32079
  success: true,
31876
- playwrightArguments: processedFields
32080
+ playwrightArguments: processedFields,
32081
+ screenshotBase64
31877
32082
  };
31878
32083
  } catch (error) {
31879
32084
  return {
@@ -31881,13 +32086,47 @@ MANDATORY USE CASES (always use fillFormVision for these):
31881
32086
  error: `Error filling form: ${error.message}`
31882
32087
  };
31883
32088
  }
31884
- })
32089
+ }),
32090
+ toModelOutput: (result) => {
32091
+ var _a4, _b;
32092
+ if (result.success) {
32093
+ const content = [
32094
+ {
32095
+ type: "text",
32096
+ text: JSON.stringify({
32097
+ success: result.success,
32098
+ fieldsCount: (_b = (_a4 = result.playwrightArguments) == null ? void 0 : _a4.length) != null ? _b : 0
32099
+ })
32100
+ }
32101
+ ];
32102
+ if (result.screenshotBase64) {
32103
+ content.push({
32104
+ type: "media",
32105
+ mediaType: "image/png",
32106
+ data: result.screenshotBase64
32107
+ });
32108
+ }
32109
+ return { type: "content", value: content };
32110
+ }
32111
+ return {
32112
+ type: "content",
32113
+ value: [
32114
+ {
32115
+ type: "text",
32116
+ text: JSON.stringify({
32117
+ success: result.success,
32118
+ error: result.error
32119
+ })
32120
+ }
32121
+ ]
32122
+ };
32123
+ }
31885
32124
  });
31886
32125
 
31887
32126
  // lib/v3/agent/tools/think.ts
31888
- var import_ai17 = require("ai");
31889
- var import_zod21 = require("zod");
31890
- var thinkTool = () => (0, import_ai17.tool)({
32127
+ var import_ai16 = require("ai");
32128
+ var import_zod20 = require("zod");
32129
+ var thinkTool = () => (0, import_ai16.tool)({
31891
32130
  description: `Use this tool to think through complex problems or plan a sequence of steps. This is for internal reasoning only and doesn't perform any actions. Use this to:
31892
32131
 
31893
32132
  1. Plan a multi-step approach before taking action
@@ -31896,8 +32135,8 @@ var thinkTool = () => (0, import_ai17.tool)({
31896
32135
  4. Evaluate options when you're unsure what to do next
31897
32136
 
31898
32137
  The output is only visible to you; use it to track your own reasoning process.`,
31899
- inputSchema: import_zod21.z.object({
31900
- reasoning: import_zod21.z.string().describe(
32138
+ inputSchema: import_zod20.z.object({
32139
+ reasoning: import_zod20.z.string().describe(
31901
32140
  "Your step-by-step reasoning or planning process. Be as detailed as needed."
31902
32141
  )
31903
32142
  }),
@@ -31910,8 +32149,8 @@ The output is only visible to you; use it to track your own reasoning process.`,
31910
32149
  });
31911
32150
 
31912
32151
  // lib/v3/agent/tools/search.ts
31913
- var import_ai18 = require("ai");
31914
- var import_zod22 = require("zod");
32152
+ var import_ai17 = require("ai");
32153
+ var import_zod21 = require("zod");
31915
32154
  function performBraveSearch(query) {
31916
32155
  return __async(this, null, function* () {
31917
32156
  var _a4;
@@ -31957,10 +32196,10 @@ function performBraveSearch(query) {
31957
32196
  }
31958
32197
  });
31959
32198
  }
31960
- var searchTool = (v3) => (0, import_ai18.tool)({
32199
+ var searchTool = (v3) => (0, import_ai17.tool)({
31961
32200
  description: "Perform a web search and returns results. Use this tool when you need information from the web or when you are unsure of the exact URL you want to navigate to. This can be used to find the ideal entry point, resulting in a task that is easier to complete due to starting further in the process.",
31962
- inputSchema: import_zod22.z.object({
31963
- query: import_zod22.z.string().describe("The search query to look for on the web")
32201
+ inputSchema: import_zod21.z.object({
32202
+ query: import_zod21.z.string().describe("The search query to look for on the web")
31964
32203
  }),
31965
32204
  execute: (_0) => __async(null, [_0], function* ({ query }) {
31966
32205
  var _a4, _b, _c;
@@ -31971,7 +32210,7 @@ var searchTool = (v3) => (0, import_ai18.tool)({
31971
32210
  auxiliary: {
31972
32211
  arguments: {
31973
32212
  value: JSON.stringify({ query }),
31974
- type: "string"
32213
+ type: "object"
31975
32214
  }
31976
32215
  }
31977
32216
  });
@@ -32018,7 +32257,7 @@ function createAgentTools(v3, options) {
32018
32257
  ariaTree: ariaTreeTool(v3),
32019
32258
  click: clickTool(v3, provider),
32020
32259
  clickAndHold: clickAndHoldTool(v3, provider),
32021
- close: closeTool(),
32260
+ //close: closeTool(),
32022
32261
  dragAndDrop: dragAndDropTool(v3, provider),
32023
32262
  extract: extractTool(v3, executionModel, options == null ? void 0 : options.logger),
32024
32263
  fillForm: fillFormTool(v3, executionModel),
@@ -32030,7 +32269,7 @@ function createAgentTools(v3, options) {
32030
32269
  scroll: mode === "hybrid" ? scrollVisionTool(v3, provider) : scrollTool(v3),
32031
32270
  think: thinkTool(),
32032
32271
  type: typeTool(v3, provider),
32033
- wait: waitTool(v3)
32272
+ wait: waitTool(v3, mode)
32034
32273
  };
32035
32274
  if (process.env.BRAVE_API_KEY) {
32036
32275
  allTools.search = searchTool(v3);
@@ -32074,8 +32313,7 @@ function buildToolsSection(isHybridMode, hasSearch, excludeTools) {
32074
32313
  { name: "goto", description: "Navigate to a URL" },
32075
32314
  { name: "wait", description: "Wait for a specified time" },
32076
32315
  { name: "navback", description: "Navigate back in browser history" },
32077
- { name: "scroll", description: "Scroll the page x pixels up or down" },
32078
- { name: "close", description: "Mark the task as complete or failed" }
32316
+ { name: "scroll", description: "Scroll the page x pixels up or down" }
32079
32317
  ];
32080
32318
  const domTools = [
32081
32319
  {
@@ -32097,8 +32335,7 @@ function buildToolsSection(isHybridMode, hasSearch, excludeTools) {
32097
32335
  { name: "goto", description: "Navigate to a URL" },
32098
32336
  { name: "wait", description: "Wait for a specified time" },
32099
32337
  { name: "navback", description: "Navigate back in browser history" },
32100
- { name: "scroll", description: "Scroll the page x pixels up or down" },
32101
- { name: "close", description: "Mark the task as complete or failed" }
32338
+ { name: "scroll", description: "Scroll the page x pixels up or down" }
32102
32339
  ];
32103
32340
  const baseTools = isHybridMode ? hybridTools : domTools;
32104
32341
  if (hasSearch) {
@@ -32107,8 +32344,8 @@ function buildToolsSection(isHybridMode, hasSearch, excludeTools) {
32107
32344
  description: "Perform a web search and return results. Prefer this over navigating to Google and searching within the page for reliability and efficiency."
32108
32345
  });
32109
32346
  }
32110
- const filteredTools = baseTools.filter((tool21) => !excludeSet.has(tool21.name));
32111
- const toolLines = filteredTools.map((tool21) => ` <tool name="${tool21.name}">${tool21.description}</tool>`).join("\n");
32347
+ const filteredTools = baseTools.filter((tool22) => !excludeSet.has(tool22.name));
32348
+ const toolLines = filteredTools.map((tool22) => ` <tool name="${tool22.name}">${tool22.description}</tool>`).join("\n");
32112
32349
  return `<tools>
32113
32350
  ${toolLines}
32114
32351
  </tools>`;
@@ -32200,8 +32437,6 @@ function buildAgentSystemPrompt(options) {
32200
32437
  <item>Always start by understanding the current page state</item>
32201
32438
  <item>Use the screenshot tool to verify page state when needed</item>
32202
32439
  <item>Use appropriate tools for each action</item>
32203
- <item>When the task is complete, use the "close" tool with taskComplete: true</item>
32204
- <item>If the task cannot be completed, use "close" with taskComplete: false</item>
32205
32440
  </guidelines>
32206
32441
  ${pageUnderstandingProtocol}
32207
32442
  <navigation>
@@ -32225,132 +32460,143 @@ function buildAgentSystemPrompt(options) {
32225
32460
  }
32226
32461
 
32227
32462
  // lib/v3/handlers/v3AgentHandler.ts
32228
- var import_ai19 = require("ai");
32463
+ var import_ai20 = require("ai");
32229
32464
 
32230
32465
  // lib/v3/agent/utils/messageProcessing.ts
32466
+ var VISION_ACTION_TOOLS = [
32467
+ "click",
32468
+ "type",
32469
+ "dragAndDrop",
32470
+ "wait",
32471
+ "fillFormVision",
32472
+ "scroll"
32473
+ ];
32231
32474
  function isToolMessage(message) {
32232
32475
  return !!message && typeof message === "object" && message.role === "tool" && Array.isArray(message.content);
32233
32476
  }
32234
32477
  function isScreenshotPart(part) {
32235
32478
  return !!part && typeof part === "object" && part.toolName === "screenshot";
32236
32479
  }
32480
+ function isVisionActionPart(part) {
32481
+ if (!part || typeof part !== "object") return false;
32482
+ const toolName = part.toolName;
32483
+ return typeof toolName === "string" && VISION_ACTION_TOOLS.includes(toolName);
32484
+ }
32485
+ function isVisionPart(part) {
32486
+ return isScreenshotPart(part) || isVisionActionPart(part);
32487
+ }
32237
32488
  function isAriaTreePart(part) {
32238
32489
  return !!part && typeof part === "object" && part.toolName === "ariaTree";
32239
32490
  }
32240
- function processMessages(params) {
32241
- const originalContentSize = JSON.stringify(params.prompt).length;
32242
- const screenshotIndices = findToolIndices(params.prompt, "screenshot");
32243
- const ariaTreeIndices = findToolIndices(params.prompt, "ariaTree");
32244
- const processedPrompt = params.prompt.map(
32245
- (message, index) => {
32491
+ function processMessages(messages) {
32492
+ let compressedCount = 0;
32493
+ const visionIndices = [];
32494
+ const ariaTreeIndices = [];
32495
+ for (let i2 = 0; i2 < messages.length; i2++) {
32496
+ const message = messages[i2];
32497
+ if (isToolMessage(message)) {
32498
+ const content = message.content;
32499
+ if (content.some(isVisionPart)) {
32500
+ visionIndices.push(i2);
32501
+ }
32502
+ if (content.some(isAriaTreePart)) {
32503
+ ariaTreeIndices.push(i2);
32504
+ }
32505
+ }
32506
+ }
32507
+ if (visionIndices.length > 2) {
32508
+ const toCompress = visionIndices.slice(0, visionIndices.length - 2);
32509
+ for (const index of toCompress) {
32510
+ const message = messages[index];
32246
32511
  if (isToolMessage(message)) {
32247
- if (message.content.some((part) => isScreenshotPart(part))) {
32248
- const shouldCompress = shouldCompressScreenshot(
32249
- index,
32250
- screenshotIndices
32251
- );
32252
- if (shouldCompress) {
32253
- return compressScreenshotMessage(message);
32254
- }
32255
- }
32256
- if (message.content.some((part) => isAriaTreePart(part))) {
32257
- const shouldCompress = shouldCompressAriaTree(index, ariaTreeIndices);
32258
- if (shouldCompress) {
32259
- return compressAriaTreeMessage(message);
32260
- }
32261
- }
32512
+ compressScreenshotMessage(message);
32513
+ compressVisionActionMessage(message);
32514
+ compressedCount++;
32262
32515
  }
32263
- return message;
32264
32516
  }
32265
- );
32266
- const compressedContentSize = JSON.stringify(processedPrompt).length;
32267
- const stats = calculateCompressionStats(
32268
- originalContentSize,
32269
- compressedContentSize,
32270
- screenshotIndices.length,
32271
- ariaTreeIndices.length
32272
- );
32273
- return {
32274
- processedPrompt,
32275
- stats
32276
- };
32277
- }
32278
- function findToolIndices(prompt, toolName) {
32279
- const screenshotIndices = [];
32280
- prompt.forEach((message, index) => {
32281
- if (isToolMessage(message)) {
32282
- const hasMatch = message.content.some(
32283
- (part) => toolName === "screenshot" ? isScreenshotPart(part) : isAriaTreePart(part)
32284
- );
32285
- if (hasMatch) {
32286
- screenshotIndices.push(index);
32517
+ }
32518
+ if (ariaTreeIndices.length > 1) {
32519
+ const toCompress = ariaTreeIndices.slice(0, ariaTreeIndices.length - 1);
32520
+ for (const idx of toCompress) {
32521
+ const message = messages[idx];
32522
+ if (isToolMessage(message)) {
32523
+ compressAriaTreeMessage(message);
32524
+ compressedCount++;
32287
32525
  }
32288
32526
  }
32289
- });
32290
- return screenshotIndices;
32291
- }
32292
- function shouldCompressScreenshot(index, screenshotIndices) {
32293
- const isNewestScreenshot = index === Math.max(...screenshotIndices);
32294
- const isSecondNewestScreenshot = screenshotIndices.length > 1 && index === screenshotIndices.sort((a, b) => b - a)[1];
32295
- return !isNewestScreenshot && !isSecondNewestScreenshot;
32296
- }
32297
- function shouldCompressAriaTree(index, ariaTreeIndices) {
32298
- const isNewestAriaTree = index === Math.max(...ariaTreeIndices);
32299
- return !isNewestAriaTree;
32527
+ }
32528
+ return compressedCount;
32300
32529
  }
32301
32530
  function compressScreenshotMessage(message) {
32302
- const updatedContent = message.content.map((part) => {
32531
+ var _a4;
32532
+ for (const part of message.content) {
32303
32533
  if (isScreenshotPart(part)) {
32304
- return __spreadProps(__spreadValues({}, part), {
32305
- result: [
32306
- {
32307
- type: "text",
32308
- text: "screenshot taken"
32309
- }
32310
- ]
32311
- });
32534
+ const typedPart = part;
32535
+ const placeholder = [{ type: "text", text: "screenshot taken" }];
32536
+ if ((_a4 = typedPart.output) == null ? void 0 : _a4.value) {
32537
+ typedPart.output.value = placeholder;
32538
+ }
32539
+ if (typedPart.result) {
32540
+ typedPart.result = placeholder;
32541
+ }
32312
32542
  }
32313
- return part;
32314
- });
32315
- return __spreadProps(__spreadValues({}, message), {
32316
- content: updatedContent
32317
- });
32543
+ }
32544
+ }
32545
+ function compressVisionActionMessage(message) {
32546
+ var _a4;
32547
+ for (const part of message.content) {
32548
+ if (isVisionActionPart(part)) {
32549
+ const typedPart = part;
32550
+ if (((_a4 = typedPart.output) == null ? void 0 : _a4.value) && Array.isArray(typedPart.output.value)) {
32551
+ typedPart.output.value = typedPart.output.value.filter(
32552
+ (item) => item && typeof item === "object" && item.type !== "media"
32553
+ );
32554
+ }
32555
+ if (typedPart.result && Array.isArray(typedPart.result)) {
32556
+ typedPart.result = typedPart.result.filter(
32557
+ (item) => item && typeof item === "object" && item.type !== "media"
32558
+ );
32559
+ }
32560
+ }
32561
+ }
32318
32562
  }
32319
32563
  function compressAriaTreeMessage(message) {
32320
- const updatedContent = message.content.map((part) => {
32564
+ var _a4;
32565
+ for (const part of message.content) {
32321
32566
  if (isAriaTreePart(part)) {
32322
- return __spreadProps(__spreadValues({}, part), {
32323
- result: [
32324
- {
32325
- type: "text",
32326
- text: "ARIA tree extracted for context of page elements"
32327
- }
32328
- ]
32329
- });
32567
+ const typedPart = part;
32568
+ const placeholder = [
32569
+ {
32570
+ type: "text",
32571
+ text: "ARIA tree extracted for context of page elements"
32572
+ }
32573
+ ];
32574
+ if ((_a4 = typedPart.output) == null ? void 0 : _a4.value) {
32575
+ typedPart.output.value = placeholder;
32576
+ }
32577
+ if (typedPart.result) {
32578
+ typedPart.result = placeholder;
32579
+ }
32330
32580
  }
32331
- return part;
32332
- });
32333
- return __spreadProps(__spreadValues({}, message), {
32334
- content: updatedContent
32335
- });
32336
- }
32337
- function calculateCompressionStats(originalSize, compressedSize, screenshotCount, ariaTreeCount) {
32338
- const savedChars = originalSize - compressedSize;
32339
- const compressionRatio = originalSize > 0 ? (originalSize - compressedSize) / originalSize * 100 : 0;
32340
- return {
32341
- originalSize,
32342
- compressedSize,
32343
- savedChars,
32344
- compressionRatio,
32345
- screenshotCount,
32346
- ariaTreeCount
32347
- };
32581
+ }
32348
32582
  }
32349
32583
 
32350
32584
  // lib/v3/handlers/v3AgentHandler.ts
32351
32585
  init_flowLogger();
32352
32586
 
32353
32587
  // lib/v3/agent/utils/actionMapping.ts
32588
+ var EXCLUDED_OUTPUT_KEYS = ["screenshotBase64"];
32589
+ function stripExcludedKeys(output) {
32590
+ const result = {};
32591
+ for (const [key, value] of Object.entries(output)) {
32592
+ if (!EXCLUDED_OUTPUT_KEYS.includes(
32593
+ key
32594
+ )) {
32595
+ result[key] = value;
32596
+ }
32597
+ }
32598
+ return result;
32599
+ }
32354
32600
  function mapToolResultToActions({
32355
32601
  toolCallName,
32356
32602
  toolResult,
@@ -32416,14 +32662,112 @@ function createStandardAction(toolCallName, toolResult, args, reasoning) {
32416
32662
  return action;
32417
32663
  }
32418
32664
  if (toolCallName !== "ariaTree" && toolResult) {
32419
- const { output } = toolResult;
32420
- Object.assign(action, output);
32665
+ const result = toolResult;
32666
+ const output = result.output;
32667
+ if (output && typeof output === "object" && !Array.isArray(output)) {
32668
+ const cleanedOutput = stripExcludedKeys(
32669
+ output
32670
+ );
32671
+ Object.assign(action, cleanedOutput);
32672
+ }
32421
32673
  }
32422
32674
  return action;
32423
32675
  }
32424
32676
 
32425
32677
  // lib/v3/handlers/v3AgentHandler.ts
32426
32678
  init_sdkErrors();
32679
+
32680
+ // lib/v3/agent/utils/handleCloseToolCall.ts
32681
+ var import_ai18 = require("ai");
32682
+ var import_zod22 = require("zod");
32683
+ var import_ai19 = require("ai");
32684
+ var baseCloseSchema = import_zod22.z.object({
32685
+ reasoning: import_zod22.z.string().describe("Brief summary of what actions were taken and the outcome"),
32686
+ taskComplete: import_zod22.z.boolean().describe("true if the task was fully completed, false otherwise")
32687
+ });
32688
+ function handleCloseToolCall(options) {
32689
+ return __async(this, null, function* () {
32690
+ var _a4;
32691
+ const { model, inputMessages, instruction, outputSchema, logger } = options;
32692
+ logger({
32693
+ category: "agent",
32694
+ message: "Agent calling tool: close",
32695
+ level: 1
32696
+ });
32697
+ const closeToolSchema = outputSchema ? baseCloseSchema.extend({
32698
+ output: outputSchema.describe(
32699
+ "The specific data the user requested from this task"
32700
+ )
32701
+ }) : baseCloseSchema;
32702
+ const outputInstructions = outputSchema ? `
32703
+
32704
+ The user also requested the following information from this task. Provide it in the "output" field:
32705
+ ${JSON.stringify(
32706
+ Object.fromEntries(
32707
+ Object.entries(outputSchema.shape).map(([key, value]) => [
32708
+ key,
32709
+ value.description || "no description"
32710
+ ])
32711
+ ),
32712
+ null,
32713
+ 2
32714
+ )}` : "";
32715
+ const systemPrompt = `You are a web automation assistant that was tasked with completing a task.
32716
+
32717
+ The task was:
32718
+ "${instruction}"
32719
+
32720
+ Review what was accomplished and provide your final assessment in whether the task was completed successfully. you have been provided with the history of the actions taken so far, use this to determine if the task was completed successfully.${outputInstructions}
32721
+
32722
+ Call the "close" tool with:
32723
+ 1. A brief summary of what was done
32724
+ 2. Whether the task was completed successfully${outputSchema ? "\n3. The requested output data based on what you found" : ""}`;
32725
+ const closeTool = (0, import_ai19.tool)({
32726
+ description: outputSchema ? "Complete the task with your assessment and the requested output data." : "Complete the task with your final assessment.",
32727
+ inputSchema: closeToolSchema,
32728
+ execute: (params) => __async(null, null, function* () {
32729
+ return __spreadValues({ success: true }, params);
32730
+ })
32731
+ });
32732
+ const userPrompt = {
32733
+ role: "user",
32734
+ content: outputSchema ? "Provide your final assessment and the requested output data." : "Provide your final assessment."
32735
+ };
32736
+ const result = yield (0, import_ai18.generateText)({
32737
+ model,
32738
+ system: systemPrompt,
32739
+ messages: [...inputMessages, userPrompt],
32740
+ tools: { close: closeTool },
32741
+ toolChoice: { type: "tool", toolName: "close" }
32742
+ });
32743
+ const closeToolCall = result.toolCalls.find((tc) => tc.toolName === "close");
32744
+ const outputMessages = [
32745
+ userPrompt,
32746
+ ...((_a4 = result.response) == null ? void 0 : _a4.messages) || []
32747
+ ];
32748
+ if (!closeToolCall) {
32749
+ return {
32750
+ reasoning: result.text || "Task execution completed",
32751
+ taskComplete: false,
32752
+ messages: outputMessages
32753
+ };
32754
+ }
32755
+ const input = closeToolCall.input;
32756
+ logger({
32757
+ category: "agent",
32758
+ message: `Task completed`,
32759
+ level: 1
32760
+ });
32761
+ return {
32762
+ reasoning: input.reasoning,
32763
+ taskComplete: input.taskComplete,
32764
+ messages: outputMessages,
32765
+ output: input.output
32766
+ };
32767
+ });
32768
+ }
32769
+
32770
+ // lib/v3/handlers/v3AgentHandler.ts
32427
32771
  function getErrorMessage(error) {
32428
32772
  return error instanceof Error ? error.message : String(error);
32429
32773
  }
@@ -32459,14 +32803,9 @@ var V3AgentHandler = class {
32459
32803
  throw new MissingLLMConfigurationError();
32460
32804
  }
32461
32805
  const baseModel = this.llmClient.getLanguageModel();
32462
- const wrappedModel = (0, import_ai19.wrapLanguageModel)({
32806
+ const wrappedModel = (0, import_ai20.wrapLanguageModel)({
32463
32807
  model: baseModel,
32464
- middleware: __spreadValues({
32465
- transformParams: (_0) => __async(this, [_0], function* ({ params }) {
32466
- const { processedPrompt } = processMessages(params);
32467
- return __spreadProps(__spreadValues({}, params), { prompt: processedPrompt });
32468
- })
32469
- }, SessionFileLogger.createLlmLoggingMiddleware(baseModel.modelId))
32808
+ middleware: __spreadValues({}, SessionFileLogger.createLlmLoggingMiddleware(baseModel.modelId))
32470
32809
  });
32471
32810
  return {
32472
32811
  options,
@@ -32487,6 +32826,15 @@ var V3AgentHandler = class {
32487
32826
  }
32488
32827
  });
32489
32828
  }
32829
+ createPrepareStep(userCallback) {
32830
+ return (options) => __async(null, null, function* () {
32831
+ processMessages(options.messages);
32832
+ if (userCallback) {
32833
+ return userCallback(options);
32834
+ }
32835
+ return options;
32836
+ });
32837
+ }
32490
32838
  createStepHandler(state, userCallback) {
32491
32839
  return (event) => __async(this, null, function* () {
32492
32840
  var _a4;
@@ -32548,7 +32896,7 @@ var V3AgentHandler = class {
32548
32896
  }
32549
32897
  execute(instructionOrOptions) {
32550
32898
  return __async(this, null, function* () {
32551
- var _a4;
32899
+ var _a4, _b;
32552
32900
  const startTime = Date.now();
32553
32901
  const options = typeof instructionOrOptions === "object" ? instructionOrOptions : null;
32554
32902
  const signal = options == null ? void 0 : options.signal;
@@ -32601,7 +32949,7 @@ var V3AgentHandler = class {
32601
32949
  stopWhen: (result2) => this.handleStop(result2, maxSteps),
32602
32950
  temperature: 1,
32603
32951
  toolChoice: "auto",
32604
- prepareStep: callbacks == null ? void 0 : callbacks.prepareStep,
32952
+ prepareStep: this.createPrepareStep(callbacks == null ? void 0 : callbacks.prepareStep),
32605
32953
  onStepFinish: this.createStepHandler(state, callbacks == null ? void 0 : callbacks.onStepFinish),
32606
32954
  abortSignal: preparedOptions.signal,
32607
32955
  providerOptions: wrappedModel.modelId.includes("gemini-3") ? {
@@ -32610,11 +32958,22 @@ var V3AgentHandler = class {
32610
32958
  }
32611
32959
  } : void 0
32612
32960
  });
32961
+ const allMessages = [...messages, ...((_b = result.response) == null ? void 0 : _b.messages) || []];
32962
+ const closeResult = yield this.ensureClosed(
32963
+ state,
32964
+ wrappedModel,
32965
+ allMessages,
32966
+ preparedOptions.instruction,
32967
+ preparedOptions.output,
32968
+ this.logger
32969
+ );
32613
32970
  return this.consolidateMetricsAndResult(
32614
32971
  startTime,
32615
32972
  state,
32616
- messages,
32617
- result
32973
+ closeResult.messages,
32974
+ result,
32975
+ maxSteps,
32976
+ closeResult.output
32618
32977
  );
32619
32978
  } catch (error) {
32620
32979
  if (error instanceof StreamingCallbacksInNonStreamingModeError) {
@@ -32691,7 +33050,7 @@ var V3AgentHandler = class {
32691
33050
  stopWhen: (result) => this.handleStop(result, maxSteps),
32692
33051
  temperature: 1,
32693
33052
  toolChoice: "auto",
32694
- prepareStep: callbacks == null ? void 0 : callbacks.prepareStep,
33053
+ prepareStep: this.createPrepareStep(callbacks == null ? void 0 : callbacks.prepareStep),
32695
33054
  onStepFinish: this.createStepHandler(state, callbacks == null ? void 0 : callbacks.onStepFinish),
32696
33055
  onError: (event) => {
32697
33056
  if (callbacks == null ? void 0 : callbacks.onError) {
@@ -32701,16 +33060,29 @@ var V3AgentHandler = class {
32701
33060
  },
32702
33061
  onChunk: callbacks == null ? void 0 : callbacks.onChunk,
32703
33062
  onFinish: (event) => {
33063
+ var _a5;
32704
33064
  if (callbacks == null ? void 0 : callbacks.onFinish) {
32705
33065
  callbacks.onFinish(event);
32706
33066
  }
32707
- const result = this.consolidateMetricsAndResult(
32708
- startTime,
33067
+ const allMessages = [...messages, ...((_a5 = event.response) == null ? void 0 : _a5.messages) || []];
33068
+ this.ensureClosed(
32709
33069
  state,
32710
- messages,
32711
- event
32712
- );
32713
- resolveResult(result);
33070
+ wrappedModel,
33071
+ allMessages,
33072
+ options.instruction,
33073
+ options.output,
33074
+ this.logger
33075
+ ).then((closeResult) => {
33076
+ const result = this.consolidateMetricsAndResult(
33077
+ startTime,
33078
+ state,
33079
+ closeResult.messages,
33080
+ event,
33081
+ maxSteps,
33082
+ closeResult.output
33083
+ );
33084
+ resolveResult(result);
33085
+ });
32714
33086
  },
32715
33087
  onAbort: (event) => {
32716
33088
  var _a5;
@@ -32732,11 +33104,20 @@ var V3AgentHandler = class {
32732
33104
  return agentStreamResult;
32733
33105
  });
32734
33106
  }
32735
- consolidateMetricsAndResult(startTime, state, inputMessages, result) {
33107
+ consolidateMetricsAndResult(startTime, state, inputMessages, result, maxSteps, output) {
32736
33108
  var _a4;
32737
33109
  if (!state.finalMessage) {
32738
33110
  const allReasoning = state.collectedReasoning.join(" ").trim();
32739
- state.finalMessage = allReasoning || result.text || "";
33111
+ if (!state.completed && maxSteps && ((_a4 = result.steps) == null ? void 0 : _a4.length) >= maxSteps) {
33112
+ this.logger({
33113
+ category: "agent",
33114
+ message: `Agent stopped: reached maximum steps (${maxSteps})`,
33115
+ level: 1
33116
+ });
33117
+ state.finalMessage = `Agent stopped: reached maximum steps (${maxSteps})`;
33118
+ } else {
33119
+ state.finalMessage = allReasoning || result.text || "";
33120
+ }
32740
33121
  }
32741
33122
  const endTime = Date.now();
32742
33123
  const inferenceTimeMs = endTime - startTime;
@@ -32750,16 +33131,12 @@ var V3AgentHandler = class {
32750
33131
  inferenceTimeMs
32751
33132
  );
32752
33133
  }
32753
- const responseMessages = ((_a4 = result.response) == null ? void 0 : _a4.messages) || [];
32754
- const fullMessages = [
32755
- ...inputMessages,
32756
- ...responseMessages
32757
- ];
32758
33134
  return {
32759
33135
  success: state.completed,
32760
33136
  message: state.finalMessage || "Task execution completed",
32761
33137
  actions: state.actions,
32762
33138
  completed: state.completed,
33139
+ output,
32763
33140
  usage: result.usage ? {
32764
33141
  input_tokens: result.usage.inputTokens || 0,
32765
33142
  output_tokens: result.usage.outputTokens || 0,
@@ -32767,7 +33144,7 @@ var V3AgentHandler = class {
32767
33144
  cached_input_tokens: result.usage.cachedInputTokens || 0,
32768
33145
  inference_time_ms: inferenceTimeMs
32769
33146
  } : void 0,
32770
- messages: fullMessages
33147
+ messages: inputMessages
32771
33148
  };
32772
33149
  }
32773
33150
  createTools(excludeTools) {
@@ -32787,7 +33164,47 @@ var V3AgentHandler = class {
32787
33164
  if ((_a4 = lastStep == null ? void 0 : lastStep.toolCalls) == null ? void 0 : _a4.some((tc) => tc.toolName === "close")) {
32788
33165
  return true;
32789
33166
  }
32790
- return (0, import_ai19.stepCountIs)(maxSteps)(result);
33167
+ return (0, import_ai20.stepCountIs)(maxSteps)(result);
33168
+ }
33169
+ /**
33170
+ * Ensures the close tool is called at the end of agent execution.
33171
+ * Returns the messages and any extracted output from the close call.
33172
+ */
33173
+ ensureClosed(state, model, messages, instruction, outputSchema, logger) {
33174
+ return __async(this, null, function* () {
33175
+ if (state.completed) return { messages };
33176
+ const closeResult = yield handleCloseToolCall({
33177
+ model,
33178
+ inputMessages: messages,
33179
+ instruction,
33180
+ outputSchema,
33181
+ logger
33182
+ });
33183
+ state.completed = closeResult.taskComplete;
33184
+ state.finalMessage = closeResult.reasoning;
33185
+ const closeAction = mapToolResultToActions({
33186
+ toolCallName: "close",
33187
+ toolResult: {
33188
+ success: true,
33189
+ reasoning: closeResult.reasoning,
33190
+ taskComplete: closeResult.taskComplete
33191
+ },
33192
+ args: {
33193
+ reasoning: closeResult.reasoning,
33194
+ taskComplete: closeResult.taskComplete
33195
+ },
33196
+ reasoning: closeResult.reasoning
33197
+ });
33198
+ for (const action of closeAction) {
33199
+ action.pageUrl = state.currentPageUrl;
33200
+ action.timestamp = Date.now();
33201
+ state.actions.push(action);
33202
+ }
33203
+ return {
33204
+ messages: [...messages, ...closeResult.messages],
33205
+ output: closeResult.output
33206
+ };
33207
+ });
32791
33208
  }
32792
33209
  /**
32793
33210
  * Capture a screenshot and emit it via the event bus
@@ -33269,8 +33686,8 @@ var AnthropicCUAClient = class extends AgentClient {
33269
33686
  betas: ["computer-use-2025-01-24"]
33270
33687
  };
33271
33688
  if (this.tools && Object.keys(this.tools).length > 0) {
33272
- const customTools = Object.entries(this.tools).map(([name, tool21]) => {
33273
- const schema = tool21.inputSchema;
33689
+ const customTools = Object.entries(this.tools).map(([name, tool22]) => {
33690
+ const schema = tool22.inputSchema;
33274
33691
  const jsonSchema3 = toJsonSchema(schema);
33275
33692
  const inputSchema = {
33276
33693
  type: "object",
@@ -33279,7 +33696,7 @@ var AnthropicCUAClient = class extends AgentClient {
33279
33696
  };
33280
33697
  return {
33281
33698
  name,
33282
- description: tool21.description,
33699
+ description: tool22.description,
33283
33700
  input_schema: inputSchema
33284
33701
  };
33285
33702
  });
@@ -33397,13 +33814,13 @@ var AnthropicCUAClient = class extends AgentClient {
33397
33814
  let toolResult = "Tool executed successfully";
33398
33815
  if (this.tools && item.name in this.tools) {
33399
33816
  try {
33400
- const tool21 = this.tools[item.name];
33817
+ const tool22 = this.tools[item.name];
33401
33818
  logger({
33402
33819
  category: "agent",
33403
33820
  message: `Executing tool call: ${item.name} with args: ${JSON.stringify(item.input)}`,
33404
33821
  level: 1
33405
33822
  });
33406
- const result = yield tool21.execute(item.input, {
33823
+ const result = yield tool22.execute(item.input, {
33407
33824
  toolCallId: item.id,
33408
33825
  messages: []
33409
33826
  });
@@ -33674,7 +34091,6 @@ var OpenAICUAClient = class extends AgentClient {
33674
34091
  constructor(type, modelName, userProvidedInstructions, clientOptions, tools) {
33675
34092
  super(type, modelName, userProvidedInstructions);
33676
34093
  this.currentViewport = { width: 1288, height: 711 };
33677
- this.actualScreenshotSize = { width: 1288, height: 711 };
33678
34094
  this.reasoningItems = /* @__PURE__ */ new Map();
33679
34095
  this.environment = "browser";
33680
34096
  this.apiKey = (clientOptions == null ? void 0 : clientOptions.apiKey) || process.env.OPENAI_API_KEY || "";
@@ -33695,9 +34111,6 @@ var OpenAICUAClient = class extends AgentClient {
33695
34111
  setViewport(width, height) {
33696
34112
  this.currentViewport = { width, height };
33697
34113
  }
33698
- setScreenshotSize(width, height) {
33699
- this.actualScreenshotSize = { width, height };
33700
- }
33701
34114
  setCurrentUrl(url) {
33702
34115
  this.currentUrl = url;
33703
34116
  }
@@ -33965,13 +34378,13 @@ var OpenAICUAClient = class extends AgentClient {
33965
34378
  truncation: "auto"
33966
34379
  };
33967
34380
  if (this.tools && Object.keys(this.tools).length > 0) {
33968
- const customTools = Object.entries(this.tools).map(([name, tool21]) => ({
34381
+ const customTools = Object.entries(this.tools).map(([name, tool22]) => ({
33969
34382
  type: "function",
33970
34383
  name,
33971
34384
  function: {
33972
34385
  name,
33973
- description: tool21.description,
33974
- parameters: tool21.inputSchema
34386
+ description: tool22.description,
34387
+ parameters: tool22.inputSchema
33975
34388
  }
33976
34389
  }));
33977
34390
  requestParams.tools = [
@@ -34123,14 +34536,14 @@ var OpenAICUAClient = class extends AgentClient {
34123
34536
  let toolResult = "Tool executed successfully";
34124
34537
  if (this.tools && item.name in this.tools) {
34125
34538
  try {
34126
- const tool21 = this.tools[item.name];
34539
+ const tool22 = this.tools[item.name];
34127
34540
  const args = JSON.parse(item.arguments);
34128
34541
  logger({
34129
34542
  category: "agent",
34130
34543
  message: `Executing tool call: ${item.name} with args: ${item.arguments}`,
34131
34544
  level: 1
34132
34545
  });
34133
- const result = yield tool21.execute(args, {
34546
+ const result = yield tool22.execute(args, {
34134
34547
  toolCallId: item.call_id,
34135
34548
  messages: []
34136
34549
  });
@@ -34180,16 +34593,9 @@ var OpenAICUAClient = class extends AgentClient {
34180
34593
  }
34181
34594
  convertComputerCallToAction(call) {
34182
34595
  const { action } = call;
34183
- const scaledAction = __spreadValues({}, action);
34184
- if (action.x !== void 0 && action.y !== void 0) {
34185
- const scaleX = this.currentViewport.width / this.actualScreenshotSize.width;
34186
- const scaleY = this.currentViewport.height / this.actualScreenshotSize.height;
34187
- scaledAction.x = Math.floor(action.x * scaleX);
34188
- scaledAction.y = Math.floor(action.y * scaleY);
34189
- }
34190
34596
  return __spreadValues({
34191
34597
  type: action.type
34192
- }, scaledAction);
34598
+ }, action);
34193
34599
  }
34194
34600
  convertFunctionCallToAction(call) {
34195
34601
  try {
@@ -34293,8 +34699,8 @@ function executeGoogleCustomTool(toolName, toolArgs, tools, functionCall, logger
34293
34699
  message: `Executing custom tool: ${toolName} with args: ${JSON.stringify(toolArgs)}`,
34294
34700
  level: 1
34295
34701
  });
34296
- const tool21 = tools[toolName];
34297
- const toolResult = yield tool21.execute(toolArgs, {
34702
+ const tool22 = tools[toolName];
34703
+ const toolResult = yield tool22.execute(toolArgs, {
34298
34704
  toolCallId: `tool_${Date.now()}`,
34299
34705
  messages: []
34300
34706
  });
@@ -34342,22 +34748,22 @@ function isCustomTool(functionCall, tools) {
34342
34748
  }
34343
34749
  function convertToolSetToFunctionDeclarations(tools) {
34344
34750
  const functionDeclarations = [];
34345
- for (const [name, tool21] of Object.entries(tools)) {
34346
- const functionDeclaration = convertToolToFunctionDeclaration(name, tool21);
34751
+ for (const [name, tool22] of Object.entries(tools)) {
34752
+ const functionDeclaration = convertToolToFunctionDeclaration(name, tool22);
34347
34753
  if (functionDeclaration) {
34348
34754
  functionDeclarations.push(functionDeclaration);
34349
34755
  }
34350
34756
  }
34351
34757
  return functionDeclarations;
34352
34758
  }
34353
- function convertToolToFunctionDeclaration(name, tool21) {
34759
+ function convertToolToFunctionDeclaration(name, tool22) {
34354
34760
  try {
34355
- const schema = tool21.inputSchema;
34761
+ const schema = tool22.inputSchema;
34356
34762
  const jsonSchema3 = toJsonSchema(schema);
34357
34763
  const parameters = convertJsonSchemaToGoogleParameters(jsonSchema3);
34358
34764
  return {
34359
34765
  name,
34360
- description: tool21.description || `Execute ${name}`,
34766
+ description: tool22.description || `Execute ${name}`,
34361
34767
  parameters
34362
34768
  };
34363
34769
  } catch (error) {
@@ -34408,7 +34814,6 @@ var GoogleCUAClient = class extends AgentClient {
34408
34814
  constructor(type, modelName, userProvidedInstructions, clientOptions, tools) {
34409
34815
  super(type, modelName, userProvidedInstructions);
34410
34816
  this.currentViewport = { width: 1288, height: 711 };
34411
- this.actualScreenshotSize = { width: 1288, height: 711 };
34412
34817
  this.history = [];
34413
34818
  this.environment = "ENVIRONMENT_BROWSER";
34414
34819
  this.tools = tools;
@@ -34447,9 +34852,6 @@ var GoogleCUAClient = class extends AgentClient {
34447
34852
  setViewport(width, height) {
34448
34853
  this.currentViewport = { width, height };
34449
34854
  }
34450
- setScreenshotSize(width, height) {
34451
- this.actualScreenshotSize = { width, height };
34452
- }
34453
34855
  setCurrentUrl(url) {
34454
34856
  this.currentUrl = url;
34455
34857
  }
@@ -35011,9 +35413,26 @@ var GoogleCUAClient = class extends AgentClient {
35011
35413
  }
35012
35414
  case "scroll_document": {
35013
35415
  const direction = args.direction.toLowerCase();
35416
+ const magnitude = typeof args.magnitude === "number" ? args.magnitude : 800;
35417
+ let scroll_x = 0;
35418
+ let scroll_y = 0;
35419
+ if (direction === "up") {
35420
+ scroll_y = -magnitude;
35421
+ } else if (direction === "down") {
35422
+ scroll_y = magnitude;
35423
+ } else if (direction === "left") {
35424
+ scroll_x = -magnitude;
35425
+ } else if (direction === "right") {
35426
+ scroll_x = magnitude;
35427
+ } else {
35428
+ scroll_y = magnitude;
35429
+ }
35014
35430
  return {
35015
- type: "keypress",
35016
- keys: [direction === "up" ? "PageUp" : "PageDown"]
35431
+ type: "scroll",
35432
+ x: 0,
35433
+ y: 0,
35434
+ scroll_x,
35435
+ scroll_y
35017
35436
  };
35018
35437
  }
35019
35438
  case "scroll_at": {
@@ -35111,18 +35530,17 @@ var GoogleCUAClient = class extends AgentClient {
35111
35530
  }
35112
35531
  }
35113
35532
  /**
35114
- * Normalize coordinates from Google's 0-1000 range to actual viewport dimensions
35533
+ * Normalize coordinates from Google's 0-1000 range to fixed viewport dimensions.
35534
+ * Google CUA outputs coordinates in 0-1000 range based on the screenshot it sees,
35535
+ * which is always 1288x711 regardless of browser-reported viewport size.
35115
35536
  */
35116
35537
  normalizeCoordinates(x2, y) {
35538
+ const GOOGLE_CUA_VIEWPORT = { width: 1288, height: 711 };
35117
35539
  x2 = Math.min(999, Math.max(0, x2));
35118
35540
  y = Math.min(999, Math.max(0, y));
35119
- const screenshotX = x2 / 1e3 * this.actualScreenshotSize.width;
35120
- const screenshotY = y / 1e3 * this.actualScreenshotSize.height;
35121
- const scaleX = this.currentViewport.width / this.actualScreenshotSize.width;
35122
- const scaleY = this.currentViewport.height / this.actualScreenshotSize.height;
35123
35541
  return {
35124
- x: Math.floor(screenshotX * scaleX),
35125
- y: Math.floor(screenshotY * scaleY)
35542
+ x: Math.floor(x2 / 1e3 * GOOGLE_CUA_VIEWPORT.width),
35543
+ y: Math.floor(y / 1e3 * GOOGLE_CUA_VIEWPORT.height)
35126
35544
  };
35127
35545
  }
35128
35546
  captureScreenshot(options) {
@@ -35173,8 +35591,6 @@ var MicrosoftCUAClient = class extends AgentClient {
35173
35591
  };
35174
35592
  // Resized dimensions for model input
35175
35593
  this.resizedViewport = { width: 1288, height: 711 };
35176
- // Actual screenshot dimensions (tracked separately from viewport)
35177
- this.actualScreenshotSize = { width: 1288, height: 711 };
35178
35594
  this.apiKey = (clientOptions == null ? void 0 : clientOptions.apiKey) || process.env.AZURE_API_KEY || process.env.FIREWORKS_API_KEY || "";
35179
35595
  this.baseURL = (clientOptions == null ? void 0 : clientOptions.baseURL) || process.env.AZURE_ENDPOINT || process.env.FIREWORKS_ENDPOINT || "";
35180
35596
  this.clientOptions = {
@@ -35201,9 +35617,6 @@ var MicrosoftCUAClient = class extends AgentClient {
35201
35617
  this.currentViewport = { width, height };
35202
35618
  this.resizedViewport = this.smartResize(width, height);
35203
35619
  }
35204
- setScreenshotSize(width, height) {
35205
- this.actualScreenshotSize = { width, height };
35206
- }
35207
35620
  setCurrentUrl(url) {
35208
35621
  this.currentUrl = url;
35209
35622
  }
@@ -35241,7 +35654,7 @@ var MicrosoftCUAClient = class extends AgentClient {
35241
35654
  * Simplified to match Python's minimal approach
35242
35655
  */
35243
35656
  generateSystemPrompt() {
35244
- const { width, height } = this.actualScreenshotSize;
35657
+ const { width, height } = this.resizedViewport;
35245
35658
  let basePrompt = "You are a helpful assistant.";
35246
35659
  if (this.userProvidedInstructions) {
35247
35660
  basePrompt = `${basePrompt}
@@ -35402,8 +35815,8 @@ ${functionCallTemplate}`;
35402
35815
  const transformCoordinate = (coord) => {
35403
35816
  if (!coord || coord.length !== 2) return coord;
35404
35817
  const [x2, y] = coord;
35405
- const scaleX = this.currentViewport.width / this.actualScreenshotSize.width;
35406
- const scaleY = this.currentViewport.height / this.actualScreenshotSize.height;
35818
+ const scaleX = this.currentViewport.width / this.resizedViewport.width;
35819
+ const scaleY = this.currentViewport.height / this.resizedViewport.height;
35407
35820
  return [Math.round(x2 * scaleX), Math.round(y * scaleY)];
35408
35821
  };
35409
35822
  const baseAction = {
@@ -35831,6 +36244,7 @@ var modelToAgentProviderMap = {
35831
36244
  "claude-opus-4-5-20251101": "anthropic",
35832
36245
  "claude-haiku-4-5-20251001": "anthropic",
35833
36246
  "gemini-2.5-computer-use-preview-10-2025": "google",
36247
+ "gemini-3-flash-computer-use": "google",
35834
36248
  "fara-7b": "microsoft"
35835
36249
  };
35836
36250
  var AgentProvider = class _AgentProvider {
@@ -35912,14 +36326,6 @@ var AgentProvider = class _AgentProvider {
35912
36326
  // lib/v3/handlers/v3CuaAgentHandler.ts
35913
36327
  init_flowLogger();
35914
36328
  init_sdkErrors();
35915
- function getPNGDimensions(buffer) {
35916
- if (buffer.length < 24 || buffer[0] !== 137 || buffer[1] !== 80 || buffer[2] !== 78 || buffer[3] !== 71) {
35917
- throw new Error("Invalid PNG file");
35918
- }
35919
- const width = buffer.readUInt32BE(16);
35920
- const height = buffer.readUInt32BE(20);
35921
- return { width, height };
35922
- }
35923
36329
  var V3CuaAgentHandler = class {
35924
36330
  constructor(v3, logger, options, tools) {
35925
36331
  this.v3 = v3;
@@ -35950,21 +36356,6 @@ var V3CuaAgentHandler = class {
35950
36356
  this.ensureNotClosed();
35951
36357
  const page = yield this.v3.context.awaitActivePage();
35952
36358
  const screenshotBuffer = yield page.screenshot({ fullPage: false });
35953
- if (this.agentClient instanceof GoogleCUAClient || this.agentClient instanceof OpenAICUAClient || this.agentClient instanceof MicrosoftCUAClient) {
35954
- try {
35955
- const dimensions = getPNGDimensions(screenshotBuffer);
35956
- this.agentClient.setScreenshotSize(
35957
- dimensions.width,
35958
- dimensions.height
35959
- );
35960
- } catch (e2) {
35961
- this.logger({
35962
- category: "agent",
35963
- message: `Could not read screenshot dimensions: ${e2}`,
35964
- level: 1
35965
- });
35966
- }
35967
- }
35968
36359
  return screenshotBuffer.toString("base64");
35969
36360
  }));
35970
36361
  this.agentClient.setActionHandler((action) => __async(this, null, function* () {
@@ -36418,21 +36809,6 @@ var V3CuaAgentHandler = class {
36418
36809
  try {
36419
36810
  const page = yield this.v3.context.awaitActivePage();
36420
36811
  const screenshotBuffer = yield page.screenshot({ fullPage: false });
36421
- if (this.agentClient instanceof GoogleCUAClient || this.agentClient instanceof OpenAICUAClient || this.agentClient instanceof MicrosoftCUAClient) {
36422
- try {
36423
- const dimensions = getPNGDimensions(screenshotBuffer);
36424
- this.agentClient.setScreenshotSize(
36425
- dimensions.width,
36426
- dimensions.height
36427
- );
36428
- } catch (e2) {
36429
- this.logger({
36430
- category: "agent",
36431
- message: `Could not read screenshot dimensions: ${e2}`,
36432
- level: 1
36433
- });
36434
- }
36435
- }
36436
36812
  this.v3.bus.emit("agent_screenshot_taken_event", screenshotBuffer);
36437
36813
  const currentUrl = page.url();
36438
36814
  return yield this.agentClient.captureScreenshot({
@@ -37609,23 +37985,23 @@ function waitForWebSocketDebuggerUrl(port, timeoutMs) {
37609
37985
  init_sdkErrors();
37610
37986
 
37611
37987
  // lib/v3/llm/aisdk.ts
37612
- var import_ai21 = require("ai");
37988
+ var import_ai22 = require("ai");
37613
37989
  var import_uuid5 = require("uuid");
37614
37990
 
37615
37991
  // lib/v3/llm/LLMClient.ts
37616
- var import_ai20 = require("ai");
37992
+ var import_ai21 = require("ai");
37617
37993
  var AnnotatedScreenshotText = "This is a screenshot of the current page state with the elements annotated on it. Each element id is annotated with a number to the top left of it. Duplicate annotations at the same location are under each other vertically.";
37618
37994
  var LLMClient = class {
37619
37995
  constructor(modelName, userProvidedInstructions) {
37620
- this.generateObject = import_ai20.generateObject;
37621
- this.generateText = import_ai20.generateText;
37622
- this.streamText = import_ai20.streamText;
37623
- this.streamObject = import_ai20.streamObject;
37624
- this.generateImage = import_ai20.experimental_generateImage;
37625
- this.embed = import_ai20.embed;
37626
- this.embedMany = import_ai20.embedMany;
37627
- this.transcribe = import_ai20.experimental_transcribe;
37628
- this.generateSpeech = import_ai20.experimental_generateSpeech;
37996
+ this.generateObject = import_ai21.generateObject;
37997
+ this.generateText = import_ai21.generateText;
37998
+ this.streamText = import_ai21.streamText;
37999
+ this.streamObject = import_ai21.streamObject;
38000
+ this.generateImage = import_ai21.experimental_generateImage;
38001
+ this.embed = import_ai21.embed;
38002
+ this.embedMany = import_ai21.embedMany;
38003
+ this.transcribe = import_ai21.experimental_transcribe;
38004
+ this.generateSpeech = import_ai21.experimental_generateSpeech;
37629
38005
  this.modelName = modelName;
37630
38006
  this.userProvidedInstructions = userProvidedInstructions;
37631
38007
  }
@@ -37750,7 +38126,7 @@ You must respond in JSON format. respond WITH JSON. Do not include any other tex
37750
38126
  });
37751
38127
  }
37752
38128
  try {
37753
- objectResponse = yield (0, import_ai21.generateObject)({
38129
+ objectResponse = yield (0, import_ai22.generateObject)({
37754
38130
  model: this.model,
37755
38131
  messages: formattedMessages,
37756
38132
  schema: options.response_model.schema,
@@ -37770,7 +38146,7 @@ You must respond in JSON format. respond WITH JSON. Do not include any other tex
37770
38146
  operation: "generateObject",
37771
38147
  output: `[error: ${err instanceof Error ? err.message : "unknown"}]`
37772
38148
  });
37773
- if (import_ai21.NoObjectGeneratedError.isInstance(err)) {
38149
+ if (import_ai22.NoObjectGeneratedError.isInstance(err)) {
37774
38150
  (_g = this.logger) == null ? void 0 : _g.call(this, {
37775
38151
  category: "AISDK error",
37776
38152
  message: err.message,
@@ -37848,10 +38224,10 @@ You must respond in JSON format. respond WITH JSON. Do not include any other tex
37848
38224
  }
37849
38225
  const tools = {};
37850
38226
  if (options.tools && options.tools.length > 0) {
37851
- for (const tool21 of options.tools) {
37852
- tools[tool21.name] = {
37853
- description: tool21.description,
37854
- inputSchema: tool21.parameters
38227
+ for (const tool22 of options.tools) {
38228
+ tools[tool22.name] = {
38229
+ description: tool22.description,
38230
+ inputSchema: tool22.parameters
37855
38231
  };
37856
38232
  }
37857
38233
  }
@@ -37868,7 +38244,7 @@ You must respond in JSON format. respond WITH JSON. Do not include any other tex
37868
38244
  });
37869
38245
  let textResponse;
37870
38246
  try {
37871
- textResponse = yield (0, import_ai21.generateText)({
38247
+ textResponse = yield (0, import_ai22.generateText)({
37872
38248
  model: this.model,
37873
38249
  messages: formattedMessages,
37874
38250
  tools: Object.keys(tools).length > 0 ? tools : void 0,
@@ -38050,14 +38426,14 @@ var AnthropicClient = class extends LLMClient {
38050
38426
  }
38051
38427
  formattedMessages.push(screenshotMessage);
38052
38428
  }
38053
- let anthropicTools2 = (_a4 = options.tools) == null ? void 0 : _a4.map((tool21) => {
38429
+ let anthropicTools2 = (_a4 = options.tools) == null ? void 0 : _a4.map((tool22) => {
38054
38430
  return {
38055
- name: tool21.name,
38056
- description: tool21.description,
38431
+ name: tool22.name,
38432
+ description: tool22.description,
38057
38433
  input_schema: {
38058
38434
  type: "object",
38059
- properties: tool21.parameters.properties,
38060
- required: tool21.parameters.required
38435
+ properties: tool22.parameters.properties,
38436
+ required: tool22.parameters.required
38061
38437
  }
38062
38438
  };
38063
38439
  });
@@ -38244,15 +38620,15 @@ var CerebrasClient = class extends LLMClient {
38244
38620
  return __spreadProps(__spreadValues({}, baseMessage), { role: "user" });
38245
38621
  }
38246
38622
  });
38247
- let tools = (_a4 = options.tools) == null ? void 0 : _a4.map((tool21) => ({
38623
+ let tools = (_a4 = options.tools) == null ? void 0 : _a4.map((tool22) => ({
38248
38624
  type: "function",
38249
38625
  function: {
38250
- name: tool21.name,
38251
- description: tool21.description,
38626
+ name: tool22.name,
38627
+ description: tool22.description,
38252
38628
  parameters: {
38253
38629
  type: "object",
38254
- properties: tool21.parameters.properties,
38255
- required: tool21.parameters.required
38630
+ properties: tool22.parameters.properties,
38631
+ required: tool22.parameters.required
38256
38632
  }
38257
38633
  }
38258
38634
  }));
@@ -38545,18 +38921,18 @@ ${firstPartText.text}`;
38545
38921
  }
38546
38922
  return [
38547
38923
  {
38548
- functionDeclarations: tools.map((tool21) => {
38924
+ functionDeclarations: tools.map((tool22) => {
38549
38925
  let parameters = void 0;
38550
- if (tool21.parameters) {
38926
+ if (tool22.parameters) {
38551
38927
  parameters = {
38552
38928
  type: import_genai4.Type.OBJECT,
38553
- properties: tool21.parameters.properties,
38554
- required: tool21.parameters.required
38929
+ properties: tool22.parameters.properties,
38930
+ required: tool22.parameters.required
38555
38931
  };
38556
38932
  }
38557
38933
  return {
38558
- name: tool21.name,
38559
- description: tool21.description,
38934
+ name: tool22.name,
38935
+ description: tool22.description,
38560
38936
  parameters
38561
38937
  };
38562
38938
  })
@@ -38832,15 +39208,15 @@ var GroqClient = class extends LLMClient {
38832
39208
  return __spreadProps(__spreadValues({}, baseMessage), { role: "user" });
38833
39209
  }
38834
39210
  });
38835
- let tools = (_a4 = options.tools) == null ? void 0 : _a4.map((tool21) => ({
39211
+ let tools = (_a4 = options.tools) == null ? void 0 : _a4.map((tool22) => ({
38836
39212
  type: "function",
38837
39213
  function: {
38838
- name: tool21.name,
38839
- description: tool21.description,
39214
+ name: tool22.name,
39215
+ description: tool22.description,
38840
39216
  parameters: {
38841
39217
  type: "object",
38842
- properties: tool21.parameters.properties,
38843
- required: tool21.parameters.required
39218
+ properties: tool22.parameters.properties,
39219
+ required: tool22.parameters.required
38844
39220
  }
38845
39221
  }
38846
39222
  }));
@@ -39217,11 +39593,11 @@ ${parsedSchema}
39217
39593
  messages: formattedMessages,
39218
39594
  response_format: responseFormat,
39219
39595
  stream: false,
39220
- tools: (_e = options.tools) == null ? void 0 : _e.map((tool21) => ({
39596
+ tools: (_e = options.tools) == null ? void 0 : _e.map((tool22) => ({
39221
39597
  function: {
39222
- name: tool21.name,
39223
- description: tool21.description,
39224
- parameters: tool21.parameters
39598
+ name: tool22.name,
39599
+ description: tool22.description,
39600
+ parameters: tool22.parameters
39225
39601
  },
39226
39602
  type: "function"
39227
39603
  }))
@@ -41887,21 +42263,21 @@ function prepareChatTools({
41887
42263
  return { tools: void 0, toolChoice: void 0, toolWarnings };
41888
42264
  }
41889
42265
  const openaiTools2 = [];
41890
- for (const tool21 of tools) {
41891
- switch (tool21.type) {
42266
+ for (const tool22 of tools) {
42267
+ switch (tool22.type) {
41892
42268
  case "function":
41893
42269
  openaiTools2.push({
41894
42270
  type: "function",
41895
42271
  function: {
41896
- name: tool21.name,
41897
- description: tool21.description,
41898
- parameters: tool21.inputSchema,
42272
+ name: tool22.name,
42273
+ description: tool22.description,
42274
+ parameters: tool22.inputSchema,
41899
42275
  strict: structuredOutputs ? strictJsonSchema : void 0
41900
42276
  }
41901
42277
  });
41902
42278
  break;
41903
42279
  default:
41904
- toolWarnings.push({ type: "unsupported-tool", tool: tool21 });
42280
+ toolWarnings.push({ type: "unsupported-tool", tool: tool22 });
41905
42281
  break;
41906
42282
  }
41907
42283
  }
@@ -44217,22 +44593,22 @@ function prepareResponsesTools(_0) {
44217
44593
  return { tools: void 0, toolChoice: void 0, toolWarnings };
44218
44594
  }
44219
44595
  const openaiTools2 = [];
44220
- for (const tool21 of tools) {
44221
- switch (tool21.type) {
44596
+ for (const tool22 of tools) {
44597
+ switch (tool22.type) {
44222
44598
  case "function":
44223
44599
  openaiTools2.push({
44224
44600
  type: "function",
44225
- name: tool21.name,
44226
- description: tool21.description,
44227
- parameters: tool21.inputSchema,
44601
+ name: tool22.name,
44602
+ description: tool22.description,
44603
+ parameters: tool22.inputSchema,
44228
44604
  strict: strictJsonSchema
44229
44605
  });
44230
44606
  break;
44231
44607
  case "provider-defined": {
44232
- switch (tool21.id) {
44608
+ switch (tool22.id) {
44233
44609
  case "openai.file_search": {
44234
44610
  const args = yield validateTypes({
44235
- value: tool21.args,
44611
+ value: tool22.args,
44236
44612
  schema: fileSearchArgsSchema
44237
44613
  });
44238
44614
  openaiTools2.push({
@@ -44255,7 +44631,7 @@ function prepareResponsesTools(_0) {
44255
44631
  }
44256
44632
  case "openai.web_search_preview": {
44257
44633
  const args = yield validateTypes({
44258
- value: tool21.args,
44634
+ value: tool22.args,
44259
44635
  schema: webSearchPreviewArgsSchema
44260
44636
  });
44261
44637
  openaiTools2.push({
@@ -44267,7 +44643,7 @@ function prepareResponsesTools(_0) {
44267
44643
  }
44268
44644
  case "openai.web_search": {
44269
44645
  const args = yield validateTypes({
44270
- value: tool21.args,
44646
+ value: tool22.args,
44271
44647
  schema: webSearchArgsSchema
44272
44648
  });
44273
44649
  openaiTools2.push({
@@ -44280,7 +44656,7 @@ function prepareResponsesTools(_0) {
44280
44656
  }
44281
44657
  case "openai.code_interpreter": {
44282
44658
  const args = yield validateTypes({
44283
- value: tool21.args,
44659
+ value: tool22.args,
44284
44660
  schema: codeInterpreterArgsSchema
44285
44661
  });
44286
44662
  openaiTools2.push({
@@ -44291,7 +44667,7 @@ function prepareResponsesTools(_0) {
44291
44667
  }
44292
44668
  case "openai.image_generation": {
44293
44669
  const args = yield validateTypes({
44294
- value: tool21.args,
44670
+ value: tool22.args,
44295
44671
  schema: imageGenerationArgsSchema
44296
44672
  });
44297
44673
  openaiTools2.push({
@@ -44315,7 +44691,7 @@ function prepareResponsesTools(_0) {
44315
44691
  break;
44316
44692
  }
44317
44693
  default:
44318
- toolWarnings.push({ type: "unsupported-tool", tool: tool21 });
44694
+ toolWarnings.push({ type: "unsupported-tool", tool: tool22 });
44319
44695
  break;
44320
44696
  }
44321
44697
  }
@@ -44420,7 +44796,7 @@ var OpenAIResponsesLanguageModel = class {
44420
44796
  }
44421
44797
  function hasOpenAITool(id) {
44422
44798
  return (tools == null ? void 0 : tools.find(
44423
- (tool21) => tool21.type === "provider-defined" && tool21.id === id
44799
+ (tool22) => tool22.type === "provider-defined" && tool22.id === id
44424
44800
  )) != null;
44425
44801
  }
44426
44802
  const topLogprobs = typeof (openaiOptions == null ? void 0 : openaiOptions.logprobs) === "number" ? openaiOptions == null ? void 0 : openaiOptions.logprobs : (openaiOptions == null ? void 0 : openaiOptions.logprobs) === true ? TOP_LOGPROBS_MAX : void 0;
@@ -44428,7 +44804,7 @@ var OpenAIResponsesLanguageModel = class {
44428
44804
  addInclude("message.output_text.logprobs");
44429
44805
  }
44430
44806
  const webSearchToolName = (_c = tools == null ? void 0 : tools.find(
44431
- (tool21) => tool21.type === "provider-defined" && (tool21.id === "openai.web_search" || tool21.id === "openai.web_search_preview")
44807
+ (tool22) => tool22.type === "provider-defined" && (tool22.id === "openai.web_search" || tool22.id === "openai.web_search_preview")
44432
44808
  )) == null ? void 0 : _c.name;
44433
44809
  if (webSearchToolName) {
44434
44810
  addInclude("web_search_call.action.sources");
@@ -48022,25 +48398,25 @@ function prepareTools({
48022
48398
  if (tools == null) {
48023
48399
  return { tools: void 0, toolConfig: void 0, toolWarnings };
48024
48400
  }
48025
- const hasFunctionTools = tools.some((tool21) => tool21.type === "function");
48401
+ const hasFunctionTools = tools.some((tool22) => tool22.type === "function");
48026
48402
  const hasProviderDefinedTools = tools.some(
48027
- (tool21) => tool21.type === "provider-defined"
48403
+ (tool22) => tool22.type === "provider-defined"
48028
48404
  );
48029
48405
  if (hasFunctionTools && hasProviderDefinedTools) {
48030
- const functionTools = tools.filter((tool21) => tool21.type === "function");
48406
+ const functionTools = tools.filter((tool22) => tool22.type === "function");
48031
48407
  toolWarnings.push({
48032
48408
  type: "unsupported-tool",
48033
- tool: tools.find((tool21) => tool21.type === "function"),
48409
+ tool: tools.find((tool22) => tool22.type === "function"),
48034
48410
  details: `Cannot mix function tools with provider-defined tools in the same request. Falling back to provider-defined tools only. The following function tools will be ignored: ${functionTools.map((t2) => t2.name).join(", ")}. Please use either function tools or provider-defined tools, but not both.`
48035
48411
  });
48036
48412
  }
48037
48413
  if (hasProviderDefinedTools) {
48038
48414
  const googleTools22 = [];
48039
48415
  const providerDefinedTools = tools.filter(
48040
- (tool21) => tool21.type === "provider-defined"
48416
+ (tool22) => tool22.type === "provider-defined"
48041
48417
  );
48042
- providerDefinedTools.forEach((tool21) => {
48043
- switch (tool21.id) {
48418
+ providerDefinedTools.forEach((tool22) => {
48419
+ switch (tool22.id) {
48044
48420
  case "google.google_search":
48045
48421
  if (isGemini2orNewer) {
48046
48422
  googleTools22.push({ googleSearch: {} });
@@ -48048,8 +48424,8 @@ function prepareTools({
48048
48424
  googleTools22.push({
48049
48425
  googleSearchRetrieval: {
48050
48426
  dynamicRetrievalConfig: {
48051
- mode: tool21.args.mode,
48052
- dynamicThreshold: tool21.args.dynamicThreshold
48427
+ mode: tool22.args.mode,
48428
+ dynamicThreshold: tool22.args.dynamicThreshold
48053
48429
  }
48054
48430
  }
48055
48431
  });
@@ -48063,7 +48439,7 @@ function prepareTools({
48063
48439
  } else {
48064
48440
  toolWarnings.push({
48065
48441
  type: "unsupported-tool",
48066
- tool: tool21,
48442
+ tool: tool22,
48067
48443
  details: "The URL context tool is not supported with other Gemini models than Gemini 2."
48068
48444
  });
48069
48445
  }
@@ -48074,18 +48450,18 @@ function prepareTools({
48074
48450
  } else {
48075
48451
  toolWarnings.push({
48076
48452
  type: "unsupported-tool",
48077
- tool: tool21,
48453
+ tool: tool22,
48078
48454
  details: "The code execution tools is not supported with other Gemini models than Gemini 2."
48079
48455
  });
48080
48456
  }
48081
48457
  break;
48082
48458
  case "google.file_search":
48083
48459
  if (supportsFileSearch) {
48084
- googleTools22.push({ fileSearch: __spreadValues({}, tool21.args) });
48460
+ googleTools22.push({ fileSearch: __spreadValues({}, tool22.args) });
48085
48461
  } else {
48086
48462
  toolWarnings.push({
48087
48463
  type: "unsupported-tool",
48088
- tool: tool21,
48464
+ tool: tool22,
48089
48465
  details: "The file search tool is only supported with Gemini 2.5 models."
48090
48466
  });
48091
48467
  }
@@ -48096,22 +48472,22 @@ function prepareTools({
48096
48472
  retrieval: {
48097
48473
  vertex_rag_store: {
48098
48474
  rag_resources: {
48099
- rag_corpus: tool21.args.ragCorpus
48475
+ rag_corpus: tool22.args.ragCorpus
48100
48476
  },
48101
- similarity_top_k: tool21.args.topK
48477
+ similarity_top_k: tool22.args.topK
48102
48478
  }
48103
48479
  }
48104
48480
  });
48105
48481
  } else {
48106
48482
  toolWarnings.push({
48107
48483
  type: "unsupported-tool",
48108
- tool: tool21,
48484
+ tool: tool22,
48109
48485
  details: "The RAG store tool is not supported with other Gemini models than Gemini 2."
48110
48486
  });
48111
48487
  }
48112
48488
  break;
48113
48489
  default:
48114
- toolWarnings.push({ type: "unsupported-tool", tool: tool21 });
48490
+ toolWarnings.push({ type: "unsupported-tool", tool: tool22 });
48115
48491
  break;
48116
48492
  }
48117
48493
  });
@@ -48122,17 +48498,17 @@ function prepareTools({
48122
48498
  };
48123
48499
  }
48124
48500
  const functionDeclarations = [];
48125
- for (const tool21 of tools) {
48126
- switch (tool21.type) {
48501
+ for (const tool22 of tools) {
48502
+ switch (tool22.type) {
48127
48503
  case "function":
48128
48504
  functionDeclarations.push({
48129
- name: tool21.name,
48130
- description: (_a4 = tool21.description) != null ? _a4 : "",
48131
- parameters: convertJSONSchemaToOpenAPISchema(tool21.inputSchema)
48505
+ name: tool22.name,
48506
+ description: (_a4 = tool22.description) != null ? _a4 : "",
48507
+ parameters: convertJSONSchemaToOpenAPISchema(tool22.inputSchema)
48132
48508
  });
48133
48509
  break;
48134
48510
  default:
48135
- toolWarnings.push({ type: "unsupported-tool", tool: tool21 });
48511
+ toolWarnings.push({ type: "unsupported-tool", tool: tool22 });
48136
48512
  break;
48137
48513
  }
48138
48514
  }
@@ -48246,7 +48622,7 @@ var GoogleGenerativeAILanguageModel = class {
48246
48622
  schema: googleGenerativeAIProviderOptions
48247
48623
  });
48248
48624
  if ((tools == null ? void 0 : tools.some(
48249
- (tool21) => tool21.type === "provider-defined" && tool21.id === "google.vertex_rag_store"
48625
+ (tool22) => tool22.type === "provider-defined" && tool22.id === "google.vertex_rag_store"
48250
48626
  )) && !this.config.provider.startsWith("google.vertex.")) {
48251
48627
  warnings.push({
48252
48628
  type: "other",
@@ -50108,23 +50484,23 @@ function prepareTools2(_0) {
50108
50484
  return { tools: void 0, toolChoice: void 0, toolWarnings, betas };
50109
50485
  }
50110
50486
  const anthropicTools2 = [];
50111
- for (const tool21 of tools) {
50112
- switch (tool21.type) {
50487
+ for (const tool22 of tools) {
50488
+ switch (tool22.type) {
50113
50489
  case "function": {
50114
- const cacheControl = validator3.getCacheControl(tool21.providerOptions, {
50490
+ const cacheControl = validator3.getCacheControl(tool22.providerOptions, {
50115
50491
  type: "tool definition",
50116
50492
  canCache: true
50117
50493
  });
50118
50494
  anthropicTools2.push({
50119
- name: tool21.name,
50120
- description: tool21.description,
50121
- input_schema: tool21.inputSchema,
50495
+ name: tool22.name,
50496
+ description: tool22.description,
50497
+ input_schema: tool22.inputSchema,
50122
50498
  cache_control: cacheControl
50123
50499
  });
50124
50500
  break;
50125
50501
  }
50126
50502
  case "provider-defined": {
50127
- switch (tool21.id) {
50503
+ switch (tool22.id) {
50128
50504
  case "anthropic.code_execution_20250522": {
50129
50505
  betas.add("code-execution-2025-05-22");
50130
50506
  anthropicTools2.push({
@@ -50147,9 +50523,9 @@ function prepareTools2(_0) {
50147
50523
  anthropicTools2.push({
50148
50524
  name: "computer",
50149
50525
  type: "computer_20250124",
50150
- display_width_px: tool21.args.displayWidthPx,
50151
- display_height_px: tool21.args.displayHeightPx,
50152
- display_number: tool21.args.displayNumber,
50526
+ display_width_px: tool22.args.displayWidthPx,
50527
+ display_height_px: tool22.args.displayHeightPx,
50528
+ display_number: tool22.args.displayNumber,
50153
50529
  cache_control: void 0
50154
50530
  });
50155
50531
  break;
@@ -50159,9 +50535,9 @@ function prepareTools2(_0) {
50159
50535
  anthropicTools2.push({
50160
50536
  name: "computer",
50161
50537
  type: "computer_20241022",
50162
- display_width_px: tool21.args.displayWidthPx,
50163
- display_height_px: tool21.args.displayHeightPx,
50164
- display_number: tool21.args.displayNumber,
50538
+ display_width_px: tool22.args.displayWidthPx,
50539
+ display_height_px: tool22.args.displayHeightPx,
50540
+ display_number: tool22.args.displayNumber,
50165
50541
  cache_control: void 0
50166
50542
  });
50167
50543
  break;
@@ -50195,7 +50571,7 @@ function prepareTools2(_0) {
50195
50571
  }
50196
50572
  case "anthropic.text_editor_20250728": {
50197
50573
  const args = yield validateTypes2({
50198
- value: tool21.args,
50574
+ value: tool22.args,
50199
50575
  schema: textEditor_20250728ArgsSchema
50200
50576
  });
50201
50577
  anthropicTools2.push({
@@ -50235,7 +50611,7 @@ function prepareTools2(_0) {
50235
50611
  case "anthropic.web_fetch_20250910": {
50236
50612
  betas.add("web-fetch-2025-09-10");
50237
50613
  const args = yield validateTypes2({
50238
- value: tool21.args,
50614
+ value: tool22.args,
50239
50615
  schema: webFetch_20250910ArgsSchema
50240
50616
  });
50241
50617
  anthropicTools2.push({
@@ -50252,7 +50628,7 @@ function prepareTools2(_0) {
50252
50628
  }
50253
50629
  case "anthropic.web_search_20250305": {
50254
50630
  const args = yield validateTypes2({
50255
- value: tool21.args,
50631
+ value: tool22.args,
50256
50632
  schema: webSearch_20250305ArgsSchema
50257
50633
  });
50258
50634
  anthropicTools2.push({
@@ -50267,14 +50643,14 @@ function prepareTools2(_0) {
50267
50643
  break;
50268
50644
  }
50269
50645
  default: {
50270
- toolWarnings.push({ type: "unsupported-tool", tool: tool21 });
50646
+ toolWarnings.push({ type: "unsupported-tool", tool: tool22 });
50271
50647
  break;
50272
50648
  }
50273
50649
  }
50274
50650
  break;
50275
50651
  }
50276
50652
  default: {
50277
- toolWarnings.push({ type: "unsupported-tool", tool: tool21 });
50653
+ toolWarnings.push({ type: "unsupported-tool", tool: tool22 });
50278
50654
  break;
50279
50655
  }
50280
50656
  }
@@ -51233,7 +51609,7 @@ var AnthropicMessagesLanguageModel = class {
51233
51609
  betas.add("skills-2025-10-02");
51234
51610
  betas.add("files-api-2025-04-14");
51235
51611
  if (!(tools == null ? void 0 : tools.some(
51236
- (tool21) => tool21.type === "provider-defined" && tool21.id === "anthropic.code_execution_20250825"
51612
+ (tool22) => tool22.type === "provider-defined" && tool22.id === "anthropic.code_execution_20250825"
51237
51613
  ))) {
51238
51614
  warnings.push({
51239
51615
  type: "other",
@@ -53071,24 +53447,24 @@ function prepareTools3({
53071
53447
  if (tools == null) {
53072
53448
  return { tools: void 0, toolConfig: void 0, toolWarnings };
53073
53449
  }
53074
- const hasFunctionTools = tools.some((tool21) => tool21.type === "function");
53450
+ const hasFunctionTools = tools.some((tool22) => tool22.type === "function");
53075
53451
  const hasProviderDefinedTools = tools.some(
53076
- (tool21) => tool21.type === "provider-defined"
53452
+ (tool22) => tool22.type === "provider-defined"
53077
53453
  );
53078
53454
  if (hasFunctionTools && hasProviderDefinedTools) {
53079
53455
  toolWarnings.push({
53080
53456
  type: "unsupported-tool",
53081
- tool: tools.find((tool21) => tool21.type === "function"),
53457
+ tool: tools.find((tool22) => tool22.type === "function"),
53082
53458
  details: "Cannot mix function tools with provider-defined tools in the same request. Please use either function tools or provider-defined tools, but not both."
53083
53459
  });
53084
53460
  }
53085
53461
  if (hasProviderDefinedTools) {
53086
53462
  const googleTools22 = [];
53087
53463
  const providerDefinedTools = tools.filter(
53088
- (tool21) => tool21.type === "provider-defined"
53464
+ (tool22) => tool22.type === "provider-defined"
53089
53465
  );
53090
- providerDefinedTools.forEach((tool21) => {
53091
- switch (tool21.id) {
53466
+ providerDefinedTools.forEach((tool22) => {
53467
+ switch (tool22.id) {
53092
53468
  case "google.google_search":
53093
53469
  if (isGemini2) {
53094
53470
  googleTools22.push({ googleSearch: {} });
@@ -53096,8 +53472,8 @@ function prepareTools3({
53096
53472
  googleTools22.push({
53097
53473
  googleSearchRetrieval: {
53098
53474
  dynamicRetrievalConfig: {
53099
- mode: tool21.args.mode,
53100
- dynamicThreshold: tool21.args.dynamicThreshold
53475
+ mode: tool22.args.mode,
53476
+ dynamicThreshold: tool22.args.dynamicThreshold
53101
53477
  }
53102
53478
  }
53103
53479
  });
@@ -53111,7 +53487,7 @@ function prepareTools3({
53111
53487
  } else {
53112
53488
  toolWarnings.push({
53113
53489
  type: "unsupported-tool",
53114
- tool: tool21,
53490
+ tool: tool22,
53115
53491
  details: "The URL context tool is not supported with other Gemini models than Gemini 2."
53116
53492
  });
53117
53493
  }
@@ -53122,13 +53498,13 @@ function prepareTools3({
53122
53498
  } else {
53123
53499
  toolWarnings.push({
53124
53500
  type: "unsupported-tool",
53125
- tool: tool21,
53501
+ tool: tool22,
53126
53502
  details: "The code execution tools is not supported with other Gemini models than Gemini 2."
53127
53503
  });
53128
53504
  }
53129
53505
  break;
53130
53506
  default:
53131
- toolWarnings.push({ type: "unsupported-tool", tool: tool21 });
53507
+ toolWarnings.push({ type: "unsupported-tool", tool: tool22 });
53132
53508
  break;
53133
53509
  }
53134
53510
  });
@@ -53139,17 +53515,17 @@ function prepareTools3({
53139
53515
  };
53140
53516
  }
53141
53517
  const functionDeclarations = [];
53142
- for (const tool21 of tools) {
53143
- switch (tool21.type) {
53518
+ for (const tool22 of tools) {
53519
+ switch (tool22.type) {
53144
53520
  case "function":
53145
53521
  functionDeclarations.push({
53146
- name: tool21.name,
53147
- description: (_a4 = tool21.description) != null ? _a4 : "",
53148
- parameters: convertJSONSchemaToOpenAPISchema2(tool21.inputSchema)
53522
+ name: tool22.name,
53523
+ description: (_a4 = tool22.description) != null ? _a4 : "",
53524
+ parameters: convertJSONSchemaToOpenAPISchema2(tool22.inputSchema)
53149
53525
  });
53150
53526
  break;
53151
53527
  default:
53152
- toolWarnings.push({ type: "unsupported-tool", tool: tool21 });
53528
+ toolWarnings.push({ type: "unsupported-tool", tool: tool22 });
53153
53529
  break;
53154
53530
  }
53155
53531
  }
@@ -54272,16 +54648,16 @@ function prepareTools4({
54272
54648
  return { tools: void 0, toolChoice: void 0, toolWarnings };
54273
54649
  }
54274
54650
  const openaiCompatTools = [];
54275
- for (const tool21 of tools) {
54276
- if (tool21.type === "provider-defined") {
54277
- toolWarnings.push({ type: "unsupported-tool", tool: tool21 });
54651
+ for (const tool22 of tools) {
54652
+ if (tool22.type === "provider-defined") {
54653
+ toolWarnings.push({ type: "unsupported-tool", tool: tool22 });
54278
54654
  } else {
54279
54655
  openaiCompatTools.push({
54280
54656
  type: "function",
54281
54657
  function: {
54282
- name: tool21.name,
54283
- description: tool21.description,
54284
- parameters: tool21.inputSchema
54658
+ name: tool22.name,
54659
+ description: tool22.description,
54660
+ parameters: tool22.inputSchema
54285
54661
  }
54286
54662
  });
54287
54663
  }
@@ -55626,16 +56002,16 @@ function prepareTools5({
55626
56002
  return { tools: void 0, toolChoice: void 0, toolWarnings };
55627
56003
  }
55628
56004
  const xaiTools = [];
55629
- for (const tool21 of tools) {
55630
- if (tool21.type === "provider-defined") {
55631
- toolWarnings.push({ type: "unsupported-tool", tool: tool21 });
56005
+ for (const tool22 of tools) {
56006
+ if (tool22.type === "provider-defined") {
56007
+ toolWarnings.push({ type: "unsupported-tool", tool: tool22 });
55632
56008
  } else {
55633
56009
  xaiTools.push({
55634
56010
  type: "function",
55635
56011
  function: {
55636
- name: tool21.name,
55637
- description: tool21.description,
55638
- parameters: tool21.inputSchema
56012
+ name: tool22.name,
56013
+ description: tool22.description,
56014
+ parameters: tool22.inputSchema
55639
56015
  }
55640
56016
  });
55641
56017
  }
@@ -56635,21 +57011,21 @@ function prepareChatTools2({
56635
57011
  return { tools: void 0, toolChoice: void 0, toolWarnings };
56636
57012
  }
56637
57013
  const openaiTools2 = [];
56638
- for (const tool21 of tools) {
56639
- switch (tool21.type) {
57014
+ for (const tool22 of tools) {
57015
+ switch (tool22.type) {
56640
57016
  case "function":
56641
57017
  openaiTools2.push({
56642
57018
  type: "function",
56643
57019
  function: {
56644
- name: tool21.name,
56645
- description: tool21.description,
56646
- parameters: tool21.inputSchema,
57020
+ name: tool22.name,
57021
+ description: tool22.description,
57022
+ parameters: tool22.inputSchema,
56647
57023
  strict: structuredOutputs ? strictJsonSchema : void 0
56648
57024
  }
56649
57025
  });
56650
57026
  break;
56651
57027
  default:
56652
- toolWarnings.push({ type: "unsupported-tool", tool: tool21 });
57028
+ toolWarnings.push({ type: "unsupported-tool", tool: tool22 });
56653
57029
  break;
56654
57030
  }
56655
57031
  }
@@ -59231,22 +59607,22 @@ function prepareResponsesTools2(_0) {
59231
59607
  return { tools: void 0, toolChoice: void 0, toolWarnings };
59232
59608
  }
59233
59609
  const openaiTools2 = [];
59234
- for (const tool21 of tools) {
59235
- switch (tool21.type) {
59610
+ for (const tool22 of tools) {
59611
+ switch (tool22.type) {
59236
59612
  case "function":
59237
59613
  openaiTools2.push({
59238
59614
  type: "function",
59239
- name: tool21.name,
59240
- description: tool21.description,
59241
- parameters: tool21.inputSchema,
59615
+ name: tool22.name,
59616
+ description: tool22.description,
59617
+ parameters: tool22.inputSchema,
59242
59618
  strict: strictJsonSchema
59243
59619
  });
59244
59620
  break;
59245
59621
  case "provider-defined": {
59246
- switch (tool21.id) {
59622
+ switch (tool22.id) {
59247
59623
  case "openai.file_search": {
59248
59624
  const args = yield validateTypes({
59249
- value: tool21.args,
59625
+ value: tool22.args,
59250
59626
  schema: fileSearchArgsSchema3
59251
59627
  });
59252
59628
  openaiTools2.push({
@@ -59269,7 +59645,7 @@ function prepareResponsesTools2(_0) {
59269
59645
  }
59270
59646
  case "openai.web_search_preview": {
59271
59647
  const args = yield validateTypes({
59272
- value: tool21.args,
59648
+ value: tool22.args,
59273
59649
  schema: webSearchPreviewArgsSchema2
59274
59650
  });
59275
59651
  openaiTools2.push({
@@ -59281,7 +59657,7 @@ function prepareResponsesTools2(_0) {
59281
59657
  }
59282
59658
  case "openai.web_search": {
59283
59659
  const args = yield validateTypes({
59284
- value: tool21.args,
59660
+ value: tool22.args,
59285
59661
  schema: webSearchArgsSchema2
59286
59662
  });
59287
59663
  openaiTools2.push({
@@ -59294,7 +59670,7 @@ function prepareResponsesTools2(_0) {
59294
59670
  }
59295
59671
  case "openai.code_interpreter": {
59296
59672
  const args = yield validateTypes({
59297
- value: tool21.args,
59673
+ value: tool22.args,
59298
59674
  schema: codeInterpreterArgsSchema2
59299
59675
  });
59300
59676
  openaiTools2.push({
@@ -59305,7 +59681,7 @@ function prepareResponsesTools2(_0) {
59305
59681
  }
59306
59682
  case "openai.image_generation": {
59307
59683
  const args = yield validateTypes({
59308
- value: tool21.args,
59684
+ value: tool22.args,
59309
59685
  schema: imageGenerationArgsSchema2
59310
59686
  });
59311
59687
  openaiTools2.push({
@@ -59329,7 +59705,7 @@ function prepareResponsesTools2(_0) {
59329
59705
  break;
59330
59706
  }
59331
59707
  default:
59332
- toolWarnings.push({ type: "unsupported-tool", tool: tool21 });
59708
+ toolWarnings.push({ type: "unsupported-tool", tool: tool22 });
59333
59709
  break;
59334
59710
  }
59335
59711
  }
@@ -59434,7 +59810,7 @@ var OpenAIResponsesLanguageModel2 = class {
59434
59810
  }
59435
59811
  function hasOpenAITool(id) {
59436
59812
  return (tools == null ? void 0 : tools.find(
59437
- (tool21) => tool21.type === "provider-defined" && tool21.id === id
59813
+ (tool22) => tool22.type === "provider-defined" && tool22.id === id
59438
59814
  )) != null;
59439
59815
  }
59440
59816
  const topLogprobs = typeof (openaiOptions == null ? void 0 : openaiOptions.logprobs) === "number" ? openaiOptions == null ? void 0 : openaiOptions.logprobs : (openaiOptions == null ? void 0 : openaiOptions.logprobs) === true ? TOP_LOGPROBS_MAX2 : void 0;
@@ -59442,7 +59818,7 @@ var OpenAIResponsesLanguageModel2 = class {
59442
59818
  addInclude("message.output_text.logprobs");
59443
59819
  }
59444
59820
  const webSearchToolName = (_c = tools == null ? void 0 : tools.find(
59445
- (tool21) => tool21.type === "provider-defined" && (tool21.id === "openai.web_search" || tool21.id === "openai.web_search_preview")
59821
+ (tool22) => tool22.type === "provider-defined" && (tool22.id === "openai.web_search" || tool22.id === "openai.web_search_preview")
59446
59822
  )) == null ? void 0 : _c.name;
59447
59823
  if (webSearchToolName) {
59448
59824
  addInclude("web_search_call.action.sources");
@@ -60665,13 +61041,13 @@ function prepareTools6({
60665
61041
  return { tools: void 0, toolChoice: void 0, toolWarnings };
60666
61042
  }
60667
61043
  const groqTools2 = [];
60668
- for (const tool21 of tools) {
60669
- if (tool21.type === "provider-defined") {
60670
- if (tool21.id === "groq.browser_search") {
61044
+ for (const tool22 of tools) {
61045
+ if (tool22.type === "provider-defined") {
61046
+ if (tool22.id === "groq.browser_search") {
60671
61047
  if (!isBrowserSearchSupportedModel(modelId)) {
60672
61048
  toolWarnings.push({
60673
61049
  type: "unsupported-tool",
60674
- tool: tool21,
61050
+ tool: tool22,
60675
61051
  details: `Browser search is only supported on the following models: ${getSupportedModelsString()}. Current model: ${modelId}`
60676
61052
  });
60677
61053
  } else {
@@ -60680,15 +61056,15 @@ function prepareTools6({
60680
61056
  });
60681
61057
  }
60682
61058
  } else {
60683
- toolWarnings.push({ type: "unsupported-tool", tool: tool21 });
61059
+ toolWarnings.push({ type: "unsupported-tool", tool: tool22 });
60684
61060
  }
60685
61061
  } else {
60686
61062
  groqTools2.push({
60687
61063
  type: "function",
60688
61064
  function: {
60689
- name: tool21.name,
60690
- description: tool21.description,
60691
- parameters: tool21.inputSchema
61065
+ name: tool22.name,
61066
+ description: tool22.description,
61067
+ parameters: tool22.inputSchema
60692
61068
  }
60693
61069
  });
60694
61070
  }
@@ -61740,16 +62116,16 @@ function prepareTools7({
61740
62116
  return { tools: void 0, toolChoice: void 0, toolWarnings };
61741
62117
  }
61742
62118
  const mistralTools = [];
61743
- for (const tool21 of tools) {
61744
- if (tool21.type === "provider-defined") {
61745
- toolWarnings.push({ type: "unsupported-tool", tool: tool21 });
62119
+ for (const tool22 of tools) {
62120
+ if (tool22.type === "provider-defined") {
62121
+ toolWarnings.push({ type: "unsupported-tool", tool: tool22 });
61746
62122
  } else {
61747
62123
  mistralTools.push({
61748
62124
  type: "function",
61749
62125
  function: {
61750
- name: tool21.name,
61751
- description: tool21.description,
61752
- parameters: tool21.inputSchema
62126
+ name: tool22.name,
62127
+ description: tool22.description,
62128
+ parameters: tool22.inputSchema
61753
62129
  }
61754
62130
  });
61755
62131
  }
@@ -61769,7 +62145,7 @@ function prepareTools7({
61769
62145
  case "tool":
61770
62146
  return {
61771
62147
  tools: mistralTools.filter(
61772
- (tool21) => tool21.function.name === toolChoice.toolName
62148
+ (tool22) => tool22.function.name === toolChoice.toolName
61773
62149
  ),
61774
62150
  toolChoice: "any",
61775
62151
  toolWarnings
@@ -63552,10 +63928,10 @@ function prepareResponsesTools3({
63552
63928
  return { tools: void 0, toolChoice: void 0, toolWarnings };
63553
63929
  }
63554
63930
  const ollamaTools = [];
63555
- for (const tool21 of tools) {
63556
- switch (tool21.type) {
63931
+ for (const tool22 of tools) {
63932
+ switch (tool22.type) {
63557
63933
  case "function": {
63558
- let parameters = tool21.inputSchema;
63934
+ let parameters = tool22.inputSchema;
63559
63935
  if (!parameters) {
63560
63936
  parameters = {
63561
63937
  type: "object",
@@ -63571,15 +63947,15 @@ function prepareResponsesTools3({
63571
63947
  ollamaTools.push({
63572
63948
  type: "function",
63573
63949
  function: {
63574
- name: tool21.name,
63575
- description: tool21.description,
63950
+ name: tool22.name,
63951
+ description: tool22.description,
63576
63952
  parameters
63577
63953
  }
63578
63954
  });
63579
63955
  break;
63580
63956
  }
63581
63957
  default:
63582
- toolWarnings.push({ type: "unsupported-tool", tool: tool21 });
63958
+ toolWarnings.push({ type: "unsupported-tool", tool: tool22 });
63583
63959
  break;
63584
63960
  }
63585
63961
  }
@@ -64183,7 +64559,8 @@ var AISDKProvidersWithAPIKey = {
64183
64559
  togetherai: createTogetherAI,
64184
64560
  mistral: createMistral,
64185
64561
  deepseek: createDeepSeek,
64186
- perplexity: createPerplexity
64562
+ perplexity: createPerplexity,
64563
+ ollama: createOllama
64187
64564
  };
64188
64565
  var modelToProviderMap = {
64189
64566
  "gpt-4.1": "openai",
@@ -64220,7 +64597,8 @@ var modelToProviderMap = {
64220
64597
  "gemini-2.5-pro-preview-03-25": "google"
64221
64598
  };
64222
64599
  function getAISDKLanguageModel(subProvider, subModelName, clientOptions) {
64223
- if (clientOptions && Object.keys(clientOptions).length > 0) {
64600
+ const hasValidOptions = clientOptions && Object.values(clientOptions).some((v) => v !== void 0 && v !== null);
64601
+ if (hasValidOptions) {
64224
64602
  const creator = AISDKProvidersWithAPIKey[subProvider];
64225
64603
  if (!creator) {
64226
64604
  throw new UnsupportedAISDKModelProviderError(
@@ -64377,13 +64755,13 @@ var resolveTools = (clients, userTools) => __async(null, null, function* () {
64377
64755
  const clientTools = yield clientInstance.listTools({
64378
64756
  cursor: nextCursor
64379
64757
  });
64380
- for (const tool21 of clientTools.tools) {
64381
- tools[tool21.name] = {
64382
- description: tool21.description,
64383
- inputSchema: jsonSchemaToZod(tool21.inputSchema),
64758
+ for (const tool22 of clientTools.tools) {
64759
+ tools[tool22.name] = {
64760
+ description: tool22.description,
64761
+ inputSchema: jsonSchemaToZod(tool22.inputSchema),
64384
64762
  execute: (input) => __async(null, null, function* () {
64385
64763
  const result = yield clientInstance.callTool({
64386
- name: tool21.name,
64764
+ name: tool22.name,
64387
64765
  arguments: input
64388
64766
  });
64389
64767
  return result;
@@ -64406,6 +64784,7 @@ var AVAILABLE_CUA_MODELS = [
64406
64784
  "anthropic/claude-sonnet-4-20250514",
64407
64785
  "anthropic/claude-sonnet-4-5-20250929",
64408
64786
  "google/gemini-2.5-computer-use-preview-10-2025",
64787
+ "google/gemini-3-flash-computer-use",
64409
64788
  "microsoft/fara-7b"
64410
64789
  ];
64411
64790
 
@@ -64459,6 +64838,7 @@ __export(api_exports, {
64459
64838
  ReplayPageSchema: () => ReplayPageSchema,
64460
64839
  ReplayResponseSchema: () => ReplayResponseSchema,
64461
64840
  ReplayResultSchema: () => ReplayResultSchema,
64841
+ SessionEndRequestSchema: () => SessionEndRequestSchema,
64462
64842
  SessionEndResponseSchema: () => SessionEndResponseSchema,
64463
64843
  SessionEndResultSchema: () => SessionEndResultSchema,
64464
64844
  SessionHeadersSchema: () => SessionHeadersSchema,
@@ -64718,6 +65098,11 @@ var SessionStartResponseSchema = wrapResponse(
64718
65098
  SessionStartResultSchema,
64719
65099
  "SessionStartResponse"
64720
65100
  );
65101
+ var SessionEndRequestSchema = import_v4107.z.object({
65102
+ // Dummy property to ensure Stainless generates body parameter
65103
+ // The server accepts {} (this field should be omitted)
65104
+ _forceBody: import_v4107.z.undefined().optional()
65105
+ }).strict().meta({ id: "SessionEndRequest" });
64721
65106
  var SessionEndResultSchema = import_v4107.z.object({}).strict().meta({ id: "SessionEndResult" });
64722
65107
  var SessionEndResponseSchema = import_v4107.z.object({
64723
65108
  success: import_v4107.z.boolean().meta({
@@ -65182,7 +65567,7 @@ init_response2();
65182
65567
  init_sdkErrors();
65183
65568
 
65184
65569
  // examples/external_clients/aisdk.ts
65185
- var import_ai22 = require("ai");
65570
+ var import_ai23 = require("ai");
65186
65571
  var AISdkClient2 = class extends LLMClient {
65187
65572
  constructor({ model }) {
65188
65573
  super(model.modelId);
@@ -65244,7 +65629,7 @@ var AISdkClient2 = class extends LLMClient {
65244
65629
  }
65245
65630
  );
65246
65631
  if (options.response_model) {
65247
- const response2 = yield (0, import_ai22.generateObject)({
65632
+ const response2 = yield (0, import_ai23.generateObject)({
65248
65633
  model: this.model,
65249
65634
  messages: formattedMessages,
65250
65635
  schema: options.response_model.schema
@@ -65267,7 +65652,7 @@ var AISdkClient2 = class extends LLMClient {
65267
65652
  inputSchema: rawTool.parameters
65268
65653
  };
65269
65654
  }
65270
- const response = yield (0, import_ai22.generateText)({
65655
+ const response = yield (0, import_ai23.generateText)({
65271
65656
  model: this.model,
65272
65657
  messages: formattedMessages,
65273
65658
  tools
@@ -66629,6 +67014,9 @@ function validateExperimentalFeatures(options) {
66629
67014
  if ((executeOptions == null ? void 0 : executeOptions.excludeTools) && executeOptions.excludeTools.length > 0) {
66630
67015
  unsupportedFeatures.push("excludeTools");
66631
67016
  }
67017
+ if (executeOptions == null ? void 0 : executeOptions.output) {
67018
+ unsupportedFeatures.push("output schema");
67019
+ }
66632
67020
  if (unsupportedFeatures.length > 0) {
66633
67021
  throw new StagehandInvalidArgumentError(
66634
67022
  `${unsupportedFeatures.join(", ")} ${unsupportedFeatures.length === 1 ? "is" : "are"} not supported with CUA (Computer Use Agent) mode.`
@@ -66658,6 +67046,9 @@ function validateExperimentalFeatures(options) {
66658
67046
  if (executeOptions.excludeTools && executeOptions.excludeTools.length > 0) {
66659
67047
  features.push("excludeTools");
66660
67048
  }
67049
+ if (executeOptions.output) {
67050
+ features.push("output schema");
67051
+ }
66661
67052
  }
66662
67053
  if (features.length > 0) {
66663
67054
  throw new ExperimentalNotConfiguredError(`Agent ${features.join(", ")}`);
@@ -67440,6 +67831,7 @@ var _V3 = class _V3 {
67440
67831
  );
67441
67832
  }
67442
67833
  const page = yield this.resolvePage(options == null ? void 0 : options.page);
67834
+ const actCacheLlmClient = (options == null ? void 0 : options.model) ? this.resolveLlmClient(options.model) : void 0;
67443
67835
  let actCacheContext = null;
67444
67836
  const canUseCache = typeof input === "string" && !this.isAgentReplayRecording() && this.actCache.enabled;
67445
67837
  if (canUseCache) {
@@ -67452,7 +67844,8 @@ var _V3 = class _V3 {
67452
67844
  const cachedResult = yield this.actCache.tryReplay(
67453
67845
  actCacheContext,
67454
67846
  page,
67455
- options == null ? void 0 : options.timeout
67847
+ options == null ? void 0 : options.timeout,
67848
+ actCacheLlmClient
67456
67849
  );
67457
67850
  if (cachedResult) {
67458
67851
  this.addToHistory(
@@ -67855,7 +68248,13 @@ var _V3 = class _V3 {
67855
68248
  configSignature: agentConfigSignature,
67856
68249
  page: yield this.ctx.awaitActivePage()
67857
68250
  }) : null;
67858
- return { handler, resolvedOptions, instruction, cacheContext };
68251
+ return {
68252
+ handler,
68253
+ resolvedOptions,
68254
+ instruction,
68255
+ cacheContext,
68256
+ llmClient: agentLlmClient
68257
+ };
67859
68258
  });
67860
68259
  }
67861
68260
  agent(options) {
@@ -68005,13 +68404,16 @@ Do not ask follow up questions, the user will trust your judgement.`
68005
68404
  args: [instructionOrOptions]
68006
68405
  });
68007
68406
  if (isStreaming) {
68008
- const { handler: handler2, resolvedOptions: resolvedOptions2, cacheContext: cacheContext2 } = yield this.prepareAgentExecution(
68407
+ const { handler: handler2, resolvedOptions: resolvedOptions2, cacheContext: cacheContext2, llmClient: llmClient2 } = yield this.prepareAgentExecution(
68009
68408
  options,
68010
68409
  instructionOrOptions,
68011
68410
  agentConfigSignature
68012
68411
  );
68013
68412
  if (cacheContext2) {
68014
- const replayed = yield this.agentCache.tryReplayAsStream(cacheContext2);
68413
+ const replayed = yield this.agentCache.tryReplayAsStream(
68414
+ cacheContext2,
68415
+ llmClient2
68416
+ );
68015
68417
  if (replayed) {
68016
68418
  SessionFileLogger.logAgentTaskCompleted({ cacheHit: true });
68017
68419
  return replayed;
@@ -68034,13 +68436,16 @@ Do not ask follow up questions, the user will trust your judgement.`
68034
68436
  SessionFileLogger.logAgentTaskCompleted();
68035
68437
  return streamResult;
68036
68438
  }
68037
- const { handler, resolvedOptions, cacheContext } = yield this.prepareAgentExecution(
68439
+ const { handler, resolvedOptions, cacheContext, llmClient } = yield this.prepareAgentExecution(
68038
68440
  options,
68039
68441
  instructionOrOptions,
68040
68442
  agentConfigSignature
68041
68443
  );
68042
68444
  if (cacheContext) {
68043
- const replayed = yield this.agentCache.tryReplay(cacheContext);
68445
+ const replayed = yield this.agentCache.tryReplay(
68446
+ cacheContext,
68447
+ llmClient
68448
+ );
68044
68449
  if (replayed) {
68045
68450
  SessionFileLogger.logAgentTaskCompleted({ cacheHit: true });
68046
68451
  return replayed;
@@ -68333,6 +68738,9 @@ I'm providing ${screenshots.length} screenshots showing the progression of the t
68333
68738
  });
68334
68739
  }
68335
68740
  };
68741
+
68742
+ // lib/v3/index.ts
68743
+ var import_ai24 = require("ai");
68336
68744
  // Annotate the CommonJS export names for ESM import in node:
68337
68745
  0 && (module.exports = {
68338
68746
  AISdkClient,
@@ -68415,6 +68823,7 @@ I'm providing ${screenshots.length} screenshots showing the progression of the t
68415
68823
  providerEnvVarMap,
68416
68824
  toGeminiSchema,
68417
68825
  toJsonSchema,
68826
+ tool,
68418
68827
  transformSchema,
68419
68828
  trimTrailingTextNode,
68420
68829
  validateZodSchema