@browserbasehq/orca 3.0.7-alpha-1 → 3.0.8-google-cua

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -179,7 +179,7 @@ var __forAwait = (obj, it, method) => (it = obj[__knownSymbol("asyncIterator")])
179
179
  var STAGEHAND_VERSION;
180
180
  var init_version = __esm({
181
181
  "lib/version.ts"() {
182
- STAGEHAND_VERSION = "3.0.7-alpha-1";
182
+ STAGEHAND_VERSION = "3.0.7";
183
183
  }
184
184
  });
185
185
 
@@ -201,7 +201,7 @@ var init_sdkErrors = __esm({
201
201
  `
202
202
  Hey! We're sorry you ran into an error.
203
203
  Stagehand version: ${STAGEHAND_VERSION}
204
- If you need help, please open a Github issue or reach out to us on Slack: https://stagehand.dev/slack
204
+ If you need help, please open a Github issue or reach out to us on Discord: https://stagehand.dev/discord
205
205
 
206
206
  Full error:
207
207
  ${error.message}`
@@ -26278,6 +26278,16 @@ var init_page = __esm({
26278
26278
  yield this.waitForMainLoadState(state, timeoutMs != null ? timeoutMs : 15e3);
26279
26279
  });
26280
26280
  }
26281
+ /**
26282
+ * Wait for a specified amount of time.
26283
+ *
26284
+ * @param ms The number of milliseconds to wait.
26285
+ */
26286
+ waitForTimeout(ms) {
26287
+ return __async(this, null, function* () {
26288
+ return new Promise((resolve3) => setTimeout(resolve3, ms));
26289
+ });
26290
+ }
26281
26291
  evaluate(pageFunctionOrExpression, arg) {
26282
26292
  return __async(this, null, function* () {
26283
26293
  var _a4;
@@ -26803,6 +26813,7 @@ var init_page = __esm({
26803
26813
  // Modifier keys
26804
26814
  case "cmd":
26805
26815
  case "command":
26816
+ case "controlormeta":
26806
26817
  return this.isMacOS() ? "Meta" : "Control";
26807
26818
  case "win":
26808
26819
  case "windows":
@@ -27145,6 +27156,7 @@ __export(v3_exports, {
27145
27156
  providerEnvVarMap: () => providerEnvVarMap,
27146
27157
  toGeminiSchema: () => toGeminiSchema,
27147
27158
  toJsonSchema: () => toJsonSchema,
27159
+ tool: () => import_ai24.tool,
27148
27160
  transformSchema: () => transformSchema,
27149
27161
  trimTrailingTextNode: () => trimTrailingTextNode,
27150
27162
  validateZodSchema: () => validateZodSchema
@@ -27718,17 +27730,20 @@ var providerEnvVarMap = {
27718
27730
  xai: "XAI_API_KEY",
27719
27731
  google_legacy: "GOOGLE_API_KEY"
27720
27732
  };
27733
+ var providersWithoutApiKey = /* @__PURE__ */ new Set(["ollama"]);
27721
27734
  function loadApiKeyFromEnv(provider, logger) {
27722
27735
  if (!provider) {
27723
27736
  return void 0;
27724
27737
  }
27725
27738
  const envVarName = providerEnvVarMap[provider];
27726
27739
  if (!envVarName) {
27727
- logger({
27728
- category: "init",
27729
- message: `No known environment variable for provider '${provider}'`,
27730
- level: 0
27731
- });
27740
+ if (!providersWithoutApiKey.has(provider)) {
27741
+ logger({
27742
+ category: "init",
27743
+ message: `No known environment variable for provider '${provider}'`,
27744
+ level: 0
27745
+ });
27746
+ }
27732
27747
  return void 0;
27733
27748
  }
27734
27749
  const apiKeyFromEnv = Array.isArray(envVarName) ? envVarName.map((name) => process.env[name]).find((key) => key && key.length > 0) : process.env[envVarName];
@@ -28117,7 +28132,7 @@ var ActCache = class {
28117
28132
  };
28118
28133
  });
28119
28134
  }
28120
- tryReplay(context, page, timeout) {
28135
+ tryReplay(context, page, timeout, llmClientOverride) {
28121
28136
  return __async(this, null, function* () {
28122
28137
  var _a4;
28123
28138
  if (!this.enabled) return null;
@@ -28170,7 +28185,13 @@ var ActCache = class {
28170
28185
  }
28171
28186
  }
28172
28187
  });
28173
- return yield this.replayCachedActions(context, entry, page, timeout);
28188
+ return yield this.replayCachedActions(
28189
+ context,
28190
+ entry,
28191
+ page,
28192
+ timeout,
28193
+ llmClientOverride
28194
+ );
28174
28195
  });
28175
28196
  }
28176
28197
  store(context, result) {
@@ -28220,12 +28241,13 @@ var ActCache = class {
28220
28241
  });
28221
28242
  return (0, import_crypto.createHash)("sha256").update(payload).digest("hex");
28222
28243
  }
28223
- replayCachedActions(context, entry, page, timeout) {
28244
+ replayCachedActions(context, entry, page, timeout, llmClientOverride) {
28224
28245
  return __async(this, null, function* () {
28225
28246
  const handler = this.getActHandler();
28226
28247
  if (!handler) {
28227
28248
  throw new StagehandNotInitializedError("act()");
28228
28249
  }
28250
+ const effectiveClient = llmClientOverride != null ? llmClientOverride : this.getDefaultLlmClient();
28229
28251
  const execute = () => __async(this, null, function* () {
28230
28252
  var _a4, _b, _c;
28231
28253
  const actionResults = [];
@@ -28234,7 +28256,7 @@ var ActCache = class {
28234
28256
  action,
28235
28257
  page,
28236
28258
  this.domSettleTimeoutMs,
28237
- this.getDefaultLlmClient(),
28259
+ effectiveClient,
28238
28260
  void 0,
28239
28261
  context.variables
28240
28262
  );
@@ -28472,7 +28494,7 @@ var AgentCache = class {
28472
28494
  };
28473
28495
  });
28474
28496
  }
28475
- tryReplay(context) {
28497
+ tryReplay(context, llmClientOverride) {
28476
28498
  return __async(this, null, function* () {
28477
28499
  if (!this.enabled) return null;
28478
28500
  const {
@@ -28505,7 +28527,7 @@ var AgentCache = class {
28505
28527
  url: { value: context.startUrl, type: "string" }
28506
28528
  }
28507
28529
  });
28508
- return yield this.replayAgentCacheEntry(context, entry);
28530
+ return yield this.replayAgentCacheEntry(context, entry, llmClientOverride);
28509
28531
  });
28510
28532
  }
28511
28533
  /**
@@ -28524,9 +28546,9 @@ var AgentCache = class {
28524
28546
  * and await `result` even when the response comes from cache, maintaining
28525
28547
  * API consistency regardless of whether the result was cached or live.
28526
28548
  */
28527
- tryReplayAsStream(context) {
28549
+ tryReplayAsStream(context, llmClientOverride) {
28528
28550
  return __async(this, null, function* () {
28529
- const result = yield this.tryReplay(context);
28551
+ const result = yield this.tryReplay(context, llmClientOverride);
28530
28552
  if (!result) return null;
28531
28553
  return this.createCachedStreamResult(result);
28532
28554
  });
@@ -28752,17 +28774,23 @@ var AgentCache = class {
28752
28774
  }
28753
28775
  return value;
28754
28776
  }
28755
- replayAgentCacheEntry(context, entry) {
28777
+ replayAgentCacheEntry(context, entry, llmClientOverride) {
28756
28778
  return __async(this, null, function* () {
28757
28779
  var _a4, _b, _c;
28758
28780
  const ctx = this.getContext();
28759
28781
  const handler = this.getActHandler();
28760
28782
  if (!ctx || !handler) return null;
28783
+ const effectiveClient = llmClientOverride != null ? llmClientOverride : this.getDefaultLlmClient();
28761
28784
  try {
28762
28785
  const updatedSteps = [];
28763
28786
  let stepsChanged = false;
28764
28787
  for (const step of (_a4 = entry.steps) != null ? _a4 : []) {
28765
- const replayedStep = (_b = yield this.executeAgentReplayStep(step, ctx, handler)) != null ? _b : step;
28788
+ const replayedStep = (_b = yield this.executeAgentReplayStep(
28789
+ step,
28790
+ ctx,
28791
+ handler,
28792
+ effectiveClient
28793
+ )) != null ? _b : step;
28766
28794
  stepsChanged || (stepsChanged = replayedStep !== step);
28767
28795
  updatedSteps.push(replayedStep);
28768
28796
  }
@@ -28795,20 +28823,22 @@ var AgentCache = class {
28795
28823
  }
28796
28824
  });
28797
28825
  }
28798
- executeAgentReplayStep(step, ctx, handler) {
28826
+ executeAgentReplayStep(step, ctx, handler, llmClient) {
28799
28827
  return __async(this, null, function* () {
28800
28828
  switch (step.type) {
28801
28829
  case "act":
28802
28830
  return yield this.replayAgentActStep(
28803
28831
  step,
28804
28832
  ctx,
28805
- handler
28833
+ handler,
28834
+ llmClient
28806
28835
  );
28807
28836
  case "fillForm":
28808
28837
  return yield this.replayAgentFillFormStep(
28809
28838
  step,
28810
28839
  ctx,
28811
- handler
28840
+ handler,
28841
+ llmClient
28812
28842
  );
28813
28843
  case "goto":
28814
28844
  yield this.replayAgentGotoStep(step, ctx);
@@ -28840,7 +28870,7 @@ var AgentCache = class {
28840
28870
  }
28841
28871
  });
28842
28872
  }
28843
- replayAgentActStep(step, ctx, handler) {
28873
+ replayAgentActStep(step, ctx, handler, llmClient) {
28844
28874
  return __async(this, null, function* () {
28845
28875
  const actions = Array.isArray(step.actions) ? step.actions : [];
28846
28876
  if (actions.length > 0) {
@@ -28851,7 +28881,7 @@ var AgentCache = class {
28851
28881
  action,
28852
28882
  page,
28853
28883
  this.domSettleTimeoutMs,
28854
- this.getDefaultLlmClient()
28884
+ llmClient
28855
28885
  );
28856
28886
  if (result.success && Array.isArray(result.actions)) {
28857
28887
  updatedActions.push(...cloneForCache(result.actions));
@@ -28868,7 +28898,7 @@ var AgentCache = class {
28868
28898
  return step;
28869
28899
  });
28870
28900
  }
28871
- replayAgentFillFormStep(step, ctx, handler) {
28901
+ replayAgentFillFormStep(step, ctx, handler, llmClient) {
28872
28902
  return __async(this, null, function* () {
28873
28903
  var _a4;
28874
28904
  const actions = Array.isArray(step.actions) && step.actions.length > 0 ? step.actions : (_a4 = step.observeResults) != null ? _a4 : [];
@@ -28882,7 +28912,7 @@ var AgentCache = class {
28882
28912
  action,
28883
28913
  page,
28884
28914
  this.domSettleTimeoutMs,
28885
- this.getDefaultLlmClient()
28915
+ llmClient
28886
28916
  );
28887
28917
  if (result.success && Array.isArray(result.actions)) {
28888
28918
  updatedActions.push(...cloneForCache(result.actions));
@@ -31115,7 +31145,25 @@ var screenshotTool = (v3) => (0, import_ai3.tool)({
31115
31145
  // lib/v3/agent/tools/wait.ts
31116
31146
  var import_ai4 = require("ai");
31117
31147
  var import_zod8 = require("zod");
31118
- var waitTool = (v3) => (0, import_ai4.tool)({
31148
+
31149
+ // lib/v3/agent/utils/screenshotHandler.ts
31150
+ var DEFAULT_DELAY_MS = 500;
31151
+ function waitAndCaptureScreenshot(_0) {
31152
+ return __async(this, arguments, function* (page, delayMs = DEFAULT_DELAY_MS) {
31153
+ if (delayMs > 0) {
31154
+ yield page.waitForTimeout(delayMs);
31155
+ }
31156
+ try {
31157
+ const buffer = yield page.screenshot({ fullPage: false });
31158
+ return buffer.toString("base64");
31159
+ } catch (e2) {
31160
+ return void 0;
31161
+ }
31162
+ });
31163
+ }
31164
+
31165
+ // lib/v3/agent/tools/wait.ts
31166
+ var waitTool = (v3, mode) => (0, import_ai4.tool)({
31119
31167
  description: "Wait for a specified time",
31120
31168
  inputSchema: import_zod8.z.object({
31121
31169
  timeMs: import_zod8.z.number().describe("Time in milliseconds")
@@ -31136,8 +31184,32 @@ var waitTool = (v3) => (0, import_ai4.tool)({
31136
31184
  if (timeMs > 0) {
31137
31185
  v3.recordAgentReplayStep({ type: "wait", timeMs });
31138
31186
  }
31187
+ if (mode === "hybrid") {
31188
+ const page = yield v3.context.awaitActivePage();
31189
+ const screenshotBase64 = yield waitAndCaptureScreenshot(page, 0);
31190
+ return { success: true, waited: timeMs, screenshotBase64 };
31191
+ }
31139
31192
  return { success: true, waited: timeMs };
31140
- })
31193
+ }),
31194
+ toModelOutput: (result) => {
31195
+ const content = [
31196
+ {
31197
+ type: "text",
31198
+ text: JSON.stringify({
31199
+ success: result.success,
31200
+ waited: result.waited
31201
+ })
31202
+ }
31203
+ ];
31204
+ if (result.screenshotBase64) {
31205
+ content.push({
31206
+ type: "media",
31207
+ mediaType: "image/png",
31208
+ data: result.screenshotBase64
31209
+ });
31210
+ }
31211
+ return { type: "content", value: content };
31212
+ }
31141
31213
  });
31142
31214
 
31143
31215
  // lib/v3/agent/tools/navback.ts
@@ -31164,26 +31236,12 @@ var navBackTool = (v3) => (0, import_ai5.tool)({
31164
31236
  })
31165
31237
  });
31166
31238
 
31167
- // lib/v3/agent/tools/close.ts
31239
+ // lib/v3/agent/tools/ariaTree.ts
31168
31240
  var import_ai6 = require("ai");
31169
31241
  var import_zod10 = require("zod");
31170
- var closeTool = () => (0, import_ai6.tool)({
31171
- description: "Complete the task and close",
31172
- inputSchema: import_zod10.z.object({
31173
- reasoning: import_zod10.z.string().describe("Summary of what was accomplished"),
31174
- taskComplete: import_zod10.z.boolean().describe("Whether the task was completed successfully")
31175
- }),
31176
- execute: (_0) => __async(null, [_0], function* ({ reasoning, taskComplete }) {
31177
- return { success: true, reasoning, taskComplete };
31178
- })
31179
- });
31180
-
31181
- // lib/v3/agent/tools/ariaTree.ts
31182
- var import_ai7 = require("ai");
31183
- var import_zod11 = require("zod");
31184
- var ariaTreeTool = (v3) => (0, import_ai7.tool)({
31242
+ var ariaTreeTool = (v3) => (0, import_ai6.tool)({
31185
31243
  description: "gets the accessibility (ARIA) hybrid tree text for the current page. use this to understand structure and content.",
31186
- inputSchema: import_zod11.z.object({}),
31244
+ inputSchema: import_zod10.z.object({}),
31187
31245
  execute: () => __async(null, null, function* () {
31188
31246
  v3.logger({
31189
31247
  category: "agent",
@@ -31210,18 +31268,18 @@ ${result.content}` }]
31210
31268
  });
31211
31269
 
31212
31270
  // lib/v3/agent/tools/fillform.ts
31213
- var import_ai8 = require("ai");
31214
- var import_zod12 = require("zod");
31215
- var fillFormTool = (v3, executionModel) => (0, import_ai8.tool)({
31271
+ var import_ai7 = require("ai");
31272
+ var import_zod11 = require("zod");
31273
+ var fillFormTool = (v3, executionModel) => (0, import_ai7.tool)({
31216
31274
  description: `\u{1F4DD} FORM FILL - MULTI-FIELD INPUT TOOL
31217
31275
  For any form with 2+ inputs/textareas. Faster than individual typing.`,
31218
- inputSchema: import_zod12.z.object({
31219
- fields: import_zod12.z.array(
31220
- import_zod12.z.object({
31221
- action: import_zod12.z.string().describe(
31276
+ inputSchema: import_zod11.z.object({
31277
+ fields: import_zod11.z.array(
31278
+ import_zod11.z.object({
31279
+ action: import_zod11.z.string().describe(
31222
31280
  'Description of typing action, e.g. "type foo into the email field"'
31223
31281
  ),
31224
- value: import_zod12.z.string().describe("Text to type into the target")
31282
+ value: import_zod11.z.string().describe("Text to type into the target")
31225
31283
  })
31226
31284
  ).min(1, "Provide at least one field to fill")
31227
31285
  }),
@@ -31264,8 +31322,8 @@ For any form with 2+ inputs/textareas. Faster than individual typing.`,
31264
31322
  });
31265
31323
 
31266
31324
  // lib/v3/agent/tools/scroll.ts
31267
- var import_ai9 = require("ai");
31268
- var import_zod13 = require("zod");
31325
+ var import_ai8 = require("ai");
31326
+ var import_zod12 = require("zod");
31269
31327
 
31270
31328
  // lib/v3/agent/utils/coordinateNormalization.ts
31271
31329
  var DEFAULT_VIEWPORT = { width: 1288, height: 711 };
@@ -31289,11 +31347,11 @@ function processCoordinates(x2, y, provider) {
31289
31347
  }
31290
31348
 
31291
31349
  // lib/v3/agent/tools/scroll.ts
31292
- var scrollTool = (v3) => (0, import_ai9.tool)({
31350
+ var scrollTool = (v3) => (0, import_ai8.tool)({
31293
31351
  description: "Scroll the page up or down by a percentage of the viewport height. Default is 80%, and what should be typically used for general page scrolling",
31294
- inputSchema: import_zod13.z.object({
31295
- direction: import_zod13.z.enum(["up", "down"]),
31296
- percentage: import_zod13.z.number().min(1).max(200).optional()
31352
+ inputSchema: import_zod12.z.object({
31353
+ direction: import_zod12.z.enum(["up", "down"]),
31354
+ percentage: import_zod12.z.number().min(1).max(200).optional()
31297
31355
  }),
31298
31356
  execute: (_0) => __async(null, [_0], function* ({ direction, percentage = 80 }) {
31299
31357
  v3.logger({
@@ -31327,16 +31385,20 @@ var scrollTool = (v3) => (0, import_ai9.tool)({
31327
31385
  };
31328
31386
  })
31329
31387
  });
31330
- var scrollVisionTool = (v3, provider) => (0, import_ai9.tool)({
31388
+ var scrollVisionTool = (v3, provider) => (0, import_ai8.tool)({
31331
31389
  description: `Scroll the page up or down. For general page scrolling, no coordinates needed. Only provide coordinates when scrolling inside a nested scrollable element (e.g., a dropdown menu, modal with overflow, or scrollable sidebar). Default is 80%, and what should be typically used for general page scrolling`,
31332
- inputSchema: import_zod13.z.object({
31333
- direction: import_zod13.z.enum(["up", "down"]),
31334
- coordinates: import_zod13.z.array(import_zod13.z.number()).optional().describe(
31390
+ inputSchema: import_zod12.z.object({
31391
+ direction: import_zod12.z.enum(["up", "down"]),
31392
+ coordinates: import_zod12.z.array(import_zod12.z.number()).optional().describe(
31335
31393
  "Only use coordinates for scrolling inside a nested scrollable element - provide (x, y) within that element"
31336
31394
  ),
31337
- percentage: import_zod13.z.number().min(1).max(200).optional()
31395
+ percentage: import_zod12.z.number().min(1).max(200).optional()
31338
31396
  }),
31339
- execute: (_0) => __async(null, [_0], function* ({ direction, coordinates, percentage = 80 }) {
31397
+ execute: (_0) => __async(null, [_0], function* ({
31398
+ direction,
31399
+ coordinates,
31400
+ percentage = 80
31401
+ }) {
31340
31402
  const page = yield v3.context.awaitActivePage();
31341
31403
  const { w, h: h2 } = yield page.mainFrame().evaluate("({ w: window.innerWidth, h: window.innerHeight })");
31342
31404
  let cx;
@@ -31372,6 +31434,7 @@ var scrollVisionTool = (v3, provider) => (0, import_ai9.tool)({
31372
31434
  const scrollDistance = Math.round(h2 * percentage / 100);
31373
31435
  const deltaY = direction === "up" ? -scrollDistance : scrollDistance;
31374
31436
  yield page.scroll(cx, cy, 0, deltaY);
31437
+ const screenshotBase64 = yield waitAndCaptureScreenshot(page, 100);
31375
31438
  v3.recordAgentReplayStep({
31376
31439
  type: "scroll",
31377
31440
  deltaX: 0,
@@ -31381,29 +31444,50 @@ var scrollVisionTool = (v3, provider) => (0, import_ai9.tool)({
31381
31444
  return {
31382
31445
  success: true,
31383
31446
  message: coordinates ? `Scrolled ${percentage}% ${direction} at (${cx}, ${cy})` : `Scrolled ${percentage}% ${direction}`,
31384
- scrolledPixels: scrollDistance
31447
+ scrolledPixels: scrollDistance,
31448
+ screenshotBase64
31385
31449
  };
31386
- })
31450
+ }),
31451
+ toModelOutput: (result) => {
31452
+ const content = [
31453
+ {
31454
+ type: "text",
31455
+ text: JSON.stringify({
31456
+ success: result.success,
31457
+ message: result.message,
31458
+ scrolledPixels: result.scrolledPixels
31459
+ })
31460
+ }
31461
+ ];
31462
+ if (result.screenshotBase64) {
31463
+ content.push({
31464
+ type: "media",
31465
+ mediaType: "image/png",
31466
+ data: result.screenshotBase64
31467
+ });
31468
+ }
31469
+ return { type: "content", value: content };
31470
+ }
31387
31471
  });
31388
31472
 
31389
31473
  // lib/v3/agent/tools/extract.ts
31390
- var import_ai10 = require("ai");
31391
- var import_zod14 = require("zod");
31474
+ var import_ai9 = require("ai");
31475
+ var import_zod13 = require("zod");
31392
31476
  function evaluateZodSchema(schemaStr, logger) {
31393
31477
  var _a4;
31394
31478
  try {
31395
31479
  const fn = new Function("z", `return ${schemaStr}`);
31396
- return fn(import_zod14.z);
31480
+ return fn(import_zod13.z);
31397
31481
  } catch (e2) {
31398
31482
  logger == null ? void 0 : logger({
31399
31483
  category: "agent",
31400
31484
  message: `Failed to evaluate schema: ${(_a4 = e2 == null ? void 0 : e2.message) != null ? _a4 : String(e2)}`,
31401
31485
  level: 0
31402
31486
  });
31403
- return import_zod14.z.any();
31487
+ return import_zod13.z.any();
31404
31488
  }
31405
31489
  }
31406
- var extractTool = (v3, executionModel, logger) => (0, import_ai10.tool)({
31490
+ var extractTool = (v3, executionModel, logger) => (0, import_ai9.tool)({
31407
31491
  description: `Extract structured data from the current page based on a provided schema.
31408
31492
 
31409
31493
  USAGE GUIDELINES:
@@ -31422,9 +31506,9 @@ var extractTool = (v3, executionModel, logger) => (0, import_ai10.tool)({
31422
31506
  3. Extract arrays:
31423
31507
  instruction: "extract all product names and prices"
31424
31508
  schema: "z.object({ products: z.array(z.object({ name: z.string(), price: z.number() })) })"`,
31425
- inputSchema: import_zod14.z.object({
31426
- instruction: import_zod14.z.string(),
31427
- schema: import_zod14.z.string().optional().describe("Zod schema as code, e.g. z.object({ title: z.string() })")
31509
+ inputSchema: import_zod13.z.object({
31510
+ instruction: import_zod13.z.string(),
31511
+ schema: import_zod13.z.string().optional().describe("Zod schema as code, e.g. z.object({ title: z.string() })")
31428
31512
  }),
31429
31513
  execute: (_0) => __async(null, [_0], function* ({ instruction, schema }) {
31430
31514
  var _a4;
@@ -31439,8 +31523,8 @@ var extractTool = (v3, executionModel, logger) => (0, import_ai10.tool)({
31439
31523
  });
31440
31524
 
31441
31525
  // lib/v3/agent/tools/click.ts
31442
- var import_ai11 = require("ai");
31443
- var import_zod15 = require("zod");
31526
+ var import_ai10 = require("ai");
31527
+ var import_zod14 = require("zod");
31444
31528
 
31445
31529
  // lib/v3/agent/utils/xpath.ts
31446
31530
  function ensureXPath(value) {
@@ -31451,16 +31535,13 @@ function ensureXPath(value) {
31451
31535
  }
31452
31536
 
31453
31537
  // lib/v3/agent/tools/click.ts
31454
- function waitForTimeout(ms) {
31455
- return new Promise((resolve3) => setTimeout(resolve3, ms));
31456
- }
31457
- var clickTool = (v3, provider) => (0, import_ai11.tool)({
31538
+ var clickTool = (v3, provider) => (0, import_ai10.tool)({
31458
31539
  description: "Click on an element using its coordinates (this is the most reliable way to click on an element, always use this over act, unless the element is not visible in the screenshot, but shown in ariaTree)",
31459
- inputSchema: import_zod15.z.object({
31460
- describe: import_zod15.z.string().describe(
31540
+ inputSchema: import_zod14.z.object({
31541
+ describe: import_zod14.z.string().describe(
31461
31542
  "Describe the element to click on in a short, specific phrase that mentions the element type and a good visual description"
31462
31543
  ),
31463
- coordinates: import_zod15.z.array(import_zod15.z.number()).describe("The (x, y) coordinates to click on")
31544
+ coordinates: import_zod14.z.array(import_zod14.z.number()).describe("The (x, y) coordinates to click on")
31464
31545
  }),
31465
31546
  execute: (_0) => __async(null, [_0], function* ({ describe, coordinates }) {
31466
31547
  try {
@@ -31476,36 +31557,38 @@ var clickTool = (v3, provider) => (0, import_ai11.tool)({
31476
31557
  level: 1,
31477
31558
  auxiliary: {
31478
31559
  arguments: {
31479
- value: JSON.stringify({ describe, coordinates, processed }),
31480
- type: "string"
31560
+ value: JSON.stringify({ describe }),
31561
+ type: "object"
31481
31562
  }
31482
31563
  }
31483
31564
  });
31565
+ const shouldCollectXpath = v3.isAgentReplayActive();
31484
31566
  const xpath = yield page.click(processed.x, processed.y, {
31485
- returnXpath: true
31567
+ returnXpath: shouldCollectXpath
31486
31568
  });
31487
- if (isGoogleProvider(provider)) {
31488
- yield waitForTimeout(1e3);
31489
- }
31490
- const normalizedXpath = ensureXPath(xpath);
31491
- if (normalizedXpath) {
31492
- const action = {
31493
- selector: normalizedXpath,
31494
- description: describe,
31495
- method: "click",
31496
- arguments: []
31497
- };
31498
- v3.recordAgentReplayStep({
31499
- type: "act",
31500
- instruction: describe,
31501
- actions: [action],
31502
- actionDescription: describe
31503
- });
31569
+ const screenshotBase64 = yield waitAndCaptureScreenshot(page);
31570
+ if (shouldCollectXpath) {
31571
+ const normalizedXpath = ensureXPath(xpath);
31572
+ if (normalizedXpath) {
31573
+ const action = {
31574
+ selector: normalizedXpath,
31575
+ description: describe,
31576
+ method: "click",
31577
+ arguments: []
31578
+ };
31579
+ v3.recordAgentReplayStep({
31580
+ type: "act",
31581
+ instruction: describe,
31582
+ actions: [action],
31583
+ actionDescription: describe
31584
+ });
31585
+ }
31504
31586
  }
31505
31587
  return {
31506
31588
  success: true,
31507
31589
  describe,
31508
- coordinates: [processed.x, processed.y]
31590
+ coordinates: [processed.x, processed.y],
31591
+ screenshotBase64
31509
31592
  };
31510
31593
  } catch (error) {
31511
31594
  return {
@@ -31513,25 +31596,60 @@ var clickTool = (v3, provider) => (0, import_ai11.tool)({
31513
31596
  error: `Error clicking: ${error.message}`
31514
31597
  };
31515
31598
  }
31516
- })
31599
+ }),
31600
+ toModelOutput: (result) => {
31601
+ if (result.success) {
31602
+ const content = [
31603
+ {
31604
+ type: "text",
31605
+ text: JSON.stringify({
31606
+ success: result.success,
31607
+ describe: result.describe,
31608
+ coordinates: result.coordinates
31609
+ })
31610
+ }
31611
+ ];
31612
+ if (result.screenshotBase64) {
31613
+ content.push({
31614
+ type: "media",
31615
+ mediaType: "image/png",
31616
+ data: result.screenshotBase64
31617
+ });
31618
+ }
31619
+ return { type: "content", value: content };
31620
+ }
31621
+ return {
31622
+ type: "content",
31623
+ value: [
31624
+ {
31625
+ type: "text",
31626
+ text: JSON.stringify({
31627
+ success: result.success,
31628
+ error: result.error
31629
+ })
31630
+ }
31631
+ ]
31632
+ };
31633
+ }
31517
31634
  });
31518
31635
 
31519
31636
  // lib/v3/agent/tools/type.ts
31520
- var import_ai12 = require("ai");
31521
- var import_zod16 = require("zod");
31522
- function waitForTimeout2(ms) {
31523
- return new Promise((resolve3) => setTimeout(resolve3, ms));
31524
- }
31525
- var typeTool = (v3, provider) => (0, import_ai12.tool)({
31637
+ var import_ai11 = require("ai");
31638
+ var import_zod15 = require("zod");
31639
+ var typeTool = (v3, provider) => (0, import_ai11.tool)({
31526
31640
  description: "Type text into an element using its coordinates. This will click the element and then type the text into it (this is the most reliable way to type into an element, always use this over act, unless the element is not visible in the screenshot, but shown in ariaTree)",
31527
- inputSchema: import_zod16.z.object({
31528
- describe: import_zod16.z.string().describe(
31641
+ inputSchema: import_zod15.z.object({
31642
+ describe: import_zod15.z.string().describe(
31529
31643
  "Describe the element to type into in a short, specific phrase that mentions the element type and a good visual description"
31530
31644
  ),
31531
- text: import_zod16.z.string().describe("The text to type into the element"),
31532
- coordinates: import_zod16.z.array(import_zod16.z.number()).describe("The (x, y) coordinates to type into the element")
31645
+ text: import_zod15.z.string().describe("The text to type into the element"),
31646
+ coordinates: import_zod15.z.array(import_zod15.z.number()).describe("The (x, y) coordinates to type into the element")
31533
31647
  }),
31534
- execute: (_0) => __async(null, [_0], function* ({ describe, coordinates, text }) {
31648
+ execute: (_0) => __async(null, [_0], function* ({
31649
+ describe,
31650
+ coordinates,
31651
+ text
31652
+ }) {
31535
31653
  try {
31536
31654
  const page = yield v3.context.awaitActivePage();
31537
31655
  const processed = processCoordinates(
@@ -31545,54 +31663,98 @@ var typeTool = (v3, provider) => (0, import_ai12.tool)({
31545
31663
  level: 1,
31546
31664
  auxiliary: {
31547
31665
  arguments: {
31548
- value: JSON.stringify({ describe, coordinates, processed, text }),
31549
- type: "string"
31666
+ value: JSON.stringify({ describe, text }),
31667
+ type: "object"
31550
31668
  }
31551
31669
  }
31552
31670
  });
31671
+ const shouldCollectXpath = v3.isAgentReplayActive();
31553
31672
  const xpath = yield page.click(processed.x, processed.y, {
31554
- returnXpath: true
31673
+ returnXpath: shouldCollectXpath
31555
31674
  });
31556
- if (isGoogleProvider(provider)) {
31557
- yield waitForTimeout2(1e3);
31558
- }
31559
31675
  yield page.type(text);
31560
- const normalizedXpath = ensureXPath(xpath);
31561
- if (normalizedXpath) {
31562
- const action = {
31563
- selector: normalizedXpath,
31564
- description: describe,
31565
- method: "type",
31566
- arguments: [text]
31567
- };
31568
- v3.recordAgentReplayStep({
31569
- type: "act",
31570
- instruction: describe,
31571
- actions: [action],
31572
- actionDescription: describe
31573
- });
31676
+ const screenshotBase64 = yield waitAndCaptureScreenshot(page);
31677
+ if (shouldCollectXpath) {
31678
+ const normalizedXpath = ensureXPath(xpath);
31679
+ if (normalizedXpath) {
31680
+ const action = {
31681
+ selector: normalizedXpath,
31682
+ description: describe,
31683
+ method: "type",
31684
+ arguments: [text]
31685
+ };
31686
+ v3.recordAgentReplayStep({
31687
+ type: "act",
31688
+ instruction: describe,
31689
+ actions: [action],
31690
+ actionDescription: describe
31691
+ });
31692
+ }
31574
31693
  }
31575
- return { success: true, describe, text };
31694
+ return {
31695
+ success: true,
31696
+ describe,
31697
+ text,
31698
+ screenshotBase64
31699
+ };
31576
31700
  } catch (error) {
31577
31701
  return {
31578
31702
  success: false,
31579
31703
  error: `Error typing: ${error.message}`
31580
31704
  };
31581
31705
  }
31582
- })
31706
+ }),
31707
+ toModelOutput: (result) => {
31708
+ if (result.success) {
31709
+ const content = [
31710
+ {
31711
+ type: "text",
31712
+ text: JSON.stringify({
31713
+ success: result.success,
31714
+ describe: result.describe,
31715
+ text: result.text
31716
+ })
31717
+ }
31718
+ ];
31719
+ if (result.screenshotBase64) {
31720
+ content.push({
31721
+ type: "media",
31722
+ mediaType: "image/png",
31723
+ data: result.screenshotBase64
31724
+ });
31725
+ }
31726
+ return { type: "content", value: content };
31727
+ }
31728
+ return {
31729
+ type: "content",
31730
+ value: [
31731
+ {
31732
+ type: "text",
31733
+ text: JSON.stringify({
31734
+ success: result.success,
31735
+ error: result.error
31736
+ })
31737
+ }
31738
+ ]
31739
+ };
31740
+ }
31583
31741
  });
31584
31742
 
31585
31743
  // lib/v3/agent/tools/dragAndDrop.ts
31586
- var import_ai13 = require("ai");
31587
- var import_zod17 = require("zod");
31588
- var dragAndDropTool = (v3, provider) => (0, import_ai13.tool)({
31744
+ var import_ai12 = require("ai");
31745
+ var import_zod16 = require("zod");
31746
+ var dragAndDropTool = (v3, provider) => (0, import_ai12.tool)({
31589
31747
  description: "Drag and drop an element using its coordinates (this is the most reliable way to drag and drop an element, always use this over act, unless the element is not visible in the screenshot, but shown in ariaTree)",
31590
- inputSchema: import_zod17.z.object({
31591
- describe: import_zod17.z.string().describe("Describe the element to drag and drop"),
31592
- startCoordinates: import_zod17.z.array(import_zod17.z.number()).describe("The (x, y) coordinates to start the drag and drop from"),
31593
- endCoordinates: import_zod17.z.array(import_zod17.z.number()).describe("The (x, y) coordinates to end the drag and drop at")
31748
+ inputSchema: import_zod16.z.object({
31749
+ describe: import_zod16.z.string().describe("Describe the element to drag and drop"),
31750
+ startCoordinates: import_zod16.z.array(import_zod16.z.number()).describe("The (x, y) coordinates to start the drag and drop from"),
31751
+ endCoordinates: import_zod16.z.array(import_zod16.z.number()).describe("The (x, y) coordinates to end the drag and drop at")
31594
31752
  }),
31595
- execute: (_0) => __async(null, [_0], function* ({ describe, startCoordinates, endCoordinates }) {
31753
+ execute: (_0) => __async(null, [_0], function* ({
31754
+ describe,
31755
+ startCoordinates,
31756
+ endCoordinates
31757
+ }) {
31596
31758
  try {
31597
31759
  const page = yield v3.context.awaitActivePage();
31598
31760
  const processedStart = processCoordinates(
@@ -31612,60 +31774,97 @@ var dragAndDropTool = (v3, provider) => (0, import_ai13.tool)({
31612
31774
  auxiliary: {
31613
31775
  arguments: {
31614
31776
  value: JSON.stringify({
31615
- describe,
31616
- startCoordinates,
31617
- endCoordinates,
31618
- processedStart,
31619
- processedEnd
31777
+ describe
31620
31778
  }),
31621
- type: "string"
31779
+ type: "object"
31622
31780
  }
31623
31781
  }
31624
31782
  });
31783
+ const shouldCollectXpath = v3.isAgentReplayActive();
31625
31784
  const [fromXpath, toXpath] = yield page.dragAndDrop(
31626
31785
  processedStart.x,
31627
31786
  processedStart.y,
31628
31787
  processedEnd.x,
31629
31788
  processedEnd.y,
31630
- { returnXpath: true }
31789
+ { returnXpath: shouldCollectXpath }
31631
31790
  );
31632
- const normalizedFrom = ensureXPath(fromXpath);
31633
- const normalizedTo = ensureXPath(toXpath);
31634
- if (normalizedFrom && normalizedTo) {
31635
- const action = {
31636
- selector: normalizedFrom,
31637
- description: describe,
31638
- method: "dragAndDrop",
31639
- arguments: [normalizedTo]
31640
- };
31641
- v3.recordAgentReplayStep({
31642
- type: "act",
31643
- instruction: describe,
31644
- actions: [action],
31645
- actionDescription: describe
31646
- });
31791
+ const screenshotBase64 = yield waitAndCaptureScreenshot(page);
31792
+ if (shouldCollectXpath) {
31793
+ const normalizedFrom = ensureXPath(fromXpath);
31794
+ const normalizedTo = ensureXPath(toXpath);
31795
+ if (normalizedFrom && normalizedTo) {
31796
+ const action = {
31797
+ selector: normalizedFrom,
31798
+ description: describe,
31799
+ method: "dragAndDrop",
31800
+ arguments: [normalizedTo]
31801
+ };
31802
+ v3.recordAgentReplayStep({
31803
+ type: "act",
31804
+ instruction: describe,
31805
+ actions: [action],
31806
+ actionDescription: describe
31807
+ });
31808
+ }
31647
31809
  }
31648
- return { success: true, describe };
31810
+ return {
31811
+ success: true,
31812
+ describe,
31813
+ screenshotBase64
31814
+ };
31649
31815
  } catch (error) {
31650
31816
  return {
31651
31817
  success: false,
31652
31818
  error: `Error dragging: ${error.message}`
31653
31819
  };
31654
31820
  }
31655
- })
31821
+ }),
31822
+ toModelOutput: (result) => {
31823
+ if (result.success) {
31824
+ const content = [
31825
+ {
31826
+ type: "text",
31827
+ text: JSON.stringify({
31828
+ success: result.success,
31829
+ describe: result.describe
31830
+ })
31831
+ }
31832
+ ];
31833
+ if (result.screenshotBase64) {
31834
+ content.push({
31835
+ type: "media",
31836
+ mediaType: "image/png",
31837
+ data: result.screenshotBase64
31838
+ });
31839
+ }
31840
+ return { type: "content", value: content };
31841
+ }
31842
+ return {
31843
+ type: "content",
31844
+ value: [
31845
+ {
31846
+ type: "text",
31847
+ text: JSON.stringify({
31848
+ success: result.success,
31849
+ error: result.error
31850
+ })
31851
+ }
31852
+ ]
31853
+ };
31854
+ }
31656
31855
  });
31657
31856
 
31658
31857
  // lib/v3/agent/tools/clickAndHold.ts
31659
- var import_ai14 = require("ai");
31660
- var import_zod18 = require("zod");
31661
- var clickAndHoldTool = (v3, provider) => (0, import_ai14.tool)({
31858
+ var import_ai13 = require("ai");
31859
+ var import_zod17 = require("zod");
31860
+ var clickAndHoldTool = (v3, provider) => (0, import_ai13.tool)({
31662
31861
  description: "Click and hold on an element using its coordinates",
31663
- inputSchema: import_zod18.z.object({
31664
- describe: import_zod18.z.string().describe(
31862
+ inputSchema: import_zod17.z.object({
31863
+ describe: import_zod17.z.string().describe(
31665
31864
  "Describe the element to click on in a short, specific phrase that mentions the element type and a good visual description"
31666
31865
  ),
31667
- duration: import_zod18.z.number().describe("The duration to hold the element in milliseconds"),
31668
- coordinates: import_zod18.z.array(import_zod18.z.number()).describe("The (x, y) coordinates to click on")
31866
+ duration: import_zod17.z.number().describe("The duration to hold the element in milliseconds"),
31867
+ coordinates: import_zod17.z.array(import_zod17.z.number()).describe("The (x, y) coordinates to click on")
31669
31868
  }),
31670
31869
  execute: (_0) => __async(null, [_0], function* ({ describe, coordinates, duration }) {
31671
31870
  try {
@@ -31683,35 +31882,36 @@ var clickAndHoldTool = (v3, provider) => (0, import_ai14.tool)({
31683
31882
  arguments: {
31684
31883
  value: JSON.stringify({
31685
31884
  describe,
31686
- coordinates,
31687
- processed,
31688
31885
  duration
31689
31886
  }),
31690
- type: "string"
31887
+ type: "object"
31691
31888
  }
31692
31889
  }
31693
31890
  });
31891
+ const shouldCollectXpath = v3.isAgentReplayActive();
31694
31892
  const [xpath] = yield page.dragAndDrop(
31695
31893
  processed.x,
31696
31894
  processed.y,
31697
31895
  processed.x,
31698
31896
  processed.y,
31699
- { delay: duration, returnXpath: true }
31897
+ { delay: duration, returnXpath: shouldCollectXpath }
31700
31898
  );
31701
- const normalizedXpath = ensureXPath(xpath);
31702
- if (normalizedXpath) {
31703
- const action = {
31704
- selector: normalizedXpath,
31705
- description: describe,
31706
- method: "clickAndHold",
31707
- arguments: [String(duration)]
31708
- };
31709
- v3.recordAgentReplayStep({
31710
- type: "act",
31711
- instruction: describe,
31712
- actions: [action],
31713
- actionDescription: describe
31714
- });
31899
+ if (shouldCollectXpath) {
31900
+ const normalizedXpath = ensureXPath(xpath);
31901
+ if (normalizedXpath) {
31902
+ const action = {
31903
+ selector: normalizedXpath,
31904
+ description: describe,
31905
+ method: "clickAndHold",
31906
+ arguments: [String(duration)]
31907
+ };
31908
+ v3.recordAgentReplayStep({
31909
+ type: "act",
31910
+ instruction: describe,
31911
+ actions: [action],
31912
+ actionDescription: describe
31913
+ });
31914
+ }
31715
31915
  }
31716
31916
  return { success: true, describe };
31717
31917
  } catch (error) {
@@ -31724,20 +31924,20 @@ var clickAndHoldTool = (v3, provider) => (0, import_ai14.tool)({
31724
31924
  });
31725
31925
 
31726
31926
  // lib/v3/agent/tools/keys.ts
31727
- var import_ai15 = require("ai");
31728
- var import_zod19 = require("zod");
31729
- var keysTool = (v3) => (0, import_ai15.tool)({
31927
+ var import_ai14 = require("ai");
31928
+ var import_zod18 = require("zod");
31929
+ var keysTool = (v3) => (0, import_ai14.tool)({
31730
31930
  description: `Send keyboard input to the page without targeting a specific element. Unlike the type tool which clicks then types into coordinates, this sends keystrokes directly to wherever focus currently is.
31731
31931
 
31732
31932
  Use method="type" to enter text into the currently focused element. Preferred when: input is already focused, text needs to flow across multiple fields (e.g., verification codes)
31733
31933
 
31734
31934
  Use method="press" for navigation keys (Enter, Tab, Escape, Backspace, arrows) and keyboard shortcuts (Cmd+A, Ctrl+C, Shift+Tab).`,
31735
- inputSchema: import_zod19.z.object({
31736
- method: import_zod19.z.enum(["press", "type"]),
31737
- value: import_zod19.z.string().describe(
31935
+ inputSchema: import_zod18.z.object({
31936
+ method: import_zod18.z.enum(["press", "type"]),
31937
+ value: import_zod18.z.string().describe(
31738
31938
  "The text to type, or the key/combo to press (Enter, Tab, Cmd+A)"
31739
31939
  ),
31740
- repeat: import_zod19.z.number().optional()
31940
+ repeat: import_zod18.z.number().optional()
31741
31941
  }),
31742
31942
  execute: (_0) => __async(null, [_0], function* ({ method, value, repeat }) {
31743
31943
  try {
@@ -31749,7 +31949,7 @@ Use method="press" for navigation keys (Enter, Tab, Escape, Backspace, arrows) a
31749
31949
  auxiliary: {
31750
31950
  arguments: {
31751
31951
  value: JSON.stringify({ method, value, repeat }),
31752
- type: "string"
31952
+ type: "object"
31753
31953
  }
31754
31954
  }
31755
31955
  });
@@ -31784,9 +31984,9 @@ Use method="press" for navigation keys (Enter, Tab, Escape, Backspace, arrows) a
31784
31984
  });
31785
31985
 
31786
31986
  // lib/v3/agent/tools/fillFormVision.ts
31787
- var import_ai16 = require("ai");
31788
- var import_zod20 = require("zod");
31789
- var fillFormVisionTool = (v3, provider) => (0, import_ai16.tool)({
31987
+ var import_ai15 = require("ai");
31988
+ var import_zod19 = require("zod");
31989
+ var fillFormVisionTool = (v3, provider) => (0, import_ai15.tool)({
31790
31990
  description: `FORM FILL - SPECIALIZED MULTI-FIELD INPUT TOOL
31791
31991
 
31792
31992
  CRITICAL: Use this for ANY form with 2+ input fields (text inputs, textareas, etc.)
@@ -31804,16 +32004,16 @@ MANDATORY USE CASES (always use fillFormVision for these):
31804
32004
  - Checkout forms: address, payment info fields
31805
32005
  - Profile updates: multiple user data fields
31806
32006
  - Search filters: multiple criteria inputs`,
31807
- inputSchema: import_zod20.z.object({
31808
- fields: import_zod20.z.array(
31809
- import_zod20.z.object({
31810
- action: import_zod20.z.string().describe(
32007
+ inputSchema: import_zod19.z.object({
32008
+ fields: import_zod19.z.array(
32009
+ import_zod19.z.object({
32010
+ action: import_zod19.z.string().describe(
31811
32011
  "Description of the typing action, e.g. 'type foo into the bar field'"
31812
32012
  ),
31813
- value: import_zod20.z.string().describe("Text to type into the target field"),
31814
- coordinates: import_zod20.z.object({
31815
- x: import_zod20.z.number(),
31816
- y: import_zod20.z.number()
32013
+ value: import_zod19.z.string().describe("Text to type into the target field"),
32014
+ coordinates: import_zod19.z.object({
32015
+ x: import_zod19.z.number(),
32016
+ y: import_zod19.z.number()
31817
32017
  }).describe("Coordinates of the target field")
31818
32018
  })
31819
32019
  ).min(2, "Provide at least two fields to fill")
@@ -31838,32 +32038,36 @@ MANDATORY USE CASES (always use fillFormVision for these):
31838
32038
  auxiliary: {
31839
32039
  arguments: {
31840
32040
  value: JSON.stringify({ fields, processedFields }),
31841
- type: "string"
32041
+ type: "object"
31842
32042
  }
31843
32043
  }
31844
32044
  });
32045
+ const shouldCollectXpath = v3.isAgentReplayActive();
31845
32046
  const actions = [];
31846
32047
  for (const field of processedFields) {
31847
32048
  const xpath = yield page.click(
31848
32049
  field.coordinates.x,
31849
32050
  field.coordinates.y,
31850
32051
  {
31851
- returnXpath: true
32052
+ returnXpath: shouldCollectXpath
31852
32053
  }
31853
32054
  );
31854
32055
  yield page.type(field.value);
31855
- const normalizedXpath = ensureXPath(xpath);
31856
- if (normalizedXpath) {
31857
- actions.push({
31858
- selector: normalizedXpath,
31859
- description: field.action,
31860
- method: "type",
31861
- arguments: [field.value]
31862
- });
32056
+ if (shouldCollectXpath) {
32057
+ const normalizedXpath = ensureXPath(xpath);
32058
+ if (normalizedXpath) {
32059
+ actions.push({
32060
+ selector: normalizedXpath,
32061
+ description: field.action,
32062
+ method: "type",
32063
+ arguments: [field.value]
32064
+ });
32065
+ }
31863
32066
  }
31864
32067
  yield new Promise((resolve3) => setTimeout(resolve3, 100));
31865
32068
  }
31866
- if (actions.length > 0) {
32069
+ const screenshotBase64 = yield waitAndCaptureScreenshot(page, 100);
32070
+ if (shouldCollectXpath && actions.length > 0) {
31867
32071
  v3.recordAgentReplayStep({
31868
32072
  type: "act",
31869
32073
  instruction: `Fill ${fields.length} form fields`,
@@ -31873,7 +32077,8 @@ MANDATORY USE CASES (always use fillFormVision for these):
31873
32077
  }
31874
32078
  return {
31875
32079
  success: true,
31876
- playwrightArguments: processedFields
32080
+ playwrightArguments: processedFields,
32081
+ screenshotBase64
31877
32082
  };
31878
32083
  } catch (error) {
31879
32084
  return {
@@ -31881,13 +32086,47 @@ MANDATORY USE CASES (always use fillFormVision for these):
31881
32086
  error: `Error filling form: ${error.message}`
31882
32087
  };
31883
32088
  }
31884
- })
32089
+ }),
32090
+ toModelOutput: (result) => {
32091
+ var _a4, _b;
32092
+ if (result.success) {
32093
+ const content = [
32094
+ {
32095
+ type: "text",
32096
+ text: JSON.stringify({
32097
+ success: result.success,
32098
+ fieldsCount: (_b = (_a4 = result.playwrightArguments) == null ? void 0 : _a4.length) != null ? _b : 0
32099
+ })
32100
+ }
32101
+ ];
32102
+ if (result.screenshotBase64) {
32103
+ content.push({
32104
+ type: "media",
32105
+ mediaType: "image/png",
32106
+ data: result.screenshotBase64
32107
+ });
32108
+ }
32109
+ return { type: "content", value: content };
32110
+ }
32111
+ return {
32112
+ type: "content",
32113
+ value: [
32114
+ {
32115
+ type: "text",
32116
+ text: JSON.stringify({
32117
+ success: result.success,
32118
+ error: result.error
32119
+ })
32120
+ }
32121
+ ]
32122
+ };
32123
+ }
31885
32124
  });
31886
32125
 
31887
32126
  // lib/v3/agent/tools/think.ts
31888
- var import_ai17 = require("ai");
31889
- var import_zod21 = require("zod");
31890
- var thinkTool = () => (0, import_ai17.tool)({
32127
+ var import_ai16 = require("ai");
32128
+ var import_zod20 = require("zod");
32129
+ var thinkTool = () => (0, import_ai16.tool)({
31891
32130
  description: `Use this tool to think through complex problems or plan a sequence of steps. This is for internal reasoning only and doesn't perform any actions. Use this to:
31892
32131
 
31893
32132
  1. Plan a multi-step approach before taking action
@@ -31896,8 +32135,8 @@ var thinkTool = () => (0, import_ai17.tool)({
31896
32135
  4. Evaluate options when you're unsure what to do next
31897
32136
 
31898
32137
  The output is only visible to you; use it to track your own reasoning process.`,
31899
- inputSchema: import_zod21.z.object({
31900
- reasoning: import_zod21.z.string().describe(
32138
+ inputSchema: import_zod20.z.object({
32139
+ reasoning: import_zod20.z.string().describe(
31901
32140
  "Your step-by-step reasoning or planning process. Be as detailed as needed."
31902
32141
  )
31903
32142
  }),
@@ -31910,8 +32149,8 @@ The output is only visible to you; use it to track your own reasoning process.`,
31910
32149
  });
31911
32150
 
31912
32151
  // lib/v3/agent/tools/search.ts
31913
- var import_ai18 = require("ai");
31914
- var import_zod22 = require("zod");
32152
+ var import_ai17 = require("ai");
32153
+ var import_zod21 = require("zod");
31915
32154
  function performBraveSearch(query) {
31916
32155
  return __async(this, null, function* () {
31917
32156
  var _a4;
@@ -31957,10 +32196,10 @@ function performBraveSearch(query) {
31957
32196
  }
31958
32197
  });
31959
32198
  }
31960
- var searchTool = (v3) => (0, import_ai18.tool)({
32199
+ var searchTool = (v3) => (0, import_ai17.tool)({
31961
32200
  description: "Perform a web search and returns results. Use this tool when you need information from the web or when you are unsure of the exact URL you want to navigate to. This can be used to find the ideal entry point, resulting in a task that is easier to complete due to starting further in the process.",
31962
- inputSchema: import_zod22.z.object({
31963
- query: import_zod22.z.string().describe("The search query to look for on the web")
32201
+ inputSchema: import_zod21.z.object({
32202
+ query: import_zod21.z.string().describe("The search query to look for on the web")
31964
32203
  }),
31965
32204
  execute: (_0) => __async(null, [_0], function* ({ query }) {
31966
32205
  var _a4, _b, _c;
@@ -31971,7 +32210,7 @@ var searchTool = (v3) => (0, import_ai18.tool)({
31971
32210
  auxiliary: {
31972
32211
  arguments: {
31973
32212
  value: JSON.stringify({ query }),
31974
- type: "string"
32213
+ type: "object"
31975
32214
  }
31976
32215
  }
31977
32216
  });
@@ -32018,7 +32257,7 @@ function createAgentTools(v3, options) {
32018
32257
  ariaTree: ariaTreeTool(v3),
32019
32258
  click: clickTool(v3, provider),
32020
32259
  clickAndHold: clickAndHoldTool(v3, provider),
32021
- close: closeTool(),
32260
+ //close: closeTool(),
32022
32261
  dragAndDrop: dragAndDropTool(v3, provider),
32023
32262
  extract: extractTool(v3, executionModel, options == null ? void 0 : options.logger),
32024
32263
  fillForm: fillFormTool(v3, executionModel),
@@ -32030,7 +32269,7 @@ function createAgentTools(v3, options) {
32030
32269
  scroll: mode === "hybrid" ? scrollVisionTool(v3, provider) : scrollTool(v3),
32031
32270
  think: thinkTool(),
32032
32271
  type: typeTool(v3, provider),
32033
- wait: waitTool(v3)
32272
+ wait: waitTool(v3, mode)
32034
32273
  };
32035
32274
  if (process.env.BRAVE_API_KEY) {
32036
32275
  allTools.search = searchTool(v3);
@@ -32074,8 +32313,7 @@ function buildToolsSection(isHybridMode, hasSearch, excludeTools) {
32074
32313
  { name: "goto", description: "Navigate to a URL" },
32075
32314
  { name: "wait", description: "Wait for a specified time" },
32076
32315
  { name: "navback", description: "Navigate back in browser history" },
32077
- { name: "scroll", description: "Scroll the page x pixels up or down" },
32078
- { name: "close", description: "Mark the task as complete or failed" }
32316
+ { name: "scroll", description: "Scroll the page x pixels up or down" }
32079
32317
  ];
32080
32318
  const domTools = [
32081
32319
  {
@@ -32097,8 +32335,7 @@ function buildToolsSection(isHybridMode, hasSearch, excludeTools) {
32097
32335
  { name: "goto", description: "Navigate to a URL" },
32098
32336
  { name: "wait", description: "Wait for a specified time" },
32099
32337
  { name: "navback", description: "Navigate back in browser history" },
32100
- { name: "scroll", description: "Scroll the page x pixels up or down" },
32101
- { name: "close", description: "Mark the task as complete or failed" }
32338
+ { name: "scroll", description: "Scroll the page x pixels up or down" }
32102
32339
  ];
32103
32340
  const baseTools = isHybridMode ? hybridTools : domTools;
32104
32341
  if (hasSearch) {
@@ -32107,8 +32344,8 @@ function buildToolsSection(isHybridMode, hasSearch, excludeTools) {
32107
32344
  description: "Perform a web search and return results. Prefer this over navigating to Google and searching within the page for reliability and efficiency."
32108
32345
  });
32109
32346
  }
32110
- const filteredTools = baseTools.filter((tool21) => !excludeSet.has(tool21.name));
32111
- const toolLines = filteredTools.map((tool21) => ` <tool name="${tool21.name}">${tool21.description}</tool>`).join("\n");
32347
+ const filteredTools = baseTools.filter((tool22) => !excludeSet.has(tool22.name));
32348
+ const toolLines = filteredTools.map((tool22) => ` <tool name="${tool22.name}">${tool22.description}</tool>`).join("\n");
32112
32349
  return `<tools>
32113
32350
  ${toolLines}
32114
32351
  </tools>`;
@@ -32200,8 +32437,6 @@ function buildAgentSystemPrompt(options) {
32200
32437
  <item>Always start by understanding the current page state</item>
32201
32438
  <item>Use the screenshot tool to verify page state when needed</item>
32202
32439
  <item>Use appropriate tools for each action</item>
32203
- <item>When the task is complete, use the "close" tool with taskComplete: true</item>
32204
- <item>If the task cannot be completed, use "close" with taskComplete: false</item>
32205
32440
  </guidelines>
32206
32441
  ${pageUnderstandingProtocol}
32207
32442
  <navigation>
@@ -32225,132 +32460,143 @@ function buildAgentSystemPrompt(options) {
32225
32460
  }
32226
32461
 
32227
32462
  // lib/v3/handlers/v3AgentHandler.ts
32228
- var import_ai19 = require("ai");
32463
+ var import_ai20 = require("ai");
32229
32464
 
32230
32465
  // lib/v3/agent/utils/messageProcessing.ts
32466
+ var VISION_ACTION_TOOLS = [
32467
+ "click",
32468
+ "type",
32469
+ "dragAndDrop",
32470
+ "wait",
32471
+ "fillFormVision",
32472
+ "scroll"
32473
+ ];
32231
32474
  function isToolMessage(message) {
32232
32475
  return !!message && typeof message === "object" && message.role === "tool" && Array.isArray(message.content);
32233
32476
  }
32234
32477
  function isScreenshotPart(part) {
32235
32478
  return !!part && typeof part === "object" && part.toolName === "screenshot";
32236
32479
  }
32480
+ function isVisionActionPart(part) {
32481
+ if (!part || typeof part !== "object") return false;
32482
+ const toolName = part.toolName;
32483
+ return typeof toolName === "string" && VISION_ACTION_TOOLS.includes(toolName);
32484
+ }
32485
+ function isVisionPart(part) {
32486
+ return isScreenshotPart(part) || isVisionActionPart(part);
32487
+ }
32237
32488
  function isAriaTreePart(part) {
32238
32489
  return !!part && typeof part === "object" && part.toolName === "ariaTree";
32239
32490
  }
32240
- function processMessages(params) {
32241
- const originalContentSize = JSON.stringify(params.prompt).length;
32242
- const screenshotIndices = findToolIndices(params.prompt, "screenshot");
32243
- const ariaTreeIndices = findToolIndices(params.prompt, "ariaTree");
32244
- const processedPrompt = params.prompt.map(
32245
- (message, index) => {
32491
+ function processMessages(messages) {
32492
+ let compressedCount = 0;
32493
+ const visionIndices = [];
32494
+ const ariaTreeIndices = [];
32495
+ for (let i2 = 0; i2 < messages.length; i2++) {
32496
+ const message = messages[i2];
32497
+ if (isToolMessage(message)) {
32498
+ const content = message.content;
32499
+ if (content.some(isVisionPart)) {
32500
+ visionIndices.push(i2);
32501
+ }
32502
+ if (content.some(isAriaTreePart)) {
32503
+ ariaTreeIndices.push(i2);
32504
+ }
32505
+ }
32506
+ }
32507
+ if (visionIndices.length > 2) {
32508
+ const toCompress = visionIndices.slice(0, visionIndices.length - 2);
32509
+ for (const index of toCompress) {
32510
+ const message = messages[index];
32246
32511
  if (isToolMessage(message)) {
32247
- if (message.content.some((part) => isScreenshotPart(part))) {
32248
- const shouldCompress = shouldCompressScreenshot(
32249
- index,
32250
- screenshotIndices
32251
- );
32252
- if (shouldCompress) {
32253
- return compressScreenshotMessage(message);
32254
- }
32255
- }
32256
- if (message.content.some((part) => isAriaTreePart(part))) {
32257
- const shouldCompress = shouldCompressAriaTree(index, ariaTreeIndices);
32258
- if (shouldCompress) {
32259
- return compressAriaTreeMessage(message);
32260
- }
32261
- }
32512
+ compressScreenshotMessage(message);
32513
+ compressVisionActionMessage(message);
32514
+ compressedCount++;
32262
32515
  }
32263
- return message;
32264
32516
  }
32265
- );
32266
- const compressedContentSize = JSON.stringify(processedPrompt).length;
32267
- const stats = calculateCompressionStats(
32268
- originalContentSize,
32269
- compressedContentSize,
32270
- screenshotIndices.length,
32271
- ariaTreeIndices.length
32272
- );
32273
- return {
32274
- processedPrompt,
32275
- stats
32276
- };
32277
- }
32278
- function findToolIndices(prompt, toolName) {
32279
- const screenshotIndices = [];
32280
- prompt.forEach((message, index) => {
32281
- if (isToolMessage(message)) {
32282
- const hasMatch = message.content.some(
32283
- (part) => toolName === "screenshot" ? isScreenshotPart(part) : isAriaTreePart(part)
32284
- );
32285
- if (hasMatch) {
32286
- screenshotIndices.push(index);
32517
+ }
32518
+ if (ariaTreeIndices.length > 1) {
32519
+ const toCompress = ariaTreeIndices.slice(0, ariaTreeIndices.length - 1);
32520
+ for (const idx of toCompress) {
32521
+ const message = messages[idx];
32522
+ if (isToolMessage(message)) {
32523
+ compressAriaTreeMessage(message);
32524
+ compressedCount++;
32287
32525
  }
32288
32526
  }
32289
- });
32290
- return screenshotIndices;
32291
- }
32292
- function shouldCompressScreenshot(index, screenshotIndices) {
32293
- const isNewestScreenshot = index === Math.max(...screenshotIndices);
32294
- const isSecondNewestScreenshot = screenshotIndices.length > 1 && index === screenshotIndices.sort((a, b) => b - a)[1];
32295
- return !isNewestScreenshot && !isSecondNewestScreenshot;
32296
- }
32297
- function shouldCompressAriaTree(index, ariaTreeIndices) {
32298
- const isNewestAriaTree = index === Math.max(...ariaTreeIndices);
32299
- return !isNewestAriaTree;
32527
+ }
32528
+ return compressedCount;
32300
32529
  }
32301
32530
  function compressScreenshotMessage(message) {
32302
- const updatedContent = message.content.map((part) => {
32531
+ var _a4;
32532
+ for (const part of message.content) {
32303
32533
  if (isScreenshotPart(part)) {
32304
- return __spreadProps(__spreadValues({}, part), {
32305
- result: [
32306
- {
32307
- type: "text",
32308
- text: "screenshot taken"
32309
- }
32310
- ]
32311
- });
32534
+ const typedPart = part;
32535
+ const placeholder = [{ type: "text", text: "screenshot taken" }];
32536
+ if ((_a4 = typedPart.output) == null ? void 0 : _a4.value) {
32537
+ typedPart.output.value = placeholder;
32538
+ }
32539
+ if (typedPart.result) {
32540
+ typedPart.result = placeholder;
32541
+ }
32312
32542
  }
32313
- return part;
32314
- });
32315
- return __spreadProps(__spreadValues({}, message), {
32316
- content: updatedContent
32317
- });
32543
+ }
32544
+ }
32545
+ function compressVisionActionMessage(message) {
32546
+ var _a4;
32547
+ for (const part of message.content) {
32548
+ if (isVisionActionPart(part)) {
32549
+ const typedPart = part;
32550
+ if (((_a4 = typedPart.output) == null ? void 0 : _a4.value) && Array.isArray(typedPart.output.value)) {
32551
+ typedPart.output.value = typedPart.output.value.filter(
32552
+ (item) => item && typeof item === "object" && item.type !== "media"
32553
+ );
32554
+ }
32555
+ if (typedPart.result && Array.isArray(typedPart.result)) {
32556
+ typedPart.result = typedPart.result.filter(
32557
+ (item) => item && typeof item === "object" && item.type !== "media"
32558
+ );
32559
+ }
32560
+ }
32561
+ }
32318
32562
  }
32319
32563
  function compressAriaTreeMessage(message) {
32320
- const updatedContent = message.content.map((part) => {
32564
+ var _a4;
32565
+ for (const part of message.content) {
32321
32566
  if (isAriaTreePart(part)) {
32322
- return __spreadProps(__spreadValues({}, part), {
32323
- result: [
32324
- {
32325
- type: "text",
32326
- text: "ARIA tree extracted for context of page elements"
32327
- }
32328
- ]
32329
- });
32567
+ const typedPart = part;
32568
+ const placeholder = [
32569
+ {
32570
+ type: "text",
32571
+ text: "ARIA tree extracted for context of page elements"
32572
+ }
32573
+ ];
32574
+ if ((_a4 = typedPart.output) == null ? void 0 : _a4.value) {
32575
+ typedPart.output.value = placeholder;
32576
+ }
32577
+ if (typedPart.result) {
32578
+ typedPart.result = placeholder;
32579
+ }
32330
32580
  }
32331
- return part;
32332
- });
32333
- return __spreadProps(__spreadValues({}, message), {
32334
- content: updatedContent
32335
- });
32336
- }
32337
- function calculateCompressionStats(originalSize, compressedSize, screenshotCount, ariaTreeCount) {
32338
- const savedChars = originalSize - compressedSize;
32339
- const compressionRatio = originalSize > 0 ? (originalSize - compressedSize) / originalSize * 100 : 0;
32340
- return {
32341
- originalSize,
32342
- compressedSize,
32343
- savedChars,
32344
- compressionRatio,
32345
- screenshotCount,
32346
- ariaTreeCount
32347
- };
32581
+ }
32348
32582
  }
32349
32583
 
32350
32584
  // lib/v3/handlers/v3AgentHandler.ts
32351
32585
  init_flowLogger();
32352
32586
 
32353
32587
  // lib/v3/agent/utils/actionMapping.ts
32588
+ var EXCLUDED_OUTPUT_KEYS = ["screenshotBase64"];
32589
+ function stripExcludedKeys(output) {
32590
+ const result = {};
32591
+ for (const [key, value] of Object.entries(output)) {
32592
+ if (!EXCLUDED_OUTPUT_KEYS.includes(
32593
+ key
32594
+ )) {
32595
+ result[key] = value;
32596
+ }
32597
+ }
32598
+ return result;
32599
+ }
32354
32600
  function mapToolResultToActions({
32355
32601
  toolCallName,
32356
32602
  toolResult,
@@ -32416,14 +32662,112 @@ function createStandardAction(toolCallName, toolResult, args, reasoning) {
32416
32662
  return action;
32417
32663
  }
32418
32664
  if (toolCallName !== "ariaTree" && toolResult) {
32419
- const { output } = toolResult;
32420
- Object.assign(action, output);
32665
+ const result = toolResult;
32666
+ const output = result.output;
32667
+ if (output && typeof output === "object" && !Array.isArray(output)) {
32668
+ const cleanedOutput = stripExcludedKeys(
32669
+ output
32670
+ );
32671
+ Object.assign(action, cleanedOutput);
32672
+ }
32421
32673
  }
32422
32674
  return action;
32423
32675
  }
32424
32676
 
32425
32677
  // lib/v3/handlers/v3AgentHandler.ts
32426
32678
  init_sdkErrors();
32679
+
32680
+ // lib/v3/agent/utils/handleCloseToolCall.ts
32681
+ var import_ai18 = require("ai");
32682
+ var import_zod22 = require("zod");
32683
+ var import_ai19 = require("ai");
32684
+ var baseCloseSchema = import_zod22.z.object({
32685
+ reasoning: import_zod22.z.string().describe("Brief summary of what actions were taken and the outcome"),
32686
+ taskComplete: import_zod22.z.boolean().describe("true if the task was fully completed, false otherwise")
32687
+ });
32688
+ function handleCloseToolCall(options) {
32689
+ return __async(this, null, function* () {
32690
+ var _a4;
32691
+ const { model, inputMessages, instruction, outputSchema, logger } = options;
32692
+ logger({
32693
+ category: "agent",
32694
+ message: "Agent calling tool: close",
32695
+ level: 1
32696
+ });
32697
+ const closeToolSchema = outputSchema ? baseCloseSchema.extend({
32698
+ output: outputSchema.describe(
32699
+ "The specific data the user requested from this task"
32700
+ )
32701
+ }) : baseCloseSchema;
32702
+ const outputInstructions = outputSchema ? `
32703
+
32704
+ The user also requested the following information from this task. Provide it in the "output" field:
32705
+ ${JSON.stringify(
32706
+ Object.fromEntries(
32707
+ Object.entries(outputSchema.shape).map(([key, value]) => [
32708
+ key,
32709
+ value.description || "no description"
32710
+ ])
32711
+ ),
32712
+ null,
32713
+ 2
32714
+ )}` : "";
32715
+ const systemPrompt = `You are a web automation assistant that was tasked with completing a task.
32716
+
32717
+ The task was:
32718
+ "${instruction}"
32719
+
32720
+ Review what was accomplished and provide your final assessment in whether the task was completed successfully. you have been provided with the history of the actions taken so far, use this to determine if the task was completed successfully.${outputInstructions}
32721
+
32722
+ Call the "close" tool with:
32723
+ 1. A brief summary of what was done
32724
+ 2. Whether the task was completed successfully${outputSchema ? "\n3. The requested output data based on what you found" : ""}`;
32725
+ const closeTool = (0, import_ai19.tool)({
32726
+ description: outputSchema ? "Complete the task with your assessment and the requested output data." : "Complete the task with your final assessment.",
32727
+ inputSchema: closeToolSchema,
32728
+ execute: (params) => __async(null, null, function* () {
32729
+ return __spreadValues({ success: true }, params);
32730
+ })
32731
+ });
32732
+ const userPrompt = {
32733
+ role: "user",
32734
+ content: outputSchema ? "Provide your final assessment and the requested output data." : "Provide your final assessment."
32735
+ };
32736
+ const result = yield (0, import_ai18.generateText)({
32737
+ model,
32738
+ system: systemPrompt,
32739
+ messages: [...inputMessages, userPrompt],
32740
+ tools: { close: closeTool },
32741
+ toolChoice: { type: "tool", toolName: "close" }
32742
+ });
32743
+ const closeToolCall = result.toolCalls.find((tc) => tc.toolName === "close");
32744
+ const outputMessages = [
32745
+ userPrompt,
32746
+ ...((_a4 = result.response) == null ? void 0 : _a4.messages) || []
32747
+ ];
32748
+ if (!closeToolCall) {
32749
+ return {
32750
+ reasoning: result.text || "Task execution completed",
32751
+ taskComplete: false,
32752
+ messages: outputMessages
32753
+ };
32754
+ }
32755
+ const input = closeToolCall.input;
32756
+ logger({
32757
+ category: "agent",
32758
+ message: `Task completed`,
32759
+ level: 1
32760
+ });
32761
+ return {
32762
+ reasoning: input.reasoning,
32763
+ taskComplete: input.taskComplete,
32764
+ messages: outputMessages,
32765
+ output: input.output
32766
+ };
32767
+ });
32768
+ }
32769
+
32770
+ // lib/v3/handlers/v3AgentHandler.ts
32427
32771
  function getErrorMessage(error) {
32428
32772
  return error instanceof Error ? error.message : String(error);
32429
32773
  }
@@ -32459,14 +32803,9 @@ var V3AgentHandler = class {
32459
32803
  throw new MissingLLMConfigurationError();
32460
32804
  }
32461
32805
  const baseModel = this.llmClient.getLanguageModel();
32462
- const wrappedModel = (0, import_ai19.wrapLanguageModel)({
32806
+ const wrappedModel = (0, import_ai20.wrapLanguageModel)({
32463
32807
  model: baseModel,
32464
- middleware: __spreadValues({
32465
- transformParams: (_0) => __async(this, [_0], function* ({ params }) {
32466
- const { processedPrompt } = processMessages(params);
32467
- return __spreadProps(__spreadValues({}, params), { prompt: processedPrompt });
32468
- })
32469
- }, SessionFileLogger.createLlmLoggingMiddleware(baseModel.modelId))
32808
+ middleware: __spreadValues({}, SessionFileLogger.createLlmLoggingMiddleware(baseModel.modelId))
32470
32809
  });
32471
32810
  return {
32472
32811
  options,
@@ -32487,6 +32826,15 @@ var V3AgentHandler = class {
32487
32826
  }
32488
32827
  });
32489
32828
  }
32829
+ createPrepareStep(userCallback) {
32830
+ return (options) => __async(null, null, function* () {
32831
+ processMessages(options.messages);
32832
+ if (userCallback) {
32833
+ return userCallback(options);
32834
+ }
32835
+ return options;
32836
+ });
32837
+ }
32490
32838
  createStepHandler(state, userCallback) {
32491
32839
  return (event) => __async(this, null, function* () {
32492
32840
  var _a4;
@@ -32548,7 +32896,7 @@ var V3AgentHandler = class {
32548
32896
  }
32549
32897
  execute(instructionOrOptions) {
32550
32898
  return __async(this, null, function* () {
32551
- var _a4;
32899
+ var _a4, _b;
32552
32900
  const startTime = Date.now();
32553
32901
  const options = typeof instructionOrOptions === "object" ? instructionOrOptions : null;
32554
32902
  const signal = options == null ? void 0 : options.signal;
@@ -32601,7 +32949,7 @@ var V3AgentHandler = class {
32601
32949
  stopWhen: (result2) => this.handleStop(result2, maxSteps),
32602
32950
  temperature: 1,
32603
32951
  toolChoice: "auto",
32604
- prepareStep: callbacks == null ? void 0 : callbacks.prepareStep,
32952
+ prepareStep: this.createPrepareStep(callbacks == null ? void 0 : callbacks.prepareStep),
32605
32953
  onStepFinish: this.createStepHandler(state, callbacks == null ? void 0 : callbacks.onStepFinish),
32606
32954
  abortSignal: preparedOptions.signal,
32607
32955
  providerOptions: wrappedModel.modelId.includes("gemini-3") ? {
@@ -32610,11 +32958,22 @@ var V3AgentHandler = class {
32610
32958
  }
32611
32959
  } : void 0
32612
32960
  });
32961
+ const allMessages = [...messages, ...((_b = result.response) == null ? void 0 : _b.messages) || []];
32962
+ const closeResult = yield this.ensureClosed(
32963
+ state,
32964
+ wrappedModel,
32965
+ allMessages,
32966
+ preparedOptions.instruction,
32967
+ preparedOptions.output,
32968
+ this.logger
32969
+ );
32613
32970
  return this.consolidateMetricsAndResult(
32614
32971
  startTime,
32615
32972
  state,
32616
- messages,
32617
- result
32973
+ closeResult.messages,
32974
+ result,
32975
+ maxSteps,
32976
+ closeResult.output
32618
32977
  );
32619
32978
  } catch (error) {
32620
32979
  if (error instanceof StreamingCallbacksInNonStreamingModeError) {
@@ -32691,7 +33050,7 @@ var V3AgentHandler = class {
32691
33050
  stopWhen: (result) => this.handleStop(result, maxSteps),
32692
33051
  temperature: 1,
32693
33052
  toolChoice: "auto",
32694
- prepareStep: callbacks == null ? void 0 : callbacks.prepareStep,
33053
+ prepareStep: this.createPrepareStep(callbacks == null ? void 0 : callbacks.prepareStep),
32695
33054
  onStepFinish: this.createStepHandler(state, callbacks == null ? void 0 : callbacks.onStepFinish),
32696
33055
  onError: (event) => {
32697
33056
  if (callbacks == null ? void 0 : callbacks.onError) {
@@ -32701,16 +33060,29 @@ var V3AgentHandler = class {
32701
33060
  },
32702
33061
  onChunk: callbacks == null ? void 0 : callbacks.onChunk,
32703
33062
  onFinish: (event) => {
33063
+ var _a5;
32704
33064
  if (callbacks == null ? void 0 : callbacks.onFinish) {
32705
33065
  callbacks.onFinish(event);
32706
33066
  }
32707
- const result = this.consolidateMetricsAndResult(
32708
- startTime,
33067
+ const allMessages = [...messages, ...((_a5 = event.response) == null ? void 0 : _a5.messages) || []];
33068
+ this.ensureClosed(
32709
33069
  state,
32710
- messages,
32711
- event
32712
- );
32713
- resolveResult(result);
33070
+ wrappedModel,
33071
+ allMessages,
33072
+ options.instruction,
33073
+ options.output,
33074
+ this.logger
33075
+ ).then((closeResult) => {
33076
+ const result = this.consolidateMetricsAndResult(
33077
+ startTime,
33078
+ state,
33079
+ closeResult.messages,
33080
+ event,
33081
+ maxSteps,
33082
+ closeResult.output
33083
+ );
33084
+ resolveResult(result);
33085
+ });
32714
33086
  },
32715
33087
  onAbort: (event) => {
32716
33088
  var _a5;
@@ -32732,11 +33104,20 @@ var V3AgentHandler = class {
32732
33104
  return agentStreamResult;
32733
33105
  });
32734
33106
  }
32735
- consolidateMetricsAndResult(startTime, state, inputMessages, result) {
33107
+ consolidateMetricsAndResult(startTime, state, inputMessages, result, maxSteps, output) {
32736
33108
  var _a4;
32737
33109
  if (!state.finalMessage) {
32738
33110
  const allReasoning = state.collectedReasoning.join(" ").trim();
32739
- state.finalMessage = allReasoning || result.text || "";
33111
+ if (!state.completed && maxSteps && ((_a4 = result.steps) == null ? void 0 : _a4.length) >= maxSteps) {
33112
+ this.logger({
33113
+ category: "agent",
33114
+ message: `Agent stopped: reached maximum steps (${maxSteps})`,
33115
+ level: 1
33116
+ });
33117
+ state.finalMessage = `Agent stopped: reached maximum steps (${maxSteps})`;
33118
+ } else {
33119
+ state.finalMessage = allReasoning || result.text || "";
33120
+ }
32740
33121
  }
32741
33122
  const endTime = Date.now();
32742
33123
  const inferenceTimeMs = endTime - startTime;
@@ -32750,16 +33131,12 @@ var V3AgentHandler = class {
32750
33131
  inferenceTimeMs
32751
33132
  );
32752
33133
  }
32753
- const responseMessages = ((_a4 = result.response) == null ? void 0 : _a4.messages) || [];
32754
- const fullMessages = [
32755
- ...inputMessages,
32756
- ...responseMessages
32757
- ];
32758
33134
  return {
32759
33135
  success: state.completed,
32760
33136
  message: state.finalMessage || "Task execution completed",
32761
33137
  actions: state.actions,
32762
33138
  completed: state.completed,
33139
+ output,
32763
33140
  usage: result.usage ? {
32764
33141
  input_tokens: result.usage.inputTokens || 0,
32765
33142
  output_tokens: result.usage.outputTokens || 0,
@@ -32767,7 +33144,7 @@ var V3AgentHandler = class {
32767
33144
  cached_input_tokens: result.usage.cachedInputTokens || 0,
32768
33145
  inference_time_ms: inferenceTimeMs
32769
33146
  } : void 0,
32770
- messages: fullMessages
33147
+ messages: inputMessages
32771
33148
  };
32772
33149
  }
32773
33150
  createTools(excludeTools) {
@@ -32787,7 +33164,47 @@ var V3AgentHandler = class {
32787
33164
  if ((_a4 = lastStep == null ? void 0 : lastStep.toolCalls) == null ? void 0 : _a4.some((tc) => tc.toolName === "close")) {
32788
33165
  return true;
32789
33166
  }
32790
- return (0, import_ai19.stepCountIs)(maxSteps)(result);
33167
+ return (0, import_ai20.stepCountIs)(maxSteps)(result);
33168
+ }
33169
+ /**
33170
+ * Ensures the close tool is called at the end of agent execution.
33171
+ * Returns the messages and any extracted output from the close call.
33172
+ */
33173
+ ensureClosed(state, model, messages, instruction, outputSchema, logger) {
33174
+ return __async(this, null, function* () {
33175
+ if (state.completed) return { messages };
33176
+ const closeResult = yield handleCloseToolCall({
33177
+ model,
33178
+ inputMessages: messages,
33179
+ instruction,
33180
+ outputSchema,
33181
+ logger
33182
+ });
33183
+ state.completed = closeResult.taskComplete;
33184
+ state.finalMessage = closeResult.reasoning;
33185
+ const closeAction = mapToolResultToActions({
33186
+ toolCallName: "close",
33187
+ toolResult: {
33188
+ success: true,
33189
+ reasoning: closeResult.reasoning,
33190
+ taskComplete: closeResult.taskComplete
33191
+ },
33192
+ args: {
33193
+ reasoning: closeResult.reasoning,
33194
+ taskComplete: closeResult.taskComplete
33195
+ },
33196
+ reasoning: closeResult.reasoning
33197
+ });
33198
+ for (const action of closeAction) {
33199
+ action.pageUrl = state.currentPageUrl;
33200
+ action.timestamp = Date.now();
33201
+ state.actions.push(action);
33202
+ }
33203
+ return {
33204
+ messages: [...messages, ...closeResult.messages],
33205
+ output: closeResult.output
33206
+ };
33207
+ });
32791
33208
  }
32792
33209
  /**
32793
33210
  * Capture a screenshot and emit it via the event bus
@@ -33269,8 +33686,8 @@ var AnthropicCUAClient = class extends AgentClient {
33269
33686
  betas: ["computer-use-2025-01-24"]
33270
33687
  };
33271
33688
  if (this.tools && Object.keys(this.tools).length > 0) {
33272
- const customTools = Object.entries(this.tools).map(([name, tool21]) => {
33273
- const schema = tool21.inputSchema;
33689
+ const customTools = Object.entries(this.tools).map(([name, tool22]) => {
33690
+ const schema = tool22.inputSchema;
33274
33691
  const jsonSchema3 = toJsonSchema(schema);
33275
33692
  const inputSchema = {
33276
33693
  type: "object",
@@ -33279,7 +33696,7 @@ var AnthropicCUAClient = class extends AgentClient {
33279
33696
  };
33280
33697
  return {
33281
33698
  name,
33282
- description: tool21.description,
33699
+ description: tool22.description,
33283
33700
  input_schema: inputSchema
33284
33701
  };
33285
33702
  });
@@ -33397,13 +33814,13 @@ var AnthropicCUAClient = class extends AgentClient {
33397
33814
  let toolResult = "Tool executed successfully";
33398
33815
  if (this.tools && item.name in this.tools) {
33399
33816
  try {
33400
- const tool21 = this.tools[item.name];
33817
+ const tool22 = this.tools[item.name];
33401
33818
  logger({
33402
33819
  category: "agent",
33403
33820
  message: `Executing tool call: ${item.name} with args: ${JSON.stringify(item.input)}`,
33404
33821
  level: 1
33405
33822
  });
33406
- const result = yield tool21.execute(item.input, {
33823
+ const result = yield tool22.execute(item.input, {
33407
33824
  toolCallId: item.id,
33408
33825
  messages: []
33409
33826
  });
@@ -33674,7 +34091,6 @@ var OpenAICUAClient = class extends AgentClient {
33674
34091
  constructor(type, modelName, userProvidedInstructions, clientOptions, tools) {
33675
34092
  super(type, modelName, userProvidedInstructions);
33676
34093
  this.currentViewport = { width: 1288, height: 711 };
33677
- this.actualScreenshotSize = { width: 1288, height: 711 };
33678
34094
  this.reasoningItems = /* @__PURE__ */ new Map();
33679
34095
  this.environment = "browser";
33680
34096
  this.apiKey = (clientOptions == null ? void 0 : clientOptions.apiKey) || process.env.OPENAI_API_KEY || "";
@@ -33695,9 +34111,6 @@ var OpenAICUAClient = class extends AgentClient {
33695
34111
  setViewport(width, height) {
33696
34112
  this.currentViewport = { width, height };
33697
34113
  }
33698
- setScreenshotSize(width, height) {
33699
- this.actualScreenshotSize = { width, height };
33700
- }
33701
34114
  setCurrentUrl(url) {
33702
34115
  this.currentUrl = url;
33703
34116
  }
@@ -33965,13 +34378,13 @@ var OpenAICUAClient = class extends AgentClient {
33965
34378
  truncation: "auto"
33966
34379
  };
33967
34380
  if (this.tools && Object.keys(this.tools).length > 0) {
33968
- const customTools = Object.entries(this.tools).map(([name, tool21]) => ({
34381
+ const customTools = Object.entries(this.tools).map(([name, tool22]) => ({
33969
34382
  type: "function",
33970
34383
  name,
33971
34384
  function: {
33972
34385
  name,
33973
- description: tool21.description,
33974
- parameters: tool21.inputSchema
34386
+ description: tool22.description,
34387
+ parameters: tool22.inputSchema
33975
34388
  }
33976
34389
  }));
33977
34390
  requestParams.tools = [
@@ -34123,14 +34536,14 @@ var OpenAICUAClient = class extends AgentClient {
34123
34536
  let toolResult = "Tool executed successfully";
34124
34537
  if (this.tools && item.name in this.tools) {
34125
34538
  try {
34126
- const tool21 = this.tools[item.name];
34539
+ const tool22 = this.tools[item.name];
34127
34540
  const args = JSON.parse(item.arguments);
34128
34541
  logger({
34129
34542
  category: "agent",
34130
34543
  message: `Executing tool call: ${item.name} with args: ${item.arguments}`,
34131
34544
  level: 1
34132
34545
  });
34133
- const result = yield tool21.execute(args, {
34546
+ const result = yield tool22.execute(args, {
34134
34547
  toolCallId: item.call_id,
34135
34548
  messages: []
34136
34549
  });
@@ -34180,16 +34593,9 @@ var OpenAICUAClient = class extends AgentClient {
34180
34593
  }
34181
34594
  convertComputerCallToAction(call) {
34182
34595
  const { action } = call;
34183
- const scaledAction = __spreadValues({}, action);
34184
- if (action.x !== void 0 && action.y !== void 0) {
34185
- const scaleX = this.currentViewport.width / this.actualScreenshotSize.width;
34186
- const scaleY = this.currentViewport.height / this.actualScreenshotSize.height;
34187
- scaledAction.x = Math.floor(action.x * scaleX);
34188
- scaledAction.y = Math.floor(action.y * scaleY);
34189
- }
34190
34596
  return __spreadValues({
34191
34597
  type: action.type
34192
- }, scaledAction);
34598
+ }, action);
34193
34599
  }
34194
34600
  convertFunctionCallToAction(call) {
34195
34601
  try {
@@ -34293,8 +34699,8 @@ function executeGoogleCustomTool(toolName, toolArgs, tools, functionCall, logger
34293
34699
  message: `Executing custom tool: ${toolName} with args: ${JSON.stringify(toolArgs)}`,
34294
34700
  level: 1
34295
34701
  });
34296
- const tool21 = tools[toolName];
34297
- const toolResult = yield tool21.execute(toolArgs, {
34702
+ const tool22 = tools[toolName];
34703
+ const toolResult = yield tool22.execute(toolArgs, {
34298
34704
  toolCallId: `tool_${Date.now()}`,
34299
34705
  messages: []
34300
34706
  });
@@ -34342,22 +34748,22 @@ function isCustomTool(functionCall, tools) {
34342
34748
  }
34343
34749
  function convertToolSetToFunctionDeclarations(tools) {
34344
34750
  const functionDeclarations = [];
34345
- for (const [name, tool21] of Object.entries(tools)) {
34346
- const functionDeclaration = convertToolToFunctionDeclaration(name, tool21);
34751
+ for (const [name, tool22] of Object.entries(tools)) {
34752
+ const functionDeclaration = convertToolToFunctionDeclaration(name, tool22);
34347
34753
  if (functionDeclaration) {
34348
34754
  functionDeclarations.push(functionDeclaration);
34349
34755
  }
34350
34756
  }
34351
34757
  return functionDeclarations;
34352
34758
  }
34353
- function convertToolToFunctionDeclaration(name, tool21) {
34759
+ function convertToolToFunctionDeclaration(name, tool22) {
34354
34760
  try {
34355
- const schema = tool21.inputSchema;
34761
+ const schema = tool22.inputSchema;
34356
34762
  const jsonSchema3 = toJsonSchema(schema);
34357
34763
  const parameters = convertJsonSchemaToGoogleParameters(jsonSchema3);
34358
34764
  return {
34359
34765
  name,
34360
- description: tool21.description || `Execute ${name}`,
34766
+ description: tool22.description || `Execute ${name}`,
34361
34767
  parameters
34362
34768
  };
34363
34769
  } catch (error) {
@@ -34408,7 +34814,6 @@ var GoogleCUAClient = class extends AgentClient {
34408
34814
  constructor(type, modelName, userProvidedInstructions, clientOptions, tools) {
34409
34815
  super(type, modelName, userProvidedInstructions);
34410
34816
  this.currentViewport = { width: 1288, height: 711 };
34411
- this.actualScreenshotSize = { width: 1288, height: 711 };
34412
34817
  this.history = [];
34413
34818
  this.environment = "ENVIRONMENT_BROWSER";
34414
34819
  this.tools = tools;
@@ -34447,9 +34852,6 @@ var GoogleCUAClient = class extends AgentClient {
34447
34852
  setViewport(width, height) {
34448
34853
  this.currentViewport = { width, height };
34449
34854
  }
34450
- setScreenshotSize(width, height) {
34451
- this.actualScreenshotSize = { width, height };
34452
- }
34453
34855
  setCurrentUrl(url) {
34454
34856
  this.currentUrl = url;
34455
34857
  }
@@ -35011,9 +35413,26 @@ var GoogleCUAClient = class extends AgentClient {
35011
35413
  }
35012
35414
  case "scroll_document": {
35013
35415
  const direction = args.direction.toLowerCase();
35416
+ const magnitude = typeof args.magnitude === "number" ? args.magnitude : 800;
35417
+ let scroll_x = 0;
35418
+ let scroll_y = 0;
35419
+ if (direction === "up") {
35420
+ scroll_y = -magnitude;
35421
+ } else if (direction === "down") {
35422
+ scroll_y = magnitude;
35423
+ } else if (direction === "left") {
35424
+ scroll_x = -magnitude;
35425
+ } else if (direction === "right") {
35426
+ scroll_x = magnitude;
35427
+ } else {
35428
+ scroll_y = magnitude;
35429
+ }
35014
35430
  return {
35015
- type: "keypress",
35016
- keys: [direction === "up" ? "PageUp" : "PageDown"]
35431
+ type: "scroll",
35432
+ x: 0,
35433
+ y: 0,
35434
+ scroll_x,
35435
+ scroll_y
35017
35436
  };
35018
35437
  }
35019
35438
  case "scroll_at": {
@@ -35116,13 +35535,9 @@ var GoogleCUAClient = class extends AgentClient {
35116
35535
  normalizeCoordinates(x2, y) {
35117
35536
  x2 = Math.min(999, Math.max(0, x2));
35118
35537
  y = Math.min(999, Math.max(0, y));
35119
- const screenshotX = x2 / 1e3 * this.actualScreenshotSize.width;
35120
- const screenshotY = y / 1e3 * this.actualScreenshotSize.height;
35121
- const scaleX = this.currentViewport.width / this.actualScreenshotSize.width;
35122
- const scaleY = this.currentViewport.height / this.actualScreenshotSize.height;
35123
35538
  return {
35124
- x: Math.floor(screenshotX * scaleX),
35125
- y: Math.floor(screenshotY * scaleY)
35539
+ x: Math.floor(x2 / 1e3 * this.currentViewport.width),
35540
+ y: Math.floor(y / 1e3 * this.currentViewport.height)
35126
35541
  };
35127
35542
  }
35128
35543
  captureScreenshot(options) {
@@ -35173,8 +35588,6 @@ var MicrosoftCUAClient = class extends AgentClient {
35173
35588
  };
35174
35589
  // Resized dimensions for model input
35175
35590
  this.resizedViewport = { width: 1288, height: 711 };
35176
- // Actual screenshot dimensions (tracked separately from viewport)
35177
- this.actualScreenshotSize = { width: 1288, height: 711 };
35178
35591
  this.apiKey = (clientOptions == null ? void 0 : clientOptions.apiKey) || process.env.AZURE_API_KEY || process.env.FIREWORKS_API_KEY || "";
35179
35592
  this.baseURL = (clientOptions == null ? void 0 : clientOptions.baseURL) || process.env.AZURE_ENDPOINT || process.env.FIREWORKS_ENDPOINT || "";
35180
35593
  this.clientOptions = {
@@ -35201,9 +35614,6 @@ var MicrosoftCUAClient = class extends AgentClient {
35201
35614
  this.currentViewport = { width, height };
35202
35615
  this.resizedViewport = this.smartResize(width, height);
35203
35616
  }
35204
- setScreenshotSize(width, height) {
35205
- this.actualScreenshotSize = { width, height };
35206
- }
35207
35617
  setCurrentUrl(url) {
35208
35618
  this.currentUrl = url;
35209
35619
  }
@@ -35241,7 +35651,7 @@ var MicrosoftCUAClient = class extends AgentClient {
35241
35651
  * Simplified to match Python's minimal approach
35242
35652
  */
35243
35653
  generateSystemPrompt() {
35244
- const { width, height } = this.actualScreenshotSize;
35654
+ const { width, height } = this.resizedViewport;
35245
35655
  let basePrompt = "You are a helpful assistant.";
35246
35656
  if (this.userProvidedInstructions) {
35247
35657
  basePrompt = `${basePrompt}
@@ -35402,8 +35812,8 @@ ${functionCallTemplate}`;
35402
35812
  const transformCoordinate = (coord) => {
35403
35813
  if (!coord || coord.length !== 2) return coord;
35404
35814
  const [x2, y] = coord;
35405
- const scaleX = this.currentViewport.width / this.actualScreenshotSize.width;
35406
- const scaleY = this.currentViewport.height / this.actualScreenshotSize.height;
35815
+ const scaleX = this.currentViewport.width / this.resizedViewport.width;
35816
+ const scaleY = this.currentViewport.height / this.resizedViewport.height;
35407
35817
  return [Math.round(x2 * scaleX), Math.round(y * scaleY)];
35408
35818
  };
35409
35819
  const baseAction = {
@@ -35831,6 +36241,7 @@ var modelToAgentProviderMap = {
35831
36241
  "claude-opus-4-5-20251101": "anthropic",
35832
36242
  "claude-haiku-4-5-20251001": "anthropic",
35833
36243
  "gemini-2.5-computer-use-preview-10-2025": "google",
36244
+ "gemini-3-flash-computer-use": "google",
35834
36245
  "fara-7b": "microsoft"
35835
36246
  };
35836
36247
  var AgentProvider = class _AgentProvider {
@@ -35912,14 +36323,6 @@ var AgentProvider = class _AgentProvider {
35912
36323
  // lib/v3/handlers/v3CuaAgentHandler.ts
35913
36324
  init_flowLogger();
35914
36325
  init_sdkErrors();
35915
- function getPNGDimensions(buffer) {
35916
- if (buffer.length < 24 || buffer[0] !== 137 || buffer[1] !== 80 || buffer[2] !== 78 || buffer[3] !== 71) {
35917
- throw new Error("Invalid PNG file");
35918
- }
35919
- const width = buffer.readUInt32BE(16);
35920
- const height = buffer.readUInt32BE(20);
35921
- return { width, height };
35922
- }
35923
36326
  var V3CuaAgentHandler = class {
35924
36327
  constructor(v3, logger, options, tools) {
35925
36328
  this.v3 = v3;
@@ -35950,21 +36353,6 @@ var V3CuaAgentHandler = class {
35950
36353
  this.ensureNotClosed();
35951
36354
  const page = yield this.v3.context.awaitActivePage();
35952
36355
  const screenshotBuffer = yield page.screenshot({ fullPage: false });
35953
- if (this.agentClient instanceof GoogleCUAClient || this.agentClient instanceof OpenAICUAClient || this.agentClient instanceof MicrosoftCUAClient) {
35954
- try {
35955
- const dimensions = getPNGDimensions(screenshotBuffer);
35956
- this.agentClient.setScreenshotSize(
35957
- dimensions.width,
35958
- dimensions.height
35959
- );
35960
- } catch (e2) {
35961
- this.logger({
35962
- category: "agent",
35963
- message: `Could not read screenshot dimensions: ${e2}`,
35964
- level: 1
35965
- });
35966
- }
35967
- }
35968
36356
  return screenshotBuffer.toString("base64");
35969
36357
  }));
35970
36358
  this.agentClient.setActionHandler((action) => __async(this, null, function* () {
@@ -36418,21 +36806,6 @@ var V3CuaAgentHandler = class {
36418
36806
  try {
36419
36807
  const page = yield this.v3.context.awaitActivePage();
36420
36808
  const screenshotBuffer = yield page.screenshot({ fullPage: false });
36421
- if (this.agentClient instanceof GoogleCUAClient || this.agentClient instanceof OpenAICUAClient || this.agentClient instanceof MicrosoftCUAClient) {
36422
- try {
36423
- const dimensions = getPNGDimensions(screenshotBuffer);
36424
- this.agentClient.setScreenshotSize(
36425
- dimensions.width,
36426
- dimensions.height
36427
- );
36428
- } catch (e2) {
36429
- this.logger({
36430
- category: "agent",
36431
- message: `Could not read screenshot dimensions: ${e2}`,
36432
- level: 1
36433
- });
36434
- }
36435
- }
36436
36809
  this.v3.bus.emit("agent_screenshot_taken_event", screenshotBuffer);
36437
36810
  const currentUrl = page.url();
36438
36811
  return yield this.agentClient.captureScreenshot({
@@ -37609,23 +37982,23 @@ function waitForWebSocketDebuggerUrl(port, timeoutMs) {
37609
37982
  init_sdkErrors();
37610
37983
 
37611
37984
  // lib/v3/llm/aisdk.ts
37612
- var import_ai21 = require("ai");
37985
+ var import_ai22 = require("ai");
37613
37986
  var import_uuid5 = require("uuid");
37614
37987
 
37615
37988
  // lib/v3/llm/LLMClient.ts
37616
- var import_ai20 = require("ai");
37989
+ var import_ai21 = require("ai");
37617
37990
  var AnnotatedScreenshotText = "This is a screenshot of the current page state with the elements annotated on it. Each element id is annotated with a number to the top left of it. Duplicate annotations at the same location are under each other vertically.";
37618
37991
  var LLMClient = class {
37619
37992
  constructor(modelName, userProvidedInstructions) {
37620
- this.generateObject = import_ai20.generateObject;
37621
- this.generateText = import_ai20.generateText;
37622
- this.streamText = import_ai20.streamText;
37623
- this.streamObject = import_ai20.streamObject;
37624
- this.generateImage = import_ai20.experimental_generateImage;
37625
- this.embed = import_ai20.embed;
37626
- this.embedMany = import_ai20.embedMany;
37627
- this.transcribe = import_ai20.experimental_transcribe;
37628
- this.generateSpeech = import_ai20.experimental_generateSpeech;
37993
+ this.generateObject = import_ai21.generateObject;
37994
+ this.generateText = import_ai21.generateText;
37995
+ this.streamText = import_ai21.streamText;
37996
+ this.streamObject = import_ai21.streamObject;
37997
+ this.generateImage = import_ai21.experimental_generateImage;
37998
+ this.embed = import_ai21.embed;
37999
+ this.embedMany = import_ai21.embedMany;
38000
+ this.transcribe = import_ai21.experimental_transcribe;
38001
+ this.generateSpeech = import_ai21.experimental_generateSpeech;
37629
38002
  this.modelName = modelName;
37630
38003
  this.userProvidedInstructions = userProvidedInstructions;
37631
38004
  }
@@ -37750,7 +38123,7 @@ You must respond in JSON format. respond WITH JSON. Do not include any other tex
37750
38123
  });
37751
38124
  }
37752
38125
  try {
37753
- objectResponse = yield (0, import_ai21.generateObject)({
38126
+ objectResponse = yield (0, import_ai22.generateObject)({
37754
38127
  model: this.model,
37755
38128
  messages: formattedMessages,
37756
38129
  schema: options.response_model.schema,
@@ -37770,7 +38143,7 @@ You must respond in JSON format. respond WITH JSON. Do not include any other tex
37770
38143
  operation: "generateObject",
37771
38144
  output: `[error: ${err instanceof Error ? err.message : "unknown"}]`
37772
38145
  });
37773
- if (import_ai21.NoObjectGeneratedError.isInstance(err)) {
38146
+ if (import_ai22.NoObjectGeneratedError.isInstance(err)) {
37774
38147
  (_g = this.logger) == null ? void 0 : _g.call(this, {
37775
38148
  category: "AISDK error",
37776
38149
  message: err.message,
@@ -37848,10 +38221,10 @@ You must respond in JSON format. respond WITH JSON. Do not include any other tex
37848
38221
  }
37849
38222
  const tools = {};
37850
38223
  if (options.tools && options.tools.length > 0) {
37851
- for (const tool21 of options.tools) {
37852
- tools[tool21.name] = {
37853
- description: tool21.description,
37854
- inputSchema: tool21.parameters
38224
+ for (const tool22 of options.tools) {
38225
+ tools[tool22.name] = {
38226
+ description: tool22.description,
38227
+ inputSchema: tool22.parameters
37855
38228
  };
37856
38229
  }
37857
38230
  }
@@ -37868,7 +38241,7 @@ You must respond in JSON format. respond WITH JSON. Do not include any other tex
37868
38241
  });
37869
38242
  let textResponse;
37870
38243
  try {
37871
- textResponse = yield (0, import_ai21.generateText)({
38244
+ textResponse = yield (0, import_ai22.generateText)({
37872
38245
  model: this.model,
37873
38246
  messages: formattedMessages,
37874
38247
  tools: Object.keys(tools).length > 0 ? tools : void 0,
@@ -38050,14 +38423,14 @@ var AnthropicClient = class extends LLMClient {
38050
38423
  }
38051
38424
  formattedMessages.push(screenshotMessage);
38052
38425
  }
38053
- let anthropicTools2 = (_a4 = options.tools) == null ? void 0 : _a4.map((tool21) => {
38426
+ let anthropicTools2 = (_a4 = options.tools) == null ? void 0 : _a4.map((tool22) => {
38054
38427
  return {
38055
- name: tool21.name,
38056
- description: tool21.description,
38428
+ name: tool22.name,
38429
+ description: tool22.description,
38057
38430
  input_schema: {
38058
38431
  type: "object",
38059
- properties: tool21.parameters.properties,
38060
- required: tool21.parameters.required
38432
+ properties: tool22.parameters.properties,
38433
+ required: tool22.parameters.required
38061
38434
  }
38062
38435
  };
38063
38436
  });
@@ -38244,15 +38617,15 @@ var CerebrasClient = class extends LLMClient {
38244
38617
  return __spreadProps(__spreadValues({}, baseMessage), { role: "user" });
38245
38618
  }
38246
38619
  });
38247
- let tools = (_a4 = options.tools) == null ? void 0 : _a4.map((tool21) => ({
38620
+ let tools = (_a4 = options.tools) == null ? void 0 : _a4.map((tool22) => ({
38248
38621
  type: "function",
38249
38622
  function: {
38250
- name: tool21.name,
38251
- description: tool21.description,
38623
+ name: tool22.name,
38624
+ description: tool22.description,
38252
38625
  parameters: {
38253
38626
  type: "object",
38254
- properties: tool21.parameters.properties,
38255
- required: tool21.parameters.required
38627
+ properties: tool22.parameters.properties,
38628
+ required: tool22.parameters.required
38256
38629
  }
38257
38630
  }
38258
38631
  }));
@@ -38545,18 +38918,18 @@ ${firstPartText.text}`;
38545
38918
  }
38546
38919
  return [
38547
38920
  {
38548
- functionDeclarations: tools.map((tool21) => {
38921
+ functionDeclarations: tools.map((tool22) => {
38549
38922
  let parameters = void 0;
38550
- if (tool21.parameters) {
38923
+ if (tool22.parameters) {
38551
38924
  parameters = {
38552
38925
  type: import_genai4.Type.OBJECT,
38553
- properties: tool21.parameters.properties,
38554
- required: tool21.parameters.required
38926
+ properties: tool22.parameters.properties,
38927
+ required: tool22.parameters.required
38555
38928
  };
38556
38929
  }
38557
38930
  return {
38558
- name: tool21.name,
38559
- description: tool21.description,
38931
+ name: tool22.name,
38932
+ description: tool22.description,
38560
38933
  parameters
38561
38934
  };
38562
38935
  })
@@ -38832,15 +39205,15 @@ var GroqClient = class extends LLMClient {
38832
39205
  return __spreadProps(__spreadValues({}, baseMessage), { role: "user" });
38833
39206
  }
38834
39207
  });
38835
- let tools = (_a4 = options.tools) == null ? void 0 : _a4.map((tool21) => ({
39208
+ let tools = (_a4 = options.tools) == null ? void 0 : _a4.map((tool22) => ({
38836
39209
  type: "function",
38837
39210
  function: {
38838
- name: tool21.name,
38839
- description: tool21.description,
39211
+ name: tool22.name,
39212
+ description: tool22.description,
38840
39213
  parameters: {
38841
39214
  type: "object",
38842
- properties: tool21.parameters.properties,
38843
- required: tool21.parameters.required
39215
+ properties: tool22.parameters.properties,
39216
+ required: tool22.parameters.required
38844
39217
  }
38845
39218
  }
38846
39219
  }));
@@ -39217,11 +39590,11 @@ ${parsedSchema}
39217
39590
  messages: formattedMessages,
39218
39591
  response_format: responseFormat,
39219
39592
  stream: false,
39220
- tools: (_e = options.tools) == null ? void 0 : _e.map((tool21) => ({
39593
+ tools: (_e = options.tools) == null ? void 0 : _e.map((tool22) => ({
39221
39594
  function: {
39222
- name: tool21.name,
39223
- description: tool21.description,
39224
- parameters: tool21.parameters
39595
+ name: tool22.name,
39596
+ description: tool22.description,
39597
+ parameters: tool22.parameters
39225
39598
  },
39226
39599
  type: "function"
39227
39600
  }))
@@ -41887,21 +42260,21 @@ function prepareChatTools({
41887
42260
  return { tools: void 0, toolChoice: void 0, toolWarnings };
41888
42261
  }
41889
42262
  const openaiTools2 = [];
41890
- for (const tool21 of tools) {
41891
- switch (tool21.type) {
42263
+ for (const tool22 of tools) {
42264
+ switch (tool22.type) {
41892
42265
  case "function":
41893
42266
  openaiTools2.push({
41894
42267
  type: "function",
41895
42268
  function: {
41896
- name: tool21.name,
41897
- description: tool21.description,
41898
- parameters: tool21.inputSchema,
42269
+ name: tool22.name,
42270
+ description: tool22.description,
42271
+ parameters: tool22.inputSchema,
41899
42272
  strict: structuredOutputs ? strictJsonSchema : void 0
41900
42273
  }
41901
42274
  });
41902
42275
  break;
41903
42276
  default:
41904
- toolWarnings.push({ type: "unsupported-tool", tool: tool21 });
42277
+ toolWarnings.push({ type: "unsupported-tool", tool: tool22 });
41905
42278
  break;
41906
42279
  }
41907
42280
  }
@@ -44217,22 +44590,22 @@ function prepareResponsesTools(_0) {
44217
44590
  return { tools: void 0, toolChoice: void 0, toolWarnings };
44218
44591
  }
44219
44592
  const openaiTools2 = [];
44220
- for (const tool21 of tools) {
44221
- switch (tool21.type) {
44593
+ for (const tool22 of tools) {
44594
+ switch (tool22.type) {
44222
44595
  case "function":
44223
44596
  openaiTools2.push({
44224
44597
  type: "function",
44225
- name: tool21.name,
44226
- description: tool21.description,
44227
- parameters: tool21.inputSchema,
44598
+ name: tool22.name,
44599
+ description: tool22.description,
44600
+ parameters: tool22.inputSchema,
44228
44601
  strict: strictJsonSchema
44229
44602
  });
44230
44603
  break;
44231
44604
  case "provider-defined": {
44232
- switch (tool21.id) {
44605
+ switch (tool22.id) {
44233
44606
  case "openai.file_search": {
44234
44607
  const args = yield validateTypes({
44235
- value: tool21.args,
44608
+ value: tool22.args,
44236
44609
  schema: fileSearchArgsSchema
44237
44610
  });
44238
44611
  openaiTools2.push({
@@ -44255,7 +44628,7 @@ function prepareResponsesTools(_0) {
44255
44628
  }
44256
44629
  case "openai.web_search_preview": {
44257
44630
  const args = yield validateTypes({
44258
- value: tool21.args,
44631
+ value: tool22.args,
44259
44632
  schema: webSearchPreviewArgsSchema
44260
44633
  });
44261
44634
  openaiTools2.push({
@@ -44267,7 +44640,7 @@ function prepareResponsesTools(_0) {
44267
44640
  }
44268
44641
  case "openai.web_search": {
44269
44642
  const args = yield validateTypes({
44270
- value: tool21.args,
44643
+ value: tool22.args,
44271
44644
  schema: webSearchArgsSchema
44272
44645
  });
44273
44646
  openaiTools2.push({
@@ -44280,7 +44653,7 @@ function prepareResponsesTools(_0) {
44280
44653
  }
44281
44654
  case "openai.code_interpreter": {
44282
44655
  const args = yield validateTypes({
44283
- value: tool21.args,
44656
+ value: tool22.args,
44284
44657
  schema: codeInterpreterArgsSchema
44285
44658
  });
44286
44659
  openaiTools2.push({
@@ -44291,7 +44664,7 @@ function prepareResponsesTools(_0) {
44291
44664
  }
44292
44665
  case "openai.image_generation": {
44293
44666
  const args = yield validateTypes({
44294
- value: tool21.args,
44667
+ value: tool22.args,
44295
44668
  schema: imageGenerationArgsSchema
44296
44669
  });
44297
44670
  openaiTools2.push({
@@ -44315,7 +44688,7 @@ function prepareResponsesTools(_0) {
44315
44688
  break;
44316
44689
  }
44317
44690
  default:
44318
- toolWarnings.push({ type: "unsupported-tool", tool: tool21 });
44691
+ toolWarnings.push({ type: "unsupported-tool", tool: tool22 });
44319
44692
  break;
44320
44693
  }
44321
44694
  }
@@ -44420,7 +44793,7 @@ var OpenAIResponsesLanguageModel = class {
44420
44793
  }
44421
44794
  function hasOpenAITool(id) {
44422
44795
  return (tools == null ? void 0 : tools.find(
44423
- (tool21) => tool21.type === "provider-defined" && tool21.id === id
44796
+ (tool22) => tool22.type === "provider-defined" && tool22.id === id
44424
44797
  )) != null;
44425
44798
  }
44426
44799
  const topLogprobs = typeof (openaiOptions == null ? void 0 : openaiOptions.logprobs) === "number" ? openaiOptions == null ? void 0 : openaiOptions.logprobs : (openaiOptions == null ? void 0 : openaiOptions.logprobs) === true ? TOP_LOGPROBS_MAX : void 0;
@@ -44428,7 +44801,7 @@ var OpenAIResponsesLanguageModel = class {
44428
44801
  addInclude("message.output_text.logprobs");
44429
44802
  }
44430
44803
  const webSearchToolName = (_c = tools == null ? void 0 : tools.find(
44431
- (tool21) => tool21.type === "provider-defined" && (tool21.id === "openai.web_search" || tool21.id === "openai.web_search_preview")
44804
+ (tool22) => tool22.type === "provider-defined" && (tool22.id === "openai.web_search" || tool22.id === "openai.web_search_preview")
44432
44805
  )) == null ? void 0 : _c.name;
44433
44806
  if (webSearchToolName) {
44434
44807
  addInclude("web_search_call.action.sources");
@@ -48022,25 +48395,25 @@ function prepareTools({
48022
48395
  if (tools == null) {
48023
48396
  return { tools: void 0, toolConfig: void 0, toolWarnings };
48024
48397
  }
48025
- const hasFunctionTools = tools.some((tool21) => tool21.type === "function");
48398
+ const hasFunctionTools = tools.some((tool22) => tool22.type === "function");
48026
48399
  const hasProviderDefinedTools = tools.some(
48027
- (tool21) => tool21.type === "provider-defined"
48400
+ (tool22) => tool22.type === "provider-defined"
48028
48401
  );
48029
48402
  if (hasFunctionTools && hasProviderDefinedTools) {
48030
- const functionTools = tools.filter((tool21) => tool21.type === "function");
48403
+ const functionTools = tools.filter((tool22) => tool22.type === "function");
48031
48404
  toolWarnings.push({
48032
48405
  type: "unsupported-tool",
48033
- tool: tools.find((tool21) => tool21.type === "function"),
48406
+ tool: tools.find((tool22) => tool22.type === "function"),
48034
48407
  details: `Cannot mix function tools with provider-defined tools in the same request. Falling back to provider-defined tools only. The following function tools will be ignored: ${functionTools.map((t2) => t2.name).join(", ")}. Please use either function tools or provider-defined tools, but not both.`
48035
48408
  });
48036
48409
  }
48037
48410
  if (hasProviderDefinedTools) {
48038
48411
  const googleTools22 = [];
48039
48412
  const providerDefinedTools = tools.filter(
48040
- (tool21) => tool21.type === "provider-defined"
48413
+ (tool22) => tool22.type === "provider-defined"
48041
48414
  );
48042
- providerDefinedTools.forEach((tool21) => {
48043
- switch (tool21.id) {
48415
+ providerDefinedTools.forEach((tool22) => {
48416
+ switch (tool22.id) {
48044
48417
  case "google.google_search":
48045
48418
  if (isGemini2orNewer) {
48046
48419
  googleTools22.push({ googleSearch: {} });
@@ -48048,8 +48421,8 @@ function prepareTools({
48048
48421
  googleTools22.push({
48049
48422
  googleSearchRetrieval: {
48050
48423
  dynamicRetrievalConfig: {
48051
- mode: tool21.args.mode,
48052
- dynamicThreshold: tool21.args.dynamicThreshold
48424
+ mode: tool22.args.mode,
48425
+ dynamicThreshold: tool22.args.dynamicThreshold
48053
48426
  }
48054
48427
  }
48055
48428
  });
@@ -48063,7 +48436,7 @@ function prepareTools({
48063
48436
  } else {
48064
48437
  toolWarnings.push({
48065
48438
  type: "unsupported-tool",
48066
- tool: tool21,
48439
+ tool: tool22,
48067
48440
  details: "The URL context tool is not supported with other Gemini models than Gemini 2."
48068
48441
  });
48069
48442
  }
@@ -48074,18 +48447,18 @@ function prepareTools({
48074
48447
  } else {
48075
48448
  toolWarnings.push({
48076
48449
  type: "unsupported-tool",
48077
- tool: tool21,
48450
+ tool: tool22,
48078
48451
  details: "The code execution tools is not supported with other Gemini models than Gemini 2."
48079
48452
  });
48080
48453
  }
48081
48454
  break;
48082
48455
  case "google.file_search":
48083
48456
  if (supportsFileSearch) {
48084
- googleTools22.push({ fileSearch: __spreadValues({}, tool21.args) });
48457
+ googleTools22.push({ fileSearch: __spreadValues({}, tool22.args) });
48085
48458
  } else {
48086
48459
  toolWarnings.push({
48087
48460
  type: "unsupported-tool",
48088
- tool: tool21,
48461
+ tool: tool22,
48089
48462
  details: "The file search tool is only supported with Gemini 2.5 models."
48090
48463
  });
48091
48464
  }
@@ -48096,22 +48469,22 @@ function prepareTools({
48096
48469
  retrieval: {
48097
48470
  vertex_rag_store: {
48098
48471
  rag_resources: {
48099
- rag_corpus: tool21.args.ragCorpus
48472
+ rag_corpus: tool22.args.ragCorpus
48100
48473
  },
48101
- similarity_top_k: tool21.args.topK
48474
+ similarity_top_k: tool22.args.topK
48102
48475
  }
48103
48476
  }
48104
48477
  });
48105
48478
  } else {
48106
48479
  toolWarnings.push({
48107
48480
  type: "unsupported-tool",
48108
- tool: tool21,
48481
+ tool: tool22,
48109
48482
  details: "The RAG store tool is not supported with other Gemini models than Gemini 2."
48110
48483
  });
48111
48484
  }
48112
48485
  break;
48113
48486
  default:
48114
- toolWarnings.push({ type: "unsupported-tool", tool: tool21 });
48487
+ toolWarnings.push({ type: "unsupported-tool", tool: tool22 });
48115
48488
  break;
48116
48489
  }
48117
48490
  });
@@ -48122,17 +48495,17 @@ function prepareTools({
48122
48495
  };
48123
48496
  }
48124
48497
  const functionDeclarations = [];
48125
- for (const tool21 of tools) {
48126
- switch (tool21.type) {
48498
+ for (const tool22 of tools) {
48499
+ switch (tool22.type) {
48127
48500
  case "function":
48128
48501
  functionDeclarations.push({
48129
- name: tool21.name,
48130
- description: (_a4 = tool21.description) != null ? _a4 : "",
48131
- parameters: convertJSONSchemaToOpenAPISchema(tool21.inputSchema)
48502
+ name: tool22.name,
48503
+ description: (_a4 = tool22.description) != null ? _a4 : "",
48504
+ parameters: convertJSONSchemaToOpenAPISchema(tool22.inputSchema)
48132
48505
  });
48133
48506
  break;
48134
48507
  default:
48135
- toolWarnings.push({ type: "unsupported-tool", tool: tool21 });
48508
+ toolWarnings.push({ type: "unsupported-tool", tool: tool22 });
48136
48509
  break;
48137
48510
  }
48138
48511
  }
@@ -48246,7 +48619,7 @@ var GoogleGenerativeAILanguageModel = class {
48246
48619
  schema: googleGenerativeAIProviderOptions
48247
48620
  });
48248
48621
  if ((tools == null ? void 0 : tools.some(
48249
- (tool21) => tool21.type === "provider-defined" && tool21.id === "google.vertex_rag_store"
48622
+ (tool22) => tool22.type === "provider-defined" && tool22.id === "google.vertex_rag_store"
48250
48623
  )) && !this.config.provider.startsWith("google.vertex.")) {
48251
48624
  warnings.push({
48252
48625
  type: "other",
@@ -50108,23 +50481,23 @@ function prepareTools2(_0) {
50108
50481
  return { tools: void 0, toolChoice: void 0, toolWarnings, betas };
50109
50482
  }
50110
50483
  const anthropicTools2 = [];
50111
- for (const tool21 of tools) {
50112
- switch (tool21.type) {
50484
+ for (const tool22 of tools) {
50485
+ switch (tool22.type) {
50113
50486
  case "function": {
50114
- const cacheControl = validator3.getCacheControl(tool21.providerOptions, {
50487
+ const cacheControl = validator3.getCacheControl(tool22.providerOptions, {
50115
50488
  type: "tool definition",
50116
50489
  canCache: true
50117
50490
  });
50118
50491
  anthropicTools2.push({
50119
- name: tool21.name,
50120
- description: tool21.description,
50121
- input_schema: tool21.inputSchema,
50492
+ name: tool22.name,
50493
+ description: tool22.description,
50494
+ input_schema: tool22.inputSchema,
50122
50495
  cache_control: cacheControl
50123
50496
  });
50124
50497
  break;
50125
50498
  }
50126
50499
  case "provider-defined": {
50127
- switch (tool21.id) {
50500
+ switch (tool22.id) {
50128
50501
  case "anthropic.code_execution_20250522": {
50129
50502
  betas.add("code-execution-2025-05-22");
50130
50503
  anthropicTools2.push({
@@ -50147,9 +50520,9 @@ function prepareTools2(_0) {
50147
50520
  anthropicTools2.push({
50148
50521
  name: "computer",
50149
50522
  type: "computer_20250124",
50150
- display_width_px: tool21.args.displayWidthPx,
50151
- display_height_px: tool21.args.displayHeightPx,
50152
- display_number: tool21.args.displayNumber,
50523
+ display_width_px: tool22.args.displayWidthPx,
50524
+ display_height_px: tool22.args.displayHeightPx,
50525
+ display_number: tool22.args.displayNumber,
50153
50526
  cache_control: void 0
50154
50527
  });
50155
50528
  break;
@@ -50159,9 +50532,9 @@ function prepareTools2(_0) {
50159
50532
  anthropicTools2.push({
50160
50533
  name: "computer",
50161
50534
  type: "computer_20241022",
50162
- display_width_px: tool21.args.displayWidthPx,
50163
- display_height_px: tool21.args.displayHeightPx,
50164
- display_number: tool21.args.displayNumber,
50535
+ display_width_px: tool22.args.displayWidthPx,
50536
+ display_height_px: tool22.args.displayHeightPx,
50537
+ display_number: tool22.args.displayNumber,
50165
50538
  cache_control: void 0
50166
50539
  });
50167
50540
  break;
@@ -50195,7 +50568,7 @@ function prepareTools2(_0) {
50195
50568
  }
50196
50569
  case "anthropic.text_editor_20250728": {
50197
50570
  const args = yield validateTypes2({
50198
- value: tool21.args,
50571
+ value: tool22.args,
50199
50572
  schema: textEditor_20250728ArgsSchema
50200
50573
  });
50201
50574
  anthropicTools2.push({
@@ -50235,7 +50608,7 @@ function prepareTools2(_0) {
50235
50608
  case "anthropic.web_fetch_20250910": {
50236
50609
  betas.add("web-fetch-2025-09-10");
50237
50610
  const args = yield validateTypes2({
50238
- value: tool21.args,
50611
+ value: tool22.args,
50239
50612
  schema: webFetch_20250910ArgsSchema
50240
50613
  });
50241
50614
  anthropicTools2.push({
@@ -50252,7 +50625,7 @@ function prepareTools2(_0) {
50252
50625
  }
50253
50626
  case "anthropic.web_search_20250305": {
50254
50627
  const args = yield validateTypes2({
50255
- value: tool21.args,
50628
+ value: tool22.args,
50256
50629
  schema: webSearch_20250305ArgsSchema
50257
50630
  });
50258
50631
  anthropicTools2.push({
@@ -50267,14 +50640,14 @@ function prepareTools2(_0) {
50267
50640
  break;
50268
50641
  }
50269
50642
  default: {
50270
- toolWarnings.push({ type: "unsupported-tool", tool: tool21 });
50643
+ toolWarnings.push({ type: "unsupported-tool", tool: tool22 });
50271
50644
  break;
50272
50645
  }
50273
50646
  }
50274
50647
  break;
50275
50648
  }
50276
50649
  default: {
50277
- toolWarnings.push({ type: "unsupported-tool", tool: tool21 });
50650
+ toolWarnings.push({ type: "unsupported-tool", tool: tool22 });
50278
50651
  break;
50279
50652
  }
50280
50653
  }
@@ -51233,7 +51606,7 @@ var AnthropicMessagesLanguageModel = class {
51233
51606
  betas.add("skills-2025-10-02");
51234
51607
  betas.add("files-api-2025-04-14");
51235
51608
  if (!(tools == null ? void 0 : tools.some(
51236
- (tool21) => tool21.type === "provider-defined" && tool21.id === "anthropic.code_execution_20250825"
51609
+ (tool22) => tool22.type === "provider-defined" && tool22.id === "anthropic.code_execution_20250825"
51237
51610
  ))) {
51238
51611
  warnings.push({
51239
51612
  type: "other",
@@ -53071,24 +53444,24 @@ function prepareTools3({
53071
53444
  if (tools == null) {
53072
53445
  return { tools: void 0, toolConfig: void 0, toolWarnings };
53073
53446
  }
53074
- const hasFunctionTools = tools.some((tool21) => tool21.type === "function");
53447
+ const hasFunctionTools = tools.some((tool22) => tool22.type === "function");
53075
53448
  const hasProviderDefinedTools = tools.some(
53076
- (tool21) => tool21.type === "provider-defined"
53449
+ (tool22) => tool22.type === "provider-defined"
53077
53450
  );
53078
53451
  if (hasFunctionTools && hasProviderDefinedTools) {
53079
53452
  toolWarnings.push({
53080
53453
  type: "unsupported-tool",
53081
- tool: tools.find((tool21) => tool21.type === "function"),
53454
+ tool: tools.find((tool22) => tool22.type === "function"),
53082
53455
  details: "Cannot mix function tools with provider-defined tools in the same request. Please use either function tools or provider-defined tools, but not both."
53083
53456
  });
53084
53457
  }
53085
53458
  if (hasProviderDefinedTools) {
53086
53459
  const googleTools22 = [];
53087
53460
  const providerDefinedTools = tools.filter(
53088
- (tool21) => tool21.type === "provider-defined"
53461
+ (tool22) => tool22.type === "provider-defined"
53089
53462
  );
53090
- providerDefinedTools.forEach((tool21) => {
53091
- switch (tool21.id) {
53463
+ providerDefinedTools.forEach((tool22) => {
53464
+ switch (tool22.id) {
53092
53465
  case "google.google_search":
53093
53466
  if (isGemini2) {
53094
53467
  googleTools22.push({ googleSearch: {} });
@@ -53096,8 +53469,8 @@ function prepareTools3({
53096
53469
  googleTools22.push({
53097
53470
  googleSearchRetrieval: {
53098
53471
  dynamicRetrievalConfig: {
53099
- mode: tool21.args.mode,
53100
- dynamicThreshold: tool21.args.dynamicThreshold
53472
+ mode: tool22.args.mode,
53473
+ dynamicThreshold: tool22.args.dynamicThreshold
53101
53474
  }
53102
53475
  }
53103
53476
  });
@@ -53111,7 +53484,7 @@ function prepareTools3({
53111
53484
  } else {
53112
53485
  toolWarnings.push({
53113
53486
  type: "unsupported-tool",
53114
- tool: tool21,
53487
+ tool: tool22,
53115
53488
  details: "The URL context tool is not supported with other Gemini models than Gemini 2."
53116
53489
  });
53117
53490
  }
@@ -53122,13 +53495,13 @@ function prepareTools3({
53122
53495
  } else {
53123
53496
  toolWarnings.push({
53124
53497
  type: "unsupported-tool",
53125
- tool: tool21,
53498
+ tool: tool22,
53126
53499
  details: "The code execution tools is not supported with other Gemini models than Gemini 2."
53127
53500
  });
53128
53501
  }
53129
53502
  break;
53130
53503
  default:
53131
- toolWarnings.push({ type: "unsupported-tool", tool: tool21 });
53504
+ toolWarnings.push({ type: "unsupported-tool", tool: tool22 });
53132
53505
  break;
53133
53506
  }
53134
53507
  });
@@ -53139,17 +53512,17 @@ function prepareTools3({
53139
53512
  };
53140
53513
  }
53141
53514
  const functionDeclarations = [];
53142
- for (const tool21 of tools) {
53143
- switch (tool21.type) {
53515
+ for (const tool22 of tools) {
53516
+ switch (tool22.type) {
53144
53517
  case "function":
53145
53518
  functionDeclarations.push({
53146
- name: tool21.name,
53147
- description: (_a4 = tool21.description) != null ? _a4 : "",
53148
- parameters: convertJSONSchemaToOpenAPISchema2(tool21.inputSchema)
53519
+ name: tool22.name,
53520
+ description: (_a4 = tool22.description) != null ? _a4 : "",
53521
+ parameters: convertJSONSchemaToOpenAPISchema2(tool22.inputSchema)
53149
53522
  });
53150
53523
  break;
53151
53524
  default:
53152
- toolWarnings.push({ type: "unsupported-tool", tool: tool21 });
53525
+ toolWarnings.push({ type: "unsupported-tool", tool: tool22 });
53153
53526
  break;
53154
53527
  }
53155
53528
  }
@@ -54272,16 +54645,16 @@ function prepareTools4({
54272
54645
  return { tools: void 0, toolChoice: void 0, toolWarnings };
54273
54646
  }
54274
54647
  const openaiCompatTools = [];
54275
- for (const tool21 of tools) {
54276
- if (tool21.type === "provider-defined") {
54277
- toolWarnings.push({ type: "unsupported-tool", tool: tool21 });
54648
+ for (const tool22 of tools) {
54649
+ if (tool22.type === "provider-defined") {
54650
+ toolWarnings.push({ type: "unsupported-tool", tool: tool22 });
54278
54651
  } else {
54279
54652
  openaiCompatTools.push({
54280
54653
  type: "function",
54281
54654
  function: {
54282
- name: tool21.name,
54283
- description: tool21.description,
54284
- parameters: tool21.inputSchema
54655
+ name: tool22.name,
54656
+ description: tool22.description,
54657
+ parameters: tool22.inputSchema
54285
54658
  }
54286
54659
  });
54287
54660
  }
@@ -55626,16 +55999,16 @@ function prepareTools5({
55626
55999
  return { tools: void 0, toolChoice: void 0, toolWarnings };
55627
56000
  }
55628
56001
  const xaiTools = [];
55629
- for (const tool21 of tools) {
55630
- if (tool21.type === "provider-defined") {
55631
- toolWarnings.push({ type: "unsupported-tool", tool: tool21 });
56002
+ for (const tool22 of tools) {
56003
+ if (tool22.type === "provider-defined") {
56004
+ toolWarnings.push({ type: "unsupported-tool", tool: tool22 });
55632
56005
  } else {
55633
56006
  xaiTools.push({
55634
56007
  type: "function",
55635
56008
  function: {
55636
- name: tool21.name,
55637
- description: tool21.description,
55638
- parameters: tool21.inputSchema
56009
+ name: tool22.name,
56010
+ description: tool22.description,
56011
+ parameters: tool22.inputSchema
55639
56012
  }
55640
56013
  });
55641
56014
  }
@@ -56635,21 +57008,21 @@ function prepareChatTools2({
56635
57008
  return { tools: void 0, toolChoice: void 0, toolWarnings };
56636
57009
  }
56637
57010
  const openaiTools2 = [];
56638
- for (const tool21 of tools) {
56639
- switch (tool21.type) {
57011
+ for (const tool22 of tools) {
57012
+ switch (tool22.type) {
56640
57013
  case "function":
56641
57014
  openaiTools2.push({
56642
57015
  type: "function",
56643
57016
  function: {
56644
- name: tool21.name,
56645
- description: tool21.description,
56646
- parameters: tool21.inputSchema,
57017
+ name: tool22.name,
57018
+ description: tool22.description,
57019
+ parameters: tool22.inputSchema,
56647
57020
  strict: structuredOutputs ? strictJsonSchema : void 0
56648
57021
  }
56649
57022
  });
56650
57023
  break;
56651
57024
  default:
56652
- toolWarnings.push({ type: "unsupported-tool", tool: tool21 });
57025
+ toolWarnings.push({ type: "unsupported-tool", tool: tool22 });
56653
57026
  break;
56654
57027
  }
56655
57028
  }
@@ -59231,22 +59604,22 @@ function prepareResponsesTools2(_0) {
59231
59604
  return { tools: void 0, toolChoice: void 0, toolWarnings };
59232
59605
  }
59233
59606
  const openaiTools2 = [];
59234
- for (const tool21 of tools) {
59235
- switch (tool21.type) {
59607
+ for (const tool22 of tools) {
59608
+ switch (tool22.type) {
59236
59609
  case "function":
59237
59610
  openaiTools2.push({
59238
59611
  type: "function",
59239
- name: tool21.name,
59240
- description: tool21.description,
59241
- parameters: tool21.inputSchema,
59612
+ name: tool22.name,
59613
+ description: tool22.description,
59614
+ parameters: tool22.inputSchema,
59242
59615
  strict: strictJsonSchema
59243
59616
  });
59244
59617
  break;
59245
59618
  case "provider-defined": {
59246
- switch (tool21.id) {
59619
+ switch (tool22.id) {
59247
59620
  case "openai.file_search": {
59248
59621
  const args = yield validateTypes({
59249
- value: tool21.args,
59622
+ value: tool22.args,
59250
59623
  schema: fileSearchArgsSchema3
59251
59624
  });
59252
59625
  openaiTools2.push({
@@ -59269,7 +59642,7 @@ function prepareResponsesTools2(_0) {
59269
59642
  }
59270
59643
  case "openai.web_search_preview": {
59271
59644
  const args = yield validateTypes({
59272
- value: tool21.args,
59645
+ value: tool22.args,
59273
59646
  schema: webSearchPreviewArgsSchema2
59274
59647
  });
59275
59648
  openaiTools2.push({
@@ -59281,7 +59654,7 @@ function prepareResponsesTools2(_0) {
59281
59654
  }
59282
59655
  case "openai.web_search": {
59283
59656
  const args = yield validateTypes({
59284
- value: tool21.args,
59657
+ value: tool22.args,
59285
59658
  schema: webSearchArgsSchema2
59286
59659
  });
59287
59660
  openaiTools2.push({
@@ -59294,7 +59667,7 @@ function prepareResponsesTools2(_0) {
59294
59667
  }
59295
59668
  case "openai.code_interpreter": {
59296
59669
  const args = yield validateTypes({
59297
- value: tool21.args,
59670
+ value: tool22.args,
59298
59671
  schema: codeInterpreterArgsSchema2
59299
59672
  });
59300
59673
  openaiTools2.push({
@@ -59305,7 +59678,7 @@ function prepareResponsesTools2(_0) {
59305
59678
  }
59306
59679
  case "openai.image_generation": {
59307
59680
  const args = yield validateTypes({
59308
- value: tool21.args,
59681
+ value: tool22.args,
59309
59682
  schema: imageGenerationArgsSchema2
59310
59683
  });
59311
59684
  openaiTools2.push({
@@ -59329,7 +59702,7 @@ function prepareResponsesTools2(_0) {
59329
59702
  break;
59330
59703
  }
59331
59704
  default:
59332
- toolWarnings.push({ type: "unsupported-tool", tool: tool21 });
59705
+ toolWarnings.push({ type: "unsupported-tool", tool: tool22 });
59333
59706
  break;
59334
59707
  }
59335
59708
  }
@@ -59434,7 +59807,7 @@ var OpenAIResponsesLanguageModel2 = class {
59434
59807
  }
59435
59808
  function hasOpenAITool(id) {
59436
59809
  return (tools == null ? void 0 : tools.find(
59437
- (tool21) => tool21.type === "provider-defined" && tool21.id === id
59810
+ (tool22) => tool22.type === "provider-defined" && tool22.id === id
59438
59811
  )) != null;
59439
59812
  }
59440
59813
  const topLogprobs = typeof (openaiOptions == null ? void 0 : openaiOptions.logprobs) === "number" ? openaiOptions == null ? void 0 : openaiOptions.logprobs : (openaiOptions == null ? void 0 : openaiOptions.logprobs) === true ? TOP_LOGPROBS_MAX2 : void 0;
@@ -59442,7 +59815,7 @@ var OpenAIResponsesLanguageModel2 = class {
59442
59815
  addInclude("message.output_text.logprobs");
59443
59816
  }
59444
59817
  const webSearchToolName = (_c = tools == null ? void 0 : tools.find(
59445
- (tool21) => tool21.type === "provider-defined" && (tool21.id === "openai.web_search" || tool21.id === "openai.web_search_preview")
59818
+ (tool22) => tool22.type === "provider-defined" && (tool22.id === "openai.web_search" || tool22.id === "openai.web_search_preview")
59446
59819
  )) == null ? void 0 : _c.name;
59447
59820
  if (webSearchToolName) {
59448
59821
  addInclude("web_search_call.action.sources");
@@ -60665,13 +61038,13 @@ function prepareTools6({
60665
61038
  return { tools: void 0, toolChoice: void 0, toolWarnings };
60666
61039
  }
60667
61040
  const groqTools2 = [];
60668
- for (const tool21 of tools) {
60669
- if (tool21.type === "provider-defined") {
60670
- if (tool21.id === "groq.browser_search") {
61041
+ for (const tool22 of tools) {
61042
+ if (tool22.type === "provider-defined") {
61043
+ if (tool22.id === "groq.browser_search") {
60671
61044
  if (!isBrowserSearchSupportedModel(modelId)) {
60672
61045
  toolWarnings.push({
60673
61046
  type: "unsupported-tool",
60674
- tool: tool21,
61047
+ tool: tool22,
60675
61048
  details: `Browser search is only supported on the following models: ${getSupportedModelsString()}. Current model: ${modelId}`
60676
61049
  });
60677
61050
  } else {
@@ -60680,15 +61053,15 @@ function prepareTools6({
60680
61053
  });
60681
61054
  }
60682
61055
  } else {
60683
- toolWarnings.push({ type: "unsupported-tool", tool: tool21 });
61056
+ toolWarnings.push({ type: "unsupported-tool", tool: tool22 });
60684
61057
  }
60685
61058
  } else {
60686
61059
  groqTools2.push({
60687
61060
  type: "function",
60688
61061
  function: {
60689
- name: tool21.name,
60690
- description: tool21.description,
60691
- parameters: tool21.inputSchema
61062
+ name: tool22.name,
61063
+ description: tool22.description,
61064
+ parameters: tool22.inputSchema
60692
61065
  }
60693
61066
  });
60694
61067
  }
@@ -61740,16 +62113,16 @@ function prepareTools7({
61740
62113
  return { tools: void 0, toolChoice: void 0, toolWarnings };
61741
62114
  }
61742
62115
  const mistralTools = [];
61743
- for (const tool21 of tools) {
61744
- if (tool21.type === "provider-defined") {
61745
- toolWarnings.push({ type: "unsupported-tool", tool: tool21 });
62116
+ for (const tool22 of tools) {
62117
+ if (tool22.type === "provider-defined") {
62118
+ toolWarnings.push({ type: "unsupported-tool", tool: tool22 });
61746
62119
  } else {
61747
62120
  mistralTools.push({
61748
62121
  type: "function",
61749
62122
  function: {
61750
- name: tool21.name,
61751
- description: tool21.description,
61752
- parameters: tool21.inputSchema
62123
+ name: tool22.name,
62124
+ description: tool22.description,
62125
+ parameters: tool22.inputSchema
61753
62126
  }
61754
62127
  });
61755
62128
  }
@@ -61769,7 +62142,7 @@ function prepareTools7({
61769
62142
  case "tool":
61770
62143
  return {
61771
62144
  tools: mistralTools.filter(
61772
- (tool21) => tool21.function.name === toolChoice.toolName
62145
+ (tool22) => tool22.function.name === toolChoice.toolName
61773
62146
  ),
61774
62147
  toolChoice: "any",
61775
62148
  toolWarnings
@@ -63552,10 +63925,10 @@ function prepareResponsesTools3({
63552
63925
  return { tools: void 0, toolChoice: void 0, toolWarnings };
63553
63926
  }
63554
63927
  const ollamaTools = [];
63555
- for (const tool21 of tools) {
63556
- switch (tool21.type) {
63928
+ for (const tool22 of tools) {
63929
+ switch (tool22.type) {
63557
63930
  case "function": {
63558
- let parameters = tool21.inputSchema;
63931
+ let parameters = tool22.inputSchema;
63559
63932
  if (!parameters) {
63560
63933
  parameters = {
63561
63934
  type: "object",
@@ -63571,15 +63944,15 @@ function prepareResponsesTools3({
63571
63944
  ollamaTools.push({
63572
63945
  type: "function",
63573
63946
  function: {
63574
- name: tool21.name,
63575
- description: tool21.description,
63947
+ name: tool22.name,
63948
+ description: tool22.description,
63576
63949
  parameters
63577
63950
  }
63578
63951
  });
63579
63952
  break;
63580
63953
  }
63581
63954
  default:
63582
- toolWarnings.push({ type: "unsupported-tool", tool: tool21 });
63955
+ toolWarnings.push({ type: "unsupported-tool", tool: tool22 });
63583
63956
  break;
63584
63957
  }
63585
63958
  }
@@ -64183,7 +64556,8 @@ var AISDKProvidersWithAPIKey = {
64183
64556
  togetherai: createTogetherAI,
64184
64557
  mistral: createMistral,
64185
64558
  deepseek: createDeepSeek,
64186
- perplexity: createPerplexity
64559
+ perplexity: createPerplexity,
64560
+ ollama: createOllama
64187
64561
  };
64188
64562
  var modelToProviderMap = {
64189
64563
  "gpt-4.1": "openai",
@@ -64220,7 +64594,8 @@ var modelToProviderMap = {
64220
64594
  "gemini-2.5-pro-preview-03-25": "google"
64221
64595
  };
64222
64596
  function getAISDKLanguageModel(subProvider, subModelName, clientOptions) {
64223
- if (clientOptions && Object.keys(clientOptions).length > 0) {
64597
+ const hasValidOptions = clientOptions && Object.values(clientOptions).some((v) => v !== void 0 && v !== null);
64598
+ if (hasValidOptions) {
64224
64599
  const creator = AISDKProvidersWithAPIKey[subProvider];
64225
64600
  if (!creator) {
64226
64601
  throw new UnsupportedAISDKModelProviderError(
@@ -64377,13 +64752,13 @@ var resolveTools = (clients, userTools) => __async(null, null, function* () {
64377
64752
  const clientTools = yield clientInstance.listTools({
64378
64753
  cursor: nextCursor
64379
64754
  });
64380
- for (const tool21 of clientTools.tools) {
64381
- tools[tool21.name] = {
64382
- description: tool21.description,
64383
- inputSchema: jsonSchemaToZod(tool21.inputSchema),
64755
+ for (const tool22 of clientTools.tools) {
64756
+ tools[tool22.name] = {
64757
+ description: tool22.description,
64758
+ inputSchema: jsonSchemaToZod(tool22.inputSchema),
64384
64759
  execute: (input) => __async(null, null, function* () {
64385
64760
  const result = yield clientInstance.callTool({
64386
- name: tool21.name,
64761
+ name: tool22.name,
64387
64762
  arguments: input
64388
64763
  });
64389
64764
  return result;
@@ -64406,6 +64781,7 @@ var AVAILABLE_CUA_MODELS = [
64406
64781
  "anthropic/claude-sonnet-4-20250514",
64407
64782
  "anthropic/claude-sonnet-4-5-20250929",
64408
64783
  "google/gemini-2.5-computer-use-preview-10-2025",
64784
+ "google/gemini-3-flash-computer-use",
64409
64785
  "microsoft/fara-7b"
64410
64786
  ];
64411
64787
 
@@ -64459,6 +64835,7 @@ __export(api_exports, {
64459
64835
  ReplayPageSchema: () => ReplayPageSchema,
64460
64836
  ReplayResponseSchema: () => ReplayResponseSchema,
64461
64837
  ReplayResultSchema: () => ReplayResultSchema,
64838
+ SessionEndRequestSchema: () => SessionEndRequestSchema,
64462
64839
  SessionEndResponseSchema: () => SessionEndResponseSchema,
64463
64840
  SessionEndResultSchema: () => SessionEndResultSchema,
64464
64841
  SessionHeadersSchema: () => SessionHeadersSchema,
@@ -64718,6 +65095,11 @@ var SessionStartResponseSchema = wrapResponse(
64718
65095
  SessionStartResultSchema,
64719
65096
  "SessionStartResponse"
64720
65097
  );
65098
+ var SessionEndRequestSchema = import_v4107.z.object({
65099
+ // Dummy property to ensure Stainless generates body parameter
65100
+ // The server accepts {} (this field should be omitted)
65101
+ _forceBody: import_v4107.z.undefined().optional()
65102
+ }).strict().meta({ id: "SessionEndRequest" });
64721
65103
  var SessionEndResultSchema = import_v4107.z.object({}).strict().meta({ id: "SessionEndResult" });
64722
65104
  var SessionEndResponseSchema = import_v4107.z.object({
64723
65105
  success: import_v4107.z.boolean().meta({
@@ -65182,7 +65564,7 @@ init_response2();
65182
65564
  init_sdkErrors();
65183
65565
 
65184
65566
  // examples/external_clients/aisdk.ts
65185
- var import_ai22 = require("ai");
65567
+ var import_ai23 = require("ai");
65186
65568
  var AISdkClient2 = class extends LLMClient {
65187
65569
  constructor({ model }) {
65188
65570
  super(model.modelId);
@@ -65244,7 +65626,7 @@ var AISdkClient2 = class extends LLMClient {
65244
65626
  }
65245
65627
  );
65246
65628
  if (options.response_model) {
65247
- const response2 = yield (0, import_ai22.generateObject)({
65629
+ const response2 = yield (0, import_ai23.generateObject)({
65248
65630
  model: this.model,
65249
65631
  messages: formattedMessages,
65250
65632
  schema: options.response_model.schema
@@ -65267,7 +65649,7 @@ var AISdkClient2 = class extends LLMClient {
65267
65649
  inputSchema: rawTool.parameters
65268
65650
  };
65269
65651
  }
65270
- const response = yield (0, import_ai22.generateText)({
65652
+ const response = yield (0, import_ai23.generateText)({
65271
65653
  model: this.model,
65272
65654
  messages: formattedMessages,
65273
65655
  tools
@@ -66629,6 +67011,9 @@ function validateExperimentalFeatures(options) {
66629
67011
  if ((executeOptions == null ? void 0 : executeOptions.excludeTools) && executeOptions.excludeTools.length > 0) {
66630
67012
  unsupportedFeatures.push("excludeTools");
66631
67013
  }
67014
+ if (executeOptions == null ? void 0 : executeOptions.output) {
67015
+ unsupportedFeatures.push("output schema");
67016
+ }
66632
67017
  if (unsupportedFeatures.length > 0) {
66633
67018
  throw new StagehandInvalidArgumentError(
66634
67019
  `${unsupportedFeatures.join(", ")} ${unsupportedFeatures.length === 1 ? "is" : "are"} not supported with CUA (Computer Use Agent) mode.`
@@ -66658,6 +67043,9 @@ function validateExperimentalFeatures(options) {
66658
67043
  if (executeOptions.excludeTools && executeOptions.excludeTools.length > 0) {
66659
67044
  features.push("excludeTools");
66660
67045
  }
67046
+ if (executeOptions.output) {
67047
+ features.push("output schema");
67048
+ }
66661
67049
  }
66662
67050
  if (features.length > 0) {
66663
67051
  throw new ExperimentalNotConfiguredError(`Agent ${features.join(", ")}`);
@@ -67440,6 +67828,7 @@ var _V3 = class _V3 {
67440
67828
  );
67441
67829
  }
67442
67830
  const page = yield this.resolvePage(options == null ? void 0 : options.page);
67831
+ const actCacheLlmClient = (options == null ? void 0 : options.model) ? this.resolveLlmClient(options.model) : void 0;
67443
67832
  let actCacheContext = null;
67444
67833
  const canUseCache = typeof input === "string" && !this.isAgentReplayRecording() && this.actCache.enabled;
67445
67834
  if (canUseCache) {
@@ -67452,7 +67841,8 @@ var _V3 = class _V3 {
67452
67841
  const cachedResult = yield this.actCache.tryReplay(
67453
67842
  actCacheContext,
67454
67843
  page,
67455
- options == null ? void 0 : options.timeout
67844
+ options == null ? void 0 : options.timeout,
67845
+ actCacheLlmClient
67456
67846
  );
67457
67847
  if (cachedResult) {
67458
67848
  this.addToHistory(
@@ -67855,7 +68245,13 @@ var _V3 = class _V3 {
67855
68245
  configSignature: agentConfigSignature,
67856
68246
  page: yield this.ctx.awaitActivePage()
67857
68247
  }) : null;
67858
- return { handler, resolvedOptions, instruction, cacheContext };
68248
+ return {
68249
+ handler,
68250
+ resolvedOptions,
68251
+ instruction,
68252
+ cacheContext,
68253
+ llmClient: agentLlmClient
68254
+ };
67859
68255
  });
67860
68256
  }
67861
68257
  agent(options) {
@@ -68005,13 +68401,16 @@ Do not ask follow up questions, the user will trust your judgement.`
68005
68401
  args: [instructionOrOptions]
68006
68402
  });
68007
68403
  if (isStreaming) {
68008
- const { handler: handler2, resolvedOptions: resolvedOptions2, cacheContext: cacheContext2 } = yield this.prepareAgentExecution(
68404
+ const { handler: handler2, resolvedOptions: resolvedOptions2, cacheContext: cacheContext2, llmClient: llmClient2 } = yield this.prepareAgentExecution(
68009
68405
  options,
68010
68406
  instructionOrOptions,
68011
68407
  agentConfigSignature
68012
68408
  );
68013
68409
  if (cacheContext2) {
68014
- const replayed = yield this.agentCache.tryReplayAsStream(cacheContext2);
68410
+ const replayed = yield this.agentCache.tryReplayAsStream(
68411
+ cacheContext2,
68412
+ llmClient2
68413
+ );
68015
68414
  if (replayed) {
68016
68415
  SessionFileLogger.logAgentTaskCompleted({ cacheHit: true });
68017
68416
  return replayed;
@@ -68034,13 +68433,16 @@ Do not ask follow up questions, the user will trust your judgement.`
68034
68433
  SessionFileLogger.logAgentTaskCompleted();
68035
68434
  return streamResult;
68036
68435
  }
68037
- const { handler, resolvedOptions, cacheContext } = yield this.prepareAgentExecution(
68436
+ const { handler, resolvedOptions, cacheContext, llmClient } = yield this.prepareAgentExecution(
68038
68437
  options,
68039
68438
  instructionOrOptions,
68040
68439
  agentConfigSignature
68041
68440
  );
68042
68441
  if (cacheContext) {
68043
- const replayed = yield this.agentCache.tryReplay(cacheContext);
68442
+ const replayed = yield this.agentCache.tryReplay(
68443
+ cacheContext,
68444
+ llmClient
68445
+ );
68044
68446
  if (replayed) {
68045
68447
  SessionFileLogger.logAgentTaskCompleted({ cacheHit: true });
68046
68448
  return replayed;
@@ -68333,6 +68735,9 @@ I'm providing ${screenshots.length} screenshots showing the progression of the t
68333
68735
  });
68334
68736
  }
68335
68737
  };
68738
+
68739
+ // lib/v3/index.ts
68740
+ var import_ai24 = require("ai");
68336
68741
  // Annotate the CommonJS export names for ESM import in node:
68337
68742
  0 && (module.exports = {
68338
68743
  AISdkClient,
@@ -68415,6 +68820,7 @@ I'm providing ${screenshots.length} screenshots showing the progression of the t
68415
68820
  providerEnvVarMap,
68416
68821
  toGeminiSchema,
68417
68822
  toJsonSchema,
68823
+ tool,
68418
68824
  transformSchema,
68419
68825
  trimTrailingTextNode,
68420
68826
  validateZodSchema