@browserbasehq/browse-cli 0.4.0 → 0.4.1-alpha-3917df4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.js +72 -34
  2. package/package.json +3 -2
package/dist/index.js CHANGED
@@ -100088,6 +100088,18 @@ Return the element that matches the instruction if it exists. Otherwise, return
100088
100088
  content: [content, buildUserInstructionsString(userProvidedInstructions)].filter(Boolean).join("\n\n")
100089
100089
  };
100090
100090
  }
100091
+ function buildActVariablesPrompt(variables) {
100092
+ if (!variables || Object.keys(variables).length === 0) {
100093
+ return "";
100094
+ }
100095
+ const variableNames = Object.keys(variables).map((key) => `%${key}%`).join(", ");
100096
+ return ` The user has provided the following variables to be used in the action: ${variableNames}
100097
+
100098
+ Note that these are the variable names/keys, and not the actual variable values.
100099
+
100100
+ To use the variables in the action, you must respond with the variable name inside the 'arguments' array. The variable name must be wrapped in percentage signs (eg, %variableNameHere%) so that it can be replaced with the actual variable value before the action is taken.
100101
+ `;
100102
+ }
100091
100103
  function buildActPrompt(action, supportedActions, variables) {
100092
100104
  let instruction = `Find the most relevant element to perform an action on given the following action: ${action}.
100093
100105
  IF AND ONLY IF the action EXPLICITLY includes the word 'dropdown' and implies choosing/selecting an option from a dropdown, ignore the 'General Instructions' section, and follow the 'Dropdown Specific Instructions' section carefully.
@@ -100114,11 +100126,7 @@ function buildActPrompt(action, supportedActions, variables) {
100114
100126
  - choose the 'click' method
100115
100127
  - set twoStep to true.
100116
100128
  `;
100117
- if (variables && Object.keys(variables).length > 0) {
100118
- const variableNames = Object.keys(variables).map((key) => `%${key}%`).join(", ");
100119
- const variablesPrompt = `The following variables are available to use in the action: ${variableNames}. Fill the argument variables with the variable name.`;
100120
- instruction += ` ${variablesPrompt}`;
100121
- }
100129
+ instruction += buildActVariablesPrompt(variables);
100122
100130
  return instruction;
100123
100131
  }
100124
100132
  function buildStepTwoPrompt(originalUserAction, previousAction, supportedActions, variables) {
@@ -100136,11 +100144,7 @@ function buildStepTwoPrompt(originalUserAction, previousAction, supportedActions
100136
100144
  If the user is asking to scroll to the next chunk/previous chunk, choose the nextChunk/prevChunk method. No arguments are required here.
100137
100145
  If the action implies a key press, e.g., 'press enter', 'press a', 'press space', etc., always choose the press method with the appropriate key as argument \u2014 e.g. 'a', 'Enter', 'Space'. Do not choose a click action on an on-screen keyboard. Capitalize the first character like 'Enter', 'Tab', 'Escape' only for special keys.
100138
100146
  `;
100139
- if (variables && Object.keys(variables).length > 0) {
100140
- const variableNames = Object.keys(variables).map((key) => `%${key}%`).join(", ");
100141
- const variablesPrompt = `The following variables are available to use in the action: ${variableNames}. Fill the argument variables with the variable name.`;
100142
- instruction += ` ${variablesPrompt}`;
100143
- }
100147
+ instruction += buildActVariablesPrompt(variables);
100144
100148
  return instruction;
100145
100149
  }
100146
100150
  function buildGoogleCUASystemPrompt() {
@@ -100448,7 +100452,7 @@ async function observe({ instruction, domElements, llmClient, userProvidedInstru
100448
100452
  async function act({ instruction, domElements, llmClient, userProvidedInstructions, logger, logInferenceToFile = false }) {
100449
100453
  const isGPT5 = llmClient.modelName.includes("gpt-5");
100450
100454
  const actSchema = external_exports.object({
100451
- elementId: external_exports.string().regex(/^\d+-\d+$/).describe("the ID string associated with the element. Never include surrounding square brackets. This field must follow the format of 'number-number'."),
100455
+ elementId: external_exports.string().regex(/^\d+-\d+$/).describe("the ID string associated with the element. Never include surrounding square brackets. This field must follow the format of 'number-number'. for example, '0-76' or '16-21'"),
100452
100456
  description: external_exports.string().describe("a description of the accessible element and its purpose"),
100453
100457
  method: external_exports.enum(
100454
100458
  // Use Object.values() for Zod v3 compatibility - z.enum() in v3 doesn't accept TypeScript enums directly
@@ -119787,10 +119791,13 @@ var AISdkClient2 = class extends LLMClient {
119787
119791
  type = "aisdk";
119788
119792
  model;
119789
119793
  logger;
119790
- constructor({ model, logger }) {
119794
+ constructor({ model, logger, clientOptions }) {
119791
119795
  super(model.modelId);
119792
119796
  this.model = model;
119793
119797
  this.logger = logger;
119798
+ if (clientOptions) {
119799
+ this.clientOptions = clientOptions;
119800
+ }
119794
119801
  }
119795
119802
  getLanguageModel() {
119796
119803
  return this.model;
@@ -119868,9 +119875,11 @@ var AISdkClient2 = class extends LLMClient {
119868
119875
  let objectResponse;
119869
119876
  const isGPT5 = this.model.modelId.includes("gpt-5");
119870
119877
  const isCodex = this.model.modelId.includes("codex");
119871
- const usesLowReasoningEffort = (this.model.modelId.includes("gpt-5.1") || this.model.modelId.includes("gpt-5.2")) && !isCodex;
119872
119878
  const isKimi = this.model.modelId.includes("kimi");
119873
119879
  const temperature = isKimi ? 1 : options.temperature;
119880
+ const isGPT5SubModel = this.model.modelId.includes("gpt-5.") && !isCodex;
119881
+ const userReasoningEffort = this.clientOptions?.reasoningEffort;
119882
+ const resolvedReasoningEffort = userReasoningEffort ?? (isGPT5SubModel ? "none" : void 0);
119874
119883
  const PROMPT_JSON_FALLBACK_PATTERNS = ["deepseek", "kimi", "glm"];
119875
119884
  const needsPromptJsonFallback = PROMPT_JSON_FALLBACK_PATTERNS.some((p2) => this.model.modelId.includes(p2));
119876
119885
  if (options.response_model) {
@@ -119899,11 +119908,10 @@ You must respond in JSON format. respond WITH JSON. Do not include any other tex
119899
119908
  messages: formattedMessages,
119900
119909
  schema: options.response_model.schema,
119901
119910
  temperature,
119902
- providerOptions: isGPT5 ? {
119911
+ providerOptions: resolvedReasoningEffort ? {
119903
119912
  openai: {
119904
- textVerbosity: isCodex ? "medium" : "low",
119905
- // codex models only support 'medium'
119906
- reasoningEffort: isCodex ? "medium" : usesLowReasoningEffort ? "low" : "minimal"
119913
+ ...isGPT5 ? { textVerbosity: isCodex ? "medium" : "low" } : {},
119914
+ reasoningEffort: resolvedReasoningEffort
119907
119915
  }
119908
119916
  } : void 0
119909
119917
  });
@@ -152792,27 +152800,34 @@ var modelToProviderMap = {
152792
152800
  "gemini-2.5-flash-preview-04-17": "google",
152793
152801
  "gemini-2.5-pro-preview-03-25": "google"
152794
152802
  };
152795
- function getAISDKLanguageModel(subProvider, subModelName, clientOptions) {
152803
+ function getAISDKLanguageModel(subProvider, subModelName, clientOptions, middleware) {
152796
152804
  const hasValidOptions = clientOptions && Object.values(clientOptions).some((v2) => v2 !== void 0 && v2 !== null);
152805
+ let model;
152797
152806
  if (hasValidOptions) {
152798
152807
  const creator = AISDKProvidersWithAPIKey[subProvider];
152799
152808
  if (!creator) {
152800
152809
  throw new UnsupportedAISDKModelProviderError(subProvider, Object.keys(AISDKProvidersWithAPIKey));
152801
152810
  }
152802
152811
  const provider = creator(clientOptions);
152803
- return provider(subModelName);
152812
+ model = provider(subModelName);
152804
152813
  } else {
152805
152814
  const provider = AISDKProviders[subProvider];
152806
152815
  if (!provider) {
152807
152816
  throw new UnsupportedAISDKModelProviderError(subProvider, Object.keys(AISDKProviders));
152808
152817
  }
152809
- return provider(subModelName);
152818
+ model = provider(subModelName);
152810
152819
  }
152820
+ if (middleware) {
152821
+ return wrapLanguageModel({ model, middleware });
152822
+ }
152823
+ return model;
152811
152824
  }
152812
152825
  var LLMProvider = class {
152813
152826
  logger;
152814
- constructor(logger) {
152827
+ middleware;
152828
+ constructor(logger, middleware) {
152815
152829
  this.logger = logger;
152830
+ this.middleware = middleware;
152816
152831
  }
152817
152832
  getClient(modelName, clientOptions, options) {
152818
152833
  if (modelName.includes("/")) {
@@ -152822,10 +152837,12 @@ var LLMProvider = class {
152822
152837
  if (subProvider === "vertex" && !options?.disableAPI && !options?.experimental) {
152823
152838
  throw new ExperimentalNotConfiguredError("Vertex provider");
152824
152839
  }
152825
- const languageModel = getAISDKLanguageModel(subProvider, subModelName, clientOptions);
152840
+ const effectiveMiddleware = options?.middleware ?? this.middleware;
152841
+ const languageModel = getAISDKLanguageModel(subProvider, subModelName, clientOptions, effectiveMiddleware);
152826
152842
  return new AISdkClient2({
152827
152843
  model: languageModel,
152828
- logger: this.logger
152844
+ logger: this.logger,
152845
+ clientOptions
152829
152846
  });
152830
152847
  }
152831
152848
  const provider = modelToProviderMap[modelName];
@@ -160612,7 +160629,9 @@ function hasInjectableDOM(url2) {
160612
160629
  return false;
160613
160630
  }
160614
160631
  function isNonWebTarget(info) {
160615
- return info.type !== "page" && info.type !== "iframe" || !hasInjectableDOM(info.url);
160632
+ if (info.type === "page")
160633
+ return false;
160634
+ return info.type !== "iframe" || !hasInjectableDOM(info.url);
160616
160635
  }
160617
160636
  function isTopLevelPage(info) {
160618
160637
  const ti = info;
@@ -162828,13 +162847,14 @@ function resolveModelConfiguration(model) {
162828
162847
  return { modelName: model };
162829
162848
  }
162830
162849
  if (model && typeof model === "object") {
162831
- const { modelName, ...clientOptions } = model;
162850
+ const { modelName, middleware, ...clientOptions } = model;
162832
162851
  if (!modelName) {
162833
162852
  throw new StagehandInvalidArgumentError("model.modelName is required when providing client options.");
162834
162853
  }
162835
162854
  return {
162836
162855
  modelName,
162837
- clientOptions
162856
+ clientOptions,
162857
+ middleware
162838
162858
  };
162839
162859
  }
162840
162860
  return { modelName: DEFAULT_MODEL_NAME };
@@ -163009,11 +163029,11 @@ var V3 = (() => {
163009
163029
  }
163010
163030
  } catch {
163011
163031
  }
163012
- const { modelName, clientOptions } = resolveModelConfiguration(opts.model);
163032
+ const { modelName, clientOptions, middleware } = resolveModelConfiguration(opts.model);
163013
163033
  this.modelName = modelName;
163014
163034
  this.experimental = opts.experimental ?? false;
163015
163035
  this.logInferenceToFile = opts.logInferenceToFile ?? false;
163016
- this.llmProvider = new LLMProvider(this.logger);
163036
+ this.llmProvider = new LLMProvider(this.logger, middleware);
163017
163037
  this.domSettleTimeoutMs = opts.domSettleTimeout;
163018
163038
  this.disableAPI = opts.disableAPI ?? false;
163019
163039
  const baseClientOptions = clientOptions ? { ...clientOptions } : {};
@@ -163095,14 +163115,16 @@ var V3 = (() => {
163095
163115
  }
163096
163116
  let modelName;
163097
163117
  let clientOptions;
163118
+ let perCallMiddleware;
163098
163119
  if (typeof model === "string") {
163099
163120
  modelName = model;
163100
163121
  } else {
163101
- const { modelName: overrideModelName, ...rest } = model;
163122
+ const { modelName: overrideModelName, middleware, ...rest } = model;
163102
163123
  modelName = overrideModelName;
163103
163124
  clientOptions = rest;
163125
+ perCallMiddleware = middleware;
163104
163126
  }
163105
- if (modelName === this.modelName && (!clientOptions || Object.keys(clientOptions).length === 0)) {
163127
+ if (modelName === this.modelName && !perCallMiddleware && (!clientOptions || Object.keys(clientOptions).length === 0)) {
163106
163128
  return this.llmClient;
163107
163129
  }
163108
163130
  const overrideProvider = String(modelName).split("/")[0];
@@ -163118,6 +163140,13 @@ var V3 = (() => {
163118
163140
  mergedOptions.apiKey = apiKey;
163119
163141
  }
163120
163142
  }
163143
+ if (perCallMiddleware) {
163144
+ return this.llmProvider.getClient(modelName, mergedOptions, {
163145
+ experimental: this.experimental,
163146
+ disableAPI: this.disableAPI,
163147
+ middleware: perCallMiddleware
163148
+ });
163149
+ }
163121
163150
  const cacheKey = JSON.stringify({
163122
163151
  modelName,
163123
163152
  clientOptions: mergedOptions
@@ -163126,7 +163155,10 @@ var V3 = (() => {
163126
163155
  if (cached2) {
163127
163156
  return cached2;
163128
163157
  }
163129
- const client = this.llmProvider.getClient(modelName, mergedOptions, { experimental: this.experimental, disableAPI: this.disableAPI });
163158
+ const client = this.llmProvider.getClient(modelName, mergedOptions, {
163159
+ experimental: this.experimental,
163160
+ disableAPI: this.disableAPI
163161
+ });
163130
163162
  this.overrideLlmClients.set(cacheKey, client);
163131
163163
  return client;
163132
163164
  }
@@ -164620,7 +164652,7 @@ var import_child_process4 = require("child_process");
164620
164652
  var readline = __toESM(require("readline"));
164621
164653
 
164622
164654
  // package.json
164623
- var version3 = "0.4.0";
164655
+ var version3 = "0.4.1";
164624
164656
 
164625
164657
  // src/resolve-ws.ts
164626
164658
  init_cjs_shims();
@@ -164645,6 +164677,7 @@ async function resolveWsTarget(input) {
164645
164677
  }
164646
164678
 
164647
164679
  // src/index.ts
164680
+ var import_node_html_markdown = require("node-html-markdown");
164648
164681
  var program = new import_commander.Command();
164649
164682
  var SOCKET_DIR = os3.tmpdir();
164650
164683
  function getSocketPath(session) {
@@ -165104,7 +165137,7 @@ async function runDaemon(session, headless) {
165104
165137
  } : {},
165105
165138
  ...!connectSessionId ? {
165106
165139
  browserbaseSessionCreateParams: {
165107
- userMetadata: { "browse-cli": "true" },
165140
+ userMetadata: { browse_cli: "true" },
165108
165141
  ...contextConfig ? {
165109
165142
  browserSettings: {
165110
165143
  context: contextConfig
@@ -165466,6 +165499,11 @@ async function executeCommand(context, command, args, stagehand) {
165466
165499
  return {
165467
165500
  checked: await page.deepLocator(resolveSelector(selector)).isChecked()
165468
165501
  };
165502
+ case "markdown": {
165503
+ const target = selector ? resolveSelector(selector) : "body";
165504
+ const html = await page.deepLocator(target).innerHtml();
165505
+ return { markdown: import_node_html_markdown.NodeHtmlMarkdown.translate(html) };
165506
+ }
165469
165507
  default:
165470
165508
  throw new Error(`Unknown get type: ${what}`);
165471
165509
  }
@@ -166395,7 +166433,7 @@ program.command("highlight <selector>").description("Highlight element").option(
166395
166433
  }
166396
166434
  });
166397
166435
  program.command("get <what> [selector]").description(
166398
- "Get page info: url, title, text, html, value, box, visible, checked"
166436
+ "Get page info: url, title, text, html, markdown, value, box, visible, checked"
166399
166437
  ).action(async (what, selector) => {
166400
166438
  const opts = program.opts();
166401
166439
  try {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@browserbasehq/browse-cli",
3
- "version": "0.4.0",
3
+ "version": "0.4.1-alpha-3917df4",
4
4
  "description": "Browser automation CLI for AI agents, built on Stagehand",
5
5
  "type": "commonjs",
6
6
  "license": "MIT",
@@ -44,10 +44,11 @@
44
44
  "dependencies": {
45
45
  "commander": "^12.0.0",
46
46
  "dotenv": "^16.4.5",
47
+ "node-html-markdown": "^1.3.0",
47
48
  "pino": "^9.6.0",
48
49
  "pino-pretty": "^13.0.0",
49
50
  "ws": "^8.18.0",
50
- "@browserbasehq/stagehand": "3.2.1-alpha-be6798aefdf44d2d2d275a4b9cf3d4473609515b"
51
+ "@browserbasehq/stagehand": "3.2.0"
51
52
  },
52
53
  "devDependencies": {
53
54
  "@types/node": "^20.11.30",