@browserbasehq/stagehand 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cache/ActionCache.ts +158 -0
- package/lib/cache/BaseCache.ts +553 -0
- package/lib/cache/LLMCache.ts +48 -0
- package/lib/cache.ts +99 -0
- package/lib/dom/build/index.js +626 -0
- package/lib/dom/build/scriptContent.ts +1 -0
- package/lib/dom/debug.ts +147 -0
- package/lib/dom/genDomScripts.ts +29 -0
- package/lib/dom/global.d.ts +25 -0
- package/lib/dom/index.ts +3 -0
- package/lib/dom/process.ts +441 -0
- package/lib/dom/utils.ts +17 -0
- package/lib/dom/xpathUtils.ts +246 -0
- package/lib/handlers/actHandler.ts +1421 -0
- package/lib/handlers/extractHandler.ts +179 -0
- package/lib/handlers/observeHandler.ts +170 -0
- package/lib/index.ts +900 -0
- package/lib/inference.ts +324 -0
- package/lib/llm/AnthropicClient.ts +314 -0
- package/lib/llm/LLMClient.ts +66 -0
- package/lib/llm/LLMProvider.ts +81 -0
- package/lib/llm/OpenAIClient.ts +206 -0
- package/lib/prompt.ts +341 -0
- package/lib/utils.ts +16 -0
- package/lib/vision.ts +299 -0
- package/package.json +3 -3
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import { OpenAIClient } from "./OpenAIClient";
|
|
2
|
+
import { AnthropicClient } from "./AnthropicClient";
|
|
3
|
+
import { LLMClient } from "./LLMClient";
|
|
4
|
+
import { LLMCache } from "../cache/LLMCache";
|
|
5
|
+
import { LogLine } from "../../types/log";
|
|
6
|
+
import {
|
|
7
|
+
AvailableModel,
|
|
8
|
+
ModelProvider,
|
|
9
|
+
ClientOptions,
|
|
10
|
+
} from "../../types/model";
|
|
11
|
+
|
|
12
|
+
export class LLMProvider {
|
|
13
|
+
private modelToProviderMap: { [key in AvailableModel]: ModelProvider } = {
|
|
14
|
+
"gpt-4o": "openai",
|
|
15
|
+
"gpt-4o-mini": "openai",
|
|
16
|
+
"gpt-4o-2024-08-06": "openai",
|
|
17
|
+
"claude-3-5-sonnet-latest": "anthropic",
|
|
18
|
+
"claude-3-5-sonnet-20240620": "anthropic",
|
|
19
|
+
"claude-3-5-sonnet-20241022": "anthropic",
|
|
20
|
+
};
|
|
21
|
+
|
|
22
|
+
private logger: (message: LogLine) => void;
|
|
23
|
+
private enableCaching: boolean;
|
|
24
|
+
private cache: LLMCache | undefined;
|
|
25
|
+
|
|
26
|
+
constructor(logger: (message: LogLine) => void, enableCaching: boolean) {
|
|
27
|
+
this.logger = logger;
|
|
28
|
+
this.enableCaching = enableCaching;
|
|
29
|
+
this.cache = enableCaching ? new LLMCache(logger) : undefined;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
cleanRequestCache(requestId: string): void {
|
|
33
|
+
if (!this.enableCaching) {
|
|
34
|
+
return;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
this.logger({
|
|
38
|
+
category: "llm_cache",
|
|
39
|
+
message: "cleaning up cache",
|
|
40
|
+
level: 1,
|
|
41
|
+
auxiliary: {
|
|
42
|
+
requestId: {
|
|
43
|
+
value: requestId,
|
|
44
|
+
type: "string",
|
|
45
|
+
},
|
|
46
|
+
},
|
|
47
|
+
});
|
|
48
|
+
this.cache.deleteCacheForRequestId(requestId);
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
getClient(
|
|
52
|
+
modelName: AvailableModel,
|
|
53
|
+
clientOptions?: ClientOptions,
|
|
54
|
+
): LLMClient {
|
|
55
|
+
const provider = this.modelToProviderMap[modelName];
|
|
56
|
+
if (!provider) {
|
|
57
|
+
throw new Error(`Unsupported model: ${modelName}`);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
switch (provider) {
|
|
61
|
+
case "openai":
|
|
62
|
+
return new OpenAIClient(
|
|
63
|
+
this.logger,
|
|
64
|
+
this.enableCaching,
|
|
65
|
+
this.cache,
|
|
66
|
+
modelName,
|
|
67
|
+
clientOptions,
|
|
68
|
+
);
|
|
69
|
+
case "anthropic":
|
|
70
|
+
return new AnthropicClient(
|
|
71
|
+
this.logger,
|
|
72
|
+
this.enableCaching,
|
|
73
|
+
this.cache,
|
|
74
|
+
modelName,
|
|
75
|
+
clientOptions,
|
|
76
|
+
);
|
|
77
|
+
default:
|
|
78
|
+
throw new Error(`Unsupported provider: ${provider}`);
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
}
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
import OpenAI, { ClientOptions } from "openai";
|
|
2
|
+
import { zodResponseFormat } from "openai/helpers/zod";
|
|
3
|
+
import { ChatCompletionCreateParamsNonStreaming } from "openai/resources/chat";
|
|
4
|
+
import { LogLine } from "../../types/log";
|
|
5
|
+
import { AvailableModel } from "../../types/model";
|
|
6
|
+
import { LLMCache } from "../cache/LLMCache";
|
|
7
|
+
import { ChatCompletionOptions, ChatMessage, LLMClient } from "./LLMClient";
|
|
8
|
+
|
|
9
|
+
export class OpenAIClient extends LLMClient {
|
|
10
|
+
private client: OpenAI;
|
|
11
|
+
private cache: LLMCache | undefined;
|
|
12
|
+
public logger: (message: LogLine) => void;
|
|
13
|
+
private enableCaching: boolean;
|
|
14
|
+
private clientOptions: ClientOptions;
|
|
15
|
+
|
|
16
|
+
constructor(
|
|
17
|
+
logger: (message: LogLine) => void,
|
|
18
|
+
enableCaching = false,
|
|
19
|
+
cache: LLMCache | undefined,
|
|
20
|
+
modelName: AvailableModel,
|
|
21
|
+
clientOptions?: ClientOptions,
|
|
22
|
+
) {
|
|
23
|
+
super(modelName);
|
|
24
|
+
this.client = new OpenAI(clientOptions);
|
|
25
|
+
this.logger = logger;
|
|
26
|
+
this.cache = cache;
|
|
27
|
+
this.enableCaching = enableCaching;
|
|
28
|
+
this.modelName = modelName;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
async createChatCompletion(options: ChatCompletionOptions) {
|
|
32
|
+
const { image: _, ...optionsWithoutImage } = options;
|
|
33
|
+
this.logger({
|
|
34
|
+
category: "openai",
|
|
35
|
+
message: "creating chat completion",
|
|
36
|
+
level: 1,
|
|
37
|
+
auxiliary: {
|
|
38
|
+
options: {
|
|
39
|
+
value: JSON.stringify(optionsWithoutImage),
|
|
40
|
+
type: "object",
|
|
41
|
+
},
|
|
42
|
+
modelName: {
|
|
43
|
+
value: this.modelName,
|
|
44
|
+
type: "string",
|
|
45
|
+
},
|
|
46
|
+
},
|
|
47
|
+
});
|
|
48
|
+
const cacheOptions = {
|
|
49
|
+
model: this.modelName,
|
|
50
|
+
messages: options.messages,
|
|
51
|
+
temperature: options.temperature,
|
|
52
|
+
top_p: options.top_p,
|
|
53
|
+
frequency_penalty: options.frequency_penalty,
|
|
54
|
+
presence_penalty: options.presence_penalty,
|
|
55
|
+
image: options.image,
|
|
56
|
+
response_model: options.response_model,
|
|
57
|
+
};
|
|
58
|
+
|
|
59
|
+
if (this.enableCaching) {
|
|
60
|
+
const cachedResponse = await this.cache.get(
|
|
61
|
+
cacheOptions,
|
|
62
|
+
options.requestId,
|
|
63
|
+
);
|
|
64
|
+
|
|
65
|
+
if (cachedResponse) {
|
|
66
|
+
this.logger({
|
|
67
|
+
category: "llm_cache",
|
|
68
|
+
message: "LLM cache hit - returning cached response",
|
|
69
|
+
level: 1,
|
|
70
|
+
auxiliary: {
|
|
71
|
+
requestId: {
|
|
72
|
+
value: options.requestId,
|
|
73
|
+
type: "string",
|
|
74
|
+
},
|
|
75
|
+
cachedResponse: {
|
|
76
|
+
value: JSON.stringify(cachedResponse),
|
|
77
|
+
type: "object",
|
|
78
|
+
},
|
|
79
|
+
},
|
|
80
|
+
});
|
|
81
|
+
return cachedResponse;
|
|
82
|
+
} else {
|
|
83
|
+
this.logger({
|
|
84
|
+
category: "llm_cache",
|
|
85
|
+
message: "LLM cache miss - no cached response found",
|
|
86
|
+
level: 1,
|
|
87
|
+
auxiliary: {
|
|
88
|
+
requestId: {
|
|
89
|
+
value: options.requestId,
|
|
90
|
+
type: "string",
|
|
91
|
+
},
|
|
92
|
+
},
|
|
93
|
+
});
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
if (options.image) {
|
|
98
|
+
const screenshotMessage: ChatMessage = {
|
|
99
|
+
role: "user",
|
|
100
|
+
content: [
|
|
101
|
+
{
|
|
102
|
+
type: "image_url",
|
|
103
|
+
image_url: {
|
|
104
|
+
url: `data:image/jpeg;base64,${options.image.buffer.toString("base64")}`,
|
|
105
|
+
},
|
|
106
|
+
},
|
|
107
|
+
...(options.image.description
|
|
108
|
+
? [{ type: "text", text: options.image.description }]
|
|
109
|
+
: []),
|
|
110
|
+
],
|
|
111
|
+
};
|
|
112
|
+
|
|
113
|
+
options.messages = [...options.messages, screenshotMessage];
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
const { image, response_model, requestId, ...openAiOptions } = {
|
|
117
|
+
...options,
|
|
118
|
+
model: this.modelName,
|
|
119
|
+
};
|
|
120
|
+
|
|
121
|
+
let responseFormat = undefined;
|
|
122
|
+
if (options.response_model) {
|
|
123
|
+
responseFormat = zodResponseFormat(
|
|
124
|
+
options.response_model.schema,
|
|
125
|
+
options.response_model.name,
|
|
126
|
+
);
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
this.logger({
|
|
130
|
+
category: "openai",
|
|
131
|
+
message: "creating chat completion",
|
|
132
|
+
level: 1,
|
|
133
|
+
auxiliary: {
|
|
134
|
+
openAiOptions: {
|
|
135
|
+
value: JSON.stringify(openAiOptions),
|
|
136
|
+
type: "object",
|
|
137
|
+
},
|
|
138
|
+
},
|
|
139
|
+
});
|
|
140
|
+
|
|
141
|
+
const response = await this.client.chat.completions.create({
|
|
142
|
+
...openAiOptions,
|
|
143
|
+
response_format: responseFormat,
|
|
144
|
+
} as unknown as ChatCompletionCreateParamsNonStreaming); // TODO (kamath): remove this forced typecast
|
|
145
|
+
|
|
146
|
+
this.logger({
|
|
147
|
+
category: "openai",
|
|
148
|
+
message: "response",
|
|
149
|
+
level: 1,
|
|
150
|
+
auxiliary: {
|
|
151
|
+
response: {
|
|
152
|
+
value: JSON.stringify(response),
|
|
153
|
+
type: "object",
|
|
154
|
+
},
|
|
155
|
+
requestId: {
|
|
156
|
+
value: options.requestId,
|
|
157
|
+
type: "string",
|
|
158
|
+
},
|
|
159
|
+
},
|
|
160
|
+
});
|
|
161
|
+
|
|
162
|
+
if (response_model) {
|
|
163
|
+
const extractedData = response.choices[0].message.content;
|
|
164
|
+
const parsedData = JSON.parse(extractedData);
|
|
165
|
+
|
|
166
|
+
if (this.enableCaching) {
|
|
167
|
+
this.cache.set(
|
|
168
|
+
cacheOptions,
|
|
169
|
+
{
|
|
170
|
+
...parsedData,
|
|
171
|
+
},
|
|
172
|
+
options.requestId,
|
|
173
|
+
);
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
return {
|
|
177
|
+
...parsedData,
|
|
178
|
+
};
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
if (this.enableCaching) {
|
|
182
|
+
this.logger({
|
|
183
|
+
category: "llm_cache",
|
|
184
|
+
message: "caching response",
|
|
185
|
+
level: 1,
|
|
186
|
+
auxiliary: {
|
|
187
|
+
requestId: {
|
|
188
|
+
value: options.requestId,
|
|
189
|
+
type: "string",
|
|
190
|
+
},
|
|
191
|
+
cacheOptions: {
|
|
192
|
+
value: JSON.stringify(cacheOptions),
|
|
193
|
+
type: "object",
|
|
194
|
+
},
|
|
195
|
+
response: {
|
|
196
|
+
value: JSON.stringify(response),
|
|
197
|
+
type: "object",
|
|
198
|
+
},
|
|
199
|
+
},
|
|
200
|
+
});
|
|
201
|
+
this.cache.set(cacheOptions, response, options.requestId);
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
return response;
|
|
205
|
+
}
|
|
206
|
+
}
|
package/lib/prompt.ts
ADDED
|
@@ -0,0 +1,341 @@
|
|
|
1
|
+
import OpenAI from "openai";
|
|
2
|
+
import { ChatMessage } from "./llm/LLMClient";
|
|
3
|
+
|
|
4
|
+
// act
|
|
5
|
+
// System prompt for the "act" flow: steers the model to emit Playwright
// actions (through the doAction / skipSection tools) toward the user's goal.
// Sent verbatim at runtime.
// NOTE(review): "if your are not sure" is a typo inside the prompt text;
// left untouched because this string is runtime model input.
const actSystemPrompt = `
# Instructions
You are a browser automation assistant. Your job is to accomplish the user's goal across multiple model calls by running playwright commands.

## Input
You will receive:
1. the user's overall goal
2. the steps that you've taken so far
3. a list of active DOM elements in this chunk to consider to get closer to the goal.
4. Optionally, a list of variable names that the user has provided that you may use to accomplish the goal. To use the variables, you must use the special <|VARIABLE_NAME|> syntax.


## Your Goal / Specification
You have 2 tools that you can call: doAction, and skipSection. Do action only performs Playwright actions. Do exactly what the user's goal is. Do not perform any other actions or exceed the scope of the goal.
If the user's goal will be accomplished after running the playwright action, set completed to true. Better to have completed set to true if your are not sure.

Note: If there is a popup on the page for cookies or advertising that has nothing to do with the goal, try to close it first before proceeding. As this can block the goal from being completed.

Again, if the user's goal will be accomplished after running the playwright action, set completed to true.
`;
|
|
25
|
+
|
|
26
|
+
// System prompt for the post-act verification call: given the goal, the
// steps taken, and a page screenshot, the model returns a boolean
// completion verdict (biased toward false positives by design).
const verifyActCompletionSystemPrompt = `
You are a browser automation assistant. The job has given you a goal and a list of steps that have been taken so far. Your job is to determine if the user's goal has been completed based on the provided information.

# Input
You will receive:
1. The user's goal: A clear description of what the user wants to achieve.
2. Steps taken so far: A list of actions that have been performed up to this point.
3. An image of the current page

# Your Task
Analyze the provided information to determine if the user's goal has been fully completed.

# Output
Return a boolean value:
- true: If the goal has been definitively completed based on the steps taken and the current page.
- false: If the goal has not been completed or if there's any uncertainty about its completion.

# Important Considerations
- False positives are okay. False negatives are not okay.
- Look for evidence of errors on the page or something having gone wrong in completing the goal. If one does not exist, return true.
`;
|
|
47
|
+
|
|
48
|
+
// ## Examples for completion check
|
|
49
|
+
// ### Example 1
|
|
50
|
+
// 1. User's goal: "input data scientist into role"
|
|
51
|
+
// 2. Steps you've taken so far: "The role input field was filled with 'data scientist'."
|
|
52
|
+
// 3. Active DOM elements: ["<input id="c9" class="VfPpkd-fmcmS-wGMbrd " aria-expanded="false" data-axe="mdc-autocomplete">data scientist</input>", "<button class="VfPpkd-LgbsSe VfPpkd-LgbsSe-OWXEXe-INsAgc lJ9FBc nDgy9d" type="submit">Search</button>"]
|
|
53
|
+
|
|
54
|
+
// Output: Will need to have completed set to true. Nothing else matters.
|
|
55
|
+
// Reasoning: The goal the user set has already been accomplished. We should not take any extra actions outside of the scope of the goal (for example, clicking on the search button is an invalid action - ie: not acceptable).
|
|
56
|
+
|
|
57
|
+
// ### Example 2
|
|
58
|
+
// 1. User's goal: "Sign up for the newsletter"
|
|
59
|
+
// 2. Steps you've taken so far: ["The email input field was filled with 'test@test.com'."]
|
|
60
|
+
// 3. Active DOM elements: ["<input type='email' id='newsletter-email' placeholder='Enter your email'></input>", "<button id='subscribe-button'>Subscribe</button>"]
|
|
61
|
+
|
|
62
|
+
// Output: Will need to have click on the subscribe button as action. And completed set to false.
|
|
63
|
+
// Reasoning: There might be an error when trying to submit the form and you need to make sure the goal is accomplished properly. So you set completed to false.
|
|
64
|
+
|
|
65
|
+
export function buildVerifyActCompletionSystemPrompt(): ChatMessage {
|
|
66
|
+
return {
|
|
67
|
+
role: "system",
|
|
68
|
+
content: verifyActCompletionSystemPrompt,
|
|
69
|
+
};
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
export function buildVerifyActCompletionUserPrompt(
|
|
73
|
+
goal: string,
|
|
74
|
+
steps = "None",
|
|
75
|
+
domElements: string | undefined,
|
|
76
|
+
): ChatMessage {
|
|
77
|
+
let actUserPrompt = `
|
|
78
|
+
# My Goal
|
|
79
|
+
${goal}
|
|
80
|
+
|
|
81
|
+
# Steps You've Taken So Far
|
|
82
|
+
${steps}
|
|
83
|
+
`;
|
|
84
|
+
|
|
85
|
+
if (domElements) {
|
|
86
|
+
actUserPrompt += `
|
|
87
|
+
# Active DOM Elements on the current page
|
|
88
|
+
${domElements}
|
|
89
|
+
`;
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
return {
|
|
93
|
+
role: "user",
|
|
94
|
+
content: actUserPrompt,
|
|
95
|
+
};
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
export function buildActSystemPrompt(): ChatMessage {
|
|
99
|
+
return {
|
|
100
|
+
role: "system",
|
|
101
|
+
content: actSystemPrompt,
|
|
102
|
+
};
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
export function buildActUserPrompt(
|
|
106
|
+
action: string,
|
|
107
|
+
steps = "None",
|
|
108
|
+
domElements: string,
|
|
109
|
+
variables?: Record<string, string>,
|
|
110
|
+
): ChatMessage {
|
|
111
|
+
let actUserPrompt = `
|
|
112
|
+
# My Goal
|
|
113
|
+
${action}
|
|
114
|
+
|
|
115
|
+
# Steps You've Taken So Far
|
|
116
|
+
${steps}
|
|
117
|
+
|
|
118
|
+
# Current Active Dom Elements
|
|
119
|
+
${domElements}
|
|
120
|
+
`;
|
|
121
|
+
|
|
122
|
+
if (variables && Object.keys(variables).length > 0) {
|
|
123
|
+
actUserPrompt += `
|
|
124
|
+
# Variables
|
|
125
|
+
${Object.entries(variables)
|
|
126
|
+
.map(([key, value]) => `<|${key.toUpperCase()}|>`)
|
|
127
|
+
.join("\n")}
|
|
128
|
+
`;
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
return {
|
|
132
|
+
role: "user",
|
|
133
|
+
content: actUserPrompt,
|
|
134
|
+
};
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
// Tool (function-calling) schemas handed to the OpenAI chat-completions API
// for the act flow. These objects are sent verbatim at runtime.
export const actTools: Array<OpenAI.ChatCompletionTool> = [
  {
    type: "function",
    function: {
      name: "doAction",
      description:
        "execute the next playwright step that directly accomplishes the goal",
      parameters: {
        type: "object",
        // NOTE: "why" is deliberately absent from `required` — it is an
        // optional explanation field.
        required: ["method", "element", "args", "step", "completed"],
        properties: {
          method: {
            type: "string",
            description: "The playwright function to call.",
          },
          element: {
            type: "number",
            description: "The element number to act on",
          },
          args: {
            type: "array",
            description: "The required arguments",
            items: {
              type: "string",
              description: "The argument to pass to the function",
            },
          },
          step: {
            type: "string",
            description:
              "human readable description of the step that is taken in the past tense. Please be very detailed.",
          },
          why: {
            type: "string",
            description:
              "why is this step taken? how does it advance the goal?",
          },
          completed: {
            type: "boolean",
            description:
              "true if the goal should be accomplished after this step",
          },
        },
      },
    },
  },
  {
    // Escape hatch: lets the model decline to act on a DOM chunk that
    // cannot advance the goal.
    type: "function",
    function: {
      name: "skipSection",
      description:
        "skips this area of the webpage because the current goal cannot be accomplished here",
      parameters: {
        type: "object",
        properties: {
          reason: {
            type: "string",
            description: "reason that no action is taken",
          },
        },
      },
    },
  },
];
|
|
201
|
+
|
|
202
|
+
// extract
|
|
203
|
+
// System prompt for the extract flow. The instruction to use the
// print_extracted_data tool is intentionally repeated for emphasis.
// buildExtractSystemPrompt collapses this block's whitespace before sending.
const extractSystemPrompt = `You are extracting content on behalf of a user. You will be given:
1. An instruction
2. A list of DOM elements to extract from

Print the exact text from the DOM elements with all symbols, characters, and endlines as is.
Print null or an empty string if no new information is found.

ONLY print the content using the print_extracted_data tool provided.
ONLY print the content using the print_extracted_data tool provided.
`;
|
|
213
|
+
|
|
214
|
+
export function buildExtractSystemPrompt(): ChatMessage {
|
|
215
|
+
const content = extractSystemPrompt.replace(/\s+/g, " ");
|
|
216
|
+
return {
|
|
217
|
+
role: "system",
|
|
218
|
+
content,
|
|
219
|
+
};
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
export function buildExtractUserPrompt(
|
|
223
|
+
instruction: string,
|
|
224
|
+
domElements: string,
|
|
225
|
+
): ChatMessage {
|
|
226
|
+
return {
|
|
227
|
+
role: "user",
|
|
228
|
+
content: `Instruction: ${instruction}
|
|
229
|
+
DOM: ${domElements}
|
|
230
|
+
|
|
231
|
+
ONLY print the content using the print_extracted_data tool provided.
|
|
232
|
+
ONLY print the content using the print_extracted_data tool provided.`,
|
|
233
|
+
};
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
// System prompt for the refine step of multi-chunk extraction: merges newly
// extracted content into previously extracted content (dedupe, extend,
// update, add). Sent verbatim at runtime.
const refineSystemPrompt = `You are tasked with refining and filtering information for the final output based on newly extracted and previously extracted content. Your responsibilities are:
1. Remove exact duplicates for elements in arrays and objects.
2. For text fields, append or update relevant text if the new content is an extension, replacement, or continuation.
3. For non-text fields (e.g., numbers, booleans), update with new values if they differ.
4. Add any completely new fields or objects.

Return the updated content that includes both the previous content and the new, non-duplicate, or extended information.`;
|
|
243
|
+
|
|
244
|
+
export function buildRefineSystemPrompt(): ChatMessage {
|
|
245
|
+
return {
|
|
246
|
+
role: "system",
|
|
247
|
+
content: refineSystemPrompt,
|
|
248
|
+
};
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
export function buildRefineUserPrompt(
|
|
252
|
+
instruction: string,
|
|
253
|
+
previouslyExtractedContent: object,
|
|
254
|
+
newlyExtractedContent: object,
|
|
255
|
+
): ChatMessage {
|
|
256
|
+
return {
|
|
257
|
+
role: "user",
|
|
258
|
+
content: `Instruction: ${instruction}
|
|
259
|
+
Previously extracted content: ${JSON.stringify(previouslyExtractedContent, null, 2)}
|
|
260
|
+
Newly extracted content: ${JSON.stringify(newlyExtractedContent, null, 2)}
|
|
261
|
+
Refined content:`,
|
|
262
|
+
};
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
// System prompt for the metadata/progress evaluation step of extraction:
// decides whether extraction is complete or more DOM chunks are needed.
const metadataSystemPrompt = `You are an AI assistant tasked with evaluating the progress and completion status of an extraction task.
Analyze the extraction response and determine if the task is completed or if more information is needed.

Strictly abide by the following criteria:
1. Once the instruction has been satisfied by the current extraction response, ALWAYS set completion status to true and stop processing, regardless of remaining chunks.
2. Only set completion status to false if BOTH of these conditions are true:
- The instruction has not been satisfied yet
- There are still chunks left to process (chunksTotal > chunksSeen)`;
|
|
273
|
+
|
|
274
|
+
export function buildMetadataSystemPrompt(): ChatMessage {
|
|
275
|
+
return {
|
|
276
|
+
role: "system",
|
|
277
|
+
content: metadataSystemPrompt,
|
|
278
|
+
};
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
export function buildMetadataPrompt(
|
|
282
|
+
instruction: string,
|
|
283
|
+
extractionResponse: object,
|
|
284
|
+
chunksSeen: number,
|
|
285
|
+
chunksTotal: number,
|
|
286
|
+
): ChatMessage {
|
|
287
|
+
return {
|
|
288
|
+
role: "user",
|
|
289
|
+
content: `Instruction: ${instruction}
|
|
290
|
+
Extracted content: ${JSON.stringify(extractionResponse, null, 2)}
|
|
291
|
+
chunksSeen: ${chunksSeen}
|
|
292
|
+
chunksTotal: ${chunksTotal}`,
|
|
293
|
+
};
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
// observe
|
|
297
|
+
// System prompt for the observe flow: asks the model to select page
// elements matching an instruction. buildObserveSystemPrompt collapses its
// whitespace before sending.
const observeSystemPrompt = `
You are helping the user automate the browser by finding elements based on what the user wants to observe in the page.
You will be given:
1. a instruction of elements to observe
2. a numbered list of possible elements or an annotated image of the page

Return an array of elements that match the instruction.
`;
|
|
305
|
+
export function buildObserveSystemPrompt(): ChatMessage {
|
|
306
|
+
const content = observeSystemPrompt.replace(/\s+/g, " ");
|
|
307
|
+
|
|
308
|
+
return {
|
|
309
|
+
role: "system",
|
|
310
|
+
content,
|
|
311
|
+
};
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
export function buildObserveUserMessage(
|
|
315
|
+
instruction: string,
|
|
316
|
+
domElements: string,
|
|
317
|
+
): ChatMessage {
|
|
318
|
+
return {
|
|
319
|
+
role: "user",
|
|
320
|
+
content: `instruction: ${instruction}
|
|
321
|
+
DOM: ${domElements}`,
|
|
322
|
+
};
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
// ask
|
|
326
|
+
// System prompt for the simple ask flow (plain Q&A, answer only).
// NOTE(review): "assistent" is a typo in the prompt text; left as-is because
// this string is runtime model input and changing it alters program output.
const askSystemPrompt = `
you are a simple question answering assistent given the user's question. respond with only the answer.
`;
|
|
329
|
+
export function buildAskSystemPrompt(): ChatMessage {
|
|
330
|
+
return {
|
|
331
|
+
role: "system",
|
|
332
|
+
content: askSystemPrompt,
|
|
333
|
+
};
|
|
334
|
+
}
|
|
335
|
+
|
|
336
|
+
export function buildAskUserPrompt(question: string): ChatMessage {
|
|
337
|
+
return {
|
|
338
|
+
role: "user",
|
|
339
|
+
content: `question: ${question}`,
|
|
340
|
+
};
|
|
341
|
+
}
|
package/lib/utils.ts
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import crypto from "crypto";
|
|
2
|
+
import { LogLine } from "../types/log";
|
|
3
|
+
|
|
4
|
+
export function generateId(operation: string) {
|
|
5
|
+
return crypto.createHash("sha256").update(operation).digest("hex");
|
|
6
|
+
}
|
|
7
|
+
|
|
8
|
+
export function logLineToString(logLine: LogLine): string {
|
|
9
|
+
const timestamp = logLine.timestamp || new Date().toISOString();
|
|
10
|
+
if (logLine.auxiliary?.error) {
|
|
11
|
+
return `${timestamp}::[stagehand:${logLine.category}] ${logLine.message}\n ${logLine.auxiliary.error.value}\n ${logLine.auxiliary.trace.value}`;
|
|
12
|
+
}
|
|
13
|
+
return `${timestamp}::[stagehand:${logLine.category}] ${logLine.message} ${
|
|
14
|
+
logLine.auxiliary ? JSON.stringify(logLine.auxiliary) : ""
|
|
15
|
+
}`;
|
|
16
|
+
}
|