npm - @shiplightai/sdk - Versions diffs - 0.1.1 - Mend

@shiplightai/sdk 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/README.md +280 -0
package/dist/agentHelpers-UCLT5EKK.js +1 -0
package/dist/agentLogin-ARB3NEO4.js +1 -0
package/dist/chunk-6H2NJBNL.js +1 -0
package/dist/chunk-GDTCZALZ.js +192 -0
package/dist/chunk-GPZJYXUG.js +3880 -0
package/dist/chunk-KFC5I6R5.js +14 -0
package/dist/chunk-QIBDXB3J.js +22 -0
package/dist/chunk-UFLZ3URR.js +1 -0
package/dist/chunk-UHZTPBZ3.js +197 -0
package/dist/chunk-YR4E7JSB.js +3 -0
package/dist/handler-TPOFKKIB.js +1 -0
package/dist/index.d.ts +446 -0
package/dist/index.js +44 -0
package/dist/task-57MAWXLN.js +190 -0
package/package.json +76 -0

package/README.md ADDED Viewed

@@ -0,0 +1,280 @@
+# @shiplightai/sdk
+A companion SDK for Playwright that makes your tests resilient to UI changes like dynamic IDs, layout rearrangements, and styling updates.
+## Installation
+```bash
+npm install @shiplightai/sdk playwright
+```
+## Quick Start
+```typescript
+import { chromium } from 'playwright';
+import { createAgent, configureSdk } from '@shiplightai/sdk';
+// Configure SDK with API key (call once at startup)
+configureSdk({
+  env: { GOOGLE_API_KEY: process.env.GOOGLE_API_KEY },
+});
+// Create an agent
+const agent = createAgent({
+  model: 'gemini-2.5-pro',
+});
+// Use with Playwright
+const browser = await chromium.launch();
+const page = await browser.newPage();
+// Login using the Sauce Labs demo site (public test site)
+await agent.login(page, {
+  url: 'https://www.saucedemo.com/',
+  username: 'standard_user',
+  password: 'secret_sauce',
+});
+// Verify login succeeded
+await agent.assert(page, 'Products page is visible');
+// Extract data from the page
+await agent.extract(page, 'the first product name', 'productName');
+console.log('First product:', agent.getVariable('productName'));
+await browser.close();
+```
+## Custom Actions
+Extend the agent with custom actions for your specific use case:
+```typescript
+import { createAgent, z } from '@shiplightai/sdk';
+const agent = createAgent({ model: 'gemini-2.5-pro' });
+// Register a custom action
+agent.registerAction({
+  name: 'extract_email_code',
+  description: 'Extract verification code from email inbox',
+  schema: z.object({
+    email_address: z.string().describe('The email address to check'),
+    code_type: z.enum(['verification', 'reset']).describe('Type of code'),
+  }),
+  async execute(args, ctx) {
+    // Your custom logic here
+    const code = await myEmailService.getCode(args.email_address, args.code_type);
+    // Store the result for later use
+    ctx.variableStore.set('verification_code', code);
+    return { success: true, message: `Found code: ${code}` };
+  },
+});
+// The agent will automatically use your action when needed
+await agent.act(page, 'Get the verification code from email');
+await agent.act(page, 'Enter {{ verification_code }} in the input field');
+```
+## API Reference
+### `createAgent(options)`
+Create a new agent instance.
+```typescript
+const agent = createAgent({
+  // Required: LLM model to use
+  model: 'gemini-2.5-pro',
+  // Optional: Initial variables
+  variables: { username: 'test@example.com' },
+  // Optional: Keys to mark as sensitive (masked in logs)
+  sensitiveKeys: ['password', 'apiKey'],
+  // Optional: Directory for test data files
+  testDataDir: './test-data',
+  // Optional: Directory for downloads
+  downloadDir: './downloads',
+  // Optional: Self-healing strategy ('none' | 'single' | 'multi')
+  selfHealingStrategy: 'single',
+});
+```
+### Supported Models
+| Provider | Model | Notes |
+|----------|-------|-------|
+| Google | `gemini-2.5-pro` | Recommended, requires `GOOGLE_API_KEY` |
+| Google | `gemini-3-pro-preview` | More powerful, higher cost |
+| OpenAI | `computer-use-preview` | Optional, for vision operations (drag and drop) |
+### `agent.registerAction(action)`
+Register a custom action.
+```typescript
+agent.registerAction({
+  // Unique action name (snake_case recommended)
+  name: 'my_action',
+  // Description for the agent
+  description: 'What this action does and when to use it',
+  // Zod schema for parameters
+  schema: z.object({
+    param1: z.string().describe('Description of the parameter'),
+    param2: z.number().optional(),
+  }),
+  // Execute function
+  async execute(args, ctx) {
+    // args: validated parameters
+    // ctx.page: Playwright page
+    // ctx.variableStore: access to variables
+    return { success: true, message: 'Optional status message' };
+  },
+});
+```
+### `agent.act(page, instruction)`
+Perform a single action on the page.
+```typescript
+await agent.act(page, 'Click the login button');
+await agent.act(page, 'Fill the email field with test@example.com');
+await agent.act(page, 'Select "Express" from shipping dropdown');
+```
+### `agent.run(page, instruction, options?)`
+Run a multi-step instruction until the goal is achieved.
+```typescript
+await agent.run(page, 'Complete the checkout process');
+await agent.run(page, 'Fill out the entire registration form');
+await agent.run(page, 'Add 3 items to cart', { maxSteps: 10 });
+```
+### `agent.assert(page, statement)`
+Assert a condition (throws on failure).
+```typescript
+await agent.assert(page, 'Login button is visible');
+await agent.assert(page, 'Cart contains 3 items');
+```
+### `agent.evaluate(page, statement)`
+Evaluate a condition (returns boolean, doesn't throw).
+```typescript
+const isLoggedIn = await agent.evaluate(page, 'User is logged in');
+if (!isLoggedIn) {
+  await agent.run(page, 'Click the login button');
+}
+```
+### `agent.extract(page, description, variableName)`
+Extract data from the page and store it in a variable.
+```typescript
+await agent.extract(page, 'the order total', 'orderTotal');
+await agent.run(page, 'Verify {{ orderTotal }} is displayed on receipt');
+```
+### `agent.waitUntil(page, condition, timeoutSeconds?)`
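+Wait until a condition becomes true. As the example below shows, the returned value is falsy when the condition is not met within the timeout.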
+```typescript
+await agent.waitUntil(page, 'Loading spinner is no longer visible');
+const appeared = await agent.waitUntil(page, 'Table shows at least 5 rows', 30);
+if (!appeared) {
+  throw new Error('Data did not load in time');
+}
+```
+## Custom Action Context
+The `execute` function receives a context object:
+```typescript
+interface IActionExecutionContext {
+  // Playwright page instance
+  page: Page;
+  // Variable store for reading/writing variables
+  variableStore: VariableStore;
+}
+```
+### Using Variables
+```typescript
+async execute(args, ctx) {
+  // Read a variable
+  const email = ctx.variableStore.get('email');
+  // Set a variable
+  ctx.variableStore.set('result', 'some value');
+  // Set a sensitive variable (masked in logs)
+  ctx.variableStore.set('token', secretToken, true);
+  return { success: true };
+}
+```
+## SDK Configuration
+Configure SDK-wide settings before creating agents:
+```typescript
+import { configureSdk, getSdkConfig, LogLevel } from '@shiplightai/sdk';
+configureSdk({
+  // Log level: DEBUG, INFO, WARN, ERROR
+  logLevel: LogLevel.INFO,
+  // Enable detailed agent logging
+  debugAgent: false,
+  // Environment variables (API keys)
+  env: {
+    // Required: Google API key for Gemini models
+    GOOGLE_API_KEY: 'your-google-api-key',
+    // Optional: OpenAI API key for vision operations
+    // OPENAI_API_KEY: 'sk-...',
+  },
+  // Optional: paths for logs and results
+  agentLogPath: './logs/agent.log',
+  testResultsJsonPath: './results.json',
+});
+// Read current config
+const config = getSdkConfig();
+```
+## Environment Variables
+| Variable | Required | Description |
+|----------|----------|-------------|
+| `GOOGLE_API_KEY` | Yes | Google AI API key for Gemini models |
+| `OPENAI_API_KEY` | No | OpenAI API key (for vision operations) |
+## License
+MIT

package/dist/agentHelpers-UCLT5EKK.js ADDED Viewed

	@@ -0,0 +1 @@
1	+ import{b as t,c as o,d as r,e as p,f as i}from"./chunk-GDTCZALZ.js";import"./chunk-GPZJYXUG.js";import"./chunk-YR4E7JSB.js";import"./chunk-KFC5I6R5.js";import"./chunk-UFLZ3URR.js";export{p as evaluateStatement,t as executeAction,r as executeStep,o as generateActionStep,i as runTask};

package/dist/agentLogin-ARB3NEO4.js ADDED Viewed

	@@ -0,0 +1 @@
1	+ import{b as o,d as t,e as i,f as r,g as e,h as a,i as n}from"./chunk-UHZTPBZ3.js";import"./chunk-6H2NJBNL.js";import"./chunk-KFC5I6R5.js";import"./chunk-UFLZ3URR.js";export{o as LoginType,t as checkLocators,e as createUnsignedInContext,n as generateAndValidateLoginLocators,a as generateValidationLocators,r as validateLogin,i as validateLoginLocators};

package/dist/chunk-6H2NJBNL.js ADDED Viewed

@@ -0,0 +1 @@

/** Default minimum total duration (ms) that `P` takes before resolving. */
const DEFAULT_MIN_WAIT_MS = 0.5 * 1e3;

/** Default upper bound (ms) on how long to wait for the network to settle. */
const DEFAULT_TIMEOUT_MS = 30 * 1e3;

/** Default idle window (ms): no tracked request may be pending or recent for this long. */
const DEFAULT_QUIET_PERIOD_MS = 1 * 1e3;

/** How often (ms) the pending set is re-checked. */
const POLL_INTERVAL_MS = 100;

/** Responses that declare a larger body than this do not count as "activity". */
const MAX_TRACKED_BODY_BYTES = 5 * 1024 * 1024;

/** Only requests with one of these resource types are tracked. */
const TRACKED_RESOURCE_TYPES = new Set([
  "document",
  "stylesheet",
  "image",
  "font",
  "script",
  "iframe",
]);

/** A response counts as activity only if its content-type starts with one of these. */
const TRACKED_CONTENT_TYPE_PREFIXES = [
  "text/html",
  "text/css",
  "application/javascript",
  "image/",
  "font/",
  "application/json",
];

/** Requests whose (lower-cased) URL contains any of these fragments are never tracked. */
const IGNORED_URL_FRAGMENTS = [
  "analytics",
  "tracking",
  "telemetry",
  "beacon",
  "metrics",
  "doubleclick",
  "adsystem",
  "adserver",
  "advertising",
  "facebook.com/plugins",
  "platform.twitter",
  "linkedin.com/embed",
  "livechat",
  "zendesk",
  "intercom",
  "crisp.chat",
  "hotjar",
  "push-notifications",
  "onesignal",
  "pushwoosh",
  "heartbeat",
  "ping",
  "alive",
  "webrtc",
  "rtmp://",
  "wss://",
  "cloudfront.net",
  "fastly.net",
];

/** Content-type fragments that mark a response as streaming/media. */
const STREAMING_CONTENT_TYPE_FRAGMENTS = [
  "streaming",
  "video",
  "audio",
  "webm",
  "mp4",
  "event-stream",
  "websocket",
  "protobuf",
];

/** `sec-fetch-dest` request header values that exclude a request from tracking. */
const IGNORED_FETCH_DESTINATIONS = ["video", "audio"];

/** Resolve after `ms` milliseconds. */
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/** True for data:/blob: URLs and URLs containing an ignored fragment. */
function isIgnoredUrl(url) {
  const lowered = url.toLowerCase();
  if (lowered.startsWith("data:") || lowered.startsWith("blob:")) {
    return true;
  }
  return IGNORED_URL_FRAGMENTS.some((fragment) => lowered.includes(fragment));
}

/** Decide from the response headers whether a completed response resets the idle clock. */
function countsAsActivity(response) {
  const headers = response.headers();
  const contentType = (headers["content-type"] || "").toLowerCase();
  if (STREAMING_CONTENT_TYPE_FRAGMENTS.some((fragment) => contentType.includes(fragment))) {
    return false;
  }
  if (!TRACKED_CONTENT_TYPE_PREFIXES.some((prefix) => contentType.startsWith(prefix))) {
    return false;
  }
  const declaredLength = headers["content-length"];
  // parseInt never throws; an unparsable header yields NaN, which compares false.
  if (declaredLength && Number.parseInt(declaredLength, 10) > MAX_TRACKED_BODY_BYTES) {
    return false;
  }
  return true;
}

/**
 * Poll until no tracked request is pending and none has been seen for
 * `quietPeriodMs`, or until `d` milliseconds have elapsed.
 *
 * @param {object} o - Emitter exposing `on`/`removeListener` and emitting
 *   "request", "response" and "requestfailed" (presumably a Playwright Page —
 *   confirm against callers).
 * @param {number} d - Maximum time to wait, in milliseconds.
 * @param {number} [quietPeriodMs] - Required idle window, in milliseconds.
 * @returns {Promise<void>}
 */
async function v(o, d, quietPeriodMs = DEFAULT_QUIET_PERIOD_MS) {
  const pending = new Set();
  let lastActivity = Date.now();

  const onRequest = (request) => {
    // The allow-list alone decides; websocket/media/etc. are simply not in it.
    if (!TRACKED_RESOURCE_TYPES.has(request.resourceType())) {
      return;
    }
    if (isIgnoredUrl(request.url())) {
      return;
    }
    const headers = request.headers();
    if (headers.purpose === "prefetch") {
      return;
    }
    if (IGNORED_FETCH_DESTINATIONS.includes(headers["sec-fetch-dest"])) {
      return;
    }
    pending.add(request);
    lastActivity = Date.now();
  };

  const onResponse = (response) => {
    // Set#delete reports whether the request was being tracked at all.
    if (!pending.delete(response.request())) {
      return;
    }
    if (countsAsActivity(response)) {
      lastActivity = Date.now();
    }
  };

  // A failed or aborted request never produces a "response" event; without
  // this handler it would stay pending and force a wait for the full timeout.
  const onRequestFailed = (request) => {
    if (pending.delete(request)) {
      lastActivity = Date.now();
    }
  };

  o.on("request", onRequest);
  o.on("response", onResponse);
  o.on("requestfailed", onRequestFailed);
  try {
    const startedAt = Date.now();
    for (;;) {
      await sleep(POLL_INTERVAL_MS);
      const now = Date.now();
      const isQuiet = pending.size === 0 && now - lastActivity >= quietPeriodMs;
      if (isQuiet || now - startedAt > d) {
        break;
      }
    }
  } finally {
    o.removeListener("request", onRequest);
    o.removeListener("response", onResponse);
    o.removeListener("requestfailed", onRequestFailed);
  }
}

/**
 * Wait for network activity on `o` to settle, taking at least `n` milliseconds
 * in total.
 *
 * @param {object} o - Emitter passed through to `v` (see there).
 * @param {number} [d] - Maximum time to wait for the network, in milliseconds.
 * @param {number} [n] - Minimum total duration of this call, in milliseconds.
 * @param {number} [quietPeriodMs] - Required idle window, in milliseconds.
 * @returns {Promise<void>}
 * @throws {Error} Wraps any failure from the wait; the original is kept as `cause`.
 */
async function P(o, d = DEFAULT_TIMEOUT_MS, n = DEFAULT_MIN_WAIT_MS, quietPeriodMs = DEFAULT_QUIET_PERIOD_MS) {
  const startedAt = Date.now();
  try {
    await v(o, d, quietPeriodMs);
  } catch (err) {
    throw err instanceof Error
      ? new Error(`Failed during network stabilization: ${err.message}`, { cause: err })
      : new Error("An unknown error occurred during network stabilization.", { cause: err });
  }
  const remaining = Math.max(n - (Date.now() - startedAt), 0);
  if (remaining > 0) {
    await sleep(remaining);
  }
}

export { P as a };

package/dist/chunk-GDTCZALZ.js ADDED Viewed

@@ -0,0 +1,192 @@
+import{a as L,b as W,c as M,d as Y}from"./chunk-GPZJYXUG.js";import{a as ce,g as le,i as ue}from"./chunk-YR4E7JSB.js";import{a as v}from"./chunk-KFC5I6R5.js";import{g as ae}from"./chunk-UFLZ3URR.js";import{zodToJsonSchema as be}from"zod-to-json-schema";function V(e){let t=be(e,{$refStrategy:"none"});if(t.$schema&&delete t.$schema,t.type!=="object")throw new Error(`Schema must be a Zod object schema, got type: ${t.type}`);return H(t),t}function H(e){if(!(typeof e!="object"||e===null)){if(e.type==="object"&&(e.additionalProperties=!1,e.properties)){let t=Object.keys(e.properties);e.required=t;for(let n of Object.values(e.properties))H(n)}e.type==="array"&&e.items&&H(e.items);for(let t of["anyOf","oneOf","allOf"])Array.isArray(e[t])&&e[t].forEach(H);for(let[t,n]of Object.entries(e))typeof n=="object"&&n!==null&&!["properties","items","anyOf","oneOf","allOf"].includes(t)&&H(n)}}var pe=class{constructor(e){this.registry=e}getToolDefinitions(){return this.registry.getTools().filter(e=>e.availability.openai).map(e=>({type:"function",function:{name:e.name,description:e.description,parameters:V(e.schema),strict:!0}}))}getToolDefinitionsFiltered(e){let t=new Set(e);return this.registry.getTools().filter(n=>t.has(n.name)&&n.availability.openai).map(n=>({type:"function",function:{name:n.name,description:n.description,parameters:V(n.schema),strict:!0}}))}toJSON(){return this.getToolDefinitions()}getToolDefinition(e){let t=this.registry.get(e);if(t)return{type:"function",function:{name:t.name,description:t.description,parameters:V(t.schema),strict:!0}}}getToolCount(){return this.registry.size()}getToolNames(){return this.registry.getToolNames()}};function ve(e,t){let n=new Map;for(let c of t)c.hash&&c.url?n.set(c.hash,c.url):c.uuid&&c.url&&n.set(c.uuid,c.url);let o=/!\[([^\]]*)\]\(image:([a-f0-9]{64}|[a-zA-Z0-9\-]+)\)/g,r=[],i=0,a;for(;(a=o.exec(e))!==null;){let c=e.slice(i,a.index);c&&r.push({type:"text",text:c});let u=a[2];n.has(u)?r.push({type:"image",image:new 
URL(n.get(u))}):r.push({type:"text",text:a[0]}),i=a.index+a[0].length}let s=e.slice(i);return s&&r.push({type:"text",text:s}),r.length===0&&r.push({type:"text",text:e}),r}function we(e){let t=[];for(let n of e){let o=n.content||"",r=n.images||[],i=ve(o,r);t.length>0&&i.length>0&&t.push({type:"text",text:`
+`}),t.push(...i)}return t}var me=!1;function q(e,t=me){if(t){let n=we(e);if(n.length===0)return[];let o={type:"text",text:`
+<retrieved_knowledge>
+Below are expert curated knowledge that are retrieved from the knowledge base; APPLY THESE KNOWLEDGES IF THEY ARE RELEVANT TO THE TASK:
+`},r={type:"text",text:`
+</retrieved_knowledge>
+`};return[o,...n,r]}else{let o=e.filter(s=>!s.images||s.images.length===0).map(s=>s.content||"").filter(s=>s.length>0).join(`
+`);return o?[{type:"text",text:`
+<retrieved_knowledge>
+Below are expert curated knowledge that are retrieved from the knowledge base; APPLY THESE KNOWLEDGES IF THEY ARE RELEVANT TO THE TASK:
+`},{type:"text",text:o},{type:"text",text:`
+</retrieved_knowledge>
+`}]:[]}}function de(e,t=me){if(!t)return 0;let n=0;for(let o of e)o.images&&(n+=o.images.length);return n}function xe(){let e=new Date,n=new Intl.DateTimeFormat("en-US",{timeZone:"America/Los_Angeles",year:"numeric",month:"2-digit",day:"2-digit",hour:"2-digit",minute:"2-digit",second:"2-digit",hour12:!1,timeZoneName:"short"}).formatToParts(e),o=n.find(l=>l.type==="year").value,r=n.find(l=>l.type==="month").value,i=n.find(l=>l.type==="day").value,a=n.find(l=>l.type==="hour").value,s=n.find(l=>l.type==="minute").value,c=n.find(l=>l.type==="second").value,u=n.find(l=>l.type==="timeZoneName").value,p=String(e.getMilliseconds()).padStart(3,"0");return`${o}-${r}-${i} ${a}:${s}:${c}.${p} ${u}`}function he(e){return`# Your Role
+You are part of a end-to-end testing system that is designed to automate the testing of a website. Given an instruction in natural language, your job is to translate it into an action in the predefined actions. The instruction might not match any action in the predefined actions or might require to interact with an element that is not on the page. It's your job to detect these cases and return an empty action.
+# Rules
+## Action Selection Rules
+- If the instruction requires a specific action, you must select that action. If no action matches the specific action, you must return an empty action so that testing system can aware of the situation.
+- If asked to do nothing or ignore the instruction or something similar, you must select \`wait\` action of 1 second.
+- If asked to verify something, you must select \`verify\` action.
+- If asked to do accurate interaction, like selecting a specific chunk of text or drawing a bounding box, you must select \`perform_accurate_operation\` action.
+- If asked to scroll, you decide if you need to \`scroll\` the page or \`scroll_on_element\`. also you need to calculate how much to scroll.
+## Element Selection Rules
+- If the instruction requires to interact with a specific element, you must select that element.
+- If no element matches the specific element, you must return an empty action so that testing system can aware of the situation. NEVER click on alternative elements as a workaround. NEVER try to navigate to find the element (e.g. by scrolling, closing modals, clicking other buttons, or refreshing the page).
+- Fail fast: If the exact target element is not visible on the current page, return an empty action immediately. The testing system will handle recovery.
+- The type of the selected element doesn't have to match the target, for example, if the instruction requires to interact with an image but no image element matches, you can select a div that contains the image.
+## Instruction Completion Analysis Rules
+- Reasoning about the instruction completion is critical. You must analyze the instruction and your action to determine if your action will complete the instruction.
+## Response Format Rules
+- Respond using valid JSON format, which can be parsed by python json.loads():
+{
+    "thought": "...", // step by step reasoning of your decision making process
+    "description": "...", // detailed description of the action to be performed. (e.g. click on the 'Submit' button to submit the form)
+    "action": {"one_action_name": {// action-specific parameter}},
+    "completes_instruction": true/false // boolean indicating whether this action completes the given instruction. Set to false if the action is only partial, requires follow-up actions, or cannot fully complete the instruction.
+}
+Follow the rules above strictly.
+# Action Space
+${e}
+# Examples
+Example of \`verify\` action:
+instruction: "Verify that the page title is 'Home'"
+{
+    "thought": "I understand the instruction is to verify that the page title is 'Home'. I will use the \`verify\` action to verify the page title.", // Do not verify it yourself, just translate the instruction to the \`verify\` action
+    "description": "Verify that the page title is 'Home'",
+    "action": {"verify": {"statement": "the page title is 'Home'"}}, // the statement should be the same wording as the instruction, don't rephrase it
+    "completes_instruction": true // this action fully completes the instruction
+}
+Example of \`save_variable\` action:
+instruction: "Extract and save the page title as page_title"
+{
+    "thought": "I understand the instruction is to save the page title as page_title. The current page title is 'Home'. I will use the \`save_variable\` action to save the page title.",
+    "description": "Save the page title as variable page_title",
+    "action": {"save_variable": {"name": "page_title", "value": "Home"}}, // the value should be the same wording as the instruction, don't rephrase it
+    "completes_instruction": true // this action fully completes the instruction
+}
+Example of empty action when the target element is not on the page, or the instruction cannot be completed for any reason:
+{
+    "thought": "The user wants me to click the 'Create Entry' button. However, the current page is a sign-in page and the only interactive element is the 'Sign In' button. The 'Create Entry' button is not present on the page. The previous attempt to click this button also failed. Therefore, I cannot complete the instruction and will return an empty action.",
+    "description": "Click the 'Create Entry' button.",
+    "action": {}, // empty action object to indicate the instruction cannot be completed
+    "completes_instruction": false
+}
+`}function Se(e){if(!e||e.length===0)return"";let t="";return e.forEach(([n,o],r)=>{t+=`(${r+1}) Description: ${n}
+    Feedback: ${o}
+`}),`## Additional context
+You just executed following steps in order:
+${t}`}function fe(e,t,n,o,r,i,a=!1,s=xe(),c=!1,u){let p=[],l=`
+# Instruction
+"${t}"
+# Current webpage state
+## Tab information:
+${e.currentTabText}Available tabs:
+${e.tabsText}
+## Element interaction guidelines:
+   - Only use indexes that exist in the provided element list
+   - Each element has a unique index number (e.g., "[33]<button>")
+   - The bounding box and index of each element is marked on the screenshot.
+   - Elements marked with "[]Non-interactive text" are non-interactive (for context only)
+   - Elements are indented to show the structure of the element tree, with indentation level indicating depth
+   - When considering an element, also consider its children elements
+   - If an element is scrollable, it will be marked with "(SCROLLABLE)" (e.g., "[33](SCROLLABLE)<ul>"), use the \`scroll_on_element\` action to scroll on the element.
+## Interactive elements from current page:
+${e.elementsText}
+    `;if(p.push({type:"text",text:l}),a&&e.slicedScreenshotsBase64)for(let h of e.slicedScreenshotsBase64)p.push({type:"text",text:"The following images provided are sliced screenshots of the current webpage, with interactive elements highlighted. The element index label locate at the top right corner of the bounding box."}),p.push({type:"image",image:h});else i&&(p.push({type:"text",text:"The following image provided is a screenshot of the current webpage, with interactive elements highlighted. The element index label locate at the top right corner of the bounding box."}),p.push({type:"image",image:i}));if(r&&r.length>0){let h=q(r,c);p.push(...h)}let d="";if(d+=`
+Current local time is ${s}.
+`,n&&Object.keys(n).length>0){let h=[];for(let w of Object.keys(n))if(u?.has(w))h.push(`  - ${w}: [SENSITIVE - value hidden]`);else{let x=n[w],E=typeof x=="string"?x:JSON.stringify(x);h.push(`  - ${w}: "${E}"`)}d+=`
+## Available Data Placeholders
+The following placeholders are available for use in your actions:
+${h.join(`
+`)}
+To use them, write Jinja-like template syntax: {{ placeholder_name }}
+- Use the EXACT placeholder name as shown above
+- Do NOT use the actual value directly
+- The values shown are for context only to help you understand what data is available
+- In action descriptions, describe what the placeholder represents in natural language (e.g., "Type the first user name" instead of "Type {{ firstUserName }}")
+`}if(o&&o.length>0){let h=Se(o);d+=`
+`+h}return d+=`
+Based on the above information, please determine the right action to accomplish the task.
+`,p.push({type:"text",text:d}),p}import Te from"openai";import Ae from"sharp";async function ke(e,t,n){await e.evaluate(()=>{let p=document.getElementById("playwright-highlight-container");p&&p.remove(),window._highlightCleanupFunctions&&(window._highlightCleanupFunctions.forEach(l=>l()),window._highlightCleanupFunctions=[])});let o=await e.screenshot({type:"png",fullPage:!1}),r=Ae(o),i=await r.metadata(),a=i.width||0,s=i.height||0;v.log(`Screenshot actual dimensions: ${a}x${s}, viewport: ${t}x${n}`);let c;return a!==t||s!==n?(v.log(`Resizing screenshot from ${a}x${s} to ${t}x${n}`),c=await r.resize(t,n).png().toBuffer()):c=o,c.toString("base64")}async function oe(e,t,n){let o=await e.evaluateHandle(c=>document.elementFromPoint(c.x,c.y),{x:t,y:n});if(!o)throw new Error(`No element found at (${t}, ${n})`);let r=o.asElement();if(!r)throw await o.dispose(),new Error(`No element found at (${t}, ${n})`);let i=await r.boundingBox();if(!i)throw await o.dispose(),new Error("Element has no bounding box");let a=t-(i.x+i.width/2),s=n-(i.y+i.height/2);return{relative_x:a,relative_y:s,element:r}}async function Ee(e,t){return t?{xpath:void 0,locator:(t?await le(e,t):null)||void 0,frame_path:[]}:{xpath:void 0,locator:void 0,frame_path:[]}}async function _e(e,t){let n=null,o=null;switch(t.type){case"click":{let i=t.button==="right"?"right_click_by_coordinates":"click_by_coordinates";if(t.x===void 0||t.y===void 0)break;let a=await oe(e,t.x,t.y);n={action_name:i,kwargs:{relative_x:a.relative_x,relative_y:a.relative_y}},o=a.element;break}case"double_click":{if(t.x===void 0||t.y===void 0)break;let i=await oe(e,t.x,t.y);n={action_name:"double_click_by_coordinates",kwargs:{relative_x:i.relative_x,relative_y:i.relative_y}},o=i.element;break}case"drag":{if(!t.path||t.path.length<2)break;let i=t.path[0].x,a=t.path[0].y,s=t.path[1].x,c=t.path[1].y,u=await 
oe(e,i,a);n={action_name:"drag_drop",kwargs:{relative_x:u.relative_x,relative_y:u.relative_y,delta_x:s-i,delta_y:c-a}},o=u.element;break}}let r=await Ee(e,o);return{action_data:n,locatorInfo:r}}async function D(e,t,n={}){try{let{page:o}=t,r=o.viewportSize();if(!r)return{status:"error",error:"Viewport size not available"};let i=r.width,a=r.height;v.log(`Viewport size: ${i}x${a}`);let s=await ke(o,i,a),u=ae().env?.OPENAI_API_KEY;if(!u)return{status:"error",error:"OpenAI API key not found. Set OPENAI_API_KEY environment variable or configure via SDK config."};let p=new Te({apiKey:u});v.log("Sending request to OpenAI CUA...");let d=[{role:"user",content:[{type:"input_text",text:`
+You will be given an action to execute and screenshot of the current screen.
+Output one computer_call object that will accomplish this action.
+Action: ${e}
+`},{type:"input_image",detail:"auto",image_url:`data:image/png;base64,${s}`}]}],h=await p.responses.create({model:"computer-use-preview",tools:[{type:"computer-preview",display_width:i,display_height:a,environment:"browser"}],input:d,truncation:"auto",temperature:.1});v.log("Received response from OpenAI CUA");let A=h.output.filter(_=>_.type==="computer_call")[0]||null;if(!A)return{status:"error",reasoning:h.output_text||""||"Invalid action generation response",error:"No computer_call action generated"};let x=A.action;v.log(`Generated action: ${JSON.stringify(x)}`);let{action_data:E,locatorInfo:P}=await _e(o,x);return E?{status:"success",actionEntity:{action_description:e,action_data:E,locator:P.locator||void 0,xpath:P.xpath||void 0,frame_path:P.frame_path},reasoning:"Action generated successfully using pure vision mode",goalAccomplished:!0}:{status:"error",error:"Failed to map action to ActionDataEntity"}}catch(o){return v.error("Error generating CUA action",o),{status:"error",error:o.message||"Failed to generate action with pure vision"}}}async function $e(e){let t=e.context().pages(),n=null,o=[];for(let a=0;a<t.length;a++){let s=t[a];s===e&&(n=a);let c="(title unavailable)";try{c=await Promise.race([s.title(),new Promise((p,l)=>setTimeout(()=>l(new Error("timeout")),1e3))])}catch{}let u=`Tab ${a}: ${s.url()}`;c&&(u+=` - ${c.slice(0,50)}`),o.push(u)}let r=o.length>0?o.join(`
+`):"";return{currentTabText:n!==null?`Current tab: ${n}
+`:"",tabsText:r}}async function Ie(e,t){let{currentTabText:n,tabsText:o}=await $e(e);return{elementsText:t,currentUrl:e.url(),currentTitle:await e.title(),currentTabText:n,tabsText:o}}async function G(e,t){let{page:n,domService:o,agentServices:r}=e,i=typeof t=="boolean"?{useCleanScreenshot:t}:t||{},a=r.getInteractiveClassNames(),{domState:s,screenshotBase64:c,slicedScreenshotsBase64:u}=await o.getClickableElementsWithScreenshot(n,{interactiveClassNames:a,useCleanScreenshot:i.useCleanScreenshot,useSlicedScreenshots:i.useSlicedScreenshots,resizeSlicedScreenshots:i.resizeSlicedScreenshots,useAccessibilityTree:i.useAccessibilityTree,actionIntent:i.actionIntent}),p=s.elementTree.clickableElementsToString(),l=await Ie(n,p);return u&&(l.slicedScreenshotsBase64=u),{domTree:p,screenshotBase64:c,slicedScreenshotsBase64:u,domState:s,pageContext:l}}function J(e,t){return e?{prompt_tokens:e.promptTokens||e.inputTokens||0,completion_tokens:e.completionTokens||e.outputTokens||0,total_tokens:e.totalTokens||0,model:t}:null}import{generateText as Oe,Output as Pe}from"ai";import{z as Z}from"zod";function Ce(e){let t=e.toLowerCase();return[/\b(type|enter|input|fill|write|set)\b/,/\b(text|value|field|box)\b.*\b(to|with|as)\b/,/\b(username|password|email|search|query)\b/].some(i=>i.test(t))?"input":[/\bscroll\b/,/\b(scroll|swipe)\s*(up|down|left|right)\b/,/\b(page|move)\s*(down|up)\b/].some(i=>i.test(t))?"scroll":[/\b(click|tap|press|select|choose|pick|check|toggle)\b/,/\b(open|close|submit|confirm|cancel|dismiss)\b/,/\b(button|link|menu|dropdown|checkbox|radio)\b/].some(i=>i.test(t))?"click":"all"}function Ne(e){if(!e)return null;if(e instanceof URL)return e.href;if(typeof e=="object"&&e.href)return String(e.href);if(typeof e=="object"&&typeof e.toString=="function"){let t=e.toString();if(t.startsWith("http://")||t.startsWith("https://"))return t}return typeof e=="string"&&(e.startsWith("http://")||e.startsWith("https://"))?e:null}function Le(e){return 
e.map(t=>({role:t.role,content:Array.isArray(t.content)?t.content.map(n=>{if(n.type==="image"){let o=n.image,r=Ne(o);if(r)return{type:"image",file:r};let i=typeof o=="string"?o:"";return{type:"image",file:i.startsWith("data:")?i:`data:image/png;base64,${i}`}}return{type:"text",text:n.text}}):t.content}))}async function ge(e,t,n={}){let{page:o,agentServices:r}=t,i=r.getModel(),a=n.temperature??0,s=r.retrieveKnowledges(e).catch(y=>(v.log(`Failed to retrieve knowledges: ${y}`),[])),c=r.isSlicedScreenshotsEnabled(),u=r.isResizeSlicedScreenshotsEnabled(),p=r.isKnowledgeImagesEnabled(),l=r.isAccessibilityTreeEnabled(),d=r.isActionIntentFilteringEnabled(),h=d?Ce(e):"all";d&&h!=="all"&&v.log(`Action intent filtering: detected '${h}' intent from statement`);let{screenshotBase64:w,domState:A,pageContext:x}=await G(t,{useSlicedScreenshots:c,resizeSlicedScreenshots:u,useAccessibilityTree:l,actionIntent:h});t.domState=A;let R=new pe(L).getToolDefinitions().map(y=>{let N=y.function;return`${N.name}: ${N.description}
+Parameters: ${JSON.stringify(N.parameters,null,2)}`}).join(`
+`),_=he(R),b=await s,S=fe(x,e,t.variables,t.executionHistory,b.length>0?b:void 0,w,c,void 0,p,t.sensitiveKeys),k=[];n.chatHistory&&n.chatHistory.length>0&&n.chatHistory.forEach(y=>{y.role==="user"?k.push({role:"user",content:y.content}):y.role==="assistant"&&k.push({role:"assistant",content:y.content})}),k.push({role:"user",content:S});let ee=Le(k),te=L.buildActionUnionSchema(),ne=Z.object({thought:Z.string().describe("Step by step reasoning of your decision making process"),description:Z.string().describe("Detailed description of the action to be performed"),action:te,completes_instruction:Z.boolean().describe("Whether this action completes the given instruction")}),F=Array.isArray(S)?S.filter(y=>y.type==="image").length:0,U=M(i,F),B=await Oe({model:W(i),system:_,messages:k,temperature:a,output:Pe.object({schema:ne}),providerOptions:U}),$=B.output,z=JSON.stringify($,null,2);ce.debug(`Generate Action Raw Output: ${z}`);let j=[],K=J(B.usage,i);K&&j.push(K);let C={systemPrompt:_,userPrompt:ee,rawLlmResponse:z,tokenUsages:j},I=$.thought||"",m=$.description||"",g=$.action||{},T=$.completes_instruction||!1;if(!g||Object.keys(g).length===0)return{status:"error",reasoning:I||m||"No action generated",goalAccomplished:T,error:"Agent did not generate any action",debugInfo:C};let f=Object.keys(g)[0];if(f==="done")return{status:"error",reasoning:I||m||"Task marked as done",goalAccomplished:T,error:"Agent indicated task is done without generating an action",debugInfo:C};if(f==="perform_accurate_operation")return await D(e,t,n);let O=g[f]||{},re={};if(typeof O.element_index=="number"){let y=O.element_index;if(y<0)return{status:"error",reasoning:I||m||"No action generated",goalAccomplished:T,error:"Agent did not generate any action",debugInfo:C};let N=A.selectorMap.get(y);N&&(re=await ue(o,N))}let se=m;return 
f==="verify"&&(se=e,O.statement=e),{status:"success",actionEntity:{...re,action_description:se||I||`${f}(${JSON.stringify(O)})`,action_data:{action_name:f,kwargs:O}},reasoning:I||m,goalAccomplished:T,debugInfo:C}}import{generateText as Re,Output as Ue}from"ai";import{z as X}from"zod";function je(e){if(!e)return null;if(e instanceof URL)return e.href;if(typeof e=="object"&&e.href)return String(e.href);if(typeof e=="object"&&typeof e.toString=="function"){let t=e.toString();if(t.startsWith("http://")||t.startsWith("https://"))return t}return typeof e=="string"&&(e.startsWith("http://")||e.startsWith("https://"))?e:null}var Ke=X.object({screenshotDescription:X.string().describe(`Description of the screenshot content, listing out key elements along with their Set of Mark indices,
+		and a description of their location: formatting example: [12] A red button with text "Submit", next to [11]
+[45] A modal dialog titled "Confirmation",
+		in the center of the screen`),explanation:X.string().describe("Step by step reasoning explaining your conclusion about the statement"),conclusion:X.enum(["true","false","unknown"]).describe("Whether the statement is true, false, or unknown if you cannot make a conclusion")});async function ie(e,t,n={}){return n.usePureVision?D(e,t,n):ge(e,t,n)}function He(){let e=new Date,t=e.toLocaleDateString("en-US",{weekday:"long",year:"numeric",month:"long",day:"numeric",timeZone:"America/Los_Angeles"}),n=e.toLocaleTimeString("en-US",{hour:"2-digit",minute:"2-digit",second:"2-digit",fractionalSecondDigits:3,timeZoneName:"short",timeZone:"America/Los_Angeles"});return{dateString:t,timeString:n}}function Fe(){return`# Role
+You are an experienced QA person for web applications.
+You are tasked to verify the validity of a given statement based on the screenshot and element tree of a web page.
+`}async function ye(e,t,n={}){let{page:o,executionHistory:r}=t,i=t.agentServices.getModel();try{let a=t.agentServices.isSlicedScreenshotsEnabled(),s=t.agentServices.isResizeSlicedScreenshotsEnabled(),c=t.agentServices.isKnowledgeImagesEnabled(),u=t.agentServices.isAccessibilityTreeEnabled(),{domTree:p,screenshotBase64:l,slicedScreenshotsBase64:d,domState:h,pageContext:w}=await G(t,{useCleanScreenshot:n.useCleanScreenshotForAssertion,useSlicedScreenshots:a,resizeSlicedScreenshots:s,useAccessibilityTree:u});t.domState=h;let A="";r&&r.length>0&&(A=`
+# Previous actions in this session:
+${r.map(([g,T],f)=>`${f+1}. Action: ${g}
+   Result: ${T}`).join(`
+`)}
+`);let{dateString:x,timeString:E}=He(),P=`
+# User statement
+"${e}"
+# UI Terminology (IMPORTANT - read carefully)
+When the statement mentions a **Modal**, **Dialog**, **Popup**, or **Panel**, use this definition:
+A modal is ANY distinct UI section that:
+- Has a clear visual boundary separating it from other content
+- Contains a specific title, heading, or purpose
+- Presents options, forms, or content for user interaction
+This includes ALL of the following:
+- Traditional overlay/popup dialogs
+- Side panels (left or right)
+- Slide-out drawers
+- Bottom sheets
+- Floating panels
+- Any visually distinct section with a title
+**Important**: A side panel on the right or left side of the screen IS a modal if it has a distinct title and purpose. It does NOT need to overlay or block the main content to be considered a modal.
+NOT modals: Loading spinners, toast notifications, tooltips, main content area.
+# Current webpage state
+## Tab information:
+${w.currentTabText}Available tabs:
+${w.tabsText}
+## Element interaction guidelines:
+   - Each element has a unique index number (e.g., "[33]<button>")
+   - Elements marked with "[]Non-interactive text" are non-interactive (for context only)
+   - Elements are indented to show the structure of the element tree, with indentation level indicating depth
+## Interactive elements from current page:
+${p}
+## Screenshot
+${a&&d?"The following images are sliced screenshots of the current webpage (left, middle, right sections).":"The image provided is a screenshot of the current webpage."}
+`,R="";if(t.variables&&Object.keys(t.variables).length>0){let m=[];for(let g of Object.keys(t.variables))if(!t.sensitiveKeys?.has(g)){let f=t.variables[g],O=typeof f=="string"?f:JSON.stringify(f);m.push(`  - ${g}: "${O}"`)}m.length>0&&(R=`
+## Available Variables
+The following non-sensitive variables are available:
+${m.join(`
+`)}`)}let _=`
+${R}
+${A}
+Today is ${x}. Current local time is ${E}.
+Based on the above information, please determine if the statement is true.
+`,b=await t.agentServices.retrieveKnowledges(e),S=[{type:"text",text:P}],k=0;if(a&&d&&d.length>0)for(let m of d)S.push({type:"image",image:m}),k++;else S.push({type:"image",image:l}),k=1;if(b&&b.length>0){let m=q(b,c);S.push(...m)}S.push({type:"text",text:_});let ee=b?de(b,c):0,te=k+ee,ne=M(i,te),F=Fe(),U=await Re({model:W(i),system:F,messages:[{role:"user",content:S}],output:Ue.object({schema:Ke}),temperature:0,providerOptions:ne}),{conclusion:B,explanation:$}=U.output,z=JSON.stringify(U.output,null,2),j=[],K=J(U.usage,i);K&&j.push(K);let C=[{role:"user",content:S.map(m=>{if(m.type==="text")return{type:"text",text:m.text};if(m.type==="image"){let g=m.image,T=je(g);if(T)return{type:"image",file:T};let f=typeof g=="string"?g:"";return{type:"image",file:f.startsWith("data:")?f:`data:image/png;base64,${f}`}}return{type:"text",text:"[unknown content type]"}})}],I={systemPrompt:F,userPrompt:C,rawLlmResponse:z,screenshotWithSom:l,tokenUsages:j,retrievedKnowledges:b&&b.length>0?b:void 0,elementTree:p};return{success:B==="true",explanation:$,debugInfo:I}}catch(a){return{success:!1,error:a.message}}}async function Be(e,t){try{if(!e.action_data)return{success:!1,error:"Action entity missing action_data"};let{action_name:n,kwargs:o}=e.action_data;if(L.has(n)){let r=await L.execute(n,o,t),i=r?.success!==!1,a=r?.error||r?.message;return{success:i,error:i?void 0:a}}else{let{page:r,agentServices:i}=t;return await(await ze()).execute(r,e,i),{success:!0,error:void 0}}}catch(n){return{success:!1,error:n.message}}}var Q=null;async function ze(){if(Q)return Q;let e=await import("./handler-TPOFKKIB.js");return Q=new e.default,Q}async function Pt(e,t,n,o={},r){let i=r||{page:t,agentServices:n,domService:new Y(n.getDomServiceOptions()),executionHistory:o.executionHistory,variables:o.variables,sensitiveKeys:o.sensitiveKeys},a=n.replaceVariables(e),s=await ie(a,i,o);return 
s.status==="error"?{status:"error",completed:s.goalAccomplished||!1,actionEntities:[],explanation:s.reasoning,error:s.error,debugInfo:s.debugInfo}:{status:"success",completed:s.goalAccomplished||!1,actionEntities:s.actionEntity?[s.actionEntity]:[],explanation:s.reasoning,debugInfo:s.debugInfo}}async function Ct(e,t,n,o={},r){let i=r||{page:t,agentServices:n,domService:new Y(n.getDomServiceOptions()),executionHistory:o.executionHistory,variables:o.variables,sensitiveKeys:o.sensitiveKeys},a=n.replaceVariables(e),s=await ie(a,i,o);if(s.status==="error"||!s.actionEntity)return{status:"error",completed:s.goalAccomplished||!1,actionEntities:[],explanation:s.reasoning,error:s.error||"No action generated",debugInfo:s.debugInfo};let{actionEntity:c,reasoning:u,goalAccomplished:p,debugInfo:l}=s,d=await Be(c,i);return d.success?(u&&n.addNote(u),{status:"success",completed:p||!1,actionEntities:[c],explanation:u,debugInfo:l}):{status:"error",completed:!1,actionEntities:[c],error:d.error||"Action execution failed",debugInfo:l}}async function Nt(e,t,n,o={}){let r={page:t,agentServices:n,domService:new Y(n.getDomServiceOptions()),executionHistory:o.executionHistory,variables:o.variables,sensitiveKeys:o.sensitiveKeys},i=n.replaceVariables(e);return o.useCleanScreenshotForAssertion=n.isUseCleanScreenshotForAssertion(),ye(i,r,o)}async function Lt(e,t,n,o,r={}){let{runTaskLoop:i}=await import("./task-57MAWXLN.js"),a=o?c=>{o(c)}:void 0,s=await i(e,{page:t,agentServices:n,domService:void 0,executionHistory:r.executionHistory,variables:r.variables,sensitiveKeys:r.sensitiveKeys},{maxSteps:r.maxSteps,onEvent:a,abortSignal:r.abortSignal});return{status:s.success?"success":"error",completed:s.completed,actionEntities:s.trajectory.actions,explanation:s.summary,error:s.error,tokenUsages:s.metadata.tokenUsages}}export{D as a,Be as b,Pt as c,Ct as d,Nt as e,Lt as f};