@shiplightai/sdk 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,280 @@
1
+ # @shiplightai/sdk
2
+
3
+ A companion SDK for Playwright that makes your tests resilient to UI changes like dynamic IDs, layout rearrangements, and styling updates.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ npm install @shiplightai/sdk playwright
9
+ ```
10
+
11
+ ## Quick Start
12
+
13
+ ```typescript
14
+ import { chromium } from 'playwright';
15
+ import { createAgent, configureSdk } from '@shiplightai/sdk';
16
+
17
+ // Configure SDK with API key (call once at startup)
18
+ configureSdk({
19
+ env: { GOOGLE_API_KEY: process.env.GOOGLE_API_KEY },
20
+ });
21
+
22
+ // Create an agent
23
+ const agent = createAgent({
24
+ model: 'gemini-2.5-pro',
25
+ });
26
+
27
+ // Use with Playwright
28
+ const browser = await chromium.launch();
29
+ const page = await browser.newPage();
30
+
31
+ // Login using the Sauce Labs demo site (public test site)
32
+ await agent.login(page, {
33
+ url: 'https://www.saucedemo.com/',
34
+ username: 'standard_user',
35
+ password: 'secret_sauce',
36
+ });
37
+
38
+ // Verify login succeeded
39
+ await agent.assert(page, 'Products page is visible');
40
+
41
+ // Extract data from the page
42
+ await agent.extract(page, 'the first product name', 'productName');
43
+ console.log('First product:', agent.getVariable('productName'));
44
+
45
+ await browser.close();
46
+ ```
47
+
48
+ ## Custom Actions
49
+
50
+ Extend the agent with custom actions for your specific use case:
51
+
52
+ ```typescript
53
+ import { createAgent, z } from '@shiplightai/sdk';
54
+
55
+ const agent = createAgent({ model: 'gemini-2.5-pro' });
56
+
57
+ // Register a custom action
58
+ agent.registerAction({
59
+ name: 'extract_email_code',
60
+ description: 'Extract verification code from email inbox',
61
+ schema: z.object({
62
+ email_address: z.string().describe('The email address to check'),
63
+ code_type: z.enum(['verification', 'reset']).describe('Type of code'),
64
+ }),
65
+ async execute(args, ctx) {
66
+ // Your custom logic here
67
+ const code = await myEmailService.getCode(args.email_address, args.code_type);
68
+
69
+ // Store the result for later use
70
+ ctx.variableStore.set('verification_code', code);
71
+
72
+ return { success: true, message: `Found code: ${code}` };
73
+ },
74
+ });
75
+
76
+ // The agent will automatically use your action when needed
77
+ await agent.act(page, 'Get the verification code from email');
78
+ await agent.act(page, 'Enter {{ verification_code }} in the input field');
79
+ ```
80
+
81
+ ## API Reference
82
+
83
+ ### `createAgent(options)`
84
+
85
+ Create a new agent instance.
86
+
87
+ ```typescript
88
+ const agent = createAgent({
89
+ // Required: LLM model to use
90
+ model: 'gemini-2.5-pro',
91
+
92
+ // Optional: Initial variables
93
+ variables: { username: 'test@example.com' },
94
+
95
+ // Optional: Keys to mark as sensitive (masked in logs)
96
+ sensitiveKeys: ['password', 'apiKey'],
97
+
98
+ // Optional: Directory for test data files
99
+ testDataDir: './test-data',
100
+
101
+ // Optional: Directory for downloads
102
+ downloadDir: './downloads',
103
+
104
+ // Optional: Self-healing strategy ('none' | 'single' | 'multi')
105
+ selfHealingStrategy: 'single',
106
+ });
107
+ ```
108
+
109
+ ### Supported Models
110
+
111
+ | Provider | Model | Notes |
112
+ |----------|-------|-------|
113
+ | Google | `gemini-2.5-pro` | Recommended, requires `GOOGLE_API_KEY` |
114
+ | Google | `gemini-3-pro-preview` | More powerful, higher cost |
115
+ | OpenAI | `computer-use-preview` | Optional, for vision operations (drag and drop); requires `OPENAI_API_KEY` |
116
+
117
+ ### `agent.registerAction(action)`
118
+
119
+ Register a custom action.
120
+
121
+ ```typescript
122
+ agent.registerAction({
123
+ // Unique action name (snake_case recommended)
124
+ name: 'my_action',
125
+
126
+ // Description for the agent
127
+ description: 'What this action does and when to use it',
128
+
129
+ // Zod schema for parameters
130
+ schema: z.object({
131
+ param1: z.string().describe('Description of the parameter'),
132
+ param2: z.number().optional(),
133
+ }),
134
+
135
+ // Execute function
136
+ async execute(args, ctx) {
137
+ // args: validated parameters
138
+ // ctx.page: Playwright page
139
+ // ctx.variableStore: access to variables
140
+
141
+ return { success: true, message: 'Optional status message' };
142
+ },
143
+ });
144
+ ```
145
+
146
+ ### `agent.act(page, instruction)`
147
+
148
+ Perform a single action on the page.
149
+
150
+ ```typescript
151
+ await agent.act(page, 'Click the login button');
152
+ await agent.act(page, 'Fill the email field with test@example.com');
153
+ await agent.act(page, 'Select "Express" from shipping dropdown');
154
+ ```
155
+
156
+ ### `agent.run(page, instruction, options?)`
157
+
158
+ Run a multi-step instruction until the goal is achieved.
159
+
160
+ ```typescript
161
+ await agent.run(page, 'Complete the checkout process');
162
+ await agent.run(page, 'Fill out the entire registration form');
163
+ await agent.run(page, 'Add 3 items to cart', { maxSteps: 10 });
164
+ ```
165
+
166
+ ### `agent.assert(page, statement)`
167
+
168
+ Assert a condition (throws on failure).
169
+
170
+ ```typescript
171
+ await agent.assert(page, 'Login button is visible');
172
+ await agent.assert(page, 'Cart contains 3 items');
173
+ ```
174
+
175
+ ### `agent.evaluate(page, statement)`
176
+
177
+ Evaluate a condition (returns boolean, doesn't throw).
178
+
179
+ ```typescript
180
+ const isLoggedIn = await agent.evaluate(page, 'User is logged in');
181
+ if (!isLoggedIn) {
182
+ await agent.run(page, 'Click the login button');
183
+ }
184
+ ```
185
+
186
+ ### `agent.extract(page, description, variableName)`
187
+
188
+ Extract data from the page and store it in a variable.
189
+
190
+ ```typescript
191
+ await agent.extract(page, 'the order total', 'orderTotal');
192
+ await agent.run(page, 'Verify {{ orderTotal }} is displayed on receipt');
193
+ ```
194
+
195
+ ### `agent.waitUntil(page, condition, timeoutSeconds?)`
196
+
197
+ Wait until a condition becomes true.
198
+
199
+ ```typescript
200
+ await agent.waitUntil(page, 'Loading spinner is no longer visible');
201
+
202
+ const appeared = await agent.waitUntil(page, 'Table shows at least 5 rows', 30);
203
+ if (!appeared) {
204
+ throw new Error('Data did not load in time');
205
+ }
206
+ ```
207
+
208
+ ## Custom Action Context
209
+
210
+ The `execute` function receives a context object:
211
+
212
+ ```typescript
213
+ interface IActionExecutionContext {
214
+ // Playwright page instance
215
+ page: Page;
216
+
217
+ // Variable store for reading/writing variables
218
+ variableStore: VariableStore;
219
+ }
220
+ ```
221
+
222
+ ### Using Variables
223
+
224
+ ```typescript
225
+ async execute(args, ctx) {
226
+ // Read a variable
227
+ const email = ctx.variableStore.get('email');
228
+
229
+ // Set a variable
230
+ ctx.variableStore.set('result', 'some value');
231
+
232
+ // Set a sensitive variable (masked in logs)
233
+ ctx.variableStore.set('token', secretToken, true);
234
+
235
+ return { success: true };
236
+ }
237
+ ```
238
+
239
+ ## SDK Configuration
240
+
241
+ Configure SDK-wide settings before creating agents:
242
+
243
+ ```typescript
244
+ import { configureSdk, getSdkConfig, LogLevel } from '@shiplightai/sdk';
245
+
246
+ configureSdk({
247
+ // Log level: DEBUG, INFO, WARN, ERROR
248
+ logLevel: LogLevel.INFO,
249
+
250
+ // Enable detailed agent logging
251
+ debugAgent: false,
252
+
253
+ // Environment variables (API keys)
254
+ env: {
255
+ // Required: Google API key for Gemini models
256
+ GOOGLE_API_KEY: 'your-google-api-key',
257
+
258
+ // Optional: OpenAI API key for vision operations
259
+ // OPENAI_API_KEY: 'sk-...',
260
+ },
261
+
262
+ // Optional: paths for logs and results
263
+ agentLogPath: './logs/agent.log',
264
+ testResultsJsonPath: './results.json',
265
+ });
266
+
267
+ // Read current config
268
+ const config = getSdkConfig();
269
+ ```
270
+
271
+ ## Environment Variables
272
+
273
+ | Variable | Required | Description |
274
+ |----------|----------|-------------|
275
+ | `GOOGLE_API_KEY` | Yes | Google AI API key for Gemini models |
276
+ | `OPENAI_API_KEY` | No | OpenAI API key (for vision operations) |
277
+
278
+ ## License
279
+
280
+ MIT
@@ -0,0 +1 @@
1
+ import{b as t,c as o,d as r,e as p,f as i}from"./chunk-GDTCZALZ.js";import"./chunk-GPZJYXUG.js";import"./chunk-YR4E7JSB.js";import"./chunk-KFC5I6R5.js";import"./chunk-UFLZ3URR.js";export{p as evaluateStatement,t as executeAction,r as executeStep,o as generateActionStep,i as runTask};
@@ -0,0 +1 @@
1
+ import{b as o,d as t,e as i,f as r,g as e,h as a,i as n}from"./chunk-UHZTPBZ3.js";import"./chunk-6H2NJBNL.js";import"./chunk-KFC5I6R5.js";import"./chunk-UFLZ3URR.js";export{o as LoginType,t as checkLocators,e as createUnsignedInContext,n as generateAndValidateLoginLocators,a as generateValidationLocators,r as validateLogin,i as validateLoginLocators};
@@ -0,0 +1 @@
1
/** Minimum total time (ms) `P` takes before resolving, even when the page is already quiet. */
const DEFAULT_MIN_WAIT_MS = 0.5 * 1e3;
/** Upper bound (ms) on how long to wait for tracked traffic to settle. */
const DEFAULT_TIMEOUT_MS = 30 * 1e3;
/** Length (ms) of the quiet period with no tracked traffic that counts as "stable". */
const DEFAULT_IDLE_MS = 1 * 1e3;
/** How often (ms) the pending-request set is re-checked. */
const POLL_INTERVAL_MS = 100;
/** Responses advertising a larger Content-Length are dropped without counting as activity. */
const MAX_TRACKED_BODY_BYTES = 5 * 1024 * 1024;

/** Only requests with one of these resource types are tracked. */
const TRACKED_RESOURCE_TYPES = new Set([
  "document",
  "stylesheet",
  "image",
  "font",
  "script",
  "iframe",
]);

/** A response counts as activity only if its content-type starts with one of these. */
const RELEVANT_CONTENT_TYPE_PREFIXES = [
  "text/html",
  "text/css",
  "application/javascript",
  "image/",
  "font/",
  "application/json",
];

/** Content-type substrings identifying streaming/binary payloads that are never waited on. */
const STREAMING_CONTENT_TYPE_MARKERS = [
  "streaming",
  "video",
  "audio",
  "webm",
  "mp4",
  "event-stream",
  "websocket",
  "protobuf",
];

/**
 * URL substrings whose requests are ignored.
 * NOTE(review): matching is a plain substring test, so short entries such as
 * "ping" or "alive" also match unrelated URLs (e.g. ".../shipping") — confirm
 * this breadth is intended before tightening.
 */
const IGNORED_URL_FRAGMENTS = [
  "analytics",
  "tracking",
  "telemetry",
  "beacon",
  "metrics",
  "doubleclick",
  "adsystem",
  "adserver",
  "advertising",
  "facebook.com/plugins",
  "platform.twitter",
  "linkedin.com/embed",
  "livechat",
  "zendesk",
  "intercom",
  "crisp.chat",
  "hotjar",
  "push-notifications",
  "onesignal",
  "pushwoosh",
  "heartbeat",
  "ping",
  "alive",
  "webrtc",
  "rtmp://",
  "wss://",
  "cloudfront.net",
  "fastly.net",
];

/** `sec-fetch-dest` header values whose requests are ignored. */
const IGNORED_FETCH_DESTINATIONS = ["video", "audio"];

/** Resolve after `ms` milliseconds. */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/** True when the URL is inline data or contains one of the ignored fragments. */
function isIgnoredUrl(url) {
  const lowered = url.toLowerCase();
  if (lowered.startsWith("data:") || lowered.startsWith("blob:")) {
    return true;
  }
  return IGNORED_URL_FRAGMENTS.some((fragment) => lowered.includes(fragment));
}

/**
 * Poll until no tracked request has been pending for `idleMs`, or until
 * `timeoutMs` has elapsed, whichever comes first.
 *
 * @param {object} page - Event source exposing `on(event, fn)` and
 *   `removeListener(event, fn)` and emitting "request", "response" and
 *   "requestfailed" (used here with a Playwright page).
 * @param {number} timeoutMs - Maximum time to keep polling.
 * @param {number} [idleMs] - Required quiet period.
 * @returns {Promise<void>}
 */
async function v(page, timeoutMs, idleMs = DEFAULT_IDLE_MS) {
  const pendingRequests = new Set();
  let lastActivity = Date.now();

  const onRequest = (request) => {
    if (!TRACKED_RESOURCE_TYPES.has(request.resourceType())) {
      return;
    }
    if (isIgnoredUrl(request.url())) {
      return;
    }
    const headers = request.headers();
    if (headers.purpose === "prefetch") {
      return;
    }
    if (IGNORED_FETCH_DESTINATIONS.includes(headers["sec-fetch-dest"])) {
      return;
    }
    pendingRequests.add(request);
    lastActivity = Date.now();
  };

  const onResponse = (response) => {
    const request = response.request();
    // `delete` reports whether the request was being tracked at all.
    if (!pendingRequests.delete(request)) {
      return;
    }
    const headers = response.headers();
    const contentType = (headers["content-type"] || "").toLowerCase();
    if (STREAMING_CONTENT_TYPE_MARKERS.some((marker) => contentType.includes(marker))) {
      return;
    }
    if (!RELEVANT_CONTENT_TYPE_PREFIXES.some((prefix) => contentType.startsWith(prefix))) {
      return;
    }
    const contentLength = headers["content-length"];
    // An unparsable length yields NaN, which compares false and is treated as activity.
    if (contentLength && Number.parseInt(contentLength, 10) > MAX_TRACKED_BODY_BYTES) {
      return;
    }
    lastActivity = Date.now();
  };

  // A failed or aborted request never produces a "response" event; without
  // this handler it would stay pending and force a wait for the full timeout.
  const onRequestFailed = (request) => {
    if (pendingRequests.delete(request)) {
      lastActivity = Date.now();
    }
  };

  page.on("request", onRequest);
  page.on("response", onResponse);
  page.on("requestfailed", onRequestFailed);
  try {
    const startedAt = Date.now();
    for (;;) {
      await sleep(POLL_INTERVAL_MS);
      const now = Date.now();
      const isIdle = pendingRequests.size === 0 && now - lastActivity >= idleMs;
      const isTimedOut = now - startedAt > timeoutMs;
      if (isIdle || isTimedOut) {
        break;
      }
    }
  } finally {
    page.removeListener("request", onRequest);
    page.removeListener("response", onResponse);
    page.removeListener("requestfailed", onRequestFailed);
  }
}

/**
 * Wait for the page's network traffic to stabilize, taking at least
 * `n` milliseconds overall.
 *
 * @param {object} o - Page-like event source (see `v`).
 * @param {number} [d] - Maximum time (ms) to wait for the network to go quiet.
 * @param {number} [n] - Minimum total duration (ms) of this call; if the
 *   network settles sooner, the remainder is slept.
 * @param {number} [idleMs] - Quiet period (ms) that counts as stable.
 * @returns {Promise<void>}
 * @throws {Error} Wrapping any failure raised while observing the network;
 *   the original error is attached as `cause`.
 */
async function P(o, d = DEFAULT_TIMEOUT_MS, n = DEFAULT_MIN_WAIT_MS, idleMs = DEFAULT_IDLE_MS) {
  const startedAt = Date.now();
  try {
    await v(o, d, idleMs);
  } catch (err) {
    if (err instanceof Error) {
      throw new Error(`Failed during network stabilization: ${err.message}`, { cause: err });
    }
    throw new Error("An unknown error occurred during network stabilization.", { cause: err });
  }
  const remaining = Math.max(n - (Date.now() - startedAt), 0);
  if (remaining > 0) {
    await sleep(remaining);
  }
}

export { P as a };
@@ -0,0 +1,192 @@
1
+ import{a as L,b as W,c as M,d as Y}from"./chunk-GPZJYXUG.js";import{a as ce,g as le,i as ue}from"./chunk-YR4E7JSB.js";import{a as v}from"./chunk-KFC5I6R5.js";import{g as ae}from"./chunk-UFLZ3URR.js";import{zodToJsonSchema as be}from"zod-to-json-schema";function V(e){let t=be(e,{$refStrategy:"none"});if(t.$schema&&delete t.$schema,t.type!=="object")throw new Error(`Schema must be a Zod object schema, got type: ${t.type}`);return H(t),t}function H(e){if(!(typeof e!="object"||e===null)){if(e.type==="object"&&(e.additionalProperties=!1,e.properties)){let t=Object.keys(e.properties);e.required=t;for(let n of Object.values(e.properties))H(n)}e.type==="array"&&e.items&&H(e.items);for(let t of["anyOf","oneOf","allOf"])Array.isArray(e[t])&&e[t].forEach(H);for(let[t,n]of Object.entries(e))typeof n=="object"&&n!==null&&!["properties","items","anyOf","oneOf","allOf"].includes(t)&&H(n)}}var pe=class{constructor(e){this.registry=e}getToolDefinitions(){return this.registry.getTools().filter(e=>e.availability.openai).map(e=>({type:"function",function:{name:e.name,description:e.description,parameters:V(e.schema),strict:!0}}))}getToolDefinitionsFiltered(e){let t=new Set(e);return this.registry.getTools().filter(n=>t.has(n.name)&&n.availability.openai).map(n=>({type:"function",function:{name:n.name,description:n.description,parameters:V(n.schema),strict:!0}}))}toJSON(){return this.getToolDefinitions()}getToolDefinition(e){let t=this.registry.get(e);if(t)return{type:"function",function:{name:t.name,description:t.description,parameters:V(t.schema),strict:!0}}}getToolCount(){return this.registry.size()}getToolNames(){return this.registry.getToolNames()}};function ve(e,t){let n=new Map;for(let c of t)c.hash&&c.url?n.set(c.hash,c.url):c.uuid&&c.url&&n.set(c.uuid,c.url);let o=/!\[([^\]]*)\]\(image:([a-f0-9]{64}|[a-zA-Z0-9\-]+)\)/g,r=[],i=0,a;for(;(a=o.exec(e))!==null;){let c=e.slice(i,a.index);c&&r.push({type:"text",text:c});let u=a[2];n.has(u)?r.push({type:"image",image:new 
URL(n.get(u))}):r.push({type:"text",text:a[0]}),i=a.index+a[0].length}let s=e.slice(i);return s&&r.push({type:"text",text:s}),r.length===0&&r.push({type:"text",text:e}),r}function we(e){let t=[];for(let n of e){let o=n.content||"",r=n.images||[],i=ve(o,r);t.length>0&&i.length>0&&t.push({type:"text",text:`
2
+
3
+ `}),t.push(...i)}return t}var me=!1;function q(e,t=me){if(t){let n=we(e);if(n.length===0)return[];let o={type:"text",text:`
4
+
5
+ <retrieved_knowledge>
6
+
7
+ Below are expert curated knowledge that are retrieved from the knowledge base; APPLY THESE KNOWLEDGES IF THEY ARE RELEVANT TO THE TASK:
8
+ `},r={type:"text",text:`
9
+
10
+ </retrieved_knowledge>
11
+
12
+ `};return[o,...n,r]}else{let o=e.filter(s=>!s.images||s.images.length===0).map(s=>s.content||"").filter(s=>s.length>0).join(`
13
+
14
+ `);return o?[{type:"text",text:`
15
+
16
+ <retrieved_knowledge>
17
+
18
+ Below are expert curated knowledge that are retrieved from the knowledge base; APPLY THESE KNOWLEDGES IF THEY ARE RELEVANT TO THE TASK:
19
+ `},{type:"text",text:o},{type:"text",text:`
20
+
21
+ </retrieved_knowledge>
22
+
23
+ `}]:[]}}function de(e,t=me){if(!t)return 0;let n=0;for(let o of e)o.images&&(n+=o.images.length);return n}function xe(){let e=new Date,n=new Intl.DateTimeFormat("en-US",{timeZone:"America/Los_Angeles",year:"numeric",month:"2-digit",day:"2-digit",hour:"2-digit",minute:"2-digit",second:"2-digit",hour12:!1,timeZoneName:"short"}).formatToParts(e),o=n.find(l=>l.type==="year").value,r=n.find(l=>l.type==="month").value,i=n.find(l=>l.type==="day").value,a=n.find(l=>l.type==="hour").value,s=n.find(l=>l.type==="minute").value,c=n.find(l=>l.type==="second").value,u=n.find(l=>l.type==="timeZoneName").value,p=String(e.getMilliseconds()).padStart(3,"0");return`${o}-${r}-${i} ${a}:${s}:${c}.${p} ${u}`}function he(e){return`# Your Role
24
+ You are part of a end-to-end testing system that is designed to automate the testing of a website. Given an instruction in natural language, your job is to translate it into an action in the predefined actions. The instruction might not match any action in the predefined actions or might require to interact with an element that is not on the page. It's your job to detect these cases and return an empty action.
25
+
26
+ # Rules
27
+ ## Action Selection Rules
28
+ - If the instruction requires a specific action, you must select that action. If no action matches the specific action, you must return an empty action so that testing system can aware of the situation.
29
+ - If asked to do nothing or ignore the instruction or something similar, you must select \`wait\` action of 1 second.
30
+ - If asked to verify something, you must select \`verify\` action.
31
+ - If asked to do accurate interaction, like selecting a specific chunk of text or drawing a bounding box, you must select \`perform_accurate_operation\` action.
32
+ - If asked to scroll, you decide if you need to \`scroll\` the page or \`scroll_on_element\`. also you need to calculate how much to scroll.
33
+
34
+ ## Element Selection Rules
35
+ - If the instruction requires to interact with a specific element, you must select that element.
36
+ - If no element matches the specific element, you must return an empty action so that testing system can aware of the situation. NEVER click on alternative elements as a workaround. NEVER try to navigate to find the element (e.g. by scrolling, closing modals, clicking other buttons, or refreshing the page).
37
+ - Fail fast: If the exact target element is not visible on the current page, return an empty action immediately. The testing system will handle recovery.
38
+ - The type of the selected element doesn't have to match the target, for example, if the instruction requires to interact with an image but no image element matches, you can select a div that contains the image.
39
+
40
+ ## Instruction Completion Analysis Rules
41
+ - Reasoning about the instruction completion is critical. You must analyze the instruction and your action to determine if your action will complete the instruction.
42
+
43
+ ## Response Format Rules
44
+ - Respond using valid JSON format, which can be parsed by python json.loads():
45
+ {
46
+ "thought": "...", // step by step reasoning of your decision making process
47
+ "description": "...", // detailed description of the action to be performed. (e.g. click on the 'Submit' button to submit the form)
48
+ "action": {"one_action_name": {// action-specific parameter}},
49
+ "completes_instruction": true/false // boolean indicating whether this action completes the given instruction. Set to false if the action is only partial, requires follow-up actions, or cannot fully complete the instruction.
50
+ }
51
+
52
+ Follow the rules above strictly.
53
+
54
+ # Action Space
55
+ ${e}
56
+
57
+ # Examples
58
+ Example of \`verify\` action:
59
+ instruction: "Verify that the page title is 'Home'"
60
+ {
61
+ "thought": "I understand the instruction is to verify that the page title is 'Home'. I will use the \`verify\` action to verify the page title.", // Do not verify it yourself, just translate the instruction to the \`verify\` action
62
+ "description": "Verify that the page title is 'Home'",
63
+ "action": {"verify": {"statement": "the page title is 'Home'"}}, // the statement should be the same wording as the instruction, don't rephrase it
64
+ "completes_instruction": true // this action fully completes the instruction
65
+ }
66
+
67
+ Example of \`save_variable\` action:
68
+ instruction: "Extract and save the page title as page_title"
69
+ {
70
+ "thought": "I understand the instruction is to save the page title as page_title. The current page title is 'Home'. I will use the \`save_variable\` action to save the page title.",
71
+ "description": "Save the page title as variable page_title",
72
+ "action": {"save_variable": {"name": "page_title", "value": "Home"}}, // the value should be the same wording as the instruction, don't rephrase it
73
+ "completes_instruction": true // this action fully completes the instruction
74
+ }
75
+
76
+ Example of empty action when the target element is not on the page, or the instruction cannot be completed for any reason:
77
+ {
78
+ "thought": "The user wants me to click the 'Create Entry' button. However, the current page is a sign-in page and the only interactive element is the 'Sign In' button. The 'Create Entry' button is not present on the page. The previous attempt to click this button also failed. Therefore, I cannot complete the instruction and will return an empty action.",
79
+ "description": "Click the 'Create Entry' button.",
80
+ "action": {}, // empty action object to indicate the instruction cannot be completed
81
+ "completes_instruction": false
82
+ }
83
+ `}function Se(e){if(!e||e.length===0)return"";let t="";return e.forEach(([n,o],r)=>{t+=`(${r+1}) Description: ${n}
84
+ Feedback: ${o}
85
+ `}),`## Additional context
86
+ You just executed following steps in order:
87
+ ${t}`}function fe(e,t,n,o,r,i,a=!1,s=xe(),c=!1,u){let p=[],l=`
88
+ # Instruction
89
+ "${t}"
90
+
91
+ # Current webpage state
92
+ ## Tab information:
93
+ ${e.currentTabText}Available tabs:
94
+ ${e.tabsText}
95
+
96
+ ## Element interaction guidelines:
97
+ - Only use indexes that exist in the provided element list
98
+ - Each element has a unique index number (e.g., "[33]<button>")
99
+ - The bounding box and index of each element is marked on the screenshot.
100
+ - Elements marked with "[]Non-interactive text" are non-interactive (for context only)
101
+ - Elements are indented to show the structure of the element tree, with indentation level indicating depth
102
+ - When considering an element, also consider its children elements
103
+ - If an element is scrollable, it will be marked with "(SCROLLABLE)" (e.g., "[33](SCROLLABLE)<ul>"), use the \`scroll_on_element\` action to scroll on the element.
104
+
105
+ ## Interactive elements from current page:
106
+ ${e.elementsText}
107
+ `;if(p.push({type:"text",text:l}),a&&e.slicedScreenshotsBase64)for(let h of e.slicedScreenshotsBase64)p.push({type:"text",text:"The following images provided are sliced screenshots of the current webpage, with interactive elements highlighted. The element index label locate at the top right corner of the bounding box."}),p.push({type:"image",image:h});else i&&(p.push({type:"text",text:"The following image provided is a screenshot of the current webpage, with interactive elements highlighted. The element index label locate at the top right corner of the bounding box."}),p.push({type:"image",image:i}));if(r&&r.length>0){let h=q(r,c);p.push(...h)}let d="";if(d+=`
108
+ Current local time is ${s}.
109
+ `,n&&Object.keys(n).length>0){let h=[];for(let w of Object.keys(n))if(u?.has(w))h.push(` - ${w}: [SENSITIVE - value hidden]`);else{let x=n[w],E=typeof x=="string"?x:JSON.stringify(x);h.push(` - ${w}: "${E}"`)}d+=`
110
+ ## Available Data Placeholders
111
+ The following placeholders are available for use in your actions:
112
+ ${h.join(`
113
+ `)}
114
+
115
+ To use them, write Jinja-like template syntax: {{ placeholder_name }}
116
+ - Use the EXACT placeholder name as shown above
117
+ - Do NOT use the actual value directly
118
+ - The values shown are for context only to help you understand what data is available
119
+ - In action descriptions, describe what the placeholder represents in natural language (e.g., "Type the first user name" instead of "Type {{ firstUserName }}")
120
+ `}if(o&&o.length>0){let h=Se(o);d+=`
121
+ `+h}return d+=`
122
+ Based on the above information, please determine the right action to accomplish the task.
123
+ `,p.push({type:"text",text:d}),p}import Te from"openai";import Ae from"sharp";async function ke(e,t,n){await e.evaluate(()=>{let p=document.getElementById("playwright-highlight-container");p&&p.remove(),window._highlightCleanupFunctions&&(window._highlightCleanupFunctions.forEach(l=>l()),window._highlightCleanupFunctions=[])});let o=await e.screenshot({type:"png",fullPage:!1}),r=Ae(o),i=await r.metadata(),a=i.width||0,s=i.height||0;v.log(`Screenshot actual dimensions: ${a}x${s}, viewport: ${t}x${n}`);let c;return a!==t||s!==n?(v.log(`Resizing screenshot from ${a}x${s} to ${t}x${n}`),c=await r.resize(t,n).png().toBuffer()):c=o,c.toString("base64")}async function oe(e,t,n){let o=await e.evaluateHandle(c=>document.elementFromPoint(c.x,c.y),{x:t,y:n});if(!o)throw new Error(`No element found at (${t}, ${n})`);let r=o.asElement();if(!r)throw await o.dispose(),new Error(`No element found at (${t}, ${n})`);let i=await r.boundingBox();if(!i)throw await o.dispose(),new Error("Element has no bounding box");let a=t-(i.x+i.width/2),s=n-(i.y+i.height/2);return{relative_x:a,relative_y:s,element:r}}async function Ee(e,t){return t?{xpath:void 0,locator:(t?await le(e,t):null)||void 0,frame_path:[]}:{xpath:void 0,locator:void 0,frame_path:[]}}async function _e(e,t){let n=null,o=null;switch(t.type){case"click":{let i=t.button==="right"?"right_click_by_coordinates":"click_by_coordinates";if(t.x===void 0||t.y===void 0)break;let a=await oe(e,t.x,t.y);n={action_name:i,kwargs:{relative_x:a.relative_x,relative_y:a.relative_y}},o=a.element;break}case"double_click":{if(t.x===void 0||t.y===void 0)break;let i=await oe(e,t.x,t.y);n={action_name:"double_click_by_coordinates",kwargs:{relative_x:i.relative_x,relative_y:i.relative_y}},o=i.element;break}case"drag":{if(!t.path||t.path.length<2)break;let i=t.path[0].x,a=t.path[0].y,s=t.path[1].x,c=t.path[1].y,u=await 
oe(e,i,a);n={action_name:"drag_drop",kwargs:{relative_x:u.relative_x,relative_y:u.relative_y,delta_x:s-i,delta_y:c-a}},o=u.element;break}}let r=await Ee(e,o);return{action_data:n,locatorInfo:r}}async function D(e,t,n={}){try{let{page:o}=t,r=o.viewportSize();if(!r)return{status:"error",error:"Viewport size not available"};let i=r.width,a=r.height;v.log(`Viewport size: ${i}x${a}`);let s=await ke(o,i,a),u=ae().env?.OPENAI_API_KEY;if(!u)return{status:"error",error:"OpenAI API key not found. Set OPENAI_API_KEY environment variable or configure via SDK config."};let p=new Te({apiKey:u});v.log("Sending request to OpenAI CUA...");let d=[{role:"user",content:[{type:"input_text",text:`
124
+ You will be given an action to execute and screenshot of the current screen.
125
+ Output one computer_call object that will accomplish this action.
126
+ Action: ${e}
127
+ `},{type:"input_image",detail:"auto",image_url:`data:image/png;base64,${s}`}]}],h=await p.responses.create({model:"computer-use-preview",tools:[{type:"computer-preview",display_width:i,display_height:a,environment:"browser"}],input:d,truncation:"auto",temperature:.1});v.log("Received response from OpenAI CUA");let A=h.output.filter(_=>_.type==="computer_call")[0]||null;if(!A)return{status:"error",reasoning:h.output_text||""||"Invalid action generation response",error:"No computer_call action generated"};let x=A.action;v.log(`Generated action: ${JSON.stringify(x)}`);let{action_data:E,locatorInfo:P}=await _e(o,x);return E?{status:"success",actionEntity:{action_description:e,action_data:E,locator:P.locator||void 0,xpath:P.xpath||void 0,frame_path:P.frame_path},reasoning:"Action generated successfully using pure vision mode",goalAccomplished:!0}:{status:"error",error:"Failed to map action to ActionDataEntity"}}catch(o){return v.error("Error generating CUA action",o),{status:"error",error:o.message||"Failed to generate action with pure vision"}}}async function $e(e){let t=e.context().pages(),n=null,o=[];for(let a=0;a<t.length;a++){let s=t[a];s===e&&(n=a);let c="(title unavailable)";try{c=await Promise.race([s.title(),new Promise((p,l)=>setTimeout(()=>l(new Error("timeout")),1e3))])}catch{}let u=`Tab ${a}: ${s.url()}`;c&&(u+=` - ${c.slice(0,50)}`),o.push(u)}let r=o.length>0?o.join(`
128
+ `):"";return{currentTabText:n!==null?`Current tab: ${n}
129
+ `:"",tabsText:r}}async function Ie(e,t){let{currentTabText:n,tabsText:o}=await $e(e);return{elementsText:t,currentUrl:e.url(),currentTitle:await e.title(),currentTabText:n,tabsText:o}}async function G(e,t){let{page:n,domService:o,agentServices:r}=e,i=typeof t=="boolean"?{useCleanScreenshot:t}:t||{},a=r.getInteractiveClassNames(),{domState:s,screenshotBase64:c,slicedScreenshotsBase64:u}=await o.getClickableElementsWithScreenshot(n,{interactiveClassNames:a,useCleanScreenshot:i.useCleanScreenshot,useSlicedScreenshots:i.useSlicedScreenshots,resizeSlicedScreenshots:i.resizeSlicedScreenshots,useAccessibilityTree:i.useAccessibilityTree,actionIntent:i.actionIntent}),p=s.elementTree.clickableElementsToString(),l=await Ie(n,p);return u&&(l.slicedScreenshotsBase64=u),{domTree:p,screenshotBase64:c,slicedScreenshotsBase64:u,domState:s,pageContext:l}}function J(e,t){return e?{prompt_tokens:e.promptTokens||e.inputTokens||0,completion_tokens:e.completionTokens||e.outputTokens||0,total_tokens:e.totalTokens||0,model:t}:null}import{generateText as Oe,Output as Pe}from"ai";import{z as Z}from"zod";function Ce(e){let t=e.toLowerCase();return[/\b(type|enter|input|fill|write|set)\b/,/\b(text|value|field|box)\b.*\b(to|with|as)\b/,/\b(username|password|email|search|query)\b/].some(i=>i.test(t))?"input":[/\bscroll\b/,/\b(scroll|swipe)\s*(up|down|left|right)\b/,/\b(page|move)\s*(down|up)\b/].some(i=>i.test(t))?"scroll":[/\b(click|tap|press|select|choose|pick|check|toggle)\b/,/\b(open|close|submit|confirm|cancel|dismiss)\b/,/\b(button|link|menu|dropdown|checkbox|radio)\b/].some(i=>i.test(t))?"click":"all"}function Ne(e){if(!e)return null;if(e instanceof URL)return e.href;if(typeof e=="object"&&e.href)return String(e.href);if(typeof e=="object"&&typeof e.toString=="function"){let t=e.toString();if(t.startsWith("http://")||t.startsWith("https://"))return t}return typeof e=="string"&&(e.startsWith("http://")||e.startsWith("https://"))?e:null}function Le(e){return 
e.map(t=>({role:t.role,content:Array.isArray(t.content)?t.content.map(n=>{if(n.type==="image"){let o=n.image,r=Ne(o);if(r)return{type:"image",file:r};let i=typeof o=="string"?o:"";return{type:"image",file:i.startsWith("data:")?i:`data:image/png;base64,${i}`}}return{type:"text",text:n.text}}):t.content}))}async function ge(e,t,n={}){let{page:o,agentServices:r}=t,i=r.getModel(),a=n.temperature??0,s=r.retrieveKnowledges(e).catch(y=>(v.log(`Failed to retrieve knowledges: ${y}`),[])),c=r.isSlicedScreenshotsEnabled(),u=r.isResizeSlicedScreenshotsEnabled(),p=r.isKnowledgeImagesEnabled(),l=r.isAccessibilityTreeEnabled(),d=r.isActionIntentFilteringEnabled(),h=d?Ce(e):"all";d&&h!=="all"&&v.log(`Action intent filtering: detected '${h}' intent from statement`);let{screenshotBase64:w,domState:A,pageContext:x}=await G(t,{useSlicedScreenshots:c,resizeSlicedScreenshots:u,useAccessibilityTree:l,actionIntent:h});t.domState=A;let R=new pe(L).getToolDefinitions().map(y=>{let N=y.function;return`${N.name}: ${N.description}
130
+ Parameters: ${JSON.stringify(N.parameters,null,2)}`}).join(`
131
+
132
+ `),_=he(R),b=await s,S=fe(x,e,t.variables,t.executionHistory,b.length>0?b:void 0,w,c,void 0,p,t.sensitiveKeys),k=[];n.chatHistory&&n.chatHistory.length>0&&n.chatHistory.forEach(y=>{y.role==="user"?k.push({role:"user",content:y.content}):y.role==="assistant"&&k.push({role:"assistant",content:y.content})}),k.push({role:"user",content:S});let ee=Le(k),te=L.buildActionUnionSchema(),ne=Z.object({thought:Z.string().describe("Step by step reasoning of your decision making process"),description:Z.string().describe("Detailed description of the action to be performed"),action:te,completes_instruction:Z.boolean().describe("Whether this action completes the given instruction")}),F=Array.isArray(S)?S.filter(y=>y.type==="image").length:0,U=M(i,F),B=await Oe({model:W(i),system:_,messages:k,temperature:a,output:Pe.object({schema:ne}),providerOptions:U}),$=B.output,z=JSON.stringify($,null,2);ce.debug(`Generate Action Raw Output: ${z}`);let j=[],K=J(B.usage,i);K&&j.push(K);let C={systemPrompt:_,userPrompt:ee,rawLlmResponse:z,tokenUsages:j},I=$.thought||"",m=$.description||"",g=$.action||{},T=$.completes_instruction||!1;if(!g||Object.keys(g).length===0)return{status:"error",reasoning:I||m||"No action generated",goalAccomplished:T,error:"Agent did not generate any action",debugInfo:C};let f=Object.keys(g)[0];if(f==="done")return{status:"error",reasoning:I||m||"Task marked as done",goalAccomplished:T,error:"Agent indicated task is done without generating an action",debugInfo:C};if(f==="perform_accurate_operation")return await D(e,t,n);let O=g[f]||{},re={};if(typeof O.element_index=="number"){let y=O.element_index;if(y<0)return{status:"error",reasoning:I||m||"No action generated",goalAccomplished:T,error:"Agent did not generate any action",debugInfo:C};let N=A.selectorMap.get(y);N&&(re=await ue(o,N))}let se=m;return 
f==="verify"&&(se=e,O.statement=e),{status:"success",actionEntity:{...re,action_description:se||I||`${f}(${JSON.stringify(O)})`,action_data:{action_name:f,kwargs:O}},reasoning:I||m,goalAccomplished:T,debugInfo:C}}import{generateText as Re,Output as Ue}from"ai";import{z as X}from"zod";function je(e){if(!e)return null;if(e instanceof URL)return e.href;if(typeof e=="object"&&e.href)return String(e.href);if(typeof e=="object"&&typeof e.toString=="function"){let t=e.toString();if(t.startsWith("http://")||t.startsWith("https://"))return t}return typeof e=="string"&&(e.startsWith("http://")||e.startsWith("https://"))?e:null}var Ke=X.object({screenshotDescription:X.string().describe(`Description of the screenshot content, listing out key elements along with their Set of Mark indices,
133
+ and a description of their location: formatting example: [12] A red button with text "Submit", next to [11]
134
+ [45] A modal dialog titled "Confirmation",
135
+ in the center of the screen`),explanation:X.string().describe("Step by step reasoning explaining your conclusion about the statement"),conclusion:X.enum(["true","false","unknown"]).describe("Whether the statement is true, false, or unknown if you cannot make a conclusion")});async function ie(e,t,n={}){return n.usePureVision?D(e,t,n):ge(e,t,n)}function He(){let e=new Date,t=e.toLocaleDateString("en-US",{weekday:"long",year:"numeric",month:"long",day:"numeric",timeZone:"America/Los_Angeles"}),n=e.toLocaleTimeString("en-US",{hour:"2-digit",minute:"2-digit",second:"2-digit",fractionalSecondDigits:3,timeZoneName:"short",timeZone:"America/Los_Angeles"});return{dateString:t,timeString:n}}function Fe(){return`# Role
136
+ You are an experienced QA person for web applications.
137
+ You are tasked to verify the validity of a given statement based on the screenshot and element tree of a web page.
138
+ `}async function ye(e,t,n={}){let{page:o,executionHistory:r}=t,i=t.agentServices.getModel();try{let a=t.agentServices.isSlicedScreenshotsEnabled(),s=t.agentServices.isResizeSlicedScreenshotsEnabled(),c=t.agentServices.isKnowledgeImagesEnabled(),u=t.agentServices.isAccessibilityTreeEnabled(),{domTree:p,screenshotBase64:l,slicedScreenshotsBase64:d,domState:h,pageContext:w}=await G(t,{useCleanScreenshot:n.useCleanScreenshotForAssertion,useSlicedScreenshots:a,resizeSlicedScreenshots:s,useAccessibilityTree:u});t.domState=h;let A="";r&&r.length>0&&(A=`
139
+ # Previous actions in this session:
140
+ ${r.map(([g,T],f)=>`${f+1}. Action: ${g}
141
+ Result: ${T}`).join(`
142
+ `)}
143
+ `);let{dateString:x,timeString:E}=He(),P=`
144
+ # User statement
145
+ "${e}"
146
+
147
+ # UI Terminology (IMPORTANT - read carefully)
148
+ When the statement mentions a **Modal**, **Dialog**, **Popup**, or **Panel**, use this definition:
149
+
150
+ A modal is ANY distinct UI section that:
151
+ - Has a clear visual boundary separating it from other content
152
+ - Contains a specific title, heading, or purpose
153
+ - Presents options, forms, or content for user interaction
154
+
155
+ This includes ALL of the following:
156
+ - Traditional overlay/popup dialogs
157
+ - Side panels (left or right)
158
+ - Slide-out drawers
159
+ - Bottom sheets
160
+ - Floating panels
161
+ - Any visually distinct section with a title
162
+
163
+ **Important**: A side panel on the right or left side of the screen IS a modal if it has a distinct title and purpose. It does NOT need to overlay or block the main content to be considered a modal.
164
+
165
+ NOT modals: Loading spinners, toast notifications, tooltips, main content area.
166
+
167
+ # Current webpage state
168
+ ## Tab information:
169
+ ${w.currentTabText}Available tabs:
170
+ ${w.tabsText}
171
+
172
+ ## Element interaction guidelines:
173
+ - Each element has a unique index number (e.g., "[33]<button>")
174
+ - Elements marked with "[]Non-interactive text" are non-interactive (for context only)
175
+ - Elements are indented to show the structure of the element tree, with indentation level indicating depth
176
+
177
+ ## Interactive elements from current page:
178
+ ${p}
179
+
180
+ ## Screenshot
181
+ ${a&&d?"The following images are sliced screenshots of the current webpage (left, middle, right sections).":"The image provided is a screenshot of the current webpage."}
182
+ `,R="";if(t.variables&&Object.keys(t.variables).length>0){let m=[];for(let g of Object.keys(t.variables))if(!t.sensitiveKeys?.has(g)){let f=t.variables[g],O=typeof f=="string"?f:JSON.stringify(f);m.push(` - ${g}: "${O}"`)}m.length>0&&(R=`
183
+ ## Available Variables
184
+ The following non-sensitive variables are available:
185
+ ${m.join(`
186
+ `)}`)}let _=`
187
+ ${R}
188
+ ${A}
189
+
190
+ Today is ${x}. Current local time is ${E}.
191
+ Based on the above information, please determine if the statement is true.
192
+ `,b=await t.agentServices.retrieveKnowledges(e),S=[{type:"text",text:P}],k=0;if(a&&d&&d.length>0)for(let m of d)S.push({type:"image",image:m}),k++;else S.push({type:"image",image:l}),k=1;if(b&&b.length>0){let m=q(b,c);S.push(...m)}S.push({type:"text",text:_});let ee=b?de(b,c):0,te=k+ee,ne=M(i,te),F=Fe(),U=await Re({model:W(i),system:F,messages:[{role:"user",content:S}],output:Ue.object({schema:Ke}),temperature:0,providerOptions:ne}),{conclusion:B,explanation:$}=U.output,z=JSON.stringify(U.output,null,2),j=[],K=J(U.usage,i);K&&j.push(K);let C=[{role:"user",content:S.map(m=>{if(m.type==="text")return{type:"text",text:m.text};if(m.type==="image"){let g=m.image,T=je(g);if(T)return{type:"image",file:T};let f=typeof g=="string"?g:"";return{type:"image",file:f.startsWith("data:")?f:`data:image/png;base64,${f}`}}return{type:"text",text:"[unknown content type]"}})}],I={systemPrompt:F,userPrompt:C,rawLlmResponse:z,screenshotWithSom:l,tokenUsages:j,retrievedKnowledges:b&&b.length>0?b:void 0,elementTree:p};return{success:B==="true",explanation:$,debugInfo:I}}catch(a){return{success:!1,error:a.message}}}async function Be(e,t){try{if(!e.action_data)return{success:!1,error:"Action entity missing action_data"};let{action_name:n,kwargs:o}=e.action_data;if(L.has(n)){let r=await L.execute(n,o,t),i=r?.success!==!1,a=r?.error||r?.message;return{success:i,error:i?void 0:a}}else{let{page:r,agentServices:i}=t;return await(await ze()).execute(r,e,i),{success:!0,error:void 0}}}catch(n){return{success:!1,error:n.message}}}var Q=null;async function ze(){if(Q)return Q;let e=await import("./handler-TPOFKKIB.js");return Q=new e.default,Q}async function Pt(e,t,n,o={},r){let i=r||{page:t,agentServices:n,domService:new Y(n.getDomServiceOptions()),executionHistory:o.executionHistory,variables:o.variables,sensitiveKeys:o.sensitiveKeys},a=n.replaceVariables(e),s=await ie(a,i,o);return 
s.status==="error"?{status:"error",completed:s.goalAccomplished||!1,actionEntities:[],explanation:s.reasoning,error:s.error,debugInfo:s.debugInfo}:{status:"success",completed:s.goalAccomplished||!1,actionEntities:s.actionEntity?[s.actionEntity]:[],explanation:s.reasoning,debugInfo:s.debugInfo}}async function Ct(e,t,n,o={},r){let i=r||{page:t,agentServices:n,domService:new Y(n.getDomServiceOptions()),executionHistory:o.executionHistory,variables:o.variables,sensitiveKeys:o.sensitiveKeys},a=n.replaceVariables(e),s=await ie(a,i,o);if(s.status==="error"||!s.actionEntity)return{status:"error",completed:s.goalAccomplished||!1,actionEntities:[],explanation:s.reasoning,error:s.error||"No action generated",debugInfo:s.debugInfo};let{actionEntity:c,reasoning:u,goalAccomplished:p,debugInfo:l}=s,d=await Be(c,i);return d.success?(u&&n.addNote(u),{status:"success",completed:p||!1,actionEntities:[c],explanation:u,debugInfo:l}):{status:"error",completed:!1,actionEntities:[c],error:d.error||"Action execution failed",debugInfo:l}}async function Nt(e,t,n,o={}){let r={page:t,agentServices:n,domService:new Y(n.getDomServiceOptions()),executionHistory:o.executionHistory,variables:o.variables,sensitiveKeys:o.sensitiveKeys},i=n.replaceVariables(e);return o.useCleanScreenshotForAssertion=n.isUseCleanScreenshotForAssertion(),ye(i,r,o)}async function Lt(e,t,n,o,r={}){let{runTaskLoop:i}=await import("./task-57MAWXLN.js"),a=o?c=>{o(c)}:void 0,s=await i(e,{page:t,agentServices:n,domService:void 0,executionHistory:r.executionHistory,variables:r.variables,sensitiveKeys:r.sensitiveKeys},{maxSteps:r.maxSteps,onEvent:a,abortSignal:r.abortSignal});return{status:s.success?"success":"error",completed:s.completed,actionEntities:s.trajectory.actions,explanation:s.summary,error:s.error,tokenUsages:s.metadata.tokenUsages}}export{D as a,Be as b,Pt as c,Ct as d,Nt as e,Lt as f};