@shiplightai/sdk 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/README.md +18 -10
  2. package/dist/agentHelpers-MRG6DCNX.js +4 -0
  3. package/dist/agentHelpers-MRG6DCNX.js.map +1 -0
  4. package/dist/agentLogin-QZDVIJMB.js +4 -0
  5. package/dist/agentLogin-QZDVIJMB.js.map +1 -0
  6. package/dist/chunk-DIRPNR2B.js +195 -0
  7. package/dist/chunk-DIRPNR2B.js.map +1 -0
  8. package/dist/chunk-FWACDSD6.js +17 -0
  9. package/dist/chunk-FWACDSD6.js.map +1 -0
  10. package/dist/chunk-GVEDIII4.js +25 -0
  11. package/dist/chunk-GVEDIII4.js.map +1 -0
  12. package/dist/{chunk-UHZTPBZ3.js → chunk-N54UPO3H.js} +95 -92
  13. package/dist/chunk-N54UPO3H.js.map +1 -0
  14. package/dist/chunk-ODNKMWXO.js +6 -0
  15. package/dist/chunk-ODNKMWXO.js.map +1 -0
  16. package/dist/{chunk-GPZJYXUG.js → chunk-SSPF674P.js} +19 -6
  17. package/dist/chunk-SSPF674P.js.map +1 -0
  18. package/dist/chunk-USNFIQN5.js +4 -0
  19. package/dist/chunk-USNFIQN5.js.map +1 -0
  20. package/dist/chunk-W6S73J4I.js +4 -0
  21. package/dist/chunk-W6S73J4I.js.map +1 -0
  22. package/dist/handler-O7GYRDNA.js +4 -0
  23. package/dist/handler-O7GYRDNA.js.map +1 -0
  24. package/dist/index.js +12 -9
  25. package/dist/index.js.map +1 -0
  26. package/dist/task-E5YOHPFW.js +193 -0
  27. package/dist/task-E5YOHPFW.js.map +1 -0
  28. package/package.json +13 -13
  29. package/dist/agentHelpers-UCLT5EKK.js +0 -1
  30. package/dist/agentLogin-ARB3NEO4.js +0 -1
  31. package/dist/chunk-6H2NJBNL.js +0 -1
  32. package/dist/chunk-GDTCZALZ.js +0 -192
  33. package/dist/chunk-KFC5I6R5.js +0 -14
  34. package/dist/chunk-QIBDXB3J.js +0 -22
  35. package/dist/chunk-UFLZ3URR.js +0 -1
  36. package/dist/chunk-YR4E7JSB.js +0 -3
  37. package/dist/handler-TPOFKKIB.js +0 -1
  38. package/dist/task-57MAWXLN.js +0 -190
package/README.md CHANGED
@@ -16,7 +16,8 @@ import { createAgent, configureSdk } from '@shiplightai/sdk';
 
 // Configure SDK with API key (call once at startup)
 configureSdk({
-  env: { GOOGLE_API_KEY: process.env.GOOGLE_API_KEY },
+  env: { GOOGLE_API_KEY: process.env.GOOGLE_API_KEY! },
+  // Or for Claude: { ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY! }
 });
 
 // Create an agent
@@ -112,6 +113,9 @@ const agent = createAgent({
 |----------|-------|-------|
 | Google | `gemini-2.5-pro` | Recommended, requires `GOOGLE_API_KEY` |
 | Google | `gemini-3-pro-preview` | More powerful, higher cost |
+| Anthropic | `claude-haiku-4-5` | Fast and affordable, requires `ANTHROPIC_API_KEY` |
+| Anthropic | `claude-sonnet-4-5` | Balanced speed and capability |
+| Anthropic | `claude-opus-4-5` | Most capable |
 | OpenAI | `computer-use-preview` | Optional, for vision operations (drag and drop) |
 
 ### `agent.registerAction(action)`
@@ -252,11 +256,10 @@ configureSdk({
 
   // Environment variables (API keys)
   env: {
-    // Required: Google API key for Gemini models
-    GOOGLE_API_KEY: 'your-google-api-key',
-
-    // Optional: OpenAI API key for vision operations
-    // OPENAI_API_KEY: 'sk-...',
+    // At least one model provider key is required
+    GOOGLE_API_KEY: 'your-google-api-key', // for Gemini models
+    // ANTHROPIC_API_KEY: 'your-anthropic-key', // for Claude models
+    // OPENAI_API_KEY: 'sk-...', // for vision operations
   },
 
   // Optional: paths for logs and results
@@ -268,13 +271,18 @@ configureSdk({
 const config = getSdkConfig();
 ```
 
-## Environment Variables
+## API Keys
+
+API keys must be passed via `configureSdk({ env })` — the SDK does not read `process.env` directly.
 
-| Variable | Required | Description |
-|----------|----------|-------------|
-| `GOOGLE_API_KEY` | Yes | Google AI API key for Gemini models |
+| Key | Required | Description |
+|-----|----------|-------------|
+| `GOOGLE_API_KEY` | Yes* | Google AI API key for Gemini models |
+| `ANTHROPIC_API_KEY` | Yes* | Anthropic API key for Claude models |
 | `OPENAI_API_KEY` | No | OpenAI API key (for vision operations) |
 
+*At least one model provider key is required.
+
 ## License
 
 MIT
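
Read together, the README hunks above describe the new configuration surface in this release: provider keys go through `configureSdk({ env })`, and Claude models join Gemini in the supported-model table. The following is a minimal sketch of that usage based only on what the diff shows; the `model` field passed to `createAgent` is an assumed option name for illustration, since the visible hunks do not show `createAgent`'s fields.

```ts
import { configureSdk, createAgent } from '@shiplightai/sdk';

// Per the updated README, the SDK reads API keys from this call,
// not from process.env, and at least one provider key is required.
configureSdk({
  env: {
    // Use a Google key for Gemini models, or an Anthropic key for Claude models.
    ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY!,
    // OPENAI_API_KEY remains optional and is only used for vision operations.
  },
});

// One of the Claude models newly listed in the README's model table.
// NOTE: `model` is an assumed option name; the hunks only show `createAgent({ ... })`.
const agent = createAgent({
  model: 'claude-sonnet-4-5',
});
```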
package/dist/agentHelpers-MRG6DCNX.js ADDED
@@ -0,0 +1,4 @@
+import { createRequire as __createRequire } from "module";
+const require = __createRequire(import.meta.url);
+import{b as t,c as o,d as r,e as p,f as i}from"./chunk-DIRPNR2B.js";import"./chunk-SSPF674P.js";import"./chunk-ODNKMWXO.js";import"./chunk-FWACDSD6.js";import"./chunk-W6S73J4I.js";export{p as evaluateStatement,t as executeAction,r as executeStep,o as generateActionStep,i as runTask};
+//# sourceMappingURL=agentHelpers-MRG6DCNX.js.map
package/dist/agentHelpers-MRG6DCNX.js.map ADDED
@@ -0,0 +1 @@
+{"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
package/dist/agentLogin-QZDVIJMB.js ADDED
@@ -0,0 +1,4 @@
+import { createRequire as __createRequire } from "module";
+const require = __createRequire(import.meta.url);
+import{c as a,d as o,e as t,f as s,g as e,h as i,i as r}from"./chunk-N54UPO3H.js";import"./chunk-USNFIQN5.js";import"./chunk-FWACDSD6.js";import"./chunk-W6S73J4I.js";export{a as LoginType,o as checkLocators,e as createUnsignedInContext,r as generateAndValidateLoginLocators,i as generateValidationLocators,s as validateLogin,t as validateLoginLocators};
+//# sourceMappingURL=agentLogin-QZDVIJMB.js.map
package/dist/agentLogin-QZDVIJMB.js.map ADDED
@@ -0,0 +1 @@
+{"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
package/dist/chunk-DIRPNR2B.js ADDED
@@ -0,0 +1,195 @@
+ import { createRequire as __createRequire } from "module";
2
+ const require = __createRequire(import.meta.url);
3
+ import{a as j,b as B,c as F,d as W}from"./chunk-SSPF674P.js";import{a as re,g as se,i as le}from"./chunk-ODNKMWXO.js";import{a as x}from"./chunk-FWACDSD6.js";import{g as ae}from"./chunk-W6S73J4I.js";import{zodToJsonSchema as fe}from"zod-to-json-schema";function z(e){let t=fe(e,{$refStrategy:"none"});if(t.$schema&&delete t.$schema,t.type!=="object")throw new Error(`Schema must be a Zod object schema, got type: ${t.type}`);return P(t),t}function P(e){if(!(typeof e!="object"||e===null)){if(e.type==="object"&&(e.additionalProperties=!1,e.properties)){let t=Object.keys(e.properties);e.required=t;for(let i of Object.values(e.properties))P(i)}e.type==="array"&&e.items&&P(e.items);for(let t of["anyOf","oneOf","allOf"])Array.isArray(e[t])&&e[t].forEach(P);for(let[t,i]of Object.entries(e))typeof i=="object"&&i!==null&&!["properties","items","anyOf","oneOf","allOf"].includes(t)&&P(i)}}var ce=class{constructor(e){this.registry=e}getToolDefinitions(){return this.registry.getTools().filter(e=>e.availability.openai).map(e=>({type:"function",function:{name:e.name,description:e.description,parameters:z(e.schema),strict:!0}}))}getToolDefinitionsFiltered(e){let t=new Set(e);return this.registry.getTools().filter(i=>t.has(i.name)&&i.availability.openai).map(i=>({type:"function",function:{name:i.name,description:i.description,parameters:z(i.schema),strict:!0}}))}toJSON(){return this.getToolDefinitions()}getToolDefinition(e){let t=this.registry.get(e);if(t)return{type:"function",function:{name:t.name,description:t.description,parameters:z(t.schema),strict:!0}}}getToolCount(){return this.registry.size()}getToolNames(){return this.registry.getToolNames()}};function ye(e,t){let i=new Map;for(let l of t)l.hash&&l.url?i.set(l.hash,l.url):l.uuid&&l.url&&i.set(l.uuid,l.url);let n=/!\[([^\]]*)\]\(image:([a-f0-9]{64}|[a-zA-Z0-9\-]+)\)/g,a=[],o=0,s;for(;(s=n.exec(e))!==null;){let l=e.slice(o,s.index);l&&a.push({type:"text",text:l});let u=s[2];i.has(u)?a.push({type:"image",image:new URL(i.get(u))}):a.push({type:"text",text:s[0]}),o=s.index+s[0].length}let r=e.slice(o);return r&&a.push({type:"text",text:r}),a.length===0&&a.push({type:"text",text:e}),a}function be(e){let t=[];for(let i of e){let n=i.content||"",a=i.images||[],o=ye(n,a);t.length>0&&o.length>0&&t.push({type:"text",text:`
4
+
5
+ `}),t.push(...o)}return t}var ue=!1;function V(e,t=ue){if(t){let i=be(e);if(i.length===0)return[];let n={type:"text",text:`
6
+
7
+ <retrieved_knowledge>
8
+
9
+ Below are expert curated knowledge that are retrieved from the knowledge base; APPLY THESE KNOWLEDGES IF THEY ARE RELEVANT TO THE TASK:
10
+ `},a={type:"text",text:`
11
+
12
+ </retrieved_knowledge>
13
+
14
+ `};return[n,...i,a]}else{let i=e.filter(n=>!n.images||n.images.length===0).map(n=>n.content||"").filter(n=>n.length>0).join(`
15
+
16
+ `);return i?[{type:"text",text:`
17
+
18
+ <retrieved_knowledge>
19
+
20
+ Below are expert curated knowledge that are retrieved from the knowledge base; APPLY THESE KNOWLEDGES IF THEY ARE RELEVANT TO THE TASK:
21
+ `},{type:"text",text:i},{type:"text",text:`
22
+
23
+ </retrieved_knowledge>
24
+
25
+ `}]:[]}}function pe(e,t=ue){if(!t)return 0;let i=0;for(let n of e)n.images&&(i+=n.images.length);return i}function ve(){let e=new Date,t=new Intl.DateTimeFormat("en-US",{timeZone:"America/Los_Angeles",year:"numeric",month:"2-digit",day:"2-digit",hour:"2-digit",minute:"2-digit",second:"2-digit",hour12:!1,timeZoneName:"short"}).formatToParts(e),i=t.find(c=>c.type==="year").value,n=t.find(c=>c.type==="month").value,a=t.find(c=>c.type==="day").value,o=t.find(c=>c.type==="hour").value,s=t.find(c=>c.type==="minute").value,r=t.find(c=>c.type==="second").value,l=t.find(c=>c.type==="timeZoneName").value,u=String(e.getMilliseconds()).padStart(3,"0");return`${i}-${n}-${a} ${o}:${s}:${r}.${u} ${l}`}function he(e){return`# Your Role
26
+ You are part of a end-to-end testing system that is designed to automate the testing of a website. Given an instruction in natural language, your job is to translate it into an action in the predefined actions. The instruction might not match any action in the predefined actions or might require to interact with an element that is not on the page. It's your job to detect these cases and return an empty action.
27
+
28
+ # Rules
29
+ ## Action Selection Rules
30
+ - If the instruction requires a specific action, you must select that action. If no action matches the specific action, you must return an empty action so that testing system can aware of the situation.
31
+ - If asked to do nothing or ignore the instruction or something similar, you must select \`wait\` action of 1 second.
32
+ - If asked to verify something, you must select \`verify\` action.
33
+ - If asked to do accurate interaction, like selecting a specific chunk of text or drawing a bounding box, you must select \`perform_accurate_operation\` action.
34
+ - If asked to scroll, you decide if you need to \`scroll\` the page or \`scroll_on_element\`. also you need to calculate how much to scroll.
35
+
36
+ ## Element Selection Rules
37
+ - If the instruction requires to interact with a specific element, you must select that element.
38
+ - If no element matches the specific element, you must return an empty action so that testing system can aware of the situation. NEVER click on alternative elements as a workaround. NEVER try to navigate to find the element (e.g. by scrolling, closing modals, clicking other buttons, or refreshing the page).
39
+ - Fail fast: If the exact target element is not visible on the current page, return an empty action immediately. The testing system will handle recovery.
40
+ - The type of the selected element doesn't have to match the target, for example, if the instruction requires to interact with an image but no image element matches, you can select a div that contains the image.
41
+
42
+ ## Instruction Completion Analysis Rules
43
+ - Reasoning about the instruction completion is critical. You must analyze the instruction and your action to determine if your action will complete the instruction.
44
+
45
+ ## Response Format Rules
46
+ - Respond using valid JSON format, which can be parsed by python json.loads():
47
+ {
48
+ "thought": "...", // step by step reasoning of your decision making process
49
+ "description": "...", // detailed description of the action to be performed. (e.g. click on the 'Submit' button to submit the form)
50
+ "action": {"one_action_name": {// action-specific parameter}},
51
+ "completes_instruction": true/false // boolean indicating whether this action completes the given instruction. Set to false if the action is only partial, requires follow-up actions, or cannot fully complete the instruction.
52
+ }
53
+
54
+ Follow the rules above strictly.
55
+
56
+ # Action Space
57
+ ${e}
58
+
59
+ # Examples
60
+ Example of \`verify\` action:
61
+ instruction: "Verify that the page title is 'Home'"
62
+ {
63
+ "thought": "I understand the instruction is to verify that the page title is 'Home'. I will use the \`verify\` action to verify the page title.", // Do not verify it yourself, just translate the instruction to the \`verify\` action
64
+ "description": "Verify that the page title is 'Home'",
65
+ "action": {"verify": {"statement": "the page title is 'Home'"}}, // the statement should be the same wording as the instruction, don't rephrase it
66
+ "completes_instruction": true // this action fully completes the instruction
67
+ }
68
+
69
+ Example of \`save_variable\` action:
70
+ instruction: "Extract and save the page title as page_title"
71
+ {
72
+ "thought": "I understand the instruction is to save the page title as page_title. The current page title is 'Home'. I will use the \`save_variable\` action to save the page title.",
73
+ "description": "Save the page title as variable page_title",
74
+ "action": {"save_variable": {"name": "page_title", "value": "Home"}}, // the value should be the same wording as the instruction, don't rephrase it
75
+ "completes_instruction": true // this action fully completes the instruction
76
+ }
77
+
78
+ Example of empty action when the target element is not on the page, or the instruction cannot be completed for any reason:
79
+ {
80
+ "thought": "The user wants me to click the 'Create Entry' button. However, the current page is a sign-in page and the only interactive element is the 'Sign In' button. The 'Create Entry' button is not present on the page. The previous attempt to click this button also failed. Therefore, I cannot complete the instruction and will return an empty action.",
81
+ "description": "Click the 'Create Entry' button.",
82
+ "action": {}, // empty action object to indicate the instruction cannot be completed
83
+ "completes_instruction": false
84
+ }
85
+ `}function we(e){if(!e||e.length===0)return"";let t="";return e.forEach(([i,n],a)=>{t+=`(${a+1}) Description: ${i}
86
+ Feedback: ${n}
87
+ `}),`## Additional context
88
+ You just executed following steps in order:
89
+ ${t}`}function me(e,t,i,n,a,o,s=!1,r=ve(),l=!1,u){let c=[],d=`
90
+ # Instruction
91
+ "${t}"
92
+
93
+ # Current webpage state
94
+ ## Tab information:
95
+ ${e.currentTabText}Available tabs:
96
+ ${e.tabsText}
97
+
98
+ ## Element interaction guidelines:
99
+ - Only use indexes that exist in the provided element list
100
+ - Each element has a unique index number (e.g., "[33]<button>")
101
+ - The bounding box and index of each element is marked on the screenshot.
102
+ - Elements marked with "[]Non-interactive text" are non-interactive (for context only)
103
+ - Elements are indented to show the structure of the element tree, with indentation level indicating depth
104
+ - When considering an element, also consider its children elements
105
+ - If an element is scrollable, it will be marked with "(SCROLLABLE)" (e.g., "[33](SCROLLABLE)<ul>"), use the \`scroll_on_element\` action to scroll on the element.
106
+
107
+ ## Interactive elements from current page:
108
+ ${e.elementsText}
109
+ `;if(c.push({type:"text",text:d}),s&&e.slicedScreenshotsBase64)for(let m of e.slicedScreenshotsBase64)c.push({type:"text",text:"The following images provided are sliced screenshots of the current webpage, with interactive elements highlighted. The element index label locate at the top right corner of the bounding box."}),c.push({type:"image",image:m});else o&&(c.push({type:"text",text:"The following image provided is a screenshot of the current webpage, with interactive elements highlighted. The element index label locate at the top right corner of the bounding box."}),c.push({type:"image",image:o}));if(a&&a.length>0){let m=V(a,l);c.push(...m)}let p="";if(p+=`
110
+ Current local time is ${r}.
111
+ `,i&&Object.keys(i).length>0){let m=[];for(let w of Object.keys(i))if(u?.has(w))m.push(` - ${w}: [SENSITIVE - value hidden]`);else{let b=i[w],$=typeof b=="string"?b:JSON.stringify(b);m.push(` - ${w}: "${$}"`)}p+=`
112
+ ## Available Data Placeholders
113
+ The following placeholders are available for use in your actions:
114
+ ${m.join(`
115
+ `)}
116
+
117
+ To use them, write Jinja-like template syntax: {{ placeholder_name }}
118
+ - Use the EXACT placeholder name as shown above
119
+ - Do NOT use the actual value directly
120
+ - The values shown are for context only to help you understand what data is available
121
+ - In action descriptions, describe what the placeholder represents in natural language (e.g., "Type the first user name" instead of "Type {{ firstUserName }}")
122
+ `}if(n&&n.length>0){let m=we(n);p+=`
123
+ `+m}return p+=`
124
+ Based on the above information, please determine the right action to accomplish the task.
125
+ `,c.push({type:"text",text:p}),c}import xe from"openai";async function Se(e,t,i){await e.evaluate(()=>{let c=document.getElementById("playwright-highlight-container");c&&c.remove(),window._highlightCleanupFunctions&&(window._highlightCleanupFunctions.forEach(d=>d()),window._highlightCleanupFunctions=[])});let n=await e.screenshot({type:"png",fullPage:!1}),{default:a}=await import("sharp"),o=a(n),s=await o.metadata(),r=s.width||0,l=s.height||0;x.log(`Screenshot actual dimensions: ${r}x${l}, viewport: ${t}x${i}`);let u;return r!==t||l!==i?(x.log(`Resizing screenshot from ${r}x${l} to ${t}x${i}`),u=await o.resize(t,i).png().toBuffer()):u=n,u.toString("base64")}async function ie(e,t,i){let n=await e.evaluateHandle(l=>document.elementFromPoint(l.x,l.y),{x:t,y:i});if(!n)throw new Error(`No element found at (${t}, ${i})`);let a=n.asElement();if(!a)throw await n.dispose(),new Error(`No element found at (${t}, ${i})`);let o=await a.boundingBox();if(!o)throw await n.dispose(),new Error("Element has no bounding box");let s=t-(o.x+o.width/2),r=i-(o.y+o.height/2);return{relative_x:s,relative_y:r,element:a}}async function Te(e,t){return t?{xpath:void 0,locator:(t?await se(e,t):null)||void 0,frame_path:[]}:{xpath:void 0,locator:void 0,frame_path:[]}}async function _e(e,t){let i=null,n=null;switch(t.type){case"click":{let o=t.button==="right"?"right_click_by_coordinates":"click_by_coordinates";if(t.x===void 0||t.y===void 0)break;let s=await ie(e,t.x,t.y);i={action_name:o,kwargs:{relative_x:s.relative_x,relative_y:s.relative_y}},n=s.element;break}case"double_click":{if(t.x===void 0||t.y===void 0)break;let o=await ie(e,t.x,t.y);i={action_name:"double_click_by_coordinates",kwargs:{relative_x:o.relative_x,relative_y:o.relative_y}},n=o.element;break}case"drag":{if(!t.path||t.path.length<2)break;let o=t.path[0].x,s=t.path[0].y,r=t.path[1].x,l=t.path[1].y,u=await ie(e,o,s);i={action_name:"drag_drop",kwargs:{relative_x:u.relative_x,relative_y:u.relative_y,delta_x:r-o,delta_y:l-s}},n=u.element;break}}let a=await Te(e,n);return{action_data:i,locatorInfo:a}}async function Y(e,t,i={}){try{let{page:n}=t,a=n.viewportSize();if(!a)return{status:"error",error:"Viewport size not available"};let o=a.width,s=a.height;x.log(`Viewport size: ${o}x${s}`);let r=await Se(n,o,s),l=ae().env?.OPENAI_API_KEY;if(!l)return{status:"error",error:"OpenAI API key not found. Set OPENAI_API_KEY environment variable or configure via SDK config."};let u=new xe({apiKey:l});x.log("Sending request to OpenAI CUA...");let c=[{role:"user",content:[{type:"input_text",text:`
126
+ You will be given an action to execute and screenshot of the current screen.
127
+ Output one computer_call object that will accomplish this action.
128
+ Action: ${e}
129
+ `},{type:"input_image",detail:"auto",image_url:`data:image/png;base64,${r}`}]}],d=await u.responses.create({model:"computer-use-preview",tools:[{type:"computer-preview",display_width:o,display_height:s,environment:"browser"}],input:c,truncation:"auto",temperature:.1});x.log("Received response from OpenAI CUA");let p=d.output.filter($=>$.type==="computer_call")[0]||null;if(!p)return{status:"error",reasoning:d.output_text||""||"Invalid action generation response",error:"No computer_call action generated"};let m=p.action;x.log(`Generated action: ${JSON.stringify(m)}`);let{action_data:w,locatorInfo:b}=await _e(n,m);return w?{status:"success",actionEntity:{action_description:e,action_data:w,locator:b.locator||void 0,xpath:b.xpath||void 0,frame_path:b.frame_path},reasoning:"Action generated successfully using pure vision mode",goalAccomplished:!0}:{status:"error",error:"Failed to map action to ActionDataEntity"}}catch(n){return x.error("Error generating CUA action",n),{status:"error",error:n.message||"Failed to generate action with pure vision"}}}async function Ae(e){let t=e.context().pages(),i=null,n=[];for(let o=0;o<t.length;o++){let s=t[o];s===e&&(i=o);let r="(title unavailable)";try{r=await Promise.race([s.title(),new Promise((u,c)=>setTimeout(()=>c(new Error("timeout")),1e3))])}catch{}let l=`Tab ${o}: ${s.url()}`;r&&(l+=` - ${r.slice(0,50)}`),n.push(l)}let a=n.length>0?n.join(`
130
+ `):"";return{currentTabText:i!==null?`Current tab: ${i}
131
+ `:"",tabsText:a}}async function $e(e,t){let{currentTabText:i,tabsText:n}=await Ae(e);return{elementsText:t,currentUrl:e.url(),currentTitle:await e.title(),currentTabText:i,tabsText:n}}async function q(e,t){let{page:i,domService:n,agentServices:a}=e,o=typeof t=="boolean"?{useCleanScreenshot:t}:t||{},s=a.getInteractiveClassNames(),{domState:r,screenshotBase64:l,slicedScreenshotsBase64:u}=await n.getClickableElementsWithScreenshot(i,{interactiveClassNames:s,useCleanScreenshot:o.useCleanScreenshot,useSlicedScreenshots:o.useSlicedScreenshots,resizeSlicedScreenshots:o.resizeSlicedScreenshots,useAccessibilityTree:o.useAccessibilityTree,actionIntent:o.actionIntent}),c=r.elementTree.clickableElementsToString(),d=await $e(i,c);return u&&(d.slicedScreenshotsBase64=u),{domTree:c,screenshotBase64:l,slicedScreenshotsBase64:u,domState:r,pageContext:d}}function J(e,t){return e?{prompt_tokens:e.promptTokens||e.inputTokens||0,completion_tokens:e.completionTokens||e.outputTokens||0,total_tokens:e.totalTokens||0,model:t}:null}import{generateText as ke,Output as Ee}from"ai";import{z as M}from"zod";function Ie(e){let t=e.toLowerCase();return[/\b(type|enter|input|fill|write|set)\b/,/\b(text|value|field|box)\b.*\b(to|with|as)\b/,/\b(username|password|email|search|query)\b/].some(i=>i.test(t))?"input":[/\bscroll\b/,/\b(scroll|swipe)\s*(up|down|left|right)\b/,/\b(page|move)\s*(down|up)\b/].some(i=>i.test(t))?"scroll":[/\b(click|tap|press|select|choose|pick|check|toggle)\b/,/\b(open|close|submit|confirm|cancel|dismiss)\b/,/\b(button|link|menu|dropdown|checkbox|radio)\b/].some(i=>i.test(t))?"click":"all"}function Oe(e){if(!e)return null;if(e instanceof URL)return e.href;if(typeof e=="object"&&e.href)return String(e.href);if(typeof e=="object"&&typeof e.toString=="function"){let t=e.toString();if(t.startsWith("http://")||t.startsWith("https://"))return t}return typeof e=="string"&&(e.startsWith("http://")||e.startsWith("https://"))?e:null}function je(e){return e.map(t=>({role:t.role,content:Array.isArray(t.content)?t.content.map(i=>{if(i.type==="image"){let n=i.image,a=Oe(n);if(a)return{type:"image",file:a};let o=typeof n=="string"?n:"";return{type:"image",file:o.startsWith("data:")?o:`data:image/png;base64,${o}`}}return{type:"text",text:i.text}}):t.content}))}async function de(e,t,i={}){let{page:n,agentServices:a}=t,o=a.getModel(),s=i.temperature??0,r=a.retrieveKnowledges(e).catch(v=>(x.log(`Failed to retrieve knowledges: ${v}`),[])),l=a.isSlicedScreenshotsEnabled(),u=a.isResizeSlicedScreenshotsEnabled(),c=a.isKnowledgeImagesEnabled(),d=a.isAccessibilityTreeEnabled(),p=a.isActionIntentFilteringEnabled(),m=p?Ie(e):"all";p&&m!=="all"&&x.log(`Action intent filtering: detected '${m}' intent from statement`);let{screenshotBase64:w,domState:b,pageContext:$}=await q(t,{useSlicedScreenshots:l,resizeSlicedScreenshots:u,useAccessibilityTree:d,actionIntent:m});t.domState=b;let Q=new ce(j).getToolDefinitions().map(v=>{let O=v.function;return`${O.name}: ${O.description}
132
+ Parameters: ${JSON.stringify(O.parameters,null,2)}`}).join(`
133
+
134
+ `),D=he(Q),N=await r,C=me($,e,t.variables,t.executionHistory,N.length>0?N:void 0,w,l,void 0,c,t.sensitiveKeys),f=[];i.chatHistory&&i.chatHistory.length>0&&i.chatHistory.forEach(v=>{v.role==="user"?f.push({role:"user",content:v.content}):v.role==="assistant"&&f.push({role:"assistant",content:v.content})}),f.push({role:"user",content:C});let T=je(f),R=j.buildActionUnionSchema(),X=M.object({thought:M.string().describe("Step by step reasoning of your decision making process").optional().default(""),description:M.string().describe("Detailed description of the action to be performed").optional().default(""),action:R,completes_instruction:M.boolean().describe("Whether this action completes the given instruction").optional().default(!1)}),ee=Array.isArray(C)?C.filter(v=>v.type==="image").length:0,te=F(o,ee),H=await ke({model:B(o),system:D,messages:f,temperature:s,output:Ee.object({schema:X}),providerOptions:te}),S=H.output,L=JSON.stringify(S,null,2);re.debug(`Generate Action Raw Output: ${L}`);let K=[],U=J(H.usage,o);U&&K.push(U);let k={systemPrompt:D,userPrompt:T,rawLlmResponse:L,tokenUsages:K},_=S.thought||"",E=S.description||"",I=S.action||{},h=S.completes_instruction||!1;if(!I||Object.keys(I).length===0)return{status:"error",reasoning:_||E||"No action generated",goalAccomplished:h,error:"Agent did not generate any action",debugInfo:k};let g=Object.keys(I)[0];if(g==="done")return{status:"error",reasoning:_||E||"Task marked as done",goalAccomplished:h,error:"Agent indicated task is done without generating an action",debugInfo:k};if(g==="perform_accurate_operation")return await Y(e,t,i);let y=I[g]||{},A={};if(typeof y.element_index=="number"){let v=y.element_index;if(v<0)return{status:"error",reasoning:_||E||"No action generated",goalAccomplished:h,error:"Agent did not generate any action",debugInfo:k};let O=b.selectorMap.get(v);O&&(A=await le(n,O))}let oe=E;return g==="verify"&&(oe=e,y.statement=e),{status:"success",actionEntity:{...A,action_description:oe||_||`${g}(${JSON.stringify(y)})`,action_data:{action_name:g,kwargs:y}},reasoning:_||E,goalAccomplished:h,debugInfo:k}}import{generateText as Ne,Output as Ce}from"ai";import{z as Z}from"zod";function Re(e){if(!e)return null;if(e instanceof URL)return e.href;if(typeof e=="object"&&e.href)return String(e.href);if(typeof e=="object"&&typeof e.toString=="function"){let t=e.toString();if(t.startsWith("http://")||t.startsWith("https://"))return t}return typeof e=="string"&&(e.startsWith("http://")||e.startsWith("https://"))?e:null}var He=Z.object({screenshotDescription:Z.string().describe(`Description of the screenshot content, listing out key elements along with their Set of Mark indices,
135
+ and a description of their location: formatting example: [12] A red button with text "Submit", next to [11]
136
+ [45] A modal dialog titled "Confirmation",
137
+ in the center of the screen`),explanation:Z.string().describe("Step by step reasoning explaining your conclusion about the statement"),conclusion:Z.enum(["true","false","unknown"]).describe("Whether the statement is true, false, or unknown if you cannot make a conclusion")});async function ne(e,t,i={}){return i.usePureVision?Y(e,t,i):de(e,t,i)}function Pe(){let e=new Date,t=e.toLocaleDateString("en-US",{weekday:"long",year:"numeric",month:"long",day:"numeric",timeZone:"America/Los_Angeles"}),i=e.toLocaleTimeString("en-US",{hour:"2-digit",minute:"2-digit",second:"2-digit",fractionalSecondDigits:3,timeZoneName:"short",timeZone:"America/Los_Angeles"});return{dateString:t,timeString:i}}function De(){return`# Role
138
+ You are an experienced QA person for web applications.
139
+ You are tasked to verify the validity of a given statement based on the screenshot and element tree of a web page.
140
+ `}async function ge(e,t,i={}){let{page:n,executionHistory:a}=t,o=t.agentServices.getModel();try{let s=t.agentServices.isSlicedScreenshotsEnabled(),r=t.agentServices.isResizeSlicedScreenshotsEnabled(),l=t.agentServices.isKnowledgeImagesEnabled(),u=t.agentServices.isAccessibilityTreeEnabled(),{domTree:c,screenshotBase64:d,slicedScreenshotsBase64:p,domState:m,pageContext:w}=await q(t,{useCleanScreenshot:i.useCleanScreenshotForAssertion,useSlicedScreenshots:s,resizeSlicedScreenshots:r,useAccessibilityTree:u});t.domState=m;let b="";a&&a.length>0&&(b=`
141
+ # Previous actions in this session:
142
+ ${a.map(([h,g],y)=>`${y+1}. Action: ${h}
143
+ Result: ${g}`).join(`
144
+ `)}
145
+ `);let{dateString:$,timeString:Q}=Pe(),D=`
146
+ # User statement
147
+ "${e}"
148
+
149
+ # UI Terminology (IMPORTANT - read carefully)
150
+ When the statement mentions a **Modal**, **Dialog**, **Popup**, or **Panel**, use this definition:
151
+
152
+ A modal is ANY distinct UI section that:
153
+ - Has a clear visual boundary separating it from other content
154
+ - Contains a specific title, heading, or purpose
155
+ - Presents options, forms, or content for user interaction
156
+
157
+ This includes ALL of the following:
158
+ - Traditional overlay/popup dialogs
159
+ - Side panels (left or right)
160
+ - Slide-out drawers
161
+ - Bottom sheets
162
+ - Floating panels
163
+ - Any visually distinct section with a title
164
+
165
+ **Important**: A side panel on the right or left side of the screen IS a modal if it has a distinct title and purpose. It does NOT need to overlay or block the main content to be considered a modal.
166
+
167
+ NOT modals: Loading spinners, toast notifications, tooltips, main content area.
168
+
169
+ # Current webpage state
170
+ ## Tab information:
171
+ ${w.currentTabText}Available tabs:
172
+ ${w.tabsText}
173
+
174
+ ## Element interaction guidelines:
175
+ - Each element has a unique index number (e.g., "[33]<button>")
176
+ - Elements marked with "[]Non-interactive text" are non-interactive (for context only)
177
+ - Elements are indented to show the structure of the element tree, with indentation level indicating depth
178
+
179
+ ## Interactive elements from current page:
180
+ ${c}
181
+
182
+ ## Screenshot
183
+ ${s&&p?"The following images are sliced screenshots of the current webpage (left, middle, right sections).":"The image provided is a screenshot of the current webpage."}
184
+ `,N="";if(t.variables&&Object.keys(t.variables).length>0){let h=[];for(let g of Object.keys(t.variables))if(!t.sensitiveKeys?.has(g)){let y=t.variables[g],A=typeof y=="string"?y:JSON.stringify(y);h.push(` - ${g}: "${A}"`)}h.length>0&&(N=`
185
+ ## Available Variables
186
+ The following non-sensitive variables are available:
187
+ ${h.join(`
188
+ `)}`)}let C=`
189
+ ${N}
190
+ ${b}
191
+
192
+ Today is ${$}. Current local time is ${Q}.
193
+ Based on the above information, please determine if the statement is true.
194
+ `,f=await t.agentServices.retrieveKnowledges(e),T=[{type:"text",text:D}],R=0;if(s&&p&&p.length>0)for(let h of p)T.push({type:"image",image:h}),R++;else T.push({type:"image",image:d}),R=1;if(f&&f.length>0){let h=V(f,l);T.push(...h)}T.push({type:"text",text:C});let X=f?pe(f,l):0,ee=R+X,te=F(o,ee),H=De(),S=await Ne({model:B(o),system:H,messages:[{role:"user",content:T}],output:Ce.object({schema:He}),temperature:0,providerOptions:te}),{conclusion:L,explanation:K}=S.output,U=JSON.stringify(S.output,null,2),k=[],_=J(S.usage,o);_&&k.push(_);let E=[{role:"user",content:T.map(h=>{if(h.type==="text")return{type:"text",text:h.text};if(h.type==="image"){let g=h.image,y=Re(g);if(y)return{type:"image",file:y};let A=typeof g=="string"?g:"";return{type:"image",file:A.startsWith("data:")?A:`data:image/png;base64,${A}`}}return{type:"text",text:"[unknown content type]"}})}],I={systemPrompt:H,userPrompt:E,rawLlmResponse:U,screenshotWithSom:d,tokenUsages:k,retrievedKnowledges:f&&f.length>0?f:void 0,elementTree:c};return{success:L==="true",explanation:K,debugInfo:I}}catch(s){return{success:!1,error:s.message}}}async function Le(e,t){try{if(!e.action_data)return{success:!1,error:"Action entity missing action_data"};let{action_name:i,kwargs:n}=e.action_data;if(j.has(i)){let a=await j.execute(i,n,t),o=a?.success!==!1,s=a?.error||a?.message;return{success:o,error:o?void 0:s}}else{let{page:a,agentServices:o}=t;return await(await Ke()).execute(a,e,o),{success:!0,error:void 0}}}catch(i){return{success:!1,error:i.message}}}var G=null;async function Ke(){if(G)return G;let e=await import("./handler-O7GYRDNA.js");return G=new e.default,G}async function $t(e,t,i,n={},a){let o=a||{page:t,agentServices:i,domService:new W(i.getDomServiceOptions()),executionHistory:n.executionHistory,variables:n.variables,sensitiveKeys:n.sensitiveKeys},s=i.replaceVariables(e),r=await ne(s,o,n);return r.status==="error"?{status:"error",completed:r.goalAccomplished||!1,actionEntities:[],explanation:r.reasoning,error:r.error,debugInfo:r.debugInfo}:{status:"success",completed:r.goalAccomplished||!1,actionEntities:r.actionEntity?[r.actionEntity]:[],explanation:r.reasoning,debugInfo:r.debugInfo}}async function kt(e,t,i,n={},a){let o=a||{page:t,agentServices:i,domService:new W(i.getDomServiceOptions()),executionHistory:n.executionHistory,variables:n.variables,sensitiveKeys:n.sensitiveKeys},s=i.replaceVariables(e),r=await ne(s,o,n);if(r.status==="error"||!r.actionEntity)return{status:"error",completed:r.goalAccomplished||!1,actionEntities:[],explanation:r.reasoning,error:r.error||"No action generated",debugInfo:r.debugInfo};let{actionEntity:l,reasoning:u,goalAccomplished:c,debugInfo:d}=r,p=await Le(l,o);return p.success?(u&&i.addNote(u),{status:"success",completed:c||!1,actionEntities:[l],explanation:u,debugInfo:d}):{status:"error",completed:!1,actionEntities:[l],error:p.error||"Action execution failed",debugInfo:d}}async function Et(e,t,i,n={}){let a={page:t,agentServices:i,domService:new W(i.getDomServiceOptions()),executionHistory:n.executionHistory,variables:n.variables,sensitiveKeys:n.sensitiveKeys},o=i.replaceVariables(e);return n.useCleanScreenshotForAssertion=i.isUseCleanScreenshotForAssertion(),ge(o,a,n)}async function It(e,t,i,n,a={}){let{runTaskLoop:o}=await import("./task-E5YOHPFW.js"),s=n?l=>{n(l)}:void 0,r=await o(e,{page:t,agentServices:i,domService:void 
0,executionHistory:a.executionHistory,variables:a.variables,sensitiveKeys:a.sensitiveKeys},{maxSteps:a.maxSteps,onEvent:s,abortSignal:a.abortSignal});return{status:r.success?"success":"error",completed:r.completed,actionEntities:r.trajectory.actions,explanation:r.summary,error:r.error,tokenUsages:r.metadata.tokenUsages}}export{Y as a,Le as b,$t as c,kt as d,Et as e,It as f};
195
+ //# sourceMappingURL=chunk-DIRPNR2B.js.map
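
The most legible logic in this new chunk is its OpenAI strict-mode schema conversion (the minified `z`/`P` pair near the top of the hunk, fed by `zod-to-json-schema`). Below is a readable sketch of that pattern, assuming `zod` and `zod-to-json-schema` are available as the bundle's own imports suggest; it is a de-minified illustration, not the package's exported API.

```ts
import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema';

// Recursively enforce OpenAI strict-mode rules on a JSON Schema:
// every object gets additionalProperties: false and all of its
// properties become required; arrays and unions are walked too.
function makeStrictCompatible(schema: any): void {
  if (typeof schema !== 'object' || schema === null) return;
  if (schema.type === 'object') {
    schema.additionalProperties = false;
    if (schema.properties) {
      schema.required = Object.keys(schema.properties);
      for (const prop of Object.values(schema.properties)) {
        makeStrictCompatible(prop);
      }
    }
  }
  if (schema.type === 'array' && schema.items) {
    makeStrictCompatible(schema.items);
  }
  for (const key of ['anyOf', 'oneOf', 'allOf']) {
    if (Array.isArray(schema[key])) schema[key].forEach(makeStrictCompatible);
  }
}

// Convert a Zod object schema into JSON Schema suitable for OpenAI
// function calling with strict: true (refs flattened, $schema removed).
function zodToOpenAISchema(schema: z.ZodType): Record<string, any> {
  const jsonSchema: any = zodToJsonSchema(schema, { $refStrategy: 'none' });
  if (jsonSchema.$schema) delete jsonSchema.$schema;
  if (jsonSchema.type !== 'object') {
    throw new Error(`Schema must be a Zod object schema, got type: ${jsonSchema.type}`);
  }
  makeStrictCompatible(jsonSchema);
  return jsonSchema;
}

// Example: parameters for a hypothetical `verify` tool.
const params = zodToOpenAISchema(z.object({ statement: z.string() }));
// -> type: 'object', properties.statement.type: 'string',
//    required: ['statement'], additionalProperties: false
```

The `strict: true` tool definitions built elsewhere in this chunk depend on exactly these two invariants: `additionalProperties: false` on every object and every property listed in `required`.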
package/dist/chunk-DIRPNR2B.js.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"sources":["../../sdk-core/src/llm_tools/schema.ts","../../sdk-core/src/llm_tools/providers/openai.ts","../../sdk-core/src/services/knowledgeService.ts","../../sdk-core/src/agent/action-generation/actionPrompts.ts","../../sdk-core/src/agent/action-generation/coordinatesBased.ts","../../sdk-core/src/utils/pageContext.ts","../../sdk-core/src/utils/tokenUsage.ts","../../sdk-core/src/agent/action-generation/elementBased.ts","../../sdk-core/src/agent/core/agentCore.ts","../../sdk-core/src/agent/agentHelpers.ts"],"sourcesContent":["/**\n * LLM Tools - Schema Utilities\n *\n * Utilities for converting Zod schemas to JSON Schema compatible with\n * OpenAI strict mode and MCP format. Based on browser-use's SchemaOptimizer.\n */\n\nimport { z } from 'zod';\nimport { zodToJsonSchema } from 'zod-to-json-schema';\n\n/**\n * Convert Zod schema to JSON Schema compatible with OpenAI strict mode\n *\n * OpenAI strict mode requirements:\n * - All objects must have `additionalProperties: false`\n * - All properties must be in the `required` array\n * - No $ref or $defs (everything must be inlined)\n * - Parameters must always be an object (even for single parameters)\n *\n * @param schema - Zod schema to convert\n * @returns JSON Schema object ready for OpenAI API\n */\nexport function zodToOpenAISchema(schema: z.ZodType): Record<string, any> {\n // Convert Zod to JSON Schema with flattened refs\n let jsonSchema = zodToJsonSchema(schema, {\n $refStrategy: 'none', // Flatten all $refs (no definitions)\n });\n\n // Remove $schema field if present (OpenAI doesn't want it)\n if (jsonSchema.$schema) {\n delete jsonSchema.$schema;\n }\n\n // Ensure the root is always an object (OpenAI requirement)\n if ((jsonSchema as any).type !== 'object') {\n // If it's not an object, the schema should already be an object type\n // This should not happen with z.object(), but just in case\n throw new Error(`Schema must be a Zod object schema, got type: ${(jsonSchema as any).type}`);\n }\n\n // Make compatible with OpenAI strict mode\n makeStrictCompatible(jsonSchema);\n\n return jsonSchema;\n}\n\n/**\n * Convert Zod schema to JSON Schema for MCP (Model Context Protocol)\n *\n * MCP is less strict than OpenAI, so we use a more permissive format\n *\n * @param schema - Zod schema to convert\n * @returns JSON Schema object for MCP\n */\nexport function zodToMCPSchema(schema: z.ZodType): Record<string, any> {\n return zodToJsonSchema(schema, {\n $refStrategy: 'none',\n });\n}\n\n/**\n * Make a JSON Schema compatible with OpenAI strict mode\n *\n * Recursively processes the schema to ensure:\n * 1. All objects have `additionalProperties: false`\n * 2. All object properties are required\n * 3. 
No optional fields remain\n *\n * @param schema - JSON Schema object to modify in-place\n */\nfunction makeStrictCompatible(schema: any): void {\n if (typeof schema !== 'object' || schema === null) {\n return;\n }\n\n // Handle objects\n if (schema.type === 'object') {\n // Add additionalProperties: false for strict mode\n schema.additionalProperties = false;\n\n // Make all properties required\n if (schema.properties) {\n const allProps = Object.keys(schema.properties);\n schema.required = allProps;\n\n // Recursively apply to nested properties\n for (const prop of Object.values(schema.properties)) {\n makeStrictCompatible(prop);\n }\n }\n }\n\n // Handle arrays\n if (schema.type === 'array' && schema.items) {\n makeStrictCompatible(schema.items);\n }\n\n // Handle union types (anyOf, oneOf, allOf)\n for (const key of ['anyOf', 'oneOf', 'allOf']) {\n if (Array.isArray(schema[key])) {\n schema[key].forEach(makeStrictCompatible);\n }\n }\n\n // Recursively process other nested objects\n for (const [key, value] of Object.entries(schema)) {\n if (\n typeof value === 'object' &&\n value !== null &&\n !['properties', 'items', 'anyOf', 'oneOf', 'allOf'].includes(key)\n ) {\n makeStrictCompatible(value);\n }\n }\n}\n\n/**\n * Optimize a JSON Schema by removing unnecessary metadata\n *\n * Removes fields like:\n * - title (unless inside properties)\n * - examples\n * - default values (if preserveDefaults is false)\n *\n * @param schema - JSON Schema to optimize\n * @param options - Optimization options\n * @returns Optimized schema\n */\nexport function optimizeSchema(\n schema: Record<string, any>,\n options: {\n preserveDefaults?: boolean;\n preserveDescriptions?: boolean;\n } = {}\n): Record<string, any> {\n const {\n preserveDefaults = false,\n preserveDescriptions = true,\n } = options;\n\n const optimized: Record<string, any> = {};\n\n for (const [key, value] of Object.entries(schema)) {\n // Skip metadata fields\n if (key === 'title' || key === 'examples') {\n continue;\n }\n\n // Skip default values if not preserving\n if (key === 'default' && !preserveDefaults) {\n continue;\n }\n\n // Always preserve descriptions (helpful for LLM)\n if (key === 'description' && preserveDescriptions) {\n optimized[key] = value;\n continue;\n }\n\n // Recursively optimize nested objects\n if (typeof value === 'object' && value !== null && !Array.isArray(value)) {\n optimized[key] = optimizeSchema(value, options);\n } else if (Array.isArray(value)) {\n optimized[key] = value.map((item) =>\n typeof item === 'object' && item !== null\n ? 
optimizeSchema(item, options)\n : item\n );\n } else {\n optimized[key] = value;\n }\n }\n\n return optimized;\n}\n\n/**\n * Validate that a schema is compatible with OpenAI strict mode\n *\n * Checks for:\n * - All objects have additionalProperties: false\n * - All properties are required\n * - No $ref or $defs\n *\n * @param schema - JSON Schema to validate\n * @returns Array of validation errors (empty if valid)\n */\nexport function validateStrictMode(schema: any): string[] {\n const errors: string[] = [];\n\n function validate(obj: any, path: string = 'root'): void {\n if (typeof obj !== 'object' || obj === null) {\n return;\n }\n\n // Check for $ref or $defs\n if (obj.$ref) {\n errors.push(`${path}: Contains $ref (not allowed in strict mode)`);\n }\n if (obj.$defs) {\n errors.push(`${path}: Contains $defs (not allowed in strict mode)`);\n }\n\n // Check objects for strict mode compliance\n if (obj.type === 'object') {\n if (obj.additionalProperties !== false) {\n errors.push(\n `${path}: Object must have additionalProperties: false`\n );\n }\n\n if (obj.properties) {\n const allProps = Object.keys(obj.properties);\n const required = obj.required || [];\n\n if (required.length !== allProps.length) {\n errors.push(\n `${path}: Not all properties are required (${required.length}/${allProps.length})`\n );\n }\n\n // Validate nested properties\n for (const [propName, propSchema] of Object.entries(obj.properties)) {\n validate(propSchema, `${path}.${propName}`);\n }\n }\n }\n\n // Validate arrays\n if (obj.type === 'array' && obj.items) {\n validate(obj.items, `${path}[]`);\n }\n\n // Validate union types\n for (const key of ['anyOf', 'oneOf', 'allOf']) {\n if (Array.isArray(obj[key])) {\n obj[key].forEach((item: any, i: number) => {\n validate(item, `${path}.${key}[${i}]`);\n });\n }\n }\n }\n\n validate(schema);\n return errors;\n}\n","/**\n * LLM Tools - OpenAI Function Calling Provider\n *\n * Adapter for exposing tools in OpenAI function calling format.\n * Converts tool registry to OpenAI-compatible tool definitions.\n */\n\nimport { ToolRegistry } from '../registry';\nimport { ToolDefinition } from '../types';\nimport { zodToOpenAISchema } from '../schema';\n\n/**\n * OpenAI Tool Provider\n *\n * Converts tools from the registry into OpenAI function calling format.\n *\n * Example usage:\n * ```typescript\n * const registry = new ToolRegistry();\n * // ... 
register tools\n *\n * const provider = new OpenAIToolProvider(registry);\n * const tools = provider.getToolDefinitions();\n *\n * // Use with OpenAI API\n * const response = await openai.chat.completions.create({\n * model: 'gpt-4',\n * messages: [...],\n * tools: tools,\n * });\n * ```\n */\nexport class OpenAIToolProvider {\n constructor(private registry: ToolRegistry) {}\n\n /**\n * Get OpenAI-compatible tool definitions for all registered tools\n * Only includes tools where availability.openai = true\n *\n * @returns Array of tool definitions in OpenAI format\n */\n getToolDefinitions(): ToolDefinition[] {\n return this.registry\n .getTools()\n .filter((tool) => tool.availability.openai)\n .map((tool) => ({\n type: 'function' as const,\n function: {\n name: tool.name,\n description: tool.description,\n parameters: zodToOpenAISchema(tool.schema),\n strict: true, // Enable OpenAI strict mode for better reliability\n },\n }));\n }\n\n /**\n * Get tool definitions for specific tools only\n * Only includes tools where availability.openai = true\n *\n * @param toolNames - Array of tool names to include\n * @returns Filtered array of tool definitions\n */\n getToolDefinitionsFiltered(toolNames: string[]): ToolDefinition[] {\n const toolSet = new Set(toolNames);\n\n return this.registry\n .getTools()\n .filter((tool) => toolSet.has(tool.name) && tool.availability.openai)\n .map((tool) => ({\n type: 'function' as const,\n function: {\n name: tool.name,\n description: tool.description,\n parameters: zodToOpenAISchema(tool.schema),\n strict: true,\n },\n }));\n }\n\n /**\n * Get tool definitions as plain JSON (for API serialization)\n *\n * @returns Tool definitions as JSON-serializable array\n */\n toJSON(): any[] {\n return this.getToolDefinitions();\n }\n\n /**\n * Get a single tool definition by name\n *\n * @param toolName - Name of the tool\n * @returns Tool definition or undefined if not found\n */\n getToolDefinition(toolName: string): ToolDefinition | undefined {\n const tool = this.registry.get(toolName);\n\n if (!tool) {\n return undefined;\n }\n\n return {\n type: 'function' as const,\n function: {\n name: tool.name,\n description: tool.description,\n parameters: zodToOpenAISchema(tool.schema),\n strict: true,\n },\n };\n }\n\n /**\n * Get count of available tools\n */\n getToolCount(): number {\n return this.registry.size();\n }\n\n /**\n * Get names of all available tools\n */\n getToolNames(): string[] {\n return this.registry.getToolNames();\n }\n}\n","/**\n * Knowledge Service - Retrieves and parses knowledge for action generation\n *\n * This module provides knowledge retrieval and parsing utilities, including\n * support for multimodal knowledge content (text + images).\n *\n * Ported from: webagent/agent_backend/utils/knowledge_parser.py\n */\n\nimport { TextPart, ImagePart } from 'ai';\nimport { PreloadedKnowledge } from 'shiplight-types';\n/**\n * Knowledge item structure with images\n */\nexport interface KnowledgeItem {\n /** Knowledge content with markdown image placeholders */\n content: string;\n /** Optional images referenced in the content */\n images?: Array<{\n /** Image hash (SHA-256) or UUID for backward compatibility */\n hash?: string;\n uuid?: string;\n /** Image URL (S3 presigned URL or other) */\n url: string;\n }>;\n /** Knowledge source/category */\n source?: string;\n /** Relevance score (0-1) */\n relevance?: number;\n}\n\n/**\n * Parse knowledge content and split it around image placeholders.\n *\n * This function parses markdown image syntax like 
`![alt](image:hash)` and\n * splits the content into text and image parts for multimodal message construction.\n *\n * @param content - The knowledge content with image placeholders like ![alt](image:uuid)\n * @param images - List of image objects with 'hash'/'uuid' and 'url' keys\n * @returns List of tuples (content_type, content) where:\n * - ('text', text_content) for text parts\n * - ('image', image_url) for images\n *\n * @example\n * Input:\n * content: \"Step 1: Click ![Login](image:abc-123) then enter password\"\n * images: [{ hash: \"abc-123\", url: \"https://s3...\" }]\n * Output:\n * [\n * ['text', 'Step 1: Click '],\n * ['image', 'https://s3...'],\n * ['text', ' then enter password']\n * ]\n */\nexport function parseKnowledgeWithImages(\n content: string,\n images: Array<{ hash?: string; uuid?: string; url: string }>\n): Array<TextPart | ImagePart> {\n // Create hash to URL mapping (supporting both 'uuid' and 'hash' fields for compatibility)\n const hashToUrl: Map<string, string> = new Map();\n\n for (const img of images) {\n // Support both 'uuid' (old) and 'hash' (new) field names\n if (img.hash && img.url) {\n hashToUrl.set(img.hash, img.url);\n } else if (img.uuid && img.url) {\n hashToUrl.set(img.uuid, img.url);\n }\n }\n\n // Pattern to match markdown images with image:hash placeholder\n // Matches: ![alt text](image:hash) where hash is 64 hex chars (SHA-256)\n // Also supports old UUID format for backward compatibility\n const pattern = /!\\[([^\\]]*)\\]\\(image:([a-f0-9]{64}|[a-zA-Z0-9\\-]+)\\)/g;\n\n const parts: Array<TextPart | ImagePart> = [];\n let lastEnd = 0;\n\n // Find all matches\n let match: RegExpExecArray | null;\n while ((match = pattern.exec(content)) !== null) {\n // Add text before the image\n const textBefore = content.slice(lastEnd, match.index);\n if (textBefore) {\n parts.push({ type: 'text', text: textBefore });\n }\n\n // Get the hash/uuid and find corresponding URL\n // match[1] is alt text (not used currently)\n const hashOrUuid = match[2];\n\n if (hashToUrl.has(hashOrUuid)) {\n // Add the image with its URL\n parts.push({ type: 'image', image: new URL(hashToUrl.get(hashOrUuid)!) 
});\n } else {\n // If no URL found, keep the original markdown\n parts.push({ type: 'text', text: match[0] });\n }\n\n lastEnd = match.index + match[0].length;\n }\n\n // Add any remaining text after the last image\n const remainingText = content.slice(lastEnd);\n if (remainingText) {\n parts.push({ type: 'text', text: remainingText });\n }\n\n // If no parts were created (no images found), return the whole content as text\n if (parts.length === 0) {\n parts.push({ type: 'text', text: content });\n }\n\n return parts;\n}\n\n/**\n * Process multiple knowledge items with images into a single list of content parts.\n *\n * @param knowledgeItems - List of knowledge items with 'content' and 'images' keys\n * @returns Flattened list of content parts ready for multimodal message construction\n */\nexport function createMultimodalContentParts(\n knowledgeItems: KnowledgeItem[]\n): Array<TextPart | ImagePart> {\n const allParts: Array<TextPart | ImagePart> = [];\n\n for (const item of knowledgeItems) {\n const content = item.content || '';\n const images = item.images || [];\n\n // Parse this knowledge item\n const parts = parseKnowledgeWithImages(content, images);\n\n // Add separator between knowledge items\n if (allParts.length > 0 && parts.length > 0) {\n allParts.push({ type: 'text', text: '\\n\\n' });\n }\n\n allParts.push(...parts);\n }\n\n return allParts;\n}\n\n/**\n * Default feature flag: Enable knowledge images in LLM prompts\n * This is the fallback when organization settings are not available.\n */\nexport const DEFAULT_ENABLE_KNOWLEDGE_IMAGES = false;\n\n/**\n * Create content parts specifically for LLM knowledge injection.\n *\n * This function wraps knowledge content with special tags and converts it to\n * text-only or multimodal format depending on the enableImages parameter.\n *\n * @param knowledgeItems - List of knowledge items with 'content' and 'images' keys\n * @param enableImages - Whether to include images (defaults to DEFAULT_ENABLE_KNOWLEDGE_IMAGES)\n * @returns Array of MessageContent parts ready for LLM consumption\n *\n * @example\n * Output format (text-only when enableImages=false):\n * [\n * { type: \"text\", text: \"<retrieved_knowledge>...\" },\n * { type: \"text\", text: \"Knowledge content...\" },\n * { type: \"text\", text: \"</retrieved_knowledge>\" }\n * ]\n *\n * Output format (multimodal when enableImages=true):\n * [\n * { type: \"text\", text: \"<retrieved_knowledge>...\" },\n * { type: \"text\", text: \"Knowledge content...\" },\n * { type: \"image\", image: URL(...) 
},\n * { type: \"text\", text: \"</retrieved_knowledge>\" }\n * ]\n */\nexport function createKnowledgeParts(\n knowledgeItems: KnowledgeItem[],\n enableImages: boolean = DEFAULT_ENABLE_KNOWLEDGE_IMAGES\n): Array<TextPart | ImagePart> {\n if (enableImages) {\n // Multimodal: include images\n const parts = createMultimodalContentParts(knowledgeItems);\n if (parts.length === 0) {\n return [];\n }\n\n const preamble: TextPart = {\n type: 'text',\n text: '\\n\\n<retrieved_knowledge>\\n\\nBelow are expert curated knowledge that are retrieved from the knowledge base; APPLY THESE KNOWLEDGES IF THEY ARE RELEVANT TO THE TASK:\\n',\n };\n\n const postamble: TextPart = {\n type: 'text',\n text: '\\n\\n</retrieved_knowledge>\\n\\n',\n };\n\n return [preamble, ...parts, postamble];\n } else {\n // Text-only: exclude knowledge items that have images (text doesn't make sense without images)\n const textOnlyItems = knowledgeItems.filter(item => !item.images || item.images.length === 0);\n\n const textContent = textOnlyItems\n .map(item => item.content || '')\n .filter(content => content.length > 0)\n .join('\\n\\n');\n\n if (!textContent) {\n return [];\n }\n\n const preamble: TextPart = {\n type: 'text',\n text: '\\n\\n<retrieved_knowledge>\\n\\nBelow are expert curated knowledge that are retrieved from the knowledge base; APPLY THESE KNOWLEDGES IF THEY ARE RELEVANT TO THE TASK:\\n',\n };\n\n const content: TextPart = {\n type: 'text',\n text: textContent,\n };\n\n const postamble: TextPart = {\n type: 'text',\n text: '\\n\\n</retrieved_knowledge>\\n\\n',\n };\n\n return [preamble, content, postamble];\n }\n}\n\n/**\n * Count the number of images in knowledge parts\n * Returns 0 if enableImages is false\n * @param knowledgeItems - List of knowledge items\n * @param enableImages - Whether images are enabled (defaults to DEFAULT_ENABLE_KNOWLEDGE_IMAGES)\n */\nexport function countKnowledgeImages(\n knowledgeItems: KnowledgeItem[],\n enableImages: boolean = DEFAULT_ENABLE_KNOWLEDGE_IMAGES\n): number {\n if (!enableImages) {\n return 0;\n }\n let count = 0;\n for (const item of knowledgeItems) {\n if (item.images) {\n count += item.images.length;\n }\n }\n return count;\n}\n\n/**\n * Knowledge Service for retrieving relevant knowledge\n */\nexport class KnowledgeService {\n /**\n * Retrieve relevant knowledge for a statement\n *\n * @param statement - Natural language statement\n * @param preloadedKnowledges - Optional preloaded knowledges from agent context\n * @returns Array of relevant knowledge items\n *\n * Implementation:\n * - If preloadedKnowledges are provided, returns all of them converted to KnowledgeItem format\n * (semantic matching is not available locally - all knowledges are included)\n * - For runtime (sandbox/runner), semantic matching should be done via API before passing here\n * - For offline mode, all knowledges are included in the export\n */\n async retrieve(_statement: string, preloadedKnowledges?: PreloadedKnowledge[]): Promise<KnowledgeItem[]> {\n // If preloaded knowledges are available, convert and return them\n if (preloadedKnowledges && preloadedKnowledges.length > 0) {\n return preloadedKnowledges.map(knowledge => ({\n content: knowledge.content,\n // Pass through images with presigned URLs (from retrieve endpoint)\n images: knowledge.images,\n source: knowledge.type,\n relevance: knowledge.isAlwaysInclude ? 
1.0 : 0.5,\n }));\n }\n\n // No knowledges available\n return [];\n }\n}\n","/**\n * Action Generation Prompts\n *\n * This module provides stub implementations of prompt generation functions.\n * TODO: A colleague will provide the real prompt templates to replace these stubs.\n *\n * Structure mirrors Python implementation in webagent/agent_backend/api/actions/action_prompts.py\n */\n\nimport { UserContent } from 'ai';\nimport { KnowledgeItem, createKnowledgeParts } from '../../services/knowledgeService';\n\n/**\n * Get current time formatted for prompts using America/Los_Angeles timezone.\n * Matches Python implementation: datetime.now(ZoneInfo(\"America/Los_Angeles\"))\n *\n * @returns Formatted time string like \"2025-12-01 21:25:30.123 PST\"\n */\nfunction getCurrentTimeForPrompt(): string {\n const now = new Date();\n\n const formatter = new Intl.DateTimeFormat('en-US', {\n timeZone: 'America/Los_Angeles',\n year: 'numeric',\n month: '2-digit',\n day: '2-digit',\n hour: '2-digit',\n minute: '2-digit',\n second: '2-digit',\n hour12: false,\n timeZoneName: 'short',\n });\n\n const parts = formatter.formatToParts(now);\n const year = parts.find(p => p.type === 'year')!.value;\n const month = parts.find(p => p.type === 'month')!.value;\n const day = parts.find(p => p.type === 'day')!.value;\n const hour = parts.find(p => p.type === 'hour')!.value;\n const minute = parts.find(p => p.type === 'minute')!.value;\n const second = parts.find(p => p.type === 'second')!.value;\n const timeZoneName = parts.find(p => p.type === 'timeZoneName')!.value;\n const milliseconds = String(now.getMilliseconds()).padStart(3, '0');\n\n return `${year}-${month}-${day} ${hour}:${minute}:${second}.${milliseconds} ${timeZoneName}`;\n}\n\n/**\n * Page context information for prompt generation\n */\nexport interface PageContext {\n /** Formatted DOM elements text */\n elementsText: string;\n /** Current page URL */\n currentUrl: string;\n /** Current page title */\n currentTitle: string;\n /** Current tab information text */\n currentTabText: string;\n /** All available tabs text */\n tabsText: string;\n /** Screenshot base64 (for future use) */\n screenshotBase64?: string;\n /** Sliced screenshots base64 array (for future use) */\n slicedScreenshotsBase64?: string[];\n}\n\n// KnowledgeItem is now imported from knowledgeService.ts (removed duplicate definition)\n\n/**\n * Generate system prompt for action generation\n *\n * Python equivalent: get_action_generation_system_prompt() in action_prompts.py\n *\n * @param actionDescription - Description of available actions (from controller registry)\n * @returns System prompt string\n */\nexport function getActionGenerationSystemPrompt(\n actionDescription: string,\n): string {\n return `# Your Role\nYou are part of a end-to-end testing system that is designed to automate the testing of a website. Given an instruction in natural language, your job is to translate it into an action in the predefined actions. The instruction might not match any action in the predefined actions or might require to interact with an element that is not on the page. It's your job to detect these cases and return an empty action.\n\n# Rules\n## Action Selection Rules\n- If the instruction requires a specific action, you must select that action. 
If no action matches the specific action, you must return an empty action so that testing system can aware of the situation.\n- If asked to do nothing or ignore the instruction or something similar, you must select \\`wait\\` action of 1 second.\n- If asked to verify something, you must select \\`verify\\` action.\n- If asked to do accurate interaction, like selecting a specific chunk of text or drawing a bounding box, you must select \\`perform_accurate_operation\\` action.\n- If asked to scroll, you decide if you need to \\`scroll\\` the page or \\`scroll_on_element\\`. also you need to calculate how much to scroll.\n\n## Element Selection Rules\n- If the instruction requires to interact with a specific element, you must select that element.\n- If no element matches the specific element, you must return an empty action so that testing system can aware of the situation. NEVER click on alternative elements as a workaround. NEVER try to navigate to find the element (e.g. by scrolling, closing modals, clicking other buttons, or refreshing the page).\n- Fail fast: If the exact target element is not visible on the current page, return an empty action immediately. The testing system will handle recovery.\n- The type of the selected element doesn't have to match the target, for example, if the instruction requires to interact with an image but no image element matches, you can select a div that contains the image.\n\n## Instruction Completion Analysis Rules\n- Reasoning about the instruction completion is critical. You must analyze the instruction and your action to determine if your action will complete the instruction.\n\n## Response Format Rules\n- Respond using valid JSON format, which can be parsed by python json.loads():\n{\n \"thought\": \"...\", // step by step reasoning of your decision making process\n \"description\": \"...\", // detailed description of the action to be performed. (e.g. click on the 'Submit' button to submit the form)\n \"action\": {\"one_action_name\": {// action-specific parameter}},\n \"completes_instruction\": true/false // boolean indicating whether this action completes the given instruction. Set to false if the action is only partial, requires follow-up actions, or cannot fully complete the instruction.\n}\n\nFollow the rules above strictly.\n\n# Action Space\n${actionDescription}\n\n# Examples\nExample of \\`verify\\` action:\ninstruction: \"Verify that the page title is 'Home'\"\n{\n \"thought\": \"I understand the instruction is to verify that the page title is 'Home'. I will use the \\`verify\\` action to verify the page title.\", // Do not verify it yourself, just translate the instruction to the \\`verify\\` action\n \"description\": \"Verify that the page title is 'Home'\",\n \"action\": {\"verify\": {\"statement\": \"the page title is 'Home'\"}}, // the statement should be the same wording as the instruction, don't rephrase it\n \"completes_instruction\": true // this action fully completes the instruction\n}\n\nExample of \\`save_variable\\` action:\ninstruction: \"Extract and save the page title as page_title\"\n{\n \"thought\": \"I understand the instruction is to save the page title as page_title. The current page title is 'Home'. 
I will use the \\`save_variable\\` action to save the page title.\",\n \"description\": \"Save the page title as variable page_title\",\n \"action\": {\"save_variable\": {\"name\": \"page_title\", \"value\": \"Home\"}}, // the value should be the same wording as the instruction, don't rephrase it\n \"completes_instruction\": true // this action fully completes the instruction\n}\n\nExample of empty action when the target element is not on the page, or the instruction cannot be completed for any reason:\n{\n \"thought\": \"The user wants me to click the 'Create Entry' button. However, the current page is a sign-in page and the only interactive element is the 'Sign In' button. The 'Create Entry' button is not present on the page. The previous attempt to click this button also failed. Therefore, I cannot complete the instruction and will return an empty action.\",\n \"description\": \"Click the 'Create Entry' button.\",\n \"action\": {}, // empty action object to indicate the instruction cannot be completed\n \"completes_instruction\": false\n}\n`;\n}\n\n/**\n * Format execution history for prompt\n *\n * Python equivalent: format_execution_history() in action_prompts.py\n *\n * @param executionHistory - Array of [description, feedback] tuples\n * @returns Formatted execution history string (or empty string if none)\n */\nfunction formatExecutionHistory(\n executionHistory?: Array<[string, string]>\n): string {\n if (!executionHistory || executionHistory.length === 0) {\n return '';\n }\n\n let historyText = '';\n executionHistory.forEach(([description, feedback], index) => {\n historyText += `(${index + 1}) Description: ${description}\\n Feedback: ${feedback}\\n`;\n });\n\n return `## Additional context\nYou just executed following steps in order:\n${historyText}`;\n}\n\n/**\n * Generate user prompt for action generation (multimodal message array)\n *\n * Python equivalent: get_action_generation_user_prompt() in action_prompts.py\n *\n * @param pageContext - Current page context information\n * @param goal - The instruction/goal to accomplish\n * @param placeholderData - Placeholder data for variable substitution\n * @param executionHistory - Previous action execution history\n * @param knowledges - Retrieved knowledge items with optional images\n * @param screenshotBase64 - Screenshot base64 (for future use)\n * @param useSlicedScreenshots - Whether to use sliced screenshots\n * @param currentTime - Current time string for prompt\n * @param enableKnowledgeImages - Whether to include knowledge images\n * @param sensitiveKeys - Set of keys that are sensitive (values will be masked)\n * @returns Multimodal message content array\n */\nexport function getActionGenerationUserPrompt(\n pageContext: PageContext,\n goal: string,\n placeholderData?: Record<string, any>,\n executionHistory?: Array<[string, string]>,\n knowledges?: KnowledgeItem[],\n screenshotBase64?: string,\n useSlicedScreenshots: boolean = false,\n currentTime: string = getCurrentTimeForPrompt(),\n enableKnowledgeImages: boolean = false,\n sensitiveKeys?: Set<string>\n): UserContent {\n\n const messages: UserContent = [];\n\n // Part 1: Task and DOM state\n const firstPart = `\n# Instruction\n\"${goal}\"\n\n# Current webpage state\n## Tab information:\n${pageContext.currentTabText}Available tabs:\n${pageContext.tabsText}\n\n## Element interaction guidelines:\n - Only use indexes that exist in the provided element list\n - Each element has a unique index number (e.g., \"[33]<button>\")\n - The bounding box and index of each element 
is marked on the screenshot.\n - Elements marked with \"[]Non-interactive text\" are non-interactive (for context only)\n - Elements are indented to show the structure of the element tree, with indentation level indicating depth\n - When considering an element, also consider its children elements\n - If an element is scrollable, it will be marked with \"(SCROLLABLE)\" (e.g., \"[33](SCROLLABLE)<ul>\"), use the \\`scroll_on_element\\` action to scroll on the element.\n\n## Interactive elements from current page:\n${pageContext.elementsText}\n `;\n\n messages.push({\n type: 'text',\n text: firstPart,\n });\n\n // Part 2: Screenshot\n if (useSlicedScreenshots && pageContext.slicedScreenshotsBase64) {\n for (const screenshot of pageContext.slicedScreenshotsBase64) {\n messages.push({\n type: \"text\",\n text: \"The following images provided are sliced screenshots of the current webpage, with interactive elements highlighted. The element index label locate at the top right corner of the bounding box.\",\n });\n messages.push({\n type: 'image',\n image: screenshot,\n });\n }\n } else if (screenshotBase64) {\n messages.push({\n type: \"text\",\n text: \"The following image provided is a screenshot of the current webpage, with interactive elements highlighted. The element index label locate at the top right corner of the bounding box.\",\n });\n messages.push({\n type: 'image',\n image: screenshotBase64\n });\n }\n\n // Part 3: Retrieved Knowledge (if provided)\n // Use the ported knowledge parsing functions\n if (knowledges && knowledges.length > 0) {\n const knowledgeParts = createKnowledgeParts(knowledges, enableKnowledgeImages);\n messages.push(...knowledgeParts);\n }\n\n // Part 4: Current time, sensitive data, execution history, ending instruction\n let endingText = '';\n\n endingText += `\\nCurrent local time is ${currentTime}.\\n`;\n\n // Part 5: Placeholder data\n if (placeholderData && Object.keys(placeholderData).length > 0) {\n const placeholderList: string[] = [];\n for (const key of Object.keys(placeholderData)) {\n const isSensitive = sensitiveKeys?.has(key);\n if (isSensitive) {\n // For sensitive variables, only show the name\n placeholderList.push(` - ${key}: [SENSITIVE - value hidden]`);\n } else {\n // For non-sensitive variables, show both name and value\n const value = placeholderData[key];\n const valueStr = typeof value === 'string' ? 
value : JSON.stringify(value);\n placeholderList.push(` - ${key}: \"${valueStr}\"`);\n }\n }\n\n endingText += `\n## Available Data Placeholders\nThe following placeholders are available for use in your actions:\n${placeholderList.join('\\n')}\n\nTo use them, write Jinja-like template syntax: {{ placeholder_name }}\n- Use the EXACT placeholder name as shown above\n- Do NOT use the actual value directly\n- The values shown are for context only to help you understand what data is available\n- In action descriptions, describe what the placeholder represents in natural language (e.g., \"Type the first user name\" instead of \"Type {{ firstUserName }}\")\n`;\n }\n\n // Part 6: Execution history\n if (executionHistory && executionHistory.length > 0) {\n const executionHistoryText = formatExecutionHistory(executionHistory);\n endingText += '\\n' + executionHistoryText;\n }\n\n // Part 7: Ending instruction\n endingText += '\\nBased on the above information, please determine the right action to accomplish the task.\\n';\n\n messages.push({\n type: 'text',\n text: endingText,\n });\n\n return messages;\n}\n","/**\n * Pure Vision mode action generation using OpenAI Computer Use API\n *\n * This module implements action generation based solely on screenshots,\n * without DOM parsing. It uses OpenAI's computer-use-preview model.\n */\n\nimport OpenAI from \"openai\";\nimport type {\n ResponseComputerToolCall,\n ResponseInput,\n ResponseOutputItem,\n} from \"openai/src/resources/responses/responses.js\";\nimport { ElementHandle, Page } from \"playwright\";\nimport { ActionDataEntity, ActionEntity, ActionEntityLocatorInfo } from \"../../actions/types\";\nimport { getSdkConfig } from \"../../config\";\nimport { pickBestLocatorForElement } from \"../../dom/utils/locator\";\nimport { agentLogger } from \"../../utils/agentLogger\";\nimport { AgentOptions, GeneratedAction, TaskExecutionContext } from \"../core/types\";\n\n/**\n * Prepare screenshot for pure vision mode\n * Resizes screenshot to viewport size and returns as base64\n */\nasync function prepareScreenshot(page: Page, viewportWidth: number, viewportHeight: number): Promise<string> {\n\n // Remove any existing highlights\n await page.evaluate(() => {\n const container = document.getElementById(\"playwright-highlight-container\");\n if (container) {\n container.remove();\n }\n if ((window as any)._highlightCleanupFunctions) {\n ((window as any)._highlightCleanupFunctions as (() => void)[]).forEach((fn) => fn());\n (window as any)._highlightCleanupFunctions = [];\n }\n });\n\n // Take screenshot\n const screenshot = await page.screenshot({\n type: \"png\",\n fullPage: false,\n });\n\n // Get actual screenshot dimensions and always resize to viewport size\n // (on Mac with Retina displays, screenshot may be larger than viewport)\n const { default: sharp } = await import(\"sharp\");\n const image = sharp(screenshot);\n const metadata = await image.metadata();\n const actualWidth = metadata.width || 0;\n const actualHeight = metadata.height || 0;\n\n agentLogger.log(\n `Screenshot actual dimensions: ${actualWidth}x${actualHeight}, viewport: ${viewportWidth}x${viewportHeight}`,\n );\n\n // Always resize to viewport size to match Python implementation behavior\n // This ensures consistent coordinate mapping regardless of device pixel ratio\n let resizedScreenshot: Buffer;\n if (actualWidth !== viewportWidth || actualHeight !== viewportHeight) {\n agentLogger.log(`Resizing screenshot from ${actualWidth}x${actualHeight} to 
${viewportWidth}x${viewportHeight}`);\n resizedScreenshot = await image.resize(viewportWidth, viewportHeight).png().toBuffer();\n } else {\n // Even if dimensions match, use sharp to ensure consistent format\n resizedScreenshot = screenshot;\n }\n\n // Convert to base64\n const screenshotBase64 = resizedScreenshot.toString(\"base64\");\n\n return screenshotBase64;\n}\n\n/**\n * Convert absolute viewport coordinates to element-anchored coordinates\n */\nasync function convertToElementAnchoredCoordinates(\n page: Page,\n x: number,\n y: number,\n): Promise<{ relative_x: number; relative_y: number; element: ElementHandle | null }> {\n // Find element at the given coordinates\n const elementHandle = await page.evaluateHandle(\n (coords: { x: number; y: number }) => document.elementFromPoint(coords.x, coords.y),\n { x, y },\n );\n\n if (!elementHandle) {\n throw new Error(`No element found at (${x}, ${y})`);\n }\n\n const element = elementHandle.asElement();\n if (!element) {\n await elementHandle.dispose();\n throw new Error(`No element found at (${x}, ${y})`);\n }\n\n // Get element's bounding box\n const boundingBox = await element.boundingBox();\n if (!boundingBox) {\n await elementHandle.dispose();\n throw new Error(\"Element has no bounding box\");\n }\n\n // Calculate relative coordinates (centered on element)\n const relative_x = x - (boundingBox.x + boundingBox.width / 2);\n const relative_y = y - (boundingBox.y + boundingBox.height / 2);\n\n return {\n relative_x,\n relative_y,\n element,\n };\n}\n\n/**\n * Get element info (locator, xpath, etc.) for an element\n */\nasync function getElementLocatorInfo(page: Page, element: ElementHandle | null): Promise<ActionEntityLocatorInfo> {\n if (!element) {\n return {\n xpath: undefined,\n locator: undefined,\n frame_path: [],\n };\n }\n // Generate best locator\n const locator = element ? await pickBestLocatorForElement(page, element as ElementHandle<HTMLElement>) : null;\n\n return {\n xpath: undefined, // getXPath sometimes does not return the correct xpath, so we only use locator for now\n locator: locator || undefined,\n frame_path: [],\n };\n}\n\nasync function getXPath(element: ElementHandle<HTMLElement>): Promise<string> {\n // Get xpath using JavaScript\n const xpath = await element.evaluate((el: Element) => {\n function getElementPosition(currentElement: Element) {\n if (!currentElement.parentElement) {\n return 0; // No parent means no siblings\n }\n\n const tagName = currentElement.nodeName.toLowerCase();\n\n const siblings = Array.from(currentElement.parentElement.children).filter(\n (sib) => sib.nodeName.toLowerCase() === tagName,\n );\n\n if (siblings.length === 1) {\n return 0; // Only element of its type\n }\n\n const index = siblings.indexOf(currentElement) + 1; // 1-based index\n return index;\n }\n\n function getXPathTree(el: Element, stopAtBoundary = true) {\n const segments = [];\n let currentElement = el;\n\n while (currentElement && currentElement.nodeType === Node.ELEMENT_NODE) {\n // Stop if we hit a shadow root or iframe\n if (\n stopAtBoundary &&\n (currentElement.parentNode instanceof ShadowRoot || currentElement.parentNode instanceof HTMLIFrameElement)\n ) {\n break;\n }\n\n const position = getElementPosition(currentElement);\n const tagName = currentElement.nodeName.toLowerCase();\n const xpathIndex = position > 0 ? 
`[${position}]` : \"\";\n segments.unshift(`${tagName}${xpathIndex}`);\n\n currentElement = currentElement.parentNode as Element;\n }\n\n const result = segments.join(\"/\");\n return result;\n }\n return getXPathTree(el);\n });\n return xpath;\n}\n\n/**\n * Map OpenAI CUA action to ActionDataEntity and element info\n */\nasync function mapAction(\n page: Page,\n action: ResponseComputerToolCall[\"action\"],\n): Promise<{\n action_data: ActionDataEntity | null;\n locatorInfo: ActionEntityLocatorInfo;\n}> {\n let action_data: ActionDataEntity | null = null;\n let element: ElementHandle | null = null;\n\n switch (action.type) {\n case \"click\": {\n // Handle regular click and right-click based on button type\n const action_name = action.button === \"right\" ? \"right_click_by_coordinates\" : \"click_by_coordinates\";\n\n if (action.x === undefined || action.y === undefined) {\n break;\n }\n\n const elementAnchoredCoords = await convertToElementAnchoredCoordinates(page, action.x, action.y);\n\n action_data = {\n action_name,\n kwargs: {\n relative_x: elementAnchoredCoords.relative_x,\n relative_y: elementAnchoredCoords.relative_y,\n },\n };\n element = elementAnchoredCoords.element;\n break;\n }\n\n case \"double_click\": {\n if (action.x === undefined || action.y === undefined) {\n break;\n }\n\n const elementAnchoredCoords = await convertToElementAnchoredCoordinates(page, action.x, action.y);\n\n action_data = {\n action_name: \"double_click_by_coordinates\",\n kwargs: {\n relative_x: elementAnchoredCoords.relative_x,\n relative_y: elementAnchoredCoords.relative_y,\n },\n };\n element = elementAnchoredCoords.element;\n break;\n }\n\n case \"drag\": {\n if (!action.path || action.path.length < 2) {\n break;\n }\n\n const startX = action.path[0].x;\n const startY = action.path[0].y;\n const endX = action.path[1].x;\n const endY = action.path[1].y;\n\n const elementAnchoredCoords = await convertToElementAnchoredCoordinates(page, startX, startY);\n\n action_data = {\n action_name: \"drag_drop\",\n kwargs: {\n relative_x: elementAnchoredCoords.relative_x,\n relative_y: elementAnchoredCoords.relative_y,\n delta_x: endX - startX,\n delta_y: endY - startY,\n },\n };\n element = elementAnchoredCoords.element;\n break;\n }\n }\n\n const locatorInfo = await getElementLocatorInfo(page, element);\n\n return {\n action_data,\n locatorInfo,\n };\n}\n\n/**\n * Generate action using OpenAI Computer Use API (Pure Vision mode)\n *\n * This function uses OpenAI's computer-use-preview model to generate actions\n * based solely on screenshots, without DOM parsing.\n */\nexport async function generateAction(\n statement: string,\n context: TaskExecutionContext,\n options: AgentOptions = {},\n): Promise<GeneratedAction> {\n try {\n const { page } = context;\n\n // Get viewport size\n const viewport = page.viewportSize();\n if (!viewport) {\n return {\n status: \"error\",\n error: \"Viewport size not available\",\n };\n }\n\n const viewportWidth = viewport.width;\n const viewportHeight = viewport.height;\n\n agentLogger.log(`Viewport size: ${viewportWidth}x${viewportHeight}`);\n\n // Prepare screenshot\n const screenshotB64 = await prepareScreenshot(page, viewportWidth, viewportHeight);\n\n // Get OpenAI API key from config\n const config = getSdkConfig();\n const apiKey = config.env?.OPENAI_API_KEY;\n if (!apiKey) {\n return {\n status: \"error\",\n error: \"OpenAI API key not found. 
Set OPENAI_API_KEY environment variable or configure via SDK config.\",\n };\n }\n\n // Initialize OpenAI client\n const client = new OpenAI({ apiKey });\n\n agentLogger.log(\"Sending request to OpenAI CUA...\");\n\n const prompt = `\nYou will be given an action to execute and screenshot of the current screen.\nOutput one computer_call object that will accomplish this action.\nAction: ${statement}\n`;\n\n // Prepare input for OpenAI CUA API\n const cuaInput: ResponseInput = [\n {\n role: \"user\",\n content: [\n { type: \"input_text\", text: prompt },\n {\n type: \"input_image\",\n detail: \"auto\",\n image_url: `data:image/png;base64,${screenshotB64}`,\n },\n ],\n },\n ];\n\n // Call OpenAI Computer Use API\n // Note: Using 'as any' for tool type as the exact type may vary by OpenAI SDK version\n const response = await client.responses.create({\n model: \"computer-use-preview\",\n tools: [\n {\n type: \"computer-preview\",\n display_width: viewportWidth,\n display_height: viewportHeight,\n environment: \"browser\",\n },\n ],\n input: cuaInput,\n truncation: \"auto\",\n temperature: 0.1,\n });\n\n agentLogger.log(\"Received response from OpenAI CUA\");\n\n // Extract computer calls from response\n const computerCalls = response.output.filter((item: ResponseOutputItem) => item.type === \"computer_call\");\n const computerCall = computerCalls[0] || null;\n\n if (!computerCall) {\n const outputText = response.output_text || \"\";\n return {\n status: \"error\",\n reasoning: outputText || \"Invalid action generation response\",\n error: \"No computer_call action generated\",\n };\n }\n\n const action = computerCall.action;\n agentLogger.log(`Generated action: ${JSON.stringify(action)}`);\n\n // Map action to ActionDataEntity\n const { action_data, locatorInfo } = await mapAction(page, action);\n\n if (!action_data) {\n return {\n status: \"error\",\n error: \"Failed to map action to ActionDataEntity\",\n };\n }\n\n // Build ActionEntity\n const actionEntity: ActionEntity = {\n action_description: statement,\n action_data,\n locator: locatorInfo.locator || undefined,\n xpath: locatorInfo.xpath || undefined,\n frame_path: locatorInfo.frame_path,\n };\n\n return {\n status: \"success\",\n actionEntity,\n reasoning: \"Action generated successfully using pure vision mode\",\n goalAccomplished: true, // Pure vision mode always accomplishes the goal\n };\n } catch (error: any) {\n agentLogger.error(\"Error generating CUA action\", error);\n return {\n status: \"error\",\n error: error.message || \"Failed to generate action with pure vision\",\n };\n }\n}\n","import { Page } from 'playwright';\nimport { TaskExecutionContext } from '../agent/core/types';\nimport { PageContext } from '../agent/action-generation/actionPrompts';\nimport { ActionIntent, DOMState } from '../dom/types';\n\ninterface TabInformation {\n\tcurrentTabText: string;\n\ttabsText: string;\n}\n\nasync function formatTabInformation(page: Page): Promise<TabInformation> {\n\t// Get all pages from context (same as page.context().pages() in Python)\n\tconst pages = page.context().pages();\n\tlet currentTabId: number | null = null;\n\n\tconst tabsList: string[] = [];\n\tfor (let idx = 0; idx < pages.length; idx++) {\n\t\tconst p = pages[idx];\n\n\t\t// Find current tab by comparing page objects\n\t\tif (p === page) {\n\t\t\tcurrentTabId = idx;\n\t\t}\n\n\t\t// Get title with timeout to avoid hanging\n\t\tlet title = '(title unavailable)';\n\t\ttry {\n\t\t\t// Using Promise.race for timeout\n\t\t\ttitle = await 
Promise.race([\n\t\t\t\tp.title(),\n\t\t\t\tnew Promise<string>((_, reject) =>\n\t\t\t\t\tsetTimeout(() => reject(new Error('timeout')), 1000)\n\t\t\t\t)\n\t\t\t]);\n\t\t} catch (error) {\n\t\t\t// Keep default title\n\t\t}\n\n\t\tlet tabDesc = `Tab ${idx}: ${p.url()}`;\n\t\tif (title) {\n\t\t\ttabDesc += ` - ${title.slice(0, 50)}`;\n\t\t}\n\t\ttabsList.push(tabDesc);\n\t}\n\n\tconst tabsText = tabsList.length > 0 ? tabsList.join('\\n') : '';\n\tconst currentTabText = currentTabId !== null ? `Current tab: ${currentTabId}\\n` : '';\n\n\treturn { currentTabText, tabsText };\n}\n\n/**\n * Format page context for LLM\n */\nasync function formatPageContext(page: Page, domTree: string): Promise<PageContext> {\n\tconst { currentTabText, tabsText } = await formatTabInformation(page);\n\n\treturn {\n\t\telementsText: domTree,\n\t\tcurrentUrl: page.url(),\n\t\tcurrentTitle: await page.title(),\n\t\tcurrentTabText: currentTabText,\n\t\ttabsText: tabsText,\n\t};\n}\n\nexport interface BuildPageContextResult {\n\tdomTree: string;\n\tscreenshotBase64: string;\n\tslicedScreenshotsBase64?: string[];\n\tdomState: DOMState;\n\tpageContext: PageContext;\n}\n\nexport interface BuildPageContextOptions {\n\tuseCleanScreenshot?: boolean;\n\tuseSlicedScreenshots?: boolean;\n\tresizeSlicedScreenshots?: boolean;\n\t/** Use Chrome Accessibility Tree for element detection (experimental) */\n\tuseAccessibilityTree?: boolean;\n\t/** Action intent for filtering elements (click/input/scroll/all) */\n\tactionIntent?: ActionIntent;\n}\n\n/**\n * Build page context for action generation\n *\n * This function:\n * 1. Gets fresh DOM state and screenshot with Set-of-Mark\n * 2. Optionally slices the screenshot into 3 parts (left/middle/right)\n * 3. Formats page context for LLM prompts\n *\n * @param context - Task execution context with page, domService, and agentServices\n * @param options - Options for screenshot processing\n * @returns Complete page context data needed for action generation\n */\nexport async function buildPageContext(\n\tcontext: TaskExecutionContext,\n\toptions?: BuildPageContextOptions | boolean // boolean for backward compat (useCleanScreenshot)\n): Promise<BuildPageContextResult> {\n\tconst { page, domService, agentServices } = context;\n\n\t// Handle backward compatibility: if boolean, treat as useCleanScreenshot\n\tconst opts: BuildPageContextOptions = typeof options === 'boolean'\n\t\t? 
{ useCleanScreenshot: options }\n\t\t: options || {};\n\n\t// Get fresh DOM state and screenshot with SOM (slicing handled in domService)\n\tconst interactiveClassNames = agentServices.getInteractiveClassNames();\n\tconst { domState, screenshotBase64, slicedScreenshotsBase64 } =\n\t\tawait domService.getClickableElementsWithScreenshot(page, {\n\t\t\tinteractiveClassNames,\n\t\t\tuseCleanScreenshot: opts.useCleanScreenshot,\n\t\t\tuseSlicedScreenshots: opts.useSlicedScreenshots,\n\t\t\tresizeSlicedScreenshots: opts.resizeSlicedScreenshots,\n\t\t\tuseAccessibilityTree: opts.useAccessibilityTree,\n\t\t\tactionIntent: opts.actionIntent,\n\t\t});\n\tconst domTree = domState.elementTree.clickableElementsToString();\n\n\t// Build browser state context\n\tconst pageContext = await formatPageContext(page, domTree);\n\n\t// Add sliced screenshots to pageContext for prompt generation\n\tif (slicedScreenshotsBase64) {\n\t\tpageContext.slicedScreenshotsBase64 = slicedScreenshotsBase64;\n\t}\n\n\treturn {\n\t\tdomTree,\n\t\tscreenshotBase64,\n\t\tslicedScreenshotsBase64,\n\t\tdomState,\n\t\tpageContext,\n\t};\n}\n\n","import { TokenUsage } from 'shiplight-types';\n\n/**\n * Convert AI SDK usage to TokenUsage format\n * Handles both OpenAI format (promptTokens/completionTokens) and Gemini format (inputTokens/outputTokens)\n */\nexport function convertUsageToTokenUsage(usage: any, model: string): TokenUsage | null {\n\tif (!usage) return null;\n\treturn {\n\t\tprompt_tokens: usage.promptTokens || usage.inputTokens || 0,\n\t\tcompletion_tokens: usage.completionTokens || usage.outputTokens || 0,\n\t\ttotal_tokens: usage.totalTokens || 0,\n\t\tmodel,\n\t};\n}","import { AssistantModelMessage, generateText, Output, UserModelMessage } from 'ai';\nimport { z } from 'zod';\nimport { ActionEntity } from '../../actions/types';\nimport { ActionIntent } from '../../dom/types';\nimport { toolRegistry } from '../../llm_tools/registry';\nimport { OpenAIToolProvider } from '../../llm_tools/providers/openai';\nimport { getActionEntityLocatorInfo } from '../../llm_tools/utils';\nimport { agentLogger } from '../../utils/agentLogger';\nimport logger from '../../utils/logger';\nimport { convertUsageToTokenUsage } from '../../utils/tokenUsage';\nimport { buildPageContext } from '../../utils/pageContext';\nimport { ActionGenerationDebugInfo, MessageForLogging, MessagePartForLogging, TokenUsage } from 'shiplight-types';\nimport { AgentOptions, GeneratedAction, TaskExecutionContext } from '../core/types';\n\nimport { getModel, getProviderOptions } from '../llm';\nimport { getActionGenerationSystemPrompt, getActionGenerationUserPrompt } from './actionPrompts';\nimport { generateAction as generateActionWithCoordinatesBased } from './coordinatesBased';\n\n/**\n * Determine action intent from a statement using simple keyword matching.\n * This is used to filter DOM elements to only those relevant to the likely action.\n *\n * @param statement - The user's goal/instruction\n * @returns ActionIntent: 'click', 'input', 'scroll', or 'all'\n */\nfunction determineActionIntent(statement: string): ActionIntent {\n\tconst lower = statement.toLowerCase();\n\n\t// Input intent: typing, entering, filling, etc.\n\tconst inputPatterns = [\n\t\t/\\b(type|enter|input|fill|write|set)\\b/,\n\t\t/\\b(text|value|field|box)\\b.*\\b(to|with|as)\\b/,\n\t\t/\\b(username|password|email|search|query)\\b/,\n\t];\n\tif (inputPatterns.some(pattern => pattern.test(lower))) {\n\t\treturn 'input';\n\t}\n\n\t// Scroll intent: scrolling, navigating down/up\n\tconst 
scrollPatterns = [\n\t\t/\\bscroll\\b/,\n\t\t/\\b(scroll|swipe)\\s*(up|down|left|right)\\b/,\n\t\t/\\b(page|move)\\s*(down|up)\\b/,\n\t];\n\tif (scrollPatterns.some(pattern => pattern.test(lower))) {\n\t\treturn 'scroll';\n\t}\n\n\t// Click intent: clicking, pressing, selecting, etc.\n\tconst clickPatterns = [\n\t\t/\\b(click|tap|press|select|choose|pick|check|toggle)\\b/,\n\t\t/\\b(open|close|submit|confirm|cancel|dismiss)\\b/,\n\t\t/\\b(button|link|menu|dropdown|checkbox|radio)\\b/,\n\t];\n\tif (clickPatterns.some(pattern => pattern.test(lower))) {\n\t\treturn 'click';\n\t}\n\n\t// Default to 'all' if we can't determine the intent\n\treturn 'all';\n}\n\n\n/**\n * Extract text parts from multimodal user prompt content\n * Used for debug info - excludes images to keep logs readable\n */\nfunction extractUserPromptText(content: any): string {\n\tif (typeof content === 'string') {\n\t\treturn content;\n\t}\n\tif (Array.isArray(content)) {\n\t\treturn content\n\t\t\t.filter((part: any) => part.type === 'text')\n\t\t\t.map((part: any) => part.text)\n\t\t\t.join('\\n');\n\t}\n\treturn '';\n}\n\n/**\n * Extract URL string from various image data formats\n */\nfunction extractImageUrl(imageData: any): string | null {\n\tif (!imageData) return null;\n\n\t// Native URL object\n\tif (imageData instanceof URL) {\n\t\treturn imageData.href;\n\t}\n\n\t// URL-like object with href property\n\tif (typeof imageData === 'object' && imageData.href) {\n\t\treturn String(imageData.href);\n\t}\n\n\t// URL-like object with toString method (URL objects have this)\n\tif (typeof imageData === 'object' && typeof imageData.toString === 'function') {\n\t\tconst str = imageData.toString();\n\t\tif (str.startsWith('http://') || str.startsWith('https://')) {\n\t\t\treturn str;\n\t\t}\n\t}\n\n\t// String URL\n\tif (typeof imageData === 'string' && (imageData.startsWith('http://') || imageData.startsWith('https://'))) {\n\t\treturn imageData;\n\t}\n\n\treturn null;\n}\n\n/**\n * Convert messages to logging format\n * Images are converted to data URLs (base64) or kept as regular URLs\n */\nfunction convertMessagesToLoggingFormat(messages: any[]): MessageForLogging[] {\n\treturn messages.map((msg) => ({\n\t\trole: msg.role,\n\t\tcontent: Array.isArray(msg.content)\n\t\t\t? msg.content.map((part: any): MessagePartForLogging => {\n\t\t\t\t\tif (part.type === 'image') {\n\t\t\t\t\t\tconst imageData = part.image;\n\t\t\t\t\t\t// Check if it's a URL, otherwise treat as base64\n\t\t\t\t\t\tconst imageUrl = extractImageUrl(imageData);\n\t\t\t\t\t\tif (imageUrl) {\n\t\t\t\t\t\t\treturn { type: 'image', file: imageUrl };\n\t\t\t\t\t\t}\n\t\t\t\t\t\tconst base64Data = typeof imageData === 'string' ? imageData : '';\n\t\t\t\t\t\treturn {\n\t\t\t\t\t\t\ttype: 'image',\n\t\t\t\t\t\t\tfile: base64Data.startsWith('data:') ? base64Data : `data:image/png;base64,${base64Data}`,\n\t\t\t\t\t\t};\n\t\t\t\t\t}\n\t\t\t\t\treturn { type: 'text', text: part.text };\n\t\t\t })\n\t\t\t: msg.content,\n\t}));\n}\n\n/**\n * Generate a single action from LLM using JSON mode\n *\n * This is the core function that:\n * 1. Gets fresh DOM state and screenshot\n * 2. Calls LLM with browser context in JSON mode (optionally with chat history)\n * 3. 
Parses JSON response to extract action and completion flag\n *\n * @param statement - User's goal/instruction\n * @param context - Agent context with page, agent, domService\n * @param options - Agent options (model, temperature, chatHistory, etc.)\n * @returns Generated action result (always returns, never throws)\n */\nexport async function generateAction(\n\tstatement: string,\n\tcontext: TaskExecutionContext,\n\toptions: AgentOptions = {}\n): Promise<GeneratedAction> {\n\tconst { page, agentServices } = context;\n\tconst model = agentServices.getModel();\n\tconst temperature = options.temperature ?? 0;\n\n\t// Retrieve knowledges for this statement via callback\n\tconst knowledgePromise = agentServices\n\t\t.retrieveKnowledges(statement)\n\t\t.catch((error) => {\n\t\t\tagentLogger.log(`Failed to retrieve knowledges: ${error}`);\n\t\t\treturn [];\n\t\t});\n\n\t// Get organization settings\n\tconst useSlicedScreenshots = agentServices.isSlicedScreenshotsEnabled();\n\tconst resizeSlicedScreenshots = agentServices.isResizeSlicedScreenshotsEnabled();\n\tconst enableKnowledgeImages = agentServices.isKnowledgeImagesEnabled();\n\tconst useAccessibilityTree = agentServices.isAccessibilityTreeEnabled();\n\tconst useActionIntentFiltering = agentServices.isActionIntentFilteringEnabled();\n\n\t// Determine action intent from statement (if filtering is enabled)\n\tconst actionIntent = useActionIntentFiltering ? determineActionIntent(statement) : 'all';\n\tif (useActionIntentFiltering && actionIntent !== 'all') {\n\t\tagentLogger.log(`Action intent filtering: detected '${actionIntent}' intent from statement`);\n\t}\n\n\t// Build page context (DOM state, screenshot, page context)\n\tconst { screenshotBase64, domState, pageContext } =\n\t\tawait buildPageContext(context, { useSlicedScreenshots, resizeSlicedScreenshots, useAccessibilityTree, actionIntent });\n\tcontext.domState = domState;\n\n\t// Get tool descriptions for the prompt\n\tconst provider = new OpenAIToolProvider(toolRegistry);\n\tconst openaiTools = provider.getToolDefinitions();\n\tconst toolDescriptions = openaiTools\n\t\t.map((tool: any) => {\n\t\t\tconst func = tool.function;\n\t\t\treturn `${func.name}: ${func.description}\\nParameters: ${JSON.stringify(func.parameters, null, 2)}`;\n\t\t})\n\t\t.join('\\n\\n');\n\n\t// Build system prompt with tool descriptions\n\tconst systemPrompt = getActionGenerationSystemPrompt(toolDescriptions);\n\tconst knowledges = await knowledgePromise;\n\n\t// Build the current user prompt with browser state, execution history, and goal\n\tconst userPrompt = getActionGenerationUserPrompt(\n\t\tpageContext,\n\t\tstatement,\n\t\tcontext.variables,\n\t\tcontext.executionHistory,\n\t\tknowledges.length > 0 ? 
knowledges : undefined,\n\t\tscreenshotBase64,\n\t\tuseSlicedScreenshots,\n\t\tundefined, // currentTime - use default\n\t\tenableKnowledgeImages,\n\t\tcontext.sensitiveKeys\n\t);\n\n\t// Build messages array - start with chat history (if provided), then add current state\n\tconst messages: Array<UserModelMessage | AssistantModelMessage> = [];\n\n\t// Add previous conversation messages (for copilot mode)\n\tif (options.chatHistory && options.chatHistory.length > 0) {\n\t\toptions.chatHistory.forEach((msg) => {\n\t\t\tif (msg.role === 'user') {\n\t\t\t\tmessages.push({\n\t\t\t\t\trole: 'user',\n\t\t\t\t\tcontent: msg.content,\n\t\t\t\t});\n\t\t\t} else if (msg.role === 'assistant') {\n\t\t\t\tmessages.push({\n\t\t\t\t\trole: 'assistant',\n\t\t\t\t\tcontent: msg.content,\n\t\t\t\t});\n\t\t\t}\n\t\t});\n\t}\n\n\t// Add current user message with screenshot\n\tmessages.push({\n\t\trole: 'user',\n\t\tcontent: userPrompt,\n\t});\n\n\t// Create messagesForLogging for debugInfo (processes images without saving files)\n\tconst userMessagesForDebug = convertMessagesToLoggingFormat(messages);\n\n\t// Build action schema dynamically from tool registry\n\t// Each registered tool's schema is wrapped as { toolName: schema } and combined into a union\n\t// This gives Gemini explicit schema guidance while being plug-and-play for new actions\n\tconst actionSchema = toolRegistry.buildActionUnionSchema();\n\n\tconst actionResponseSchema = z.object({\n\t\tthought: z.string().describe('Step by step reasoning of your decision making process').optional().default(''),\n\t\tdescription: z.string().describe('Detailed description of the action to be performed').optional().default(''),\n\t\taction: actionSchema,\n\t\tcompletes_instruction: z.boolean().describe('Whether this action completes the given instruction').optional().default(false),\n\t});\n\n\t// Count images in userPrompt to determine provider options\n\t// Vertex AI only supports HIGH resolution for single images\n\tconst imageCount = Array.isArray(userPrompt)\n\t\t? userPrompt.filter((part: any) => part.type === 'image').length\n\t\t: 0;\n\tconst providerOptions = getProviderOptions(model, imageCount);\n\n\t// Call LLM with generateText + Output.object() to enforce structured JSON output\n\tconst result = await generateText({\n\t\tmodel: getModel(model),\n\t\tsystem: systemPrompt,\n\t\tmessages,\n\t\ttemperature,\n\t\toutput: Output.object({ schema: actionResponseSchema }),\n\t\tproviderOptions,\n\t});\n\n\tconst jsonResponse = result.output!;\n\tconst rawLlmResponse = JSON.stringify(jsonResponse, null, 2);\n\tlogger.debug(`Generate Action Raw Output: ${rawLlmResponse}`);\n\n\t// Build token usages\n\tconst tokenUsages: TokenUsage[] = [];\n\tconst tokenUsage = convertUsageToTokenUsage((result as any).usage, model);\n\tif (tokenUsage) {\n\t\ttokenUsages.push(tokenUsage);\n\t}\n\n\t// Build debug info (includes token usages)\n\tconst debugInfo: ActionGenerationDebugInfo = {\n\t\tsystemPrompt,\n\t\tuserPrompt: userMessagesForDebug,\n\t\trawLlmResponse,\n\t\t// screenshotWithSom: screenshotBase64,\n\t\t// retrievedKnowledges: knowledges.length > 0 ? 
knowledges : undefined,\n\t\ttokenUsages,\n\t\t// elementTree: pageContext.elementsText,\n\t};\n\n\t// Extract fields from JSON response\n\tconst thought = jsonResponse.thought || '';\n\tconst description = jsonResponse.description || '';\n\tconst action = jsonResponse.action || {};\n\tconst completesInstruction = jsonResponse.completes_instruction || false;\n\n\t// Check if action is empty or 'done'\n\tif (!action || Object.keys(action).length === 0) {\n\t\treturn {\n\t\t\tstatus: 'error',\n\t\t\treasoning: thought || description || 'No action generated',\n\t\t\tgoalAccomplished: completesInstruction,\n\t\t\terror: 'Agent did not generate any action',\n\t\t\tdebugInfo,\n\t\t};\n\t}\n\n\tconst actionName = Object.keys(action)[0];\n\tif (actionName === 'done') {\n\t\treturn {\n\t\t\tstatus: 'error',\n\t\t\treasoning: thought || description || 'Task marked as done',\n\t\t\tgoalAccomplished: completesInstruction,\n\t\t\terror: 'Agent indicated task is done without generating an action',\n\t\t\tdebugInfo,\n\t\t};\n\t}\n\n\tif (actionName === 'perform_accurate_operation') {\n\t\tconst coordinatesBasedResult = await generateActionWithCoordinatesBased(statement, context, options);\n\t\treturn coordinatesBasedResult;\n\t}\n\n\tconst actionParams = action[actionName] || {};\n\n\t// Build ActionEntity with locator info if action references an element by element_index\n\tlet locatorInfo: { locator?: string; xpath?: string; frame_path?: string[] } = {};\n\n\t// Check if action has an element_index parameter that references a DOM element\n\tif (typeof actionParams.element_index === 'number') {\n\t\tconst elementIndex = actionParams.element_index;\n\n\t\t// Check if element_index is negative (LLM didn't follow instructions to return empty action)\n\t\tif (elementIndex < 0) {\n\t\t\treturn {\n\t\t\t\tstatus: 'error',\n\t\t\t\treasoning: thought || description || 'No action generated',\n\t\t\t\tgoalAccomplished: completesInstruction,\n\t\t\t\terror: 'Agent did not generate any action',\n\t\t\t\tdebugInfo,\n\t\t\t};\n\t\t}\n\n\t\tconst domElement = domState.selectorMap.get(elementIndex);\n\n\t\tif (domElement) {\n\t\t\t// Get locator info (xpath, locator, frame_path) for the element\n\t\t\tlocatorInfo = await getActionEntityLocatorInfo(page, domElement);\n\t\t}\n\t}\n\n\t// If the action is a verification, must use the original statement as the assertion statement\n\tlet actionDescription = description;\n\tif (actionName === 'verify') {\n\t\tactionDescription = statement;\n\t\tactionParams['statement'] = statement;\n\t}\n\n\tconst actionEntity: ActionEntity = {\n\t\t...locatorInfo,\n\t\taction_description: actionDescription || thought || `${actionName}(${JSON.stringify(actionParams)})`,\n\t\taction_data: {\n\t\t\taction_name: actionName,\n\t\t\tkwargs: actionParams,\n\t\t},\n\t};\n\n\treturn {\n\t\tstatus: 'success',\n\t\tactionEntity,\n\t\treasoning: thought || description,\n\t\tgoalAccomplished: completesInstruction,\n\t\tdebugInfo,\n\t};\n}\n\n","/**\n * Core agent logic for action generation and evaluation\n *\n * This module contains the reusable logic for:\n * - Getting DOM state and screenshot with SOM\n * - Building browser context for LLM\n * - Calling LLM with structured output\n * - Action generation and assertion evaluation\n */\n\nimport { generateText, Output, UserContent } from 'ai';\nimport { z } from 'zod';\nimport { createKnowledgeParts, countKnowledgeImages } from '../../services/knowledgeService';\nimport { buildPageContext } from '../../utils/pageContext';\nimport { 
convertUsageToTokenUsage } from '../../utils/tokenUsage';\nimport { generateAction as generateActionWithCoordinatesBased } from '../action-generation/coordinatesBased';\nimport { generateAction as generateActionWithElementBased } from '../action-generation/elementBased';\nimport { getModel, getProviderOptions } from '../llm';\nimport { AgentOptions, AssertionResult, GeneratedAction, TaskExecutionContext } from '../core/types';\nimport { ActionGenerationDebugInfo, MessageForLogging, MessagePartForLogging, TokenUsage } from 'shiplight-types';\n\n/**\n * Extract URL string from various image data formats\n */\nfunction extractImageUrl(imageData: any): string | null {\n\tif (!imageData) return null;\n\n\t// Native URL object\n\tif (imageData instanceof URL) {\n\t\treturn imageData.href;\n\t}\n\n\t// URL-like object with href property\n\tif (typeof imageData === 'object' && imageData.href) {\n\t\treturn String(imageData.href);\n\t}\n\n\t// URL-like object with toString method (URL objects have this)\n\tif (typeof imageData === 'object' && typeof imageData.toString === 'function') {\n\t\tconst str = imageData.toString();\n\t\tif (str.startsWith('http://') || str.startsWith('https://')) {\n\t\t\treturn str;\n\t\t}\n\t}\n\n\t// String URL\n\tif (typeof imageData === 'string' && (imageData.startsWith('http://') || imageData.startsWith('https://'))) {\n\t\treturn imageData;\n\t}\n\n\treturn null;\n}\n\n// Schema for assertion evaluation response\nconst assertionResponseSchema = z.object({\n\tscreenshotDescription: z.string().describe(`Description of the screenshot content, listing out key elements along with their Set of Mark indices, \n\t\tand a description of their location: formatting example: [12] A red button with text \"Submit\", next to [11]\\n[45] A modal dialog titled \"Confirmation\", \n\t\tin the center of the screen`),\n\texplanation: z.string().describe('Step by step reasoning explaining your conclusion about the statement'),\n\tconclusion: z.enum(['true', 'false', 'unknown']).describe('Whether the statement is true, false, or unknown if you cannot make a conclusion'),\n});\n\nexport async function generateAction(\n\tstatement: string,\n\tcontext: TaskExecutionContext,\n\toptions: AgentOptions = {}\n): Promise<GeneratedAction> {\n\tif (options.usePureVision) {\n\t\treturn generateActionWithCoordinatesBased(statement, context, options);\n\t} else {\n\t\treturn generateActionWithElementBased(statement, context, options);\n\t}\n}\n\n/**\n * Get the current date/time formatted for prompts\n */\nfunction getCurrentTimeInfo(): { dateString: string; timeString: string } {\n\tconst now = new Date();\n\tconst dateString = now.toLocaleDateString('en-US', {\n\t\tweekday: 'long',\n\t\tyear: 'numeric',\n\t\tmonth: 'long',\n\t\tday: 'numeric',\n\t\ttimeZone: 'America/Los_Angeles',\n\t});\n\tconst timeString = now.toLocaleTimeString('en-US', {\n\t\thour: '2-digit',\n\t\tminute: '2-digit',\n\t\tsecond: '2-digit',\n\t\tfractionalSecondDigits: 3,\n\t\ttimeZoneName: 'short',\n\t\ttimeZone: 'America/Los_Angeles',\n\t});\n\treturn { dateString, timeString };\n}\n\n/**\n * Build the evaluation system prompt\n * Note: Keep this minimal - Gemini 2.5 Pro follows user prompt instructions more reliably.\n * UI terminology and detailed instructions are in the user prompt instead.\n */\nfunction getEvaluationSystemPrompt(): string {\n\treturn `# Role\nYou are an experienced QA person for web applications.\nYou are tasked to verify the validity of a given statement based on the screenshot and element tree of a web 
page.\n`;\n}\n\n/**\n * Evaluate a statement about the current page state using LLM\n *\n * This is the core LLM function that:\n * 1. Gets current DOM state and screenshot\n * 2. Calls LLM to evaluate if the statement is true or false\n * 3. Returns the result with explanation\n *\n * @param statement - Statement to evaluate (already resolved, no $variables)\n * @param context - Task execution context with page, domService, etc.\n * @param options - Agent options (model, temperature, etc.)\n * @returns AssertionResult with success flag and explanation\n */\nexport async function evaluate(\n\tstatement: string,\n\tcontext: TaskExecutionContext,\n\toptions: AgentOptions = {}\n): Promise<AssertionResult> {\n\tconst { page, executionHistory } = context;\n\tconst model = context.agentServices.getModel();\n\n\ttry {\n\t\t// Get organization settings\n\t\tconst useSlicedScreenshots = context.agentServices.isSlicedScreenshotsEnabled();\n\t\tconst resizeSlicedScreenshots = context.agentServices.isResizeSlicedScreenshotsEnabled();\n\t\tconst enableKnowledgeImages = context.agentServices.isKnowledgeImagesEnabled();\n\t\tconst useAccessibilityTree = context.agentServices.isAccessibilityTreeEnabled();\n\n\t\t// Get current page state\n\t\tconst { domTree, screenshotBase64, slicedScreenshotsBase64, domState, pageContext } = await buildPageContext(context, {\n\t\t\tuseCleanScreenshot: options.useCleanScreenshotForAssertion,\n\t\t\tuseSlicedScreenshots,\n\t\t\tresizeSlicedScreenshots,\n\t\t\tuseAccessibilityTree,\n\t\t});\n\t\tcontext.domState = domState;\n\n\t\t// Build execution history section if available\n\t\tlet executionHistoryText = '';\n\t\tif (executionHistory && executionHistory.length > 0) {\n\t\t\tconst historyLines = executionHistory.map(([action, result], idx) =>\n\t\t\t\t`${idx + 1}. Action: ${action}\\n Result: ${result}`\n\t\t\t).join('\\n');\n\t\t\texecutionHistoryText = `\\n# Previous actions in this session:\\n${historyLines}\\n`;\n\t\t}\n\n\t\tconst { dateString, timeString } = getCurrentTimeInfo();\n\n\t\t// Build user prompt with UI terminology (Gemini 2.5 Pro follows user prompt more reliably than system prompt)\n\t\tconst userPrompt = `\n# User statement\n\"${statement}\"\n\n# UI Terminology (IMPORTANT - read carefully)\nWhen the statement mentions a **Modal**, **Dialog**, **Popup**, or **Panel**, use this definition:\n\nA modal is ANY distinct UI section that:\n- Has a clear visual boundary separating it from other content\n- Contains a specific title, heading, or purpose\n- Presents options, forms, or content for user interaction\n\nThis includes ALL of the following:\n- Traditional overlay/popup dialogs\n- Side panels (left or right)\n- Slide-out drawers\n- Bottom sheets\n- Floating panels\n- Any visually distinct section with a title\n\n**Important**: A side panel on the right or left side of the screen IS a modal if it has a distinct title and purpose. 
It does NOT need to overlay or block the main content to be considered a modal.\n\nNOT modals: Loading spinners, toast notifications, tooltips, main content area.\n\n# Current webpage state\n## Tab information:\n${pageContext.currentTabText}Available tabs:\n${pageContext.tabsText}\n\n## Element interaction guidelines:\n - Each element has a unique index number (e.g., \"[33]<button>\")\n - Elements marked with \"[]Non-interactive text\" are non-interactive (for context only)\n - Elements are indented to show the structure of the element tree, with indentation level indicating depth\n\n## Interactive elements from current page:\n${domTree}\n\n## Screenshot\n${useSlicedScreenshots && slicedScreenshotsBase64 ? 'The following images are sliced screenshots of the current webpage (left, middle, right sections).' : 'The image provided is a screenshot of the current webpage.'}\n`;\n\n\t\t// Add available variables context (non-sensitive only)\n\t\tlet variablesContext = '';\n\t\tif (context.variables && Object.keys(context.variables).length > 0) {\n\t\t\tconst variableList: string[] = [];\n\t\t\tfor (const key of Object.keys(context.variables)) {\n\t\t\t\tconst isSensitive = context.sensitiveKeys?.has(key);\n\t\t\t\tif (!isSensitive) {\n\t\t\t\t\t// Only show non-sensitive variables\n\t\t\t\t\tconst value = context.variables[key];\n\t\t\t\t\tconst valueStr = typeof value === 'string' ? value : JSON.stringify(value);\n\t\t\t\t\tvariableList.push(` - ${key}: \"${valueStr}\"`);\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tif (variableList.length > 0) {\n\t\t\t\tvariablesContext = `\n## Available Variables\nThe following non-sensitive variables are available:\n${variableList.join('\\n')}`;\n\t\t\t}\n\t\t}\n\n\t\tconst endingPrompt = `\n${variablesContext}\n${executionHistoryText}\n\nToday is ${dateString}. Current local time is ${timeString}.\nBased on the above information, please determine if the statement is true.\n`;\n\n\t\t// Retrieve knowledge via callback (matching Python assertion_prompts.py behavior)\n\t\tconst knowledges = await context.agentServices.retrieveKnowledges(statement);\n\n\t\t// Build user message content array\n\t\tconst userMessageContent: UserContent = [\n\t\t\t{ type: 'text', text: userPrompt },\n\t\t];\n\n\t\t// Add screenshot(s)\n\t\tlet screenshotCount = 0;\n\t\tif (useSlicedScreenshots && slicedScreenshotsBase64 && slicedScreenshotsBase64.length > 0) {\n\t\t\t// Add sliced screenshots\n\t\t\tfor (const slice of slicedScreenshotsBase64) {\n\t\t\t\tuserMessageContent.push({ type: 'image', image: slice });\n\t\t\t\tscreenshotCount++;\n\t\t\t}\n\t\t} else {\n\t\t\t// Add single screenshot\n\t\t\tuserMessageContent.push({ type: 'image', image: screenshotBase64 });\n\t\t\tscreenshotCount = 1;\n\t\t}\n\n\t\t// Add knowledge parts if available (matching Python: initial_parts + knowledge_pieces + [ending])\n\t\tif (knowledges && knowledges.length > 0) {\n\t\t\tconst knowledgeParts = createKnowledgeParts(knowledges, enableKnowledgeImages);\n\t\t\tuserMessageContent.push(...knowledgeParts);\n\t\t}\n\n\t\tuserMessageContent.push({ type: 'text', text: endingPrompt });\n\n\t\t// Count images: screenshots + knowledge images\n\t\tconst knowledgeImageCount = knowledges ? 
countKnowledgeImages(knowledges, enableKnowledgeImages) : 0;\n\t\tconst totalImageCount = screenshotCount + knowledgeImageCount;\n\t\tconst providerOptions = getProviderOptions(model, totalImageCount);\n\n\t\tconst systemPrompt = getEvaluationSystemPrompt();\n\n\t\tconst result = await generateText({\n\t\t\tmodel: getModel(model),\n\t\t\tsystem: systemPrompt,\n\t\t\tmessages: [\n\t\t\t\t{\n\t\t\t\t\trole: 'user',\n\t\t\t\t\tcontent: userMessageContent,\n\t\t\t\t},\n\t\t\t],\n\t\t\toutput: Output.object({ schema: assertionResponseSchema }),\n\t\t\ttemperature: 0,\n\t\t\tproviderOptions,\n\t\t});\n\n\t\tconst { conclusion, explanation } = result.output!;\n\t\tconst rawLlmResponse = JSON.stringify(result.output, null, 2);\n\n\t\t// Capture token usage\n\t\tconst tokenUsages: TokenUsage[] = [];\n\t\tconst tokenUsage = convertUsageToTokenUsage((result as any).usage, model);\n\t\tif (tokenUsage) {\n\t\t\ttokenUsages.push(tokenUsage);\n\t\t}\n\n\t\t// Convert actual userMessageContent to logging format (captures exactly what was sent to LLM)\n\t\tconst userPromptForLogging: MessageForLogging[] = [\n\t\t\t{\n\t\t\t\trole: 'user',\n\t\t\t\tcontent: userMessageContent.map((part): MessagePartForLogging => {\n\t\t\t\t\tif (part.type === 'text') {\n\t\t\t\t\t\treturn { type: 'text' as const, text: part.text };\n\t\t\t\t\t} else if (part.type === 'image') {\n\t\t\t\t\t\tconst imageData = (part as any).image;\n\t\t\t\t\t\t// Check if it's a URL, otherwise treat as base64\n\t\t\t\t\t\tconst imageUrl = extractImageUrl(imageData);\n\t\t\t\t\t\tif (imageUrl) {\n\t\t\t\t\t\t\treturn { type: 'image' as const, file: imageUrl };\n\t\t\t\t\t\t}\n\t\t\t\t\t\tconst base64Data = typeof imageData === 'string' ? imageData : '';\n\t\t\t\t\t\treturn {\n\t\t\t\t\t\t\ttype: 'image' as const,\n\t\t\t\t\t\t\tfile: base64Data.startsWith('data:') ? base64Data : `data:image/png;base64,${base64Data}`,\n\t\t\t\t\t\t};\n\t\t\t\t\t}\n\t\t\t\t\treturn { type: 'text' as const, text: '[unknown content type]' };\n\t\t\t\t}),\n\t\t\t},\n\t\t];\n\n\t\t// Build debug info\n\t\tconst debugInfo: ActionGenerationDebugInfo = {\n\t\t\tsystemPrompt,\n\t\t\tuserPrompt: userPromptForLogging,\n\t\t\trawLlmResponse,\n\t\t\tscreenshotWithSom: screenshotBase64,\n\t\t\ttokenUsages,\n\t\t\tretrievedKnowledges: knowledges && knowledges.length > 0 ? 
knowledges : undefined,\n\t\t\telementTree: domTree,\n\t\t};\n\n\t\treturn {\n\t\t\tsuccess: conclusion === 'true',\n\t\t\texplanation,\n\t\t\tdebugInfo,\n\t\t};\n\t} catch (error) {\n\t\treturn {\n\t\t\tsuccess: false,\n\t\t\terror: (error as Error).message,\n\t\t};\n\t}\n}\n","/**\n * Local Web Agent - TypeScript implementation of webagent API calls\n *\n * This module provides local implementations of:\n * - executeStep: Single-step action generation and execution\n * - assertStatement: Assertion evaluation without execution\n * - runStep: Multi-step agent execution with streaming events\n *\n * These replace the need for remote webagent API calls.\n */\n\nimport { Page } from 'playwright';\nimport type { ActionHandler } from '../actions/handler';\nimport { ActionEntity } from '../actions/types';\nimport { DomService } from '../dom';\nimport { toolRegistry } from '../llm_tools/registry';\nimport { AgentServices } from './agentServices';\nimport { evaluate, generateAction } from './core/agentCore';\nimport {\n\tAgentEvent,\n\tAgentOptions,\n\tAssertionResult,\n\tStepResult,\n\tTaskExecutionContext,\n} from './core/types';\nimport { TaskExecutionOptions } from './task/types';\n\nexport async function executeAction(\n\tactionEntity: ActionEntity,\n\tcontext: TaskExecutionContext,\n): Promise<{ success: boolean; error?: string }> {\n\ttry {\n\t\tif (!actionEntity.action_data) {\n\t\t\treturn {\n\t\t\t\tsuccess: false,\n\t\t\t\terror: 'Action entity missing action_data',\n\t\t\t};\n\t\t}\n\t\t\n\t\tconst { action_name, kwargs } = actionEntity.action_data;\n\t\tif (toolRegistry.has(action_name)) {\n\t\t\tconst result = await toolRegistry.execute(action_name, kwargs, context);\n\t\t\tconst success = (result as any)?.success !== false;\n\t\t\tconst error = (result as any)?.error || (result as any)?.message;\n\t\t\treturn {\n\t\t\t\tsuccess,\n\t\t\t\terror: success ? undefined : error,\n\t\t\t};\n\t\t} else {\n\t\t\tconst { page, agentServices } = context;\n\t\t\tconst handler = await getActionHandler();\n\t\t\tawait handler.execute(page, actionEntity, agentServices);\n\t\n\t\t\treturn {\n\t\t\t\tsuccess: true,\n\t\t\t\terror: undefined,\n\t\t\t}\n\t\t}\n\t} catch (error) {\n\t\treturn {\n\t\t\tsuccess: false,\n\t\t\terror: (error as Error).message,\n\t\t};\n\t}\n}\n\n/**\n * Lazy-load ActionHandler for vision-based (coordinates) automation.\n *\n * Uses dynamic import to defer loading until needed, avoiding circular dependencies.\n * This function is only called when usePureVision=true, so most code paths won't\n * load the ActionHandler at all.\n *\n * Pattern: Cache the instance after first creation for reuse.\n */\nlet cachedActionHandler: ActionHandler | null = null;\nasync function getActionHandler(): Promise<ActionHandler> {\n\tif (cachedActionHandler) {\n\t\treturn cachedActionHandler;\n\t}\n\n\tconst handlerModule: typeof import('../actions/handler') = await import('../actions/handler');\n\tcachedActionHandler = new handlerModule.default();\n\treturn cachedActionHandler;\n}\n\n/**\n * Generate a single action without executing it\n *\n * This is the core function for action generation only:\n * 1. Call LLM with current browser state\n * 2. LLM picks ONE action from available tools\n * 3. Return the generated action WITHOUT executing it\n * 4. 
Return result with action entity and goal completion flag\n *\n * @param statement - User's goal/instruction in natural language\n * @param page - Playwright Page instance\n * @param agent - Agent instance for helper methods\n * @param options - Agent options (model, temperature, etc.)\n * @returns StepResult with action entity and completion flag (NOT executed)\n */\nexport async function generateActionStep(\n\tstatement: string,\n\tpage: Page,\n\tagentServices: AgentServices,\n\toptions: AgentOptions = {},\n\texistingContext?: TaskExecutionContext\n): Promise<StepResult> {\n\t// Use existing context if provided, otherwise create new\n\tconst context: TaskExecutionContext = existingContext || {\n\t\tpage,\n\t\tagentServices,\n\t\tdomService: new DomService(agentServices.getDomServiceOptions()),\n\t\texecutionHistory: options.executionHistory,\n\t\tvariables: options.variables,\n\t\tsensitiveKeys: options.sensitiveKeys,\n\t};\n\n\t// Replace $variable placeholders with actual values before calling LLM\n\tconst resolvedStatement = agentServices.replaceVariables(statement);\n\n\t// Generate action from LLM (always returns, never throws)\n\tconst generatedAction = await generateAction(resolvedStatement, context, options);\n\n\t// If generation failed, return error with explanation\n\tif (generatedAction.status === 'error') {\n\t\treturn {\n\t\t\tstatus: 'error',\n\t\t\tcompleted: generatedAction.goalAccomplished || false,\n\t\t\tactionEntities: [],\n\t\t\texplanation: generatedAction.reasoning,\n\t\t\terror: generatedAction.error,\n\t\t\tdebugInfo: generatedAction.debugInfo,\n\t\t};\n\t}\n\n\t// Return the generated action WITHOUT executing it\n\treturn {\n\t\tstatus: 'success',\n\t\tcompleted: generatedAction.goalAccomplished || false,\n\t\tactionEntities: generatedAction.actionEntity ? [generatedAction.actionEntity] : [],\n\t\texplanation: generatedAction.reasoning,\n\t\tdebugInfo: generatedAction.debugInfo,\n\t};\n}\n\n/**\n * Execute a single step - takes a statement, LLM picks ONE action, executes it\n *\n * This is the core function for single-step execution:\n * 1. Call LLM with current browser state\n * 2. LLM picks ONE action from available tools\n * 3. Execute that action\n * 4. 
Return result with status and completion flag\n *\n * @param statement - User's goal/instruction in natural language\n * @param page - Playwright Page instance\n * @param agent - Agent instance for helper methods\n * @param options - Agent options (model, temperature, etc.)\n * @returns StepResult with status, completion flag, and action entity\n */\nexport async function executeStep(\n\tstatement: string,\n\tpage: Page,\n\tagentServices: AgentServices,\n\toptions: AgentOptions = {},\n\texistingContext?: TaskExecutionContext\n): Promise<StepResult> {\n\t// Use existing context if provided (for session log folder), otherwise create new\n\tconst context: TaskExecutionContext = existingContext || {\n\t\tpage,\n\t\tagentServices,\n\t\tdomService: new DomService(agentServices.getDomServiceOptions()),\n\t\texecutionHistory: options.executionHistory,\n\t\tvariables: options.variables,\n\t\tsensitiveKeys: options.sensitiveKeys,\n\t};\n\n\t// Replace $variable placeholders with actual values before calling LLM\n\tconst resolvedStatement = agentServices.replaceVariables(statement);\n\n\t// Generate action from LLM (always returns, never throws)\n\tconst generatedAction = await generateAction(resolvedStatement, context, options);\n\n\t// If generation failed, return error with explanation\n\tif (generatedAction.status === 'error' || !generatedAction.actionEntity) {\n\t\treturn {\n\t\t\tstatus: 'error',\n\t\t\tcompleted: generatedAction.goalAccomplished || false,\n\t\t\tactionEntities: [],\n\t\t\texplanation: generatedAction.reasoning,\n\t\t\terror: generatedAction.error || 'No action generated',\n\t\t\tdebugInfo: generatedAction.debugInfo,\n\t\t};\n\t}\n\n\tconst { actionEntity, reasoning, goalAccomplished, debugInfo } = generatedAction;\n\n\t// Execute the action by calling the tool directly\n\tconst executionResult = await executeAction(actionEntity, context);\n\n\t// If execution failed, return error result\n\tif (!executionResult.success) {\n\t\treturn {\n\t\t\tstatus: 'error',\n\t\t\tcompleted: false,\n\t\t\tactionEntities: [actionEntity],\n\t\t\terror: executionResult.error || 'Action execution failed',\n\t\t\tdebugInfo,\n\t\t};\n\t}\n\n\t// Set agentNote with the explanation so it propagates back to the caller\n\tif (reasoning) {\n\t\tagentServices.addNote(reasoning);\n\t}\n\n\t// Use the goal completion analysis from the LLM (already determined during action generation)\n\treturn {\n\t\tstatus: 'success',\n\t\tcompleted: goalAccomplished || false,\n\t\tactionEntities: [actionEntity],\n\t\texplanation: reasoning,\n\t\tdebugInfo,\n\t};\n}\n\n/**\n * Evaluate a statement - evaluation only, no execution\n *\n * This evaluates whether a condition is true without performing any actions.\n * Renamed from assertStatement to match the IAgent.evaluate() naming convention.\n *\n * @param statement - Evaluation statement (e.g., \"The login button is visible\")\n * @param page - Playwright Page instance\n * @param agent - Agent instance\n * @param options - Agent options\n * @returns AssertionResult with success flag and explanation\n */\nexport async function evaluateStatement(\n\tstatement: string,\n\tpage: Page,\n\tagentServices: AgentServices,\n\toptions: AgentOptions = {}\n): Promise<AssertionResult> {\n\t// Build context for the core evaluate function\n\tconst context: TaskExecutionContext = {\n\t\tpage,\n\t\tagentServices,\n\t\tdomService: new DomService(agentServices.getDomServiceOptions()),\n\t\texecutionHistory: options.executionHistory,\n\t\tvariables: options.variables,\n\t\tsensitiveKeys: 
options.sensitiveKeys,\n\t};\n\n\t// Replace $variable placeholders (e.g., $username, $password) with actual values\n\tconst resolvedStatement = agentServices.replaceVariables(statement);\n\toptions.useCleanScreenshotForAssertion = agentServices.isUseCleanScreenshotForAssertion();\n\t// Call core LLM function\n\treturn evaluate(resolvedStatement, context, options);\n}\n\n/**\n * Run a complete task - executes multiple steps until goal is accomplished\n *\n * This is the main agent loop with intelligent task execution:\n * 1. Thinking and reasoning at each step\n * 2. Evaluation of previous actions\n * 3. Memory tracking across steps\n * 4. Goal decomposition\n * 5. Error recovery with retry logic\n *\n * @param task - User's goal/instruction (complete task description)\n * @param page - Playwright Page instance\n * @param agent - Agent instance\n * @param onEvent - Callback for streaming events\n * @param options - Agent options (maxSteps, model, chatHistory, etc.)\n * @returns Final StepResult with chatSummary for conversation history\n */\nexport async function runTask(\n\ttask: string,\n\tpage: Page,\n\tagentServices: AgentServices,\n\tonEvent?: (event: AgentEvent) => void,\n\toptions: AgentOptions = {}\n): Promise<StepResult> {\n\t// Use the task executor\n\tconst { runTaskLoop } = await import('./task');\n\n\t// Convert AgentEvent to ITaskAgentEvent\n\tconst taskAgentOnEvent = onEvent\n\t\t? (event: any) => {\n\t\t\t\t// Map ITaskAgentEvent to AgentEvent\n\t\t\t\tonEvent(event as AgentEvent);\n\t\t }\n\t\t: undefined;\n\n\t// Execute with runTaskLoop\n\tconst result = await runTaskLoop(\n\t\ttask,\n\t\t{\n\t\t\tpage,\n\t\t\tagentServices,\n\t\t\tdomService: undefined as any, // Will be created internally\n\t\t\texecutionHistory: options.executionHistory,\n\t\t\tvariables: options.variables,\n\t\t\tsensitiveKeys: options.sensitiveKeys,\n\t\t} as TaskExecutionContext,\n\t\t{\n\t\t\tmaxSteps: options.maxSteps,\n\t\t\tonEvent: taskAgentOnEvent,\n\t\t\tabortSignal: options.abortSignal,\n\t\t} as TaskExecutionOptions\n\t);\n\n\t// Map TaskExecutionResult to StepResult\n\treturn {\n\t\tstatus: result.success ? 
'success' : 'error',\n\t\tcompleted: result.completed,\n\t\tactionEntities: result.trajectory.actions,\n\t\texplanation: result.summary,\n\t\terror: result.error,\n\t\ttokenUsages: result.metadata.tokenUsages,\n\t};\n}\n"],"mappings":";;uMAQA,OAAS,mBAAAA,OAAuB,qBAczB,SAASC,EAAkBC,EAAwC,CAExE,IAAIC,EAAaH,GAAgBE,EAAQ,CACvC,aAAc,MAChB,CAAC,EAQD,GALIC,EAAW,SACb,OAAOA,EAAW,QAIfA,EAAmB,OAAS,SAG/B,MAAM,IAAI,MAAM,iDAAkDA,EAAmB,IAAI,EAAE,EAI7F,OAAAC,EAAqBD,CAAU,EAExBA,CACT,CA0BA,SAASE,EAAqBC,EAAmB,CAC/C,GAAI,EAAA,OAAOA,GAAW,UAAYA,IAAW,MAK7C,CAAA,GAAIA,EAAO,OAAS,WAElBA,EAAO,qBAAuB,GAG1BA,EAAO,YAAY,CACrB,IAAMC,EAAW,OAAO,KAAKD,EAAO,UAAU,EAC9CA,EAAO,SAAWC,EAGlB,QAAWC,KAAQ,OAAO,OAAOF,EAAO,UAAU,EAChDD,EAAqBG,CAAI,CAE7B,CAIEF,EAAO,OAAS,SAAWA,EAAO,OACpCD,EAAqBC,EAAO,KAAK,EAInC,QAAWG,IAAO,CAAC,QAAS,QAAS,OAAO,EACtC,MAAM,QAAQH,EAAOG,CAAG,CAAC,GAC3BH,EAAOG,CAAG,EAAE,QAAQJ,CAAoB,EAK5C,OAAW,CAACI,EAAKC,CAAK,IAAK,OAAO,QAAQJ,CAAM,EAE5C,OAAOI,GAAU,UACjBA,IAAU,MACV,CAAC,CAAC,aAAc,QAAS,QAAS,QAAS,OAAO,EAAE,SAASD,CAAG,GAEhEJ,EAAqBK,CAAK,CAAA,CAGhC,CClFO,IAAMC,GAAN,KAAyB,CAC9B,YAAoBC,EAAwB,CAAxB,KAAA,SAAAA,CAAyB,CAQ7C,oBAAuC,CACrC,OAAO,KAAK,SACT,SAAS,EACT,OAAQC,GAASA,EAAK,aAAa,MAAM,EACzC,IAAKA,IAAU,CACd,KAAM,WACN,SAAU,CACR,KAAMA,EAAK,KACX,YAAaA,EAAK,YAClB,WAAYC,EAAkBD,EAAK,MAAM,EACzC,OAAQ,EACV,CACF,EAAE,CACN,CASA,2BAA2BE,EAAuC,CAChE,IAAMC,EAAU,IAAI,IAAID,CAAS,EAEjC,OAAO,KAAK,SACT,SAAS,EACT,OAAQF,GAASG,EAAQ,IAAIH,EAAK,IAAI,GAAKA,EAAK,aAAa,MAAM,EACnE,IAAKA,IAAU,CACd,KAAM,WACN,SAAU,CACR,KAAMA,EAAK,KACX,YAAaA,EAAK,YAClB,WAAYC,EAAkBD,EAAK,MAAM,EACzC,OAAQ,EACV,CACF,EAAE,CACN,CAOA,QAAgB,CACd,OAAO,KAAK,mBAAmB,CACjC,CAQA,kBAAkBI,EAA8C,CAC9D,IAAMJ,EAAO,KAAK,SAAS,IAAII,CAAQ,EAEvC,GAAKJ,EAIL,MAAO,CACL,KAAM,WACN,SAAU,CACR,KAAMA,EAAK,KACX,YAAaA,EAAK,YAClB,WAAYC,EAAkBD,EAAK,MAAM,EACzC,OAAQ,EACV,CACF,CACF,CAKA,cAAuB,CACrB,OAAO,KAAK,SAAS,KAAK,CAC5B,CAKA,cAAyB,CACvB,OAAO,KAAK,SAAS,aAAa,CACpC,CACF,ECxEO,SAASK,GACdC,EACAC,EAC6B,CAE7B,IAAMC,EAAiC,IAAI,IAE3C,QAAWC,KAAOF,EAEZE,EAAI,MAAQA,EAAI,IAClBD,EAAU,IAAIC,EAAI,KAAMA,EAAI,GAAG,EACtBA,EAAI,MAAQA,EAAI,KACzBD,EAAU,IAAIC,EAAI,KAAMA,EAAI,GAAG,EAOnC,IAAMC,EAAU,wDAEVC,EAAqC,CAAC,EACxCC,EAAU,EAGVC,EACJ,MAAQA,EAAQH,EAAQ,KAAKJ,CAAO,KAAO,MAAM,CAE/C,IAAMQ,EAAaR,EAAQ,MAAMM,EAASC,EAAM,KAAK,EACjDC,GACFH,EAAM,KAAK,CAAE,KAAM,OAAQ,KAAMG,CAAW,CAAC,EAK/C,IAAMC,EAAaF,EAAM,CAAC,EAEtBL,EAAU,IAAIO,CAAU,EAE1BJ,EAAM,KAAK,CAAE,KAAM,QAAS,MAAO,IAAI,IAAIH,EAAU,IAAIO,CAAU,CAAE,CAAE,CAAC,EAGxEJ,EAAM,KAAK,CAAE,KAAM,OAAQ,KAAME,EAAM,CAAC,CAAE,CAAC,EAG7CD,EAAUC,EAAM,MAAQA,EAAM,CAAC,EAAE,MACnC,CAGA,IAAMG,EAAgBV,EAAQ,MAAMM,CAAO,EAC3C,OAAII,GACFL,EAAM,KAAK,CAAE,KAAM,OAAQ,KAAMK,CAAc,CAAC,EAI9CL,EAAM,SAAW,GACnBA,EAAM,KAAK,CAAE,KAAM,OAAQ,KAAML,CAAQ,CAAC,EAGrCK,CACT,CAQO,SAASM,GACdC,EAC6B,CAC7B,IAAMC,EAAwC,CAAC,EAE/C,QAAWC,KAAQF,EAAgB,CACjC,IAAMZ,EAAUc,EAAK,SAAW,GAC1Bb,EAASa,EAAK,QAAU,CAAC,EAGzBT,EAAQN,GAAyBC,EAASC,CAAM,EAGlDY,EAAS,OAAS,GAAKR,EAAM,OAAS,GACxCQ,EAAS,KAAK,CAAE,KAAM,OAAQ,KAAM;;CAAO,CAAC,EAG9CA,EAAS,KAAK,GAAGR,CAAK,CACxB,CAEA,OAAOQ,CACT,CAMO,IAAME,GAAkC,GA4BxC,SAASC,EACdJ,EACAK,EAAwBF,GACK,CAC7B,GAAIE,EAAc,CAEhB,IAAMZ,EAAQM,GAA6BC,CAAc,EACzD,GAAIP,EAAM,SAAW,EACnB,MAAO,CAAC,EAGV,IAAMa,EAAqB,CACzB,KAAM,OACN,KAAM;;;;;CACR,EAEMC,EAAsB,CAC1B,KAAM,OACN,KAAM;;;;CACR,EAEA,MAAO,CAACD,EAAU,GAAGb,EAAOc,CAAS,CACvC,KAAO,CAIL,IAAMC,EAFgBR,EAAe,OAAOE,GAAQ,CAACA,EAAK,QAAUA,EAAK,OAAO,SAAW,CAAC,EAGzF,IAAIA,GAAQA,EAAK,SAAW,EAAE,EAC9B,OAAOd,GAAWA,EAAQ,OAAS,CAAC,EACpC,KAAK;;CAAM,EAEd,OAAKoB,EAmBE,CAfoB,CACzB,KAAM,OACN,KAAM;;;;;CACR,EAE0B,CACxB,KAAM,OACN,KAAMA,CACR,EAE4B,CAC1B,KAAM,OACN,KAAM;;;;CACR,CAEoC,EAlB3B,CAAC,CAmBZ,CACF,CAQO,SAASC,GA
CdT,EACAK,EAAwBF,GAChB,CACR,GAAI,CAACE,EACH,MAAO,GAET,IAAIK,EAAQ,EACZ,QAAWR,KAAQF,EACbE,EAAK,SACPQ,GAASR,EAAK,OAAO,QAGzB,OAAOQ,CACT,CCzOA,SAASC,IAAkC,CACzC,IAAMC,EAAM,IAAI,KAcVC,EAZY,IAAI,KAAK,eAAe,QAAS,CACjD,SAAU,sBACV,KAAM,UACN,MAAO,UACP,IAAK,UACL,KAAM,UACN,OAAQ,UACR,OAAQ,UACR,OAAQ,GACR,aAAc,OAChB,CAAC,EAEuB,cAAcD,CAAG,EACnCE,EAAOD,EAAM,KAAKE,GAAKA,EAAE,OAAS,MAAM,EAAG,MAC3CC,EAAQH,EAAM,KAAKE,GAAKA,EAAE,OAAS,OAAO,EAAG,MAC7CE,EAAMJ,EAAM,KAAKE,GAAKA,EAAE,OAAS,KAAK,EAAG,MACzCG,EAAOL,EAAM,KAAKE,GAAKA,EAAE,OAAS,MAAM,EAAG,MAC3CI,EAASN,EAAM,KAAKE,GAAKA,EAAE,OAAS,QAAQ,EAAG,MAC/CK,EAASP,EAAM,KAAKE,GAAKA,EAAE,OAAS,QAAQ,EAAG,MAC/CM,EAAeR,EAAM,KAAKE,GAAKA,EAAE,OAAS,cAAc,EAAG,MAC3DO,EAAe,OAAOV,EAAI,gBAAgB,CAAC,EAAE,SAAS,EAAG,GAAG,EAElE,MAAO,GAAGE,CAAI,IAAIE,CAAK,IAAIC,CAAG,IAAIC,CAAI,IAAIC,CAAM,IAAIC,CAAM,IAAIE,CAAY,IAAID,CAAY,EAC5F,CAgCO,SAASE,GACdC,EACQ,CACR,MAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAgCPA,CAAiB;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA6BnB,CAUA,SAASC,GACPC,EACQ,CACR,GAAI,CAACA,GAAoBA,EAAiB,SAAW,EACnD,MAAO,GAGT,IAAIC,EAAc,GAClB,OAAAD,EAAiB,QAAQ,CAAC,CAACE,EAAaC,CAAQ,EAAGC,IAAU,CAC3DH,GAAe,IAAIG,EAAQ,CAAC,kBAAkBF,CAAW;gBAAmBC,CAAQ;CACtF,CAAC,EAEM;;EAEPF,CAAW,EACb,CAmBO,SAASI,GACdC,EACAC,EACAC,EACAR,EACAS,EACAC,EACAC,EAAgC,GAChCC,EAAsB3B,GAAwB,EAC9C4B,EAAiC,GACjCC,EACa,CAEb,IAAMC,EAAwB,CAAC,EAGzBC,EAAY;;GAEjBT,CAAI;;;;EAILD,EAAY,cAAc;EAC1BA,EAAY,QAAQ;;;;;;;;;;;;EAYpBA,EAAY,YAAY;MASxB,GANAS,EAAS,KAAK,CACZ,KAAM,OACN,KAAMC,CACR,CAAC,EAGGL,GAAwBL,EAAY,wBACtC,QAAWW,KAAcX,EAAY,wBACnCS,EAAS,KAAK,CACZ,KAAM,OACN,KAAM,iMACR,CAAC,EACDA,EAAS,KAAK,CACZ,KAAM,QACN,MAAOE,CACT,CAAC,OAEMP,IACTK,EAAS,KAAK,CACZ,KAAM,OACN,KAAM,yLACR,CAAC,EACDA,EAAS,KAAK,CACZ,KAAM,QACN,MAAOL,CACT,CAAC,GAKH,GAAID,GAAcA,EAAW,OAAS,EAAG,CACvC,IAAMS,EAAiBC,EAAqBV,EAAYI,CAAqB,EAC7EE,EAAS,KAAK,GAAGG,CAAc,CACjC,CAGA,IAAIE,EAAa,GAKjB,GAHAA,GAAc;wBAA2BR,CAAW;EAGhDJ,GAAmB,OAAO,KAAKA,CAAe,EAAE,OAAS,EAAG,CAC9D,IAAMa,EAA4B,CAAC,EACnC,QAAWC,KAAO,OAAO,KAAKd,CAAe,EAE3C,GADoBM,GAAe,IAAIQ,CAAG,EAGxCD,EAAgB,KAAK,OAAOC,CAAG,8BAA8B,MACxD,CAEL,IAAMC,EAAQf,EAAgBc,CAAG,EAC3BE,EAAW,OAAOD,GAAU,SAAWA,EAAQ,KAAK,UAAUA,CAAK,EACzEF,EAAgB,KAAK,OAAOC,CAAG,MAAME,CAAQ,GAAG,CAClD,CAGFJ,GAAc;;;EAGhBC,EAAgB,KAAK;CAAI,CAAC;;;;;;;CAQ1B,CAGA,GAAIrB,GAAoBA,EAAiB,OAAS,EAAG,CACnD,IAAMyB,EAAuB1B,GAAuBC,CAAgB,EACpEoB,GAAc;EAAOK,CACvB,CAGA,OAAAL,GAAc;;EAEdL,EAAS,KAAK,CACZ,KAAM,OACN,KAAMK,CACR,CAAC,EAEML,CACT,CC3SA,OAAOW,OAAY,SAiBnB,eAAeC,GAAkBC,EAAYC,EAAuBC,EAAyC,CAG3G,MAAMF,EAAK,SAAS,IAAM,CACxB,IAAMG,EAAY,SAAS,eAAe,gCAAgC,EACtEA,GACFA,EAAU,OAAO,EAEd,OAAe,6BAChB,OAAe,2BAA8C,QAASC,GAAOA,EAAG,CAAC,EAClF,OAAe,2BAA6B,CAAC,EAElD,CAAC,EAGD,IAAMC,EAAa,MAAML,EAAK,WAAW,CACvC,KAAM,MACN,SAAU,EACZ,CAAC,EAIK,CAAE,QAASM,CAAM,EAAI,KAAM,QAAO,OAAO,EACzCC,EAAQD,EAAMD,CAAU,EACxBG,EAAW,MAAMD,EAAM,SAAS,EAChCE,EAAcD,EAAS,OAAS,EAChCE,EAAeF,EAAS,QAAU,EAExCG,EAAY,IACV,iCAAiCF,CAAW,IAAIC,CAAY,eAAeT,CAAa,IAAIC,CAAc,EAC5G,EAIA,IAAIU,EACJ,OAAIH,IAAgBR,GAAiBS,IAAiBR,GACpDS,EAAY,IAAI,4BAA4BF,CAAW,IAAIC,CAAY,OAAOT,CAAa,IAAIC,CAAc,EAAE,EAC/GU,EAAoB,MAAML,EAAM,OAAON,EAAeC,CAAc,EAAE,IAAI,EAAE,SAAS,GAGrFU,EAAoBP,EAIGO,EAAkB,SAAS,QAAQ,CAG9D,CAKA,eAAeC,GACbb,EACAc,EACAC,EACoF,CAEpF,IAAMC,EAAgB,MAAMhB,EAAK,eAC9BiB,GAAqC,SAAS,iBAAiBA,EAAO,EAAGA,EAAO,CAAC,EAClF,CAAE,EAAAH,EAAG,EAAAC,CAAE,CACT,EAEA,GAAI,CAACC,EACH,MAAM,IAAI,MAAM,wBAAwBF,CAAC,KAAKC,CAAC,GAAG,EAGpD,IAAMG,EAAUF,EAAc,UAAU,EACxC,GAAI,CAACE,EACH,MAAA,MAAMF,EAAc,QAAQ,EACtB,IAAI,MAAM,wBAAwBF,CAAC,KAAKC,CAAC,GAAG,EAIpD,IAAMI,EAAc,MAAMD,EAAQ,YAAY,EAC9C,GAAI,CAACC,EACH,MAAA,MAAMH,EAAc,QAAQ,EACtB,IAAI,MAAM,6BAA6B,EAI/C,IAAMI,EAAaN,GAAKK,EAAY,EAAIA,EAAY,MAAQ,GACtDE,EAAaN,GAAK
I,EAAY,EAAIA,EAAY,OAAS,GAE7D,MAAO,CACL,WAAAC,EACA,WAAAC,EACA,QAAAH,CACF,CACF,CAKA,eAAeI,GAAsBtB,EAAYkB,EAAiE,CAChH,OAAKA,EAUE,CACL,MAAO,OACP,SAJcA,EAAU,MAAMK,GAA0BvB,EAAMkB,CAAqC,EAAI,OAInF,OACpB,WAAY,CAAC,CACf,EAbS,CACL,MAAO,OACP,QAAS,OACT,WAAY,CAAC,CACf,CAUJ,CAwDA,eAAeM,GACbxB,EACAyB,EAIC,CACD,IAAIC,EAAuC,KACvCR,EAAgC,KAEpC,OAAQO,EAAO,KAAM,CACnB,IAAK,QAAS,CAEZ,IAAME,EAAcF,EAAO,SAAW,QAAU,6BAA+B,uBAE/E,GAAIA,EAAO,IAAM,QAAaA,EAAO,IAAM,OACzC,MAGF,IAAMG,EAAwB,MAAMf,GAAoCb,EAAMyB,EAAO,EAAGA,EAAO,CAAC,EAEhGC,EAAc,CACZ,YAAAC,EACA,OAAQ,CACN,WAAYC,EAAsB,WAClC,WAAYA,EAAsB,UACpC,CACF,EACAV,EAAUU,EAAsB,QAChC,KACF,CAEA,IAAK,eAAgB,CACnB,GAAIH,EAAO,IAAM,QAAaA,EAAO,IAAM,OACzC,MAGF,IAAMG,EAAwB,MAAMf,GAAoCb,EAAMyB,EAAO,EAAGA,EAAO,CAAC,EAEhGC,EAAc,CACZ,YAAa,8BACb,OAAQ,CACN,WAAYE,EAAsB,WAClC,WAAYA,EAAsB,UACpC,CACF,EACAV,EAAUU,EAAsB,QAChC,KACF,CAEA,IAAK,OAAQ,CACX,GAAI,CAACH,EAAO,MAAQA,EAAO,KAAK,OAAS,EACvC,MAGF,IAAMI,EAASJ,EAAO,KAAK,CAAC,EAAE,EACxBK,EAASL,EAAO,KAAK,CAAC,EAAE,EACxBM,EAAON,EAAO,KAAK,CAAC,EAAE,EACtBO,EAAOP,EAAO,KAAK,CAAC,EAAE,EAEtBG,EAAwB,MAAMf,GAAoCb,EAAM6B,EAAQC,CAAM,EAE5FJ,EAAc,CACZ,YAAa,YACb,OAAQ,CACN,WAAYE,EAAsB,WAClC,WAAYA,EAAsB,WAClC,QAASG,EAAOF,EAChB,QAASG,EAAOF,CAClB,CACF,EACAZ,EAAUU,EAAsB,QAChC,KACF,CACF,CAEA,IAAMK,EAAc,MAAMX,GAAsBtB,EAAMkB,CAAO,EAE7D,MAAO,CACL,YAAAQ,EACA,YAAAO,CACF,CACF,CAQA,eAAsBC,EACpBC,EACAC,EACAC,EAAwB,CAAC,EACC,CAC1B,GAAI,CACF,GAAM,CAAE,KAAArC,CAAK,EAAIoC,EAGXE,EAAWtC,EAAK,aAAa,EACnC,GAAI,CAACsC,EACH,MAAO,CACL,OAAQ,QACR,MAAO,6BACT,EAGF,IAAMrC,EAAgBqC,EAAS,MACzBpC,EAAiBoC,EAAS,OAEhC3B,EAAY,IAAI,kBAAkBV,CAAa,IAAIC,CAAc,EAAE,EAGnE,IAAMqC,EAAgB,MAAMxC,GAAkBC,EAAMC,EAAeC,CAAc,EAI3EsC,EADSC,GAAa,EACN,KAAK,eAC3B,GAAI,CAACD,EACH,MAAO,CACL,OAAQ,QACR,MAAO,gGACT,EAIF,IAAME,EAAS,IAAI5C,GAAO,CAAE,OAAA0C,CAAO,CAAC,EAEpC7B,EAAY,IAAI,kCAAkC,EASlD,IAAMgC,EAA0B,CAC9B,CACE,KAAM,OACN,QAAS,CACP,CAAE,KAAM,aAAc,KAXb;;;UAGTR,CAAS;CAQ0B,EACnC,CACE,KAAM,cACN,OAAQ,OACR,UAAW,yBAAyBI,CAAa,EACnD,CACF,CACF,CACF,EAIMK,EAAW,MAAMF,EAAO,UAAU,OAAO,CAC7C,MAAO,uBACP,MAAO,CACL,CACE,KAAM,mBACN,cAAezC,EACf,eAAgBC,EAChB,YAAa,SACf,CACF,EACA,MAAOyC,EACP,WAAY,OACZ,YAAa,EACf,CAAC,EAEDhC,EAAY,IAAI,mCAAmC,EAInD,IAAMkC,EADgBD,EAAS,OAAO,OAAQE,GAA6BA,EAAK,OAAS,eAAe,EACrE,CAAC,GAAK,KAEzC,GAAI,CAACD,EAEH,MAAO,CACL,OAAQ,QACR,UAHiBD,EAAS,aAAe,IAGhB,qCACzB,MAAO,mCACT,EAGF,IAAMnB,EAASoB,EAAa,OAC5BlC,EAAY,IAAI,qBAAqB,KAAK,UAAUc,CAAM,CAAC,EAAE,EAG7D,GAAM,CAAE,YAAAC,EAAa,YAAAO,CAAY,EAAI,MAAMT,GAAUxB,EAAMyB,CAAM,EAEjE,OAAKC,EAgBE,CACL,OAAQ,UACR,aAViC,CACjC,mBAAoBS,EACpB,YAAAT,EACA,QAASO,EAAY,SAAW,OAChC,MAAOA,EAAY,OAAS,OAC5B,WAAYA,EAAY,UAC1B,EAKE,UAAW,uDACX,iBAAkB,EACpB,EApBS,CACL,OAAQ,QACR,MAAO,0CACT,CAkBJ,OAASc,EAAY,CACnB,OAAApC,EAAY,MAAM,8BAA+BoC,CAAK,EAC/C,CACL,OAAQ,QACR,MAAOA,EAAM,SAAW,4CAC1B,CACF,CACF,CC9YA,eAAeC,GAAqBC,EAAqC,CAExE,IAAMC,EAAQD,EAAK,QAAQ,EAAE,MAAM,EAC/BE,EAA8B,KAE5BC,EAAqB,CAAC,EAC5B,QAASC,EAAM,EAAGA,EAAMH,EAAM,OAAQG,IAAO,CAC5C,IAAMC,EAAIJ,EAAMG,CAAG,EAGfC,IAAML,IACTE,EAAeE,GAIhB,IAAIE,EAAQ,sBACZ,GAAI,CAEHA,EAAQ,MAAM,QAAQ,KAAK,CAC1BD,EAAE,MAAM,EACR,IAAI,QAAgB,CAACE,EAAGC,IACvB,WAAW,IAAMA,EAAO,IAAI,MAAM,SAAS,CAAC,EAAG,GAAI,CACpD,CACD,CAAC,CACF,MAAgB,CAEhB,CAEA,IAAIC,EAAU,OAAOL,CAAG,KAAKC,EAAE,IAAI,CAAC,GAChCC,IACHG,GAAW,MAAMH,EAAM,MAAM,EAAG,EAAE,CAAC,IAEpCH,EAAS,KAAKM,CAAO,CACtB,CAEA,IAAMC,EAAWP,EAAS,OAAS,EAAIA,EAAS,KAAK;CAAI,EAAI,GAG7D,MAAO,CAAE,eAFcD,IAAiB,KAAO,gBAAgBA,CAAY;EAAO,GAEzD,SAAAQ,CAAS,CACnC,CAKA,eAAeC,GAAkBX,EAAYY,EAAuC,CACnF,GAAM,CAAE,eAAAC,EAAgB,SAAAH,CAAS,EAAI,MAAMX,GAAqBC,CAAI,EAEpE,MAAO,CACN,aAAcY,EACd,WAAYZ,EAAK,IAAI,EACrB,aAAc,MAAMA,EAAK,MAAM,EAC/B,eAAgBa,EAChB,SAAUH,CACX,CACD,CAgCA,eA
AsBI,EACrBC,EACAC,EACkC,CAClC,GAAM,CAAE,KAAAhB,EAAM,WAAAiB,EAAY,cAAAC,CAAc,EAAIH,EAGtCI,EAAgC,OAAOH,GAAY,UACtD,CAAE,mBAAoBA,CAAQ,EAC9BA,GAAW,CAAC,EAGTI,EAAwBF,EAAc,yBAAyB,EAC/D,CAAE,SAAAG,EAAU,iBAAAC,EAAkB,wBAAAC,CAAwB,EAC3D,MAAMN,EAAW,mCAAmCjB,EAAM,CACzD,sBAAAoB,EACA,mBAAoBD,EAAK,mBACzB,qBAAsBA,EAAK,qBAC3B,wBAAyBA,EAAK,wBAC9B,qBAAsBA,EAAK,qBAC3B,aAAcA,EAAK,YACpB,CAAC,EACIP,EAAUS,EAAS,YAAY,0BAA0B,EAGzDG,EAAc,MAAMb,GAAkBX,EAAMY,CAAO,EAGzD,OAAIW,IACHC,EAAY,wBAA0BD,GAGhC,CACN,QAAAX,EACA,iBAAAU,EACA,wBAAAC,EACA,SAAAF,EACA,YAAAG,CACD,CACD,CCjIO,SAASC,EAAyBC,EAAYC,EAAkC,CACtF,OAAKD,EACE,CACN,cAAeA,EAAM,cAAgBA,EAAM,aAAe,EAC1D,kBAAmBA,EAAM,kBAAoBA,EAAM,cAAgB,EACnE,aAAcA,EAAM,aAAe,EACnC,MAAAC,CACD,EANmB,IAOpB,CCdA,OAAgC,gBAAAC,GAAc,UAAAC,OAAgC,KAC9E,OAAS,KAAAC,MAAS,MAwBlB,SAASC,GAAsBC,EAAiC,CAC/D,IAAMC,EAAQD,EAAU,YAAY,EAQpC,MALsB,CACrB,wCACA,+CACA,4CACD,EACkB,KAAKE,GAAWA,EAAQ,KAAKD,CAAK,CAAC,EAC7C,QAIe,CACtB,aACA,4CACA,6BACD,EACmB,KAAKC,GAAWA,EAAQ,KAAKD,CAAK,CAAC,EAC9C,SAIc,CACrB,wDACA,iDACA,gDACD,EACkB,KAAKC,GAAWA,EAAQ,KAAKD,CAAK,CAAC,EAC7C,QAID,KACR,CAuBA,SAASE,GAAgBC,EAA+B,CACvD,GAAI,CAACA,EAAW,OAAO,KAGvB,GAAIA,aAAqB,IACxB,OAAOA,EAAU,KAIlB,GAAI,OAAOA,GAAc,UAAYA,EAAU,KAC9C,OAAO,OAAOA,EAAU,IAAI,EAI7B,GAAI,OAAOA,GAAc,UAAY,OAAOA,EAAU,UAAa,WAAY,CAC9E,IAAMC,EAAMD,EAAU,SAAS,EAC/B,GAAIC,EAAI,WAAW,SAAS,GAAKA,EAAI,WAAW,UAAU,EACzD,OAAOA,CAET,CAGA,OAAI,OAAOD,GAAc,WAAaA,EAAU,WAAW,SAAS,GAAKA,EAAU,WAAW,UAAU,GAChGA,EAGD,IACR,CAMA,SAASE,GAA+BC,EAAsC,CAC7E,OAAOA,EAAS,IAAKC,IAAS,CAC7B,KAAMA,EAAI,KACV,QAAS,MAAM,QAAQA,EAAI,OAAO,EAC/BA,EAAI,QAAQ,IAAKC,GAAqC,CACtD,GAAIA,EAAK,OAAS,QAAS,CAC1B,IAAML,EAAYK,EAAK,MAEjBC,EAAWP,GAAgBC,CAAS,EAC1C,GAAIM,EACH,MAAO,CAAE,KAAM,QAAS,KAAMA,CAAS,EAExC,IAAMC,EAAa,OAAOP,GAAc,SAAWA,EAAY,GAC/D,MAAO,CACN,KAAM,QACN,KAAMO,EAAW,WAAW,OAAO,EAAIA,EAAa,yBAAyBA,CAAU,EACxF,CACD,CACA,MAAO,CAAE,KAAM,OAAQ,KAAMF,EAAK,IAAK,CACvC,CAAC,EACDD,EAAI,OACR,EAAE,CACH,CAeA,eAAsBI,GACrBZ,EACAa,EACAC,EAAwB,CAAC,EACE,CAC3B,GAAM,CAAE,KAAAC,EAAM,cAAAC,CAAc,EAAIH,EAC1BI,EAAQD,EAAc,SAAS,EAC/BE,EAAcJ,EAAQ,aAAe,EAGrCK,EAAmBH,EACvB,mBAAmBhB,CAAS,EAC5B,MAAOoB,IACPC,EAAY,IAAI,kCAAkCD,CAAK,EAAE,EAClD,CAAC,EACR,EAGIE,EAAuBN,EAAc,2BAA2B,EAChEO,EAA0BP,EAAc,iCAAiC,EACzEQ,EAAwBR,EAAc,yBAAyB,EAC/DS,EAAuBT,EAAc,2BAA2B,EAChEU,EAA2BV,EAAc,+BAA+B,EAGxEW,EAAeD,EAA2B3B,GAAsBC,CAAS,EAAI,MAC/E0B,GAA4BC,IAAiB,OAChDN,EAAY,IAAI,sCAAsCM,CAAY,yBAAyB,EAI5F,GAAM,CAAE,iBAAAC,EAAkB,SAAAC,EAAU,YAAAC,CAAY,EAC/C,MAAMC,EAAiBlB,EAAS,CAAE,qBAAAS,EAAsB,wBAAAC,EAAyB,qBAAAE,EAAsB,aAAAE,CAAa,CAAC,EACtHd,EAAQ,SAAWgB,EAKnB,IAAMG,EAFW,IAAIC,GAAmBC,CAAY,EACvB,mBAAmB,EAE9C,IAAKC,GAAc,CACnB,IAAMC,EAAOD,EAAK,SAClB,MAAO,GAAGC,EAAK,IAAI,KAAKA,EAAK,WAAW;cAAiB,KAAK,UAAUA,EAAK,WAAY,KAAM,CAAC,CAAC,EAClG,CAAC,EACA,KAAK;;CAAM,EAGPC,EAAeC,GAAgCN,CAAgB,EAC/DO,EAAa,MAAMpB,EAGnBqB,EAAaC,GAClBX,EACA9B,EACAa,EAAQ,UACRA,EAAQ,iBACR0B,EAAW,OAAS,EAAIA,EAAa,OACrCX,EACAN,EACA,OACAE,EACAX,EAAQ,aACT,EAGMN,EAA4D,CAAC,EAG/DO,EAAQ,aAAeA,EAAQ,YAAY,OAAS,GACvDA,EAAQ,YAAY,QAASN,GAAQ,CAChCA,EAAI,OAAS,OAChBD,EAAS,KAAK,CACb,KAAM,OACN,QAASC,EAAI,OACd,CAAC,EACSA,EAAI,OAAS,aACvBD,EAAS,KAAK,CACb,KAAM,YACN,QAASC,EAAI,OACd,CAAC,CAEH,CAAC,EAIFD,EAAS,KAAK,CACb,KAAM,OACN,QAASiC,CACV,CAAC,EAGD,IAAME,EAAuBpC,GAA+BC,CAAQ,EAK9DoC,EAAeT,EAAa,uBAAuB,EAEnDU,EAAuB9C,EAAE,OAAO,CACrC,QAASA,EAAE,OAAO,EAAE,SAAS,wDAAwD,EAAE,SAAS,EAAE,QAAQ,EAAE,EAC5G,YAAaA,EAAE,OAAO,EAAE,SAAS,oDAAoD,EAAE,SAAS,EAAE,QAAQ,EAAE,EAC5G,OAAQ6C,EACR,sBAAuB7C,EAAE,QAAQ,EAAE,SAAS,qDAAqD,EAAE,SAAS,EAAE,QAAQ,EAAK,CAC5H,CAAC,EAIK+C,GAAa,MAAM,QAAQL,CAAU,EACxCA,EAAW,OAAQ/B,GAAcA,EAAK,OAAS,OAAO,EAAE,OACxD,EACGqC,GAAkBC,EAAmB9B,EAAO4B,EAAU,EAGtDG,EAAS,
MAAMpD,GAAa,CACjC,MAAOqD,EAAShC,CAAK,EACrB,OAAQoB,EACR,SAAA9B,EACA,YAAAW,EACA,OAAQrB,GAAO,OAAO,CAAE,OAAQ+C,CAAqB,CAAC,EACtD,gBAAAE,EACD,CAAC,EAEKI,EAAeF,EAAO,OACtBG,EAAiB,KAAK,UAAUD,EAAc,KAAM,CAAC,EAC3DE,GAAO,MAAM,+BAA+BD,CAAc,EAAE,EAG5D,IAAME,EAA4B,CAAC,EAC7BC,EAAaC,EAA0BP,EAAe,MAAO/B,CAAK,EACpEqC,GACHD,EAAY,KAAKC,CAAU,EAI5B,IAAME,EAAuC,CAC5C,aAAAnB,EACA,WAAYK,EACZ,eAAAS,EAGA,YAAAE,CAED,EAGMI,EAAUP,EAAa,SAAW,GAClCQ,EAAcR,EAAa,aAAe,GAC1CS,EAAST,EAAa,QAAU,CAAC,EACjCU,EAAuBV,EAAa,uBAAyB,GAGnE,GAAI,CAACS,GAAU,OAAO,KAAKA,CAAM,EAAE,SAAW,EAC7C,MAAO,CACN,OAAQ,QACR,UAAWF,GAAWC,GAAe,sBACrC,iBAAkBE,EAClB,MAAO,oCACP,UAAAJ,CACD,EAGD,IAAMK,EAAa,OAAO,KAAKF,CAAM,EAAE,CAAC,EACxC,GAAIE,IAAe,OAClB,MAAO,CACN,OAAQ,QACR,UAAWJ,GAAWC,GAAe,sBACrC,iBAAkBE,EAClB,MAAO,4DACP,UAAAJ,CACD,EAGD,GAAIK,IAAe,6BAElB,OAD+B,MAAMjD,EAAmCZ,EAAWa,EAASC,CAAO,EAIpG,IAAMgD,EAAeH,EAAOE,CAAU,GAAK,CAAC,EAGxCE,EAA2E,CAAC,EAGhF,GAAI,OAAOD,EAAa,eAAkB,SAAU,CACnD,IAAME,EAAeF,EAAa,cAGlC,GAAIE,EAAe,EAClB,MAAO,CACN,OAAQ,QACR,UAAWP,GAAWC,GAAe,sBACrC,iBAAkBE,EAClB,MAAO,oCACP,UAAAJ,CACD,EAGD,IAAMS,EAAapC,EAAS,YAAY,IAAImC,CAAY,EAEpDC,IAEHF,EAAc,MAAMG,GAA2BnD,EAAMkD,CAAU,EAEjE,CAGA,IAAIE,GAAoBT,EACxB,OAAIG,IAAe,WAClBM,GAAoBnE,EACpB8D,EAAa,UAAe9D,GAYtB,CACN,OAAQ,UACR,aAXkC,CAClC,GAAG+D,EACH,mBAAoBI,IAAqBV,GAAW,GAAGI,CAAU,IAAI,KAAK,UAAUC,CAAY,CAAC,IACjG,YAAa,CACZ,YAAaD,EACb,OAAQC,CACT,CACD,EAKC,UAAWL,GAAWC,EACtB,iBAAkBE,EAClB,UAAAJ,CACD,CACD,CClXA,OAAS,gBAAAY,GAAc,UAAAC,OAA2B,KAClD,OAAS,KAAAC,MAAS,MAalB,SAASC,GAAgBC,EAA+B,CACvD,GAAI,CAACA,EAAW,OAAO,KAGvB,GAAIA,aAAqB,IACxB,OAAOA,EAAU,KAIlB,GAAI,OAAOA,GAAc,UAAYA,EAAU,KAC9C,OAAO,OAAOA,EAAU,IAAI,EAI7B,GAAI,OAAOA,GAAc,UAAY,OAAOA,EAAU,UAAa,WAAY,CAC9E,IAAMC,EAAMD,EAAU,SAAS,EAC/B,GAAIC,EAAI,WAAW,SAAS,GAAKA,EAAI,WAAW,UAAU,EACzD,OAAOA,CAET,CAGA,OAAI,OAAOD,GAAc,WAAaA,EAAU,WAAW,SAAS,GAAKA,EAAU,WAAW,UAAU,GAChGA,EAGD,IACR,CAGA,IAAME,GAA0BJ,EAAE,OAAO,CACxC,sBAAuBA,EAAE,OAAO,EAAE,SAAS;;;8BAEd,EAC7B,YAAaA,EAAE,OAAO,EAAE,SAAS,uEAAuE,EACxG,WAAYA,EAAE,KAAK,CAAC,OAAQ,QAAS,SAAS,CAAC,EAAE,SAAS,kFAAkF,CAC7I,CAAC,EAED,eAAsBK,GACrBC,EACAC,EACAC,EAAwB,CAAC,EACE,CAC3B,OAAIA,EAAQ,cACJH,EAAmCC,EAAWC,EAASC,CAAO,EAE9DH,GAA+BC,EAAWC,EAASC,CAAO,CAEnE,CAKA,SAASC,IAAiE,CACzE,IAAMC,EAAM,IAAI,KACVC,EAAaD,EAAI,mBAAmB,QAAS,CAClD,QAAS,OACT,KAAM,UACN,MAAO,OACP,IAAK,UACL,SAAU,qBACX,CAAC,EACKE,EAAaF,EAAI,mBAAmB,QAAS,CAClD,KAAM,UACN,OAAQ,UACR,OAAQ,UACR,uBAAwB,EACxB,aAAc,QACd,SAAU,qBACX,CAAC,EACD,MAAO,CAAE,WAAAC,EAAY,WAAAC,CAAW,CACjC,CAOA,SAASC,IAAoC,CAC5C,MAAO;;;CAIR,CAeA,eAAsBC,GACrBR,EACAC,EACAC,EAAwB,CAAC,EACE,CAC3B,GAAM,CAAE,KAAAO,EAAM,iBAAAC,CAAiB,EAAIT,EAC7BU,EAAQV,EAAQ,cAAc,SAAS,EAE7C,GAAI,CAEH,IAAMW,EAAuBX,EAAQ,cAAc,2BAA2B,EACxEY,EAA0BZ,EAAQ,cAAc,iCAAiC,EACjFa,EAAwBb,EAAQ,cAAc,yBAAyB,EACvEc,EAAuBd,EAAQ,cAAc,2BAA2B,EAGxE,CAAE,QAAAe,EAAS,iBAAAC,EAAkB,wBAAAC,EAAyB,SAAAC,EAAU,YAAAC,CAAY,EAAI,MAAMC,EAAiBpB,EAAS,CACrH,mBAAoBC,EAAQ,+BAC5B,qBAAAU,EACA,wBAAAC,EACA,qBAAAE,CACD,CAAC,EACDd,EAAQ,SAAWkB,EAGnB,IAAIG,EAAuB,GACvBZ,GAAoBA,EAAiB,OAAS,IAIjDY,EAAuB;;EAHFZ,EAAiB,IAAI,CAAC,CAACa,EAAQC,CAAM,EAAGC,IAC5D,GAAGA,EAAM,CAAC,aAAaF,CAAM;aAAgBC,CAAM,EACpD,EAAE,KAAK;CAAI,CACkE;GAG9E,GAAM,CAAE,WAAAnB,EAAY,WAAAC,CAAW,EAAIH,GAAmB,EAGhDuB,EAAa;;GAElB1B,CAAS;;;;;;;;;;;;;;;;;;;;;;;;EAwBVoB,EAAY,cAAc;EAC1BA,EAAY,QAAQ;;;;;;;;EAQpBJ,CAAO;;;EAGPJ,GAAwBM,EAA0B,qGAAuG,4DAA4D;EAIjNS,EAAmB,GACvB,GAAI1B,EAAQ,WAAa,OAAO,KAAKA,EAAQ,SAAS,EAAE,OAAS,EAAG,CACnE,IAAM2B,EAAyB,CAAC,EAChC,QAAWC,KAAO,OAAO,KAAK5B,EAAQ,SAAS,EAE9C,GAAI,CADgBA,EAAQ,eAAe,IAAI4B,CAAG,EAChC,CAEjB,IAAMC,EAAQ7B,EAAQ,UAAU4B,CAAG,EAC7BE,EAAW,OAAOD,GAAU,SAAWA,EAAQ,KAAK,UAAUA,CAAK,EACzEF,EAAa,KAAK,OAAOC,CAAG,M
AAME,CAAQ,GAAG,CAC9C,CAGGH,EAAa,OAAS,IACzBD,EAAmB;;;EAGrBC,EAAa,KAAK;CAAI,CAAC,GAEvB,CAEA,IAAMI,EAAe;EACrBL,CAAgB;EAChBL,CAAoB;;WAEXjB,CAAU,2BAA2BC,CAAU;;EAKlD2B,EAAa,MAAMhC,EAAQ,cAAc,mBAAmBD,CAAS,EAGrEkC,EAAkC,CACvC,CAAE,KAAM,OAAQ,KAAMR,CAAW,CAClC,EAGIS,EAAkB,EACtB,GAAIvB,GAAwBM,GAA2BA,EAAwB,OAAS,EAEvF,QAAWkB,KAASlB,EACnBgB,EAAmB,KAAK,CAAE,KAAM,QAAS,MAAOE,CAAM,CAAC,EACvDD,SAIDD,EAAmB,KAAK,CAAE,KAAM,QAAS,MAAOjB,CAAiB,CAAC,EAClEkB,EAAkB,EAInB,GAAIF,GAAcA,EAAW,OAAS,EAAG,CACxC,IAAMI,EAAiBC,EAAqBL,EAAYnB,CAAqB,EAC7EoB,EAAmB,KAAK,GAAGG,CAAc,CAC1C,CAEAH,EAAmB,KAAK,CAAE,KAAM,OAAQ,KAAMF,CAAa,CAAC,EAG5D,IAAMO,EAAsBN,EAAaO,GAAqBP,EAAYnB,CAAqB,EAAI,EAC7F2B,GAAkBN,EAAkBI,EACpCG,GAAkBC,EAAmBhC,EAAO8B,EAAe,EAE3DG,EAAerC,GAA0B,EAEzCiB,EAAS,MAAMhC,GAAa,CACjC,MAAOqD,EAASlC,CAAK,EACrB,OAAQiC,EACR,SAAU,CACT,CACC,KAAM,OACN,QAASV,CACV,CACD,EACA,OAAQzC,GAAO,OAAO,CAAE,OAAQK,EAAwB,CAAC,EACzD,YAAa,EACb,gBAAA4C,EACD,CAAC,EAEK,CAAE,WAAAI,EAAY,YAAAC,CAAY,EAAIvB,EAAO,OACrCwB,EAAiB,KAAK,UAAUxB,EAAO,OAAQ,KAAM,CAAC,EAGtDyB,EAA4B,CAAC,EAC7BC,EAAaC,EAA0B3B,EAAe,MAAOb,CAAK,EACpEuC,GACHD,EAAY,KAAKC,CAAU,EAI5B,IAAME,EAA4C,CACjD,CACC,KAAM,OACN,QAASlB,EAAmB,IAAKmB,GAAgC,CAChE,GAAIA,EAAK,OAAS,OACjB,MAAO,CAAE,KAAM,OAAiB,KAAMA,EAAK,IAAK,EAC1C,GAAIA,EAAK,OAAS,QAAS,CACjC,IAAMzD,EAAayD,EAAa,MAE1BC,EAAW3D,GAAgBC,CAAS,EAC1C,GAAI0D,EACH,MAAO,CAAE,KAAM,QAAkB,KAAMA,CAAS,EAEjD,IAAMC,EAAa,OAAO3D,GAAc,SAAWA,EAAY,GAC/D,MAAO,CACN,KAAM,QACN,KAAM2D,EAAW,WAAW,OAAO,EAAIA,EAAa,yBAAyBA,CAAU,EACxF,CACD,CACA,MAAO,CAAE,KAAM,OAAiB,KAAM,wBAAyB,CAChE,CAAC,CACF,CACD,EAGMC,EAAuC,CAC5C,aAAAZ,EACA,WAAYQ,EACZ,eAAAJ,EACA,kBAAmB/B,EACnB,YAAAgC,EACA,oBAAqBhB,GAAcA,EAAW,OAAS,EAAIA,EAAa,OACxE,YAAajB,CACd,EAEA,MAAO,CACN,QAAS8B,IAAe,OACxB,YAAAC,EACA,UAAAS,CACD,CACD,OAASC,EAAO,CACf,MAAO,CACN,QAAS,GACT,MAAQA,EAAgB,OACzB,CACD,CACD,CCtTA,eAAsBC,GACrBC,EACAC,EACgD,CAChD,GAAI,CACH,GAAI,CAACD,EAAa,YACjB,MAAO,CACN,QAAS,GACT,MAAO,mCACR,EAGD,GAAM,CAAE,YAAAE,EAAa,OAAAC,CAAO,EAAIH,EAAa,YAC7C,GAAII,EAAa,IAAIF,CAAW,EAAG,CAClC,IAAMG,EAAS,MAAMD,EAAa,QAAQF,EAAaC,EAAQF,CAAO,EAChEK,EAAWD,GAAgB,UAAY,GACvCE,EAASF,GAAgB,OAAUA,GAAgB,QACzD,MAAO,CACN,QAAAC,EACA,MAAOA,EAAU,OAAYC,CAC9B,CACD,KAAO,CACN,GAAM,CAAE,KAAAC,EAAM,cAAAC,CAAc,EAAIR,EAEhC,OAAA,MADgB,MAAMS,GAAiB,GACzB,QAAQF,EAAMR,EAAcS,CAAa,EAEhD,CACN,QAAS,GACT,MAAO,MACR,CACD,CACD,OAASF,EAAO,CACf,MAAO,CACN,QAAS,GACT,MAAQA,EAAgB,OACzB,CACD,CACD,CAWA,IAAII,EAA4C,KAChD,eAAeD,IAA2C,CACzD,GAAIC,EACH,OAAOA,EAGR,IAAMC,EAAqD,KAAM,QAAO,uBAAoB,EAC5F,OAAAD,EAAsB,IAAIC,EAAc,QACjCD,CACR,CAiBA,eAAsBE,GACrBC,EACAN,EACAC,EACAM,EAAwB,CAAC,EACzBC,EACsB,CAEtB,IAAMf,EAAgCe,GAAmB,CACxD,KAAAR,EACA,cAAAC,EACA,WAAY,IAAIQ,EAAWR,EAAc,qBAAqB,CAAC,EAC/D,iBAAkBM,EAAQ,iBAC1B,UAAWA,EAAQ,UACnB,cAAeA,EAAQ,aACxB,EAGMG,EAAoBT,EAAc,iBAAiBK,CAAS,EAG5DK,EAAkB,MAAMC,GAAeF,EAAmBjB,EAASc,CAAO,EAGhF,OAAII,EAAgB,SAAW,QACvB,CACN,OAAQ,QACR,UAAWA,EAAgB,kBAAoB,GAC/C,eAAgB,CAAC,EACjB,YAAaA,EAAgB,UAC7B,MAAOA,EAAgB,MACvB,UAAWA,EAAgB,SAC5B,EAIM,CACN,OAAQ,UACR,UAAWA,EAAgB,kBAAoB,GAC/C,eAAgBA,EAAgB,aAAe,CAACA,EAAgB,YAAY,EAAI,CAAC,EACjF,YAAaA,EAAgB,UAC7B,UAAWA,EAAgB,SAC5B,CACD,CAiBA,eAAsBE,GACrBP,EACAN,EACAC,EACAM,EAAwB,CAAC,EACzBC,EACsB,CAEtB,IAAMf,EAAgCe,GAAmB,CACxD,KAAAR,EACA,cAAAC,EACA,WAAY,IAAIQ,EAAWR,EAAc,qBAAqB,CAAC,EAC/D,iBAAkBM,EAAQ,iBAC1B,UAAWA,EAAQ,UACnB,cAAeA,EAAQ,aACxB,EAGMG,EAAoBT,EAAc,iBAAiBK,CAAS,EAG5DK,EAAkB,MAAMC,GAAeF,EAAmBjB,EAASc,CAAO,EAGhF,GAAII,EAAgB,SAAW,SAAW,CAACA,EAAgB,aAC1D,MAAO,CACN,OAAQ,QACR,UAAWA,EAAgB,kBAAoB,GAC/C,eAAgB,CAAC,EACjB,YAAaA,EAAgB,UAC7B,MAAOA,EAAgB,OAAS,sBAChC,UAAWA,EAAgB,SAC5B,EAGD,GAAM,CAAE,aAAAnB,EAAc,UAAAsB,EAAW,iBAAAC,EAAkB,UAAAC,CAAU,EAAIL,EAG3D
M,EAAkB,MAAM1B,GAAcC,EAAcC,CAAO,EAGjE,OAAKwB,EAAgB,SAWjBH,GACHb,EAAc,QAAQa,CAAS,EAIzB,CACN,OAAQ,UACR,UAAWC,GAAoB,GAC/B,eAAgB,CAACvB,CAAY,EAC7B,YAAasB,EACb,UAAAE,CACD,GArBQ,CACN,OAAQ,QACR,UAAW,GACX,eAAgB,CAACxB,CAAY,EAC7B,MAAOyB,EAAgB,OAAS,0BAChC,UAAAD,CACD,CAgBF,CAcA,eAAsBE,GACrBZ,EACAN,EACAC,EACAM,EAAwB,CAAC,EACE,CAE3B,IAAMd,EAAgC,CACrC,KAAAO,EACA,cAAAC,EACA,WAAY,IAAIQ,EAAWR,EAAc,qBAAqB,CAAC,EAC/D,iBAAkBM,EAAQ,iBAC1B,UAAWA,EAAQ,UACnB,cAAeA,EAAQ,aACxB,EAGMG,EAAoBT,EAAc,iBAAiBK,CAAS,EAClE,OAAAC,EAAQ,+BAAiCN,EAAc,iCAAiC,EAEjFkB,GAAST,EAAmBjB,EAASc,CAAO,CACpD,CAmBA,eAAsBa,GACrBC,EACArB,EACAC,EACAqB,EACAf,EAAwB,CAAC,EACH,CAEtB,GAAM,CAAE,YAAAgB,CAAY,EAAI,KAAM,QAAO,oBAAQ,EAGvCC,EAAmBF,EACrBG,GAAe,CAEhBH,EAAQG,CAAmB,CAC3B,EACA,OAGG5B,EAAS,MAAM0B,EACpBF,EACA,CACC,KAAArB,EACA,cAAAC,EACA,WAAY,OACZ,iBAAkBM,EAAQ,iBAC1B,UAAWA,EAAQ,UACnB,cAAeA,EAAQ,aACxB,EACA,CACC,SAAUA,EAAQ,SAClB,QAASiB,EACT,YAAajB,EAAQ,WACtB,CACD,EAGA,MAAO,CACN,OAAQV,EAAO,QAAU,UAAY,QACrC,UAAWA,EAAO,UAClB,eAAgBA,EAAO,WAAW,QAClC,YAAaA,EAAO,QACpB,MAAOA,EAAO,MACd,YAAaA,EAAO,SAAS,WAC9B,CACD","names":["zodToJsonSchema","zodToOpenAISchema","schema","jsonSchema","makeStrictCompatible","makeStrictCompatible","schema","allProps","prop","key","value","OpenAIToolProvider","registry","tool","zodToOpenAISchema","toolNames","toolSet","toolName","parseKnowledgeWithImages","content","images","hashToUrl","img","pattern","parts","lastEnd","match","textBefore","hashOrUuid","remainingText","createMultimodalContentParts","knowledgeItems","allParts","item","DEFAULT_ENABLE_KNOWLEDGE_IMAGES","createKnowledgeParts","enableImages","preamble","postamble","textContent","countKnowledgeImages","count","getCurrentTimeForPrompt","now","parts","year","p","month","day","hour","minute","second","timeZoneName","milliseconds","getActionGenerationSystemPrompt","actionDescription","formatExecutionHistory","executionHistory","historyText","description","feedback","index","getActionGenerationUserPrompt","pageContext","goal","placeholderData","knowledges","screenshotBase64","useSlicedScreenshots","currentTime","enableKnowledgeImages","sensitiveKeys","messages","firstPart","screenshot","knowledgeParts","createKnowledgeParts","endingText","placeholderList","key","value","valueStr","executionHistoryText","OpenAI","prepareScreenshot","page","viewportWidth","viewportHeight","container","fn","screenshot","sharp","image","metadata","actualWidth","actualHeight","agentLogger","resizedScreenshot","convertToElementAnchoredCoordinates","x","y","elementHandle","coords","element","boundingBox","relative_x","relative_y","getElementLocatorInfo","pickBestLocatorForElement","mapAction","action","action_data","action_name","elementAnchoredCoords","startX","startY","endX","endY","locatorInfo","generateAction","statement","context","options","viewport","screenshotB64","apiKey","getSdkConfig","client","cuaInput","response","computerCall","item","error","formatTabInformation","page","pages","currentTabId","tabsList","idx","p","title","_","reject","tabDesc","tabsText","formatPageContext","domTree","currentTabText","buildPageContext","context","options","domService","agentServices","opts","interactiveClassNames","domState","screenshotBase64","slicedScreenshotsBase64","pageContext","convertUsageToTokenUsage","usage","model","generateText","Output","z","determineActionIntent","statement","lower","pattern","extractImageUrl","imageData","str","convertMessagesToLoggingFormat","messages","msg","part","imageUrl","base64Data","generateAction","context","options","page","agentServices","model","temperature","kn
owledgePromise","error","agentLogger","useSlicedScreenshots","resizeSlicedScreenshots","enableKnowledgeImages","useAccessibilityTree","useActionIntentFiltering","actionIntent","screenshotBase64","domState","pageContext","buildPageContext","toolDescriptions","OpenAIToolProvider","toolRegistry","tool","func","systemPrompt","getActionGenerationSystemPrompt","knowledges","userPrompt","getActionGenerationUserPrompt","userMessagesForDebug","actionSchema","actionResponseSchema","imageCount","providerOptions","getProviderOptions","result","getModel","jsonResponse","rawLlmResponse","logger_default","tokenUsages","tokenUsage","convertUsageToTokenUsage","debugInfo","thought","description","action","completesInstruction","actionName","actionParams","locatorInfo","elementIndex","domElement","getActionEntityLocatorInfo","actionDescription","generateText","Output","z","extractImageUrl","imageData","str","assertionResponseSchema","generateAction","statement","context","options","getCurrentTimeInfo","now","dateString","timeString","getEvaluationSystemPrompt","evaluate","page","executionHistory","model","useSlicedScreenshots","resizeSlicedScreenshots","enableKnowledgeImages","useAccessibilityTree","domTree","screenshotBase64","slicedScreenshotsBase64","domState","pageContext","buildPageContext","executionHistoryText","action","result","idx","userPrompt","variablesContext","variableList","key","value","valueStr","endingPrompt","knowledges","userMessageContent","screenshotCount","slice","knowledgeParts","createKnowledgeParts","knowledgeImageCount","countKnowledgeImages","totalImageCount","providerOptions","getProviderOptions","systemPrompt","getModel","conclusion","explanation","rawLlmResponse","tokenUsages","tokenUsage","convertUsageToTokenUsage","userPromptForLogging","part","imageUrl","base64Data","debugInfo","error","executeAction","actionEntity","context","action_name","kwargs","toolRegistry","result","success","error","page","agentServices","getActionHandler","cachedActionHandler","handlerModule","generateActionStep","statement","options","existingContext","DomService","resolvedStatement","generatedAction","generateAction","executeStep","reasoning","goalAccomplished","debugInfo","executionResult","evaluateStatement","evaluate","runTask","task","onEvent","runTaskLoop","taskAgentOnEvent","event"]}