npm - @empiricalrun/test-gen - Versions diffs - 0.42.4 → 0.42.9 - Mend

@empiricalrun/test-gen 0.42.4 → 0.42.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

package/CHANGELOG.md +39 -0
package/dist/agent/browsing/utils.d.ts.map +1 -1
package/dist/agent/browsing/utils.js +1 -0
package/dist/agent/codegen/create-test-block.d.ts.map +1 -1
package/dist/agent/codegen/create-test-block.js +4 -2
package/dist/agent/codegen/lexical-scoped-vars.d.ts.map +1 -1
package/dist/agent/codegen/lexical-scoped-vars.js +4 -6
package/dist/agent/codegen/promptBuilder.d.ts +3 -0
package/dist/agent/codegen/promptBuilder.d.ts.map +1 -0
package/dist/agent/codegen/promptBuilder.js +44 -0
package/dist/agent/codegen/repo-edit.js +1 -1
package/dist/agent/master/action-tool-calls.d.ts +40 -0
package/dist/agent/master/action-tool-calls.d.ts.map +1 -0
package/dist/agent/master/action-tool-calls.js +83 -0
package/dist/agent/master/element-annotation.d.ts +7 -2
package/dist/agent/master/element-annotation.d.ts.map +1 -1
package/dist/agent/master/element-annotation.js +13 -3
package/dist/agent/master/next-action.d.ts +12 -14
package/dist/agent/master/next-action.d.ts.map +1 -1
package/dist/agent/master/next-action.js +62 -63
package/dist/agent/master/run.d.ts.map +1 -1
package/dist/agent/master/run.js +68 -51
package/dist/agent/master/scroller.d.ts +15 -0
package/dist/agent/master/scroller.d.ts.map +1 -0
package/dist/agent/master/scroller.js +371 -0
package/dist/agent/master/with-hints.d.ts.map +1 -1
package/dist/agent/master/with-hints.js +4 -1
package/dist/agent/utils.d.ts +2 -0
package/dist/agent/utils.d.ts.map +1 -0
package/dist/agent/utils.js +12 -0
package/dist/bin/utils/platform/web/index.d.ts.map +1 -1
package/dist/bin/utils/platform/web/index.js +2 -0
package/dist/browser-injected-scripts/annotate-elements.js +122 -74
package/dist/browser-injected-scripts/annotate-elements.spec.d.ts +2 -0
package/dist/browser-injected-scripts/annotate-elements.spec.d.ts.map +1 -0
package/dist/browser-injected-scripts/annotate-elements.spec.js +186 -0
package/dist/browser-injected-scripts/annotate-elements.spec.ts +52 -26
package/dist/evals/master-agent.evals.d.ts.map +1 -1
package/dist/evals/master-agent.evals.js +5 -4
package/dist/prompts/lib/ts-transformer.d.ts +4 -0
package/dist/prompts/lib/ts-transformer.d.ts.map +1 -0
package/dist/prompts/lib/ts-transformer.js +90 -0
package/dist/prompts/lib/vitest-plugin.d.ts +8 -0
package/dist/prompts/lib/vitest-plugin.d.ts.map +1 -0
package/dist/prompts/lib/vitest-plugin.js +20 -0
package/dist/session/index.d.ts.map +1 -1
package/dist/session/index.js +4 -0
package/package.json +10 -8
package/playwright.config.ts +1 -1
package/vitest.config.ts +5 -0
package/browser-injected-scripts/annotate-elements.js +0 -491
package/browser-injected-scripts/annotate-elements.spec.ts +0 -277

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,44 @@
 # @empiricalrun/test-gen
+## 0.42.9
+### Patch Changes
+- 7560d20: chore: fix test gen configs in tests
+- 13eb978: fix: tests for create-test-block and move prompt to hbs
+- 128123f: fix: added scroller method to master agent
+- 347e20f: feat: prompt builder with handlebars templates
+## 0.42.8
+### Patch Changes
+- 18444bc: fix: pass action to BA in case of no annotations
+## 0.42.7
+### Patch Changes
+- b6879bb: test: added assert scenario for annotations
+## 0.42.6
+### Patch Changes
+- b0b578c: fix: added page for token debugging
+- 25ae4b6: fix: added preference in trace for annotations
+- 9247f19: fix: assertion capability with selector hints
+## 0.42.5
+### Patch Changes
+- 8f5a315: fix: trace url is clickable
+- 02784a8: chore: move browser-injected-scripts to enable lint
+- Updated dependencies [4df759f]
+- Updated dependencies [1749342]
+  - @empiricalrun/llm@0.9.30
 ## 0.42.4
 ### Patch Changes

package/dist/agent/browsing/utils.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAa,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAI3D,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAiBvD,OAAO,EAAe,aAAa,EAAE,MAAM,aAAa,CAAC;AAMzD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;~~AA6FD~~;;;;GAIG;AACH,wBAAsB,yBAAyB,CAC7C,SAAS,EAAE,aAAa,EACxB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,MAAM,CAAC,CA0DjB;AAyBD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBAuHxD;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,QAIjD;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,IAAI,OAAO,CAAC,oBAAoB,CAAC,CAM1E;AAWD;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,YAAY,EAAE,MAAM,EACpB,gBAAgB,EAAE,oBAAoB,EACtC,gBAAgB,GAAE,MAAM,EAAU,GACjC,OAAO,CAAC,MAAM,CAAC,CA+CjB;AAED,wBAAsB,sBAAsB,CAAC,EAC3C,YAAiB,EACjB,IAAS,EACT,eAAoB,EACpB,gBAAqB,EACrB,UAAyC,GAC1C,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB,8EASA;AAED,qBAAa,eAAe;IACd,OAAO,CAAC,SAAS;gBAAT,SAAS,EAAE,MAAM;IACrC,OAAO,CAAC,aAAa,CAAqB;YAE5B,mBAAmB;YAUnB,gBAAgB;IAsBjB,OAAO;IAuBb,SAAS;CAKjB"}
1	+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAa,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAI3D,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAiBvD,OAAO,EAAe,aAAa,EAAE,MAAM,aAAa,CAAC;AAMzD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;AA8FD;;;;GAIG;AACH,wBAAsB,yBAAyB,CAC7C,SAAS,EAAE,aAAa,EACxB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,MAAM,CAAC,CA0DjB;AAyBD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBAuHxD;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,QAIjD;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,IAAI,OAAO,CAAC,oBAAoB,CAAC,CAM1E;AAWD;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,YAAY,EAAE,MAAM,EACpB,gBAAgB,EAAE,oBAAoB,EACtC,gBAAgB,GAAE,MAAM,EAAU,GACjC,OAAO,CAAC,MAAM,CAAC,CA+CjB;AAED,wBAAsB,sBAAsB,CAAC,EAC3C,YAAiB,EACjB,IAAS,EACT,eAAoB,EACpB,gBAAqB,EACrB,UAAyC,GAC1C,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB,8EASA;AAED,qBAAa,eAAe;IACd,OAAO,CAAC,SAAS;gBAAT,SAAS,EAAE,MAAM;IACrC,OAAO,CAAC,aAAa,CAAqB;YAE5B,mBAAmB;YAUnB,gBAAgB;IAsBjB,OAAO;IAuBb,SAAS;CAKjB"}

package/dist/agent/browsing/utils.js CHANGED Viewed

@@ -61,6 +61,7 @@ async function prepareFileForUpdateScenario(genConfig, trace) {
     const scopeVariables = await (0, lexical_scoped_vars_1.getLexicalScopedVars)({
         file: await fs_extra_1.default.readFile(createTestFilePath, "utf-8"),
         referencePoint: "await createTest",
+        trace: fetchScopeVariablesSpan,
     });
     fetchScopeVariablesSpan?.end({
         name: "fetch-scope-variables",

package/dist/agent/codegen/create-test-block.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"create-test-block.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/create-test-block.ts"],"names":[],"mappings":"AAAA,OAAO,~~EAIL~~,WAAW,~~EACZ~~,MAAM,mBAAmB,CAAC;~~AAY3B~~,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;~~AAE7D~~,wBAAsB,wBAAwB,CAAC,EAC7C,QAAQ,EACR,IAAI,EACJ,OAAO,EACP,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,+~~BAyDA~~"}
1	+ {"version":3,"file":"create-test-block.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/create-test-block.ts"],"names":[],"mappings":"AAAA,OAAO,EAAyB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAavE,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAG7D,wBAAsB,wBAAwB,CAAC,EAC7C,QAAQ,EACR,IAAI,EACJ,OAAO,EACP,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,+BAqDA"}

package/dist/agent/codegen/create-test-block.js CHANGED Viewed

@@ -6,7 +6,9 @@ const logger_1 = require("../../bin/logger");
 const context_1 = require("../../bin/utils/context");
 const web_1 = require("../../bin/utils/platform/web");
 const constants_1 = require("../../constants");
+const promptTemplate_0 = "{{#section \"system\"}}\nYou are a software test engineer who is given a task to write an empty test block.\nBased on the inputs you need to create an empty playwright test block with correctly imported fixture.\n\nThe test will contain a test name which you will need to use to build the empty test case block.\n\nYou will be provided with current tests, fixtures and page object models for you to use and create test case block as\nper the task provided to you.\n\nBefore responding you need to ensure that the code change is minimal and the change is reusable across tests. You need\nto ensure the code follows DRY principle.\n\nHere is the list of current tests and fixtures:\n\n{{testFiles}}\n\nHere is the list of current page object models:\n\n{{pageFiles}}\n{{/section}}\n\n{{#section \"user\"}}\nFollowing is the test scenario for which you need to write the empty test case block:\ntest name:\n{{scenarioName}}\n\ntask:\ncreate an empty test case block for the following test steps:\n{{scenario}}\n\ntest file path: {{scenarioFile}}\n\n------\n\nYou also need to ensure that the empty test case block has a starting page to begin test.\n\nIn order to identify the right page with which the test should start, follow the steps:\n- based on the similarities with other test cases mentioned in the file, identify the right page fixture to be imported\n- Read the page fixture methods step by step. Identify whether the fixture handles navigating to a page.\n- Identify whether other tests using the page fixture had to add separate steps for navigation or not\n- Based on the above analysis there will be following cases and choose either for the given test scenario:\n-- Case 1: if the test case scenario provided inside the task mentions about page navigation, then use that page\nnavigation. 
skip other cases if this case is satisfied.\n-- Case 2: refer other test cases which import similar fixtures and infer the first page navigation of this test case.\nYou should prefer tests which are in the same file. Tests within same file have higher overlaps in first page\nnavigation.\n- Once the page fixture is decided, look for userContext fixture in files. If its available then add \"userContext\" to\nthe test case block\n\n\n\nFollow these instructions before responding with output:\n- Read the code line by line and achieve the task provided to you\n- Read the dependencies of the code block by scanning through file paths and file provided to you. refer the same file\npath while responding with update\n- Focus only on the test case provided and associated JS methods called from the test case.\n- Respond only with the new empty test case block to be created and nothing else.\n- DO NOT respond with any backticks or markdown syntax\n- If \"userContext\" fixture is available in fixtures file, ensure importing that fixture in the test case block.\n- Provide a reason based on the test steps provided to you on why you chose the fixture or page.goto statement. The\nreason should be one of the list steps provided to you and mention why the case was chosen\n{{/section}}";
 const session_1 = require("../../session");
+const promptBuilder_1 = require("./promptBuilder");
 async function createEmptyTestCaseBlock({ testCase, file, options, trace, }) {
     const logger = new logger_1.CustomLogger({ useReporter: false });
     logger.log("Creating new test block");
@@ -27,13 +29,13 @@ async function createEmptyTestCaseBlock({ testCase, file, options, trace, }) {
     const promptSpan = trace?.span({
         name: "build-create-empty-test-case-prompt",
     });
-    const prompt = await (0, llm_1.getPrompt)("create-empty-test-block", {
+    const prompt = await (0, promptBuilder_1.compilePrompt)(promptTemplate_0, {
         testFiles: context.codePrompt,
         pageFiles: context.pomPrompt,
         scenarioName: testCase.name,
         scenario: testCase.steps.join("\n"),
         scenarioFile: file,
-    }, 17);
+    });
     promptSpan?.end({ output: { prompt } });
     const llm = new llm_1.LLM({
         trace,

package/dist/agent/codegen/lexical-scoped-vars.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"lexical-scoped-vars.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/lexical-scoped-vars.ts"],"names":[],"mappings":"AAAA,OAAO,~~EAAkB~~,WAAW,EAAE,MAAM,mBAAmB,CAAC;~~AAQhE~~,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;~~AAEnD~~,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,IAAI,EACJ,cAAc,EACd,OAAO,GACR,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,IAAI,EAAE,MAAM,CAAC;IACb,cAAc,EAAE,MAAM,CAAC;IACvB,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,~~qBA0DA~~"}
1	+ {"version":3,"file":"lexical-scoped-vars.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/lexical-scoped-vars.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AASrD,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAGnD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,IAAI,EACJ,cAAc,EACd,OAAO,GACR,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,IAAI,EAAE,MAAM,CAAC;IACb,cAAc,EAAE,MAAM,CAAC;IACvB,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,qBAqDA"}

package/dist/agent/codegen/lexical-scoped-vars.js CHANGED Viewed

@@ -3,18 +3,16 @@ Object.defineProperty(exports, "__esModule", { value: true });
 exports.getLexicalScopedVars = void 0;
 const llm_1 = require("@empiricalrun/llm");
 const constants_1 = require("../../constants");
+const promptTemplate_0 = "{{#section \"system\"}}\nYou are a software engineer tasked with analysing Typescript code to identify all variables available in the lexical\nscope at a specific reference point within a file. You will be given a file that contains multiple Playwright tests or\npage object models, along with a reference point inside the file. Your goal is to evaluate the list of all variables\navailable in the lexical scope at that reference point.\n\nTo accomplish this, you need to evaluate the Abstract Syntax Tree (AST) and accumulate all variables that are in the\nlexical scope, which includes:\n1. Variables declared within the test before the reference point.\n2. Arguments of the function.\n3. Variables defined in the parent scope. Identify all variables available in the lexical scope at a specific execution\nreference point within a file, considering only those variables that have been declared and assigned prior to the\nexecution of this point in the code.\n4. Global variables defined in the file.\n\nBefore responding:\n- Ignore variables imported from the `\"./pages\"` path.\n- keep in mind temporal dead zone phenomenon before responding with variables\n{{/section}}\n\n{{#section \"user\"}}\nFile:\n{{testFile}}\n\nReference point:\n{{referencePoint}}\n{{/section}}";
+const promptBuilder_1 = require("./promptBuilder");
 async function getLexicalScopedVars({ trace, file, referencePoint, options, }) {
     const fetchLexicalScopedVarsSpan = trace?.span({
         name: "lexical-scoped-vars",
     });
-    const promptSpan = fetchLexicalScopedVarsSpan?.span({
-        name: "lexical-scoped-vars-prompt",
-    });
-    const instruction = await (0, llm_1.getPrompt)("lexical-scope-variables", {
+    const messages = await (0, promptBuilder_1.compilePrompt)(promptTemplate_0, {
         testFile: file || "",
         referencePoint: referencePoint || "",
     });
-    promptSpan?.end({ output: { instruction } });
     const llm = new llm_1.LLM({
         trace: fetchLexicalScopedVarsSpan,
         provider: options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
@@ -22,7 +20,7 @@ async function getLexicalScopedVars({ trace, file, referencePoint, options, }) {
         providerApiKey: constants_1.MODEL_API_KEYS[options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER],
     });
     const message = await llm.createChatCompletion({
-        messages: instruction,
+        messages,
         modelParameters: {
             ...constants_1.DEFAULT_MODEL_PARAMETERS,
             ...options?.modelParameters,

package/dist/agent/codegen/promptBuilder.d.ts ADDED Viewed

@@ -0,0 +1,3 @@
+import OpenAI from "openai";
+export declare function compilePrompt<T extends object>(promptTemplate: string, params: T): Promise<OpenAI.Chat.Completions.ChatCompletionMessageParam[]>;
+//# sourceMappingURL=promptBuilder.d.ts.map

package/dist/agent/codegen/promptBuilder.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"promptBuilder.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/promptBuilder.ts"],"names":[],"mappings":"AACA,OAAO,MAAM,MAAM,QAAQ,CAAC;AA6B5B,wBAAsB,aAAa,CAAC,CAAC,SAAS,MAAM,EAClD,cAAc,EAAE,MAAM,EACtB,MAAM,EAAE,CAAC,GACR,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,0BAA0B,EAAE,CAAC,CAe/D"}

package/dist/agent/codegen/promptBuilder.js ADDED Viewed

@@ -0,0 +1,44 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.compilePrompt = void 0;
+const handlebars_1 = __importDefault(require("handlebars"));
+class SectionManager {
+    sections = {};
+    getSection(name) {
+        return this.sections[name] || "";
+    }
+    setSection(name, content) {
+        this.sections[name] = content;
+    }
+    getAllSections() {
+        return this.sections;
+    }
+}
+function createHandlebarsEnv() {
+    const HandlebarsEnv = handlebars_1.default.create();
+    const sectionManager = new SectionManager();
+    HandlebarsEnv.registerHelper("section", function (name, options) {
+        const content = options.fn(this);
+        sectionManager.setSection(name, content);
+        return ""; // Don't output anything in place
+    });
+    return { HandlebarsEnv, sectionManager };
+}
+async function compilePrompt(promptTemplate, params) {
+    const { HandlebarsEnv, sectionManager } = createHandlebarsEnv();
+    const template = HandlebarsEnv.compile(promptTemplate, { noEscape: true });
+    template(params);
+    const { system, user } = sectionManager.getAllSections();
+    if (!system || !user) {
+        // TODO: support templates that have only one section
+        throw new Error("Both system and user sections must be defined in the template");
+    }
+    return [
+        { role: "system", content: system },
+        { role: "user", content: user },
+    ];
+}
+exports.compilePrompt = compilePrompt;

package/dist/agent/codegen/repo-edit.js CHANGED Viewed

@@ -125,7 +125,7 @@ exports.generateCodeUsingRepoAgent = generateCodeUsingRepoAgent;
 async function repoEditAgent({ trace, task, logger, }) {
     const testgenUpdatesReporter = new reporter_1.TestGenUpdatesReporter();
     void testgenUpdatesReporter.sendMessage(`Updating test code as per the task. \n View [trace](${trace?.getTraceUrl()})`);
-    logger?.log(`Starting repo agent: ${trace?.getTraceUrl()}`);
+    logger?.log(`Starting repo agent: [trace](${trace?.getTraceUrl()})`);
     const { prompt: repoFiles } = await (0, context_1.generateTxtForRepository)();
     const repoAgentOutput = await generateCodeUsingRepoAgent({
         task,

package/dist/agent/master/action-tool-calls.d.ts ADDED Viewed

@@ -0,0 +1,40 @@
+export declare enum ActionType {
+    FILL = "fill",
+    PAGE_GOTO = "page_goto",
+    CLICK = "click",
+    PRESS_ACTION = "keyboard_press_on_element",
+    ASSERT_TEXT = "assert_text",
+    HOVER = "hover_element",
+    SCROLL = "scroll"
+}
+export declare function isValidActionType(value: string): value is ActionType;
+export declare function getActionToolCalls(): {
+    type: string;
+    function: {
+        name: string;
+        description: string;
+        parameters: {
+            type: string;
+            properties: {
+                reason: {
+                    type: string;
+                    description: string;
+                };
+                action: {
+                    type: string;
+                    description: string;
+                };
+                element_description: {
+                    type: string;
+                    description: string;
+                };
+                is_element_visible: {
+                    type: string;
+                    description: string;
+                };
+            };
+            required: string[];
+        };
+    };
+}[];
+//# sourceMappingURL=action-tool-calls.d.ts.map

package/dist/agent/master/action-tool-calls.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"action-tool-calls.d.ts","sourceRoot":"","sources":["../../../src/agent/master/action-tool-calls.ts"],"names":[],"mappings":"AAAA,oBAAY,UAAU;IACpB,IAAI,SAAS;IACb,SAAS,cAAc;IACvB,KAAK,UAAU;IACf,YAAY,8BAA8B;IAC1C,WAAW,gBAAgB;IAC3B,KAAK,kBAAkB;IACvB,MAAM,WAAW;CAClB;AAED,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,MAAM,GAAG,KAAK,IAAI,UAAU,CAEpE;AAyDD,wBAAgB,kBAAkB;;;;;;;;;;;;;;;;;;;;;;;;;;;;IAoCjC"}

package/dist/agent/master/action-tool-calls.js ADDED Viewed

@@ -0,0 +1,83 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.getActionToolCalls = exports.isValidActionType = exports.ActionType = void 0;
+var ActionType;
+(function (ActionType) {
+    ActionType["FILL"] = "fill";
+    ActionType["PAGE_GOTO"] = "page_goto";
+    ActionType["CLICK"] = "click";
+    ActionType["PRESS_ACTION"] = "keyboard_press_on_element";
+    ActionType["ASSERT_TEXT"] = "assert_text";
+    ActionType["HOVER"] = "hover_element";
+    ActionType["SCROLL"] = "scroll";
+})(ActionType || (exports.ActionType = ActionType = {}));
+function isValidActionType(value) {
+    return Object.values(ActionType).includes(value);
+}
+exports.isValidActionType = isValidActionType;
+const createActionCall = (name, description, additionalProperties = {}) => ({
+    name: "next_task",
+    description: "take the next action based on the provided task",
+    schema: {
+        type: "function",
+        function: {
+            name,
+            description,
+            parameters: {
+                type: "object",
+                properties: {
+                    reason: {
+                        type: "string",
+                        description: "explain how this action will help to complete the task. the reason should align with the task provided",
+                    },
+                    action: {
+                        type: "string",
+                        description: `explain the next action in natural language.
+                The next action should be as atomic as possible, precise and should contain enough details about the action to be performed.
+                E.g. each click, key press, input, assert should be a separate action.
+                Each action should take the task to completion, if not the action is invalid.
+                If the element is not visible then action should be scroll.`,
+                    },
+                    element_description: {
+                        type: "string",
+                        description: "The description of the element on which action needs to be taken, including its position, appearance, etc.",
+                    },
+                    is_element_visible: {
+                        type: "boolean",
+                        description: "A boolean to indicate whether the concerned element is visible in the screenshot.",
+                    },
+                    ...additionalProperties,
+                },
+                required: [
+                    "reason",
+                    "action",
+                    "element_description",
+                    "is_element_visible",
+                    ...Object.keys(additionalProperties),
+                ],
+            },
+        },
+    },
+});
+function getActionToolCalls() {
+    return [
+        createActionCall(ActionType.ASSERT_TEXT, "assert whether the given element on the page is visible", {
+            assertion_text: {
+                type: "string",
+                description: "The text for which the visibility needs to be asserted.",
+            },
+        }),
+        createActionCall(ActionType.FILL, "fill the input element with a particular text"),
+        createActionCall(ActionType.CLICK, "click an element on the page"),
+        createActionCall(ActionType.PAGE_GOTO, "navigate the page to a new URL"),
+        createActionCall(ActionType.PRESS_ACTION, "Focuses the matching element and presses a combination of the keys"),
+        createActionCall(ActionType.HOVER, "hover over an element on the page"),
+        createActionCall(ActionType.SCROLL, "Scroll the page since element is not visible in the screenshot", {
+            element: {
+                type: "string",
+                description: "Element to look for after scrolling to complete the task.",
+            },
+        }),
+    ].map((call) => call.schema);
+}
+exports.getActionToolCalls = getActionToolCalls;

package/dist/agent/master/element-annotation.d.ts CHANGED Viewed

@@ -1,15 +1,20 @@
 import { LLM, TraceClient } from "@empiricalrun/llm";
 import { Page } from "playwright";
 import { BrowsingAgentOptions } from "../browsing";
-export declare function getElementAnnotation({ elementDescription, annotations, annotatedScreenshot, trace, llm, options, }: {
+import { ActionType } from "./action-tool-calls";
+export declare function getElementAnnotation({ elementDescription, annotations, annotatedScreenshot, trace, llm, options, preference, }: {
     elementDescription: string;
     annotations: string;
     annotatedScreenshot: string;
     trace?: TraceClient;
     llm?: LLM;
     options?: BrowsingAgentOptions;
+    preference: AnnotationPreference;
 }): Promise<string | undefined>;
-export type AnnotationPreference = "all" | "fill";
+export type AnnotationPreference = {
+    actionType: "all" | ActionType.FILL | ActionType.ASSERT_TEXT | ActionType.SCROLL;
+    assertionText?: string | undefined;
+};
 export declare function getAnnotationKeys({ page, preference, options, }: {
     page: Page;
     preference: AnnotationPreference;

package/dist/agent/master/element-annotation.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"element-annotation.d.ts","sourceRoot":"","sources":["../../../src/agent/master/element-annotation.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAGrD,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAOlC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;~~AAgDnD~~,wBAAsB,oBAAoB,CAAC,EACzC,kBAAkB,EAClB,WAAW,EACX,mBAAmB,EACnB,KAAK,EACL,GAAG,EACH,OAAO,~~GACR~~,EAAE;IACD,kBAAkB,EAAE,MAAM,CAAC;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,oBAAoB,CAAC;~~CAChC~~,GAAG,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,~~CA4F9B~~;AAED,MAAM,MAAM,oBAAoB,GAAG,KAAK,~~GAAG~~,MAAM,CAAC;~~AAElD~~,wBAAsB,iBAAiB,CAAC,EACtC,IAAI,EACJ,UAAU,EACV,OAAO,GACR,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,UAAU,EAAE,oBAAoB,CAAC;IACjC,OAAO,EAAE,oBAAoB,CAAC;CAC/B,GAAG,OAAO,CAAC;IACV,cAAc,EAAE;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;IACtD,gBAAgB,EAAE,MAAM,CAAC;IACzB,uBAAuB,EAAE,MAAM,CAAC;CACjC,CAAC,CAqDD"}
1	+ {"version":3,"file":"element-annotation.d.ts","sourceRoot":"","sources":["../../../src/agent/master/element-annotation.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAGrD,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAOlC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AACnD,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AA0DjD,wBAAsB,oBAAoB,CAAC,EACzC,kBAAkB,EAClB,WAAW,EACX,mBAAmB,EACnB,KAAK,EACL,GAAG,EACH,OAAO,EACP,UAAU,GACX,EAAE;IACD,kBAAkB,EAAE,MAAM,CAAC;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,UAAU,EAAE,oBAAoB,CAAC;CAClC,GAAG,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,CA6F9B;AAED,MAAM,MAAM,oBAAoB,GAAG;IACjC,UAAU,EACN,KAAK,GACL,UAAU,CAAC,IAAI,GACf,UAAU,CAAC,WAAW,GACtB,UAAU,CAAC,MAAM,CAAC;IACtB,aAAa,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;CACpC,CAAC;AAEF,wBAAsB,iBAAiB,CAAC,EACtC,IAAI,EACJ,UAAU,EACV,OAAO,GACR,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,UAAU,EAAE,oBAAoB,CAAC;IACjC,OAAO,EAAE,oBAAoB,CAAC;CAC/B,GAAG,OAAO,CAAC;IACV,cAAc,EAAE;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;IACtD,gBAAgB,EAAE,MAAM,CAAC;IACzB,uBAAuB,EAAE,MAAM,CAAC;CACjC,CAAC,CAqDD"}

package/dist/agent/master/element-annotation.js CHANGED Viewed

@@ -36,23 +36,33 @@ const annotationToolAction = {
                         type: "string",
                         description: "Explain why this element is selected. The reason should be clear and align with the task or purpose.",
                     },
+                    element: {
+                        type: "string",
+                        description: "Detailed description of the DOM element's visual characteristics and position.",
+                    },
                     element_annotation: {
                         type: "string",
                         description: "Return the unique element ID for the element on which the action needs to be performed.",
                     },
                 },
-                required: ["enriched_annotations", "reason", "element_annotation"],
+                required: [
+                    "enriched_annotations",
+                    "reason",
+                    "element",
+                    "element_annotation",
+                ],
             },
         },
     },
 };
-async function getElementAnnotation({ elementDescription, annotations, annotatedScreenshot, trace, llm, options, }) {
+async function getElementAnnotation({ elementDescription, annotations, annotatedScreenshot, trace, llm, options, preference, }) {
     const annotationsSpan = trace?.span({
         name: "get-element-annotation",
         input: {
             elementDescription,
             annotations,
             annotatedScreenshot,
+            preference,
         },
     });
     const systemMessage = {
@@ -136,7 +146,7 @@ async function getAnnotationKeys({ page, preference, options, }) {
     const annotationKeys = await page.evaluate(({ preference, options }) => {
         // @ts-ignore
         // eslint-disable-next-line no-undef
-        window.annotationInstance = annotateClickableElements({
+        window.annotationInstance = annotateElementsWithPreference({
             options: options,
             preference: preference,
         });

package/dist/agent/master/next-action.d.ts CHANGED Viewed

@@ -1,16 +1,10 @@
 import { LLM, TraceClient } from "@empiricalrun/llm";
+import { Page } from "playwright";
 import { PlaywrightActions } from "../../actions";
+import { CustomLogger } from "../../bin/logger";
 import { BrowsingAgentOptions } from "../browsing";
-export declare enum actionTypes {
-    PLAYWRIGHT_FILL_ACTION_NAME = "fill_input_element",
-    PLAYWRIGHT_GOTO_ACTION_NAME = "page_goto",
-    PLAYWRIGHT_CLICK_ACTION_NAME = "click_element",
-    PLAYWRIGHT_PRESS_ACTION_NAME = "keyboard_press_on_element",
-    PLAYWRIGHT_ASSERT_TEXT_VISIBILITY_ACTION_NAME = "assert_text_visibility",
-    PLAYWRIGHT_HOVER_ACTION_NAME = "hover_element"
-}
-export declare function isValidActionType(value: string): value is actionTypes;
-export declare function getNextAction({ task, executedActions, failedActions, pageUrl, trace, llm, options, pageScreenshot, actions, disableSkills, }: {
+import { ActionType } from "./action-tool-calls";
+export declare function getNextAction({ task, executedActions, failedActions, pageUrl, trace, llm, options, pageScreenshot, actions, actionTypes, disableSkills, page, logger, }: {
     task: string;
     executedActions: string[];
     failedActions: any[];
@@ -18,10 +12,14 @@ export declare function getNextAction({ task, executedActions, failedActions, pa
     trace?: TraceClient;
     llm?: LLM;
     options?: BrowsingAgentOptions;
-    pageScreenshot: string;
-    annotatedPageScreenshot?: string;
+    pageScreenshot: string[];
     actions: PlaywrightActions;
+    actionTypes: typeof ActionType;
     disableSkills: boolean;
-    annotations?: string[];
-}): Promise<import("openai/resources/index.mjs").ChatCompletionMessageToolCall | undefined>;
+    page: Page;
+    logger?: CustomLogger;
+}): Promise<{
+    actionType: string;
+    toolCallArgs: string;
+} | undefined>;
 //# sourceMappingURL=next-action.d.ts.map

package/dist/agent/master/next-action.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"next-action.d.ts","sourceRoot":"","sources":["../../../src/agent/master/next-action.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAIrD,OAAO,EAAE,~~iBAAiB~~,EAAE,MAAM,~~eAAe~~,CAAC;~~AAOlD~~,OAAO,EAAE,~~oBAAoB~~,EAAE,MAAM,~~aAAa~~,CAAC;~~AAEnD~~,~~oBAAY~~,~~WAAW;IACrB~~,~~2BAA2B~~,~~uBAAuB;IAClD~~,~~2BAA2B~~,~~cAAc;IACzC,4BAA4B,~~kBAAkB~~;IAC9C~~,~~4BAA4B,8BAA8B~~;~~IAC1D~~,~~6CAA6C~~,~~2BAA2B;IACxE~~,~~4BAA4B~~,~~kBAAkB;CAC/C;AAED~~,~~wBAAgB~~,~~iBAAiB~~,CAAC,~~KAAK~~,EAAE,~~MAAM~~,~~GAAG~~,~~KAAK~~,~~IAAI~~,~~WAAW,CAErE~~;~~AAED~~,wBAAsB,aAAa,CAAC,EAClC,IAAI,EACJ,eAAe,EACf,aAAa,EACb,OAAO,EACP,KAAK,EACL,GAAG,EACH,OAAO,EACP,cAAc,EACd,OAAO,EACP,aAAa,~~GACd~~,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,aAAa,EAAE,GAAG,EAAE,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,cAAc,EAAE,MAAM,CAAC;~~IACvB~~,~~uBAAuB~~,~~CAAC,~~EAAE,~~MAAM~~,CAAC;~~IACjC~~,~~OAAO~~,EAAE,~~iBAAiB~~,CAAC;~~IAC3B~~,aAAa,EAAE,OAAO,CAAC;IACvB,~~WAAW~~,CAAC,EAAE,MAAM,EAAE,CAAC;~~CACxB~~,~~2FA0KA~~"}
1	+ {"version":3,"file":"next-action.d.ts","sourceRoot":"","sources":["../../../src/agent/master/next-action.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAIrD,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAElD,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAMhD,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAEnD,OAAO,EAAE,UAAU,EAAsB,MAAM,qBAAqB,CAAC;AAGrE,wBAAsB,aAAa,CAAC,EAClC,IAAI,EACJ,eAAe,EACf,aAAa,EACb,OAAO,EACP,KAAK,EACL,GAAG,EACH,OAAO,EACP,cAAc,EACd,OAAO,EACP,WAAW,EACX,aAAa,EACb,IAAI,EACJ,MAAM,GACP,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,aAAa,EAAE,GAAG,EAAE,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,OAAO,EAAE,iBAAiB,CAAC;IAC3B,WAAW,EAAE,OAAO,UAAU,CAAC;IAC/B,aAAa,EAAE,OAAO,CAAC;IACvB,IAAI,EAAE,IAAI,CAAC;IACX,MAAM,CAAC,EAAE,YAAY,CAAC;CACvB,GAAG,OAAO,CACP;IACE,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;CACtB,GACD,SAAS,CACZ,CAsLA"}

package/dist/agent/master/next-action.js CHANGED Viewed

@@ -1,24 +1,14 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.getNextAction = exports.isValidActionType = exports.actionTypes = void 0;
+exports.getNextAction = void 0;
 const llm_1 = require("@empiricalrun/llm");
 const vision_1 = require("@empiricalrun/llm/vision");
 const skill_1 = require("../../actions/skill");
 const constants_1 = require("../../constants");
-var actionTypes;
-(function (actionTypes) {
-    actionTypes["PLAYWRIGHT_FILL_ACTION_NAME"] = "fill_input_element";
-    actionTypes["PLAYWRIGHT_GOTO_ACTION_NAME"] = "page_goto";
-    actionTypes["PLAYWRIGHT_CLICK_ACTION_NAME"] = "click_element";
-    actionTypes["PLAYWRIGHT_PRESS_ACTION_NAME"] = "keyboard_press_on_element";
-    actionTypes["PLAYWRIGHT_ASSERT_TEXT_VISIBILITY_ACTION_NAME"] = "assert_text_visibility";
-    actionTypes["PLAYWRIGHT_HOVER_ACTION_NAME"] = "hover_element";
-})(actionTypes || (exports.actionTypes = actionTypes = {}));
-function isValidActionType(value) {
-    return Object.values(actionTypes).includes(value);
-}
-exports.isValidActionType = isValidActionType;
-async function getNextAction({ task, executedActions, failedActions, pageUrl, trace, llm, options, pageScreenshot, actions, disableSkills, }) {
+const utils_1 = require("../utils");
+const action_tool_calls_1 = require("./action-tool-calls");
+const scroller_1 = require("./scroller");
+async function getNextAction({ task, executedActions, failedActions, pageUrl, trace, llm, options, pageScreenshot, actions, actionTypes, disableSkills, page, logger, }) {
     const nextActionSpan = trace?.span({
         name: "master-agent-next-action",
         input: {
@@ -35,9 +25,17 @@ async function getNextAction({ task, executedActions, failedActions, pageUrl, tr
     const promptSpan = nextActionSpan?.span({
         name: "master-agent-prompt",
     });
+    const screenshotsForPrompt = pageScreenshot.map((screenshot) => {
+        return {
+            type: "image_url",
+            image_url: {
+                url: (0, vision_1.imageFormatForProvider)(options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER, screenshot),
+            },
+        };
+    });
     const systemMessage = {
         role: "system",
-        content: `You are an web automation tool which is given a task to complete. You need to execute the task provided to you with the help of web page screenshot, a browser automation tool or skills which are learnt while writing previous tests.
+        content: `You are a web automation tool which is given a task to complete. You need to execute the task provided to you with the help of web page screenshot, a browser automation tool or skills which are learnt while writing previous tests.
   Browser automation tool is a tool which uses Playwright and browser to execute action using next_action tool call.
   Skill usage is a tool which helps to execute previously known pieces of code to achieve a task.
@@ -49,7 +47,7 @@ async function getNextAction({ task, executedActions, failedActions, pageUrl, tr
   You will also be provided with failed next action predicted by you, so that you can avoid suggesting the same action again - which failed.
   The next action should be as atomic as possible.
-  e.g: click on an element, fill an input element, assert, extract text from an element are valid next action as they are atomic in nature.
+  e.g: scroll, click on an element, fill an input element, assert, extract text from an element are valid next action as they are atomic in nature.
   You also need to provide the action type using the list below, action type which is not present in the list is invalid.
   ${Object.values(actionTypes)}
@@ -100,14 +98,9 @@ async function getNextAction({ task, executedActions, failedActions, pageUrl, tr
             },
             {
                 type: "text",
-                text: "Screenshot in normal mode 👇",
-            },
-            {
-                type: "image_url",
-                image_url: {
-                    url: (0, vision_1.imageFormatForProvider)(options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER, pageScreenshot),
-                },
+                text: pageScreenshot.length > 1 ? "Screenshots:" : "Screenshot:",
             },
+            ...screenshotsForPrompt,
         ],
     };
     const messages = [
@@ -117,44 +110,7 @@ async function getNextAction({ task, executedActions, failedActions, pageUrl, tr
     const actionSchemas = disableSkills || skill_1.testCaseSkills.getAvailableSkills().length === 0
         ? []
         : actions.getMasterActionSchemas();
-    const actionToolCall = {
-        name: "next_task",
-        schema: {
-            type: "function",
-            function: {
-                name: "next_task",
-                description: "take the next action base on the provided task",
-                parameters: {
-                    type: "object",
-                    properties: {
-                        reason: {
-                            type: "string",
-                            description: "explain how this action will help to complete the task. the reason should align with the task provided",
-                        },
-                        action: {
-                            type: "string",
-                            description: `explain the next action in natural language.
-    The next action should be as atomic as possible, precise and should contain enough details about the action to be performed.
-    E.g. each click, key press, input, assert should be a separate action.
-    Each action should take the task to completion, if not the action is invalid.`,
-                        },
-                        action_type: {
-                            type: "string",
-                            enum: Object.values(actionTypes),
-                            description: `type of the action that needs to be taken.
-              Any other action type than the provided action type is invalid.`,
-                        },
-                        element_description: {
-                            type: "string",
-                            description: "The description of the element on which action needs to be taken, including its position, appearance, etc.",
-                        },
-                    },
-                    required: ["reason", "action", "element_description", "action_type"],
-                },
-            },
-        },
-    };
-    const tools = [actionToolCall.schema, ...actionSchemas];
+    const tools = [...(0, action_tool_calls_1.getActionToolCalls)(), ...actionSchemas];
     promptSpan?.end({ output: { messages } });
     llm =
         llm ||
@@ -177,6 +133,49 @@ async function getNextAction({ task, executedActions, failedActions, pageUrl, tr
     });
     const toolCall = completion?.tool_calls?.[0];
     nextActionSpan?.end({ output: toolCall });
-    return toolCall;
+    if (toolCall) {
+        const toolCallArgs = (0, utils_1.parseJson)(toolCall.function.arguments);
+        const actionType = toolCall.function.name;
+        // A "scroll" tool call is resolved here instead of being handed back to the caller:
+        // run the scroller for the described element and, if it returns any frames, ask for
+        // the next action again using those frames' screenshots.
+        // At most 2 scroll attempts are made; if none returns a frame, an error is thrown.
+        if (actionType === "scroll" && toolCallArgs) {
+            const maxScrollRetries = 2;
+            for (let attempt = 0; attempt < maxScrollRetries; attempt++) {
+                const frames = await (0, scroller_1.scroller)({
+                    elementDescription: toolCallArgs.element,
+                    page,
+                    trace: nextActionSpan,
+                    logger,
+                });
+                if (frames.length > 0) {
+                    return getNextAction({
+                        task,
+                        executedActions,
+                        failedActions,
+                        pageUrl,
+                        trace,
+                        llm,
+                        options,
+                        pageScreenshot: frames.map((frame) => frame.frameScreenshot),
+                        actions,
+                        actionTypes,
+                        disableSkills,
+                        page,
+                        logger,
+                    });
+                }
+            }
+            // Reaching this point means every attempt returned no frames, so fail
+            // unconditionally; a counter check is fragile (`while (n--)` exits at -1, not 0).
+            throw new Error("Agent is not able to figure out the next action, since element is not visible on screen.");
+        }
+        return {
+            actionType,
+            toolCallArgs: toolCall.function.arguments,
+        };
+    }
+    return;
 }
 exports.getNextAction = getNextAction;

package/dist/agent/master/run.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;~~AAelC~~,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAClD,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;~~AA0BrB~~,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,OAAO,EAAE,oBAAoB,CAAC;IAC9B,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;~~GAmUA~~"}
1	+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAclC,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAClD,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AA4BrB,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,OAAO,EAAE,oBAAoB,CAAC;IAC9B,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GA+UA"}