npm - @empiricalrun/test-gen - Versions diffs - 0.10.4 → 0.11.0 - Mend

@empiricalrun/test-gen 0.10.4 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/CHANGELOG.md +17 -0
package/dist/actions/index.d.ts.map +1 -1
package/dist/actions/index.js +2 -1
package/dist/actions/reload-page.d.ts +4 -0
package/dist/actions/reload-page.d.ts.map +1 -0
package/dist/actions/reload-page.js +37 -0
package/dist/agent/browsing/index.d.ts.map +1 -1
package/dist/agent/browsing/index.js +30 -8
package/dist/agent/browsing/utils.d.ts +1 -0
package/dist/agent/browsing/utils.d.ts.map +1 -1
package/dist/agent/browsing/utils.js +8 -2
package/dist/bin/ai/index.d.ts +2 -1
package/dist/bin/ai/index.d.ts.map +1 -1
package/dist/bin/ai/index.js +2 -1
package/dist/bin/scenarios/index.js +9 -8
package/package.json +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,22 @@
 # @empiricalrun/test-gen
+## 0.11.0
+### Minor Changes
+- a1edabb: feat: support page reload as an action
+### Patch Changes
+- 21dae78: fix: add sanitisation of browsing agent task
+## 0.10.5
+### Patch Changes
+- 08ecca2: fix: test gen should quit after 3 consecutive errors and update dashboard sink message format
+- 08ecca2: fix: test gen should quit after 3 consecutive errors
 ## 0.10.4
 ### Patch Changes

package/dist/actions/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/actions/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAU,YAAY,EAAE,MAAM,UAAU,CAAC;~~AAOhD~~,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,OAAO,CAAW;IAC1B,OAAO,CAAC,eAAe,CAAmC;gBAC9C,IAAI,EAAE,IAAI;~~IAWhB~~,aAAa,CAAC,IAAI,oBAAa,EAAE,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC;~~IAkBhE~~,gBAAgB,IAAI,YAAY,EAAE;IAIlC,YAAY;IAIZ,UAAU;CAMX"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/actions/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAU,YAAY,EAAE,MAAM,UAAU,CAAC;AAQhD,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,OAAO,CAAW;IAC1B,OAAO,CAAC,eAAe,CAAmC;gBAC9C,IAAI,EAAE,IAAI;IAYhB,aAAa,CAAC,IAAI,oBAAa,EAAE,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC;IAiBhE,gBAAgB,IAAI,YAAY,EAAE;IAIlC,YAAY;IAIZ,UAAU;CAMX"}

package/dist/actions/index.js CHANGED Viewed

@@ -6,6 +6,7 @@ const click_1 = require("./click");
 const done_1 = require("./done");
 const fill_1 = require("./fill");
 const goto_1 = require("./goto");
+const reload_page_1 = require("./reload-page");
 class PlaywrightActions {
     actions;
     recordedActions;
@@ -16,6 +17,7 @@ class PlaywrightActions {
             (0, click_1.clickActionGenerator)(page),
             (0, done_1.doneActionGenerator)(page),
             (0, assertTextVisibility_1.assertTextVisibilityActionGenerator)(page),
+            (0, reload_page_1.reloadActionGenerator)(page),
         ];
         this.recordedActions = [];
     }
@@ -33,7 +35,6 @@ class PlaywrightActions {
             console.log(`code: ${code}`, "\n\n");
         }
         catch (e) {
-            // TODO: make these specific errors so that its easy to consume
             throw Error(`Error executing ${name} action of playwright: ${e}`);
         }
     }

package/dist/actions/reload-page.d.ts ADDED Viewed

@@ -0,0 +1,4 @@
+import { PlaywrightActionGenerator } from "../types";
+export declare const PLAYWRIGHT_RELOAD_ACTION_NAME = "page_reload";
+export declare const reloadActionGenerator: PlaywrightActionGenerator;
+//# sourceMappingURL=reload-page.d.ts.map

package/dist/actions/reload-page.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"reload-page.d.ts","sourceRoot":"","sources":["../../src/actions/reload-page.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,yBAAyB,EAAE,MAAM,UAAU,CAAC;AAErD,eAAO,MAAM,6BAA6B,gBAAgB,CAAC;AAE3D,eAAO,MAAM,qBAAqB,EAAE,yBA8BnC,CAAC"}

package/dist/actions/reload-page.js ADDED Viewed

@@ -0,0 +1,37 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.reloadActionGenerator = exports.PLAYWRIGHT_RELOAD_ACTION_NAME = void 0;
+const utils_1 = require("../agent/browsing/utils");
+exports.PLAYWRIGHT_RELOAD_ACTION_NAME = "page_reload";
+const reloadActionGenerator = (page) => {
+    return {
+        execute: async () => {
+            await page.reload();
+            await page.waitForTimeout(3000);
+            await (0, utils_1.injectPwLocatorGenerator)(page);
+        },
+        template: () => {
+            const templ = `await page.reload();`;
+            return templ;
+        },
+        name: exports.PLAYWRIGHT_RELOAD_ACTION_NAME,
+        schema: {
+            type: "function",
+            function: {
+                name: exports.PLAYWRIGHT_RELOAD_ACTION_NAME,
+                description: "reload the page by calling this method",
+                parameters: {
+                    type: "object",
+                    properties: {
+                        reason: {
+                            type: "string",
+                            description: "reason for calling this function",
+                        },
+                    },
+                    required: ["reason"],
+                },
+            },
+        },
+    };
+};
+exports.reloadActionGenerator = reloadActionGenerator;

package/dist/agent/browsing/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAYlC,wBAAsB,aAAa,CACjC,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,IAAI,EACV,OAAO,GAAE;IACP,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACE,~~mBAwDP~~"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAYlC,wBAAsB,aAAa,CACjC,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,IAAI,EACV,OAAO,GAAE;IACP,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACE,mBAiFP"}

package/dist/agent/browsing/index.js CHANGED Viewed

@@ -17,12 +17,12 @@ async function browsingAgent(task, page, options = {}) {
     const actions = new actions_1.PlaywrightActions(page);
     const tools = actions.getActionSchemas();
     let isTaskDone = false;
-    const previousActions = [];
+    const executedActions = [];
     await (0, utils_1.injectPwLocatorGenerator)(page);
-    let lastActionErrors = [];
+    trace.update({ input: { task } });
+    let lastActionExecTrace = "";
     // await page.waitForTimeout(3_00_000)
     while (!isTaskDone) {
-        trace.update({ input: { task } });
         const pageContentSpan = trace.startSpan("page-content");
         const pageContent = await page.content();
         pageContentSpan.end({ output: { pageContent } });
@@ -30,33 +30,55 @@ async function browsingAgent(task, page, options = {}) {
         const pageSnapshot = (0, html_1.sanitizeHtml)(pageContent, options.htmlSanitize);
         sanitizationSpan.end({ output: { pageSnapshot } });
         const promptSpan = trace.startSpan("page-prompt");
+        // extract all successful actions
+        const successfulActions = executedActions
+            .filter((a) => !a.isError)
+            .map((a) => a.action);
         const messages = await (0, provider_1.getPromptForNextAction)({
             pageSnapshot,
-            previousActions,
+            previousActions: successfulActions,
             task,
-            lastActionErrors,
+            lastActionErrors: lastActionExecTrace ? [lastActionExecTrace] : [],
         });
         promptSpan.end({ output: { messages } });
-        lastActionErrors = [];
         const completion = await (0, ai_1.getLLMResult)({
             messages,
             tools,
             trace,
+            tool_choice: "required",
         });
         const toolCalls = completion?.tool_calls || [];
         for (const i in toolCalls) {
             const toolCall = toolCalls[i];
             try {
                 await actions.executeAction(toolCall.function.name, JSON.parse(toolCall.function.arguments));
-                previousActions.push(JSON.stringify(toolCall));
+                executedActions.push({
+                    isError: false,
+                    action: JSON.stringify(toolCall),
+                });
+                lastActionExecTrace = "";
             }
             catch (e) {
                 // TODO: implement feedback loop to llm
-                lastActionErrors.push(e.message);
+                executedActions.push({
+                    isError: true,
+                    action: JSON.stringify(toolCall),
+                });
+                lastActionExecTrace = e.message;
                 logger.error(e);
             }
         }
         isTaskDone = actions.isComplete();
+        // mark task as done if llm is stuck in loop
+        if (executedActions.length > 3) {
+            const lastThreeActions = executedActions.slice(-3);
+            const lastThreeActionsFailed = lastThreeActions.every((a) => a.isError);
+            if (lastThreeActionsFailed) {
+                // TODO: this should be sent to dashboard
+                logger.error("Agent is not able to figure out next action, marking task as done");
+                isTaskDone = true;
+            }
+        }
     }
     const code = actions.generateCode();
     trace.update({ input: { task }, output: { code } });

package/dist/agent/browsing/utils.d.ts CHANGED Viewed

@@ -1,5 +1,6 @@
 import { Page } from "playwright";
 import { TestGenConfig } from "../../types";
+export declare function prepareBrowsingAgentTask(steps: string[], assert?: string): string;
 export declare function prepareFileForBrowsingAgent(genConfig: TestGenConfig): Promise<void>;
 export declare function injectPwLocatorGenerator(page: Page): Promise<void>;
 export declare function canRunBrowsingAgent(filePath: string): void;

package/dist/agent/browsing/utils.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAIlC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C,wBAAsB,2BAA2B,CAAC,SAAS,EAAE,aAAa,iBAkBzE;AAWD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBAMxD;AAED,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,QA4BnD"}
1	+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAIlC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,MAAM,CAAC,EAAE,MAAM,UAIxE;AAED,wBAAsB,2BAA2B,CAAC,SAAS,EAAE,aAAa,iBAkBzE;AAWD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBAMxD;AAED,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,QA4BnD"}

package/dist/agent/browsing/utils.js CHANGED Viewed

@@ -3,15 +3,21 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.canRunBrowsingAgent = exports.injectPwLocatorGenerator = exports.prepareFileForBrowsingAgent = void 0;
+exports.canRunBrowsingAgent = exports.injectPwLocatorGenerator = exports.prepareFileForBrowsingAgent = exports.prepareBrowsingAgentTask = void 0;
 const child_process_1 = require("child_process");
 const fs_extra_1 = __importDefault(require("fs-extra"));
 const logger_1 = require("../../bin/logger");
 const web_1 = require("../../bin/utils/platform/web");
+function prepareBrowsingAgentTask(steps, assert) {
+    const sanitizedSteps = steps.map((step) => step.replace(/`/g, "\\`"));
+    const task = `${sanitizedSteps.join("\n")}\n${assert || ""}`;
+    return task;
+}
+exports.prepareBrowsingAgentTask = prepareBrowsingAgentTask;
 async function prepareFileForBrowsingAgent(genConfig) {
     const { specPath, scenarios } = genConfig;
     const { name, steps, assert } = scenarios[0];
-    const mergedSteps = `${steps.join("\n")}\n${assert}`;
+    const mergedSteps = prepareBrowsingAgentTask(steps, assert);
     if (!fs_extra_1.default.existsSync(specPath)) {
         await fs_extra_1.default.createFile(specPath);
         await fs_extra_1.default.writeFile(specPath, (0, web_1.addNewImport)("", ["test", "expect"], "@playwright/test"));

package/dist/bin/ai/index.d.ts CHANGED Viewed

@@ -1,8 +1,9 @@
 import OpenAI from "openai";
 import LLMTracing from "./trace";
-export declare function getLLMResult({ messages, trace, tools, }: {
+export declare function getLLMResult({ messages, trace, tools, tool_choice, }: {
     messages: OpenAI.Chat.Completions.ChatCompletionMessageParam[];
     trace?: LLMTracing;
     tools?: OpenAI.Chat.Completions.ChatCompletionTool[];
+    tool_choice?: OpenAI.Chat.Completions.ChatCompletionToolChoiceOption;
 }): Promise<OpenAI.Chat.Completions.ChatCompletionMessage | undefined>;
 //# sourceMappingURL=index.d.ts.map

package/dist/bin/ai/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/ai/index.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,OAAO,UAAU,MAAM,SAAS,CAAC;AAEjC,wBAAsB,YAAY,CAAC,EACjC,QAAQ,EACR,KAAK,EACL,KAAK,~~GACN~~,EAAE;IACD,QAAQ,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,0BAA0B,EAAE,CAAC;IAC/D,KAAK,CAAC,EAAE,UAAU,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,kBAAkB,EAAE,CAAC;~~CACtD~~,GAAG,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,qBAAqB,GAAG,SAAS,CAAC,~~CAqBrE~~"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/ai/index.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,OAAO,UAAU,MAAM,SAAS,CAAC;AAEjC,wBAAsB,YAAY,CAAC,EACjC,QAAQ,EACR,KAAK,EACL,KAAK,EACL,WAAW,GACZ,EAAE;IACD,QAAQ,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,0BAA0B,EAAE,CAAC;IAC/D,KAAK,CAAC,EAAE,UAAU,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,kBAAkB,EAAE,CAAC;IACrD,WAAW,CAAC,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,8BAA8B,CAAC;CACtE,GAAG,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,qBAAqB,GAAG,SAAS,CAAC,CAsBrE"}

package/dist/bin/ai/index.js CHANGED Viewed

@@ -5,7 +5,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.getLLMResult = void 0;
 const openai_1 = __importDefault(require("openai"));
-async function getLLMResult({ messages, trace, tools, }) {
+async function getLLMResult({ messages, trace, tools, tool_choice, }) {
     const openai = new openai_1.default();
     const model = "gpt-4o";
     const parameters = {
@@ -22,6 +22,7 @@ async function getLLMResult({ messages, trace, tools, }) {
         model,
         tools,
         ...parameters,
+        tool_choice,
     });
     const output = completion.choices[0]?.message;
     generation?.end({ output });

package/dist/bin/scenarios/index.js CHANGED Viewed

@@ -91,6 +91,14 @@ async function loadScenarios(scenariosPath) {
     else if (scenariosPath.endsWith(".yaml")) {
         return await loadScenariosFromYAML(scenariosPath);
     }
+    else if (scenariosPath.endsWith(".ts")) {
+        return [
+            {
+                specPath: scenariosPath,
+                scenarios: [],
+            },
+        ];
+    }
     else if (isValidJSON(atob(scenariosPath))) {
         const str = atob(scenariosPath);
         const config = JSON.parse(str);
@@ -101,14 +109,7 @@ async function loadScenarios(scenariosPath) {
                 scenarios: [
                     {
                         name: config.name,
-                        steps: config.steps
-                            .map((s) => {
-                            if (s.trim().startsWith("-")) {
-                                return s.replace("-", "");
-                            }
-                            return s;
-                        })
-                            .filter((s) => !!s),
+                        steps: config.steps.filter((s) => !!s),
                         assert: config.assert,
                     },
                 ],

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@empiricalrun/test-gen",
-  "version": "0.10.4",
+  "version": "0.11.0",
   "publishConfig": {
     "registry": "https://registry.npmjs.org/",
     "access": "public"