npm - @empiricalrun/test-gen - Versions diffs - 0.27.1 → 0.27.3 - Mend

@empiricalrun/test-gen 0.27.1 → 0.27.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

package/CHANGELOG.md +14 -0
package/dist/actions/index.d.ts +1 -0
package/dist/actions/index.d.ts.map +1 -1
package/dist/actions/index.js +5 -0
package/dist/agent/browsing/index.d.ts.map +1 -1
package/dist/agent/browsing/index.js +139 -112
package/dist/agent/browsing/utils.d.ts.map +1 -1
package/dist/agent/browsing/utils.js +45 -42
package/dist/agent/codegen/update-flow.d.ts.map +1 -1
package/dist/agent/codegen/update-flow.js +21 -5
package/dist/agent/verification/index.d.ts.map +1 -1
package/dist/agent/verification/index.js +6 -18
package/package.json +2 -2

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,19 @@
 # @empiricalrun/test-gen
+## 0.27.3
+### Patch Changes
+- 2fc2f93: fix: handle consecutive action error handling
+- Updated dependencies [e7ae100]
+  - @empiricalrun/reporter@0.17.11
+## 0.27.2
+### Patch Changes
+- 947dc1a: feat: support array types for testMatch and testIgnore in project detection
 ## 0.27.1
 ### Patch Changes

package/dist/actions/index.d.ts CHANGED Viewed

@@ -7,6 +7,7 @@ export declare class PlaywrightActions {
     executeAction(name: string | undefined, args: Record<string, any>): Promise<void>;
     getActionSchemas(): ActionSchema[];
     generateCode(): string;
+    getLastCodeLines(count: number): string[];
     isComplete(): boolean;
 }
 //# sourceMappingURL=index.d.ts.map

package/dist/actions/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/actions/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAIlC,OAAO,EAAU,YAAY,EAAE,MAAM,UAAU,CAAC;AAQhD,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,OAAO,CAAW;IAC1B,OAAO,CAAC,eAAe,CAAmC;gBAC9C,IAAI,EAAE,IAAI;IAYhB,aAAa,CAAC,IAAI,oBAAa,EAAE,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC;IAuBhE,gBAAgB,IAAI,YAAY,EAAE;IAIlC,YAAY;IAIZ,UAAU;~~CASX~~"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/actions/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAIlC,OAAO,EAAU,YAAY,EAAE,MAAM,UAAU,CAAC;AAQhD,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,OAAO,CAAW;IAC1B,OAAO,CAAC,eAAe,CAAmC;gBAC9C,IAAI,EAAE,IAAI;IAYhB,aAAa,CAAC,IAAI,oBAAa,EAAE,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC;IAuBhE,gBAAgB,IAAI,YAAY,EAAE;IAIlC,YAAY;IAIZ,gBAAgB,CAAC,KAAK,EAAE,MAAM;IAO9B,UAAU;CAUX"}

package/dist/actions/index.js CHANGED Viewed

@@ -52,8 +52,13 @@ class PlaywrightActions {
     generateCode() {
         return this.recordedActions.map((a) => a.code).join("\n");
     }
+    getLastCodeLines(count) {
+        const actions = this.recordedActions.filter((a) => a.name !== done_1.PLAYWRIGHT_DONE_ACTION_NAME);
+        return actions.slice(-count).map((a) => a.code);
+    }
     isComplete() {
         const [doneAction] = this.recordedActions.filter((a) => a.name === done_1.PLAYWRIGHT_DONE_ACTION_NAME);
+        // filter out done action from recorded actions aftet execution is marked complete
         this.recordedActions = this.recordedActions.filter((a) => a.name !== done_1.PLAYWRIGHT_DONE_ACTION_NAME);
         return !!doneAction;
     }

package/dist/agent/browsing/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAYlC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAMnD,KAAK,oBAAoB,GAAG,OAAO,CAAC,oBAAoB,CAAC,GAAG;IAC1D,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACH,CAAC;AAEF,wBAAsB,6BAA6B,CACjD,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,IAAI,EACV,OAAO,EAAE,oBAAoB,~~mBA+L9B~~"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAYlC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAMnD,KAAK,oBAAoB,GAAG,OAAO,CAAC,oBAAoB,CAAC,GAAG;IAC1D,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACH,CAAC;AAEF,wBAAsB,6BAA6B,CACjD,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,IAAI,EACV,OAAO,EAAE,oBAAoB,mBAqO9B"}

package/dist/agent/browsing/index.js CHANGED Viewed

@@ -44,137 +44,164 @@ async function browsingAgentUsingMasterAgent(task, page, options) {
     let lastActionExecTrace = "";
     let isGivenTaskDone = false;
     const masterAgentActions = [];
-    while (!isGivenTaskDone) {
-        const masterAgentSpan = trace.span({ name: "master-agent" });
-        if (masterAgentActions.length > 0) {
-            const verificationAgentResp = await (0, verification_1.verificationAgent)({
-                llm,
-                trace: masterAgentSpan,
-                task,
-                conversation: ["Successfully executed actions", ...masterAgentActions],
-            });
-            isGivenTaskDone = verificationAgentResp.isDone;
-            if (isGivenTaskDone) {
-                await testgenUpdatesReporter.sendMessage(`${verificationAgentResp.reason} Marking the task as done.`);
-                break;
-            }
-        }
-        const sessionState = await (0, session_1.getSessionState)();
-        if (sessionState === "request_complete") {
-            await testgenUpdatesReporter.sendMessage("Aborting task, marking the task as done.");
-            break;
-        }
-        const { action, reason } = await (0, run_1.masterAgent)(task, page, masterAgentActions, masterAgentSpan, llm, options);
-        logger.log(`Next action: ${action} \n reason: ${reason}`);
-        if (!action) {
-            break;
-        }
-        if (isGivenTaskDone) {
-            break;
-        }
-        let isTaskDone = false;
-        const executedActions = [];
-        while (!isTaskDone) {
-            const browsingAgentSpan = masterAgentSpan.span({
-                name: `browsing-agent`,
-            });
-            const sessionState = await (0, session_1.getSessionState)();
-            if (sessionState === "request_complete") {
-                break;
-            }
-            const pageContentSpan = browsingAgentSpan.span({ name: "page-content" });
-            const pageContent = await page.content();
-            pageContentSpan.end({ output: { pageContent } });
-            const sanitizationSpan = browsingAgentSpan.span({
-                name: "page-sanitization",
-            });
-            const pageSnapshot = (0, html_1.sanitizeHtml)(pageContent, options.htmlSanitize);
-            sanitizationSpan.end({ output: { pageSnapshot } });
-            const promptSpan = browsingAgentSpan.span({ name: "page-prompt" });
-            // extract all successful actions
-            const successfulActions = executedActions
-                .filter((a) => !a.isError)
-                .map((a) => a.action);
-            if (successfulActions.length > 0) {
+    try {
+        while (!isGivenTaskDone) {
+            const masterAgentSpan = trace.span({ name: "master-agent" });
+            if (masterAgentActions.length > 0) {
                 const verificationAgentResp = await (0, verification_1.verificationAgent)({
                     llm,
-                    trace: browsingAgentSpan,
-                    task: action,
-                    conversation: ["Successfully executed actions", ...successfulActions],
+                    trace: masterAgentSpan,
+                    task,
+                    conversation: [
+                        "Successfully executed actions",
+                        ...masterAgentActions,
+                    ],
                 });
-                isTaskDone = verificationAgentResp.isDone;
-                logger.log(`isTaskDone: ${isTaskDone}`);
-                logger.log(`reason: ${verificationAgentResp.reason}`);
-                if (isTaskDone) {
-                    browsingAgentSpan.event({ name: "task-done" });
-                    browsingAgentSpan.end({
-                        output: { taskDone: true, reason: verificationAgentResp.reason },
-                    });
+                isGivenTaskDone = verificationAgentResp.isDone;
+                if (isGivenTaskDone) {
+                    await testgenUpdatesReporter.sendMessage(`${verificationAgentResp.reason} Marking the task as done.`);
                     break;
                 }
             }
-            const messages = await (0, utils_1.getPromptForNextAction)({
-                pageSnapshot,
-                previousActions: successfulActions,
-                task: action,
-                lastActionErrors: lastActionExecTrace ? [lastActionExecTrace] : [],
-                promptType: "browsing-agent-as-tool",
-            });
-            promptSpan.end({ output: { messages } });
-            const completion = await llm.createChatCompletion({
-                messages,
-                tools,
-                trace: browsingAgentSpan,
-                model: options.model || constants_1.DEFAULT_MODEL,
-                modelParameters: {
-                    ...constants_1.DEFAULT_MODEL_PARAMETERS,
-                    ...options.modelParameters,
-                    tool_choice: "required",
-                },
-            });
-            const toolCalls = completion?.tool_calls || [];
-            const toolCallsSpan = browsingAgentSpan.span({ name: "tool-calls" });
-            for (const i in toolCalls) {
-                const toolCall = toolCalls[i];
+            const sessionState = await (0, session_1.getSessionState)();
+            if (sessionState === "request_complete") {
+                await testgenUpdatesReporter.sendMessage("Aborting task, marking the task as done.");
+                break;
+            }
+            const { action, reason } = await (0, run_1.masterAgent)(task, page, masterAgentActions, masterAgentSpan, llm, options);
+            logger.log(`Next action: ${action} \n reason: ${reason}`);
+            if (!action) {
+                break;
+            }
+            if (isGivenTaskDone) {
+                break;
+            }
+            let isTaskDone = false;
+            const executedActions = [];
+            while (!isTaskDone) {
+                const browsingAgentSpan = masterAgentSpan.span({
+                    name: `browsing-agent`,
+                });
                 const sessionState = await (0, session_1.getSessionState)();
                 if (sessionState === "request_complete") {
                     break;
                 }
-                try {
-                    await actions.executeAction(toolCall.function.name, JSON.parse(toolCall.function.arguments));
-                    executedActions.push({
-                        isError: false,
-                        action: JSON.stringify(toolCall),
+                const pageContentSpan = browsingAgentSpan.span({
+                    name: "page-content",
+                });
+                const pageContent = await page.content();
+                pageContentSpan.end({ output: { pageContent } });
+                const sanitizationSpan = browsingAgentSpan.span({
+                    name: "page-sanitization",
+                });
+                const pageSnapshot = (0, html_1.sanitizeHtml)(pageContent, options.htmlSanitize);
+                sanitizationSpan.end({ output: { pageSnapshot } });
+                const promptSpan = browsingAgentSpan.span({ name: "page-prompt" });
+                // extract all successful actions
+                const successfulActions = executedActions
+                    .filter((a) => !a.isError)
+                    .map((a) => a.action);
+                if (successfulActions.length > 0) {
+                    const verificationAgentResp = await (0, verification_1.verificationAgent)({
+                        llm,
+                        trace: browsingAgentSpan,
+                        task: action,
+                        conversation: [
+                            "Successfully executed actions",
+                            ...successfulActions,
+                        ],
                     });
-                    lastActionExecTrace = "";
+                    isTaskDone = verificationAgentResp.isDone;
+                    logger.log(`isTaskDone: ${isTaskDone}`);
+                    logger.log(`reason: ${verificationAgentResp.reason}`);
+                    if (isTaskDone) {
+                        browsingAgentSpan.event({ name: "task-done" });
+                        browsingAgentSpan.end({
+                            output: { taskDone: true, reason: verificationAgentResp.reason },
+                        });
+                        break;
+                    }
                 }
-                catch (e) {
-                    // TODO: implement feedback loop to llm
+                const messages = await (0, utils_1.getPromptForNextAction)({
+                    pageSnapshot,
+                    previousActions: successfulActions,
+                    task: action,
+                    lastActionErrors: lastActionExecTrace ? [lastActionExecTrace] : [],
+                    promptType: "browsing-agent-as-tool",
+                });
+                promptSpan.end({ output: { messages } });
+                const completion = await llm.createChatCompletion({
+                    messages,
+                    tools,
+                    trace: browsingAgentSpan,
+                    model: options.model || constants_1.DEFAULT_MODEL,
+                    modelParameters: {
+                        ...constants_1.DEFAULT_MODEL_PARAMETERS,
+                        ...options.modelParameters,
+                        tool_choice: "required",
+                    },
+                });
+                const toolCalls = completion?.tool_calls || [];
+                // LLM might respond with empty tool_calls and we can go into endless loop
+                // if we donot record this action and mark it as error
+                if (!toolCalls.length) {
                     executedActions.push({
                         isError: true,
-                        action: JSON.stringify(toolCall.function.arguments)
-                            ?.reason,
+                        action: "",
                     });
-                    lastActionExecTrace = e.message;
-                    void testgenUpdatesReporter.sendMessage(e.message);
-                    logger.error(lastActionExecTrace, e);
                 }
-            }
-            toolCallsSpan.end({ output: { toolCalls } });
-            // mark task as done if llm is stuck in loop
-            if (executedActions.length > 4) {
-                const lastThreeActions = executedActions.slice(-4);
-                const lastThreeActionsFailed = lastThreeActions.every((a) => a.isError);
-                if (lastThreeActionsFailed) {
-                    // TODO: this should be sent to dashboard
-                    logger.error("Agent is not able to figure out next action, marking task as done");
-                    isTaskDone = true;
-                    break;
+                const toolCallsSpan = browsingAgentSpan.span({ name: "tool-calls" });
+                for (const i in toolCalls) {
+                    const toolCall = toolCalls[i];
+                    const sessionState = await (0, session_1.getSessionState)();
+                    if (sessionState === "request_complete") {
+                        break;
+                    }
+                    try {
+                        await actions.executeAction(toolCall.function.name, JSON.parse(toolCall.function.arguments));
+                        executedActions.push({
+                            isError: false,
+                            action: JSON.stringify(toolCall),
+                        });
+                        lastActionExecTrace = "";
+                    }
+                    catch (e) {
+                        // TODO: implement feedback loop to llm
+                        executedActions.push({
+                            isError: true,
+                            action: JSON.stringify(toolCall.function.arguments)
+                                ?.reason,
+                        });
+                        lastActionExecTrace = e.message;
+                        void testgenUpdatesReporter.sendMessage(e.message);
+                        logger.error(lastActionExecTrace, e);
+                    }
+                }
+                toolCallsSpan.end({ output: { toolCalls } });
+                // mark task as done if llm is stuck in loop
+                if (executedActions.length >= 3) {
+                    const lastThreeActions = executedActions.slice(-3);
+                    const lastThreeActionsFailed = lastThreeActions.every((a) => a.isError);
+                    // get last 3 lines of code
+                    const lastThreeLinesOfCode = actions.getLastCodeLines(3);
+                    const areLastActionsRepeatitive = lastThreeLinesOfCode.length === 3 &&
+                        lastThreeLinesOfCode.every((a) => a === lastThreeLinesOfCode[0]);
+                    if (lastThreeActionsFailed || areLastActionsRepeatitive) {
+                        // TODO: this should be sent to dashboard
+                        logger.error("Agent is not able to figure out next action, marking task as done");
+                        await testgenUpdatesReporter.sendMessage("Agent is not able to figure out next action, marking task as done");
+                        isGivenTaskDone = true;
+                        break;
+                    }
                 }
             }
+            masterAgentSpan.end({ output: { action, reason } });
+            masterAgentActions.push(action);
         }
-        masterAgentSpan.end({ output: { action, reason } });
-        masterAgentActions.push(action);
+    }
+    catch (e) {
+        console.error("Failed to generate code for the given task. Please retry again.", e);
+        await testgenUpdatesReporter.sendMessage(`Failed to generate code for the given task. Please retry again.`);
     }
     await page.close();
     const code = actions.generateCode();

package/dist/agent/browsing/utils.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAUvD,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAG5C,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;AAyFD;;;;GAIG;AACH,wBAAsB,yBAAyB,CAC7C,SAAS,EAAE,aAAa,GACvB,OAAO,CAAC,MAAM,CAAC,CAuBjB;AAwCD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBAiBxD;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,QA6BjD;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,IAAI,OAAO,CAAC,oBAAoB,CAAC,CAM1E;~~AAED~~;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,YAAY,EAAE,MAAM,EACpB,gBAAgB,EAAE,oBAAoB,GACrC,OAAO,CAAC,MAAM,CAAC,~~CAkDjB~~;AAED,wBAAsB,sBAAsB,CAAC,EAC3C,YAAiB,EACjB,IAAS,EACT,eAAoB,EACpB,gBAAqB,EACrB,UAAyC,GAC1C,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB,8EASA"}
1	+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAUvD,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAG5C,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;AAyFD;;;;GAIG;AACH,wBAAsB,yBAAyB,CAC7C,SAAS,EAAE,aAAa,GACvB,OAAO,CAAC,MAAM,CAAC,CAuBjB;AAwCD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBAiBxD;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,QA6BjD;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,IAAI,OAAO,CAAC,oBAAoB,CAAC,CAM1E;AAWD;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,YAAY,EAAE,MAAM,EACpB,gBAAgB,EAAE,oBAAoB,GACrC,OAAO,CAAC,MAAM,CAAC,CA8CjB;AAED,wBAAsB,sBAAsB,CAAC,EAC3C,YAAiB,EACjB,IAAS,EACT,eAAoB,EACpB,gBAAqB,EACrB,UAAyC,GAC1C,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB,8EASA"}

package/dist/agent/browsing/utils.js CHANGED Viewed

@@ -187,6 +187,15 @@ async function readPlaywrightConfig() {
     return playwrightConfig;
 }
 exports.readPlaywrightConfig = readPlaywrightConfig;
+function matchAgainstPattern(pattern, filePathToTest) {
+    if (isRegExp(pattern)) {
+        const regExp = pattern;
+        return regExp.test(filePathToTest);
+    }
+    else {
+        return (0, minimatch_1.minimatch)(filePathToTest, pattern);
+    }
+}
 /**
  * detect the project name for the given file in playwright test repo
  * if project and test file path for running test don't match, then playwright throws error
@@ -195,51 +204,45 @@ exports.readPlaywrightConfig = readPlaywrightConfig;
  */
 async function detectProjectName(testFilePath, playwrightConfig) {
     const filePath = testFilePath.replace("./tests/", "");
-    let project = "chromium"; // default project
-    if (playwrightConfig.projects && playwrightConfig.projects.length > 0) {
-        const filteredProjectNames = playwrightConfig.projects
-            .map((p) => {
-            const testIgnore = p.testIgnore;
-            const testMatch = p.testMatch || "**";
-            let ignoreFile = false;
-            if (testIgnore &&
-                (typeof testIgnore === "string" || isRegExp(testIgnore))) {
-                if (isRegExp(testIgnore)) {
-                    const regExp = testIgnore;
-                    ignoreFile = regExp.test(filePath);
-                }
-                else {
-                    ignoreFile = (0, minimatch_1.minimatch)(filePath, testIgnore);
-                }
-            }
-            // if test file path is in ignore list then skip it
-            // TODO: support for array based glob match
-            if (ignoreFile) {
-                return "";
-            }
-            if (typeof testMatch === "string" || isRegExp(testMatch)) {
-                let isMatch = false;
-                if (isRegExp(testMatch)) {
-                    isMatch = testMatch.test(filePath);
-                }
-                else {
-                    isMatch = (0, minimatch_1.minimatch)(filePath, testMatch);
-                }
-                if (isMatch && p.use?.defaultBrowserType === "chromium") {
-                    return p.name;
-                }
-                else {
-                    return "";
-                }
+    if (!playwrightConfig.projects || playwrightConfig.projects.length === 0) {
+        throw new Error(`No projects found in playwright config.`);
+    }
+    const filteredProjectNames = playwrightConfig.projects
+        .map((p) => {
+        const testIgnore = p.testIgnore;
+        const testMatch = p.testMatch || "**";
+        let ignoreFile = false;
+        if (testIgnore) {
+            if (typeof testIgnore === "string" || isRegExp(testIgnore)) {
+                ignoreFile = matchAgainstPattern(testIgnore, filePath);
             }
-            else {
-                throw Error("playwright config contains a testMatch which is not a string or regex. This is not supported.");
+            else if (typeof testIgnore === "object") {
+                ignoreFile = testIgnore.some((ignore) => matchAgainstPattern(ignore, filePath));
             }
-        })
-            .filter((p) => !!p);
-        project = filteredProjectNames[0] || project;
+        }
+        // if test file path is in ignore list then skip it
+        if (ignoreFile) {
+            return "";
+        }
+        let isMatch = false;
+        if (typeof testMatch === "string" || isRegExp(testMatch)) {
+            isMatch = matchAgainstPattern(testMatch, filePath);
+        }
+        else if (typeof testMatch === "object") {
+            isMatch = testMatch.some((match) => matchAgainstPattern(match, filePath));
+        }
+        if (isMatch && p.use?.defaultBrowserType === "chromium") {
+            return p.name || "";
+        }
+        else {
+            return "";
+        }
+    })
+        .filter((p) => !!p);
+    if (filteredProjectNames.length === 0) {
+        throw new Error(`No project found for the test file: ${testFilePath} in playwright config.`);
     }
-    return project;
+    return filteredProjectNames[0];
 }
 exports.detectProjectName = detectProjectName;
 async function getPromptForNextAction({ pageSnapshot = "", task = "", previousActions = [], lastActionErrors = [], promptType = "browsing-agent-next-action", }) {

package/dist/agent/codegen/update-flow.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"update-flow.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/update-flow.ts"],"names":[],"mappings":"~~AAwBA~~,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAqB7D,KAAK,eAAe,GAAG,QAAQ,GAAG;IAChC,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB,CAAC;AAEF,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAAG,SAAS,EACzC,OAAO,GAAE,OAAc,GACtB,OAAO,CAAC,eAAe,EAAE,CAAC,~~CAoH5B~~"}
1	+ {"version":3,"file":"update-flow.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/update-flow.ts"],"names":[],"mappings":"AAyBA,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAqB7D,KAAK,eAAe,GAAG,QAAQ,GAAG;IAChC,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB,CAAC;AAEF,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAAG,SAAS,EACzC,OAAO,GAAE,OAAc,GACtB,OAAO,CAAC,eAAe,EAAE,CAAC,CA6I5B"}

package/dist/agent/codegen/update-flow.js CHANGED Viewed

@@ -7,6 +7,7 @@ exports.updateTest = void 0;
 const llm_1 = require("@empiricalrun/llm");
 const crypto_1 = __importDefault(require("crypto"));
 const fs_extra_1 = __importDefault(require("fs-extra"));
+const ts_morph_1 = require("ts-morph");
 const logger_1 = require("../../bin/logger");
 const context_1 = require("../../bin/utils/context");
 const web_1 = require("../../bin/utils/platform/web");
@@ -104,13 +105,28 @@ async function updateTest(testCase, file, options, logging = true) {
             logger.success(`${fileChange.filePath} file formatted successfully!`);
         }
         else {
-            // since we dont know what is getting updated,
-            // we believe that the patch is correct and contains few before and after lines
-            // to make the change unique for search & replace
             const readWriteFileSpan = trace.span({ name: "write-to-file" });
             let contents = await fs_extra_1.default.readFile(fileChange.filePath, "utf-8");
-            //TODO: move this to usage of ast blocks
-            contents = contents.replace(fileChange.oldCode, `\n\n${fileChange.newCode}`);
+            const project = new ts_morph_1.Project();
+            const sourceFile = project.createSourceFile("updated-code.ts", fileChange.newCode);
+            const functions = sourceFile.getFunctions();
+            // if there is a single method update in the file
+            if (functions.length === 1 &&
+                functions[0]?.getText() === fileChange.newCode) {
+                const updatedCodeFuncNames = functions.map((f) => f.getName());
+                const funcName = updatedCodeFuncNames[0];
+                const originalSource = project.createSourceFile("current-code.ts", contents);
+                const matchingNodes = originalSource
+                    .getDescendantsOfKind(ts_morph_1.SyntaxKind.FunctionDeclaration)
+                    .filter((node) => node.getName() === funcName);
+                contents = contents.replace(matchingNodes[0].getText(), functions[0]?.getText());
+            }
+            else {
+                // since we dont know what is getting updated,
+                // we believe that the patch is correct and contains few before and after lines
+                // to make the change unique for search & replace
+                contents = contents.replace(fileChange.oldCode, `\n\n${fileChange.newCode}`);
+            }
             await fs_extra_1.default.writeFile(fileChange.filePath, contents, "utf-8");
             readWriteFileSpan.end({ output: { contents } });
             trace.event({ name: "format-file" });

package/dist/agent/verification/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/verification/index.ts"],"names":[],"mappings":"AAAA,OAAO,~~EAAE~~,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;~~AAErD~~;;GAEG;AACH,wBAAsB,iBAAiB,CAAC,EACtC,GAAG,EACH,KAAK,EACL,IAAI,EACJ,YAAY,GACb,EAAE;IACD,GAAG,EAAE,GAAG,CAAC;IACT,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,IAAI,EAAE,MAAM,CAAC;CACd;;;~~GA6DA~~"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/verification/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAa,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAEhE;;GAEG;AACH,wBAAsB,iBAAiB,CAAC,EACtC,GAAG,EACH,KAAK,EACL,IAAI,EACJ,YAAY,GACb,EAAE;IACD,GAAG,EAAE,GAAG,CAAC;IACT,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,IAAI,EAAE,MAAM,CAAC;CACd;;;GAgDA"}

package/dist/agent/verification/index.js CHANGED Viewed

@@ -1,30 +1,18 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.verificationAgent = void 0;
+const llm_1 = require("@empiricalrun/llm");
 /**
  * This agent is used to verify whether the task is done basis the conversation history
  */
 async function verificationAgent({ llm, trace, task, conversation, }) {
+    const messages = await (0, llm_1.getPrompt)("agent-steps-verification", {
+        task,
+        conversation: conversation.join("\n"),
+    });
     const response = await llm.createChatCompletion({
         trace,
-        messages: [
-            {
-                role: "system",
-                content: `Given a conversation and a task, your task is to analyse the conversation and tell if the task is completed.
-If not, you need to tell what is not completed and suggest next steps to complete the task.
-You need to respond assuming the conversation provided to you is truthful.
-`,
-            },
-            {
-                role: "user",
-                content: `
-Task: ${task}
-Conversation:
-${conversation.join("\n")}
-        `,
-            },
-        ],
+        messages,
         tools: [
             {
                 type: "function",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@empiricalrun/test-gen",
-  "version": "0.27.1",
+  "version": "0.27.3",
   "publishConfig": {
     "registry": "https://registry.npmjs.org/",
     "access": "public"
@@ -43,7 +43,7 @@
     "typescript": "^5.3.3",
     "@empiricalrun/llm": "^0.9.1",
     "@empiricalrun/r2-uploader": "^0.1.3",
-    "@empiricalrun/reporter": "^0.17.10"
+    "@empiricalrun/reporter": "^0.17.11"
   },
   "devDependencies": {
     "@types/detect-port": "^1.3.5",