npm - @empiricalrun/test-gen - Versions diffs - 0.47.4 → 0.48.1 - Mend

@empiricalrun/test-gen 0.47.4 → 0.48.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (34) hide show

package/CHANGELOG.md +30 -0
package/dist/agent/browsing/run.d.ts +0 -9
package/dist/agent/browsing/run.d.ts.map +1 -1
package/dist/agent/browsing/run.js +2 -14
package/dist/agent/browsing/utils.d.ts +10 -0
package/dist/agent/browsing/utils.d.ts.map +1 -1
package/dist/agent/browsing/utils.js +26 -5
package/dist/agent/chat.d.ts +2 -2
package/dist/agent/chat.d.ts.map +1 -1
package/dist/agent/chat.js +73 -51
package/dist/agent/master/browser-tests/skills.spec.js +3 -0
package/dist/agent/master/run.d.ts +1 -0
package/dist/agent/master/run.d.ts.map +1 -1
package/dist/agent/master/run.js +4 -2
package/dist/bin/index.js +4 -1
package/dist/tools/browser-agent.d.ts +2 -15
package/dist/tools/browser-agent.d.ts.map +1 -1
package/dist/tools/browser-agent.js +109 -65
package/dist/tools/codegen-agent.d.ts +2 -8
package/dist/tools/codegen-agent.d.ts.map +1 -1
package/dist/tools/codegen-agent.js +48 -36
package/dist/tools/diagnosis-fetcher.d.ts +3 -0
package/dist/tools/diagnosis-fetcher.d.ts.map +1 -0
package/dist/tools/diagnosis-fetcher.js +88 -0
package/dist/tools/test-run.d.ts +2 -9
package/dist/tools/test-run.d.ts.map +1 -1
package/dist/tools/test-run.js +41 -27
package/dist/tools/types.d.ts +11 -0
package/dist/tools/types.d.ts.map +1 -0
package/dist/tools/types.js +2 -0
package/dist/utils/repo-tree.d.ts +2 -0
package/dist/utils/repo-tree.d.ts.map +1 -0
package/dist/utils/repo-tree.js +75 -0
package/package.json +3 -3

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,35 @@
 # @empiricalrun/test-gen
+## 0.48.1
+### Patch Changes
+- 13e3048: fix: add semi colon to terminate createTest line
+- b7092d0: fix: project detection should normalize paths
+- Updated dependencies [a2609f5]
+  - @empiricalrun/llm@0.10.1
+## 0.48.0
+### Minor Changes
+- fc952c9: feat: Add new tool call to fetch diagnosis details
+### Patch Changes
+- 32eaf6f: feat: add repo tree in ascii to chat system prompt
+- 6cc97ca: feat: use claude3.7 in chat agent for diagnosis auto-fix scenarios
+- 2e28c20: fix: master agent should not throw when used in tool call
+- f03ff97: feat: update master agent tool call to use TODO(agent) preparation method
+- ed273c2: fix: disable skills usage in master agent
+- 049102a: fix: support suites in agent tool calls
+- Updated dependencies [6cc97ca]
+- Updated dependencies [0fee9bf]
+- Updated dependencies [1f95e4b]
+- Updated dependencies [049102a]
+  - @empiricalrun/llm@0.10.0
+  - @empiricalrun/test-run@0.7.2
 ## 0.47.4
 ### Patch Changes

package/dist/agent/browsing/run.d.ts CHANGED Viewed

@@ -5,15 +5,6 @@ type GenerateTestsType = {
     testGenToken: string;
     repoDir: string;
 };
-/**
- *
- * Function to generate tests using master agent
- * @export
- * @param {GenerateTestsType} {
- *   testFilePath,
- *   filePathToUpdate,
- * }
- */
 export declare function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, pwProjectsFilter, testGenToken, repoDir, }: GenerateTestsType): Promise<{
     isError: boolean;
     error: string;

package/dist/agent/browsing/run.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/run.ts"],"names":[],"mappings":"AAiBA,KAAK,iBAAiB,GAAG;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;CACjB,CAAC;AAEF~~;;;;;;;;GAQG;AACH~~,wBAAsB,6BAA6B,CAAC,EAClD,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,EAChB,YAAY,EACZ,OAAO,GACR,EAAE,iBAAiB;;;~~GAkFnB~~"}
1	+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/run.ts"],"names":[],"mappings":"AAiBA,KAAK,iBAAiB,GAAG;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;CACjB,CAAC;AAEF,wBAAsB,6BAA6B,CAAC,EAClD,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,EAChB,YAAY,EACZ,OAAO,GACR,EAAE,iBAAiB;;;GA8EnB"}

package/dist/agent/browsing/run.js CHANGED Viewed

@@ -10,15 +10,6 @@ const web_1 = require("../../bin/utils/platform/web");
 const server_1 = require("../../file/server");
 const exec_1 = require("../../utils/exec");
 const utils_1 = require("./utils");
-/**
- *
- * Function to generate tests using master agent
- * @export
- * @param {GenerateTestsType} {
- *   testFilePath,
- *   filePathToUpdate,
- * }
- */
 async function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, pwProjectsFilter, testGenToken, repoDir, }) {
     if (!fs_extra_1.default.existsSync(testFilePath)) {
         throw new Error(`File for master agent to run not found: ${testFilePath}`);
@@ -45,7 +36,8 @@ async function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, p
         await teardowns.skipAll();
     }
     const command = `npx playwright test ${testFilePath} --retries 0 --project ${project} --timeout 0 --headed`;
-    let isError = false, error = "";
+    let isError = false;
+    let error = "";
     try {
         await (0, exec_1.cmd)(command.split(" "), {
             env: {
@@ -80,10 +72,6 @@ async function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, p
     }
     // remove the test only from the file
     await (0, web_1.removeTestOnly)(testFilePath);
-    if (isError) {
-        // throw the error because of which test gen failed
-        throw Error(error);
-    }
     await fileService.stop();
     return {
         isError,

package/dist/agent/browsing/utils.d.ts CHANGED Viewed

@@ -4,6 +4,16 @@ import { Page } from "playwright";
 import { PlaywrightTestConfig } from "playwright/test";
 export declare function isRegExp(obj: any): obj is RegExp;
 export declare function prepareBrowsingAgentTask(steps: string[]): string;
+export declare function replaceTodoWithCreateTest({ testFilePath, testCaseName, testCaseSuites, }: {
+    testFilePath: string;
+    testCaseName: string;
+    testCaseSuites: string[];
+}): Promise<void>;
+export declare function markTestAsOnly({ testCaseName, testCaseSuites, specPath, }: {
+    testCaseName: string;
+    testCaseSuites: string[];
+    specPath: string;
+}): Promise<void>;
 export declare function prepareFileForMasterAgent({ testCase, specPath, trace, }: {
     testCase: TestCase;
     specPath: string;

package/dist/agent/browsing/utils.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAe,QAAQ,EAAE,MAAM,4BAA4B,CAAC;AAIxE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAsBvD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;~~AA+FD~~,wBAAsB,yBAAyB,CAAC,EAC9C,QAAQ,EACR,QAAQ,EACR,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC,MAAM,CAAC,CAyDlB;AAyBD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBA2HxD;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,CACxC,OAAO,EAAE,MAAM,GACd,OAAO,CAAC,oBAAoB,CAAC,CAM/B;AAWD;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,YAAY,EAAE,MAAM,EACpB,gBAAgB,EAAE,oBAAoB,EACtC,gBAAgB,GAAE,MAAM,EAAU,GACjC,OAAO,CAAC,MAAM,CAAC,CA+CjB;AAED,qBAAa,eAAe;IACd,OAAO,CAAC,SAAS;gBAAT,SAAS,EAAE,MAAM;IACrC,OAAO,CAAC,aAAa,CAAqB;YAE5B,mBAAmB;YAUnB,gBAAgB;IAsBjB,OAAO;IAuBb,SAAS;CAKjB"}
1	+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAe,QAAQ,EAAE,MAAM,4BAA4B,CAAC;AAIxE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAsBvD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;AAiFD,wBAAsB,yBAAyB,CAAC,EAC9C,YAAY,EACZ,YAAY,EACZ,cAAc,GACf,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;CAC1B,iBAkBA;AAED,wBAAsB,cAAc,CAAC,EACnC,YAAY,EACZ,cAAc,EACd,QAAQ,GACT,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,QAAQ,EAAE,MAAM,CAAC;CAClB,iBAoBA;AAED,wBAAsB,yBAAyB,CAAC,EAC9C,QAAQ,EACR,QAAQ,EACR,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC,MAAM,CAAC,CAyDlB;AAyBD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBA2HxD;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,CACxC,OAAO,EAAE,MAAM,GACd,OAAO,CAAC,oBAAoB,CAAC,CAM/B;AAWD;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,YAAY,EAAE,MAAM,EACpB,gBAAgB,EAAE,oBAAoB,EACtC,gBAAgB,GAAE,MAAM,EAAU,GACjC,OAAO,CAAC,MAAM,CAAC,CA+CjB;AAED,qBAAa,eAAe;IACd,OAAO,CAAC,SAAS;gBAAT,SAAS,EAAE,MAAM;IACrC,OAAO,CAAC,aAAa,CAAqB;YAE5B,mBAAmB;YAUnB,gBAAgB;IAsBjB,OAAO;IAuBb,SAAS;CAKjB"}

package/dist/agent/browsing/utils.js CHANGED Viewed

@@ -3,7 +3,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.TeardownManager = exports.detectProjectName = exports.readPlaywrightConfig = exports.injectPwLocatorGenerator = exports.prepareFileForMasterAgent = exports.prepareBrowsingAgentTask = exports.isRegExp = void 0;
+exports.TeardownManager = exports.detectProjectName = exports.readPlaywrightConfig = exports.injectPwLocatorGenerator = exports.prepareFileForMasterAgent = exports.markTestAsOnly = exports.replaceTodoWithCreateTest = exports.prepareBrowsingAgentTask = exports.isRegExp = void 0;
 const fs_extra_1 = __importDefault(require("fs-extra"));
 const minimatch_1 = require("minimatch");
 const path_1 = __importDefault(require("path"));
@@ -81,11 +81,32 @@ async function prepareFileForUpdateScenario({ testCase, specPath, trace, }) {
         nonSpecFileCode: nonSpecFilePrompt,
         testCase: testCase,
     });
+    await markTestAsOnly({
+        testCaseName: name,
+        testCaseSuites: suites,
+        specPath,
+    });
+    return createTestFilePath;
+}
+async function replaceTodoWithCreateTest({ testFilePath, testCaseName, testCaseSuites, }) {
+    // This method is an alternative to prepareFileForUpdateScenario
+    // TODO: Does not support multiple pages, scoped variables, updates in POM files
+    const fileContent = await fs_extra_1.default.readFile(testFilePath, "utf-8");
+    await fs_extra_1.default.writeFile(testFilePath, fileContent.replace(/\/\/ TODO\(agent\): (.*)/, (_, todoText) => `await createTest("${todoText.replace(/"/g, '\\"')}", page);`));
+    await addImportForCreateTest(testFilePath);
+    await markTestAsOnly({
+        testCaseName,
+        testCaseSuites,
+        specPath: testFilePath,
+    });
+}
+exports.replaceTodoWithCreateTest = replaceTodoWithCreateTest;
+async function markTestAsOnly({ testCaseName, testCaseSuites, specPath, }) {
     const testFileContent = await fs_extra_1.default.readFile(specPath, "utf-8");
     const { testBlock, testNode } = (0, web_1.getTypescriptTestBlock)({
-        scenarioName: name,
+        scenarioName: testCaseName,
         content: testFileContent,
-        suites,
+        suites: testCaseSuites,
     });
     const parentDescribe = (0, web_1.findFirstSerialDescribeBlock)(testNode);
     const isFileMarkedSerial = await (0, web_1.hasTopLevelDescribeConfigureWithSerialMode)(specPath);
@@ -94,8 +115,8 @@ async function prepareFileForUpdateScenario({ testCase, specPath, trace, }) {
         const updatedTestFileContent = newContentsWithTestOnly(testFileContent, testBlock, testBlock, parentDescribe?.getText() || "");
         await fs_extra_1.default.writeFile(specPath, updatedTestFileContent);
     }
-    return createTestFilePath;
 }
+exports.markTestAsOnly = markTestAsOnly;
 async function prepareFileForMasterAgent({ testCase, specPath, trace, }) {
     const prepareFileSpan = trace?.span({
         name: "prepare-file-for-master-agent",
@@ -288,7 +309,7 @@ function matchAgainstPattern(pattern, filePathToTest) {
  * @returns
  */
 async function detectProjectName(testFilePath, playwrightConfig, pwProjectsFilter = ["*"]) {
-    const filePath = testFilePath.replace("./tests/", "");
+    const filePath = path_1.default.normalize(testFilePath).replace("tests/", "");
     if (!playwrightConfig.projects || playwrightConfig.projects.length === 0) {
         throw new Error(`No projects found in playwright config.`);
     }

package/dist/agent/chat.d.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 import { TraceClient } from "@empiricalrun/llm";
-import { ChatCompletionMessageParam } from "openai/resources/chat/completions.mjs";
+import type { Anthropic } from "@empiricalrun/llm/claude";
 export declare function chatAgent({ prompt, }: {
     prompt: string;
     trace?: TraceClient;
-}): Promise<ChatCompletionMessageParam[]>;
+}): Promise<Anthropic.Messages.MessageParam[]>;
 //# sourceMappingURL=chat.d.ts.map

package/dist/agent/chat.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"chat.d.ts","sourceRoot":"","sources":["../../src/agent/chat.ts"],"names":[],"mappings":"AAAA,OAAO,~~EAAO~~,WAAW,EAAE,MAAM,mBAAmB,CAAC;~~AACrD~~,OAAO,~~EACL~~,~~0BAA0B~~,~~EAE3B~~,MAAM,~~uCAAuC~~,CAAC;~~AAsC/C~~,wBAAsB,SAAS,CAAC,EAC9B,MAAM,GACP,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,~~yCA6DA~~"}
1	+ {"version":3,"file":"chat.d.ts","sourceRoot":"","sources":["../../src/agent/chat.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AA6D1D,wBAAsB,SAAS,CAAC,EAC9B,MAAM,GACP,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,8CAmEA"}

package/dist/agent/chat.js CHANGED Viewed

@@ -1,61 +1,75 @@
 "use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.chatAgent = void 0;
-const llm_1 = require("@empiricalrun/llm");
+const claude_1 = require("@empiricalrun/llm/claude");
+const path_1 = __importDefault(require("path"));
 const human_in_the_loop_1 = require("../human-in-the-loop");
 const browser_agent_1 = require("../tools/browser-agent");
-const codegen_agent_1 = require("../tools/codegen-agent");
+const diagnosis_fetcher_1 = require("../tools/diagnosis-fetcher");
 const test_run_1 = require("../tools/test-run");
+const repo_tree_1 = require("../utils/repo-tree");
 const systemPrompt = `
 You are a helpful assistant that can answer questions and help with tasks.
-You are given a set to tools to use to fulfill the user's request.
+You are given a set of tools to use to fulfill the user's request. Read their descriptions to
+understand what each tool does.
-For example, if the user asks you to run a test, you should use the runTest tool.
+For example, if the user asks you to run a test, you could use the runTest tool.
 Once the test is run, you will receive the results in the form of a JSON object.
 Summarize the results in a few sentences.
-Or if the user asks you to modify a test, you should use the
-generateTestWithCodegen or the generateTestWithBrowserAgent tool. Read their
-descriptions to understand what each tool does.
+If the user provides a diagnosis URL, you can use the fetchDiagnosisDetails tool
+to get more information about the test case and its results.
+Or if the user asks you to modify a test, you could use the generateTestWithBrowserAgent tool. If you suspect
+that a UI selector needs to be updated, using the browser agent is a good idea.
+Before using generateTestWithBrowserAgent, you need to prepare the test code for the browser agent.
+You can do this by using the str_replace_editor tool to add a TODO comment to the test code. This
+comment should explain to the browser agent what to do.
+For example, if the expected modification is to click on a login button, you could add the following comment.
+// TODO(agent): Click on the login button
+The position of the comment is important: the browser agent will look for this comment and replace it with
+the actual code to click on the login button. If you are fixing a failing test, your comment should be
+around the failing line of code, so that it can be replaced/modified.
+You are running as a CLI tool inside the directory of the repo where this test file is located. Here is
+the repo directory structure:
+${(0, repo_tree_1.generateAsciiTree)(process.cwd())}
+While specifying paths to files, use relative paths from the current working directory. For example:
+- Correct path: "tests/lesson.spec.ts"
+- Incorrect path: "/repo/tests/lesson.spec.ts" or "${path_1.default.basename(process.cwd())}/tests/lesson.spec.ts"
 `;
-const getToolExecutor = (toolName) => {
-    if (toolName === "runTest") {
-        return test_run_1.runTestTool;
-    }
-    else if (toolName === "generateTestWithCodegen") {
-        return codegen_agent_1.generateTestWithCodegenTool;
-    }
-    else if (toolName === "generateTestWithBrowserAgent") {
-        return browser_agent_1.generateTestWithBrowserAgentTool;
-    }
-    else {
-        throw new Error(`Tool name ${toolName} not found`);
-    }
+const tools = [test_run_1.runTestTool, browser_agent_1.browserAgentTool, diagnosis_fetcher_1.diagnosisTool];
+const toolExecutors = {
+    ...Object.fromEntries(tools.map((tool) => [tool.schema.function.name, tool.execute])),
+    str_replace_editor: claude_1.strReplaceEditorTool,
 };
 async function chatAgent({ prompt, }) {
     let userPrompt = prompt;
-    let chatHistory = [
-        { role: "system", content: systemPrompt },
-        { role: "user", content: userPrompt },
-    ];
-    const llm = new llm_1.LLM({ provider: "openai" });
-    let response;
+    let chatState = new claude_1.ChatState();
+    chatState.pushTextMessage({ message: { role: "user", content: userPrompt } });
     let shouldAskUserForInput = false;
-    let pendingToolCalls = [];
     while (!userPrompt.toLowerCase().includes("stop")) {
-        if (pendingToolCalls.length > 0) {
-            const toolCall = pendingToolCalls.shift();
-            if (!toolCall) {
-                throw new Error("No tool call found");
+        const toolUse = chatState.getPendingToolCall();
+        if (toolUse) {
+            console.log("Executing tool:", toolUse.name, "with args:", toolUse.input);
+            const toolExecutor = toolExecutors[toolUse.name];
+            if (!toolExecutor) {
+                throw new Error(`Tool ${toolUse.name} not found`);
             }
-            console.log("Executing tool call:", toolCall.function.name, "with args:", toolCall.function.arguments);
-            const toolExecutor = getToolExecutor(toolCall.function.name);
-            const toolArgs = JSON.parse(toolCall.function.arguments);
-            const toolResult = await toolExecutor(toolArgs);
-            chatHistory.push({
-                role: "tool",
-                content: JSON.stringify(toolResult),
-                tool_call_id: toolCall.id,
+            const toolResult = await toolExecutor(toolUse.input);
+            chatState.pushToolResultToMessages({
+                toolCall: toolUse,
+                isError: toolResult.isError,
+                result: toolResult.result,
             });
             continue;
         }
@@ -63,27 +77,35 @@ async function chatAgent({ prompt, }) {
             userPrompt = await human_in_the_loop_1.humanLoop.getFeedback({
                 message: "Your response?",
             });
-            chatHistory.push({ role: "user", content: userPrompt });
+            chatState.pushTextMessage({
+                message: { role: "user", content: userPrompt },
+            });
             shouldAskUserForInput = false;
             continue;
         }
-        response = await llm.createChatCompletion({
-            model: "gpt-4o",
-            messages: chatHistory,
-            tools: [test_run_1.schema, codegen_agent_1.schema, browser_agent_1.schema],
-        });
+        const response = await (0, claude_1.createChatCompletion)(systemPrompt, chatState.getMessages(), [
+            ...tools.map((tool) => (0, claude_1.convertOpenAISchemaToAnthropic)(tool.schema)),
+            {
+                type: "text_editor_20250124",
+                name: "str_replace_editor",
+            },
+        ]);
         if (!response) {
             throw new Error("No response from LLM");
         }
-        chatHistory.push(response);
-        if (response.tool_calls) {
-            pendingToolCalls.push(...response.tool_calls);
+        chatState.pushTextMessage({
+            message: { role: "assistant", content: response.content },
+        });
+        const textBlock = response.content.find((b) => b.type === "text");
+        const toolUseBlock = response.content.find((b) => b.type === "tool_use");
+        console.log("Assistant response:", textBlock?.text);
+        if (toolUseBlock) {
+            chatState.addPendingToolCall({ toolCall: toolUseBlock });
         }
-        else if (response.content) {
-            console.log("Assistant response:", response.content);
+        else {
             shouldAskUserForInput = true;
         }
     }
-    return chatHistory;
+    return chatState.getMessages();
 }
 exports.chatAgent = chatAgent;

package/dist/agent/master/browser-tests/skills.spec.js CHANGED Viewed

@@ -31,6 +31,7 @@ fixtures_1.test.afterEach(async () => {
     fs_1.default.rmSync("pages", { recursive: true });
 });
 (0, fixtures_1.test)("use skills to subscribe to blog", async ({ page, server }) => {
+    fixtures_1.test.skip(!run_1.IS_ALLOWED_TO_USE_SKILLS, "Skills are disabled");
     await page.goto(`${server.baseURL}/blog-page.html`);
     const response = await (0, run_1.createTestUsingMasterAgent)({
         task: `subscribe as user@example.com`,
@@ -54,6 +55,7 @@ fixtures_1.test.afterEach(async () => {
     (0, fixtures_1.expect)(response.code).toMatch(/await.*subscribeToBlog.*page.*email.*user@example\.com/);
 });
 (0, fixtures_1.test)("use skills to extract blog post title", async ({ page, server }) => {
+    fixtures_1.test.skip(!run_1.IS_ALLOWED_TO_USE_SKILLS, "Skills are disabled");
     await page.goto(`${server.baseURL}/blog-page.html`);
     const response = await (0, run_1.createTestUsingMasterAgent)({
         // TODO: Extend this to click on "read more" and verify generated code
@@ -77,6 +79,7 @@ fixtures_1.test.afterEach(async () => {
     (0, fixtures_1.expect)(response.code).toMatch(/^const.*=.*await extractTitleForPost.*page.*nth:.*1/);
 });
 (0, fixtures_1.test)("use skills to subscribe with multiple pages", async ({ page, server, }) => {
+    fixtures_1.test.skip(!run_1.IS_ALLOWED_TO_USE_SKILLS, "Skills are disabled");
     await page.goto(`${server.baseURL}/icons-navbar.html`);
     const blogPage = await page.context().newPage();
     await blogPage.goto(`${server.baseURL}/blog-page.html`);

package/dist/agent/master/run.d.ts CHANGED Viewed

@@ -2,6 +2,7 @@ import type { TestCase, TestGenConfigOptions } from "@empiricalrun/shared-types"
 import { Page } from "playwright";
 import { ScopeVars } from "../../types";
 export { executeUsingComputerUseAgent } from "../cua";
+export declare const IS_ALLOWED_TO_USE_SKILLS = false;
 export declare function createTestUsingMasterAgent({ task, page, testCase, specPath, options, scopeVars, }: {
     task: string;
     page: Page;

package/dist/agent/master/run.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,QAAQ,EACR,oBAAoB,EACrB,MAAM,4BAA4B,CAAC;AACpC,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAelC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAUxC,OAAO,EAAE,4BAA4B,EAAE,MAAM,QAAQ,CAAC;~~AAuBtD~~,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,OAAO,CAAC,oBAAoB,CAAC,CAAC;IACvC,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GAwRA"}
1	+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,QAAQ,EACR,oBAAoB,EACrB,MAAM,4BAA4B,CAAC;AACpC,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAelC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAUxC,OAAO,EAAE,4BAA4B,EAAE,MAAM,QAAQ,CAAC;AAKtD,eAAO,MAAM,wBAAwB,QAAQ,CAAC;AAqB9C,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,OAAO,CAAC,oBAAoB,CAAC,CAAC;IACvC,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GAwRA"}

package/dist/agent/master/run.js CHANGED Viewed

@@ -1,6 +1,6 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.createTestUsingMasterAgent = exports.executeUsingComputerUseAgent = void 0;
+exports.createTestUsingMasterAgent = exports.IS_ALLOWED_TO_USE_SKILLS = exports.executeUsingComputerUseAgent = void 0;
 const llm_1 = require("@empiricalrun/llm");
 const actions_1 = require("../../actions");
 const skill_1 = require("../../actions/skill");
@@ -22,6 +22,8 @@ const next_action_1 = require("./next-action");
 var cua_1 = require("../cua");
 Object.defineProperty(exports, "executeUsingComputerUseAgent", { enumerable: true, get: function () { return cua_1.executeUsingComputerUseAgent; } });
 const MAX_ERROR_COUNT = 2;
+// Disabling skills as we're seeing false usage with chat agent
+exports.IS_ALLOWED_TO_USE_SKILLS = false;
 function getPageVariables(stateVariables) {
     const keys = Object.keys(stateVariables);
     // This checks for whether page.url() exists, which is true for all pages
@@ -69,7 +71,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, specPath, opti
         maxTokens: options.modelProvider === "google" ? 3000000 : 1000000,
     });
     let skills = [];
-    if (testCase) {
+    if (testCase && exports.IS_ALLOWED_TO_USE_SKILLS) {
         skills = await (0, skills_retriever_1.getAppropriateSkills)({
             testCase,
             trace,

package/dist/bin/index.js CHANGED Viewed

@@ -148,13 +148,16 @@ async function runAgentsWorkflow(testGenConfig, testGenToken) {
         void (0, session_1.updateSessionStatus)(testGenConfig.options?.metadata.testSessionId, {
             status: "agent_live_session_started",
         });
-        await (0, run_1.generateTestsUsingMasterAgent)({
+        const { isError, error } = await (0, run_1.generateTestsUsingMasterAgent)({
             testFilePath: specPath,
             filePathToUpdate,
             pwProjectsFilter: testGenConfig.environment?.playwrightProjects,
             testGenToken,
             repoDir: process.cwd(),
         });
+        if (isError) {
+            throw new Error(error);
+        }
     }
     return agent;
 }

package/dist/tools/browser-agent.d.ts CHANGED Viewed

@@ -1,16 +1,3 @@
-import { OpenAI } from "openai";
-export declare const schema: OpenAI.Chat.Completions.ChatCompletionTool;
-export declare const generateTestWithBrowserAgentTool: ({ testName, fileName, changeToMake, }: {
-    testName: string;
-    fileName: string;
-    changeToMake: string;
-}) => Promise<{
-    result: string;
-    gitPatch: string;
-    error?: undefined;
-} | {
-    result: string;
-    error: string;
-    gitPatch?: undefined;
-}>;
+import type { Tool } from "./types";
+export declare const browserAgentTool: Tool;
 //# sourceMappingURL=browser-agent.d.ts.map

package/dist/tools/browser-agent.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"browser-agent.d.ts","sourceRoot":"","sources":["../../src/tools/browser-agent.ts"],"names":[],"mappings":"~~AACA~~,OAAO,EAAE,~~MAAM~~,EAAE,MAAM,~~QAAQ~~,CAAC;~~AAQhC~~,eAAO,MAAM,~~MAAM~~,EAAE,~~MAAM~~,CAAC~~,IAAI,CAAC,WAAW,CAAC,kBA0B5C,CAAC;AAEF,eAAO,MAAM,gCAAgC;cAKjC,MAAM;cACN,MAAM;kBACF,MAAM;;;;;;;;;EAsCrB,CAAC~~"}
1	+ {"version":3,"file":"browser-agent.d.ts","sourceRoot":"","sources":["../../src/tools/browser-agent.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAUpC,eAAO,MAAM,gBAAgB,EAAE,IAkH9B,CAAC"}

package/dist/tools/browser-agent.js CHANGED Viewed

@@ -1,76 +1,120 @@
 "use strict";
-var __importDefault = (this && this.__importDefault) || function (mod) {
-    return (mod && mod.__esModule) ? mod : { "default": mod };
-};
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.generateTestWithBrowserAgentTool = exports.schema = void 0;
-const path_1 = __importDefault(require("path"));
+exports.browserAgentTool = void 0;
 const run_1 = require("../agent/browsing/run");
 const utils_1 = require("../agent/browsing/utils");
 const scenarios_1 = require("../bin/utils/scenarios");
 const git_1 = require("../utils/git");
-exports.schema = {
-    type: "function",
-    function: {
-        name: "generateTestWithBrowserAgent",
-        description: "Create or modify a test case with browser agent. This is useful when the modifications involve changing a selector or executing browser interactions (like click, fill, etc)",
-        parameters: {
-            type: "object",
-            properties: {
-                testName: {
-                    type: "string",
-                    description: "The name of the test to create or modify",
-                },
-                fileName: {
-                    type: "string",
-                    description: "The name of the file where the test is located. File name must end with .spec.ts",
-                },
-                changeToMake: {
-                    type: "string",
-                    description: "The change to make to the test",
+exports.browserAgentTool = {
+    schema: {
+        type: "function",
+        function: {
+            name: "generateTestWithBrowserAgent",
+            description: `
+Create or modify a test case with browser agent. The browser agent can take user interactions in a web browser
+and generate Playwright code for that actions. This is a useful tool when the modifications require knowing the
+locator/selector for an element on the page.
+IMPORTANT: Before you invoke this tool, you need to ensure that the test code is correctly prepared for this
+agent. Preparation involves adding a TODO comment that describes the change that needs to be made. A good
+comment calls out the element and browser interactions sto take on them. The TODO comment also has (agent) next to it, to
+clearly label that the change is for the agent to make.
+For example: This is a good TODO comment
+\`\`\`
+test("Example test code", async ({ page }) => {
+  await page.goto("https://example.com");
+  // TODO(agent): Click on the login button
+});
+\`\`\`
+The browser agent will execute the steps before the TODO comment and replace the TODO comment with the Playwright
+code that performs the actions described in the comment. For instance, on the running the tool, the agent will
+output the following final code:
+\`\`\`
+test("Example test code", async ({ page }) => {
+  await page.goto("https://example.com");
+  await page.getByRole("button", { name: "Login" }).click();
+});
+\`\`\`
+      `,
+            parameters: {
+                type: "object",
+                properties: {
+                    testName: {
+                        type: "string",
+                        description: "The name of the test to create or modify",
+                    },
+                    testSuites: {
+                        type: "array",
+                        description: "The suites (describe blocks) where the test is located",
+                        items: {
+                            type: "string",
+                        },
+                    },
+                    fileName: {
+                        type: "string",
+                        description: "The name of the file where the test is located. File name must end with .spec.ts",
+                    },
+                    project: {
+                        type: "string",
+                        description: "The Playwright project to run tests against (e.g. 'chromium' or 'firefox')",
+                    },
+                    changeToMake: {
+                        type: "string",
+                        description: "The change to make to the test",
+                    },
                 },
+                required: [
+                    "testName",
+                    "testSuites",
+                    "fileName",
+                    "changeToMake",
+                    "project",
+                ],
             },
-            required: ["testName", "fileName", "changeToMake"],
         },
     },
+    execute: async (input) => {
+        const { testName, testSuites, fileName, changeToMake, project } = input;
+        await (0, utils_1.replaceTodoWithCreateTest)({
+            testCaseName: testName,
+            testCaseSuites: testSuites,
+            testFilePath: fileName,
+        });
+        const { isError, error } = await (0, run_1.generateTestsUsingMasterAgent)({
+            testFilePath: fileName,
+            filePathToUpdate: fileName,
+            pwProjectsFilter: [project],
+            testGenToken: (0, scenarios_1.buildTokenFromOptions)({
+                name: testName,
+                file: fileName,
+                prompt: changeToMake,
+            }),
+            repoDir: process.cwd(),
+        });
+        if (!isError) {
+            const gitPatch = (0, git_1.getGitDiff)(fileName);
+            return {
+                isError,
+                result: `Test was generated successfully. Here is the git patch:
+\`\`\`
+${gitPatch}
+\`\`\`
+`,
+            };
+        }
+        else {
+            return {
+                isError,
+                result: `Test was not generated successfully. Here is the error:
+\`\`\`
+${error}
+\`\`\`
+`,
+            };
+        }
+    },
 };
-const generateTestWithBrowserAgentTool = async ({ testName, fileName, changeToMake, }) => {
-    const testCase = {
-        id: 0,
-        name: testName,
-        filePath: fileName,
-        suites: [], // TODO: Support suites
-        steps: [changeToMake],
-    };
-    const filePathFromCwd = path_1.default.join("tests", fileName);
-    const filePathToUpdate = await (0, utils_1.prepareFileForMasterAgent)({
-        testCase,
-        specPath: filePathFromCwd,
-    });
-    const { isError, error } = await (0, run_1.generateTestsUsingMasterAgent)({
-        testFilePath: filePathFromCwd,
-        filePathToUpdate,
-        // TODO: Remove this hardcoded project name
-        pwProjectsFilter: ["chromium"],
-        testGenToken: (0, scenarios_1.buildTokenFromOptions)({
-            name: testName,
-            file: fileName,
-            prompt: changeToMake,
-        }),
-        repoDir: process.cwd(),
-    });
-    if (!isError) {
-        const gitPatch = (0, git_1.getGitDiff)(filePathFromCwd);
-        return {
-            result: "Test was generated successfully",
-            gitPatch,
-        };
-    }
-    else {
-        return {
-            result: "Test was not generated successfully",
-            error,
-        };
-    }
-};
-exports.generateTestWithBrowserAgentTool = generateTestWithBrowserAgentTool;

package/dist/tools/codegen-agent.d.ts CHANGED Viewed

@@ -1,9 +1,3 @@
-import { TestCase } from "@empiricalrun/shared-types";
-import { OpenAI } from "openai";
-export declare const schema: OpenAI.Chat.Completions.ChatCompletionTool;
-export declare const generateTestWithCodegenTool: ({ testName, fileName, changeToMake, }: {
-    testName: string;
-    fileName: string;
-    changeToMake: string;
-}) => Promise<void | TestCase[]>;
+import type { Tool } from "./types";
+export declare const codegenTool: Tool;
 //# sourceMappingURL=codegen-agent.d.ts.map

package/dist/tools/codegen-agent.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"codegen-agent.d.ts","sourceRoot":"","sources":["../../src/tools/codegen-agent.ts"],"names":[],"mappings":"~~AAAA~~,OAAO,~~EAAE~~,~~QAAQ,~~EAAE,~~MAAM~~,~~4BAA4B,CAAC;AACtD,OAAO,~~EAAE,MAAM,~~EAAE~~,~~MAAM,QAAQ,~~CAAC;~~AAIhC~~,eAAO,MAAM,~~MAAM~~,EAAE,~~MAAM~~,CAAC~~,IAAI,CAAC,WAAW,CAAC,kBA0B5C,CAAC;AAEF,eAAO,MAAM,2BAA2B;cAK5B,MAAM;cACN,MAAM;kBACF,MAAM;gCAcrB,CAAC~~"}
1	+ {"version":3,"file":"codegen-agent.d.ts","sourceRoot":"","sources":["../../src/tools/codegen-agent.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AASpC,eAAO,MAAM,WAAW,EAAE,IAsDzB,CAAC"}

package/dist/tools/codegen-agent.js CHANGED Viewed

@@ -1,44 +1,56 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.generateTestWithCodegenTool = exports.schema = void 0;
+exports.codegenTool = void 0;
 const run_1 = require("../agent/codegen/run");
-exports.schema = {
-    type: "function",
-    function: {
-        name: "generateTestWithCodegen",
-        description: "Create or modify a test case with code generation. This is useful when modifications can be done with TypeScript only, and don't require any browser interactions or element selectors.",
-        parameters: {
-            type: "object",
-            properties: {
-                testName: {
-                    type: "string",
-                    description: "The name of the test to create or modify",
-                },
-                fileName: {
-                    type: "string",
-                    description: "The name of the file where the test is located. File name must end with .spec.ts",
-                },
-                changeToMake: {
-                    type: "string",
-                    description: "The change to make to the test",
+exports.codegenTool = {
+    schema: {
+        type: "function",
+        function: {
+            name: "generateTestWithCodegen",
+            description: "Create or modify a test case with code generation. This is useful when modifications can be done with TypeScript only, and don't require any browser interactions or element selectors.",
+            parameters: {
+                type: "object",
+                properties: {
+                    testName: {
+                        type: "string",
+                        description: "The name of the test to create or modify",
+                    },
+                    testSuites: {
+                        type: "array",
+                        description: "The suites (describe blocks) where the test is located",
+                        items: {
+                            type: "string",
+                        },
+                    },
+                    fileName: {
+                        type: "string",
+                        description: "The name of the file where the test is located. File name must end with .spec.ts",
+                    },
+                    changeToMake: {
+                        type: "string",
+                        description: "The change to make to the test",
+                    },
                 },
+                required: ["testName", "testSuites", "fileName", "changeToMake"],
             },
-            required: ["testName", "fileName", "changeToMake"],
         },
     },
+    execute: async (input) => {
+        const { testName, testSuites, fileName, changeToMake } = input;
+        const testCase = {
+            id: 0,
+            name: testName,
+            filePath: fileName,
+            suites: testSuites,
+            steps: [changeToMake],
+        };
+        const result = await (0, run_1.generateTestWithCodegen)({
+            testCase,
+            file: fileName,
+        });
+        return {
+            result: JSON.stringify(result),
+            isError: false,
+        };
+    },
 };
-const generateTestWithCodegenTool = async ({ testName, fileName, changeToMake, }) => {
-    const testCase = {
-        id: 0,
-        name: testName,
-        filePath: fileName,
-        suites: [], // TODO: Support suites
-        steps: [changeToMake],
-    };
-    const result = await (0, run_1.generateTestWithCodegen)({
-        testCase,
-        file: fileName,
-    });
-    return result;
-};
-exports.generateTestWithCodegenTool = generateTestWithCodegenTool;

package/dist/tools/diagnosis-fetcher.d.ts ADDED Viewed

@@ -0,0 +1,3 @@
+import type { Tool } from "./types";
+export declare const diagnosisTool: Tool;
+//# sourceMappingURL=diagnosis-fetcher.d.ts.map

package/dist/tools/diagnosis-fetcher.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"diagnosis-fetcher.d.ts","sourceRoot":"","sources":["../../src/tools/diagnosis-fetcher.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAMpC,eAAO,MAAM,aAAa,EAAE,IA4F3B,CAAC"}

package/dist/tools/diagnosis-fetcher.js ADDED Viewed

@@ -0,0 +1,88 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.diagnosisTool = void 0;
+const promises_1 = __importDefault(require("fs/promises"));
+const path_1 = __importDefault(require("path"));
+exports.diagnosisTool = {
+    schema: {
+        type: "function",
+        function: {
+            name: "fetchDiagnosisDetails",
+            description: "Fetch details about a test case diagnosis using its URL or slug",
+            parameters: {
+                type: "object",
+                properties: {
+                    diagnosisUrl: {
+                        type: "string",
+                        description: "The full URL of the diagnosis (e.g. https://dash.empirical.run/shopflo-tests/diagnosis/split-cod-place-the-order--byynrPjCml57)",
+                    },
+                },
+                required: ["diagnosisUrl"],
+            },
+        },
+    },
+    execute: async (input) => {
+        const { diagnosisUrl } = input;
+        // Extract the slug from the URL - it's the part after the last '--'
+        const slug = diagnosisUrl.split("--").pop();
+        if (!slug) {
+            throw new Error("Invalid diagnosis URL - could not extract slug");
+        }
+        // Make the API call to fetch diagnosis details
+        const response = await fetch(`https://dash.empirical.run/api/diagnosis/${slug}/detailed`, {
+            method: "GET",
+            headers: {
+                Authorization: "weQPMWKT", // Using the auth token from test-endpoint.mdc
+            },
+        });
+        if (!response.ok) {
+            return {
+                result: `Failed to fetch diagnosis details: ${response.statusText}`,
+                isError: true,
+            };
+        }
+        const data = await response.json();
+        const { test_case, diagnosis } = data.data;
+        const project = diagnosis[0]?.test_project || "unknown";
+        const sourceContext = await promises_1.default.readFile(path_1.default.join("tests", test_case.file_path), "utf-8");
+        // Format the response as markdown
+        const markdownResponse = `
+# Test Case Diagnosis
+## Test Case Information
+- **Test Case Name**: ${test_case.name}
+- **Test Suite**: ${test_case.suites}
+- **File Name**: tests/${test_case.file_path}
+- **Project**: ${project}
+## Source Context
+${sourceContext}
+## What Happened in the Test Run
+### Failure Details
+- **Failing Line**: ${diagnosis[0]?.failing_line || "No failing line available"}
+#### Error Stack
+\`\`\`
+${diagnosis[0]?.failed_run_metadata?.stack?.replace("/runner/_work/shopflo-tests/shopflo-tests/source-repo/", "") || "No error stack available"}
+\`\`\`
+#### Error Summary
+${diagnosis[0]?.error_stack_summary?.content || "No error summary available"}
+#### Visual Analysis
+${diagnosis[0]?.visual_diff_summary?.summary || "No visual analysis available"}
+#### Merged Summary
+${diagnosis[0]?.merged_summary?.content || "No merged summary available"}
+`;
+        return {
+            result: markdownResponse,
+            isError: false,
+        };
+    },
+};

package/dist/tools/test-run.d.ts CHANGED Viewed

@@ -1,10 +1,3 @@
-import { OpenAI } from "openai";
-export declare const schema: OpenAI.Chat.Completions.ChatCompletionTool;
-export declare const runTestTool: ({ testName, fileName, }: {
-    testName: string;
-    fileName: string;
-}) => Promise<{
-    hasTestPassed: boolean;
-    summaryJson: any;
-}>;
+import type { Tool } from "./types";
+export declare const runTestTool: Tool;
 //# sourceMappingURL=test-run.d.ts.map

package/dist/tools/test-run.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"test-run.d.ts","sourceRoot":"","sources":["../../src/tools/test-run.ts"],"names":[],"mappings":"~~AACA~~,OAAO,~~EAAE~~,~~MAAM,~~EAAE,~~MAAM~~,~~QAAQ,CAAC;AAEhC,eAAO,MAAM,MAAM,~~EAAE,MAAM,~~CAAC~~,~~IAAI,~~CAAC~~,WAAW,CAAC,kBAqB5C,CAAC~~;~~AAEF~~,eAAO,MAAM,WAAW~~;cAIZ~~,~~MAAM;cACN~~,~~MAAM;;;;EASjB~~,CAAC"}
1	+ {"version":3,"file":"test-run.d.ts","sourceRoot":"","sources":["../../src/tools/test-run.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AASpC,eAAO,MAAM,WAAW,EAAE,IA8CzB,CAAC"}

package/dist/tools/test-run.js CHANGED Viewed

@@ -1,35 +1,49 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.runTestTool = exports.schema = void 0;
+exports.runTestTool = void 0;
 const test_run_1 = require("@empiricalrun/test-run");
-exports.schema = {
-    type: "function",
-    function: {
-        name: "runTest",
-        description: "Run a test",
-        parameters: {
-            type: "object",
-            properties: {
-                testName: {
-                    type: "string",
-                    description: "The name of the test to run",
-                },
-                fileName: {
-                    type: "string",
-                    description: "The name of the file where the test is located. File name must end with .spec.ts",
+exports.runTestTool = {
+    schema: {
+        type: "function",
+        function: {
+            name: "runTest",
+            description: "Run a test",
+            parameters: {
+                type: "object",
+                properties: {
+                    testName: {
+                        type: "string",
+                        description: "The name of the test to run",
+                    },
+                    suites: {
+                        type: "array",
+                        description: "The suites (describe blocks) where the test is located.",
+                        items: { type: "string" },
+                    },
+                    fileName: {
+                        type: "string",
+                        description: "The name of the file where the test is located. File name must end with .spec.ts",
+                    },
+                    project: {
+                        type: "string",
+                        description: "The project to run the test on",
+                    },
                 },
+                required: ["testName", "suites", "fileName", "project"],
             },
-            required: ["testName", "fileName"],
         },
     },
+    execute: async (input) => {
+        const { testName, suites, fileName, project } = input;
+        const result = await (0, test_run_1.runSingleTest)({
+            testName,
+            suites,
+            fileName,
+            projects: [project],
+        });
+        return {
+            result: JSON.stringify(result),
+            isError: false,
+        };
+    },
 };
-const runTestTool = async ({ testName, fileName, }) => {
-    // TODO: Remove this hardcoded project name
-    const result = await (0, test_run_1.runSingleTest)({
-        testName,
-        fileName,
-        projects: ["chromium"],
-    });
-    return result;
-};
-exports.runTestTool = runTestTool;

package/dist/tools/types.d.ts ADDED Viewed

@@ -0,0 +1,11 @@
+import type { OpenAI } from "openai";
+/** Schema advertised to the model for a tool: an OpenAI chat-completions tool definition. */
+export type ToolSchema = OpenAI.Chat.Completions.ChatCompletionTool;
+/** Outcome of executing a tool. */
+export type ToolResult = {
+    /** Text payload produced by the tool. */
+    result: string;
+    /** Whether the tool reports the execution as failed. */
+    isError: boolean;
+};
+/** A tool: its schema paired with the function that executes it. */
+export type Tool = {
+    schema: ToolSchema;
+    /**
+     * Executes the tool. `input` is untyped here; presumably it is the argument
+     * object matching `schema.function.parameters` - confirm against the caller.
+     */
+    execute: (input: any) => Promise<ToolResult>;
+};
+//# sourceMappingURL=types.d.ts.map

package/dist/tools/types.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/tools/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAErC,MAAM,MAAM,UAAU,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,kBAAkB,CAAC;AAEpE,MAAM,MAAM,UAAU,GAAG;IACvB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,OAAO,CAAC;CAClB,CAAC;AAEF,MAAM,MAAM,IAAI,GAAG;IAIjB,MAAM,EAAE,UAAU,CAAC;IACnB,OAAO,EAAE,CAAC,KAAK,EAAE,GAAG,KAAK,OAAO,CAAC,UAAU,CAAC,CAAC;CAC9C,CAAC"}

package/dist/tools/types.js ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ "use strict";
2	+ Object.defineProperty(exports, "__esModule", { value: true });

package/dist/utils/repo-tree.d.ts ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ export declare function generateAsciiTree(dirPath: string, options?: {}): string;
2	+ //# sourceMappingURL=repo-tree.d.ts.map

package/dist/utils/repo-tree.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"repo-tree.d.ts","sourceRoot":"","sources":["../../src/utils/repo-tree.ts"],"names":[],"mappings":"AAYA,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,KAAK,UAsE9D"}

package/dist/utils/repo-tree.js ADDED Viewed

@@ -0,0 +1,75 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.generateAsciiTree = void 0;
+const fs_1 = __importDefault(require("fs"));
+const path_1 = __importDefault(require("path"));
+const DEFAULT_EXCLUDE = [
+    "node_modules",
+    "dist",
+    "build",
+    /\.git/,
+    ".DS_Store",
+    "playwright-report",
+];
+function generateAsciiTree(dirPath, options = {}) {
+    const defaultOptions = {
+        showHidden: false,
+        exclude: DEFAULT_EXCLUDE,
+        maxDepth: 10,
+    };
+    const opts = { ...defaultOptions, ...options };
+    // Make sure the path exists and is a directory
+    if (!fs_1.default.existsSync(dirPath) || !fs_1.default.statSync(dirPath).isDirectory()) {
+        throw new Error(`"${dirPath}" is not a valid directory path`);
+    }
+    // Start with the root directory name
+    const rootName = path_1.default.basename(dirPath);
+    let result = rootName + "\n";
+    function processDirectory(currentPath, prefix = "", depth = 1) {
+        if (depth > opts.maxDepth)
+            return;
+        const items = fs_1.default.readdirSync(currentPath);
+        // Sort items: directories first, then files
+        const sortedItems = items.sort((a, b) => {
+            const aIsDir = fs_1.default.statSync(path_1.default.join(currentPath, a)).isDirectory();
+            const bIsDir = fs_1.default.statSync(path_1.default.join(currentPath, b)).isDirectory();
+            if (aIsDir && !bIsDir)
+                return -1;
+            if (!aIsDir && bIsDir)
+                return 1;
+            return a.localeCompare(b);
+        });
+        // Process each item
+        sortedItems.forEach((item, index) => {
+            // Skip hidden files if not showing hidden
+            if (!opts.showHidden && item.startsWith("."))
+                return;
+            // Skip excluded patterns
+            if (opts.exclude.some((pattern) => typeof pattern === "string"
+                ? item === pattern
+                : pattern instanceof RegExp
+                    ? pattern.test(item)
+                    : false))
+                return;
+            const itemPath = path_1.default.join(currentPath, item);
+            const isDirectory = fs_1.default.statSync(itemPath).isDirectory();
+            const isLast = index === sortedItems.length - 1;
+            // Current item symbols
+            const symbol = isLast ? "└── " : "├── ";
+            const nextPrefix = isLast ? "    " : "│   ";
+            // Add the current item to the result
+            result += `${prefix}${symbol}${item}${isDirectory ? "/" : ""}\n`;
+            // Process subdirectories
+            if (isDirectory) {
+                processDirectory(itemPath, prefix + nextPrefix, depth + 1);
+            }
+        });
+    }
+    // Start the recursive processing
+    processDirectory(dirPath);
+    return result;
+}
+exports.generateAsciiTree = generateAsciiTree;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@empiricalrun/test-gen",
-  "version": "0.47.4",
+  "version": "0.48.1",
   "publishConfig": {
     "registry": "https://registry.npmjs.org/",
     "access": "public"
@@ -73,10 +73,10 @@
     "ts-morph": "^23.0.0",
     "tsx": "^4.16.2",
     "typescript": "^5.3.3",
-    "@empiricalrun/llm": "^0.9.36",
+    "@empiricalrun/llm": "^0.10.1",
     "@empiricalrun/r2-uploader": "^0.3.8",
     "@empiricalrun/reporter": "^0.23.1",
-    "@empiricalrun/test-run": "^0.7.1"
+    "@empiricalrun/test-run": "^0.7.2"
   },
   "devDependencies": {
     "@playwright/test": "1.47.1",