@empiricalrun/test-gen 0.47.4 → 0.48.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,35 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.48.1
4
+
5
+ ### Patch Changes
6
+
7
+ - 13e3048: fix: add semicolon to terminate createTest line
8
+ - b7092d0: fix: project detection should normalize paths
9
+ - Updated dependencies [a2609f5]
10
+ - @empiricalrun/llm@0.10.1
11
+
12
+ ## 0.48.0
13
+
14
+ ### Minor Changes
15
+
16
+ - fc952c9: feat: Add new tool call to fetch diagnosis details
17
+
18
+ ### Patch Changes
19
+
20
+ - 32eaf6f: feat: add repo tree in ascii to chat system prompt
21
+ - 6cc97ca: feat: use claude3.7 in chat agent for diagnosis auto-fix scenarios
22
+ - 2e28c20: fix: master agent should not throw when used in tool call
23
+ - f03ff97: feat: update master agent tool call to use TODO(agent) preparation method
24
+ - ed273c2: fix: disable skills usage in master agent
25
+ - 049102a: fix: support suites in agent tool calls
26
+ - Updated dependencies [6cc97ca]
27
+ - Updated dependencies [0fee9bf]
28
+ - Updated dependencies [1f95e4b]
29
+ - Updated dependencies [049102a]
30
+ - @empiricalrun/llm@0.10.0
31
+ - @empiricalrun/test-run@0.7.2
32
+
3
33
  ## 0.47.4
4
34
 
5
35
  ### Patch Changes
@@ -5,15 +5,6 @@ type GenerateTestsType = {
5
5
  testGenToken: string;
6
6
  repoDir: string;
7
7
  };
8
- /**
9
- *
10
- * Function to generate tests using master agent
11
- * @export
12
- * @param {GenerateTestsType} {
13
- * testFilePath,
14
- * filePathToUpdate,
15
- * }
16
- */
17
8
  export declare function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, pwProjectsFilter, testGenToken, repoDir, }: GenerateTestsType): Promise<{
18
9
  isError: boolean;
19
10
  error: string;
@@ -1 +1 @@
1
- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/run.ts"],"names":[],"mappings":"AAiBA,KAAK,iBAAiB,GAAG;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;CACjB,CAAC;AAEF;;;;;;;;GAQG;AACH,wBAAsB,6BAA6B,CAAC,EAClD,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,EAChB,YAAY,EACZ,OAAO,GACR,EAAE,iBAAiB;;;GAkFnB"}
1
+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/run.ts"],"names":[],"mappings":"AAiBA,KAAK,iBAAiB,GAAG;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;CACjB,CAAC;AAEF,wBAAsB,6BAA6B,CAAC,EAClD,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,EAChB,YAAY,EACZ,OAAO,GACR,EAAE,iBAAiB;;;GA8EnB"}
@@ -10,15 +10,6 @@ const web_1 = require("../../bin/utils/platform/web");
10
10
  const server_1 = require("../../file/server");
11
11
  const exec_1 = require("../../utils/exec");
12
12
  const utils_1 = require("./utils");
13
- /**
14
- *
15
- * Function to generate tests using master agent
16
- * @export
17
- * @param {GenerateTestsType} {
18
- * testFilePath,
19
- * filePathToUpdate,
20
- * }
21
- */
22
13
  async function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, pwProjectsFilter, testGenToken, repoDir, }) {
23
14
  if (!fs_extra_1.default.existsSync(testFilePath)) {
24
15
  throw new Error(`File for master agent to run not found: ${testFilePath}`);
@@ -45,7 +36,8 @@ async function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, p
45
36
  await teardowns.skipAll();
46
37
  }
47
38
  const command = `npx playwright test ${testFilePath} --retries 0 --project ${project} --timeout 0 --headed`;
48
- let isError = false, error = "";
39
+ let isError = false;
40
+ let error = "";
49
41
  try {
50
42
  await (0, exec_1.cmd)(command.split(" "), {
51
43
  env: {
@@ -80,10 +72,6 @@ async function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, p
80
72
  }
81
73
  // remove the test only from the file
82
74
  await (0, web_1.removeTestOnly)(testFilePath);
83
- if (isError) {
84
- // throw the error because of which test gen failed
85
- throw Error(error);
86
- }
87
75
  await fileService.stop();
88
76
  return {
89
77
  isError,
@@ -4,6 +4,16 @@ import { Page } from "playwright";
4
4
  import { PlaywrightTestConfig } from "playwright/test";
5
5
  export declare function isRegExp(obj: any): obj is RegExp;
6
6
  export declare function prepareBrowsingAgentTask(steps: string[]): string;
7
+ export declare function replaceTodoWithCreateTest({ testFilePath, testCaseName, testCaseSuites, }: {
8
+ testFilePath: string;
9
+ testCaseName: string;
10
+ testCaseSuites: string[];
11
+ }): Promise<void>;
12
+ export declare function markTestAsOnly({ testCaseName, testCaseSuites, specPath, }: {
13
+ testCaseName: string;
14
+ testCaseSuites: string[];
15
+ specPath: string;
16
+ }): Promise<void>;
7
17
  export declare function prepareFileForMasterAgent({ testCase, specPath, trace, }: {
8
18
  testCase: TestCase;
9
19
  specPath: string;
@@ -1 +1 @@
1
- {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAe,QAAQ,EAAE,MAAM,4BAA4B,CAAC;AAIxE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAsBvD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;AA+FD,wBAAsB,yBAAyB,CAAC,EAC9C,QAAQ,EACR,QAAQ,EACR,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC,MAAM,CAAC,CAyDlB;AAyBD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBA2HxD;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,CACxC,OAAO,EAAE,MAAM,GACd,OAAO,CAAC,oBAAoB,CAAC,CAM/B;AAWD;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,YAAY,EAAE,MAAM,EACpB,gBAAgB,EAAE,oBAAoB,EACtC,gBAAgB,GAAE,MAAM,EAAU,GACjC,OAAO,CAAC,MAAM,CAAC,CA+CjB;AAED,qBAAa,eAAe;IACd,OAAO,CAAC,SAAS;gBAAT,SAAS,EAAE,MAAM;IACrC,OAAO,CAAC,aAAa,CAAqB;YAE5B,mBAAmB;YAUnB,gBAAgB;IAsBjB,OAAO;IAuBb,SAAS;CAKjB"}
1
+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAe,QAAQ,EAAE,MAAM,4BAA4B,CAAC;AAIxE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAsBvD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;AAiFD,wBAAsB,yBAAyB,CAAC,EAC9C,YAAY,EACZ,YAAY,EACZ,cAAc,GACf,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;CAC1B,iBAkBA;AAED,wBAAsB,cAAc,CAAC,EACnC,YAAY,EACZ,cAAc,EACd,QAAQ,GACT,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,QAAQ,EAAE,MAAM,CAAC;CAClB,iBAoBA;AAED,wBAAsB,yBAAyB,CAAC,EAC9C,QAAQ,EACR,QAAQ,EACR,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC,MAAM,CAAC,CAyDlB;AAyBD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBA2HxD;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,CACxC,OAAO,EAAE,MAAM,GACd,OAAO,CAAC,oBAAoB,CAAC,CAM/B;AAWD;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,YAAY,EAAE,MAAM,EACpB,gBAAgB,EAAE,oBAAoB,EACtC,gBAAgB,GAAE,MAAM,EAAU,GACjC,OAAO,CAAC,MAAM,CAAC,CA+CjB;AAED,qBAAa,eAAe;IACd,OAAO,CAAC,SAAS;gBAAT,SAAS,EAAE,MAAM;IACrC,OAAO,CAAC,aAAa,CAAqB;YAE5B,mBAAmB;YAUnB,gBAAgB;IAsBjB,OAAO;IAuBb,SAAS;CAKjB"}
@@ -3,7 +3,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
3
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.TeardownManager = exports.detectProjectName = exports.readPlaywrightConfig = exports.injectPwLocatorGenerator = exports.prepareFileForMasterAgent = exports.prepareBrowsingAgentTask = exports.isRegExp = void 0;
6
+ exports.TeardownManager = exports.detectProjectName = exports.readPlaywrightConfig = exports.injectPwLocatorGenerator = exports.prepareFileForMasterAgent = exports.markTestAsOnly = exports.replaceTodoWithCreateTest = exports.prepareBrowsingAgentTask = exports.isRegExp = void 0;
7
7
  const fs_extra_1 = __importDefault(require("fs-extra"));
8
8
  const minimatch_1 = require("minimatch");
9
9
  const path_1 = __importDefault(require("path"));
@@ -81,11 +81,32 @@ async function prepareFileForUpdateScenario({ testCase, specPath, trace, }) {
81
81
  nonSpecFileCode: nonSpecFilePrompt,
82
82
  testCase: testCase,
83
83
  });
84
+ await markTestAsOnly({
85
+ testCaseName: name,
86
+ testCaseSuites: suites,
87
+ specPath,
88
+ });
89
+ return createTestFilePath;
90
+ }
91
+ async function replaceTodoWithCreateTest({ testFilePath, testCaseName, testCaseSuites, }) {
92
+ // This method is an alternative to prepareFileForUpdateScenario
93
+ // TODO: Does not support multiple pages, scoped variables, updates in POM files
94
+ const fileContent = await fs_extra_1.default.readFile(testFilePath, "utf-8");
95
+ await fs_extra_1.default.writeFile(testFilePath, fileContent.replace(/\/\/ TODO\(agent\): (.*)/, (_, todoText) => `await createTest("${todoText.replace(/"/g, '\\"')}", page);`));
96
+ await addImportForCreateTest(testFilePath);
97
+ await markTestAsOnly({
98
+ testCaseName,
99
+ testCaseSuites,
100
+ specPath: testFilePath,
101
+ });
102
+ }
103
+ exports.replaceTodoWithCreateTest = replaceTodoWithCreateTest;
104
+ async function markTestAsOnly({ testCaseName, testCaseSuites, specPath, }) {
84
105
  const testFileContent = await fs_extra_1.default.readFile(specPath, "utf-8");
85
106
  const { testBlock, testNode } = (0, web_1.getTypescriptTestBlock)({
86
- scenarioName: name,
107
+ scenarioName: testCaseName,
87
108
  content: testFileContent,
88
- suites,
109
+ suites: testCaseSuites,
89
110
  });
90
111
  const parentDescribe = (0, web_1.findFirstSerialDescribeBlock)(testNode);
91
112
  const isFileMarkedSerial = await (0, web_1.hasTopLevelDescribeConfigureWithSerialMode)(specPath);
@@ -94,8 +115,8 @@ async function prepareFileForUpdateScenario({ testCase, specPath, trace, }) {
94
115
  const updatedTestFileContent = newContentsWithTestOnly(testFileContent, testBlock, testBlock, parentDescribe?.getText() || "");
95
116
  await fs_extra_1.default.writeFile(specPath, updatedTestFileContent);
96
117
  }
97
- return createTestFilePath;
98
118
  }
119
+ exports.markTestAsOnly = markTestAsOnly;
99
120
  async function prepareFileForMasterAgent({ testCase, specPath, trace, }) {
100
121
  const prepareFileSpan = trace?.span({
101
122
  name: "prepare-file-for-master-agent",
@@ -288,7 +309,7 @@ function matchAgainstPattern(pattern, filePathToTest) {
288
309
  * @returns
289
310
  */
290
311
  async function detectProjectName(testFilePath, playwrightConfig, pwProjectsFilter = ["*"]) {
291
- const filePath = testFilePath.replace("./tests/", "");
312
+ const filePath = path_1.default.normalize(testFilePath).replace("tests/", "");
292
313
  if (!playwrightConfig.projects || playwrightConfig.projects.length === 0) {
293
314
  throw new Error(`No projects found in playwright config.`);
294
315
  }
@@ -1,7 +1,7 @@
1
1
  import { TraceClient } from "@empiricalrun/llm";
2
- import { ChatCompletionMessageParam } from "openai/resources/chat/completions.mjs";
2
+ import type { Anthropic } from "@empiricalrun/llm/claude";
3
3
  export declare function chatAgent({ prompt, }: {
4
4
  prompt: string;
5
5
  trace?: TraceClient;
6
- }): Promise<ChatCompletionMessageParam[]>;
6
+ }): Promise<Anthropic.Messages.MessageParam[]>;
7
7
  //# sourceMappingURL=chat.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"chat.d.ts","sourceRoot":"","sources":["../../src/agent/chat.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EACL,0BAA0B,EAE3B,MAAM,uCAAuC,CAAC;AAsC/C,wBAAsB,SAAS,CAAC,EAC9B,MAAM,GACP,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,yCA6DA"}
1
+ {"version":3,"file":"chat.d.ts","sourceRoot":"","sources":["../../src/agent/chat.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AA6D1D,wBAAsB,SAAS,CAAC,EAC9B,MAAM,GACP,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,8CAmEA"}
@@ -1,61 +1,75 @@
1
1
  "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
2
5
  Object.defineProperty(exports, "__esModule", { value: true });
3
6
  exports.chatAgent = void 0;
4
- const llm_1 = require("@empiricalrun/llm");
7
+ const claude_1 = require("@empiricalrun/llm/claude");
8
+ const path_1 = __importDefault(require("path"));
5
9
  const human_in_the_loop_1 = require("../human-in-the-loop");
6
10
  const browser_agent_1 = require("../tools/browser-agent");
7
- const codegen_agent_1 = require("../tools/codegen-agent");
11
+ const diagnosis_fetcher_1 = require("../tools/diagnosis-fetcher");
8
12
  const test_run_1 = require("../tools/test-run");
13
+ const repo_tree_1 = require("../utils/repo-tree");
9
14
  const systemPrompt = `
10
15
  You are a helpful assistant that can answer questions and help with tasks.
11
- You are given a set to tools to use to fulfill the user's request.
16
+ You are given a set of tools to use to fulfill the user's request. Read their descriptions to
17
+ understand what each tool does.
12
18
 
13
- For example, if the user asks you to run a test, you should use the runTest tool.
19
+ For example, if the user asks you to run a test, you could use the runTest tool.
14
20
  Once the test is run, you will receive the results in the form of a JSON object.
15
21
  Summarize the results in a few sentences.
16
22
 
17
- Or if the user asks you to modify a test, you should use the
18
- generateTestWithCodegen or the generateTestWithBrowserAgent tool. Read their
19
- descriptions to understand what each tool does.
23
+ If the user provides a diagnosis URL, you can use the fetchDiagnosisDetails tool
24
+ to get more information about the test case and its results.
25
+
26
+ Or if the user asks you to modify a test, you could use the generateTestWithBrowserAgent tool. If you suspect
27
+ that a UI selector needs to be updated, using the browser agent is a good idea.
28
+
29
+ Before using generateTestWithBrowserAgent, you need to prepare the test code for the browser agent.
30
+ You can do this by using the str_replace_editor tool to add a TODO comment to the test code. This
31
+ comment should explain to the browser agent what to do.
32
+
33
+ For example, if the expected modification is to click on a login button, you could add the following comment.
34
+
35
+ // TODO(agent): Click on the login button
36
+
37
+ The position of the comment is important: the browser agent will look for this comment and replace it with
38
+ the actual code to click on the login button. If you are fixing a failing test, your comment should be
39
+ around the failing line of code, so that it can be replaced/modified.
40
+
41
+ You are running as a CLI tool inside the directory of the repo where this test file is located. Here is
42
+ the repo directory structure:
43
+
44
+ ${(0, repo_tree_1.generateAsciiTree)(process.cwd())}
45
+
46
+ While specifying paths to files, use relative paths from the current working directory. For example:
47
+ - Correct path: "tests/lesson.spec.ts"
48
+ - Incorrect path: "/repo/tests/lesson.spec.ts" or "${path_1.default.basename(process.cwd())}/tests/lesson.spec.ts"
20
49
  `;
21
- const getToolExecutor = (toolName) => {
22
- if (toolName === "runTest") {
23
- return test_run_1.runTestTool;
24
- }
25
- else if (toolName === "generateTestWithCodegen") {
26
- return codegen_agent_1.generateTestWithCodegenTool;
27
- }
28
- else if (toolName === "generateTestWithBrowserAgent") {
29
- return browser_agent_1.generateTestWithBrowserAgentTool;
30
- }
31
- else {
32
- throw new Error(`Tool name ${toolName} not found`);
33
- }
50
+ const tools = [test_run_1.runTestTool, browser_agent_1.browserAgentTool, diagnosis_fetcher_1.diagnosisTool];
51
+ const toolExecutors = {
52
+ ...Object.fromEntries(tools.map((tool) => [tool.schema.function.name, tool.execute])),
53
+ str_replace_editor: claude_1.strReplaceEditorTool,
34
54
  };
35
55
  async function chatAgent({ prompt, }) {
36
56
  let userPrompt = prompt;
37
- let chatHistory = [
38
- { role: "system", content: systemPrompt },
39
- { role: "user", content: userPrompt },
40
- ];
41
- const llm = new llm_1.LLM({ provider: "openai" });
42
- let response;
57
+ let chatState = new claude_1.ChatState();
58
+ chatState.pushTextMessage({ message: { role: "user", content: userPrompt } });
43
59
  let shouldAskUserForInput = false;
44
- let pendingToolCalls = [];
45
60
  while (!userPrompt.toLowerCase().includes("stop")) {
46
- if (pendingToolCalls.length > 0) {
47
- const toolCall = pendingToolCalls.shift();
48
- if (!toolCall) {
49
- throw new Error("No tool call found");
61
+ const toolUse = chatState.getPendingToolCall();
62
+ if (toolUse) {
63
+ console.log("Executing tool:", toolUse.name, "with args:", toolUse.input);
64
+ const toolExecutor = toolExecutors[toolUse.name];
65
+ if (!toolExecutor) {
66
+ throw new Error(`Tool ${toolUse.name} not found`);
50
67
  }
51
- console.log("Executing tool call:", toolCall.function.name, "with args:", toolCall.function.arguments);
52
- const toolExecutor = getToolExecutor(toolCall.function.name);
53
- const toolArgs = JSON.parse(toolCall.function.arguments);
54
- const toolResult = await toolExecutor(toolArgs);
55
- chatHistory.push({
56
- role: "tool",
57
- content: JSON.stringify(toolResult),
58
- tool_call_id: toolCall.id,
68
+ const toolResult = await toolExecutor(toolUse.input);
69
+ chatState.pushToolResultToMessages({
70
+ toolCall: toolUse,
71
+ isError: toolResult.isError,
72
+ result: toolResult.result,
59
73
  });
60
74
  continue;
61
75
  }
@@ -63,27 +77,35 @@ async function chatAgent({ prompt, }) {
63
77
  userPrompt = await human_in_the_loop_1.humanLoop.getFeedback({
64
78
  message: "Your response?",
65
79
  });
66
- chatHistory.push({ role: "user", content: userPrompt });
80
+ chatState.pushTextMessage({
81
+ message: { role: "user", content: userPrompt },
82
+ });
67
83
  shouldAskUserForInput = false;
68
84
  continue;
69
85
  }
70
- response = await llm.createChatCompletion({
71
- model: "gpt-4o",
72
- messages: chatHistory,
73
- tools: [test_run_1.schema, codegen_agent_1.schema, browser_agent_1.schema],
74
- });
86
+ const response = await (0, claude_1.createChatCompletion)(systemPrompt, chatState.getMessages(), [
87
+ ...tools.map((tool) => (0, claude_1.convertOpenAISchemaToAnthropic)(tool.schema)),
88
+ {
89
+ type: "text_editor_20250124",
90
+ name: "str_replace_editor",
91
+ },
92
+ ]);
75
93
  if (!response) {
76
94
  throw new Error("No response from LLM");
77
95
  }
78
- chatHistory.push(response);
79
- if (response.tool_calls) {
80
- pendingToolCalls.push(...response.tool_calls);
96
+ chatState.pushTextMessage({
97
+ message: { role: "assistant", content: response.content },
98
+ });
99
+ const textBlock = response.content.find((b) => b.type === "text");
100
+ const toolUseBlock = response.content.find((b) => b.type === "tool_use");
101
+ console.log("Assistant response:", textBlock?.text);
102
+ if (toolUseBlock) {
103
+ chatState.addPendingToolCall({ toolCall: toolUseBlock });
81
104
  }
82
- else if (response.content) {
83
- console.log("Assistant response:", response.content);
105
+ else {
84
106
  shouldAskUserForInput = true;
85
107
  }
86
108
  }
87
- return chatHistory;
109
+ return chatState.getMessages();
88
110
  }
89
111
  exports.chatAgent = chatAgent;
@@ -31,6 +31,7 @@ fixtures_1.test.afterEach(async () => {
31
31
  fs_1.default.rmSync("pages", { recursive: true });
32
32
  });
33
33
  (0, fixtures_1.test)("use skills to subscribe to blog", async ({ page, server }) => {
34
+ fixtures_1.test.skip(!run_1.IS_ALLOWED_TO_USE_SKILLS, "Skills are disabled");
34
35
  await page.goto(`${server.baseURL}/blog-page.html`);
35
36
  const response = await (0, run_1.createTestUsingMasterAgent)({
36
37
  task: `subscribe as user@example.com`,
@@ -54,6 +55,7 @@ fixtures_1.test.afterEach(async () => {
54
55
  (0, fixtures_1.expect)(response.code).toMatch(/await.*subscribeToBlog.*page.*email.*user@example\.com/);
55
56
  });
56
57
  (0, fixtures_1.test)("use skills to extract blog post title", async ({ page, server }) => {
58
+ fixtures_1.test.skip(!run_1.IS_ALLOWED_TO_USE_SKILLS, "Skills are disabled");
57
59
  await page.goto(`${server.baseURL}/blog-page.html`);
58
60
  const response = await (0, run_1.createTestUsingMasterAgent)({
59
61
  // TODO: Extend this to click on "read more" and verify generated code
@@ -77,6 +79,7 @@ fixtures_1.test.afterEach(async () => {
77
79
  (0, fixtures_1.expect)(response.code).toMatch(/^const.*=.*await extractTitleForPost.*page.*nth:.*1/);
78
80
  });
79
81
  (0, fixtures_1.test)("use skills to subscribe with multiple pages", async ({ page, server, }) => {
82
+ fixtures_1.test.skip(!run_1.IS_ALLOWED_TO_USE_SKILLS, "Skills are disabled");
80
83
  await page.goto(`${server.baseURL}/icons-navbar.html`);
81
84
  const blogPage = await page.context().newPage();
82
85
  await blogPage.goto(`${server.baseURL}/blog-page.html`);
@@ -2,6 +2,7 @@ import type { TestCase, TestGenConfigOptions } from "@empiricalrun/shared-types"
2
2
  import { Page } from "playwright";
3
3
  import { ScopeVars } from "../../types";
4
4
  export { executeUsingComputerUseAgent } from "../cua";
5
+ export declare const IS_ALLOWED_TO_USE_SKILLS = false;
5
6
  export declare function createTestUsingMasterAgent({ task, page, testCase, specPath, options, scopeVars, }: {
6
7
  task: string;
7
8
  page: Page;
@@ -1 +1 @@
1
- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,QAAQ,EACR,oBAAoB,EACrB,MAAM,4BAA4B,CAAC;AACpC,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAelC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAUxC,OAAO,EAAE,4BAA4B,EAAE,MAAM,QAAQ,CAAC;AAuBtD,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,OAAO,CAAC,oBAAoB,CAAC,CAAC;IACvC,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GAwRA"}
1
+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,QAAQ,EACR,oBAAoB,EACrB,MAAM,4BAA4B,CAAC;AACpC,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAelC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAUxC,OAAO,EAAE,4BAA4B,EAAE,MAAM,QAAQ,CAAC;AAKtD,eAAO,MAAM,wBAAwB,QAAQ,CAAC;AAqB9C,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,OAAO,CAAC,oBAAoB,CAAC,CAAC;IACvC,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GAwRA"}
@@ -1,6 +1,6 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.createTestUsingMasterAgent = exports.executeUsingComputerUseAgent = void 0;
3
+ exports.createTestUsingMasterAgent = exports.IS_ALLOWED_TO_USE_SKILLS = exports.executeUsingComputerUseAgent = void 0;
4
4
  const llm_1 = require("@empiricalrun/llm");
5
5
  const actions_1 = require("../../actions");
6
6
  const skill_1 = require("../../actions/skill");
@@ -22,6 +22,8 @@ const next_action_1 = require("./next-action");
22
22
  var cua_1 = require("../cua");
23
23
  Object.defineProperty(exports, "executeUsingComputerUseAgent", { enumerable: true, get: function () { return cua_1.executeUsingComputerUseAgent; } });
24
24
  const MAX_ERROR_COUNT = 2;
25
+ // Disabling skills as we're seeing false usage with chat agent
26
+ exports.IS_ALLOWED_TO_USE_SKILLS = false;
25
27
  function getPageVariables(stateVariables) {
26
28
  const keys = Object.keys(stateVariables);
27
29
  // This checks for whether page.url() exists, which is true for all pages
@@ -69,7 +71,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, specPath, opti
69
71
  maxTokens: options.modelProvider === "google" ? 3000000 : 1000000,
70
72
  });
71
73
  let skills = [];
72
- if (testCase) {
74
+ if (testCase && exports.IS_ALLOWED_TO_USE_SKILLS) {
73
75
  skills = await (0, skills_retriever_1.getAppropriateSkills)({
74
76
  testCase,
75
77
  trace,
package/dist/bin/index.js CHANGED
@@ -148,13 +148,16 @@ async function runAgentsWorkflow(testGenConfig, testGenToken) {
148
148
  void (0, session_1.updateSessionStatus)(testGenConfig.options?.metadata.testSessionId, {
149
149
  status: "agent_live_session_started",
150
150
  });
151
- await (0, run_1.generateTestsUsingMasterAgent)({
151
+ const { isError, error } = await (0, run_1.generateTestsUsingMasterAgent)({
152
152
  testFilePath: specPath,
153
153
  filePathToUpdate,
154
154
  pwProjectsFilter: testGenConfig.environment?.playwrightProjects,
155
155
  testGenToken,
156
156
  repoDir: process.cwd(),
157
157
  });
158
+ if (isError) {
159
+ throw new Error(error);
160
+ }
158
161
  }
159
162
  return agent;
160
163
  }
@@ -1,16 +1,3 @@
1
- import { OpenAI } from "openai";
2
- export declare const schema: OpenAI.Chat.Completions.ChatCompletionTool;
3
- export declare const generateTestWithBrowserAgentTool: ({ testName, fileName, changeToMake, }: {
4
- testName: string;
5
- fileName: string;
6
- changeToMake: string;
7
- }) => Promise<{
8
- result: string;
9
- gitPatch: string;
10
- error?: undefined;
11
- } | {
12
- result: string;
13
- error: string;
14
- gitPatch?: undefined;
15
- }>;
1
+ import type { Tool } from "./types";
2
+ export declare const browserAgentTool: Tool;
16
3
  //# sourceMappingURL=browser-agent.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"browser-agent.d.ts","sourceRoot":"","sources":["../../src/tools/browser-agent.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAQhC,eAAO,MAAM,MAAM,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,kBA0B5C,CAAC;AAEF,eAAO,MAAM,gCAAgC;cAKjC,MAAM;cACN,MAAM;kBACF,MAAM;;;;;;;;;EAsCrB,CAAC"}
1
+ {"version":3,"file":"browser-agent.d.ts","sourceRoot":"","sources":["../../src/tools/browser-agent.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAUpC,eAAO,MAAM,gBAAgB,EAAE,IAkH9B,CAAC"}
@@ -1,76 +1,120 @@
1
1
  "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
2
  Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.generateTestWithBrowserAgentTool = exports.schema = void 0;
7
- const path_1 = __importDefault(require("path"));
3
+ exports.browserAgentTool = void 0;
8
4
  const run_1 = require("../agent/browsing/run");
9
5
  const utils_1 = require("../agent/browsing/utils");
10
6
  const scenarios_1 = require("../bin/utils/scenarios");
11
7
  const git_1 = require("../utils/git");
12
- exports.schema = {
13
- type: "function",
14
- function: {
15
- name: "generateTestWithBrowserAgent",
16
- description: "Create or modify a test case with browser agent. This is useful when the modifications involve changing a selector or executing browser interactions (like click, fill, etc)",
17
- parameters: {
18
- type: "object",
19
- properties: {
20
- testName: {
21
- type: "string",
22
- description: "The name of the test to create or modify",
23
- },
24
- fileName: {
25
- type: "string",
26
- description: "The name of the file where the test is located. File name must end with .spec.ts",
27
- },
28
- changeToMake: {
29
- type: "string",
30
- description: "The change to make to the test",
8
+ exports.browserAgentTool = {
9
+ schema: {
10
+ type: "function",
11
+ function: {
12
+ name: "generateTestWithBrowserAgent",
13
+ description: `
14
+ Create or modify a test case with browser agent. The browser agent can take user interactions in a web browser
15
+ and generate Playwright code for that actions. This is a useful tool when the modifications require knowing the
16
+ locator/selector for an element on the page.
17
+
18
+ IMPORTANT: Before you invoke this tool, you need to ensure that the test code is correctly prepared for this
19
+ agent. Preparation involves adding a TODO comment that describes the change that needs to be made. A good
20
+ comment calls out the element and browser interactions sto take on them. The TODO comment also has (agent) next to it, to
21
+ clearly label that the change is for the agent to make.
22
+
23
+ For example: This is a good TODO comment
24
+
25
+ \`\`\`
26
+ test("Example test code", async ({ page }) => {
27
+ await page.goto("https://example.com");
28
+ // TODO(agent): Click on the login button
29
+ });
30
+ \`\`\`
31
+
32
+ The browser agent will execute the steps before the TODO comment and replace the TODO comment with the Playwright
33
+ code that performs the actions described in the comment. For instance, on the running the tool, the agent will
34
+ output the following final code:
35
+
36
+ \`\`\`
37
+ test("Example test code", async ({ page }) => {
38
+ await page.goto("https://example.com");
39
+ await page.getByRole("button", { name: "Login" }).click();
40
+ });
41
+ \`\`\`
42
+ `,
43
+ parameters: {
44
+ type: "object",
45
+ properties: {
46
+ testName: {
47
+ type: "string",
48
+ description: "The name of the test to create or modify",
49
+ },
50
+ testSuites: {
51
+ type: "array",
52
+ description: "The suites (describe blocks) where the test is located",
53
+ items: {
54
+ type: "string",
55
+ },
56
+ },
57
+ fileName: {
58
+ type: "string",
59
+ description: "The name of the file where the test is located. File name must end with .spec.ts",
60
+ },
61
+ project: {
62
+ type: "string",
63
+ description: "The Playwright project to run tests against (e.g. 'chromium' or 'firefox')",
64
+ },
65
+ changeToMake: {
66
+ type: "string",
67
+ description: "The change to make to the test",
68
+ },
31
69
  },
70
+ required: [
71
+ "testName",
72
+ "testSuites",
73
+ "fileName",
74
+ "changeToMake",
75
+ "project",
76
+ ],
32
77
  },
33
- required: ["testName", "fileName", "changeToMake"],
34
78
  },
35
79
  },
80
+ execute: async (input) => {
81
+ const { testName, testSuites, fileName, changeToMake, project } = input;
82
+ await (0, utils_1.replaceTodoWithCreateTest)({
83
+ testCaseName: testName,
84
+ testCaseSuites: testSuites,
85
+ testFilePath: fileName,
86
+ });
87
+ const { isError, error } = await (0, run_1.generateTestsUsingMasterAgent)({
88
+ testFilePath: fileName,
89
+ filePathToUpdate: fileName,
90
+ pwProjectsFilter: [project],
91
+ testGenToken: (0, scenarios_1.buildTokenFromOptions)({
92
+ name: testName,
93
+ file: fileName,
94
+ prompt: changeToMake,
95
+ }),
96
+ repoDir: process.cwd(),
97
+ });
98
+ if (!isError) {
99
+ const gitPatch = (0, git_1.getGitDiff)(fileName);
100
+ return {
101
+ isError,
102
+ result: `Test was generated successfully. Here is the git patch:
103
+ \`\`\`
104
+ ${gitPatch}
105
+ \`\`\`
106
+ `,
107
+ };
108
+ }
109
+ else {
110
+ return {
111
+ isError,
112
+ result: `Test was not generated successfully. Here is the error:
113
+ \`\`\`
114
+ ${error}
115
+ \`\`\`
116
+ `,
117
+ };
118
+ }
119
+ },
36
120
  };
37
- const generateTestWithBrowserAgentTool = async ({ testName, fileName, changeToMake, }) => {
38
- const testCase = {
39
- id: 0,
40
- name: testName,
41
- filePath: fileName,
42
- suites: [], // TODO: Support suites
43
- steps: [changeToMake],
44
- };
45
- const filePathFromCwd = path_1.default.join("tests", fileName);
46
- const filePathToUpdate = await (0, utils_1.prepareFileForMasterAgent)({
47
- testCase,
48
- specPath: filePathFromCwd,
49
- });
50
- const { isError, error } = await (0, run_1.generateTestsUsingMasterAgent)({
51
- testFilePath: filePathFromCwd,
52
- filePathToUpdate,
53
- // TODO: Remove this hardcoded project name
54
- pwProjectsFilter: ["chromium"],
55
- testGenToken: (0, scenarios_1.buildTokenFromOptions)({
56
- name: testName,
57
- file: fileName,
58
- prompt: changeToMake,
59
- }),
60
- repoDir: process.cwd(),
61
- });
62
- if (!isError) {
63
- const gitPatch = (0, git_1.getGitDiff)(filePathFromCwd);
64
- return {
65
- result: "Test was generated successfully",
66
- gitPatch,
67
- };
68
- }
69
- else {
70
- return {
71
- result: "Test was not generated successfully",
72
- error,
73
- };
74
- }
75
- };
76
- exports.generateTestWithBrowserAgentTool = generateTestWithBrowserAgentTool;
@@ -1,9 +1,3 @@
1
- import { TestCase } from "@empiricalrun/shared-types";
2
- import { OpenAI } from "openai";
3
- export declare const schema: OpenAI.Chat.Completions.ChatCompletionTool;
4
- export declare const generateTestWithCodegenTool: ({ testName, fileName, changeToMake, }: {
5
- testName: string;
6
- fileName: string;
7
- changeToMake: string;
8
- }) => Promise<void | TestCase[]>;
1
+ import type { Tool } from "./types";
2
+ export declare const codegenTool: Tool;
9
3
  //# sourceMappingURL=codegen-agent.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"codegen-agent.d.ts","sourceRoot":"","sources":["../../src/tools/codegen-agent.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,4BAA4B,CAAC;AACtD,OAAO,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAIhC,eAAO,MAAM,MAAM,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,kBA0B5C,CAAC;AAEF,eAAO,MAAM,2BAA2B;cAK5B,MAAM;cACN,MAAM;kBACF,MAAM;gCAcrB,CAAC"}
1
+ {"version":3,"file":"codegen-agent.d.ts","sourceRoot":"","sources":["../../src/tools/codegen-agent.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AASpC,eAAO,MAAM,WAAW,EAAE,IAsDzB,CAAC"}
@@ -1,44 +1,56 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.generateTestWithCodegenTool = exports.schema = void 0;
3
+ exports.codegenTool = void 0;
4
4
  const run_1 = require("../agent/codegen/run");
5
- exports.schema = {
6
- type: "function",
7
- function: {
8
- name: "generateTestWithCodegen",
9
- description: "Create or modify a test case with code generation. This is useful when modifications can be done with TypeScript only, and don't require any browser interactions or element selectors.",
10
- parameters: {
11
- type: "object",
12
- properties: {
13
- testName: {
14
- type: "string",
15
- description: "The name of the test to create or modify",
16
- },
17
- fileName: {
18
- type: "string",
19
- description: "The name of the file where the test is located. File name must end with .spec.ts",
20
- },
21
- changeToMake: {
22
- type: "string",
23
- description: "The change to make to the test",
5
+ exports.codegenTool = {
6
+ schema: {
7
+ type: "function",
8
+ function: {
9
+ name: "generateTestWithCodegen",
10
+ description: "Create or modify a test case with code generation. This is useful when modifications can be done with TypeScript only, and don't require any browser interactions or element selectors.",
11
+ parameters: {
12
+ type: "object",
13
+ properties: {
14
+ testName: {
15
+ type: "string",
16
+ description: "The name of the test to create or modify",
17
+ },
18
+ testSuites: {
19
+ type: "array",
20
+ description: "The suites (describe blocks) where the test is located",
21
+ items: {
22
+ type: "string",
23
+ },
24
+ },
25
+ fileName: {
26
+ type: "string",
27
+ description: "The name of the file where the test is located. File name must end with .spec.ts",
28
+ },
29
+ changeToMake: {
30
+ type: "string",
31
+ description: "The change to make to the test",
32
+ },
24
33
  },
34
+ required: ["testName", "testSuites", "fileName", "changeToMake"],
25
35
  },
26
- required: ["testName", "fileName", "changeToMake"],
27
36
  },
28
37
  },
38
+ execute: async (input) => {
39
+ const { testName, testSuites, fileName, changeToMake } = input;
40
+ const testCase = {
41
+ id: 0,
42
+ name: testName,
43
+ filePath: fileName,
44
+ suites: testSuites,
45
+ steps: [changeToMake],
46
+ };
47
+ const result = await (0, run_1.generateTestWithCodegen)({
48
+ testCase,
49
+ file: fileName,
50
+ });
51
+ return {
52
+ result: JSON.stringify(result),
53
+ isError: false,
54
+ };
55
+ },
29
56
  };
30
- const generateTestWithCodegenTool = async ({ testName, fileName, changeToMake, }) => {
31
- const testCase = {
32
- id: 0,
33
- name: testName,
34
- filePath: fileName,
35
- suites: [], // TODO: Support suites
36
- steps: [changeToMake],
37
- };
38
- const result = await (0, run_1.generateTestWithCodegen)({
39
- testCase,
40
- file: fileName,
41
- });
42
- return result;
43
- };
44
- exports.generateTestWithCodegenTool = generateTestWithCodegenTool;
@@ -0,0 +1,3 @@
1
+ import type { Tool } from "./types";
2
+ export declare const diagnosisTool: Tool;
3
+ //# sourceMappingURL=diagnosis-fetcher.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"diagnosis-fetcher.d.ts","sourceRoot":"","sources":["../../src/tools/diagnosis-fetcher.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAMpC,eAAO,MAAM,aAAa,EAAE,IA4F3B,CAAC"}
@@ -0,0 +1,88 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.diagnosisTool = void 0;
7
+ const promises_1 = __importDefault(require("fs/promises"));
8
+ const path_1 = __importDefault(require("path"));
9
+ exports.diagnosisTool = {
10
+ schema: {
11
+ type: "function",
12
+ function: {
13
+ name: "fetchDiagnosisDetails",
14
+ description: "Fetch details about a test case diagnosis using its URL or slug",
15
+ parameters: {
16
+ type: "object",
17
+ properties: {
18
+ diagnosisUrl: {
19
+ type: "string",
20
+ description: "The full URL of the diagnosis (e.g. https://dash.empirical.run/shopflo-tests/diagnosis/split-cod-place-the-order--byynrPjCml57)",
21
+ },
22
+ },
23
+ required: ["diagnosisUrl"],
24
+ },
25
+ },
26
+ },
27
+ execute: async (input) => {
28
+ const { diagnosisUrl } = input;
29
+ // Extract the slug from the URL - it's the part after the last '--'
30
+ const slug = diagnosisUrl.split("--").pop();
31
+ if (!slug) {
32
+ throw new Error("Invalid diagnosis URL - could not extract slug");
33
+ }
34
+ // Make the API call to fetch diagnosis details
35
+ const response = await fetch(`https://dash.empirical.run/api/diagnosis/${slug}/detailed`, {
36
+ method: "GET",
37
+ headers: {
38
+ Authorization: "weQPMWKT", // Using the auth token from test-endpoint.mdc
39
+ },
40
+ });
41
+ if (!response.ok) {
42
+ return {
43
+ result: `Failed to fetch diagnosis details: ${response.statusText}`,
44
+ isError: true,
45
+ };
46
+ }
47
+ const data = await response.json();
48
+ const { test_case, diagnosis } = data.data;
49
+ const project = diagnosis[0]?.test_project || "unknown";
50
+ const sourceContext = await promises_1.default.readFile(path_1.default.join("tests", test_case.file_path), "utf-8");
51
+ // Format the response as markdown
52
+ const markdownResponse = `
53
+ # Test Case Diagnosis
54
+
55
+ ## Test Case Information
56
+ - **Test Case Name**: ${test_case.name}
57
+ - **Test Suite**: ${test_case.suites}
58
+ - **File Name**: tests/${test_case.file_path}
59
+ - **Project**: ${project}
60
+
61
+ ## Source Context
62
+ ${sourceContext}
63
+
64
+ ## What Happened in the Test Run
65
+
66
+ ### Failure Details
67
+ - **Failing Line**: ${diagnosis[0]?.failing_line || "No failing line available"}
68
+
69
+ #### Error Stack
70
+ \`\`\`
71
+ ${diagnosis[0]?.failed_run_metadata?.stack?.replace("/runner/_work/shopflo-tests/shopflo-tests/source-repo/", "") || "No error stack available"}
72
+ \`\`\`
73
+
74
+ #### Error Summary
75
+ ${diagnosis[0]?.error_stack_summary?.content || "No error summary available"}
76
+
77
+ #### Visual Analysis
78
+ ${diagnosis[0]?.visual_diff_summary?.summary || "No visual analysis available"}
79
+
80
+ #### Merged Summary
81
+ ${diagnosis[0]?.merged_summary?.content || "No merged summary available"}
82
+ `;
83
+ return {
84
+ result: markdownResponse,
85
+ isError: false,
86
+ };
87
+ },
88
+ };
@@ -1,10 +1,3 @@
1
- import { OpenAI } from "openai";
2
- export declare const schema: OpenAI.Chat.Completions.ChatCompletionTool;
3
- export declare const runTestTool: ({ testName, fileName, }: {
4
- testName: string;
5
- fileName: string;
6
- }) => Promise<{
7
- hasTestPassed: boolean;
8
- summaryJson: any;
9
- }>;
1
+ import type { Tool } from "./types";
2
+ export declare const runTestTool: Tool;
10
3
  //# sourceMappingURL=test-run.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"test-run.d.ts","sourceRoot":"","sources":["../../src/tools/test-run.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAEhC,eAAO,MAAM,MAAM,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,kBAqB5C,CAAC;AAEF,eAAO,MAAM,WAAW;cAIZ,MAAM;cACN,MAAM;;;;EASjB,CAAC"}
1
+ {"version":3,"file":"test-run.d.ts","sourceRoot":"","sources":["../../src/tools/test-run.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AASpC,eAAO,MAAM,WAAW,EAAE,IA8CzB,CAAC"}
@@ -1,35 +1,49 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.runTestTool = exports.schema = void 0;
3
+ exports.runTestTool = void 0;
4
4
  const test_run_1 = require("@empiricalrun/test-run");
5
- exports.schema = {
6
- type: "function",
7
- function: {
8
- name: "runTest",
9
- description: "Run a test",
10
- parameters: {
11
- type: "object",
12
- properties: {
13
- testName: {
14
- type: "string",
15
- description: "The name of the test to run",
16
- },
17
- fileName: {
18
- type: "string",
19
- description: "The name of the file where the test is located. File name must end with .spec.ts",
5
+ exports.runTestTool = {
6
+ schema: {
7
+ type: "function",
8
+ function: {
9
+ name: "runTest",
10
+ description: "Run a test",
11
+ parameters: {
12
+ type: "object",
13
+ properties: {
14
+ testName: {
15
+ type: "string",
16
+ description: "The name of the test to run",
17
+ },
18
+ suites: {
19
+ type: "array",
20
+ description: "The suites (describe blocks) where the test is located.",
21
+ items: { type: "string" },
22
+ },
23
+ fileName: {
24
+ type: "string",
25
+ description: "The name of the file where the test is located. File name must end with .spec.ts",
26
+ },
27
+ project: {
28
+ type: "string",
29
+ description: "The project to run the test on",
30
+ },
20
31
  },
32
+ required: ["testName", "suites", "fileName", "project"],
21
33
  },
22
- required: ["testName", "fileName"],
23
34
  },
24
35
  },
36
+ execute: async (input) => {
37
+ const { testName, suites, fileName, project } = input;
38
+ const result = await (0, test_run_1.runSingleTest)({
39
+ testName,
40
+ suites,
41
+ fileName,
42
+ projects: [project],
43
+ });
44
+ return {
45
+ result: JSON.stringify(result),
46
+ isError: false,
47
+ };
48
+ },
25
49
  };
26
- const runTestTool = async ({ testName, fileName, }) => {
27
- // TODO: Remove this hardcoded project name
28
- const result = await (0, test_run_1.runSingleTest)({
29
- testName,
30
- fileName,
31
- projects: ["chromium"],
32
- });
33
- return result;
34
- };
35
- exports.runTestTool = runTestTool;
@@ -0,0 +1,11 @@
1
+ import type { OpenAI } from "openai";
2
+ export type ToolSchema = OpenAI.Chat.Completions.ChatCompletionTool;
3
+ export type ToolResult = {
4
+ result: string;
5
+ isError: boolean;
6
+ };
7
+ export type Tool = {
8
+ schema: ToolSchema;
9
+ execute: (input: any) => Promise<ToolResult>;
10
+ };
11
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/tools/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAErC,MAAM,MAAM,UAAU,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,kBAAkB,CAAC;AAEpE,MAAM,MAAM,UAAU,GAAG;IACvB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,OAAO,CAAC;CAClB,CAAC;AAEF,MAAM,MAAM,IAAI,GAAG;IAIjB,MAAM,EAAE,UAAU,CAAC;IACnB,OAAO,EAAE,CAAC,KAAK,EAAE,GAAG,KAAK,OAAO,CAAC,UAAU,CAAC,CAAC;CAC9C,CAAC"}
@@ -0,0 +1,2 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -0,0 +1,2 @@
1
+ export declare function generateAsciiTree(dirPath: string, options?: {}): string;
2
+ //# sourceMappingURL=repo-tree.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"repo-tree.d.ts","sourceRoot":"","sources":["../../src/utils/repo-tree.ts"],"names":[],"mappings":"AAYA,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,KAAK,UAsE9D"}
@@ -0,0 +1,75 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.generateAsciiTree = void 0;
7
+ const fs_1 = __importDefault(require("fs"));
8
+ const path_1 = __importDefault(require("path"));
9
+ const DEFAULT_EXCLUDE = [
10
+ "node_modules",
11
+ "dist",
12
+ "build",
13
+ /\.git/,
14
+ ".DS_Store",
15
+ "playwright-report",
16
+ ];
17
+ function generateAsciiTree(dirPath, options = {}) {
18
+ const defaultOptions = {
19
+ showHidden: false,
20
+ exclude: DEFAULT_EXCLUDE,
21
+ maxDepth: 10,
22
+ };
23
+ const opts = { ...defaultOptions, ...options };
24
+ // Make sure the path exists and is a directory
25
+ if (!fs_1.default.existsSync(dirPath) || !fs_1.default.statSync(dirPath).isDirectory()) {
26
+ throw new Error(`"${dirPath}" is not a valid directory path`);
27
+ }
28
+ // Start with the root directory name
29
+ const rootName = path_1.default.basename(dirPath);
30
+ let result = rootName + "\n";
31
+ function processDirectory(currentPath, prefix = "", depth = 1) {
32
+ if (depth > opts.maxDepth)
33
+ return;
34
+ const items = fs_1.default.readdirSync(currentPath);
35
+ // Sort items: directories first, then files
36
+ const sortedItems = items.sort((a, b) => {
37
+ const aIsDir = fs_1.default.statSync(path_1.default.join(currentPath, a)).isDirectory();
38
+ const bIsDir = fs_1.default.statSync(path_1.default.join(currentPath, b)).isDirectory();
39
+ if (aIsDir && !bIsDir)
40
+ return -1;
41
+ if (!aIsDir && bIsDir)
42
+ return 1;
43
+ return a.localeCompare(b);
44
+ });
45
+ // Process each item
46
+ sortedItems.forEach((item, index) => {
47
+ // Skip hidden files if not showing hidden
48
+ if (!opts.showHidden && item.startsWith("."))
49
+ return;
50
+ // Skip excluded patterns
51
+ if (opts.exclude.some((pattern) => typeof pattern === "string"
52
+ ? item === pattern
53
+ : pattern instanceof RegExp
54
+ ? pattern.test(item)
55
+ : false))
56
+ return;
57
+ const itemPath = path_1.default.join(currentPath, item);
58
+ const isDirectory = fs_1.default.statSync(itemPath).isDirectory();
59
+ const isLast = index === sortedItems.length - 1;
60
+ // Current item symbols
61
+ const symbol = isLast ? "└── " : "├── ";
62
+ const nextPrefix = isLast ? " " : "│ ";
63
+ // Add the current item to the result
64
+ result += `${prefix}${symbol}${item}${isDirectory ? "/" : ""}\n`;
65
+ // Process subdirectories
66
+ if (isDirectory) {
67
+ processDirectory(itemPath, prefix + nextPrefix, depth + 1);
68
+ }
69
+ });
70
+ }
71
+ // Start the recursive processing
72
+ processDirectory(dirPath);
73
+ return result;
74
+ }
75
+ exports.generateAsciiTree = generateAsciiTree;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@empiricalrun/test-gen",
3
- "version": "0.47.4",
3
+ "version": "0.48.1",
4
4
  "publishConfig": {
5
5
  "registry": "https://registry.npmjs.org/",
6
6
  "access": "public"
@@ -73,10 +73,10 @@
73
73
  "ts-morph": "^23.0.0",
74
74
  "tsx": "^4.16.2",
75
75
  "typescript": "^5.3.3",
76
- "@empiricalrun/llm": "^0.9.36",
76
+ "@empiricalrun/llm": "^0.10.1",
77
77
  "@empiricalrun/r2-uploader": "^0.3.8",
78
78
  "@empiricalrun/reporter": "^0.23.1",
79
- "@empiricalrun/test-run": "^0.7.1"
79
+ "@empiricalrun/test-run": "^0.7.2"
80
80
  },
81
81
  "devDependencies": {
82
82
  "@playwright/test": "1.47.1",