@empiricalrun/test-gen 0.53.12 → 0.54.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,28 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.54.0
4
+
5
+ ### Minor Changes
6
+
7
+ - 8324aa6: chore: update auth header for dashboard requests
8
+
9
+ ### Patch Changes
10
+
11
+ - 769c3e7: feat: Implement environment-aware authentication with fallback
12
+ - 4297ed5: chore: Move text editor tools from llm to test-gen package
13
+ - 2ecdd64: fix: invalid tool calls should pipe back into the llm
14
+ - a8f135e: fix: add package-lock.json to default exclude for grep tool
15
+ - Updated dependencies [4297ed5]
16
+ - @empiricalrun/llm@0.14.8
17
+
18
+ ## 0.53.13
19
+
20
+ ### Patch Changes
21
+
22
+ - b847558: feat: cua codegen works for clicks in iframes
23
+ - bb402a4: feat: accept framelocators in test-gen tool calls
24
+ - aa38bee: feat: add valid playwright project names to system prompt
25
+
3
26
  ## 0.53.12
4
27
 
5
28
  ### Patch Changes
@@ -1 +1 @@
1
- {"version":3,"file":"agent-loop.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/agent-loop.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAEL,UAAU,EAMX,MAAM,wBAAwB,CAAC;AAYhC,OAAO,EAAE,gBAAgB,EAAE,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAyChE,wBAAsB,aAAa,CAAC,EAClC,SAAS,EACT,aAAa,EACb,QAAQ,EACR,KAAK,GACN,EAAE;IACD,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,mBAAmB,CAAC;IACnC,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,iBA2CA"}
1
+ {"version":3,"file":"agent-loop.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/agent-loop.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EACL,UAAU,EAIX,MAAM,wBAAwB,CAAC;AAiBhC,OAAO,EAAE,gBAAgB,EAAE,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAyChE,wBAAsB,aAAa,CAAC,EAClC,SAAS,EACT,aAAa,EACb,QAAQ,EACR,KAAK,GACN,EAAE;IACD,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,mBAAmB,CAAC;IACnC,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,iBAiDA"}
@@ -7,6 +7,7 @@ const web_1 = require("../../bin/utils/platform/web");
7
7
  const commit_and_create_pr_1 = require("../../tools/commit-and-create-pr");
8
8
  const diagnosis_fetcher_1 = require("../../tools/diagnosis-fetcher");
9
9
  const grep_1 = require("../../tools/grep");
10
+ const str_replace_editor_1 = require("../../tools/str_replace_editor");
10
11
  const test_gen_browser_1 = require("../../tools/test-gen-browser");
11
12
  const test_run_1 = require("../../tools/test-run");
12
13
  const test_run_fetcher_1 = require("../../tools/test-run-fetcher");
@@ -23,17 +24,17 @@ function getTools(selectedModel) {
23
24
  ];
24
25
  if (selectedModel.startsWith("gemini")) {
25
26
  // Claude will have its own built-in text editor tools
26
- chat_1.textEditorTools.forEach((tool) => {
27
+ str_replace_editor_1.textEditorTools.forEach((tool) => {
27
28
  const originalExecute = tool.execute;
28
29
  tool.execute = (input) => originalExecute(input, web_1.validateTypescript);
29
30
  });
30
- tools.push(...chat_1.textEditorTools);
31
+ tools.push(...str_replace_editor_1.textEditorTools);
31
32
  }
32
33
  const toolExecutors = {
33
34
  ...Object.fromEntries(tools.map((tool) => [tool.schema.name, tool.execute])),
34
35
  };
35
36
  if (selectedModel.startsWith("claude")) {
36
- toolExecutors.str_replace_editor = (input) => (0, chat_1.strReplaceEditorExecutor)(input, web_1.validateTypescript);
37
+ toolExecutors.str_replace_editor = (input) => (0, str_replace_editor_1.strReplaceEditorExecutor)(input, web_1.validateTypescript);
37
38
  }
38
39
  return { tools, toolExecutors };
39
40
  }
@@ -49,6 +50,7 @@ const log = (...args) => {
49
50
  };
50
51
  async function chatAgentLoop({ chatModel, selectedModel, reporter, trace, }) {
51
52
  const systemPrompt = await (0, prompt_1.buildSystemPrompt)();
53
+ trace?.update({ input: { systemPrompt } });
52
54
  const { tools, toolExecutors } = getTools(selectedModel);
53
55
  while (!chatModel.askUserForInput) {
54
56
  const toolCalls = chatModel.getPendingToolCalls();
@@ -58,15 +60,21 @@ async function chatAgentLoop({ chatModel, selectedModel, reporter, trace, }) {
58
60
  const args = JSON.stringify(call.input);
59
61
  log(`Executing tool ${call.name} with args: ${args}`);
60
62
  const toolExecutor = toolExecutors[call.name];
63
+ let callResponse;
61
64
  if (!toolExecutor) {
62
- throw new Error(`Tool ${call.name} not found`);
63
- }
64
- const callResponse = await toolExecutor(call.input);
65
- if (callResponse.isError) {
66
- log(`Tool ${call.name} failed: ${callResponse.result}`);
65
+ callResponse = {
66
+ isError: true,
67
+ result: `Invalid function/tool call: ${call.name} not found`,
68
+ };
67
69
  }
68
70
  else {
69
- log(`Tool ${call.name} completed`);
71
+ callResponse = await toolExecutor(call.input);
72
+ if (callResponse.isError) {
73
+ log(`Tool ${call.name} failed: ${callResponse.result}`);
74
+ }
75
+ else {
76
+ log(`Tool ${call.name} completed`);
77
+ }
70
78
  }
71
79
  toolResults.push(callResponse);
72
80
  }
@@ -86,6 +94,6 @@ async function chatAgentLoop({ chatModel, selectedModel, reporter, trace, }) {
86
94
  const latest = chatModel.getHumanReadableLatestMessage();
87
95
  await reporter((0, state_1.chatStateFromModel)(chatModel, selectedModel), latest);
88
96
  }
89
- (0, chat_1.cleanupBackupFiles)(process.cwd());
97
+ (0, str_replace_editor_1.cleanupBackupFiles)(process.cwd());
90
98
  }
91
99
  exports.chatAgentLoop = chatAgentLoop;
@@ -106,7 +106,7 @@ async function getChatSessionFromDashboard(chatSessionId) {
106
106
  const response = await fetch(`${DASHBOARD_DOMAIN}/api/chat-sessions/${chatSessionId}`, {
107
107
  headers: {
108
108
  "Content-Type": "application/json",
109
- Authorization: `weQPMWKT`,
109
+ Authorization: `Bearer ${process.env.EMPIRICALRUN_API_KEY}`,
110
110
  },
111
111
  });
112
112
  const data = await response.json();
@@ -136,7 +136,7 @@ async function runChatAgentForDashboard({ chatSessionId, selectedModel, }) {
136
136
  }),
137
137
  headers: {
138
138
  "Content-Type": "application/json",
139
- Authorization: `weQPMWKT`,
139
+ Authorization: `Bearer ${process.env.EMPIRICALRUN_API_KEY}`,
140
140
  },
141
141
  });
142
142
  const data = await response.json();
@@ -1 +1 @@
1
- {"version":3,"file":"repo.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/repo.ts"],"names":[],"mappings":"AAmCA,wBAAsB,cAAc,oBAwBnC"}
1
+ {"version":3,"file":"repo.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/repo.ts"],"names":[],"mappings":"AAqCA,wBAAsB,cAAc,oBAyCnC"}
@@ -7,6 +7,7 @@ exports.getRepoContext = void 0;
7
7
  const fs_extra_1 = __importDefault(require("fs-extra"));
8
8
  const path_1 = __importDefault(require("path"));
9
9
  const repo_tree_1 = require("../../utils/repo-tree");
10
+ const utils_1 = require("../browsing/utils");
10
11
  async function getAllMarkdownFiles() {
11
12
  const dir = path_1.default.join(process.cwd(), ".empiricalrun");
12
13
  if (!fs_extra_1.default.existsSync(dir)) {
@@ -48,6 +49,22 @@ While specifying paths to files, use relative paths from the current working dir
48
49
  - Correct path: "tests/lesson.spec.ts"
49
50
  - Incorrect path: "/repo/tests/lesson.spec.ts" or "${path_1.default.basename(process.cwd())}/tests/lesson.spec.ts"
50
51
  `;
52
+ try {
53
+ const playwrightConfig = await (0, utils_1.readPlaywrightConfig)(process.cwd());
54
+ const validProjectNames = await (0, utils_1.getValidProjectNames)(playwrightConfig);
55
+ if (validProjectNames.length > 0) {
56
+ REPO_CONTEXT_PROMPT += `
57
+ ## Playwright configuration
58
+
59
+ This repo is configured with these Playwright projects (in the playwright.config.ts file):
60
+ ${validProjectNames.map((name) => `- ${name}`).join("\n")}
61
+
62
+ `;
63
+ }
64
+ }
65
+ catch (error) {
66
+ console.warn("Failed to read playwright config", error);
67
+ }
51
68
  const knowledge = await knowledgeContext();
52
69
  if (knowledge.length > 0) {
53
70
  REPO_CONTEXT_PROMPT += `
@@ -1 +1 @@
1
- {"version":3,"file":"computer.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/computer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,wBAAwB,EAAE,MAAM,0CAA0C,CAAC;AACpF,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAEvC,KAAK,cAAc,GACf,wBAAwB,CAAC,KAAK,GAC9B,wBAAwB,CAAC,WAAW,GACpC,wBAAwB,CAAC,IAAI,GAC7B,wBAAwB,CAAC,QAAQ,GACjC,wBAAwB,CAAC,IAAI,GAC7B,wBAAwB,CAAC,UAAU,GACnC,wBAAwB,CAAC,MAAM,GAC/B,wBAAwB,CAAC,IAAI,GAC7B,wBAAwB,CAAC,IAAI,CAAC;AAElC,wBAAsB,aAAa,CAAC,IAAI,EAAE,IAAI,mBAG7C;AAgCD,wBAAsB,cAAc,CAClC,IAAI,EAAE,IAAI,EACV,GAAG,EAAE,MAAM,GACV,OAAO,CAAC;IACT,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC,CAMD;AAED,wBAAsB,iBAAiB,CACrC,IAAI,EAAE,IAAI,EACV,MAAM,EAAE,cAAc,GACrB,OAAO,CAAC;IACT,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC,CAqID"}
1
+ {"version":3,"file":"computer.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/computer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,wBAAwB,EAAE,MAAM,0CAA0C,CAAC;AACpF,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAIvC,KAAK,cAAc,GACf,wBAAwB,CAAC,KAAK,GAC9B,wBAAwB,CAAC,WAAW,GACpC,wBAAwB,CAAC,IAAI,GAC7B,wBAAwB,CAAC,QAAQ,GACjC,wBAAwB,CAAC,IAAI,GAC7B,wBAAwB,CAAC,UAAU,GACnC,wBAAwB,CAAC,MAAM,GAC/B,wBAAwB,CAAC,IAAI,GAC7B,wBAAwB,CAAC,IAAI,CAAC;AAElC,wBAAsB,aAAa,CAAC,IAAI,EAAE,IAAI,mBAG7C;AAgCD,wBAAsB,cAAc,CAClC,IAAI,EAAE,IAAI,EACV,GAAG,EAAE,MAAM,GACV,OAAO,CAAC;IACT,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC,CAMD;AA4DD,wBAAsB,iBAAiB,CACrC,IAAI,EAAE,IAAI,EACV,MAAM,EAAE,cAAc,GACrB,OAAO,CAAC;IACT,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC,CAiID"}
@@ -1,6 +1,7 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.handleModelAction = exports.handlePageGoto = exports.getScreenshot = void 0;
4
+ const utils_1 = require("../browsing/utils");
4
5
  async function getScreenshot(page) {
5
6
  const screenshotBytes = await page.screenshot();
6
7
  return Buffer.from(screenshotBytes).toString("base64");
@@ -43,6 +44,53 @@ async function handlePageGoto(page, url) {
43
44
  };
44
45
  }
45
46
  exports.handlePageGoto = handlePageGoto;
47
/**
 * Builds the locator expression (source text, without the leading `page.`)
 * for the element found at viewport coordinates (x, y).
 *
 * When the element at that point is an <iframe>, the element is looked up a
 * second time inside the frame, using coordinates relative to the iframe's
 * bounding box, and the result is returned as
 * `locator('<frame selector>').contentFrame().<inner locator>`.
 *
 * @param page Page-like object exposing `evaluate` and `locator`.
 * @param coords `{ x, y }` click position.
 * @returns Promise of the locator expression string.
 * @throws Error when the iframe under the point has neither a `src` nor an
 *   `id` attribute, since no frame selector can be built for it.
 */
async function getLocatorForClick(page, { x, y }) {
    // This callback is handed to page.evaluate, so it must stay self-contained.
    const target = await page.evaluate(([x, y]) => {
        const element = document.elementFromPoint(x, y);
        const bbox = element?.getBoundingClientRect();
        return {
            locator: window.playwright.generateLocator(element),
            isIframe: element?.tagName === "IFRAME",
            x0: bbox?.x,
            y0: bbox?.y,
            src: element?.getAttribute("src"),
            id: element?.getAttribute("id"),
        };
    }, [x, y]);
    if (!target.isIframe) {
        return target.locator;
    }
    if (!target.src && !target.id) {
        // Previously this fell through to the selector "#null", which emitted
        // broken code and only failed later when the frame lookup gave up.
        throw new Error("Cannot build a locator for an iframe that has neither a src nor an id attribute");
    }
    // NOTE(review): presumably makes window.playwright available inside frames — confirm in ../browsing/utils.
    await (0, utils_1.injectPwLocatorGenerator)(page);
    // Translate viewport coordinates into the iframe's own coordinate space.
    const relativeX = x - target.x0;
    const relativeY = y - target.y0;
    // TODO: Reuse locator.locator for the frameLocator.
    const frameSelector = target.src ? `[src="${target.src}"]` : `#${target.id}`;
    const frameEl = page.locator(frameSelector);
    const frameLocator = `locator('${frameSelector}')`;
    const elementLocatorInsideFrame = await frameEl
        .contentFrame()
        .locator(":root")
        .evaluate((rootElement, coords) => {
        const xPos = coords[0];
        const yPos = coords[1];
        if (xPos === undefined || yPos === undefined) {
            throw new Error("Coordinates are undefined in evaluate call");
        }
        const element = document.elementFromPoint(xPos, yPos);
        return window.playwright.generateLocator(element);
    }, [relativeX, relativeY]);
    return `${frameLocator}.contentFrame().${elementLocatorInsideFrame}`;
}
87
/**
 * Returns the locator expression generated for the element that currently
 * holds focus in the page (document.activeElement).
 *
 * @param page Page-like object exposing `evaluate`.
 * @returns Promise of whatever window.playwright.generateLocator produces for
 *   the focused element.
 */
async function getLocatorForFill(page) {
    const describeFocusedElement = () => window.playwright.generateLocator(document.activeElement);
    return page.evaluate(describeFocusedElement);
}
46
94
  async function handleModelAction(page, action) {
47
95
  const actionType = action.type;
48
96
  let actionCode = "";
@@ -61,19 +109,18 @@ async function handleModelAction(page, action) {
61
109
  pwButton = "middle";
62
110
  }
63
111
  if (pwButton) {
64
- const locator = await page.evaluate(([x, y]) => {
65
- const element = document.elementFromPoint(x, y);
66
- return window.playwright.generateLocator(element);
67
- }, [x, y]);
112
+ const locator = await getLocatorForClick(page, { x, y });
68
113
  actionCode = `await page.${locator}.click();\n`;
69
114
  await page.mouse.click(x, y, { button: pwButton });
70
115
  }
71
116
  if (button === "back" || button === "forward") {
72
117
  // Do page navigations, since there is no way to click on the back/forward buttons
73
118
  if (button === "back") {
119
+ actionCode = `await page.goBack();\n`;
74
120
  await page.goBack();
75
121
  }
76
122
  else if (button === "forward") {
123
+ actionCode = `await page.goForward();\n`;
77
124
  await page.goForward();
78
125
  }
79
126
  }
@@ -83,6 +130,8 @@ async function handleModelAction(page, action) {
83
130
  const { x, y } = action;
84
131
  console.log(`Action: doubleclick at (${x}, ${y})`);
85
132
  actionSummary = `Double click at (${x}, ${y})`;
133
+ const locator = await getLocatorForClick(page, { x, y });
134
+ actionCode = `await page.${locator}.dblclick();\n`;
86
135
  await page.mouse.dblclick(x, y, { button: "left" });
87
136
  break;
88
137
  }
@@ -96,6 +145,7 @@ async function handleModelAction(page, action) {
96
145
  case "drag": {
97
146
  const { path } = action;
98
147
  console.log(`Action: drag along path ${path}`);
148
+ // TODO: actionCode is not implemented
99
149
  actionSummary = `Drag along path ${path}`;
100
150
  if (!path || path.length === 0) {
101
151
  break;
@@ -138,10 +188,7 @@ async function handleModelAction(page, action) {
138
188
  console.log(`Action: type text '${text}'`);
139
189
  actionSummary = `Type text '${text}'`;
140
190
  await page.keyboard.type(text);
141
- const locator = await page.evaluate(() => {
142
- const element = document.activeElement;
143
- return window.playwright.generateLocator(element);
144
- });
191
+ const locator = await getLocatorForFill(page);
145
192
  actionCode = `await page.${locator}.fill("${text}");\n`;
146
193
  break;
147
194
  }
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=cua.spec.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cua.spec.d.ts","sourceRoot":"","sources":["../../../../src/agent/master/browser-tests/cua.spec.ts"],"names":[],"mappings":""}
@@ -0,0 +1,13 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ const run_1 = require("../run");
4
+ const fixtures_1 = require("./fixtures");
5
// Browser test: the computer-use agent should emit one click on the top-level
// input and one click routed through the nested iframe's contentFrame().
(0, fixtures_1.test)("computer use agent generates code for iframes", async ({ page, server, }) => {
    const fixtureUrl = `${server.baseURL}/iframe-elements.html`;
    await page.goto(fixtureUrl);
    const result = await (0, run_1.createTestUsingComputerUseAgent)({
        page,
        task: `Click on both of the "Enter your name" input fields`,
    });
    const topLevelClick = /page\.getByPlaceholder\(['"]Enter your name['"]?\)\.click/;
    const nestedFrameClick = /locator\(['"]#nested-frame['"]?\)\.contentFrame\(\)\.getByPlaceholder\(['"]Enter your name['"]?\)/;
    (0, fixtures_1.expect)(result.code).toMatch(topLevelClick);
    (0, fixtures_1.expect)(result.code).toMatch(nestedFrameClick);
});
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { Page } from "playwright";
1
+ import { FrameLocator, Page } from "playwright";
2
2
  import { ScopeVars } from "./types";
3
- export declare function createTest(task: string, page: Page, scope?: ScopeVars): Promise<void>;
3
+ export declare function createTest(task: string, pageRef: Page | FrameLocator, scope?: ScopeVars): Promise<void>;
4
4
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAQlC,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAoBpC,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC,EAAE,SAAS,iBAsD3E"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,YAAY,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAQhD,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAoBpC,wBAAsB,UAAU,CAC9B,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,IAAI,GAAG,YAAY,EAC5B,KAAK,CAAC,EAAE,SAAS,iBAwDlB"}
package/dist/index.js CHANGED
@@ -21,11 +21,12 @@ function setupProcessListeners(cleanup) {
21
21
  events.forEach((event) => process.removeListener(event, cleanup));
22
22
  };
23
23
  }
24
- async function createTest(task, page, scope) {
24
+ async function createTest(task, pageRef, scope) {
25
25
  const removeListeners = setupProcessListeners(flushEvents);
26
26
  try {
27
27
  const testConfigArg = process.env.TEST_GEN_TOKEN;
28
28
  const testGenConfig = (0, scenarios_1.loadTestConfigs)(testConfigArg);
29
+ const useComputerUseAgent = testGenConfig.options?.useComputerUseAgent;
29
30
  if (testGenConfig.options && testGenConfig.options.metadata) {
30
31
  (0, reporter_1.setReporterConfig)({
31
32
  projectRepoName: testGenConfig.options?.metadata.projectRepoName,
@@ -39,8 +40,9 @@ async function createTest(task, page, scope) {
39
40
  projectRepoName: testGenConfig.options?.metadata.projectRepoName,
40
41
  });
41
42
  }
43
+ // pageRef can be a FrameLocator, in which case we need to get the Page where the iframe is located
44
+ let page = "owner" in pageRef ? pageRef.owner().page() : pageRef;
42
45
  const fileServiceClient = new client_1.default();
43
- const useComputerUseAgent = testGenConfig.options?.useComputerUseAgent;
44
46
  let agentResult;
45
47
  if (useComputerUseAgent) {
46
48
  agentResult = await (0, cua_1.createTestUsingComputerUseAgent)({
@@ -1 +1 @@
1
- {"version":3,"file":"lib.d.ts","sourceRoot":"","sources":["../../src/reporter/lib.ts"],"names":[],"mappings":"AAMA,KAAK,uBAAuB,GAAG;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EACP,SAAS,GACT,aAAa,GACb,kBAAkB,GAClB,OAAO,GACP,UAAU,GACV,SAAS,GACT,WAAW,CAAC;IAChB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB,CAAC;AAEF,qBAAa,cAAc;IAEvB,OAAO,CAAC,KAAK;gBAAL,KAAK,EAAE;QACb,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,uBAAuB,CAAC,aAAa,CAAC,CAAC;KAC9C;IAGH,YAAY,CAAC,gBAAgB,CAAC,EAAE;QAC9B,QAAQ,CAAC,EAAE,OAAO,CAAC,uBAAuB,CAAC,CAAC;KAC7C,GAAG,uBAAuB;CAO5B;AAED,qBAAa,QAAQ;IAEjB,OAAO,CAAC,MAAM;gBAAN,MAAM,EAAE;QACd,aAAa,EAAE,MAAM,CAAC;QACtB,YAAY,EAAE,MAAM,CAAC;KACtB;IAGG,MAAM,CAAC,cAAc,EAAE,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC;YAc7C,sBAAsB;CAmCrC"}
1
+ {"version":3,"file":"lib.d.ts","sourceRoot":"","sources":["../../src/reporter/lib.ts"],"names":[],"mappings":"AAMA,KAAK,uBAAuB,GAAG;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EACP,SAAS,GACT,aAAa,GACb,kBAAkB,GAClB,OAAO,GACP,UAAU,GACV,SAAS,GACT,WAAW,CAAC;IAChB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB,CAAC;AAEF,qBAAa,cAAc;IAEvB,OAAO,CAAC,KAAK;gBAAL,KAAK,EAAE;QACb,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,uBAAuB,CAAC,aAAa,CAAC,CAAC;KAC9C;IAGH,YAAY,CAAC,gBAAgB,CAAC,EAAE;QAC9B,QAAQ,CAAC,EAAE,OAAO,CAAC,uBAAuB,CAAC,CAAC;KAC7C,GAAG,uBAAuB;CAO5B;AAED,qBAAa,QAAQ;IAEjB,OAAO,CAAC,MAAM;gBAAN,MAAM,EAAE;QACd,aAAa,EAAE,MAAM,CAAC;QACtB,YAAY,EAAE,MAAM,CAAC;KACtB;IAGG,MAAM,CAAC,cAAc,EAAE,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC;YAc7C,sBAAsB;CAkCrC"}
@@ -51,8 +51,7 @@ class Reporter {
51
51
  method: "POST",
52
52
  headers: {
53
53
  "Content-Type": "application/json",
54
- // TODO: fix this with authentication of github updates api
55
- Authorization: "weQPMWKT",
54
+ Authorization: `Bearer ${process.env.EMPIRICALRUN_API_KEY}`,
56
55
  },
57
56
  body,
58
57
  });
@@ -47,7 +47,7 @@ async function getSessionState() {
47
47
  method: "GET",
48
48
  headers: {
49
49
  "Content-Type": "application/json",
50
- Authorization: "weQPMWKT",
50
+ Authorization: `Bearer ${process.env.EMPIRICALRUN_API_KEY}`,
51
51
  },
52
52
  });
53
53
  const generationStateData = (await response.json());
@@ -69,7 +69,7 @@ async function endSession() {
69
69
  method: "POST",
70
70
  headers: {
71
71
  "Content-Type": "application/json",
72
- Authorization: "weQPMWKT",
72
+ Authorization: `Bearer ${process.env.EMPIRICALRUN_API_KEY}`,
73
73
  },
74
74
  body: JSON.stringify({
75
75
  state: {
@@ -93,7 +93,7 @@ async function updateSessionStatus(sessionId, payload) {
93
93
  method: "PATCH",
94
94
  headers: {
95
95
  "Content-Type": "application/json",
96
- Authorization: "weQPMWKT",
96
+ Authorization: `Bearer ${process.env.EMPIRICALRUN_API_KEY}`,
97
97
  },
98
98
  body,
99
99
  });
@@ -0,0 +1,23 @@
1
+ import { Tool, ToolResult } from "@empiricalrun/llm/chat";
2
+ interface StrReplaceInput {
3
+ command: string;
4
+ path: string;
5
+ view_range?: [number, number];
6
+ old_str?: string;
7
+ new_str?: string;
8
+ file_text?: string;
9
+ insert_line?: number;
10
+ }
11
+ /**
12
+ * Cleans up any backup files that were created during the editing process
13
+ * @returns The number of backup files that were cleaned up
14
+ */
15
+ export declare function cleanupBackupFiles(repoDir: string): number;
16
+ /**
17
+ * Our implementation of Claude's built-in text editor tool
18
+ * https://docs.anthropic.com/en/docs/build-with-claude/tool-use/text-editor-tool
19
+ */
20
+ export declare function strReplaceEditorExecutor(input: StrReplaceInput, typeChecker?: (filePath: string) => string[]): Promise<ToolResult>;
21
+ export declare const textEditorTools: Tool[];
22
+ export {};
23
+ //# sourceMappingURL=str_replace_editor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"str_replace_editor.d.ts","sourceRoot":"","sources":["../../src/tools/str_replace_editor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAyB1D,UAAU,eAAe;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC9B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;;GAGG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAwC1D;AAMD;;;GAGG;AACH,wBAAsB,wBAAwB,CAC5C,KAAK,EAAE,eAAe,EACtB,WAAW,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,MAAM,EAAE,GAC3C,OAAO,CAAC,UAAU,CAAC,CA8IrB;AA6GD,eAAO,MAAM,eAAe,EAAE,IAAI,EAKjC,CAAC"}
@@ -0,0 +1,304 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.textEditorTools = exports.strReplaceEditorExecutor = exports.cleanupBackupFiles = void 0;
7
+ const fs_1 = __importDefault(require("fs"));
8
+ const path_1 = __importDefault(require("path"));
9
+ const zod_1 = require("zod");
10
/**
 * Copies `filePath` to `<filePath>.bak` so a later edit can be undone.
 * Does nothing when the file does not exist.
 */
function createBackup(filePath) {
    if (!fs_1.default.existsSync(filePath)) {
        return;
    }
    fs_1.default.copyFileSync(filePath, `${filePath}.bak`);
}
16
/** Reports whether a `<filePath>.bak` snapshot exists on disk. */
function hasBackup(filePath) {
    return fs_1.default.existsSync(`${filePath}.bak`);
}
20
/**
 * Overwrites `filePath` with its `<filePath>.bak` snapshot and then deletes
 * the snapshot. Does nothing when no snapshot exists.
 */
function restoreBackup(filePath) {
    const snapshotPath = `${filePath}.bak`;
    if (!fs_1.default.existsSync(snapshotPath)) {
        return;
    }
    fs_1.default.copyFileSync(snapshotPath, filePath);
    fs_1.default.unlinkSync(snapshotPath);
}
27
/**
 * Cleans up any backup files (`*.bak`) that were created during the editing process.
 *
 * Walks `repoDir` recursively, skipping directories named `node_modules`.
 * Symbolic links are never followed, so the walk cannot leave `repoDir` or
 * loop through a link cycle. Cleanup is best-effort: unreadable directories
 * and files that cannot be deleted are skipped, and this function never throws.
 *
 * @param repoDir Directory to walk.
 * @returns The number of backup files that were cleaned up
 */
function cleanupBackupFiles(repoDir) {
    let cleanedCount = 0;
    const walkDir = (dir) => {
        let entries;
        try {
            // Dirent.isDirectory() is false for symlinks, unlike statSync which
            // resolves them and would let the walk escape the repo.
            entries = fs_1.default.readdirSync(dir, { withFileTypes: true });
        }
        catch (readDirError) {
            return;
        }
        for (const entry of entries) {
            const fullPath = path_1.default.join(dir, entry.name);
            if (entry.isDirectory()) {
                if (entry.name !== "node_modules") {
                    walkDir(fullPath);
                }
                continue;
            }
            if (!entry.name.endsWith(".bak")) {
                continue;
            }
            try {
                fs_1.default.unlinkSync(fullPath);
                cleanedCount++;
            }
            catch (unlinkError) {
                // Intentionally ignore errors during cleanup
            }
        }
    };
    try {
        walkDir(repoDir);
    }
    catch (error) {
        // Intentionally ignore errors during cleanup
    }
    return cleanedCount;
}
74
+ exports.cleanupBackupFiles = cleanupBackupFiles;
75
/**
 * Backslash-escapes every regex metacharacter (plus `,`, `#`, `-` and
 * whitespace) in `text` so it can be embedded in a RegExp as a literal.
 */
function escapeRegExp(text) {
    const specialChars = /[-[\]{}()*+?.,\\^$|#\s]/g;
    return text.replace(specialChars, (ch) => `\\${ch}`);
}
78
/**
 * Our implementation of Claude's built-in text editor tool
 * https://docs.anthropic.com/en/docs/build-with-claude/tool-use/text-editor-tool
 *
 * Dispatches on `input.command`:
 * - "view": lists a directory (one entry per line) or returns a file's lines
 *   prefixed with 1-based line numbers. `view_range` is `[start, end]`; an
 *   `end` of -1 means "through the last line".
 * - "create": writes `file_text` to `path`, creating missing parent directories.
 * - "str_replace": replaces `old_str` with `new_str`; `old_str` must occur
 *   exactly once. `new_str` is inserted literally.
 * - "insert": inserts `new_str` as a new line after line `insert_line`
 *   (0 inserts at the top of the file). An empty `new_str` adds a blank line.
 * - "undo_edit": restores the `.bak` snapshot of the file, if one exists.
 *
 * Failures are never thrown to the caller; they come back as
 * `{ result: <message>, isError: true }`.
 *
 * @param input Command payload (`command`, `path` and the command-specific fields).
 * @param typeChecker Optional callback run on the file after a str_replace has
 *   been written; it returns a list of error messages (empty when checks pass).
 * @returns Promise of `{ result, isError }`.
 */
async function strReplaceEditorExecutor(input, typeChecker) {
    const { path: filePath } = input;
    try {
        switch (input.command) {
            case "view": {
                // TODO: This assumes repoDir is process.cwd()
                if (!fs_1.default.existsSync(filePath)) {
                    return {
                        result: "Error: File not found",
                        isError: true,
                    };
                }
                // Handle directory view
                if (fs_1.default.statSync(filePath).isDirectory()) {
                    const files = fs_1.default.readdirSync(filePath);
                    return {
                        result: files.join("\n"),
                        isError: false,
                    };
                }
                // Handle file view
                const lines = fs_1.default.readFileSync(filePath, "utf8").split("\n");
                if (input.view_range) {
                    const [start, end] = input.view_range;
                    const endLine = end === -1 ? lines.length : end;
                    return {
                        result: lines
                            .slice(start - 1, endLine)
                            .map((line, idx) => `${start + idx}: ${line}`)
                            .join("\n"),
                        isError: false,
                    };
                }
                return {
                    result: lines.map((line, idx) => `${idx + 1}: ${line}`).join("\n"),
                    isError: false,
                };
            }
            case "create": {
                if (input.file_text === undefined || input.file_text === null) {
                    throw new Error("file_text is required for create command");
                }
                // New files are often placed in directories that do not exist yet.
                fs_1.default.mkdirSync(path_1.default.dirname(filePath), { recursive: true });
                fs_1.default.writeFileSync(filePath, input.file_text);
                return {
                    result: `Successfully created file ${filePath}`,
                    isError: false,
                };
            }
            case "str_replace": {
                if (!input.old_str) {
                    throw new Error("old_str is required for str_replace command");
                }
                if (input.new_str === undefined || input.new_str === null) {
                    // "" is valid as new_str, so we check for nullish -- not falsy
                    throw new Error("new_str is required for str_replace command");
                }
                const content = fs_1.default.readFileSync(filePath, "utf8");
                // Splitting on the literal counts non-overlapping matches without
                // having to escape old_str into a regular expression.
                const occurrences = content.split(input.old_str).length - 1;
                if (occurrences === 0) {
                    return {
                        result: `old_str not found in file: ${filePath}`,
                        isError: true,
                    };
                }
                if (occurrences > 1) {
                    return {
                        result: `Error: old_str found ${occurrences} times in file: ${filePath}. Please provide more context to make a unique match.`,
                        isError: true,
                    };
                }
                // Snapshot only once the edit is known to be applicable, so a
                // rejected edit cannot overwrite the previous undo snapshot.
                createBackup(filePath);
                // A replacer function keeps "$&", "$$", "$'" etc. in new_str literal;
                // a plain string replacement would expand them.
                const newContent = content.replace(input.old_str, () => input.new_str);
                fs_1.default.writeFileSync(filePath, newContent);
                if (!typeChecker) {
                    return {
                        result: `Edits to file ${filePath} have been applied.`,
                        isError: false,
                    };
                }
                const errors = typeChecker(filePath);
                if (errors.length > 0) {
                    return {
                        result: `Edits to file ${filePath} have been applied. However, type checks are failing with errors:\n${errors.join("\n")}`,
                        isError: true,
                    };
                }
                return {
                    result: `Edits to file ${filePath} have been applied. Type checks have also passed.`,
                    isError: false,
                };
            }
            case "insert": {
                // "" is a valid new_str (inserts a blank line), so check for nullish -- not falsy
                if (input.insert_line === undefined ||
                    input.insert_line === null ||
                    input.new_str === undefined ||
                    input.new_str === null) {
                    throw new Error("insert_line and new_str are required for insert command");
                }
                createBackup(filePath);
                const lines = fs_1.default.readFileSync(filePath, "utf8").split("\n");
                lines.splice(input.insert_line, 0, input.new_str);
                fs_1.default.writeFileSync(filePath, lines.join("\n"));
                return {
                    result: "Success",
                    isError: false,
                };
            }
            case "undo_edit": {
                if (hasBackup(filePath)) {
                    restoreBackup(filePath);
                    return {
                        result: "Success",
                        isError: false,
                    };
                }
                return {
                    result: "No backup file found",
                    isError: true,
                };
            }
            default:
                throw new Error(`Unknown command: ${input.command}`);
        }
    }
    catch (error) {
        return {
            result: error instanceof Error ? error.message : "Unknown error occurred",
            isError: true,
        };
    }
}
215
+ exports.strReplaceEditorExecutor = strReplaceEditorExecutor;
216
// Tool wrapper around the executor's "view" command: returns a directory
// listing or a file's numbered lines. No type checker is involved.
const fileViewTool = {
    schema: {
        name: "fileViewTool",
        description: `A tool to view the content of a file or directory. If the path points
to a directory, the tool will return a list of files in the directory, separated by line breaks.
If the path points to a file, the tool will return the content of the file. File contents
are returned with line numbers, starting from 1.

1: line 1
2: line 2
...`,
        parameters: zod_1.z.object({
            // Does not support view_range for now
            path: zod_1.z.string().describe("The path to the file or directory to view."),
        }),
    },
    execute: async (input) => {
        const viewRequest = { command: "view", path: input.path };
        return strReplaceEditorExecutor(viewRequest);
    },
};
239
// Tool wrapper around the executor's "create" command. The type checker is
// forwarded to the executor unchanged.
const fileCreateTool = {
    schema: {
        name: "fileCreateTool",
        description: "A tool to create a new file with given contents.",
        parameters: zod_1.z.object({
            path: zod_1.z.string().describe("The path to the new file."),
            file_text: zod_1.z.string().describe("The contents of the new file."),
        }),
    },
    execute: async (input, typeChecker) => {
        const createRequest = {
            command: "create",
            path: input.path,
            file_text: input.file_text,
        };
        return strReplaceEditorExecutor(createRequest, typeChecker);
    },
};
256
// Tool wrapper around the executor's "str_replace" command. The type checker
// is forwarded so the executor can report type errors after the edit.
const stringReplaceTool = {
    schema: {
        name: "stringReplaceTool",
        description: `A tool to replace a string in a file. This tool requires old_str to be unique
in the file. If old_str is not unique, the tool will return an error.`,
        parameters: zod_1.z.object({
            path: zod_1.z.string().describe("The path to the file."),
            old_str: zod_1.z.string().describe("The string to be replaced."),
            new_str: zod_1.z.string().describe("The string to replace old_str with."),
        }),
    },
    execute: async (input, typeChecker) => {
        const replaceRequest = {
            command: "str_replace",
            path: input.path,
            old_str: input.old_str,
            new_str: input.new_str,
        };
        return strReplaceEditorExecutor(replaceRequest, typeChecker);
    },
};
276
// Tool wrapper around the executor's "insert" command: adds new_str as a new
// line after the given line number. The type checker is forwarded unchanged.
const stringInsertTool = {
    schema: {
        name: "stringInsertTool",
        description: "A tool to insert a string at a specific line in a file.",
        parameters: zod_1.z.object({
            path: zod_1.z.string().describe("The path to the file."),
            insert_line: zod_1.z
                .number()
                .int()
                .min(0)
                .describe("The line number after which to insert the text (0 for beginning of file)."),
            new_str: zod_1.z.string().describe("The string to insert."),
        }),
    },
    execute: async (input, typeChecker) => {
        const insertRequest = {
            command: "insert",
            path: input.path,
            insert_line: input.insert_line,
            new_str: input.new_str,
        };
        return strReplaceEditorExecutor(insertRequest, typeChecker);
    },
};
299
+ exports.textEditorTools = [
300
+ fileViewTool,
301
+ fileCreateTool,
302
+ stringReplaceTool,
303
+ stringInsertTool,
304
+ ];
@@ -1 +1 @@
1
- {"version":3,"file":"test-gen-browser.d.ts","sourceRoot":"","sources":["../../src/tools/test-gen-browser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AA8EnD,eAAO,MAAM,4BAA4B,EAAE,IA0E1C,CAAC"}
1
+ {"version":3,"file":"test-gen-browser.d.ts","sourceRoot":"","sources":["../../src/tools/test-gen-browser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AAyFnD,eAAO,MAAM,4BAA4B,EAAE,IA0E1C,CAAC"}
@@ -66,6 +66,17 @@ await extPage
66
66
  // Instead, add the TODO before or after the multi-line statement
67
67
  \`\`\`
68
68
 
69
+ The TODO comment must be inside a test block, not outside of it. For example, this is invalid:
70
+
71
+ \`\`\`
72
+ // --- BEGIN INVALID EXAMPLE ---
73
+ test("Example test code", async ({ page }) => {
74
+ await page.goto("https://example.com");
75
+ });
76
+ // TODO(agent on page): Click on the login button
77
+ // --- END INVALID EXAMPLE ---
78
+ \`\`\`
79
+
69
80
  After execution, the browser agent will return a summary of actions that it took, and the generated Playwright code for them.
70
81
  You can then use the text editor tool to replace the TODO comment with the generated Playwright code.
71
82
  `;
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/index.ts"],"names":[],"mappings":"AAAA,wBAAsB,oBAAoB,CAAC,CAAC,EAAE,EAC5C,IAAI,EACJ,MAAc,EACd,IAAI,GACL,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,GAAG,CAAC;CACZ,GAAG,OAAO,CAAC,CAAC,CAAC,CAoBb;AAED,wBAAsB,eAAe,CAAC,EACpC,MAAM,EACN,GAAG,EACH,IAAI,GACL,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,CAAC,EAAE,GAAG,CAAC;CACZ,oBAWA"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/index.ts"],"names":[],"mappings":"AAAA,wBAAsB,oBAAoB,CAAC,CAAC,EAAE,EAC5C,IAAI,EACJ,MAAc,EACd,IAAI,GACL,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,GAAG,CAAC;CACZ,GAAG,OAAO,CAAC,CAAC,CAAC,CAmBb;AAED,wBAAsB,eAAe,CAAC,EACpC,MAAM,EACN,GAAG,EACH,IAAI,GACL,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,CAAC,EAAE,GAAG,CAAC;CACZ,oBAWA"}
@@ -4,8 +4,7 @@ exports.callGitHubProxy = exports.makeDashboardRequest = void 0;
4
4
  async function makeDashboardRequest({ path, method = "GET", body, }) {
5
5
  const requestHeaders = {
6
6
  "Content-Type": "application/json",
7
- // TODO: Move to env variable for authentication
8
- Authorization: "weQPMWKT",
7
+ Authorization: `Bearer ${process.env.EMPIRICALRUN_API_KEY}`,
9
8
  "User-Agent": "empiricalrun/test-gen",
10
9
  };
11
10
  const baseUrl = "https://dash.empirical.run";
@@ -1 +1 @@
1
- {"version":3,"file":"repo-tree.d.ts","sourceRoot":"","sources":["../../src/utils/repo-tree.ts"],"names":[],"mappings":"AAGA,eAAO,MAAM,eAAe,qBAU3B,CAAC;AAEF,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,KAAK,UAsE9D"}
1
+ {"version":3,"file":"repo-tree.d.ts","sourceRoot":"","sources":["../../src/utils/repo-tree.ts"],"names":[],"mappings":"AAGA,eAAO,MAAM,eAAe,qBAW3B,CAAC;AAEF,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,KAAK,UAsE9D"}
@@ -16,6 +16,7 @@ exports.DEFAULT_EXCLUDE = [
16
16
  "test-results",
17
17
  ".empiricalrun",
18
18
  "auth",
19
+ "package-lock.json",
19
20
  ];
20
21
  function generateAsciiTree(dirPath, options = {}) {
21
22
  const defaultOptions = {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@empiricalrun/test-gen",
3
- "version": "0.53.12",
3
+ "version": "0.54.0",
4
4
  "publishConfig": {
5
5
  "registry": "https://registry.npmjs.org/",
6
6
  "access": "public"
@@ -56,7 +56,7 @@
56
56
  "tsx": "^4.16.2",
57
57
  "typescript": "^5.3.3",
58
58
  "zod": "^3.23.8",
59
- "@empiricalrun/llm": "^0.14.7",
59
+ "@empiricalrun/llm": "^0.14.8",
60
60
  "@empiricalrun/r2-uploader": "^0.3.8",
61
61
  "@empiricalrun/test-run": "^0.7.7"
62
62
  },