@empiricalrun/test-gen 0.53.2 → 0.53.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/dist/agent/browsing/utils.d.ts.map +1 -1
- package/dist/agent/browsing/utils.js +6 -5
- package/dist/agent/chat/index.d.ts.map +1 -1
- package/dist/agent/chat/index.js +2 -0
- package/dist/agent/cua/computer.d.ts +4 -0
- package/dist/agent/cua/computer.d.ts.map +1 -1
- package/dist/agent/cua/computer.js +9 -1
- package/dist/agent/cua/index.d.ts.map +1 -1
- package/dist/agent/cua/index.js +60 -30
- package/dist/agent/cua/model.d.ts +2 -2
- package/dist/agent/cua/model.d.ts.map +1 -1
- package/dist/agent/cua/model.js +19 -0
- package/dist/bin/utils/index.js +1 -1
- package/dist/tools/commit-and-create-pr.d.ts +3 -0
- package/dist/tools/commit-and-create-pr.d.ts.map +1 -0
- package/dist/tools/commit-and-create-pr.js +102 -0
- package/dist/tools/diagnosis-fetcher.d.ts.map +1 -1
- package/dist/tools/diagnosis-fetcher.js +13 -10
- package/dist/tools/test-gen-browser.d.ts.map +1 -1
- package/dist/tools/test-gen-browser.js +11 -5
- package/dist/tools/test-run-fetcher/index.d.ts.map +1 -1
- package/dist/tools/test-run-fetcher/index.js +13 -11
- package/dist/tools/utils/index.d.ts +11 -0
- package/dist/tools/utils/index.d.ts.map +1 -0
- package/dist/tools/utils/index.js +36 -0
- package/package.json +2 -2
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,21 @@
|
|
|
1
1
|
# @empiricalrun/test-gen
|
|
2
2
|
|
|
3
|
+
## 0.53.4
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- 1426372: fix: remove stray console.log
|
|
8
|
+
- 7efc3dc: feat: add page.goto to cua implementation + prompt edits
|
|
9
|
+
- Updated dependencies [7efc3dc]
|
|
10
|
+
- @empiricalrun/llm@0.14.3
|
|
11
|
+
|
|
12
|
+
## 0.53.3
|
|
13
|
+
|
|
14
|
+
### Patch Changes
|
|
15
|
+
|
|
16
|
+
- 094b9f7: feat: add tool call for commit and push changes from chat agent
|
|
17
|
+
- cc64ff1: feat: enable browser tool call to pick the right page to interact with
|
|
18
|
+
|
|
3
19
|
## 0.53.2
|
|
4
20
|
|
|
5
21
|
### Patch Changes
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAe,QAAQ,EAAE,MAAM,4BAA4B,CAAC;AAIxE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAsBvD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;AAiFD,wBAAsB,yBAAyB,CAAC,EAC9C,YAAY,EACZ,YAAY,EACZ,cAAc,GACf,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;CAC1B,
|
|
1
|
+
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAe,QAAQ,EAAE,MAAM,4BAA4B,CAAC;AAIxE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAsBvD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;AAiFD,wBAAsB,yBAAyB,CAAC,EAC9C,YAAY,EACZ,YAAY,EACZ,cAAc,GACf,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;CAC1B,iBA2BA;AAED,wBAAsB,cAAc,CAAC,EACnC,YAAY,EACZ,cAAc,EACd,QAAQ,GACT,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,QAAQ,EAAE,MAAM,CAAC;CAClB,iBAoBA;AAED,wBAAsB,yBAAyB,CAAC,EAC9C,QAAQ,EACR,QAAQ,EACR,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC,MAAM,CAAC,CAyDlB;AAyBD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBA2HxD;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,CACxC,OAAO,EAAE,MAAM,GACd,OAAO,CAAC,oBAAoB,CAAC,CAM/B;AAWD,wBAAsB,oBAAoB,CACxC,gBAAgB,EAAE,oBAAoB,GACrC,OAAO,CAAC,MAAM,EAAE,CAAC,CAQnB;AAED;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,YAAY,EAAE,MAAM,EACpB,gBAAgB,EAAE,oBAAoB,EACtC,gBAAgB,GAAE,MAAM,EAAU,GACjC,OAAO,CAAC,MAAM,CAAC,CA+CjB;AAED,qBAAa,eAAe;IACd,OAAO,CAAC,SAAS;gBAAT,SAAS,EAAE,MAAM;IACrC,OAAO,CAAC,aAAa,CAAqB;YAE5B,mBAAmB;YAUnB,gBAAgB;IAsBjB,OAAO;IAoBb,SAAS;CAKjB"}
|
|
@@ -90,15 +90,16 @@ async function prepareFileForUpdateScenario({ testCase, specPath, trace, }) {
|
|
|
90
90
|
}
|
|
91
91
|
async function replaceTodoWithCreateTest({ testFilePath, testCaseName, testCaseSuites, }) {
|
|
92
92
|
// This method is an alternative to prepareFileForUpdateScenario
|
|
93
|
-
// TODO: Does not support
|
|
93
|
+
// TODO: Does not support scoped variables and updates in POM files
|
|
94
94
|
const fileContent = await fs_extra_1.default.readFile(testFilePath, "utf-8");
|
|
95
|
-
const todoRegex = /\/\/ TODO\(agent\)
|
|
95
|
+
const todoRegex = /\/\/ TODO\(agent(?:\s+on\s+(\w+))?\):\s*(.*)/;
|
|
96
96
|
const todoMatch = fileContent.match(todoRegex);
|
|
97
97
|
if (!todoMatch) {
|
|
98
|
-
throw new Error(`No "// TODO(agent):" comment found in file: ${testFilePath}`);
|
|
98
|
+
throw new Error(`No "// TODO(agent):" or "// TODO(agent on pageName):" comment found in file: ${testFilePath}`);
|
|
99
99
|
}
|
|
100
|
-
|
|
101
|
-
|
|
100
|
+
const [, pageVarName] = todoMatch;
|
|
101
|
+
const pageVariable = pageVarName || "page"; // Default to "page" if not specified
|
|
102
|
+
await fs_extra_1.default.writeFile(testFilePath, fileContent.replace(todoRegex, (_, __, todoText) => `await createTest("${todoText.replace(/"/g, '\\"')}", ${pageVariable});`));
|
|
102
103
|
await addImportForCreateTest(testFilePath);
|
|
103
104
|
await markTestAsOnly({
|
|
104
105
|
testCaseName,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"AA2FA,wBAAsB,SAAS,CAAC,EAC9B,aAA4C,EAC5C,mBAA2B,EAC3B,oBAAoB,GACrB,EAAE;IACD,aAAa,CAAC,EACV,4BAA4B,GAC5B,4BAA4B,GAC5B,8BAA8B,CAAC;IACnC,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,oBAAoB,CAAC,EAAE,MAAM,CAAC;CAC/B,mBAyFA"}
|
package/dist/agent/chat/index.js
CHANGED
|
@@ -5,6 +5,7 @@ const chat_1 = require("@empiricalrun/llm/chat");
|
|
|
5
5
|
const picocolors_1 = require("picocolors");
|
|
6
6
|
const web_1 = require("../../bin/utils/platform/web");
|
|
7
7
|
const human_in_the_loop_1 = require("../../human-in-the-loop");
|
|
8
|
+
const commit_and_create_pr_1 = require("../../tools/commit-and-create-pr");
|
|
8
9
|
const diagnosis_fetcher_1 = require("../../tools/diagnosis-fetcher");
|
|
9
10
|
const grep_1 = require("../../tools/grep");
|
|
10
11
|
const test_gen_browser_1 = require("../../tools/test-gen-browser");
|
|
@@ -18,6 +19,7 @@ function getTools(selectedModel) {
|
|
|
18
19
|
test_run_fetcher_1.fetchTestRunReportTool,
|
|
19
20
|
diagnosis_fetcher_1.fetchDiagnosisReportTool,
|
|
20
21
|
test_gen_browser_1.generateTestWithBrowserAgent,
|
|
22
|
+
commit_and_create_pr_1.commitAndPushChangesTool,
|
|
21
23
|
];
|
|
22
24
|
if (selectedModel.startsWith("gemini")) {
|
|
23
25
|
// Claude will have its own built-in text editor tools
|
|
@@ -2,6 +2,10 @@ import { ResponseComputerToolCall } from "openai/resources/responses/responses.m
|
|
|
2
2
|
import type { Page } from "playwright";
|
|
3
3
|
type ComputerAction = ResponseComputerToolCall.Click | ResponseComputerToolCall.DoubleClick | ResponseComputerToolCall.Drag | ResponseComputerToolCall.Keypress | ResponseComputerToolCall.Move | ResponseComputerToolCall.Screenshot | ResponseComputerToolCall.Scroll | ResponseComputerToolCall.Type | ResponseComputerToolCall.Wait;
|
|
4
4
|
export declare function getScreenshot(page: Page): Promise<string>;
|
|
5
|
+
export declare function handlePageGoto(page: Page, url: string): Promise<{
|
|
6
|
+
actionSummary: string;
|
|
7
|
+
actionCode: string;
|
|
8
|
+
}>;
|
|
5
9
|
export declare function handleModelAction(page: Page, action: ComputerAction): Promise<{
|
|
6
10
|
actionSummary: string;
|
|
7
11
|
actionCode: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"computer.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/computer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,wBAAwB,EAAE,MAAM,0CAA0C,CAAC;AACpF,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAEvC,KAAK,cAAc,GACf,wBAAwB,CAAC,KAAK,GAC9B,wBAAwB,CAAC,WAAW,GACpC,wBAAwB,CAAC,IAAI,GAC7B,wBAAwB,CAAC,QAAQ,GACjC,wBAAwB,CAAC,IAAI,GAC7B,wBAAwB,CAAC,UAAU,GACnC,wBAAwB,CAAC,MAAM,GAC/B,wBAAwB,CAAC,IAAI,GAC7B,wBAAwB,CAAC,IAAI,CAAC;AAElC,wBAAsB,aAAa,CAAC,IAAI,EAAE,IAAI,mBAG7C;AAgCD,wBAAsB,iBAAiB,CACrC,IAAI,EAAE,IAAI,EACV,MAAM,EAAE,cAAc,GACrB,OAAO,CAAC;IACT,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC,CAqID"}
|
|
1
|
+
{"version":3,"file":"computer.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/computer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,wBAAwB,EAAE,MAAM,0CAA0C,CAAC;AACpF,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAEvC,KAAK,cAAc,GACf,wBAAwB,CAAC,KAAK,GAC9B,wBAAwB,CAAC,WAAW,GACpC,wBAAwB,CAAC,IAAI,GAC7B,wBAAwB,CAAC,QAAQ,GACjC,wBAAwB,CAAC,IAAI,GAC7B,wBAAwB,CAAC,UAAU,GACnC,wBAAwB,CAAC,MAAM,GAC/B,wBAAwB,CAAC,IAAI,GAC7B,wBAAwB,CAAC,IAAI,CAAC;AAElC,wBAAsB,aAAa,CAAC,IAAI,EAAE,IAAI,mBAG7C;AAgCD,wBAAsB,cAAc,CAClC,IAAI,EAAE,IAAI,EACV,GAAG,EAAE,MAAM,GACV,OAAO,CAAC;IACT,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC,CAMD;AAED,wBAAsB,iBAAiB,CACrC,IAAI,EAAE,IAAI,EACV,MAAM,EAAE,cAAc,GACrB,OAAO,CAAC;IACT,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC,CAqID"}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.handleModelAction = exports.getScreenshot = void 0;
|
|
3
|
+
exports.handleModelAction = exports.handlePageGoto = exports.getScreenshot = void 0;
|
|
4
4
|
async function getScreenshot(page) {
|
|
5
5
|
const screenshotBytes = await page.screenshot();
|
|
6
6
|
return Buffer.from(screenshotBytes).toString("base64");
|
|
@@ -35,6 +35,14 @@ const CUA_KEY_TO_PLAYWRIGHT_KEY = {
|
|
|
35
35
|
tab: "Tab",
|
|
36
36
|
win: "Meta",
|
|
37
37
|
};
|
|
38
|
+
async function handlePageGoto(page, url) {
|
|
39
|
+
await page.goto(url);
|
|
40
|
+
return {
|
|
41
|
+
actionSummary: `Navigated page to ${url}`,
|
|
42
|
+
actionCode: `await page.goto("${url}");\n`,
|
|
43
|
+
};
|
|
44
|
+
}
|
|
45
|
+
exports.handlePageGoto = handlePageGoto;
|
|
38
46
|
async function handleModelAction(page, action) {
|
|
39
47
|
const actionType = action.type;
|
|
40
48
|
let actionCode = "";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AASA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAOlC,wBAAsB,sBAAsB,CAAC,IAAI,EAAE,IAAI,iBAoBtD;AAED,wBAAsB,+BAA+B,CAAC,EACpD,IAAI,EACJ,IAAI,GACL,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;CACd,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;CACxB,CAAC,CAmMD"}
|
package/dist/agent/cua/index.js
CHANGED
|
@@ -58,7 +58,7 @@ async function createTestUsingComputerUseAgent({ page, task, }) {
|
|
|
58
58
|
content: [
|
|
59
59
|
{
|
|
60
60
|
type: "input_text",
|
|
61
|
-
text: task
|
|
61
|
+
text: `Task to execute: ${task}\n\nCurrent page URL: ${page.url()}`,
|
|
62
62
|
},
|
|
63
63
|
{
|
|
64
64
|
type: "input_image",
|
|
@@ -85,7 +85,8 @@ async function createTestUsingComputerUseAgent({ page, task, }) {
|
|
|
85
85
|
input: { response },
|
|
86
86
|
});
|
|
87
87
|
const computerCalls = response.output.filter((item) => item.type === "computer_call");
|
|
88
|
-
|
|
88
|
+
const functionCalls = response.output.filter((item) => item.type === "function_call");
|
|
89
|
+
if (computerCalls.length === 0 && functionCalls.length === 0) {
|
|
89
90
|
const assistantOutput = response.output.find((item) => item.type === "message");
|
|
90
91
|
if (assistantOutput) {
|
|
91
92
|
const content = assistantOutput.content.find((item) => item.type === "output_text");
|
|
@@ -105,46 +106,75 @@ async function createTestUsingComputerUseAgent({ page, task, }) {
|
|
|
105
106
|
actionsSummary.push(`Action reasoning: ${summaryText}`);
|
|
106
107
|
}
|
|
107
108
|
}
|
|
108
|
-
// We expect
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
const
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
actionsSummary.push(`
|
|
118
|
-
|
|
109
|
+
// We expect either a function call or a computer call in the response.
|
|
110
|
+
let toolCallOutput;
|
|
111
|
+
let executedActionSummary = "";
|
|
112
|
+
// We are assuming only one function call per response
|
|
113
|
+
const functionCall = functionCalls[0];
|
|
114
|
+
if (functionCall) {
|
|
115
|
+
const args = JSON.parse(functionCall.arguments);
|
|
116
|
+
const { actionSummary, actionCode } = await (0, computer_1.handlePageGoto)(page, args.url);
|
|
117
|
+
executedActionSummary = actionSummary;
|
|
118
|
+
actionsSummary.push(`Action executed: ${actionSummary}`);
|
|
119
|
+
if (actionCode) {
|
|
120
|
+
actionsSummary.push(`Generated code: ${actionCode}`);
|
|
121
|
+
generatedCode += actionCode;
|
|
122
|
+
}
|
|
123
|
+
toolCallOutput = {
|
|
124
|
+
type: "function_call_output",
|
|
125
|
+
call_id: functionCall.call_id,
|
|
126
|
+
output: `Navigating page to ${args.url}`,
|
|
127
|
+
};
|
|
128
|
+
}
|
|
129
|
+
else if (computerCalls.length >= 1) {
|
|
130
|
+
// We expect at most one computer call per response.
|
|
131
|
+
const computerCall = computerCalls[0];
|
|
132
|
+
const action = computerCall.action;
|
|
133
|
+
// Execute the action and take a screenshot
|
|
134
|
+
const { actionSummary, actionCode } = await (0, computer_1.handleModelAction)(page, action);
|
|
135
|
+
executedActionSummary = actionSummary;
|
|
136
|
+
actionsSummary.push(`Action executed: ${actionSummary}`);
|
|
137
|
+
if (actionCode) {
|
|
138
|
+
actionsSummary.push(`Generated code: ${actionCode}`);
|
|
139
|
+
generatedCode += actionCode;
|
|
140
|
+
}
|
|
141
|
+
else {
|
|
142
|
+
actionsSummary.push(`No code generated: Will rely on Playwright's ability to auto-wait or auto-scroll`);
|
|
143
|
+
}
|
|
144
|
+
// Allow time for changes to take effect.
|
|
145
|
+
await new Promise((resolve) => setTimeout(resolve, 1000));
|
|
146
|
+
const screenshotBytes = await (0, computer_1.getScreenshot)(page);
|
|
147
|
+
// Populate toolCallOutput
|
|
148
|
+
toolCallOutput = {
|
|
149
|
+
type: "computer_call_output",
|
|
150
|
+
call_id: computerCall.call_id,
|
|
151
|
+
output: {
|
|
152
|
+
type: "computer_screenshot",
|
|
153
|
+
image_url: `data:image/png;base64,${screenshotBytes}`,
|
|
154
|
+
},
|
|
155
|
+
acknowledged_safety_checks: computerCall.pending_safety_checks,
|
|
156
|
+
};
|
|
119
157
|
}
|
|
120
158
|
else {
|
|
121
|
-
|
|
159
|
+
throw new Error("No tool call found in response.");
|
|
122
160
|
}
|
|
123
|
-
// Allow time for changes to take effect.
|
|
124
|
-
await new Promise((resolve) => setTimeout(resolve, 1000));
|
|
125
|
-
const screenshotBytes = await (0, computer_1.getScreenshot)(page);
|
|
126
|
-
// Send the screenshot back as a computer_call_output
|
|
127
|
-
const computerCallSpan = iterationSpan?.span({
|
|
128
|
-
name: "computer-call-output",
|
|
129
|
-
input: { lastCallId, acknowledged_safety_checks: pendingSafetyChecks },
|
|
130
|
-
});
|
|
131
161
|
response = await (0, model_1.callComputerUseModel)({
|
|
132
162
|
previousResponseId: response.id,
|
|
133
163
|
input: [
|
|
164
|
+
toolCallOutput,
|
|
134
165
|
{
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
166
|
+
role: "user",
|
|
167
|
+
content: [
|
|
168
|
+
{
|
|
169
|
+
type: "input_text",
|
|
170
|
+
text: `Action executed: ${executedActionSummary || "None"}\nCurrent page URL: ${page.url()}`,
|
|
171
|
+
},
|
|
172
|
+
],
|
|
142
173
|
},
|
|
143
174
|
],
|
|
144
175
|
screenWidth,
|
|
145
176
|
screenHeight,
|
|
146
177
|
});
|
|
147
|
-
computerCallSpan?.end({ output: response });
|
|
148
178
|
iterationSpan?.end({ output: response });
|
|
149
179
|
}
|
|
150
180
|
if (!isTaskDone) {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { Response,
|
|
1
|
+
import { Response, ResponseInputItem } from "openai/resources/responses/responses.mjs";
|
|
2
2
|
export declare function callComputerUseModel({ input, previousResponseId, screenWidth, screenHeight, }: {
|
|
3
|
-
input:
|
|
3
|
+
input: ResponseInputItem[];
|
|
4
4
|
previousResponseId?: string;
|
|
5
5
|
screenWidth: number;
|
|
6
6
|
screenHeight: number;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AACA,OAAO,
|
|
1
|
+
{"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AACA,OAAO,EAEL,QAAQ,EACR,iBAAiB,EAClB,MAAM,0CAA0C,CAAC;AA8BlD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,kBAAkB,EAClB,WAAW,EACX,YAAY,GACb,EAAE;IACD,KAAK,EAAE,iBAAiB,EAAE,CAAC;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,QAAQ,CAAC,CAuBpB"}
|
package/dist/agent/cua/model.js
CHANGED
|
@@ -13,12 +13,31 @@ you click on the submit button -- even if it looks like a scary action.
|
|
|
13
13
|
|
|
14
14
|
If you have been asked to retrieve text or verify something on the UI, then communicate
|
|
15
15
|
that in your responses so that the user can see your thinking process in its entirety.`;
|
|
16
|
+
const pageGotoTool = {
|
|
17
|
+
type: "function",
|
|
18
|
+
name: "page_goto",
|
|
19
|
+
description: "Navigate to a given URL (e.g. https://www.openai.com). Call this if you are looking at a blank page or a new page.",
|
|
20
|
+
parameters: {
|
|
21
|
+
type: "object",
|
|
22
|
+
properties: {
|
|
23
|
+
url: {
|
|
24
|
+
type: "string",
|
|
25
|
+
description: "The URL to navigate to",
|
|
26
|
+
},
|
|
27
|
+
},
|
|
28
|
+
additionalProperties: false,
|
|
29
|
+
required: ["url"],
|
|
30
|
+
},
|
|
31
|
+
strict: true,
|
|
32
|
+
};
|
|
16
33
|
async function callComputerUseModel({ input, previousResponseId, screenWidth, screenHeight, }) {
|
|
17
34
|
const openai = new openai_1.default();
|
|
18
35
|
return await openai.responses.create({
|
|
19
36
|
model: "computer-use-preview-2025-03-11",
|
|
20
37
|
previous_response_id: previousResponseId,
|
|
38
|
+
parallel_tool_calls: false,
|
|
21
39
|
tools: [
|
|
40
|
+
pageGotoTool,
|
|
22
41
|
{
|
|
23
42
|
type: "computer-preview",
|
|
24
43
|
display_width: screenWidth,
|
package/dist/bin/utils/index.js
CHANGED
|
@@ -71,7 +71,7 @@ function printBanner() {
|
|
|
71
71
|
--"-"-`;
|
|
72
72
|
const version = require("../../../package.json").version;
|
|
73
73
|
const logLine1 = `Running test-gen v${version}`;
|
|
74
|
-
const logLine2 = `from ${__dirname}`;
|
|
74
|
+
const logLine2 = `from ${__dirname.split("/bin/utils")[0]}`;
|
|
75
75
|
// Process ASCII art
|
|
76
76
|
const asciiLines = asciiArtRaw
|
|
77
77
|
.split("\n")
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"commit-and-create-pr.d.ts","sourceRoot":"","sources":["../../src/tools/commit-and-create-pr.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AAyBnD,eAAO,MAAM,wBAAwB,EAAE,IAwFtC,CAAC"}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.commitAndPushChangesTool = void 0;
|
|
7
|
+
const child_process_1 = require("child_process");
|
|
8
|
+
const crypto_1 = __importDefault(require("crypto"));
|
|
9
|
+
const zod_1 = require("zod");
|
|
10
|
+
const utils_1 = require("./utils");
|
|
11
|
+
const GIT_USER_NAME = "empiricalrun[bot]";
|
|
12
|
+
const GIT_USER_EMAIL = "180257021+empiricalrun[bot]@users.noreply.github.com";
|
|
13
|
+
const CommitAndPushChangesSchema = zod_1.z.object({
|
|
14
|
+
commitMessage: zod_1.z
|
|
15
|
+
.string()
|
|
16
|
+
.describe("A short message to use for the commit. Should not be more than 8 words. Should follow conventional commit format."),
|
|
17
|
+
});
|
|
18
|
+
exports.commitAndPushChangesTool = {
|
|
19
|
+
schema: {
|
|
20
|
+
name: "commitAndPushChanges",
|
|
21
|
+
description: `Creates a commit with all modified files and pushes them to the current branch.
|
|
22
|
+
If currently on main branch, creates a new branch with a random name.
|
|
23
|
+
If the current branch already has an open PR, commits and pushes changes to that PR.
|
|
24
|
+
Uses the empiricalrun[bot] credentials for git operations.
|
|
25
|
+
Returns the URL of the created or updated pull request.`,
|
|
26
|
+
parameters: CommitAndPushChangesSchema,
|
|
27
|
+
},
|
|
28
|
+
execute: async (input) => {
|
|
29
|
+
try {
|
|
30
|
+
const { commitMessage } = input;
|
|
31
|
+
const currentBranch = (0, child_process_1.execSync)("git rev-parse --abbrev-ref HEAD")
|
|
32
|
+
.toString()
|
|
33
|
+
.trim();
|
|
34
|
+
let branchName = currentBranch;
|
|
35
|
+
if (currentBranch === "main") {
|
|
36
|
+
// If on main, create a new branch
|
|
37
|
+
const randomId = crypto_1.default.randomUUID().substring(0, 8);
|
|
38
|
+
branchName = `branch-${randomId}`;
|
|
39
|
+
(0, child_process_1.execSync)(`git checkout -b ${branchName}`);
|
|
40
|
+
}
|
|
41
|
+
const modifiedFiles = (0, child_process_1.execSync)("git status --porcelain")
|
|
42
|
+
.toString()
|
|
43
|
+
.split("\n")
|
|
44
|
+
.filter((line) => line && !line.includes(".bak"))
|
|
45
|
+
.map((line) => line.substring(3)); // Remove status prefix
|
|
46
|
+
if (modifiedFiles.length === 0) {
|
|
47
|
+
return {
|
|
48
|
+
isError: true,
|
|
49
|
+
result: "No modified files to commit",
|
|
50
|
+
};
|
|
51
|
+
}
|
|
52
|
+
for (const file of modifiedFiles) {
|
|
53
|
+
(0, child_process_1.execSync)(`git add "${file}"`);
|
|
54
|
+
}
|
|
55
|
+
// Use -c flag to set config just for this commit
|
|
56
|
+
(0, child_process_1.execSync)(`git -c user.name="${GIT_USER_NAME}" -c user.email="${GIT_USER_EMAIL}" commit -m "${commitMessage} [skip ci]"`);
|
|
57
|
+
const repoUrl = (0, child_process_1.execSync)("git config --get remote.origin.url")
|
|
58
|
+
.toString()
|
|
59
|
+
.trim();
|
|
60
|
+
const [owner, repo] = repoUrl
|
|
61
|
+
.replace("https://github.com/", "")
|
|
62
|
+
.replace(".git", "")
|
|
63
|
+
.split("/");
|
|
64
|
+
const existingPRs = (await (0, utils_1.callGitHubProxy)({
|
|
65
|
+
method: "GET",
|
|
66
|
+
url: `https://api.github.com/repos/${owner}/${repo}/pulls`,
|
|
67
|
+
body: {
|
|
68
|
+
head: `${owner}:${branchName}`,
|
|
69
|
+
state: "open",
|
|
70
|
+
},
|
|
71
|
+
}));
|
|
72
|
+
(0, child_process_1.execSync)(`git push origin ${branchName} --set-upstream`);
|
|
73
|
+
const existingPR = existingPRs?.find((pr) => pr.head.ref === branchName);
|
|
74
|
+
if (existingPR) {
|
|
75
|
+
return {
|
|
76
|
+
isError: false,
|
|
77
|
+
result: `Committed and pushed changes to existing PR: ${existingPR.html_url}`,
|
|
78
|
+
};
|
|
79
|
+
}
|
|
80
|
+
const pr = (await (0, utils_1.callGitHubProxy)({
|
|
81
|
+
method: "POST",
|
|
82
|
+
url: `https://api.github.com/repos/${owner}/${repo}/pulls`,
|
|
83
|
+
body: {
|
|
84
|
+
title: commitMessage,
|
|
85
|
+
head: branchName,
|
|
86
|
+
base: "main",
|
|
87
|
+
body: "Created via CommitAndPushChanges tool",
|
|
88
|
+
},
|
|
89
|
+
}));
|
|
90
|
+
return {
|
|
91
|
+
isError: false,
|
|
92
|
+
result: `Committed and pushed changes to new PR: ${pr.html_url}`,
|
|
93
|
+
};
|
|
94
|
+
}
|
|
95
|
+
catch (error) {
|
|
96
|
+
return {
|
|
97
|
+
isError: true,
|
|
98
|
+
result: `Failed to commit and push changes: ${error instanceof Error ? error.message : String(error)}`,
|
|
99
|
+
};
|
|
100
|
+
}
|
|
101
|
+
},
|
|
102
|
+
};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"diagnosis-fetcher.d.ts","sourceRoot":"","sources":["../../src/tools/diagnosis-fetcher.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;
|
|
1
|
+
{"version":3,"file":"diagnosis-fetcher.d.ts","sourceRoot":"","sources":["../../src/tools/diagnosis-fetcher.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AAenD,eAAO,MAAM,wBAAwB,EAAE,IAgFtC,CAAC"}
|
|
@@ -7,6 +7,7 @@ exports.fetchDiagnosisReportTool = void 0;
|
|
|
7
7
|
const promises_1 = __importDefault(require("fs/promises"));
|
|
8
8
|
const path_1 = __importDefault(require("path"));
|
|
9
9
|
const zod_1 = require("zod");
|
|
10
|
+
const utils_1 = require("./utils");
|
|
10
11
|
const DiagnosisSchema = zod_1.z.object({
|
|
11
12
|
diagnosisUrl: zod_1.z
|
|
12
13
|
.string()
|
|
@@ -23,21 +24,23 @@ exports.fetchDiagnosisReportTool = {
|
|
|
23
24
|
// Extract the slug from the URL - it's the part after the last '--'
|
|
24
25
|
const slug = diagnosisUrl.split("--").pop();
|
|
25
26
|
if (!slug) {
|
|
26
|
-
|
|
27
|
+
return {
|
|
28
|
+
isError: true,
|
|
29
|
+
result: "Invalid diagnosis URL - could not extract slug",
|
|
30
|
+
};
|
|
31
|
+
}
|
|
32
|
+
let data = null;
|
|
33
|
+
try {
|
|
34
|
+
data = await (0, utils_1.makeDashboardRequest)({
|
|
35
|
+
path: `/api/diagnosis/${slug}/detailed`,
|
|
36
|
+
});
|
|
27
37
|
}
|
|
28
|
-
|
|
29
|
-
method: "GET",
|
|
30
|
-
headers: {
|
|
31
|
-
Authorization: "weQPMWKT",
|
|
32
|
-
},
|
|
33
|
-
});
|
|
34
|
-
if (!response.ok) {
|
|
38
|
+
catch (error) {
|
|
35
39
|
return {
|
|
36
|
-
result: `Failed to fetch diagnosis details: ${response.statusText}`,
|
|
37
40
|
isError: true,
|
|
41
|
+
result: `Failed to fetch diagnosis details: ${error instanceof Error ? error.message : String(error)}`,
|
|
38
42
|
};
|
|
39
43
|
}
|
|
40
|
-
const data = await response.json();
|
|
41
44
|
const { test_case, diagnosis } = data.data;
|
|
42
45
|
const project = diagnosis?.test_project || "unknown";
|
|
43
46
|
const sourceContext = await promises_1.default.readFile(path_1.default.join("tests", test_case.file_path), "utf-8");
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"test-gen-browser.d.ts","sourceRoot":"","sources":["../../src/tools/test-gen-browser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;
|
|
1
|
+
{"version":3,"file":"test-gen-browser.d.ts","sourceRoot":"","sources":["../../src/tools/test-gen-browser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AA8EnD,eAAO,MAAM,4BAA4B,EAAE,IA0E1C,CAAC"}
|
|
@@ -29,16 +29,22 @@ and generate Playwright code for that actions. This is a useful tool when the mo
|
|
|
29
29
|
locator/selector for an element on the page.
|
|
30
30
|
|
|
31
31
|
IMPORTANT: Before you invoke this tool, you need to ensure that the test code is correctly prepared for this
|
|
32
|
-
agent. Preparation involves adding a TODO comment that describes the change that needs to be made
|
|
33
|
-
|
|
34
|
-
|
|
32
|
+
agent. Preparation involves adding a TODO comment that describes the change that needs to be made, and the page
|
|
33
|
+
variable name where the actions must be performed. The content of the TODO comment calls out the element and browser
|
|
34
|
+
interactions it must take. The TODO comment also has (agent on pageName) next to it, to clearly label that the change
|
|
35
|
+
is for the agent to make on the given page (pageName in this case).
|
|
36
|
+
|
|
37
|
+
To choose the page variable name, go through the test code and find available page variables. If you are replacing some
|
|
38
|
+
existing test code, use the same page variable name as in the existing test code. If you are adding steps to the test,
|
|
39
|
+
use the page variable name that is appropriate for the new steps. The page variable represents the browser page (or tab) that
|
|
40
|
+
the agent is supposed to interact with.
|
|
35
41
|
|
|
36
42
|
For example, this is a good TODO comment:
|
|
37
43
|
|
|
38
44
|
\`\`\`
|
|
39
45
|
test("Example test code", async ({ page }) => {
|
|
40
46
|
await page.goto("https://example.com");
|
|
41
|
-
// TODO(agent): Click on the login button
|
|
47
|
+
// TODO(agent on page): Click on the login button
|
|
42
48
|
});
|
|
43
49
|
\`\`\`
|
|
44
50
|
|
|
@@ -53,7 +59,7 @@ For example, this is invalid:
|
|
|
53
59
|
\`\`\`
|
|
54
60
|
await extPage
|
|
55
61
|
.getByTestId("virtuoso-item-list")
|
|
56
|
-
// TODO(agent): Click on the STARS button
|
|
62
|
+
// TODO(agent on extPage): Click on the STARS button
|
|
57
63
|
.getByText("STARS", { exact: true })
|
|
58
64
|
.click();
|
|
59
65
|
// This is invalid, because the TODO is in the middle of a multi-line statement
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tools/test-run-fetcher/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tools/test-run-fetcher/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AAcnD,wBAAgB,0BAA0B,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAOnE;AAED,eAAO,MAAM,sBAAsB,EAAE,IAwHpC,CAAC"}
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.fetchTestRunReportTool = exports.extractPathAfterSourceRepo = void 0;
|
|
4
4
|
const zod_1 = require("zod");
|
|
5
|
+
const utils_1 = require("../utils");
|
|
5
6
|
const TestRunSchema = zod_1.z.object({
|
|
6
7
|
testRunUrl: zod_1.z
|
|
7
8
|
.string()
|
|
@@ -32,22 +33,23 @@ exports.fetchTestRunReportTool = {
|
|
|
32
33
|
const runId = urlParts.pop(); // Last part is the run ID
|
|
33
34
|
const repoName = urlParts[urlParts.length - 2]; // Second to last part is the repo name
|
|
34
35
|
if (!runId || !repoName) {
|
|
35
|
-
|
|
36
|
+
return {
|
|
37
|
+
isError: true,
|
|
38
|
+
result: "Invalid test run URL - could not extract run ID or repo name",
|
|
39
|
+
};
|
|
40
|
+
}
|
|
41
|
+
let data = null;
|
|
42
|
+
try {
|
|
43
|
+
data = await (0, utils_1.makeDashboardRequest)({
|
|
44
|
+
path: `/api/test-runs/${runId}?repo_name=${repoName}`,
|
|
45
|
+
});
|
|
36
46
|
}
|
|
37
|
-
|
|
38
|
-
const response = await fetch(`https://dash.empirical.run/api/test-runs/${runId}?repo_name=${repoName}`, {
|
|
39
|
-
method: "GET",
|
|
40
|
-
headers: {
|
|
41
|
-
Authorization: "weQPMWKT",
|
|
42
|
-
},
|
|
43
|
-
});
|
|
44
|
-
if (!response.ok) {
|
|
47
|
+
catch (error) {
|
|
45
48
|
return {
|
|
46
|
-
result: `Failed to fetch test run details: ${response.statusText}`,
|
|
47
49
|
isError: true,
|
|
50
|
+
result: `Failed to fetch test run details: ${error instanceof Error ? error.message : String(error)}`,
|
|
48
51
|
};
|
|
49
52
|
}
|
|
50
|
-
const data = (await response.json());
|
|
51
53
|
// To efficiently use input_tokens, we
|
|
52
54
|
// 1. Truncate stack trace to last 300 characters
|
|
53
55
|
// 2. Remove request/response headers from network metadata
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
export declare function makeDashboardRequest<T>({ path, method, body, }: {
|
|
2
|
+
path: string;
|
|
3
|
+
method?: string;
|
|
4
|
+
body?: any;
|
|
5
|
+
}): Promise<T>;
|
|
6
|
+
export declare function callGitHubProxy({ method, url, body, }: {
|
|
7
|
+
method: string;
|
|
8
|
+
url: string;
|
|
9
|
+
body?: any;
|
|
10
|
+
}): Promise<unknown>;
|
|
11
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/index.ts"],"names":[],"mappings":"AAAA,wBAAsB,oBAAoB,CAAC,CAAC,EAAE,EAC5C,IAAI,EACJ,MAAc,EACd,IAAI,GACL,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,GAAG,CAAC;CACZ,GAAG,OAAO,CAAC,CAAC,CAAC,CAoBb;AAED,wBAAsB,eAAe,CAAC,EACpC,MAAM,EACN,GAAG,EACH,IAAI,GACL,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,CAAC,EAAE,GAAG,CAAC;CACZ,oBAWA"}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.callGitHubProxy = exports.makeDashboardRequest = void 0;
|
|
4
|
+
async function makeDashboardRequest({ path, method = "GET", body, }) {
|
|
5
|
+
const requestHeaders = {
|
|
6
|
+
"Content-Type": "application/json",
|
|
7
|
+
// TODO: Move to env variable for authentication
|
|
8
|
+
Authorization: "weQPMWKT",
|
|
9
|
+
"User-Agent": "empiricalrun/test-gen",
|
|
10
|
+
};
|
|
11
|
+
const baseUrl = "https://dash.empirical.run";
|
|
12
|
+
const response = await fetch(`${baseUrl}${path}`, {
|
|
13
|
+
method,
|
|
14
|
+
headers: requestHeaders,
|
|
15
|
+
...(body && { body: JSON.stringify(body) }),
|
|
16
|
+
});
|
|
17
|
+
if (!response.ok) {
|
|
18
|
+
const errorBody = await response.text();
|
|
19
|
+
throw new Error(`API request failed for ${method} ${path} (Status: ${response.status}). Body: ${errorBody}`);
|
|
20
|
+
}
|
|
21
|
+
return await response.json();
|
|
22
|
+
}
|
|
23
|
+
exports.makeDashboardRequest = makeDashboardRequest;
|
|
24
|
+
async function callGitHubProxy({ method, url, body, }) {
|
|
25
|
+
const githubApiPath = url.replace("https://api.github.com", "");
|
|
26
|
+
return makeDashboardRequest({
|
|
27
|
+
path: "/api/github/proxy",
|
|
28
|
+
method: "POST",
|
|
29
|
+
body: {
|
|
30
|
+
method,
|
|
31
|
+
url: githubApiPath,
|
|
32
|
+
body,
|
|
33
|
+
},
|
|
34
|
+
});
|
|
35
|
+
}
|
|
36
|
+
exports.callGitHubProxy = callGitHubProxy;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@empiricalrun/test-gen",
|
|
3
|
-
"version": "0.53.
|
|
3
|
+
"version": "0.53.4",
|
|
4
4
|
"publishConfig": {
|
|
5
5
|
"registry": "https://registry.npmjs.org/",
|
|
6
6
|
"access": "public"
|
|
@@ -68,7 +68,7 @@
|
|
|
68
68
|
"tsx": "^4.16.2",
|
|
69
69
|
"typescript": "^5.3.3",
|
|
70
70
|
"zod": "^3.23.8",
|
|
71
|
-
"@empiricalrun/llm": "^0.14.
|
|
71
|
+
"@empiricalrun/llm": "^0.14.3",
|
|
72
72
|
"@empiricalrun/r2-uploader": "^0.3.8",
|
|
73
73
|
"@empiricalrun/test-run": "^0.7.6"
|
|
74
74
|
},
|