@empiricalrun/test-gen 0.47.1 → 0.47.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -0
- package/dist/agent/codegen/run.d.ts +4 -2
- package/dist/agent/codegen/run.d.ts.map +1 -1
- package/dist/agent/cua/computer.d.ts +7 -0
- package/dist/agent/cua/computer.d.ts.map +1 -0
- package/dist/agent/cua/computer.js +151 -0
- package/dist/agent/cua/index.d.ts +13 -0
- package/dist/agent/cua/index.d.ts.map +1 -0
- package/dist/agent/cua/index.js +132 -0
- package/dist/agent/diagnosis-agent/strict-mode-violation.d.ts +2 -1
- package/dist/agent/diagnosis-agent/strict-mode-violation.d.ts.map +1 -1
- package/dist/agent/master/run.d.ts +1 -0
- package/dist/agent/master/run.d.ts.map +1 -1
- package/dist/agent/master/run.js +3 -1
- package/package.json +5 -5
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,15 @@
|
|
|
1
1
|
# @empiricalrun/test-gen
|
|
2
2
|
|
|
3
|
+
## 0.47.2
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- af97c0f: feat: cua agent can generate code
|
|
8
|
+
- d7f1678: feat: support openai cua for overlay dismissal, bump openai to 4.87.3
|
|
9
|
+
- 09e880a: feat: add more actions for cua with better types
|
|
10
|
+
- Updated dependencies [d7f1678]
|
|
11
|
+
- @empiricalrun/llm@0.9.36
|
|
12
|
+
|
|
3
13
|
## 0.47.1
|
|
4
14
|
|
|
5
15
|
### Patch Changes
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import { TraceClient } from "@empiricalrun/llm";
|
|
2
2
|
import type { TestCase, TestGenConfigOptions } from "@empiricalrun/shared-types";
|
|
3
|
+
import OpenAI from "openai";
|
|
3
4
|
import { CustomLogger } from "../../bin/logger";
|
|
5
|
+
import { CreateTestCodeUpdate } from "./types";
|
|
4
6
|
export declare function createTestWithCodeAgent({ testCase, file, repoFiles, trace, }: {
|
|
5
7
|
testCase: TestCase;
|
|
6
8
|
file: string;
|
|
@@ -8,9 +10,9 @@ export declare function createTestWithCodeAgent({ testCase, file, repoFiles, tra
|
|
|
8
10
|
trace?: TraceClient;
|
|
9
11
|
logger?: CustomLogger;
|
|
10
12
|
}): Promise<{
|
|
11
|
-
prompt:
|
|
13
|
+
prompt: OpenAI.Chat.Completions.ChatCompletionMessageParam[];
|
|
12
14
|
agentResponse: string;
|
|
13
|
-
fileChanges:
|
|
15
|
+
fileChanges: CreateTestCodeUpdate[];
|
|
14
16
|
}>;
|
|
15
17
|
export declare function generateTest(testCase: TestCase, file: string, options: TestGenConfigOptions, trace?: TraceClient): Promise<TestCase[] | void>;
|
|
16
18
|
//# sourceMappingURL=run.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAIL,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAC3B,OAAO,KAAK,EACV,QAAQ,EACR,oBAAoB,EACrB,MAAM,4BAA4B,CAAC;
|
|
1
|
+
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAIL,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAC3B,OAAO,KAAK,EACV,QAAQ,EACR,oBAAoB,EACrB,MAAM,4BAA4B,CAAC;AAEpC,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAYhD,OAAO,EAAE,oBAAoB,EAAE,MAAM,SAAS,CAAC;AAQ/C,wBAAsB,uBAAuB,CAAC,EAC5C,QAAQ,EACR,IAAI,EACJ,SAAS,EACT,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,GAAG,SAAS,CAAC;IAC9B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,MAAM,CAAC,EAAE,YAAY,CAAC;CACvB,GAAG,OAAO,CAAC;IACV,MAAM,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,0BAA0B,EAAE,CAAC;IAC7D,aAAa,EAAE,MAAM,CAAC;IACtB,WAAW,EAAE,oBAAoB,EAAE,CAAC;CACrC,CAAC,CAyDD;AAED,wBAAsB,YAAY,CAChC,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,EAC7B,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,QAAQ,EAAE,GAAG,IAAI,CAAC,CA8D5B"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import { ResponseComputerToolCall } from "openai/resources/responses/responses.mjs";
|
|
2
|
+
import type { Page } from "playwright";
|
|
3
|
+
type ComputerAction = ResponseComputerToolCall.Click | ResponseComputerToolCall.DoubleClick | ResponseComputerToolCall.Drag | ResponseComputerToolCall.Keypress | ResponseComputerToolCall.Move | ResponseComputerToolCall.Screenshot | ResponseComputerToolCall.Scroll | ResponseComputerToolCall.Type | ResponseComputerToolCall.Wait;
|
|
4
|
+
export declare function getScreenshot(page: Page): Promise<string>;
|
|
5
|
+
export declare function handleModelAction(page: Page, action: ComputerAction): Promise<string>;
|
|
6
|
+
export {};
|
|
7
|
+
//# sourceMappingURL=computer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"computer.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/computer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,wBAAwB,EAAE,MAAM,0CAA0C,CAAC;AACpF,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAEvC,KAAK,cAAc,GACf,wBAAwB,CAAC,KAAK,GAC9B,wBAAwB,CAAC,WAAW,GACpC,wBAAwB,CAAC,IAAI,GAC7B,wBAAwB,CAAC,QAAQ,GACjC,wBAAwB,CAAC,IAAI,GAC7B,wBAAwB,CAAC,UAAU,GACnC,wBAAwB,CAAC,MAAM,GAC/B,wBAAwB,CAAC,IAAI,GAC7B,wBAAwB,CAAC,IAAI,CAAC;AAElC,wBAAsB,aAAa,CAAC,IAAI,EAAE,IAAI,mBAG7C;AAgCD,wBAAsB,iBAAiB,CACrC,IAAI,EAAE,IAAI,EACV,MAAM,EAAE,cAAc,GACrB,OAAO,CAAC,MAAM,CAAC,CA2HjB"}
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.handleModelAction = exports.getScreenshot = void 0;
|
|
4
|
+
/**
 * Takes a screenshot of the given page and returns it as a base64 string.
 *
 * @param page Object exposing an async `screenshot()` method (a Playwright
 *   `Page` according to the accompanying type declarations).
 * @returns The screenshot bytes encoded as base64, with no data-URL prefix.
 */
async function getScreenshot(page) {
    const rawImage = await page.screenshot();
    // Wrap in a Buffer before encoding so any byte-array-like result is handled.
    const imageBuffer = Buffer.from(rawImage);
    return imageBuffer.toString("base64");
}
|
|
8
|
+
exports.getScreenshot = getScreenshot;
|
|
9
|
+
/**
 * Maps lower-cased key names emitted by the computer-use model to the key
 * names passed to Playwright's `keyboard.press`. Keys that are not listed
 * here are forwarded unchanged by the caller.
 */
const CUA_KEY_TO_PLAYWRIGHT_KEY = {
    // "Slash" is the Playwright key code for "/"; the previous value "Divide"
    // is not a key name Playwright recognizes, so the press always failed.
    "/": "Slash",
    "\\": "Backslash",
    alt: "Alt",
    arrowdown: "ArrowDown",
    arrowleft: "ArrowLeft",
    arrowright: "ArrowRight",
    arrowup: "ArrowUp",
    backspace: "Backspace",
    capslock: "CapsLock",
    // "cmd" and "ctrl" are both mapped to "ControlOrMeta" for platform
    // agnostic behavior, as opposed to cmd: "Meta" and ctrl: "Control"
    cmd: "ControlOrMeta",
    ctrl: "ControlOrMeta",
    delete: "Delete",
    end: "End",
    enter: "Enter",
    esc: "Escape",
    home: "Home",
    insert: "Insert",
    option: "Alt",
    pagedown: "PageDown",
    pageup: "PageUp",
    shift: "Shift",
    space: " ",
    super: "Meta",
    tab: "Tab",
    win: "Meta",
};
|
|
38
|
+
/**
 * Executes a single computer-use action on the page and returns the
 * Playwright source line that reproduces it.
 *
 * Only the "click" (left/right/wheel), "keypress" and "type" actions produce
 * code; every other action is executed (or ignored) and yields "".
 * Errors raised while handling an action are logged and swallowed, in which
 * case whatever code was produced before the failure is returned.
 *
 * @param page Playwright page the action is performed on.
 * @param action Action object from the model; `action.type` selects the branch.
 * @returns Generated code terminated by "\n", or "" when nothing was generated.
 */
async function handleModelAction(page, action) {
    const actionType = action.type;
    let actionCode = "";
    try {
        switch (actionType) {
            case "click": {
                const { x, y, button = "left" } = action;
                console.log(`Action: click at (${x}, ${y}) with button '${button}'`);
                let pwButton = undefined;
                if (button === "left" || button === "right") {
                    pwButton = button;
                }
                else if (button === "wheel") {
                    pwButton = "middle";
                }
                if (pwButton) {
                    // Resolve the locator before clicking, since the click may
                    // navigate away or remove the element. A failure here must
                    // not prevent the click itself, otherwise the model's view
                    // of the page and the real page diverge.
                    let locator;
                    try {
                        locator = await page.evaluate(([px, py]) => {
                            const element = document.elementFromPoint(px, py);
                            return element ? window.playwright.generateLocator(element) : null;
                        }, [x, y]);
                    }
                    catch (e) {
                        console.error("Error generating locator for click at", x, y, ":", e);
                    }
                    if (locator) {
                        // Left is Playwright's default button; spell out the others
                        // so the generated code replays the same click.
                        const clickOptions = pwButton === "left" ? "" : `{ button: "${pwButton}" }`;
                        actionCode = `await page.${locator}.click(${clickOptions});\n`;
                    }
                    await page.mouse.click(x, y, { button: pwButton });
                }
                if (button === "back" || button === "forward") {
                    // Do page navigations, since there is no way to click on the back/forward buttons
                    if (button === "back") {
                        await page.goBack();
                    }
                    else if (button === "forward") {
                        await page.goForward();
                    }
                }
                break;
            }
            case "double_click": {
                const { x, y } = action;
                console.log(`Action: doubleclick at (${x}, ${y})`);
                await page.mouse.dblclick(x, y, { button: "left" });
                break;
            }
            case "move": {
                const { x, y } = action;
                console.log(`Action: move to (${x}, ${y})`);
                await page.mouse.move(x, y);
                break;
            }
            case "drag": {
                const { path } = action;
                // Serialize explicitly: interpolating the array of points
                // directly would print "[object Object]" for each entry.
                console.log(`Action: drag along path ${JSON.stringify(path)}`);
                if (!path || path.length === 0) {
                    break;
                }
                await page.mouse.move(path[0].x, path[0].y);
                await page.mouse.down();
                for (let i = 1; i < path.length; i++) {
                    await page.mouse.move(path[i].x, path[i].y);
                }
                await page.mouse.up();
                break;
            }
            case "scroll": {
                const { x, y, scroll_x, scroll_y } = action;
                console.log(`Action: scroll at (${x}, ${y}) with offsets (scroll_x=${scroll_x}, scroll_y=${scroll_y})`);
                await page.mouse.move(x, y);
                await page.evaluate(`window.scrollBy(${scroll_x}, ${scroll_y})`);
                break;
            }
            case "keypress": {
                const { keys } = action;
                const mappedKeys = keys.map((k) => {
                    return CUA_KEY_TO_PLAYWRIGHT_KEY[k.toLowerCase()] || k;
                });
                const mappedKey = mappedKeys.join("+"); // ["CTRL", "A"] becomes ControlOrMeta+A
                console.log(`Action: keypress for keys ${keys} -> '${mappedKey}'`);
                try {
                    await page.keyboard.press(mappedKey);
                    // Escape backslashes and single quotes so keys such as "'"
                    // still produce a valid single-quoted string literal.
                    const quotedKey = mappedKey.replace(/\\/g, "\\\\").replace(/'/g, "\\'");
                    actionCode = `await page.keyboard.press('${quotedKey}');\n`;
                }
                catch (e) {
                    console.error("Error pressing key", mappedKey, ":", e);
                }
                break;
            }
            case "type": {
                const { text } = action;
                console.log(`Action: type text '${text}'`);
                await page.keyboard.type(text);
                const locator = await page.evaluate(() => {
                    const element = document.activeElement;
                    return element ? window.playwright.generateLocator(element) : null;
                });
                if (locator) {
                    // JSON.stringify yields a double-quoted literal with quotes,
                    // backslashes and newlines escaped; plain text is unchanged.
                    actionCode = `await page.${locator}.fill(${JSON.stringify(text)});\n`;
                }
                break;
            }
            case "wait": {
                console.log(`Action: wait`);
                await page.waitForTimeout(2000);
                break;
            }
            case "screenshot": {
                // Nothing to do as screenshot is taken at each turn
                console.log(`Action: screenshot`);
                break;
            }
            default:
                console.log("Unrecognized action:", action);
        }
    }
    catch (e) {
        console.error("Error handling action", action, ":", e);
    }
    return actionCode;
}
|
|
151
|
+
exports.handleModelAction = handleModelAction;
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { Page } from "playwright";
|
|
2
|
+
export declare function startPlaywrightCodegen(page: Page): Promise<void>;
|
|
3
|
+
/**
|
|
4
|
+
* Run the loop that executes computer actions until no 'computer_call' is found.
|
|
5
|
+
*/
|
|
6
|
+
export declare function executeUsingComputerUseAgent({ page, task, }: {
|
|
7
|
+
page: Page;
|
|
8
|
+
task: string;
|
|
9
|
+
}): Promise<{
|
|
10
|
+
code: string;
|
|
11
|
+
importPaths: string[];
|
|
12
|
+
}>;
|
|
13
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAWlC,wBAAsB,sBAAsB,CAAC,IAAI,EAAE,IAAI,iBAoBtD;AAED;;GAEG;AACH,wBAAsB,4BAA4B,CAAC,EACjD,IAAI,EACJ,IAAI,GACL,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;CACd,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;CACvB,CAAC,CAkGD"}
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.executeUsingComputerUseAgent = exports.startPlaywrightCodegen = void 0;
|
|
7
|
+
const openai_1 = __importDefault(require("openai"));
|
|
8
|
+
const utils_1 = require("../browsing/utils");
|
|
9
|
+
const computer_1 = require("./computer");
|
|
10
|
+
const INSTRUCTIONS = `You will be asked to execute some actions in a browser context.
|
|
11
|
+
Don't ask the user for confirmations - just execute the actions.
|
|
12
|
+
|
|
13
|
+
For example, if the user message says "Click on Submit button", then
|
|
14
|
+
you click on the submit button -- even if it looks like a scary action.`;
|
|
15
|
+
/**
 * Experimental: schedules an in-page callback that switches the Playwright
 * recorder to "recording" mode, resumes from the pause and removes the
 * recorder's glass-pane overlay, then pauses the page.
 *
 * TODO: Use this method to offload code generation to Playwright.
 * It is still unclear how to retrieve the source code that is generated.
 *
 * @param page Playwright page to start recording on.
 */
async function startPlaywrightCodegen(page) {
    // This function is handed to page.evaluate and runs inside the browser,
    // so it must stay self-contained (no references to outer variables).
    const armRecorderInPage = () => {
        setTimeout(() => {
            // Step 1: switch the recorder into recording mode.
            // @ts-ignore
            console.log(window["__pw_recorderSetMode"]("recording"));
            // Step 2: resume, undoing the effect of pause().
            // @ts-ignore
            console.log(window["__pw_resume"]());
            // Step 3: drop the highlight overlay that Playwright draws on screen.
            // @ts-ignore
            const glassPane = document.querySelector("x-pw-glass");
            if (glassPane) {
                glassPane.remove();
            }
        }, 3000);
    };
    await page.evaluate(armRecorderInPage);
    await page.pause();
}
|
|
36
|
+
exports.startPlaywrightCodegen = startPlaywrightCodegen;
|
|
37
|
+
/**
|
|
38
|
+
* Run the loop that executes computer actions until no 'computer_call' is found.
|
|
39
|
+
*/
|
|
40
|
+
/**
 * Drives the page with the OpenAI computer-use model until a response
 * contains no "computer_call" item, collecting the code produced for each
 * executed action.
 *
 * @param page Playwright page to act on.
 * @param task Instruction text sent to the model as the user message.
 * @returns `code`: the concatenated output of `handleModelAction` for every
 *   executed action; `importPaths`: always an empty array.
 */
async function executeUsingComputerUseAgent({ page, task, }) {
    let collectedCode = "";
    await (0, utils_1.injectPwLocatorGenerator)(page);
    const firstScreenshot = await (0, computer_1.getScreenshot)(page);
    // Fall back to 1280x720 when the page reports no viewport size.
    const viewport = page.viewportSize();
    const screenWidth = viewport?.width || 1280;
    const screenHeight = viewport?.height || 720;
    // The same tool description is sent with every request.
    const tools = [
        {
            type: "computer-preview",
            display_width: screenWidth,
            display_height: screenHeight,
            environment: "browser",
        },
    ];
    const openai = new openai_1.default();
    // Opening request: the task text plus a screenshot of the current page.
    let response = await openai.responses.create({
        model: "computer-use-preview",
        tools,
        instructions: INSTRUCTIONS,
        input: [
            {
                role: "user",
                content: [
                    { type: "input_text", text: task },
                    {
                        type: "input_image",
                        image_url: `data:image/png;base64,${firstScreenshot}`,
                        detail: "high",
                    },
                ],
            },
        ],
        truncation: "auto",
    });
    for (;;) {
        // At most one computer call is expected per response; take the first.
        const computerCall = response.output.find((item) => item.type === "computer_call");
        if (computerCall === undefined) {
            console.log("No computer call found. Output from model:");
            /**
             * TODO: Sometimes the model will ask for a user confirmation - handle this flow
             * item.type is "message", status is "completed", item.content.type is "output_text"
             */
            for (const item of response.output) {
                console.log(JSON.stringify(item, null, 2));
            }
            // No further actions were requested, so the loop ends here.
            break;
        }
        const lastCallId = computerCall.call_id;
        // Perform the requested action and keep whatever code it generated.
        const actionCode = await (0, computer_1.handleModelAction)(page, computerCall.action);
        collectedCode += actionCode;
        // Give the page a second to settle before capturing the result.
        await new Promise((resolve) => setTimeout(resolve, 1000));
        const followUpScreenshot = await (0, computer_1.getScreenshot)(page);
        // Report the new page state back as the output of the computer call.
        response = await openai.responses.create({
            model: "computer-use-preview",
            previous_response_id: response.id,
            tools,
            input: [
                {
                    call_id: lastCallId,
                    type: "computer_call_output",
                    output: {
                        type: "computer_screenshot",
                        image_url: `data:image/png;base64,${followUpScreenshot}`,
                    },
                },
            ],
            truncation: "auto",
        });
    }
    return {
        code: collectedCode,
        // TODO: Does not support skills, so import paths are empty
        importPaths: [],
    };
}
|
|
132
|
+
exports.executeUsingComputerUseAgent = executeUsingComputerUseAgent;
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import { TestErrorDiagnosisDetails } from "@empiricalrun/shared-types";
|
|
2
|
+
import OpenAI from "openai";
|
|
2
3
|
export declare function fixStrictModeViolationPrompt({ screenshotsData, diagnosis, }: {
|
|
3
4
|
screenshotsData: {
|
|
4
5
|
success: string[];
|
|
5
6
|
failure: string[];
|
|
6
7
|
};
|
|
7
8
|
diagnosis: TestErrorDiagnosisDetails;
|
|
8
|
-
}):
|
|
9
|
+
}): OpenAI.Chat.Completions.ChatCompletionMessageParam[];
|
|
9
10
|
//# sourceMappingURL=strict-mode-violation.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"strict-mode-violation.d.ts","sourceRoot":"","sources":["../../../src/agent/diagnosis-agent/strict-mode-violation.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,yBAAyB,EAAE,MAAM,4BAA4B,CAAC;
|
|
1
|
+
{"version":3,"file":"strict-mode-violation.d.ts","sourceRoot":"","sources":["../../../src/agent/diagnosis-agent/strict-mode-violation.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,yBAAyB,EAAE,MAAM,4BAA4B,CAAC;AACvE,OAAO,MAAM,MAAM,QAAQ,CAAC;AAoB5B,wBAAgB,4BAA4B,CAAC,EAC3C,eAAe,EACf,SAAS,GACV,EAAE;IACD,eAAe,EAAE;QAAE,OAAO,EAAE,MAAM,EAAE,CAAC;QAAC,OAAO,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC;IAC1D,SAAS,EAAE,yBAAyB,CAAC;CACtC,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,0BAA0B,EAAE,CAiBvD"}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type { TestCase, TestGenConfigOptions } from "@empiricalrun/shared-types";
|
|
2
2
|
import { Page } from "playwright";
|
|
3
3
|
import { ScopeVars } from "../../types";
|
|
4
|
+
export { executeUsingComputerUseAgent } from "../cua";
|
|
4
5
|
export declare function createTestUsingMasterAgent({ task, page, testCase, specPath, options, scopeVars, }: {
|
|
5
6
|
task: string;
|
|
6
7
|
page: Page;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,QAAQ,EACR,oBAAoB,EACrB,MAAM,4BAA4B,CAAC;AACpC,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAelC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,QAAQ,EACR,oBAAoB,EACrB,MAAM,4BAA4B,CAAC;AACpC,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAelC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAWxC,OAAO,EAAE,4BAA4B,EAAE,MAAM,QAAQ,CAAC;AAuBtD,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,OAAO,CAAC,oBAAoB,CAAC,CAAC;IACvC,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GAwRA"}
|
package/dist/agent/master/run.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.createTestUsingMasterAgent = void 0;
|
|
3
|
+
exports.createTestUsingMasterAgent = exports.executeUsingComputerUseAgent = void 0;
|
|
4
4
|
const llm_1 = require("@empiricalrun/llm");
|
|
5
5
|
const actions_1 = require("../../actions");
|
|
6
6
|
const skill_1 = require("../../actions/skill");
|
|
@@ -20,6 +20,8 @@ const action_tool_calls_1 = require("./action-tool-calls");
|
|
|
20
20
|
const execute_browser_action_1 = require("./execute-browser-action");
|
|
21
21
|
const execute_skill_action_1 = require("./execute-skill-action");
|
|
22
22
|
const next_action_1 = require("./next-action");
|
|
23
|
+
var cua_1 = require("../cua");
|
|
24
|
+
Object.defineProperty(exports, "executeUsingComputerUseAgent", { enumerable: true, get: function () { return cua_1.executeUsingComputerUseAgent; } });
|
|
23
25
|
const MAX_ERROR_COUNT = 2;
|
|
24
26
|
function getPageVariables(stateVariables) {
|
|
25
27
|
const keys = Object.keys(stateVariables);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@empiricalrun/test-gen",
|
|
3
|
-
"version": "0.47.1",
|
|
3
|
+
"version": "0.47.2",
|
|
4
4
|
"publishConfig": {
|
|
5
5
|
"registry": "https://registry.npmjs.org/",
|
|
6
6
|
"access": "public"
|
|
@@ -64,7 +64,7 @@
|
|
|
64
64
|
"mime": "^4.0.4",
|
|
65
65
|
"minimatch": "^10.0.1",
|
|
66
66
|
"nanoid": "^5.0.7",
|
|
67
|
-
"openai": "4.
|
|
67
|
+
"openai": "4.87.3",
|
|
68
68
|
"picocolors": "^1.0.1",
|
|
69
69
|
"prettier": "^3.2.5",
|
|
70
70
|
"remove-markdown": "^0.5.5",
|
|
@@ -73,9 +73,9 @@
|
|
|
73
73
|
"ts-morph": "^23.0.0",
|
|
74
74
|
"tsx": "^4.16.2",
|
|
75
75
|
"typescript": "^5.3.3",
|
|
76
|
-
"@empiricalrun/
|
|
76
|
+
"@empiricalrun/llm": "^0.9.36",
|
|
77
77
|
"@empiricalrun/r2-uploader": "^0.3.8",
|
|
78
|
-
"@empiricalrun/
|
|
78
|
+
"@empiricalrun/reporter": "^0.23.1"
|
|
79
79
|
},
|
|
80
80
|
"devDependencies": {
|
|
81
81
|
"@playwright/test": "1.47.1",
|
|
@@ -91,7 +91,7 @@
|
|
|
91
91
|
"js-levenshtein": "^1.1.6",
|
|
92
92
|
"playwright": "1.47.1",
|
|
93
93
|
"ts-patch": "^3.3.0",
|
|
94
|
-
"@empiricalrun/shared-types": "0.0.
|
|
94
|
+
"@empiricalrun/shared-types": "0.0.6"
|
|
95
95
|
},
|
|
96
96
|
"scripts": {
|
|
97
97
|
"dev": "tspc --build --watch",
|