@empiricalrun/test-gen 0.47.1 → 0.47.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/dist/actions/index.d.ts +1 -1
- package/dist/actions/index.js +1 -1
- package/dist/agent/browsing/index.d.ts.map +1 -1
- package/dist/agent/browsing/index.js +2 -3
- package/dist/agent/codegen/generate-code-apply-changes.d.ts.map +1 -1
- package/dist/agent/codegen/generate-code-apply-changes.js +5 -6
- package/dist/agent/codegen/run.d.ts +4 -2
- package/dist/agent/codegen/run.d.ts.map +1 -1
- package/dist/agent/codegen/utils.d.ts +0 -12
- package/dist/agent/codegen/utils.d.ts.map +1 -1
- package/dist/agent/codegen/utils.js +2 -39
- package/dist/agent/cua/computer.d.ts +7 -0
- package/dist/agent/cua/computer.d.ts.map +1 -0
- package/dist/agent/cua/computer.js +151 -0
- package/dist/agent/cua/index.d.ts +13 -0
- package/dist/agent/cua/index.d.ts.map +1 -0
- package/dist/agent/cua/index.js +132 -0
- package/dist/agent/diagnosis-agent/strict-mode-violation.d.ts +2 -1
- package/dist/agent/diagnosis-agent/strict-mode-violation.d.ts.map +1 -1
- package/dist/agent/master/browser-tests/index.spec.js +15 -1
- package/dist/agent/master/element-annotation.d.ts.map +1 -1
- package/dist/agent/master/element-annotation.js +1 -2
- package/dist/agent/master/execute-browser-action.d.ts.map +1 -1
- package/dist/agent/master/execute-browser-action.js +1 -2
- package/dist/agent/master/execute-skill-action.d.ts.map +1 -1
- package/dist/agent/master/execute-skill-action.js +1 -2
- package/dist/agent/master/next-action.d.ts.map +1 -1
- package/dist/agent/master/next-action.js +2 -3
- package/dist/agent/master/planner.d.ts.map +1 -1
- package/dist/agent/master/planner.js +1 -2
- package/dist/agent/master/run.d.ts +1 -0
- package/dist/agent/master/run.d.ts.map +1 -1
- package/dist/agent/master/run.js +4 -3
- package/dist/agent/master/scroller.d.ts.map +1 -1
- package/dist/agent/master/scroller.js +2 -3
- package/dist/agent/planner/run-time-planner.d.ts.map +1 -1
- package/dist/agent/planner/run-time-planner.js +1 -2
- package/package.json +5 -5
- package/dist/agent/utils.d.ts +0 -2
- package/dist/agent/utils.d.ts.map +0 -1
- package/dist/agent/utils.js +0 -12
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,21 @@
|
|
|
1
1
|
# @empiricalrun/test-gen
|
|
2
2
|
|
|
3
|
+
## 0.47.3
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- 56ed4eb: chore: remove parseJson utility, let it throw exceptions
|
|
8
|
+
|
|
9
|
+
## 0.47.2
|
|
10
|
+
|
|
11
|
+
### Patch Changes
|
|
12
|
+
|
|
13
|
+
- af97c0f: feat: cua agent can generate code
|
|
14
|
+
- d7f1678: feat: support openai cua for overlay dismissal, bump openai to 4.87.3
|
|
15
|
+
- 09e880a: feat: add more actions for cua with better types
|
|
16
|
+
- Updated dependencies [d7f1678]
|
|
17
|
+
- @empiricalrun/llm@0.9.36
|
|
18
|
+
|
|
3
19
|
## 0.47.1
|
|
4
20
|
|
|
5
21
|
### Patch Changes
|
package/dist/actions/index.d.ts
CHANGED
|
@@ -9,7 +9,7 @@ export declare class PlaywrightActions {
|
|
|
9
9
|
constructor(page: TestGenPage, stateVariables?: Record<string, any>);
|
|
10
10
|
executeAction(name: string | undefined, args: ActionArgs, trace?: TraceClient): Promise<string | undefined>;
|
|
11
11
|
getBrowsingActionSchemas(): ActionSchema[];
|
|
12
|
-
|
|
12
|
+
getSkillsActionSchemas(): ActionSchema[];
|
|
13
13
|
generateCode(): {
|
|
14
14
|
code: string;
|
|
15
15
|
importPaths: string[];
|
package/dist/actions/index.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACpE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACpE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAMlD,MAAM,MAAM,kBAAkB,GAAG;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,GAAG,SAAS,CAAC;CAC1B,CAAC;AAEF,wBAAsB,6BAA6B,CAAC,EAClD,MAAM,EACN,IAAI,EACJ,OAAO,EACP,GAAG,EACH,KAAK,GACN,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,IAAI,CAAC;IACX,OAAO,EAAE,iBAAiB,CAAC;IAC3B,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC,kBAAkB,GAAG,SAAS,CAAC,CA8D1C"}
|
|
@@ -6,7 +6,6 @@ const constants_1 = require("../../constants");
|
|
|
6
6
|
const promptTemplate_0 = "{{#section \"system\"}}\nYou are a browser automation agent who is given a task to generate code for navigation and assertion. This task is your\ngoal and you must achieve it.\n\nYou will be provided with already executed actions and basis that you need to pick the next step to achieve the task.\nRemember that the goal must be achieved.\n\nYou will be provided with the web page snapshot in the form of Document Object Model. Based on the goal and available\ntool calls you need to pick the appropriate tool call.\n\nInstructions:\n- Take actions one at a time. Do not try to take multiple actions\n- You can respond with multiple assertions in one shot\n- Do not repeat the same actions again otherwise your response will be marked INVALID\n- Avoid repeating errors which we got while executing the last action\n- Stick to the task provided to you and mark the task done once the task is complete\n- Do not execute any action which is not mentioned in the task\n- Do not repeat actions which are already executed more than twice otherwise your response will be marked INVALID\n- Always refer to \"Executed actions\" before deciding your next action for completion of the task.\n- End the task done if all actions required for task are executed\n{{/section}}\n\n{{#section \"user\"}}\nTask:\n{{task}}\n\nCurrent page snapshot:\n{{pageSnapshot}}\n{{/section}}";
|
|
7
7
|
const reporter_1 = require("../../reporter");
|
|
8
8
|
const html_1 = require("../../utils/html");
|
|
9
|
-
const utils_1 = require("../utils");
|
|
10
9
|
async function executeTaskUsingBrowsingAgent({ action, page, actions, llm, trace, }) {
|
|
11
10
|
let generatedCodeSteps;
|
|
12
11
|
const tools = actions.getBrowsingActionSchemas();
|
|
@@ -48,9 +47,9 @@ async function executeTaskUsingBrowsingAgent({ action, page, actions, llm, trace
|
|
|
48
47
|
const toolCallsSpan = browsingAgentSpan?.span({ name: "tool-calls" });
|
|
49
48
|
for (const i in toolCalls) {
|
|
50
49
|
const toolCall = toolCalls[i];
|
|
51
|
-
const args =
|
|
50
|
+
const args = JSON.parse(toolCall.function.arguments);
|
|
52
51
|
try {
|
|
53
|
-
const code = await actions.executeAction(toolCall.function.name,
|
|
52
|
+
const code = await actions.executeAction(toolCall.function.name, JSON.parse(toolCall.function.arguments), toolCallsSpan);
|
|
54
53
|
generatedCodeSteps = {
|
|
55
54
|
// Passing reason as action, in order to pass the correct action that took place to runtime planner
|
|
56
55
|
action: args.reason,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"generate-code-apply-changes.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/generate-code-apply-changes.ts"],"names":[],"mappings":"AAAA,OAAO,EAAuB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAQrE,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;
|
|
1
|
+
{"version":3,"file":"generate-code-apply-changes.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/generate-code-apply-changes.ts"],"names":[],"mappings":"AAAA,OAAO,EAAuB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAQrE,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAMhD,OAAO,EAAsB,UAAU,EAAE,MAAM,SAAS,CAAC;AAqLzD,wBAAgB,8BAA8B,CAAC,KAAK,EAAE,MAAM,UAkD3D;AA2DD,wBAAsB,2BAA2B,CAAC,EAChD,IAAI,EACJ,KAAK,EACL,MAAM,EACN,gBAAgB,GACjB,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,MAAM,CAAC,EAAE,YAAY,CAAC;IACtB,gBAAgB,EAAE,MAAM,OAAO,CAAC;QAAE,MAAM,EAAE,MAAM,GAAG,SAAS,CAAA;KAAE,CAAC,CAAC;CACjE,GAAG,OAAO,CAAC,UAAU,EAAE,CAAC,CAuIxB"}
|
|
@@ -8,9 +8,8 @@ const llm_1 = require("@empiricalrun/llm");
|
|
|
8
8
|
const fs_extra_1 = __importDefault(require("fs-extra"));
|
|
9
9
|
const path_1 = require("path");
|
|
10
10
|
const constants_1 = require("../../constants");
|
|
11
|
-
const utils_1 = require("../utils");
|
|
12
11
|
const types_1 = require("./types");
|
|
13
|
-
const
|
|
12
|
+
const utils_1 = require("./utils");
|
|
14
13
|
function getCodeEditorToolCalls() {
|
|
15
14
|
const strReplace = {
|
|
16
15
|
name: "code-block-replace",
|
|
@@ -160,7 +159,7 @@ async function getPlanForCodeEditorAgent({ prompt, trace, }) {
|
|
|
160
159
|
}
|
|
161
160
|
if (completion.tool_calls[0].function.name === "change_plan") {
|
|
162
161
|
const args = completion.tool_calls[0].function.arguments;
|
|
163
|
-
const plan =
|
|
162
|
+
const plan = JSON.parse(args).plan;
|
|
164
163
|
return plan;
|
|
165
164
|
}
|
|
166
165
|
}
|
|
@@ -342,7 +341,7 @@ async function generateCodeAndApplyChanges({ task, trace, logger, getRelevantFil
|
|
|
342
341
|
}
|
|
343
342
|
await Promise.all(createFileToolCalls.map((tc) => {
|
|
344
343
|
return (async () => {
|
|
345
|
-
const args =
|
|
344
|
+
const args = JSON.parse(tc.function.arguments);
|
|
346
345
|
updatedFiles.push({
|
|
347
346
|
filePath: args.filePath,
|
|
348
347
|
oldCode: "",
|
|
@@ -360,13 +359,13 @@ async function generateCodeAndApplyChanges({ task, trace, logger, getRelevantFil
|
|
|
360
359
|
}
|
|
361
360
|
// Filter out the tool calls which are for replacing code in existing files
|
|
362
361
|
const fileChanges = strReplaceToolCalls
|
|
363
|
-
.map((toolCall) =>
|
|
362
|
+
.map((toolCall) => JSON.parse(toolCall.function.arguments))
|
|
364
363
|
.filter((f) => f.filePath && fs_extra_1.default.existsSync(f.filePath));
|
|
365
364
|
// We add all the suggested changes to the updatedFiles array
|
|
366
365
|
// This is used to validate and format files later
|
|
367
366
|
updatedFiles.push(...fileChanges);
|
|
368
367
|
// applyChangesResponse contains the errors occurred while applying the changes
|
|
369
|
-
const updates = await (0,
|
|
368
|
+
const updates = await (0, utils_1.applyFileChangesUsingStrReplace)({
|
|
370
369
|
trace: codeEditorSpan,
|
|
371
370
|
fileChanges,
|
|
372
371
|
logger,
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import { TraceClient } from "@empiricalrun/llm";
|
|
2
2
|
import type { TestCase, TestGenConfigOptions } from "@empiricalrun/shared-types";
|
|
3
|
+
import OpenAI from "openai";
|
|
3
4
|
import { CustomLogger } from "../../bin/logger";
|
|
5
|
+
import { CreateTestCodeUpdate } from "./types";
|
|
4
6
|
export declare function createTestWithCodeAgent({ testCase, file, repoFiles, trace, }: {
|
|
5
7
|
testCase: TestCase;
|
|
6
8
|
file: string;
|
|
@@ -8,9 +10,9 @@ export declare function createTestWithCodeAgent({ testCase, file, repoFiles, tra
|
|
|
8
10
|
trace?: TraceClient;
|
|
9
11
|
logger?: CustomLogger;
|
|
10
12
|
}): Promise<{
|
|
11
|
-
prompt:
|
|
13
|
+
prompt: OpenAI.Chat.Completions.ChatCompletionMessageParam[];
|
|
12
14
|
agentResponse: string;
|
|
13
|
-
fileChanges:
|
|
15
|
+
fileChanges: CreateTestCodeUpdate[];
|
|
14
16
|
}>;
|
|
15
17
|
export declare function generateTest(testCase: TestCase, file: string, options: TestGenConfigOptions, trace?: TraceClient): Promise<TestCase[] | void>;
|
|
16
18
|
//# sourceMappingURL=run.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAIL,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAC3B,OAAO,KAAK,EACV,QAAQ,EACR,oBAAoB,EACrB,MAAM,4BAA4B,CAAC;
|
|
1
|
+
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAIL,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAC3B,OAAO,KAAK,EACV,QAAQ,EACR,oBAAoB,EACrB,MAAM,4BAA4B,CAAC;AAEpC,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAYhD,OAAO,EAAE,oBAAoB,EAAE,MAAM,SAAS,CAAC;AAQ/C,wBAAsB,uBAAuB,CAAC,EAC5C,QAAQ,EACR,IAAI,EACJ,SAAS,EACT,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,GAAG,SAAS,CAAC;IAC9B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,MAAM,CAAC,EAAE,YAAY,CAAC;CACvB,GAAG,OAAO,CAAC;IACV,MAAM,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,0BAA0B,EAAE,CAAC;IAC7D,aAAa,EAAE,MAAM,CAAC;IACtB,WAAW,EAAE,oBAAoB,EAAE,CAAC;CACrC,CAAC,CAyDD;AAED,wBAAsB,YAAY,CAChC,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,EAC7B,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,QAAQ,EAAE,GAAG,IAAI,CAAC,CA8D5B"}
|
|
@@ -72,18 +72,6 @@ export declare function applyFileChangesUsingStrReplace({ trace, fileChanges, lo
|
|
|
72
72
|
fileChanges: CodeUpdate[];
|
|
73
73
|
logger?: CustomLogger;
|
|
74
74
|
}): Promise<FileUpdateResponse[]>;
|
|
75
|
-
export declare function searchAndReplaceCodeUsingStrReplace({ logger, fileChange, }: {
|
|
76
|
-
fileChange: {
|
|
77
|
-
filePath: string | undefined;
|
|
78
|
-
oldCode: string | undefined;
|
|
79
|
-
newCode: string | undefined;
|
|
80
|
-
reason: string | undefined;
|
|
81
|
-
};
|
|
82
|
-
logger?: CustomLogger;
|
|
83
|
-
}): Promise<{
|
|
84
|
-
result: FileUpdateResponse;
|
|
85
|
-
updatedContent: string;
|
|
86
|
-
}>;
|
|
87
75
|
export declare function applyFileChangesForCreateTest({ trace, fileChanges, testgenUpdatesReporter, }: {
|
|
88
76
|
trace?: TraceClient;
|
|
89
77
|
fileChanges: CreateTestCodeUpdate[];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChE,OAAO,KAAK,EACV,QAAQ,EACR,oBAAoB,EACrB,MAAM,4BAA4B,CAAC;AAIpC,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAMhD,OAAO,EAAE,sBAAsB,EAAE,MAAM,gBAAgB,CAAC;AAExD,OAAO,EAAE,UAAU,EAAE,oBAAoB,EAAE,kBAAkB,EAAE,MAAM,SAAS,CAAC;AAE/E;;;;;;;;;;;GAWG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,MAAM,GAAG,UAAU,EAAE,CAiB9D;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,wBAAwB,CACtC,KAAK,EAAE,MAAM,GACZ,oBAAoB,EAAE,CAgBxB;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,GAAG;IACvD,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3B,cAAc,EAAE,MAAM,GAAG,SAAS,CAAC;IACnC,QAAQ,EAAE,MAAM,GAAG,SAAS,CAAC;IAC7B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;CAC5B,EAAE,CA8BF;AAED,wBAAgB,2BAA2B,CAAC,KAAK,EAAE,MAAM,GAAG;IAC1D,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;CACpB,EAAE,CAqCF;AAED,wBAAsB,0BAA0B,CAAC,EAC/C,aAAoB,EACpB,KAAK,EACL,QAAQ,EACR,SAAS,EACT,MAAM,EACN,cAAc,EACd,SAAS,EACT,iBAAiB,GAClB,EAAE;IACD,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,QAAQ,EAAE,QAAQ,CAAC;IACnB,SAAS,EAAE,CAAC,MAAM,GAAG,SAAS,CAAC,EAAE,CAAC;IAClC,MAAM,CAAC,EAAE,YAAY,CAAC;IACtB,cAAc,CAAC,EAAE,oBAAoB,CAAC;IACtC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B,iBA0BA;AAED,wBAAsB,+BAA+B,CAAC,EACpD,KAAK,EACL,WAAW,EACX,MAAM,GACP,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,WAAW,EAAE,UAAU,EAAE,CAAC;IAC1B,MAAM,CAAC,EAAE,YAAY,CAAC;CACvB,GAAG,OAAO,CAAC,kBAAkB,EAAE,CAAC,CA6ChC;AAED,wBAAsB,
|
|
1
|
+
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChE,OAAO,KAAK,EACV,QAAQ,EACR,oBAAoB,EACrB,MAAM,4BAA4B,CAAC;AAIpC,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAMhD,OAAO,EAAE,sBAAsB,EAAE,MAAM,gBAAgB,CAAC;AAExD,OAAO,EAAE,UAAU,EAAE,oBAAoB,EAAE,kBAAkB,EAAE,MAAM,SAAS,CAAC;AAE/E;;;;;;;;;;;GAWG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,MAAM,GAAG,UAAU,EAAE,CAiB9D;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,wBAAwB,CACtC,KAAK,EAAE,MAAM,GACZ,oBAAoB,EAAE,CAgBxB;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,GAAG;IACvD,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3B,cAAc,EAAE,MAAM,GAAG,SAAS,CAAC;IACnC,QAAQ,EAAE,MAAM,GAAG,SAAS,CAAC;IAC7B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;CAC5B,EAAE,CA8BF;AAED,wBAAgB,2BAA2B,CAAC,KAAK,EAAE,MAAM,GAAG;IAC1D,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;CACpB,EAAE,CAqCF;AAED,wBAAsB,0BAA0B,CAAC,EAC/C,aAAoB,EACpB,KAAK,EACL,QAAQ,EACR,SAAS,EACT,MAAM,EACN,cAAc,EACd,SAAS,EACT,iBAAiB,GAClB,EAAE;IACD,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,QAAQ,EAAE,QAAQ,CAAC;IACnB,SAAS,EAAE,CAAC,MAAM,GAAG,SAAS,CAAC,EAAE,CAAC;IAClC,MAAM,CAAC,EAAE,YAAY,CAAC;IACtB,cAAc,CAAC,EAAE,oBAAoB,CAAC;IACtC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B,iBA0BA;AAED,wBAAsB,+BAA+B,CAAC,EACpD,KAAK,EACL,WAAW,EACX,MAAM,GACP,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,WAAW,EAAE,UAAU,EAAE,CAAC;IAC1B,MAAM,CAAC,EAAE,YAAY,CAAC;CACvB,GAAG,OAAO,CAAC,kBAAkB,EAAE,CAAC,CA6ChC;AAED,wBAAsB,6BAA6B,CAAC,EAClD,KAAK,EACL,WAAW,EACX,sBAAsB,GACvB,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,WAAW,EAAE,oBAAoB,EAAE,CAAC;IACpC,sBAAsB,CAAC,EAAE,sBAAsB,CAAC;CACjD,iBAoDA;AAED,wBAAsB,oBAAoB,CAAC,EACzC,MAAM,EACN,UAAU,GACX,EAAE;IACD,UAAU,EAAE;QACV,QAAQ,EAAE,MAAM,GAAG,SAAS,CAAC;QAC7B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;QAC5B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;QAC5B,MAAM
,EAAE,MAAM,GAAG,SAAS,CAAC;KAC5B,CAAC;IACF,MAAM,CAAC,EAAE,YAAY,CAAC;CACvB,GAAG,OAAO,CAAC;IACV,MAAM,EAAE;QAAE,KAAK,EAAE,OAAO,CAAC;QAAC,YAAY,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC;IACnE,cAAc,EAAE,MAAM,CAAC;CACxB,CAAC,CA8BD;AAED,wBAAsB,gBAAgB,CAAC,EACrC,KAAK,EACL,QAAQ,EACR,WAAW,EACX,MAAM,GACP,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,QAAQ,EAAE,QAAQ,CAAC;IACnB,WAAW,EAAE;QACX,QAAQ,EAAE,MAAM,GAAG,SAAS,CAAC;QAC7B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;QAC5B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;QAC5B,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;KAC5B,EAAE,CAAC;IACJ,MAAM,CAAC,EAAE,YAAY,CAAC;IACtB,cAAc,CAAC,EAAE,oBAAoB,CAAC;IACtC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B,GAAG,OAAO,CAAC;IAAE,KAAK,EAAE,OAAO,CAAC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAA;CAAE,EAAE,CAAC,CAkIxE"}
|
|
@@ -3,7 +3,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
3
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.applyFileChanges = exports.searchAndReplaceCode = exports.applyFileChangesForCreateTest = exports.
|
|
6
|
+
exports.applyFileChanges = exports.searchAndReplaceCode = exports.applyFileChangesForCreateTest = exports.applyFileChangesUsingStrReplace = exports.validateTypesAndFormatCode = exports.extractTestStepsSuggestions = exports.extractAppendTestUpdates = exports.extractCreateTestUpdates = exports.extractTestUpdates = void 0;
|
|
7
7
|
const llm_1 = require("@empiricalrun/llm");
|
|
8
8
|
const fs_extra_1 = __importDefault(require("fs-extra"));
|
|
9
9
|
const ts_morph_1 = require("ts-morph");
|
|
@@ -198,44 +198,6 @@ async function applyFileChangesUsingStrReplace({ trace, fileChanges, logger, })
|
|
|
198
198
|
return results;
|
|
199
199
|
}
|
|
200
200
|
exports.applyFileChangesUsingStrReplace = applyFileChangesUsingStrReplace;
|
|
201
|
-
async function searchAndReplaceCodeUsingStrReplace({ logger, fileChange, }) {
|
|
202
|
-
let contents = await fs_extra_1.default.readFile(fileChange.filePath, "utf-8");
|
|
203
|
-
if (contents.includes(fileChange.oldCode)) {
|
|
204
|
-
// Check for multiple instances of old code block
|
|
205
|
-
// If there are multiple instances, then we cannot safely determine which instance to replace
|
|
206
|
-
const firstIndex = contents.indexOf(fileChange.oldCode);
|
|
207
|
-
const lastIndex = contents.lastIndexOf(fileChange.oldCode);
|
|
208
|
-
if (firstIndex !== lastIndex) {
|
|
209
|
-
return {
|
|
210
|
-
result: {
|
|
211
|
-
error: true,
|
|
212
|
-
errorMessage: `Multiple instances of the code block found in file "${fileChange.filePath}". Cannot safely determine which instance to replace.`,
|
|
213
|
-
filePath: fileChange.filePath,
|
|
214
|
-
},
|
|
215
|
-
updatedContent: contents,
|
|
216
|
-
};
|
|
217
|
-
}
|
|
218
|
-
const updatedContent = contents.replace(fileChange.oldCode, `\n\n${fileChange.newCode}`);
|
|
219
|
-
return {
|
|
220
|
-
result: {
|
|
221
|
-
error: false,
|
|
222
|
-
errorMessage: "",
|
|
223
|
-
filePath: fileChange.filePath,
|
|
224
|
-
},
|
|
225
|
-
updatedContent,
|
|
226
|
-
};
|
|
227
|
-
}
|
|
228
|
-
logger?.error(`Unable to find the code to update in ${fileChange.filePath}`);
|
|
229
|
-
return {
|
|
230
|
-
result: {
|
|
231
|
-
error: true,
|
|
232
|
-
errorMessage: `The content of "old_code_block" corresponding to file path "${fileChange.filePath}" did not match the current content of the file "${fileChange.filePath}"`,
|
|
233
|
-
filePath: fileChange.filePath,
|
|
234
|
-
},
|
|
235
|
-
updatedContent: contents,
|
|
236
|
-
};
|
|
237
|
-
}
|
|
238
|
-
exports.searchAndReplaceCodeUsingStrReplace = searchAndReplaceCodeUsingStrReplace;
|
|
239
201
|
async function applyFileChangesForCreateTest({ trace, fileChanges, testgenUpdatesReporter, }) {
|
|
240
202
|
const repoEditFileChangesSpan = trace?.span({
|
|
241
203
|
name: "create-test-file-changes",
|
|
@@ -298,6 +260,7 @@ async function searchAndReplaceCode({ logger, fileChange, }) {
|
|
|
298
260
|
return {
|
|
299
261
|
result: {
|
|
300
262
|
error: true,
|
|
263
|
+
// TODO: old_code_block should be replaced with oldCode
|
|
301
264
|
errorMessage: `The content of "old_code_block" corresponding to file path "${fileChange.filePath}" did not match the current content of the file "${fileChange.filePath}"`,
|
|
302
265
|
filePath: fileChange.filePath,
|
|
303
266
|
},
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import { ResponseComputerToolCall } from "openai/resources/responses/responses.mjs";
|
|
2
|
+
import type { Page } from "playwright";
|
|
3
|
+
type ComputerAction = ResponseComputerToolCall.Click | ResponseComputerToolCall.DoubleClick | ResponseComputerToolCall.Drag | ResponseComputerToolCall.Keypress | ResponseComputerToolCall.Move | ResponseComputerToolCall.Screenshot | ResponseComputerToolCall.Scroll | ResponseComputerToolCall.Type | ResponseComputerToolCall.Wait;
|
|
4
|
+
export declare function getScreenshot(page: Page): Promise<string>;
|
|
5
|
+
export declare function handleModelAction(page: Page, action: ComputerAction): Promise<string>;
|
|
6
|
+
export {};
|
|
7
|
+
//# sourceMappingURL=computer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"computer.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/computer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,wBAAwB,EAAE,MAAM,0CAA0C,CAAC;AACpF,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAEvC,KAAK,cAAc,GACf,wBAAwB,CAAC,KAAK,GAC9B,wBAAwB,CAAC,WAAW,GACpC,wBAAwB,CAAC,IAAI,GAC7B,wBAAwB,CAAC,QAAQ,GACjC,wBAAwB,CAAC,IAAI,GAC7B,wBAAwB,CAAC,UAAU,GACnC,wBAAwB,CAAC,MAAM,GAC/B,wBAAwB,CAAC,IAAI,GAC7B,wBAAwB,CAAC,IAAI,CAAC;AAElC,wBAAsB,aAAa,CAAC,IAAI,EAAE,IAAI,mBAG7C;AAgCD,wBAAsB,iBAAiB,CACrC,IAAI,EAAE,IAAI,EACV,MAAM,EAAE,cAAc,GACrB,OAAO,CAAC,MAAM,CAAC,CA2HjB"}
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.handleModelAction = exports.getScreenshot = void 0;
|
|
4
|
+
async function getScreenshot(page) {
|
|
5
|
+
const screenshotBytes = await page.screenshot();
|
|
6
|
+
return Buffer.from(screenshotBytes).toString("base64");
|
|
7
|
+
}
|
|
8
|
+
exports.getScreenshot = getScreenshot;
|
|
9
|
+
const CUA_KEY_TO_PLAYWRIGHT_KEY = {
|
|
10
|
+
"/": "Divide",
|
|
11
|
+
"\\": "Backslash",
|
|
12
|
+
alt: "Alt",
|
|
13
|
+
arrowdown: "ArrowDown",
|
|
14
|
+
arrowleft: "ArrowLeft",
|
|
15
|
+
arrowright: "ArrowRight",
|
|
16
|
+
arrowup: "ArrowUp",
|
|
17
|
+
backspace: "Backspace",
|
|
18
|
+
capslock: "CapsLock",
|
|
19
|
+
// "cmd" and "ctrl" are both mapped to "ControlOrMeta" for platform
|
|
20
|
+
// agnostic behavior, as opposed to cmd: "Meta" and ctrl: "Control"
|
|
21
|
+
cmd: "ControlOrMeta",
|
|
22
|
+
ctrl: "ControlOrMeta",
|
|
23
|
+
delete: "Delete",
|
|
24
|
+
end: "End",
|
|
25
|
+
enter: "Enter",
|
|
26
|
+
esc: "Escape",
|
|
27
|
+
home: "Home",
|
|
28
|
+
insert: "Insert",
|
|
29
|
+
option: "Alt",
|
|
30
|
+
pagedown: "PageDown",
|
|
31
|
+
pageup: "PageUp",
|
|
32
|
+
shift: "Shift",
|
|
33
|
+
space: " ",
|
|
34
|
+
super: "Meta",
|
|
35
|
+
tab: "Tab",
|
|
36
|
+
win: "Meta",
|
|
37
|
+
};
|
|
38
|
+
async function handleModelAction(page, action) {
|
|
39
|
+
const actionType = action.type;
|
|
40
|
+
let actionCode = "";
|
|
41
|
+
try {
|
|
42
|
+
switch (actionType) {
|
|
43
|
+
case "click": {
|
|
44
|
+
const { x, y, button = "left" } = action;
|
|
45
|
+
console.log(`Action: click at (${x}, ${y}) with button '${button}'`);
|
|
46
|
+
let pwButton = undefined;
|
|
47
|
+
if (button === "left" || button === "right") {
|
|
48
|
+
pwButton = button;
|
|
49
|
+
}
|
|
50
|
+
else if (button === "wheel") {
|
|
51
|
+
pwButton = "middle";
|
|
52
|
+
}
|
|
53
|
+
if (pwButton) {
|
|
54
|
+
const locator = await page.evaluate(([x, y]) => {
|
|
55
|
+
const element = document.elementFromPoint(x, y);
|
|
56
|
+
return window.playwright.generateLocator(element);
|
|
57
|
+
}, [x, y]);
|
|
58
|
+
actionCode = `await page.${locator}.click();\n`;
|
|
59
|
+
await page.mouse.click(x, y, { button: pwButton });
|
|
60
|
+
}
|
|
61
|
+
if (button === "back" || button === "forward") {
|
|
62
|
+
// Do page navigations, since there is no way to click on the back/forward buttons
|
|
63
|
+
if (button === "back") {
|
|
64
|
+
await page.goBack();
|
|
65
|
+
}
|
|
66
|
+
else if (button === "forward") {
|
|
67
|
+
await page.goForward();
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
break;
|
|
71
|
+
}
|
|
72
|
+
case "double_click": {
|
|
73
|
+
const { x, y } = action;
|
|
74
|
+
console.log(`Action: doubleclick at (${x}, ${y})`);
|
|
75
|
+
await page.mouse.dblclick(x, y, { button: "left" });
|
|
76
|
+
break;
|
|
77
|
+
}
|
|
78
|
+
case "move": {
|
|
79
|
+
const { x, y } = action;
|
|
80
|
+
console.log(`Action: move to (${x}, ${y})`);
|
|
81
|
+
await page.mouse.move(x, y);
|
|
82
|
+
break;
|
|
83
|
+
}
|
|
84
|
+
case "drag": {
|
|
85
|
+
const { path } = action;
|
|
86
|
+
console.log(`Action: drag along path ${path}`);
|
|
87
|
+
if (!path || path.length === 0) {
|
|
88
|
+
break;
|
|
89
|
+
}
|
|
90
|
+
await page.mouse.move(path[0].x, path[0].y);
|
|
91
|
+
await page.mouse.down();
|
|
92
|
+
for (let i = 1; i < path.length; i++) {
|
|
93
|
+
await page.mouse.move(path[i].x, path[i].y);
|
|
94
|
+
}
|
|
95
|
+
await page.mouse.up();
|
|
96
|
+
break;
|
|
97
|
+
}
|
|
98
|
+
case "scroll": {
|
|
99
|
+
const { x, y, scroll_x, scroll_y } = action;
|
|
100
|
+
console.log(`Action: scroll at (${x}, ${y}) with offsets (scroll_x=${scroll_x}, scroll_y=${scroll_y})`);
|
|
101
|
+
await page.mouse.move(x, y);
|
|
102
|
+
await page.evaluate(`window.scrollBy(${scroll_x}, ${scroll_y})`);
|
|
103
|
+
break;
|
|
104
|
+
}
|
|
105
|
+
case "keypress": {
|
|
106
|
+
const { keys } = action;
|
|
107
|
+
const mappedKeys = keys.map((k) => {
|
|
108
|
+
return CUA_KEY_TO_PLAYWRIGHT_KEY[k.toLowerCase()] || k;
|
|
109
|
+
});
|
|
110
|
+
const mappedKey = mappedKeys.join("+"); // ["CTRL", "A"] becomes ControlOrMeta+A
|
|
111
|
+
console.log(`Action: keypress for keys ${keys} -> '${mappedKey}'`);
|
|
112
|
+
try {
|
|
113
|
+
await page.keyboard.press(mappedKey);
|
|
114
|
+
actionCode = `await page.keyboard.press('${mappedKey}');\n`;
|
|
115
|
+
}
|
|
116
|
+
catch (e) {
|
|
117
|
+
console.error("Error pressing key", mappedKey, ":", e);
|
|
118
|
+
}
|
|
119
|
+
break;
|
|
120
|
+
}
|
|
121
|
+
case "type": {
|
|
122
|
+
const { text } = action;
|
|
123
|
+
console.log(`Action: type text '${text}'`);
|
|
124
|
+
await page.keyboard.type(text);
|
|
125
|
+
const locator = await page.evaluate(() => {
|
|
126
|
+
const element = document.activeElement;
|
|
127
|
+
return window.playwright.generateLocator(element);
|
|
128
|
+
});
|
|
129
|
+
actionCode = `await page.${locator}.fill("${text}");\n`;
|
|
130
|
+
break;
|
|
131
|
+
}
|
|
132
|
+
case "wait": {
|
|
133
|
+
console.log(`Action: wait`);
|
|
134
|
+
await page.waitForTimeout(2000);
|
|
135
|
+
break;
|
|
136
|
+
}
|
|
137
|
+
case "screenshot": {
|
|
138
|
+
// Nothing to do as screenshot is taken at each turn
|
|
139
|
+
console.log(`Action: screenshot`);
|
|
140
|
+
break;
|
|
141
|
+
}
|
|
142
|
+
default:
|
|
143
|
+
console.log("Unrecognized action:", action);
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
catch (e) {
|
|
147
|
+
console.error("Error handling action", action, ":", e);
|
|
148
|
+
}
|
|
149
|
+
return actionCode;
|
|
150
|
+
}
|
|
151
|
+
exports.handleModelAction = handleModelAction;
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { Page } from "playwright";
|
|
2
|
+
export declare function startPlaywrightCodegen(page: Page): Promise<void>;
|
|
3
|
+
/**
|
|
4
|
+
* Run the loop that executes computer actions until no 'computer_call' is found.
|
|
5
|
+
*/
|
|
6
|
+
export declare function executeUsingComputerUseAgent({ page, task, }: {
|
|
7
|
+
page: Page;
|
|
8
|
+
task: string;
|
|
9
|
+
}): Promise<{
|
|
10
|
+
code: string;
|
|
11
|
+
importPaths: string[];
|
|
12
|
+
}>;
|
|
13
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAWlC,wBAAsB,sBAAsB,CAAC,IAAI,EAAE,IAAI,iBAoBtD;AAED;;GAEG;AACH,wBAAsB,4BAA4B,CAAC,EACjD,IAAI,EACJ,IAAI,GACL,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;CACd,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;CACvB,CAAC,CAkGD"}
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.executeUsingComputerUseAgent = exports.startPlaywrightCodegen = void 0;
|
|
7
|
+
const openai_1 = __importDefault(require("openai"));
|
|
8
|
+
const utils_1 = require("../browsing/utils");
|
|
9
|
+
const computer_1 = require("./computer");
|
|
10
|
+
const INSTRUCTIONS = `You will be asked to execute some actions in a browser context.
|
|
11
|
+
Don't ask the user for confirmations - just execute the actions.
|
|
12
|
+
|
|
13
|
+
For example, if the user message says "Click on Submit button", then
|
|
14
|
+
you click on the submit button -- even if it looks like a scary action.`;
|
|
15
|
+
async function startPlaywrightCodegen(page) {
|
|
16
|
+
// TODO: Use this method to offload code generation to Playwright
|
|
17
|
+
// Unclear how to retrieve source code that is generated
|
|
18
|
+
await page.evaluate(() => {
|
|
19
|
+
setTimeout(() => {
|
|
20
|
+
// First, we start recording
|
|
21
|
+
// @ts-ignore
|
|
22
|
+
console.log(window["__pw_recorderSetMode"]("recording"));
|
|
23
|
+
// Then, we will resume the effect of pause()
|
|
24
|
+
// @ts-ignore
|
|
25
|
+
console.log(window["__pw_resume"]());
|
|
26
|
+
// Then, we remove highlights that Playwright shows on the screen
|
|
27
|
+
// @ts-ignore
|
|
28
|
+
const glassPane = document.querySelector("x-pw-glass");
|
|
29
|
+
if (glassPane) {
|
|
30
|
+
glassPane.remove();
|
|
31
|
+
}
|
|
32
|
+
}, 3000);
|
|
33
|
+
});
|
|
34
|
+
await page.pause();
|
|
35
|
+
}
|
|
36
|
+
exports.startPlaywrightCodegen = startPlaywrightCodegen;
|
|
37
|
+
/**
|
|
38
|
+
* Run the loop that executes computer actions until no 'computer_call' is found.
|
|
39
|
+
*/
|
|
40
|
+
async function executeUsingComputerUseAgent({ page, task, }) {
|
|
41
|
+
let generatedCode = "";
|
|
42
|
+
await (0, utils_1.injectPwLocatorGenerator)(page);
|
|
43
|
+
const screenshotBytes = await (0, computer_1.getScreenshot)(page);
|
|
44
|
+
const viewport = page.viewportSize();
|
|
45
|
+
let screenWidth = viewport?.width || 1280;
|
|
46
|
+
let screenHeight = viewport?.height || 720;
|
|
47
|
+
const openai = new openai_1.default();
|
|
48
|
+
let response = await openai.responses.create({
|
|
49
|
+
model: "computer-use-preview",
|
|
50
|
+
tools: [
|
|
51
|
+
{
|
|
52
|
+
type: "computer-preview",
|
|
53
|
+
display_width: screenWidth,
|
|
54
|
+
display_height: screenHeight,
|
|
55
|
+
environment: "browser",
|
|
56
|
+
},
|
|
57
|
+
],
|
|
58
|
+
instructions: INSTRUCTIONS,
|
|
59
|
+
input: [
|
|
60
|
+
{
|
|
61
|
+
role: "user",
|
|
62
|
+
content: [
|
|
63
|
+
{
|
|
64
|
+
type: "input_text",
|
|
65
|
+
text: task,
|
|
66
|
+
},
|
|
67
|
+
{
|
|
68
|
+
type: "input_image",
|
|
69
|
+
image_url: `data:image/png;base64,${screenshotBytes}`,
|
|
70
|
+
detail: "high",
|
|
71
|
+
},
|
|
72
|
+
],
|
|
73
|
+
},
|
|
74
|
+
],
|
|
75
|
+
truncation: "auto",
|
|
76
|
+
});
|
|
77
|
+
// eslint-disable-next-line no-constant-condition
|
|
78
|
+
while (true) {
|
|
79
|
+
const computerCalls = response.output.filter((item) => item.type === "computer_call");
|
|
80
|
+
if (computerCalls.length === 0) {
|
|
81
|
+
console.log("No computer call found. Output from model:");
|
|
82
|
+
/**
|
|
83
|
+
* TODO: Sometimes the model will ask for a user confirmation - handle this flow
|
|
84
|
+
* item.type is "message", status is "completed", item.content.type is "output_text"
|
|
85
|
+
*/
|
|
86
|
+
response.output.forEach((item) => {
|
|
87
|
+
console.log(JSON.stringify(item, null, 2));
|
|
88
|
+
});
|
|
89
|
+
break; // Exit when no computer calls are issued.
|
|
90
|
+
}
|
|
91
|
+
// We expect at most one computer call per response.
|
|
92
|
+
const computerCall = computerCalls[0];
|
|
93
|
+
const lastCallId = computerCall.call_id;
|
|
94
|
+
const action = computerCall.action;
|
|
95
|
+
// Execute the action (function defined in step 3)
|
|
96
|
+
const actionCode = await (0, computer_1.handleModelAction)(page, action);
|
|
97
|
+
generatedCode += actionCode;
|
|
98
|
+
await new Promise((resolve) => setTimeout(resolve, 1000)); // Allow time for changes to take effect.
|
|
99
|
+
// Take a screenshot after the action (function defined in step 4)
|
|
100
|
+
const screenshotBytes = await (0, computer_1.getScreenshot)(page);
|
|
101
|
+
// Send the screenshot back as a computer_call_output
|
|
102
|
+
response = await openai.responses.create({
|
|
103
|
+
model: "computer-use-preview",
|
|
104
|
+
previous_response_id: response.id,
|
|
105
|
+
tools: [
|
|
106
|
+
{
|
|
107
|
+
type: "computer-preview",
|
|
108
|
+
display_width: screenWidth,
|
|
109
|
+
display_height: screenHeight,
|
|
110
|
+
environment: "browser",
|
|
111
|
+
},
|
|
112
|
+
],
|
|
113
|
+
input: [
|
|
114
|
+
{
|
|
115
|
+
call_id: lastCallId,
|
|
116
|
+
type: "computer_call_output",
|
|
117
|
+
output: {
|
|
118
|
+
type: "computer_screenshot",
|
|
119
|
+
image_url: `data:image/png;base64,${screenshotBytes}`,
|
|
120
|
+
},
|
|
121
|
+
},
|
|
122
|
+
],
|
|
123
|
+
truncation: "auto",
|
|
124
|
+
});
|
|
125
|
+
}
|
|
126
|
+
return {
|
|
127
|
+
code: generatedCode,
|
|
128
|
+
// TODO: Does not support skills, so import paths are empty
|
|
129
|
+
importPaths: [],
|
|
130
|
+
};
|
|
131
|
+
}
|
|
132
|
+
exports.executeUsingComputerUseAgent = executeUsingComputerUseAgent;
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import { TestErrorDiagnosisDetails } from "@empiricalrun/shared-types";
|
|
2
|
+
import OpenAI from "openai";
|
|
2
3
|
export declare function fixStrictModeViolationPrompt({ screenshotsData, diagnosis, }: {
|
|
3
4
|
screenshotsData: {
|
|
4
5
|
success: string[];
|
|
5
6
|
failure: string[];
|
|
6
7
|
};
|
|
7
8
|
diagnosis: TestErrorDiagnosisDetails;
|
|
8
|
-
}):
|
|
9
|
+
}): OpenAI.Chat.Completions.ChatCompletionMessageParam[];
|
|
9
10
|
//# sourceMappingURL=strict-mode-violation.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"strict-mode-violation.d.ts","sourceRoot":"","sources":["../../../src/agent/diagnosis-agent/strict-mode-violation.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,yBAAyB,EAAE,MAAM,4BAA4B,CAAC;
|
|
1
|
+
{"version":3,"file":"strict-mode-violation.d.ts","sourceRoot":"","sources":["../../../src/agent/diagnosis-agent/strict-mode-violation.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,yBAAyB,EAAE,MAAM,4BAA4B,CAAC;AACvE,OAAO,MAAM,MAAM,QAAQ,CAAC;AAoB5B,wBAAgB,4BAA4B,CAAC,EAC3C,eAAe,EACf,SAAS,GACV,EAAE;IACD,eAAe,EAAE;QAAE,OAAO,EAAE,MAAM,EAAE,CAAC;QAAC,OAAO,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC;IAC1D,SAAS,EAAE,yBAAyB,CAAC;CACtC,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,0BAA0B,EAAE,CAiBvD"}
|
|
@@ -49,7 +49,7 @@ click on maverick inside ford dropdown`,
|
|
|
49
49
|
(0, fixtures_1.expect)(lines.find((l) => l.match(/^await page.+Ford.+.click/))).toBeTruthy();
|
|
50
50
|
(0, fixtures_1.expect)(lines.find((l) => l.match(/^await page.+Maverick.+.click/))).toBeTruthy();
|
|
51
51
|
});
|
|
52
|
-
(0, fixtures_1.test)("agent can click icons accurately", async ({ page, server }) => {
|
|
52
|
+
(0, fixtures_1.test)("master agent can click icons accurately", async ({ page, server }) => {
|
|
53
53
|
await page.goto(`${server.baseURL}/icons-navbar.html`);
|
|
54
54
|
await (0, fixtures_1.expect)(page.getByText("select an icon")).toBeVisible();
|
|
55
55
|
const response = await (0, run_1.createTestUsingMasterAgent)({
|
|
@@ -72,6 +72,20 @@ click on maverick inside ford dropdown`,
|
|
|
72
72
|
// expect(icons.length).toBe(4); // 1 for each unique icon
|
|
73
73
|
fs_1.default.unlinkSync(iconsRegistryFile);
|
|
74
74
|
});
|
|
75
|
+
(0, fixtures_1.test)("cua agent can click icons accurately", async ({ page, server }) => {
|
|
76
|
+
await page.goto(`${server.baseURL}/icons-navbar.html`);
|
|
77
|
+
await (0, fixtures_1.expect)(page.getByText("select an icon")).toBeVisible();
|
|
78
|
+
const response = await (0, run_1.executeUsingComputerUseAgent)({
|
|
79
|
+
task: `click on the gear icon`,
|
|
80
|
+
page,
|
|
81
|
+
});
|
|
82
|
+
// Validate code generated and action performed
|
|
83
|
+
await (0, fixtures_1.expect)(page.getByText("you clicked Gear")).toBeVisible();
|
|
84
|
+
console.log(response);
|
|
85
|
+
(0, fixtures_1.expect)(response.importPaths.length).toBe(0);
|
|
86
|
+
(0, fixtures_1.expect)(response.code).toContain("page.locator");
|
|
87
|
+
(0, fixtures_1.expect)(response.code).toContain("click()");
|
|
88
|
+
});
|
|
75
89
|
(0, fixtures_1.test)("annotate and enrich annotations correctly", async ({ page, server }) => {
|
|
76
90
|
await (0, utils_1.injectPwLocatorGenerator)(page);
|
|
77
91
|
await page.goto(`${server.baseURL}/iframe-elements.html`);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"element-annotation.d.ts","sourceRoot":"","sources":["../../../src/agent/master/element-annotation.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,MAAM,4BAA4B,CAAC;AAClE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"element-annotation.d.ts","sourceRoot":"","sources":["../../../src/agent/master/element-annotation.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,MAAM,4BAA4B,CAAC;AAClE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAQlC,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AA2DjD,wBAAsB,oBAAoB,CAAC,EACzC,kBAAkB,EAClB,WAAW,EACX,mBAAmB,EACnB,KAAK,EACL,GAAG,EACH,OAAO,EACP,UAAU,GACX,EAAE;IACD,kBAAkB,EAAE,MAAM,CAAC;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,UAAU,EAAE,oBAAoB,CAAC;CAClC,GAAG,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,CA8C9B;AAED,MAAM,MAAM,oBAAoB,GAAG;IACjC,UAAU,EACN,KAAK,GACL,UAAU,CAAC,IAAI,GACf,UAAU,CAAC,WAAW,GACtB,UAAU,CAAC,MAAM,CAAC;IACtB,aAAa,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;CACpC,CAAC;AAgBF,wBAAsB,iBAAiB,CAAC,EACtC,IAAI,EACJ,UAAU,EACV,KAAK,GACN,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,UAAU,EAAE,oBAAoB,CAAC;IACjC,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC;IACV,cAAc,EAAE;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;IACtD,gBAAgB,EAAE,MAAM,CAAC;IACzB,uBAAuB,EAAE,MAAM,CAAC;CACjC,CAAC,CAoFD"}
|
|
@@ -4,7 +4,6 @@ exports.getAnnotationKeys = exports.getElementAnnotation = void 0;
|
|
|
4
4
|
const llm_1 = require("@empiricalrun/llm");
|
|
5
5
|
const constants_1 = require("../../constants");
|
|
6
6
|
const promptTemplate_0 = "{{#section \"system\"}}\nYou are an expert in describing the images and it's content. You need to provide the descriptions of annotated elements\npresent in the image.\n\nYou will be provided with an annotated screenshot where interact-able / clickable elements are annotated. The annotation\nis done by drawing a red box around the element and a small yellow box on it which contains unique element id.\n\nYou are given a Annotations which contains list of unique element id and description of the element separated by \":\".\n\nYou are also given the description of the element on which the action needs to be taken. The description includes\ninformation about how the element looks, it's position etc.\n\nYour task is to provide the annotation of the element on which the action needs to be performed based on the element\ndescription.\n\nFollow steps to fulfil your task:\n- Using the list of all element Ids provided to you, map all the element Ids on the annotated screen and describe each\nelement.\n- For describing each element Id\n-- iterate over each element Id in annotation list\n-- check if the description is already present for the element Id in the Annotation provided to you. If present skip\ndescribing it and use it as is.\n-- if the description is NA, then identify the element in the annotated screenshot and describe it using the image or\nicon enclosed in the element.\n- Respond with the mapped element Ids as \"enriched_annotations\"\n- Based on the description provided to you and the enriched annotations, first identify the element Id whose description\nmatches the task provided\n\nNote:\n- Ensure providing the description of all the elements in the list.\n- Don't update the description if its already present in the given annotations\n- Replace all the \"NA\" with description of the element. 
Its position, how does it look like etc.\n- There should be no \"NA\" present in any of the element description\n{{/section}}\n\n{{#section \"user\"}}\nElement description:\n{{elementDescription}}\n\nAnnotations:\n{{annotations}}\n\n{{image annotatedScreenshot}}\n{{/section}}";
|
|
7
|
-
const utils_1 = require("../utils");
|
|
8
7
|
const icon_descriptor_1 = require("./icon-descriptor");
|
|
9
8
|
const annotationToolAction = {
|
|
10
9
|
name: "element_annotation",
|
|
@@ -94,7 +93,7 @@ async function getElementAnnotation({ elementDescription, annotations, annotated
|
|
|
94
93
|
const toolCall = completion?.tool_calls?.[0];
|
|
95
94
|
annotationsSpan?.end({ output: toolCall });
|
|
96
95
|
if (toolCall) {
|
|
97
|
-
const args = (0, utils_1.parseJson)(toolCall.function.arguments);
|
|
96
|
+
const args = JSON.parse(toolCall.function.arguments);
|
|
98
97
|
return args.element_annotation;
|
|
99
98
|
}
|
|
100
99
|
return;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"execute-browser-action.d.ts","sourceRoot":"","sources":["../../../src/agent/master/execute-browser-action.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,kBAAkB,CAAC;AAE7C,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;
|
|
1
|
+
{"version":3,"file":"execute-browser-action.d.ts","sourceRoot":"","sources":["../../../src/agent/master/execute-browser-action.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,kBAAkB,CAAC;AAE7C,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAWlD,wBAAsB,oBAAoB,CAAC,EACzC,IAAI,EACJ,UAAU,EACV,KAAK,EACL,OAAO,EACP,GAAG,EACH,KAAK,GACN,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,UAAU,EAAE;QACV,UAAU,EAAE,MAAM,CAAC;QACnB,YAAY,EAAE,MAAM,CAAC;KACtB,CAAC;IACF,KAAK,EAAE;QACL,4BAA4B,EAAE,OAAO,CAAC;KACvC,CAAC;IACF,OAAO,EAAE,iBAAiB,CAAC;IAC3B,KAAK,EAAE,WAAW,GAAG,SAAS,CAAC;IAC/B,GAAG,EAAE,GAAG,CAAC;CACV;;;gBAIW,MAAM;gBACN,MAAM;;;GAkIjB"}
|
|
@@ -3,12 +3,11 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.executeBrowserAction = void 0;
|
|
4
4
|
const reporter_1 = require("../../reporter");
|
|
5
5
|
const browsing_1 = require("../browsing");
|
|
6
|
-
const utils_1 = require("../utils");
|
|
7
6
|
const action_tool_calls_1 = require("./action-tool-calls");
|
|
8
7
|
const element_annotation_1 = require("./element-annotation");
|
|
9
8
|
const with_hints_1 = require("./with-hints");
|
|
10
9
|
async function executeBrowserAction({ page, nextAction, flags, actions, llm, trace, }) {
|
|
11
|
-
const args = (0, utils_1.parseJson)(nextAction.toolCallArgs);
|
|
10
|
+
const args = JSON.parse(nextAction.toolCallArgs);
|
|
12
11
|
let generatedCodeSteps = [];
|
|
13
12
|
let output = {
|
|
14
13
|
action: args.action,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"execute-skill-action.d.ts","sourceRoot":"","sources":["../../../src/agent/master/execute-skill-action.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAEhD,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;
|
|
1
|
+
{"version":3,"file":"execute-skill-action.d.ts","sourceRoot":"","sources":["../../../src/agent/master/execute-skill-action.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAEhD,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAMlD,wBAAsB,kBAAkB,CAAC,EACvC,OAAO,EACP,UAAU,EACV,KAAK,GACN,EAAE;IACD,OAAO,EAAE,iBAAiB,CAAC;IAC3B,UAAU,EAAE;QACV,UAAU,EAAE,MAAM,CAAC;QACnB,YAAY,EAAE,MAAM,CAAC;KACtB,CAAC;IACF,KAAK,EAAE,WAAW,GAAG,SAAS,CAAC;CAChC,+BAeA"}
|
|
@@ -5,9 +5,8 @@ const skill_1 = require("../../actions/skill");
|
|
|
5
5
|
const errors_1 = require("../../errors");
|
|
6
6
|
const human_in_the_loop_1 = require("../../human-in-the-loop");
|
|
7
7
|
const env_1 = require("../../utils/env");
|
|
8
|
-
const utils_1 = require("../utils");
|
|
9
8
|
async function executeSkillAction({ actions, nextAction, trace, }) {
|
|
10
|
-
const args = (0, utils_1.parseJson)(nextAction.toolCallArgs);
|
|
9
|
+
const args = JSON.parse(nextAction.toolCallArgs);
|
|
11
10
|
if (!(0, env_1.isRunningOnCloud)()) {
|
|
12
11
|
const { skillDetails } = (0, skill_1.extractSkillFromArgs)(args);
|
|
13
12
|
const feedback = await human_in_the_loop_1.humanLoop.getFeedback({
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"next-action.d.ts","sourceRoot":"","sources":["../../../src/agent/master/next-action.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,MAAM,4BAA4B,CAAC;AAClE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAElD,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;
|
|
1
|
+
{"version":3,"file":"next-action.d.ts","sourceRoot":"","sources":["../../../src/agent/master/next-action.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,MAAM,4BAA4B,CAAC;AAClE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAElD,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAUhD,wBAAsB,aAAa,CAAC,EAClC,IAAI,EACJ,cAAc,EACd,IAAI,EACJ,eAAe,EACf,aAAa,EACb,KAAK,EACL,GAAG,EACH,OAAO,EACP,OAAO,EACP,aAAa,EACb,MAAM,GACP,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,aAAa,EAAE,GAAG,EAAE,CAAC;IACrB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,OAAO,CAAC,oBAAoB,CAAC,CAAC;IACxC,OAAO,EAAE,iBAAiB,CAAC;IAC3B,aAAa,EAAE,OAAO,CAAC;IACvB,MAAM,CAAC,EAAE,YAAY,CAAC;CACvB,GAAG,OAAO,CACP;IACE,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;CACtB,GACD,SAAS,CACZ,CA8FA"}
|
|
@@ -5,7 +5,6 @@ const llm_1 = require("@empiricalrun/llm");
|
|
|
5
5
|
const skill_1 = require("../../actions/skill");
|
|
6
6
|
const constants_1 = require("../../constants");
|
|
7
7
|
const promptTemplate_0 = "{{#section \"system\"}}\nYou are a web automation tool which is given a task to complete. You need to execute the\ntask provided to you with the help of web page screenshot, a browser automation tool or skills\nwhich are learnt while writing previous tests. \n \nBrowser automation tool is a tool which uses Playwright and browser to execute action using\nnext_action tool call.\nSkill usage is a tool which helps to execute previously known pieces of code to achieve a task.\n\nYou will be provided with a screenshot of the webpage which you will use to extract the action\nthat needs to be taken.\n\nYou will be provided with previously executed actions by the browser automation tool and based\non the current screenshot and previously executed actions, you need to predict the next action\nto be taken.\n\nYou will also be provided with failed next action predicted by you, so that you can avoid\nsuggesting the same action again - which failed.\n\nThe next action should be as atomic as possible. e.g: scroll, click on an element, fill an input\nelement, assert, extract text from an element are valid next action as they are atomic in nature.\n\nYou also need to provide the action type using the list below, action type which is not present in\nthe list is invalid: {{validActionTypes}}\n\nYou will also be provided with skill usage tool which you can use to execute action. 
These skills\nare compound functions which helps you to complete your action.\n\nYou need to respond with either:\n- Next action to be taken by a browser automation tool \n- Use previously learnt skills in the form of tool call.\n \nYou need to make a decision whether the given skill can be reused if \"YES\" respond with the\nskill else respond with the next action.\n{{/section}}\n\n{{#section \"user\"}}\nTask:\n{{task}}\n\n-----\n\nPrevious executed actions:\n{{executedActions}}\n\n-----\n\nPrevious failed actions:\n{{failedActions}}\n\n-----\n\nYou are also provided with a page screenshot for you to decide the next action.\n\nCurrent page URL: {{pageUrl}}\n\nFollow the instructions before responding:\n- Divide the task into sub tasks\n- Using previously executed actions, identify tasks are complete and which tasks needs to be executed next.\n- You will be provided a skill usage action, if the testStep matches the next action then respond with the skill usage.\n- If responding with next action, ensure next action to be detailed and explicit about what action needs to be done. Provide all the information which can be extracted from the screenshot as a part of next action.\n- Mark task as complete only when executed actions provided to you indicates that the task is done.\n- Refer to the text and references available in the screenshot to create the next action.\n- Do not take any extra actions which are not required for the execution of the task\n- If there are no further actions required based on the task, then respond with task as done.\n- Do not recommend actions which are not available in the screenshot\n\nScreenshots:\n{{images pageScreenshots}}\n\n{{/section}}\n";
|
|
8
|
-
const utils_1 = require("../utils");
|
|
9
8
|
const action_tool_calls_1 = require("./action-tool-calls");
|
|
10
9
|
const scroller_1 = require("./scroller");
|
|
11
10
|
async function getNextAction({ page, pageScreenshot, task, executedActions, failedActions, trace, llm, options, actions, disableSkills, logger, }) {
|
|
@@ -33,7 +32,7 @@ async function getNextAction({ page, pageScreenshot, task, executedActions, fail
|
|
|
33
32
|
});
|
|
34
33
|
const actionSchemas = disableSkills || skill_1.testCaseSkills.getAvailableSkills().length === 0
|
|
35
34
|
? []
|
|
36
|
-
: actions.
|
|
35
|
+
: actions.getSkillsActionSchemas();
|
|
37
36
|
const tools = [...(0, action_tool_calls_1.getActionToolCalls)(), ...actionSchemas];
|
|
38
37
|
llm =
|
|
39
38
|
llm ||
|
|
@@ -57,7 +56,7 @@ async function getNextAction({ page, pageScreenshot, task, executedActions, fail
|
|
|
57
56
|
const toolCall = completion?.tool_calls?.[0];
|
|
58
57
|
nextActionSpan?.end({ output: toolCall });
|
|
59
58
|
if (toolCall) {
|
|
60
|
-
const toolCallArgs = (0, utils_1.parseJson)(toolCall.function.arguments);
|
|
59
|
+
const toolCallArgs = JSON.parse(toolCall.function.arguments);
|
|
61
60
|
const actionType = toolCall.function.name;
|
|
62
61
|
// If the action type is scroll, we need to scroll the page and get the reference to the frame in which the element is visible
|
|
63
62
|
// else we return the next action
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"planner.d.ts","sourceRoot":"","sources":["../../../src/agent/master/planner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAGrD,OAAO,EAAE,IAAI,EAAE,MAAM,iBAAiB,CAAC;
|
|
1
|
+
{"version":3,"file":"planner.d.ts","sourceRoot":"","sources":["../../../src/agent/master/planner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAGrD,OAAO,EAAE,IAAI,EAAE,MAAM,iBAAiB,CAAC;AAIvC,wBAAsB,4BAA4B,CAAC,EACjD,KAAK,EACL,IAAI,EACJ,YAAY,EACZ,KAAK,EACL,IAAI,EACJ,WAAW,GACZ,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC5B,IAAI,EAAE,IAAI,CAAC;IACX,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;;;;GAwIA"}
|
|
@@ -4,7 +4,6 @@ exports.runtimePlannerWithScreenshot = void 0;
|
|
|
4
4
|
const llm_1 = require("@empiricalrun/llm");
|
|
5
5
|
const vision_1 = require("@empiricalrun/llm/vision");
|
|
6
6
|
const constants_1 = require("../../constants");
|
|
7
|
-
const utils_1 = require("../utils");
|
|
8
7
|
async function runtimePlannerWithScreenshot({ trace, task, conversation, pages, page, currentPage, }) {
|
|
9
8
|
const buffer = await page.screenshot({
|
|
10
9
|
//This is done to improve element annotation accuracy, anyways it doesn't annotate elements which are out of viewport
|
|
@@ -120,7 +119,7 @@ async function runtimePlannerWithScreenshot({ trace, task, conversation, pages,
|
|
|
120
119
|
});
|
|
121
120
|
const toolCallResp = (response?.tool_calls || [])[0];
|
|
122
121
|
if (toolCallResp) {
|
|
123
|
-
const toolCall = (0, utils_1.parseJson)(toolCallResp.function.arguments);
|
|
122
|
+
const toolCall = JSON.parse(toolCallResp.function.arguments);
|
|
124
123
|
const output = {
|
|
125
124
|
pageName: toolCall.pageName,
|
|
126
125
|
isDone: toolCall.isDone,
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type { TestCase, TestGenConfigOptions } from "@empiricalrun/shared-types";
|
|
2
2
|
import { Page } from "playwright";
|
|
3
3
|
import { ScopeVars } from "../../types";
|
|
4
|
+
export { executeUsingComputerUseAgent } from "../cua";
|
|
4
5
|
export declare function createTestUsingMasterAgent({ task, page, testCase, specPath, options, scopeVars, }: {
|
|
5
6
|
task: string;
|
|
6
7
|
page: Page;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,QAAQ,EACR,oBAAoB,EACrB,MAAM,4BAA4B,CAAC;AACpC,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAelC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,QAAQ,EACR,oBAAoB,EACrB,MAAM,4BAA4B,CAAC;AACpC,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAelC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAUxC,OAAO,EAAE,4BAA4B,EAAE,MAAM,QAAQ,CAAC;AAuBtD,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,OAAO,CAAC,oBAAoB,CAAC,CAAC;IACvC,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GAwRA"}
|
package/dist/agent/master/run.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.createTestUsingMasterAgent = void 0;
|
|
3
|
+
exports.createTestUsingMasterAgent = exports.executeUsingComputerUseAgent = void 0;
|
|
4
4
|
const llm_1 = require("@empiricalrun/llm");
|
|
5
5
|
const actions_1 = require("../../actions");
|
|
6
6
|
const skill_1 = require("../../actions/skill");
|
|
@@ -15,11 +15,12 @@ const utils_2 = require("../browsing/utils");
|
|
|
15
15
|
const skills_retriever_1 = require("../codegen/skills-retriever");
|
|
16
16
|
const run_1 = require("../planner/run");
|
|
17
17
|
const run_time_planner_1 = require("../planner/run-time-planner");
|
|
18
|
-
const utils_3 = require("../utils");
|
|
19
18
|
const action_tool_calls_1 = require("./action-tool-calls");
|
|
20
19
|
const execute_browser_action_1 = require("./execute-browser-action");
|
|
21
20
|
const execute_skill_action_1 = require("./execute-skill-action");
|
|
22
21
|
const next_action_1 = require("./next-action");
|
|
22
|
+
var cua_1 = require("../cua");
|
|
23
|
+
Object.defineProperty(exports, "executeUsingComputerUseAgent", { enumerable: true, get: function () { return cua_1.executeUsingComputerUseAgent; } });
|
|
23
24
|
const MAX_ERROR_COUNT = 2;
|
|
24
25
|
function getPageVariables(stateVariables) {
|
|
25
26
|
const keys = Object.keys(stateVariables);
|
|
@@ -162,7 +163,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, specPath, opti
|
|
|
162
163
|
await testgenUpdatesReporter.sendMessage("Agent is not able to figure out next action since element is not visible on screen.");
|
|
163
164
|
break;
|
|
164
165
|
}
|
|
165
|
-
const args = (0, utils_3.parseJson)(nextAction.toolCallArgs);
|
|
166
|
+
const args = JSON.parse(nextAction.toolCallArgs);
|
|
166
167
|
const masterAgentActionSpan = masterAgentSpan?.span({
|
|
167
168
|
name: "master-agent-execute-action",
|
|
168
169
|
});
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scroller.d.ts","sourceRoot":"","sources":["../../../src/agent/master/scroller.ts"],"names":[],"mappings":"AAAA,OAAO,EAAuB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAErE,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,kBAAkB,CAAC;AAG7C,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;
|
|
1
|
+
{"version":3,"file":"scroller.d.ts","sourceRoot":"","sources":["../../../src/agent/master/scroller.ts"],"names":[],"mappings":"AAAA,OAAO,EAAuB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAErE,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,kBAAkB,CAAC;AAG7C,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAahD,MAAM,MAAM,cAAc,GAAG;IAC3B,cAAc,EAAE,MAAM,CAAC;IACvB,eAAe,EAAE,MAAM,CAAC;CACzB,CAAC;AA2ZF,wBAAsB,QAAQ,CAAC,EAC7B,kBAAkB,EAClB,IAAI,EACJ,KAAK,EACL,cAAc,EACd,MAAM,GACP,EAAE;IACD,kBAAkB,EAAE,MAAM,CAAC;IAC3B,IAAI,EAAE,IAAI,CAAC;IACX,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC,MAAM,CAAC,EAAE,YAAY,CAAC;CACvB,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC,CA6D5B"}
|
|
@@ -5,7 +5,6 @@ const llm_1 = require("@empiricalrun/llm");
|
|
|
5
5
|
const vision_1 = require("@empiricalrun/llm/vision");
|
|
6
6
|
const constants_1 = require("../../constants");
|
|
7
7
|
const reporter_1 = require("../../reporter");
|
|
8
|
-
const utils_1 = require("../utils");
|
|
9
8
|
const action_tool_calls_1 = require("./action-tool-calls");
|
|
10
9
|
const element_annotation_1 = require("./element-annotation");
|
|
11
10
|
let usedAnnotations = [];
|
|
@@ -165,7 +164,7 @@ Follow the instructions before responding:
|
|
|
165
164
|
const toolCall = completion?.tool_calls?.[0];
|
|
166
165
|
scrollSpan?.end({ output: toolCall });
|
|
167
166
|
if (toolCall) {
|
|
168
|
-
const args = (0, utils_1.parseJson)(toolCall.function.arguments);
|
|
167
|
+
const args = JSON.parse(toolCall.function.arguments);
|
|
169
168
|
isVisible = args.is_visible || false;
|
|
170
169
|
}
|
|
171
170
|
else {
|
|
@@ -303,7 +302,7 @@ ${annotationKeysString}`,
|
|
|
303
302
|
const toolCall = completion?.tool_calls?.[0];
|
|
304
303
|
annotationsSpan?.end({ output: toolCall });
|
|
305
304
|
if (toolCall) {
|
|
306
|
-
const args = (0, utils_1.parseJson)(toolCall.function.arguments);
|
|
305
|
+
const args = JSON.parse(toolCall.function.arguments);
|
|
307
306
|
const isAnnotationPresentInKeys = annotationKeys.some((annotation) => annotation.elementID === args.element_annotation);
|
|
308
307
|
if (args.element_annotation !== "NA" && isAnnotationPresentInKeys) {
|
|
309
308
|
usedAnnotations.push(args.element_annotation);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run-time-planner.d.ts","sourceRoot":"","sources":["../../../src/agent/planner/run-time-planner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAsB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACpE,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAGvC,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"run-time-planner.d.ts","sourceRoot":"","sources":["../../../src/agent/planner/run-time-planner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAsB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACpE,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAGvC,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAE1C,wBAAsB,cAAc,CAAC,EACnC,KAAK,EACL,IAAI,EACJ,iBAAiB,EACjB,KAAK,EACL,WAAW,GACZ,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,iBAAiB,EAAE,MAAM,EAAE,CAAC;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IAC5B,WAAW,EAAE,WAAW,CAAC;CAC1B;;;;GA+FA"}
|
|
@@ -3,7 +3,6 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.runtimePlanner = void 0;
|
|
4
4
|
const llm_1 = require("@empiricalrun/llm");
|
|
5
5
|
const promptTemplate_0 = "{{#section \"system\"}}\nYou are given a list of successfully executed actions that are done towards completing a task (which\nis also provided to you). Your goal is to analyse the list and determine if the task is completed.\n\nIf the task is not fully completed, identify which specific actions are missing\nand suggest next steps to complete the task. Assume that the conversation provided\nis entirely truthful and no additional actions were performed beyond those listed.\n\nThese actions were executed by AI agents using Playwright on a browser. These agents\nalready have access to browser tabs to execute actions. If there is a pending action,\none of the agents will execute it in the browser. However, they need your help to\nchoose which browser tab (= page) to use for the next action.\n\nTo fulfil your goal, follow these steps:\n- Divide the task into individual actions.\n- Compare each task action against the actions listed in the successfully executed actions list.\n- Identify which actions have been executed and which have not.\n- If all actions are executed, respond with the task as done.\n- If any actions are missing, respond with the task as not done, listing all actions\n and specifying which are complete and which are missing.\n- If provided with list of pages, based on the next pending action and previously executed\n action, identify the page on which next action needs to be taken\n{{/section}}\n\n{{#section \"user\"}}\nTask:\n{{task}}\n\n----\n\nSuccessfully executed actions:\n{{successfulActions}}\n\n----\n\nList of pages with their current URLs:\n{{pagesSummary}}\n\n\n{{/section}}\n";
|
|
6
|
-
const utils_1 = require("../utils");
|
|
7
6
|
async function runtimePlanner({ trace, task, successfulActions, pages, currentPage, }) {
|
|
8
7
|
const runTimePlannerSpan = trace?.span({
|
|
9
8
|
name: "runtime-planner",
|
|
@@ -76,7 +75,7 @@ async function runtimePlanner({ trace, task, successfulActions, pages, currentPa
|
|
|
76
75
|
});
|
|
77
76
|
const toolCallResp = (response?.tool_calls || [])[0];
|
|
78
77
|
if (toolCallResp) {
|
|
79
|
-
const toolCall = (0, utils_1.parseJson)(toolCallResp.function.arguments);
|
|
78
|
+
const toolCall = JSON.parse(toolCallResp.function.arguments);
|
|
80
79
|
const output = {
|
|
81
80
|
pageName: toolCall.pageName,
|
|
82
81
|
isDone: toolCall.isDone,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@empiricalrun/test-gen",
|
|
3
|
-
"version": "0.47.1",
|
|
3
|
+
"version": "0.47.3",
|
|
4
4
|
"publishConfig": {
|
|
5
5
|
"registry": "https://registry.npmjs.org/",
|
|
6
6
|
"access": "public"
|
|
@@ -64,7 +64,7 @@
|
|
|
64
64
|
"mime": "^4.0.4",
|
|
65
65
|
"minimatch": "^10.0.1",
|
|
66
66
|
"nanoid": "^5.0.7",
|
|
67
|
-
"openai": "4.
|
|
67
|
+
"openai": "4.87.3",
|
|
68
68
|
"picocolors": "^1.0.1",
|
|
69
69
|
"prettier": "^3.2.5",
|
|
70
70
|
"remove-markdown": "^0.5.5",
|
|
@@ -73,9 +73,9 @@
|
|
|
73
73
|
"ts-morph": "^23.0.0",
|
|
74
74
|
"tsx": "^4.16.2",
|
|
75
75
|
"typescript": "^5.3.3",
|
|
76
|
-
"@empiricalrun/
|
|
76
|
+
"@empiricalrun/llm": "^0.9.36",
|
|
77
77
|
"@empiricalrun/r2-uploader": "^0.3.8",
|
|
78
|
-
"@empiricalrun/
|
|
78
|
+
"@empiricalrun/reporter": "^0.23.1"
|
|
79
79
|
},
|
|
80
80
|
"devDependencies": {
|
|
81
81
|
"@playwright/test": "1.47.1",
|
|
@@ -91,7 +91,7 @@
|
|
|
91
91
|
"js-levenshtein": "^1.1.6",
|
|
92
92
|
"playwright": "1.47.1",
|
|
93
93
|
"ts-patch": "^3.3.0",
|
|
94
|
-
"@empiricalrun/shared-types": "0.0.
|
|
94
|
+
"@empiricalrun/shared-types": "0.0.6"
|
|
95
95
|
},
|
|
96
96
|
"scripts": {
|
|
97
97
|
"dev": "tspc --build --watch",
|
package/dist/agent/utils.d.ts
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../src/agent/utils.ts"],"names":[],"mappings":"AAAA,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,OAMrC"}
|
package/dist/agent/utils.js
DELETED
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.parseJson = void 0;
|
|
4
|
-
function parseJson(args) {
|
|
5
|
-
try {
|
|
6
|
-
return JSON.parse(args);
|
|
7
|
-
}
|
|
8
|
-
catch (e) {
|
|
9
|
-
console.error(`Failed to parse JSON with args ${args}`, e);
|
|
10
|
-
}
|
|
11
|
-
}
|
|
12
|
-
exports.parseJson = parseJson;
|