@empiricalrun/test-gen 0.53.4 → 0.53.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
|
+
import { TraceClient } from "@empiricalrun/llm";
|
|
1
2
|
import { Page } from "playwright";
|
|
2
3
|
export declare function startPlaywrightCodegen(page: Page): Promise<void>;
|
|
3
|
-
export declare function createTestUsingComputerUseAgent({ page, task, }: {
|
|
4
|
+
export declare function createTestUsingComputerUseAgent({ page, task, trace, }: {
|
|
4
5
|
page: Page;
|
|
5
6
|
task: string;
|
|
7
|
+
trace?: TraceClient;
|
|
6
8
|
}): Promise<{
|
|
7
9
|
code: string;
|
|
8
10
|
importPaths: string[];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAS/D,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAMlC,wBAAsB,sBAAsB,CAAC,IAAI,EAAE,IAAI,iBAoBtD;AAED,wBAAsB,+BAA+B,CAAC,EACpD,IAAI,EACJ,IAAI,EACJ,KAAK,GACN,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;CACxB,CAAC,CAqLD"}
|
package/dist/agent/cua/index.js
CHANGED
|
@@ -5,8 +5,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
6
|
exports.createTestUsingComputerUseAgent = exports.startPlaywrightCodegen = void 0;
|
|
7
7
|
const llm_1 = require("@empiricalrun/llm");
|
|
8
|
-
const
|
|
9
|
-
const logger_1 = require("../../bin/logger");
|
|
8
|
+
const openai_1 = __importDefault(require("openai"));
|
|
10
9
|
const utils_1 = require("../browsing/utils");
|
|
11
10
|
const computer_1 = require("./computer");
|
|
12
11
|
const model_1 = require("./model");
|
|
@@ -32,25 +31,18 @@ async function startPlaywrightCodegen(page) {
|
|
|
32
31
|
await page.pause();
|
|
33
32
|
}
|
|
34
33
|
exports.startPlaywrightCodegen = startPlaywrightCodegen;
|
|
35
|
-
async function createTestUsingComputerUseAgent({ page, task, }) {
|
|
34
|
+
async function createTestUsingComputerUseAgent({ page, task, trace, }) {
|
|
36
35
|
await (0, utils_1.injectPwLocatorGenerator)(page);
|
|
37
36
|
const screenshotBytes = await (0, computer_1.getScreenshot)(page);
|
|
38
37
|
const viewport = page.viewportSize();
|
|
39
38
|
let screenWidth = viewport?.width || 1280;
|
|
40
39
|
let screenHeight = viewport?.height || 720;
|
|
41
|
-
const
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
if (trace) {
|
|
48
|
-
const traceUrl = trace.getTraceUrl();
|
|
49
|
-
logger.log(`Starting computer use agent: ${traceUrl}`);
|
|
50
|
-
}
|
|
51
|
-
const span = trace?.span({
|
|
52
|
-
name: "initial-model-call",
|
|
53
|
-
});
|
|
40
|
+
const openAIClient = trace
|
|
41
|
+
? (0, llm_1.observeOpenAI)(new openai_1.default(), {
|
|
42
|
+
generationName: `computer-use-agent`,
|
|
43
|
+
parent: trace,
|
|
44
|
+
})
|
|
45
|
+
: new openai_1.default();
|
|
54
46
|
let response = await (0, model_1.callComputerUseModel)({
|
|
55
47
|
input: [
|
|
56
48
|
{
|
|
@@ -70,8 +62,8 @@ async function createTestUsingComputerUseAgent({ page, task, }) {
|
|
|
70
62
|
],
|
|
71
63
|
screenWidth,
|
|
72
64
|
screenHeight,
|
|
65
|
+
openAIClient,
|
|
73
66
|
});
|
|
74
|
-
span?.end({ output: response });
|
|
75
67
|
let isTaskDone = false;
|
|
76
68
|
let maxIterations = 15;
|
|
77
69
|
let generatedCode = "";
|
|
@@ -80,10 +72,6 @@ async function createTestUsingComputerUseAgent({ page, task, }) {
|
|
|
80
72
|
while (!isTaskDone && iterationIndex < maxIterations) {
|
|
81
73
|
actionsSummary.push(`\n# Agent iteration ${iterationIndex}`);
|
|
82
74
|
iterationIndex++;
|
|
83
|
-
const iterationSpan = trace?.span({
|
|
84
|
-
name: `iteration-${iterationIndex}`,
|
|
85
|
-
input: { response },
|
|
86
|
-
});
|
|
87
75
|
const computerCalls = response.output.filter((item) => item.type === "computer_call");
|
|
88
76
|
const functionCalls = response.output.filter((item) => item.type === "function_call");
|
|
89
77
|
if (computerCalls.length === 0 && functionCalls.length === 0) {
|
|
@@ -174,8 +162,8 @@ async function createTestUsingComputerUseAgent({ page, task, }) {
|
|
|
174
162
|
],
|
|
175
163
|
screenWidth,
|
|
176
164
|
screenHeight,
|
|
165
|
+
openAIClient,
|
|
177
166
|
});
|
|
178
|
-
iterationSpan?.end({ output: response });
|
|
179
167
|
}
|
|
180
168
|
if (!isTaskDone) {
|
|
181
169
|
actionsSummary.push(`Max iteration limit hit: Task not done after ${maxIterations} iterations`);
|
|
@@ -1,8 +1,10 @@
|
|
|
1
|
+
import OpenAI from "openai";
|
|
1
2
|
import { Response, ResponseInputItem } from "openai/resources/responses/responses.mjs";
|
|
2
|
-
export declare function callComputerUseModel({ input, previousResponseId, screenWidth, screenHeight, }: {
|
|
3
|
+
export declare function callComputerUseModel({ input, previousResponseId, screenWidth, screenHeight, openAIClient, }: {
|
|
3
4
|
input: ResponseInputItem[];
|
|
4
5
|
previousResponseId?: string;
|
|
5
6
|
screenWidth: number;
|
|
6
7
|
screenHeight: number;
|
|
8
|
+
openAIClient: OpenAI;
|
|
7
9
|
}): Promise<Response>;
|
|
8
10
|
//# sourceMappingURL=model.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAEL,QAAQ,EACR,iBAAiB,EAClB,MAAM,0CAA0C,CAAC;AA8BlD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,kBAAkB,EAClB,WAAW,EACX,YAAY,EACZ,YAAY,GACb,EAAE;IACD,KAAK,EAAE,iBAAiB,EAAE,CAAC;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,QAAQ,CAAC,CAuBpB"}
|
package/dist/agent/cua/model.js
CHANGED
|
@@ -1,10 +1,6 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
3
|
exports.callComputerUseModel = void 0;
|
|
7
|
-
const openai_1 = __importDefault(require("openai"));
|
|
8
4
|
const INSTRUCTIONS = `You will be asked to execute some actions in a browser context.
|
|
9
5
|
Don't ask the user for confirmations - just execute the actions.
|
|
10
6
|
|
|
@@ -30,9 +26,8 @@ const pageGotoTool = {
|
|
|
30
26
|
},
|
|
31
27
|
strict: true,
|
|
32
28
|
};
|
|
33
|
-
async function callComputerUseModel({ input, previousResponseId, screenWidth, screenHeight, }) {
|
|
34
|
-
const
|
|
35
|
-
return await openai.responses.create({
|
|
29
|
+
async function callComputerUseModel({ input, previousResponseId, screenWidth, screenHeight, openAIClient, }) {
|
|
30
|
+
const response = await openAIClient.responses.create({
|
|
36
31
|
model: "computer-use-preview-2025-03-11",
|
|
37
32
|
previous_response_id: previousResponseId,
|
|
38
33
|
parallel_tool_calls: false,
|
|
@@ -53,5 +48,6 @@ async function callComputerUseModel({ input, previousResponseId, screenWidth, sc
|
|
|
53
48
|
input,
|
|
54
49
|
truncation: "auto",
|
|
55
50
|
});
|
|
51
|
+
return response;
|
|
56
52
|
}
|
|
57
53
|
exports.callComputerUseModel = callComputerUseModel;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@empiricalrun/test-gen",
|
|
3
|
-
"version": "0.53.4",
|
|
3
|
+
"version": "0.53.5",
|
|
4
4
|
"publishConfig": {
|
|
5
5
|
"registry": "https://registry.npmjs.org/",
|
|
6
6
|
"access": "public"
|
|
@@ -68,7 +68,7 @@
|
|
|
68
68
|
"tsx": "^4.16.2",
|
|
69
69
|
"typescript": "^5.3.3",
|
|
70
70
|
"zod": "^3.23.8",
|
|
71
|
-
"@empiricalrun/llm": "^0.14.
|
|
71
|
+
"@empiricalrun/llm": "^0.14.4",
|
|
72
72
|
"@empiricalrun/r2-uploader": "^0.3.8",
|
|
73
73
|
"@empiricalrun/test-run": "^0.7.6"
|
|
74
74
|
},
|