@empiricalrun/test-gen 0.64.3 → 0.66.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +49 -0
- package/dist/agent/browsing/run.d.ts +6 -4
- package/dist/agent/browsing/run.d.ts.map +1 -1
- package/dist/agent/browsing/run.js +10 -9
- package/dist/agent/chat/agent-loop.d.ts +2 -1
- package/dist/agent/chat/agent-loop.d.ts.map +1 -1
- package/dist/agent/chat/exports.d.ts +2 -2
- package/dist/agent/chat/exports.d.ts.map +1 -1
- package/dist/agent/chat/index.d.ts +2 -3
- package/dist/agent/chat/index.d.ts.map +1 -1
- package/dist/agent/chat/index.js +23 -5
- package/dist/agent/chat/models.d.ts +1 -1
- package/dist/agent/chat/models.d.ts.map +1 -1
- package/dist/agent/chat/state.d.ts +2 -2
- package/dist/agent/chat/state.d.ts.map +1 -1
- package/dist/agent/chat/utils.d.ts +2 -1
- package/dist/agent/chat/utils.d.ts.map +1 -1
- package/dist/agent/cua/computer.js +1 -1
- package/dist/agent/cua/index.d.ts +10 -3
- package/dist/agent/cua/index.d.ts.map +1 -1
- package/dist/agent/cua/index.js +71 -34
- package/dist/agent/cua/model.d.ts +7 -0
- package/dist/agent/cua/model.d.ts.map +1 -1
- package/dist/agent/cua/model.js +10 -0
- package/dist/agent/cua/pw-codegen/pw-pause/index.d.ts +7 -5
- package/dist/agent/cua/pw-codegen/pw-pause/index.d.ts.map +1 -1
- package/dist/agent/cua/pw-codegen/pw-pause/index.js +57 -29
- package/dist/agent/cua/pw-codegen/pw-pause/ipc.d.ts +3 -0
- package/dist/agent/cua/pw-codegen/pw-pause/ipc.d.ts.map +1 -0
- package/dist/agent/cua/pw-codegen/pw-pause/ipc.js +13 -0
- package/dist/agent/cua/pw-codegen/pw-pause/{utils.d.ts → patch.d.ts} +4 -11
- package/dist/agent/cua/pw-codegen/pw-pause/patch.d.ts.map +1 -0
- package/dist/agent/cua/pw-codegen/pw-pause/{utils.js → patch.js} +49 -3
- package/dist/bin/index.js +3 -5
- package/dist/bin/utils/index.d.ts +1 -2
- package/dist/bin/utils/index.d.ts.map +1 -1
- package/dist/bin/utils/index.js +5 -3
- package/dist/file/client.d.ts +2 -8
- package/dist/file/client.d.ts.map +1 -1
- package/dist/file/client.js +2 -23
- package/dist/file/server.d.ts +15 -3
- package/dist/file/server.d.ts.map +1 -1
- package/dist/file/server.js +17 -28
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +6 -13
- package/dist/tool-call-service/index.d.ts +3 -3
- package/dist/tool-call-service/index.d.ts.map +1 -1
- package/dist/tool-call-service/index.js +6 -3
- package/dist/tool-call-service/utils.d.ts +4 -10
- package/dist/tool-call-service/utils.d.ts.map +1 -1
- package/dist/tool-call-service/utils.js +28 -5
- package/dist/tools/commit-and-create-pr.d.ts +0 -6
- package/dist/tools/commit-and-create-pr.d.ts.map +1 -1
- package/dist/tools/commit-and-create-pr.js +21 -40
- package/dist/tools/diagnosis-fetcher.d.ts.map +1 -1
- package/dist/tools/diagnosis-fetcher.js +1 -0
- package/dist/tools/download-build.d.ts.map +1 -1
- package/dist/tools/download-build.js +1 -0
- package/dist/tools/grep/index.d.ts.map +1 -1
- package/dist/tools/grep/index.js +1 -0
- package/dist/tools/list-environments.d.ts +3 -0
- package/dist/tools/list-environments.d.ts.map +1 -0
- package/dist/tools/list-environments.js +49 -0
- package/dist/tools/str_replace_editor.d.ts +3 -2
- package/dist/tools/str_replace_editor.d.ts.map +1 -1
- package/dist/tools/str_replace_editor.js +48 -7
- package/dist/tools/test-gen-browser.d.ts +1 -1
- package/dist/tools/test-gen-browser.d.ts.map +1 -1
- package/dist/tools/test-gen-browser.js +58 -12
- package/dist/tools/test-run-fetcher/index.d.ts.map +1 -1
- package/dist/tools/test-run-fetcher/index.js +1 -0
- package/dist/tools/test-run.d.ts.map +1 -1
- package/dist/tools/test-run.js +1 -0
- package/dist/tools/upgrade-packages/index.d.ts +3 -0
- package/dist/tools/upgrade-packages/index.d.ts.map +1 -0
- package/dist/tools/upgrade-packages/index.js +124 -0
- package/dist/tools/upgrade-packages/utils.d.ts +13 -0
- package/dist/tools/upgrade-packages/utils.d.ts.map +1 -0
- package/dist/tools/upgrade-packages/utils.js +106 -0
- package/dist/tools/utils/index.d.ts +50 -2
- package/dist/tools/utils/index.d.ts.map +1 -1
- package/dist/tools/utils/index.js +87 -0
- package/dist/utils/git.d.ts +12 -1
- package/dist/utils/git.d.ts.map +1 -1
- package/dist/utils/git.js +66 -1
- package/package.json +4 -4
- package/tsconfig.tsbuildinfo +1 -1
- package/dist/agent/cua/pw-codegen/pw-pause/utils.d.ts.map +0 -1
- package/dist/tools/environment-crud.d.ts +0 -4
- package/dist/tools/environment-crud.d.ts.map +0 -1
- package/dist/tools/environment-crud.js +0 -100
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,54 @@
|
|
|
1
1
|
# @empiricalrun/test-gen
|
|
2
2
|
|
|
3
|
+
## 0.66.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- 576870a: feat: allow specifying package version in upgradePackages tool
|
|
8
|
+
- e94a2da: feat: update upgradePackages tool to include input schema for package selection
|
|
9
|
+
- dfb2007: feat: add upgradePackages tool
|
|
10
|
+
|
|
11
|
+
### Patch Changes
|
|
12
|
+
|
|
13
|
+
- 33abadf: fix: text appendBranchNameToQueueUrl output is under 80 characters fixed
|
|
14
|
+
- 1300a80: fix: branch name with dot should deploy tool execute service
|
|
15
|
+
- 985d721: fix: git patch for new files
|
|
16
|
+
- 6a19421: feat: collect git patch artifacts from text editor tools
|
|
17
|
+
- 1a46013: feat: page.pause codegen works when tool execution is headed
|
|
18
|
+
- 805f35f: fix: handle error when fetching environment variables in chat agent
|
|
19
|
+
- 0a9ec78: feat: add o3 with reduced costs
|
|
20
|
+
- c430cc0: fix: use mouse wheel to execute scroll in cua
|
|
21
|
+
- Updated dependencies [9b873e3]
|
|
22
|
+
- Updated dependencies [41c266d]
|
|
23
|
+
- Updated dependencies [0a9ec78]
|
|
24
|
+
- Updated dependencies [8a83b29]
|
|
25
|
+
- @empiricalrun/test-run@0.10.2
|
|
26
|
+
- @empiricalrun/llm@0.18.1
|
|
27
|
+
|
|
28
|
+
## 0.65.0
|
|
29
|
+
|
|
30
|
+
### Minor Changes
|
|
31
|
+
|
|
32
|
+
- 4623300: feat: added browser queue to add concurrency and avoid blocking of server due to browser dependent tools
|
|
33
|
+
|
|
34
|
+
### Patch Changes
|
|
35
|
+
|
|
36
|
+
- 82acf53: feat: enable tools to contribute to cost calculations
|
|
37
|
+
- 73dd841: feat: change getEnvironment tool to become listEnvironments
|
|
38
|
+
- 8233d49: chore: remove ecs feature flag from Dashboard
|
|
39
|
+
- 0b55884: feat: browser agent tool call returns images in tool result
|
|
40
|
+
- 459d029: feat: pass feature flags to tool calls
|
|
41
|
+
- 7712b2e: chore: move more types to shared-types package
|
|
42
|
+
- 1b08d58: feat: tool response interface supports images for claude
|
|
43
|
+
- 1b9087e: feat: improve feature flags ui, upgrade gemini-pro
|
|
44
|
+
- Updated dependencies [82acf53]
|
|
45
|
+
- Updated dependencies [1177d63]
|
|
46
|
+
- Updated dependencies [7712b2e]
|
|
47
|
+
- Updated dependencies [fb32af6]
|
|
48
|
+
- Updated dependencies [1b08d58]
|
|
49
|
+
- Updated dependencies [1b9087e]
|
|
50
|
+
- @empiricalrun/llm@0.18.0
|
|
51
|
+
|
|
3
52
|
## 0.64.3
|
|
4
53
|
|
|
5
54
|
### Patch Changes
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import { ArtifactInput } from "@empiricalrun/shared-types";
|
|
1
|
+
import { ArtifactInput, Usage } from "@empiricalrun/shared-types";
|
|
2
|
+
import { BrowserAgentResult } from "../cua";
|
|
2
3
|
type GenerateTestsType = {
|
|
3
4
|
testCaseName: string;
|
|
4
5
|
testCaseSuites: string[];
|
|
@@ -16,11 +17,12 @@ export declare function convertProjectsFilterToProject({ pwProjectsFilter, repoD
|
|
|
16
17
|
repoDir: string;
|
|
17
18
|
testFilePath: string;
|
|
18
19
|
}): Promise<string>;
|
|
19
|
-
export declare function
|
|
20
|
+
export declare function runBrowsingAgent({ testCaseName, testCaseSuites, testFilePath, filePathToUpdate, projectName, testGenToken, repoDir, traceId, editFileWithGeneratedCode, envOverrides, }: GenerateTestsType): Promise<{
|
|
20
21
|
isError: boolean;
|
|
21
22
|
error: string;
|
|
22
|
-
|
|
23
|
-
|
|
23
|
+
result: BrowserAgentResult | undefined;
|
|
24
|
+
usage: Usage | undefined;
|
|
25
|
+
artifacts: ArtifactInput[];
|
|
24
26
|
}>;
|
|
25
27
|
export {};
|
|
26
28
|
//# sourceMappingURL=run.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;
|
|
1
|
+
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,4BAA4B,CAAC;AAclE,OAAO,EAAE,kBAAkB,EAAE,MAAM,QAAQ,CAAC;AAG5C,KAAK,iBAAiB,GAAG;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,yBAAyB,EAAE,OAAO,CAAC;IACnC,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACtC,CAAC;AAEF,wBAAsB,8BAA8B,CAAC,EACnD,gBAAgB,EAChB,OAAO,EACP,YAAY,GACb,EAAE;IACD,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,MAAM,CAAC,CAQlB;AAED,wBAAsB,gBAAgB,CAAC,EACrC,YAAY,EACZ,cAAc,EACd,YAAY,EACZ,gBAAgB,EAChB,WAAW,EACX,YAAY,EACZ,OAAO,EACP,OAAO,EACP,yBAAyB,EACzB,YAAY,GACb,EAAE,iBAAiB,GAAG,OAAO,CAAC;IAC7B,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,kBAAkB,GAAG,SAAS,CAAC;IACvC,KAAK,EAAE,KAAK,GAAG,SAAS,CAAC;IACzB,SAAS,EAAE,aAAa,EAAE,CAAC;CAC5B,CAAC,CAmGD"}
|
|
@@ -4,7 +4,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
6
|
exports.convertProjectsFilterToProject = convertProjectsFilterToProject;
|
|
7
|
-
exports.
|
|
7
|
+
exports.runBrowsingAgent = runBrowsingAgent;
|
|
8
8
|
const test_run_1 = require("@empiricalrun/test-run");
|
|
9
9
|
const detect_port_1 = __importDefault(require("detect-port"));
|
|
10
10
|
const fs_1 = __importDefault(require("fs"));
|
|
@@ -19,7 +19,7 @@ async function convertProjectsFilterToProject({ pwProjectsFilter, repoDir, testF
|
|
|
19
19
|
const project = await (0, utils_2.detectProjectName)(testFilePath, playwrightConfig, pwProjectsFilter);
|
|
20
20
|
return project;
|
|
21
21
|
}
|
|
22
|
-
async function
|
|
22
|
+
async function runBrowsingAgent({ testCaseName, testCaseSuites, testFilePath, filePathToUpdate, projectName, testGenToken, repoDir, traceId, editFileWithGeneratedCode, envOverrides, }) {
|
|
23
23
|
const absTestFilePath = path_1.default.join(repoDir, testFilePath);
|
|
24
24
|
if (!fs_1.default.existsSync(absTestFilePath)) {
|
|
25
25
|
const errorMsg = `File for master agent to run not found: ${testFilePath}`;
|
|
@@ -51,6 +51,7 @@ async function generateTestsUsingMasterAgent({ testCaseName, testCaseSuites, tes
|
|
|
51
51
|
projects: [projectName],
|
|
52
52
|
passthroughArgs: "--retries 0 --timeout 0",
|
|
53
53
|
repoDir,
|
|
54
|
+
envOverrides: envOverrides,
|
|
54
55
|
// @ts-ignore
|
|
55
56
|
platform: "web",
|
|
56
57
|
});
|
|
@@ -79,7 +80,6 @@ async function generateTestsUsingMasterAgent({ testCaseName, testCaseSuites, tes
|
|
|
79
80
|
error = `Failed to generate test using master agent ${e}`;
|
|
80
81
|
console.error(`[generateTestsUsingMasterAgent] ${error}`);
|
|
81
82
|
}
|
|
82
|
-
let artifacts = [];
|
|
83
83
|
if (error) {
|
|
84
84
|
// Clean up the file if there is any error
|
|
85
85
|
try {
|
|
@@ -92,16 +92,17 @@ async function generateTestsUsingMasterAgent({ testCaseName, testCaseSuites, tes
|
|
|
92
92
|
console.error(`[generateTestsUsingMasterAgent] Failed to remove extra scripts from files post test gen error:`, e);
|
|
93
93
|
}
|
|
94
94
|
}
|
|
95
|
-
artifacts = [
|
|
96
|
-
|
|
97
|
-
|
|
95
|
+
const artifacts = [
|
|
96
|
+
...((0, utils_1.findPlaywrightArtifacts)(repoDir) || []),
|
|
97
|
+
...(fileServer.getArtifactInputsFromServer() || []),
|
|
98
|
+
];
|
|
98
99
|
await fileServer.stop();
|
|
100
|
+
const { result, usage } = fileServer.getResultAndUsage();
|
|
99
101
|
return {
|
|
100
102
|
isError: !!error,
|
|
101
103
|
error: error || "",
|
|
102
|
-
|
|
103
|
-
error ||
|
|
104
|
-
"Unknown error, there was no summary or error reported",
|
|
104
|
+
result,
|
|
105
105
|
artifacts,
|
|
106
|
+
usage,
|
|
106
107
|
};
|
|
107
108
|
}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { TraceClient } from "@empiricalrun/llm";
|
|
2
|
-
import { IChatModel
|
|
2
|
+
import { IChatModel } from "@empiricalrun/llm/chat";
|
|
3
|
+
import { SupportedChatModels } from "@empiricalrun/shared-types";
|
|
3
4
|
import { ToolCallService } from "../../tool-call-service";
|
|
4
5
|
import { FileInfo } from "../../types";
|
|
5
6
|
import { ReporterFunction } from "./types";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"agent-loop.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/agent-loop.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,
|
|
1
|
+
{"version":3,"file":"agent-loop.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/agent-loop.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAE,UAAU,EAAmB,MAAM,wBAAwB,CAAC;AACrE,OAAO,EAAE,mBAAmB,EAAE,MAAM,4BAA4B,CAAC;AAEjE,OAAO,EAAE,eAAe,EAAE,MAAM,yBAAyB,CAAC;AAC1D,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAGvC,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAG3C,wBAAsB,aAAa,CAAC,EAClC,SAAS,EACT,aAAa,EACb,QAAQ,EACR,KAAK,EACL,eAAe,EACf,QAAQ,EACR,qBAAqB,GACtB,EAAE;IACD,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,mBAAmB,CAAC;IACnC,eAAe,EAAE,eAAe,CAAC;IACjC,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,QAAQ,EAAE,QAAQ,CAAC;IACnB,qBAAqB,EAAE,OAAO,CAAC;CAChC,iBAqDA"}
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import { createChatModel, IChatModel, SUPPORTED_CHAT_MODELS
|
|
1
|
+
import { createChatModel, IChatModel, SUPPORTED_CHAT_MODELS } from "@empiricalrun/llm/chat";
|
|
2
2
|
import { FileInfo } from "../../types";
|
|
3
3
|
import { chatAgentLoop } from "./agent-loop";
|
|
4
4
|
import { defaultModel } from "./models";
|
|
5
5
|
import { CHAT_STATE_VERSIONS_MIGRATIONS_MAP, chatStateFromModel, createChatState, createChatStateForMessages, fetchToolCallAvailability, getLatestDownloadBuildUrl, LATEST_CHAT_STATE_VERSION, migrateChatState } from "./state";
|
|
6
6
|
import { ReporterFunction } from "./types";
|
|
7
7
|
export { CHAT_STATE_VERSIONS_MIGRATIONS_MAP, chatAgentLoop, chatStateFromModel, createChatModel, createChatState, createChatStateForMessages, defaultModel, fetchToolCallAvailability, getLatestDownloadBuildUrl, LATEST_CHAT_STATE_VERSION, migrateChatState, SUPPORTED_CHAT_MODELS, };
|
|
8
|
-
export type { FileInfo, IChatModel, ReporterFunction
|
|
8
|
+
export type { FileInfo, IChatModel, ReporterFunction };
|
|
9
9
|
//# sourceMappingURL=exports.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"exports.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/exports.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,EACf,UAAU,EACV,qBAAqB,
|
|
1
|
+
{"version":3,"file":"exports.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/exports.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,EACf,UAAU,EACV,qBAAqB,EACtB,MAAM,wBAAwB,CAAC;AAEhC,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AACvC,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAC7C,OAAO,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AACxC,OAAO,EACL,kCAAkC,EAClC,kBAAkB,EAClB,eAAe,EACf,0BAA0B,EAC1B,yBAAyB,EACzB,yBAAyB,EACzB,yBAAyB,EACzB,gBAAgB,EACjB,MAAM,SAAS,CAAC;AACjB,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAE3C,OAAO,EACL,kCAAkC,EAClC,aAAa,EACb,kBAAkB,EAClB,eAAe,EACf,eAAe,EACf,0BAA0B,EAC1B,YAAY,EACZ,yBAAyB,EACzB,yBAAyB,EACzB,yBAAyB,EACzB,gBAAgB,EAChB,qBAAqB,GACtB,CAAC;AAEF,YAAY,EAAE,QAAQ,EAAE,UAAU,EAAE,gBAAgB,EAAE,CAAC"}
|
|
@@ -1,9 +1,8 @@
|
|
|
1
|
-
import { SupportedChatModels } from "@empiricalrun/
|
|
2
|
-
export declare function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent,
|
|
1
|
+
import { SupportedChatModels } from "@empiricalrun/shared-types";
|
|
2
|
+
export declare function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, }: {
|
|
3
3
|
selectedModel: SupportedChatModels;
|
|
4
4
|
useDiskForChatState: boolean;
|
|
5
5
|
initialPromptContent: string | undefined;
|
|
6
|
-
withRetry?: boolean;
|
|
7
6
|
}): Promise<void>;
|
|
8
7
|
export declare function runChatAgentForDashboard({ chatSessionId, selectedModel, }: {
|
|
9
8
|
selectedModel: SupportedChatModels;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAGL,mBAAmB,EACpB,MAAM,4BAA4B,CAAC;AAkEpC,wBAAsB,kBAAkB,CAAC,EACvC,mBAAmB,EACnB,aAAa,EACb,oBAAoB,GACrB,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,mBAAmB,EAAE,OAAO,CAAC;IAC7B,oBAAoB,EAAE,MAAM,GAAG,SAAS,CAAC;CAC1C,iBA4HA;AAuBD,wBAAsB,wBAAwB,CAAC,EAC7C,aAAa,EACb,aAAa,GACd,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,aAAa,EAAE,MAAM,CAAC;CACvB,iBA6DA"}
|
package/dist/agent/chat/index.js
CHANGED
|
@@ -21,7 +21,26 @@ function concludeAgent(chatModel, useDiskForChatState, selectedModel, error) {
|
|
|
21
21
|
(0, state_1.saveToDisk)(chatModel.messages, selectedModel, chatModel.askUserForInput, error);
|
|
22
22
|
}
|
|
23
23
|
}
|
|
24
|
-
async function
|
|
24
|
+
async function fetchEnvironmentVariables() {
|
|
25
|
+
// TODO: Wrap in try-catch and log error
|
|
26
|
+
const response = await fetch(`${DASHBOARD_DOMAIN}/api/environment-variables`, {
|
|
27
|
+
headers: {
|
|
28
|
+
"Content-Type": "application/json",
|
|
29
|
+
Authorization: `Bearer ${process.env.EMPIRICALRUN_API_KEY}`,
|
|
30
|
+
},
|
|
31
|
+
});
|
|
32
|
+
const data = await response.json();
|
|
33
|
+
if (!data.data) {
|
|
34
|
+
console.error("Failed to fetch environment variables:", data);
|
|
35
|
+
throw new Error("Failed to fetch environment variables");
|
|
36
|
+
}
|
|
37
|
+
const envVars = data.data.environment_variables.reduce((acc, envVar) => {
|
|
38
|
+
acc[envVar.name] = envVar.value;
|
|
39
|
+
return acc;
|
|
40
|
+
}, {});
|
|
41
|
+
return envVars;
|
|
42
|
+
}
|
|
43
|
+
async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, }) {
|
|
25
44
|
let chatState;
|
|
26
45
|
if (useDiskForChatState) {
|
|
27
46
|
chatState = (0, state_1.loadChatState)();
|
|
@@ -46,8 +65,8 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
|
|
|
46
65
|
console.log(`${(0, picocolors_1.blue)(latest.role)}: ${latest.textMessage}`);
|
|
47
66
|
}
|
|
48
67
|
}
|
|
49
|
-
|
|
50
|
-
|
|
68
|
+
if (chatState && chatState.error) {
|
|
69
|
+
// Reset error state as we are attempting a retry
|
|
51
70
|
chatState.error = null;
|
|
52
71
|
}
|
|
53
72
|
const handleSigInt = () => {
|
|
@@ -99,7 +118,6 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
|
|
|
99
118
|
}
|
|
100
119
|
}
|
|
101
120
|
else {
|
|
102
|
-
// TODO: Should we pass a loader function? That would allow us to show a spinner
|
|
103
121
|
if (!process.env.EMPIRICALRUN_API_KEY) {
|
|
104
122
|
throw new Error("EMPIRICALRUN_API_KEY is not set");
|
|
105
123
|
}
|
|
@@ -111,7 +129,7 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
|
|
|
111
129
|
apiKey: process.env.EMPIRICALRUN_API_KEY,
|
|
112
130
|
trace,
|
|
113
131
|
featureFlags: [],
|
|
114
|
-
environmentOverrides:
|
|
132
|
+
environmentOverrides: await fetchEnvironmentVariables(),
|
|
115
133
|
});
|
|
116
134
|
const fileInfo = await (0, file_tree_1.getFileInfoFromFS)(process.cwd());
|
|
117
135
|
await (0, agent_loop_1.chatAgentLoop)({
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { SupportedChatModels } from "@empiricalrun/shared-types";
|
|
2
2
|
export declare const defaultModel: SupportedChatModels;
|
|
3
3
|
export declare const modelLabels: Record<SupportedChatModels, string>;
|
|
4
4
|
//# sourceMappingURL=models.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/models.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/models.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,mBAAmB,EAAE,MAAM,4BAA4B,CAAC;AAWjE,eAAO,MAAM,YAAY,EAAE,mBAA6C,CAAC;AAEzE,eAAO,MAAM,WAAW,EAAE,MAAM,CAAC,mBAAmB,EAAE,MAAM,CAOzD,CAAC"}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { IChatModel
|
|
2
|
-
import { CanonicalMessage, ChatState, ChatStateError } from "@empiricalrun/shared-types";
|
|
1
|
+
import { IChatModel } from "@empiricalrun/llm/chat";
|
|
2
|
+
import { CanonicalMessage, ChatState, ChatStateError, SupportedChatModels } from "@empiricalrun/shared-types";
|
|
3
3
|
export declare const CHAT_STATE_VERSIONS_MIGRATIONS_MAP: Record<string, (state: any) => any>;
|
|
4
4
|
export declare const LATEST_CHAT_STATE_VERSION = "0.1";
|
|
5
5
|
export declare const CHAT_STATE_PATH: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"state.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/state.ts"],"names":[],"mappings":"AAAA,OAAO,EAIL,UAAU,
|
|
1
|
+
{"version":3,"file":"state.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/state.ts"],"names":[],"mappings":"AAAA,OAAO,EAIL,UAAU,EACX,MAAM,wBAAwB,CAAC;AAChC,OAAO,EACL,gBAAgB,EAChB,SAAS,EACT,cAAc,EACd,mBAAmB,EACpB,MAAM,4BAA4B,CAAC;AA+BpC,eAAO,MAAM,kCAAkC,EAAE,MAAM,CACrD,MAAM,EACN,CAAC,KAAK,EAAE,GAAG,KAAK,GAAG,CAIpB,CAAC;AAEF,eAAO,MAAM,yBAAyB,QAAQ,CAAC;AAE/C,eAAO,MAAM,eAAe,QAI3B,CAAC;AAEF,wBAAgB,eAAe,CAAC,EAC9B,UAAU,EACV,aAAa,EACb,aAAa,EACb,KAAK,GACN,EAAE;IACD,UAAU,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B,aAAa,EAAE,SAAS,GAAG,SAAS,CAAC;IACrC,aAAa,EAAE,mBAAmB,CAAC;IACnC,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,aAYA;AAED,wBAAgB,0BAA0B,CAAC,EACzC,QAAQ,EACR,aAAa,EACb,eAAe,EACf,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,GAAG,CAAC;IACd,aAAa,EAAE,mBAAmB,CAAC;IACnC,eAAe,EAAE,OAAO,CAAC;IACzB,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,GAAG,SAAS,CASZ;AAED,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,EACpC,SAAS,EACT,aAAa,EACb,KAAK,GACN,EAAE;IACD,SAAS,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC;IACzB,aAAa,EAAE,mBAAmB,CAAC;IACnC,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,aAOA;AAED,wBAAgB,aAAa,IAAI,SAAS,GAAG,SAAS,CAarD;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,GAAG,GAAG,SAAS,CAqBzD;AAED,wBAAgB,UAAU,CAAC,CAAC,EAC1B,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,EAClB,aAAa,EAAE,mBAAmB,EAClC,eAAe,EAAE,OAAO,EACxB,KAAK,EAAE,cAAc,GAAG,IAAI,QAgB7B;AA2BD,wBAAgB,yBAAyB,CACvC,QAAQ,EAAE,gBAAgB,EAAE,GAC3B,MAAM,GAAG,IAAI,CAef;AAED,wBAAgB,yBAAyB,CACvC,aAAa,EAAE,MAAM,EACrB,QAAQ,EAAE,gBAAgB,EAAE;;;EAe7B"}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { TraceClient } from "@empiricalrun/llm";
|
|
2
|
-
import { IChatModel
|
|
2
|
+
import { IChatModel } from "@empiricalrun/llm/chat";
|
|
3
|
+
import { SupportedChatModels } from "@empiricalrun/shared-types";
|
|
3
4
|
import { ReporterFunction } from "./types";
|
|
4
5
|
export declare const log: (...args: any[]) => void;
|
|
5
6
|
export declare function getModelName(model: string): string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,
|
|
1
|
+
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAuB,UAAU,EAAE,MAAM,wBAAwB,CAAC;AACzE,OAAO,EAEL,mBAAmB,EACpB,MAAM,4BAA4B,CAAC;AAIpC,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAE3C,eAAO,MAAM,GAAG,GAAI,GAAG,MAAM,GAAG,EAAE,SAEjC,CAAC;AAcF,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAMlD;AAED,wBAAsB,gBAAgB,CAAC,EACrC,OAAO,EACP,KAAK,EACL,SAAS,EACT,aAAa,EACb,QAAQ,EACR,KAAK,GACN,EAAE;IACD,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,OAAO,CAAC;IACf,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,mBAAmB,CAAC;IACnC,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,iBAsBA"}
|
|
@@ -148,7 +148,7 @@ async function executeModelAction(page, action, codegen) {
|
|
|
148
148
|
scroll_y,
|
|
149
149
|
});
|
|
150
150
|
await page.mouse.move(x, y);
|
|
151
|
-
await page.
|
|
151
|
+
await page.mouse.wheel(scroll_x, scroll_y);
|
|
152
152
|
actionSummary = `Scroll at (${x}, ${y}) with offsets (scroll_x=${scroll_x}, scroll_y=${scroll_y})`;
|
|
153
153
|
actionCode = await codegen.getCodeForLastAction();
|
|
154
154
|
break;
|
|
@@ -1,6 +1,13 @@
|
|
|
1
1
|
import { TraceClient } from "@empiricalrun/llm";
|
|
2
|
-
import {
|
|
2
|
+
import { ArtifactInputData, Usage } from "@empiricalrun/shared-types";
|
|
3
3
|
import { Page } from "playwright";
|
|
4
|
+
export type BrowserAgentResult = Array<{
|
|
5
|
+
type: "text";
|
|
6
|
+
text: string;
|
|
7
|
+
} | {
|
|
8
|
+
type: "screenshot";
|
|
9
|
+
screenshot: ArtifactInputData;
|
|
10
|
+
}>;
|
|
4
11
|
export declare function createTestUsingComputerUseAgent({ page, task, trace, }: {
|
|
5
12
|
page: Page;
|
|
6
13
|
task: string;
|
|
@@ -8,7 +15,7 @@ export declare function createTestUsingComputerUseAgent({ page, task, trace, }:
|
|
|
8
15
|
}): Promise<{
|
|
9
16
|
code: string;
|
|
10
17
|
importPaths: string[];
|
|
11
|
-
|
|
12
|
-
|
|
18
|
+
result: BrowserAgentResult;
|
|
19
|
+
usage: Usage;
|
|
13
20
|
}>;
|
|
14
21
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAC/D,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAC/D,OAAO,EAAE,iBAAiB,EAAE,KAAK,EAAE,MAAM,4BAA4B,CAAC;AAStE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAgClC,MAAM,MAAM,kBAAkB,GAAG,KAAK,CAClC;IACE,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;CACd,GACD;IACE,IAAI,EAAE,YAAY,CAAC;IACnB,UAAU,EAAE,iBAAiB,CAAC;CAC/B,CACJ,CAAC;AAEF,wBAAsB,+BAA+B,CAAC,EACpD,IAAI,EACJ,IAAI,EACJ,KAAK,GACN,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,MAAM,EAAE,kBAAkB,CAAC;IAC3B,KAAK,EAAE,KAAK,CAAC;CACd,CAAC,CAuND"}
|
package/dist/agent/cua/index.js
CHANGED
|
@@ -9,26 +9,34 @@ const openai_1 = __importDefault(require("openai"));
|
|
|
9
9
|
const computer_1 = require("./computer");
|
|
10
10
|
const model_1 = require("./model");
|
|
11
11
|
const element_from_point_1 = require("./pw-codegen/element-from-point");
|
|
12
|
-
|
|
12
|
+
const pw_pause_1 = require("./pw-codegen/pw-pause");
|
|
13
|
+
const MAX_ITERATIONS = 15;
|
|
14
|
+
async function getCodegenInstance() {
|
|
15
|
+
const repoDir = process.cwd();
|
|
16
|
+
const canUsePwPause = await (0, pw_pause_1.canUsePauseCodegen)(repoDir);
|
|
17
|
+
if (canUsePwPause) {
|
|
18
|
+
console.log("[getCodegen] using PlaywrightPauseCodegen");
|
|
19
|
+
return new pw_pause_1.PlaywrightPauseCodegen();
|
|
20
|
+
}
|
|
21
|
+
console.log("[getCodegen] using ElementFromPointCodegen");
|
|
13
22
|
return new element_from_point_1.ElementFromPointCodegen();
|
|
14
|
-
// TODO: Add support for page.pause approach
|
|
15
|
-
// We can use PlaywrightPauseCodegen if playwright patch was successful,
|
|
16
|
-
// IPC port is available and PW_CODEGEN_NO_INSPECTOR env var is set
|
|
17
23
|
}
|
|
18
|
-
function
|
|
24
|
+
function artifact(screenshot, name) {
|
|
19
25
|
return {
|
|
20
|
-
name: `${
|
|
26
|
+
name: `${name}`,
|
|
21
27
|
contentType: "image/png",
|
|
22
|
-
data: Buffer.from(
|
|
28
|
+
data: Buffer.from(screenshot, "base64"),
|
|
23
29
|
};
|
|
24
30
|
}
|
|
25
31
|
async function createTestUsingComputerUseAgent({ page, task, trace, }) {
|
|
26
|
-
const codegen =
|
|
32
|
+
const codegen = await getCodegenInstance();
|
|
27
33
|
await codegen.initialize(page);
|
|
28
|
-
const
|
|
29
|
-
const
|
|
30
|
-
|
|
34
|
+
const screenshot = await (0, computer_1.getScreenshot)(page);
|
|
35
|
+
const initialArtifact = artifact(screenshot, "Initial screen");
|
|
36
|
+
let result = [
|
|
37
|
+
{ type: "screenshot", screenshot: initialArtifact },
|
|
31
38
|
];
|
|
39
|
+
let tokensUsed = { input: 0, output: 0 };
|
|
32
40
|
const viewport = page.viewportSize();
|
|
33
41
|
let screenWidth = viewport?.width || 1280;
|
|
34
42
|
let screenHeight = viewport?.height || 720;
|
|
@@ -49,7 +57,7 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
|
|
|
49
57
|
},
|
|
50
58
|
{
|
|
51
59
|
type: "input_image",
|
|
52
|
-
image_url: `data:image/png;base64,${
|
|
60
|
+
image_url: `data:image/png;base64,${screenshot}`,
|
|
53
61
|
detail: "high",
|
|
54
62
|
},
|
|
55
63
|
],
|
|
@@ -59,13 +67,16 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
|
|
|
59
67
|
screenHeight,
|
|
60
68
|
openAIClient,
|
|
61
69
|
});
|
|
70
|
+
tokensUsed.input += response.usage?.input_tokens || 0;
|
|
71
|
+
tokensUsed.output += response.usage?.output_tokens || 0;
|
|
62
72
|
let isTaskDone = false;
|
|
63
|
-
let maxIterations = 15;
|
|
64
73
|
let generatedCode = "";
|
|
65
|
-
let actionsSummary = [];
|
|
66
74
|
let iterationIndex = 0;
|
|
67
|
-
while (!isTaskDone && iterationIndex <
|
|
68
|
-
|
|
75
|
+
while (!isTaskDone && iterationIndex < MAX_ITERATIONS) {
|
|
76
|
+
result.push({
|
|
77
|
+
type: "text",
|
|
78
|
+
text: `# Agent iteration ${iterationIndex}`,
|
|
79
|
+
});
|
|
69
80
|
iterationIndex++;
|
|
70
81
|
const computerCalls = response.output.filter((item) => item.type === "computer_call");
|
|
71
82
|
const functionCalls = response.output.filter((item) => item.type === "function_call");
|
|
@@ -75,7 +86,10 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
|
|
|
75
86
|
const content = assistantOutput.content.find((item) => item.type === "output_text");
|
|
76
87
|
if (content && "text" in content) {
|
|
77
88
|
// TODO: This ignores `ResponseOutputRefusal` type (refusal from assistant)
|
|
78
|
-
|
|
89
|
+
result.push({
|
|
90
|
+
type: "text",
|
|
91
|
+
text: `Agent summary: ${content.text}`,
|
|
92
|
+
});
|
|
79
93
|
}
|
|
80
94
|
}
|
|
81
95
|
isTaskDone = true;
|
|
@@ -86,7 +100,10 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
|
|
|
86
100
|
const reasoningItem = reasoning;
|
|
87
101
|
const summaryText = reasoningItem.summary?.find((item) => item.type === "summary_text")?.text;
|
|
88
102
|
if (summaryText) {
|
|
89
|
-
|
|
103
|
+
result.push({
|
|
104
|
+
type: "text",
|
|
105
|
+
text: `Action reasoning: ${summaryText}`,
|
|
106
|
+
});
|
|
90
107
|
}
|
|
91
108
|
}
|
|
92
109
|
// We expect either a function call or a computer call in the response.
|
|
@@ -98,9 +115,15 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
|
|
|
98
115
|
const args = JSON.parse(functionCall.arguments);
|
|
99
116
|
const { actionSummary, actionCode } = await (0, computer_1.executeModelAction)(page, { type: "goto", url: args.url }, codegen);
|
|
100
117
|
executedActionSummary = actionSummary;
|
|
101
|
-
|
|
118
|
+
result.push({
|
|
119
|
+
type: "text",
|
|
120
|
+
text: `Action executed: ${actionSummary}`,
|
|
121
|
+
});
|
|
102
122
|
if (actionCode) {
|
|
103
|
-
|
|
123
|
+
result.push({
|
|
124
|
+
type: "text",
|
|
125
|
+
text: `Generated code: ${actionCode}`,
|
|
126
|
+
});
|
|
104
127
|
generatedCode += actionCode;
|
|
105
128
|
}
|
|
106
129
|
toolCallOutput = {
|
|
@@ -116,20 +139,28 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
|
|
|
116
139
|
// Execute the action and take a screenshot
|
|
117
140
|
const { actionSummary, actionCode } = await (0, computer_1.executeModelAction)(page, action, codegen);
|
|
118
141
|
executedActionSummary = actionSummary;
|
|
119
|
-
|
|
120
|
-
|
|
142
|
+
result.push({
|
|
143
|
+
type: "text",
|
|
144
|
+
text: `Action executed: ${actionSummary}`,
|
|
145
|
+
});
|
|
146
|
+
result.push({
|
|
147
|
+
type: "text",
|
|
148
|
+
text: `Generated code: ${actionCode}`,
|
|
149
|
+
});
|
|
121
150
|
generatedCode += actionCode;
|
|
122
151
|
// Allow time for changes to take effect.
|
|
123
152
|
await new Promise((resolve) => setTimeout(resolve, 1000));
|
|
124
|
-
const
|
|
125
|
-
|
|
126
|
-
|
|
153
|
+
const screenshot = await (0, computer_1.getScreenshot)(page);
|
|
154
|
+
result.push({
|
|
155
|
+
type: "screenshot",
|
|
156
|
+
screenshot: artifact(screenshot, actionSummary),
|
|
157
|
+
});
|
|
127
158
|
toolCallOutput = {
|
|
128
159
|
type: "computer_call_output",
|
|
129
160
|
call_id: computerCall.call_id,
|
|
130
161
|
output: {
|
|
131
162
|
type: "computer_screenshot",
|
|
132
|
-
image_url: `data:image/png;base64,${
|
|
163
|
+
image_url: `data:image/png;base64,${screenshot}`,
|
|
133
164
|
},
|
|
134
165
|
acknowledged_safety_checks: computerCall.pending_safety_checks,
|
|
135
166
|
};
|
|
@@ -155,19 +186,25 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
|
|
|
155
186
|
screenHeight,
|
|
156
187
|
openAIClient,
|
|
157
188
|
});
|
|
189
|
+
tokensUsed.input += response.usage?.input_tokens || 0;
|
|
190
|
+
tokensUsed.output += response.usage?.output_tokens || 0;
|
|
158
191
|
}
|
|
159
192
|
if (!isTaskDone) {
|
|
160
|
-
|
|
193
|
+
const logMessage = `Max iteration limit hit: Task not done after ${MAX_ITERATIONS} iterations`;
|
|
194
|
+
console.log(logMessage);
|
|
195
|
+
result.push({
|
|
196
|
+
type: "text",
|
|
197
|
+
text: logMessage,
|
|
198
|
+
});
|
|
161
199
|
}
|
|
162
|
-
trace?.update({
|
|
163
|
-
output: { code: generatedCode, actionsSummary: actionsSummary.join("\n") },
|
|
164
|
-
});
|
|
200
|
+
trace?.update({ output: { result } });
|
|
165
201
|
return {
|
|
166
|
-
|
|
202
|
+
result,
|
|
167
203
|
code: generatedCode,
|
|
168
|
-
// TODO: Does not support skills (from helper methods in pages/ dir),
|
|
169
|
-
// and therefore, import paths are empty
|
|
170
204
|
importPaths: [],
|
|
171
|
-
|
|
205
|
+
usage: {
|
|
206
|
+
tokens: tokensUsed,
|
|
207
|
+
cost: (0, model_1.tokensToCost)(tokensUsed),
|
|
208
|
+
},
|
|
172
209
|
};
|
|
173
210
|
}
|
|
@@ -7,4 +7,11 @@ export declare function callComputerUseModel({ input, previousResponseId, screen
|
|
|
7
7
|
screenHeight: number;
|
|
8
8
|
openAIClient: OpenAI;
|
|
9
9
|
}): Promise<Response>;
|
|
10
|
+
export declare function tokensToCost(tokens: {
|
|
11
|
+
input: number;
|
|
12
|
+
output: number;
|
|
13
|
+
}): {
|
|
14
|
+
input: number;
|
|
15
|
+
output: number;
|
|
16
|
+
};
|
|
10
17
|
//# sourceMappingURL=model.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAEL,QAAQ,EACR,iBAAiB,EAClB,MAAM,0CAA0C,CAAC;AA8BlD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,kBAAkB,EAClB,WAAW,EACX,YAAY,EACZ,YAAY,GACb,EAAE;IACD,KAAK,EAAE,iBAAiB,EAAE,CAAC;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,QAAQ,CAAC,CAuBpB"}
|
|
1
|
+
{"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAEL,QAAQ,EACR,iBAAiB,EAClB,MAAM,0CAA0C,CAAC;AA8BlD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,kBAAkB,EAClB,WAAW,EACX,YAAY,EACZ,YAAY,GACb,EAAE;IACD,KAAK,EAAE,iBAAiB,EAAE,CAAC;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,QAAQ,CAAC,CAuBpB;AAED,wBAAgB,YAAY,CAAC,MAAM,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE;;;EAQrE"}
|
package/dist/agent/cua/model.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.callComputerUseModel = callComputerUseModel;
|
|
4
|
+
exports.tokensToCost = tokensToCost;
|
|
4
5
|
const INSTRUCTIONS = `You will be asked to execute some actions in a browser context.
|
|
5
6
|
Don't ask the user for confirmations - just execute the actions.
|
|
6
7
|
|
|
@@ -50,3 +51,12 @@ async function callComputerUseModel({ input, previousResponseId, screenWidth, sc
|
|
|
50
51
|
});
|
|
51
52
|
return response;
|
|
52
53
|
}
|
|
54
|
+
function tokensToCost(tokens) {
|
|
55
|
+
// Costs for "computer-use-preview-2025-03-11"
|
|
56
|
+
// https://platform.openai.com/docs/models/computer-use-preview
|
|
57
|
+
const inputUsdFor1MTokens = 3.0;
|
|
58
|
+
const outputUsdFor1MTokens = 12.0;
|
|
59
|
+
const inputCost = (tokens.input / 1_000_000) * inputUsdFor1MTokens;
|
|
60
|
+
const outputCost = (tokens.output / 1_000_000) * outputUsdFor1MTokens;
|
|
61
|
+
return { input: inputCost, output: outputCost };
|
|
62
|
+
}
|
|
@@ -1,15 +1,17 @@
|
|
|
1
1
|
import type { Page } from "playwright";
|
|
2
|
-
|
|
3
|
-
export
|
|
4
|
-
export declare function
|
|
5
|
-
export declare class PlaywrightPauseCodegen {
|
|
2
|
+
import { BasePlaywrightCodegen } from "../types";
|
|
3
|
+
export { preparePlaywrightForCodegen, revertToOriginalPwCode } from "./patch";
|
|
4
|
+
export declare function canUsePauseCodegen(repoDir: string): Promise<boolean>;
|
|
5
|
+
export declare class PlaywrightPauseCodegen implements BasePlaywrightCodegen {
|
|
6
6
|
private port;
|
|
7
7
|
private page;
|
|
8
8
|
private server;
|
|
9
|
+
private codeForLastAction;
|
|
9
10
|
constructor();
|
|
11
|
+
private saveCode;
|
|
10
12
|
initialize(page: Page): Promise<void>;
|
|
11
13
|
startPlaywrightCodegen(page: Page): Promise<void>;
|
|
12
14
|
recordAction(): Promise<void>;
|
|
13
|
-
|
|
15
|
+
getCodeForLastAction(): Promise<string>;
|
|
14
16
|
}
|
|
15
17
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/agent/cua/pw-codegen/pw-pause/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/agent/cua/pw-codegen/pw-pause/index.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAEvC,OAAO,EAAE,qBAAqB,EAAE,MAAM,UAAU,CAAC;AAIjD,OAAO,EAAE,2BAA2B,EAAE,sBAAsB,EAAE,MAAM,SAAS,CAAC;AAE9E,wBAAsB,kBAAkB,CAAC,OAAO,EAAE,MAAM,oBAqCvD;AAiBD,qBAAa,sBAAuB,YAAW,qBAAqB;IAClE,OAAO,CAAC,IAAI,CAAa;IACzB,OAAO,CAAC,IAAI,CAAmB;IAC/B,OAAO,CAAC,MAAM,CAA4C;IAC1D,OAAO,CAAC,iBAAiB,CAAqB;;YAMhC,QAAQ;IAOhB,UAAU,CAAC,IAAI,EAAE,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;IAkBrC,sBAAsB,CAAC,IAAI,EAAE,IAAI;IAsBjC,YAAY,IAAI,OAAO,CAAC,IAAI,CAAC;IAI7B,oBAAoB,IAAI,OAAO,CAAC,MAAM,CAAC;CAU9C"}
|