@empiricalrun/test-gen 0.64.2 → 0.65.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/dist/agent/browsing/run.d.ts +6 -4
- package/dist/agent/browsing/run.d.ts.map +1 -1
- package/dist/agent/browsing/run.js +9 -9
- package/dist/agent/chat/agent-loop.d.ts +2 -1
- package/dist/agent/chat/agent-loop.d.ts.map +1 -1
- package/dist/agent/chat/exports.d.ts +2 -2
- package/dist/agent/chat/exports.d.ts.map +1 -1
- package/dist/agent/chat/index.d.ts +1 -1
- package/dist/agent/chat/index.d.ts.map +1 -1
- package/dist/agent/chat/index.js +16 -1
- package/dist/agent/chat/models.d.ts +1 -1
- package/dist/agent/chat/models.d.ts.map +1 -1
- package/dist/agent/chat/state.d.ts +2 -2
- package/dist/agent/chat/state.d.ts.map +1 -1
- package/dist/agent/chat/utils.d.ts +2 -1
- package/dist/agent/chat/utils.d.ts.map +1 -1
- package/dist/agent/cua/computer.js +1 -1
- package/dist/agent/cua/index.d.ts +10 -3
- package/dist/agent/cua/index.d.ts.map +1 -1
- package/dist/agent/cua/index.js +61 -29
- package/dist/agent/cua/model.d.ts +7 -0
- package/dist/agent/cua/model.d.ts.map +1 -1
- package/dist/agent/cua/model.js +10 -0
- package/dist/bin/index.js +2 -2
- package/dist/bin/utils/index.d.ts +1 -1
- package/dist/bin/utils/index.d.ts.map +1 -1
- package/dist/bin/utils/index.js +3 -3
- package/dist/file/client.d.ts +2 -8
- package/dist/file/client.d.ts.map +1 -1
- package/dist/file/client.js +2 -23
- package/dist/file/server.d.ts +15 -3
- package/dist/file/server.d.ts.map +1 -1
- package/dist/file/server.js +17 -28
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +6 -13
- package/dist/tool-call-service/index.d.ts +3 -3
- package/dist/tool-call-service/index.d.ts.map +1 -1
- package/dist/tool-call-service/index.js +4 -3
- package/dist/tool-call-service/utils.d.ts +3 -10
- package/dist/tool-call-service/utils.d.ts.map +1 -1
- package/dist/tool-call-service/utils.js +21 -5
- package/dist/tools/commit-and-create-pr.d.ts.map +1 -1
- package/dist/tools/commit-and-create-pr.js +5 -13
- package/dist/tools/diagnosis-fetcher.d.ts.map +1 -1
- package/dist/tools/diagnosis-fetcher.js +1 -0
- package/dist/tools/download-build.d.ts.map +1 -1
- package/dist/tools/download-build.js +1 -0
- package/dist/tools/grep/index.d.ts.map +1 -1
- package/dist/tools/grep/index.js +1 -0
- package/dist/tools/list-environments.d.ts +3 -0
- package/dist/tools/list-environments.d.ts.map +1 -0
- package/dist/tools/list-environments.js +49 -0
- package/dist/tools/str_replace_editor.d.ts.map +1 -1
- package/dist/tools/str_replace_editor.js +4 -0
- package/dist/tools/test-gen-browser.d.ts +1 -1
- package/dist/tools/test-gen-browser.d.ts.map +1 -1
- package/dist/tools/test-gen-browser.js +50 -12
- package/dist/tools/test-run-fetcher/index.d.ts.map +1 -1
- package/dist/tools/test-run-fetcher/index.js +1 -0
- package/dist/tools/test-run.d.ts.map +1 -1
- package/dist/tools/test-run.js +4 -1
- package/dist/utils/index.d.ts +1 -0
- package/dist/utils/index.d.ts.map +1 -1
- package/dist/utils/index.js +3 -1
- package/dist/utils/json.d.ts +2 -0
- package/dist/utils/json.d.ts.map +1 -0
- package/dist/utils/json.js +24 -0
- package/package.json +3 -3
- package/tsconfig.tsbuildinfo +1 -1
- package/dist/tools/environment-crud.d.ts +0 -4
- package/dist/tools/environment-crud.d.ts.map +0 -1
- package/dist/tools/environment-crud.js +0 -100
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,37 @@
|
|
|
1
1
|
# @empiricalrun/test-gen
|
|
2
2
|
|
|
3
|
+
## 0.65.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- 4623300: feat: added browser queue to add concurrency and avoid blocking of server due to browser dependent tools
|
|
8
|
+
|
|
9
|
+
### Patch Changes
|
|
10
|
+
|
|
11
|
+
- 82acf53: feat: enable tools to contribute to cost calculations
|
|
12
|
+
- 73dd841: feat: change getEnvironment tool to become listEnvironments
|
|
13
|
+
- 8233d49: chore: remove ecs feature flag from Dashboard
|
|
14
|
+
- 0b55884: feat: browser agent tool call returns images in tool result
|
|
15
|
+
- 459d029: feat: pass feature flags to tool calls
|
|
16
|
+
- 7712b2e: chore: move more types to shared-types package
|
|
17
|
+
- 1b08d58: feat: tool response interface supports images for claude
|
|
18
|
+
- 1b9087e: feat: improve feature flags ui, upgrade gemini-pro
|
|
19
|
+
- Updated dependencies [82acf53]
|
|
20
|
+
- Updated dependencies [1177d63]
|
|
21
|
+
- Updated dependencies [7712b2e]
|
|
22
|
+
- Updated dependencies [fb32af6]
|
|
23
|
+
- Updated dependencies [1b08d58]
|
|
24
|
+
- Updated dependencies [1b9087e]
|
|
25
|
+
- @empiricalrun/llm@0.18.0
|
|
26
|
+
|
|
27
|
+
## 0.64.3
|
|
28
|
+
|
|
29
|
+
### Patch Changes
|
|
30
|
+
|
|
31
|
+
- d8d624d: fix: pass mapped keys to codegen recordAction for proper code generation
|
|
32
|
+
- addd52e: feat: add JSON value truncation to prevent oversized tool responses
|
|
33
|
+
- f8a53b0: fix: PR description should not get concatenated across versions
|
|
34
|
+
|
|
3
35
|
## 0.64.2
|
|
4
36
|
|
|
5
37
|
### Patch Changes
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import { ArtifactInput } from "@empiricalrun/shared-types";
|
|
1
|
+
import { ArtifactInput, Usage } from "@empiricalrun/shared-types";
|
|
2
|
+
import { BrowserAgentResult } from "../cua";
|
|
2
3
|
type GenerateTestsType = {
|
|
3
4
|
testCaseName: string;
|
|
4
5
|
testCaseSuites: string[];
|
|
@@ -16,11 +17,12 @@ export declare function convertProjectsFilterToProject({ pwProjectsFilter, repoD
|
|
|
16
17
|
repoDir: string;
|
|
17
18
|
testFilePath: string;
|
|
18
19
|
}): Promise<string>;
|
|
19
|
-
export declare function
|
|
20
|
+
export declare function runBrowsingAgent({ testCaseName, testCaseSuites, testFilePath, filePathToUpdate, projectName, testGenToken, repoDir, traceId, editFileWithGeneratedCode, envOverrides, }: GenerateTestsType): Promise<{
|
|
20
21
|
isError: boolean;
|
|
21
22
|
error: string;
|
|
22
|
-
|
|
23
|
-
|
|
23
|
+
result: BrowserAgentResult | undefined;
|
|
24
|
+
usage: Usage | undefined;
|
|
25
|
+
artifacts: ArtifactInput[];
|
|
24
26
|
}>;
|
|
25
27
|
export {};
|
|
26
28
|
//# sourceMappingURL=run.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;
|
|
1
|
+
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,4BAA4B,CAAC;AAclE,OAAO,EAAE,kBAAkB,EAAE,MAAM,QAAQ,CAAC;AAG5C,KAAK,iBAAiB,GAAG;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,yBAAyB,EAAE,OAAO,CAAC;IACnC,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACtC,CAAC;AAEF,wBAAsB,8BAA8B,CAAC,EACnD,gBAAgB,EAChB,OAAO,EACP,YAAY,GACb,EAAE;IACD,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,MAAM,CAAC,CAQlB;AAED,wBAAsB,gBAAgB,CAAC,EACrC,YAAY,EACZ,cAAc,EACd,YAAY,EACZ,gBAAgB,EAChB,WAAW,EACX,YAAY,EACZ,OAAO,EACP,OAAO,EACP,yBAAyB,EACzB,YAAY,GACb,EAAE,iBAAiB,GAAG,OAAO,CAAC;IAC7B,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,kBAAkB,GAAG,SAAS,CAAC;IACvC,KAAK,EAAE,KAAK,GAAG,SAAS,CAAC;IACzB,SAAS,EAAE,aAAa,EAAE,CAAC;CAC5B,CAAC,CAkGD"}
|
|
@@ -4,7 +4,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
6
|
exports.convertProjectsFilterToProject = convertProjectsFilterToProject;
|
|
7
|
-
exports.
|
|
7
|
+
exports.runBrowsingAgent = runBrowsingAgent;
|
|
8
8
|
const test_run_1 = require("@empiricalrun/test-run");
|
|
9
9
|
const detect_port_1 = __importDefault(require("detect-port"));
|
|
10
10
|
const fs_1 = __importDefault(require("fs"));
|
|
@@ -19,7 +19,7 @@ async function convertProjectsFilterToProject({ pwProjectsFilter, repoDir, testF
|
|
|
19
19
|
const project = await (0, utils_2.detectProjectName)(testFilePath, playwrightConfig, pwProjectsFilter);
|
|
20
20
|
return project;
|
|
21
21
|
}
|
|
22
|
-
async function
|
|
22
|
+
async function runBrowsingAgent({ testCaseName, testCaseSuites, testFilePath, filePathToUpdate, projectName, testGenToken, repoDir, traceId, editFileWithGeneratedCode, envOverrides, }) {
|
|
23
23
|
const absTestFilePath = path_1.default.join(repoDir, testFilePath);
|
|
24
24
|
if (!fs_1.default.existsSync(absTestFilePath)) {
|
|
25
25
|
const errorMsg = `File for master agent to run not found: ${testFilePath}`;
|
|
@@ -79,7 +79,6 @@ async function generateTestsUsingMasterAgent({ testCaseName, testCaseSuites, tes
|
|
|
79
79
|
error = `Failed to generate test using master agent ${e}`;
|
|
80
80
|
console.error(`[generateTestsUsingMasterAgent] ${error}`);
|
|
81
81
|
}
|
|
82
|
-
let artifacts = [];
|
|
83
82
|
if (error) {
|
|
84
83
|
// Clean up the file if there is any error
|
|
85
84
|
try {
|
|
@@ -92,16 +91,17 @@ async function generateTestsUsingMasterAgent({ testCaseName, testCaseSuites, tes
|
|
|
92
91
|
console.error(`[generateTestsUsingMasterAgent] Failed to remove extra scripts from files post test gen error:`, e);
|
|
93
92
|
}
|
|
94
93
|
}
|
|
95
|
-
artifacts = [
|
|
96
|
-
|
|
97
|
-
|
|
94
|
+
const artifacts = [
|
|
95
|
+
...((0, utils_1.findPlaywrightArtifacts)(repoDir) || []),
|
|
96
|
+
...(fileServer.getArtifactInputsFromServer() || []),
|
|
97
|
+
];
|
|
98
98
|
await fileServer.stop();
|
|
99
|
+
const { result, usage } = fileServer.getResultAndUsage();
|
|
99
100
|
return {
|
|
100
101
|
isError: !!error,
|
|
101
102
|
error: error || "",
|
|
102
|
-
|
|
103
|
-
error ||
|
|
104
|
-
"Unknown error, there was no summary or error reported",
|
|
103
|
+
result,
|
|
105
104
|
artifacts,
|
|
105
|
+
usage,
|
|
106
106
|
};
|
|
107
107
|
}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { TraceClient } from "@empiricalrun/llm";
|
|
2
|
-
import { IChatModel
|
|
2
|
+
import { IChatModel } from "@empiricalrun/llm/chat";
|
|
3
|
+
import { SupportedChatModels } from "@empiricalrun/shared-types";
|
|
3
4
|
import { ToolCallService } from "../../tool-call-service";
|
|
4
5
|
import { FileInfo } from "../../types";
|
|
5
6
|
import { ReporterFunction } from "./types";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"agent-loop.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/agent-loop.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,
|
|
1
|
+
{"version":3,"file":"agent-loop.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/agent-loop.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAE,UAAU,EAAmB,MAAM,wBAAwB,CAAC;AACrE,OAAO,EAAE,mBAAmB,EAAE,MAAM,4BAA4B,CAAC;AAEjE,OAAO,EAAE,eAAe,EAAE,MAAM,yBAAyB,CAAC;AAC1D,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAGvC,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAG3C,wBAAsB,aAAa,CAAC,EAClC,SAAS,EACT,aAAa,EACb,QAAQ,EACR,KAAK,EACL,eAAe,EACf,QAAQ,EACR,qBAAqB,GACtB,EAAE;IACD,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,mBAAmB,CAAC;IACnC,eAAe,EAAE,eAAe,CAAC;IACjC,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,QAAQ,EAAE,QAAQ,CAAC;IACnB,qBAAqB,EAAE,OAAO,CAAC;CAChC,iBAqDA"}
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import { createChatModel, IChatModel, SUPPORTED_CHAT_MODELS
|
|
1
|
+
import { createChatModel, IChatModel, SUPPORTED_CHAT_MODELS } from "@empiricalrun/llm/chat";
|
|
2
2
|
import { FileInfo } from "../../types";
|
|
3
3
|
import { chatAgentLoop } from "./agent-loop";
|
|
4
4
|
import { defaultModel } from "./models";
|
|
5
5
|
import { CHAT_STATE_VERSIONS_MIGRATIONS_MAP, chatStateFromModel, createChatState, createChatStateForMessages, fetchToolCallAvailability, getLatestDownloadBuildUrl, LATEST_CHAT_STATE_VERSION, migrateChatState } from "./state";
|
|
6
6
|
import { ReporterFunction } from "./types";
|
|
7
7
|
export { CHAT_STATE_VERSIONS_MIGRATIONS_MAP, chatAgentLoop, chatStateFromModel, createChatModel, createChatState, createChatStateForMessages, defaultModel, fetchToolCallAvailability, getLatestDownloadBuildUrl, LATEST_CHAT_STATE_VERSION, migrateChatState, SUPPORTED_CHAT_MODELS, };
|
|
8
|
-
export type { FileInfo, IChatModel, ReporterFunction
|
|
8
|
+
export type { FileInfo, IChatModel, ReporterFunction };
|
|
9
9
|
//# sourceMappingURL=exports.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"exports.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/exports.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,EACf,UAAU,EACV,qBAAqB,
|
|
1
|
+
{"version":3,"file":"exports.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/exports.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,EACf,UAAU,EACV,qBAAqB,EACtB,MAAM,wBAAwB,CAAC;AAEhC,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AACvC,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAC7C,OAAO,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AACxC,OAAO,EACL,kCAAkC,EAClC,kBAAkB,EAClB,eAAe,EACf,0BAA0B,EAC1B,yBAAyB,EACzB,yBAAyB,EACzB,yBAAyB,EACzB,gBAAgB,EACjB,MAAM,SAAS,CAAC;AACjB,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAE3C,OAAO,EACL,kCAAkC,EAClC,aAAa,EACb,kBAAkB,EAClB,eAAe,EACf,eAAe,EACf,0BAA0B,EAC1B,YAAY,EACZ,yBAAyB,EACzB,yBAAyB,EACzB,yBAAyB,EACzB,gBAAgB,EAChB,qBAAqB,GACtB,CAAC;AAEF,YAAY,EAAE,QAAQ,EAAE,UAAU,EAAE,gBAAgB,EAAE,CAAC"}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { SupportedChatModels } from "@empiricalrun/
|
|
1
|
+
import { SupportedChatModels } from "@empiricalrun/shared-types";
|
|
2
2
|
export declare function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, withRetry, }: {
|
|
3
3
|
selectedModel: SupportedChatModels;
|
|
4
4
|
useDiskForChatState: boolean;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAGL,mBAAmB,EACpB,MAAM,4BAA4B,CAAC;AA8DpC,wBAAsB,kBAAkB,CAAC,EACvC,mBAAmB,EACnB,aAAa,EACb,oBAAoB,EACpB,SAAS,GACV,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,mBAAmB,EAAE,OAAO,CAAC;IAC7B,oBAAoB,EAAE,MAAM,GAAG,SAAS,CAAC;IACzC,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB,iBAwHA;AAuBD,wBAAsB,wBAAwB,CAAC,EAC7C,aAAa,EACb,aAAa,GACd,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,aAAa,EAAE,MAAM,CAAC;CACvB,iBA6DA"}
|
package/dist/agent/chat/index.js
CHANGED
|
@@ -21,6 +21,21 @@ function concludeAgent(chatModel, useDiskForChatState, selectedModel, error) {
|
|
|
21
21
|
(0, state_1.saveToDisk)(chatModel.messages, selectedModel, chatModel.askUserForInput, error);
|
|
22
22
|
}
|
|
23
23
|
}
|
|
24
|
+
async function fetchEnvironmentVariables() {
|
|
25
|
+
// TODO: Wrap in try-catch and log error
|
|
26
|
+
const response = await fetch(`${DASHBOARD_DOMAIN}/api/environment-variables`, {
|
|
27
|
+
headers: {
|
|
28
|
+
"Content-Type": "application/json",
|
|
29
|
+
Authorization: `Bearer ${process.env.EMPIRICALRUN_API_KEY}`,
|
|
30
|
+
},
|
|
31
|
+
});
|
|
32
|
+
const data = await response.json();
|
|
33
|
+
const envVars = data.data.environment_variables.reduce((acc, envVar) => {
|
|
34
|
+
acc[envVar.name] = envVar.value;
|
|
35
|
+
return acc;
|
|
36
|
+
}, {});
|
|
37
|
+
return envVars;
|
|
38
|
+
}
|
|
24
39
|
async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, withRetry, }) {
|
|
25
40
|
let chatState;
|
|
26
41
|
if (useDiskForChatState) {
|
|
@@ -111,7 +126,7 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
|
|
|
111
126
|
apiKey: process.env.EMPIRICALRUN_API_KEY,
|
|
112
127
|
trace,
|
|
113
128
|
featureFlags: [],
|
|
114
|
-
environmentOverrides:
|
|
129
|
+
environmentOverrides: await fetchEnvironmentVariables(),
|
|
115
130
|
});
|
|
116
131
|
const fileInfo = await (0, file_tree_1.getFileInfoFromFS)(process.cwd());
|
|
117
132
|
await (0, agent_loop_1.chatAgentLoop)({
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { SupportedChatModels } from "@empiricalrun/shared-types";
|
|
2
2
|
export declare const defaultModel: SupportedChatModels;
|
|
3
3
|
export declare const modelLabels: Record<SupportedChatModels, string>;
|
|
4
4
|
//# sourceMappingURL=models.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/models.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/models.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,mBAAmB,EAAE,MAAM,4BAA4B,CAAC;AAWjE,eAAO,MAAM,YAAY,EAAE,mBAA6C,CAAC;AAEzE,eAAO,MAAM,WAAW,EAAE,MAAM,CAAC,mBAAmB,EAAE,MAAM,CAOzD,CAAC"}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { IChatModel
|
|
2
|
-
import { CanonicalMessage, ChatState, ChatStateError } from "@empiricalrun/shared-types";
|
|
1
|
+
import { IChatModel } from "@empiricalrun/llm/chat";
|
|
2
|
+
import { CanonicalMessage, ChatState, ChatStateError, SupportedChatModels } from "@empiricalrun/shared-types";
|
|
3
3
|
export declare const CHAT_STATE_VERSIONS_MIGRATIONS_MAP: Record<string, (state: any) => any>;
|
|
4
4
|
export declare const LATEST_CHAT_STATE_VERSION = "0.1";
|
|
5
5
|
export declare const CHAT_STATE_PATH: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"state.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/state.ts"],"names":[],"mappings":"AAAA,OAAO,EAIL,UAAU,
|
|
1
|
+
{"version":3,"file":"state.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/state.ts"],"names":[],"mappings":"AAAA,OAAO,EAIL,UAAU,EACX,MAAM,wBAAwB,CAAC;AAChC,OAAO,EACL,gBAAgB,EAChB,SAAS,EACT,cAAc,EACd,mBAAmB,EACpB,MAAM,4BAA4B,CAAC;AA+BpC,eAAO,MAAM,kCAAkC,EAAE,MAAM,CACrD,MAAM,EACN,CAAC,KAAK,EAAE,GAAG,KAAK,GAAG,CAIpB,CAAC;AAEF,eAAO,MAAM,yBAAyB,QAAQ,CAAC;AAE/C,eAAO,MAAM,eAAe,QAI3B,CAAC;AAEF,wBAAgB,eAAe,CAAC,EAC9B,UAAU,EACV,aAAa,EACb,aAAa,EACb,KAAK,GACN,EAAE;IACD,UAAU,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B,aAAa,EAAE,SAAS,GAAG,SAAS,CAAC;IACrC,aAAa,EAAE,mBAAmB,CAAC;IACnC,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,aAYA;AAED,wBAAgB,0BAA0B,CAAC,EACzC,QAAQ,EACR,aAAa,EACb,eAAe,EACf,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,GAAG,CAAC;IACd,aAAa,EAAE,mBAAmB,CAAC;IACnC,eAAe,EAAE,OAAO,CAAC;IACzB,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,GAAG,SAAS,CASZ;AAED,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,EACpC,SAAS,EACT,aAAa,EACb,KAAK,GACN,EAAE;IACD,SAAS,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC;IACzB,aAAa,EAAE,mBAAmB,CAAC;IACnC,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,aAOA;AAED,wBAAgB,aAAa,IAAI,SAAS,GAAG,SAAS,CAarD;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,GAAG,GAAG,SAAS,CAqBzD;AAED,wBAAgB,UAAU,CAAC,CAAC,EAC1B,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,EAClB,aAAa,EAAE,mBAAmB,EAClC,eAAe,EAAE,OAAO,EACxB,KAAK,EAAE,cAAc,GAAG,IAAI,QAgB7B;AA2BD,wBAAgB,yBAAyB,CACvC,QAAQ,EAAE,gBAAgB,EAAE,GAC3B,MAAM,GAAG,IAAI,CAef;AAED,wBAAgB,yBAAyB,CACvC,aAAa,EAAE,MAAM,EACrB,QAAQ,EAAE,gBAAgB,EAAE;;;EAe7B"}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { TraceClient } from "@empiricalrun/llm";
|
|
2
|
-
import { IChatModel
|
|
2
|
+
import { IChatModel } from "@empiricalrun/llm/chat";
|
|
3
|
+
import { SupportedChatModels } from "@empiricalrun/shared-types";
|
|
3
4
|
import { ReporterFunction } from "./types";
|
|
4
5
|
export declare const log: (...args: any[]) => void;
|
|
5
6
|
export declare function getModelName(model: string): string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,
|
|
1
|
+
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAuB,UAAU,EAAE,MAAM,wBAAwB,CAAC;AACzE,OAAO,EAEL,mBAAmB,EACpB,MAAM,4BAA4B,CAAC;AAIpC,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAE3C,eAAO,MAAM,GAAG,GAAI,GAAG,MAAM,GAAG,EAAE,SAEjC,CAAC;AAcF,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAMlD;AAED,wBAAsB,gBAAgB,CAAC,EACrC,OAAO,EACP,KAAK,EACL,SAAS,EACT,aAAa,EACb,QAAQ,EACR,KAAK,GACN,EAAE;IACD,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,OAAO,CAAC;IACf,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,mBAAmB,CAAC;IACnC,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,iBAsBA"}
|
|
@@ -155,10 +155,10 @@ async function executeModelAction(page, action, codegen) {
|
|
|
155
155
|
}
|
|
156
156
|
case "keypress": {
|
|
157
157
|
const { keys } = action;
|
|
158
|
-
await codegen.recordAction({ type: "keypress", keys });
|
|
159
158
|
const mappedKeys = keys.map((k) => {
|
|
160
159
|
return CUA_KEY_TO_PLAYWRIGHT_KEY[k.toLowerCase()] || k;
|
|
161
160
|
});
|
|
161
|
+
await codegen.recordAction({ type: "keypress", keys: mappedKeys });
|
|
162
162
|
const mappedKey = mappedKeys.join("+"); // ["CTRL", "A"] becomes ControlOrMeta+A
|
|
163
163
|
console.log(`Action: keypress for keys ${keys} -> '${mappedKey}'`);
|
|
164
164
|
await page.keyboard.press(mappedKey);
|
|
@@ -1,6 +1,13 @@
|
|
|
1
1
|
import { TraceClient } from "@empiricalrun/llm";
|
|
2
|
-
import {
|
|
2
|
+
import { ArtifactInputData, Usage } from "@empiricalrun/shared-types";
|
|
3
3
|
import { Page } from "playwright";
|
|
4
|
+
export type BrowserAgentResult = Array<{
|
|
5
|
+
type: "text";
|
|
6
|
+
text: string;
|
|
7
|
+
} | {
|
|
8
|
+
type: "screenshot";
|
|
9
|
+
screenshot: ArtifactInputData;
|
|
10
|
+
}>;
|
|
4
11
|
export declare function createTestUsingComputerUseAgent({ page, task, trace, }: {
|
|
5
12
|
page: Page;
|
|
6
13
|
task: string;
|
|
@@ -8,7 +15,7 @@ export declare function createTestUsingComputerUseAgent({ page, task, trace, }:
|
|
|
8
15
|
}): Promise<{
|
|
9
16
|
code: string;
|
|
10
17
|
importPaths: string[];
|
|
11
|
-
|
|
12
|
-
|
|
18
|
+
result: BrowserAgentResult;
|
|
19
|
+
usage: Usage;
|
|
13
20
|
}>;
|
|
14
21
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAC/D,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAC/D,OAAO,EAAE,iBAAiB,EAAE,KAAK,EAAE,MAAM,4BAA4B,CAAC;AAStE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAwBlC,MAAM,MAAM,kBAAkB,GAAG,KAAK,CAClC;IACE,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;CACd,GACD;IACE,IAAI,EAAE,YAAY,CAAC;IACnB,UAAU,EAAE,iBAAiB,CAAC;CAC/B,CACJ,CAAC;AAEF,wBAAsB,+BAA+B,CAAC,EACpD,IAAI,EACJ,IAAI,EACJ,KAAK,GACN,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,MAAM,EAAE,kBAAkB,CAAC;IAC3B,KAAK,EAAE,KAAK,CAAC;CACd,CAAC,CAuND"}
|
package/dist/agent/cua/index.js
CHANGED
|
@@ -9,26 +9,29 @@ const openai_1 = __importDefault(require("openai"));
|
|
|
9
9
|
const computer_1 = require("./computer");
|
|
10
10
|
const model_1 = require("./model");
|
|
11
11
|
const element_from_point_1 = require("./pw-codegen/element-from-point");
|
|
12
|
+
const MAX_ITERATIONS = 15;
|
|
12
13
|
function getCodegen() {
|
|
13
14
|
return new element_from_point_1.ElementFromPointCodegen();
|
|
14
15
|
// TODO: Add support for page.pause approach
|
|
15
16
|
// We can use PlaywrightPauseCodegen if playwright patch was successful,
|
|
16
17
|
// IPC port is available and PW_CODEGEN_NO_INSPECTOR env var is set
|
|
17
18
|
}
|
|
18
|
-
function
|
|
19
|
+
function artifact(screenshot, name) {
|
|
19
20
|
return {
|
|
20
|
-
name: `${
|
|
21
|
+
name: `${name}`,
|
|
21
22
|
contentType: "image/png",
|
|
22
|
-
data: Buffer.from(
|
|
23
|
+
data: Buffer.from(screenshot, "base64"),
|
|
23
24
|
};
|
|
24
25
|
}
|
|
25
26
|
async function createTestUsingComputerUseAgent({ page, task, trace, }) {
|
|
26
27
|
const codegen = getCodegen();
|
|
27
28
|
await codegen.initialize(page);
|
|
28
|
-
const
|
|
29
|
-
const
|
|
30
|
-
|
|
29
|
+
const screenshot = await (0, computer_1.getScreenshot)(page);
|
|
30
|
+
const initialArtifact = artifact(screenshot, "Initial screen");
|
|
31
|
+
let result = [
|
|
32
|
+
{ type: "screenshot", screenshot: initialArtifact },
|
|
31
33
|
];
|
|
34
|
+
let tokensUsed = { input: 0, output: 0 };
|
|
32
35
|
const viewport = page.viewportSize();
|
|
33
36
|
let screenWidth = viewport?.width || 1280;
|
|
34
37
|
let screenHeight = viewport?.height || 720;
|
|
@@ -49,7 +52,7 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
|
|
|
49
52
|
},
|
|
50
53
|
{
|
|
51
54
|
type: "input_image",
|
|
52
|
-
image_url: `data:image/png;base64,${
|
|
55
|
+
image_url: `data:image/png;base64,${screenshot}`,
|
|
53
56
|
detail: "high",
|
|
54
57
|
},
|
|
55
58
|
],
|
|
@@ -59,13 +62,16 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
|
|
|
59
62
|
screenHeight,
|
|
60
63
|
openAIClient,
|
|
61
64
|
});
|
|
65
|
+
tokensUsed.input += response.usage?.input_tokens || 0;
|
|
66
|
+
tokensUsed.output += response.usage?.output_tokens || 0;
|
|
62
67
|
let isTaskDone = false;
|
|
63
|
-
let maxIterations = 15;
|
|
64
68
|
let generatedCode = "";
|
|
65
|
-
let actionsSummary = [];
|
|
66
69
|
let iterationIndex = 0;
|
|
67
|
-
while (!isTaskDone && iterationIndex <
|
|
68
|
-
|
|
70
|
+
while (!isTaskDone && iterationIndex < MAX_ITERATIONS) {
|
|
71
|
+
result.push({
|
|
72
|
+
type: "text",
|
|
73
|
+
text: `# Agent iteration ${iterationIndex}`,
|
|
74
|
+
});
|
|
69
75
|
iterationIndex++;
|
|
70
76
|
const computerCalls = response.output.filter((item) => item.type === "computer_call");
|
|
71
77
|
const functionCalls = response.output.filter((item) => item.type === "function_call");
|
|
@@ -75,7 +81,10 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
|
|
|
75
81
|
const content = assistantOutput.content.find((item) => item.type === "output_text");
|
|
76
82
|
if (content && "text" in content) {
|
|
77
83
|
// TODO: This ignores `ResponseOutputRefusal` type (refusal from assistant)
|
|
78
|
-
|
|
84
|
+
result.push({
|
|
85
|
+
type: "text",
|
|
86
|
+
text: `Agent summary: ${content.text}`,
|
|
87
|
+
});
|
|
79
88
|
}
|
|
80
89
|
}
|
|
81
90
|
isTaskDone = true;
|
|
@@ -86,7 +95,10 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
|
|
|
86
95
|
const reasoningItem = reasoning;
|
|
87
96
|
const summaryText = reasoningItem.summary?.find((item) => item.type === "summary_text")?.text;
|
|
88
97
|
if (summaryText) {
|
|
89
|
-
|
|
98
|
+
result.push({
|
|
99
|
+
type: "text",
|
|
100
|
+
text: `Action reasoning: ${summaryText}`,
|
|
101
|
+
});
|
|
90
102
|
}
|
|
91
103
|
}
|
|
92
104
|
// We expect either a function call or a computer call in the response.
|
|
@@ -98,9 +110,15 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
|
|
|
98
110
|
const args = JSON.parse(functionCall.arguments);
|
|
99
111
|
const { actionSummary, actionCode } = await (0, computer_1.executeModelAction)(page, { type: "goto", url: args.url }, codegen);
|
|
100
112
|
executedActionSummary = actionSummary;
|
|
101
|
-
|
|
113
|
+
result.push({
|
|
114
|
+
type: "text",
|
|
115
|
+
text: `Action executed: ${actionSummary}`,
|
|
116
|
+
});
|
|
102
117
|
if (actionCode) {
|
|
103
|
-
|
|
118
|
+
result.push({
|
|
119
|
+
type: "text",
|
|
120
|
+
text: `Generated code: ${actionCode}`,
|
|
121
|
+
});
|
|
104
122
|
generatedCode += actionCode;
|
|
105
123
|
}
|
|
106
124
|
toolCallOutput = {
|
|
@@ -116,20 +134,28 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
|
|
|
116
134
|
// Execute the action and take a screenshot
|
|
117
135
|
const { actionSummary, actionCode } = await (0, computer_1.executeModelAction)(page, action, codegen);
|
|
118
136
|
executedActionSummary = actionSummary;
|
|
119
|
-
|
|
120
|
-
|
|
137
|
+
result.push({
|
|
138
|
+
type: "text",
|
|
139
|
+
text: `Action executed: ${actionSummary}`,
|
|
140
|
+
});
|
|
141
|
+
result.push({
|
|
142
|
+
type: "text",
|
|
143
|
+
text: `Generated code: ${actionCode}`,
|
|
144
|
+
});
|
|
121
145
|
generatedCode += actionCode;
|
|
122
146
|
// Allow time for changes to take effect.
|
|
123
147
|
await new Promise((resolve) => setTimeout(resolve, 1000));
|
|
124
|
-
const
|
|
125
|
-
|
|
126
|
-
|
|
148
|
+
const screenshot = await (0, computer_1.getScreenshot)(page);
|
|
149
|
+
result.push({
|
|
150
|
+
type: "screenshot",
|
|
151
|
+
screenshot: artifact(screenshot, actionSummary),
|
|
152
|
+
});
|
|
127
153
|
toolCallOutput = {
|
|
128
154
|
type: "computer_call_output",
|
|
129
155
|
call_id: computerCall.call_id,
|
|
130
156
|
output: {
|
|
131
157
|
type: "computer_screenshot",
|
|
132
|
-
image_url: `data:image/png;base64,${
|
|
158
|
+
image_url: `data:image/png;base64,${screenshot}`,
|
|
133
159
|
},
|
|
134
160
|
acknowledged_safety_checks: computerCall.pending_safety_checks,
|
|
135
161
|
};
|
|
@@ -155,19 +181,25 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
|
|
|
155
181
|
screenHeight,
|
|
156
182
|
openAIClient,
|
|
157
183
|
});
|
|
184
|
+
tokensUsed.input += response.usage?.input_tokens || 0;
|
|
185
|
+
tokensUsed.output += response.usage?.output_tokens || 0;
|
|
158
186
|
}
|
|
159
187
|
if (!isTaskDone) {
|
|
160
|
-
|
|
188
|
+
const logMessage = `Max iteration limit hit: Task not done after ${MAX_ITERATIONS} iterations`;
|
|
189
|
+
console.log(logMessage);
|
|
190
|
+
result.push({
|
|
191
|
+
type: "text",
|
|
192
|
+
text: logMessage,
|
|
193
|
+
});
|
|
161
194
|
}
|
|
162
|
-
trace?.update({
|
|
163
|
-
output: { code: generatedCode, actionsSummary: actionsSummary.join("\n") },
|
|
164
|
-
});
|
|
195
|
+
trace?.update({ output: { result } });
|
|
165
196
|
return {
|
|
166
|
-
|
|
197
|
+
result,
|
|
167
198
|
code: generatedCode,
|
|
168
|
-
// TODO: Does not support skills (from helper methods in pages/ dir),
|
|
169
|
-
// and therefore, import paths are empty
|
|
170
199
|
importPaths: [],
|
|
171
|
-
|
|
200
|
+
usage: {
|
|
201
|
+
tokens: tokensUsed,
|
|
202
|
+
cost: (0, model_1.tokensToCost)(tokensUsed),
|
|
203
|
+
},
|
|
172
204
|
};
|
|
173
205
|
}
|
|
@@ -7,4 +7,11 @@ export declare function callComputerUseModel({ input, previousResponseId, screen
|
|
|
7
7
|
screenHeight: number;
|
|
8
8
|
openAIClient: OpenAI;
|
|
9
9
|
}): Promise<Response>;
|
|
10
|
+
export declare function tokensToCost(tokens: {
|
|
11
|
+
input: number;
|
|
12
|
+
output: number;
|
|
13
|
+
}): {
|
|
14
|
+
input: number;
|
|
15
|
+
output: number;
|
|
16
|
+
};
|
|
10
17
|
//# sourceMappingURL=model.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAEL,QAAQ,EACR,iBAAiB,EAClB,MAAM,0CAA0C,CAAC;AA8BlD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,kBAAkB,EAClB,WAAW,EACX,YAAY,EACZ,YAAY,GACb,EAAE;IACD,KAAK,EAAE,iBAAiB,EAAE,CAAC;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,QAAQ,CAAC,CAuBpB"}
|
|
1
|
+
{"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAEL,QAAQ,EACR,iBAAiB,EAClB,MAAM,0CAA0C,CAAC;AA8BlD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,kBAAkB,EAClB,WAAW,EACX,YAAY,EACZ,YAAY,GACb,EAAE;IACD,KAAK,EAAE,iBAAiB,EAAE,CAAC;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,QAAQ,CAAC,CAuBpB;AAED,wBAAgB,YAAY,CAAC,MAAM,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE;;;EAQrE"}
|
package/dist/agent/cua/model.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.callComputerUseModel = callComputerUseModel;
|
|
4
|
+
exports.tokensToCost = tokensToCost;
|
|
4
5
|
const INSTRUCTIONS = `You will be asked to execute some actions in a browser context.
|
|
5
6
|
Don't ask the user for confirmations - just execute the actions.
|
|
6
7
|
|
|
@@ -50,3 +51,12 @@ async function callComputerUseModel({ input, previousResponseId, screenWidth, sc
|
|
|
50
51
|
});
|
|
51
52
|
return response;
|
|
52
53
|
}
|
|
54
|
+
function tokensToCost(tokens) {
|
|
55
|
+
// Costs for "computer-use-preview-2025-03-11"
|
|
56
|
+
// https://platform.openai.com/docs/models/computer-use-preview
|
|
57
|
+
const inputUsdFor1MTokens = 3.0;
|
|
58
|
+
const outputUsdFor1MTokens = 12.0;
|
|
59
|
+
const inputCost = (tokens.input / 1_000_000) * inputUsdFor1MTokens;
|
|
60
|
+
const outputCost = (tokens.output / 1_000_000) * outputUsdFor1MTokens;
|
|
61
|
+
return { input: inputCost, output: outputCost };
|
|
62
|
+
}
|
package/dist/bin/index.js
CHANGED
|
@@ -182,7 +182,7 @@ async function runAgentsWorkflow(testGenConfig, testGenToken) {
|
|
|
182
182
|
repoDir: process.cwd(),
|
|
183
183
|
testFilePath: specPath,
|
|
184
184
|
});
|
|
185
|
-
const { isError, error } = await (0, run_1.
|
|
185
|
+
const { isError, error } = await (0, run_1.runBrowsingAgent)({
|
|
186
186
|
testCaseName: testCase.name,
|
|
187
187
|
testCaseSuites: testCase.suites,
|
|
188
188
|
testFilePath: specPath,
|
|
@@ -212,7 +212,7 @@ async function main() {
|
|
|
212
212
|
.option("--use-chat", "Use chat agent (and not the workflow)")
|
|
213
213
|
.option("--chat-session-id <chat-session-id>", "Identifier for chat session (fetched from dash.empirical.run)")
|
|
214
214
|
.option("--use-disk-for-chat-state", "Save and load chat state from disk")
|
|
215
|
-
.option("--chat-model <model>", "Chat model to use (claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20241022 or gemini-2.5-pro-preview-
|
|
215
|
+
.option("--chat-model <model>", "Chat model to use (claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20241022 or gemini-2.5-pro-preview-06-05)")
|
|
216
216
|
.option("--initial-prompt <path>", "Path to an initial prompt file (e.g. prompt.md)")
|
|
217
217
|
.option("--with-retry", "Use the retry strategy")
|
|
218
218
|
.parse(process.argv);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,4BAA4B,CAAC;AAGjE,eAAO,MAAM,iBAAiB,EAAE,MAAM,CAAC,MAAM,EAAE,mBAAmB,CAajE,CAAC;AAEF,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,EAAE,CAAC,OAAO,iBAAiB,CAAC,CAAC,MAAM,OAAO,iBAAiB,CAAC,CAAC;IACvE,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAQD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAyDrB;AAED,wBAAgB,WAAW,SAgC1B"}
|
package/dist/bin/utils/index.js
CHANGED
|
@@ -13,9 +13,9 @@ exports.ARGS_TO_MODEL_MAP = {
|
|
|
13
13
|
"claude-4": "claude-sonnet-4-20250514",
|
|
14
14
|
"claude-sonnet-4": "claude-sonnet-4-20250514",
|
|
15
15
|
"claude-opus-4": "claude-opus-4-20250514",
|
|
16
|
-
"gemini-2.5": "gemini-2.5-pro-preview-
|
|
17
|
-
"gemini-2.5-pro": "gemini-2.5-pro-preview-
|
|
18
|
-
"gemini-2.5-pro-preview-03-25": "gemini-2.5-pro-preview-
|
|
16
|
+
"gemini-2.5": "gemini-2.5-pro-preview-06-05",
|
|
17
|
+
"gemini-2.5-pro": "gemini-2.5-pro-preview-06-05",
|
|
18
|
+
"gemini-2.5-pro-preview-03-25": "gemini-2.5-pro-preview-06-05",
|
|
19
19
|
"o4-mini": "o4-mini-2025-04-16",
|
|
20
20
|
"o4-mini-2025-04-16": "o4-mini-2025-04-16",
|
|
21
21
|
};
|
package/dist/file/client.d.ts
CHANGED
|
@@ -1,17 +1,11 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { BrowserAgentIPCPayload } from "./server";
|
|
2
2
|
declare class FileServiceClient {
|
|
3
3
|
baseUrl: string;
|
|
4
4
|
port: number | undefined;
|
|
5
5
|
constructor();
|
|
6
6
|
static isAvailable(): boolean;
|
|
7
|
-
|
|
8
|
-
generatedCode: string;
|
|
9
|
-
task: string;
|
|
10
|
-
importPaths: string[];
|
|
11
|
-
actionsSummary?: string;
|
|
12
|
-
}): Promise<any>;
|
|
7
|
+
sendAgentResult(payload: BrowserAgentIPCPayload): Promise<any>;
|
|
13
8
|
post(path: string, body: any): Promise<any>;
|
|
14
|
-
sendArtifactInputsToServer(artifacts: ArtifactInput[]): Promise<any>;
|
|
15
9
|
}
|
|
16
10
|
export default FileServiceClient;
|
|
17
11
|
//# sourceMappingURL=client.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../../src/file/client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../../src/file/client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,sBAAsB,EAAE,MAAM,UAAU,CAAC;AAElD,cAAM,iBAAiB;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,GAAG,SAAS,CAAC;;IAUzB,MAAM,CAAC,WAAW;IAIZ,eAAe,CAAC,OAAO,EAAE,sBAAsB;IAI/C,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG;CAgBnC;AAED,eAAe,iBAAiB,CAAC"}
|
package/dist/file/client.js
CHANGED
|
@@ -13,13 +13,8 @@ class FileServiceClient {
|
|
|
13
13
|
static isAvailable() {
|
|
14
14
|
return !!Number(process.env.IPC_FILE_SERVICE_PORT);
|
|
15
15
|
}
|
|
16
|
-
async
|
|
17
|
-
return this.post("/
|
|
18
|
-
generatedCode,
|
|
19
|
-
task,
|
|
20
|
-
importPaths,
|
|
21
|
-
actionsSummary,
|
|
22
|
-
});
|
|
16
|
+
async sendAgentResult(payload) {
|
|
17
|
+
return this.post("/agent-results", payload);
|
|
23
18
|
}
|
|
24
19
|
async post(path, body) {
|
|
25
20
|
const resp = await fetch(`${this.baseUrl}${path}`, {
|
|
@@ -37,21 +32,5 @@ class FileServiceClient {
|
|
|
37
32
|
return data;
|
|
38
33
|
}
|
|
39
34
|
}
|
|
40
|
-
async sendArtifactInputsToServer(artifacts) {
|
|
41
|
-
const resp = await fetch(`${this.baseUrl}/artifact`, {
|
|
42
|
-
method: "POST",
|
|
43
|
-
headers: {
|
|
44
|
-
"Content-Type": "application/json",
|
|
45
|
-
},
|
|
46
|
-
body: JSON.stringify(artifacts),
|
|
47
|
-
});
|
|
48
|
-
if (!resp.ok) {
|
|
49
|
-
throw new Error(`API failed with status ${resp.statusText}`);
|
|
50
|
-
}
|
|
51
|
-
else {
|
|
52
|
-
const data = await resp.json();
|
|
53
|
-
return data;
|
|
54
|
-
}
|
|
55
|
-
}
|
|
56
35
|
}
|
|
57
36
|
exports.default = FileServiceClient;
|