@empiricalrun/test-gen 0.64.3 → 0.65.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/CHANGELOG.md +24 -0
  2. package/dist/agent/browsing/run.d.ts +6 -4
  3. package/dist/agent/browsing/run.d.ts.map +1 -1
  4. package/dist/agent/browsing/run.js +9 -9
  5. package/dist/agent/chat/agent-loop.d.ts +2 -1
  6. package/dist/agent/chat/agent-loop.d.ts.map +1 -1
  7. package/dist/agent/chat/exports.d.ts +2 -2
  8. package/dist/agent/chat/exports.d.ts.map +1 -1
  9. package/dist/agent/chat/index.d.ts +1 -1
  10. package/dist/agent/chat/index.d.ts.map +1 -1
  11. package/dist/agent/chat/index.js +16 -1
  12. package/dist/agent/chat/models.d.ts +1 -1
  13. package/dist/agent/chat/models.d.ts.map +1 -1
  14. package/dist/agent/chat/state.d.ts +2 -2
  15. package/dist/agent/chat/state.d.ts.map +1 -1
  16. package/dist/agent/chat/utils.d.ts +2 -1
  17. package/dist/agent/chat/utils.d.ts.map +1 -1
  18. package/dist/agent/cua/index.d.ts +10 -3
  19. package/dist/agent/cua/index.d.ts.map +1 -1
  20. package/dist/agent/cua/index.js +61 -29
  21. package/dist/agent/cua/model.d.ts +7 -0
  22. package/dist/agent/cua/model.d.ts.map +1 -1
  23. package/dist/agent/cua/model.js +10 -0
  24. package/dist/bin/index.js +2 -2
  25. package/dist/bin/utils/index.d.ts +1 -1
  26. package/dist/bin/utils/index.d.ts.map +1 -1
  27. package/dist/bin/utils/index.js +3 -3
  28. package/dist/file/client.d.ts +2 -8
  29. package/dist/file/client.d.ts.map +1 -1
  30. package/dist/file/client.js +2 -23
  31. package/dist/file/server.d.ts +15 -3
  32. package/dist/file/server.d.ts.map +1 -1
  33. package/dist/file/server.js +17 -28
  34. package/dist/index.d.ts.map +1 -1
  35. package/dist/index.js +6 -13
  36. package/dist/tool-call-service/index.d.ts +3 -3
  37. package/dist/tool-call-service/index.d.ts.map +1 -1
  38. package/dist/tool-call-service/index.js +4 -3
  39. package/dist/tool-call-service/utils.d.ts +3 -10
  40. package/dist/tool-call-service/utils.d.ts.map +1 -1
  41. package/dist/tool-call-service/utils.js +21 -5
  42. package/dist/tools/commit-and-create-pr.d.ts.map +1 -1
  43. package/dist/tools/commit-and-create-pr.js +1 -0
  44. package/dist/tools/diagnosis-fetcher.d.ts.map +1 -1
  45. package/dist/tools/diagnosis-fetcher.js +1 -0
  46. package/dist/tools/download-build.d.ts.map +1 -1
  47. package/dist/tools/download-build.js +1 -0
  48. package/dist/tools/grep/index.d.ts.map +1 -1
  49. package/dist/tools/grep/index.js +1 -0
  50. package/dist/tools/list-environments.d.ts +3 -0
  51. package/dist/tools/list-environments.d.ts.map +1 -0
  52. package/dist/tools/list-environments.js +49 -0
  53. package/dist/tools/str_replace_editor.d.ts.map +1 -1
  54. package/dist/tools/str_replace_editor.js +4 -0
  55. package/dist/tools/test-gen-browser.d.ts +1 -1
  56. package/dist/tools/test-gen-browser.d.ts.map +1 -1
  57. package/dist/tools/test-gen-browser.js +50 -12
  58. package/dist/tools/test-run-fetcher/index.d.ts.map +1 -1
  59. package/dist/tools/test-run-fetcher/index.js +1 -0
  60. package/dist/tools/test-run.d.ts.map +1 -1
  61. package/dist/tools/test-run.js +1 -0
  62. package/package.json +3 -3
  63. package/tsconfig.tsbuildinfo +1 -1
  64. package/dist/tools/environment-crud.d.ts +0 -4
  65. package/dist/tools/environment-crud.d.ts.map +0 -1
  66. package/dist/tools/environment-crud.js +0 -100
package/CHANGELOG.md CHANGED
@@ -1,5 +1,29 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.65.0
4
+
5
+ ### Minor Changes
6
+
7
+ - 4623300: feat: added browser queue to add concurrency and avoid blocking of server due to browser dependent tools
8
+
9
+ ### Patch Changes
10
+
11
+ - 82acf53: feat: enable tools to contribute to cost calculations
12
+ - 73dd841: feat: change getEnvironment tool to become listEnvironments
13
+ - 8233d49: chore: remove ecs feature flag from Dashboard
14
+ - 0b55884: feat: browser agent tool call returns images in tool result
15
+ - 459d029: feat: pass feature flags to tool calls
16
+ - 7712b2e: chore: move more types to shared-types package
17
+ - 1b08d58: feat: tool response interface supports images for claude
18
+ - 1b9087e: feat: improve feature flags ui, upgrade gemini-pro
19
+ - Updated dependencies [82acf53]
20
+ - Updated dependencies [1177d63]
21
+ - Updated dependencies [7712b2e]
22
+ - Updated dependencies [fb32af6]
23
+ - Updated dependencies [1b08d58]
24
+ - Updated dependencies [1b9087e]
25
+ - @empiricalrun/llm@0.18.0
26
+
3
27
  ## 0.64.3
4
28
 
5
29
  ### Patch Changes
@@ -1,4 +1,5 @@
1
- import { ArtifactInput } from "@empiricalrun/shared-types";
1
+ import { ArtifactInput, Usage } from "@empiricalrun/shared-types";
2
+ import { BrowserAgentResult } from "../cua";
2
3
  type GenerateTestsType = {
3
4
  testCaseName: string;
4
5
  testCaseSuites: string[];
@@ -16,11 +17,12 @@ export declare function convertProjectsFilterToProject({ pwProjectsFilter, repoD
16
17
  repoDir: string;
17
18
  testFilePath: string;
18
19
  }): Promise<string>;
19
- export declare function generateTestsUsingMasterAgent({ testCaseName, testCaseSuites, testFilePath, filePathToUpdate, projectName, testGenToken, repoDir, traceId, editFileWithGeneratedCode, envOverrides, }: GenerateTestsType): Promise<{
20
+ export declare function runBrowsingAgent({ testCaseName, testCaseSuites, testFilePath, filePathToUpdate, projectName, testGenToken, repoDir, traceId, editFileWithGeneratedCode, envOverrides, }: GenerateTestsType): Promise<{
20
21
  isError: boolean;
21
22
  error: string;
22
- actionsSummary?: string;
23
- artifacts?: ArtifactInput[];
23
+ result: BrowserAgentResult | undefined;
24
+ usage: Usage | undefined;
25
+ artifacts: ArtifactInput[];
24
26
  }>;
25
27
  export {};
26
28
  //# sourceMappingURL=run.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAgB3D,KAAK,iBAAiB,GAAG;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,yBAAyB,EAAE,OAAO,CAAC;IACnC,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACtC,CAAC;AAEF,wBAAsB,8BAA8B,CAAC,EACnD,gBAAgB,EAChB,OAAO,EACP,YAAY,GACb,EAAE;IACD,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,MAAM,CAAC,CAQlB;AAED,wBAAsB,6BAA6B,CAAC,EAClD,YAAY,EACZ,cAAc,EACd,YAAY,EACZ,gBAAgB,EAChB,WAAW,EACX,YAAY,EACZ,OAAO,EACP,OAAO,EACP,yBAAyB,EACzB,YAAY,GACb,EAAE,iBAAiB,GAAG,OAAO,CAAC;IAC7B,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,SAAS,CAAC,EAAE,aAAa,EAAE,CAAC;CAC7B,CAAC,CAqGD"}
1
+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,4BAA4B,CAAC;AAclE,OAAO,EAAE,kBAAkB,EAAE,MAAM,QAAQ,CAAC;AAG5C,KAAK,iBAAiB,GAAG;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,yBAAyB,EAAE,OAAO,CAAC;IACnC,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACtC,CAAC;AAEF,wBAAsB,8BAA8B,CAAC,EACnD,gBAAgB,EAChB,OAAO,EACP,YAAY,GACb,EAAE;IACD,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,MAAM,CAAC,CAQlB;AAED,wBAAsB,gBAAgB,CAAC,EACrC,YAAY,EACZ,cAAc,EACd,YAAY,EACZ,gBAAgB,EAChB,WAAW,EACX,YAAY,EACZ,OAAO,EACP,OAAO,EACP,yBAAyB,EACzB,YAAY,GACb,EAAE,iBAAiB,GAAG,OAAO,CAAC;IAC7B,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,kBAAkB,GAAG,SAAS,CAAC;IACvC,KAAK,EAAE,KAAK,GAAG,SAAS,CAAC;IACzB,SAAS,EAAE,aAAa,EAAE,CAAC;CAC5B,CAAC,CAkGD"}
@@ -4,7 +4,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
6
  exports.convertProjectsFilterToProject = convertProjectsFilterToProject;
7
- exports.generateTestsUsingMasterAgent = generateTestsUsingMasterAgent;
7
+ exports.runBrowsingAgent = runBrowsingAgent;
8
8
  const test_run_1 = require("@empiricalrun/test-run");
9
9
  const detect_port_1 = __importDefault(require("detect-port"));
10
10
  const fs_1 = __importDefault(require("fs"));
@@ -19,7 +19,7 @@ async function convertProjectsFilterToProject({ pwProjectsFilter, repoDir, testF
19
19
  const project = await (0, utils_2.detectProjectName)(testFilePath, playwrightConfig, pwProjectsFilter);
20
20
  return project;
21
21
  }
22
- async function generateTestsUsingMasterAgent({ testCaseName, testCaseSuites, testFilePath, filePathToUpdate, projectName, testGenToken, repoDir, traceId, editFileWithGeneratedCode, envOverrides, }) {
22
+ async function runBrowsingAgent({ testCaseName, testCaseSuites, testFilePath, filePathToUpdate, projectName, testGenToken, repoDir, traceId, editFileWithGeneratedCode, envOverrides, }) {
23
23
  const absTestFilePath = path_1.default.join(repoDir, testFilePath);
24
24
  if (!fs_1.default.existsSync(absTestFilePath)) {
25
25
  const errorMsg = `File for master agent to run not found: ${testFilePath}`;
@@ -79,7 +79,6 @@ async function generateTestsUsingMasterAgent({ testCaseName, testCaseSuites, tes
79
79
  error = `Failed to generate test using master agent ${e}`;
80
80
  console.error(`[generateTestsUsingMasterAgent] ${error}`);
81
81
  }
82
- let artifacts = [];
83
82
  if (error) {
84
83
  // Clean up the file if there is any error
85
84
  try {
@@ -92,16 +91,17 @@ async function generateTestsUsingMasterAgent({ testCaseName, testCaseSuites, tes
92
91
  console.error(`[generateTestsUsingMasterAgent] Failed to remove extra scripts from files post test gen error:`, e);
93
92
  }
94
93
  }
95
- artifacts = [...artifacts, ...(0, utils_1.findPlaywrightArtifacts)(repoDir)];
96
- const serverArtifacts = fileServer.getArtifactInputsFromServer() || [];
97
- artifacts = [...serverArtifacts, ...artifacts];
94
+ const artifacts = [
95
+ ...((0, utils_1.findPlaywrightArtifacts)(repoDir) || []),
96
+ ...(fileServer.getArtifactInputsFromServer() || []),
97
+ ];
98
98
  await fileServer.stop();
99
+ const { result, usage } = fileServer.getResultAndUsage();
99
100
  return {
100
101
  isError: !!error,
101
102
  error: error || "",
102
- actionsSummary: fileServer.getActionsSummary() ||
103
- error ||
104
- "Unknown error, there was no summary or error reported",
103
+ result,
105
104
  artifacts,
105
+ usage,
106
106
  };
107
107
  }
@@ -1,5 +1,6 @@
1
1
  import { TraceClient } from "@empiricalrun/llm";
2
- import { IChatModel, SupportedChatModels } from "@empiricalrun/llm/chat";
2
+ import { IChatModel } from "@empiricalrun/llm/chat";
3
+ import { SupportedChatModels } from "@empiricalrun/shared-types";
3
4
  import { ToolCallService } from "../../tool-call-service";
4
5
  import { FileInfo } from "../../types";
5
6
  import { ReporterFunction } from "./types";
@@ -1 +1 @@
1
- {"version":3,"file":"agent-loop.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/agent-loop.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EACL,UAAU,EACV,mBAAmB,EAEpB,MAAM,wBAAwB,CAAC;AAEhC,OAAO,EAAE,eAAe,EAAE,MAAM,yBAAyB,CAAC;AAC1D,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAGvC,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAG3C,wBAAsB,aAAa,CAAC,EAClC,SAAS,EACT,aAAa,EACb,QAAQ,EACR,KAAK,EACL,eAAe,EACf,QAAQ,EACR,qBAAqB,GACtB,EAAE;IACD,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,mBAAmB,CAAC;IACnC,eAAe,EAAE,eAAe,CAAC;IACjC,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,QAAQ,EAAE,QAAQ,CAAC;IACnB,qBAAqB,EAAE,OAAO,CAAC;CAChC,iBAqDA"}
1
+ {"version":3,"file":"agent-loop.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/agent-loop.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAE,UAAU,EAAmB,MAAM,wBAAwB,CAAC;AACrE,OAAO,EAAE,mBAAmB,EAAE,MAAM,4BAA4B,CAAC;AAEjE,OAAO,EAAE,eAAe,EAAE,MAAM,yBAAyB,CAAC;AAC1D,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAGvC,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAG3C,wBAAsB,aAAa,CAAC,EAClC,SAAS,EACT,aAAa,EACb,QAAQ,EACR,KAAK,EACL,eAAe,EACf,QAAQ,EACR,qBAAqB,GACtB,EAAE;IACD,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,mBAAmB,CAAC;IACnC,eAAe,EAAE,eAAe,CAAC;IACjC,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,QAAQ,EAAE,QAAQ,CAAC;IACnB,qBAAqB,EAAE,OAAO,CAAC;CAChC,iBAqDA"}
@@ -1,9 +1,9 @@
1
- import { createChatModel, IChatModel, SUPPORTED_CHAT_MODELS, type SupportedChatModels } from "@empiricalrun/llm/chat";
1
+ import { createChatModel, IChatModel, SUPPORTED_CHAT_MODELS } from "@empiricalrun/llm/chat";
2
2
  import { FileInfo } from "../../types";
3
3
  import { chatAgentLoop } from "./agent-loop";
4
4
  import { defaultModel } from "./models";
5
5
  import { CHAT_STATE_VERSIONS_MIGRATIONS_MAP, chatStateFromModel, createChatState, createChatStateForMessages, fetchToolCallAvailability, getLatestDownloadBuildUrl, LATEST_CHAT_STATE_VERSION, migrateChatState } from "./state";
6
6
  import { ReporterFunction } from "./types";
7
7
  export { CHAT_STATE_VERSIONS_MIGRATIONS_MAP, chatAgentLoop, chatStateFromModel, createChatModel, createChatState, createChatStateForMessages, defaultModel, fetchToolCallAvailability, getLatestDownloadBuildUrl, LATEST_CHAT_STATE_VERSION, migrateChatState, SUPPORTED_CHAT_MODELS, };
8
- export type { FileInfo, IChatModel, ReporterFunction, SupportedChatModels };
8
+ export type { FileInfo, IChatModel, ReporterFunction };
9
9
  //# sourceMappingURL=exports.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"exports.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/exports.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,EACf,UAAU,EACV,qBAAqB,EACrB,KAAK,mBAAmB,EACzB,MAAM,wBAAwB,CAAC;AAEhC,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AACvC,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAC7C,OAAO,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AACxC,OAAO,EACL,kCAAkC,EAClC,kBAAkB,EAClB,eAAe,EACf,0BAA0B,EAC1B,yBAAyB,EACzB,yBAAyB,EACzB,yBAAyB,EACzB,gBAAgB,EACjB,MAAM,SAAS,CAAC;AACjB,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAE3C,OAAO,EACL,kCAAkC,EAClC,aAAa,EACb,kBAAkB,EAClB,eAAe,EACf,eAAe,EACf,0BAA0B,EAC1B,YAAY,EACZ,yBAAyB,EACzB,yBAAyB,EACzB,yBAAyB,EACzB,gBAAgB,EAChB,qBAAqB,GACtB,CAAC;AAEF,YAAY,EAAE,QAAQ,EAAE,UAAU,EAAE,gBAAgB,EAAE,mBAAmB,EAAE,CAAC"}
1
+ {"version":3,"file":"exports.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/exports.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,EACf,UAAU,EACV,qBAAqB,EACtB,MAAM,wBAAwB,CAAC;AAEhC,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AACvC,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAC7C,OAAO,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AACxC,OAAO,EACL,kCAAkC,EAClC,kBAAkB,EAClB,eAAe,EACf,0BAA0B,EAC1B,yBAAyB,EACzB,yBAAyB,EACzB,yBAAyB,EACzB,gBAAgB,EACjB,MAAM,SAAS,CAAC;AACjB,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAE3C,OAAO,EACL,kCAAkC,EAClC,aAAa,EACb,kBAAkB,EAClB,eAAe,EACf,eAAe,EACf,0BAA0B,EAC1B,YAAY,EACZ,yBAAyB,EACzB,yBAAyB,EACzB,yBAAyB,EACzB,gBAAgB,EAChB,qBAAqB,GACtB,CAAC;AAEF,YAAY,EAAE,QAAQ,EAAE,UAAU,EAAE,gBAAgB,EAAE,CAAC"}
@@ -1,4 +1,4 @@
1
- import { SupportedChatModels } from "@empiricalrun/llm/chat";
1
+ import { SupportedChatModels } from "@empiricalrun/shared-types";
2
2
  export declare function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, withRetry, }: {
3
3
  selectedModel: SupportedChatModels;
4
4
  useDiskForChatState: boolean;
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"AACA,OAAO,EAGL,mBAAmB,EACpB,MAAM,wBAAwB,CAAC;AAyChC,wBAAsB,kBAAkB,CAAC,EACvC,mBAAmB,EACnB,aAAa,EACb,oBAAoB,EACpB,SAAS,GACV,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,mBAAmB,EAAE,OAAO,CAAC;IAC7B,oBAAoB,EAAE,MAAM,GAAG,SAAS,CAAC;IACzC,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB,iBAwHA;AAuBD,wBAAsB,wBAAwB,CAAC,EAC7C,aAAa,EACb,aAAa,GACd,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,aAAa,EAAE,MAAM,CAAC;CACvB,iBA6DA"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAGL,mBAAmB,EACpB,MAAM,4BAA4B,CAAC;AA8DpC,wBAAsB,kBAAkB,CAAC,EACvC,mBAAmB,EACnB,aAAa,EACb,oBAAoB,EACpB,SAAS,GACV,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,mBAAmB,EAAE,OAAO,CAAC;IAC7B,oBAAoB,EAAE,MAAM,GAAG,SAAS,CAAC;IACzC,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB,iBAwHA;AAuBD,wBAAsB,wBAAwB,CAAC,EAC7C,aAAa,EACb,aAAa,GACd,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,aAAa,EAAE,MAAM,CAAC;CACvB,iBA6DA"}
@@ -21,6 +21,21 @@ function concludeAgent(chatModel, useDiskForChatState, selectedModel, error) {
21
21
  (0, state_1.saveToDisk)(chatModel.messages, selectedModel, chatModel.askUserForInput, error);
22
22
  }
23
23
  }
24
+ async function fetchEnvironmentVariables() {
25
+ // TODO: Wrap in try-catch and log error
26
+ const response = await fetch(`${DASHBOARD_DOMAIN}/api/environment-variables`, {
27
+ headers: {
28
+ "Content-Type": "application/json",
29
+ Authorization: `Bearer ${process.env.EMPIRICALRUN_API_KEY}`,
30
+ },
31
+ });
32
+ const data = await response.json();
33
+ const envVars = data.data.environment_variables.reduce((acc, envVar) => {
34
+ acc[envVar.name] = envVar.value;
35
+ return acc;
36
+ }, {});
37
+ return envVars;
38
+ }
24
39
  async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, withRetry, }) {
25
40
  let chatState;
26
41
  if (useDiskForChatState) {
@@ -111,7 +126,7 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
111
126
  apiKey: process.env.EMPIRICALRUN_API_KEY,
112
127
  trace,
113
128
  featureFlags: [],
114
- environmentOverrides: {},
129
+ environmentOverrides: await fetchEnvironmentVariables(),
115
130
  });
116
131
  const fileInfo = await (0, file_tree_1.getFileInfoFromFS)(process.cwd());
117
132
  await (0, agent_loop_1.chatAgentLoop)({
@@ -1,4 +1,4 @@
1
- import { type SupportedChatModels } from "@empiricalrun/llm/chat";
1
+ import { SupportedChatModels } from "@empiricalrun/shared-types";
2
2
  export declare const defaultModel: SupportedChatModels;
3
3
  export declare const modelLabels: Record<SupportedChatModels, string>;
4
4
  //# sourceMappingURL=models.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/models.ts"],"names":[],"mappings":"AAAA,OAAO,EAEL,KAAK,mBAAmB,EACzB,MAAM,wBAAwB,CAAC;AAWhC,eAAO,MAAM,YAAY,EAAE,mBAA6C,CAAC;AAEzE,eAAO,MAAM,WAAW,EAAE,MAAM,CAAC,mBAAmB,EAAE,MAAM,CAOzD,CAAC"}
1
+ {"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/models.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,mBAAmB,EAAE,MAAM,4BAA4B,CAAC;AAWjE,eAAO,MAAM,YAAY,EAAE,mBAA6C,CAAC;AAEzE,eAAO,MAAM,WAAW,EAAE,MAAM,CAAC,mBAAmB,EAAE,MAAM,CAOzD,CAAC"}
@@ -1,5 +1,5 @@
1
- import { IChatModel, SupportedChatModels } from "@empiricalrun/llm/chat";
2
- import { CanonicalMessage, ChatState, ChatStateError } from "@empiricalrun/shared-types";
1
+ import { IChatModel } from "@empiricalrun/llm/chat";
2
+ import { CanonicalMessage, ChatState, ChatStateError, SupportedChatModels } from "@empiricalrun/shared-types";
3
3
  export declare const CHAT_STATE_VERSIONS_MIGRATIONS_MAP: Record<string, (state: any) => any>;
4
4
  export declare const LATEST_CHAT_STATE_VERSION = "0.1";
5
5
  export declare const CHAT_STATE_PATH: string;
@@ -1 +1 @@
1
- {"version":3,"file":"state.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/state.ts"],"names":[],"mappings":"AAAA,OAAO,EAIL,UAAU,EACV,mBAAmB,EACpB,MAAM,wBAAwB,CAAC;AAChC,OAAO,EACL,gBAAgB,EAChB,SAAS,EACT,cAAc,EACf,MAAM,4BAA4B,CAAC;AA+BpC,eAAO,MAAM,kCAAkC,EAAE,MAAM,CACrD,MAAM,EACN,CAAC,KAAK,EAAE,GAAG,KAAK,GAAG,CAIpB,CAAC;AAEF,eAAO,MAAM,yBAAyB,QAAQ,CAAC;AAE/C,eAAO,MAAM,eAAe,QAI3B,CAAC;AAEF,wBAAgB,eAAe,CAAC,EAC9B,UAAU,EACV,aAAa,EACb,aAAa,EACb,KAAK,GACN,EAAE;IACD,UAAU,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B,aAAa,EAAE,SAAS,GAAG,SAAS,CAAC;IACrC,aAAa,EAAE,mBAAmB,CAAC;IACnC,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,aAYA;AAED,wBAAgB,0BAA0B,CAAC,EACzC,QAAQ,EACR,aAAa,EACb,eAAe,EACf,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,GAAG,CAAC;IACd,aAAa,EAAE,mBAAmB,CAAC;IACnC,eAAe,EAAE,OAAO,CAAC;IACzB,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,GAAG,SAAS,CASZ;AAED,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,EACpC,SAAS,EACT,aAAa,EACb,KAAK,GACN,EAAE;IACD,SAAS,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC;IACzB,aAAa,EAAE,mBAAmB,CAAC;IACnC,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,aAOA;AAED,wBAAgB,aAAa,IAAI,SAAS,GAAG,SAAS,CAarD;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,GAAG,GAAG,SAAS,CAqBzD;AAED,wBAAgB,UAAU,CAAC,CAAC,EAC1B,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,EAClB,aAAa,EAAE,mBAAmB,EAClC,eAAe,EAAE,OAAO,EACxB,KAAK,EAAE,cAAc,GAAG,IAAI,QAgB7B;AA2BD,wBAAgB,yBAAyB,CACvC,QAAQ,EAAE,gBAAgB,EAAE,GAC3B,MAAM,GAAG,IAAI,CAef;AAED,wBAAgB,yBAAyB,CACvC,aAAa,EAAE,MAAM,EACrB,QAAQ,EAAE,gBAAgB,EAAE;;;EAe7B"}
1
+ {"version":3,"file":"state.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/state.ts"],"names":[],"mappings":"AAAA,OAAO,EAIL,UAAU,EACX,MAAM,wBAAwB,CAAC;AAChC,OAAO,EACL,gBAAgB,EAChB,SAAS,EACT,cAAc,EACd,mBAAmB,EACpB,MAAM,4BAA4B,CAAC;AA+BpC,eAAO,MAAM,kCAAkC,EAAE,MAAM,CACrD,MAAM,EACN,CAAC,KAAK,EAAE,GAAG,KAAK,GAAG,CAIpB,CAAC;AAEF,eAAO,MAAM,yBAAyB,QAAQ,CAAC;AAE/C,eAAO,MAAM,eAAe,QAI3B,CAAC;AAEF,wBAAgB,eAAe,CAAC,EAC9B,UAAU,EACV,aAAa,EACb,aAAa,EACb,KAAK,GACN,EAAE;IACD,UAAU,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B,aAAa,EAAE,SAAS,GAAG,SAAS,CAAC;IACrC,aAAa,EAAE,mBAAmB,CAAC;IACnC,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,aAYA;AAED,wBAAgB,0BAA0B,CAAC,EACzC,QAAQ,EACR,aAAa,EACb,eAAe,EACf,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,GAAG,CAAC;IACd,aAAa,EAAE,mBAAmB,CAAC;IACnC,eAAe,EAAE,OAAO,CAAC;IACzB,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,GAAG,SAAS,CASZ;AAED,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,EACpC,SAAS,EACT,aAAa,EACb,KAAK,GACN,EAAE;IACD,SAAS,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC;IACzB,aAAa,EAAE,mBAAmB,CAAC;IACnC,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,aAOA;AAED,wBAAgB,aAAa,IAAI,SAAS,GAAG,SAAS,CAarD;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,GAAG,GAAG,SAAS,CAqBzD;AAED,wBAAgB,UAAU,CAAC,CAAC,EAC1B,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,EAClB,aAAa,EAAE,mBAAmB,EAClC,eAAe,EAAE,OAAO,EACxB,KAAK,EAAE,cAAc,GAAG,IAAI,QAgB7B;AA2BD,wBAAgB,yBAAyB,CACvC,QAAQ,EAAE,gBAAgB,EAAE,GAC3B,MAAM,GAAG,IAAI,CAef;AAED,wBAAgB,yBAAyB,CACvC,aAAa,EAAE,MAAM,EACrB,QAAQ,EAAE,gBAAgB,EAAE;;;EAe7B"}
@@ -1,5 +1,6 @@
1
1
  import { TraceClient } from "@empiricalrun/llm";
2
- import { IChatModel, SupportedChatModels } from "@empiricalrun/llm/chat";
2
+ import { IChatModel } from "@empiricalrun/llm/chat";
3
+ import { SupportedChatModels } from "@empiricalrun/shared-types";
3
4
  import { ReporterFunction } from "./types";
4
5
  export declare const log: (...args: any[]) => void;
5
6
  export declare function getModelName(model: string): string;
@@ -1 +1 @@
1
- {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAEL,UAAU,EACV,mBAAmB,EACpB,MAAM,wBAAwB,CAAC;AAKhC,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAE3C,eAAO,MAAM,GAAG,GAAI,GAAG,MAAM,GAAG,EAAE,SAEjC,CAAC;AAcF,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAMlD;AAED,wBAAsB,gBAAgB,CAAC,EACrC,OAAO,EACP,KAAK,EACL,SAAS,EACT,aAAa,EACb,QAAQ,EACR,KAAK,GACN,EAAE;IACD,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,OAAO,CAAC;IACf,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,mBAAmB,CAAC;IACnC,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,iBAsBA"}
1
+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAuB,UAAU,EAAE,MAAM,wBAAwB,CAAC;AACzE,OAAO,EAEL,mBAAmB,EACpB,MAAM,4BAA4B,CAAC;AAIpC,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAE3C,eAAO,MAAM,GAAG,GAAI,GAAG,MAAM,GAAG,EAAE,SAEjC,CAAC;AAcF,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAMlD;AAED,wBAAsB,gBAAgB,CAAC,EACrC,OAAO,EACP,KAAK,EACL,SAAS,EACT,aAAa,EACb,QAAQ,EACR,KAAK,GACN,EAAE;IACD,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,OAAO,CAAC;IACf,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,mBAAmB,CAAC;IACnC,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,iBAsBA"}
@@ -1,6 +1,13 @@
1
1
  import { TraceClient } from "@empiricalrun/llm";
2
- import { ArtifactInput } from "@empiricalrun/shared-types";
2
+ import { ArtifactInputData, Usage } from "@empiricalrun/shared-types";
3
3
  import { Page } from "playwright";
4
+ export type BrowserAgentResult = Array<{
5
+ type: "text";
6
+ text: string;
7
+ } | {
8
+ type: "screenshot";
9
+ screenshot: ArtifactInputData;
10
+ }>;
4
11
  export declare function createTestUsingComputerUseAgent({ page, task, trace, }: {
5
12
  page: Page;
6
13
  task: string;
@@ -8,7 +15,7 @@ export declare function createTestUsingComputerUseAgent({ page, task, trace, }:
8
15
  }): Promise<{
9
16
  code: string;
10
17
  importPaths: string[];
11
- actionsSummary: string;
12
- artifacts: ArtifactInput[];
18
+ result: BrowserAgentResult;
19
+ usage: Usage;
13
20
  }>;
14
21
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAC/D,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAS3D,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAwBlC,wBAAsB,+BAA+B,CAAC,EACpD,IAAI,EACJ,IAAI,EACJ,KAAK,GACN,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;IACvB,SAAS,EAAE,aAAa,EAAE,CAAC;CAC5B,CAAC,CAyLD"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAC/D,OAAO,EAAE,iBAAiB,EAAE,KAAK,EAAE,MAAM,4BAA4B,CAAC;AAStE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAwBlC,MAAM,MAAM,kBAAkB,GAAG,KAAK,CAClC;IACE,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;CACd,GACD;IACE,IAAI,EAAE,YAAY,CAAC;IACnB,UAAU,EAAE,iBAAiB,CAAC;CAC/B,CACJ,CAAC;AAEF,wBAAsB,+BAA+B,CAAC,EACpD,IAAI,EACJ,IAAI,EACJ,KAAK,GACN,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,MAAM,EAAE,kBAAkB,CAAC;IAC3B,KAAK,EAAE,KAAK,CAAC;CACd,CAAC,CAuND"}
@@ -9,26 +9,29 @@ const openai_1 = __importDefault(require("openai"));
9
9
  const computer_1 = require("./computer");
10
10
  const model_1 = require("./model");
11
11
  const element_from_point_1 = require("./pw-codegen/element-from-point");
12
+ const MAX_ITERATIONS = 15;
12
13
  function getCodegen() {
13
14
  return new element_from_point_1.ElementFromPointCodegen();
14
15
  // TODO: Add support for page.pause approach
15
16
  // We can use PlaywrightPauseCodegen if playwright patch was successful,
16
17
  // IPC port is available and PW_CODEGEN_NO_INSPECTOR env var is set
17
18
  }
18
- function getStructuredArtifactInput(screenshotBytes, actionName) {
19
+ function artifact(screenshot, name) {
19
20
  return {
20
- name: `${actionName}`,
21
+ name: `${name}`,
21
22
  contentType: "image/png",
22
- data: Buffer.from(screenshotBytes, "base64"),
23
+ data: Buffer.from(screenshot, "base64"),
23
24
  };
24
25
  }
25
26
  async function createTestUsingComputerUseAgent({ page, task, trace, }) {
26
27
  const codegen = getCodegen();
27
28
  await codegen.initialize(page);
28
- const screenshotBytes = await (0, computer_1.getScreenshot)(page);
29
- const artifacts = [
30
- getStructuredArtifactInput(screenshotBytes, "Initial Screen"),
29
+ const screenshot = await (0, computer_1.getScreenshot)(page);
30
+ const initialArtifact = artifact(screenshot, "Initial screen");
31
+ let result = [
32
+ { type: "screenshot", screenshot: initialArtifact },
31
33
  ];
34
+ let tokensUsed = { input: 0, output: 0 };
32
35
  const viewport = page.viewportSize();
33
36
  let screenWidth = viewport?.width || 1280;
34
37
  let screenHeight = viewport?.height || 720;
@@ -49,7 +52,7 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
49
52
  },
50
53
  {
51
54
  type: "input_image",
52
- image_url: `data:image/png;base64,${screenshotBytes}`,
55
+ image_url: `data:image/png;base64,${screenshot}`,
53
56
  detail: "high",
54
57
  },
55
58
  ],
@@ -59,13 +62,16 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
59
62
  screenHeight,
60
63
  openAIClient,
61
64
  });
65
+ tokensUsed.input += response.usage?.input_tokens || 0;
66
+ tokensUsed.output += response.usage?.output_tokens || 0;
62
67
  let isTaskDone = false;
63
- let maxIterations = 15;
64
68
  let generatedCode = "";
65
- let actionsSummary = [];
66
69
  let iterationIndex = 0;
67
- while (!isTaskDone && iterationIndex < maxIterations) {
68
- actionsSummary.push(`\n# Agent iteration ${iterationIndex}`);
70
+ while (!isTaskDone && iterationIndex < MAX_ITERATIONS) {
71
+ result.push({
72
+ type: "text",
73
+ text: `# Agent iteration ${iterationIndex}`,
74
+ });
69
75
  iterationIndex++;
70
76
  const computerCalls = response.output.filter((item) => item.type === "computer_call");
71
77
  const functionCalls = response.output.filter((item) => item.type === "function_call");
@@ -75,7 +81,10 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
75
81
  const content = assistantOutput.content.find((item) => item.type === "output_text");
76
82
  if (content && "text" in content) {
77
83
  // TODO: This ignores `ResponseOutputRefusal` type (refusal from assistant)
78
- actionsSummary.push(`Agent summary: ${content.text}`);
84
+ result.push({
85
+ type: "text",
86
+ text: `Agent summary: ${content.text}`,
87
+ });
79
88
  }
80
89
  }
81
90
  isTaskDone = true;
@@ -86,7 +95,10 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
86
95
  const reasoningItem = reasoning;
87
96
  const summaryText = reasoningItem.summary?.find((item) => item.type === "summary_text")?.text;
88
97
  if (summaryText) {
89
- actionsSummary.push(`Action reasoning: ${summaryText}`);
98
+ result.push({
99
+ type: "text",
100
+ text: `Action reasoning: ${summaryText}`,
101
+ });
90
102
  }
91
103
  }
92
104
  // We expect either a function call or a computer call in the response.
@@ -98,9 +110,15 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
98
110
  const args = JSON.parse(functionCall.arguments);
99
111
  const { actionSummary, actionCode } = await (0, computer_1.executeModelAction)(page, { type: "goto", url: args.url }, codegen);
100
112
  executedActionSummary = actionSummary;
101
- actionsSummary.push(`Action executed: ${actionSummary}`);
113
+ result.push({
114
+ type: "text",
115
+ text: `Action executed: ${actionSummary}`,
116
+ });
102
117
  if (actionCode) {
103
- actionsSummary.push(`Generated code: ${actionCode}`);
118
+ result.push({
119
+ type: "text",
120
+ text: `Generated code: ${actionCode}`,
121
+ });
104
122
  generatedCode += actionCode;
105
123
  }
106
124
  toolCallOutput = {
@@ -116,20 +134,28 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
116
134
  // Execute the action and take a screenshot
117
135
  const { actionSummary, actionCode } = await (0, computer_1.executeModelAction)(page, action, codegen);
118
136
  executedActionSummary = actionSummary;
119
- actionsSummary.push(`Action executed: ${actionSummary}`);
120
- actionsSummary.push(`Generated code: ${actionCode}`);
137
+ result.push({
138
+ type: "text",
139
+ text: `Action executed: ${actionSummary}`,
140
+ });
141
+ result.push({
142
+ type: "text",
143
+ text: `Generated code: ${actionCode}`,
144
+ });
121
145
  generatedCode += actionCode;
122
146
  // Allow time for changes to take effect.
123
147
  await new Promise((resolve) => setTimeout(resolve, 1000));
124
- const screenshotBytes = await (0, computer_1.getScreenshot)(page);
125
- artifacts.push(getStructuredArtifactInput(screenshotBytes, actionSummary));
126
- // Populate toolCallOutput
148
+ const screenshot = await (0, computer_1.getScreenshot)(page);
149
+ result.push({
150
+ type: "screenshot",
151
+ screenshot: artifact(screenshot, actionSummary),
152
+ });
127
153
  toolCallOutput = {
128
154
  type: "computer_call_output",
129
155
  call_id: computerCall.call_id,
130
156
  output: {
131
157
  type: "computer_screenshot",
132
- image_url: `data:image/png;base64,${screenshotBytes}`,
158
+ image_url: `data:image/png;base64,${screenshot}`,
133
159
  },
134
160
  acknowledged_safety_checks: computerCall.pending_safety_checks,
135
161
  };
@@ -155,19 +181,25 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
155
181
  screenHeight,
156
182
  openAIClient,
157
183
  });
184
+ tokensUsed.input += response.usage?.input_tokens || 0;
185
+ tokensUsed.output += response.usage?.output_tokens || 0;
158
186
  }
159
187
  if (!isTaskDone) {
160
- actionsSummary.push(`Max iteration limit hit: Task not done after ${maxIterations} iterations`);
188
+ const logMessage = `Max iteration limit hit: Task not done after ${MAX_ITERATIONS} iterations`;
189
+ console.log(logMessage);
190
+ result.push({
191
+ type: "text",
192
+ text: logMessage,
193
+ });
161
194
  }
162
- trace?.update({
163
- output: { code: generatedCode, actionsSummary: actionsSummary.join("\n") },
164
- });
195
+ trace?.update({ output: { result } });
165
196
  return {
166
- actionsSummary: actionsSummary.join("\n"),
197
+ result,
167
198
  code: generatedCode,
168
- // TODO: Does not support skills (from helper methods in pages/ dir),
169
- // and therefore, import paths are empty
170
199
  importPaths: [],
171
- artifacts,
200
+ usage: {
201
+ tokens: tokensUsed,
202
+ cost: (0, model_1.tokensToCost)(tokensUsed),
203
+ },
172
204
  };
173
205
  }
@@ -7,4 +7,11 @@ export declare function callComputerUseModel({ input, previousResponseId, screen
7
7
  screenHeight: number;
8
8
  openAIClient: OpenAI;
9
9
  }): Promise<Response>;
10
+ export declare function tokensToCost(tokens: {
11
+ input: number;
12
+ output: number;
13
+ }): {
14
+ input: number;
15
+ output: number;
16
+ };
10
17
  //# sourceMappingURL=model.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAEL,QAAQ,EACR,iBAAiB,EAClB,MAAM,0CAA0C,CAAC;AA8BlD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,kBAAkB,EAClB,WAAW,EACX,YAAY,EACZ,YAAY,GACb,EAAE;IACD,KAAK,EAAE,iBAAiB,EAAE,CAAC;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,QAAQ,CAAC,CAuBpB"}
1
+ {"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAEL,QAAQ,EACR,iBAAiB,EAClB,MAAM,0CAA0C,CAAC;AA8BlD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,kBAAkB,EAClB,WAAW,EACX,YAAY,EACZ,YAAY,GACb,EAAE;IACD,KAAK,EAAE,iBAAiB,EAAE,CAAC;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,QAAQ,CAAC,CAuBpB;AAED,wBAAgB,YAAY,CAAC,MAAM,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE;;;EAQrE"}
@@ -1,6 +1,7 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.callComputerUseModel = callComputerUseModel;
4
+ exports.tokensToCost = tokensToCost;
4
5
  const INSTRUCTIONS = `You will be asked to execute some actions in a browser context.
5
6
  Don't ask the user for confirmations - just execute the actions.
6
7
 
@@ -50,3 +51,12 @@ async function callComputerUseModel({ input, previousResponseId, screenWidth, sc
50
51
  });
51
52
  return response;
52
53
  }
54
+ function tokensToCost(tokens) {
55
+ // Costs for "computer-use-preview-2025-03-11"
56
+ // https://platform.openai.com/docs/models/computer-use-preview
57
+ const inputUsdFor1MTokens = 3.0;
58
+ const outputUsdFor1MTokens = 12.0;
59
+ const inputCost = (tokens.input / 1_000_000) * inputUsdFor1MTokens;
60
+ const outputCost = (tokens.output / 1_000_000) * outputUsdFor1MTokens;
61
+ return { input: inputCost, output: outputCost };
62
+ }
package/dist/bin/index.js CHANGED
@@ -182,7 +182,7 @@ async function runAgentsWorkflow(testGenConfig, testGenToken) {
182
182
  repoDir: process.cwd(),
183
183
  testFilePath: specPath,
184
184
  });
185
- const { isError, error } = await (0, run_1.generateTestsUsingMasterAgent)({
185
+ const { isError, error } = await (0, run_1.runBrowsingAgent)({
186
186
  testCaseName: testCase.name,
187
187
  testCaseSuites: testCase.suites,
188
188
  testFilePath: specPath,
@@ -212,7 +212,7 @@ async function main() {
212
212
  .option("--use-chat", "Use chat agent (and not the workflow)")
213
213
  .option("--chat-session-id <chat-session-id>", "Identifier for chat session (fetched from dash.empirical.run)")
214
214
  .option("--use-disk-for-chat-state", "Save and load chat state from disk")
215
- .option("--chat-model <model>", "Chat model to use (claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20241022 or gemini-2.5-pro-preview-03-25)")
215
+ .option("--chat-model <model>", "Chat model to use (claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20241022 or gemini-2.5-pro-preview-06-05)")
216
216
  .option("--initial-prompt <path>", "Path to an initial prompt file (e.g. prompt.md)")
217
217
  .option("--with-retry", "Use the retry strategy")
218
218
  .parse(process.argv);
@@ -1,4 +1,4 @@
1
- import { SupportedChatModels } from "@empiricalrun/llm/chat";
1
+ import { SupportedChatModels } from "@empiricalrun/shared-types";
2
2
  export declare const ARGS_TO_MODEL_MAP: Record<string, SupportedChatModels>;
3
3
  export interface CLIOptions {
4
4
  token?: string;
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,wBAAwB,CAAC;AAG7D,eAAO,MAAM,iBAAiB,EAAE,MAAM,CAAC,MAAM,EAAE,mBAAmB,CAajE,CAAC;AAEF,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,EAAE,CAAC,OAAO,iBAAiB,CAAC,CAAC,MAAM,OAAO,iBAAiB,CAAC,CAAC;IACvE,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAQD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAyDrB;AAED,wBAAgB,WAAW,SAgC1B"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,4BAA4B,CAAC;AAGjE,eAAO,MAAM,iBAAiB,EAAE,MAAM,CAAC,MAAM,EAAE,mBAAmB,CAajE,CAAC;AAEF,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,EAAE,CAAC,OAAO,iBAAiB,CAAC,CAAC,MAAM,OAAO,iBAAiB,CAAC,CAAC;IACvE,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAQD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAyDrB;AAED,wBAAgB,WAAW,SAgC1B"}
@@ -13,9 +13,9 @@ exports.ARGS_TO_MODEL_MAP = {
13
13
  "claude-4": "claude-sonnet-4-20250514",
14
14
  "claude-sonnet-4": "claude-sonnet-4-20250514",
15
15
  "claude-opus-4": "claude-opus-4-20250514",
16
- "gemini-2.5": "gemini-2.5-pro-preview-03-25",
17
- "gemini-2.5-pro": "gemini-2.5-pro-preview-03-25",
18
- "gemini-2.5-pro-preview-03-25": "gemini-2.5-pro-preview-03-25",
16
+ "gemini-2.5": "gemini-2.5-pro-preview-06-05",
17
+ "gemini-2.5-pro": "gemini-2.5-pro-preview-06-05",
18
+ "gemini-2.5-pro-preview-03-25": "gemini-2.5-pro-preview-06-05",
19
19
  "o4-mini": "o4-mini-2025-04-16",
20
20
  "o4-mini-2025-04-16": "o4-mini-2025-04-16",
21
21
  };
@@ -1,17 +1,11 @@
1
- import { ArtifactInput } from "@empiricalrun/shared-types";
1
+ import { BrowserAgentIPCPayload } from "./server";
2
2
  declare class FileServiceClient {
3
3
  baseUrl: string;
4
4
  port: number | undefined;
5
5
  constructor();
6
6
  static isAvailable(): boolean;
7
- updateTest({ generatedCode, task, importPaths, actionsSummary, }: {
8
- generatedCode: string;
9
- task: string;
10
- importPaths: string[];
11
- actionsSummary?: string;
12
- }): Promise<any>;
7
+ sendAgentResult(payload: BrowserAgentIPCPayload): Promise<any>;
13
8
  post(path: string, body: any): Promise<any>;
14
- sendArtifactInputsToServer(artifacts: ArtifactInput[]): Promise<any>;
15
9
  }
16
10
  export default FileServiceClient;
17
11
  //# sourceMappingURL=client.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../../src/file/client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAE3D,cAAM,iBAAiB;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,GAAG,SAAS,CAAC;;IAUzB,MAAM,CAAC,WAAW;IAIZ,UAAU,CAAC,EACf,aAAa,EACb,IAAI,EACJ,WAAW,EACX,cAAc,GACf,EAAE;QACD,aAAa,EAAE,MAAM,CAAC;QACtB,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,EAAE,MAAM,EAAE,CAAC;QACtB,cAAc,CAAC,EAAE,MAAM,CAAC;KACzB;IASK,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG;IAiB5B,0BAA0B,CAAC,SAAS,EAAE,aAAa,EAAE;CAgB5D;AAED,eAAe,iBAAiB,CAAC"}
1
+ {"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../../src/file/client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,sBAAsB,EAAE,MAAM,UAAU,CAAC;AAElD,cAAM,iBAAiB;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,GAAG,SAAS,CAAC;;IAUzB,MAAM,CAAC,WAAW;IAIZ,eAAe,CAAC,OAAO,EAAE,sBAAsB;IAI/C,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG;CAgBnC;AAED,eAAe,iBAAiB,CAAC"}
@@ -13,13 +13,8 @@ class FileServiceClient {
13
13
  static isAvailable() {
14
14
  return !!Number(process.env.IPC_FILE_SERVICE_PORT);
15
15
  }
16
- async updateTest({ generatedCode, task, importPaths, actionsSummary, }) {
17
- return this.post("/test", {
18
- generatedCode,
19
- task,
20
- importPaths,
21
- actionsSummary,
22
- });
16
+ async sendAgentResult(payload) {
17
+ return this.post("/agent-results", payload);
23
18
  }
24
19
  async post(path, body) {
25
20
  const resp = await fetch(`${this.baseUrl}${path}`, {
@@ -37,21 +32,5 @@ class FileServiceClient {
37
32
  return data;
38
33
  }
39
34
  }
40
- async sendArtifactInputsToServer(artifacts) {
41
- const resp = await fetch(`${this.baseUrl}/artifact`, {
42
- method: "POST",
43
- headers: {
44
- "Content-Type": "application/json",
45
- },
46
- body: JSON.stringify(artifacts),
47
- });
48
- if (!resp.ok) {
49
- throw new Error(`API failed with status ${resp.statusText}`);
50
- }
51
- else {
52
- const data = await resp.json();
53
- return data;
54
- }
55
- }
56
35
  }
57
36
  exports.default = FileServiceClient;