@empiricalrun/test-gen 0.64.3 → 0.66.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/CHANGELOG.md +49 -0
  2. package/dist/agent/browsing/run.d.ts +6 -4
  3. package/dist/agent/browsing/run.d.ts.map +1 -1
  4. package/dist/agent/browsing/run.js +10 -9
  5. package/dist/agent/chat/agent-loop.d.ts +2 -1
  6. package/dist/agent/chat/agent-loop.d.ts.map +1 -1
  7. package/dist/agent/chat/exports.d.ts +2 -2
  8. package/dist/agent/chat/exports.d.ts.map +1 -1
  9. package/dist/agent/chat/index.d.ts +2 -3
  10. package/dist/agent/chat/index.d.ts.map +1 -1
  11. package/dist/agent/chat/index.js +23 -5
  12. package/dist/agent/chat/models.d.ts +1 -1
  13. package/dist/agent/chat/models.d.ts.map +1 -1
  14. package/dist/agent/chat/state.d.ts +2 -2
  15. package/dist/agent/chat/state.d.ts.map +1 -1
  16. package/dist/agent/chat/utils.d.ts +2 -1
  17. package/dist/agent/chat/utils.d.ts.map +1 -1
  18. package/dist/agent/cua/computer.js +1 -1
  19. package/dist/agent/cua/index.d.ts +10 -3
  20. package/dist/agent/cua/index.d.ts.map +1 -1
  21. package/dist/agent/cua/index.js +71 -34
  22. package/dist/agent/cua/model.d.ts +7 -0
  23. package/dist/agent/cua/model.d.ts.map +1 -1
  24. package/dist/agent/cua/model.js +10 -0
  25. package/dist/agent/cua/pw-codegen/pw-pause/index.d.ts +7 -5
  26. package/dist/agent/cua/pw-codegen/pw-pause/index.d.ts.map +1 -1
  27. package/dist/agent/cua/pw-codegen/pw-pause/index.js +57 -29
  28. package/dist/agent/cua/pw-codegen/pw-pause/ipc.d.ts +3 -0
  29. package/dist/agent/cua/pw-codegen/pw-pause/ipc.d.ts.map +1 -0
  30. package/dist/agent/cua/pw-codegen/pw-pause/ipc.js +13 -0
  31. package/dist/agent/cua/pw-codegen/pw-pause/{utils.d.ts → patch.d.ts} +4 -11
  32. package/dist/agent/cua/pw-codegen/pw-pause/patch.d.ts.map +1 -0
  33. package/dist/agent/cua/pw-codegen/pw-pause/{utils.js → patch.js} +49 -3
  34. package/dist/bin/index.js +3 -5
  35. package/dist/bin/utils/index.d.ts +1 -2
  36. package/dist/bin/utils/index.d.ts.map +1 -1
  37. package/dist/bin/utils/index.js +5 -3
  38. package/dist/file/client.d.ts +2 -8
  39. package/dist/file/client.d.ts.map +1 -1
  40. package/dist/file/client.js +2 -23
  41. package/dist/file/server.d.ts +15 -3
  42. package/dist/file/server.d.ts.map +1 -1
  43. package/dist/file/server.js +17 -28
  44. package/dist/index.d.ts.map +1 -1
  45. package/dist/index.js +6 -13
  46. package/dist/tool-call-service/index.d.ts +3 -3
  47. package/dist/tool-call-service/index.d.ts.map +1 -1
  48. package/dist/tool-call-service/index.js +6 -3
  49. package/dist/tool-call-service/utils.d.ts +4 -10
  50. package/dist/tool-call-service/utils.d.ts.map +1 -1
  51. package/dist/tool-call-service/utils.js +28 -5
  52. package/dist/tools/commit-and-create-pr.d.ts +0 -6
  53. package/dist/tools/commit-and-create-pr.d.ts.map +1 -1
  54. package/dist/tools/commit-and-create-pr.js +21 -40
  55. package/dist/tools/diagnosis-fetcher.d.ts.map +1 -1
  56. package/dist/tools/diagnosis-fetcher.js +1 -0
  57. package/dist/tools/download-build.d.ts.map +1 -1
  58. package/dist/tools/download-build.js +1 -0
  59. package/dist/tools/grep/index.d.ts.map +1 -1
  60. package/dist/tools/grep/index.js +1 -0
  61. package/dist/tools/list-environments.d.ts +3 -0
  62. package/dist/tools/list-environments.d.ts.map +1 -0
  63. package/dist/tools/list-environments.js +49 -0
  64. package/dist/tools/str_replace_editor.d.ts +3 -2
  65. package/dist/tools/str_replace_editor.d.ts.map +1 -1
  66. package/dist/tools/str_replace_editor.js +48 -7
  67. package/dist/tools/test-gen-browser.d.ts +1 -1
  68. package/dist/tools/test-gen-browser.d.ts.map +1 -1
  69. package/dist/tools/test-gen-browser.js +58 -12
  70. package/dist/tools/test-run-fetcher/index.d.ts.map +1 -1
  71. package/dist/tools/test-run-fetcher/index.js +1 -0
  72. package/dist/tools/test-run.d.ts.map +1 -1
  73. package/dist/tools/test-run.js +1 -0
  74. package/dist/tools/upgrade-packages/index.d.ts +3 -0
  75. package/dist/tools/upgrade-packages/index.d.ts.map +1 -0
  76. package/dist/tools/upgrade-packages/index.js +124 -0
  77. package/dist/tools/upgrade-packages/utils.d.ts +13 -0
  78. package/dist/tools/upgrade-packages/utils.d.ts.map +1 -0
  79. package/dist/tools/upgrade-packages/utils.js +106 -0
  80. package/dist/tools/utils/index.d.ts +50 -2
  81. package/dist/tools/utils/index.d.ts.map +1 -1
  82. package/dist/tools/utils/index.js +87 -0
  83. package/dist/utils/git.d.ts +12 -1
  84. package/dist/utils/git.d.ts.map +1 -1
  85. package/dist/utils/git.js +66 -1
  86. package/package.json +4 -4
  87. package/tsconfig.tsbuildinfo +1 -1
  88. package/dist/agent/cua/pw-codegen/pw-pause/utils.d.ts.map +0 -1
  89. package/dist/tools/environment-crud.d.ts +0 -4
  90. package/dist/tools/environment-crud.d.ts.map +0 -1
  91. package/dist/tools/environment-crud.js +0 -100
package/CHANGELOG.md CHANGED
@@ -1,5 +1,54 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.66.0
4
+
5
+ ### Minor Changes
6
+
7
+ - 576870a: feat: allow specifying package version in upgradePackages tool
8
+ - e94a2da: feat: update upgradePackages tool to include input schema for package selection
9
+ - dfb2007: feat: add upgradePackages tool
10
+
11
+ ### Patch Changes
12
+
13
+ - 33abadf: fix: text appendBranchNameToQueueUrl output is under 80 characters fixed
14
+ - 1300a80: fix: branch name with dot should deploy tool execute service
15
+ - 985d721: fix: git patch for new files
16
+ - 6a19421: feat: collect git patch artifacts from text editor tools
17
+ - 1a46013: feat: page.pause codegen works when tool execution is headed
18
+ - 805f35f: fix: handle error when fetching environment variables in chat agent
19
+ - 0a9ec78: feat: add o3 with reduced costs
20
+ - c430cc0: fix: use mouse wheel to execute scroll in cua
21
+ - Updated dependencies [9b873e3]
22
+ - Updated dependencies [41c266d]
23
+ - Updated dependencies [0a9ec78]
24
+ - Updated dependencies [8a83b29]
25
+ - @empiricalrun/test-run@0.10.2
26
+ - @empiricalrun/llm@0.18.1
27
+
28
+ ## 0.65.0
29
+
30
+ ### Minor Changes
31
+
32
+ - 4623300: feat: added browser queue to add concurrency and avoid blocking of server due to browser dependent tools
33
+
34
+ ### Patch Changes
35
+
36
+ - 82acf53: feat: enable tools to contribute to cost calculations
37
+ - 73dd841: feat: change getEnvironment tool to become listEnvironments
38
+ - 8233d49: chore: remove ecs feature flag from Dashboard
39
+ - 0b55884: feat: browser agent tool call returns images in tool result
40
+ - 459d029: feat: pass feature flags to tool calls
41
+ - 7712b2e: chore: move more types to shared-types package
42
+ - 1b08d58: feat: tool response interface supports images for claude
43
+ - 1b9087e: feat: improve feature flags ui, upgrade gemini-pro
44
+ - Updated dependencies [82acf53]
45
+ - Updated dependencies [1177d63]
46
+ - Updated dependencies [7712b2e]
47
+ - Updated dependencies [fb32af6]
48
+ - Updated dependencies [1b08d58]
49
+ - Updated dependencies [1b9087e]
50
+ - @empiricalrun/llm@0.18.0
51
+
3
52
  ## 0.64.3
4
53
 
5
54
  ### Patch Changes
@@ -1,4 +1,5 @@
1
- import { ArtifactInput } from "@empiricalrun/shared-types";
1
+ import { ArtifactInput, Usage } from "@empiricalrun/shared-types";
2
+ import { BrowserAgentResult } from "../cua";
2
3
  type GenerateTestsType = {
3
4
  testCaseName: string;
4
5
  testCaseSuites: string[];
@@ -16,11 +17,12 @@ export declare function convertProjectsFilterToProject({ pwProjectsFilter, repoD
16
17
  repoDir: string;
17
18
  testFilePath: string;
18
19
  }): Promise<string>;
19
- export declare function generateTestsUsingMasterAgent({ testCaseName, testCaseSuites, testFilePath, filePathToUpdate, projectName, testGenToken, repoDir, traceId, editFileWithGeneratedCode, envOverrides, }: GenerateTestsType): Promise<{
20
+ export declare function runBrowsingAgent({ testCaseName, testCaseSuites, testFilePath, filePathToUpdate, projectName, testGenToken, repoDir, traceId, editFileWithGeneratedCode, envOverrides, }: GenerateTestsType): Promise<{
20
21
  isError: boolean;
21
22
  error: string;
22
- actionsSummary?: string;
23
- artifacts?: ArtifactInput[];
23
+ result: BrowserAgentResult | undefined;
24
+ usage: Usage | undefined;
25
+ artifacts: ArtifactInput[];
24
26
  }>;
25
27
  export {};
26
28
  //# sourceMappingURL=run.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAgB3D,KAAK,iBAAiB,GAAG;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,yBAAyB,EAAE,OAAO,CAAC;IACnC,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACtC,CAAC;AAEF,wBAAsB,8BAA8B,CAAC,EACnD,gBAAgB,EAChB,OAAO,EACP,YAAY,GACb,EAAE;IACD,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,MAAM,CAAC,CAQlB;AAED,wBAAsB,6BAA6B,CAAC,EAClD,YAAY,EACZ,cAAc,EACd,YAAY,EACZ,gBAAgB,EAChB,WAAW,EACX,YAAY,EACZ,OAAO,EACP,OAAO,EACP,yBAAyB,EACzB,YAAY,GACb,EAAE,iBAAiB,GAAG,OAAO,CAAC;IAC7B,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,SAAS,CAAC,EAAE,aAAa,EAAE,CAAC;CAC7B,CAAC,CAqGD"}
1
+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,4BAA4B,CAAC;AAclE,OAAO,EAAE,kBAAkB,EAAE,MAAM,QAAQ,CAAC;AAG5C,KAAK,iBAAiB,GAAG;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,yBAAyB,EAAE,OAAO,CAAC;IACnC,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACtC,CAAC;AAEF,wBAAsB,8BAA8B,CAAC,EACnD,gBAAgB,EAChB,OAAO,EACP,YAAY,GACb,EAAE;IACD,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,MAAM,CAAC,CAQlB;AAED,wBAAsB,gBAAgB,CAAC,EACrC,YAAY,EACZ,cAAc,EACd,YAAY,EACZ,gBAAgB,EAChB,WAAW,EACX,YAAY,EACZ,OAAO,EACP,OAAO,EACP,yBAAyB,EACzB,YAAY,GACb,EAAE,iBAAiB,GAAG,OAAO,CAAC;IAC7B,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,kBAAkB,GAAG,SAAS,CAAC;IACvC,KAAK,EAAE,KAAK,GAAG,SAAS,CAAC;IACzB,SAAS,EAAE,aAAa,EAAE,CAAC;CAC5B,CAAC,CAmGD"}
@@ -4,7 +4,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
6
  exports.convertProjectsFilterToProject = convertProjectsFilterToProject;
7
- exports.generateTestsUsingMasterAgent = generateTestsUsingMasterAgent;
7
+ exports.runBrowsingAgent = runBrowsingAgent;
8
8
  const test_run_1 = require("@empiricalrun/test-run");
9
9
  const detect_port_1 = __importDefault(require("detect-port"));
10
10
  const fs_1 = __importDefault(require("fs"));
@@ -19,7 +19,7 @@ async function convertProjectsFilterToProject({ pwProjectsFilter, repoDir, testF
19
19
  const project = await (0, utils_2.detectProjectName)(testFilePath, playwrightConfig, pwProjectsFilter);
20
20
  return project;
21
21
  }
22
- async function generateTestsUsingMasterAgent({ testCaseName, testCaseSuites, testFilePath, filePathToUpdate, projectName, testGenToken, repoDir, traceId, editFileWithGeneratedCode, envOverrides, }) {
22
+ async function runBrowsingAgent({ testCaseName, testCaseSuites, testFilePath, filePathToUpdate, projectName, testGenToken, repoDir, traceId, editFileWithGeneratedCode, envOverrides, }) {
23
23
  const absTestFilePath = path_1.default.join(repoDir, testFilePath);
24
24
  if (!fs_1.default.existsSync(absTestFilePath)) {
25
25
  const errorMsg = `File for master agent to run not found: ${testFilePath}`;
@@ -51,6 +51,7 @@ async function generateTestsUsingMasterAgent({ testCaseName, testCaseSuites, tes
51
51
  projects: [projectName],
52
52
  passthroughArgs: "--retries 0 --timeout 0",
53
53
  repoDir,
54
+ envOverrides: envOverrides,
54
55
  // @ts-ignore
55
56
  platform: "web",
56
57
  });
@@ -79,7 +80,6 @@ async function generateTestsUsingMasterAgent({ testCaseName, testCaseSuites, tes
79
80
  error = `Failed to generate test using master agent ${e}`;
80
81
  console.error(`[generateTestsUsingMasterAgent] ${error}`);
81
82
  }
82
- let artifacts = [];
83
83
  if (error) {
84
84
  // Clean up the file if there is any error
85
85
  try {
@@ -92,16 +92,17 @@ async function generateTestsUsingMasterAgent({ testCaseName, testCaseSuites, tes
92
92
  console.error(`[generateTestsUsingMasterAgent] Failed to remove extra scripts from files post test gen error:`, e);
93
93
  }
94
94
  }
95
- artifacts = [...artifacts, ...(0, utils_1.findPlaywrightArtifacts)(repoDir)];
96
- const serverArtifacts = fileServer.getArtifactInputsFromServer() || [];
97
- artifacts = [...serverArtifacts, ...artifacts];
95
+ const artifacts = [
96
+ ...((0, utils_1.findPlaywrightArtifacts)(repoDir) || []),
97
+ ...(fileServer.getArtifactInputsFromServer() || []),
98
+ ];
98
99
  await fileServer.stop();
100
+ const { result, usage } = fileServer.getResultAndUsage();
99
101
  return {
100
102
  isError: !!error,
101
103
  error: error || "",
102
- actionsSummary: fileServer.getActionsSummary() ||
103
- error ||
104
- "Unknown error, there was no summary or error reported",
104
+ result,
105
105
  artifacts,
106
+ usage,
106
107
  };
107
108
  }
@@ -1,5 +1,6 @@
1
1
  import { TraceClient } from "@empiricalrun/llm";
2
- import { IChatModel, SupportedChatModels } from "@empiricalrun/llm/chat";
2
+ import { IChatModel } from "@empiricalrun/llm/chat";
3
+ import { SupportedChatModels } from "@empiricalrun/shared-types";
3
4
  import { ToolCallService } from "../../tool-call-service";
4
5
  import { FileInfo } from "../../types";
5
6
  import { ReporterFunction } from "./types";
@@ -1 +1 @@
1
- {"version":3,"file":"agent-loop.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/agent-loop.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EACL,UAAU,EACV,mBAAmB,EAEpB,MAAM,wBAAwB,CAAC;AAEhC,OAAO,EAAE,eAAe,EAAE,MAAM,yBAAyB,CAAC;AAC1D,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAGvC,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAG3C,wBAAsB,aAAa,CAAC,EAClC,SAAS,EACT,aAAa,EACb,QAAQ,EACR,KAAK,EACL,eAAe,EACf,QAAQ,EACR,qBAAqB,GACtB,EAAE;IACD,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,mBAAmB,CAAC;IACnC,eAAe,EAAE,eAAe,CAAC;IACjC,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,QAAQ,EAAE,QAAQ,CAAC;IACnB,qBAAqB,EAAE,OAAO,CAAC;CAChC,iBAqDA"}
1
+ {"version":3,"file":"agent-loop.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/agent-loop.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAE,UAAU,EAAmB,MAAM,wBAAwB,CAAC;AACrE,OAAO,EAAE,mBAAmB,EAAE,MAAM,4BAA4B,CAAC;AAEjE,OAAO,EAAE,eAAe,EAAE,MAAM,yBAAyB,CAAC;AAC1D,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAGvC,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAG3C,wBAAsB,aAAa,CAAC,EAClC,SAAS,EACT,aAAa,EACb,QAAQ,EACR,KAAK,EACL,eAAe,EACf,QAAQ,EACR,qBAAqB,GACtB,EAAE;IACD,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,mBAAmB,CAAC;IACnC,eAAe,EAAE,eAAe,CAAC;IACjC,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,QAAQ,EAAE,QAAQ,CAAC;IACnB,qBAAqB,EAAE,OAAO,CAAC;CAChC,iBAqDA"}
@@ -1,9 +1,9 @@
1
- import { createChatModel, IChatModel, SUPPORTED_CHAT_MODELS, type SupportedChatModels } from "@empiricalrun/llm/chat";
1
+ import { createChatModel, IChatModel, SUPPORTED_CHAT_MODELS } from "@empiricalrun/llm/chat";
2
2
  import { FileInfo } from "../../types";
3
3
  import { chatAgentLoop } from "./agent-loop";
4
4
  import { defaultModel } from "./models";
5
5
  import { CHAT_STATE_VERSIONS_MIGRATIONS_MAP, chatStateFromModel, createChatState, createChatStateForMessages, fetchToolCallAvailability, getLatestDownloadBuildUrl, LATEST_CHAT_STATE_VERSION, migrateChatState } from "./state";
6
6
  import { ReporterFunction } from "./types";
7
7
  export { CHAT_STATE_VERSIONS_MIGRATIONS_MAP, chatAgentLoop, chatStateFromModel, createChatModel, createChatState, createChatStateForMessages, defaultModel, fetchToolCallAvailability, getLatestDownloadBuildUrl, LATEST_CHAT_STATE_VERSION, migrateChatState, SUPPORTED_CHAT_MODELS, };
8
- export type { FileInfo, IChatModel, ReporterFunction, SupportedChatModels };
8
+ export type { FileInfo, IChatModel, ReporterFunction };
9
9
  //# sourceMappingURL=exports.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"exports.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/exports.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,EACf,UAAU,EACV,qBAAqB,EACrB,KAAK,mBAAmB,EACzB,MAAM,wBAAwB,CAAC;AAEhC,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AACvC,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAC7C,OAAO,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AACxC,OAAO,EACL,kCAAkC,EAClC,kBAAkB,EAClB,eAAe,EACf,0BAA0B,EAC1B,yBAAyB,EACzB,yBAAyB,EACzB,yBAAyB,EACzB,gBAAgB,EACjB,MAAM,SAAS,CAAC;AACjB,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAE3C,OAAO,EACL,kCAAkC,EAClC,aAAa,EACb,kBAAkB,EAClB,eAAe,EACf,eAAe,EACf,0BAA0B,EAC1B,YAAY,EACZ,yBAAyB,EACzB,yBAAyB,EACzB,yBAAyB,EACzB,gBAAgB,EAChB,qBAAqB,GACtB,CAAC;AAEF,YAAY,EAAE,QAAQ,EAAE,UAAU,EAAE,gBAAgB,EAAE,mBAAmB,EAAE,CAAC"}
1
+ {"version":3,"file":"exports.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/exports.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,EACf,UAAU,EACV,qBAAqB,EACtB,MAAM,wBAAwB,CAAC;AAEhC,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AACvC,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAC7C,OAAO,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AACxC,OAAO,EACL,kCAAkC,EAClC,kBAAkB,EAClB,eAAe,EACf,0BAA0B,EAC1B,yBAAyB,EACzB,yBAAyB,EACzB,yBAAyB,EACzB,gBAAgB,EACjB,MAAM,SAAS,CAAC;AACjB,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAE3C,OAAO,EACL,kCAAkC,EAClC,aAAa,EACb,kBAAkB,EAClB,eAAe,EACf,eAAe,EACf,0BAA0B,EAC1B,YAAY,EACZ,yBAAyB,EACzB,yBAAyB,EACzB,yBAAyB,EACzB,gBAAgB,EAChB,qBAAqB,GACtB,CAAC;AAEF,YAAY,EAAE,QAAQ,EAAE,UAAU,EAAE,gBAAgB,EAAE,CAAC"}
@@ -1,9 +1,8 @@
1
- import { SupportedChatModels } from "@empiricalrun/llm/chat";
2
- export declare function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, withRetry, }: {
1
+ import { SupportedChatModels } from "@empiricalrun/shared-types";
2
+ export declare function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, }: {
3
3
  selectedModel: SupportedChatModels;
4
4
  useDiskForChatState: boolean;
5
5
  initialPromptContent: string | undefined;
6
- withRetry?: boolean;
7
6
  }): Promise<void>;
8
7
  export declare function runChatAgentForDashboard({ chatSessionId, selectedModel, }: {
9
8
  selectedModel: SupportedChatModels;
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"AACA,OAAO,EAGL,mBAAmB,EACpB,MAAM,wBAAwB,CAAC;AAyChC,wBAAsB,kBAAkB,CAAC,EACvC,mBAAmB,EACnB,aAAa,EACb,oBAAoB,EACpB,SAAS,GACV,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,mBAAmB,EAAE,OAAO,CAAC;IAC7B,oBAAoB,EAAE,MAAM,GAAG,SAAS,CAAC;IACzC,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB,iBAwHA;AAuBD,wBAAsB,wBAAwB,CAAC,EAC7C,aAAa,EACb,aAAa,GACd,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,aAAa,EAAE,MAAM,CAAC;CACvB,iBA6DA"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAGL,mBAAmB,EACpB,MAAM,4BAA4B,CAAC;AAkEpC,wBAAsB,kBAAkB,CAAC,EACvC,mBAAmB,EACnB,aAAa,EACb,oBAAoB,GACrB,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,mBAAmB,EAAE,OAAO,CAAC;IAC7B,oBAAoB,EAAE,MAAM,GAAG,SAAS,CAAC;CAC1C,iBA4HA;AAuBD,wBAAsB,wBAAwB,CAAC,EAC7C,aAAa,EACb,aAAa,GACd,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,aAAa,EAAE,MAAM,CAAC;CACvB,iBA6DA"}
@@ -21,7 +21,26 @@ function concludeAgent(chatModel, useDiskForChatState, selectedModel, error) {
21
21
  (0, state_1.saveToDisk)(chatModel.messages, selectedModel, chatModel.askUserForInput, error);
22
22
  }
23
23
  }
24
- async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, withRetry, }) {
24
+ async function fetchEnvironmentVariables() {
25
+ // TODO: Wrap in try-catch and log error
26
+ const response = await fetch(`${DASHBOARD_DOMAIN}/api/environment-variables`, {
27
+ headers: {
28
+ "Content-Type": "application/json",
29
+ Authorization: `Bearer ${process.env.EMPIRICALRUN_API_KEY}`,
30
+ },
31
+ });
32
+ const data = await response.json();
33
+ if (!data.data) {
34
+ console.error("Failed to fetch environment variables:", data);
35
+ throw new Error("Failed to fetch environment variables");
36
+ }
37
+ const envVars = data.data.environment_variables.reduce((acc, envVar) => {
38
+ acc[envVar.name] = envVar.value;
39
+ return acc;
40
+ }, {});
41
+ return envVars;
42
+ }
43
+ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, }) {
25
44
  let chatState;
26
45
  if (useDiskForChatState) {
27
46
  chatState = (0, state_1.loadChatState)();
@@ -46,8 +65,8 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
46
65
  console.log(`${(0, picocolors_1.blue)(latest.role)}: ${latest.textMessage}`);
47
66
  }
48
67
  }
49
- // if withRetry set the chatState error null
50
- if (withRetry && chatState) {
68
+ if (chatState && chatState.error) {
69
+ // Reset error state as we are attempting a retry
51
70
  chatState.error = null;
52
71
  }
53
72
  const handleSigInt = () => {
@@ -99,7 +118,6 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
99
118
  }
100
119
  }
101
120
  else {
102
- // TODO: Should we pass a loader function? That would allow us to show a spinner
103
121
  if (!process.env.EMPIRICALRUN_API_KEY) {
104
122
  throw new Error("EMPIRICALRUN_API_KEY is not set");
105
123
  }
@@ -111,7 +129,7 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
111
129
  apiKey: process.env.EMPIRICALRUN_API_KEY,
112
130
  trace,
113
131
  featureFlags: [],
114
- environmentOverrides: {},
132
+ environmentOverrides: await fetchEnvironmentVariables(),
115
133
  });
116
134
  const fileInfo = await (0, file_tree_1.getFileInfoFromFS)(process.cwd());
117
135
  await (0, agent_loop_1.chatAgentLoop)({
@@ -1,4 +1,4 @@
1
- import { type SupportedChatModels } from "@empiricalrun/llm/chat";
1
+ import { SupportedChatModels } from "@empiricalrun/shared-types";
2
2
  export declare const defaultModel: SupportedChatModels;
3
3
  export declare const modelLabels: Record<SupportedChatModels, string>;
4
4
  //# sourceMappingURL=models.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/models.ts"],"names":[],"mappings":"AAAA,OAAO,EAEL,KAAK,mBAAmB,EACzB,MAAM,wBAAwB,CAAC;AAWhC,eAAO,MAAM,YAAY,EAAE,mBAA6C,CAAC;AAEzE,eAAO,MAAM,WAAW,EAAE,MAAM,CAAC,mBAAmB,EAAE,MAAM,CAOzD,CAAC"}
1
+ {"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/models.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,mBAAmB,EAAE,MAAM,4BAA4B,CAAC;AAWjE,eAAO,MAAM,YAAY,EAAE,mBAA6C,CAAC;AAEzE,eAAO,MAAM,WAAW,EAAE,MAAM,CAAC,mBAAmB,EAAE,MAAM,CAOzD,CAAC"}
@@ -1,5 +1,5 @@
1
- import { IChatModel, SupportedChatModels } from "@empiricalrun/llm/chat";
2
- import { CanonicalMessage, ChatState, ChatStateError } from "@empiricalrun/shared-types";
1
+ import { IChatModel } from "@empiricalrun/llm/chat";
2
+ import { CanonicalMessage, ChatState, ChatStateError, SupportedChatModels } from "@empiricalrun/shared-types";
3
3
  export declare const CHAT_STATE_VERSIONS_MIGRATIONS_MAP: Record<string, (state: any) => any>;
4
4
  export declare const LATEST_CHAT_STATE_VERSION = "0.1";
5
5
  export declare const CHAT_STATE_PATH: string;
@@ -1 +1 @@
1
- {"version":3,"file":"state.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/state.ts"],"names":[],"mappings":"AAAA,OAAO,EAIL,UAAU,EACV,mBAAmB,EACpB,MAAM,wBAAwB,CAAC;AAChC,OAAO,EACL,gBAAgB,EAChB,SAAS,EACT,cAAc,EACf,MAAM,4BAA4B,CAAC;AA+BpC,eAAO,MAAM,kCAAkC,EAAE,MAAM,CACrD,MAAM,EACN,CAAC,KAAK,EAAE,GAAG,KAAK,GAAG,CAIpB,CAAC;AAEF,eAAO,MAAM,yBAAyB,QAAQ,CAAC;AAE/C,eAAO,MAAM,eAAe,QAI3B,CAAC;AAEF,wBAAgB,eAAe,CAAC,EAC9B,UAAU,EACV,aAAa,EACb,aAAa,EACb,KAAK,GACN,EAAE;IACD,UAAU,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B,aAAa,EAAE,SAAS,GAAG,SAAS,CAAC;IACrC,aAAa,EAAE,mBAAmB,CAAC;IACnC,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,aAYA;AAED,wBAAgB,0BAA0B,CAAC,EACzC,QAAQ,EACR,aAAa,EACb,eAAe,EACf,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,GAAG,CAAC;IACd,aAAa,EAAE,mBAAmB,CAAC;IACnC,eAAe,EAAE,OAAO,CAAC;IACzB,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,GAAG,SAAS,CASZ;AAED,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,EACpC,SAAS,EACT,aAAa,EACb,KAAK,GACN,EAAE;IACD,SAAS,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC;IACzB,aAAa,EAAE,mBAAmB,CAAC;IACnC,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,aAOA;AAED,wBAAgB,aAAa,IAAI,SAAS,GAAG,SAAS,CAarD;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,GAAG,GAAG,SAAS,CAqBzD;AAED,wBAAgB,UAAU,CAAC,CAAC,EAC1B,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,EAClB,aAAa,EAAE,mBAAmB,EAClC,eAAe,EAAE,OAAO,EACxB,KAAK,EAAE,cAAc,GAAG,IAAI,QAgB7B;AA2BD,wBAAgB,yBAAyB,CACvC,QAAQ,EAAE,gBAAgB,EAAE,GAC3B,MAAM,GAAG,IAAI,CAef;AAED,wBAAgB,yBAAyB,CACvC,aAAa,EAAE,MAAM,EACrB,QAAQ,EAAE,gBAAgB,EAAE;;;EAe7B"}
1
+ {"version":3,"file":"state.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/state.ts"],"names":[],"mappings":"AAAA,OAAO,EAIL,UAAU,EACX,MAAM,wBAAwB,CAAC;AAChC,OAAO,EACL,gBAAgB,EAChB,SAAS,EACT,cAAc,EACd,mBAAmB,EACpB,MAAM,4BAA4B,CAAC;AA+BpC,eAAO,MAAM,kCAAkC,EAAE,MAAM,CACrD,MAAM,EACN,CAAC,KAAK,EAAE,GAAG,KAAK,GAAG,CAIpB,CAAC;AAEF,eAAO,MAAM,yBAAyB,QAAQ,CAAC;AAE/C,eAAO,MAAM,eAAe,QAI3B,CAAC;AAEF,wBAAgB,eAAe,CAAC,EAC9B,UAAU,EACV,aAAa,EACb,aAAa,EACb,KAAK,GACN,EAAE;IACD,UAAU,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B,aAAa,EAAE,SAAS,GAAG,SAAS,CAAC;IACrC,aAAa,EAAE,mBAAmB,CAAC;IACnC,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,aAYA;AAED,wBAAgB,0BAA0B,CAAC,EACzC,QAAQ,EACR,aAAa,EACb,eAAe,EACf,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,GAAG,CAAC;IACd,aAAa,EAAE,mBAAmB,CAAC;IACnC,eAAe,EAAE,OAAO,CAAC;IACzB,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,GAAG,SAAS,CASZ;AAED,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,EACpC,SAAS,EACT,aAAa,EACb,KAAK,GACN,EAAE;IACD,SAAS,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC;IACzB,aAAa,EAAE,mBAAmB,CAAC;IACnC,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,aAOA;AAED,wBAAgB,aAAa,IAAI,SAAS,GAAG,SAAS,CAarD;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,GAAG,GAAG,SAAS,CAqBzD;AAED,wBAAgB,UAAU,CAAC,CAAC,EAC1B,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,EAClB,aAAa,EAAE,mBAAmB,EAClC,eAAe,EAAE,OAAO,EACxB,KAAK,EAAE,cAAc,GAAG,IAAI,QAgB7B;AA2BD,wBAAgB,yBAAyB,CACvC,QAAQ,EAAE,gBAAgB,EAAE,GAC3B,MAAM,GAAG,IAAI,CAef;AAED,wBAAgB,yBAAyB,CACvC,aAAa,EAAE,MAAM,EACrB,QAAQ,EAAE,gBAAgB,EAAE;;;EAe7B"}
@@ -1,5 +1,6 @@
1
1
  import { TraceClient } from "@empiricalrun/llm";
2
- import { IChatModel, SupportedChatModels } from "@empiricalrun/llm/chat";
2
+ import { IChatModel } from "@empiricalrun/llm/chat";
3
+ import { SupportedChatModels } from "@empiricalrun/shared-types";
3
4
  import { ReporterFunction } from "./types";
4
5
  export declare const log: (...args: any[]) => void;
5
6
  export declare function getModelName(model: string): string;
@@ -1 +1 @@
1
- {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAEL,UAAU,EACV,mBAAmB,EACpB,MAAM,wBAAwB,CAAC;AAKhC,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAE3C,eAAO,MAAM,GAAG,GAAI,GAAG,MAAM,GAAG,EAAE,SAEjC,CAAC;AAcF,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAMlD;AAED,wBAAsB,gBAAgB,CAAC,EACrC,OAAO,EACP,KAAK,EACL,SAAS,EACT,aAAa,EACb,QAAQ,EACR,KAAK,GACN,EAAE;IACD,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,OAAO,CAAC;IACf,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,mBAAmB,CAAC;IACnC,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,iBAsBA"}
1
+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAuB,UAAU,EAAE,MAAM,wBAAwB,CAAC;AACzE,OAAO,EAEL,mBAAmB,EACpB,MAAM,4BAA4B,CAAC;AAIpC,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAE3C,eAAO,MAAM,GAAG,GAAI,GAAG,MAAM,GAAG,EAAE,SAEjC,CAAC;AAcF,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAMlD;AAED,wBAAsB,gBAAgB,CAAC,EACrC,OAAO,EACP,KAAK,EACL,SAAS,EACT,aAAa,EACb,QAAQ,EACR,KAAK,GACN,EAAE;IACD,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,OAAO,CAAC;IACf,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,mBAAmB,CAAC;IACnC,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,iBAsBA"}
@@ -148,7 +148,7 @@ async function executeModelAction(page, action, codegen) {
148
148
  scroll_y,
149
149
  });
150
150
  await page.mouse.move(x, y);
151
- await page.evaluate(`window.scrollBy(${scroll_x}, ${scroll_y})`);
151
+ await page.mouse.wheel(scroll_x, scroll_y);
152
152
  actionSummary = `Scroll at (${x}, ${y}) with offsets (scroll_x=${scroll_x}, scroll_y=${scroll_y})`;
153
153
  actionCode = await codegen.getCodeForLastAction();
154
154
  break;
@@ -1,6 +1,13 @@
1
1
  import { TraceClient } from "@empiricalrun/llm";
2
- import { ArtifactInput } from "@empiricalrun/shared-types";
2
+ import { ArtifactInputData, Usage } from "@empiricalrun/shared-types";
3
3
  import { Page } from "playwright";
4
+ export type BrowserAgentResult = Array<{
5
+ type: "text";
6
+ text: string;
7
+ } | {
8
+ type: "screenshot";
9
+ screenshot: ArtifactInputData;
10
+ }>;
4
11
  export declare function createTestUsingComputerUseAgent({ page, task, trace, }: {
5
12
  page: Page;
6
13
  task: string;
@@ -8,7 +15,7 @@ export declare function createTestUsingComputerUseAgent({ page, task, trace, }:
8
15
  }): Promise<{
9
16
  code: string;
10
17
  importPaths: string[];
11
- actionsSummary: string;
12
- artifacts: ArtifactInput[];
18
+ result: BrowserAgentResult;
19
+ usage: Usage;
13
20
  }>;
14
21
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAC/D,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAS3D,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAwBlC,wBAAsB,+BAA+B,CAAC,EACpD,IAAI,EACJ,IAAI,EACJ,KAAK,GACN,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;IACvB,SAAS,EAAE,aAAa,EAAE,CAAC;CAC5B,CAAC,CAyLD"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAC/D,OAAO,EAAE,iBAAiB,EAAE,KAAK,EAAE,MAAM,4BAA4B,CAAC;AAStE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAgClC,MAAM,MAAM,kBAAkB,GAAG,KAAK,CAClC;IACE,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;CACd,GACD;IACE,IAAI,EAAE,YAAY,CAAC;IACnB,UAAU,EAAE,iBAAiB,CAAC;CAC/B,CACJ,CAAC;AAEF,wBAAsB,+BAA+B,CAAC,EACpD,IAAI,EACJ,IAAI,EACJ,KAAK,GACN,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,MAAM,EAAE,kBAAkB,CAAC;IAC3B,KAAK,EAAE,KAAK,CAAC;CACd,CAAC,CAuND"}
@@ -9,26 +9,34 @@ const openai_1 = __importDefault(require("openai"));
9
9
  const computer_1 = require("./computer");
10
10
  const model_1 = require("./model");
11
11
  const element_from_point_1 = require("./pw-codegen/element-from-point");
12
- function getCodegen() {
12
+ const pw_pause_1 = require("./pw-codegen/pw-pause");
13
+ const MAX_ITERATIONS = 15;
14
+ async function getCodegenInstance() {
15
+ const repoDir = process.cwd();
16
+ const canUsePwPause = await (0, pw_pause_1.canUsePauseCodegen)(repoDir);
17
+ if (canUsePwPause) {
18
+ console.log("[getCodegen] using PlaywrightPauseCodegen");
19
+ return new pw_pause_1.PlaywrightPauseCodegen();
20
+ }
21
+ console.log("[getCodegen] using ElementFromPointCodegen");
13
22
  return new element_from_point_1.ElementFromPointCodegen();
14
- // TODO: Add support for page.pause approach
15
- // We can use PlaywrightPauseCodegen if playwright patch was successful,
16
- // IPC port is available and PW_CODEGEN_NO_INSPECTOR env var is set
17
23
  }
18
- function getStructuredArtifactInput(screenshotBytes, actionName) {
24
+ function artifact(screenshot, name) {
19
25
  return {
20
- name: `${actionName}`,
26
+ name: `${name}`,
21
27
  contentType: "image/png",
22
- data: Buffer.from(screenshotBytes, "base64"),
28
+ data: Buffer.from(screenshot, "base64"),
23
29
  };
24
30
  }
25
31
  async function createTestUsingComputerUseAgent({ page, task, trace, }) {
26
- const codegen = getCodegen();
32
+ const codegen = await getCodegenInstance();
27
33
  await codegen.initialize(page);
28
- const screenshotBytes = await (0, computer_1.getScreenshot)(page);
29
- const artifacts = [
30
- getStructuredArtifactInput(screenshotBytes, "Initial Screen"),
34
+ const screenshot = await (0, computer_1.getScreenshot)(page);
35
+ const initialArtifact = artifact(screenshot, "Initial screen");
36
+ let result = [
37
+ { type: "screenshot", screenshot: initialArtifact },
31
38
  ];
39
+ let tokensUsed = { input: 0, output: 0 };
32
40
  const viewport = page.viewportSize();
33
41
  let screenWidth = viewport?.width || 1280;
34
42
  let screenHeight = viewport?.height || 720;
@@ -49,7 +57,7 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
49
57
  },
50
58
  {
51
59
  type: "input_image",
52
- image_url: `data:image/png;base64,${screenshotBytes}`,
60
+ image_url: `data:image/png;base64,${screenshot}`,
53
61
  detail: "high",
54
62
  },
55
63
  ],
@@ -59,13 +67,16 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
59
67
  screenHeight,
60
68
  openAIClient,
61
69
  });
70
+ tokensUsed.input += response.usage?.input_tokens || 0;
71
+ tokensUsed.output += response.usage?.output_tokens || 0;
62
72
  let isTaskDone = false;
63
- let maxIterations = 15;
64
73
  let generatedCode = "";
65
- let actionsSummary = [];
66
74
  let iterationIndex = 0;
67
- while (!isTaskDone && iterationIndex < maxIterations) {
68
- actionsSummary.push(`\n# Agent iteration ${iterationIndex}`);
75
+ while (!isTaskDone && iterationIndex < MAX_ITERATIONS) {
76
+ result.push({
77
+ type: "text",
78
+ text: `# Agent iteration ${iterationIndex}`,
79
+ });
69
80
  iterationIndex++;
70
81
  const computerCalls = response.output.filter((item) => item.type === "computer_call");
71
82
  const functionCalls = response.output.filter((item) => item.type === "function_call");
@@ -75,7 +86,10 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
75
86
  const content = assistantOutput.content.find((item) => item.type === "output_text");
76
87
  if (content && "text" in content) {
77
88
  // TODO: This ignores `ResponseOutputRefusal` type (refusal from assistant)
78
- actionsSummary.push(`Agent summary: ${content.text}`);
89
+ result.push({
90
+ type: "text",
91
+ text: `Agent summary: ${content.text}`,
92
+ });
79
93
  }
80
94
  }
81
95
  isTaskDone = true;
@@ -86,7 +100,10 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
86
100
  const reasoningItem = reasoning;
87
101
  const summaryText = reasoningItem.summary?.find((item) => item.type === "summary_text")?.text;
88
102
  if (summaryText) {
89
- actionsSummary.push(`Action reasoning: ${summaryText}`);
103
+ result.push({
104
+ type: "text",
105
+ text: `Action reasoning: ${summaryText}`,
106
+ });
90
107
  }
91
108
  }
92
109
  // We expect either a function call or a computer call in the response.
@@ -98,9 +115,15 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
98
115
  const args = JSON.parse(functionCall.arguments);
99
116
  const { actionSummary, actionCode } = await (0, computer_1.executeModelAction)(page, { type: "goto", url: args.url }, codegen);
100
117
  executedActionSummary = actionSummary;
101
- actionsSummary.push(`Action executed: ${actionSummary}`);
118
+ result.push({
119
+ type: "text",
120
+ text: `Action executed: ${actionSummary}`,
121
+ });
102
122
  if (actionCode) {
103
- actionsSummary.push(`Generated code: ${actionCode}`);
123
+ result.push({
124
+ type: "text",
125
+ text: `Generated code: ${actionCode}`,
126
+ });
104
127
  generatedCode += actionCode;
105
128
  }
106
129
  toolCallOutput = {
@@ -116,20 +139,28 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
116
139
  // Execute the action and take a screenshot
117
140
  const { actionSummary, actionCode } = await (0, computer_1.executeModelAction)(page, action, codegen);
118
141
  executedActionSummary = actionSummary;
119
- actionsSummary.push(`Action executed: ${actionSummary}`);
120
- actionsSummary.push(`Generated code: ${actionCode}`);
142
+ result.push({
143
+ type: "text",
144
+ text: `Action executed: ${actionSummary}`,
145
+ });
146
+ result.push({
147
+ type: "text",
148
+ text: `Generated code: ${actionCode}`,
149
+ });
121
150
  generatedCode += actionCode;
122
151
  // Allow time for changes to take effect.
123
152
  await new Promise((resolve) => setTimeout(resolve, 1000));
124
- const screenshotBytes = await (0, computer_1.getScreenshot)(page);
125
- artifacts.push(getStructuredArtifactInput(screenshotBytes, actionSummary));
126
- // Populate toolCallOutput
153
+ const screenshot = await (0, computer_1.getScreenshot)(page);
154
+ result.push({
155
+ type: "screenshot",
156
+ screenshot: artifact(screenshot, actionSummary),
157
+ });
127
158
  toolCallOutput = {
128
159
  type: "computer_call_output",
129
160
  call_id: computerCall.call_id,
130
161
  output: {
131
162
  type: "computer_screenshot",
132
- image_url: `data:image/png;base64,${screenshotBytes}`,
163
+ image_url: `data:image/png;base64,${screenshot}`,
133
164
  },
134
165
  acknowledged_safety_checks: computerCall.pending_safety_checks,
135
166
  };
@@ -155,19 +186,25 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
155
186
  screenHeight,
156
187
  openAIClient,
157
188
  });
189
+ tokensUsed.input += response.usage?.input_tokens || 0;
190
+ tokensUsed.output += response.usage?.output_tokens || 0;
158
191
  }
159
192
  if (!isTaskDone) {
160
- actionsSummary.push(`Max iteration limit hit: Task not done after ${maxIterations} iterations`);
193
+ const logMessage = `Max iteration limit hit: Task not done after ${MAX_ITERATIONS} iterations`;
194
+ console.log(logMessage);
195
+ result.push({
196
+ type: "text",
197
+ text: logMessage,
198
+ });
161
199
  }
162
- trace?.update({
163
- output: { code: generatedCode, actionsSummary: actionsSummary.join("\n") },
164
- });
200
+ trace?.update({ output: { result } });
165
201
  return {
166
- actionsSummary: actionsSummary.join("\n"),
202
+ result,
167
203
  code: generatedCode,
168
- // TODO: Does not support skills (from helper methods in pages/ dir),
169
- // and therefore, import paths are empty
170
204
  importPaths: [],
171
- artifacts,
205
+ usage: {
206
+ tokens: tokensUsed,
207
+ cost: (0, model_1.tokensToCost)(tokensUsed),
208
+ },
172
209
  };
173
210
  }
@@ -7,4 +7,11 @@ export declare function callComputerUseModel({ input, previousResponseId, screen
7
7
  screenHeight: number;
8
8
  openAIClient: OpenAI;
9
9
  }): Promise<Response>;
10
+ export declare function tokensToCost(tokens: {
11
+ input: number;
12
+ output: number;
13
+ }): {
14
+ input: number;
15
+ output: number;
16
+ };
10
17
  //# sourceMappingURL=model.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAEL,QAAQ,EACR,iBAAiB,EAClB,MAAM,0CAA0C,CAAC;AA8BlD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,kBAAkB,EAClB,WAAW,EACX,YAAY,EACZ,YAAY,GACb,EAAE;IACD,KAAK,EAAE,iBAAiB,EAAE,CAAC;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,QAAQ,CAAC,CAuBpB"}
1
+ {"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAEL,QAAQ,EACR,iBAAiB,EAClB,MAAM,0CAA0C,CAAC;AA8BlD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,kBAAkB,EAClB,WAAW,EACX,YAAY,EACZ,YAAY,GACb,EAAE;IACD,KAAK,EAAE,iBAAiB,EAAE,CAAC;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,QAAQ,CAAC,CAuBpB;AAED,wBAAgB,YAAY,CAAC,MAAM,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE;;;EAQrE"}
@@ -1,6 +1,7 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.callComputerUseModel = callComputerUseModel;
4
+ exports.tokensToCost = tokensToCost;
4
5
  const INSTRUCTIONS = `You will be asked to execute some actions in a browser context.
5
6
  Don't ask the user for confirmations - just execute the actions.
6
7
 
@@ -50,3 +51,12 @@ async function callComputerUseModel({ input, previousResponseId, screenWidth, sc
50
51
  });
51
52
  return response;
52
53
  }
54
+ function tokensToCost(tokens) {
55
+ // Costs for "computer-use-preview-2025-03-11"
56
+ // https://platform.openai.com/docs/models/computer-use-preview
57
+ const inputUsdFor1MTokens = 3.0;
58
+ const outputUsdFor1MTokens = 12.0;
59
+ const inputCost = (tokens.input / 1_000_000) * inputUsdFor1MTokens;
60
+ const outputCost = (tokens.output / 1_000_000) * outputUsdFor1MTokens;
61
+ return { input: inputCost, output: outputCost };
62
+ }
@@ -1,15 +1,17 @@
1
1
  import type { Page } from "playwright";
2
- export { revertToOriginalPwCode } from "./utils";
3
- export declare const PW_PAUSE_IPC_PORT = 3039;
4
- export declare function preparePlaywrightForCodegen(repoDir: string): Promise<void>;
5
- export declare class PlaywrightPauseCodegen {
2
+ import { BasePlaywrightCodegen } from "../types";
3
+ export { preparePlaywrightForCodegen, revertToOriginalPwCode } from "./patch";
4
+ export declare function canUsePauseCodegen(repoDir: string): Promise<boolean>;
5
+ export declare class PlaywrightPauseCodegen implements BasePlaywrightCodegen {
6
6
  private port;
7
7
  private page;
8
8
  private server;
9
+ private codeForLastAction;
9
10
  constructor();
11
+ private saveCode;
10
12
  initialize(page: Page): Promise<void>;
11
13
  startPlaywrightCodegen(page: Page): Promise<void>;
12
14
  recordAction(): Promise<void>;
13
- getCodegenResult(): Promise<string>;
15
+ getCodeForLastAction(): Promise<string>;
14
16
  }
15
17
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/agent/cua/pw-codegen/pw-pause/index.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AASvC,OAAO,EAAE,sBAAsB,EAAE,MAAM,SAAS,CAAC;AAEjD,eAAO,MAAM,iBAAiB,OAAO,CAAC;AAEtC,wBAAsB,2BAA2B,CAAC,OAAO,EAAE,MAAM,iBAoBhE;AAGD,qBAAa,sBAAsB;IACjC,OAAO,CAAC,IAAI,CAAa;IACzB,OAAO,CAAC,IAAI,CAAmB;IAC/B,OAAO,CAAC,MAAM,CAA4C;;IAMpD,UAAU,CAAC,IAAI,EAAE,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;IAkBrC,sBAAsB,CAAC,IAAI,EAAE,IAAI;IAsBjC,YAAY,IAAI,OAAO,CAAC,IAAI,CAAC;IAI7B,gBAAgB,IAAI,OAAO,CAAC,MAAM,CAAC;CAI1C"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/agent/cua/pw-codegen/pw-pause/index.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAEvC,OAAO,EAAE,qBAAqB,EAAE,MAAM,UAAU,CAAC;AAIjD,OAAO,EAAE,2BAA2B,EAAE,sBAAsB,EAAE,MAAM,SAAS,CAAC;AAE9E,wBAAsB,kBAAkB,CAAC,OAAO,EAAE,MAAM,oBAqCvD;AAiBD,qBAAa,sBAAuB,YAAW,qBAAqB;IAClE,OAAO,CAAC,IAAI,CAAa;IACzB,OAAO,CAAC,IAAI,CAAmB;IAC/B,OAAO,CAAC,MAAM,CAA4C;IAC1D,OAAO,CAAC,iBAAiB,CAAqB;;YAMhC,QAAQ;IAOhB,UAAU,CAAC,IAAI,EAAE,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;IAkBrC,sBAAsB,CAAC,IAAI,EAAE,IAAI;IAsBjC,YAAY,IAAI,OAAO,CAAC,IAAI,CAAC;IAI7B,oBAAoB,IAAI,OAAO,CAAC,MAAM,CAAC;CAU9C"}