npm - @empiricalrun/test-gen - Versions diffs - 0.64.2 → 0.65.0 - Mend

@empiricalrun/test-gen 0.64.2 → 0.65.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (73)

package/CHANGELOG.md +32 -0
package/dist/agent/browsing/run.d.ts +6 -4
package/dist/agent/browsing/run.d.ts.map +1 -1
package/dist/agent/browsing/run.js +9 -9
package/dist/agent/chat/agent-loop.d.ts +2 -1
package/dist/agent/chat/agent-loop.d.ts.map +1 -1
package/dist/agent/chat/exports.d.ts +2 -2
package/dist/agent/chat/exports.d.ts.map +1 -1
package/dist/agent/chat/index.d.ts +1 -1
package/dist/agent/chat/index.d.ts.map +1 -1
package/dist/agent/chat/index.js +16 -1
package/dist/agent/chat/models.d.ts +1 -1
package/dist/agent/chat/models.d.ts.map +1 -1
package/dist/agent/chat/state.d.ts +2 -2
package/dist/agent/chat/state.d.ts.map +1 -1
package/dist/agent/chat/utils.d.ts +2 -1
package/dist/agent/chat/utils.d.ts.map +1 -1
package/dist/agent/cua/computer.js +1 -1
package/dist/agent/cua/index.d.ts +10 -3
package/dist/agent/cua/index.d.ts.map +1 -1
package/dist/agent/cua/index.js +61 -29
package/dist/agent/cua/model.d.ts +7 -0
package/dist/agent/cua/model.d.ts.map +1 -1
package/dist/agent/cua/model.js +10 -0
package/dist/bin/index.js +2 -2
package/dist/bin/utils/index.d.ts +1 -1
package/dist/bin/utils/index.d.ts.map +1 -1
package/dist/bin/utils/index.js +3 -3
package/dist/file/client.d.ts +2 -8
package/dist/file/client.d.ts.map +1 -1
package/dist/file/client.js +2 -23
package/dist/file/server.d.ts +15 -3
package/dist/file/server.d.ts.map +1 -1
package/dist/file/server.js +17 -28
package/dist/index.d.ts.map +1 -1
package/dist/index.js +6 -13
package/dist/tool-call-service/index.d.ts +3 -3
package/dist/tool-call-service/index.d.ts.map +1 -1
package/dist/tool-call-service/index.js +4 -3
package/dist/tool-call-service/utils.d.ts +3 -10
package/dist/tool-call-service/utils.d.ts.map +1 -1
package/dist/tool-call-service/utils.js +21 -5
package/dist/tools/commit-and-create-pr.d.ts.map +1 -1
package/dist/tools/commit-and-create-pr.js +5 -13
package/dist/tools/diagnosis-fetcher.d.ts.map +1 -1
package/dist/tools/diagnosis-fetcher.js +1 -0
package/dist/tools/download-build.d.ts.map +1 -1
package/dist/tools/download-build.js +1 -0
package/dist/tools/grep/index.d.ts.map +1 -1
package/dist/tools/grep/index.js +1 -0
package/dist/tools/list-environments.d.ts +3 -0
package/dist/tools/list-environments.d.ts.map +1 -0
package/dist/tools/list-environments.js +49 -0
package/dist/tools/str_replace_editor.d.ts.map +1 -1
package/dist/tools/str_replace_editor.js +4 -0
package/dist/tools/test-gen-browser.d.ts +1 -1
package/dist/tools/test-gen-browser.d.ts.map +1 -1
package/dist/tools/test-gen-browser.js +50 -12
package/dist/tools/test-run-fetcher/index.d.ts.map +1 -1
package/dist/tools/test-run-fetcher/index.js +1 -0
package/dist/tools/test-run.d.ts.map +1 -1
package/dist/tools/test-run.js +4 -1
package/dist/utils/index.d.ts +1 -0
package/dist/utils/index.d.ts.map +1 -1
package/dist/utils/index.js +3 -1
package/dist/utils/json.d.ts +2 -0
package/dist/utils/json.d.ts.map +1 -0
package/dist/utils/json.js +24 -0
package/package.json +3 -3
package/tsconfig.tsbuildinfo +1 -1
package/dist/tools/environment-crud.d.ts +0 -4
package/dist/tools/environment-crud.d.ts.map +0 -1
package/dist/tools/environment-crud.js +0 -100

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,37 @@
 # @empiricalrun/test-gen
+## 0.65.0
+### Minor Changes
+- 4623300: feat: added browser queue to add concurrency and avoid blocking of server due to browser dependent tools
+### Patch Changes
+- 82acf53: feat: enable tools to contribute to cost calculations
+- 73dd841: feat: change getEnvironment tool to become listEnvironments
+- 8233d49: chore: remove ecs feature flag from Dashboard
+- 0b55884: feat: browser agent tool call returns images in tool result
+- 459d029: feat: pass feature flags to tool calls
+- 7712b2e: chore: move more types to shared-types package
+- 1b08d58: feat: tool response interface supports images for claude
+- 1b9087e: feat: improve feature flags ui, upgrade gemini-pro
+- Updated dependencies [82acf53]
+- Updated dependencies [1177d63]
+- Updated dependencies [7712b2e]
+- Updated dependencies [fb32af6]
+- Updated dependencies [1b08d58]
+- Updated dependencies [1b9087e]
+  - @empiricalrun/llm@0.18.0
+## 0.64.3
+### Patch Changes
+- d8d624d: fix: pass mapped keys to codegen recordAction for proper code generation
+- addd52e: feat: add JSON value truncation to prevent oversized tool responses
+- f8a53b0: fix: PR description should not get concatenated across versions
 ## 0.64.2
 ### Patch Changes

package/dist/agent/browsing/run.d.ts CHANGED Viewed

@@ -1,4 +1,5 @@
-import { ArtifactInput } from "@empiricalrun/shared-types";
+import { ArtifactInput, Usage } from "@empiricalrun/shared-types";
+import { BrowserAgentResult } from "../cua";
 type GenerateTestsType = {
     testCaseName: string;
     testCaseSuites: string[];
@@ -16,11 +17,12 @@ export declare function convertProjectsFilterToProject({ pwProjectsFilter, repoD
     repoDir: string;
     testFilePath: string;
 }): Promise<string>;
-export declare function generateTestsUsingMasterAgent({ testCaseName, testCaseSuites, testFilePath, filePathToUpdate, projectName, testGenToken, repoDir, traceId, editFileWithGeneratedCode, envOverrides, }: GenerateTestsType): Promise<{
+export declare function runBrowsingAgent({ testCaseName, testCaseSuites, testFilePath, filePathToUpdate, projectName, testGenToken, repoDir, traceId, editFileWithGeneratedCode, envOverrides, }: GenerateTestsType): Promise<{
     isError: boolean;
     error: string;
-    actionsSummary?: string;
-    artifacts?: ArtifactInput[];
+    result: BrowserAgentResult | undefined;
+    usage: Usage | undefined;
+    artifacts: ArtifactInput[];
 }>;
 export {};
 //# sourceMappingURL=run.d.ts.map

package/dist/agent/browsing/run.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;~~AAgB3D~~,KAAK,iBAAiB,GAAG;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,yBAAyB,EAAE,OAAO,CAAC;IACnC,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACtC,CAAC;AAEF,wBAAsB,8BAA8B,CAAC,EACnD,gBAAgB,EAChB,OAAO,EACP,YAAY,GACb,EAAE;IACD,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,MAAM,CAAC,CAQlB;AAED,wBAAsB,~~6BAA6B~~,CAAC,~~EAClD~~,YAAY,EACZ,cAAc,EACd,YAAY,EACZ,gBAAgB,EAChB,WAAW,EACX,YAAY,EACZ,OAAO,EACP,OAAO,EACP,yBAAyB,EACzB,YAAY,GACb,EAAE,iBAAiB,GAAG,OAAO,CAAC;IAC7B,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,~~cAAc~~,CAAC,EAAE,~~MAAM~~,CAAC;~~IACxB~~,SAAS,~~CAAC,~~EAAE,aAAa,EAAE,CAAC;~~CAC7B~~,CAAC,~~CAqGD~~"}
1	+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,4BAA4B,CAAC;AAclE,OAAO,EAAE,kBAAkB,EAAE,MAAM,QAAQ,CAAC;AAG5C,KAAK,iBAAiB,GAAG;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,yBAAyB,EAAE,OAAO,CAAC;IACnC,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACtC,CAAC;AAEF,wBAAsB,8BAA8B,CAAC,EACnD,gBAAgB,EAChB,OAAO,EACP,YAAY,GACb,EAAE;IACD,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,MAAM,CAAC,CAQlB;AAED,wBAAsB,gBAAgB,CAAC,EACrC,YAAY,EACZ,cAAc,EACd,YAAY,EACZ,gBAAgB,EAChB,WAAW,EACX,YAAY,EACZ,OAAO,EACP,OAAO,EACP,yBAAyB,EACzB,YAAY,GACb,EAAE,iBAAiB,GAAG,OAAO,CAAC;IAC7B,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,kBAAkB,GAAG,SAAS,CAAC;IACvC,KAAK,EAAE,KAAK,GAAG,SAAS,CAAC;IACzB,SAAS,EAAE,aAAa,EAAE,CAAC;CAC5B,CAAC,CAkGD"}

package/dist/agent/browsing/run.js CHANGED Viewed

@@ -4,7 +4,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
 };
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.convertProjectsFilterToProject = convertProjectsFilterToProject;
-exports.generateTestsUsingMasterAgent = generateTestsUsingMasterAgent;
+exports.runBrowsingAgent = runBrowsingAgent;
 const test_run_1 = require("@empiricalrun/test-run");
 const detect_port_1 = __importDefault(require("detect-port"));
 const fs_1 = __importDefault(require("fs"));
@@ -19,7 +19,7 @@ async function convertProjectsFilterToProject({ pwProjectsFilter, repoDir, testF
     const project = await (0, utils_2.detectProjectName)(testFilePath, playwrightConfig, pwProjectsFilter);
     return project;
 }
-async function generateTestsUsingMasterAgent({ testCaseName, testCaseSuites, testFilePath, filePathToUpdate, projectName, testGenToken, repoDir, traceId, editFileWithGeneratedCode, envOverrides, }) {
+async function runBrowsingAgent({ testCaseName, testCaseSuites, testFilePath, filePathToUpdate, projectName, testGenToken, repoDir, traceId, editFileWithGeneratedCode, envOverrides, }) {
     const absTestFilePath = path_1.default.join(repoDir, testFilePath);
     if (!fs_1.default.existsSync(absTestFilePath)) {
         const errorMsg = `File for master agent to run not found: ${testFilePath}`;
@@ -79,7 +79,6 @@ async function generateTestsUsingMasterAgent({ testCaseName, testCaseSuites, tes
         error = `Failed to generate test using master agent ${e}`;
         console.error(`[generateTestsUsingMasterAgent] ${error}`);
     }
-    let artifacts = [];
     if (error) {
         // Clean up the file if there is any error
         try {
@@ -92,16 +91,17 @@ async function generateTestsUsingMasterAgent({ testCaseName, testCaseSuites, tes
             console.error(`[generateTestsUsingMasterAgent] Failed to remove extra scripts from files post test gen error:`, e);
         }
     }
-    artifacts = [...artifacts, ...(0, utils_1.findPlaywrightArtifacts)(repoDir)];
-    const serverArtifacts = fileServer.getArtifactInputsFromServer() || [];
-    artifacts = [...serverArtifacts, ...artifacts];
+    const artifacts = [
+        ...((0, utils_1.findPlaywrightArtifacts)(repoDir) || []),
+        ...(fileServer.getArtifactInputsFromServer() || []),
+    ];
     await fileServer.stop();
+    const { result, usage } = fileServer.getResultAndUsage();
     return {
         isError: !!error,
         error: error || "",
-        actionsSummary: fileServer.getActionsSummary() ||
-            error ||
-            "Unknown error, there was no summary or error reported",
+        result,
         artifacts,
+        usage,
     };
 }

package/dist/agent/chat/agent-loop.d.ts CHANGED Viewed

@@ -1,5 +1,6 @@
 import { TraceClient } from "@empiricalrun/llm";
-import { IChatModel, SupportedChatModels } from "@empiricalrun/llm/chat";
+import { IChatModel } from "@empiricalrun/llm/chat";
+import { SupportedChatModels } from "@empiricalrun/shared-types";
 import { ToolCallService } from "../../tool-call-service";
 import { FileInfo } from "../../types";
 import { ReporterFunction } from "./types";

package/dist/agent/chat/agent-loop.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"agent-loop.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/agent-loop.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,~~EACL~~,UAAU,~~EACV~~,mBAAmB,~~EAEpB~~,MAAM,~~wBAAwB~~,CAAC;~~AAEhC~~,OAAO,EAAE,eAAe,EAAE,MAAM,yBAAyB,CAAC;AAC1D,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAGvC,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAG3C,wBAAsB,aAAa,CAAC,EAClC,SAAS,EACT,aAAa,EACb,QAAQ,EACR,KAAK,EACL,eAAe,EACf,QAAQ,EACR,qBAAqB,GACtB,EAAE;IACD,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,mBAAmB,CAAC;IACnC,eAAe,EAAE,eAAe,CAAC;IACjC,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,QAAQ,EAAE,QAAQ,CAAC;IACnB,qBAAqB,EAAE,OAAO,CAAC;CAChC,iBAqDA"}
1	+ {"version":3,"file":"agent-loop.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/agent-loop.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAE,UAAU,EAAmB,MAAM,wBAAwB,CAAC;AACrE,OAAO,EAAE,mBAAmB,EAAE,MAAM,4BAA4B,CAAC;AAEjE,OAAO,EAAE,eAAe,EAAE,MAAM,yBAAyB,CAAC;AAC1D,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAGvC,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAG3C,wBAAsB,aAAa,CAAC,EAClC,SAAS,EACT,aAAa,EACb,QAAQ,EACR,KAAK,EACL,eAAe,EACf,QAAQ,EACR,qBAAqB,GACtB,EAAE;IACD,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,mBAAmB,CAAC;IACnC,eAAe,EAAE,eAAe,CAAC;IACjC,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,QAAQ,EAAE,QAAQ,CAAC;IACnB,qBAAqB,EAAE,OAAO,CAAC;CAChC,iBAqDA"}

package/dist/agent/chat/exports.d.ts CHANGED Viewed

@@ -1,9 +1,9 @@
-import { createChatModel, IChatModel, SUPPORTED_CHAT_MODELS, type SupportedChatModels } from "@empiricalrun/llm/chat";
+import { createChatModel, IChatModel, SUPPORTED_CHAT_MODELS } from "@empiricalrun/llm/chat";
 import { FileInfo } from "../../types";
 import { chatAgentLoop } from "./agent-loop";
 import { defaultModel } from "./models";
 import { CHAT_STATE_VERSIONS_MIGRATIONS_MAP, chatStateFromModel, createChatState, createChatStateForMessages, fetchToolCallAvailability, getLatestDownloadBuildUrl, LATEST_CHAT_STATE_VERSION, migrateChatState } from "./state";
 import { ReporterFunction } from "./types";
 export { CHAT_STATE_VERSIONS_MIGRATIONS_MAP, chatAgentLoop, chatStateFromModel, createChatModel, createChatState, createChatStateForMessages, defaultModel, fetchToolCallAvailability, getLatestDownloadBuildUrl, LATEST_CHAT_STATE_VERSION, migrateChatState, SUPPORTED_CHAT_MODELS, };
-export type { FileInfo, IChatModel, ReporterFunction, SupportedChatModels };
+export type { FileInfo, IChatModel, ReporterFunction };
 //# sourceMappingURL=exports.d.ts.map

package/dist/agent/chat/exports.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"exports.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/exports.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,EACf,UAAU,EACV,qBAAqB,~~EACrB~~,~~KAAK,mBAAmB,EACzB,~~MAAM,wBAAwB,CAAC;AAEhC,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AACvC,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAC7C,OAAO,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AACxC,OAAO,EACL,kCAAkC,EAClC,kBAAkB,EAClB,eAAe,EACf,0BAA0B,EAC1B,yBAAyB,EACzB,yBAAyB,EACzB,yBAAyB,EACzB,gBAAgB,EACjB,MAAM,SAAS,CAAC;AACjB,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAE3C,OAAO,EACL,kCAAkC,EAClC,aAAa,EACb,kBAAkB,EAClB,eAAe,EACf,eAAe,EACf,0BAA0B,EAC1B,YAAY,EACZ,yBAAyB,EACzB,yBAAyB,EACzB,yBAAyB,EACzB,gBAAgB,EAChB,qBAAqB,GACtB,CAAC;AAEF,YAAY,EAAE,QAAQ,EAAE,UAAU,EAAE,gBAAgB,EAAE,~~mBAAmB,EAAE,~~CAAC"}
1	+ {"version":3,"file":"exports.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/exports.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,EACf,UAAU,EACV,qBAAqB,EACtB,MAAM,wBAAwB,CAAC;AAEhC,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AACvC,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAC7C,OAAO,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AACxC,OAAO,EACL,kCAAkC,EAClC,kBAAkB,EAClB,eAAe,EACf,0BAA0B,EAC1B,yBAAyB,EACzB,yBAAyB,EACzB,yBAAyB,EACzB,gBAAgB,EACjB,MAAM,SAAS,CAAC;AACjB,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAE3C,OAAO,EACL,kCAAkC,EAClC,aAAa,EACb,kBAAkB,EAClB,eAAe,EACf,eAAe,EACf,0BAA0B,EAC1B,YAAY,EACZ,yBAAyB,EACzB,yBAAyB,EACzB,yBAAyB,EACzB,gBAAgB,EAChB,qBAAqB,GACtB,CAAC;AAEF,YAAY,EAAE,QAAQ,EAAE,UAAU,EAAE,gBAAgB,EAAE,CAAC"}

package/dist/agent/chat/index.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { SupportedChatModels } from "@empiricalrun/llm/chat";
+import { SupportedChatModels } from "@empiricalrun/shared-types";
 export declare function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, withRetry, }: {
     selectedModel: SupportedChatModels;
     useDiskForChatState: boolean;

package/dist/agent/chat/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"~~AACA~~,OAAO,EAGL,mBAAmB,EACpB,MAAM,~~wBAAwB~~,CAAC;~~AAyChC~~,wBAAsB,kBAAkB,CAAC,EACvC,mBAAmB,EACnB,aAAa,EACb,oBAAoB,EACpB,SAAS,GACV,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,mBAAmB,EAAE,OAAO,CAAC;IAC7B,oBAAoB,EAAE,MAAM,GAAG,SAAS,CAAC;IACzC,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB,iBAwHA;AAuBD,wBAAsB,wBAAwB,CAAC,EAC7C,aAAa,EACb,aAAa,GACd,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,aAAa,EAAE,MAAM,CAAC;CACvB,iBA6DA"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAGL,mBAAmB,EACpB,MAAM,4BAA4B,CAAC;AA8DpC,wBAAsB,kBAAkB,CAAC,EACvC,mBAAmB,EACnB,aAAa,EACb,oBAAoB,EACpB,SAAS,GACV,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,mBAAmB,EAAE,OAAO,CAAC;IAC7B,oBAAoB,EAAE,MAAM,GAAG,SAAS,CAAC;IACzC,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB,iBAwHA;AAuBD,wBAAsB,wBAAwB,CAAC,EAC7C,aAAa,EACb,aAAa,GACd,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,aAAa,EAAE,MAAM,CAAC;CACvB,iBA6DA"}

package/dist/agent/chat/index.js CHANGED Viewed

@@ -21,6 +21,21 @@ function concludeAgent(chatModel, useDiskForChatState, selectedModel, error) {
         (0, state_1.saveToDisk)(chatModel.messages, selectedModel, chatModel.askUserForInput, error);
     }
 }
+async function fetchEnvironmentVariables() {
+    // TODO: Wrap in try-catch and log error
+    const response = await fetch(`${DASHBOARD_DOMAIN}/api/environment-variables`, {
+        headers: {
+            "Content-Type": "application/json",
+            Authorization: `Bearer ${process.env.EMPIRICALRUN_API_KEY}`,
+        },
+    });
+    const data = await response.json();
+    const envVars = data.data.environment_variables.reduce((acc, envVar) => {
+        acc[envVar.name] = envVar.value;
+        return acc;
+    }, {});
+    return envVars;
+}
 async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialPromptContent, withRetry, }) {
     let chatState;
     if (useDiskForChatState) {
@@ -111,7 +126,7 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
                 apiKey: process.env.EMPIRICALRUN_API_KEY,
                 trace,
                 featureFlags: [],
-                environmentOverrides: {},
+                environmentOverrides: await fetchEnvironmentVariables(),
             });
             const fileInfo = await (0, file_tree_1.getFileInfoFromFS)(process.cwd());
             await (0, agent_loop_1.chatAgentLoop)({

package/dist/agent/chat/models.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { type SupportedChatModels } from "@empiricalrun/llm/chat";
+import { SupportedChatModels } from "@empiricalrun/shared-types";
 export declare const defaultModel: SupportedChatModels;
 export declare const modelLabels: Record<SupportedChatModels, string>;
 //# sourceMappingURL=models.d.ts.map

package/dist/agent/chat/models.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/models.ts"],"names":[],"mappings":"~~AAAA~~,OAAO,~~EAEL~~,~~KAAK,~~mBAAmB,~~EACzB~~,MAAM,~~wBAAwB~~,CAAC;~~AAWhC~~,eAAO,MAAM,YAAY,EAAE,mBAA6C,CAAC;AAEzE,eAAO,MAAM,WAAW,EAAE,MAAM,CAAC,mBAAmB,EAAE,MAAM,CAOzD,CAAC"}
1	+ {"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/models.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,mBAAmB,EAAE,MAAM,4BAA4B,CAAC;AAWjE,eAAO,MAAM,YAAY,EAAE,mBAA6C,CAAC;AAEzE,eAAO,MAAM,WAAW,EAAE,MAAM,CAAC,mBAAmB,EAAE,MAAM,CAOzD,CAAC"}

package/dist/agent/chat/state.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
-import { IChatModel, SupportedChatModels } from "@empiricalrun/llm/chat";
-import { CanonicalMessage, ChatState, ChatStateError } from "@empiricalrun/shared-types";
+import { IChatModel } from "@empiricalrun/llm/chat";
+import { CanonicalMessage, ChatState, ChatStateError, SupportedChatModels } from "@empiricalrun/shared-types";
 export declare const CHAT_STATE_VERSIONS_MIGRATIONS_MAP: Record<string, (state: any) => any>;
 export declare const LATEST_CHAT_STATE_VERSION = "0.1";
 export declare const CHAT_STATE_PATH: string;

package/dist/agent/chat/state.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"state.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/state.ts"],"names":[],"mappings":"AAAA,OAAO,EAIL,UAAU,~~EACV~~,~~mBAAmB,EACpB,~~MAAM,wBAAwB,CAAC;AAChC,OAAO,EACL,gBAAgB,EAChB,SAAS,EACT,cAAc,~~EACf~~,MAAM,4BAA4B,CAAC;AA+BpC,eAAO,MAAM,kCAAkC,EAAE,MAAM,CACrD,MAAM,EACN,CAAC,KAAK,EAAE,GAAG,KAAK,GAAG,CAIpB,CAAC;AAEF,eAAO,MAAM,yBAAyB,QAAQ,CAAC;AAE/C,eAAO,MAAM,eAAe,QAI3B,CAAC;AAEF,wBAAgB,eAAe,CAAC,EAC9B,UAAU,EACV,aAAa,EACb,aAAa,EACb,KAAK,GACN,EAAE;IACD,UAAU,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B,aAAa,EAAE,SAAS,GAAG,SAAS,CAAC;IACrC,aAAa,EAAE,mBAAmB,CAAC;IACnC,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,aAYA;AAED,wBAAgB,0BAA0B,CAAC,EACzC,QAAQ,EACR,aAAa,EACb,eAAe,EACf,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,GAAG,CAAC;IACd,aAAa,EAAE,mBAAmB,CAAC;IACnC,eAAe,EAAE,OAAO,CAAC;IACzB,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,GAAG,SAAS,CASZ;AAED,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,EACpC,SAAS,EACT,aAAa,EACb,KAAK,GACN,EAAE;IACD,SAAS,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC;IACzB,aAAa,EAAE,mBAAmB,CAAC;IACnC,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,aAOA;AAED,wBAAgB,aAAa,IAAI,SAAS,GAAG,SAAS,CAarD;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,GAAG,GAAG,SAAS,CAqBzD;AAED,wBAAgB,UAAU,CAAC,CAAC,EAC1B,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,EAClB,aAAa,EAAE,mBAAmB,EAClC,eAAe,EAAE,OAAO,EACxB,KAAK,EAAE,cAAc,GAAG,IAAI,QAgB7B;AA2BD,wBAAgB,yBAAyB,CACvC,QAAQ,EAAE,gBAAgB,EAAE,GAC3B,MAAM,GAAG,IAAI,CAef;AAED,wBAAgB,yBAAyB,CACvC,aAAa,EAAE,MAAM,EACrB,QAAQ,EAAE,gBAAgB,EAAE;;;EAe7B"}
1	+ {"version":3,"file":"state.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/state.ts"],"names":[],"mappings":"AAAA,OAAO,EAIL,UAAU,EACX,MAAM,wBAAwB,CAAC;AAChC,OAAO,EACL,gBAAgB,EAChB,SAAS,EACT,cAAc,EACd,mBAAmB,EACpB,MAAM,4BAA4B,CAAC;AA+BpC,eAAO,MAAM,kCAAkC,EAAE,MAAM,CACrD,MAAM,EACN,CAAC,KAAK,EAAE,GAAG,KAAK,GAAG,CAIpB,CAAC;AAEF,eAAO,MAAM,yBAAyB,QAAQ,CAAC;AAE/C,eAAO,MAAM,eAAe,QAI3B,CAAC;AAEF,wBAAgB,eAAe,CAAC,EAC9B,UAAU,EACV,aAAa,EACb,aAAa,EACb,KAAK,GACN,EAAE;IACD,UAAU,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B,aAAa,EAAE,SAAS,GAAG,SAAS,CAAC;IACrC,aAAa,EAAE,mBAAmB,CAAC;IACnC,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,aAYA;AAED,wBAAgB,0BAA0B,CAAC,EACzC,QAAQ,EACR,aAAa,EACb,eAAe,EACf,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,GAAG,CAAC;IACd,aAAa,EAAE,mBAAmB,CAAC;IACnC,eAAe,EAAE,OAAO,CAAC;IACzB,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,GAAG,SAAS,CASZ;AAED,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,EACpC,SAAS,EACT,aAAa,EACb,KAAK,GACN,EAAE;IACD,SAAS,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC;IACzB,aAAa,EAAE,mBAAmB,CAAC;IACnC,KAAK,EAAE,cAAc,GAAG,IAAI,CAAC;CAC9B,aAOA;AAED,wBAAgB,aAAa,IAAI,SAAS,GAAG,SAAS,CAarD;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,GAAG,GAAG,SAAS,CAqBzD;AAED,wBAAgB,UAAU,CAAC,CAAC,EAC1B,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,EAClB,aAAa,EAAE,mBAAmB,EAClC,eAAe,EAAE,OAAO,EACxB,KAAK,EAAE,cAAc,GAAG,IAAI,QAgB7B;AA2BD,wBAAgB,yBAAyB,CACvC,QAAQ,EAAE,gBAAgB,EAAE,GAC3B,MAAM,GAAG,IAAI,CAef;AAED,wBAAgB,yBAAyB,CACvC,aAAa,EAAE,MAAM,EACrB,QAAQ,EAAE,gBAAgB,EAAE;;;EAe7B"}

package/dist/agent/chat/utils.d.ts CHANGED Viewed

@@ -1,5 +1,6 @@
 import { TraceClient } from "@empiricalrun/llm";
-import { IChatModel, SupportedChatModels } from "@empiricalrun/llm/chat";
+import { IChatModel } from "@empiricalrun/llm/chat";
+import { SupportedChatModels } from "@empiricalrun/shared-types";
 import { ReporterFunction } from "./types";
 export declare const log: (...args: any[]) => void;
 export declare function getModelName(model: string): string;

package/dist/agent/chat/utils.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,~~EAEL~~,UAAU,~~EACV~~,mBAAmB,EACpB,MAAM,~~wBAAwB~~,CAAC;~~AAKhC~~,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAE3C,eAAO,MAAM,GAAG,GAAI,GAAG,MAAM,GAAG,EAAE,SAEjC,CAAC;AAcF,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAMlD;AAED,wBAAsB,gBAAgB,CAAC,EACrC,OAAO,EACP,KAAK,EACL,SAAS,EACT,aAAa,EACb,QAAQ,EACR,KAAK,GACN,EAAE;IACD,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,OAAO,CAAC;IACf,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,mBAAmB,CAAC;IACnC,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,iBAsBA"}
1	+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAuB,UAAU,EAAE,MAAM,wBAAwB,CAAC;AACzE,OAAO,EAEL,mBAAmB,EACpB,MAAM,4BAA4B,CAAC;AAIpC,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAE3C,eAAO,MAAM,GAAG,GAAI,GAAG,MAAM,GAAG,EAAE,SAEjC,CAAC;AAcF,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAMlD;AAED,wBAAsB,gBAAgB,CAAC,EACrC,OAAO,EACP,KAAK,EACL,SAAS,EACT,aAAa,EACb,QAAQ,EACR,KAAK,GACN,EAAE;IACD,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,OAAO,CAAC;IACf,SAAS,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,mBAAmB,CAAC;IACnC,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,iBAsBA"}

package/dist/agent/cua/computer.js CHANGED Viewed

@@ -155,10 +155,10 @@ async function executeModelAction(page, action, codegen) {
             }
             case "keypress": {
                 const { keys } = action;
-                await codegen.recordAction({ type: "keypress", keys });
                 const mappedKeys = keys.map((k) => {
                     return CUA_KEY_TO_PLAYWRIGHT_KEY[k.toLowerCase()] || k;
                 });
+                await codegen.recordAction({ type: "keypress", keys: mappedKeys });
                 const mappedKey = mappedKeys.join("+"); // ["CTRL", "A"] becomes ControlOrMeta+A
                 console.log(`Action: keypress for keys ${keys} -> '${mappedKey}'`);
                 await page.keyboard.press(mappedKey);

package/dist/agent/cua/index.d.ts CHANGED Viewed

@@ -1,6 +1,13 @@
 import { TraceClient } from "@empiricalrun/llm";
-import { ArtifactInput } from "@empiricalrun/shared-types";
+import { ArtifactInputData, Usage } from "@empiricalrun/shared-types";
 import { Page } from "playwright";
+export type BrowserAgentResult = Array<{
+    type: "text";
+    text: string;
+} | {
+    type: "screenshot";
+    screenshot: ArtifactInputData;
+}>;
 export declare function createTestUsingComputerUseAgent({ page, task, trace, }: {
     page: Page;
     task: string;
@@ -8,7 +15,7 @@ export declare function createTestUsingComputerUseAgent({ page, task, trace, }:
 }): Promise<{
     code: string;
     importPaths: string[];
-    actionsSummary: string;
-    artifacts: ArtifactInput[];
+    result: BrowserAgentResult;
+    usage: Usage;
 }>;
 //# sourceMappingURL=index.d.ts.map

package/dist/agent/cua/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAC/D,OAAO,EAAE,~~aAAa~~,EAAE,MAAM,4BAA4B,CAAC;~~AAS3D~~,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAwBlC,wBAAsB,+BAA+B,CAAC,EACpD,IAAI,EACJ,IAAI,EACJ,KAAK,GACN,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,~~cAAc~~,EAAE,~~MAAM~~,CAAC;~~IACvB~~,~~SAAS~~,EAAE,~~aAAa~~,~~EAAE,~~CAAC;~~CAC5B~~,CAAC,~~CAyLD~~"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAC/D,OAAO,EAAE,iBAAiB,EAAE,KAAK,EAAE,MAAM,4BAA4B,CAAC;AAStE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAwBlC,MAAM,MAAM,kBAAkB,GAAG,KAAK,CAClC;IACE,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;CACd,GACD;IACE,IAAI,EAAE,YAAY,CAAC;IACnB,UAAU,EAAE,iBAAiB,CAAC;CAC/B,CACJ,CAAC;AAEF,wBAAsB,+BAA+B,CAAC,EACpD,IAAI,EACJ,IAAI,EACJ,KAAK,GACN,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,MAAM,EAAE,kBAAkB,CAAC;IAC3B,KAAK,EAAE,KAAK,CAAC;CACd,CAAC,CAuND"}

package/dist/agent/cua/index.js CHANGED Viewed

@@ -9,26 +9,29 @@ const openai_1 = __importDefault(require("openai"));
 const computer_1 = require("./computer");
 const model_1 = require("./model");
 const element_from_point_1 = require("./pw-codegen/element-from-point");
+const MAX_ITERATIONS = 15;
 function getCodegen() {
     return new element_from_point_1.ElementFromPointCodegen();
     // TODO: Add support for page.pause approach
     // We can use PlaywrightPauseCodegen if playwright patch was successful,
     // IPC port is available and PW_CODEGEN_NO_INSPECTOR env var is set
 }
-function getStructuredArtifactInput(screenshotBytes, actionName) {
+function artifact(screenshot, name) {
     return {
-        name: `${actionName}`,
+        name: `${name}`,
         contentType: "image/png",
-        data: Buffer.from(screenshotBytes, "base64"),
+        data: Buffer.from(screenshot, "base64"),
     };
 }
 async function createTestUsingComputerUseAgent({ page, task, trace, }) {
     const codegen = getCodegen();
     await codegen.initialize(page);
-    const screenshotBytes = await (0, computer_1.getScreenshot)(page);
-    const artifacts = [
-        getStructuredArtifactInput(screenshotBytes, "Initial Screen"),
+    const screenshot = await (0, computer_1.getScreenshot)(page);
+    const initialArtifact = artifact(screenshot, "Initial screen");
+    let result = [
+        { type: "screenshot", screenshot: initialArtifact },
     ];
+    let tokensUsed = { input: 0, output: 0 };
     const viewport = page.viewportSize();
     let screenWidth = viewport?.width || 1280;
     let screenHeight = viewport?.height || 720;
@@ -49,7 +52,7 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
                     },
                     {
                         type: "input_image",
-                        image_url: `data:image/png;base64,${screenshotBytes}`,
+                        image_url: `data:image/png;base64,${screenshot}`,
                         detail: "high",
                     },
                 ],
@@ -59,13 +62,16 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
         screenHeight,
         openAIClient,
     });
+    tokensUsed.input += response.usage?.input_tokens || 0;
+    tokensUsed.output += response.usage?.output_tokens || 0;
     let isTaskDone = false;
-    let maxIterations = 15;
     let generatedCode = "";
-    let actionsSummary = [];
     let iterationIndex = 0;
-    while (!isTaskDone && iterationIndex < maxIterations) {
-        actionsSummary.push(`\n# Agent iteration ${iterationIndex}`);
+    while (!isTaskDone && iterationIndex < MAX_ITERATIONS) {
+        result.push({
+            type: "text",
+            text: `# Agent iteration ${iterationIndex}`,
+        });
         iterationIndex++;
         const computerCalls = response.output.filter((item) => item.type === "computer_call");
         const functionCalls = response.output.filter((item) => item.type === "function_call");
@@ -75,7 +81,10 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
                 const content = assistantOutput.content.find((item) => item.type === "output_text");
                 if (content && "text" in content) {
                     // TODO: This ignores `ResponseOutputRefusal` type (refusal from assistant)
-                    actionsSummary.push(`Agent summary: ${content.text}`);
+                    result.push({
+                        type: "text",
+                        text: `Agent summary: ${content.text}`,
+                    });
                 }
             }
             isTaskDone = true;
@@ -86,7 +95,10 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
             const reasoningItem = reasoning;
             const summaryText = reasoningItem.summary?.find((item) => item.type === "summary_text")?.text;
             if (summaryText) {
-                actionsSummary.push(`Action reasoning: ${summaryText}`);
+                result.push({
+                    type: "text",
+                    text: `Action reasoning: ${summaryText}`,
+                });
             }
         }
         // We expect either a function call or a computer call in the response.
@@ -98,9 +110,15 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
             const args = JSON.parse(functionCall.arguments);
             const { actionSummary, actionCode } = await (0, computer_1.executeModelAction)(page, { type: "goto", url: args.url }, codegen);
             executedActionSummary = actionSummary;
-            actionsSummary.push(`Action executed: ${actionSummary}`);
+            result.push({
+                type: "text",
+                text: `Action executed: ${actionSummary}`,
+            });
             if (actionCode) {
-                actionsSummary.push(`Generated code: ${actionCode}`);
+                result.push({
+                    type: "text",
+                    text: `Generated code: ${actionCode}`,
+                });
                 generatedCode += actionCode;
             }
             toolCallOutput = {
@@ -116,20 +134,28 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
             // Execute the action and take a screenshot
             const { actionSummary, actionCode } = await (0, computer_1.executeModelAction)(page, action, codegen);
             executedActionSummary = actionSummary;
-            actionsSummary.push(`Action executed: ${actionSummary}`);
-            actionsSummary.push(`Generated code: ${actionCode}`);
+            result.push({
+                type: "text",
+                text: `Action executed: ${actionSummary}`,
+            });
+            result.push({
+                type: "text",
+                text: `Generated code: ${actionCode}`,
+            });
             generatedCode += actionCode;
             // Allow time for changes to take effect.
             await new Promise((resolve) => setTimeout(resolve, 1000));
-            const screenshotBytes = await (0, computer_1.getScreenshot)(page);
-            artifacts.push(getStructuredArtifactInput(screenshotBytes, actionSummary));
-            // Populate toolCallOutput
+            const screenshot = await (0, computer_1.getScreenshot)(page);
+            result.push({
+                type: "screenshot",
+                screenshot: artifact(screenshot, actionSummary),
+            });
             toolCallOutput = {
                 type: "computer_call_output",
                 call_id: computerCall.call_id,
                 output: {
                     type: "computer_screenshot",
-                    image_url: `data:image/png;base64,${screenshotBytes}`,
+                    image_url: `data:image/png;base64,${screenshot}`,
                 },
                 acknowledged_safety_checks: computerCall.pending_safety_checks,
             };
@@ -155,19 +181,25 @@ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
             screenHeight,
             openAIClient,
         });
+        tokensUsed.input += response.usage?.input_tokens || 0;
+        tokensUsed.output += response.usage?.output_tokens || 0;
     }
     if (!isTaskDone) {
-        actionsSummary.push(`Max iteration limit hit: Task not done after ${maxIterations} iterations`);
+        const logMessage = `Max iteration limit hit: Task not done after ${MAX_ITERATIONS} iterations`;
+        console.log(logMessage);
+        result.push({
+            type: "text",
+            text: logMessage,
+        });
     }
-    trace?.update({
-        output: { code: generatedCode, actionsSummary: actionsSummary.join("\n") },
-    });
+    trace?.update({ output: { result } });
     return {
-        actionsSummary: actionsSummary.join("\n"),
+        result,
         code: generatedCode,
-        // TODO: Does not support skills (from helper methods in pages/ dir),
-        // and therefore, import paths are empty
         importPaths: [],
-        artifacts,
+        usage: {
+            tokens: tokensUsed,
+            cost: (0, model_1.tokensToCost)(tokensUsed),
+        },
     };
 }

package/dist/agent/cua/model.d.ts CHANGED Viewed

@@ -7,4 +7,11 @@ export declare function callComputerUseModel({ input, previousResponseId, screen
     screenHeight: number;
     openAIClient: OpenAI;
 }): Promise<Response>;
+export declare function tokensToCost(tokens: {
+    input: number;
+    output: number;
+}): {
+    input: number;
+    output: number;
+};
 //# sourceMappingURL=model.d.ts.map

package/dist/agent/cua/model.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAEL,QAAQ,EACR,iBAAiB,EAClB,MAAM,0CAA0C,CAAC;AA8BlD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,kBAAkB,EAClB,WAAW,EACX,YAAY,EACZ,YAAY,GACb,EAAE;IACD,KAAK,EAAE,iBAAiB,EAAE,CAAC;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,QAAQ,CAAC,CAuBpB"}
1	+ {"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAEL,QAAQ,EACR,iBAAiB,EAClB,MAAM,0CAA0C,CAAC;AA8BlD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,kBAAkB,EAClB,WAAW,EACX,YAAY,EACZ,YAAY,GACb,EAAE;IACD,KAAK,EAAE,iBAAiB,EAAE,CAAC;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,QAAQ,CAAC,CAuBpB;AAED,wBAAgB,YAAY,CAAC,MAAM,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE;;;EAQrE"}

package/dist/agent/cua/model.js CHANGED Viewed

@@ -1,6 +1,7 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.callComputerUseModel = callComputerUseModel;
+exports.tokensToCost = tokensToCost;
 const INSTRUCTIONS = `You will be asked to execute some actions in a browser context.
 Don't ask the user for confirmations - just execute the actions.
@@ -50,3 +51,12 @@ async function callComputerUseModel({ input, previousResponseId, screenWidth, sc
     });
     return response;
 }
+function tokensToCost(tokens) {
+    // Costs for "computer-use-preview-2025-03-11"
+    // https://platform.openai.com/docs/models/computer-use-preview
+    const inputUsdFor1MTokens = 3.0;
+    const outputUsdFor1MTokens = 12.0;
+    const inputCost = (tokens.input / 1_000_000) * inputUsdFor1MTokens;
+    const outputCost = (tokens.output / 1_000_000) * outputUsdFor1MTokens;
+    return { input: inputCost, output: outputCost };
+}

package/dist/bin/index.js CHANGED Viewed

@@ -182,7 +182,7 @@ async function runAgentsWorkflow(testGenConfig, testGenToken) {
             repoDir: process.cwd(),
             testFilePath: specPath,
         });
-        const { isError, error } = await (0, run_1.generateTestsUsingMasterAgent)({
+        const { isError, error } = await (0, run_1.runBrowsingAgent)({
             testCaseName: testCase.name,
             testCaseSuites: testCase.suites,
             testFilePath: specPath,
@@ -212,7 +212,7 @@ async function main() {
         .option("--use-chat", "Use chat agent (and not the workflow)")
         .option("--chat-session-id <chat-session-id>", "Identifier for chat session (fetched from dash.empirical.run)")
         .option("--use-disk-for-chat-state", "Save and load chat state from disk")
-        .option("--chat-model <model>", "Chat model to use (claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20241022 or gemini-2.5-pro-preview-03-25)")
+        .option("--chat-model <model>", "Chat model to use (claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20241022 or gemini-2.5-pro-preview-06-05)")
         .option("--initial-prompt <path>", "Path to an initial prompt file (e.g. prompt.md)")
         .option("--with-retry", "Use the retry strategy")
         .parse(process.argv);

package/dist/bin/utils/index.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { SupportedChatModels } from "@empiricalrun/llm/chat";
+import { SupportedChatModels } from "@empiricalrun/shared-types";
 export declare const ARGS_TO_MODEL_MAP: Record<string, SupportedChatModels>;
 export interface CLIOptions {
     token?: string;

package/dist/bin/utils/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,~~wBAAwB~~,CAAC;~~AAG7D~~,eAAO,MAAM,iBAAiB,EAAE,MAAM,CAAC,MAAM,EAAE,mBAAmB,CAajE,CAAC;AAEF,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,EAAE,CAAC,OAAO,iBAAiB,CAAC,CAAC,MAAM,OAAO,iBAAiB,CAAC,CAAC;IACvE,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAQD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAyDrB;AAED,wBAAgB,WAAW,SAgC1B"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,4BAA4B,CAAC;AAGjE,eAAO,MAAM,iBAAiB,EAAE,MAAM,CAAC,MAAM,EAAE,mBAAmB,CAajE,CAAC;AAEF,MAAM,WAAW,UAAU;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,EAAE,CAAC,OAAO,iBAAiB,CAAC,CAAC,MAAM,OAAO,iBAAiB,CAAC,CAAC;IACvE,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAQD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAyDrB;AAED,wBAAgB,WAAW,SAgC1B"}

package/dist/bin/utils/index.js CHANGED Viewed

@@ -13,9 +13,9 @@ exports.ARGS_TO_MODEL_MAP = {
     "claude-4": "claude-sonnet-4-20250514",
     "claude-sonnet-4": "claude-sonnet-4-20250514",
     "claude-opus-4": "claude-opus-4-20250514",
-    "gemini-2.5": "gemini-2.5-pro-preview-03-25",
-    "gemini-2.5-pro": "gemini-2.5-pro-preview-03-25",
-    "gemini-2.5-pro-preview-03-25": "gemini-2.5-pro-preview-03-25",
+    "gemini-2.5": "gemini-2.5-pro-preview-06-05",
+    "gemini-2.5-pro": "gemini-2.5-pro-preview-06-05",
+    "gemini-2.5-pro-preview-03-25": "gemini-2.5-pro-preview-06-05",
     "o4-mini": "o4-mini-2025-04-16",
     "o4-mini-2025-04-16": "o4-mini-2025-04-16",
 };

package/dist/file/client.d.ts CHANGED Viewed

@@ -1,17 +1,11 @@
-import { ArtifactInput } from "@empiricalrun/shared-types";
+import { BrowserAgentIPCPayload } from "./server";
 declare class FileServiceClient {
     baseUrl: string;
     port: number | undefined;
     constructor();
     static isAvailable(): boolean;
-    updateTest({ generatedCode, task, importPaths, actionsSummary, }: {
-        generatedCode: string;
-        task: string;
-        importPaths: string[];
-        actionsSummary?: string;
-    }): Promise<any>;
+    sendAgentResult(payload: BrowserAgentIPCPayload): Promise<any>;
     post(path: string, body: any): Promise<any>;
-    sendArtifactInputsToServer(artifacts: ArtifactInput[]): Promise<any>;
 }
 export default FileServiceClient;
 //# sourceMappingURL=client.d.ts.map

package/dist/file/client.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../../src/file/client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,~~aAAa~~,EAAE,MAAM,~~4BAA4B~~,CAAC;~~AAE3D~~,cAAM,iBAAiB;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,GAAG,SAAS,CAAC;;IAUzB,MAAM,CAAC,WAAW;IAIZ,~~UAAU~~,CAAC,~~EACf~~,~~aAAa,EACb,IAAI,EACJ,WAAW,EACX,cAAc,GACf,~~EAAE~~;QACD~~,~~aAAa,EAAE,MAAM,CAAC~~;~~QACtB~~,IAAI,~~EAAE,MAAM,~~CAAC~~;QACb~~,~~WAAW,EAAE,MAAM,EAAE,CAAC;QACtB,cAAc,CAAC,EAAE,MAAM,CAAC;KACzB;IASK,~~IAAI,~~CAAC,IAAI,~~EAAE,MAAM,EAAE,IAAI,EAAE,GAAG;~~IAiB5B,0BAA0B,CAAC,SAAS,EAAE,aAAa,EAAE~~;~~CAgB5D;~~AAED,eAAe,iBAAiB,CAAC"}
1	+ {"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../../src/file/client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,sBAAsB,EAAE,MAAM,UAAU,CAAC;AAElD,cAAM,iBAAiB;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,GAAG,SAAS,CAAC;;IAUzB,MAAM,CAAC,WAAW;IAIZ,eAAe,CAAC,OAAO,EAAE,sBAAsB;IAI/C,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG;CAgBnC;AAED,eAAe,iBAAiB,CAAC"}

package/dist/file/client.js CHANGED Viewed

@@ -13,13 +13,8 @@ class FileServiceClient {
     static isAvailable() {
         return !!Number(process.env.IPC_FILE_SERVICE_PORT);
     }
-    async updateTest({ generatedCode, task, importPaths, actionsSummary, }) {
-        return this.post("/test", {
-            generatedCode,
-            task,
-            importPaths,
-            actionsSummary,
-        });
+    async sendAgentResult(payload) {
+        return this.post("/agent-results", payload);
     }
     async post(path, body) {
         const resp = await fetch(`${this.baseUrl}${path}`, {
@@ -37,21 +32,5 @@ class FileServiceClient {
             return data;
         }
     }
-    async sendArtifactInputsToServer(artifacts) {
-        const resp = await fetch(`${this.baseUrl}/artifact`, {
-            method: "POST",
-            headers: {
-                "Content-Type": "application/json",
-            },
-            body: JSON.stringify(artifacts),
-        });
-        if (!resp.ok) {
-            throw new Error(`API failed with status ${resp.statusText}`);
-        }
-        else {
-            const data = await resp.json();
-            return data;
-        }
-    }
 }
 exports.default = FileServiceClient;