npm - @empiricalrun/test-gen - Versions diffs - 0.53.4 → 0.53.5 - Mend

@empiricalrun/test-gen 0.53.4 → 0.53.5

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (8)

package/CHANGELOG.md +8 -0
package/dist/agent/cua/index.d.ts +3 -1
package/dist/agent/cua/index.d.ts.map +1 -1
package/dist/agent/cua/index.js +10 -22
package/dist/agent/cua/model.d.ts +3 -1
package/dist/agent/cua/model.d.ts.map +1 -1
package/dist/agent/cua/model.js +3 -7
package/package.json +2 -2

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,13 @@
 # @empiricalrun/test-gen
+## 0.53.5
+### Patch Changes
+- 9f3cb10: feat: automated tracing for LLM call overlay dismiss
+- Updated dependencies [9f3cb10]
+  - @empiricalrun/llm@0.14.4
 ## 0.53.4
 ### Patch Changes

package/dist/agent/cua/index.d.ts CHANGED Viewed

@@ -1,8 +1,10 @@
+import { TraceClient } from "@empiricalrun/llm";
 import { Page } from "playwright";
 export declare function startPlaywrightCodegen(page: Page): Promise<void>;
-export declare function createTestUsingComputerUseAgent({ page, task, }: {
+export declare function createTestUsingComputerUseAgent({ page, task, trace, }: {
     page: Page;
     task: string;
+    trace?: TraceClient;
 }): Promise<{
     code: string;
     importPaths: string[];

package/dist/agent/cua/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AASA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAOlC,wBAAsB,sBAAsB,CAAC,IAAI,EAAE,IAAI,iBAoBtD;AAED,wBAAsB,+BAA+B,CAAC,EACpD,IAAI,EACJ,IAAI,GACL,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;CACd,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;CACxB,CAAC,CAmMD"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAS/D,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAMlC,wBAAsB,sBAAsB,CAAC,IAAI,EAAE,IAAI,iBAoBtD;AAED,wBAAsB,+BAA+B,CAAC,EACpD,IAAI,EACJ,IAAI,EACJ,KAAK,GACN,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;CACxB,CAAC,CAqLD"}

package/dist/agent/cua/index.js CHANGED Viewed

@@ -5,8 +5,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.createTestUsingComputerUseAgent = exports.startPlaywrightCodegen = void 0;
 const llm_1 = require("@empiricalrun/llm");
-const crypto_1 = __importDefault(require("crypto"));
-const logger_1 = require("../../bin/logger");
+const openai_1 = __importDefault(require("openai"));
 const utils_1 = require("../browsing/utils");
 const computer_1 = require("./computer");
 const model_1 = require("./model");
@@ -32,25 +31,18 @@ async function startPlaywrightCodegen(page) {
     await page.pause();
 }
 exports.startPlaywrightCodegen = startPlaywrightCodegen;
-async function createTestUsingComputerUseAgent({ page, task, }) {
+async function createTestUsingComputerUseAgent({ page, task, trace, }) {
     await (0, utils_1.injectPwLocatorGenerator)(page);
     const screenshotBytes = await (0, computer_1.getScreenshot)(page);
     const viewport = page.viewportSize();
     let screenWidth = viewport?.width || 1280;
     let screenHeight = viewport?.height || 720;
-    const logger = new logger_1.CustomLogger({ useReporter: false });
-    const trace = llm_1.langfuseInstance?.trace({
-        name: "computer-use-agent",
-        id: crypto_1.default.randomUUID(),
-        input: { task },
-    });
-    if (trace) {
-        const traceUrl = trace.getTraceUrl();
-        logger.log(`Starting computer use agent: ${traceUrl}`);
-    }
-    const span = trace?.span({
-        name: "initial-model-call",
-    });
+    const openAIClient = trace
+        ? (0, llm_1.observeOpenAI)(new openai_1.default(), {
+            generationName: `computer-use-agent`,
+            parent: trace,
+        })
+        : new openai_1.default();
     let response = await (0, model_1.callComputerUseModel)({
         input: [
             {
@@ -70,8 +62,8 @@ async function createTestUsingComputerUseAgent({ page, task, }) {
         ],
         screenWidth,
         screenHeight,
+        openAIClient,
     });
-    span?.end({ output: response });
     let isTaskDone = false;
     let maxIterations = 15;
     let generatedCode = "";
@@ -80,10 +72,6 @@ async function createTestUsingComputerUseAgent({ page, task, }) {
     while (!isTaskDone && iterationIndex < maxIterations) {
         actionsSummary.push(`\n# Agent iteration ${iterationIndex}`);
         iterationIndex++;
-        const iterationSpan = trace?.span({
-            name: `iteration-${iterationIndex}`,
-            input: { response },
-        });
         const computerCalls = response.output.filter((item) => item.type === "computer_call");
         const functionCalls = response.output.filter((item) => item.type === "function_call");
         if (computerCalls.length === 0 && functionCalls.length === 0) {
@@ -174,8 +162,8 @@ async function createTestUsingComputerUseAgent({ page, task, }) {
             ],
             screenWidth,
             screenHeight,
+            openAIClient,
         });
-        iterationSpan?.end({ output: response });
     }
     if (!isTaskDone) {
         actionsSummary.push(`Max iteration limit hit: Task not done after ${maxIterations} iterations`);

package/dist/agent/cua/model.d.ts CHANGED Viewed

@@ -1,8 +1,10 @@
+import OpenAI from "openai";
 import { Response, ResponseInputItem } from "openai/resources/responses/responses.mjs";
-export declare function callComputerUseModel({ input, previousResponseId, screenWidth, screenHeight, }: {
+export declare function callComputerUseModel({ input, previousResponseId, screenWidth, screenHeight, openAIClient, }: {
     input: ResponseInputItem[];
     previousResponseId?: string;
     screenWidth: number;
     screenHeight: number;
+    openAIClient: OpenAI;
 }): Promise<Response>;
 //# sourceMappingURL=model.d.ts.map

package/dist/agent/cua/model.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AACA,OAAO,EAEL,QAAQ,EACR,iBAAiB,EAClB,MAAM,0CAA0C,CAAC;AA8BlD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,kBAAkB,EAClB,WAAW,EACX,YAAY,GACb,EAAE;IACD,KAAK,EAAE,iBAAiB,EAAE,CAAC;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,QAAQ,CAAC,CAuBpB"}
1	+ {"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAEL,QAAQ,EACR,iBAAiB,EAClB,MAAM,0CAA0C,CAAC;AA8BlD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,kBAAkB,EAClB,WAAW,EACX,YAAY,EACZ,YAAY,GACb,EAAE;IACD,KAAK,EAAE,iBAAiB,EAAE,CAAC;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,QAAQ,CAAC,CAuBpB"}

package/dist/agent/cua/model.js CHANGED Viewed

@@ -1,10 +1,6 @@
 "use strict";
-var __importDefault = (this && this.__importDefault) || function (mod) {
-    return (mod && mod.__esModule) ? mod : { "default": mod };
-};
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.callComputerUseModel = void 0;
-const openai_1 = __importDefault(require("openai"));
 const INSTRUCTIONS = `You will be asked to execute some actions in a browser context.
 Don't ask the user for confirmations - just execute the actions.
@@ -30,9 +26,8 @@ const pageGotoTool = {
     },
     strict: true,
 };
-async function callComputerUseModel({ input, previousResponseId, screenWidth, screenHeight, }) {
-    const openai = new openai_1.default();
-    return await openai.responses.create({
+async function callComputerUseModel({ input, previousResponseId, screenWidth, screenHeight, openAIClient, }) {
+    const response = await openAIClient.responses.create({
         model: "computer-use-preview-2025-03-11",
         previous_response_id: previousResponseId,
         parallel_tool_calls: false,
@@ -53,5 +48,6 @@ async function callComputerUseModel({ input, previousResponseId, screenWidth, sc
         input,
         truncation: "auto",
     });
+    return response;
 }
 exports.callComputerUseModel = callComputerUseModel;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@empiricalrun/test-gen",
-  "version": "0.53.4",
+  "version": "0.53.5",
   "publishConfig": {
     "registry": "https://registry.npmjs.org/",
     "access": "public"
@@ -68,7 +68,7 @@
     "tsx": "^4.16.2",
     "typescript": "^5.3.3",
     "zod": "^3.23.8",
-    "@empiricalrun/llm": "^0.14.3",
+    "@empiricalrun/llm": "^0.14.4",
     "@empiricalrun/r2-uploader": "^0.3.8",
     "@empiricalrun/test-run": "^0.7.6"
   },