@empiricalrun/test-gen 0.53.4 → 0.53.5

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of each package version as published in its respective public registry.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.53.5
4
+
5
+ ### Patch Changes
6
+
7
+ - 9f3cb10: feat: automated tracing for LLM call overlay dismiss
8
+ - Updated dependencies [9f3cb10]
9
+ - @empiricalrun/llm@0.14.4
10
+
3
11
  ## 0.53.4
4
12
 
5
13
  ### Patch Changes
@@ -1,8 +1,10 @@
1
+ import { TraceClient } from "@empiricalrun/llm";
1
2
  import { Page } from "playwright";
2
3
  export declare function startPlaywrightCodegen(page: Page): Promise<void>;
3
- export declare function createTestUsingComputerUseAgent({ page, task, }: {
4
+ export declare function createTestUsingComputerUseAgent({ page, task, trace, }: {
4
5
  page: Page;
5
6
  task: string;
7
+ trace?: TraceClient;
6
8
  }): Promise<{
7
9
  code: string;
8
10
  importPaths: string[];
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AASA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAOlC,wBAAsB,sBAAsB,CAAC,IAAI,EAAE,IAAI,iBAoBtD;AAED,wBAAsB,+BAA+B,CAAC,EACpD,IAAI,EACJ,IAAI,GACL,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;CACd,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;CACxB,CAAC,CAmMD"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAS/D,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAMlC,wBAAsB,sBAAsB,CAAC,IAAI,EAAE,IAAI,iBAoBtD;AAED,wBAAsB,+BAA+B,CAAC,EACpD,IAAI,EACJ,IAAI,EACJ,KAAK,GACN,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;CACxB,CAAC,CAqLD"}
@@ -5,8 +5,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
6
  exports.createTestUsingComputerUseAgent = exports.startPlaywrightCodegen = void 0;
7
7
  const llm_1 = require("@empiricalrun/llm");
8
- const crypto_1 = __importDefault(require("crypto"));
9
- const logger_1 = require("../../bin/logger");
8
+ const openai_1 = __importDefault(require("openai"));
10
9
  const utils_1 = require("../browsing/utils");
11
10
  const computer_1 = require("./computer");
12
11
  const model_1 = require("./model");
@@ -32,25 +31,18 @@ async function startPlaywrightCodegen(page) {
32
31
  await page.pause();
33
32
  }
34
33
  exports.startPlaywrightCodegen = startPlaywrightCodegen;
35
- async function createTestUsingComputerUseAgent({ page, task, }) {
34
+ async function createTestUsingComputerUseAgent({ page, task, trace, }) {
36
35
  await (0, utils_1.injectPwLocatorGenerator)(page);
37
36
  const screenshotBytes = await (0, computer_1.getScreenshot)(page);
38
37
  const viewport = page.viewportSize();
39
38
  let screenWidth = viewport?.width || 1280;
40
39
  let screenHeight = viewport?.height || 720;
41
- const logger = new logger_1.CustomLogger({ useReporter: false });
42
- const trace = llm_1.langfuseInstance?.trace({
43
- name: "computer-use-agent",
44
- id: crypto_1.default.randomUUID(),
45
- input: { task },
46
- });
47
- if (trace) {
48
- const traceUrl = trace.getTraceUrl();
49
- logger.log(`Starting computer use agent: ${traceUrl}`);
50
- }
51
- const span = trace?.span({
52
- name: "initial-model-call",
53
- });
40
+ const openAIClient = trace
41
+ ? (0, llm_1.observeOpenAI)(new openai_1.default(), {
42
+ generationName: `computer-use-agent`,
43
+ parent: trace,
44
+ })
45
+ : new openai_1.default();
54
46
  let response = await (0, model_1.callComputerUseModel)({
55
47
  input: [
56
48
  {
@@ -70,8 +62,8 @@ async function createTestUsingComputerUseAgent({ page, task, }) {
70
62
  ],
71
63
  screenWidth,
72
64
  screenHeight,
65
+ openAIClient,
73
66
  });
74
- span?.end({ output: response });
75
67
  let isTaskDone = false;
76
68
  let maxIterations = 15;
77
69
  let generatedCode = "";
@@ -80,10 +72,6 @@ async function createTestUsingComputerUseAgent({ page, task, }) {
80
72
  while (!isTaskDone && iterationIndex < maxIterations) {
81
73
  actionsSummary.push(`\n# Agent iteration ${iterationIndex}`);
82
74
  iterationIndex++;
83
- const iterationSpan = trace?.span({
84
- name: `iteration-${iterationIndex}`,
85
- input: { response },
86
- });
87
75
  const computerCalls = response.output.filter((item) => item.type === "computer_call");
88
76
  const functionCalls = response.output.filter((item) => item.type === "function_call");
89
77
  if (computerCalls.length === 0 && functionCalls.length === 0) {
@@ -174,8 +162,8 @@ async function createTestUsingComputerUseAgent({ page, task, }) {
174
162
  ],
175
163
  screenWidth,
176
164
  screenHeight,
165
+ openAIClient,
177
166
  });
178
- iterationSpan?.end({ output: response });
179
167
  }
180
168
  if (!isTaskDone) {
181
169
  actionsSummary.push(`Max iteration limit hit: Task not done after ${maxIterations} iterations`);
@@ -1,8 +1,10 @@
1
+ import OpenAI from "openai";
1
2
  import { Response, ResponseInputItem } from "openai/resources/responses/responses.mjs";
2
- export declare function callComputerUseModel({ input, previousResponseId, screenWidth, screenHeight, }: {
3
+ export declare function callComputerUseModel({ input, previousResponseId, screenWidth, screenHeight, openAIClient, }: {
3
4
  input: ResponseInputItem[];
4
5
  previousResponseId?: string;
5
6
  screenWidth: number;
6
7
  screenHeight: number;
8
+ openAIClient: OpenAI;
7
9
  }): Promise<Response>;
8
10
  //# sourceMappingURL=model.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AACA,OAAO,EAEL,QAAQ,EACR,iBAAiB,EAClB,MAAM,0CAA0C,CAAC;AA8BlD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,kBAAkB,EAClB,WAAW,EACX,YAAY,GACb,EAAE;IACD,KAAK,EAAE,iBAAiB,EAAE,CAAC;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,QAAQ,CAAC,CAuBpB"}
1
+ {"version":3,"file":"model.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/model.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAEL,QAAQ,EACR,iBAAiB,EAClB,MAAM,0CAA0C,CAAC;AA8BlD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,kBAAkB,EAClB,WAAW,EACX,YAAY,EACZ,YAAY,GACb,EAAE;IACD,KAAK,EAAE,iBAAiB,EAAE,CAAC;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;CACtB,GAAG,OAAO,CAAC,QAAQ,CAAC,CAuBpB"}
@@ -1,10 +1,6 @@
1
1
  "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
2
  Object.defineProperty(exports, "__esModule", { value: true });
6
3
  exports.callComputerUseModel = void 0;
7
- const openai_1 = __importDefault(require("openai"));
8
4
  const INSTRUCTIONS = `You will be asked to execute some actions in a browser context.
9
5
  Don't ask the user for confirmations - just execute the actions.
10
6
 
@@ -30,9 +26,8 @@ const pageGotoTool = {
30
26
  },
31
27
  strict: true,
32
28
  };
33
- async function callComputerUseModel({ input, previousResponseId, screenWidth, screenHeight, }) {
34
- const openai = new openai_1.default();
35
- return await openai.responses.create({
29
+ async function callComputerUseModel({ input, previousResponseId, screenWidth, screenHeight, openAIClient, }) {
30
+ const response = await openAIClient.responses.create({
36
31
  model: "computer-use-preview-2025-03-11",
37
32
  previous_response_id: previousResponseId,
38
33
  parallel_tool_calls: false,
@@ -53,5 +48,6 @@ async function callComputerUseModel({ input, previousResponseId, screenWidth, sc
53
48
  input,
54
49
  truncation: "auto",
55
50
  });
51
+ return response;
56
52
  }
57
53
  exports.callComputerUseModel = callComputerUseModel;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@empiricalrun/test-gen",
3
- "version": "0.53.4",
3
+ "version": "0.53.5",
4
4
  "publishConfig": {
5
5
  "registry": "https://registry.npmjs.org/",
6
6
  "access": "public"
@@ -68,7 +68,7 @@
68
68
  "tsx": "^4.16.2",
69
69
  "typescript": "^5.3.3",
70
70
  "zod": "^3.23.8",
71
- "@empiricalrun/llm": "^0.14.3",
71
+ "@empiricalrun/llm": "^0.14.4",
72
72
  "@empiricalrun/r2-uploader": "^0.3.8",
73
73
  "@empiricalrun/test-run": "^0.7.6"
74
74
  },