@empiricalrun/test-gen 0.31.9 → 0.31.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,26 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.31.12
4
+
5
+ ### Patch Changes
6
+
7
+ - a8f2ea6: chore: add test for master agent
8
+ - Updated dependencies [10f554d]
9
+ - @empiricalrun/reporter@0.20.2
10
+
11
+ ## 0.31.11
12
+
13
+ ### Patch Changes
14
+
15
+ - Updated dependencies [9623b74]
16
+ - @empiricalrun/reporter@0.20.1
17
+
18
+ ## 0.31.10
19
+
20
+ ### Patch Changes
21
+
22
+ - c065064: fix: throw error for invalid response from o1
23
+
3
24
  ## 0.31.9
4
25
 
5
26
  ### Patch Changes
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAYlC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAOnD,KAAK,oBAAoB,GAAG,OAAO,CAAC,oBAAoB,CAAC,GAAG;IAC1D,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACH,CAAC;AAEF,wBAAsB,6BAA6B,CACjD,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,IAAI,EACV,OAAO,EAAE,oBAAoB,mBAmP9B"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAYlC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAOnD,KAAK,oBAAoB,GAAG,OAAO,CAAC,oBAAoB,CAAC,GAAG;IAC1D,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACH,CAAC;AAEF,wBAAsB,6BAA6B,CACjD,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,IAAI,EACV,OAAO,EAAE,oBAAoB,mBAqP9B"}
@@ -19,6 +19,8 @@ const utils_1 = require("./utils");
19
19
  async function browsingAgentUsingMasterAgent(task, page, options) {
20
20
  const logger = new logger_1.CustomLogger({ useReporter: false });
21
21
  const testgenUpdatesReporter = new reporter_1.TestGenUpdatesReporter();
22
+ // add timeout for the page to settle in
23
+ await page.waitForTimeout(3000);
22
24
  const trace = llm_1.langfuseInstance.trace({
23
25
  name: "test-generator",
24
26
  id: crypto_1.default.randomUUID(),
@@ -1 +1 @@
1
- {"version":3,"file":"o1-completion.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/o1-completion.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EACL,qBAAqB,EACrB,0BAA0B,EAC1B,kBAAkB,EACnB,MAAM,4BAA4B,CAAC;AAKpC,wBAAsB,eAAe,CAAC,EACpC,QAAQ,EACR,KAAK,EACL,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,0BAA0B,EAAE,CAAC;IACvC,KAAK,EAAE,kBAAkB,EAAE,CAAC;IAC5B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC,qBAAqB,GAAG,SAAS,CAAC,CAoD7C"}
1
+ {"version":3,"file":"o1-completion.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/o1-completion.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EACL,qBAAqB,EACrB,0BAA0B,EAC1B,kBAAkB,EACnB,MAAM,4BAA4B,CAAC;AAKpC,wBAAsB,eAAe,CAAC,EACpC,QAAQ,EACR,KAAK,EACL,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,0BAA0B,EAAE,CAAC;IACvC,KAAK,EAAE,kBAAkB,EAAE,CAAC;IAC5B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC,qBAAqB,GAAG,SAAS,CAAC,CAyD7C"}
@@ -41,6 +41,10 @@ async function getO1Completion({ messages, tools, trace, }) {
41
41
  o1Span?.end({ output: response });
42
42
  const toolResponseStr = (0, remove_markdown_1.default)(response.content);
43
43
  const toolRespJSON = JSON.parse(toolResponseStr);
44
+ const parameters = toolRespJSON.function.parameters || toolRespJSON.function.arguments;
45
+ if (!parameters) {
46
+ throw new Error("No parameters found in tool response");
47
+ }
44
48
  const tool = {
45
49
  type: "function",
46
50
  function: {
@@ -2,6 +2,15 @@ import { LLM, TraceClient } from "@empiricalrun/llm";
2
2
  import { Page } from "playwright";
3
3
  import { TestGenConfigOptions } from "../../types";
4
4
  type BrowsingAgentOptions = Partial<TestGenConfigOptions>;
5
+ export declare function getNextAction({ task, executedActions, page, trace, llm, options, pageScreenshot, }: {
6
+ task: string;
7
+ executedActions: string[];
8
+ page: Page;
9
+ trace?: TraceClient;
10
+ llm: LLM;
11
+ options: BrowsingAgentOptions;
12
+ pageScreenshot: string;
13
+ }): Promise<import("openai/resources/index.mjs").ChatCompletionMessageToolCall | undefined>;
5
14
  export declare function masterAgent(task: string, page: Page, executedActions: string[], trace: TraceClient, llm: LLM, options: BrowsingAgentOptions): Promise<any>;
6
15
  export {};
7
16
  //# sourceMappingURL=run.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAa,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAGhE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AASlC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAEnD,KAAK,oBAAoB,GAAG,OAAO,CAAC,oBAAoB,CAAC,CAAC;AAE1D,wBAAsB,WAAW,CAC/B,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,IAAI,EACV,eAAe,EAAE,MAAM,EAAE,EACzB,KAAK,EAAE,WAAW,EAClB,GAAG,EAAE,GAAG,EACR,OAAO,EAAE,oBAAoB,gBA0E9B"}
1
+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAa,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAGhE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AASlC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAEnD,KAAK,oBAAoB,GAAG,OAAO,CAAC,oBAAoB,CAAC,CAAC;AAE1D,wBAAsB,aAAa,CAAC,EAClC,IAAI,EACJ,eAAe,EACf,IAAI,EACJ,KAAK,EACL,GAAG,EACH,OAAO,EACP,cAAc,GACf,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,IAAI,EAAE,IAAI,CAAC;IACX,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,EAAE,GAAG,CAAC;IACT,OAAO,EAAE,oBAAoB,CAAC;IAC9B,cAAc,EAAE,MAAM,CAAC;CACxB,2FA+CA;AAED,wBAAsB,WAAW,CAC/B,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,IAAI,EACV,eAAe,EAAE,MAAM,EAAE,EACzB,KAAK,EAAE,WAAW,EAClB,GAAG,EAAE,GAAG,EACR,OAAO,EAAE,oBAAoB,gBAsC9B"}
@@ -1,19 +1,14 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.masterAgent = void 0;
3
+ exports.masterAgent = exports.getNextAction = void 0;
4
4
  const llm_1 = require("@empiricalrun/llm");
5
5
  const vision_1 = require("@empiricalrun/llm/vision");
6
6
  const done_1 = require("../../actions/done");
7
7
  const next_task_1 = require("../../actions/next-task");
8
8
  const constants_1 = require("../../constants");
9
9
  const reporter_1 = require("../../reporter");
10
- async function masterAgent(task, page, executedActions, trace, llm, options) {
11
- trace.update({ input: { task } });
12
- const promptSpan = trace.span({ name: "page-prompt" });
13
- const buffer = await page.screenshot({ fullPage: true });
14
- const testGenReporter = new reporter_1.TestGenUpdatesReporter();
15
- const testGenSnapshotUpdatePromise = testGenReporter.sendCurrentView(buffer);
16
- const pageScreenshot = buffer.toString("base64");
10
+ async function getNextAction({ task, executedActions, page, trace, llm, options, pageScreenshot, }) {
11
+ const promptSpan = trace?.span({ name: "page-prompt" });
17
12
  const promptMessages = await (0, llm_1.getPrompt)("test-gen", {
18
13
  task,
19
14
  executedActions: executedActions.map((a) => a).join("\n"),
@@ -39,7 +34,7 @@ async function masterAgent(task, page, executedActions, trace, llm, options) {
39
34
  userMessage,
40
35
  ];
41
36
  const tools = [next_task_1.NextTaskAction.schema, (0, done_1.doneActionGenerator)(page).schema];
42
- promptSpan.end({ output: { messages } });
37
+ promptSpan?.end({ output: { messages } });
43
38
  const completion = await llm.createChatCompletion({
44
39
  messages,
45
40
  modelParameters: {
@@ -52,8 +47,26 @@ async function masterAgent(task, page, executedActions, trace, llm, options) {
52
47
  // @ts-ignore
53
48
  tools,
54
49
  });
55
- let output;
56
50
  const toolCall = completion?.tool_calls?.[0];
51
+ return toolCall;
52
+ }
53
+ exports.getNextAction = getNextAction;
54
+ async function masterAgent(task, page, executedActions, trace, llm, options) {
55
+ trace.update({ input: { task } });
56
+ const buffer = await page.screenshot({ fullPage: true });
57
+ const testGenReporter = new reporter_1.TestGenUpdatesReporter();
58
+ const testGenSnapshotUpdatePromise = testGenReporter.sendCurrentView(buffer);
59
+ const pageScreenshot = buffer.toString("base64");
60
+ let output;
61
+ const toolCall = await getNextAction({
62
+ task,
63
+ executedActions,
64
+ page,
65
+ trace,
66
+ llm,
67
+ options,
68
+ pageScreenshot,
69
+ });
57
70
  if (toolCall) {
58
71
  if (toolCall.function.name === "task_done") {
59
72
  output = {
package/dist/bin/index.js CHANGED
@@ -69,10 +69,12 @@ async function runAgent(testGenConfig) {
69
69
  testGenFailed = true;
70
70
  new logger_1.CustomLogger().error(`Failed to generate test for the scenario. ${process.env.LOG_URL ? `[view log](${process.env.LOG_URL})` : ""}`);
71
71
  }
72
- await new reporter_1.TestGenUpdatesReporter().reportGenAssets({
73
- projectRepoName: testGenConfig.options.metadata.projectRepoName,
74
- testName: testGenConfig.testCase.name,
75
- });
72
+ if (testGenConfig.options?.agent !== "code") {
73
+ await new reporter_1.TestGenUpdatesReporter().reportGenAssets({
74
+ projectRepoName: testGenConfig.options.metadata.projectRepoName,
75
+ testName: testGenConfig.testCase.name,
76
+ });
77
+ }
76
78
  // TODO: move these reporters to a better lifecycle
77
79
  await (0, ci_1.reportOnCI)(testGenConfig.testCase);
78
80
  await (0, llm_1.flushAllTraces)();
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/bin/utils/platform/web/index.ts"],"names":[],"mappings":"AAIA,OAAO,EAGL,IAAI,EAIL,MAAM,UAAU,CAAC;AAGlB;;;;;;GAMG;AACH,wBAAgB,sBAAsB,CAAC,EACrC,YAAY,EACZ,MAAM,EACN,OAAO,GACR,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;CACjB,GAAG;IACF,SAAS,EAAE,MAAM,GAAG,SAAS,CAAC;IAC9B,QAAQ,EAAE,IAAI,GAAG,SAAS,CAAC;CAC5B,CAuCA;AAwBD,wBAAsB,0CAA0C,CAC9D,QAAQ,EAAE,MAAM,oBA+BjB;AAED;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,wBAAgB,4BAA4B,CAC1C,IAAI,EAAE,IAAI,GAAG,SAAS,GACrB,IAAI,GAAG,SAAS,CA4BlB;AAED,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,CAG5E;AAED,wBAAgB,kBAAkB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,EAAE,CA8C7D;AAED,wBAAsB,sBAAsB,CAC1C,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,mCAWjB;AAED,wBAAsB,UAAU,CAAC,QAAQ,EAAE,MAAM,iBAShD;AAED,wBAAsB,UAAU,CAAC,QAAQ,EAAE,MAAM,iBAQhD;AAED,wBAAgB,YAAY,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,MAAM,UAE5E;AAED,wBAAsB,cAAc,CAAC,QAAQ,EAAE,MAAM,iBAMpD;AAED,wBAAgB,oBAAoB,CAAC,QAAQ,EAAE,MAAM,UAcpD;AAED,wBAAsB,iCAAiC,CAAC,QAAQ,EAAE,MAAM,+BAmBvE;AAED,wBAAgB,4BAA4B,CAC1C,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,MAAM,EAChB,aAAa,EAAE,MAAM,UA0CtB;AAED,eAAO,MAAM,6BAA6B;qBAKvB,MAAM;iBACV,MAAM;YACX,MAAM,EAAE;YA2DjB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/bin/utils/platform/web/index.ts"],"names":[],"mappings":"AAIA,OAAO,EAGL,IAAI,EAIL,MAAM,UAAU,CAAC;AAGlB;;;;;;GAMG;AACH,wBAAgB,sBAAsB,CAAC,EACrC,YAAY,EACZ,MAAM,EACN,OAAO,GACR,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;CACjB,GAAG;IACF,SAAS,EAAE,MAAM,GAAG,SAAS,CAAC;IAC9B,QAAQ,EAAE,IAAI,GAAG,SAAS,CAAC;CAC5B,CAuCA;AAwBD,wBAAsB,0CAA0C,CAC9D,QAAQ,EAAE,MAAM,oBA+BjB;AAED;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,wBAAgB,4BAA4B,CAC1C,IAAI,EAAE,IAAI,GAAG,SAAS,GACrB,IAAI,GAAG,SAAS,CA4BlB;AAED,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,CAG5E;AAED,wBAAgB,kBAAkB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,EAAE,CA8C7D;AAED,wBAAsB,sBAAsB,CAC1C,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,mCAWjB;AAED,wBAAsB,UAAU,CAAC,QAAQ,EAAE,MAAM,iBAShD;AAED,wBAAsB,UAAU,CAAC,QAAQ,EAAE,MAAM,iBAQhD;AAED,wBAAgB,YAAY,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,MAAM,UAE5E;AAED,wBAAsB,cAAc,CAAC,QAAQ,EAAE,MAAM,iBAMpD;AAED,wBAAgB,oBAAoB,CAAC,QAAQ,EAAE,MAAM,UAcpD;AAED,wBAAsB,iCAAiC,CAAC,QAAQ,EAAE,MAAM,+BAoBvE;AAED,wBAAgB,4BAA4B,CAC1C,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,MAAM,EAChB,aAAa,EAAE,MAAM,UA0CtB;AAED,eAAO,MAAM,6BAA6B;qBAKvB,MAAM;iBACV,MAAM;YACX,MAAM,EAAE;YA2DjB,CAAC"}
@@ -268,7 +268,8 @@ async function getPageVariableNameFromCreateTest(filePath) {
268
268
  const descendentTexts = descendants.map((d) => d.getText());
269
269
  const parameterSeparatorIdx = descendentTexts.findIndex((s) => s === ",");
270
270
  const pageVariableName = descendentTexts[parameterSeparatorIdx + 1];
271
- return pageVariableName;
271
+ // handle cases like "page as Page"
272
+ return pageVariableName?.split(" ")[0];
272
273
  }
273
274
  exports.getPageVariableNameFromCreateTest = getPageVariableNameFromCreateTest;
274
275
  function replaceCreateTestWithNewCode(filePath, contents, generatedCode) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@empiricalrun/test-gen",
3
- "version": "0.31.9",
3
+ "version": "0.31.12",
4
4
  "publishConfig": {
5
5
  "registry": "https://registry.npmjs.org/",
6
6
  "access": "public"
@@ -46,7 +46,7 @@
46
46
  "typescript": "^5.3.3",
47
47
  "@empiricalrun/llm": "^0.9.3",
48
48
  "@empiricalrun/r2-uploader": "^0.3.3",
49
- "@empiricalrun/reporter": "^0.20.0"
49
+ "@empiricalrun/reporter": "^0.20.2"
50
50
  },
51
51
  "devDependencies": {
52
52
  "@types/detect-port": "^1.3.5",