@empiricalrun/test-gen 0.34.5 → 0.35.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. package/CHANGELOG.md +21 -0
  2. package/dist/agent/browsing/index.d.ts +1 -1
  3. package/dist/agent/browsing/index.d.ts.map +1 -1
  4. package/dist/agent/browsing/index.js +11 -11
  5. package/dist/agent/codegen/create-test-block.js +1 -1
  6. package/dist/agent/codegen/run.d.ts +1 -1
  7. package/dist/agent/codegen/run.d.ts.map +1 -1
  8. package/dist/agent/codegen/run.js +12 -10
  9. package/dist/agent/codegen/skills-retriever.d.ts +11 -0
  10. package/dist/agent/codegen/skills-retriever.d.ts.map +1 -1
  11. package/dist/agent/codegen/skills-retriever.js +27 -9
  12. package/dist/agent/codegen/update-flow.d.ts.map +1 -1
  13. package/dist/agent/codegen/update-flow.js +21 -17
  14. package/dist/agent/infer-agent/index.d.ts +0 -1
  15. package/dist/agent/infer-agent/index.d.ts.map +1 -1
  16. package/dist/agent/infer-agent/index.js +4 -5
  17. package/dist/agent/master/run.d.ts +4 -4
  18. package/dist/agent/master/run.d.ts.map +1 -1
  19. package/dist/agent/master/run.js +48 -20
  20. package/dist/agent/master/with-hints.d.ts +1 -1
  21. package/dist/agent/master/with-hints.d.ts.map +1 -1
  22. package/dist/agent/master/with-hints.js +2 -2
  23. package/dist/bin/index.js +8 -6
  24. package/dist/evals/fetch-pom-skills-agent.evals.d.ts +4 -0
  25. package/dist/evals/fetch-pom-skills-agent.evals.d.ts.map +1 -0
  26. package/dist/evals/fetch-pom-skills-agent.evals.js +36 -0
  27. package/dist/evals/master-agent.evals.d.ts +4 -0
  28. package/dist/evals/master-agent.evals.d.ts.map +1 -0
  29. package/dist/evals/master-agent.evals.js +36 -0
  30. package/package.json +2 -2
  31. package/dist/evals/infer-master-code.d.ts +0 -2
  32. package/dist/evals/infer-master-code.d.ts.map +0 -1
  33. package/dist/evals/infer-master-code.js +0 -18
package/CHANGELOG.md CHANGED
@@ -1,5 +1,26 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.35.1
4
+
5
+ ### Patch Changes
6
+
7
+ - Updated dependencies [557324f]
8
+ - @empiricalrun/llm@0.9.22
9
+
10
+ ## 0.35.0
11
+
12
+ ### Minor Changes
13
+
14
+ - 069347f: feat: add support for master agent evals
15
+ - 11e4cbd: feat: add fetch skills agent evals
16
+
17
+ ### Patch Changes
18
+
19
+ - 297508d: fix: langfuse key errors
20
+ - Updated dependencies [069347f]
21
+ - Updated dependencies [297508d]
22
+ - @empiricalrun/llm@0.9.21
23
+
3
24
  ## 0.34.5
4
25
 
5
26
  ### Patch Changes
@@ -10,7 +10,7 @@ export type BrowsingAgentOptions = Partial<TestGenConfigOptions> & {
10
10
  };
11
11
  export declare function executeTaskUsingBrowsingAgent({ trace, action, logger, page, options, llm, actions, }: {
12
12
  action: string;
13
- trace: TraceClient;
13
+ trace?: TraceClient;
14
14
  logger: CustomLogger;
15
15
  page: Page;
16
16
  options: BrowsingAgentOptions;
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAClD,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAIhD,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAMnD,MAAM,MAAM,oBAAoB,GAAG,OAAO,CAAC,oBAAoB,CAAC,GAAG;IACjE,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACH,CAAC;AAEF,wBAAsB,6BAA6B,CAAC,EAClD,KAAK,EACL,MAAM,EACN,MAAM,EACN,IAAI,EACJ,OAAO,EACP,GAAG,EACH,OAAO,GACR,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,WAAW,CAAC;IACnB,MAAM,EAAE,YAAY,CAAC;IACrB,IAAI,EAAE,IAAI,CAAC;IACX,OAAO,EAAE,oBAAoB,CAAC;IAC9B,GAAG,EAAE,GAAG,CAAC;IACT,OAAO,EAAE,iBAAiB,CAAC;CAC5B,iBAwIA"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAClD,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAIhD,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAMnD,MAAM,MAAM,oBAAoB,GAAG,OAAO,CAAC,oBAAoB,CAAC,GAAG;IACjE,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACH,CAAC;AAEF,wBAAsB,6BAA6B,CAAC,EAClD,KAAK,EACL,MAAM,EACN,MAAM,EACN,IAAI,EACJ,OAAO,EACP,GAAG,EACH,OAAO,GACR,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,MAAM,EAAE,YAAY,CAAC;IACrB,IAAI,EAAE,IAAI,CAAC;IACX,OAAO,EAAE,oBAAoB,CAAC;IAC9B,GAAG,EAAE,GAAG,CAAC;IACT,OAAO,EAAE,iBAAiB,CAAC;CAC5B,iBAwIA"}
@@ -15,23 +15,23 @@ async function executeTaskUsingBrowsingAgent({ trace, action, logger, page, opti
15
15
  const tools = actions.getBrowsingActionSchemas();
16
16
  const testgenUpdatesReporter = new reporter_1.TestGenUpdatesReporter();
17
17
  while (!isTaskDone) {
18
- const browsingAgentSpan = trace.span({
18
+ const browsingAgentSpan = trace?.span({
19
19
  name: `browsing-agent`,
20
20
  });
21
21
  if (await (0, session_1.shouldStopSession)()) {
22
22
  break;
23
23
  }
24
- const pageContentSpan = browsingAgentSpan.span({
24
+ const pageContentSpan = browsingAgentSpan?.span({
25
25
  name: "page-content",
26
26
  });
27
27
  const pageContent = await page.content();
28
- pageContentSpan.end({ output: { pageContent } });
29
- const sanitizationSpan = browsingAgentSpan.span({
28
+ pageContentSpan?.end({ output: { pageContent } });
29
+ const sanitizationSpan = browsingAgentSpan?.span({
30
30
  name: "page-sanitization",
31
31
  });
32
32
  const pageSnapshot = (0, html_1.sanitizeHtml)(pageContent, options.htmlSanitize);
33
- sanitizationSpan.end({ output: { pageSnapshot } });
34
- const promptSpan = browsingAgentSpan.span({ name: "page-prompt" });
33
+ sanitizationSpan?.end({ output: { pageSnapshot } });
34
+ const promptSpan = browsingAgentSpan?.span({ name: "page-prompt" });
35
35
  // extract all successful actions
36
36
  const successfulActions = executedActions
37
37
  .filter((a) => !a.isError)
@@ -46,8 +46,8 @@ async function executeTaskUsingBrowsingAgent({ trace, action, logger, page, opti
46
46
  logger.log(`isTaskDone: ${isTaskDone}`);
47
47
  logger.log(`reason: ${verificationAgentResp.reason}`);
48
48
  if (isTaskDone) {
49
- browsingAgentSpan.event({ name: "task-done" });
50
- browsingAgentSpan.end({
49
+ browsingAgentSpan?.event({ name: "task-done" });
50
+ browsingAgentSpan?.end({
51
51
  output: { taskDone: true, reason: verificationAgentResp.reason },
52
52
  });
53
53
  break;
@@ -60,7 +60,7 @@ async function executeTaskUsingBrowsingAgent({ trace, action, logger, page, opti
60
60
  lastActionErrors: lastActionExecTrace ? [lastActionExecTrace] : [],
61
61
  promptType: "browsing-agent-as-tool",
62
62
  });
63
- promptSpan.end({ output: { messages } });
63
+ promptSpan?.end({ output: { messages } });
64
64
  let completion;
65
65
  completion = await (0, o1_completion_1.getO1Completion)({
66
66
  //@ts-ignore
@@ -91,7 +91,7 @@ async function executeTaskUsingBrowsingAgent({ trace, action, logger, page, opti
91
91
  action: "",
92
92
  });
93
93
  }
94
- const toolCallsSpan = browsingAgentSpan.span({ name: "tool-calls" });
94
+ const toolCallsSpan = browsingAgentSpan?.span({ name: "tool-calls" });
95
95
  for (const i in toolCalls) {
96
96
  const toolCall = toolCalls[i];
97
97
  if (await (0, session_1.shouldStopSession)()) {
@@ -116,7 +116,7 @@ async function executeTaskUsingBrowsingAgent({ trace, action, logger, page, opti
116
116
  logger.error(lastActionExecTrace, e);
117
117
  }
118
118
  }
119
- toolCallsSpan.end({ output: { toolCalls } });
119
+ toolCallsSpan?.end({ output: { toolCalls } });
120
120
  // mark task as done if llm is stuck in loop
121
121
  if (executedActions.length >= 3) {
122
122
  const lastThreeActions = executedActions.slice(-3);
@@ -15,7 +15,7 @@ async function createEmptyTestCaseBlock({ testCase, file, options, trace, }) {
15
15
  const session = (0, session_1.getSessionDetails)();
16
16
  trace =
17
17
  trace ||
18
- llm_1.langfuseInstance.trace({
18
+ llm_1.langfuseInstance?.trace({
19
19
  name: "create-empty-test-block",
20
20
  id: crypto.randomUUID(),
21
21
  release: session.version,
@@ -1,4 +1,4 @@
1
1
  import { TraceClient } from "@empiricalrun/llm";
2
2
  import { TestCase, TestGenConfigOptions } from "../../types";
3
- export declare function generateTest(testCase: TestCase, file: string, options: TestGenConfigOptions, trace: TraceClient): Promise<TestCase[]>;
3
+ export declare function generateTest(testCase: TestCase, file: string, options: TestGenConfigOptions, trace?: TraceClient): Promise<TestCase[]>;
4
4
  //# sourceMappingURL=run.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkC,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAkBhF,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,wBAAsB,YAAY,CAChC,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,EAC7B,KAAK,EAAE,WAAW,GACjB,OAAO,CAAC,QAAQ,EAAE,CAAC,CAwGrB"}
1
+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkC,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAkBhF,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,wBAAsB,YAAY,CAChC,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,EAC7B,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,QAAQ,EAAE,CAAC,CA0GrB"}
@@ -31,7 +31,7 @@ async function generateTest(testCase, file, options, trace) {
31
31
  }
32
32
  const generatedTestCases = [];
33
33
  logger.logEmptyLine();
34
- const createTestSpan = trace.span({
34
+ const createTestSpan = trace?.span({
35
35
  name: "create-test",
36
36
  input: {
37
37
  testCase,
@@ -39,7 +39,7 @@ async function generateTest(testCase, file, options, trace) {
39
39
  options,
40
40
  },
41
41
  });
42
- createTestSpan.event({
42
+ createTestSpan?.event({
43
43
  name: "collate-files-as-text",
44
44
  output: {
45
45
  codePrompt,
@@ -47,7 +47,7 @@ async function generateTest(testCase, file, options, trace) {
47
47
  testFileContent,
48
48
  },
49
49
  });
50
- const promptSpan = createTestSpan.span({
50
+ const promptSpan = createTestSpan?.span({
51
51
  name: "add-scenario-prompt",
52
52
  });
53
53
  const instruction = await (0, llm_1.getPrompt)("add-scenario", {
@@ -57,7 +57,7 @@ async function generateTest(testCase, file, options, trace) {
57
57
  scenarioSteps: testCase.steps.join("\n"),
58
58
  scenarioFile: file,
59
59
  });
60
- promptSpan.end({ output: { instruction } });
60
+ promptSpan?.end({ output: { instruction } });
61
61
  const llm = new llm_1.LLM({
62
62
  trace,
63
63
  provider: options.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
@@ -73,7 +73,7 @@ async function generateTest(testCase, file, options, trace) {
73
73
  });
74
74
  let response = firstShotMessage?.content || "";
75
75
  logger.success("Test generated successfully!");
76
- const readWriteFileSpan = trace.span({ name: "write-to-file" });
76
+ const readWriteFileSpan = trace?.span({ name: "write-to-file" });
77
77
  let contents = fs_extra_1.default.readFileSync(file, "utf-8");
78
78
  const [prependContent, strippedContent] = await (0, web_1.stripAndPrependImports)(response, testCase?.name);
79
79
  let updatedContent = prependContent +
@@ -83,9 +83,9 @@ async function generateTest(testCase, file, options, trace) {
83
83
  codeSnippet: `\n\n${strippedContent}`,
84
84
  });
85
85
  await fs_extra_1.default.writeFile(file, updatedContent, "utf-8");
86
- readWriteFileSpan.end({ output: { updatedContent } });
86
+ readWriteFileSpan?.end({ output: { updatedContent } });
87
87
  logger.log("Linting generated code...");
88
- createTestSpan.event({ name: "lint-file" });
88
+ createTestSpan?.event({ name: "lint-file" });
89
89
  await (0, web_1.lintErrors)(file);
90
90
  await (0, fix_ts_errors_1.validateAndFixTypescriptErrors)({
91
91
  trace,
@@ -96,12 +96,14 @@ async function generateTest(testCase, file, options, trace) {
96
96
  testCase: testCase,
97
97
  options,
98
98
  });
99
- createTestSpan.event({ name: "format-file" });
99
+ createTestSpan?.event({ name: "format-file" });
100
100
  await (0, web_1.formatCode)(file);
101
101
  logger.success("File formatted successfully!");
102
- logger.log(`Successfully generated code for the given task. \n View [trace](${trace.getTraceUrl()})`);
102
+ if (trace) {
103
+ logger.log(`Successfully generated code for the given task. \n View [trace](${trace.getTraceUrl()})`);
104
+ }
103
105
  generatedTestCases.push(testCase);
104
- createTestSpan.end({ output: { response } });
106
+ createTestSpan?.end({ output: { response } });
105
107
  await (0, llm_1.flushAllTraces)();
106
108
  return generatedTestCases;
107
109
  }
@@ -1,5 +1,16 @@
1
1
  import { TraceClient } from "@empiricalrun/llm";
2
2
  import { TestCase, TestGenConfigOptions } from "../../types";
3
+ export declare const fetchPomSkills: ({ testCase, pomFiles, options, trace, }: {
4
+ testCase: TestCase;
5
+ pomFiles?: string | undefined;
6
+ trace?: TraceClient | undefined;
7
+ options?: TestGenConfigOptions | undefined;
8
+ }) => Promise<{
9
+ testStep: string;
10
+ filePath: string;
11
+ usageExample: string;
12
+ reason: string;
13
+ }[]>;
3
14
  export declare function getAppropriateSkills({ testCase, options, trace, }: {
4
15
  testCase: TestCase;
5
16
  options?: TestGenConfigOptions;
@@ -1 +1 @@
1
- {"version":3,"file":"skills-retriever.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/skills-retriever.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAYhE,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAG7D,wBAAsB,oBAAoB,CAAC,EACzC,QAAQ,EACR,OAAO,EACP,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB;;;;;KAmDA"}
1
+ {"version":3,"file":"skills-retriever.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/skills-retriever.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAYhE,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAG7D,eAAO,MAAM,cAAc;cAMf,QAAQ;;;;;;;;;IA0CnB,CAAC;AAEF,wBAAsB,oBAAoB,CAAC,EACzC,QAAQ,EACR,OAAO,EACP,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB;;;;;KA6BA"}
@@ -3,7 +3,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
3
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.getAppropriateSkills = void 0;
6
+ exports.getAppropriateSkills = exports.fetchPomSkills = void 0;
7
7
  const llm_1 = require("@empiricalrun/llm");
8
8
  const fs_1 = __importDefault(require("fs"));
9
9
  const logger_1 = require("../../bin/logger");
@@ -11,18 +11,15 @@ const context_1 = require("../../bin/utils/context");
11
11
  const fs_2 = require("../../bin/utils/fs");
12
12
  const constants_1 = require("../../constants");
13
13
  const utils_1 = require("./utils");
14
- async function getAppropriateSkills({ testCase, options, trace, }) {
15
- const logger = new logger_1.CustomLogger({ useReporter: false });
16
- logger.log("getting skill set for the repository");
17
- const filter = await (0, context_1.createGitIgnoreFileFilter)();
18
- const pomFiles = await (0, fs_2.generatePromptFromDirectory)("./pages", filter);
19
- const fetchSkillsSpan = trace?.span({
14
+ const fetchPomSkills = async ({ testCase, pomFiles, options, trace, }) => {
15
+ const fetchSkillsUsingPOMFilesSpan = trace?.span({
20
16
  name: "fetch-pom-skills",
21
17
  input: {
18
+ pomFiles,
22
19
  testCase,
23
20
  },
24
21
  });
25
- const promptSpan = fetchSkillsSpan?.span({
22
+ const promptSpan = fetchSkillsUsingPOMFilesSpan?.span({
26
23
  name: "fetch-pom-skills-prompt",
27
24
  });
28
25
  const prompt = await (0, llm_1.getPrompt)("fetch-skills-prompt", {
@@ -32,7 +29,7 @@ async function getAppropriateSkills({ testCase, options, trace, }) {
32
29
  });
33
30
  promptSpan?.end({ output: { prompt } });
34
31
  const llm = new llm_1.LLM({
35
- trace: fetchSkillsSpan,
32
+ trace: fetchSkillsUsingPOMFilesSpan,
36
33
  provider: options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
37
34
  defaultModel: options?.model || constants_1.DEFAULT_MODEL,
38
35
  providerApiKey: constants_1.MODEL_API_KEYS[options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER],
@@ -47,6 +44,27 @@ async function getAppropriateSkills({ testCase, options, trace, }) {
47
44
  });
48
45
  let response = firstShotMessage?.content || "";
49
46
  const skills = (0, utils_1.extractTestStepsSuggestions)(response);
47
+ fetchSkillsUsingPOMFilesSpan?.end({ output: { skills } });
48
+ return skills;
49
+ };
50
+ exports.fetchPomSkills = fetchPomSkills;
51
+ async function getAppropriateSkills({ testCase, options, trace, }) {
52
+ const logger = new logger_1.CustomLogger({ useReporter: false });
53
+ logger.log("getting skill set for the repository");
54
+ const filter = await (0, context_1.createGitIgnoreFileFilter)();
55
+ const pomFiles = await (0, fs_2.generatePromptFromDirectory)("./pages", filter);
56
+ const fetchSkillsSpan = trace?.span({
57
+ name: "get-appropriate-skills",
58
+ input: {
59
+ testCase,
60
+ },
61
+ });
62
+ const skills = await (0, exports.fetchPomSkills)({
63
+ testCase,
64
+ pomFiles,
65
+ trace: fetchSkillsSpan,
66
+ options,
67
+ });
50
68
  const validateSkillsSpan = fetchSkillsSpan?.span({
51
69
  name: "validate-skills",
52
70
  input: {
@@ -1 +1 @@
1
- {"version":3,"file":"update-flow.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/update-flow.ts"],"names":[],"mappings":"AAAA,OAAO,EAKL,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAsB3B,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,KAAK,eAAe,GAAG,QAAQ,GAAG;IAChC,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB,CAAC;AAqIF,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAAG,SAAS,EACzC,OAAO,GAAE,OAAc,EACvB,QAAQ,GAAE,OAAc,EACxB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,eAAe,EAAE,CAAC,CAoG5B;AAED,wBAAsB,qBAAqB,CAAC,EAC1C,QAAQ,EACR,IAAI,EACJ,OAAO,EACP,KAAK,EACL,aAAoB,GACrB,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CA6E7B"}
1
+ {"version":3,"file":"update-flow.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/update-flow.ts"],"names":[],"mappings":"AAAA,OAAO,EAKL,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAsB3B,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,KAAK,eAAe,GAAG,QAAQ,GAAG;IAChC,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB,CAAC;AAqIF,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAAG,SAAS,EACzC,OAAO,GAAE,OAAc,EACvB,QAAQ,GAAE,OAAc,EACxB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,eAAe,EAAE,CAAC,CAsG5B;AAED,wBAAsB,qBAAqB,CAAC,EAC1C,QAAQ,EACR,IAAI,EACJ,OAAO,EACP,KAAK,EACL,aAAoB,GACrB,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CA+E7B"}
@@ -29,7 +29,7 @@ async function applyFileChanges({ validateTypes = true, trace, testCase, fileCha
29
29
  if (testBlockUpdate) {
30
30
  // assuming the test case getting updated
31
31
  // maintaining the previous accuracy of the test case update
32
- const readWriteFileSpan = trace.span({ name: "write-to-file" });
32
+ const readWriteFileSpan = trace?.span({ name: "write-to-file" });
33
33
  let contents = await fs_extra_1.default.readFile(fileChange.filePath, "utf-8");
34
34
  const [prependContent, strippedContent] = await (0, web_1.stripAndPrependImports)(fileChange.newCode, testCase?.name);
35
35
  let updatedContent = prependContent + contents + `\n\n${strippedContent}`;
@@ -41,10 +41,10 @@ async function applyFileChanges({ validateTypes = true, trace, testCase, fileCha
41
41
  contents = contents.replace(testBlock, `\n\n${strippedContent}`);
42
42
  updatedContent = prependContent + contents;
43
43
  await fs_extra_1.default.writeFile(fileChange.filePath, updatedContent, "utf-8");
44
- readWriteFileSpan.end({ output: { updatedContent } });
44
+ readWriteFileSpan?.end({ output: { updatedContent } });
45
45
  }
46
46
  else {
47
- const readWriteFileSpan = trace.span({ name: "write-to-file" });
47
+ const readWriteFileSpan = trace?.span({ name: "write-to-file" });
48
48
  let contents = await fs_extra_1.default.readFile(fileChange.filePath, "utf-8");
49
49
  const project = new ts_morph_1.Project();
50
50
  const sourceFile = project.createSourceFile("updated-code.ts", fileChange.newCode);
@@ -84,7 +84,7 @@ async function applyFileChanges({ validateTypes = true, trace, testCase, fileCha
84
84
  contents = contents.replace(fileChange.oldCode, `\n\n${fileChange.newCode}`);
85
85
  }
86
86
  await fs_extra_1.default.writeFile(fileChange.filePath, contents, "utf-8");
87
- readWriteFileSpan.end({ output: { contents } });
87
+ readWriteFileSpan?.end({ output: { contents } });
88
88
  }
89
89
  // format and validate file change
90
90
  if (validateTypes) {
@@ -98,7 +98,7 @@ async function applyFileChanges({ validateTypes = true, trace, testCase, fileCha
98
98
  options: testGenOptions,
99
99
  });
100
100
  }
101
- trace.event({ name: "format-file" });
101
+ trace?.event({ name: "format-file" });
102
102
  await (0, web_1.formatCode)(fileChange.filePath);
103
103
  logger.success(`${fileChange.filePath} file formatted successfully!`);
104
104
  }));
@@ -112,7 +112,7 @@ async function updateTest(testCase, file, options, logging = true, validate = tr
112
112
  const session = (0, session_1.getSessionDetails)();
113
113
  trace =
114
114
  trace ||
115
- llm_1.langfuseInstance.trace({
115
+ llm_1.langfuseInstance?.trace({
116
116
  name: "update-test",
117
117
  id: crypto_1.default.randomUUID(),
118
118
  release: session.version,
@@ -121,7 +121,7 @@ async function updateTest(testCase, file, options, logging = true, validate = tr
121
121
  options?.metadata.environment || "",
122
122
  ].filter((s) => !!s),
123
123
  });
124
- const updateTestSpan = trace.span({
124
+ const updateTestSpan = trace?.span({
125
125
  name: "update-test",
126
126
  input: {
127
127
  testCase,
@@ -129,7 +129,7 @@ async function updateTest(testCase, file, options, logging = true, validate = tr
129
129
  options,
130
130
  },
131
131
  });
132
- updateTestSpan.event({
132
+ updateTestSpan?.event({
133
133
  name: "collate-files-as-text",
134
134
  output: {
135
135
  codePrompt,
@@ -137,7 +137,7 @@ async function updateTest(testCase, file, options, logging = true, validate = tr
137
137
  testFileContent,
138
138
  },
139
139
  });
140
- const promptSpan = updateTestSpan.span({
140
+ const promptSpan = updateTestSpan?.span({
141
141
  name: "update-scenario-prompt",
142
142
  });
143
143
  const promptName = "update-scenario";
@@ -161,7 +161,7 @@ async function updateTest(testCase, file, options, logging = true, validate = tr
161
161
  scenarioFile: file,
162
162
  currentScenarioCodeBlock,
163
163
  });
164
- promptSpan.end({ output: { instruction } });
164
+ promptSpan?.end({ output: { instruction } });
165
165
  const llm = new llm_1.LLM({
166
166
  trace: updateTestSpan,
167
167
  provider: options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
@@ -188,12 +188,14 @@ async function updateTest(testCase, file, options, logging = true, validate = tr
188
188
  pomPrompt: pomPrompt,
189
189
  codePrompt: codePrompt,
190
190
  });
191
- logger.log(`Trace: ${trace.getTraceUrl()}`);
191
+ if (trace) {
192
+ logger.log(`Trace: ${trace?.getTraceUrl()}`);
193
+ }
192
194
  generatedTestCases.push({
193
195
  ...testCase,
194
196
  updatedFiles: fileChanges.map((f) => f.filePath),
195
197
  });
196
- updateTestSpan.end({ output: { response } });
198
+ updateTestSpan?.end({ output: { response } });
197
199
  await (0, llm_1.flushAllTraces)();
198
200
  return generatedTestCases;
199
201
  }
@@ -215,7 +217,7 @@ async function appendCreateTestBlock({ testCase, file, options, trace, validateT
215
217
  const session = (0, session_1.getSessionDetails)();
216
218
  trace =
217
219
  trace ||
218
- llm_1.langfuseInstance.trace({
220
+ llm_1.langfuseInstance?.trace({
219
221
  name: "append-create-test-block",
220
222
  id: crypto_1.default.randomUUID(),
221
223
  release: session.version,
@@ -225,7 +227,7 @@ async function appendCreateTestBlock({ testCase, file, options, trace, validateT
225
227
  ].filter((s) => !!s),
226
228
  });
227
229
  const promptName = "append-create-test-block";
228
- const promptSpan = trace.span({
230
+ const promptSpan = trace?.span({
229
231
  name: "append-create-test-block-prompt",
230
232
  });
231
233
  const instruction = await (0, llm_1.getPrompt)(promptName, {
@@ -235,7 +237,7 @@ async function appendCreateTestBlock({ testCase, file, options, trace, validateT
235
237
  scenarioSteps: testCase.steps.join("\n"),
236
238
  scenarioFile: file,
237
239
  });
238
- promptSpan.end({ output: { instruction } });
240
+ promptSpan?.end({ output: { instruction } });
239
241
  const [userInstruction] = instruction.filter((s) => s.role === "user");
240
242
  const [systemInstruction] = instruction.filter((s) => s.role === "system");
241
243
  userInstruction.content = `${systemInstruction?.content}
@@ -267,12 +269,14 @@ async function appendCreateTestBlock({ testCase, file, options, trace, validateT
267
269
  codePrompt: codePrompt,
268
270
  validateTypes,
269
271
  });
270
- logger.log(`Trace: ${trace.getTraceUrl()}`);
272
+ if (trace) {
273
+ logger.log(`Trace: ${trace.getTraceUrl()}`);
274
+ }
271
275
  generatedTestCases.push({
272
276
  ...testCase,
273
277
  updatedFiles: fileChanges.map((f) => f.filePath),
274
278
  });
275
- trace.update({ input: { testCase }, output: { response } });
279
+ trace?.update({ input: { testCase }, output: { response } });
276
280
  await (0, llm_1.flushAllTraces)();
277
281
  return generatedTestCases;
278
282
  }
@@ -6,6 +6,5 @@ export declare function inferAgentBasedTask({ task, options, trace, }: {
6
6
  trace?: TraceClient;
7
7
  }): Promise<{
8
8
  response: Agent;
9
- trace: TraceClient;
10
9
  }>;
11
10
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/infer-agent/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAIL,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAS3B,OAAO,EAAE,KAAK,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAG1D,wBAAsB,mBAAmB,CAAC,EACxC,IAAI,EACJ,OAAO,EACP,KAAK,GACN,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC;IAAE,QAAQ,EAAE,KAAK,CAAC;IAAC,KAAK,EAAE,WAAW,CAAA;CAAE,CAAC,CA6EnD"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/infer-agent/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAIL,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAS3B,OAAO,EAAE,KAAK,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAG1D,wBAAsB,mBAAmB,CAAC,EACxC,IAAI,EACJ,OAAO,EACP,KAAK,GACN,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC;IAAE,QAAQ,EAAE,KAAK,CAAA;CAAE,CAAC,CA4E/B"}
@@ -8,19 +8,19 @@ const session = (0, session_1.getSessionDetails)();
8
8
  async function inferAgentBasedTask({ task, options, trace, }) {
9
9
  trace =
10
10
  trace ||
11
- llm_1.langfuseInstance.trace({
11
+ llm_1.langfuseInstance?.trace({
12
12
  name: "infer-agent-task",
13
13
  id: crypto.randomUUID(),
14
14
  release: session.version,
15
15
  });
16
- const inferAgentSpan = trace.span({
16
+ const inferAgentSpan = trace?.span({
17
17
  name: "infer-agent",
18
18
  input: {
19
19
  task,
20
20
  options,
21
21
  },
22
22
  });
23
- const promptSpan = inferAgentSpan.span({
23
+ const promptSpan = inferAgentSpan?.span({
24
24
  name: "infer-agent-prompt",
25
25
  input: {
26
26
  task,
@@ -65,7 +65,7 @@ async function inferAgentBasedTask({ task, options, trace, }) {
65
65
  },
66
66
  });
67
67
  const output = JSON.parse(firstShotMessage?.content || "{}");
68
- inferAgentSpan.end({
68
+ inferAgentSpan?.end({
69
69
  output: {
70
70
  response: output.response,
71
71
  reason: output.reason,
@@ -73,7 +73,6 @@ async function inferAgentBasedTask({ task, options, trace, }) {
73
73
  });
74
74
  return {
75
75
  response: output.response,
76
- trace: inferAgentSpan,
77
76
  };
78
77
  }
79
78
  exports.inferAgentBasedTask = inferAgentBasedTask;
@@ -3,14 +3,14 @@ import { Page } from "playwright";
3
3
  import { PlaywrightActions } from "../../actions";
4
4
  import { TestCase } from "../../types";
5
5
  import { BrowsingAgentOptions } from "../browsing";
6
- export declare function getNextAction({ task, executedActions, failedActions, page, trace, llm, options, pageScreenshot, annotatedPageScreenshot, actions, disableSkills, useHints, }: {
6
+ export declare function getNextAction({ task, executedActions, failedActions, pageUrl, trace, llm, options, pageScreenshot, annotatedPageScreenshot, actions, disableSkills, useHints, }: {
7
7
  task: string;
8
8
  executedActions: string[];
9
9
  failedActions: any[];
10
- page: Page;
10
+ pageUrl: string;
11
11
  trace?: TraceClient;
12
- llm: LLM;
13
- options: BrowsingAgentOptions;
12
+ llm?: LLM;
13
+ options?: BrowsingAgentOptions;
14
14
  pageScreenshot: string;
15
15
  annotatedPageScreenshot?: string;
16
16
  actions: PlaywrightActions;
@@ -1 +1 @@
1
- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,GAAG,EACH,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAG3B,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAYlD,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AACvC,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AAQrB,wBAAsB,aAAa,CAAC,EAClC,IAAI,EACJ,eAAe,EACf,aAAa,EACb,IAAI,EACJ,KAAK,EACL,GAAG,EACH,OAAO,EACP,cAAc,EACd,uBAAuB,EACvB,OAAO,EACP,aAAa,EACb,QAAgB,GACjB,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,aAAa,EAAE,GAAG,EAAE,CAAC;IACrB,IAAI,EAAE,IAAI,CAAC;IACX,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,EAAE,GAAG,CAAC;IACT,OAAO,EAAE,oBAAoB,CAAC;IAC9B,cAAc,EAAE,MAAM,CAAC;IACvB,uBAAuB,CAAC,EAAE,MAAM,CAAC;IACjC,OAAO,EAAE,iBAAiB,CAAC;IAC3B,aAAa,EAAE,OAAO,CAAC;IACvB,QAAQ,EAAE,OAAO,CAAC;CACnB,2FAgEA;AAGD,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,OAAO,GACR,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,oBAAoB,CAAC;CAC/B;;;GA6QA"}
1
+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,GAAG,EACH,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAG3B,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAYlD,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AACvC,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AAQrB,wBAAsB,aAAa,CAAC,EAClC,IAAI,EACJ,eAAe,EACf,aAAa,EACb,OAAO,EACP,KAAK,EACL,GAAG,EACH,OAAO,EACP,cAAc,EACd,uBAAuB,EACvB,OAAO,EACP,aAAa,EACb,QAAgB,GACjB,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,aAAa,EAAE,GAAG,EAAE,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,cAAc,EAAE,MAAM,CAAC;IACvB,uBAAuB,CAAC,EAAE,MAAM,CAAC;IACjC,OAAO,EAAE,iBAAiB,CAAC;IAC3B,aAAa,EAAE,OAAO,CAAC;IACvB,QAAQ,EAAE,OAAO,CAAC;CACnB,2FAwFA;AAGD,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,OAAO,GACR,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,oBAAoB,CAAC;CAC/B;;;GAiRA"}
@@ -16,13 +16,30 @@ const skills_retriever_1 = require("../codegen/skills-retriever");
16
16
  const verification_1 = require("../verification");
17
17
  const with_hints_1 = require("./with-hints");
18
18
  const MAX_ERROR_COUNT = 2;
19
- async function getNextAction({ task, executedActions, failedActions, page, trace, llm, options, pageScreenshot, annotatedPageScreenshot, actions, disableSkills, useHints = false, }) {
20
- const promptSpan = trace?.span({ name: "master-agent-prompt" });
19
+ async function getNextAction({ task, executedActions, failedActions, pageUrl, trace, llm, options, pageScreenshot, annotatedPageScreenshot, actions, disableSkills, useHints = false, }) {
20
+ const nextActionSpan = trace?.span({
21
+ name: "master-agent-next-action",
22
+ input: {
23
+ task,
24
+ executedActions,
25
+ failedActions,
26
+ pageUrl,
27
+ options,
28
+ pageScreenshot,
29
+ annotatedPageScreenshot,
30
+ disableSkills,
31
+ useHints,
32
+ skills: skill_1.testCaseSkills.getAvailableSkills(),
33
+ },
34
+ });
35
+ const promptSpan = nextActionSpan?.span({
36
+ name: "master-agent-prompt",
37
+ });
21
38
  const promptMessages = await (0, llm_1.getPrompt)("test-gen", {
22
39
  task,
23
40
  failedActions: failedActions.map((a) => a).join("\n"),
24
41
  executedActions: executedActions.map((a) => a).join("\n"),
25
- pageUrl: page.url(),
42
+ pageUrl,
26
43
  }, useHints ? 16 : 14);
27
44
  // assuming there is only one user message in the prompt. if there is a change in langfuse prompt format, this will need to be updated
28
45
  const userMessage = promptMessages.filter((m) => m.role === "user")[0];
@@ -43,7 +60,7 @@ async function getNextAction({ task, executedActions, failedActions, page, trace
43
60
  {
44
61
  type: "image_url",
45
62
  image_url: {
46
- url: (0, vision_1.imageFormatForProvider)(options.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER, pageScreenshot),
63
+ url: (0, vision_1.imageFormatForProvider)(options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER, pageScreenshot),
47
64
  },
48
65
  },
49
66
  ];
@@ -56,20 +73,27 @@ async function getNextAction({ task, executedActions, failedActions, page, trace
56
73
  : actions.getMasterActionSchemas();
57
74
  const tools = [next_task_1.NextTaskAction.schema, ...actionSchemas];
58
75
  promptSpan?.end({ output: { messages } });
76
+ llm =
77
+ llm ||
78
+ new llm_1.LLM({
79
+ provider: options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
80
+ defaultModel: options?.model || constants_1.DEFAULT_MODEL,
81
+ });
59
82
  const completion = await llm.createChatCompletion({
60
83
  messages,
61
84
  modelParameters: {
62
85
  ...constants_1.DEFAULT_MODEL_PARAMETERS,
63
- ...options.modelParameters,
86
+ ...options?.modelParameters,
64
87
  tool_choice: "required",
65
88
  temperature: 1,
66
89
  },
67
- trace,
90
+ trace: nextActionSpan,
68
91
  traceName: "master-agent-llm",
69
92
  // @ts-ignore
70
93
  tools,
71
94
  });
72
95
  const toolCall = completion?.tool_calls?.[0];
96
+ nextActionSpan?.end({ output: toolCall });
73
97
  return toolCall;
74
98
  }
75
99
  exports.getNextAction = getNextAction;
@@ -80,7 +104,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, }) {
80
104
  const testgenUpdatesReporter = new reporter_1.TestGenUpdatesReporter();
81
105
  // add timeout for the page to settle in
82
106
  await page.waitForTimeout(3000);
83
- const trace = llm_1.langfuseInstance.trace({
107
+ const trace = llm_1.langfuseInstance?.trace({
84
108
  name: "test-generator",
85
109
  id: crypto.randomUUID(),
86
110
  version: (0, session_1.getSessionDetails)().version,
@@ -93,9 +117,11 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, }) {
93
117
  options.metadata?.environment,
94
118
  ].filter((s) => !!s),
95
119
  });
96
- void testgenUpdatesReporter.sendMessage(`Starting master agent. [view trace](${trace.getTraceUrl()})`);
97
- logger.log(`Starting master agent: ${trace.getTraceUrl()}`);
98
- void testgenUpdatesReporter.sendAgentTraceUrl(trace.getTraceUrl());
120
+ if (trace) {
121
+ void testgenUpdatesReporter.sendMessage(`Starting master agent. [view trace](${trace?.getTraceUrl()})`);
122
+ logger.log(`Starting master agent: ${trace?.getTraceUrl()}`);
123
+ void testgenUpdatesReporter.sendAgentTraceUrl(trace.getTraceUrl());
124
+ }
99
125
  const llm = new llm_1.LLM({
100
126
  trace,
101
127
  provider: options.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
@@ -113,7 +139,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, }) {
113
139
  skill_1.testCaseSkills.updateSkills(skills);
114
140
  const actions = new actions_1.PlaywrightActions(page);
115
141
  await (0, utils_1.injectPwLocatorGenerator)(page);
116
- trace.update({ input: { task } });
142
+ trace?.update({ input: { task } });
117
143
  let isGivenTaskDone = false;
118
144
  const masterAgentActions = [];
119
145
  let failedActions = [];
@@ -122,7 +148,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, }) {
122
148
  if (await (0, session_1.shouldStopSession)()) {
123
149
  break;
124
150
  }
125
- const masterAgentSpan = trace.span({
151
+ const masterAgentSpan = trace?.span({
126
152
  name: "master-agent",
127
153
  input: {
128
154
  task,
@@ -189,7 +215,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, }) {
189
215
  task,
190
216
  executedActions: masterAgentActions,
191
217
  failedActions,
192
- page,
218
+ pageUrl: page.url(),
193
219
  trace: masterAgentSpan,
194
220
  llm,
195
221
  options,
@@ -204,7 +230,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, }) {
204
230
  }
205
231
  if (toolCall) {
206
232
  const args = JSON.parse(toolCall.function.arguments);
207
- const masterAgentActionSpan = masterAgentSpan.span({
233
+ const masterAgentActionSpan = masterAgentSpan?.span({
208
234
  name: "master-agent-action",
209
235
  });
210
236
  try {
@@ -256,7 +282,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, }) {
256
282
  // resetting error count as there is a successful action
257
283
  failedActions = [];
258
284
  masterAgentActions.push(output.action);
259
- masterAgentActionSpan.end({
285
+ masterAgentActionSpan?.end({
260
286
  input: {
261
287
  action: output.action,
262
288
  reason: output.reason,
@@ -274,7 +300,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, }) {
274
300
  logger.log("Disabling skill usage for next retry");
275
301
  disableSkills = true;
276
302
  }
277
- masterAgentActionSpan.end({
303
+ masterAgentActionSpan?.end({
278
304
  input: {
279
305
  action: output.action,
280
306
  reason: output.reason,
@@ -295,19 +321,21 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, }) {
295
321
  }
296
322
  }
297
323
  }
298
- trace.update({ input: { task }, output: { output } });
324
+ trace?.update({ input: { task }, output: { output } });
299
325
  await testGenSnapshotUpdatePromise;
300
326
  if (testGenAnnotatedSnapshotUpdatePromise) {
301
327
  await testGenAnnotatedSnapshotUpdatePromise;
302
328
  }
303
- masterAgentSpan.end({
329
+ masterAgentSpan?.end({
304
330
  output: { action: output.action, reason: output.reason },
305
331
  });
306
332
  }
307
333
  const { code, importPaths } = actions.generateCode();
308
- trace.update({ input: { task }, output: { code } });
334
+ trace?.update({ input: { task }, output: { code } });
309
335
  logger.success("Successfully generated code for the given task");
310
- await testgenUpdatesReporter.sendMessage(`Successfully generated code for the given task. \n View [trace](${trace.getTraceUrl()})`);
336
+ if (trace) {
337
+ await testgenUpdatesReporter.sendMessage(`Successfully generated code for the given task. \n View [trace](${trace.getTraceUrl()})`);
338
+ }
311
339
  return {
312
340
  code,
313
341
  importPaths,
@@ -4,7 +4,7 @@ import { Page } from "playwright";
4
4
  import { BrowsingAgentOptions } from "../browsing";
5
5
  export declare const getUserMessageWithForHints: ({ userMessage, options, pageScreenshot, annotatedPageScreenshot, }: {
6
6
  userMessage: OpenAI.ChatCompletionUserMessageParam;
7
- options: BrowsingAgentOptions;
7
+ options?: BrowsingAgentOptions | undefined;
8
8
  pageScreenshot: string;
9
9
  annotatedPageScreenshot: string;
10
10
  }) => string | OpenAI.ChatCompletionContentPart[];
@@ -1 +1 @@
1
- {"version":3,"file":"with-hints.d.ts","sourceRoot":"","sources":["../../../src/agent/master/with-hints.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,MAAM,mBAAmB,CAAC;AAExC,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAIlC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAEnD,eAAO,MAAM,0BAA0B;iBAMxB,OAAO,8BAA8B;aACzC,oBAAoB;oBACb,MAAM;6BACG,MAAM;MAC7B,MAAM,GAAG,OAAO,yBAAyB,EAiC5C,CAAC;AAEF,eAAO,MAAM,gBAAgB;6BAMF;QACvB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,CAAC,EAAE,MAAM,CAAC;KAC5B;0BACqB,OAAO,MAAM,EAAE,GAAG,CAAC;UACnC,IAAI;SACL,GAAG;MACN,QAAQ;IACV,sBAAsB,EAAE,OAAO,CAAC;IAChC,wBAAwB,EAAE,OAAO,qBAAqB,GAAG,SAAS,CAAC;CACpE,CAgFA,CAAC"}
1
+ {"version":3,"file":"with-hints.d.ts","sourceRoot":"","sources":["../../../src/agent/master/with-hints.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,MAAM,mBAAmB,CAAC;AAExC,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAIlC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAEnD,eAAO,MAAM,0BAA0B;iBAMxB,OAAO,8BAA8B;;oBAElC,MAAM;6BACG,MAAM;MAC7B,MAAM,GAAG,OAAO,yBAAyB,EAiC5C,CAAC;AAEF,eAAO,MAAM,gBAAgB;6BAMF;QACvB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,CAAC,EAAE,MAAM,CAAC;KAC5B;0BACqB,OAAO,MAAM,EAAE,GAAG,CAAC;UACnC,IAAI;SACL,GAAG;MACN,QAAQ;IACV,sBAAsB,EAAE,OAAO,CAAC;IAChC,wBAAwB,EAAE,OAAO,qBAAqB,GAAG,SAAS,CAAC;CACpE,CAgFA,CAAC"}
@@ -17,7 +17,7 @@ const getUserMessageWithForHints = ({ userMessage, options, pageScreenshot, anno
17
17
  {
18
18
  type: "image_url",
19
19
  image_url: {
20
- url: (0, vision_1.imageFormatForProvider)(options.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER, pageScreenshot),
20
+ url: (0, vision_1.imageFormatForProvider)(options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER, pageScreenshot),
21
21
  },
22
22
  },
23
23
  {
@@ -27,7 +27,7 @@ const getUserMessageWithForHints = ({ userMessage, options, pageScreenshot, anno
27
27
  {
28
28
  type: "image_url",
29
29
  image_url: {
30
- url: (0, vision_1.imageFormatForProvider)(options.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER, annotatedPageScreenshot),
30
+ url: (0, vision_1.imageFormatForProvider)(options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER, annotatedPageScreenshot),
31
31
  },
32
32
  },
33
33
  ];
package/dist/bin/index.js CHANGED
@@ -46,7 +46,7 @@ async function runAgent(testGenConfig) {
46
46
  }
47
47
  let agent = testGenConfig.options?.agent;
48
48
  const session = (0, session_1.getSessionDetails)();
49
- const trace = llm_1.langfuseInstance.trace({
49
+ const trace = llm_1.langfuseInstance?.trace({
50
50
  name: "generate-test",
51
51
  id: crypto.randomUUID(),
52
52
  release: session.version,
@@ -55,11 +55,13 @@ async function runAgent(testGenConfig) {
55
55
  testGenConfig.options?.metadata.environment || "",
56
56
  ].filter((s) => !!s),
57
57
  });
58
- try {
59
- await new reporter_1.TestGenUpdatesReporter().sendAgentTraceUrl(trace.getTraceUrl());
60
- }
61
- catch (e) {
62
- console.warn("Failed to send trace url as test gen update", e);
58
+ if (trace) {
59
+ try {
60
+ await new reporter_1.TestGenUpdatesReporter().sendAgentTraceUrl(trace.getTraceUrl());
61
+ }
62
+ catch (e) {
63
+ console.warn("Failed to send trace url as test gen update", e);
64
+ }
63
65
  }
64
66
  if (!agent || agent === "auto") {
65
67
  agent = await resolveAgentUsingTask({
@@ -0,0 +1,4 @@
1
+ import { EvaluateFn } from "./type";
2
+ declare const fetchSkillsAgentEvaluator: EvaluateFn;
3
+ export default fetchSkillsAgentEvaluator;
4
+ //# sourceMappingURL=fetch-pom-skills-agent.evals.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetch-pom-skills-agent.evals.d.ts","sourceRoot":"","sources":["../../src/evals/fetch-pom-skills-agent.evals.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAEpC,QAAA,MAAM,yBAAyB,EAAE,UAiChC,CAAC;AAEF,eAAe,yBAAyB,CAAC"}
@@ -0,0 +1,36 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ const skills_retriever_1 = require("../agent/codegen/skills-retriever");
4
+ const fetchSkillsAgentEvaluator = async ({ item, trace }) => {
5
+ const { testCase, pomFiles } = item.input;
6
+ const output = await (0, skills_retriever_1.fetchPomSkills)({
7
+ testCase,
8
+ pomFiles,
9
+ trace,
10
+ });
11
+ if (item.expectedOutput.length === 0 && output.length === 0) {
12
+ return {
13
+ scores: [
14
+ {
15
+ name: "equality",
16
+ value: 1,
17
+ },
18
+ ],
19
+ output,
20
+ };
21
+ }
22
+ return {
23
+ scores: [
24
+ {
25
+ name: "equality",
26
+ value: output.some((o) => {
27
+ return item.expectedOutput.some((e) => e.usageExample === o.usageExample);
28
+ })
29
+ ? 1
30
+ : 0,
31
+ },
32
+ ],
33
+ output,
34
+ };
35
+ };
36
+ exports.default = fetchSkillsAgentEvaluator;
@@ -0,0 +1,4 @@
1
+ import { EvaluateFn } from "./type";
2
+ export declare const masterGetNextActionEvaluator: EvaluateFn;
3
+ export default masterGetNextActionEvaluator;
4
+ //# sourceMappingURL=master-agent.evals.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"master-agent.evals.d.ts","sourceRoot":"","sources":["../../src/evals/master-agent.evals.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAEpC,eAAO,MAAM,4BAA4B,EAAE,UA0C1C,CAAC;AAEF,eAAe,4BAA4B,CAAC"}
@@ -0,0 +1,36 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.masterGetNextActionEvaluator = void 0;
4
+ const actions_1 = require("../actions");
5
+ const skill_1 = require("../actions/skill");
6
+ const run_1 = require("../agent/master/run");
7
+ const masterGetNextActionEvaluator = async ({ item, trace, }) => {
8
+ const { task, executedActions, failedActions, pageUrl, options, pageScreenshot, annotatedPageScreenshot, disableSkills, useHints, skills = [], } = item.input;
9
+ const page = {};
10
+ skill_1.testCaseSkills.updateSkills(skills);
11
+ const actions = new actions_1.PlaywrightActions(page);
12
+ const output = await (0, run_1.getNextAction)({
13
+ task,
14
+ executedActions,
15
+ failedActions,
16
+ pageUrl,
17
+ trace,
18
+ options,
19
+ pageScreenshot,
20
+ annotatedPageScreenshot,
21
+ actions,
22
+ disableSkills,
23
+ useHints,
24
+ });
25
+ return {
26
+ scores: [
27
+ {
28
+ name: "action_correctness",
29
+ value: item.expectedOutput.function.name === output?.function.name ? 1 : 0,
30
+ },
31
+ ],
32
+ output,
33
+ };
34
+ };
35
+ exports.masterGetNextActionEvaluator = masterGetNextActionEvaluator;
36
+ exports.default = exports.masterGetNextActionEvaluator;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@empiricalrun/test-gen",
3
- "version": "0.34.5",
3
+ "version": "0.35.1",
4
4
  "publishConfig": {
5
5
  "registry": "https://registry.npmjs.org/",
6
6
  "access": "public"
@@ -44,7 +44,7 @@
44
44
  "ts-morph": "^23.0.0",
45
45
  "tsx": "^4.16.2",
46
46
  "typescript": "^5.3.3",
47
- "@empiricalrun/llm": "^0.9.20",
47
+ "@empiricalrun/llm": "^0.9.22",
48
48
  "@empiricalrun/r2-uploader": "^0.3.6",
49
49
  "@empiricalrun/reporter": "^0.21.2"
50
50
  },
@@ -1,2 +0,0 @@
1
- export {};
2
- //# sourceMappingURL=infer-master-code.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"infer-master-code.d.ts","sourceRoot":"","sources":["../../src/evals/infer-master-code.ts"],"names":[],"mappings":""}
@@ -1,18 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- const llm_1 = require("@empiricalrun/llm");
4
- const infer_agent_1 = require("../agent/infer-agent");
5
- const datasetName = "infer-master-or-code-agent";
6
- (async function main() {
7
- const dataset = await llm_1.langfuseInstance.getDataset(datasetName);
8
- const runName = `${datasetName}-${Date.now()}`;
9
- for (const item of dataset.items) {
10
- const { response, trace } = await (0, infer_agent_1.inferAgentBasedTask)(item.input);
11
- await item.link(trace, runName, {});
12
- trace?.score({
13
- name: "equality",
14
- value: item.expectedOutput === response ? 1 : 0, // score value
15
- });
16
- }
17
- await llm_1.langfuseInstance.flushAsync();
18
- })();