@empiricalrun/test-gen 0.42.18 → 0.42.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/CHANGELOG.md +17 -0
  2. package/dist/agent/browsing/index.d.ts +4 -6
  3. package/dist/agent/browsing/index.d.ts.map +1 -1
  4. package/dist/agent/browsing/index.js +49 -127
  5. package/dist/agent/browsing/utils.d.ts +0 -7
  6. package/dist/agent/browsing/utils.d.ts.map +1 -1
  7. package/dist/agent/browsing/utils.js +1 -13
  8. package/dist/agent/codegen/create-test-block.d.ts.map +1 -1
  9. package/dist/agent/codegen/create-test-block.js +2 -2
  10. package/dist/agent/codegen/lexical-scoped-vars.d.ts.map +1 -1
  11. package/dist/agent/codegen/lexical-scoped-vars.js +2 -2
  12. package/dist/agent/infer-agent/index.d.ts +2 -1
  13. package/dist/agent/infer-agent/index.d.ts.map +1 -1
  14. package/dist/agent/infer-agent/index.js +4 -11
  15. package/dist/agent/master/element-annotation.d.ts.map +1 -1
  16. package/dist/agent/master/element-annotation.js +7 -53
  17. package/dist/agent/master/run.d.ts +2 -1
  18. package/dist/agent/master/run.d.ts.map +1 -1
  19. package/dist/agent/master/run.js +23 -8
  20. package/dist/agent/planner/run-time-planner.d.ts.map +1 -1
  21. package/dist/agent/planner/run-time-planner.js +2 -1
  22. package/dist/agent/planner/run.d.ts +2 -3
  23. package/dist/agent/planner/run.d.ts.map +1 -1
  24. package/dist/agent/planner/run.js +6 -15
  25. package/dist/bin/index.js +2 -1
  26. package/dist/index.d.ts.map +1 -1
  27. package/dist/index.js +3 -1
  28. package/dist/prompts/lib/index.d.ts +8 -0
  29. package/dist/prompts/lib/index.d.ts.map +1 -0
  30. package/dist/prompts/lib/index.js +118 -0
  31. package/dist/types/index.d.ts +5 -4
  32. package/dist/types/index.d.ts.map +1 -1
  33. package/package.json +5 -4
  34. package/dist/agent/codegen/promptBuilder.d.ts +0 -3
  35. package/dist/agent/codegen/promptBuilder.d.ts.map +0 -1
  36. package/dist/agent/codegen/promptBuilder.js +0 -44
  37. package/dist/agent/verification/index.d.ts +0 -13
  38. package/dist/agent/verification/index.d.ts.map +0 -1
  39. package/dist/agent/verification/index.js +0 -84
  40. package/dist/evals/verification-agent.evals.d.ts +0 -4
  41. package/dist/evals/verification-agent.evals.d.ts.map +0 -1
  42. package/dist/evals/verification-agent.evals.js +0 -23
package/CHANGELOG.md CHANGED
@@ -1,5 +1,22 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.42.20
4
+
5
+ ### Patch Changes
6
+
7
+ - 9488f81: fix: infer-agent prompt template
8
+ - b625749: chore: move infer-agent and planner prompts to handlebars
9
+ - 5fb977c: test: clean up some unreliable tests
10
+ - 8914542: feat: run planner inside master agent flow
11
+
12
+ ## 0.42.19
13
+
14
+ ### Patch Changes
15
+
16
+ - c36efe4: chore: remove any type for get-next-action output
17
+ - ebb0bfa: feat: support images in handlebar prompts
18
+ - 63ed479: fix: remove verification and looping inside browsing agent
19
+
3
20
  ## 0.42.18
4
21
 
5
22
  ### Patch Changes
@@ -1,20 +1,18 @@
1
1
  import { LLM, TraceClient } from "@empiricalrun/llm";
2
2
  import { Page } from "playwright";
3
3
  import { PlaywrightActions } from "../../actions";
4
- import { CustomLogger } from "../../bin/logger";
5
4
  import { TestGenConfigOptions } from "../../types";
6
5
  export type BrowsingAgentOptions = Partial<TestGenConfigOptions> & {
7
6
  htmlSanitize?: {
8
7
  disallowedStrings?: string[];
9
8
  };
10
9
  };
11
- export declare function executeTaskUsingBrowsingAgent({ trace, action, logger, page, options, llm, actions, }: {
10
+ export declare function executeTaskUsingBrowsingAgent({ action, page, actions, llm, options, trace, }: {
12
11
  action: string;
13
- trace?: TraceClient;
14
- logger: CustomLogger;
15
12
  page: Page;
16
- options: BrowsingAgentOptions;
17
- llm: LLM;
18
13
  actions: PlaywrightActions;
14
+ llm: LLM;
15
+ trace?: TraceClient;
16
+ options: BrowsingAgentOptions;
19
17
  }): Promise<string[] | undefined>;
20
18
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAClD,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAIhD,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAOnD,MAAM,MAAM,oBAAoB,GAAG,OAAO,CAAC,oBAAoB,CAAC,GAAG;IACjE,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACH,CAAC;AAEF,wBAAsB,6BAA6B,CAAC,EAClD,KAAK,EACL,MAAM,EACN,MAAM,EACN,IAAI,EACJ,OAAO,EACP,GAAG,EACH,OAAO,GACR,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,MAAM,EAAE,YAAY,CAAC;IACrB,IAAI,EAAE,IAAI,CAAC;IACX,OAAO,EAAE,oBAAoB,CAAC;IAC9B,GAAG,EAAE,GAAG,CAAC;IACT,OAAO,EAAE,iBAAiB,CAAC;CAC5B,GAAG,OAAO,CAAC,MAAM,EAAE,GAAG,SAAS,CAAC,CAiJhC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAKlD,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAKnD,MAAM,MAAM,oBAAoB,GAAG,OAAO,CAAC,oBAAoB,CAAC,GAAG;IACjE,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACH,CAAC;AAEF,wBAAsB,6BAA6B,CAAC,EAClD,MAAM,EACN,IAAI,EACJ,OAAO,EACP,GAAG,EACH,OAAO,EACP,KAAK,GACN,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,IAAI,CAAC;IACX,OAAO,EAAE,iBAAiB,CAAC;IAC3B,GAAG,EAAE,GAAG,CAAC;IACT,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,OAAO,EAAE,oBAAoB,CAAC;CAC/B,GAAG,OAAO,CAAC,MAAM,EAAE,GAAG,SAAS,CAAC,CAiEhC"}
@@ -2,148 +2,70 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.executeTaskUsingBrowsingAgent = void 0;
4
4
  const constants_1 = require("../../constants");
5
+ const promptTemplate_0 = "{{#section \"system\"}}\nYou are a browser automation agent who is given a task to generate code for navigation and assertion. This task is your\ngoal and you must achieve it.\n\nYou will be provided with already executed actions and basis that you need to pick the next step to achieve the task.\nRemember that the goal must be achieved.\n\nYou will be provided with the web page snapshot in the form of Document Object Model. Based on the goal and available\ntool calls you need to pick the appropriate tool call.\n\nInstructions:\n- Take actions one at a time. Do not try to take multiple actions\n- You can respond with multiple assertions in one shot\n- Do not repeat the same actions again otherwise your response will be marked INVALID\n- Avoid repeating errors which we got while executing the last action\n- Stick to the task provided to you and mark the task done once the task is complete\n- Do not execute any action which is not mentioned in the task\n- Do not repeat actions which are already executed more than twice otherwise your response will be marked INVALID\n- Always refer to \"Executed actions\" before deciding your next action for completion of the task.\n- End the task done if all actions required for task are executed\n{{/section}}\n\n{{#section \"user\"}}\nTask:\n{{task}}\n\nCurrent page snapshot:\n{{pageSnapshot}}\n{{/section}}";
6
+ const lib_1 = require("../../prompts/lib");
5
7
  const reporter_1 = require("../../reporter");
6
- const session_1 = require("../../session");
7
8
  const html_1 = require("../../utils/html");
8
9
  const utils_1 = require("../utils");
9
- const verification_1 = require("../verification");
10
10
  const o1_completion_1 = require("./o1-completion");
11
- const utils_2 = require("./utils");
12
- async function executeTaskUsingBrowsingAgent({ trace, action, logger, page, options, llm, actions, }) {
13
- let isTaskDone = false;
14
- const executedActions = [];
15
- let lastActionExecTrace = "";
11
+ async function executeTaskUsingBrowsingAgent({ action, page, actions, llm, options, trace, }) {
16
12
  let generatedCodeSteps = [];
17
13
  const tools = actions.getBrowsingActionSchemas();
18
14
  const testgenUpdatesReporter = new reporter_1.TestGenUpdatesReporter();
19
- while (!isTaskDone) {
20
- const browsingAgentSpan = trace?.span({
21
- name: `browsing-agent`,
22
- input: {
23
- action,
24
- },
25
- });
26
- if (await (0, session_1.shouldStopSession)()) {
27
- break;
28
- }
29
- const pageContentSpan = browsingAgentSpan?.span({
30
- name: "page-content",
31
- });
32
- const pageContent = await page.content();
33
- pageContentSpan?.end({ output: { pageContent } });
34
- const sanitizationSpan = browsingAgentSpan?.span({
35
- name: "page-sanitization",
36
- });
37
- const pageSnapshot = (0, html_1.sanitizeHtml)(pageContent, options.htmlSanitize);
38
- sanitizationSpan?.end({ output: { pageSnapshot } });
39
- const promptSpan = browsingAgentSpan?.span({ name: "page-prompt" });
40
- // extract all successful actions
41
- const successfulActions = executedActions
42
- .filter((a) => !a.isError)
43
- .map((a) => a.action);
44
- if (successfulActions.length > 0) {
45
- const verificationAgentResp = await (0, verification_1.verificationAgent)({
46
- trace: browsingAgentSpan,
47
- task: action,
48
- conversation: ["Successfully executed actions", ...successfulActions],
49
- });
50
- isTaskDone = verificationAgentResp.isDone;
51
- logger.log(`isTaskDone: ${isTaskDone}`);
52
- logger.log(`reason: ${verificationAgentResp.reason}`);
53
- if (isTaskDone) {
54
- browsingAgentSpan?.event({ name: "task-done" });
55
- browsingAgentSpan?.end({
56
- output: {
57
- taskDone: true,
58
- reason: verificationAgentResp.reason,
59
- code: generatedCodeSteps,
60
- },
61
- });
62
- break;
63
- }
64
- }
65
- const messages = await (0, utils_2.getPromptForNextAction)({
66
- pageSnapshot,
67
- previousActions: successfulActions,
68
- task: action,
69
- lastActionErrors: lastActionExecTrace ? [lastActionExecTrace] : [],
70
- promptType: "browsing-agent-as-tool",
71
- });
72
- promptSpan?.end({ output: { messages } });
73
- let completion;
74
- completion = await (0, o1_completion_1.getO1Completion)({
75
- //@ts-ignore
15
+ const browsingAgentSpan = trace?.span({
16
+ name: `browsing-agent`,
17
+ input: {
18
+ action,
19
+ },
20
+ });
21
+ const pageContentSpan = browsingAgentSpan?.span({
22
+ name: "page-content",
23
+ });
24
+ const pageContent = await page.content();
25
+ pageContentSpan?.end({ output: { pageContent } });
26
+ const sanitizationSpan = browsingAgentSpan?.span({
27
+ name: "page-sanitization",
28
+ });
29
+ const pageSnapshot = (0, html_1.sanitizeHtml)(pageContent, options.htmlSanitize);
30
+ sanitizationSpan?.end({ output: { pageSnapshot } });
31
+ const promptSpan = browsingAgentSpan?.span({ name: "page-prompt" });
32
+ const messages = await (0, lib_1.compilePrompt)(promptTemplate_0, { pageSnapshot, task: action });
33
+ promptSpan?.end({ output: { messages } });
34
+ let completion;
35
+ completion = await (0, o1_completion_1.getO1Completion)({
36
+ messages,
37
+ tools,
38
+ trace: browsingAgentSpan,
39
+ });
40
+ // If O1 completion fails due to any reason, resort to old flow
41
+ if (!completion) {
42
+ completion = await llm.createChatCompletion({
76
43
  messages,
77
44
  tools,
78
45
  trace: browsingAgentSpan,
46
+ model: options.model || constants_1.DEFAULT_MODEL,
47
+ modelParameters: {
48
+ ...constants_1.DEFAULT_MODEL_PARAMETERS,
49
+ ...options.modelParameters,
50
+ tool_choice: "required",
51
+ },
79
52
  });
80
- // If O1 completion fails due to any reason, resort to old flow
81
- if (!completion) {
82
- completion = await llm.createChatCompletion({
83
- messages,
84
- tools,
85
- trace: browsingAgentSpan,
86
- model: options.model || constants_1.DEFAULT_MODEL,
87
- modelParameters: {
88
- ...constants_1.DEFAULT_MODEL_PARAMETERS,
89
- ...options.modelParameters,
90
- tool_choice: "required",
91
- },
92
- });
93
- }
94
- const toolCalls = completion?.tool_calls || [];
95
- // LLM might respond with empty tool_calls and we can go into endless loop
96
- // if we donot record this action and mark it as error
97
- if (!toolCalls.length) {
98
- executedActions.push({
99
- isError: true,
100
- action: "",
101
- });
102
- }
103
- const toolCallsSpan = browsingAgentSpan?.span({ name: "tool-calls" });
104
- for (const i in toolCalls) {
105
- const toolCall = toolCalls[i];
106
- if (await (0, session_1.shouldStopSession)()) {
107
- break;
108
- }
109
- try {
110
- const code = await actions.executeAction(toolCall.function.name, (0, utils_1.parseJson)(toolCall.function.arguments), toolCallsSpan);
111
- if (code) {
112
- generatedCodeSteps.push(code);
113
- }
114
- executedActions.push({
115
- isError: false,
116
- action: (0, utils_1.parseJson)(toolCall.function.arguments)?.reason,
117
- });
118
- lastActionExecTrace = "";
119
- }
120
- catch (e) {
121
- // TODO: implement feedback loop to llm
122
- executedActions.push({
123
- isError: true,
124
- action: (0, utils_1.parseJson)(toolCall.function.arguments)?.reason,
125
- });
126
- lastActionExecTrace = e.message;
127
- void testgenUpdatesReporter.sendMessage(e.message);
128
- logger.error(lastActionExecTrace, e);
53
+ }
54
+ const toolCalls = completion?.tool_calls || [];
55
+ const toolCallsSpan = browsingAgentSpan?.span({ name: "tool-calls" });
56
+ for (const i in toolCalls) {
57
+ const toolCall = toolCalls[i];
58
+ try {
59
+ const code = await actions.executeAction(toolCall.function.name, (0, utils_1.parseJson)(toolCall.function.arguments), toolCallsSpan);
60
+ if (code) {
61
+ generatedCodeSteps.push(code);
129
62
  }
130
63
  }
131
- toolCallsSpan?.end({ output: { toolCalls } });
132
- // mark task as done if llm is stuck in loop
133
- if (executedActions.length >= 3) {
134
- const lastThreeActions = executedActions.slice(-3);
135
- const lastThreeActionsFailed = lastThreeActions.every((a) => a.isError);
136
- // get last 3 lines of code
137
- const isStuckInLoop = actions.isStuckInLoop();
138
- if (lastThreeActionsFailed || isStuckInLoop) {
139
- // TODO: this should be sent to dashboard
140
- const error = "Agent is not able to figure out next browser action, ending retries";
141
- logger.error(error);
142
- await testgenUpdatesReporter.sendMessage(error);
143
- throw Error(error);
144
- }
64
+ catch (e) {
65
+ void testgenUpdatesReporter.sendMessage(e.message);
145
66
  }
146
67
  }
68
+ toolCallsSpan?.end({ output: { toolCalls } });
147
69
  return generatedCodeSteps;
148
70
  }
149
71
  exports.executeTaskUsingBrowsingAgent = executeTaskUsingBrowsingAgent;
@@ -29,13 +29,6 @@ export declare function readPlaywrightConfig(): Promise<PlaywrightTestConfig>;
29
29
  * @returns
30
30
  */
31
31
  export declare function detectProjectName(testFilePath: string, playwrightConfig: PlaywrightTestConfig, pwProjectsFilter?: string[]): Promise<string>;
32
- export declare function getPromptForNextAction({ pageSnapshot, task, previousActions, lastActionErrors, promptType, }: {
33
- pageSnapshot: string;
34
- task: string;
35
- previousActions: string[];
36
- lastActionErrors: string[];
37
- promptType?: string;
38
- }): Promise<import("openai/resources/index.mjs").ChatCompletionMessageParam[]>;
39
32
  export declare class TeardownManager {
40
33
  private directory;
41
34
  constructor(directory: string);
@@ -1 +1 @@
1
- {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAa,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAI3D,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAiBvD,OAAO,EAAe,aAAa,EAAE,MAAM,aAAa,CAAC;AAMzD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;AA8FD;;;;GAIG;AACH,wBAAsB,yBAAyB,CAC7C,SAAS,EAAE,aAAa,EACxB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,MAAM,CAAC,CA0DjB;AAyBD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBAuHxD;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,QAIjD;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,IAAI,OAAO,CAAC,oBAAoB,CAAC,CAM1E;AAWD;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,YAAY,EAAE,MAAM,EACpB,gBAAgB,EAAE,oBAAoB,EACtC,gBAAgB,GAAE,MAAM,EAAU,GACjC,OAAO,CAAC,MAAM,CAAC,CA+CjB;AAED,wBAAsB,sBAAsB,CAAC,EAC3C,YAAiB,EACjB,IAAS,EACT,eAAoB,EACpB,gBAAqB,EACrB,UAAyC,GAC1C,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB,8EASA;AAED,qBAAa,eAAe;IACd,OAAO,CAAC,SAAS;gBAAT,SAAS,EAAE,MAAM;IACrC,OAAO,CAAC,aAAa,CAAqB;YAE5B,mBAAmB;YAUnB,gBAAgB;IAsBjB,OAAO;IAuBb,SAAS;CAKjB"}
1
+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAIhD,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAiBvD,OAAO,EAAe,aAAa,EAAE,MAAM,aAAa,CAAC;AAMzD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;AA8FD;;;;GAIG;AACH,wBAAsB,yBAAyB,CAC7C,SAAS,EAAE,aAAa,EACxB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,MAAM,CAAC,CA0DjB;AAyBD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBAuHxD;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,QAIjD;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,IAAI,OAAO,CAAC,oBAAoB,CAAC,CAM1E;AAWD;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,YAAY,EAAE,MAAM,EACpB,gBAAgB,EAAE,oBAAoB,EACtC,gBAAgB,GAAE,MAAM,EAAU,GACjC,OAAO,CAAC,MAAM,CAAC,CA+CjB;AAED,qBAAa,eAAe;IACd,OAAO,CAAC,SAAS;gBAAT,SAAS,EAAE,MAAM;IACrC,OAAO,CAAC,aAAa,CAAqB;YAE5B,mBAAmB;YAUnB,gBAAgB;IAsBjB,OAAO;IAuBb,SAAS;CAKjB"}
@@ -3,8 +3,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
3
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.TeardownManager = exports.getPromptForNextAction = exports.detectProjectName = exports.readPlaywrightConfig = exports.canRunMasterAgent = exports.injectPwLocatorGenerator = exports.prepareFileForMasterAgent = exports.prepareBrowsingAgentTask = exports.isRegExp = void 0;
7
- const llm_1 = require("@empiricalrun/llm");
6
+ exports.TeardownManager = exports.detectProjectName = exports.readPlaywrightConfig = exports.canRunMasterAgent = exports.injectPwLocatorGenerator = exports.prepareFileForMasterAgent = exports.prepareBrowsingAgentTask = exports.isRegExp = void 0;
8
7
  const fs_extra_1 = __importDefault(require("fs-extra"));
9
8
  const minimatch_1 = require("minimatch");
10
9
  const path_1 = __importDefault(require("path"));
@@ -348,17 +347,6 @@ async function detectProjectName(testFilePath, playwrightConfig, pwProjectsFilte
348
347
  return filteredProjectNames[0];
349
348
  }
350
349
  exports.detectProjectName = detectProjectName;
351
- async function getPromptForNextAction({ pageSnapshot = "", task = "", previousActions = [], lastActionErrors = [], promptType = "browsing-agent-next-action", }) {
352
- const previousActionsStr = previousActions.join("\n\n ---- \n\n");
353
- const prompt = await (0, llm_1.getPrompt)(promptType, {
354
- pageSnapshot,
355
- previousActionsStr,
356
- task,
357
- lastActionErrors,
358
- });
359
- return prompt;
360
- }
361
- exports.getPromptForNextAction = getPromptForNextAction;
362
350
  class TeardownManager {
363
351
  directory;
364
352
  constructor(directory) {
@@ -1 +1 @@
1
- {"version":3,"file":"create-test-block.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/create-test-block.ts"],"names":[],"mappings":"AAAA,OAAO,EAAyB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAavE,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAG7D,wBAAsB,wBAAwB,CAAC,EAC7C,QAAQ,EACR,IAAI,EACJ,OAAO,EACP,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,+BAqDA"}
1
+ {"version":3,"file":"create-test-block.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/create-test-block.ts"],"names":[],"mappings":"AAAA,OAAO,EAAyB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAcvE,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAE7D,wBAAsB,wBAAwB,CAAC,EAC7C,QAAQ,EACR,IAAI,EACJ,OAAO,EACP,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,+BAqDA"}
@@ -7,8 +7,8 @@ const context_1 = require("../../bin/utils/context");
7
7
  const web_1 = require("../../bin/utils/platform/web");
8
8
  const constants_1 = require("../../constants");
9
9
  const promptTemplate_0 = "{{#section \"system\"}}\nYou are a software test engineer who is given a task to write an empty test block.\nBased on the inputs you need to create an empty playwright test block with correctly imported fixture.\n\nThe test will contain a test name which you will need to use to build the empty test case block.\n\nYou will be provided with current tests, fixtures and page object models for you to use and create test case block as\nper the task provided to you.\n\nBefore responding you need to ensure that the code change is minimal and the change is reusable across tests. You need\nto ensure the code follows DRY principle.\n\nHere is the list of current tests and fixtures:\n\n{{testFiles}}\n\nHere is the list of current page object models:\n\n{{pageFiles}}\n{{/section}}\n\n{{#section \"user\"}}\nFollowing is the test scenario for which you need to write the empty test case block:\ntest name:\n{{scenarioName}}\n\ntask:\ncreate an empty test case block for the following test steps:\n{{scenario}}\n\ntest file path: {{scenarioFile}}\n\n------\n\nYou also need to ensure that the empty test case block has a starting page to begin test.\n\nIn order to identify the right page with which the test should start, follow the steps:\n- based on the similarities with other test cases mentioned in the file, identify the right page fixture to be imported\n- Read the page fixture methods step by step. Identify whether the fixture handles navigating to a page.\n- Identify whether other tests using the page fixture had to add separate steps for navigation or not\n- Based on the above analysis there will be following cases and choose either for the given test scenario:\n-- Case 1: if the test case scenario provided inside the task mentions about page navigation, then use that page\nnavigation. 
skip other cases if this case is satisfied.\n-- Case 2: refer other test cases which import similar fixtures and infer the first page navigation of this test case.\nYou should prefer tests which are in the same file. Tests within same file have higher overlaps in first page\nnavigation.\n- Once the page fixture is decided, look for userContext fixture in files. If its available then add \"userContext\" to\nthe test case block\n\n\n\nFollow these instructions before responding with output:\n- Read the code line by line and achieve the task provided to you\n- Read the dependencies of the code block by scanning through file paths and file provided to you. refer the same file\npath while responding with update\n- Focus only on the test case provided and associated JS methods called from the test case.\n- Respond only with the new empty test case block to be created and nothing else.\n- DO NOT respond with any backticks or markdown syntax\n- If \"userContext\" fixture is available in fixtures file, ensure importing that fixture in the test case block.\n- Provide a reason based on the test steps provided to you on why you chose the fixture or page.goto statement. The\nreason should be one of the list steps provided to you and mention why the case was chosen\n{{/section}}";
10
+ const lib_1 = require("../../prompts/lib");
10
11
  const session_1 = require("../../session");
11
- const promptBuilder_1 = require("./promptBuilder");
12
12
  async function createEmptyTestCaseBlock({ testCase, file, options, trace, }) {
13
13
  const logger = new logger_1.CustomLogger({ useReporter: false });
14
14
  logger.log("Creating new test block");
@@ -29,7 +29,7 @@ async function createEmptyTestCaseBlock({ testCase, file, options, trace, }) {
29
29
  const promptSpan = trace?.span({
30
30
  name: "build-create-empty-test-case-prompt",
31
31
  });
32
- const prompt = await (0, promptBuilder_1.compilePrompt)(promptTemplate_0, {
32
+ const prompt = await (0, lib_1.compilePrompt)(promptTemplate_0, {
33
33
  testFiles: context.codePrompt,
34
34
  pageFiles: context.pomPrompt,
35
35
  scenarioName: testCase.name,
@@ -1 +1 @@
1
- {"version":3,"file":"lexical-scoped-vars.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/lexical-scoped-vars.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAQrD,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAGnD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,IAAI,EACJ,cAAc,EACd,OAAO,GACR,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,IAAI,EAAE,MAAM,CAAC;IACb,cAAc,EAAE,MAAM,CAAC;IACvB,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,qBAoDA"}
1
+ {"version":3,"file":"lexical-scoped-vars.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/lexical-scoped-vars.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AASrD,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAEnD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,IAAI,EACJ,cAAc,EACd,OAAO,GACR,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,IAAI,EAAE,MAAM,CAAC;IACb,cAAc,EAAE,MAAM,CAAC;IACvB,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,qBAoDA"}
@@ -4,12 +4,12 @@ exports.getLexicalScopedVars = void 0;
4
4
  const llm_1 = require("@empiricalrun/llm");
5
5
  const constants_1 = require("../../constants");
6
6
  const promptTemplate_0 = "{{#section \"system\"}}\nYou are a software engineer tasked with analysing Typescript code to identify all variables available in the lexical\nscope at a specific reference point within a file. You will be given a file that contains multiple Playwright tests or\npage object models, along with a reference point inside the file. Your goal is to evaluate the list of all variables\navailable in the lexical scope at that reference point.\n\nTo accomplish this, you need to evaluate the Abstract Syntax Tree (AST) and accumulate all variables that are in the\nlexical scope, which includes:\n1. Variables declared within the test before the reference point.\n2. Arguments of the function.\n3. Variables defined in the parent scope. Identify all variables available in the lexical scope at a specific execution\nreference point within a file, considering only those variables that have been declared and assigned prior to the\nexecution of this point in the code.\n4. Global variables defined in the file.\n\nBefore responding:\n- Ignore variables imported from the `\"./pages\"` path.\n- keep in mind temporal dead zone phenomenon before responding with variables\n{{/section}}\n\n{{#section \"user\"}}\nFile:\n{{testFile}}\n\nReference point:\n{{referencePoint}}\n{{/section}}";
7
- const promptBuilder_1 = require("./promptBuilder");
7
+ const lib_1 = require("../../prompts/lib");
8
8
  async function getLexicalScopedVars({ trace, file, referencePoint, options, }) {
9
9
  const fetchLexicalScopedVarsSpan = trace?.span({
10
10
  name: "lexical-scoped-vars",
11
11
  });
12
- const messages = await (0, promptBuilder_1.compilePrompt)(promptTemplate_0, {
12
+ const messages = await (0, lib_1.compilePrompt)(promptTemplate_0, {
13
13
  testFile: file || "",
14
14
  referencePoint: referencePoint || "",
15
15
  });
@@ -1,5 +1,6 @@
1
1
  import { TraceClient } from "@empiricalrun/llm";
2
- import { Agent, TestGenConfigOptions } from "../../types";
2
+ import { Agent } from "@empiricalrun/shared-types";
3
+ import { TestGenConfigOptions } from "../../types";
3
4
  export declare function inferAgentBasedTask({ task, options, trace, }: {
4
5
  task: string;
5
6
  options?: TestGenConfigOptions;
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/infer-agent/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAIL,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAS3B,OAAO,EAAE,KAAK,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAG1D,wBAAsB,mBAAmB,CAAC,EACxC,IAAI,EACJ,OAAO,EACP,KAAK,GACN,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC;IAAE,QAAQ,EAAE,KAAK,CAAA;CAAE,CAAC,CA4E/B"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/infer-agent/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAyB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACvE,OAAO,EAAE,KAAK,EAAE,MAAM,4BAA4B,CAAC;AAWnD,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAGnD,wBAAsB,mBAAmB,CAAC,EACxC,IAAI,EACJ,OAAO,EACP,KAAK,GACN,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC;IAAE,QAAQ,EAAE,KAAK,CAAA;CAAE,CAAC,CAmE/B"}
@@ -3,6 +3,8 @@ Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.inferAgentBasedTask = void 0;
4
4
  const llm_1 = require("@empiricalrun/llm");
5
5
  const constants_1 = require("../../constants");
6
+ const promptTemplate_0 = "{{#section \"system\"}}\nYou are a software test engineer specializing in Playwright end-to-end tests. You are given a task which is a part of an\nend-to-end test scenario. The task may involve updating an existing end-to-end test case or writing a new test case from\nscratch. Tests involve user interactions (e.g. click on element) or other actions supported by Playwright (e.g.\nintercept network requests)\n\nYour objective is to identify whether the task requires accessing a web browser or not.\n\nTo fulfill your objective, answer the following questions:\n\n1. Does it require you to interact with a UI element in the browser? Examples of interactions are click, fill, type, key\npress, assert visibility of the element. Actions that interact with network requests are not UI element interactions.\n\n2. Is the locator of this UI element given to you in the task? Locators look like `getByText(...)`, `getByTestId(...)`\nand other locator methods in Playwright\n\n3. Decide if you need a browser: if you need to interact with a UI element AND you are NOT given the locator for that\nelement, you WILL NEED a browser.\n\n4. If you NEED a browser, then respond with answer as \"master\", otherwise respond with \"code\"\n\n\n# Example 1\n## Input\nTask:\nin this test don't delete the agent and remove steps after that\n\n## Output\n- ui_interaction_to_be_performed: There is no interaction here\n- ui_element_to_interact_with: No element specified\n- has_locator_for_that_element: No element specified\n- reasoning_for_browser_required: No interaction hence browser is not required\n- answer: code\n\n# Example 2\n## Input\nTask:\nin the swapfast test, replace the selectTokenForSwap method. 
Instead we will do this\\nclick on token button - this will\nshow a drawer\\nEnter usd in the search field that shows up in the drawer\\nSelect USDC.axl for Cosmos Hub - very\nimportant to choose this instead of USDC.axl on Osmosis\n\n## Output\n- ui_interaction_to_be_performed: Click on token button\n- ui_element_to_interact_with: Token button\n- has_locator_for_that_element: false\n- reasoning_for_browser_required: Task requires interacting with a UI element and identifying its locator which needs a\nbrowser\n- answer: master\n{{/section}}\n\n{{#section \"user\"}}\nTask:\n{{task}}\n{{/section}}";
7
+ const lib_1 = require("../../prompts/lib");
6
8
  const session_1 = require("../../session");
7
9
  const session = (0, session_1.getSessionDetails)();
8
10
  async function inferAgentBasedTask({ task, options, trace, }) {
@@ -20,16 +22,7 @@ async function inferAgentBasedTask({ task, options, trace, }) {
20
22
  options,
21
23
  },
22
24
  });
23
- const promptSpan = inferAgentSpan?.span({
24
- name: "infer-agent-prompt",
25
- input: {
26
- task,
27
- },
28
- });
29
- const prompt = await (0, llm_1.getPrompt)("infer-agent", {
30
- task,
31
- });
32
- promptSpan?.end({ output: { prompt } });
25
+ const messages = (0, lib_1.compilePrompt)(promptTemplate_0, { task });
33
26
  const llm = new llm_1.LLM({
34
27
  trace: inferAgentSpan,
35
28
  provider: options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
@@ -37,7 +30,7 @@ async function inferAgentBasedTask({ task, options, trace, }) {
37
30
  providerApiKey: constants_1.MODEL_API_KEYS[options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER],
38
31
  });
39
32
  const firstShotMessage = await llm.createChatCompletion({
40
- messages: prompt,
33
+ messages,
41
34
  modelParameters: {
42
35
  ...constants_1.DEFAULT_MODEL_PARAMETERS,
43
36
  ...options?.modelParameters,
@@ -1 +1 @@
1
- {"version":3,"file":"element-annotation.d.ts","sourceRoot":"","sources":["../../../src/agent/master/element-annotation.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAGrD,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAOlC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAEnD,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AA0DjD,wBAAsB,oBAAoB,CAAC,EACzC,kBAAkB,EAClB,WAAW,EACX,mBAAmB,EACnB,KAAK,EACL,GAAG,EACH,OAAO,EACP,UAAU,GACX,EAAE;IACD,kBAAkB,EAAE,MAAM,CAAC;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,UAAU,EAAE,oBAAoB,CAAC;CAClC,GAAG,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,CAgG9B;AAED,MAAM,MAAM,oBAAoB,GAAG;IACjC,UAAU,EACN,KAAK,GACL,UAAU,CAAC,IAAI,GACf,UAAU,CAAC,WAAW,GACtB,UAAU,CAAC,MAAM,CAAC;IACtB,aAAa,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;CACpC,CAAC;AAEF,wBAAsB,iBAAiB,CAAC,EACtC,IAAI,EACJ,UAAU,EACV,OAAO,GACR,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,UAAU,EAAE,oBAAoB,CAAC;IACjC,OAAO,EAAE,oBAAoB,CAAC;CAC/B,GAAG,OAAO,CAAC;IACV,cAAc,EAAE;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;IACtD,gBAAgB,EAAE,MAAM,CAAC;IACzB,uBAAuB,EAAE,MAAM,CAAC;CACjC,CAAC,CAqDD"}
1
+ {"version":3,"file":"element-annotation.d.ts","sourceRoot":"","sources":["../../../src/agent/master/element-annotation.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AASlC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAEnD,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AA0DjD,wBAAsB,oBAAoB,CAAC,EACzC,kBAAkB,EAClB,WAAW,EACX,mBAAmB,EACnB,KAAK,EACL,GAAG,EACH,OAAO,EACP,UAAU,GACX,EAAE;IACD,kBAAkB,EAAE,MAAM,CAAC;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,UAAU,EAAE,oBAAoB,CAAC;CAClC,GAAG,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,CA8C9B;AAED,MAAM,MAAM,oBAAoB,GAAG;IACjC,UAAU,EACN,KAAK,GACL,UAAU,CAAC,IAAI,GACf,UAAU,CAAC,WAAW,GACtB,UAAU,CAAC,MAAM,CAAC;IACtB,aAAa,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;CACpC,CAAC;AAEF,wBAAsB,iBAAiB,CAAC,EACtC,IAAI,EACJ,UAAU,EACV,OAAO,GACR,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,UAAU,EAAE,oBAAoB,CAAC;IACjC,OAAO,EAAE,oBAAoB,CAAC;CAC/B,GAAG,OAAO,CAAC;IACV,cAAc,EAAE;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;IACtD,gBAAgB,EAAE,MAAM,CAAC;IACzB,uBAAuB,EAAE,MAAM,CAAC;CACjC,CAAC,CAqDD"}
@@ -2,8 +2,9 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.getAnnotationKeys = exports.getElementAnnotation = void 0;
4
4
  const llm_1 = require("@empiricalrun/llm");
5
- const vision_1 = require("@empiricalrun/llm/vision");
6
5
  const constants_1 = require("../../constants");
6
+ const promptTemplate_0 = "{{#section \"system\"}}\nYou are an expert in describing the images and it's content. You need to provide the descriptions of annotated elements\npresent in the image.\n\nYou will be provided with an annotated screenshot where interact-able / clickable elements are annotated. The annotation\nis done by drawing a red box around the element and a small yellow box on it which contains unique element id.\n\nYou are given a Annotations which contains list of unique element id and description of the element separated by \":\".\n\nYou are also given the description of the element on which the action needs to be taken. The description includes\ninformation about how the element looks, it's position etc.\n\nYour task is to provide the annotation of the element on which the action needs to be performed based on the element\ndescription.\n\nFollow steps to fulfil your task:\n- Using the list of all element Ids provided to you, map all the element Ids on the annotated screen and describe each\nelement.\n- For describing each element Id\n-- iterate over each element Id in annotation list\n-- check if the description is already present for the element Id in the Annotation provided to you. If present skip\ndescribing it and use it as is.\n-- if the description is NA, then identify the element in the annotated screenshot and describe it using the image or\nicon enclosed in the element.\n- Respond with the mapped element Ids as \"enriched_annotations\"\n- Based on the description provided to you and the enriched annotations, first identify the element Id whose description\nmatches the task provided\n\nNote:\n- Ensure providing the description of all the elements in the list.\n- Don't update the description if its already present in the given annotations\n- Replace all the \"NA\" with description of the element. 
Its position, how does it look like etc.\n- There should be no \"NA\" present in any of the element description\n{{/section}}\n\n{{#section \"user\"}}\nElement description:\n{{elementDescription}}\n\nAnnotations:\n{{annotations}}\n\n{{image annotatedScreenshot}}\n{{/section}}";
7
+ const lib_1 = require("../../prompts/lib");
7
8
  const utils_1 = require("../utils");
8
9
  const annotationToolAction = {
9
10
  name: "element_annotation",
@@ -66,58 +67,11 @@ async function getElementAnnotation({ elementDescription, annotations, annotated
66
67
  preference,
67
68
  },
68
69
  });
69
- const systemMessage = {
70
- role: "system",
71
- content: `You are an expert in describing the images and it's content. You need to provide the descriptions of annotated elements present in the image.
72
-
73
- You will be provided with an annotated screenshot where interact-able / clickable elements are annotated. The annotation is done by drawing a red box around the element and a small yellow box on it which contains unique element id.
74
-
75
- You are given a Annotations which contains list of unique element id and description of the element separated by ":".
76
-
77
- You are also given the description of the element on which the action needs to be taken. The description includes information about how the element looks, it's position etc.
78
-
79
- Your task is to provide the annotation of the element on which the action needs to be performed based on the element description.
80
-
81
- Follow steps to fulfil your task:
82
- - Using the list of all element Ids provided to you, map all the element Ids on the annotated screen and describe each element.
83
- - For describing each element Id
84
- -- iterate over each element Id in annotation list
85
- -- check if the description is already present for the element Id in the Annotation provided to you. If present skip describing it and use it as is.
86
- -- if the description is NA, then identify the element in the annotated screenshot and describe it using the image or icon enclosed in the element.
87
- - Respond with the mapped element Ids as "enriched_annotations"
88
- - Based on the description provided to you and the enriched annotations, first identify the element Id whose description matches the task provided
89
-
90
- Note:
91
- - Ensure providing the description of all the elements in the list.
92
- - Don't update the description if its already present in the given annotations
93
- - Replace all the "NA" with description of the element. Its position, how does it look like etc.
94
- - There should be no "NA" present in any of the element description
95
- `,
96
- };
97
- const userMessage = {
98
- role: "user",
99
- content: [
100
- {
101
- type: "text",
102
- text: `
103
- Element description:
104
- ${elementDescription}
105
-
106
- Annotations:
107
- ${annotations}`,
108
- },
109
- {
110
- type: "image_url",
111
- image_url: {
112
- url: (0, vision_1.imageFormatForProvider)(options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER, annotatedScreenshot),
113
- },
114
- },
115
- ],
116
- };
117
- const messages = [
118
- systemMessage,
119
- userMessage,
120
- ];
70
+ const messages = (0, lib_1.compilePrompt)(promptTemplate_0, {
71
+ elementDescription,
72
+ annotations,
73
+ annotatedScreenshot,
74
+ }, options);
121
75
  llm =
122
76
  llm ||
123
77
  new llm_1.LLM({
@@ -1,10 +1,11 @@
1
1
  import { Page } from "playwright";
2
2
  import { ScopeVars, TestCase } from "../../types";
3
3
  import { BrowsingAgentOptions } from "../browsing";
4
- export declare function createTestUsingMasterAgent({ task, page, testCase, options, scopeVars, }: {
4
+ export declare function createTestUsingMasterAgent({ task, page, testCase, specPath, options, scopeVars, }: {
5
5
  task: string;
6
6
  page: Page;
7
7
  testCase?: TestCase;
8
+ specPath?: string;
8
9
  options: BrowsingAgentOptions;
9
10
  scopeVars?: ScopeVars;
10
11
  }): Promise<{
@@ -1 +1 @@
1
- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAclC,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAClD,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AA4BrB,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,OAAO,EAAE,oBAAoB,CAAC;IAC9B,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GA+UA"}
1
+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAclC,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAClD,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AA6BrB,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,oBAAoB,CAAC;IAC9B,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GA0WA"}
@@ -13,6 +13,7 @@ const session_1 = require("../../session");
13
13
  const browsing_1 = require("../browsing");
14
14
  const utils_2 = require("../browsing/utils");
15
15
  const skills_retriever_1 = require("../codegen/skills-retriever");
16
+ const run_1 = require("../planner/run");
16
17
  const run_time_planner_1 = require("../planner/run-time-planner");
17
18
  const utils_3 = require("../utils");
18
19
  const action_tool_calls_1 = require("./action-tool-calls");
@@ -29,8 +30,9 @@ function getPageVariables(stateVariables) {
29
30
  }, {});
30
31
  return pages;
31
32
  }
32
- async function createTestUsingMasterAgent({ task, page, testCase, options, scopeVars, }) {
33
+ async function createTestUsingMasterAgent({ task, page, testCase, specPath, options, scopeVars, }) {
33
34
  const useActionSpecificAnnotations = options?.useActionSpecificAnnotations || false;
35
+ const usePlannerInMaster = options?.usePlannerInMaster || false;
34
36
  const logger = new logger_1.CustomLogger({ useReporter: false });
35
37
  const testgenUpdatesReporter = new reporter_1.TestGenUpdatesReporter();
36
38
  const session = (0, session_1.getSessionDetails)();
@@ -74,6 +76,19 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
74
76
  });
75
77
  }
76
78
  skill_1.testCaseSkills.updateSkills(skills);
79
+ if (usePlannerInMaster && testCase && specPath) {
80
+ void testgenUpdatesReporter.sendMessage(`Planner is working on task: ${task}`);
81
+ logger.log(`Planner is working on task: ${task}`);
82
+ const plan = await (0, run_1.planTask)({
83
+ task,
84
+ specPath,
85
+ trace,
86
+ });
87
+ void testgenUpdatesReporter.sendMessage(`Here is the plan:\n${plan}`);
88
+ logger.log(`Here is the plan:\n${plan}`);
89
+ // Will assume this is the task hereon
90
+ task = plan;
91
+ }
77
92
  const actions = new actions_1.PlaywrightActions(testGenPage, scopeVars);
78
93
  await (0, utils_2.injectPwLocatorGenerator)(page);
79
94
  trace?.update({ input: { task } });
@@ -150,11 +165,11 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
150
165
  const masterAgentActionSpan = masterAgentSpan?.span({
151
166
  name: "master-agent-execute-action",
152
167
  });
168
+ output = {
169
+ action: args.action || args.skill,
170
+ reason: args.reason,
171
+ };
153
172
  try {
154
- output = {
155
- action: args.action || args.skill,
156
- reason: args.reason,
157
- };
158
173
  void testGenReporter.sendMessage(output.reason);
159
174
  logger.log(`Next Action: ${output.action}`);
160
175
  if (toolCall.actionType === skill_1.SKILL_USAGE) {
@@ -197,6 +212,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
197
212
  options,
198
213
  });
199
214
  if (annotationKeys.length > 0) {
215
+ // TODO: this string has newline characters that makes it harder to read
200
216
  const annotationMapString = annotationKeys
201
217
  ?.map((a) => `${a.elementID}: ${a.text}`)
202
218
  .join("\n");
@@ -254,7 +270,6 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
254
270
  generatedCodeSteps = await (0, browsing_1.executeTaskUsingBrowsingAgent)({
255
271
  trace: masterAgentActionSpan,
256
272
  action: output.action,
257
- logger,
258
273
  page,
259
274
  options,
260
275
  llm,
@@ -307,8 +322,8 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
307
322
  trace?.update({ input: { task }, output: { output } });
308
323
  masterAgentSpan?.end({
309
324
  output: {
310
- action: output.action,
311
- reason: output.reason,
325
+ action: output?.action,
326
+ reason: output?.reason,
312
327
  code: generatedCodeSteps,
313
328
  },
314
329
  });
@@ -1 +1 @@
1
- {"version":3,"file":"run-time-planner.d.ts","sourceRoot":"","sources":["../../../src/agent/planner/run-time-planner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAKrD;;;;;;GAMG;AACH,wBAAsB,cAAc,CAAC,EACnC,KAAK,EACL,IAAI,EACJ,YAAY,EACZ,KAAK,EACL,WAAW,GACZ,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC5B,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;;;;GAiHA"}
1
+ {"version":3,"file":"run-time-planner.d.ts","sourceRoot":"","sources":["../../../src/agent/planner/run-time-planner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAKrD;;;;;;GAMG;AACH,wBAAsB,cAAc,CAAC,EACnC,KAAK,EACL,IAAI,EACJ,YAAY,EACZ,KAAK,EACL,WAAW,GACZ,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC5B,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;;;;GAkHA"}
@@ -40,7 +40,8 @@ To fulfil your goal, follow these steps:
40
40
  {
41
41
  role: "user",
42
42
  content: `
43
- Task: ${task}
43
+ Task:
44
+ ${task}
44
45
 
45
46
  ----
46
47
 
@@ -1,7 +1,6 @@
1
1
  import { TraceClient } from "@empiricalrun/llm";
2
- import { TestCase } from "../../types";
3
- export declare function planTask({ testCase, specPath, trace, }: {
4
- testCase: TestCase;
2
+ export declare function planTask({ task, specPath, trace, }: {
3
+ task: string;
5
4
  specPath: string;
6
5
  trace?: TraceClient;
7
6
  }): Promise<string>;
@@ -1 +1 @@
1
- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/planner/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAMhE,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAsBvC,wBAAsB,QAAQ,CAAC,EAC7B,QAAQ,EACR,QAAQ,EACR,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,mBA0DA"}
1
+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/planner/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AA6BrD,wBAAsB,QAAQ,CAAC,EAC7B,IAAI,EACJ,QAAQ,EACR,KAAK,GACN,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,mBAyCA"}
@@ -3,6 +3,8 @@ Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.planTask = void 0;
4
4
  const llm_1 = require("@empiricalrun/llm");
5
5
  const context_1 = require("../../bin/utils/context");
6
+ const lib_1 = require("../../prompts/lib");
7
+ const promptTemplate_0 = "{{#section \"system\"}}\nYou are an expert software engineer in test. You are given a task to provide a high level plan to create a test for a\ngiven scenario.\n\nYou will be provided with already added tests and page object object models which you can use to plan out how to write\nthe test.\n\nThe expected plan should be a list of bullet points and each bullet point is a step in the test.\nYou will be provided with app knowledge as well, which can help you groom the steps in the tests.\n{{/section}}\n\n{{#section \"user\"}}\nApp knowledge\n{{appKnowledge}}\n\n-----\n\nCurrent tests and page object model references\n\n{{fileContext}}\n\n------\n\nTask:\n\n{{task}}\n\n------\n\nFollow the steps to create a test plan:\n- create sub tasks\n- read the task step by step and create sub tasks from the given task\n- Ensure no new steps are added which are not mentioned in the task\n- enriched sub tasks\n- Read the app knowledge provided to you and enrich the verified sub tasks based on the provided information in app\nknowledge.\n- Fill in the missing information in the verified sub tasks based on the app knowledge.\n- based on the type of task, add sub tasks to the verified sub tasks based on the matching criteria\n- final plan:\n- Once all the subtasks are enriched, list all the sub tasks as bullet points\n- Each bullet point should be one of the following actions:\n- Open page, Click on, Fill in, Assert, hover on, press, extract textContent\n- Do not respond with points which do not start with above actions.\n\nFollow the steps before responding\n- The steps should only contain bullet points on list of steps for the test\n- Do not add any other assertion which is not mentioned in the task or app knowledge\n- Respond with <create_sub_tasks></create_sub_tasks>\n<enriched_sub_tasks></enriched_sub_tasks> and <final_plan></final_plan>\n- The final plan should not mention reference to the knowledge base used to generate it\n{{/section}}";
6
8
  function extractTestPlan(input) {
7
9
  const result = {
8
10
  createSubTasks: "",
@@ -20,9 +22,7 @@ function extractTestPlan(input) {
20
22
  }
21
23
  return result;
22
24
  }
23
- async function planTask({ testCase, specPath, trace, }) {
24
- const task = testCase.steps.join("\n");
25
- // TODO: fix this
25
+ async function planTask({ task, specPath, trace, }) {
26
26
  let fileContext = "";
27
27
  try {
28
28
  const { pomPrompt, testFileContent } = await (0, context_1.contextForGeneration)(specPath);
@@ -37,24 +37,15 @@ ${pomPrompt}
37
37
  const plannerSpan = trace?.span({
38
38
  name: "planner",
39
39
  input: {
40
- testCase,
40
+ task,
41
41
  },
42
42
  });
43
43
  const appKnowledge = await (0, context_1.fetchAppKnowledge)();
44
- const promptSpan = plannerSpan?.span({
45
- name: "planner-prompt",
46
- input: {
47
- appKnowledge,
48
- fileContext,
49
- testCase,
50
- },
51
- });
52
- const messages = await (0, llm_1.getPrompt)("planner", {
44
+ const messages = (0, lib_1.compilePrompt)(promptTemplate_0, {
53
45
  appKnowledge,
54
46
  fileContext,
55
47
  task,
56
- }, 2);
57
- promptSpan?.end({ output: { messages } });
48
+ });
58
49
  const llm = new llm_1.LLM({
59
50
  provider: "openai",
60
51
  //TODO: change to o1
package/dist/bin/index.js CHANGED
@@ -102,8 +102,9 @@ async function runAgent(testGenConfig) {
102
102
  testCase.steps = [requestedChangeResp.output];
103
103
  }
104
104
  if (agent === "plan") {
105
+ const task = testCase.steps.join("\n");
105
106
  const plan = await (0, run_3.planTask)({
106
- testCase,
107
+ task,
107
108
  specPath,
108
109
  trace,
109
110
  });
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAOlC,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAQpC,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC,EAAE,SAAS,iBAsC3E"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAOlC,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAQpC,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC,EAAE,SAAS,iBAuC3E"}
package/dist/index.js CHANGED
@@ -31,8 +31,10 @@ async function createTest(task, page, scope) {
31
31
  projectRepoName: testGenConfig.options?.metadata.projectRepoName,
32
32
  });
33
33
  const fileService = new client_1.default(Number(port));
34
+ const { testCase, specPath } = testGenConfig;
34
35
  const { code, importPaths } = await (0, run_1.createTestUsingMasterAgent)({
35
- testCase: testGenConfig.testCase,
36
+ testCase,
37
+ specPath,
36
38
  page,
37
39
  task,
38
40
  options: {
@@ -0,0 +1,8 @@
1
+ import { LLMProvider } from "@empiricalrun/llm";
2
+ import OpenAI from "openai";
3
+ type PromptOptions = {
4
+ modelProvider?: LLMProvider;
5
+ };
6
+ export declare function compilePrompt<T extends object>(promptTemplate: string, params: T, options?: PromptOptions): OpenAI.Chat.Completions.ChatCompletionMessageParam[];
7
+ export {};
8
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/prompts/lib/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAGhD,OAAO,MAAM,MAAM,QAAQ,CAAC;AAyF5B,KAAK,aAAa,GAAG;IACnB,aAAa,CAAC,EAAE,WAAW,CAAC;CAC7B,CAAC;AAEF,wBAAgB,aAAa,CAAC,CAAC,SAAS,MAAM,EAC5C,cAAc,EAAE,MAAM,EACtB,MAAM,EAAE,CAAC,EACT,OAAO,CAAC,EAAE,aAAa,GACtB,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,0BAA0B,EAAE,CAwCtD"}
@@ -0,0 +1,118 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.compilePrompt = void 0;
7
+ const vision_1 = require("@empiricalrun/llm/vision");
8
+ const handlebars_1 = __importDefault(require("handlebars"));
9
+ const constants_1 = require("../../constants");
10
+ class SectionManager {
11
+ sections = {};
12
+ getSection(name) {
13
+ return this.sections[name] || "";
14
+ }
15
+ setSection(name, content) {
16
+ this.sections[name] = content;
17
+ }
18
+ getAllSections() {
19
+ return this.sections;
20
+ }
21
+ }
22
+ const IMAGE_TOKEN_PREFIX = "[[[HANDLEBARS_IMAGE:";
23
+ const IMAGE_TOKEN_SUFFIX = "]]]";
24
+ function createHandlebarsEnv() {
25
+ const HandlebarsEnv = handlebars_1.default.create();
26
+ const sectionManager = new SectionManager();
27
+ HandlebarsEnv.registerHelper("section", function (name, options) {
28
+ const content = options.fn(this);
29
+ sectionManager.setSection(name, content);
30
+ return ""; // don't output anything in place
31
+ });
32
+ HandlebarsEnv.registerHelper("image", function (imageParam) {
33
+ const tokenPayload = JSON.stringify({ url: imageParam });
34
+ // Use encodeURIComponent to avoid conflicts with special characters.
35
+ const token = `${IMAGE_TOKEN_PREFIX}${encodeURIComponent(tokenPayload)}${IMAGE_TOKEN_SUFFIX}`;
36
+ return token;
37
+ });
38
+ HandlebarsEnv.registerHelper("images", function (imagesParam) {
39
+ if (!Array.isArray(imagesParam))
40
+ return "";
41
+ return imagesParam
42
+ .map((url) => {
43
+ const tokenPayload = JSON.stringify({ url });
44
+ return `${IMAGE_TOKEN_PREFIX}${encodeURIComponent(tokenPayload)}${IMAGE_TOKEN_SUFFIX}`;
45
+ })
46
+ .join("");
47
+ });
48
+ return { HandlebarsEnv, sectionManager };
49
+ }
50
+ function processSectionContent(content) {
51
+ if (!content.includes(IMAGE_TOKEN_PREFIX)) {
52
+ return content.trim();
53
+ }
54
+ const segments = [];
55
+ const regex = /\[\[\[HANDLEBARS_IMAGE:(.*?)\]\]\]/g;
56
+ let lastIndex = 0;
57
+ let match;
58
+ while ((match = regex.exec(content)) !== null) {
59
+ // Get the text before the token.
60
+ const textPart = content.slice(lastIndex, match.index).trim();
61
+ if (textPart) {
62
+ segments.push({ type: "text", text: textPart });
63
+ }
64
+ // Decode the token payload.
65
+ try {
66
+ const payloadJson = decodeURIComponent(match[1]);
67
+ const payload = JSON.parse(payloadJson);
68
+ segments.push({ type: "image_url", image_url: { url: payload.url } });
69
+ }
70
+ catch (err) {
71
+ // If decoding/parsing fails, treat the token as literal text.
72
+ segments.push({ type: "text", text: match[0] });
73
+ }
74
+ lastIndex = match.index + match[0].length;
75
+ }
76
+ const remaining = content.slice(lastIndex).trim();
77
+ if (remaining) {
78
+ segments.push({ type: "text", text: remaining });
79
+ }
80
+ return segments;
81
+ }
82
+ function compilePrompt(promptTemplate, params, options) {
83
+ const { HandlebarsEnv, sectionManager } = createHandlebarsEnv();
84
+ const template = HandlebarsEnv.compile(promptTemplate, { noEscape: true });
85
+ template(params);
86
+ const sections = sectionManager.getAllSections();
87
+ // TODO: system cannot have images, we can add validation for that
88
+ const system = sections["system"];
89
+ const user = sections["user"];
90
+ if (!system || !user) {
91
+ // TODO: support templates that have only one section
92
+ throw new Error("Both system and user sections must be defined in the template");
93
+ }
94
+ const systemContent = processSectionContent(system);
95
+ const userContent = processSectionContent(user);
96
+ let userContentCorrectedForImageFormat = userContent;
97
+ if (Array.isArray(userContent)) {
98
+ const provider = options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER;
99
+ userContentCorrectedForImageFormat = userContent.map((c) => {
100
+ if (c.type === "image_url") {
101
+ return {
102
+ ...c,
103
+ image_url: {
104
+ url: (0, vision_1.imageFormatForProvider)(provider, c.image_url.url),
105
+ },
106
+ };
107
+ }
108
+ else {
109
+ return c;
110
+ }
111
+ });
112
+ }
113
+ return [
114
+ { role: "system", content: systemContent },
115
+ { role: "user", content: userContentCorrectedForImageFormat },
116
+ ];
117
+ }
118
+ exports.compilePrompt = compilePrompt;
@@ -1,4 +1,5 @@
1
- import { LLMModel, LLMProvider, ModelParameters, TraceClient } from "@empiricalrun/llm";
1
+ import { ModelParameters, TraceClient } from "@empiricalrun/llm";
2
+ import { Agent, LLMModel, LLMProvider } from "@empiricalrun/shared-types";
2
3
  import OpenAI from "openai";
3
4
  import { TestGenPage } from "../page";
4
5
  export type FileContent = {
@@ -10,14 +11,11 @@ export type Environment = {
10
11
  slug: string;
11
12
  playwrightProjects: string[];
12
13
  };
13
- export type Agent = "code" | "master" | "auto" | "plan";
14
14
  export type TestGenConfigOptions = {
15
15
  agent: Agent;
16
16
  model: LLMModel;
17
17
  modelProvider: LLMProvider;
18
18
  modelParameters?: ModelParameters;
19
- useActionSpecificAnnotations?: boolean;
20
- useStrReplace?: boolean;
21
19
  metadata: {
22
20
  testSessionId: number;
23
21
  generationId: number;
@@ -26,6 +24,9 @@ export type TestGenConfigOptions = {
26
24
  projectName: string;
27
25
  environment: "development" | "production";
28
26
  };
27
+ useActionSpecificAnnotations?: boolean;
28
+ useStrReplace?: boolean;
29
+ usePlannerInMaster?: boolean;
29
30
  };
30
31
  export type Build = {
31
32
  url?: string;
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,QAAQ,EACR,WAAW,EACX,eAAe,EACf,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAC3B,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,OAAO,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AAEtC,MAAM,MAAM,WAAW,GAAG;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;CACjB,CAAC;AAEF,MAAM,MAAM,WAAW,GAAG;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,kBAAkB,EAAE,MAAM,EAAE,CAAC;CAC9B,CAAC;AAEF,MAAM,MAAM,KAAK,GAAG,MAAM,GAAG,QAAQ,GAAG,MAAM,GAAG,MAAM,CAAC;AAExD,MAAM,MAAM,oBAAoB,GAAG;IACjC,KAAK,EAAE,KAAK,CAAC;IACb,KAAK,EAAE,QAAQ,CAAC;IAChB,aAAa,EAAE,WAAW,CAAC;IAC3B,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC,4BAA4B,CAAC,EAAE,OAAO,CAAC;IACvC,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,QAAQ,EAAE;QACR,aAAa,EAAE,MAAM,CAAC;QACtB,YAAY,EAAE,MAAM,CAAC;QACrB,YAAY,EAAE,MAAM,CAAC;QACrB,eAAe,EAAE,MAAM,CAAC;QACxB,WAAW,EAAE,MAAM,CAAC;QACpB,WAAW,EAAE,aAAa,GAAG,YAAY,CAAC;KAC3C,CAAC;CACH,CAAC;AAEF,MAAM,MAAM,KAAK,GAAG;IAClB,GAAG,CAAC,EAAE,MAAM,CAAC;CACd,CAAC;AAEF,MAAM,MAAM,aAAa,GAAG;IAC1B,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,QAAQ,CAAC;IACnB,KAAK,CAAC,EAAE,KAAK,CAAC;IACd,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,kBAAkB,CAAC,EAAE,yBAAyB,CAAC;IAC/C,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,CAAC;AAEF,MAAM,MAAM,yBAAyB,GAAG;IACtC,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;CACrB,CAAC;AAEF,MAAM,MAAM,QAAQ,GAAG;IACrB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB,CAAC;AAEF,MAAM,MAAM,yBAAyB,GAAG,CACtC,IAAI,EAAE,WAAW,EACjB,OAAO,EAAE;IACP,cAAc,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IACpC,iBAAiB,EAAE,CAAC,cAAc,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,KAAK,IAAI,CAAC;CAClE,KACE,MAAM,CAAC;AAEZ,MAAM,MAAM,YAAY,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,kBAAkB,CAAC;AAEtE,MAAM,MAAM,MAAM,GAAG;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,YAAY,CAAC;IACrB,OAAO,EAAE,CAAC,OAAO,EAAE;QACjB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC1B,KAAK,CAAC,EAAE,WAAW,CAAC;KACrB,KAAK,OAAO,CAAC
;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,YAAY,CAAC,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC,CAAC;IACjE,QAAQ,EAAE,CACR,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EACzB,OAAO,EAAE;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,YAAY,CAAC,EAAE,MAAM,CAAA;KAAE,KAChD;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,UAAU,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;CAC5C,CAAC;AAEF,MAAM,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACjE,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,WAAW,EAAE,MAAM,4BAA4B,CAAC;AAC1E,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,OAAO,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AAEtC,MAAM,MAAM,WAAW,GAAG;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;CACjB,CAAC;AAEF,MAAM,MAAM,WAAW,GAAG;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,kBAAkB,EAAE,MAAM,EAAE,CAAC;CAC9B,CAAC;AAEF,MAAM,MAAM,oBAAoB,GAAG;IACjC,KAAK,EAAE,KAAK,CAAC;IACb,KAAK,EAAE,QAAQ,CAAC;IAChB,aAAa,EAAE,WAAW,CAAC;IAC3B,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC,QAAQ,EAAE;QACR,aAAa,EAAE,MAAM,CAAC;QACtB,YAAY,EAAE,MAAM,CAAC;QACrB,YAAY,EAAE,MAAM,CAAC;QACrB,eAAe,EAAE,MAAM,CAAC;QACxB,WAAW,EAAE,MAAM,CAAC;QACpB,WAAW,EAAE,aAAa,GAAG,YAAY,CAAC;KAC3C,CAAC;IAEF,4BAA4B,CAAC,EAAE,OAAO,CAAC;IACvC,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,kBAAkB,CAAC,EAAE,OAAO,CAAC;CAC9B,CAAC;AAEF,MAAM,MAAM,KAAK,GAAG;IAClB,GAAG,CAAC,EAAE,MAAM,CAAC;CACd,CAAC;AAEF,MAAM,MAAM,aAAa,GAAG;IAC1B,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,QAAQ,CAAC;IACnB,KAAK,CAAC,EAAE,KAAK,CAAC;IACd,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,kBAAkB,CAAC,EAAE,yBAAyB,CAAC;IAC/C,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,CAAC;AAEF,MAAM,MAAM,yBAAyB,GAAG;IACtC,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;CACrB,CAAC;AAEF,MAAM,MAAM,QAAQ,GAAG;IACrB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB,CAAC;AAEF,MAAM,MAAM,yBAAyB,GAAG,CACtC,IAAI,EAAE,WAAW,EACjB,OAAO,EAAE;IACP,cAAc,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IACpC,iBAAiB,EAAE,CAAC,cAAc,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,KAAK,IAAI,CAAC;CAClE,KACE,MAAM,CAAC;AAEZ,MAAM,MAAM,YAAY,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,kBAAkB,CAAC;AAEtE,MAAM,MAAM,MAAM,GAAG;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,YAAY,CAAC;IACrB,OAAO,EAAE,CAAC,OAAO,EAAE;QACjB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC1B,KAAK,CAAC,EAAE,WAAW,CAAC;KACrB,KAAK
,OAAO,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,YAAY,CAAC,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC,CAAC;IACjE,QAAQ,EAAE,CACR,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EACzB,OAAO,EAAE;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,YAAY,CAAC,EAAE,MAAM,CAAA;KAAE,KAChD;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,UAAU,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;CAC5C,CAAC;AAEF,MAAM,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@empiricalrun/test-gen",
3
- "version": "0.42.18",
3
+ "version": "0.42.20",
4
4
  "publishConfig": {
5
5
  "registry": "https://registry.npmjs.org/",
6
6
  "access": "public"
@@ -73,8 +73,8 @@
73
73
  "tsx": "^4.16.2",
74
74
  "typescript": "^5.3.3",
75
75
  "@empiricalrun/llm": "^0.9.32",
76
- "@empiricalrun/reporter": "^0.23.1",
77
- "@empiricalrun/r2-uploader": "^0.3.8"
76
+ "@empiricalrun/r2-uploader": "^0.3.8",
77
+ "@empiricalrun/reporter": "^0.23.1"
78
78
  },
79
79
  "devDependencies": {
80
80
  "@playwright/test": "1.47.1",
@@ -86,7 +86,8 @@
86
86
  "@types/md5": "^2.3.5",
87
87
  "js-levenshtein": "^1.1.6",
88
88
  "playwright": "1.47.1",
89
- "ts-patch": "^3.3.0"
89
+ "ts-patch": "^3.3.0",
90
+ "@empiricalrun/shared-types": "0.0.0"
90
91
  },
91
92
  "scripts": {
92
93
  "dev": "tspc --build --watch",
@@ -1,3 +0,0 @@
1
- import OpenAI from "openai";
2
- export declare function compilePrompt<T extends object>(promptTemplate: string, params: T): Promise<OpenAI.Chat.Completions.ChatCompletionMessageParam[]>;
3
- //# sourceMappingURL=promptBuilder.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"promptBuilder.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/promptBuilder.ts"],"names":[],"mappings":"AACA,OAAO,MAAM,MAAM,QAAQ,CAAC;AA6B5B,wBAAsB,aAAa,CAAC,CAAC,SAAS,MAAM,EAClD,cAAc,EAAE,MAAM,EACtB,MAAM,EAAE,CAAC,GACR,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,0BAA0B,EAAE,CAAC,CAe/D"}
@@ -1,44 +0,0 @@
1
- "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
- Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.compilePrompt = void 0;
7
- const handlebars_1 = __importDefault(require("handlebars"));
8
- class SectionManager {
9
- sections = {};
10
- getSection(name) {
11
- return this.sections[name] || "";
12
- }
13
- setSection(name, content) {
14
- this.sections[name] = content;
15
- }
16
- getAllSections() {
17
- return this.sections;
18
- }
19
- }
20
- function createHandlebarsEnv() {
21
- const HandlebarsEnv = handlebars_1.default.create();
22
- const sectionManager = new SectionManager();
23
- HandlebarsEnv.registerHelper("section", function (name, options) {
24
- const content = options.fn(this);
25
- sectionManager.setSection(name, content);
26
- return ""; // Don't output anything in place
27
- });
28
- return { HandlebarsEnv, sectionManager };
29
- }
30
- async function compilePrompt(promptTemplate, params) {
31
- const { HandlebarsEnv, sectionManager } = createHandlebarsEnv();
32
- const template = HandlebarsEnv.compile(promptTemplate, { noEscape: true });
33
- template(params);
34
- const { system, user } = sectionManager.getAllSections();
35
- if (!system || !user) {
36
- // TODO: support templates that have only one section
37
- throw new Error("Both system and user sections must be defined in the template");
38
- }
39
- return [
40
- { role: "system", content: system },
41
- { role: "user", content: user },
42
- ];
43
- }
44
- exports.compilePrompt = compilePrompt;
@@ -1,13 +0,0 @@
1
- import { TraceClient } from "@empiricalrun/llm";
2
- /**
3
- * This agent is used to verify whether the task is done basis the conversation history
4
- */
5
- export declare function verificationAgent({ trace, task, conversation, }: {
6
- trace?: TraceClient;
7
- conversation: string[];
8
- task: string;
9
- }): Promise<{
10
- isDone: boolean;
11
- reason: string;
12
- }>;
13
- //# sourceMappingURL=index.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/verification/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAIhE;;GAEG;AACH,wBAAsB,iBAAiB,CAAC,EACtC,KAAK,EACL,IAAI,EACJ,YAAY,GACb,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,IAAI,EAAE,MAAM,CAAC;CACd;;;GA+EA"}
@@ -1,84 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.verificationAgent = void 0;
4
- const llm_1 = require("@empiricalrun/llm");
5
- const utils_1 = require("../utils");
6
- /**
7
- * This agent is used to verify whether the task is done basis the conversation history
8
- */
9
- async function verificationAgent({ trace, task, conversation, }) {
10
- const verificationAgentSpan = trace?.span({
11
- name: "verification-agent",
12
- input: {
13
- task,
14
- conversation,
15
- },
16
- });
17
- const messages = await (0, llm_1.getPrompt)("agent-steps-verification", {
18
- task,
19
- conversation: conversation.join("\n"),
20
- }, 5);
21
- const llm = new llm_1.LLM({ provider: "openai" });
22
- const response = await llm.createChatCompletion({
23
- trace: verificationAgentSpan,
24
- traceName: "verification-agent-llm",
25
- model: "gpt-4o",
26
- messages,
27
- tools: [
28
- {
29
- type: "function",
30
- function: {
31
- name: "task_done",
32
- description: "end the task by calling this method",
33
- parameters: {
34
- type: "object",
35
- properties: {
36
- actions: {
37
- type: "string",
38
- description: "actions extracted from task",
39
- },
40
- successful_actions: {
41
- type: "string",
42
- description: "successful actions mentioned in the conversation",
43
- },
44
- reason: {
45
- type: "string",
46
- description: "reasoning for identification of task status",
47
- },
48
- isDone: {
49
- type: "boolean",
50
- description: "whether the task is done",
51
- },
52
- },
53
- required: ["isDone", "reason"],
54
- },
55
- },
56
- },
57
- ],
58
- modelParameters: {
59
- tool_choice: "required",
60
- temperature: 0.5,
61
- },
62
- });
63
- const toolCallResp = (response?.tool_calls || [])[0];
64
- if (toolCallResp) {
65
- const toolCall = (0, utils_1.parseJson)(toolCallResp.function.arguments);
66
- const output = {
67
- isDone: toolCall.isDone,
68
- reason: toolCall.reason,
69
- };
70
- verificationAgentSpan?.end({
71
- output,
72
- });
73
- return output;
74
- }
75
- const output = {
76
- isDone: false,
77
- reason: "LLM failed to generate a valid response",
78
- };
79
- verificationAgentSpan?.end({
80
- output,
81
- });
82
- return output;
83
- }
84
- exports.verificationAgent = verificationAgent;
@@ -1,4 +0,0 @@
1
- import { EvaluateFn } from "./type";
2
- export declare const verifierAgentEvaluate: EvaluateFn;
3
- export default verifierAgentEvaluate;
4
- //# sourceMappingURL=verification-agent.evals.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"verification-agent.evals.d.ts","sourceRoot":"","sources":["../../src/evals/verification-agent.evals.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAEpC,eAAO,MAAM,qBAAqB,EAAE,UAgBnC,CAAC;AAEF,eAAe,qBAAqB,CAAC"}
@@ -1,23 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.verifierAgentEvaluate = void 0;
4
- const verification_1 = require("../agent/verification");
5
- const verifierAgentEvaluate = async ({ item, trace }) => {
6
- const { conversation = [], task = "" } = item.input;
7
- const output = await (0, verification_1.verificationAgent)({
8
- conversation,
9
- trace,
10
- task,
11
- });
12
- return {
13
- scores: [
14
- {
15
- name: "equality",
16
- value: item.expectedOutput.isDone === output.isDone ? 1 : 0,
17
- },
18
- ],
19
- output,
20
- };
21
- };
22
- exports.verifierAgentEvaluate = verifierAgentEvaluate;
23
- exports.default = exports.verifierAgentEvaluate;