@empiricalrun/test-gen 0.31.19 → 0.31.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/CHANGELOG.md +15 -0
  2. package/dist/actions/assert.d.ts.map +1 -1
  3. package/dist/actions/assert.js +6 -4
  4. package/dist/actions/click.d.ts.map +1 -1
  5. package/dist/actions/click.js +5 -3
  6. package/dist/actions/done.js +1 -1
  7. package/dist/actions/fill.d.ts.map +1 -1
  8. package/dist/actions/fill.js +4 -2
  9. package/dist/actions/goto.d.ts.map +1 -1
  10. package/dist/actions/goto.js +5 -3
  11. package/dist/actions/hover.js +2 -2
  12. package/dist/actions/index.d.ts +8 -3
  13. package/dist/actions/index.d.ts.map +1 -1
  14. package/dist/actions/index.js +56 -9
  15. package/dist/actions/reload-page.d.ts.map +1 -1
  16. package/dist/actions/reload-page.js +4 -2
  17. package/dist/actions/skill.d.ts +18 -0
  18. package/dist/actions/skill.d.ts.map +1 -0
  19. package/dist/actions/skill.js +94 -0
  20. package/dist/actions/text-content.d.ts.map +1 -1
  21. package/dist/actions/text-content.js +4 -2
  22. package/dist/agent/browsing/index.d.ts +13 -3
  23. package/dist/agent/browsing/index.d.ts.map +1 -1
  24. package/dist/agent/browsing/index.js +119 -207
  25. package/dist/agent/codegen/create-test-block.js +1 -1
  26. package/dist/agent/codegen/skills-retriever.d.ts +13 -0
  27. package/dist/agent/codegen/skills-retriever.d.ts.map +1 -0
  28. package/dist/agent/codegen/skills-retriever.js +61 -0
  29. package/dist/agent/codegen/use-skill.d.ts +9 -0
  30. package/dist/agent/codegen/use-skill.d.ts.map +1 -0
  31. package/dist/agent/codegen/use-skill.js +49 -0
  32. package/dist/agent/codegen/utils.d.ts +9 -0
  33. package/dist/agent/codegen/utils.d.ts.map +1 -1
  34. package/dist/agent/codegen/utils.js +20 -1
  35. package/dist/agent/master/run.d.ts +16 -5
  36. package/dist/agent/master/run.d.ts.map +1 -1
  37. package/dist/agent/master/run.js +178 -38
  38. package/dist/agent/verification/index.d.ts.map +1 -1
  39. package/dist/agent/verification/index.js +2 -0
  40. package/dist/bin/utils/context.d.ts +1 -0
  41. package/dist/bin/utils/context.d.ts.map +1 -1
  42. package/dist/bin/utils/context.js +7 -2
  43. package/dist/bin/utils/platform/web/index.d.ts +1 -0
  44. package/dist/bin/utils/platform/web/index.d.ts.map +1 -1
  45. package/dist/bin/utils/platform/web/index.js +28 -1
  46. package/dist/file/client.d.ts +2 -1
  47. package/dist/file/client.d.ts.map +1 -1
  48. package/dist/file/client.js +2 -2
  49. package/dist/file/server.d.ts.map +1 -1
  50. package/dist/file/server.js +3 -2
  51. package/dist/index.d.ts.map +1 -1
  52. package/dist/index.js +13 -5
  53. package/dist/types/index.d.ts +10 -4
  54. package/dist/types/index.d.ts.map +1 -1
  55. package/package.json +3 -3
@@ -1,229 +1,141 @@
1
1
  "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
2
  Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.browsingAgentUsingMasterAgent = void 0;
7
- const llm_1 = require("@empiricalrun/llm");
8
- const crypto_1 = __importDefault(require("crypto"));
9
- const actions_1 = require("../../actions");
10
- const logger_1 = require("../../bin/logger");
3
+ exports.executeTaskUsingBrowsingAgent = void 0;
11
4
  const constants_1 = require("../../constants");
12
5
  const reporter_1 = require("../../reporter");
13
6
  const session_1 = require("../../session");
14
7
  const html_1 = require("../../utils/html");
15
- const run_1 = require("../master/run");
16
8
  const verification_1 = require("../verification");
17
9
  const o1_completion_1 = require("./o1-completion");
18
10
  const utils_1 = require("./utils");
19
- async function browsingAgentUsingMasterAgent(task, page, options) {
20
- const logger = new logger_1.CustomLogger({ useReporter: false });
21
- const testgenUpdatesReporter = new reporter_1.TestGenUpdatesReporter();
22
- // add timeout for the page to settle in
23
- await page.waitForTimeout(3000);
24
- const trace = llm_1.langfuseInstance.trace({
25
- name: "test-generator",
26
- id: crypto_1.default.randomUUID(),
27
- version: (0, session_1.getSessionDetails)().version,
28
- metadata: {
29
- generationId: (0, session_1.getSessionDetails)().generationId,
30
- sessionId: (0, session_1.getSessionDetails)().sessionId,
31
- },
32
- tags: [
33
- options.metadata?.projectName,
34
- options.metadata?.environment,
35
- ].filter((s) => !!s),
36
- });
37
- const llm = new llm_1.LLM({
38
- trace,
39
- provider: options.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
40
- defaultModel: options.model || constants_1.DEFAULT_MODEL,
41
- providerApiKey: constants_1.MODEL_API_KEYS[options.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER],
42
- // we will be using google model for larger context window, in such cases 1 million tokens is not enough
43
- maxTokens: options.modelProvider === "google" ? 3000000 : 1000000,
44
- });
45
- const actions = new actions_1.PlaywrightActions(page);
46
- const tools = actions.getActionSchemas();
47
- await (0, utils_1.injectPwLocatorGenerator)(page);
48
- trace.update({ input: { task } });
11
+ async function executeTaskUsingBrowsingAgent({ trace, action, logger, page, options, llm, actions, }) {
12
+ let isTaskDone = false;
13
+ const executedActions = [];
49
14
  let lastActionExecTrace = "";
50
- let isGivenTaskDone = false;
51
- const masterAgentActions = [];
52
- try {
53
- while (!isGivenTaskDone) {
54
- const masterAgentSpan = trace.span({ name: "master-agent" });
55
- if (masterAgentActions.length > 0) {
56
- const verificationAgentResp = await (0, verification_1.verificationAgent)({
57
- llm,
58
- trace: masterAgentSpan,
59
- task,
60
- conversation: [
61
- "Successfully executed actions",
62
- ...masterAgentActions,
63
- ],
15
+ const tools = actions.getBrowsingActionSchemas();
16
+ const testgenUpdatesReporter = new reporter_1.TestGenUpdatesReporter();
17
+ while (!isTaskDone) {
18
+ const browsingAgentSpan = trace.span({
19
+ name: `browsing-agent`,
20
+ });
21
+ const sessionState = await (0, session_1.getSessionState)();
22
+ if (sessionState === "request_complete") {
23
+ break;
24
+ }
25
+ const pageContentSpan = browsingAgentSpan.span({
26
+ name: "page-content",
27
+ });
28
+ const pageContent = await page.content();
29
+ pageContentSpan.end({ output: { pageContent } });
30
+ const sanitizationSpan = browsingAgentSpan.span({
31
+ name: "page-sanitization",
32
+ });
33
+ const pageSnapshot = (0, html_1.sanitizeHtml)(pageContent, options.htmlSanitize);
34
+ sanitizationSpan.end({ output: { pageSnapshot } });
35
+ const promptSpan = browsingAgentSpan.span({ name: "page-prompt" });
36
+ // extract all successful actions
37
+ const successfulActions = executedActions
38
+ .filter((a) => !a.isError)
39
+ .map((a) => a.action);
40
+ if (successfulActions.length > 0) {
41
+ const verificationAgentResp = await (0, verification_1.verificationAgent)({
42
+ llm,
43
+ trace: browsingAgentSpan,
44
+ task: action,
45
+ conversation: ["Successfully executed actions", ...successfulActions],
46
+ });
47
+ isTaskDone = verificationAgentResp.isDone;
48
+ logger.log(`isTaskDone: ${isTaskDone}`);
49
+ logger.log(`reason: ${verificationAgentResp.reason}`);
50
+ if (isTaskDone) {
51
+ browsingAgentSpan.event({ name: "task-done" });
52
+ browsingAgentSpan.end({
53
+ output: { taskDone: true, reason: verificationAgentResp.reason },
64
54
  });
65
- isGivenTaskDone = verificationAgentResp.isDone;
66
- if (isGivenTaskDone) {
67
- await testgenUpdatesReporter.sendMessage(`${verificationAgentResp.reason} Marking the task as done.`);
68
- break;
69
- }
55
+ break;
70
56
  }
57
+ }
58
+ const messages = await (0, utils_1.getPromptForNextAction)({
59
+ pageSnapshot,
60
+ previousActions: successfulActions,
61
+ task: action,
62
+ lastActionErrors: lastActionExecTrace ? [lastActionExecTrace] : [],
63
+ promptType: "browsing-agent-as-tool",
64
+ });
65
+ promptSpan.end({ output: { messages } });
66
+ let completion;
67
+ completion = await (0, o1_completion_1.getO1Completion)({
68
+ //@ts-ignore
69
+ messages,
70
+ tools,
71
+ trace: browsingAgentSpan,
72
+ });
73
+ // If O1 completion fails due to any reason, resort to old flow
74
+ if (!completion) {
75
+ completion = await llm.createChatCompletion({
76
+ messages,
77
+ tools,
78
+ trace: browsingAgentSpan,
79
+ model: options.model || constants_1.DEFAULT_MODEL,
80
+ modelParameters: {
81
+ ...constants_1.DEFAULT_MODEL_PARAMETERS,
82
+ ...options.modelParameters,
83
+ tool_choice: "required",
84
+ },
85
+ });
86
+ }
87
+ const toolCalls = completion?.tool_calls || [];
88
+ // LLM might respond with empty tool_calls and we can go into endless loop
89
+ // if we donot record this action and mark it as error
90
+ if (!toolCalls.length) {
91
+ executedActions.push({
92
+ isError: true,
93
+ action: "",
94
+ });
95
+ }
96
+ const toolCallsSpan = browsingAgentSpan.span({ name: "tool-calls" });
97
+ for (const i in toolCalls) {
98
+ const toolCall = toolCalls[i];
71
99
  const sessionState = await (0, session_1.getSessionState)();
72
100
  if (sessionState === "request_complete") {
73
- await testgenUpdatesReporter.sendMessage("Aborting task, marking the task as done.");
74
- break;
75
- }
76
- const { action, reason } = await (0, run_1.masterAgent)(task, page, masterAgentActions, masterAgentSpan, llm, options);
77
- logger.log(`Next action: ${action} \n reason: ${reason}`);
78
- if (!action) {
79
101
  break;
80
102
  }
81
- if (isGivenTaskDone) {
82
- break;
83
- }
84
- let isTaskDone = false;
85
- const executedActions = [];
86
- while (!isTaskDone) {
87
- const browsingAgentSpan = masterAgentSpan.span({
88
- name: `browsing-agent`,
89
- });
90
- const sessionState = await (0, session_1.getSessionState)();
91
- if (sessionState === "request_complete") {
92
- break;
93
- }
94
- const pageContentSpan = browsingAgentSpan.span({
95
- name: "page-content",
96
- });
97
- const pageContent = await page.content();
98
- pageContentSpan.end({ output: { pageContent } });
99
- const sanitizationSpan = browsingAgentSpan.span({
100
- name: "page-sanitization",
101
- });
102
- const pageSnapshot = (0, html_1.sanitizeHtml)(pageContent, options.htmlSanitize);
103
- sanitizationSpan.end({ output: { pageSnapshot } });
104
- const promptSpan = browsingAgentSpan.span({ name: "page-prompt" });
105
- // extract all successful actions
106
- const successfulActions = executedActions
107
- .filter((a) => !a.isError)
108
- .map((a) => a.action);
109
- if (successfulActions.length > 0) {
110
- const verificationAgentResp = await (0, verification_1.verificationAgent)({
111
- llm,
112
- trace: browsingAgentSpan,
113
- task: action,
114
- conversation: [
115
- "Successfully executed actions",
116
- ...successfulActions,
117
- ],
118
- });
119
- isTaskDone = verificationAgentResp.isDone;
120
- logger.log(`isTaskDone: ${isTaskDone}`);
121
- logger.log(`reason: ${verificationAgentResp.reason}`);
122
- if (isTaskDone) {
123
- browsingAgentSpan.event({ name: "task-done" });
124
- browsingAgentSpan.end({
125
- output: { taskDone: true, reason: verificationAgentResp.reason },
126
- });
127
- break;
128
- }
129
- }
130
- const messages = await (0, utils_1.getPromptForNextAction)({
131
- pageSnapshot,
132
- previousActions: successfulActions,
133
- task: action,
134
- lastActionErrors: lastActionExecTrace ? [lastActionExecTrace] : [],
135
- promptType: "browsing-agent-as-tool",
103
+ try {
104
+ await actions.executeAction(toolCall.function.name, JSON.parse(toolCall.function.arguments), toolCallsSpan);
105
+ executedActions.push({
106
+ isError: false,
107
+ action: JSON.stringify(toolCall),
136
108
  });
137
- promptSpan.end({ output: { messages } });
138
- let completion;
139
- completion = await (0, o1_completion_1.getO1Completion)({
140
- //@ts-ignore
141
- messages,
142
- tools,
143
- trace: browsingAgentSpan,
109
+ lastActionExecTrace = "";
110
+ }
111
+ catch (e) {
112
+ // TODO: implement feedback loop to llm
113
+ executedActions.push({
114
+ isError: true,
115
+ action: JSON.stringify(toolCall.function.arguments)?.reason,
144
116
  });
145
- // If O1 completion fails due to any reason, resort to old flow
146
- if (!completion) {
147
- completion = await llm.createChatCompletion({
148
- messages,
149
- tools,
150
- trace: browsingAgentSpan,
151
- model: options.model || constants_1.DEFAULT_MODEL,
152
- modelParameters: {
153
- ...constants_1.DEFAULT_MODEL_PARAMETERS,
154
- ...options.modelParameters,
155
- tool_choice: "required",
156
- },
157
- });
158
- }
159
- const toolCalls = completion?.tool_calls || [];
160
- // LLM might respond with empty tool_calls and we can go into endless loop
161
- // if we donot record this action and mark it as error
162
- if (!toolCalls.length) {
163
- executedActions.push({
164
- isError: true,
165
- action: "",
166
- });
167
- }
168
- const toolCallsSpan = browsingAgentSpan.span({ name: "tool-calls" });
169
- for (const i in toolCalls) {
170
- const toolCall = toolCalls[i];
171
- const sessionState = await (0, session_1.getSessionState)();
172
- if (sessionState === "request_complete") {
173
- break;
174
- }
175
- try {
176
- await actions.executeAction(toolCall.function.name, JSON.parse(toolCall.function.arguments));
177
- executedActions.push({
178
- isError: false,
179
- action: JSON.stringify(toolCall),
180
- });
181
- lastActionExecTrace = "";
182
- }
183
- catch (e) {
184
- // TODO: implement feedback loop to llm
185
- executedActions.push({
186
- isError: true,
187
- action: JSON.stringify(toolCall.function.arguments)
188
- ?.reason,
189
- });
190
- lastActionExecTrace = e.message;
191
- void testgenUpdatesReporter.sendMessage(e.message);
192
- logger.error(lastActionExecTrace, e);
193
- }
194
- }
195
- toolCallsSpan.end({ output: { toolCalls } });
196
- // mark task as done if llm is stuck in loop
197
- if (executedActions.length >= 3) {
198
- const lastThreeActions = executedActions.slice(-3);
199
- const lastThreeActionsFailed = lastThreeActions.every((a) => a.isError);
200
- // get last 3 lines of code
201
- const lastThreeLinesOfCode = actions.getLastCodeLines(3);
202
- const areLastActionsRepeatitive = lastThreeLinesOfCode.length === 3 &&
203
- lastThreeLinesOfCode.every((a) => a === lastThreeLinesOfCode[0]);
204
- if (lastThreeActionsFailed || areLastActionsRepeatitive) {
205
- // TODO: this should be sent to dashboard
206
- logger.error("Agent is not able to figure out next action, marking task as done");
207
- await testgenUpdatesReporter.sendMessage("Agent is not able to figure out next action, marking task as done");
208
- isGivenTaskDone = true;
209
- break;
210
- }
211
- }
117
+ lastActionExecTrace = e.message;
118
+ void testgenUpdatesReporter.sendMessage(e.message);
119
+ logger.error(lastActionExecTrace, e);
120
+ }
121
+ }
122
+ toolCallsSpan.end({ output: { toolCalls } });
123
+ // mark task as done if llm is stuck in loop
124
+ if (executedActions.length >= 3) {
125
+ const lastThreeActions = executedActions.slice(-3);
126
+ const lastThreeActionsFailed = lastThreeActions.every((a) => a.isError);
127
+ // get last 3 lines of code
128
+ const lastThreeLinesOfCode = actions.getLastCodeLines(3);
129
+ const areLastActionsRepeatitive = lastThreeLinesOfCode.length === 3 &&
130
+ lastThreeLinesOfCode.every((a) => a === lastThreeLinesOfCode[0]);
131
+ if (lastThreeActionsFailed || areLastActionsRepeatitive) {
132
+ // TODO: this should be sent to dashboard
133
+ const error = "Agent is not able to figure out next browser action, ending retries";
134
+ logger.error(error);
135
+ await testgenUpdatesReporter.sendMessage(error);
136
+ throw Error(error);
212
137
  }
213
- masterAgentSpan.end({ output: { action, reason } });
214
- masterAgentActions.push(action);
215
138
  }
216
139
  }
217
- catch (e) {
218
- console.error("Failed to generate code for the given task. Please retry again.", e);
219
- await testgenUpdatesReporter.sendMessage(`Failed to generate code for the given task. Please retry again.`);
220
- }
221
- await page.close();
222
- const code = actions.generateCode();
223
- trace.update({ input: { task }, output: { code } });
224
- logger.success("Successfully generated code for the given task");
225
- await testgenUpdatesReporter.sendMessage(`Successfully generated code for the given task. \n View [trace](${trace.getTraceUrl()})`);
226
- logger.log(`Trace: ${trace.getTraceUrl()}`);
227
- return code;
228
140
  }
229
- exports.browsingAgentUsingMasterAgent = browsingAgentUsingMasterAgent;
141
+ exports.executeTaskUsingBrowsingAgent = executeTaskUsingBrowsingAgent;
@@ -16,7 +16,7 @@ async function createEmptyTestCaseBlock({ testCase, file, options, trace, }) {
16
16
  trace =
17
17
  trace ||
18
18
  llm_1.langfuseInstance.trace({
19
- name: "update-test",
19
+ name: "create-empty-test-block",
20
20
  id: crypto.randomUUID(),
21
21
  release: session.version,
22
22
  tags: [
@@ -0,0 +1,13 @@
1
+ import { TraceClient } from "@empiricalrun/llm";
2
+ import { TestCase, TestGenConfigOptions } from "../../types";
3
+ export declare function getAppropriateSkills({ testCase, options, trace, }: {
4
+ testCase: TestCase;
5
+ options?: TestGenConfigOptions;
6
+ trace?: TraceClient;
7
+ }): Promise<{
8
+ testStep: string;
9
+ filePath: string;
10
+ usageExample: string;
11
+ reason: string;
12
+ }[]>;
13
+ //# sourceMappingURL=skills-retriever.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"skills-retriever.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/skills-retriever.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAYhE,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAG7D,wBAAsB,oBAAoB,CAAC,EACzC,QAAQ,EACR,OAAO,EACP,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB;;;;;KAmDA"}
@@ -0,0 +1,61 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.getAppropriateSkills = void 0;
7
+ const llm_1 = require("@empiricalrun/llm");
8
+ const fs_1 = __importDefault(require("fs"));
9
+ const logger_1 = require("../../bin/logger");
10
+ const context_1 = require("../../bin/utils/context");
11
+ const fs_2 = require("../../bin/utils/fs");
12
+ const constants_1 = require("../../constants");
13
+ const utils_1 = require("./utils");
14
+ async function getAppropriateSkills({ testCase, options, trace, }) {
15
+ const logger = new logger_1.CustomLogger({ useReporter: false });
16
+ logger.log("getting skill set for the repository");
17
+ const filter = await (0, context_1.createGitIgnoreFileFilter)();
18
+ const pomFiles = await (0, fs_2.generatePromptFromDirectory)("./pages", filter);
19
+ const fetchSkillsSpan = trace?.span({
20
+ name: "fetch-pom-skills",
21
+ input: {
22
+ testCase,
23
+ },
24
+ });
25
+ const promptSpan = fetchSkillsSpan?.span({
26
+ name: "fetch-pom-skills-prompt",
27
+ });
28
+ const prompt = await (0, llm_1.getPrompt)("fetch-skills-prompt", {
29
+ pageFiles: pomFiles,
30
+ scenarioName: testCase.name,
31
+ scenario: testCase.steps.join("\n"),
32
+ });
33
+ promptSpan?.end({ output: { prompt } });
34
+ const llm = new llm_1.LLM({
35
+ trace: fetchSkillsSpan,
36
+ provider: options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
37
+ defaultModel: options?.model || constants_1.DEFAULT_MODEL,
38
+ providerApiKey: constants_1.MODEL_API_KEYS[options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER],
39
+ });
40
+ const firstShotMessage = await llm.createChatCompletion({
41
+ messages: prompt,
42
+ traceName: "fetch-pom-skills-llm",
43
+ modelParameters: {
44
+ ...constants_1.DEFAULT_MODEL_PARAMETERS,
45
+ ...options?.modelParameters,
46
+ },
47
+ });
48
+ let response = firstShotMessage?.content || "";
49
+ const skills = (0, utils_1.extractTestStepsSuggestions)(response);
50
+ const validateSkillsSpan = fetchSkillsSpan?.span({
51
+ name: "validate-skills",
52
+ input: {
53
+ skills,
54
+ },
55
+ });
56
+ const validatedSkills = skills.filter((skill) => fs_1.default.existsSync(skill.filePath));
57
+ validateSkillsSpan?.end({ output: { validatedSkills } });
58
+ fetchSkillsSpan?.end({ output: { validatedSkills } });
59
+ return validatedSkills;
60
+ }
61
+ exports.getAppropriateSkills = getAppropriateSkills;
@@ -0,0 +1,9 @@
1
+ import { TraceClient } from "@empiricalrun/llm";
2
+ export declare function generateSkillUsageCode({ task, sampleUsageMethod, scopeVariablesMapStr, pageVariableName, trace, }: {
3
+ task: string;
4
+ sampleUsageMethod: string;
5
+ scopeVariablesMapStr: string;
6
+ pageVariableName: string;
7
+ trace?: TraceClient;
8
+ }): Promise<string>;
9
+ //# sourceMappingURL=use-skill.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"use-skill.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/use-skill.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAShE,wBAAsB,sBAAsB,CAAC,EAC3C,IAAI,EACJ,iBAAiB,EACjB,oBAAoB,EACpB,gBAAgB,EAChB,KAAK,GACN,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,iBAAiB,EAAE,MAAM,CAAC;IAC1B,oBAAoB,EAAE,MAAM,CAAC;IAC7B,gBAAgB,EAAE,MAAM,CAAC;IACzB,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,mBA0CA"}
@@ -0,0 +1,49 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.generateSkillUsageCode = void 0;
4
+ const llm_1 = require("@empiricalrun/llm");
5
+ const constants_1 = require("../../constants");
6
+ async function generateSkillUsageCode({ task, sampleUsageMethod, scopeVariablesMapStr, pageVariableName, trace, }) {
7
+ const skillUsageSpan = trace?.span({
8
+ name: "skill-usage",
9
+ input: {
10
+ task,
11
+ sampleUsageMethod,
12
+ scopeVariablesMapStr,
13
+ pageVariableName,
14
+ },
15
+ });
16
+ const promptSpan = skillUsageSpan?.span({
17
+ name: "apply-skills-prompt",
18
+ });
19
+ const prompt = await (0, llm_1.getPrompt)("apply-skills-prompt", {
20
+ task,
21
+ sampleUsageMethod,
22
+ scopeVariablesMapStr,
23
+ pageVariableName,
24
+ });
25
+ promptSpan?.end({ output: prompt });
26
+ const llm = new llm_1.LLM({
27
+ trace: skillUsageSpan,
28
+ provider: constants_1.DEFAULT_MODEL_PROVIDER,
29
+ defaultModel: constants_1.DEFAULT_MODEL,
30
+ providerApiKey: constants_1.MODEL_API_KEYS[constants_1.DEFAULT_MODEL_PROVIDER],
31
+ });
32
+ const firstShotMessage = await llm.createChatCompletion({
33
+ trace: skillUsageSpan,
34
+ messages: prompt,
35
+ traceName: "generate-skill-usage-code",
36
+ modelParameters: {
37
+ ...constants_1.DEFAULT_MODEL_PARAMETERS,
38
+ },
39
+ });
40
+ let response = firstShotMessage?.content || "";
41
+ skillUsageSpan?.end({
42
+ output: {
43
+ code: response,
44
+ },
45
+ });
46
+ console.log(`generated usage code`, response);
47
+ return response;
48
+ }
49
+ exports.generateSkillUsageCode = generateSkillUsageCode;
@@ -16,4 +16,13 @@ export declare function extractTestUpdates(input: string): {
16
16
  newCode: string | undefined;
17
17
  reason: string | undefined;
18
18
  }[];
19
+ /**
20
+ *
21
+ */
22
+ export declare function extractTestStepsSuggestions(input: string): {
23
+ testStep: string;
24
+ filePath: string;
25
+ usageExample: string;
26
+ reason: string;
27
+ }[];
19
28
  //# sourceMappingURL=utils.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/utils.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,MAAM,GAAG;IACjD,QAAQ,EAAE,MAAM,GAAG,SAAS,CAAC;IAC7B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;CAC5B,EAAE,CAiBF"}
1
+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/utils.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,MAAM,GAAG;IACjD,QAAQ,EAAE,MAAM,GAAG,SAAS,CAAC;IAC7B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;CAC5B,EAAE,CAiBF;AAED;;GAEG;AAEH,wBAAgB,2BAA2B,CAAC,KAAK,EAAE,MAAM,GAAG;IAC1D,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,MAAM,CAAC;CAChB,EAAE,CAeF"}
@@ -1,6 +1,6 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.extractTestUpdates = void 0;
3
+ exports.extractTestStepsSuggestions = exports.extractTestUpdates = void 0;
4
4
  /**
5
5
  *
6
6
  * method to extract file path and code updates for the LLM response of update flow
@@ -30,3 +30,22 @@ function extractTestUpdates(input) {
30
30
  return result;
31
31
  }
32
32
  exports.extractTestUpdates = extractTestUpdates;
33
+ /**
34
+ *
35
+ */
36
+ function extractTestStepsSuggestions(input) {
37
+ const result = [];
38
+ const regex = /<subtask>(.*?)<\/subtask>[\s\S]*?<file_import_path>([\s\S]*?)<\/file_import_path>[\s\S]*?<usage_example>([\s\S]*?)<\/usage_example>[\s\S]*?<reason>([\s\S]*?)<\/reason>/g;
39
+ let match;
40
+ while ((match = regex.exec(input)) !== null) {
41
+ const [, testStep, filePath, usageExample, reason] = match;
42
+ result.push({
43
+ testStep: testStep?.trim() || "",
44
+ filePath: filePath?.trim() || "",
45
+ usageExample: usageExample?.trim() || "",
46
+ reason: reason?.trim() || "",
47
+ });
48
+ }
49
+ return result.filter((r) => !!r.filePath && !!r.usageExample);
50
+ }
51
+ exports.extractTestStepsSuggestions = extractTestStepsSuggestions;
@@ -1,16 +1,27 @@
1
1
  import { LLM, TraceClient } from "@empiricalrun/llm";
2
2
  import { Page } from "playwright";
3
- import { TestGenConfigOptions } from "../../types";
4
- type BrowsingAgentOptions = Partial<TestGenConfigOptions>;
5
- export declare function getNextAction({ task, executedActions, page, trace, llm, options, pageScreenshot, }: {
3
+ import { PlaywrightActions } from "../../actions";
4
+ import { TestCase } from "../../types";
5
+ import { BrowsingAgentOptions } from "../browsing";
6
+ export declare function getNextAction({ task, executedActions, failedActions, page, trace, llm, options, pageScreenshot, actions, disableSkills, }: {
6
7
  task: string;
7
8
  executedActions: string[];
9
+ failedActions: any[];
8
10
  page: Page;
9
11
  trace?: TraceClient;
10
12
  llm: LLM;
11
13
  options: BrowsingAgentOptions;
12
14
  pageScreenshot: string;
15
+ actions: PlaywrightActions;
16
+ disableSkills: boolean;
13
17
  }): Promise<import("openai/resources/index.mjs").ChatCompletionMessageToolCall | undefined>;
14
- export declare function masterAgent(task: string, page: Page, executedActions: string[], trace: TraceClient, llm: LLM, options: BrowsingAgentOptions): Promise<any>;
15
- export {};
18
+ export declare function createTestUsingMasterAgent({ task, page, testCase, options, }: {
19
+ task: string;
20
+ page: Page;
21
+ testCase: TestCase;
22
+ options: BrowsingAgentOptions;
23
+ }): Promise<{
24
+ code: string;
25
+ importPaths: string[];
26
+ }>;
16
27
  //# sourceMappingURL=run.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAa,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAGhE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAQlC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAEnD,KAAK,oBAAoB,GAAG,OAAO,CAAC,oBAAoB,CAAC,CAAC;AAE1D,wBAAsB,aAAa,CAAC,EAClC,IAAI,EACJ,eAAe,EACf,IAAI,EACJ,KAAK,EACL,GAAG,EACH,OAAO,EACP,cAAc,GACf,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,IAAI,EAAE,IAAI,CAAC;IACX,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,EAAE,GAAG,CAAC;IACT,OAAO,EAAE,oBAAoB,CAAC;IAC9B,cAAc,EAAE,MAAM,CAAC;CACxB,2FA+CA;AAED,wBAAsB,WAAW,CAC/B,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,IAAI,EACV,eAAe,EAAE,MAAM,EAAE,EACzB,KAAK,EAAE,WAAW,EAClB,GAAG,EAAE,GAAG,EACR,OAAO,EAAE,oBAAoB,gBAsC9B"}
1
+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,GAAG,EACH,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAG3B,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAYlD,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AACvC,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AAOrB,wBAAsB,aAAa,CAAC,EAClC,IAAI,EACJ,eAAe,EACf,aAAa,EACb,IAAI,EACJ,KAAK,EACL,GAAG,EACH,OAAO,EACP,cAAc,EACd,OAAO,EACP,aAAa,GACd,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,aAAa,EAAE,GAAG,EAAE,CAAC;IACrB,IAAI,EAAE,IAAI,CAAC;IACX,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,EAAE,GAAG,CAAC;IACT,OAAO,EAAE,oBAAoB,CAAC;IAC9B,cAAc,EAAE,MAAM,CAAC;IACvB,OAAO,EAAE,iBAAiB,CAAC;IAC3B,aAAa,EAAE,OAAO,CAAC;CACxB,2FAwDA;AAED,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,OAAO,GACR,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,oBAAoB,CAAC;CAC/B;;;GAmLA"}