@empiricalrun/test-gen 0.42.18 → 0.42.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +17 -0
- package/dist/agent/browsing/index.d.ts +4 -6
- package/dist/agent/browsing/index.d.ts.map +1 -1
- package/dist/agent/browsing/index.js +49 -127
- package/dist/agent/browsing/utils.d.ts +0 -7
- package/dist/agent/browsing/utils.d.ts.map +1 -1
- package/dist/agent/browsing/utils.js +1 -13
- package/dist/agent/codegen/create-test-block.d.ts.map +1 -1
- package/dist/agent/codegen/create-test-block.js +2 -2
- package/dist/agent/codegen/lexical-scoped-vars.d.ts.map +1 -1
- package/dist/agent/codegen/lexical-scoped-vars.js +2 -2
- package/dist/agent/infer-agent/index.d.ts +2 -1
- package/dist/agent/infer-agent/index.d.ts.map +1 -1
- package/dist/agent/infer-agent/index.js +4 -11
- package/dist/agent/master/element-annotation.d.ts.map +1 -1
- package/dist/agent/master/element-annotation.js +7 -53
- package/dist/agent/master/run.d.ts +2 -1
- package/dist/agent/master/run.d.ts.map +1 -1
- package/dist/agent/master/run.js +23 -8
- package/dist/agent/planner/run-time-planner.d.ts.map +1 -1
- package/dist/agent/planner/run-time-planner.js +2 -1
- package/dist/agent/planner/run.d.ts +2 -3
- package/dist/agent/planner/run.d.ts.map +1 -1
- package/dist/agent/planner/run.js +6 -15
- package/dist/bin/index.js +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +3 -1
- package/dist/prompts/lib/index.d.ts +8 -0
- package/dist/prompts/lib/index.d.ts.map +1 -0
- package/dist/prompts/lib/index.js +118 -0
- package/dist/types/index.d.ts +5 -4
- package/dist/types/index.d.ts.map +1 -1
- package/package.json +5 -4
- package/dist/agent/codegen/promptBuilder.d.ts +0 -3
- package/dist/agent/codegen/promptBuilder.d.ts.map +0 -1
- package/dist/agent/codegen/promptBuilder.js +0 -44
- package/dist/agent/verification/index.d.ts +0 -13
- package/dist/agent/verification/index.d.ts.map +0 -1
- package/dist/agent/verification/index.js +0 -84
- package/dist/evals/verification-agent.evals.d.ts +0 -4
- package/dist/evals/verification-agent.evals.d.ts.map +0 -1
- package/dist/evals/verification-agent.evals.js +0 -23
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,22 @@
|
|
|
1
1
|
# @empiricalrun/test-gen
|
|
2
2
|
|
|
3
|
+
## 0.42.20
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- 9488f81: fix: infer-agent prompt template
|
|
8
|
+
- b625749: chore: move infer-agent and planner prompts to handlebars
|
|
9
|
+
- 5fb977c: test: clean up some unreliable tests
|
|
10
|
+
- 8914542: feat: run planner inside master agent flow
|
|
11
|
+
|
|
12
|
+
## 0.42.19
|
|
13
|
+
|
|
14
|
+
### Patch Changes
|
|
15
|
+
|
|
16
|
+
- c36efe4: chore: remove any type for get-next-action output
|
|
17
|
+
- ebb0bfa: feat: support images in handlebar prompts
|
|
18
|
+
- 63ed479: fix: remove verification and looping inside browsing agent
|
|
19
|
+
|
|
3
20
|
## 0.42.18
|
|
4
21
|
|
|
5
22
|
### Patch Changes
|
|
@@ -1,20 +1,18 @@
|
|
|
1
1
|
import { LLM, TraceClient } from "@empiricalrun/llm";
|
|
2
2
|
import { Page } from "playwright";
|
|
3
3
|
import { PlaywrightActions } from "../../actions";
|
|
4
|
-
import { CustomLogger } from "../../bin/logger";
|
|
5
4
|
import { TestGenConfigOptions } from "../../types";
|
|
6
5
|
export type BrowsingAgentOptions = Partial<TestGenConfigOptions> & {
|
|
7
6
|
htmlSanitize?: {
|
|
8
7
|
disallowedStrings?: string[];
|
|
9
8
|
};
|
|
10
9
|
};
|
|
11
|
-
export declare function executeTaskUsingBrowsingAgent({
|
|
10
|
+
export declare function executeTaskUsingBrowsingAgent({ action, page, actions, llm, options, trace, }: {
|
|
12
11
|
action: string;
|
|
13
|
-
trace?: TraceClient;
|
|
14
|
-
logger: CustomLogger;
|
|
15
12
|
page: Page;
|
|
16
|
-
options: BrowsingAgentOptions;
|
|
17
|
-
llm: LLM;
|
|
18
13
|
actions: PlaywrightActions;
|
|
14
|
+
llm: LLM;
|
|
15
|
+
trace?: TraceClient;
|
|
16
|
+
options: BrowsingAgentOptions;
|
|
19
17
|
}): Promise<string[] | undefined>;
|
|
20
18
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAKlD,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAKnD,MAAM,MAAM,oBAAoB,GAAG,OAAO,CAAC,oBAAoB,CAAC,GAAG;IACjE,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACH,CAAC;AAEF,wBAAsB,6BAA6B,CAAC,EAClD,MAAM,EACN,IAAI,EACJ,OAAO,EACP,GAAG,EACH,OAAO,EACP,KAAK,GACN,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,IAAI,CAAC;IACX,OAAO,EAAE,iBAAiB,CAAC;IAC3B,GAAG,EAAE,GAAG,CAAC;IACT,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,OAAO,EAAE,oBAAoB,CAAC;CAC/B,GAAG,OAAO,CAAC,MAAM,EAAE,GAAG,SAAS,CAAC,CAiEhC"}
|
|
@@ -2,148 +2,70 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.executeTaskUsingBrowsingAgent = void 0;
|
|
4
4
|
const constants_1 = require("../../constants");
|
|
5
|
+
const promptTemplate_0 = "{{#section \"system\"}}\nYou are a browser automation agent who is given a task to generate code for navigation and assertion. This task is your\ngoal and you must achieve it.\n\nYou will be provided with already executed actions and basis that you need to pick the next step to achieve the task.\nRemember that the goal must be achieved.\n\nYou will be provided with the web page snapshot in the form of Document Object Model. Based on the goal and available\ntool calls you need to pick the appropriate tool call.\n\nInstructions:\n- Take actions one at a time. Do not try to take multiple actions\n- You can respond with multiple assertions in one shot\n- Do not repeat the same actions again otherwise your response will be marked INVALID\n- Avoid repeating errors which we got while executing the last action\n- Stick to the task provided to you and mark the task done once the task is complete\n- Do not execute any action which is not mentioned in the task\n- Do not repeat actions which are already executed more than twice otherwise your response will be marked INVALID\n- Always refer to \"Executed actions\" before deciding your next action for completion of the task.\n- End the task done if all actions required for task are executed\n{{/section}}\n\n{{#section \"user\"}}\nTask:\n{{task}}\n\nCurrent page snapshot:\n{{pageSnapshot}}\n{{/section}}";
|
|
6
|
+
const lib_1 = require("../../prompts/lib");
|
|
5
7
|
const reporter_1 = require("../../reporter");
|
|
6
|
-
const session_1 = require("../../session");
|
|
7
8
|
const html_1 = require("../../utils/html");
|
|
8
9
|
const utils_1 = require("../utils");
|
|
9
|
-
const verification_1 = require("../verification");
|
|
10
10
|
const o1_completion_1 = require("./o1-completion");
|
|
11
|
-
|
|
12
|
-
async function executeTaskUsingBrowsingAgent({ trace, action, logger, page, options, llm, actions, }) {
|
|
13
|
-
let isTaskDone = false;
|
|
14
|
-
const executedActions = [];
|
|
15
|
-
let lastActionExecTrace = "";
|
|
11
|
+
async function executeTaskUsingBrowsingAgent({ action, page, actions, llm, options, trace, }) {
|
|
16
12
|
let generatedCodeSteps = [];
|
|
17
13
|
const tools = actions.getBrowsingActionSchemas();
|
|
18
14
|
const testgenUpdatesReporter = new reporter_1.TestGenUpdatesReporter();
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
task: action,
|
|
48
|
-
conversation: ["Successfully executed actions", ...successfulActions],
|
|
49
|
-
});
|
|
50
|
-
isTaskDone = verificationAgentResp.isDone;
|
|
51
|
-
logger.log(`isTaskDone: ${isTaskDone}`);
|
|
52
|
-
logger.log(`reason: ${verificationAgentResp.reason}`);
|
|
53
|
-
if (isTaskDone) {
|
|
54
|
-
browsingAgentSpan?.event({ name: "task-done" });
|
|
55
|
-
browsingAgentSpan?.end({
|
|
56
|
-
output: {
|
|
57
|
-
taskDone: true,
|
|
58
|
-
reason: verificationAgentResp.reason,
|
|
59
|
-
code: generatedCodeSteps,
|
|
60
|
-
},
|
|
61
|
-
});
|
|
62
|
-
break;
|
|
63
|
-
}
|
|
64
|
-
}
|
|
65
|
-
const messages = await (0, utils_2.getPromptForNextAction)({
|
|
66
|
-
pageSnapshot,
|
|
67
|
-
previousActions: successfulActions,
|
|
68
|
-
task: action,
|
|
69
|
-
lastActionErrors: lastActionExecTrace ? [lastActionExecTrace] : [],
|
|
70
|
-
promptType: "browsing-agent-as-tool",
|
|
71
|
-
});
|
|
72
|
-
promptSpan?.end({ output: { messages } });
|
|
73
|
-
let completion;
|
|
74
|
-
completion = await (0, o1_completion_1.getO1Completion)({
|
|
75
|
-
//@ts-ignore
|
|
15
|
+
const browsingAgentSpan = trace?.span({
|
|
16
|
+
name: `browsing-agent`,
|
|
17
|
+
input: {
|
|
18
|
+
action,
|
|
19
|
+
},
|
|
20
|
+
});
|
|
21
|
+
const pageContentSpan = browsingAgentSpan?.span({
|
|
22
|
+
name: "page-content",
|
|
23
|
+
});
|
|
24
|
+
const pageContent = await page.content();
|
|
25
|
+
pageContentSpan?.end({ output: { pageContent } });
|
|
26
|
+
const sanitizationSpan = browsingAgentSpan?.span({
|
|
27
|
+
name: "page-sanitization",
|
|
28
|
+
});
|
|
29
|
+
const pageSnapshot = (0, html_1.sanitizeHtml)(pageContent, options.htmlSanitize);
|
|
30
|
+
sanitizationSpan?.end({ output: { pageSnapshot } });
|
|
31
|
+
const promptSpan = browsingAgentSpan?.span({ name: "page-prompt" });
|
|
32
|
+
const messages = await (0, lib_1.compilePrompt)(promptTemplate_0, { pageSnapshot, task: action });
|
|
33
|
+
promptSpan?.end({ output: { messages } });
|
|
34
|
+
let completion;
|
|
35
|
+
completion = await (0, o1_completion_1.getO1Completion)({
|
|
36
|
+
messages,
|
|
37
|
+
tools,
|
|
38
|
+
trace: browsingAgentSpan,
|
|
39
|
+
});
|
|
40
|
+
// If O1 completion fails due to any reason, resort to old flow
|
|
41
|
+
if (!completion) {
|
|
42
|
+
completion = await llm.createChatCompletion({
|
|
76
43
|
messages,
|
|
77
44
|
tools,
|
|
78
45
|
trace: browsingAgentSpan,
|
|
46
|
+
model: options.model || constants_1.DEFAULT_MODEL,
|
|
47
|
+
modelParameters: {
|
|
48
|
+
...constants_1.DEFAULT_MODEL_PARAMETERS,
|
|
49
|
+
...options.modelParameters,
|
|
50
|
+
tool_choice: "required",
|
|
51
|
+
},
|
|
79
52
|
});
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
...options.modelParameters,
|
|
90
|
-
tool_choice: "required",
|
|
91
|
-
},
|
|
92
|
-
});
|
|
93
|
-
}
|
|
94
|
-
const toolCalls = completion?.tool_calls || [];
|
|
95
|
-
// LLM might respond with empty tool_calls and we can go into endless loop
|
|
96
|
-
// if we donot record this action and mark it as error
|
|
97
|
-
if (!toolCalls.length) {
|
|
98
|
-
executedActions.push({
|
|
99
|
-
isError: true,
|
|
100
|
-
action: "",
|
|
101
|
-
});
|
|
102
|
-
}
|
|
103
|
-
const toolCallsSpan = browsingAgentSpan?.span({ name: "tool-calls" });
|
|
104
|
-
for (const i in toolCalls) {
|
|
105
|
-
const toolCall = toolCalls[i];
|
|
106
|
-
if (await (0, session_1.shouldStopSession)()) {
|
|
107
|
-
break;
|
|
108
|
-
}
|
|
109
|
-
try {
|
|
110
|
-
const code = await actions.executeAction(toolCall.function.name, (0, utils_1.parseJson)(toolCall.function.arguments), toolCallsSpan);
|
|
111
|
-
if (code) {
|
|
112
|
-
generatedCodeSteps.push(code);
|
|
113
|
-
}
|
|
114
|
-
executedActions.push({
|
|
115
|
-
isError: false,
|
|
116
|
-
action: (0, utils_1.parseJson)(toolCall.function.arguments)?.reason,
|
|
117
|
-
});
|
|
118
|
-
lastActionExecTrace = "";
|
|
119
|
-
}
|
|
120
|
-
catch (e) {
|
|
121
|
-
// TODO: implement feedback loop to llm
|
|
122
|
-
executedActions.push({
|
|
123
|
-
isError: true,
|
|
124
|
-
action: (0, utils_1.parseJson)(toolCall.function.arguments)?.reason,
|
|
125
|
-
});
|
|
126
|
-
lastActionExecTrace = e.message;
|
|
127
|
-
void testgenUpdatesReporter.sendMessage(e.message);
|
|
128
|
-
logger.error(lastActionExecTrace, e);
|
|
53
|
+
}
|
|
54
|
+
const toolCalls = completion?.tool_calls || [];
|
|
55
|
+
const toolCallsSpan = browsingAgentSpan?.span({ name: "tool-calls" });
|
|
56
|
+
for (const i in toolCalls) {
|
|
57
|
+
const toolCall = toolCalls[i];
|
|
58
|
+
try {
|
|
59
|
+
const code = await actions.executeAction(toolCall.function.name, (0, utils_1.parseJson)(toolCall.function.arguments), toolCallsSpan);
|
|
60
|
+
if (code) {
|
|
61
|
+
generatedCodeSteps.push(code);
|
|
129
62
|
}
|
|
130
63
|
}
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
if (executedActions.length >= 3) {
|
|
134
|
-
const lastThreeActions = executedActions.slice(-3);
|
|
135
|
-
const lastThreeActionsFailed = lastThreeActions.every((a) => a.isError);
|
|
136
|
-
// get last 3 lines of code
|
|
137
|
-
const isStuckInLoop = actions.isStuckInLoop();
|
|
138
|
-
if (lastThreeActionsFailed || isStuckInLoop) {
|
|
139
|
-
// TODO: this should be sent to dashboard
|
|
140
|
-
const error = "Agent is not able to figure out next browser action, ending retries";
|
|
141
|
-
logger.error(error);
|
|
142
|
-
await testgenUpdatesReporter.sendMessage(error);
|
|
143
|
-
throw Error(error);
|
|
144
|
-
}
|
|
64
|
+
catch (e) {
|
|
65
|
+
void testgenUpdatesReporter.sendMessage(e.message);
|
|
145
66
|
}
|
|
146
67
|
}
|
|
68
|
+
toolCallsSpan?.end({ output: { toolCalls } });
|
|
147
69
|
return generatedCodeSteps;
|
|
148
70
|
}
|
|
149
71
|
exports.executeTaskUsingBrowsingAgent = executeTaskUsingBrowsingAgent;
|
|
@@ -29,13 +29,6 @@ export declare function readPlaywrightConfig(): Promise<PlaywrightTestConfig>;
|
|
|
29
29
|
* @returns
|
|
30
30
|
*/
|
|
31
31
|
export declare function detectProjectName(testFilePath: string, playwrightConfig: PlaywrightTestConfig, pwProjectsFilter?: string[]): Promise<string>;
|
|
32
|
-
export declare function getPromptForNextAction({ pageSnapshot, task, previousActions, lastActionErrors, promptType, }: {
|
|
33
|
-
pageSnapshot: string;
|
|
34
|
-
task: string;
|
|
35
|
-
previousActions: string[];
|
|
36
|
-
lastActionErrors: string[];
|
|
37
|
-
promptType?: string;
|
|
38
|
-
}): Promise<import("openai/resources/index.mjs").ChatCompletionMessageParam[]>;
|
|
39
32
|
export declare class TeardownManager {
|
|
40
33
|
private directory;
|
|
41
34
|
constructor(directory: string);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAAA,OAAO,
|
|
1
|
+
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAIhD,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAiBvD,OAAO,EAAe,aAAa,EAAE,MAAM,aAAa,CAAC;AAMzD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;AA8FD;;;;GAIG;AACH,wBAAsB,yBAAyB,CAC7C,SAAS,EAAE,aAAa,EACxB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,MAAM,CAAC,CA0DjB;AAyBD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBAuHxD;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,QAIjD;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,IAAI,OAAO,CAAC,oBAAoB,CAAC,CAM1E;AAWD;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,YAAY,EAAE,MAAM,EACpB,gBAAgB,EAAE,oBAAoB,EACtC,gBAAgB,GAAE,MAAM,EAAU,GACjC,OAAO,CAAC,MAAM,CAAC,CA+CjB;AAED,qBAAa,eAAe;IACd,OAAO,CAAC,SAAS;gBAAT,SAAS,EAAE,MAAM;IACrC,OAAO,CAAC,aAAa,CAAqB;YAE5B,mBAAmB;YAUnB,gBAAgB;IAsBjB,OAAO;IAuBb,SAAS;CAKjB"}
|
|
@@ -3,8 +3,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
3
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.TeardownManager = exports.
|
|
7
|
-
const llm_1 = require("@empiricalrun/llm");
|
|
6
|
+
exports.TeardownManager = exports.detectProjectName = exports.readPlaywrightConfig = exports.canRunMasterAgent = exports.injectPwLocatorGenerator = exports.prepareFileForMasterAgent = exports.prepareBrowsingAgentTask = exports.isRegExp = void 0;
|
|
8
7
|
const fs_extra_1 = __importDefault(require("fs-extra"));
|
|
9
8
|
const minimatch_1 = require("minimatch");
|
|
10
9
|
const path_1 = __importDefault(require("path"));
|
|
@@ -348,17 +347,6 @@ async function detectProjectName(testFilePath, playwrightConfig, pwProjectsFilte
|
|
|
348
347
|
return filteredProjectNames[0];
|
|
349
348
|
}
|
|
350
349
|
exports.detectProjectName = detectProjectName;
|
|
351
|
-
async function getPromptForNextAction({ pageSnapshot = "", task = "", previousActions = [], lastActionErrors = [], promptType = "browsing-agent-next-action", }) {
|
|
352
|
-
const previousActionsStr = previousActions.join("\n\n ---- \n\n");
|
|
353
|
-
const prompt = await (0, llm_1.getPrompt)(promptType, {
|
|
354
|
-
pageSnapshot,
|
|
355
|
-
previousActionsStr,
|
|
356
|
-
task,
|
|
357
|
-
lastActionErrors,
|
|
358
|
-
});
|
|
359
|
-
return prompt;
|
|
360
|
-
}
|
|
361
|
-
exports.getPromptForNextAction = getPromptForNextAction;
|
|
362
350
|
class TeardownManager {
|
|
363
351
|
directory;
|
|
364
352
|
constructor(directory) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"create-test-block.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/create-test-block.ts"],"names":[],"mappings":"AAAA,OAAO,EAAyB,WAAW,EAAE,MAAM,mBAAmB,CAAC;
|
|
1
|
+
{"version":3,"file":"create-test-block.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/create-test-block.ts"],"names":[],"mappings":"AAAA,OAAO,EAAyB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAcvE,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAE7D,wBAAsB,wBAAwB,CAAC,EAC7C,QAAQ,EACR,IAAI,EACJ,OAAO,EACP,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,+BAqDA"}
|
|
@@ -7,8 +7,8 @@ const context_1 = require("../../bin/utils/context");
|
|
|
7
7
|
const web_1 = require("../../bin/utils/platform/web");
|
|
8
8
|
const constants_1 = require("../../constants");
|
|
9
9
|
const promptTemplate_0 = "{{#section \"system\"}}\nYou are a software test engineer who is given a task to write an empty test block.\nBased on the inputs you need to create an empty playwright test block with correctly imported fixture.\n\nThe test will contain a test name which you will need to use to build the empty test case block.\n\nYou will be provided with current tests, fixtures and page object models for you to use and create test case block as\nper the task provided to you.\n\nBefore responding you need to ensure that the code change is minimal and the change is reusable across tests. You need\nto ensure the code follows DRY principle.\n\nHere is the list of current tests and fixtures:\n\n{{testFiles}}\n\nHere is the list of current page object models:\n\n{{pageFiles}}\n{{/section}}\n\n{{#section \"user\"}}\nFollowing is the test scenario for which you need to write the empty test case block:\ntest name:\n{{scenarioName}}\n\ntask:\ncreate an empty test case block for the following test steps:\n{{scenario}}\n\ntest file path: {{scenarioFile}}\n\n------\n\nYou also need to ensure that the empty test case block has a starting page to begin test.\n\nIn order to identify the right page with which the test should start, follow the steps:\n- based on the similarities with other test cases mentioned in the file, identify the right page fixture to be imported\n- Read the page fixture methods step by step. Identify whether the fixture handles navigating to a page.\n- Identify whether other tests using the page fixture had to add separate steps for navigation or not\n- Based on the above analysis there will be following cases and choose either for the given test scenario:\n-- Case 1: if the test case scenario provided inside the task mentions about page navigation, then use that page\nnavigation. 
skip other cases if this case is satisfied.\n-- Case 2: refer other test cases which import similar fixtures and infer the first page navigation of this test case.\nYou should prefer tests which are in the same file. Tests within same file have higher overlaps in first page\nnavigation.\n- Once the page fixture is decided, look for userContext fixture in files. If its available then add \"userContext\" to\nthe test case block\n\n\n\nFollow these instructions before responding with output:\n- Read the code line by line and achieve the task provided to you\n- Read the dependencies of the code block by scanning through file paths and file provided to you. refer the same file\npath while responding with update\n- Focus only on the test case provided and associated JS methods called from the test case.\n- Respond only with the new empty test case block to be created and nothing else.\n- DO NOT respond with any backticks or markdown syntax\n- If \"userContext\" fixture is available in fixtures file, ensure importing that fixture in the test case block.\n- Provide a reason based on the test steps provided to you on why you chose the fixture or page.goto statement. The\nreason should be one of the list steps provided to you and mention why the case was chosen\n{{/section}}";
|
|
10
|
+
const lib_1 = require("../../prompts/lib");
|
|
10
11
|
const session_1 = require("../../session");
|
|
11
|
-
const promptBuilder_1 = require("./promptBuilder");
|
|
12
12
|
async function createEmptyTestCaseBlock({ testCase, file, options, trace, }) {
|
|
13
13
|
const logger = new logger_1.CustomLogger({ useReporter: false });
|
|
14
14
|
logger.log("Creating new test block");
|
|
@@ -29,7 +29,7 @@ async function createEmptyTestCaseBlock({ testCase, file, options, trace, }) {
|
|
|
29
29
|
const promptSpan = trace?.span({
|
|
30
30
|
name: "build-create-empty-test-case-prompt",
|
|
31
31
|
});
|
|
32
|
-
const prompt = await (0,
|
|
32
|
+
const prompt = await (0, lib_1.compilePrompt)(promptTemplate_0, {
|
|
33
33
|
testFiles: context.codePrompt,
|
|
34
34
|
pageFiles: context.pomPrompt,
|
|
35
35
|
scenarioName: testCase.name,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"lexical-scoped-vars.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/lexical-scoped-vars.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;
|
|
1
|
+
{"version":3,"file":"lexical-scoped-vars.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/lexical-scoped-vars.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AASrD,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAEnD,wBAAsB,oBAAoB,CAAC,EACzC,KAAK,EACL,IAAI,EACJ,cAAc,EACd,OAAO,GACR,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,IAAI,EAAE,MAAM,CAAC;IACb,cAAc,EAAE,MAAM,CAAC;IACvB,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,qBAoDA"}
|
|
@@ -4,12 +4,12 @@ exports.getLexicalScopedVars = void 0;
|
|
|
4
4
|
const llm_1 = require("@empiricalrun/llm");
|
|
5
5
|
const constants_1 = require("../../constants");
|
|
6
6
|
const promptTemplate_0 = "{{#section \"system\"}}\nYou are a software engineer tasked with analysing Typescript code to identify all variables available in the lexical\nscope at a specific reference point within a file. You will be given a file that contains multiple Playwright tests or\npage object models, along with a reference point inside the file. Your goal is to evaluate the list of all variables\navailable in the lexical scope at that reference point.\n\nTo accomplish this, you need to evaluate the Abstract Syntax Tree (AST) and accumulate all variables that are in the\nlexical scope, which includes:\n1. Variables declared within the test before the reference point.\n2. Arguments of the function.\n3. Variables defined in the parent scope. Identify all variables available in the lexical scope at a specific execution\nreference point within a file, considering only those variables that have been declared and assigned prior to the\nexecution of this point in the code.\n4. Global variables defined in the file.\n\nBefore responding:\n- Ignore variables imported from the `\"./pages\"` path.\n- keep in mind temporal dead zone phenomenon before responding with variables\n{{/section}}\n\n{{#section \"user\"}}\nFile:\n{{testFile}}\n\nReference point:\n{{referencePoint}}\n{{/section}}";
|
|
7
|
-
const
|
|
7
|
+
const lib_1 = require("../../prompts/lib");
|
|
8
8
|
async function getLexicalScopedVars({ trace, file, referencePoint, options, }) {
|
|
9
9
|
const fetchLexicalScopedVarsSpan = trace?.span({
|
|
10
10
|
name: "lexical-scoped-vars",
|
|
11
11
|
});
|
|
12
|
-
const messages = await (0,
|
|
12
|
+
const messages = await (0, lib_1.compilePrompt)(promptTemplate_0, {
|
|
13
13
|
testFile: file || "",
|
|
14
14
|
referencePoint: referencePoint || "",
|
|
15
15
|
});
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { TraceClient } from "@empiricalrun/llm";
|
|
2
|
-
import { Agent
|
|
2
|
+
import { Agent } from "@empiricalrun/shared-types";
|
|
3
|
+
import { TestGenConfigOptions } from "../../types";
|
|
3
4
|
export declare function inferAgentBasedTask({ task, options, trace, }: {
|
|
4
5
|
task: string;
|
|
5
6
|
options?: TestGenConfigOptions;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/infer-agent/index.ts"],"names":[],"mappings":"AAAA,OAAO,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/infer-agent/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAyB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACvE,OAAO,EAAE,KAAK,EAAE,MAAM,4BAA4B,CAAC;AAWnD,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAGnD,wBAAsB,mBAAmB,CAAC,EACxC,IAAI,EACJ,OAAO,EACP,KAAK,GACN,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC;IAAE,QAAQ,EAAE,KAAK,CAAA;CAAE,CAAC,CAmE/B"}
|
|
@@ -3,6 +3,8 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.inferAgentBasedTask = void 0;
|
|
4
4
|
const llm_1 = require("@empiricalrun/llm");
|
|
5
5
|
const constants_1 = require("../../constants");
|
|
6
|
+
const promptTemplate_0 = "{{#section \"system\"}}\nYou are a software test engineer specializing in Playwright end-to-end tests. You are given a task which is a part of an\nend-to-end test scenario. The task may involve updating an existing end-to-end test case or writing a new test case from\nscratch. Tests involve user interactions (e.g. click on element) or other actions supported by Playwright (e.g.\nintercept network requests)\n\nYour objective is to identify whether the task requires accessing a web browser or not.\n\nTo fulfill your objective, answer the following questions:\n\n1. Does it require you to interact with a UI element in the browser? Examples of interactions are click, fill, type, key\npress, assert visibility of the element. Actions that interact with network requests are not UI element interactions.\n\n2. Is the locator of this UI element given to you in the task? Locators look like `getByText(...)`, `getByTestId(...)`\nand other locator methods in Playwright\n\n3. Decide if you need a browser: if you need to interact with a UI element AND you are NOT given the locator for that\nelement, you WILL NEED a browser.\n\n4. If you NEED a browser, then respond with answer as \"master\", otherwise respond with \"code\"\n\n\n# Example 1\n## Input\nTask:\nin this test don't delete the agent and remove steps after that\n\n## Output\n- ui_interaction_to_be_performed: There is no interaction here\n- ui_element_to_interact_with: No element specified\n- has_locator_for_that_element: No element specified\n- reasoning_for_browser_required: No interaction hence browser is not required\n- answer: code\n\n# Example 2\n## Input\nTask:\nin the swapfast test, replace the selectTokenForSwap method. 
Instead we will do this\\nclick on token button - this will\nshow a drawer\\nEnter usd in the search field that shows up in the drawer\\nSelect USDC.axl for Cosmos Hub - very\nimportant to choose this instead of USDC.axl on Osmosis\n\n## Output\n- ui_interaction_to_be_performed: Click on token button\n- ui_element_to_interact_with: Token button\n- has_locator_for_that_element: false\n- reasoning_for_browser_required: Task requires interacting with a UI element and identifying its locator which needs a\nbrowser\n- answer: master\n{{/section}}\n\n{{#section \"user\"}}\nTask:\n{{task}}\n{{/section}}";
|
|
7
|
+
const lib_1 = require("../../prompts/lib");
|
|
6
8
|
const session_1 = require("../../session");
|
|
7
9
|
const session = (0, session_1.getSessionDetails)();
|
|
8
10
|
async function inferAgentBasedTask({ task, options, trace, }) {
|
|
@@ -20,16 +22,7 @@ async function inferAgentBasedTask({ task, options, trace, }) {
|
|
|
20
22
|
options,
|
|
21
23
|
},
|
|
22
24
|
});
|
|
23
|
-
const
|
|
24
|
-
name: "infer-agent-prompt",
|
|
25
|
-
input: {
|
|
26
|
-
task,
|
|
27
|
-
},
|
|
28
|
-
});
|
|
29
|
-
const prompt = await (0, llm_1.getPrompt)("infer-agent", {
|
|
30
|
-
task,
|
|
31
|
-
});
|
|
32
|
-
promptSpan?.end({ output: { prompt } });
|
|
25
|
+
const messages = (0, lib_1.compilePrompt)(promptTemplate_0, { task });
|
|
33
26
|
const llm = new llm_1.LLM({
|
|
34
27
|
trace: inferAgentSpan,
|
|
35
28
|
provider: options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
|
|
@@ -37,7 +30,7 @@ async function inferAgentBasedTask({ task, options, trace, }) {
|
|
|
37
30
|
providerApiKey: constants_1.MODEL_API_KEYS[options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER],
|
|
38
31
|
});
|
|
39
32
|
const firstShotMessage = await llm.createChatCompletion({
|
|
40
|
-
messages
|
|
33
|
+
messages,
|
|
41
34
|
modelParameters: {
|
|
42
35
|
...constants_1.DEFAULT_MODEL_PARAMETERS,
|
|
43
36
|
...options?.modelParameters,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"element-annotation.d.ts","sourceRoot":"","sources":["../../../src/agent/master/element-annotation.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;
|
|
1
|
+
{"version":3,"file":"element-annotation.d.ts","sourceRoot":"","sources":["../../../src/agent/master/element-annotation.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AASlC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAEnD,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AA0DjD,wBAAsB,oBAAoB,CAAC,EACzC,kBAAkB,EAClB,WAAW,EACX,mBAAmB,EACnB,KAAK,EACL,GAAG,EACH,OAAO,EACP,UAAU,GACX,EAAE;IACD,kBAAkB,EAAE,MAAM,CAAC;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,UAAU,EAAE,oBAAoB,CAAC;CAClC,GAAG,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,CA8C9B;AAED,MAAM,MAAM,oBAAoB,GAAG;IACjC,UAAU,EACN,KAAK,GACL,UAAU,CAAC,IAAI,GACf,UAAU,CAAC,WAAW,GACtB,UAAU,CAAC,MAAM,CAAC;IACtB,aAAa,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;CACpC,CAAC;AAEF,wBAAsB,iBAAiB,CAAC,EACtC,IAAI,EACJ,UAAU,EACV,OAAO,GACR,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,UAAU,EAAE,oBAAoB,CAAC;IACjC,OAAO,EAAE,oBAAoB,CAAC;CAC/B,GAAG,OAAO,CAAC;IACV,cAAc,EAAE;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;IACtD,gBAAgB,EAAE,MAAM,CAAC;IACzB,uBAAuB,EAAE,MAAM,CAAC;CACjC,CAAC,CAqDD"}
|
|
@@ -2,8 +2,9 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.getAnnotationKeys = exports.getElementAnnotation = void 0;
|
|
4
4
|
const llm_1 = require("@empiricalrun/llm");
|
|
5
|
-
const vision_1 = require("@empiricalrun/llm/vision");
|
|
6
5
|
const constants_1 = require("../../constants");
|
|
6
|
+
const promptTemplate_0 = "{{#section \"system\"}}\nYou are an expert in describing the images and it's content. You need to provide the descriptions of annotated elements\npresent in the image.\n\nYou will be provided with an annotated screenshot where interact-able / clickable elements are annotated. The annotation\nis done by drawing a red box around the element and a small yellow box on it which contains unique element id.\n\nYou are given a Annotations which contains list of unique element id and description of the element separated by \":\".\n\nYou are also given the description of the element on which the action needs to be taken. The description includes\ninformation about how the element looks, it's position etc.\n\nYour task is to provide the annotation of the element on which the action needs to be performed based on the element\ndescription.\n\nFollow steps to fulfil your task:\n- Using the list of all element Ids provided to you, map all the element Ids on the annotated screen and describe each\nelement.\n- For describing each element Id\n-- iterate over each element Id in annotation list\n-- check if the description is already present for the element Id in the Annotation provided to you. If present skip\ndescribing it and use it as is.\n-- if the description is NA, then identify the element in the annotated screenshot and describe it using the image or\nicon enclosed in the element.\n- Respond with the mapped element Ids as \"enriched_annotations\"\n- Based on the description provided to you and the enriched annotations, first identify the element Id whose description\nmatches the task provided\n\nNote:\n- Ensure providing the description of all the elements in the list.\n- Don't update the description if its already present in the given annotations\n- Replace all the \"NA\" with description of the element. 
Its position, how does it look like etc.\n- There should be no \"NA\" present in any of the element description\n{{/section}}\n\n{{#section \"user\"}}\nElement description:\n{{elementDescription}}\n\nAnnotations:\n{{annotations}}\n\n{{image annotatedScreenshot}}\n{{/section}}";
|
|
7
|
+
const lib_1 = require("../../prompts/lib");
|
|
7
8
|
const utils_1 = require("../utils");
|
|
8
9
|
const annotationToolAction = {
|
|
9
10
|
name: "element_annotation",
|
|
@@ -66,58 +67,11 @@ async function getElementAnnotation({ elementDescription, annotations, annotated
|
|
|
66
67
|
preference,
|
|
67
68
|
},
|
|
68
69
|
});
|
|
69
|
-
const
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
You are given a Annotations which contains list of unique element id and description of the element separated by ":".
|
|
76
|
-
|
|
77
|
-
You are also given the description of the element on which the action needs to be taken. The description includes information about how the element looks, it's position etc.
|
|
78
|
-
|
|
79
|
-
Your task is to provide the annotation of the element on which the action needs to be performed based on the element description.
|
|
80
|
-
|
|
81
|
-
Follow steps to fulfil your task:
|
|
82
|
-
- Using the list of all element Ids provided to you, map all the element Ids on the annotated screen and describe each element.
|
|
83
|
-
- For describing each element Id
|
|
84
|
-
-- iterate over each element Id in annotation list
|
|
85
|
-
-- check if the description is already present for the element Id in the Annotation provided to you. If present skip describing it and use it as is.
|
|
86
|
-
-- if the description is NA, then identify the element in the annotated screenshot and describe it using the image or icon enclosed in the element.
|
|
87
|
-
- Respond with the mapped element Ids as "enriched_annotations"
|
|
88
|
-
- Based on the description provided to you and the enriched annotations, first identify the element Id whose description matches the task provided
|
|
89
|
-
|
|
90
|
-
Note:
|
|
91
|
-
- Ensure providing the description of all the elements in the list.
|
|
92
|
-
- Don't update the description if its already present in the given annotations
|
|
93
|
-
- Replace all the "NA" with description of the element. Its position, how does it look like etc.
|
|
94
|
-
- There should be no "NA" present in any of the element description
|
|
95
|
-
`,
|
|
96
|
-
};
|
|
97
|
-
const userMessage = {
|
|
98
|
-
role: "user",
|
|
99
|
-
content: [
|
|
100
|
-
{
|
|
101
|
-
type: "text",
|
|
102
|
-
text: `
|
|
103
|
-
Element description:
|
|
104
|
-
${elementDescription}
|
|
105
|
-
|
|
106
|
-
Annotations:
|
|
107
|
-
${annotations}`,
|
|
108
|
-
},
|
|
109
|
-
{
|
|
110
|
-
type: "image_url",
|
|
111
|
-
image_url: {
|
|
112
|
-
url: (0, vision_1.imageFormatForProvider)(options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER, annotatedScreenshot),
|
|
113
|
-
},
|
|
114
|
-
},
|
|
115
|
-
],
|
|
116
|
-
};
|
|
117
|
-
const messages = [
|
|
118
|
-
systemMessage,
|
|
119
|
-
userMessage,
|
|
120
|
-
];
|
|
70
|
+
const messages = (0, lib_1.compilePrompt)(promptTemplate_0, {
|
|
71
|
+
elementDescription,
|
|
72
|
+
annotations,
|
|
73
|
+
annotatedScreenshot,
|
|
74
|
+
}, options);
|
|
121
75
|
llm =
|
|
122
76
|
llm ||
|
|
123
77
|
new llm_1.LLM({
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
import { Page } from "playwright";
|
|
2
2
|
import { ScopeVars, TestCase } from "../../types";
|
|
3
3
|
import { BrowsingAgentOptions } from "../browsing";
|
|
4
|
-
export declare function createTestUsingMasterAgent({ task, page, testCase, options, scopeVars, }: {
|
|
4
|
+
export declare function createTestUsingMasterAgent({ task, page, testCase, specPath, options, scopeVars, }: {
|
|
5
5
|
task: string;
|
|
6
6
|
page: Page;
|
|
7
7
|
testCase?: TestCase;
|
|
8
|
+
specPath?: string;
|
|
8
9
|
options: BrowsingAgentOptions;
|
|
9
10
|
scopeVars?: ScopeVars;
|
|
10
11
|
}): Promise<{
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAclC,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAClD,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAclC,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAClD,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AA6BrB,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,oBAAoB,CAAC;IAC9B,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GA0WA"}
|
package/dist/agent/master/run.js
CHANGED
|
@@ -13,6 +13,7 @@ const session_1 = require("../../session");
|
|
|
13
13
|
const browsing_1 = require("../browsing");
|
|
14
14
|
const utils_2 = require("../browsing/utils");
|
|
15
15
|
const skills_retriever_1 = require("../codegen/skills-retriever");
|
|
16
|
+
const run_1 = require("../planner/run");
|
|
16
17
|
const run_time_planner_1 = require("../planner/run-time-planner");
|
|
17
18
|
const utils_3 = require("../utils");
|
|
18
19
|
const action_tool_calls_1 = require("./action-tool-calls");
|
|
@@ -29,8 +30,9 @@ function getPageVariables(stateVariables) {
|
|
|
29
30
|
}, {});
|
|
30
31
|
return pages;
|
|
31
32
|
}
|
|
32
|
-
async function createTestUsingMasterAgent({ task, page, testCase, options, scopeVars, }) {
|
|
33
|
+
async function createTestUsingMasterAgent({ task, page, testCase, specPath, options, scopeVars, }) {
|
|
33
34
|
const useActionSpecificAnnotations = options?.useActionSpecificAnnotations || false;
|
|
35
|
+
const usePlannerInMaster = options?.usePlannerInMaster || false;
|
|
34
36
|
const logger = new logger_1.CustomLogger({ useReporter: false });
|
|
35
37
|
const testgenUpdatesReporter = new reporter_1.TestGenUpdatesReporter();
|
|
36
38
|
const session = (0, session_1.getSessionDetails)();
|
|
@@ -74,6 +76,19 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
|
|
|
74
76
|
});
|
|
75
77
|
}
|
|
76
78
|
skill_1.testCaseSkills.updateSkills(skills);
|
|
79
|
+
if (usePlannerInMaster && testCase && specPath) {
|
|
80
|
+
void testgenUpdatesReporter.sendMessage(`Planner is working on task: ${task}`);
|
|
81
|
+
logger.log(`Planner is working on task: ${task}`);
|
|
82
|
+
const plan = await (0, run_1.planTask)({
|
|
83
|
+
task,
|
|
84
|
+
specPath,
|
|
85
|
+
trace,
|
|
86
|
+
});
|
|
87
|
+
void testgenUpdatesReporter.sendMessage(`Here is the plan:\n${plan}`);
|
|
88
|
+
logger.log(`Here is the plan:\n${plan}`);
|
|
89
|
+
// Will assume this is the task hereon
|
|
90
|
+
task = plan;
|
|
91
|
+
}
|
|
77
92
|
const actions = new actions_1.PlaywrightActions(testGenPage, scopeVars);
|
|
78
93
|
await (0, utils_2.injectPwLocatorGenerator)(page);
|
|
79
94
|
trace?.update({ input: { task } });
|
|
@@ -150,11 +165,11 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
|
|
|
150
165
|
const masterAgentActionSpan = masterAgentSpan?.span({
|
|
151
166
|
name: "master-agent-execute-action",
|
|
152
167
|
});
|
|
168
|
+
output = {
|
|
169
|
+
action: args.action || args.skill,
|
|
170
|
+
reason: args.reason,
|
|
171
|
+
};
|
|
153
172
|
try {
|
|
154
|
-
output = {
|
|
155
|
-
action: args.action || args.skill,
|
|
156
|
-
reason: args.reason,
|
|
157
|
-
};
|
|
158
173
|
void testGenReporter.sendMessage(output.reason);
|
|
159
174
|
logger.log(`Next Action: ${output.action}`);
|
|
160
175
|
if (toolCall.actionType === skill_1.SKILL_USAGE) {
|
|
@@ -197,6 +212,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
|
|
|
197
212
|
options,
|
|
198
213
|
});
|
|
199
214
|
if (annotationKeys.length > 0) {
|
|
215
|
+
// TODO: this string has newline characters that makes it harder to read
|
|
200
216
|
const annotationMapString = annotationKeys
|
|
201
217
|
?.map((a) => `${a.elementID}: ${a.text}`)
|
|
202
218
|
.join("\n");
|
|
@@ -254,7 +270,6 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
|
|
|
254
270
|
generatedCodeSteps = await (0, browsing_1.executeTaskUsingBrowsingAgent)({
|
|
255
271
|
trace: masterAgentActionSpan,
|
|
256
272
|
action: output.action,
|
|
257
|
-
logger,
|
|
258
273
|
page,
|
|
259
274
|
options,
|
|
260
275
|
llm,
|
|
@@ -307,8 +322,8 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
|
|
|
307
322
|
trace?.update({ input: { task }, output: { output } });
|
|
308
323
|
masterAgentSpan?.end({
|
|
309
324
|
output: {
|
|
310
|
-
action: output
|
|
311
|
-
reason: output
|
|
325
|
+
action: output?.action,
|
|
326
|
+
reason: output?.reason,
|
|
312
327
|
code: generatedCodeSteps,
|
|
313
328
|
},
|
|
314
329
|
});
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run-time-planner.d.ts","sourceRoot":"","sources":["../../../src/agent/planner/run-time-planner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAKrD;;;;;;GAMG;AACH,wBAAsB,cAAc,CAAC,EACnC,KAAK,EACL,IAAI,EACJ,YAAY,EACZ,KAAK,EACL,WAAW,GACZ,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC5B,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;;;;
|
|
1
|
+
{"version":3,"file":"run-time-planner.d.ts","sourceRoot":"","sources":["../../../src/agent/planner/run-time-planner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAKrD;;;;;;GAMG;AACH,wBAAsB,cAAc,CAAC,EACnC,KAAK,EACL,IAAI,EACJ,YAAY,EACZ,KAAK,EACL,WAAW,GACZ,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC5B,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;;;;GAkHA"}
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import { TraceClient } from "@empiricalrun/llm";
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
testCase: TestCase;
|
|
2
|
+
export declare function planTask({ task, specPath, trace, }: {
|
|
3
|
+
task: string;
|
|
5
4
|
specPath: string;
|
|
6
5
|
trace?: TraceClient;
|
|
7
6
|
}): Promise<string>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/planner/run.ts"],"names":[],"mappings":"AAAA,OAAO,
|
|
1
|
+
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/planner/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AA6BrD,wBAAsB,QAAQ,CAAC,EAC7B,IAAI,EACJ,QAAQ,EACR,KAAK,GACN,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,mBAyCA"}
|
|
@@ -3,6 +3,8 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.planTask = void 0;
|
|
4
4
|
const llm_1 = require("@empiricalrun/llm");
|
|
5
5
|
const context_1 = require("../../bin/utils/context");
|
|
6
|
+
const lib_1 = require("../../prompts/lib");
|
|
7
|
+
const promptTemplate_0 = "{{#section \"system\"}}\nYou are an expert software engineer in test. You are given a task to provide a high level plan to create a test for a\ngiven scenario.\n\nYou will be provided with already added tests and page object object models which you can use to plan out how to write\nthe test.\n\nThe expected plan should be a list of bullet points and each bullet point is a step in the test.\nYou will be provided with app knowledge as well, which can help you groom the steps in the tests.\n{{/section}}\n\n{{#section \"user\"}}\nApp knowledge\n{{appKnowledge}}\n\n-----\n\nCurrent tests and page object model references\n\n{{fileContext}}\n\n------\n\nTask:\n\n{{task}}\n\n------\n\nFollow the steps to create a test plan:\n- create sub tasks\n- read the task step by step and create sub tasks from the given task\n- Ensure no new steps are added which are not mentioned in the task\n- enriched sub tasks\n- Read the app knowledge provided to you and enrich the verified sub tasks based on the provided information in app\nknowledge.\n- Fill in the missing information in the verified sub tasks based on the app knowledge.\n- based on the type of task, add sub tasks to the verified sub tasks based on the matching criteria\n- final plan:\n- Once all the subtasks are enriched, list all the sub tasks as bullet points\n- Each bullet point should be one of the following actions:\n- Open page, Click on, Fill in, Assert, hover on, press, extract textContent\n- Do not respond with points which do not start with above actions.\n\nFollow the steps before responding\n- The steps should only contain bullet points on list of steps for the test\n- Do not add any other assertion which is not mentioned in the task or app knowledge\n- Respond with <create_sub_tasks></create_sub_tasks>\n<enriched_sub_tasks></enriched_sub_tasks> and <final_plan></final_plan>\n- The final plan should not mention reference to the knowledge base used to generate it\n{{/section}}";
|
|
6
8
|
function extractTestPlan(input) {
|
|
7
9
|
const result = {
|
|
8
10
|
createSubTasks: "",
|
|
@@ -20,9 +22,7 @@ function extractTestPlan(input) {
|
|
|
20
22
|
}
|
|
21
23
|
return result;
|
|
22
24
|
}
|
|
23
|
-
async function planTask({
|
|
24
|
-
const task = testCase.steps.join("\n");
|
|
25
|
-
// TODO: fix this
|
|
25
|
+
async function planTask({ task, specPath, trace, }) {
|
|
26
26
|
let fileContext = "";
|
|
27
27
|
try {
|
|
28
28
|
const { pomPrompt, testFileContent } = await (0, context_1.contextForGeneration)(specPath);
|
|
@@ -37,24 +37,15 @@ ${pomPrompt}
|
|
|
37
37
|
const plannerSpan = trace?.span({
|
|
38
38
|
name: "planner",
|
|
39
39
|
input: {
|
|
40
|
-
|
|
40
|
+
task,
|
|
41
41
|
},
|
|
42
42
|
});
|
|
43
43
|
const appKnowledge = await (0, context_1.fetchAppKnowledge)();
|
|
44
|
-
const
|
|
45
|
-
name: "planner-prompt",
|
|
46
|
-
input: {
|
|
47
|
-
appKnowledge,
|
|
48
|
-
fileContext,
|
|
49
|
-
testCase,
|
|
50
|
-
},
|
|
51
|
-
});
|
|
52
|
-
const messages = await (0, llm_1.getPrompt)("planner", {
|
|
44
|
+
const messages = (0, lib_1.compilePrompt)(promptTemplate_0, {
|
|
53
45
|
appKnowledge,
|
|
54
46
|
fileContext,
|
|
55
47
|
task,
|
|
56
|
-
}
|
|
57
|
-
promptSpan?.end({ output: { messages } });
|
|
48
|
+
});
|
|
58
49
|
const llm = new llm_1.LLM({
|
|
59
50
|
provider: "openai",
|
|
60
51
|
//TODO: change to o1
|
package/dist/bin/index.js
CHANGED
|
@@ -102,8 +102,9 @@ async function runAgent(testGenConfig) {
|
|
|
102
102
|
testCase.steps = [requestedChangeResp.output];
|
|
103
103
|
}
|
|
104
104
|
if (agent === "plan") {
|
|
105
|
+
const task = testCase.steps.join("\n");
|
|
105
106
|
const plan = await (0, run_3.planTask)({
|
|
106
|
-
|
|
107
|
+
task,
|
|
107
108
|
specPath,
|
|
108
109
|
trace,
|
|
109
110
|
});
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAOlC,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAQpC,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC,EAAE,SAAS,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAOlC,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAQpC,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC,EAAE,SAAS,iBAuC3E"}
|
package/dist/index.js
CHANGED
|
@@ -31,8 +31,10 @@ async function createTest(task, page, scope) {
|
|
|
31
31
|
projectRepoName: testGenConfig.options?.metadata.projectRepoName,
|
|
32
32
|
});
|
|
33
33
|
const fileService = new client_1.default(Number(port));
|
|
34
|
+
const { testCase, specPath } = testGenConfig;
|
|
34
35
|
const { code, importPaths } = await (0, run_1.createTestUsingMasterAgent)({
|
|
35
|
-
testCase
|
|
36
|
+
testCase,
|
|
37
|
+
specPath,
|
|
36
38
|
page,
|
|
37
39
|
task,
|
|
38
40
|
options: {
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { LLMProvider } from "@empiricalrun/llm";
|
|
2
|
+
import OpenAI from "openai";
|
|
3
|
+
type PromptOptions = {
|
|
4
|
+
modelProvider?: LLMProvider;
|
|
5
|
+
};
|
|
6
|
+
export declare function compilePrompt<T extends object>(promptTemplate: string, params: T, options?: PromptOptions): OpenAI.Chat.Completions.ChatCompletionMessageParam[];
|
|
7
|
+
export {};
|
|
8
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/prompts/lib/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAGhD,OAAO,MAAM,MAAM,QAAQ,CAAC;AAyF5B,KAAK,aAAa,GAAG;IACnB,aAAa,CAAC,EAAE,WAAW,CAAC;CAC7B,CAAC;AAEF,wBAAgB,aAAa,CAAC,CAAC,SAAS,MAAM,EAC5C,cAAc,EAAE,MAAM,EACtB,MAAM,EAAE,CAAC,EACT,OAAO,CAAC,EAAE,aAAa,GACtB,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,0BAA0B,EAAE,CAwCtD"}
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.compilePrompt = void 0;
|
|
7
|
+
const vision_1 = require("@empiricalrun/llm/vision");
|
|
8
|
+
const handlebars_1 = __importDefault(require("handlebars"));
|
|
9
|
+
const constants_1 = require("../../constants");
|
|
10
|
+
class SectionManager {
|
|
11
|
+
sections = {};
|
|
12
|
+
getSection(name) {
|
|
13
|
+
return this.sections[name] || "";
|
|
14
|
+
}
|
|
15
|
+
setSection(name, content) {
|
|
16
|
+
this.sections[name] = content;
|
|
17
|
+
}
|
|
18
|
+
getAllSections() {
|
|
19
|
+
return this.sections;
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
const IMAGE_TOKEN_PREFIX = "[[[HANDLEBARS_IMAGE:";
|
|
23
|
+
const IMAGE_TOKEN_SUFFIX = "]]]";
|
|
24
|
+
function createHandlebarsEnv() {
|
|
25
|
+
const HandlebarsEnv = handlebars_1.default.create();
|
|
26
|
+
const sectionManager = new SectionManager();
|
|
27
|
+
HandlebarsEnv.registerHelper("section", function (name, options) {
|
|
28
|
+
const content = options.fn(this);
|
|
29
|
+
sectionManager.setSection(name, content);
|
|
30
|
+
return ""; // don't output anything in place
|
|
31
|
+
});
|
|
32
|
+
HandlebarsEnv.registerHelper("image", function (imageParam) {
|
|
33
|
+
const tokenPayload = JSON.stringify({ url: imageParam });
|
|
34
|
+
// Use encodeURIComponent to avoid conflicts with special characters.
|
|
35
|
+
const token = `${IMAGE_TOKEN_PREFIX}${encodeURIComponent(tokenPayload)}${IMAGE_TOKEN_SUFFIX}`;
|
|
36
|
+
return token;
|
|
37
|
+
});
|
|
38
|
+
HandlebarsEnv.registerHelper("images", function (imagesParam) {
|
|
39
|
+
if (!Array.isArray(imagesParam))
|
|
40
|
+
return "";
|
|
41
|
+
return imagesParam
|
|
42
|
+
.map((url) => {
|
|
43
|
+
const tokenPayload = JSON.stringify({ url });
|
|
44
|
+
return `${IMAGE_TOKEN_PREFIX}${encodeURIComponent(tokenPayload)}${IMAGE_TOKEN_SUFFIX}`;
|
|
45
|
+
})
|
|
46
|
+
.join("");
|
|
47
|
+
});
|
|
48
|
+
return { HandlebarsEnv, sectionManager };
|
|
49
|
+
}
|
|
50
|
+
function processSectionContent(content) {
|
|
51
|
+
if (!content.includes(IMAGE_TOKEN_PREFIX)) {
|
|
52
|
+
return content.trim();
|
|
53
|
+
}
|
|
54
|
+
const segments = [];
|
|
55
|
+
const regex = /\[\[\[HANDLEBARS_IMAGE:(.*?)\]\]\]/g;
|
|
56
|
+
let lastIndex = 0;
|
|
57
|
+
let match;
|
|
58
|
+
while ((match = regex.exec(content)) !== null) {
|
|
59
|
+
// Get the text before the token.
|
|
60
|
+
const textPart = content.slice(lastIndex, match.index).trim();
|
|
61
|
+
if (textPart) {
|
|
62
|
+
segments.push({ type: "text", text: textPart });
|
|
63
|
+
}
|
|
64
|
+
// Decode the token payload.
|
|
65
|
+
try {
|
|
66
|
+
const payloadJson = decodeURIComponent(match[1]);
|
|
67
|
+
const payload = JSON.parse(payloadJson);
|
|
68
|
+
segments.push({ type: "image_url", image_url: { url: payload.url } });
|
|
69
|
+
}
|
|
70
|
+
catch (err) {
|
|
71
|
+
// If decoding/parsing fails, treat the token as literal text.
|
|
72
|
+
segments.push({ type: "text", text: match[0] });
|
|
73
|
+
}
|
|
74
|
+
lastIndex = match.index + match[0].length;
|
|
75
|
+
}
|
|
76
|
+
const remaining = content.slice(lastIndex).trim();
|
|
77
|
+
if (remaining) {
|
|
78
|
+
segments.push({ type: "text", text: remaining });
|
|
79
|
+
}
|
|
80
|
+
return segments;
|
|
81
|
+
}
|
|
82
|
+
function compilePrompt(promptTemplate, params, options) {
|
|
83
|
+
const { HandlebarsEnv, sectionManager } = createHandlebarsEnv();
|
|
84
|
+
const template = HandlebarsEnv.compile(promptTemplate, { noEscape: true });
|
|
85
|
+
template(params);
|
|
86
|
+
const sections = sectionManager.getAllSections();
|
|
87
|
+
// TODO: system cannot have images, we can add validation for that
|
|
88
|
+
const system = sections["system"];
|
|
89
|
+
const user = sections["user"];
|
|
90
|
+
if (!system || !user) {
|
|
91
|
+
// TODO: support templates that have only one section
|
|
92
|
+
throw new Error("Both system and user sections must be defined in the template");
|
|
93
|
+
}
|
|
94
|
+
const systemContent = processSectionContent(system);
|
|
95
|
+
const userContent = processSectionContent(user);
|
|
96
|
+
let userContentCorrectedForImageFormat = userContent;
|
|
97
|
+
if (Array.isArray(userContent)) {
|
|
98
|
+
const provider = options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER;
|
|
99
|
+
userContentCorrectedForImageFormat = userContent.map((c) => {
|
|
100
|
+
if (c.type === "image_url") {
|
|
101
|
+
return {
|
|
102
|
+
...c,
|
|
103
|
+
image_url: {
|
|
104
|
+
url: (0, vision_1.imageFormatForProvider)(provider, c.image_url.url),
|
|
105
|
+
},
|
|
106
|
+
};
|
|
107
|
+
}
|
|
108
|
+
else {
|
|
109
|
+
return c;
|
|
110
|
+
}
|
|
111
|
+
});
|
|
112
|
+
}
|
|
113
|
+
return [
|
|
114
|
+
{ role: "system", content: systemContent },
|
|
115
|
+
{ role: "user", content: userContentCorrectedForImageFormat },
|
|
116
|
+
];
|
|
117
|
+
}
|
|
118
|
+
exports.compilePrompt = compilePrompt;
|
package/dist/types/index.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { ModelParameters, TraceClient } from "@empiricalrun/llm";
|
|
2
|
+
import { Agent, LLMModel, LLMProvider } from "@empiricalrun/shared-types";
|
|
2
3
|
import OpenAI from "openai";
|
|
3
4
|
import { TestGenPage } from "../page";
|
|
4
5
|
export type FileContent = {
|
|
@@ -10,14 +11,11 @@ export type Environment = {
|
|
|
10
11
|
slug: string;
|
|
11
12
|
playwrightProjects: string[];
|
|
12
13
|
};
|
|
13
|
-
export type Agent = "code" | "master" | "auto" | "plan";
|
|
14
14
|
export type TestGenConfigOptions = {
|
|
15
15
|
agent: Agent;
|
|
16
16
|
model: LLMModel;
|
|
17
17
|
modelProvider: LLMProvider;
|
|
18
18
|
modelParameters?: ModelParameters;
|
|
19
|
-
useActionSpecificAnnotations?: boolean;
|
|
20
|
-
useStrReplace?: boolean;
|
|
21
19
|
metadata: {
|
|
22
20
|
testSessionId: number;
|
|
23
21
|
generationId: number;
|
|
@@ -26,6 +24,9 @@ export type TestGenConfigOptions = {
|
|
|
26
24
|
projectName: string;
|
|
27
25
|
environment: "development" | "production";
|
|
28
26
|
};
|
|
27
|
+
useActionSpecificAnnotations?: boolean;
|
|
28
|
+
useStrReplace?: boolean;
|
|
29
|
+
usePlannerInMaster?: boolean;
|
|
29
30
|
};
|
|
30
31
|
export type Build = {
|
|
31
32
|
url?: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAAA,OAAO,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACjE,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,WAAW,EAAE,MAAM,4BAA4B,CAAC;AAC1E,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,OAAO,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AAEtC,MAAM,MAAM,WAAW,GAAG;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;CACjB,CAAC;AAEF,MAAM,MAAM,WAAW,GAAG;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,kBAAkB,EAAE,MAAM,EAAE,CAAC;CAC9B,CAAC;AAEF,MAAM,MAAM,oBAAoB,GAAG;IACjC,KAAK,EAAE,KAAK,CAAC;IACb,KAAK,EAAE,QAAQ,CAAC;IAChB,aAAa,EAAE,WAAW,CAAC;IAC3B,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC,QAAQ,EAAE;QACR,aAAa,EAAE,MAAM,CAAC;QACtB,YAAY,EAAE,MAAM,CAAC;QACrB,YAAY,EAAE,MAAM,CAAC;QACrB,eAAe,EAAE,MAAM,CAAC;QACxB,WAAW,EAAE,MAAM,CAAC;QACpB,WAAW,EAAE,aAAa,GAAG,YAAY,CAAC;KAC3C,CAAC;IAEF,4BAA4B,CAAC,EAAE,OAAO,CAAC;IACvC,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,kBAAkB,CAAC,EAAE,OAAO,CAAC;CAC9B,CAAC;AAEF,MAAM,MAAM,KAAK,GAAG;IAClB,GAAG,CAAC,EAAE,MAAM,CAAC;CACd,CAAC;AAEF,MAAM,MAAM,aAAa,GAAG;IAC1B,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,QAAQ,CAAC;IACnB,KAAK,CAAC,EAAE,KAAK,CAAC;IACd,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,kBAAkB,CAAC,EAAE,yBAAyB,CAAC;IAC/C,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,CAAC;AAEF,MAAM,MAAM,yBAAyB,GAAG;IACtC,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;CACrB,CAAC;AAEF,MAAM,MAAM,QAAQ,GAAG;IACrB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB,CAAC;AAEF,MAAM,MAAM,yBAAyB,GAAG,CACtC,IAAI,EAAE,WAAW,EACjB,OAAO,EAAE;IACP,cAAc,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IACpC,iBAAiB,EAAE,CAAC,cAAc,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,KAAK,IAAI,CAAC;CAClE,KACE,MAAM,CAAC;AAEZ,MAAM,MAAM,YAAY,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,kBAAkB,CAAC;AAEtE,MAAM,MAAM,MAAM,GAAG;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,YAAY,CAAC;IACrB,OAAO,EAAE,CAAC,OAAO,EAAE;QACjB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC1B,KAAK,CAAC,EAAE,WAAW,CAAC;KACrB,KAAK,O
AAO,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,YAAY,CAAC,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC,CAAC;IACjE,QAAQ,EAAE,CACR,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EACzB,OAAO,EAAE;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,YAAY,CAAC,EAAE,MAAM,CAAA;KAAE,KAChD;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,UAAU,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;CAC5C,CAAC;AAEF,MAAM,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@empiricalrun/test-gen",
|
|
3
|
-
"version": "0.42.
|
|
3
|
+
"version": "0.42.20",
|
|
4
4
|
"publishConfig": {
|
|
5
5
|
"registry": "https://registry.npmjs.org/",
|
|
6
6
|
"access": "public"
|
|
@@ -73,8 +73,8 @@
|
|
|
73
73
|
"tsx": "^4.16.2",
|
|
74
74
|
"typescript": "^5.3.3",
|
|
75
75
|
"@empiricalrun/llm": "^0.9.32",
|
|
76
|
-
"@empiricalrun/
|
|
77
|
-
"@empiricalrun/
|
|
76
|
+
"@empiricalrun/r2-uploader": "^0.3.8",
|
|
77
|
+
"@empiricalrun/reporter": "^0.23.1"
|
|
78
78
|
},
|
|
79
79
|
"devDependencies": {
|
|
80
80
|
"@playwright/test": "1.47.1",
|
|
@@ -86,7 +86,8 @@
|
|
|
86
86
|
"@types/md5": "^2.3.5",
|
|
87
87
|
"js-levenshtein": "^1.1.6",
|
|
88
88
|
"playwright": "1.47.1",
|
|
89
|
-
"ts-patch": "^3.3.0"
|
|
89
|
+
"ts-patch": "^3.3.0",
|
|
90
|
+
"@empiricalrun/shared-types": "0.0.0"
|
|
90
91
|
},
|
|
91
92
|
"scripts": {
|
|
92
93
|
"dev": "tspc --build --watch",
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"promptBuilder.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/promptBuilder.ts"],"names":[],"mappings":"AACA,OAAO,MAAM,MAAM,QAAQ,CAAC;AA6B5B,wBAAsB,aAAa,CAAC,CAAC,SAAS,MAAM,EAClD,cAAc,EAAE,MAAM,EACtB,MAAM,EAAE,CAAC,GACR,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,0BAA0B,EAAE,CAAC,CAe/D"}
|
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.compilePrompt = void 0;
|
|
7
|
-
const handlebars_1 = __importDefault(require("handlebars"));
|
|
8
|
-
class SectionManager {
|
|
9
|
-
sections = {};
|
|
10
|
-
getSection(name) {
|
|
11
|
-
return this.sections[name] || "";
|
|
12
|
-
}
|
|
13
|
-
setSection(name, content) {
|
|
14
|
-
this.sections[name] = content;
|
|
15
|
-
}
|
|
16
|
-
getAllSections() {
|
|
17
|
-
return this.sections;
|
|
18
|
-
}
|
|
19
|
-
}
|
|
20
|
-
function createHandlebarsEnv() {
|
|
21
|
-
const HandlebarsEnv = handlebars_1.default.create();
|
|
22
|
-
const sectionManager = new SectionManager();
|
|
23
|
-
HandlebarsEnv.registerHelper("section", function (name, options) {
|
|
24
|
-
const content = options.fn(this);
|
|
25
|
-
sectionManager.setSection(name, content);
|
|
26
|
-
return ""; // Don't output anything in place
|
|
27
|
-
});
|
|
28
|
-
return { HandlebarsEnv, sectionManager };
|
|
29
|
-
}
|
|
30
|
-
async function compilePrompt(promptTemplate, params) {
|
|
31
|
-
const { HandlebarsEnv, sectionManager } = createHandlebarsEnv();
|
|
32
|
-
const template = HandlebarsEnv.compile(promptTemplate, { noEscape: true });
|
|
33
|
-
template(params);
|
|
34
|
-
const { system, user } = sectionManager.getAllSections();
|
|
35
|
-
if (!system || !user) {
|
|
36
|
-
// TODO: support templates that have only one section
|
|
37
|
-
throw new Error("Both system and user sections must be defined in the template");
|
|
38
|
-
}
|
|
39
|
-
return [
|
|
40
|
-
{ role: "system", content: system },
|
|
41
|
-
{ role: "user", content: user },
|
|
42
|
-
];
|
|
43
|
-
}
|
|
44
|
-
exports.compilePrompt = compilePrompt;
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
import { TraceClient } from "@empiricalrun/llm";
|
|
2
|
-
/**
|
|
3
|
-
* This agent is used to verify whether the task is done basis the conversation history
|
|
4
|
-
*/
|
|
5
|
-
export declare function verificationAgent({ trace, task, conversation, }: {
|
|
6
|
-
trace?: TraceClient;
|
|
7
|
-
conversation: string[];
|
|
8
|
-
task: string;
|
|
9
|
-
}): Promise<{
|
|
10
|
-
isDone: boolean;
|
|
11
|
-
reason: string;
|
|
12
|
-
}>;
|
|
13
|
-
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/verification/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAIhE;;GAEG;AACH,wBAAsB,iBAAiB,CAAC,EACtC,KAAK,EACL,IAAI,EACJ,YAAY,GACb,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,IAAI,EAAE,MAAM,CAAC;CACd;;;GA+EA"}
|
|
@@ -1,84 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.verificationAgent = void 0;
|
|
4
|
-
const llm_1 = require("@empiricalrun/llm");
|
|
5
|
-
const utils_1 = require("../utils");
|
|
6
|
-
/**
 * Verifies whether the task is done, based on the conversation history.
 *
 * Loads the "agent-steps-verification" prompt (the conversation entries are
 * joined with newlines), asks the "gpt-4o" model to call the `task_done`
 * tool, and reads the verdict from the first tool call of the response.
 *
 * Parameters (single options object):
 *   trace        - optional trace client; when present, a "verification-agent"
 *                  span is opened and later ended with the returned output.
 *   task         - the task description passed to the prompt.
 *   conversation - array of conversation entries.
 *
 * Returns a promise of `{ isDone, reason }`. When the response carries no
 * tool call, or the tool-call arguments do not parse into an object with a
 * boolean `isDone`, the result is
 * `{ isDone: false, reason: "LLM failed to generate a valid response" }`.
 */
async function verificationAgent({ trace, task, conversation, }) {
    const verificationAgentSpan = trace?.span({
        name: "verification-agent",
        input: {
            task,
            conversation,
        },
    });
    const messages = await (0, llm_1.getPrompt)("agent-steps-verification", {
        task,
        conversation: conversation.join("\n"),
    }, 5);
    const llm = new llm_1.LLM({ provider: "openai" });
    const response = await llm.createChatCompletion({
        trace: verificationAgentSpan,
        traceName: "verification-agent-llm",
        model: "gpt-4o",
        messages,
        tools: [
            {
                type: "function",
                function: {
                    name: "task_done",
                    description: "end the task by calling this method",
                    parameters: {
                        type: "object",
                        properties: {
                            actions: {
                                type: "string",
                                description: "actions extracted from task",
                            },
                            successful_actions: {
                                type: "string",
                                description: "successful actions mentioned in the conversation",
                            },
                            reason: {
                                type: "string",
                                description: "reasoning for identification of task status",
                            },
                            isDone: {
                                type: "boolean",
                                description: "whether the task is done",
                            },
                        },
                        required: ["isDone", "reason"],
                    },
                },
            },
        ],
        modelParameters: {
            tool_choice: "required",
            temperature: 0.5,
        },
    });
    const toolCallResp = (response?.tool_calls || [])[0];
    // Tool-call arguments are model-generated text, so the parsed value is
    // validated before use instead of being dereferenced blindly.
    const toolCall = toolCallResp
        ? (0, utils_1.parseJson)(toolCallResp.function.arguments)
        : undefined;
    const isValidVerdict = toolCall !== null &&
        typeof toolCall === "object" &&
        typeof toolCall.isDone === "boolean";
    const output = isValidVerdict
        ? {
            isDone: toolCall.isDone,
            // Keep `reason` a string even if the model omitted or mistyped it.
            reason: typeof toolCall.reason === "string" ? toolCall.reason : "",
        }
        : {
            isDone: false,
            reason: "LLM failed to generate a valid response",
        };
    // Single exit point: the span is always ended with whatever is returned.
    verificationAgentSpan?.end({
        output,
    });
    return output;
}
exports.verificationAgent = verificationAgent;
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"verification-agent.evals.d.ts","sourceRoot":"","sources":["../../src/evals/verification-agent.evals.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAEpC,eAAO,MAAM,qBAAqB,EAAE,UAgBnC,CAAC;AAEF,eAAe,qBAAqB,CAAC"}
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.verifierAgentEvaluate = void 0;
|
|
4
|
-
const verification_1 = require("../agent/verification");
|
|
5
|
-
/**
 * Eval scorer for the verification agent.
 *
 * Runs `verificationAgent` on the item's input (`conversation` defaults to
 * an empty array and `task` to an empty string when undefined) and reports a
 * single "equality" score: 1 when the agent's `isDone` strictly equals
 * `item.expectedOutput.isDone`, otherwise 0. The agent output is returned
 * alongside the scores.
 */
const verifierAgentEvaluate = async ({ item, trace }) => {
    const { conversation = [], task = "" } = item.input;
    const agentArgs = { conversation, trace, task };
    const verdict = await (0, verification_1.verificationAgent)(agentArgs);
    const matchesExpected = item.expectedOutput.isDone === verdict.isDone;
    const equalityScore = {
        name: "equality",
        value: matchesExpected ? 1 : 0,
    };
    return {
        scores: [equalityScore],
        output: verdict,
    };
};
exports.verifierAgentEvaluate = verifierAgentEvaluate;
exports.default = exports.verifierAgentEvaluate;
|