@empiricalrun/test-gen 0.31.19 → 0.31.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/dist/actions/assert.d.ts.map +1 -1
- package/dist/actions/assert.js +6 -4
- package/dist/actions/click.d.ts.map +1 -1
- package/dist/actions/click.js +5 -3
- package/dist/actions/done.js +1 -1
- package/dist/actions/fill.d.ts.map +1 -1
- package/dist/actions/fill.js +4 -2
- package/dist/actions/goto.d.ts.map +1 -1
- package/dist/actions/goto.js +5 -3
- package/dist/actions/hover.js +2 -2
- package/dist/actions/index.d.ts +8 -3
- package/dist/actions/index.d.ts.map +1 -1
- package/dist/actions/index.js +56 -9
- package/dist/actions/reload-page.d.ts.map +1 -1
- package/dist/actions/reload-page.js +4 -2
- package/dist/actions/skill.d.ts +18 -0
- package/dist/actions/skill.d.ts.map +1 -0
- package/dist/actions/skill.js +94 -0
- package/dist/actions/text-content.d.ts.map +1 -1
- package/dist/actions/text-content.js +4 -2
- package/dist/agent/browsing/index.d.ts +13 -3
- package/dist/agent/browsing/index.d.ts.map +1 -1
- package/dist/agent/browsing/index.js +119 -207
- package/dist/agent/codegen/create-test-block.js +1 -1
- package/dist/agent/codegen/skills-retriever.d.ts +13 -0
- package/dist/agent/codegen/skills-retriever.d.ts.map +1 -0
- package/dist/agent/codegen/skills-retriever.js +61 -0
- package/dist/agent/codegen/use-skill.d.ts +9 -0
- package/dist/agent/codegen/use-skill.d.ts.map +1 -0
- package/dist/agent/codegen/use-skill.js +49 -0
- package/dist/agent/codegen/utils.d.ts +9 -0
- package/dist/agent/codegen/utils.d.ts.map +1 -1
- package/dist/agent/codegen/utils.js +20 -1
- package/dist/agent/master/run.d.ts +16 -5
- package/dist/agent/master/run.d.ts.map +1 -1
- package/dist/agent/master/run.js +178 -38
- package/dist/agent/verification/index.d.ts.map +1 -1
- package/dist/agent/verification/index.js +2 -0
- package/dist/bin/utils/context.d.ts +1 -0
- package/dist/bin/utils/context.d.ts.map +1 -1
- package/dist/bin/utils/context.js +7 -2
- package/dist/bin/utils/platform/web/index.d.ts +1 -0
- package/dist/bin/utils/platform/web/index.d.ts.map +1 -1
- package/dist/bin/utils/platform/web/index.js +28 -1
- package/dist/file/client.d.ts +2 -1
- package/dist/file/client.d.ts.map +1 -1
- package/dist/file/client.js +2 -2
- package/dist/file/server.d.ts.map +1 -1
- package/dist/file/server.js +3 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +13 -5
- package/dist/types/index.d.ts +10 -4
- package/dist/types/index.d.ts.map +1 -1
- package/package.json +3 -3
|
@@ -1,229 +1,141 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.
|
|
7
|
-
const llm_1 = require("@empiricalrun/llm");
|
|
8
|
-
const crypto_1 = __importDefault(require("crypto"));
|
|
9
|
-
const actions_1 = require("../../actions");
|
|
10
|
-
const logger_1 = require("../../bin/logger");
|
|
3
|
+
exports.executeTaskUsingBrowsingAgent = void 0;
|
|
11
4
|
const constants_1 = require("../../constants");
|
|
12
5
|
const reporter_1 = require("../../reporter");
|
|
13
6
|
const session_1 = require("../../session");
|
|
14
7
|
const html_1 = require("../../utils/html");
|
|
15
|
-
const run_1 = require("../master/run");
|
|
16
8
|
const verification_1 = require("../verification");
|
|
17
9
|
const o1_completion_1 = require("./o1-completion");
|
|
18
10
|
const utils_1 = require("./utils");
|
|
19
|
-
async function
|
|
20
|
-
|
|
21
|
-
const
|
|
22
|
-
// add timeout for the page to settle in
|
|
23
|
-
await page.waitForTimeout(3000);
|
|
24
|
-
const trace = llm_1.langfuseInstance.trace({
|
|
25
|
-
name: "test-generator",
|
|
26
|
-
id: crypto_1.default.randomUUID(),
|
|
27
|
-
version: (0, session_1.getSessionDetails)().version,
|
|
28
|
-
metadata: {
|
|
29
|
-
generationId: (0, session_1.getSessionDetails)().generationId,
|
|
30
|
-
sessionId: (0, session_1.getSessionDetails)().sessionId,
|
|
31
|
-
},
|
|
32
|
-
tags: [
|
|
33
|
-
options.metadata?.projectName,
|
|
34
|
-
options.metadata?.environment,
|
|
35
|
-
].filter((s) => !!s),
|
|
36
|
-
});
|
|
37
|
-
const llm = new llm_1.LLM({
|
|
38
|
-
trace,
|
|
39
|
-
provider: options.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
|
|
40
|
-
defaultModel: options.model || constants_1.DEFAULT_MODEL,
|
|
41
|
-
providerApiKey: constants_1.MODEL_API_KEYS[options.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER],
|
|
42
|
-
// we will be using google model for larger context window, in such cases 1 million tokens is not enough
|
|
43
|
-
maxTokens: options.modelProvider === "google" ? 3000000 : 1000000,
|
|
44
|
-
});
|
|
45
|
-
const actions = new actions_1.PlaywrightActions(page);
|
|
46
|
-
const tools = actions.getActionSchemas();
|
|
47
|
-
await (0, utils_1.injectPwLocatorGenerator)(page);
|
|
48
|
-
trace.update({ input: { task } });
|
|
11
|
+
async function executeTaskUsingBrowsingAgent({ trace, action, logger, page, options, llm, actions, }) {
|
|
12
|
+
let isTaskDone = false;
|
|
13
|
+
const executedActions = [];
|
|
49
14
|
let lastActionExecTrace = "";
|
|
50
|
-
|
|
51
|
-
const
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
15
|
+
const tools = actions.getBrowsingActionSchemas();
|
|
16
|
+
const testgenUpdatesReporter = new reporter_1.TestGenUpdatesReporter();
|
|
17
|
+
while (!isTaskDone) {
|
|
18
|
+
const browsingAgentSpan = trace.span({
|
|
19
|
+
name: `browsing-agent`,
|
|
20
|
+
});
|
|
21
|
+
const sessionState = await (0, session_1.getSessionState)();
|
|
22
|
+
if (sessionState === "request_complete") {
|
|
23
|
+
break;
|
|
24
|
+
}
|
|
25
|
+
const pageContentSpan = browsingAgentSpan.span({
|
|
26
|
+
name: "page-content",
|
|
27
|
+
});
|
|
28
|
+
const pageContent = await page.content();
|
|
29
|
+
pageContentSpan.end({ output: { pageContent } });
|
|
30
|
+
const sanitizationSpan = browsingAgentSpan.span({
|
|
31
|
+
name: "page-sanitization",
|
|
32
|
+
});
|
|
33
|
+
const pageSnapshot = (0, html_1.sanitizeHtml)(pageContent, options.htmlSanitize);
|
|
34
|
+
sanitizationSpan.end({ output: { pageSnapshot } });
|
|
35
|
+
const promptSpan = browsingAgentSpan.span({ name: "page-prompt" });
|
|
36
|
+
// extract all successful actions
|
|
37
|
+
const successfulActions = executedActions
|
|
38
|
+
.filter((a) => !a.isError)
|
|
39
|
+
.map((a) => a.action);
|
|
40
|
+
if (successfulActions.length > 0) {
|
|
41
|
+
const verificationAgentResp = await (0, verification_1.verificationAgent)({
|
|
42
|
+
llm,
|
|
43
|
+
trace: browsingAgentSpan,
|
|
44
|
+
task: action,
|
|
45
|
+
conversation: ["Successfully executed actions", ...successfulActions],
|
|
46
|
+
});
|
|
47
|
+
isTaskDone = verificationAgentResp.isDone;
|
|
48
|
+
logger.log(`isTaskDone: ${isTaskDone}`);
|
|
49
|
+
logger.log(`reason: ${verificationAgentResp.reason}`);
|
|
50
|
+
if (isTaskDone) {
|
|
51
|
+
browsingAgentSpan.event({ name: "task-done" });
|
|
52
|
+
browsingAgentSpan.end({
|
|
53
|
+
output: { taskDone: true, reason: verificationAgentResp.reason },
|
|
64
54
|
});
|
|
65
|
-
|
|
66
|
-
if (isGivenTaskDone) {
|
|
67
|
-
await testgenUpdatesReporter.sendMessage(`${verificationAgentResp.reason} Marking the task as done.`);
|
|
68
|
-
break;
|
|
69
|
-
}
|
|
55
|
+
break;
|
|
70
56
|
}
|
|
57
|
+
}
|
|
58
|
+
const messages = await (0, utils_1.getPromptForNextAction)({
|
|
59
|
+
pageSnapshot,
|
|
60
|
+
previousActions: successfulActions,
|
|
61
|
+
task: action,
|
|
62
|
+
lastActionErrors: lastActionExecTrace ? [lastActionExecTrace] : [],
|
|
63
|
+
promptType: "browsing-agent-as-tool",
|
|
64
|
+
});
|
|
65
|
+
promptSpan.end({ output: { messages } });
|
|
66
|
+
let completion;
|
|
67
|
+
completion = await (0, o1_completion_1.getO1Completion)({
|
|
68
|
+
//@ts-ignore
|
|
69
|
+
messages,
|
|
70
|
+
tools,
|
|
71
|
+
trace: browsingAgentSpan,
|
|
72
|
+
});
|
|
73
|
+
// If O1 completion fails due to any reason, resort to old flow
|
|
74
|
+
if (!completion) {
|
|
75
|
+
completion = await llm.createChatCompletion({
|
|
76
|
+
messages,
|
|
77
|
+
tools,
|
|
78
|
+
trace: browsingAgentSpan,
|
|
79
|
+
model: options.model || constants_1.DEFAULT_MODEL,
|
|
80
|
+
modelParameters: {
|
|
81
|
+
...constants_1.DEFAULT_MODEL_PARAMETERS,
|
|
82
|
+
...options.modelParameters,
|
|
83
|
+
tool_choice: "required",
|
|
84
|
+
},
|
|
85
|
+
});
|
|
86
|
+
}
|
|
87
|
+
const toolCalls = completion?.tool_calls || [];
|
|
88
|
+
// LLM might respond with empty tool_calls and we can go into endless loop
|
|
89
|
+
// if we donot record this action and mark it as error
|
|
90
|
+
if (!toolCalls.length) {
|
|
91
|
+
executedActions.push({
|
|
92
|
+
isError: true,
|
|
93
|
+
action: "",
|
|
94
|
+
});
|
|
95
|
+
}
|
|
96
|
+
const toolCallsSpan = browsingAgentSpan.span({ name: "tool-calls" });
|
|
97
|
+
for (const i in toolCalls) {
|
|
98
|
+
const toolCall = toolCalls[i];
|
|
71
99
|
const sessionState = await (0, session_1.getSessionState)();
|
|
72
100
|
if (sessionState === "request_complete") {
|
|
73
|
-
await testgenUpdatesReporter.sendMessage("Aborting task, marking the task as done.");
|
|
74
|
-
break;
|
|
75
|
-
}
|
|
76
|
-
const { action, reason } = await (0, run_1.masterAgent)(task, page, masterAgentActions, masterAgentSpan, llm, options);
|
|
77
|
-
logger.log(`Next action: ${action} \n reason: ${reason}`);
|
|
78
|
-
if (!action) {
|
|
79
101
|
break;
|
|
80
102
|
}
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
while (!isTaskDone) {
|
|
87
|
-
const browsingAgentSpan = masterAgentSpan.span({
|
|
88
|
-
name: `browsing-agent`,
|
|
89
|
-
});
|
|
90
|
-
const sessionState = await (0, session_1.getSessionState)();
|
|
91
|
-
if (sessionState === "request_complete") {
|
|
92
|
-
break;
|
|
93
|
-
}
|
|
94
|
-
const pageContentSpan = browsingAgentSpan.span({
|
|
95
|
-
name: "page-content",
|
|
96
|
-
});
|
|
97
|
-
const pageContent = await page.content();
|
|
98
|
-
pageContentSpan.end({ output: { pageContent } });
|
|
99
|
-
const sanitizationSpan = browsingAgentSpan.span({
|
|
100
|
-
name: "page-sanitization",
|
|
101
|
-
});
|
|
102
|
-
const pageSnapshot = (0, html_1.sanitizeHtml)(pageContent, options.htmlSanitize);
|
|
103
|
-
sanitizationSpan.end({ output: { pageSnapshot } });
|
|
104
|
-
const promptSpan = browsingAgentSpan.span({ name: "page-prompt" });
|
|
105
|
-
// extract all successful actions
|
|
106
|
-
const successfulActions = executedActions
|
|
107
|
-
.filter((a) => !a.isError)
|
|
108
|
-
.map((a) => a.action);
|
|
109
|
-
if (successfulActions.length > 0) {
|
|
110
|
-
const verificationAgentResp = await (0, verification_1.verificationAgent)({
|
|
111
|
-
llm,
|
|
112
|
-
trace: browsingAgentSpan,
|
|
113
|
-
task: action,
|
|
114
|
-
conversation: [
|
|
115
|
-
"Successfully executed actions",
|
|
116
|
-
...successfulActions,
|
|
117
|
-
],
|
|
118
|
-
});
|
|
119
|
-
isTaskDone = verificationAgentResp.isDone;
|
|
120
|
-
logger.log(`isTaskDone: ${isTaskDone}`);
|
|
121
|
-
logger.log(`reason: ${verificationAgentResp.reason}`);
|
|
122
|
-
if (isTaskDone) {
|
|
123
|
-
browsingAgentSpan.event({ name: "task-done" });
|
|
124
|
-
browsingAgentSpan.end({
|
|
125
|
-
output: { taskDone: true, reason: verificationAgentResp.reason },
|
|
126
|
-
});
|
|
127
|
-
break;
|
|
128
|
-
}
|
|
129
|
-
}
|
|
130
|
-
const messages = await (0, utils_1.getPromptForNextAction)({
|
|
131
|
-
pageSnapshot,
|
|
132
|
-
previousActions: successfulActions,
|
|
133
|
-
task: action,
|
|
134
|
-
lastActionErrors: lastActionExecTrace ? [lastActionExecTrace] : [],
|
|
135
|
-
promptType: "browsing-agent-as-tool",
|
|
103
|
+
try {
|
|
104
|
+
await actions.executeAction(toolCall.function.name, JSON.parse(toolCall.function.arguments), toolCallsSpan);
|
|
105
|
+
executedActions.push({
|
|
106
|
+
isError: false,
|
|
107
|
+
action: JSON.stringify(toolCall),
|
|
136
108
|
});
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
109
|
+
lastActionExecTrace = "";
|
|
110
|
+
}
|
|
111
|
+
catch (e) {
|
|
112
|
+
// TODO: implement feedback loop to llm
|
|
113
|
+
executedActions.push({
|
|
114
|
+
isError: true,
|
|
115
|
+
action: JSON.stringify(toolCall.function.arguments)?.reason,
|
|
144
116
|
});
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
//
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
action: "",
|
|
166
|
-
});
|
|
167
|
-
}
|
|
168
|
-
const toolCallsSpan = browsingAgentSpan.span({ name: "tool-calls" });
|
|
169
|
-
for (const i in toolCalls) {
|
|
170
|
-
const toolCall = toolCalls[i];
|
|
171
|
-
const sessionState = await (0, session_1.getSessionState)();
|
|
172
|
-
if (sessionState === "request_complete") {
|
|
173
|
-
break;
|
|
174
|
-
}
|
|
175
|
-
try {
|
|
176
|
-
await actions.executeAction(toolCall.function.name, JSON.parse(toolCall.function.arguments));
|
|
177
|
-
executedActions.push({
|
|
178
|
-
isError: false,
|
|
179
|
-
action: JSON.stringify(toolCall),
|
|
180
|
-
});
|
|
181
|
-
lastActionExecTrace = "";
|
|
182
|
-
}
|
|
183
|
-
catch (e) {
|
|
184
|
-
// TODO: implement feedback loop to llm
|
|
185
|
-
executedActions.push({
|
|
186
|
-
isError: true,
|
|
187
|
-
action: JSON.stringify(toolCall.function.arguments)
|
|
188
|
-
?.reason,
|
|
189
|
-
});
|
|
190
|
-
lastActionExecTrace = e.message;
|
|
191
|
-
void testgenUpdatesReporter.sendMessage(e.message);
|
|
192
|
-
logger.error(lastActionExecTrace, e);
|
|
193
|
-
}
|
|
194
|
-
}
|
|
195
|
-
toolCallsSpan.end({ output: { toolCalls } });
|
|
196
|
-
// mark task as done if llm is stuck in loop
|
|
197
|
-
if (executedActions.length >= 3) {
|
|
198
|
-
const lastThreeActions = executedActions.slice(-3);
|
|
199
|
-
const lastThreeActionsFailed = lastThreeActions.every((a) => a.isError);
|
|
200
|
-
// get last 3 lines of code
|
|
201
|
-
const lastThreeLinesOfCode = actions.getLastCodeLines(3);
|
|
202
|
-
const areLastActionsRepeatitive = lastThreeLinesOfCode.length === 3 &&
|
|
203
|
-
lastThreeLinesOfCode.every((a) => a === lastThreeLinesOfCode[0]);
|
|
204
|
-
if (lastThreeActionsFailed || areLastActionsRepeatitive) {
|
|
205
|
-
// TODO: this should be sent to dashboard
|
|
206
|
-
logger.error("Agent is not able to figure out next action, marking task as done");
|
|
207
|
-
await testgenUpdatesReporter.sendMessage("Agent is not able to figure out next action, marking task as done");
|
|
208
|
-
isGivenTaskDone = true;
|
|
209
|
-
break;
|
|
210
|
-
}
|
|
211
|
-
}
|
|
117
|
+
lastActionExecTrace = e.message;
|
|
118
|
+
void testgenUpdatesReporter.sendMessage(e.message);
|
|
119
|
+
logger.error(lastActionExecTrace, e);
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
toolCallsSpan.end({ output: { toolCalls } });
|
|
123
|
+
// mark task as done if llm is stuck in loop
|
|
124
|
+
if (executedActions.length >= 3) {
|
|
125
|
+
const lastThreeActions = executedActions.slice(-3);
|
|
126
|
+
const lastThreeActionsFailed = lastThreeActions.every((a) => a.isError);
|
|
127
|
+
// get last 3 lines of code
|
|
128
|
+
const lastThreeLinesOfCode = actions.getLastCodeLines(3);
|
|
129
|
+
const areLastActionsRepeatitive = lastThreeLinesOfCode.length === 3 &&
|
|
130
|
+
lastThreeLinesOfCode.every((a) => a === lastThreeLinesOfCode[0]);
|
|
131
|
+
if (lastThreeActionsFailed || areLastActionsRepeatitive) {
|
|
132
|
+
// TODO: this should be sent to dashboard
|
|
133
|
+
const error = "Agent is not able to figure out next browser action, ending retries";
|
|
134
|
+
logger.error(error);
|
|
135
|
+
await testgenUpdatesReporter.sendMessage(error);
|
|
136
|
+
throw Error(error);
|
|
212
137
|
}
|
|
213
|
-
masterAgentSpan.end({ output: { action, reason } });
|
|
214
|
-
masterAgentActions.push(action);
|
|
215
138
|
}
|
|
216
139
|
}
|
|
217
|
-
catch (e) {
|
|
218
|
-
console.error("Failed to generate code for the given task. Please retry again.", e);
|
|
219
|
-
await testgenUpdatesReporter.sendMessage(`Failed to generate code for the given task. Please retry again.`);
|
|
220
|
-
}
|
|
221
|
-
await page.close();
|
|
222
|
-
const code = actions.generateCode();
|
|
223
|
-
trace.update({ input: { task }, output: { code } });
|
|
224
|
-
logger.success("Successfully generated code for the given task");
|
|
225
|
-
await testgenUpdatesReporter.sendMessage(`Successfully generated code for the given task. \n View [trace](${trace.getTraceUrl()})`);
|
|
226
|
-
logger.log(`Trace: ${trace.getTraceUrl()}`);
|
|
227
|
-
return code;
|
|
228
140
|
}
|
|
229
|
-
exports.
|
|
141
|
+
exports.executeTaskUsingBrowsingAgent = executeTaskUsingBrowsingAgent;
|
|
@@ -16,7 +16,7 @@ async function createEmptyTestCaseBlock({ testCase, file, options, trace, }) {
|
|
|
16
16
|
trace =
|
|
17
17
|
trace ||
|
|
18
18
|
llm_1.langfuseInstance.trace({
|
|
19
|
-
name: "
|
|
19
|
+
name: "create-empty-test-block",
|
|
20
20
|
id: crypto.randomUUID(),
|
|
21
21
|
release: session.version,
|
|
22
22
|
tags: [
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { TraceClient } from "@empiricalrun/llm";
|
|
2
|
+
import { TestCase, TestGenConfigOptions } from "../../types";
|
|
3
|
+
export declare function getAppropriateSkills({ testCase, options, trace, }: {
|
|
4
|
+
testCase: TestCase;
|
|
5
|
+
options?: TestGenConfigOptions;
|
|
6
|
+
trace?: TraceClient;
|
|
7
|
+
}): Promise<{
|
|
8
|
+
testStep: string;
|
|
9
|
+
filePath: string;
|
|
10
|
+
usageExample: string;
|
|
11
|
+
reason: string;
|
|
12
|
+
}[]>;
|
|
13
|
+
//# sourceMappingURL=skills-retriever.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"skills-retriever.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/skills-retriever.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAYhE,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAG7D,wBAAsB,oBAAoB,CAAC,EACzC,QAAQ,EACR,OAAO,EACP,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB;;;;;KAmDA"}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.getAppropriateSkills = void 0;
|
|
7
|
+
const llm_1 = require("@empiricalrun/llm");
|
|
8
|
+
const fs_1 = __importDefault(require("fs"));
|
|
9
|
+
const logger_1 = require("../../bin/logger");
|
|
10
|
+
const context_1 = require("../../bin/utils/context");
|
|
11
|
+
const fs_2 = require("../../bin/utils/fs");
|
|
12
|
+
const constants_1 = require("../../constants");
|
|
13
|
+
const utils_1 = require("./utils");
|
|
14
|
+
async function getAppropriateSkills({ testCase, options, trace, }) {
|
|
15
|
+
const logger = new logger_1.CustomLogger({ useReporter: false });
|
|
16
|
+
logger.log("getting skill set for the repository");
|
|
17
|
+
const filter = await (0, context_1.createGitIgnoreFileFilter)();
|
|
18
|
+
const pomFiles = await (0, fs_2.generatePromptFromDirectory)("./pages", filter);
|
|
19
|
+
const fetchSkillsSpan = trace?.span({
|
|
20
|
+
name: "fetch-pom-skills",
|
|
21
|
+
input: {
|
|
22
|
+
testCase,
|
|
23
|
+
},
|
|
24
|
+
});
|
|
25
|
+
const promptSpan = fetchSkillsSpan?.span({
|
|
26
|
+
name: "fetch-pom-skills-prompt",
|
|
27
|
+
});
|
|
28
|
+
const prompt = await (0, llm_1.getPrompt)("fetch-skills-prompt", {
|
|
29
|
+
pageFiles: pomFiles,
|
|
30
|
+
scenarioName: testCase.name,
|
|
31
|
+
scenario: testCase.steps.join("\n"),
|
|
32
|
+
});
|
|
33
|
+
promptSpan?.end({ output: { prompt } });
|
|
34
|
+
const llm = new llm_1.LLM({
|
|
35
|
+
trace: fetchSkillsSpan,
|
|
36
|
+
provider: options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
|
|
37
|
+
defaultModel: options?.model || constants_1.DEFAULT_MODEL,
|
|
38
|
+
providerApiKey: constants_1.MODEL_API_KEYS[options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER],
|
|
39
|
+
});
|
|
40
|
+
const firstShotMessage = await llm.createChatCompletion({
|
|
41
|
+
messages: prompt,
|
|
42
|
+
traceName: "fetch-pom-skills-llm",
|
|
43
|
+
modelParameters: {
|
|
44
|
+
...constants_1.DEFAULT_MODEL_PARAMETERS,
|
|
45
|
+
...options?.modelParameters,
|
|
46
|
+
},
|
|
47
|
+
});
|
|
48
|
+
let response = firstShotMessage?.content || "";
|
|
49
|
+
const skills = (0, utils_1.extractTestStepsSuggestions)(response);
|
|
50
|
+
const validateSkillsSpan = fetchSkillsSpan?.span({
|
|
51
|
+
name: "validate-skills",
|
|
52
|
+
input: {
|
|
53
|
+
skills,
|
|
54
|
+
},
|
|
55
|
+
});
|
|
56
|
+
const validatedSkills = skills.filter((skill) => fs_1.default.existsSync(skill.filePath));
|
|
57
|
+
validateSkillsSpan?.end({ output: { validatedSkills } });
|
|
58
|
+
fetchSkillsSpan?.end({ output: { validatedSkills } });
|
|
59
|
+
return validatedSkills;
|
|
60
|
+
}
|
|
61
|
+
exports.getAppropriateSkills = getAppropriateSkills;
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { TraceClient } from "@empiricalrun/llm";
|
|
2
|
+
export declare function generateSkillUsageCode({ task, sampleUsageMethod, scopeVariablesMapStr, pageVariableName, trace, }: {
|
|
3
|
+
task: string;
|
|
4
|
+
sampleUsageMethod: string;
|
|
5
|
+
scopeVariablesMapStr: string;
|
|
6
|
+
pageVariableName: string;
|
|
7
|
+
trace?: TraceClient;
|
|
8
|
+
}): Promise<string>;
|
|
9
|
+
//# sourceMappingURL=use-skill.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"use-skill.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/use-skill.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAShE,wBAAsB,sBAAsB,CAAC,EAC3C,IAAI,EACJ,iBAAiB,EACjB,oBAAoB,EACpB,gBAAgB,EAChB,KAAK,GACN,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,iBAAiB,EAAE,MAAM,CAAC;IAC1B,oBAAoB,EAAE,MAAM,CAAC;IAC7B,gBAAgB,EAAE,MAAM,CAAC;IACzB,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,mBA0CA"}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.generateSkillUsageCode = void 0;
|
|
4
|
+
const llm_1 = require("@empiricalrun/llm");
|
|
5
|
+
const constants_1 = require("../../constants");
|
|
6
|
+
async function generateSkillUsageCode({ task, sampleUsageMethod, scopeVariablesMapStr, pageVariableName, trace, }) {
|
|
7
|
+
const skillUsageSpan = trace?.span({
|
|
8
|
+
name: "skill-usage",
|
|
9
|
+
input: {
|
|
10
|
+
task,
|
|
11
|
+
sampleUsageMethod,
|
|
12
|
+
scopeVariablesMapStr,
|
|
13
|
+
pageVariableName,
|
|
14
|
+
},
|
|
15
|
+
});
|
|
16
|
+
const promptSpan = skillUsageSpan?.span({
|
|
17
|
+
name: "apply-skills-prompt",
|
|
18
|
+
});
|
|
19
|
+
const prompt = await (0, llm_1.getPrompt)("apply-skills-prompt", {
|
|
20
|
+
task,
|
|
21
|
+
sampleUsageMethod,
|
|
22
|
+
scopeVariablesMapStr,
|
|
23
|
+
pageVariableName,
|
|
24
|
+
});
|
|
25
|
+
promptSpan?.end({ output: prompt });
|
|
26
|
+
const llm = new llm_1.LLM({
|
|
27
|
+
trace: skillUsageSpan,
|
|
28
|
+
provider: constants_1.DEFAULT_MODEL_PROVIDER,
|
|
29
|
+
defaultModel: constants_1.DEFAULT_MODEL,
|
|
30
|
+
providerApiKey: constants_1.MODEL_API_KEYS[constants_1.DEFAULT_MODEL_PROVIDER],
|
|
31
|
+
});
|
|
32
|
+
const firstShotMessage = await llm.createChatCompletion({
|
|
33
|
+
trace: skillUsageSpan,
|
|
34
|
+
messages: prompt,
|
|
35
|
+
traceName: "generate-skill-usage-code",
|
|
36
|
+
modelParameters: {
|
|
37
|
+
...constants_1.DEFAULT_MODEL_PARAMETERS,
|
|
38
|
+
},
|
|
39
|
+
});
|
|
40
|
+
let response = firstShotMessage?.content || "";
|
|
41
|
+
skillUsageSpan?.end({
|
|
42
|
+
output: {
|
|
43
|
+
code: response,
|
|
44
|
+
},
|
|
45
|
+
});
|
|
46
|
+
console.log(`generated usage code`, response);
|
|
47
|
+
return response;
|
|
48
|
+
}
|
|
49
|
+
exports.generateSkillUsageCode = generateSkillUsageCode;
|
|
@@ -16,4 +16,13 @@ export declare function extractTestUpdates(input: string): {
|
|
|
16
16
|
newCode: string | undefined;
|
|
17
17
|
reason: string | undefined;
|
|
18
18
|
}[];
|
|
19
|
+
/**
|
|
20
|
+
*
|
|
21
|
+
*/
|
|
22
|
+
export declare function extractTestStepsSuggestions(input: string): {
|
|
23
|
+
testStep: string;
|
|
24
|
+
filePath: string;
|
|
25
|
+
usageExample: string;
|
|
26
|
+
reason: string;
|
|
27
|
+
}[];
|
|
19
28
|
//# sourceMappingURL=utils.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/utils.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,MAAM,GAAG;IACjD,QAAQ,EAAE,MAAM,GAAG,SAAS,CAAC;IAC7B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;CAC5B,EAAE,CAiBF"}
|
|
1
|
+
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/utils.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,MAAM,GAAG;IACjD,QAAQ,EAAE,MAAM,GAAG,SAAS,CAAC;IAC7B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;CAC5B,EAAE,CAiBF;AAED;;GAEG;AAEH,wBAAgB,2BAA2B,CAAC,KAAK,EAAE,MAAM,GAAG;IAC1D,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,MAAM,CAAC;CAChB,EAAE,CAeF"}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.extractTestUpdates = void 0;
|
|
3
|
+
exports.extractTestStepsSuggestions = exports.extractTestUpdates = void 0;
|
|
4
4
|
/**
|
|
5
5
|
*
|
|
6
6
|
* method to extract file path and code updates for the LLM response of update flow
|
|
@@ -30,3 +30,22 @@ function extractTestUpdates(input) {
|
|
|
30
30
|
return result;
|
|
31
31
|
}
|
|
32
32
|
exports.extractTestUpdates = extractTestUpdates;
|
|
33
|
+
/**
|
|
34
|
+
*
|
|
35
|
+
*/
|
|
36
|
+
function extractTestStepsSuggestions(input) {
|
|
37
|
+
const result = [];
|
|
38
|
+
const regex = /<subtask>(.*?)<\/subtask>[\s\S]*?<file_import_path>([\s\S]*?)<\/file_import_path>[\s\S]*?<usage_example>([\s\S]*?)<\/usage_example>[\s\S]*?<reason>([\s\S]*?)<\/reason>/g;
|
|
39
|
+
let match;
|
|
40
|
+
while ((match = regex.exec(input)) !== null) {
|
|
41
|
+
const [, testStep, filePath, usageExample, reason] = match;
|
|
42
|
+
result.push({
|
|
43
|
+
testStep: testStep?.trim() || "",
|
|
44
|
+
filePath: filePath?.trim() || "",
|
|
45
|
+
usageExample: usageExample?.trim() || "",
|
|
46
|
+
reason: reason?.trim() || "",
|
|
47
|
+
});
|
|
48
|
+
}
|
|
49
|
+
return result.filter((r) => !!r.filePath && !!r.usageExample);
|
|
50
|
+
}
|
|
51
|
+
exports.extractTestStepsSuggestions = extractTestStepsSuggestions;
|
|
@@ -1,16 +1,27 @@
|
|
|
1
1
|
import { LLM, TraceClient } from "@empiricalrun/llm";
|
|
2
2
|
import { Page } from "playwright";
|
|
3
|
-
import {
|
|
4
|
-
|
|
5
|
-
|
|
3
|
+
import { PlaywrightActions } from "../../actions";
|
|
4
|
+
import { TestCase } from "../../types";
|
|
5
|
+
import { BrowsingAgentOptions } from "../browsing";
|
|
6
|
+
export declare function getNextAction({ task, executedActions, failedActions, page, trace, llm, options, pageScreenshot, actions, disableSkills, }: {
|
|
6
7
|
task: string;
|
|
7
8
|
executedActions: string[];
|
|
9
|
+
failedActions: any[];
|
|
8
10
|
page: Page;
|
|
9
11
|
trace?: TraceClient;
|
|
10
12
|
llm: LLM;
|
|
11
13
|
options: BrowsingAgentOptions;
|
|
12
14
|
pageScreenshot: string;
|
|
15
|
+
actions: PlaywrightActions;
|
|
16
|
+
disableSkills: boolean;
|
|
13
17
|
}): Promise<import("openai/resources/index.mjs").ChatCompletionMessageToolCall | undefined>;
|
|
14
|
-
export declare function
|
|
15
|
-
|
|
18
|
+
export declare function createTestUsingMasterAgent({ task, page, testCase, options, }: {
|
|
19
|
+
task: string;
|
|
20
|
+
page: Page;
|
|
21
|
+
testCase: TestCase;
|
|
22
|
+
options: BrowsingAgentOptions;
|
|
23
|
+
}): Promise<{
|
|
24
|
+
code: string;
|
|
25
|
+
importPaths: string[];
|
|
26
|
+
}>;
|
|
16
27
|
//# sourceMappingURL=run.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AAAA,OAAO,
|
|
1
|
+
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,GAAG,EACH,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAG3B,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAYlD,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AACvC,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AAOrB,wBAAsB,aAAa,CAAC,EAClC,IAAI,EACJ,eAAe,EACf,aAAa,EACb,IAAI,EACJ,KAAK,EACL,GAAG,EACH,OAAO,EACP,cAAc,EACd,OAAO,EACP,aAAa,GACd,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,aAAa,EAAE,GAAG,EAAE,CAAC;IACrB,IAAI,EAAE,IAAI,CAAC;IACX,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,EAAE,GAAG,CAAC;IACT,OAAO,EAAE,oBAAoB,CAAC;IAC9B,cAAc,EAAE,MAAM,CAAC;IACvB,OAAO,EAAE,iBAAiB,CAAC;IAC3B,aAAa,EAAE,OAAO,CAAC;CACxB,2FAwDA;AAED,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,OAAO,GACR,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,oBAAoB,CAAC;CAC/B;;;GAmLA"}
|