@empiricalrun/test-gen 0.27.1 → 0.27.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/dist/actions/index.d.ts +1 -0
- package/dist/actions/index.d.ts.map +1 -1
- package/dist/actions/index.js +5 -0
- package/dist/agent/browsing/index.d.ts.map +1 -1
- package/dist/agent/browsing/index.js +139 -112
- package/dist/agent/browsing/utils.d.ts.map +1 -1
- package/dist/agent/browsing/utils.js +45 -42
- package/dist/agent/codegen/update-flow.d.ts.map +1 -1
- package/dist/agent/codegen/update-flow.js +21 -5
- package/dist/agent/verification/index.d.ts.map +1 -1
- package/dist/agent/verification/index.js +6 -18
- package/package.json +2 -2
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,19 @@
|
|
|
1
1
|
# @empiricalrun/test-gen
|
|
2
2
|
|
|
3
|
+
## 0.27.3
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- 2fc2f93: fix: handle consecutive action error handling
|
|
8
|
+
- Updated dependencies [e7ae100]
|
|
9
|
+
- @empiricalrun/reporter@0.17.11
|
|
10
|
+
|
|
11
|
+
## 0.27.2
|
|
12
|
+
|
|
13
|
+
### Patch Changes
|
|
14
|
+
|
|
15
|
+
- 947dc1a: feat: support array types for testMatch and testIgnore in project detection
|
|
16
|
+
|
|
3
17
|
## 0.27.1
|
|
4
18
|
|
|
5
19
|
### Patch Changes
|
package/dist/actions/index.d.ts
CHANGED
|
@@ -7,6 +7,7 @@ export declare class PlaywrightActions {
|
|
|
7
7
|
executeAction(name: string | undefined, args: Record<string, any>): Promise<void>;
|
|
8
8
|
getActionSchemas(): ActionSchema[];
|
|
9
9
|
generateCode(): string;
|
|
10
|
+
getLastCodeLines(count: number): string[];
|
|
10
11
|
isComplete(): boolean;
|
|
11
12
|
}
|
|
12
13
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/actions/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAIlC,OAAO,EAAU,YAAY,EAAE,MAAM,UAAU,CAAC;AAQhD,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,OAAO,CAAW;IAC1B,OAAO,CAAC,eAAe,CAAmC;gBAC9C,IAAI,EAAE,IAAI;IAYhB,aAAa,CAAC,IAAI,oBAAa,EAAE,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC;IAuBhE,gBAAgB,IAAI,YAAY,EAAE;IAIlC,YAAY;IAIZ,UAAU;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/actions/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAIlC,OAAO,EAAU,YAAY,EAAE,MAAM,UAAU,CAAC;AAQhD,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,OAAO,CAAW;IAC1B,OAAO,CAAC,eAAe,CAAmC;gBAC9C,IAAI,EAAE,IAAI;IAYhB,aAAa,CAAC,IAAI,oBAAa,EAAE,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC;IAuBhE,gBAAgB,IAAI,YAAY,EAAE;IAIlC,YAAY;IAIZ,gBAAgB,CAAC,KAAK,EAAE,MAAM;IAO9B,UAAU;CAUX"}
|
package/dist/actions/index.js
CHANGED
|
@@ -52,8 +52,13 @@ class PlaywrightActions {
|
|
|
52
52
|
generateCode() {
|
|
53
53
|
return this.recordedActions.map((a) => a.code).join("\n");
|
|
54
54
|
}
|
|
55
|
+
getLastCodeLines(count) {
|
|
56
|
+
const actions = this.recordedActions.filter((a) => a.name !== done_1.PLAYWRIGHT_DONE_ACTION_NAME);
|
|
57
|
+
return actions.slice(-count).map((a) => a.code);
|
|
58
|
+
}
|
|
55
59
|
isComplete() {
|
|
56
60
|
const [doneAction] = this.recordedActions.filter((a) => a.name === done_1.PLAYWRIGHT_DONE_ACTION_NAME);
|
|
61
|
+
// filter out done action from recorded actions after execution is marked complete
|
|
57
62
|
this.recordedActions = this.recordedActions.filter((a) => a.name !== done_1.PLAYWRIGHT_DONE_ACTION_NAME);
|
|
58
63
|
return !!doneAction;
|
|
59
64
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAYlC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAMnD,KAAK,oBAAoB,GAAG,OAAO,CAAC,oBAAoB,CAAC,GAAG;IAC1D,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACH,CAAC;AAEF,wBAAsB,6BAA6B,CACjD,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,IAAI,EACV,OAAO,EAAE,oBAAoB,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAYlC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAMnD,KAAK,oBAAoB,GAAG,OAAO,CAAC,oBAAoB,CAAC,GAAG;IAC1D,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACH,CAAC;AAEF,wBAAsB,6BAA6B,CACjD,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,IAAI,EACV,OAAO,EAAE,oBAAoB,mBAqO9B"}
|
|
@@ -44,137 +44,164 @@ async function browsingAgentUsingMasterAgent(task, page, options) {
|
|
|
44
44
|
let lastActionExecTrace = "";
|
|
45
45
|
let isGivenTaskDone = false;
|
|
46
46
|
const masterAgentActions = [];
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
llm,
|
|
52
|
-
trace: masterAgentSpan,
|
|
53
|
-
task,
|
|
54
|
-
conversation: ["Successfully executed actions", ...masterAgentActions],
|
|
55
|
-
});
|
|
56
|
-
isGivenTaskDone = verificationAgentResp.isDone;
|
|
57
|
-
if (isGivenTaskDone) {
|
|
58
|
-
await testgenUpdatesReporter.sendMessage(`${verificationAgentResp.reason} Marking the task as done.`);
|
|
59
|
-
break;
|
|
60
|
-
}
|
|
61
|
-
}
|
|
62
|
-
const sessionState = await (0, session_1.getSessionState)();
|
|
63
|
-
if (sessionState === "request_complete") {
|
|
64
|
-
await testgenUpdatesReporter.sendMessage("Aborting task, marking the task as done.");
|
|
65
|
-
break;
|
|
66
|
-
}
|
|
67
|
-
const { action, reason } = await (0, run_1.masterAgent)(task, page, masterAgentActions, masterAgentSpan, llm, options);
|
|
68
|
-
logger.log(`Next action: ${action} \n reason: ${reason}`);
|
|
69
|
-
if (!action) {
|
|
70
|
-
break;
|
|
71
|
-
}
|
|
72
|
-
if (isGivenTaskDone) {
|
|
73
|
-
break;
|
|
74
|
-
}
|
|
75
|
-
let isTaskDone = false;
|
|
76
|
-
const executedActions = [];
|
|
77
|
-
while (!isTaskDone) {
|
|
78
|
-
const browsingAgentSpan = masterAgentSpan.span({
|
|
79
|
-
name: `browsing-agent`,
|
|
80
|
-
});
|
|
81
|
-
const sessionState = await (0, session_1.getSessionState)();
|
|
82
|
-
if (sessionState === "request_complete") {
|
|
83
|
-
break;
|
|
84
|
-
}
|
|
85
|
-
const pageContentSpan = browsingAgentSpan.span({ name: "page-content" });
|
|
86
|
-
const pageContent = await page.content();
|
|
87
|
-
pageContentSpan.end({ output: { pageContent } });
|
|
88
|
-
const sanitizationSpan = browsingAgentSpan.span({
|
|
89
|
-
name: "page-sanitization",
|
|
90
|
-
});
|
|
91
|
-
const pageSnapshot = (0, html_1.sanitizeHtml)(pageContent, options.htmlSanitize);
|
|
92
|
-
sanitizationSpan.end({ output: { pageSnapshot } });
|
|
93
|
-
const promptSpan = browsingAgentSpan.span({ name: "page-prompt" });
|
|
94
|
-
// extract all successful actions
|
|
95
|
-
const successfulActions = executedActions
|
|
96
|
-
.filter((a) => !a.isError)
|
|
97
|
-
.map((a) => a.action);
|
|
98
|
-
if (successfulActions.length > 0) {
|
|
47
|
+
try {
|
|
48
|
+
while (!isGivenTaskDone) {
|
|
49
|
+
const masterAgentSpan = trace.span({ name: "master-agent" });
|
|
50
|
+
if (masterAgentActions.length > 0) {
|
|
99
51
|
const verificationAgentResp = await (0, verification_1.verificationAgent)({
|
|
100
52
|
llm,
|
|
101
|
-
trace:
|
|
102
|
-
task
|
|
103
|
-
conversation: [
|
|
53
|
+
trace: masterAgentSpan,
|
|
54
|
+
task,
|
|
55
|
+
conversation: [
|
|
56
|
+
"Successfully executed actions",
|
|
57
|
+
...masterAgentActions,
|
|
58
|
+
],
|
|
104
59
|
});
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
if (isTaskDone) {
|
|
109
|
-
browsingAgentSpan.event({ name: "task-done" });
|
|
110
|
-
browsingAgentSpan.end({
|
|
111
|
-
output: { taskDone: true, reason: verificationAgentResp.reason },
|
|
112
|
-
});
|
|
60
|
+
isGivenTaskDone = verificationAgentResp.isDone;
|
|
61
|
+
if (isGivenTaskDone) {
|
|
62
|
+
await testgenUpdatesReporter.sendMessage(`${verificationAgentResp.reason} Marking the task as done.`);
|
|
113
63
|
break;
|
|
114
64
|
}
|
|
115
65
|
}
|
|
116
|
-
const
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
});
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
const toolCalls = completion?.tool_calls || [];
|
|
136
|
-
const toolCallsSpan = browsingAgentSpan.span({ name: "tool-calls" });
|
|
137
|
-
for (const i in toolCalls) {
|
|
138
|
-
const toolCall = toolCalls[i];
|
|
66
|
+
const sessionState = await (0, session_1.getSessionState)();
|
|
67
|
+
if (sessionState === "request_complete") {
|
|
68
|
+
await testgenUpdatesReporter.sendMessage("Aborting task, marking the task as done.");
|
|
69
|
+
break;
|
|
70
|
+
}
|
|
71
|
+
const { action, reason } = await (0, run_1.masterAgent)(task, page, masterAgentActions, masterAgentSpan, llm, options);
|
|
72
|
+
logger.log(`Next action: ${action} \n reason: ${reason}`);
|
|
73
|
+
if (!action) {
|
|
74
|
+
break;
|
|
75
|
+
}
|
|
76
|
+
if (isGivenTaskDone) {
|
|
77
|
+
break;
|
|
78
|
+
}
|
|
79
|
+
let isTaskDone = false;
|
|
80
|
+
const executedActions = [];
|
|
81
|
+
while (!isTaskDone) {
|
|
82
|
+
const browsingAgentSpan = masterAgentSpan.span({
|
|
83
|
+
name: `browsing-agent`,
|
|
84
|
+
});
|
|
139
85
|
const sessionState = await (0, session_1.getSessionState)();
|
|
140
86
|
if (sessionState === "request_complete") {
|
|
141
87
|
break;
|
|
142
88
|
}
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
89
|
+
const pageContentSpan = browsingAgentSpan.span({
|
|
90
|
+
name: "page-content",
|
|
91
|
+
});
|
|
92
|
+
const pageContent = await page.content();
|
|
93
|
+
pageContentSpan.end({ output: { pageContent } });
|
|
94
|
+
const sanitizationSpan = browsingAgentSpan.span({
|
|
95
|
+
name: "page-sanitization",
|
|
96
|
+
});
|
|
97
|
+
const pageSnapshot = (0, html_1.sanitizeHtml)(pageContent, options.htmlSanitize);
|
|
98
|
+
sanitizationSpan.end({ output: { pageSnapshot } });
|
|
99
|
+
const promptSpan = browsingAgentSpan.span({ name: "page-prompt" });
|
|
100
|
+
// extract all successful actions
|
|
101
|
+
const successfulActions = executedActions
|
|
102
|
+
.filter((a) => !a.isError)
|
|
103
|
+
.map((a) => a.action);
|
|
104
|
+
if (successfulActions.length > 0) {
|
|
105
|
+
const verificationAgentResp = await (0, verification_1.verificationAgent)({
|
|
106
|
+
llm,
|
|
107
|
+
trace: browsingAgentSpan,
|
|
108
|
+
task: action,
|
|
109
|
+
conversation: [
|
|
110
|
+
"Successfully executed actions",
|
|
111
|
+
...successfulActions,
|
|
112
|
+
],
|
|
148
113
|
});
|
|
149
|
-
|
|
114
|
+
isTaskDone = verificationAgentResp.isDone;
|
|
115
|
+
logger.log(`isTaskDone: ${isTaskDone}`);
|
|
116
|
+
logger.log(`reason: ${verificationAgentResp.reason}`);
|
|
117
|
+
if (isTaskDone) {
|
|
118
|
+
browsingAgentSpan.event({ name: "task-done" });
|
|
119
|
+
browsingAgentSpan.end({
|
|
120
|
+
output: { taskDone: true, reason: verificationAgentResp.reason },
|
|
121
|
+
});
|
|
122
|
+
break;
|
|
123
|
+
}
|
|
150
124
|
}
|
|
151
|
-
|
|
152
|
-
|
|
125
|
+
const messages = await (0, utils_1.getPromptForNextAction)({
|
|
126
|
+
pageSnapshot,
|
|
127
|
+
previousActions: successfulActions,
|
|
128
|
+
task: action,
|
|
129
|
+
lastActionErrors: lastActionExecTrace ? [lastActionExecTrace] : [],
|
|
130
|
+
promptType: "browsing-agent-as-tool",
|
|
131
|
+
});
|
|
132
|
+
promptSpan.end({ output: { messages } });
|
|
133
|
+
const completion = await llm.createChatCompletion({
|
|
134
|
+
messages,
|
|
135
|
+
tools,
|
|
136
|
+
trace: browsingAgentSpan,
|
|
137
|
+
model: options.model || constants_1.DEFAULT_MODEL,
|
|
138
|
+
modelParameters: {
|
|
139
|
+
...constants_1.DEFAULT_MODEL_PARAMETERS,
|
|
140
|
+
...options.modelParameters,
|
|
141
|
+
tool_choice: "required",
|
|
142
|
+
},
|
|
143
|
+
});
|
|
144
|
+
const toolCalls = completion?.tool_calls || [];
|
|
145
|
+
// LLM might respond with empty tool_calls and we can go into endless loop
|
|
146
|
+
// if we do not record this action and mark it as error
|
|
147
|
+
if (!toolCalls.length) {
|
|
153
148
|
executedActions.push({
|
|
154
149
|
isError: true,
|
|
155
|
-
action:
|
|
156
|
-
?.reason,
|
|
150
|
+
action: "",
|
|
157
151
|
});
|
|
158
|
-
lastActionExecTrace = e.message;
|
|
159
|
-
void testgenUpdatesReporter.sendMessage(e.message);
|
|
160
|
-
logger.error(lastActionExecTrace, e);
|
|
161
152
|
}
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
153
|
+
const toolCallsSpan = browsingAgentSpan.span({ name: "tool-calls" });
|
|
154
|
+
for (const i in toolCalls) {
|
|
155
|
+
const toolCall = toolCalls[i];
|
|
156
|
+
const sessionState = await (0, session_1.getSessionState)();
|
|
157
|
+
if (sessionState === "request_complete") {
|
|
158
|
+
break;
|
|
159
|
+
}
|
|
160
|
+
try {
|
|
161
|
+
await actions.executeAction(toolCall.function.name, JSON.parse(toolCall.function.arguments));
|
|
162
|
+
executedActions.push({
|
|
163
|
+
isError: false,
|
|
164
|
+
action: JSON.stringify(toolCall),
|
|
165
|
+
});
|
|
166
|
+
lastActionExecTrace = "";
|
|
167
|
+
}
|
|
168
|
+
catch (e) {
|
|
169
|
+
// TODO: implement feedback loop to llm
|
|
170
|
+
executedActions.push({
|
|
171
|
+
isError: true,
|
|
172
|
+
action: JSON.stringify(toolCall.function.arguments)
|
|
173
|
+
?.reason,
|
|
174
|
+
});
|
|
175
|
+
lastActionExecTrace = e.message;
|
|
176
|
+
void testgenUpdatesReporter.sendMessage(e.message);
|
|
177
|
+
logger.error(lastActionExecTrace, e);
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
toolCallsSpan.end({ output: { toolCalls } });
|
|
181
|
+
// mark task as done if llm is stuck in loop
|
|
182
|
+
if (executedActions.length >= 3) {
|
|
183
|
+
const lastThreeActions = executedActions.slice(-3);
|
|
184
|
+
const lastThreeActionsFailed = lastThreeActions.every((a) => a.isError);
|
|
185
|
+
// get last 3 lines of code
|
|
186
|
+
const lastThreeLinesOfCode = actions.getLastCodeLines(3);
|
|
187
|
+
const areLastActionsRepeatitive = lastThreeLinesOfCode.length === 3 &&
|
|
188
|
+
lastThreeLinesOfCode.every((a) => a === lastThreeLinesOfCode[0]);
|
|
189
|
+
if (lastThreeActionsFailed || areLastActionsRepeatitive) {
|
|
190
|
+
// TODO: this should be sent to dashboard
|
|
191
|
+
logger.error("Agent is not able to figure out next action, marking task as done");
|
|
192
|
+
await testgenUpdatesReporter.sendMessage("Agent is not able to figure out next action, marking task as done");
|
|
193
|
+
isGivenTaskDone = true;
|
|
194
|
+
break;
|
|
195
|
+
}
|
|
173
196
|
}
|
|
174
197
|
}
|
|
198
|
+
masterAgentSpan.end({ output: { action, reason } });
|
|
199
|
+
masterAgentActions.push(action);
|
|
175
200
|
}
|
|
176
|
-
|
|
177
|
-
|
|
201
|
+
}
|
|
202
|
+
catch (e) {
|
|
203
|
+
console.error("Failed to generate code for the given task. Please retry again.", e);
|
|
204
|
+
await testgenUpdatesReporter.sendMessage(`Failed to generate code for the given task. Please retry again.`);
|
|
178
205
|
}
|
|
179
206
|
await page.close();
|
|
180
207
|
const code = actions.generateCode();
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAUvD,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAG5C,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;AAyFD;;;;GAIG;AACH,wBAAsB,yBAAyB,CAC7C,SAAS,EAAE,aAAa,GACvB,OAAO,CAAC,MAAM,CAAC,CAuBjB;AAwCD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBAiBxD;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,QA6BjD;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,IAAI,OAAO,CAAC,oBAAoB,CAAC,CAM1E;
|
|
1
|
+
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAUvD,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAG5C,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;AAyFD;;;;GAIG;AACH,wBAAsB,yBAAyB,CAC7C,SAAS,EAAE,aAAa,GACvB,OAAO,CAAC,MAAM,CAAC,CAuBjB;AAwCD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBAiBxD;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,QA6BjD;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,IAAI,OAAO,CAAC,oBAAoB,CAAC,CAM1E;AAWD;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,YAAY,EAAE,MAAM,EACpB,gBAAgB,EAAE,oBAAoB,GACrC,OAAO,CAAC,MAAM,CAAC,CA8CjB;AAED,wBAAsB,sBAAsB,CAAC,EAC3C,YAAiB,EACjB,IAAS,EACT,eAAoB,EACpB,gBAAqB,EACrB,UAAyC,GAC1C,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB,8EASA"}
|
|
@@ -187,6 +187,15 @@ async function readPlaywrightConfig() {
|
|
|
187
187
|
return playwrightConfig;
|
|
188
188
|
}
|
|
189
189
|
exports.readPlaywrightConfig = readPlaywrightConfig;
|
|
190
|
+
function matchAgainstPattern(pattern, filePathToTest) {
|
|
191
|
+
if (isRegExp(pattern)) {
|
|
192
|
+
const regExp = pattern;
|
|
193
|
+
return regExp.test(filePathToTest);
|
|
194
|
+
}
|
|
195
|
+
else {
|
|
196
|
+
return (0, minimatch_1.minimatch)(filePathToTest, pattern);
|
|
197
|
+
}
|
|
198
|
+
}
|
|
190
199
|
/**
|
|
191
200
|
* detect the project name for the given file in playwright test repo
|
|
192
201
|
* if project and test file path for running test don't match, then playwright throws error
|
|
@@ -195,51 +204,45 @@ exports.readPlaywrightConfig = readPlaywrightConfig;
|
|
|
195
204
|
*/
|
|
196
205
|
async function detectProjectName(testFilePath, playwrightConfig) {
|
|
197
206
|
const filePath = testFilePath.replace("./tests/", "");
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
ignoreFile = regExp.test(filePath);
|
|
210
|
-
}
|
|
211
|
-
else {
|
|
212
|
-
ignoreFile = (0, minimatch_1.minimatch)(filePath, testIgnore);
|
|
213
|
-
}
|
|
214
|
-
}
|
|
215
|
-
// if test file path is in ignore list then skip it
|
|
216
|
-
// TODO: support for array based glob match
|
|
217
|
-
if (ignoreFile) {
|
|
218
|
-
return "";
|
|
219
|
-
}
|
|
220
|
-
if (typeof testMatch === "string" || isRegExp(testMatch)) {
|
|
221
|
-
let isMatch = false;
|
|
222
|
-
if (isRegExp(testMatch)) {
|
|
223
|
-
isMatch = testMatch.test(filePath);
|
|
224
|
-
}
|
|
225
|
-
else {
|
|
226
|
-
isMatch = (0, minimatch_1.minimatch)(filePath, testMatch);
|
|
227
|
-
}
|
|
228
|
-
if (isMatch && p.use?.defaultBrowserType === "chromium") {
|
|
229
|
-
return p.name;
|
|
230
|
-
}
|
|
231
|
-
else {
|
|
232
|
-
return "";
|
|
233
|
-
}
|
|
207
|
+
if (!playwrightConfig.projects || playwrightConfig.projects.length === 0) {
|
|
208
|
+
throw new Error(`No projects found in playwright config.`);
|
|
209
|
+
}
|
|
210
|
+
const filteredProjectNames = playwrightConfig.projects
|
|
211
|
+
.map((p) => {
|
|
212
|
+
const testIgnore = p.testIgnore;
|
|
213
|
+
const testMatch = p.testMatch || "**";
|
|
214
|
+
let ignoreFile = false;
|
|
215
|
+
if (testIgnore) {
|
|
216
|
+
if (typeof testIgnore === "string" || isRegExp(testIgnore)) {
|
|
217
|
+
ignoreFile = matchAgainstPattern(testIgnore, filePath);
|
|
234
218
|
}
|
|
235
|
-
else {
|
|
236
|
-
|
|
219
|
+
else if (typeof testIgnore === "object") {
|
|
220
|
+
ignoreFile = testIgnore.some((ignore) => matchAgainstPattern(ignore, filePath));
|
|
237
221
|
}
|
|
238
|
-
}
|
|
239
|
-
|
|
240
|
-
|
|
222
|
+
}
|
|
223
|
+
// if test file path is in ignore list then skip it
|
|
224
|
+
if (ignoreFile) {
|
|
225
|
+
return "";
|
|
226
|
+
}
|
|
227
|
+
let isMatch = false;
|
|
228
|
+
if (typeof testMatch === "string" || isRegExp(testMatch)) {
|
|
229
|
+
isMatch = matchAgainstPattern(testMatch, filePath);
|
|
230
|
+
}
|
|
231
|
+
else if (typeof testMatch === "object") {
|
|
232
|
+
isMatch = testMatch.some((match) => matchAgainstPattern(match, filePath));
|
|
233
|
+
}
|
|
234
|
+
if (isMatch && p.use?.defaultBrowserType === "chromium") {
|
|
235
|
+
return p.name || "";
|
|
236
|
+
}
|
|
237
|
+
else {
|
|
238
|
+
return "";
|
|
239
|
+
}
|
|
240
|
+
})
|
|
241
|
+
.filter((p) => !!p);
|
|
242
|
+
if (filteredProjectNames.length === 0) {
|
|
243
|
+
throw new Error(`No project found for the test file: ${testFilePath} in playwright config.`);
|
|
241
244
|
}
|
|
242
|
-
return
|
|
245
|
+
return filteredProjectNames[0];
|
|
243
246
|
}
|
|
244
247
|
exports.detectProjectName = detectProjectName;
|
|
245
248
|
async function getPromptForNextAction({ pageSnapshot = "", task = "", previousActions = [], lastActionErrors = [], promptType = "browsing-agent-next-action", }) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"update-flow.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/update-flow.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"update-flow.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/update-flow.ts"],"names":[],"mappings":"AAyBA,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAqB7D,KAAK,eAAe,GAAG,QAAQ,GAAG;IAChC,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB,CAAC;AAEF,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAAG,SAAS,EACzC,OAAO,GAAE,OAAc,GACtB,OAAO,CAAC,eAAe,EAAE,CAAC,CA6I5B"}
|
|
@@ -7,6 +7,7 @@ exports.updateTest = void 0;
|
|
|
7
7
|
const llm_1 = require("@empiricalrun/llm");
|
|
8
8
|
const crypto_1 = __importDefault(require("crypto"));
|
|
9
9
|
const fs_extra_1 = __importDefault(require("fs-extra"));
|
|
10
|
+
const ts_morph_1 = require("ts-morph");
|
|
10
11
|
const logger_1 = require("../../bin/logger");
|
|
11
12
|
const context_1 = require("../../bin/utils/context");
|
|
12
13
|
const web_1 = require("../../bin/utils/platform/web");
|
|
@@ -104,13 +105,28 @@ async function updateTest(testCase, file, options, logging = true) {
|
|
|
104
105
|
logger.success(`${fileChange.filePath} file formatted successfully!`);
|
|
105
106
|
}
|
|
106
107
|
else {
|
|
107
|
-
// since we dont know what is getting updated,
|
|
108
|
-
// we believe that the patch is correct and contains few before and after lines
|
|
109
|
-
// to make the change unique for search & replace
|
|
110
108
|
const readWriteFileSpan = trace.span({ name: "write-to-file" });
|
|
111
109
|
let contents = await fs_extra_1.default.readFile(fileChange.filePath, "utf-8");
|
|
112
|
-
|
|
113
|
-
|
|
110
|
+
const project = new ts_morph_1.Project();
|
|
111
|
+
const sourceFile = project.createSourceFile("updated-code.ts", fileChange.newCode);
|
|
112
|
+
const functions = sourceFile.getFunctions();
|
|
113
|
+
// if there is a single method update in the file
|
|
114
|
+
if (functions.length === 1 &&
|
|
115
|
+
functions[0]?.getText() === fileChange.newCode) {
|
|
116
|
+
const updatedCodeFuncNames = functions.map((f) => f.getName());
|
|
117
|
+
const funcName = updatedCodeFuncNames[0];
|
|
118
|
+
const originalSource = project.createSourceFile("current-code.ts", contents);
|
|
119
|
+
const matchingNodes = originalSource
|
|
120
|
+
.getDescendantsOfKind(ts_morph_1.SyntaxKind.FunctionDeclaration)
|
|
121
|
+
.filter((node) => node.getName() === funcName);
|
|
122
|
+
contents = contents.replace(matchingNodes[0].getText(), functions[0]?.getText());
|
|
123
|
+
}
|
|
124
|
+
else {
|
|
125
|
+
// since we dont know what is getting updated,
|
|
126
|
+
// we believe that the patch is correct and contains few before and after lines
|
|
127
|
+
// to make the change unique for search & replace
|
|
128
|
+
contents = contents.replace(fileChange.oldCode, `\n\n${fileChange.newCode}`);
|
|
129
|
+
}
|
|
114
130
|
await fs_extra_1.default.writeFile(fileChange.filePath, contents, "utf-8");
|
|
115
131
|
readWriteFileSpan.end({ output: { contents } });
|
|
116
132
|
trace.event({ name: "format-file" });
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/verification/index.ts"],"names":[],"mappings":"AAAA,OAAO,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/verification/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAa,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAEhE;;GAEG;AACH,wBAAsB,iBAAiB,CAAC,EACtC,GAAG,EACH,KAAK,EACL,IAAI,EACJ,YAAY,GACb,EAAE;IACD,GAAG,EAAE,GAAG,CAAC;IACT,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,IAAI,EAAE,MAAM,CAAC;CACd;;;GAgDA"}
|
|
@@ -1,30 +1,18 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.verificationAgent = void 0;
|
|
4
|
+
const llm_1 = require("@empiricalrun/llm");
|
|
4
5
|
/**
|
|
5
6
|
* This agent is used to verify whether the task is done basis the conversation history
|
|
6
7
|
*/
|
|
7
8
|
async function verificationAgent({ llm, trace, task, conversation, }) {
|
|
9
|
+
const messages = await (0, llm_1.getPrompt)("agent-steps-verification", {
|
|
10
|
+
task,
|
|
11
|
+
conversation: conversation.join("\n"),
|
|
12
|
+
});
|
|
8
13
|
const response = await llm.createChatCompletion({
|
|
9
14
|
trace,
|
|
10
|
-
messages
|
|
11
|
-
{
|
|
12
|
-
role: "system",
|
|
13
|
-
content: `Given a conversation and a task, your task is to analyse the conversation and tell if the task is completed.
|
|
14
|
-
If not, you need to tell what is not completed and suggest next steps to complete the task.
|
|
15
|
-
You need to respond assuming the conversation provided to you is truthful.
|
|
16
|
-
`,
|
|
17
|
-
},
|
|
18
|
-
{
|
|
19
|
-
role: "user",
|
|
20
|
-
content: `
|
|
21
|
-
Task: ${task}
|
|
22
|
-
|
|
23
|
-
Conversation:
|
|
24
|
-
${conversation.join("\n")}
|
|
25
|
-
`,
|
|
26
|
-
},
|
|
27
|
-
],
|
|
15
|
+
messages,
|
|
28
16
|
tools: [
|
|
29
17
|
{
|
|
30
18
|
type: "function",
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@empiricalrun/test-gen",
|
|
3
|
-
"version": "0.27.
|
|
3
|
+
"version": "0.27.3",
|
|
4
4
|
"publishConfig": {
|
|
5
5
|
"registry": "https://registry.npmjs.org/",
|
|
6
6
|
"access": "public"
|
|
@@ -43,7 +43,7 @@
|
|
|
43
43
|
"typescript": "^5.3.3",
|
|
44
44
|
"@empiricalrun/llm": "^0.9.1",
|
|
45
45
|
"@empiricalrun/r2-uploader": "^0.1.3",
|
|
46
|
-
"@empiricalrun/reporter": "^0.17.
|
|
46
|
+
"@empiricalrun/reporter": "^0.17.11"
|
|
47
47
|
},
|
|
48
48
|
"devDependencies": {
|
|
49
49
|
"@types/detect-port": "^1.3.5",
|