@empiricalrun/test-gen 0.46.7 → 0.46.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +21 -0
- package/dist/actions/skill.d.ts.map +1 -1
- package/dist/actions/skill.js +3 -4
- package/dist/agent/browsing/index.d.ts +2 -9
- package/dist/agent/browsing/index.d.ts.map +1 -1
- package/dist/agent/browsing/index.js +12 -19
- package/dist/agent/codegen/skills-retriever.d.ts.map +1 -1
- package/dist/agent/codegen/skills-retriever.js +7 -34
- package/dist/agent/codegen/utils.d.ts.map +1 -1
- package/dist/agent/codegen/utils.js +20 -9
- package/dist/agent/master/action-tool-calls.d.ts +3 -1
- package/dist/agent/master/action-tool-calls.d.ts.map +1 -1
- package/dist/agent/master/action-tool-calls.js +41 -38
- package/dist/agent/master/browser-tests/fixtures.d.ts +9 -0
- package/dist/agent/master/browser-tests/fixtures.d.ts.map +1 -0
- package/dist/agent/master/browser-tests/fixtures.js +33 -0
- package/dist/agent/master/browser-tests/index.spec.js +42 -50
- package/dist/agent/master/browser-tests/skills.spec.d.ts +2 -0
- package/dist/agent/master/browser-tests/skills.spec.d.ts.map +1 -0
- package/dist/agent/master/browser-tests/skills.spec.js +109 -0
- package/dist/agent/master/element-annotation.d.ts +3 -4
- package/dist/agent/master/element-annotation.d.ts.map +1 -1
- package/dist/agent/master/element-annotation.js +2 -2
- package/dist/agent/master/execute-browser-action.d.ts +19 -0
- package/dist/agent/master/execute-browser-action.d.ts.map +1 -0
- package/dist/agent/master/execute-browser-action.js +123 -0
- package/dist/agent/master/execute-skill-action.d.ts +11 -0
- package/dist/agent/master/execute-skill-action.d.ts.map +1 -0
- package/dist/agent/master/execute-skill-action.js +25 -0
- package/dist/agent/master/next-action.d.ts +5 -8
- package/dist/agent/master/next-action.d.ts.map +1 -1
- package/dist/agent/master/next-action.js +11 -91
- package/dist/agent/master/run.d.ts +2 -3
- package/dist/agent/master/run.d.ts.map +1 -1
- package/dist/agent/master/run.js +38 -132
- package/dist/agent/master/scroller.d.ts +1 -1
- package/dist/agent/master/scroller.d.ts.map +1 -1
- package/dist/agent/master/scroller.js +0 -1
- package/dist/agent/planner/run-time-planner.d.ts +2 -1
- package/dist/agent/planner/run-time-planner.d.ts.map +1 -1
- package/dist/agent/planner/run-time-planner.js +13 -6
- package/dist/evals/master-agent.evals.d.ts.map +1 -1
- package/dist/evals/master-agent.evals.js +1 -4
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +0 -5
- package/dist/page/index.d.ts +1 -1
- package/dist/page/index.d.ts.map +1 -1
- package/dist/utils/env.d.ts.map +1 -1
- package/dist/utils/env.js +3 -1
- package/package.json +1 -1
- package/playwright.config.ts +1 -1
- package/dist/agent/browsing/o1-completion.d.ts +0 -8
- package/dist/agent/browsing/o1-completion.d.ts.map +0 -1
- package/dist/agent/browsing/o1-completion.js +0 -72
package/dist/agent/master/run.js
CHANGED
|
@@ -8,25 +8,25 @@ const utils_1 = require("../../actions/utils");
|
|
|
8
8
|
const logger_1 = require("../../bin/logger");
|
|
9
9
|
const constants_1 = require("../../constants");
|
|
10
10
|
const errors_1 = require("../../errors");
|
|
11
|
-
const human_in_the_loop_1 = require("../../human-in-the-loop");
|
|
12
11
|
const page_1 = require("../../page");
|
|
13
12
|
const reporter_1 = require("../../reporter");
|
|
14
13
|
const session_1 = require("../../session");
|
|
15
|
-
const env_1 = require("../../utils/env");
|
|
16
|
-
const browsing_1 = require("../browsing");
|
|
17
14
|
const utils_2 = require("../browsing/utils");
|
|
18
15
|
const skills_retriever_1 = require("../codegen/skills-retriever");
|
|
19
16
|
const run_1 = require("../planner/run");
|
|
20
17
|
const run_time_planner_1 = require("../planner/run-time-planner");
|
|
21
18
|
const utils_3 = require("../utils");
|
|
22
19
|
const action_tool_calls_1 = require("./action-tool-calls");
|
|
23
|
-
const
|
|
20
|
+
const execute_browser_action_1 = require("./execute-browser-action");
|
|
21
|
+
const execute_skill_action_1 = require("./execute-skill-action");
|
|
24
22
|
const next_action_1 = require("./next-action");
|
|
25
|
-
const with_hints_1 = require("./with-hints");
|
|
26
23
|
const MAX_ERROR_COUNT = 2;
|
|
27
24
|
function getPageVariables(stateVariables) {
|
|
28
25
|
const keys = Object.keys(stateVariables);
|
|
29
|
-
|
|
26
|
+
// This checks for whether page.url() exists, which is true for all pages
|
|
27
|
+
// created by playwright actions.
|
|
28
|
+
const pageVariables = keys.filter((key) => typeof stateVariables[key] === "object" &&
|
|
29
|
+
typeof stateVariables[key]?.url === "function");
|
|
30
30
|
const pages = pageVariables.reduce((acc, key) => {
|
|
31
31
|
acc[key] = stateVariables[key];
|
|
32
32
|
return acc;
|
|
@@ -110,13 +110,12 @@ async function createTestUsingMasterAgent({ task, page, testCase, specPath, opti
|
|
|
110
110
|
failedActions,
|
|
111
111
|
},
|
|
112
112
|
});
|
|
113
|
-
// Provides a plan to master agent to execute, also verifies whether task is done or not
|
|
114
113
|
const plannerResp = await (0, run_time_planner_1.runtimePlanner)({
|
|
115
114
|
trace: masterAgentSpan,
|
|
116
115
|
task,
|
|
117
116
|
successfulActions: [...masterAgentActions],
|
|
118
117
|
pages: getPageVariables(actions.getStateVariables()),
|
|
119
|
-
currentPage:
|
|
118
|
+
currentPage: testGenPage.name,
|
|
120
119
|
});
|
|
121
120
|
isGivenTaskDone = plannerResp.isDone;
|
|
122
121
|
if (isGivenTaskDone) {
|
|
@@ -131,9 +130,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, specPath, opti
|
|
|
131
130
|
}
|
|
132
131
|
// inject scripts in the page
|
|
133
132
|
await (0, utils_2.injectPwLocatorGenerator)(testGenPage.pwPageInstance);
|
|
134
|
-
const buffer = await page.screenshot(
|
|
135
|
-
// path: `screenshots/screenshot-${screenshotIndex++}.png`, // enable this and screenshotIndex var for local debugging
|
|
136
|
-
});
|
|
133
|
+
const buffer = await page.screenshot();
|
|
137
134
|
const testGenReporter = new reporter_1.TestGenUpdatesReporter();
|
|
138
135
|
await testGenReporter.sendCurrentView(buffer);
|
|
139
136
|
const pageScreenshot = buffer.toString("base64");
|
|
@@ -143,31 +140,29 @@ async function createTestUsingMasterAgent({ task, page, testCase, specPath, opti
|
|
|
143
140
|
break;
|
|
144
141
|
}
|
|
145
142
|
// Provides next action that needs to be taken
|
|
146
|
-
const
|
|
143
|
+
const nextAction = await (0, next_action_1.getNextAction)({
|
|
144
|
+
page,
|
|
145
|
+
pageScreenshot: [pageScreenshot],
|
|
147
146
|
task,
|
|
148
147
|
executedActions: masterAgentActions,
|
|
149
148
|
failedActions,
|
|
150
|
-
pageUrl: page.url(),
|
|
151
149
|
trace: masterAgentSpan,
|
|
152
150
|
llm,
|
|
153
151
|
options,
|
|
154
|
-
pageScreenshot: [pageScreenshot],
|
|
155
152
|
actions,
|
|
156
|
-
actionTypes: action_tool_calls_1.ActionType,
|
|
157
153
|
disableSkills,
|
|
158
|
-
page,
|
|
159
154
|
logger,
|
|
160
155
|
});
|
|
161
156
|
if (await (0, session_1.shouldStopSession)()) {
|
|
162
157
|
break;
|
|
163
158
|
}
|
|
164
|
-
if (
|
|
165
|
-
if (
|
|
159
|
+
if (nextAction) {
|
|
160
|
+
if (nextAction.actionType === action_tool_calls_1.ActionType.UNKNOWN) {
|
|
166
161
|
logger.error("Agent is not able to figure out next action since element is not visible on screen.");
|
|
167
162
|
await testgenUpdatesReporter.sendMessage("Agent is not able to figure out next action since element is not visible on screen.");
|
|
168
163
|
break;
|
|
169
164
|
}
|
|
170
|
-
const args = (0, utils_3.parseJson)(
|
|
165
|
+
const args = (0, utils_3.parseJson)(nextAction.toolCallArgs);
|
|
171
166
|
const masterAgentActionSpan = masterAgentSpan?.span({
|
|
172
167
|
name: "master-agent-execute-action",
|
|
173
168
|
});
|
|
@@ -178,124 +173,35 @@ async function createTestUsingMasterAgent({ task, page, testCase, specPath, opti
|
|
|
178
173
|
try {
|
|
179
174
|
void testGenReporter.sendMessage(output.reason);
|
|
180
175
|
logger.log(`Next Action: ${output.action}`);
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
const
|
|
184
|
-
|
|
185
|
-
|
|
176
|
+
switch (nextAction.actionType) {
|
|
177
|
+
case skill_1.SKILL_USAGE: {
|
|
178
|
+
const code = await (0, execute_skill_action_1.executeSkillAction)({
|
|
179
|
+
actions,
|
|
180
|
+
nextAction,
|
|
181
|
+
trace: masterAgentActionSpan,
|
|
186
182
|
});
|
|
187
|
-
if (
|
|
188
|
-
|
|
183
|
+
if (code) {
|
|
184
|
+
generatedCodeSteps.push(code);
|
|
189
185
|
}
|
|
186
|
+
break;
|
|
190
187
|
}
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
}
|
|
196
|
-
else {
|
|
197
|
-
let shouldTriggerHintsFlow;
|
|
198
|
-
let hintsExecutionCompletion;
|
|
199
|
-
let elementAnnotation;
|
|
200
|
-
const actionType = toolCall.actionType;
|
|
201
|
-
let preference = {
|
|
202
|
-
actionType: "all",
|
|
203
|
-
};
|
|
204
|
-
if (useActionSpecificAnnotations && (0, action_tool_calls_1.isValidActionType)(actionType)) {
|
|
205
|
-
switch (actionType) {
|
|
206
|
-
case action_tool_calls_1.ActionType.FILL:
|
|
207
|
-
preference = {
|
|
208
|
-
actionType: action_tool_calls_1.ActionType.FILL,
|
|
209
|
-
};
|
|
210
|
-
break;
|
|
211
|
-
case action_tool_calls_1.ActionType.ASSERT_TEXT:
|
|
212
|
-
preference = {
|
|
213
|
-
actionType: action_tool_calls_1.ActionType.ASSERT_TEXT,
|
|
214
|
-
assertionText: args.assertion_text,
|
|
215
|
-
};
|
|
216
|
-
break;
|
|
217
|
-
default:
|
|
218
|
-
preference = {
|
|
219
|
-
actionType: "all",
|
|
220
|
-
};
|
|
221
|
-
}
|
|
188
|
+
case action_tool_calls_1.ActionType.OBSERVATION: {
|
|
189
|
+
output.action = args.observation;
|
|
190
|
+
logger.log("Observation: ", output.action);
|
|
191
|
+
break;
|
|
222
192
|
}
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
if (annotationKeys.length > 0) {
|
|
230
|
-
// TODO: this string has newline characters that makes it harder to read
|
|
231
|
-
const annotationMapString = annotationKeys
|
|
232
|
-
?.map((a) => `${a.elementID}: ${a.text}`)
|
|
233
|
-
.join("\n");
|
|
234
|
-
// Provides the annotations for all the element present on screen
|
|
235
|
-
// Also provides the annotation of element on which the action needs to be taken
|
|
236
|
-
elementAnnotation = await (0, element_annotation_1.getElementAnnotation)({
|
|
237
|
-
elementDescription: args.element_description,
|
|
238
|
-
annotations: annotationMapString,
|
|
239
|
-
annotatedScreenshot: annotatedPageScreenshot,
|
|
240
|
-
trace: masterAgentActionSpan,
|
|
241
|
-
llm,
|
|
242
|
-
preference,
|
|
243
|
-
});
|
|
244
|
-
output.elementAnnotation = elementAnnotation;
|
|
245
|
-
console.log("Output: ", output);
|
|
246
|
-
await testGenReporter.sendCurrentView(annotationBuffer);
|
|
247
|
-
const triggerHintsFlowSpan = masterAgentActionSpan?.span({
|
|
248
|
-
name: "trigger-hints-flow",
|
|
249
|
-
input: {
|
|
250
|
-
outputFromGetNextAction: output,
|
|
251
|
-
generatedAnnotations: annotationKeys,
|
|
193
|
+
default: {
|
|
194
|
+
const { generatedCodeSteps: codeFromExecuteAction } = await (0, execute_browser_action_1.executeBrowserAction)({
|
|
195
|
+
page,
|
|
196
|
+
nextAction,
|
|
197
|
+
flags: {
|
|
198
|
+
useActionSpecificAnnotations,
|
|
252
199
|
},
|
|
253
|
-
});
|
|
254
|
-
// Provides the action whether its a click, fill etc.
|
|
255
|
-
const result = await (0, with_hints_1.triggerHintsFlow)({
|
|
256
|
-
outputFromGetNextAction: output,
|
|
257
|
-
generatedAnnotations: annotationKeys,
|
|
258
200
|
actions,
|
|
259
201
|
llm,
|
|
260
|
-
trace: triggerHintsFlowSpan,
|
|
261
|
-
});
|
|
262
|
-
shouldTriggerHintsFlow = result.shouldTriggerHintsFlow;
|
|
263
|
-
hintsExecutionCompletion = result.hintsExecutionCompletion;
|
|
264
|
-
triggerHintsFlowSpan?.end({
|
|
265
|
-
output: result,
|
|
266
|
-
});
|
|
267
|
-
}
|
|
268
|
-
if (shouldTriggerHintsFlow && hintsExecutionCompletion) {
|
|
269
|
-
const toolCalls = hintsExecutionCompletion?.tool_calls || [];
|
|
270
|
-
for (const i in toolCalls) {
|
|
271
|
-
const currentToolCall = toolCalls[i];
|
|
272
|
-
const code = await actions.executeAction(currentToolCall.function.name, {
|
|
273
|
-
...JSON.parse(currentToolCall.function.arguments),
|
|
274
|
-
elementAnnotation,
|
|
275
|
-
}, masterAgentActionSpan);
|
|
276
|
-
if (code) {
|
|
277
|
-
generatedCodeSteps.push(code);
|
|
278
|
-
}
|
|
279
|
-
}
|
|
280
|
-
if (actions.isStuckInLoop()) {
|
|
281
|
-
throw new Error("Agent is not able to figure out next action when using hints");
|
|
282
|
-
}
|
|
283
|
-
}
|
|
284
|
-
else {
|
|
285
|
-
const browserAction = await (0, browsing_1.executeTaskUsingBrowsingAgent)({
|
|
286
202
|
trace: masterAgentActionSpan,
|
|
287
|
-
action: output.action,
|
|
288
|
-
page,
|
|
289
|
-
options,
|
|
290
|
-
llm,
|
|
291
|
-
actions,
|
|
292
203
|
});
|
|
293
|
-
|
|
294
|
-
output.action = browserAction.action;
|
|
295
|
-
if (browserAction.code) {
|
|
296
|
-
generatedCodeSteps.push(browserAction.code);
|
|
297
|
-
}
|
|
298
|
-
}
|
|
204
|
+
generatedCodeSteps.push(...codeFromExecuteAction);
|
|
299
205
|
}
|
|
300
206
|
}
|
|
301
207
|
// resetting error count as there is a successful action
|
|
@@ -305,7 +211,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, specPath, opti
|
|
|
305
211
|
input: {
|
|
306
212
|
action: output.action,
|
|
307
213
|
reason: output.reason,
|
|
308
|
-
type:
|
|
214
|
+
type: nextAction.actionType,
|
|
309
215
|
},
|
|
310
216
|
output: {
|
|
311
217
|
success: true,
|
|
@@ -315,7 +221,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, specPath, opti
|
|
|
315
221
|
disableSkills = false;
|
|
316
222
|
}
|
|
317
223
|
catch (e) {
|
|
318
|
-
if (
|
|
224
|
+
if (nextAction.actionType === skill_1.SKILL_USAGE) {
|
|
319
225
|
logger.log("Disabling skill usage for next retry");
|
|
320
226
|
disableSkills = true;
|
|
321
227
|
}
|
|
@@ -323,7 +229,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, specPath, opti
|
|
|
323
229
|
input: {
|
|
324
230
|
action: output.action,
|
|
325
231
|
reason: output.reason,
|
|
326
|
-
type:
|
|
232
|
+
type: nextAction.actionType,
|
|
327
233
|
},
|
|
328
234
|
output: {
|
|
329
235
|
error: true,
|
|
@@ -331,7 +237,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, specPath, opti
|
|
|
331
237
|
},
|
|
332
238
|
});
|
|
333
239
|
if (!(e instanceof errors_1.HumanApprovalDenied)) {
|
|
334
|
-
logger.error("Failed to run master agent's next task", JSON.stringify(
|
|
240
|
+
logger.error("Failed to run master agent's next task", JSON.stringify(nextAction, null, 2), e);
|
|
335
241
|
failedActions.push(output.action);
|
|
336
242
|
if (failedActions.length >= MAX_ERROR_COUNT) {
|
|
337
243
|
const error = "Agent is not able to figure out next action, marking task as done";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scroller.d.ts","sourceRoot":"","sources":["../../../src/agent/master/scroller.ts"],"names":[],"mappings":"AAAA,OAAO,EAAuB,WAAW,EAAE,MAAM,mBAAmB,CAAC;
|
|
1
|
+
{"version":3,"file":"scroller.d.ts","sourceRoot":"","sources":["../../../src/agent/master/scroller.ts"],"names":[],"mappings":"AAAA,OAAO,EAAuB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAErE,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,kBAAkB,CAAC;AAG7C,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAchD,MAAM,MAAM,cAAc,GAAG;IAC3B,cAAc,EAAE,MAAM,CAAC;IACvB,eAAe,EAAE,MAAM,CAAC;CACzB,CAAC;AA2ZF,wBAAsB,QAAQ,CAAC,EAC7B,kBAAkB,EAClB,IAAI,EACJ,KAAK,EACL,cAAc,EACd,MAAM,GACP,EAAE;IACD,kBAAkB,EAAE,MAAM,CAAC;IAC3B,IAAI,EAAE,IAAI,CAAC;IACX,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC,MAAM,CAAC,EAAE,YAAY,CAAC;CACvB,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC,CA6D5B"}
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import { TraceClient } from "@empiricalrun/llm";
|
|
2
|
+
import type { Page } from "playwright";
|
|
2
3
|
export declare function runtimePlanner({ trace, task, successfulActions, pages, currentPage, }: {
|
|
3
4
|
trace?: TraceClient;
|
|
4
5
|
successfulActions: string[];
|
|
5
6
|
task: string;
|
|
6
|
-
pages?: Record<string,
|
|
7
|
+
pages?: Record<string, Page>;
|
|
7
8
|
currentPage?: string;
|
|
8
9
|
}): Promise<{
|
|
9
10
|
pageName: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run-time-planner.d.ts","sourceRoot":"","sources":["../../../src/agent/planner/run-time-planner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAsB,WAAW,EAAE,MAAM,mBAAmB,CAAC;
|
|
1
|
+
{"version":3,"file":"run-time-planner.d.ts","sourceRoot":"","sources":["../../../src/agent/planner/run-time-planner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAsB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACpE,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAKvC,wBAAsB,cAAc,CAAC,EACnC,KAAK,EACL,IAAI,EACJ,iBAAiB,EACjB,KAAK,EACL,WAAW,GACZ,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,iBAAiB,EAAE,MAAM,EAAE,CAAC;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IAC7B,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;;;;GA4FA"}
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.runtimePlanner = void 0;
|
|
4
4
|
const llm_1 = require("@empiricalrun/llm");
|
|
5
|
-
const promptTemplate_0 = "{{#section \"system\"}}\
|
|
5
|
+
const promptTemplate_0 = "{{#section \"system\"}}\nYou are given a list of successfully executed actions that are done towards completing a task (which\nis also provided to you). Your goal is to analyse the list and determine if the task is completed.\n\nIf the task is not fully completed, identify which specific actions are missing\nand suggest next steps to complete the task. Assume that the conversation provided\nis entirely truthful and no additional actions were performed beyond those listed.\n\nThese actions were executed by AI agents using Playwright on a browser. These agents\nalready have access to browser tabs to execute actions. If there is a pending action,\none of the agents will execute it in the browser. However, they need your help to\nchoose which browser tab (= page) to use for the next action.\n\nTo fulfil your goal, follow these steps:\n- Divide the task into individual actions.\n- Compare each task action against the actions listed in the successfully executed actions list.\n- Identify which actions have been executed and which have not.\n- If all actions are executed, respond with the task as done.\n- If any actions are missing, respond with the task as not done, listing all actions\n and specifying which are complete and which are missing.\n- If provided with list of pages, based on the next pending action and previously executed\n action, identify the page on which next action needs to be taken\n{{/section}}\n\n{{#section \"user\"}}\nTask:\n{{task}}\n\n----\n\nSuccessfully executed actions:\n{{successfulActions}}\n\n----\n\nList of pages with their current URLs:\n{{pagesSummary}}\n\n\n{{/section}}\n";
|
|
6
6
|
const utils_1 = require("../utils");
|
|
7
7
|
async function runtimePlanner({ trace, task, successfulActions, pages, currentPage, }) {
|
|
8
8
|
const runTimePlannerSpan = trace?.span({
|
|
@@ -14,6 +14,9 @@ async function runtimePlanner({ trace, task, successfulActions, pages, currentPa
|
|
|
14
14
|
},
|
|
15
15
|
});
|
|
16
16
|
const llm = new llm_1.LLM({ provider: "openai" });
|
|
17
|
+
const pagesSummary = pages
|
|
18
|
+
? Object.entries(pages).map(([pageName, page]) => `${pageName}: Currently on ${page.url()}`)
|
|
19
|
+
: [];
|
|
17
20
|
const response = await llm.createChatCompletion({
|
|
18
21
|
trace: runTimePlannerSpan,
|
|
19
22
|
traceName: "runtime-planner-llm",
|
|
@@ -21,7 +24,7 @@ async function runtimePlanner({ trace, task, successfulActions, pages, currentPa
|
|
|
21
24
|
messages: (0, llm_1.compilePrompt)(promptTemplate_0, {
|
|
22
25
|
task,
|
|
23
26
|
successfulActions: successfulActions.join("\n"),
|
|
24
|
-
|
|
27
|
+
pagesSummary: pagesSummary.join("\n"),
|
|
25
28
|
}),
|
|
26
29
|
tools: [
|
|
27
30
|
{
|
|
@@ -44,17 +47,21 @@ async function runtimePlanner({ trace, task, successfulActions, pages, currentPa
|
|
|
44
47
|
type: "string",
|
|
45
48
|
description: "reasoning for identification of task status",
|
|
46
49
|
},
|
|
47
|
-
|
|
48
|
-
type: "
|
|
49
|
-
description: "
|
|
50
|
+
nextAction: {
|
|
51
|
+
type: "string",
|
|
52
|
+
description: "next action to be taken",
|
|
50
53
|
},
|
|
51
54
|
pageName: {
|
|
52
55
|
type: "string",
|
|
53
56
|
enum: pages ? Object.keys(pages) : [],
|
|
54
57
|
description: "page name for the next action.",
|
|
55
58
|
},
|
|
59
|
+
isDone: {
|
|
60
|
+
type: "boolean",
|
|
61
|
+
description: "whether the task is done",
|
|
62
|
+
},
|
|
56
63
|
},
|
|
57
|
-
required: ["isDone", "reason", "pageName"],
|
|
64
|
+
required: ["isDone", "reason", "pageName", "nextAction"],
|
|
58
65
|
},
|
|
59
66
|
},
|
|
60
67
|
},
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"master-agent.evals.d.ts","sourceRoot":"","sources":["../../src/evals/master-agent.evals.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"master-agent.evals.d.ts","sourceRoot":"","sources":["../../src/evals/master-agent.evals.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAEpC,eAAO,MAAM,4BAA4B,EAAE,UAoC1C,CAAC;AAEF,eAAe,4BAA4B,CAAC"}
|
|
@@ -3,11 +3,10 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.masterGetNextActionEvaluator = void 0;
|
|
4
4
|
const actions_1 = require("../actions");
|
|
5
5
|
const skill_1 = require("../actions/skill");
|
|
6
|
-
const action_tool_calls_1 = require("../agent/master/action-tool-calls");
|
|
7
6
|
const next_action_1 = require("../agent/master/next-action");
|
|
8
7
|
const page_1 = require("../page");
|
|
9
8
|
const masterGetNextActionEvaluator = async ({ item, trace, }) => {
|
|
10
|
-
const { task, executedActions, failedActions,
|
|
9
|
+
const { task, executedActions, failedActions, options, pageScreenshot, disableSkills, skills = [], } = item.input;
|
|
11
10
|
const page = {};
|
|
12
11
|
skill_1.testCaseSkills.updateSkills(skills);
|
|
13
12
|
const actions = new actions_1.PlaywrightActions(new page_1.TestGenPage(page, "page"));
|
|
@@ -15,12 +14,10 @@ const masterGetNextActionEvaluator = async ({ item, trace, }) => {
|
|
|
15
14
|
task,
|
|
16
15
|
executedActions,
|
|
17
16
|
failedActions,
|
|
18
|
-
pageUrl,
|
|
19
17
|
trace,
|
|
20
18
|
options,
|
|
21
19
|
pageScreenshot,
|
|
22
20
|
actions,
|
|
23
|
-
actionTypes: action_tool_calls_1.ActionType,
|
|
24
21
|
disableSkills,
|
|
25
22
|
page,
|
|
26
23
|
});
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAOlC,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAYpC,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC,EAAE,SAAS,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAOlC,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAYpC,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC,EAAE,SAAS,iBAiC3E"}
|
package/dist/index.js
CHANGED
package/dist/page/index.d.ts
CHANGED
package/dist/page/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/page/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/page/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAEvC,qBAAa,WAAW;IAEb,cAAc,EAAE,IAAI;IACpB,IAAI,EAAE,MAAM;gBADZ,cAAc,EAAE,IAAI,EACpB,IAAI,EAAE,MAAM;IAGrB,UAAU,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,IAAI,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE;CAIxD"}
|
package/dist/utils/env.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"env.d.ts","sourceRoot":"","sources":["../../src/utils/env.ts"],"names":[],"mappings":"AAAA,wBAAgB,gBAAgB,
|
|
1
|
+
{"version":3,"file":"env.d.ts","sourceRoot":"","sources":["../../src/utils/env.ts"],"names":[],"mappings":"AAAA,wBAAgB,gBAAgB,YAM/B"}
|
package/dist/utils/env.js
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.isRunningOnCloud = void 0;
|
|
4
4
|
function isRunningOnCloud() {
|
|
5
|
-
return process.env.CI === "true"
|
|
5
|
+
return (process.env.CI === "true" ||
|
|
6
|
+
// eslint-disable-next-line turbo/no-undeclared-env-vars
|
|
7
|
+
!!process.env.RUNNING_BROWSER_TESTS_FOR_TEST_GEN);
|
|
6
8
|
}
|
|
7
9
|
exports.isRunningOnCloud = isRunningOnCloud;
|
package/package.json
CHANGED
package/playwright.config.ts
CHANGED
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
import { TraceClient } from "@empiricalrun/llm";
|
|
2
|
-
import { ChatCompletionMessage, ChatCompletionMessageParam, ChatCompletionTool } from "openai/resources/index.mjs";
|
|
3
|
-
export declare function getO1Completion({ messages, tools, trace, }: {
|
|
4
|
-
messages: ChatCompletionMessageParam[];
|
|
5
|
-
tools: ChatCompletionTool[];
|
|
6
|
-
trace?: TraceClient;
|
|
7
|
-
}): Promise<ChatCompletionMessage | undefined>;
|
|
8
|
-
//# sourceMappingURL=o1-completion.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"o1-completion.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/o1-completion.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EACL,qBAAqB,EACrB,0BAA0B,EAC1B,kBAAkB,EACnB,MAAM,4BAA4B,CAAC;AAMpC,wBAAsB,eAAe,CAAC,EACpC,QAAQ,EACR,KAAK,EACL,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,0BAA0B,EAAE,CAAC;IACvC,KAAK,EAAE,kBAAkB,EAAE,CAAC;IAC5B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC,qBAAqB,GAAG,SAAS,CAAC,CA8D7C"}
|
|
@@ -1,72 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.getO1Completion = void 0;
|
|
7
|
-
const llm_1 = require("@empiricalrun/llm");
|
|
8
|
-
const remove_markdown_1 = __importDefault(require("remove-markdown"));
|
|
9
|
-
const constants_1 = require("../../constants");
|
|
10
|
-
const utils_1 = require("../utils");
|
|
11
|
-
async function getO1Completion({ messages, tools, trace, }) {
|
|
12
|
-
let completion;
|
|
13
|
-
try {
|
|
14
|
-
const o1Span = trace?.span({ name: "o1-response-span" });
|
|
15
|
-
const llm = new llm_1.LLM({
|
|
16
|
-
trace: o1Span,
|
|
17
|
-
provider: "openai",
|
|
18
|
-
defaultModel: "o1-mini",
|
|
19
|
-
providerApiKey: constants_1.MODEL_API_KEYS["openai"],
|
|
20
|
-
});
|
|
21
|
-
const [userInstruction] = messages.filter((s) => s.role === "user");
|
|
22
|
-
const [systemInstruction] = messages.filter((s) => s.role === "system");
|
|
23
|
-
userInstruction.content = `${systemInstruction?.content}
|
|
24
|
-
|
|
25
|
-
${userInstruction?.content}
|
|
26
|
-
|
|
27
|
-
You need to respond with one of the following tool call with provided schema:
|
|
28
|
-
${tools.map((tool) => JSON.stringify(tool, null, 2)).join("\n --- \n")}
|
|
29
|
-
|
|
30
|
-
------
|
|
31
|
-
|
|
32
|
-
Before responding, ensure the following:
|
|
33
|
-
- Do not respond with markdown, respond only with the JSON object.
|
|
34
|
-
- Do not respond with any backticks.
|
|
35
|
-
- The reason for action should also include what was been executed in the action.
|
|
36
|
-
`;
|
|
37
|
-
o1Span?.update({ input: [userInstruction] });
|
|
38
|
-
const response = (await llm.createChatCompletion({
|
|
39
|
-
messages: [userInstruction],
|
|
40
|
-
modelParameters: {
|
|
41
|
-
...constants_1.DEFAULT_O1_MODEL_PARAMETERS,
|
|
42
|
-
},
|
|
43
|
-
}));
|
|
44
|
-
o1Span?.end({ output: response });
|
|
45
|
-
const toolResponseStr = (0, remove_markdown_1.default)(response.content);
|
|
46
|
-
const toolRespJSON = (0, utils_1.parseJson)(toolResponseStr);
|
|
47
|
-
const parameters = toolRespJSON.function.parameters || toolRespJSON.function.arguments;
|
|
48
|
-
if (!parameters) {
|
|
49
|
-
throw new Error("No parameters found in tool response");
|
|
50
|
-
}
|
|
51
|
-
const tool = {
|
|
52
|
-
type: "function",
|
|
53
|
-
function: {
|
|
54
|
-
name: toolRespJSON.function.name,
|
|
55
|
-
arguments: typeof parameters === "string"
|
|
56
|
-
? parameters
|
|
57
|
-
: JSON.stringify(parameters, null, 2),
|
|
58
|
-
},
|
|
59
|
-
};
|
|
60
|
-
completion = {
|
|
61
|
-
role: "assistant",
|
|
62
|
-
content: response.content,
|
|
63
|
-
tool_calls: [tool],
|
|
64
|
-
};
|
|
65
|
-
}
|
|
66
|
-
catch (e) {
|
|
67
|
-
console.error("O1 response error", e);
|
|
68
|
-
return undefined;
|
|
69
|
-
}
|
|
70
|
-
return completion;
|
|
71
|
-
}
|
|
72
|
-
exports.getO1Completion = getO1Completion;
|