@empiricalrun/test-gen 0.10.4 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +17 -0
- package/dist/actions/index.d.ts.map +1 -1
- package/dist/actions/index.js +2 -1
- package/dist/actions/reload-page.d.ts +4 -0
- package/dist/actions/reload-page.d.ts.map +1 -0
- package/dist/actions/reload-page.js +37 -0
- package/dist/agent/browsing/index.d.ts.map +1 -1
- package/dist/agent/browsing/index.js +30 -8
- package/dist/agent/browsing/utils.d.ts +1 -0
- package/dist/agent/browsing/utils.d.ts.map +1 -1
- package/dist/agent/browsing/utils.js +8 -2
- package/dist/bin/ai/index.d.ts +2 -1
- package/dist/bin/ai/index.d.ts.map +1 -1
- package/dist/bin/ai/index.js +2 -1
- package/dist/bin/scenarios/index.js +9 -8
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,22 @@
|
|
|
1
1
|
# @empiricalrun/test-gen
|
|
2
2
|
|
|
3
|
+
## 0.11.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- a1edabb: feat: support page reload as an action
|
|
8
|
+
|
|
9
|
+
### Patch Changes
|
|
10
|
+
|
|
11
|
+
- 21dae78: fix: add sanitisation of browsing agent task
|
|
12
|
+
|
|
13
|
+
## 0.10.5
|
|
14
|
+
|
|
15
|
+
### Patch Changes
|
|
16
|
+
|
|
17
|
+
- 08ecca2: fix: test gen should quit after 3 consecutive errors and update dashboard sink message format
|
|
18
|
+
- 08ecca2: fix: test gen should quit after 3 consecutive errors
|
|
19
|
+
|
|
3
20
|
## 0.10.4
|
|
4
21
|
|
|
5
22
|
### Patch Changes
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/actions/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAU,YAAY,EAAE,MAAM,UAAU,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/actions/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAU,YAAY,EAAE,MAAM,UAAU,CAAC;AAQhD,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,OAAO,CAAW;IAC1B,OAAO,CAAC,eAAe,CAAmC;gBAC9C,IAAI,EAAE,IAAI;IAYhB,aAAa,CAAC,IAAI,oBAAa,EAAE,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC;IAiBhE,gBAAgB,IAAI,YAAY,EAAE;IAIlC,YAAY;IAIZ,UAAU;CAMX"}
|
package/dist/actions/index.js
CHANGED
|
@@ -6,6 +6,7 @@ const click_1 = require("./click");
|
|
|
6
6
|
const done_1 = require("./done");
|
|
7
7
|
const fill_1 = require("./fill");
|
|
8
8
|
const goto_1 = require("./goto");
|
|
9
|
+
const reload_page_1 = require("./reload-page");
|
|
9
10
|
class PlaywrightActions {
|
|
10
11
|
actions;
|
|
11
12
|
recordedActions;
|
|
@@ -16,6 +17,7 @@ class PlaywrightActions {
|
|
|
16
17
|
(0, click_1.clickActionGenerator)(page),
|
|
17
18
|
(0, done_1.doneActionGenerator)(page),
|
|
18
19
|
(0, assertTextVisibility_1.assertTextVisibilityActionGenerator)(page),
|
|
20
|
+
(0, reload_page_1.reloadActionGenerator)(page),
|
|
19
21
|
];
|
|
20
22
|
this.recordedActions = [];
|
|
21
23
|
}
|
|
@@ -33,7 +35,6 @@ class PlaywrightActions {
|
|
|
33
35
|
console.log(`code: ${code}`, "\n\n");
|
|
34
36
|
}
|
|
35
37
|
catch (e) {
|
|
36
|
-
// TODO: make these specific errors so that its easy to consume
|
|
37
38
|
throw Error(`Error executing ${name} action of playwright: ${e}`);
|
|
38
39
|
}
|
|
39
40
|
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"reload-page.d.ts","sourceRoot":"","sources":["../../src/actions/reload-page.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,yBAAyB,EAAE,MAAM,UAAU,CAAC;AAErD,eAAO,MAAM,6BAA6B,gBAAgB,CAAC;AAE3D,eAAO,MAAM,qBAAqB,EAAE,yBA8BnC,CAAC"}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.reloadActionGenerator = exports.PLAYWRIGHT_RELOAD_ACTION_NAME = void 0;
|
|
4
|
+
const utils_1 = require("../agent/browsing/utils");
|
|
5
|
+
exports.PLAYWRIGHT_RELOAD_ACTION_NAME = "page_reload";
|
|
6
|
+
const reloadActionGenerator = (page) => {
|
|
7
|
+
return {
|
|
8
|
+
execute: async () => {
|
|
9
|
+
await page.reload();
|
|
10
|
+
await page.waitForTimeout(3000);
|
|
11
|
+
await (0, utils_1.injectPwLocatorGenerator)(page);
|
|
12
|
+
},
|
|
13
|
+
template: () => {
|
|
14
|
+
const templ = `await page.reload();`;
|
|
15
|
+
return templ;
|
|
16
|
+
},
|
|
17
|
+
name: exports.PLAYWRIGHT_RELOAD_ACTION_NAME,
|
|
18
|
+
schema: {
|
|
19
|
+
type: "function",
|
|
20
|
+
function: {
|
|
21
|
+
name: exports.PLAYWRIGHT_RELOAD_ACTION_NAME,
|
|
22
|
+
description: "reload the page by calling this method",
|
|
23
|
+
parameters: {
|
|
24
|
+
type: "object",
|
|
25
|
+
properties: {
|
|
26
|
+
reason: {
|
|
27
|
+
type: "string",
|
|
28
|
+
description: "reason for calling this function",
|
|
29
|
+
},
|
|
30
|
+
},
|
|
31
|
+
required: ["reason"],
|
|
32
|
+
},
|
|
33
|
+
},
|
|
34
|
+
},
|
|
35
|
+
};
|
|
36
|
+
};
|
|
37
|
+
exports.reloadActionGenerator = reloadActionGenerator;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAYlC,wBAAsB,aAAa,CACjC,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,IAAI,EACV,OAAO,GAAE;IACP,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACE,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAYlC,wBAAsB,aAAa,CACjC,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,IAAI,EACV,OAAO,GAAE;IACP,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACE,mBAiFP"}
|
|
@@ -17,12 +17,12 @@ async function browsingAgent(task, page, options = {}) {
|
|
|
17
17
|
const actions = new actions_1.PlaywrightActions(page);
|
|
18
18
|
const tools = actions.getActionSchemas();
|
|
19
19
|
let isTaskDone = false;
|
|
20
|
-
const
|
|
20
|
+
const executedActions = [];
|
|
21
21
|
await (0, utils_1.injectPwLocatorGenerator)(page);
|
|
22
|
-
|
|
22
|
+
trace.update({ input: { task } });
|
|
23
|
+
let lastActionExecTrace = "";
|
|
23
24
|
// await page.waitForTimeout(3_00_000)
|
|
24
25
|
while (!isTaskDone) {
|
|
25
|
-
trace.update({ input: { task } });
|
|
26
26
|
const pageContentSpan = trace.startSpan("page-content");
|
|
27
27
|
const pageContent = await page.content();
|
|
28
28
|
pageContentSpan.end({ output: { pageContent } });
|
|
@@ -30,33 +30,55 @@ async function browsingAgent(task, page, options = {}) {
|
|
|
30
30
|
const pageSnapshot = (0, html_1.sanitizeHtml)(pageContent, options.htmlSanitize);
|
|
31
31
|
sanitizationSpan.end({ output: { pageSnapshot } });
|
|
32
32
|
const promptSpan = trace.startSpan("page-prompt");
|
|
33
|
+
// extract all successful actions
|
|
34
|
+
const successfulActions = executedActions
|
|
35
|
+
.filter((a) => !a.isError)
|
|
36
|
+
.map((a) => a.action);
|
|
33
37
|
const messages = await (0, provider_1.getPromptForNextAction)({
|
|
34
38
|
pageSnapshot,
|
|
35
|
-
previousActions,
|
|
39
|
+
previousActions: successfulActions,
|
|
36
40
|
task,
|
|
37
|
-
lastActionErrors,
|
|
41
|
+
lastActionErrors: lastActionExecTrace ? [lastActionExecTrace] : [],
|
|
38
42
|
});
|
|
39
43
|
promptSpan.end({ output: { messages } });
|
|
40
|
-
lastActionErrors = [];
|
|
41
44
|
const completion = await (0, ai_1.getLLMResult)({
|
|
42
45
|
messages,
|
|
43
46
|
tools,
|
|
44
47
|
trace,
|
|
48
|
+
tool_choice: "required",
|
|
45
49
|
});
|
|
46
50
|
const toolCalls = completion?.tool_calls || [];
|
|
47
51
|
for (const i in toolCalls) {
|
|
48
52
|
const toolCall = toolCalls[i];
|
|
49
53
|
try {
|
|
50
54
|
await actions.executeAction(toolCall.function.name, JSON.parse(toolCall.function.arguments));
|
|
51
|
-
|
|
55
|
+
executedActions.push({
|
|
56
|
+
isError: false,
|
|
57
|
+
action: JSON.stringify(toolCall),
|
|
58
|
+
});
|
|
59
|
+
lastActionExecTrace = "";
|
|
52
60
|
}
|
|
53
61
|
catch (e) {
|
|
54
62
|
// TODO: implement feedback loop to llm
|
|
55
|
-
|
|
63
|
+
executedActions.push({
|
|
64
|
+
isError: true,
|
|
65
|
+
action: JSON.stringify(toolCall),
|
|
66
|
+
});
|
|
67
|
+
lastActionExecTrace = e.message;
|
|
56
68
|
logger.error(e);
|
|
57
69
|
}
|
|
58
70
|
}
|
|
59
71
|
isTaskDone = actions.isComplete();
|
|
72
|
+
// mark task as done if llm is stuck in loop
|
|
73
|
+
if (executedActions.length > 3) {
|
|
74
|
+
const lastThreeActions = executedActions.slice(-3);
|
|
75
|
+
const lastThreeActionsFailed = lastThreeActions.every((a) => a.isError);
|
|
76
|
+
if (lastThreeActionsFailed) {
|
|
77
|
+
// TODO: this should be sent to dashboard
|
|
78
|
+
logger.error("Agent is not able to figure out next action, marking task as done");
|
|
79
|
+
isTaskDone = true;
|
|
80
|
+
}
|
|
81
|
+
}
|
|
60
82
|
}
|
|
61
83
|
const code = actions.generateCode();
|
|
62
84
|
trace.update({ input: { task }, output: { code } });
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { Page } from "playwright";
|
|
2
2
|
import { TestGenConfig } from "../../types";
|
|
3
|
+
export declare function prepareBrowsingAgentTask(steps: string[], assert?: string): string;
|
|
3
4
|
export declare function prepareFileForBrowsingAgent(genConfig: TestGenConfig): Promise<void>;
|
|
4
5
|
export declare function injectPwLocatorGenerator(page: Page): Promise<void>;
|
|
5
6
|
export declare function canRunBrowsingAgent(filePath: string): void;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAIlC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C,wBAAsB,2BAA2B,CAAC,SAAS,EAAE,aAAa,iBAkBzE;AAWD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBAMxD;AAED,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,QA4BnD"}
|
|
1
|
+
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAIlC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,MAAM,CAAC,EAAE,MAAM,UAIxE;AAED,wBAAsB,2BAA2B,CAAC,SAAS,EAAE,aAAa,iBAkBzE;AAWD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBAMxD;AAED,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,QA4BnD"}
|
|
@@ -3,15 +3,21 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
3
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.canRunBrowsingAgent = exports.injectPwLocatorGenerator = exports.prepareFileForBrowsingAgent = void 0;
|
|
6
|
+
exports.canRunBrowsingAgent = exports.injectPwLocatorGenerator = exports.prepareFileForBrowsingAgent = exports.prepareBrowsingAgentTask = void 0;
|
|
7
7
|
const child_process_1 = require("child_process");
|
|
8
8
|
const fs_extra_1 = __importDefault(require("fs-extra"));
|
|
9
9
|
const logger_1 = require("../../bin/logger");
|
|
10
10
|
const web_1 = require("../../bin/utils/platform/web");
|
|
11
|
+
function prepareBrowsingAgentTask(steps, assert) {
|
|
12
|
+
const sanitizedSteps = steps.map((step) => step.replace(/`/g, "\\`"));
|
|
13
|
+
const task = `${sanitizedSteps.join("\n")}\n${assert || ""}`;
|
|
14
|
+
return task;
|
|
15
|
+
}
|
|
16
|
+
exports.prepareBrowsingAgentTask = prepareBrowsingAgentTask;
|
|
11
17
|
async function prepareFileForBrowsingAgent(genConfig) {
|
|
12
18
|
const { specPath, scenarios } = genConfig;
|
|
13
19
|
const { name, steps, assert } = scenarios[0];
|
|
14
|
-
const mergedSteps =
|
|
20
|
+
const mergedSteps = prepareBrowsingAgentTask(steps, assert);
|
|
15
21
|
if (!fs_extra_1.default.existsSync(specPath)) {
|
|
16
22
|
await fs_extra_1.default.createFile(specPath);
|
|
17
23
|
await fs_extra_1.default.writeFile(specPath, (0, web_1.addNewImport)("", ["test", "expect"], "@playwright/test"));
|
package/dist/bin/ai/index.d.ts
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import OpenAI from "openai";
|
|
2
2
|
import LLMTracing from "./trace";
|
|
3
|
-
export declare function getLLMResult({ messages, trace, tools, }: {
|
|
3
|
+
export declare function getLLMResult({ messages, trace, tools, tool_choice, }: {
|
|
4
4
|
messages: OpenAI.Chat.Completions.ChatCompletionMessageParam[];
|
|
5
5
|
trace?: LLMTracing;
|
|
6
6
|
tools?: OpenAI.Chat.Completions.ChatCompletionTool[];
|
|
7
|
+
tool_choice?: OpenAI.Chat.Completions.ChatCompletionToolChoiceOption;
|
|
7
8
|
}): Promise<OpenAI.Chat.Completions.ChatCompletionMessage | undefined>;
|
|
8
9
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/ai/index.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,OAAO,UAAU,MAAM,SAAS,CAAC;AAEjC,wBAAsB,YAAY,CAAC,EACjC,QAAQ,EACR,KAAK,EACL,KAAK,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/ai/index.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,OAAO,UAAU,MAAM,SAAS,CAAC;AAEjC,wBAAsB,YAAY,CAAC,EACjC,QAAQ,EACR,KAAK,EACL,KAAK,EACL,WAAW,GACZ,EAAE;IACD,QAAQ,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,0BAA0B,EAAE,CAAC;IAC/D,KAAK,CAAC,EAAE,UAAU,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,kBAAkB,EAAE,CAAC;IACrD,WAAW,CAAC,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,8BAA8B,CAAC;CACtE,GAAG,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,qBAAqB,GAAG,SAAS,CAAC,CAsBrE"}
|
package/dist/bin/ai/index.js
CHANGED
|
@@ -5,7 +5,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
6
|
exports.getLLMResult = void 0;
|
|
7
7
|
const openai_1 = __importDefault(require("openai"));
|
|
8
|
-
async function getLLMResult({ messages, trace, tools, }) {
|
|
8
|
+
async function getLLMResult({ messages, trace, tools, tool_choice, }) {
|
|
9
9
|
const openai = new openai_1.default();
|
|
10
10
|
const model = "gpt-4o";
|
|
11
11
|
const parameters = {
|
|
@@ -22,6 +22,7 @@ async function getLLMResult({ messages, trace, tools, }) {
|
|
|
22
22
|
model,
|
|
23
23
|
tools,
|
|
24
24
|
...parameters,
|
|
25
|
+
tool_choice,
|
|
25
26
|
});
|
|
26
27
|
const output = completion.choices[0]?.message;
|
|
27
28
|
generation?.end({ output });
|
|
@@ -91,6 +91,14 @@ async function loadScenarios(scenariosPath) {
|
|
|
91
91
|
else if (scenariosPath.endsWith(".yaml")) {
|
|
92
92
|
return await loadScenariosFromYAML(scenariosPath);
|
|
93
93
|
}
|
|
94
|
+
else if (scenariosPath.endsWith(".ts")) {
|
|
95
|
+
return [
|
|
96
|
+
{
|
|
97
|
+
specPath: scenariosPath,
|
|
98
|
+
scenarios: [],
|
|
99
|
+
},
|
|
100
|
+
];
|
|
101
|
+
}
|
|
94
102
|
else if (isValidJSON(atob(scenariosPath))) {
|
|
95
103
|
const str = atob(scenariosPath);
|
|
96
104
|
const config = JSON.parse(str);
|
|
@@ -101,14 +109,7 @@ async function loadScenarios(scenariosPath) {
|
|
|
101
109
|
scenarios: [
|
|
102
110
|
{
|
|
103
111
|
name: config.name,
|
|
104
|
-
steps: config.steps
|
|
105
|
-
.map((s) => {
|
|
106
|
-
if (s.trim().startsWith("-")) {
|
|
107
|
-
return s.replace("-", "");
|
|
108
|
-
}
|
|
109
|
-
return s;
|
|
110
|
-
})
|
|
111
|
-
.filter((s) => !!s),
|
|
112
|
+
steps: config.steps.filter((s) => !!s),
|
|
112
113
|
assert: config.assert,
|
|
113
114
|
},
|
|
114
115
|
],
|