@empiricalrun/test-gen 0.10.3 → 0.10.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +13 -0
- package/dist/actions/index.d.ts.map +1 -1
- package/dist/actions/index.js +0 -1
- package/dist/agent/browsing/index.d.ts.map +1 -1
- package/dist/agent/browsing/index.js +30 -8
- package/dist/agent/browsing/utils.d.ts.map +1 -1
- package/dist/agent/browsing/utils.js +1 -0
- package/dist/bin/scenarios/index.d.ts.map +1 -1
- package/dist/bin/scenarios/index.js +8 -0
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,18 @@
|
|
|
1
1
|
# @empiricalrun/test-gen
|
|
2
2
|
|
|
3
|
+
## 0.10.5
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- 08ecca2: fix: test gen should quit after 3 consecutive errors and update dashboard sink message format
|
|
8
|
+
- 08ecca2: fix: test gen should quit after 3 consecutive errors
|
|
9
|
+
|
|
10
|
+
## 0.10.4
|
|
11
|
+
|
|
12
|
+
### Patch Changes
|
|
13
|
+
|
|
14
|
+
- 5400dfa: fix: prepare files in subdirectory for browsing agent
|
|
15
|
+
|
|
3
16
|
## 0.10.3
|
|
4
17
|
|
|
5
18
|
### Patch Changes
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/actions/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAU,YAAY,EAAE,MAAM,UAAU,CAAC;AAOhD,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,OAAO,CAAW;IAC1B,OAAO,CAAC,eAAe,CAAmC;gBAC9C,IAAI,EAAE,IAAI;IAWhB,aAAa,CAAC,IAAI,oBAAa,EAAE,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/actions/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAU,YAAY,EAAE,MAAM,UAAU,CAAC;AAOhD,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,OAAO,CAAW;IAC1B,OAAO,CAAC,eAAe,CAAmC;gBAC9C,IAAI,EAAE,IAAI;IAWhB,aAAa,CAAC,IAAI,oBAAa,EAAE,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC;IAiBhE,gBAAgB,IAAI,YAAY,EAAE;IAIlC,YAAY;IAIZ,UAAU;CAMX"}
|
package/dist/actions/index.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAYlC,wBAAsB,aAAa,CACjC,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,IAAI,EACV,OAAO,GAAE;IACP,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACE,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAYlC,wBAAsB,aAAa,CACjC,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,IAAI,EACV,OAAO,GAAE;IACP,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACE,mBAgFP"}
|
|
@@ -17,12 +17,11 @@ async function browsingAgent(task, page, options = {}) {
|
|
|
17
17
|
const actions = new actions_1.PlaywrightActions(page);
|
|
18
18
|
const tools = actions.getActionSchemas();
|
|
19
19
|
let isTaskDone = false;
|
|
20
|
-
const
|
|
20
|
+
const executedActions = [];
|
|
21
21
|
await (0, utils_1.injectPwLocatorGenerator)(page);
|
|
22
|
-
|
|
22
|
+
trace.update({ input: { task } });
|
|
23
23
|
// await page.waitForTimeout(3_00_000)
|
|
24
24
|
while (!isTaskDone) {
|
|
25
|
-
trace.update({ input: { task } });
|
|
26
25
|
const pageContentSpan = trace.startSpan("page-content");
|
|
27
26
|
const pageContent = await page.content();
|
|
28
27
|
pageContentSpan.end({ output: { pageContent } });
|
|
@@ -30,14 +29,21 @@ async function browsingAgent(task, page, options = {}) {
|
|
|
30
29
|
const pageSnapshot = (0, html_1.sanitizeHtml)(pageContent, options.htmlSanitize);
|
|
31
30
|
sanitizationSpan.end({ output: { pageSnapshot } });
|
|
32
31
|
const promptSpan = trace.startSpan("page-prompt");
|
|
32
|
+
// check whether last action was an error
|
|
33
|
+
const lastErrorAction = executedActions[executedActions.length - 1]?.isError
|
|
34
|
+
? executedActions[executedActions.length - 1]?.action
|
|
35
|
+
: undefined;
|
|
36
|
+
// extract all successful actions
|
|
37
|
+
const successfulActions = executedActions
|
|
38
|
+
.filter((a) => !a.isError)
|
|
39
|
+
.map((a) => a.action);
|
|
33
40
|
const messages = await (0, provider_1.getPromptForNextAction)({
|
|
34
41
|
pageSnapshot,
|
|
35
|
-
previousActions,
|
|
42
|
+
previousActions: successfulActions,
|
|
36
43
|
task,
|
|
37
|
-
lastActionErrors,
|
|
44
|
+
lastActionErrors: lastErrorAction ? [lastErrorAction] : [],
|
|
38
45
|
});
|
|
39
46
|
promptSpan.end({ output: { messages } });
|
|
40
|
-
lastActionErrors = [];
|
|
41
47
|
const completion = await (0, ai_1.getLLMResult)({
|
|
42
48
|
messages,
|
|
43
49
|
tools,
|
|
@@ -48,15 +54,31 @@ async function browsingAgent(task, page, options = {}) {
|
|
|
48
54
|
const toolCall = toolCalls[i];
|
|
49
55
|
try {
|
|
50
56
|
await actions.executeAction(toolCall.function.name, JSON.parse(toolCall.function.arguments));
|
|
51
|
-
|
|
57
|
+
executedActions.push({
|
|
58
|
+
isError: false,
|
|
59
|
+
action: JSON.stringify(toolCall),
|
|
60
|
+
});
|
|
52
61
|
}
|
|
53
62
|
catch (e) {
|
|
54
63
|
// TODO: implement feedback loop to llm
|
|
55
|
-
|
|
64
|
+
executedActions.push({
|
|
65
|
+
isError: true,
|
|
66
|
+
action: JSON.stringify(toolCall),
|
|
67
|
+
});
|
|
56
68
|
logger.error(e);
|
|
57
69
|
}
|
|
58
70
|
}
|
|
59
71
|
isTaskDone = actions.isComplete();
|
|
72
|
+
// mark task as done if llm is stuck in loop
|
|
73
|
+
if (executedActions.length > 3) {
|
|
74
|
+
const lastThreeActions = executedActions.slice(-3);
|
|
75
|
+
const lastThreeActionsFailed = lastThreeActions.every((a) => a.isError);
|
|
76
|
+
if (lastThreeActionsFailed) {
|
|
77
|
+
// TODO: this should be sent to dashboard
|
|
78
|
+
logger.error("Agent is not able to figure out next action, marking task as done");
|
|
79
|
+
isTaskDone = true;
|
|
80
|
+
}
|
|
81
|
+
}
|
|
60
82
|
}
|
|
61
83
|
const code = actions.generateCode();
|
|
62
84
|
trace.update({ input: { task }, output: { code } });
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAIlC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C,wBAAsB,2BAA2B,CAAC,SAAS,EAAE,aAAa,
|
|
1
|
+
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAIlC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C,wBAAsB,2BAA2B,CAAC,SAAS,EAAE,aAAa,iBAkBzE;AAWD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBAMxD;AAED,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,QA4BnD"}
|
|
@@ -13,6 +13,7 @@ async function prepareFileForBrowsingAgent(genConfig) {
|
|
|
13
13
|
const { name, steps, assert } = scenarios[0];
|
|
14
14
|
const mergedSteps = `${steps.join("\n")}\n${assert}`;
|
|
15
15
|
if (!fs_extra_1.default.existsSync(specPath)) {
|
|
16
|
+
await fs_extra_1.default.createFile(specPath);
|
|
16
17
|
await fs_extra_1.default.writeFile(specPath, (0, web_1.addNewImport)("", ["test", "expect"], "@playwright/test"));
|
|
17
18
|
}
|
|
18
19
|
// TODO: this assumes that test code repo has `page` as the main entrypoint fixture
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/scenarios/index.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAiGvC,iBAAe,aAAa,CAC1B,aAAa,EAAE,MAAM,GACpB,OAAO,CAAC;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,QAAQ,EAAE,CAAA;CAAE,EAAE,CAAC,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/scenarios/index.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAiGvC,iBAAe,aAAa,CAC1B,aAAa,EAAE,MAAM,GACpB,OAAO,CAAC;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,QAAQ,EAAE,CAAA;CAAE,EAAE,CAAC,CAqCxD;AAED,OAAO,EAAE,aAAa,EAAE,CAAC"}
|
|
@@ -91,6 +91,14 @@ async function loadScenarios(scenariosPath) {
|
|
|
91
91
|
else if (scenariosPath.endsWith(".yaml")) {
|
|
92
92
|
return await loadScenariosFromYAML(scenariosPath);
|
|
93
93
|
}
|
|
94
|
+
else if (scenariosPath.endsWith(".ts")) {
|
|
95
|
+
return [
|
|
96
|
+
{
|
|
97
|
+
specPath: scenariosPath,
|
|
98
|
+
scenarios: [],
|
|
99
|
+
},
|
|
100
|
+
];
|
|
101
|
+
}
|
|
94
102
|
else if (isValidJSON(atob(scenariosPath))) {
|
|
95
103
|
const str = atob(scenariosPath);
|
|
96
104
|
const config = JSON.parse(str);
|