@empiricalrun/test-gen 0.10.3 → 0.10.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,18 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.10.5
4
+
5
+ ### Patch Changes
6
+
7
+ - 08ecca2: fix: test gen should quit after 3 consecutive errors and update dashboard sink message format
8
+ - 08ecca2: fix: test gen should quit after 3 consecutive errors
9
+
10
+ ## 0.10.4
11
+
12
+ ### Patch Changes
13
+
14
+ - 5400dfa: fix: prepare files in subdirectory for browsing agent
15
+
3
16
  ## 0.10.3
4
17
 
5
18
  ### Patch Changes
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/actions/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAU,YAAY,EAAE,MAAM,UAAU,CAAC;AAOhD,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,OAAO,CAAW;IAC1B,OAAO,CAAC,eAAe,CAAmC;gBAC9C,IAAI,EAAE,IAAI;IAWhB,aAAa,CAAC,IAAI,oBAAa,EAAE,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC;IAkBhE,gBAAgB,IAAI,YAAY,EAAE;IAIlC,YAAY;IAIZ,UAAU;CAMX"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/actions/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAU,YAAY,EAAE,MAAM,UAAU,CAAC;AAOhD,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,OAAO,CAAW;IAC1B,OAAO,CAAC,eAAe,CAAmC;gBAC9C,IAAI,EAAE,IAAI;IAWhB,aAAa,CAAC,IAAI,oBAAa,EAAE,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC;IAiBhE,gBAAgB,IAAI,YAAY,EAAE;IAIlC,YAAY;IAIZ,UAAU;CAMX"}
@@ -33,7 +33,6 @@ class PlaywrightActions {
33
33
  console.log(`code: ${code}`, "\n\n");
34
34
  }
35
35
  catch (e) {
36
- // TODO: make these specific errors so that it's easy to consume
37
36
  throw Error(`Error executing ${name} action of playwright: ${e}`);
38
37
  }
39
38
  }
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAYlC,wBAAsB,aAAa,CACjC,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,IAAI,EACV,OAAO,GAAE;IACP,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACE,mBAwDP"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAYlC,wBAAsB,aAAa,CACjC,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,IAAI,EACV,OAAO,GAAE;IACP,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACE,mBAgFP"}
@@ -17,12 +17,11 @@ async function browsingAgent(task, page, options = {}) {
17
17
  const actions = new actions_1.PlaywrightActions(page);
18
18
  const tools = actions.getActionSchemas();
19
19
  let isTaskDone = false;
20
- const previousActions = [];
20
+ const executedActions = [];
21
21
  await (0, utils_1.injectPwLocatorGenerator)(page);
22
- let lastActionErrors = [];
22
+ trace.update({ input: { task } });
23
23
  // await page.waitForTimeout(3_00_000)
24
24
  while (!isTaskDone) {
25
- trace.update({ input: { task } });
26
25
  const pageContentSpan = trace.startSpan("page-content");
27
26
  const pageContent = await page.content();
28
27
  pageContentSpan.end({ output: { pageContent } });
@@ -30,14 +29,21 @@ async function browsingAgent(task, page, options = {}) {
30
29
  const pageSnapshot = (0, html_1.sanitizeHtml)(pageContent, options.htmlSanitize);
31
30
  sanitizationSpan.end({ output: { pageSnapshot } });
32
31
  const promptSpan = trace.startSpan("page-prompt");
32
+ // check whether last action was an error
33
+ const lastErrorAction = executedActions[executedActions.length - 1]?.isError
34
+ ? executedActions[executedActions.length - 1]?.action
35
+ : undefined;
36
+ // extract all successful actions
37
+ const successfulActions = executedActions
38
+ .filter((a) => !a.isError)
39
+ .map((a) => a.action);
33
40
  const messages = await (0, provider_1.getPromptForNextAction)({
34
41
  pageSnapshot,
35
- previousActions,
42
+ previousActions: successfulActions,
36
43
  task,
37
- lastActionErrors,
44
+ lastActionErrors: lastErrorAction ? [lastErrorAction] : [],
38
45
  });
39
46
  promptSpan.end({ output: { messages } });
40
- lastActionErrors = [];
41
47
  const completion = await (0, ai_1.getLLMResult)({
42
48
  messages,
43
49
  tools,
@@ -48,15 +54,31 @@ async function browsingAgent(task, page, options = {}) {
48
54
  const toolCall = toolCalls[i];
49
55
  try {
50
56
  await actions.executeAction(toolCall.function.name, JSON.parse(toolCall.function.arguments));
51
- previousActions.push(JSON.stringify(toolCall));
57
+ executedActions.push({
58
+ isError: false,
59
+ action: JSON.stringify(toolCall),
60
+ });
52
61
  }
53
62
  catch (e) {
54
63
  // TODO: implement feedback loop to llm
55
- lastActionErrors.push(e.message);
64
+ executedActions.push({
65
+ isError: true,
66
+ action: JSON.stringify(toolCall),
67
+ });
56
68
  logger.error(e);
57
69
  }
58
70
  }
59
71
  isTaskDone = actions.isComplete();
72
+ // mark task as done if the LLM is stuck in a loop
73
+ if (executedActions.length > 3) {
74
+ const lastThreeActions = executedActions.slice(-3);
75
+ const lastThreeActionsFailed = lastThreeActions.every((a) => a.isError);
76
+ if (lastThreeActionsFailed) {
77
+ // TODO: this should be sent to dashboard
78
+ logger.error("Agent is not able to figure out next action, marking task as done");
79
+ isTaskDone = true;
80
+ }
81
+ }
60
82
  }
61
83
  const code = actions.generateCode();
62
84
  trace.update({ input: { task }, output: { code } });
@@ -1 +1 @@
1
- {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAIlC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C,wBAAsB,2BAA2B,CAAC,SAAS,EAAE,aAAa,iBAiBzE;AAWD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBAMxD;AAED,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,QA4BnD"}
1
+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAIlC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C,wBAAsB,2BAA2B,CAAC,SAAS,EAAE,aAAa,iBAkBzE;AAWD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBAMxD;AAED,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,QA4BnD"}
@@ -13,6 +13,7 @@ async function prepareFileForBrowsingAgent(genConfig) {
13
13
  const { name, steps, assert } = scenarios[0];
14
14
  const mergedSteps = `${steps.join("\n")}\n${assert}`;
15
15
  if (!fs_extra_1.default.existsSync(specPath)) {
16
+ await fs_extra_1.default.createFile(specPath);
16
17
  await fs_extra_1.default.writeFile(specPath, (0, web_1.addNewImport)("", ["test", "expect"], "@playwright/test"));
17
18
  }
18
19
  // TODO: this assumes that test code repo has `page` as the main entrypoint fixture
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/scenarios/index.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAiGvC,iBAAe,aAAa,CAC1B,aAAa,EAAE,MAAM,GACpB,OAAO,CAAC;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,QAAQ,EAAE,CAAA;CAAE,EAAE,CAAC,CA8BxD;AAED,OAAO,EAAE,aAAa,EAAE,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/scenarios/index.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAiGvC,iBAAe,aAAa,CAC1B,aAAa,EAAE,MAAM,GACpB,OAAO,CAAC;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,QAAQ,EAAE,CAAA;CAAE,EAAE,CAAC,CAqCxD;AAED,OAAO,EAAE,aAAa,EAAE,CAAC"}
@@ -91,6 +91,14 @@ async function loadScenarios(scenariosPath) {
91
91
  else if (scenariosPath.endsWith(".yaml")) {
92
92
  return await loadScenariosFromYAML(scenariosPath);
93
93
  }
94
+ else if (scenariosPath.endsWith(".ts")) {
95
+ return [
96
+ {
97
+ specPath: scenariosPath,
98
+ scenarios: [],
99
+ },
100
+ ];
101
+ }
94
102
  else if (isValidJSON(atob(scenariosPath))) {
95
103
  const str = atob(scenariosPath);
96
104
  const config = JSON.parse(str);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@empiricalrun/test-gen",
3
- "version": "0.10.3",
3
+ "version": "0.10.5",
4
4
  "publishConfig": {
5
5
  "registry": "https://registry.npmjs.org/",
6
6
  "access": "public"