@empiricalrun/test-gen 0.10.4 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,22 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.11.0
4
+
5
+ ### Minor Changes
6
+
7
+ - a1edabb: feat: support page reload as an action
8
+
9
+ ### Patch Changes
10
+
11
+ - 21dae78: fix: add sanitisation of browsing agent task
12
+
13
+ ## 0.10.5
14
+
15
+ ### Patch Changes
16
+
17
+ - 08ecca2: fix: test gen should quit after 3 consecutive errors and update dashboard sink message format
18
+ - 08ecca2: fix: test gen should quit after 3 consecutive errors
19
+
3
20
  ## 0.10.4
4
21
 
5
22
  ### Patch Changes
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/actions/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAU,YAAY,EAAE,MAAM,UAAU,CAAC;AAOhD,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,OAAO,CAAW;IAC1B,OAAO,CAAC,eAAe,CAAmC;gBAC9C,IAAI,EAAE,IAAI;IAWhB,aAAa,CAAC,IAAI,oBAAa,EAAE,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC;IAkBhE,gBAAgB,IAAI,YAAY,EAAE;IAIlC,YAAY;IAIZ,UAAU;CAMX"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/actions/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAU,YAAY,EAAE,MAAM,UAAU,CAAC;AAQhD,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,OAAO,CAAW;IAC1B,OAAO,CAAC,eAAe,CAAmC;gBAC9C,IAAI,EAAE,IAAI;IAYhB,aAAa,CAAC,IAAI,oBAAa,EAAE,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC;IAiBhE,gBAAgB,IAAI,YAAY,EAAE;IAIlC,YAAY;IAIZ,UAAU;CAMX"}
@@ -6,6 +6,7 @@ const click_1 = require("./click");
6
6
  const done_1 = require("./done");
7
7
  const fill_1 = require("./fill");
8
8
  const goto_1 = require("./goto");
9
+ const reload_page_1 = require("./reload-page");
9
10
  class PlaywrightActions {
10
11
  actions;
11
12
  recordedActions;
@@ -16,6 +17,7 @@ class PlaywrightActions {
16
17
  (0, click_1.clickActionGenerator)(page),
17
18
  (0, done_1.doneActionGenerator)(page),
18
19
  (0, assertTextVisibility_1.assertTextVisibilityActionGenerator)(page),
20
+ (0, reload_page_1.reloadActionGenerator)(page),
19
21
  ];
20
22
  this.recordedActions = [];
21
23
  }
@@ -33,7 +35,6 @@ class PlaywrightActions {
33
35
  console.log(`code: ${code}`, "\n\n");
34
36
  }
35
37
  catch (e) {
36
- // TODO: make these specific errors so that its easy to consume
37
38
  throw Error(`Error executing ${name} action of playwright: ${e}`);
38
39
  }
39
40
  }
@@ -0,0 +1,4 @@
1
+ import { PlaywrightActionGenerator } from "../types";
2
+ export declare const PLAYWRIGHT_RELOAD_ACTION_NAME = "page_reload";
3
+ export declare const reloadActionGenerator: PlaywrightActionGenerator;
4
+ //# sourceMappingURL=reload-page.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"reload-page.d.ts","sourceRoot":"","sources":["../../src/actions/reload-page.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,yBAAyB,EAAE,MAAM,UAAU,CAAC;AAErD,eAAO,MAAM,6BAA6B,gBAAgB,CAAC;AAE3D,eAAO,MAAM,qBAAqB,EAAE,yBA8BnC,CAAC"}
@@ -0,0 +1,37 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.reloadActionGenerator = exports.PLAYWRIGHT_RELOAD_ACTION_NAME = void 0;
4
+ const utils_1 = require("../agent/browsing/utils");
5
+ exports.PLAYWRIGHT_RELOAD_ACTION_NAME = "page_reload";
6
+ const reloadActionGenerator = (page) => {
7
+ return {
8
+ execute: async () => {
9
+ await page.reload();
10
+ await page.waitForTimeout(3000);
11
+ await (0, utils_1.injectPwLocatorGenerator)(page);
12
+ },
13
+ template: () => {
14
+ const templ = `await page.reload();`;
15
+ return templ;
16
+ },
17
+ name: exports.PLAYWRIGHT_RELOAD_ACTION_NAME,
18
+ schema: {
19
+ type: "function",
20
+ function: {
21
+ name: exports.PLAYWRIGHT_RELOAD_ACTION_NAME,
22
+ description: "reload the page by calling this method",
23
+ parameters: {
24
+ type: "object",
25
+ properties: {
26
+ reason: {
27
+ type: "string",
28
+ description: "reason for calling this function",
29
+ },
30
+ },
31
+ required: ["reason"],
32
+ },
33
+ },
34
+ },
35
+ };
36
+ };
37
+ exports.reloadActionGenerator = reloadActionGenerator;
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAYlC,wBAAsB,aAAa,CACjC,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,IAAI,EACV,OAAO,GAAE;IACP,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACE,mBAwDP"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAYlC,wBAAsB,aAAa,CACjC,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,IAAI,EACV,OAAO,GAAE;IACP,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACE,mBAiFP"}
@@ -17,12 +17,12 @@ async function browsingAgent(task, page, options = {}) {
17
17
  const actions = new actions_1.PlaywrightActions(page);
18
18
  const tools = actions.getActionSchemas();
19
19
  let isTaskDone = false;
20
- const previousActions = [];
20
+ const executedActions = [];
21
21
  await (0, utils_1.injectPwLocatorGenerator)(page);
22
- let lastActionErrors = [];
22
+ trace.update({ input: { task } });
23
+ let lastActionExecTrace = "";
23
24
  // await page.waitForTimeout(3_00_000)
24
25
  while (!isTaskDone) {
25
- trace.update({ input: { task } });
26
26
  const pageContentSpan = trace.startSpan("page-content");
27
27
  const pageContent = await page.content();
28
28
  pageContentSpan.end({ output: { pageContent } });
@@ -30,33 +30,55 @@ async function browsingAgent(task, page, options = {}) {
30
30
  const pageSnapshot = (0, html_1.sanitizeHtml)(pageContent, options.htmlSanitize);
31
31
  sanitizationSpan.end({ output: { pageSnapshot } });
32
32
  const promptSpan = trace.startSpan("page-prompt");
33
+ // extract all successful actions
34
+ const successfulActions = executedActions
35
+ .filter((a) => !a.isError)
36
+ .map((a) => a.action);
33
37
  const messages = await (0, provider_1.getPromptForNextAction)({
34
38
  pageSnapshot,
35
- previousActions,
39
+ previousActions: successfulActions,
36
40
  task,
37
- lastActionErrors,
41
+ lastActionErrors: lastActionExecTrace ? [lastActionExecTrace] : [],
38
42
  });
39
43
  promptSpan.end({ output: { messages } });
40
- lastActionErrors = [];
41
44
  const completion = await (0, ai_1.getLLMResult)({
42
45
  messages,
43
46
  tools,
44
47
  trace,
48
+ tool_choice: "required",
45
49
  });
46
50
  const toolCalls = completion?.tool_calls || [];
47
51
  for (const i in toolCalls) {
48
52
  const toolCall = toolCalls[i];
49
53
  try {
50
54
  await actions.executeAction(toolCall.function.name, JSON.parse(toolCall.function.arguments));
51
- previousActions.push(JSON.stringify(toolCall));
55
+ executedActions.push({
56
+ isError: false,
57
+ action: JSON.stringify(toolCall),
58
+ });
59
+ lastActionExecTrace = "";
52
60
  }
53
61
  catch (e) {
54
62
  // TODO: implement feedback loop to llm
55
- lastActionErrors.push(e.message);
63
+ executedActions.push({
64
+ isError: true,
65
+ action: JSON.stringify(toolCall),
66
+ });
67
+ lastActionExecTrace = e.message;
56
68
  logger.error(e);
57
69
  }
58
70
  }
59
71
  isTaskDone = actions.isComplete();
72
+ // mark task as done if llm is stuck in loop
73
+ if (executedActions.length > 3) {
74
+ const lastThreeActions = executedActions.slice(-3);
75
+ const lastThreeActionsFailed = lastThreeActions.every((a) => a.isError);
76
+ if (lastThreeActionsFailed) {
77
+ // TODO: this should be sent to dashboard
78
+ logger.error("Agent is not able to figure out next action, marking task as done");
79
+ isTaskDone = true;
80
+ }
81
+ }
60
82
  }
61
83
  const code = actions.generateCode();
62
84
  trace.update({ input: { task }, output: { code } });
@@ -1,5 +1,6 @@
1
1
  import { Page } from "playwright";
2
2
  import { TestGenConfig } from "../../types";
3
+ export declare function prepareBrowsingAgentTask(steps: string[], assert?: string): string;
3
4
  export declare function prepareFileForBrowsingAgent(genConfig: TestGenConfig): Promise<void>;
4
5
  export declare function injectPwLocatorGenerator(page: Page): Promise<void>;
5
6
  export declare function canRunBrowsingAgent(filePath: string): void;
@@ -1 +1 @@
1
- {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAIlC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C,wBAAsB,2BAA2B,CAAC,SAAS,EAAE,aAAa,iBAkBzE;AAWD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBAMxD;AAED,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,QA4BnD"}
1
+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAIlC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,MAAM,CAAC,EAAE,MAAM,UAIxE;AAED,wBAAsB,2BAA2B,CAAC,SAAS,EAAE,aAAa,iBAkBzE;AAWD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBAMxD;AAED,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,QA4BnD"}
@@ -3,15 +3,21 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
3
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.canRunBrowsingAgent = exports.injectPwLocatorGenerator = exports.prepareFileForBrowsingAgent = void 0;
6
+ exports.canRunBrowsingAgent = exports.injectPwLocatorGenerator = exports.prepareFileForBrowsingAgent = exports.prepareBrowsingAgentTask = void 0;
7
7
  const child_process_1 = require("child_process");
8
8
  const fs_extra_1 = __importDefault(require("fs-extra"));
9
9
  const logger_1 = require("../../bin/logger");
10
10
  const web_1 = require("../../bin/utils/platform/web");
11
+ function prepareBrowsingAgentTask(steps, assert) {
12
+ const sanitizedSteps = steps.map((step) => step.replace(/`/g, "\\`"));
13
+ const task = `${sanitizedSteps.join("\n")}\n${assert || ""}`;
14
+ return task;
15
+ }
16
+ exports.prepareBrowsingAgentTask = prepareBrowsingAgentTask;
11
17
  async function prepareFileForBrowsingAgent(genConfig) {
12
18
  const { specPath, scenarios } = genConfig;
13
19
  const { name, steps, assert } = scenarios[0];
14
- const mergedSteps = `${steps.join("\n")}\n${assert}`;
20
+ const mergedSteps = prepareBrowsingAgentTask(steps, assert);
15
21
  if (!fs_extra_1.default.existsSync(specPath)) {
16
22
  await fs_extra_1.default.createFile(specPath);
17
23
  await fs_extra_1.default.writeFile(specPath, (0, web_1.addNewImport)("", ["test", "expect"], "@playwright/test"));
@@ -1,8 +1,9 @@
1
1
  import OpenAI from "openai";
2
2
  import LLMTracing from "./trace";
3
- export declare function getLLMResult({ messages, trace, tools, }: {
3
+ export declare function getLLMResult({ messages, trace, tools, tool_choice, }: {
4
4
  messages: OpenAI.Chat.Completions.ChatCompletionMessageParam[];
5
5
  trace?: LLMTracing;
6
6
  tools?: OpenAI.Chat.Completions.ChatCompletionTool[];
7
+ tool_choice?: OpenAI.Chat.Completions.ChatCompletionToolChoiceOption;
7
8
  }): Promise<OpenAI.Chat.Completions.ChatCompletionMessage | undefined>;
8
9
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/ai/index.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,OAAO,UAAU,MAAM,SAAS,CAAC;AAEjC,wBAAsB,YAAY,CAAC,EACjC,QAAQ,EACR,KAAK,EACL,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,0BAA0B,EAAE,CAAC;IAC/D,KAAK,CAAC,EAAE,UAAU,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,kBAAkB,EAAE,CAAC;CACtD,GAAG,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,qBAAqB,GAAG,SAAS,CAAC,CAqBrE"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/ai/index.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,OAAO,UAAU,MAAM,SAAS,CAAC;AAEjC,wBAAsB,YAAY,CAAC,EACjC,QAAQ,EACR,KAAK,EACL,KAAK,EACL,WAAW,GACZ,EAAE;IACD,QAAQ,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,0BAA0B,EAAE,CAAC;IAC/D,KAAK,CAAC,EAAE,UAAU,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,kBAAkB,EAAE,CAAC;IACrD,WAAW,CAAC,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,8BAA8B,CAAC;CACtE,GAAG,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,qBAAqB,GAAG,SAAS,CAAC,CAsBrE"}
@@ -5,7 +5,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
6
  exports.getLLMResult = void 0;
7
7
  const openai_1 = __importDefault(require("openai"));
8
- async function getLLMResult({ messages, trace, tools, }) {
8
+ async function getLLMResult({ messages, trace, tools, tool_choice, }) {
9
9
  const openai = new openai_1.default();
10
10
  const model = "gpt-4o";
11
11
  const parameters = {
@@ -22,6 +22,7 @@ async function getLLMResult({ messages, trace, tools, }) {
22
22
  model,
23
23
  tools,
24
24
  ...parameters,
25
+ tool_choice,
25
26
  });
26
27
  const output = completion.choices[0]?.message;
27
28
  generation?.end({ output });
@@ -91,6 +91,14 @@ async function loadScenarios(scenariosPath) {
91
91
  else if (scenariosPath.endsWith(".yaml")) {
92
92
  return await loadScenariosFromYAML(scenariosPath);
93
93
  }
94
+ else if (scenariosPath.endsWith(".ts")) {
95
+ return [
96
+ {
97
+ specPath: scenariosPath,
98
+ scenarios: [],
99
+ },
100
+ ];
101
+ }
94
102
  else if (isValidJSON(atob(scenariosPath))) {
95
103
  const str = atob(scenariosPath);
96
104
  const config = JSON.parse(str);
@@ -101,14 +109,7 @@ async function loadScenarios(scenariosPath) {
101
109
  scenarios: [
102
110
  {
103
111
  name: config.name,
104
- steps: config.steps
105
- .map((s) => {
106
- if (s.trim().startsWith("-")) {
107
- return s.replace("-", "");
108
- }
109
- return s;
110
- })
111
- .filter((s) => !!s),
112
+ steps: config.steps.filter((s) => !!s),
112
113
  assert: config.assert,
113
114
  },
114
115
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@empiricalrun/test-gen",
3
- "version": "0.10.4",
3
+ "version": "0.11.0",
4
4
  "publishConfig": {
5
5
  "registry": "https://registry.npmjs.org/",
6
6
  "access": "public"