@empiricalrun/test-gen 0.10.5 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,15 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.11.0
4
+
5
+ ### Minor Changes
6
+
7
+ - a1edabb: feat: support page reload as an action
8
+
9
+ ### Patch Changes
10
+
11
+ - 21dae78: fix: add sanitisation of browsing agent task
12
+
3
13
  ## 0.10.5
4
14
 
5
15
  ### Patch Changes
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/actions/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAU,YAAY,EAAE,MAAM,UAAU,CAAC;AAOhD,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,OAAO,CAAW;IAC1B,OAAO,CAAC,eAAe,CAAmC;gBAC9C,IAAI,EAAE,IAAI;IAWhB,aAAa,CAAC,IAAI,oBAAa,EAAE,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC;IAiBhE,gBAAgB,IAAI,YAAY,EAAE;IAIlC,YAAY;IAIZ,UAAU;CAMX"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/actions/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAU,YAAY,EAAE,MAAM,UAAU,CAAC;AAQhD,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,OAAO,CAAW;IAC1B,OAAO,CAAC,eAAe,CAAmC;gBAC9C,IAAI,EAAE,IAAI;IAYhB,aAAa,CAAC,IAAI,oBAAa,EAAE,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC;IAiBhE,gBAAgB,IAAI,YAAY,EAAE;IAIlC,YAAY;IAIZ,UAAU;CAMX"}
@@ -6,6 +6,7 @@ const click_1 = require("./click");
6
6
  const done_1 = require("./done");
7
7
  const fill_1 = require("./fill");
8
8
  const goto_1 = require("./goto");
9
+ const reload_page_1 = require("./reload-page");
9
10
  class PlaywrightActions {
10
11
  actions;
11
12
  recordedActions;
@@ -16,6 +17,7 @@ class PlaywrightActions {
16
17
  (0, click_1.clickActionGenerator)(page),
17
18
  (0, done_1.doneActionGenerator)(page),
18
19
  (0, assertTextVisibility_1.assertTextVisibilityActionGenerator)(page),
20
+ (0, reload_page_1.reloadActionGenerator)(page),
19
21
  ];
20
22
  this.recordedActions = [];
21
23
  }
@@ -0,0 +1,4 @@
1
+ import { PlaywrightActionGenerator } from "../types";
2
+ export declare const PLAYWRIGHT_RELOAD_ACTION_NAME = "page_reload";
3
+ export declare const reloadActionGenerator: PlaywrightActionGenerator;
4
+ //# sourceMappingURL=reload-page.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"reload-page.d.ts","sourceRoot":"","sources":["../../src/actions/reload-page.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,yBAAyB,EAAE,MAAM,UAAU,CAAC;AAErD,eAAO,MAAM,6BAA6B,gBAAgB,CAAC;AAE3D,eAAO,MAAM,qBAAqB,EAAE,yBA8BnC,CAAC"}
@@ -0,0 +1,37 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.reloadActionGenerator = exports.PLAYWRIGHT_RELOAD_ACTION_NAME = void 0;
4
+ const utils_1 = require("../agent/browsing/utils");
5
+ exports.PLAYWRIGHT_RELOAD_ACTION_NAME = "page_reload";
6
+ const reloadActionGenerator = (page) => {
7
+ return {
8
+ execute: async () => {
9
+ await page.reload();
10
+ await page.waitForTimeout(3000);
11
+ await (0, utils_1.injectPwLocatorGenerator)(page);
12
+ },
13
+ template: () => {
14
+ const templ = `await page.reload();`;
15
+ return templ;
16
+ },
17
+ name: exports.PLAYWRIGHT_RELOAD_ACTION_NAME,
18
+ schema: {
19
+ type: "function",
20
+ function: {
21
+ name: exports.PLAYWRIGHT_RELOAD_ACTION_NAME,
22
+ description: "reload the page by calling this method",
23
+ parameters: {
24
+ type: "object",
25
+ properties: {
26
+ reason: {
27
+ type: "string",
28
+ description: "reason for calling this function",
29
+ },
30
+ },
31
+ required: ["reason"],
32
+ },
33
+ },
34
+ },
35
+ };
36
+ };
37
+ exports.reloadActionGenerator = reloadActionGenerator;
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAYlC,wBAAsB,aAAa,CACjC,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,IAAI,EACV,OAAO,GAAE;IACP,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACE,mBAgFP"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAYlC,wBAAsB,aAAa,CACjC,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,IAAI,EACV,OAAO,GAAE;IACP,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACE,mBAiFP"}
@@ -20,6 +20,7 @@ async function browsingAgent(task, page, options = {}) {
20
20
  const executedActions = [];
21
21
  await (0, utils_1.injectPwLocatorGenerator)(page);
22
22
  trace.update({ input: { task } });
23
+ let lastActionExecTrace = "";
23
24
  // await page.waitForTimeout(3_00_000)
24
25
  while (!isTaskDone) {
25
26
  const pageContentSpan = trace.startSpan("page-content");
@@ -29,10 +30,6 @@ async function browsingAgent(task, page, options = {}) {
29
30
  const pageSnapshot = (0, html_1.sanitizeHtml)(pageContent, options.htmlSanitize);
30
31
  sanitizationSpan.end({ output: { pageSnapshot } });
31
32
  const promptSpan = trace.startSpan("page-prompt");
32
- // check whether last action was an error
33
- const lastErrorAction = executedActions[executedActions.length - 1]?.isError
34
- ? executedActions[executedActions.length - 1]?.action
35
- : undefined;
36
33
  // extract all successful actions
37
34
  const successfulActions = executedActions
38
35
  .filter((a) => !a.isError)
@@ -41,13 +38,14 @@ async function browsingAgent(task, page, options = {}) {
41
38
  pageSnapshot,
42
39
  previousActions: successfulActions,
43
40
  task,
44
- lastActionErrors: lastErrorAction ? [lastErrorAction] : [],
41
+ lastActionErrors: lastActionExecTrace ? [lastActionExecTrace] : [],
45
42
  });
46
43
  promptSpan.end({ output: { messages } });
47
44
  const completion = await (0, ai_1.getLLMResult)({
48
45
  messages,
49
46
  tools,
50
47
  trace,
48
+ tool_choice: "required",
51
49
  });
52
50
  const toolCalls = completion?.tool_calls || [];
53
51
  for (const i in toolCalls) {
@@ -58,6 +56,7 @@ async function browsingAgent(task, page, options = {}) {
58
56
  isError: false,
59
57
  action: JSON.stringify(toolCall),
60
58
  });
59
+ lastActionExecTrace = "";
61
60
  }
62
61
  catch (e) {
63
62
  // TODO: implement feedback loop to llm
@@ -65,6 +64,7 @@ async function browsingAgent(task, page, options = {}) {
65
64
  isError: true,
66
65
  action: JSON.stringify(toolCall),
67
66
  });
67
+ lastActionExecTrace = e.message;
68
68
  logger.error(e);
69
69
  }
70
70
  }
@@ -1,5 +1,6 @@
1
1
  import { Page } from "playwright";
2
2
  import { TestGenConfig } from "../../types";
3
+ export declare function prepareBrowsingAgentTask(steps: string[], assert?: string): string;
3
4
  export declare function prepareFileForBrowsingAgent(genConfig: TestGenConfig): Promise<void>;
4
5
  export declare function injectPwLocatorGenerator(page: Page): Promise<void>;
5
6
  export declare function canRunBrowsingAgent(filePath: string): void;
@@ -1 +1 @@
1
- {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAIlC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C,wBAAsB,2BAA2B,CAAC,SAAS,EAAE,aAAa,iBAkBzE;AAWD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBAMxD;AAED,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,QA4BnD"}
1
+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAIlC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,MAAM,CAAC,EAAE,MAAM,UAIxE;AAED,wBAAsB,2BAA2B,CAAC,SAAS,EAAE,aAAa,iBAkBzE;AAWD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBAMxD;AAED,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,QA4BnD"}
@@ -3,15 +3,21 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
3
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.canRunBrowsingAgent = exports.injectPwLocatorGenerator = exports.prepareFileForBrowsingAgent = void 0;
6
+ exports.canRunBrowsingAgent = exports.injectPwLocatorGenerator = exports.prepareFileForBrowsingAgent = exports.prepareBrowsingAgentTask = void 0;
7
7
  const child_process_1 = require("child_process");
8
8
  const fs_extra_1 = __importDefault(require("fs-extra"));
9
9
  const logger_1 = require("../../bin/logger");
10
10
  const web_1 = require("../../bin/utils/platform/web");
11
+ function prepareBrowsingAgentTask(steps, assert) {
12
+ const sanitizedSteps = steps.map((step) => step.replace(/`/g, "\\`"));
13
+ const task = `${sanitizedSteps.join("\n")}\n${assert || ""}`;
14
+ return task;
15
+ }
16
+ exports.prepareBrowsingAgentTask = prepareBrowsingAgentTask;
11
17
  async function prepareFileForBrowsingAgent(genConfig) {
12
18
  const { specPath, scenarios } = genConfig;
13
19
  const { name, steps, assert } = scenarios[0];
14
- const mergedSteps = `${steps.join("\n")}\n${assert}`;
20
+ const mergedSteps = prepareBrowsingAgentTask(steps, assert);
15
21
  if (!fs_extra_1.default.existsSync(specPath)) {
16
22
  await fs_extra_1.default.createFile(specPath);
17
23
  await fs_extra_1.default.writeFile(specPath, (0, web_1.addNewImport)("", ["test", "expect"], "@playwright/test"));
@@ -1,8 +1,9 @@
1
1
  import OpenAI from "openai";
2
2
  import LLMTracing from "./trace";
3
- export declare function getLLMResult({ messages, trace, tools, }: {
3
+ export declare function getLLMResult({ messages, trace, tools, tool_choice, }: {
4
4
  messages: OpenAI.Chat.Completions.ChatCompletionMessageParam[];
5
5
  trace?: LLMTracing;
6
6
  tools?: OpenAI.Chat.Completions.ChatCompletionTool[];
7
+ tool_choice?: OpenAI.Chat.Completions.ChatCompletionToolChoiceOption;
7
8
  }): Promise<OpenAI.Chat.Completions.ChatCompletionMessage | undefined>;
8
9
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/ai/index.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,OAAO,UAAU,MAAM,SAAS,CAAC;AAEjC,wBAAsB,YAAY,CAAC,EACjC,QAAQ,EACR,KAAK,EACL,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,0BAA0B,EAAE,CAAC;IAC/D,KAAK,CAAC,EAAE,UAAU,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,kBAAkB,EAAE,CAAC;CACtD,GAAG,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,qBAAqB,GAAG,SAAS,CAAC,CAqBrE"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/ai/index.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,OAAO,UAAU,MAAM,SAAS,CAAC;AAEjC,wBAAsB,YAAY,CAAC,EACjC,QAAQ,EACR,KAAK,EACL,KAAK,EACL,WAAW,GACZ,EAAE;IACD,QAAQ,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,0BAA0B,EAAE,CAAC;IAC/D,KAAK,CAAC,EAAE,UAAU,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,kBAAkB,EAAE,CAAC;IACrD,WAAW,CAAC,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,8BAA8B,CAAC;CACtE,GAAG,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,qBAAqB,GAAG,SAAS,CAAC,CAsBrE"}
@@ -5,7 +5,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
6
  exports.getLLMResult = void 0;
7
7
  const openai_1 = __importDefault(require("openai"));
8
- async function getLLMResult({ messages, trace, tools, }) {
8
+ async function getLLMResult({ messages, trace, tools, tool_choice, }) {
9
9
  const openai = new openai_1.default();
10
10
  const model = "gpt-4o";
11
11
  const parameters = {
@@ -22,6 +22,7 @@ async function getLLMResult({ messages, trace, tools, }) {
22
22
  model,
23
23
  tools,
24
24
  ...parameters,
25
+ tool_choice,
25
26
  });
26
27
  const output = completion.choices[0]?.message;
27
28
  generation?.end({ output });
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/scenarios/index.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAiGvC,iBAAe,aAAa,CAC1B,aAAa,EAAE,MAAM,GACpB,OAAO,CAAC;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,QAAQ,EAAE,CAAA;CAAE,EAAE,CAAC,CAqCxD;AAED,OAAO,EAAE,aAAa,EAAE,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/scenarios/index.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAiGvC,iBAAe,aAAa,CAC1B,aAAa,EAAE,MAAM,GACpB,OAAO,CAAC;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,QAAQ,EAAE,CAAA;CAAE,EAAE,CAAC,CA8BxD;AAED,OAAO,EAAE,aAAa,EAAE,CAAC"}
@@ -109,14 +109,7 @@ async function loadScenarios(scenariosPath) {
109
109
  scenarios: [
110
110
  {
111
111
  name: config.name,
112
- steps: config.steps
113
- .map((s) => {
114
- if (s.trim().startsWith("-")) {
115
- return s.replace("-", "");
116
- }
117
- return s;
118
- })
119
- .filter((s) => !!s),
112
+ steps: config.steps.filter((s) => !!s),
120
113
  assert: config.assert,
121
114
  },
122
115
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@empiricalrun/test-gen",
3
- "version": "0.10.5",
3
+ "version": "0.11.0",
4
4
  "publishConfig": {
5
5
  "registry": "https://registry.npmjs.org/",
6
6
  "access": "public"