@empiricalrun/test-gen 0.38.1 → 0.38.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.38.2
4
+
5
+ ### Patch Changes
6
+
7
+ - 132c2b4: fix: generation assets failure shouldn't stop test generation
8
+ - 213c357: fix: added available annotations in master agent prompt
9
+
3
10
  ## 0.38.1
4
11
 
5
12
  ### Patch Changes
@@ -3,7 +3,7 @@ import { Page } from "playwright";
3
3
  import { PlaywrightActions } from "../../actions";
4
4
  import { ScopeVars, TestCase } from "../../types";
5
5
  import { BrowsingAgentOptions } from "../browsing";
6
- export declare function getNextAction({ task, executedActions, failedActions, pageUrl, trace, llm, options, pageScreenshot, annotatedPageScreenshot, actions, disableSkills, useHints, }: {
6
+ export declare function getNextAction({ task, executedActions, failedActions, pageUrl, trace, llm, options, pageScreenshot, annotatedPageScreenshot, actions, disableSkills, useHints, annotations, }: {
7
7
  task: string;
8
8
  executedActions: string[];
9
9
  failedActions: any[];
@@ -16,6 +16,7 @@ export declare function getNextAction({ task, executedActions, failedActions, pa
16
16
  actions: PlaywrightActions;
17
17
  disableSkills: boolean;
18
18
  useHints: boolean;
19
+ annotations?: string[];
19
20
  }): Promise<import("openai/resources/index.mjs").ChatCompletionMessageToolCall | undefined>;
20
21
  export declare function createTestUsingMasterAgent({ task, page, testCase, options, scopeVars, }: {
21
22
  task: string;
@@ -1 +1 @@
1
- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,GAAG,EACH,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAG3B,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAYlD,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAClD,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AAQrB,wBAAsB,aAAa,CAAC,EAClC,IAAI,EACJ,eAAe,EACf,aAAa,EACb,OAAO,EACP,KAAK,EACL,GAAG,EACH,OAAO,EACP,cAAc,EACd,uBAAuB,EACvB,OAAO,EACP,aAAa,EACb,QAAgB,GACjB,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,aAAa,EAAE,GAAG,EAAE,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,cAAc,EAAE,MAAM,CAAC;IACvB,uBAAuB,CAAC,EAAE,MAAM,CAAC;IACjC,OAAO,EAAE,iBAAiB,CAAC;IAC3B,aAAa,EAAE,OAAO,CAAC;IACvB,QAAQ,EAAE,OAAO,CAAC;CACnB,2FAwFA;AAGD,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,oBAAoB,CAAC;IAC9B,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GA0TA"}
1
+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,GAAG,EACH,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAG3B,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAYlD,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAClD,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AAQrB,wBAAsB,aAAa,CAAC,EAClC,IAAI,EACJ,eAAe,EACf,aAAa,EACb,OAAO,EACP,KAAK,EACL,GAAG,EACH,OAAO,EACP,cAAc,EACd,uBAAuB,EACvB,OAAO,EACP,aAAa,EACb,QAAgB,EAChB,WAAW,GACZ,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,aAAa,EAAE,GAAG,EAAE,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,cAAc,EAAE,MAAM,CAAC;IACvB,uBAAuB,CAAC,EAAE,MAAM,CAAC;IACjC,OAAO,EAAE,iBAAiB,CAAC;IAC3B,aAAa,EAAE,OAAO,CAAC;IACvB,QAAQ,EAAE,OAAO,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;CACxB,2FA2FA;AAGD,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,oBAAoB,CAAC;IAC9B,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GAuTA"}
@@ -16,7 +16,7 @@ const skills_retriever_1 = require("../codegen/skills-retriever");
16
16
  const verification_1 = require("../verification");
17
17
  const with_hints_1 = require("./with-hints");
18
18
  const MAX_ERROR_COUNT = 2;
19
- async function getNextAction({ task, executedActions, failedActions, pageUrl, trace, llm, options, pageScreenshot, annotatedPageScreenshot, actions, disableSkills, useHints = false, }) {
19
+ async function getNextAction({ task, executedActions, failedActions, pageUrl, trace, llm, options, pageScreenshot, annotatedPageScreenshot, actions, disableSkills, useHints = false, annotations, }) {
20
20
  const nextActionSpan = trace?.span({
21
21
  name: "master-agent-next-action",
22
22
  input: {
@@ -30,6 +30,7 @@ async function getNextAction({ task, executedActions, failedActions, pageUrl, tr
30
30
  disableSkills,
31
31
  useHints,
32
32
  skills: skill_1.testCaseSkills.getAvailableSkills(),
33
+ annotations,
33
34
  },
34
35
  });
35
36
  const promptSpan = nextActionSpan?.span({
@@ -40,7 +41,8 @@ async function getNextAction({ task, executedActions, failedActions, pageUrl, tr
40
41
  failedActions: failedActions.map((a) => a).join("\n"),
41
42
  executedActions: executedActions.map((a) => a).join("\n"),
42
43
  pageUrl,
43
- }, 20);
44
+ annotations,
45
+ }, 24);
44
46
  // assuming there is only one user message in the prompt. if there is a change in langfuse prompt format, this will need to be updated
45
47
  const userMessage = promptMessages.filter((m) => m.role === "user")[0];
46
48
  const systemMessage = promptMessages.filter((m) => m.role === "system")[0];
@@ -176,12 +178,11 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
176
178
  // path: `screenshots/screenshot-${screenshotIndex++}.png`, // enable this and screenshotIndex var for local debugging
177
179
  });
178
180
  const testGenReporter = new reporter_1.TestGenUpdatesReporter();
179
- const testGenSnapshotUpdatePromise = testGenReporter.sendCurrentView(buffer);
181
+ await testGenReporter.sendCurrentView(buffer);
180
182
  const pageScreenshot = buffer.toString("base64");
181
183
  let output;
182
184
  let generatedCodeSteps = [];
183
185
  let annotations;
184
- let testGenAnnotatedSnapshotUpdatePromise;
185
186
  let annotatedPageScreenshot;
186
187
  if (useHints) {
187
188
  await page.waitForTimeout(2000);
@@ -209,13 +210,13 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
209
210
  window?.annotationInstance?.destroy();
210
211
  }
211
212
  });
212
- testGenAnnotatedSnapshotUpdatePromise =
213
- testGenReporter.sendCurrentView(annonationBuffer);
213
+ await testGenReporter.sendCurrentView(annonationBuffer);
214
214
  annotatedPageScreenshot = annonationBuffer.toString("base64");
215
215
  }
216
216
  if (await (0, session_1.shouldStopSession)()) {
217
217
  break;
218
218
  }
219
+ const annotationKeys = annotations ? Object.keys(annotations) : [];
219
220
  const toolCall = await getNextAction({
220
221
  task,
221
222
  executedActions: masterAgentActions,
@@ -229,6 +230,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
229
230
  actions,
230
231
  disableSkills,
231
232
  useHints,
233
+ annotations: annotationKeys,
232
234
  });
233
235
  if (await (0, session_1.shouldStopSession)()) {
234
236
  break;
@@ -347,10 +349,6 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
347
349
  }
348
350
  }
349
351
  trace?.update({ input: { task }, output: { output } });
350
- await testGenSnapshotUpdatePromise;
351
- if (testGenAnnotatedSnapshotUpdatePromise) {
352
- await testGenAnnotatedSnapshotUpdatePromise;
353
- }
354
352
  masterAgentSpan?.end({
355
353
  output: {
356
354
  action: output.action,
@@ -1 +1 @@
1
- {"version":3,"file":"master-agent.evals.d.ts","sourceRoot":"","sources":["../../src/evals/master-agent.evals.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAEpC,eAAO,MAAM,4BAA4B,EAAE,UA0C1C,CAAC;AAEF,eAAe,4BAA4B,CAAC"}
1
+ {"version":3,"file":"master-agent.evals.d.ts","sourceRoot":"","sources":["../../src/evals/master-agent.evals.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAEpC,eAAO,MAAM,4BAA4B,EAAE,UA4C1C,CAAC;AAEF,eAAe,4BAA4B,CAAC"}
@@ -5,7 +5,7 @@ const actions_1 = require("../actions");
5
5
  const skill_1 = require("../actions/skill");
6
6
  const run_1 = require("../agent/master/run");
7
7
  const masterGetNextActionEvaluator = async ({ item, trace, }) => {
8
- const { task, executedActions, failedActions, pageUrl, options, pageScreenshot, annotatedPageScreenshot, disableSkills, useHints, skills = [], } = item.input;
8
+ const { task, executedActions, failedActions, pageUrl, options, pageScreenshot, annotatedPageScreenshot, disableSkills, useHints, skills = [], annotations, } = item.input;
9
9
  const page = {};
10
10
  skill_1.testCaseSkills.updateSkills(skills);
11
11
  const actions = new actions_1.PlaywrightActions(page);
@@ -21,6 +21,7 @@ const masterGetNextActionEvaluator = async ({ item, trace, }) => {
21
21
  actions,
22
22
  disableSkills,
23
23
  useHints,
24
+ annotations,
24
25
  });
25
26
  return {
26
27
  scores: [
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/reporter/index.ts"],"names":[],"mappings":"AACA,OAAO,EAA4B,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AAa5E,KAAK,kBAAkB,GAAG;IACxB,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC;AAKF,wBAAgB,WAAW,IAAI,QAAQ,GAAG,SAAS,CAUlD;AAED;;;;;;;;;GASG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,kBAAkB,GAAG,IAAI,CAGlE;AAED,qBAAa,sBAAsB;;IAE3B,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAK1C,eAAe,CAAC,EACpB,eAAe,EACf,QAAQ,GACT,EAAE;QACD,eAAe,EAAE,MAAM,CAAC;QACxB,QAAQ,EAAE,MAAM,CAAC;KAClB;IAgDK,eAAe,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAmC9C,WAAW,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAY3C,UAAU,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAY1C,iBAAiB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;CAWxD"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/reporter/index.ts"],"names":[],"mappings":"AACA,OAAO,EAA4B,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AAa5E,KAAK,kBAAkB,GAAG;IACxB,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC;AAKF,wBAAgB,WAAW,IAAI,QAAQ,GAAG,SAAS,CAUlD;AAED;;;;;;;;;GASG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,kBAAkB,GAAG,IAAI,CAGlE;AAED,qBAAa,sBAAsB;;IAE3B,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAK1C,eAAe,CAAC,EACpB,eAAe,EACf,QAAQ,GACT,EAAE;QACD,eAAe,EAAE,MAAM,CAAC;QACxB,QAAQ,EAAE,MAAM,CAAC;KAClB;IAgDK,eAAe,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IA2C9C,WAAW,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAY3C,UAAU,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAY1C,iBAAiB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;CAWxD"}
@@ -95,25 +95,30 @@ class TestGenUpdatesReporter {
95
95
  logger.log("Skipped uploading current view screenshot");
96
96
  return;
97
97
  }
98
- // upload current screenshot to r2 and report it to reporter
99
- if (!fs_extra_1.default.existsSync(path_1.default.join(process.cwd(), "gen-assets"))) {
100
- await fs_extra_1.default.mkdir((process.cwd(), "gen-assets"));
98
+ try {
99
+ // upload current screenshot to r2 and report it to reporter
100
+ if (!fs_extra_1.default.existsSync(path_1.default.join(process.cwd(), "gen-assets"))) {
101
+ await fs_extra_1.default.mkdir((process.cwd(), "gen-assets"));
102
+ }
103
+ await fs_extra_1.default.writeFile(path_1.default.join(process.cwd(), "gen-assets", `current-view-${Date.now()}.png`), buffer);
104
+ const uploadDir = (0, uploader_1.getUploadPathForRun)(reporterConfig?.projectRepoName);
105
+ const files = await (0, r2_uploader_1.uploadDirectory)({
106
+ sourceDir: path_1.default.join(process.cwd(), "gen-assets"),
107
+ destinationDir: uploadDir,
108
+ uploadBucket: uploader_1.UPLOAD_BUCKET,
109
+ });
110
+ const filePath = Object.keys(files)[0];
111
+ const relativeFilePath = filePath.replace(path_1.default.join(process.cwd(), "gen-assets"), "");
112
+ const url = `${uploader_1.UPLOAD_DOMAIN}/${uploadDir}${relativeFilePath}`;
113
+ await getReporter()?.report(new reporter_1.ProcessLogMessageBuilder({
114
+ type: "current-snapshot",
115
+ message: JSON.stringify({ type: "current-view", url }),
116
+ }));
117
+ await fs_extra_1.default.rmdir((process.cwd(), "gen-assets"), { recursive: true });
118
+ }
119
+ catch (e) {
120
+ console.warn("Failed to upload current view screenshot", e);
101
121
  }
102
- await fs_extra_1.default.writeFile(path_1.default.join(process.cwd(), "gen-assets", `current-view-${Date.now()}.png`), buffer);
103
- const uploadDir = (0, uploader_1.getUploadPathForRun)(reporterConfig?.projectRepoName);
104
- const files = await (0, r2_uploader_1.uploadDirectory)({
105
- sourceDir: path_1.default.join(process.cwd(), "gen-assets"),
106
- destinationDir: uploadDir,
107
- uploadBucket: uploader_1.UPLOAD_BUCKET,
108
- });
109
- const filePath = Object.keys(files)[0];
110
- const relativeFilePath = filePath.replace(path_1.default.join(process.cwd(), "gen-assets"), "");
111
- const url = `${uploader_1.UPLOAD_DOMAIN}/${uploadDir}${relativeFilePath}`;
112
- await getReporter()?.report(new reporter_1.ProcessLogMessageBuilder({
113
- type: "current-snapshot",
114
- message: JSON.stringify({ type: "current-view", url }),
115
- }));
116
- await fs_extra_1.default.rmdir((process.cwd(), "gen-assets"), { recursive: true });
117
122
  }
118
123
  async sendMessage(message) {
119
124
  const reporter = getReporter();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@empiricalrun/test-gen",
3
- "version": "0.38.1",
3
+ "version": "0.38.2",
4
4
  "publishConfig": {
5
5
  "registry": "https://registry.npmjs.org/",
6
6
  "access": "public"