@empiricalrun/test-gen 0.38.13 → 0.38.16

This diff compares the contents of two publicly available versions of the package, each released to a supported public registry. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in that registry.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,26 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.38.16
4
+
5
+ ### Patch Changes
6
+
7
+ - 6163918: fix: security policy injection for locators
8
+
9
+ ## 0.38.15
10
+
11
+ ### Patch Changes
12
+
13
+ - 91ded8f: fix: incorrect annotations
14
+
15
+ ## 0.38.14
16
+
17
+ ### Patch Changes
18
+
19
+ - bd5c945: fix: run update scenario prompts on claude
20
+ - 31f8805: fix: runtime planner calling out actions as done
21
+ - Updated dependencies [bd5c945]
22
+ - @empiricalrun/llm@0.9.26
23
+
3
24
  ## 0.38.13
4
25
 
5
26
  ### Patch Changes
@@ -1 +1 @@
1
- {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAa,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAK3D,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAiBvD,OAAO,EAAe,aAAa,EAAE,MAAM,aAAa,CAAC;AAMzD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;AA6FD;;;;GAIG;AACH,wBAAsB,yBAAyB,CAC7C,SAAS,EAAE,aAAa,EACxB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,MAAM,CAAC,CA0DjB;AAyBD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBAuGxD;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,QA+BjD;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,IAAI,OAAO,CAAC,oBAAoB,CAAC,CAM1E;AAWD;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,YAAY,EAAE,MAAM,EACpB,gBAAgB,EAAE,oBAAoB,EACtC,gBAAgB,GAAE,MAAM,EAAU,GACjC,OAAO,CAAC,MAAM,CAAC,CA+CjB;AAED,wBAAsB,sBAAsB,CAAC,EAC3C,YAAiB,EACjB,IAAS,EACT,eAAoB,EACpB,gBAAqB,EACrB,UAAyC,GAC1C,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB,8EASA;AAED,qBAAa,eAAe;IACd,OAAO,CAAC,SAAS;gBAAT,SAAS,EAAE,MAAM;IACrC,OAAO,CAAC,aAAa,CAAqB;YAE5B,mBAAmB;YAUnB,gBAAgB;IAsBjB,OAAO;IAuBb,SAAS;CAKjB"}
1
+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAa,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAK3D,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAiBvD,OAAO,EAAe,aAAa,EAAE,MAAM,aAAa,CAAC;AAMzD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;AA6FD;;;;GAIG;AACH,wBAAsB,yBAAyB,CAC7C,SAAS,EAAE,aAAa,EACxB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,MAAM,CAAC,CA0DjB;AAyBD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBAuHxD;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,QA+BjD;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,IAAI,OAAO,CAAC,oBAAoB,CAAC,CAM1E;AAWD;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,YAAY,EAAE,MAAM,EACpB,gBAAgB,EAAE,oBAAoB,EACtC,gBAAgB,GAAE,MAAM,EAAU,GACjC,OAAO,CAAC,MAAM,CAAC,CA+CjB;AAED,wBAAsB,sBAAsB,CAAC,EAC3C,YAAiB,EACjB,IAAS,EACT,eAAoB,EACpB,gBAAqB,EACrB,UAAyC,GAC1C,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB,8EASA;AAED,qBAAa,eAAe;IACd,OAAO,CAAC,SAAS;gBAAT,SAAS,EAAE,MAAM;IACrC,OAAO,CAAC,aAAa,CAAqB;YAE5B,mBAAmB;YAUnB,gBAAgB;IAsBjB,OAAO;IAuBb,SAAS;CAKjB"}
@@ -182,6 +182,11 @@ async function injectPwLocatorGenerator(page) {
182
182
  try {
183
183
  await Promise.all(scripts.map((s) => page.addScriptTag({ content: s })));
184
184
  await page.evaluate(async () => {
185
+ //@ts-ignore
186
+ //https://developer.mozilla.org/en-US/docs/Web/API/TrustedScriptURL
187
+ const trustedPolicy = window.trustedTypes?.createPolicy(crypto.randomUUID(), {
188
+ createScriptURL: (url) => url,
189
+ });
185
190
  //@ts-ignore
186
191
  const injectScriptInIframe = (iframeDoc) => {
187
192
  try {
@@ -189,10 +194,9 @@ async function injectPwLocatorGenerator(page) {
189
194
  "https://assets-test.empirical.run/pw-selector.js",
190
195
  "https://code.jquery.com/jquery-3.7.1.min.js",
191
196
  ].forEach((url) => {
192
- const scr = iframeDoc.createElement("script");
193
- scr.src = url;
194
- console.log("Injecting script in iframe", scr);
195
- iframeDoc.head.appendChild(scr);
197
+ const script = iframeDoc.createElement("script");
198
+ script.src = trustedPolicy.createScriptURL(url);
199
+ iframeDoc.head.appendChild(script);
196
200
  });
197
201
  }
198
202
  catch (e) {
@@ -207,7 +211,8 @@ async function injectPwLocatorGenerator(page) {
207
211
  if (isVisible) {
208
212
  //@ts-ignore
209
213
  const iframeContent = iframe.contentDocument || iframe.contentWindow?.document;
210
- if (iframeContent) {
214
+ const isScriptInjected = !!iframe.contentWindow?.playwright;
215
+ if (iframeContent && !isScriptInjected) {
211
216
  injectScriptInIframe(iframeContent);
212
217
  }
213
218
  }
@@ -224,18 +229,22 @@ async function injectPwLocatorGenerator(page) {
224
229
  //@ts-ignore
225
230
  const injectScriptInIframe = (iframeDoc) => {
226
231
  try {
232
+ //@ts-ignore
233
+ //https://developer.mozilla.org/en-US/docs/Web/API/TrustedScriptURL
234
+ const trustedPolicy = window.trustedTypes.createPolicy(crypto.randomUUID(), {
235
+ createScriptURL: (url) => url,
236
+ });
227
237
  [
228
238
  "https://assets-test.empirical.run/pw-selector.js",
229
239
  "https://code.jquery.com/jquery-3.7.1.min.js",
230
240
  ].forEach((url) => {
231
241
  const scr = iframeDoc.createElement("script");
232
- scr.src = url;
233
- console.log("Injecting script in iframe", scr);
242
+ scr.src = trustedPolicy.createScriptURL(url);
234
243
  iframeDoc.head.appendChild(scr);
235
244
  });
236
245
  }
237
246
  catch (e) {
238
- console.warn("Error injecting script in iframe:");
247
+ console.warn("Error injecting script in iframe.");
239
248
  }
240
249
  };
241
250
  const iframes = document.getElementsByTagName("iframe");
@@ -246,7 +255,8 @@ async function injectPwLocatorGenerator(page) {
246
255
  if (isVisible) {
247
256
  //@ts-ignore
248
257
  const iframeContent = iframe.contentDocument || iframe.contentWindow?.document;
249
- if (iframeContent) {
258
+ const isScriptInjected = !!iframe.contentWindow?.playwright;
259
+ if (iframeContent && !isScriptInjected) {
250
260
  injectScriptInIframe(iframeContent);
251
261
  }
252
262
  }
@@ -254,7 +264,7 @@ async function injectPwLocatorGenerator(page) {
254
264
  });
255
265
  }
256
266
  catch (e) {
257
- console.warn("Error injecting script in iframe:");
267
+ console.warn("Error injecting script in iframe.");
258
268
  }
259
269
  }
260
270
  exports.injectPwLocatorGenerator = injectPwLocatorGenerator;
@@ -11,7 +11,12 @@ export declare function getUpdateTestCodeCompletion({ testCase, testFileContent,
11
11
  testFileContent: string;
12
12
  trace?: TraceClient;
13
13
  options?: TestGenConfigOptions;
14
- }): Promise<string>;
14
+ }): Promise<{
15
+ filePath: string | undefined;
16
+ oldCode: string | undefined;
17
+ newCode: string | undefined;
18
+ reason: string | undefined;
19
+ }[]>;
15
20
  export declare function updateTest(testCase: TestCase, file: string, options: TestGenConfigOptions | undefined, logging?: boolean, validate?: boolean, trace?: TraceClient): Promise<UpdatedTestCase[]>;
16
21
  export declare function getAppendCreateTestBlockCompletion({ testFiles, pageFiles, testCase, testFilePath, options, trace, }: {
17
22
  trace?: TraceClient;
@@ -1 +1 @@
1
- {"version":3,"file":"update-flow.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/update-flow.ts"],"names":[],"mappings":"AAAA,OAAO,EAKL,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAsB3B,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,KAAK,eAAe,GAAG,QAAQ,GAAG;IAChC,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB,CAAC;AAqIF,wBAAsB,2BAA2B,CAAC,EAChD,QAAQ,EACR,eAAe,EACf,SAAS,EACT,SAAS,EACT,YAAY,EACZ,KAAK,EACL,OAAO,GACR,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,GAAG,OAAO,CAAC,MAAM,CAAC,CA6ClB;AAED,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAAG,SAAS,EACzC,OAAO,GAAE,OAAc,EACvB,QAAQ,GAAE,OAAc,EACxB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,eAAe,EAAE,CAAC,CA6D5B;AAED,wBAAsB,kCAAkC,CAAC,EACvD,SAAS,EACT,SAAS,EACT,QAAQ,EACR,YAAY,EACZ,OAAO,EACP,KAAK,GACN,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,YAAY,EAAE,MAAM,CAAC;CACtB,mBA4DA;AAED,wBAAsB,qBAAqB,CAAC,EAC1C,QAAQ,EACR,IAAI,EACJ,OAAO,EACP,KAAK,EACL,aAAoB,GACrB,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAyC7B"}
1
+ {"version":3,"file":"update-flow.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/update-flow.ts"],"names":[],"mappings":"AAAA,OAAO,EAKL,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAoB3B,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,KAAK,eAAe,GAAG,QAAQ,GAAG;IAChC,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB,CAAC;AAqIF,wBAAsB,2BAA2B,CAAC,EAChD,QAAQ,EACR,eAAe,EACf,SAAS,EACT,SAAS,EACT,YAAY,EACZ,KAAK,EACL,OAAO,GACR,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,GAAG,OAAO,CACT;IACE,QAAQ,EAAE,MAAM,GAAG,SAAS,CAAC;IAC7B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;CAC5B,EAAE,CACJ,CA+CA;AAED,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAAG,SAAS,EACzC,OAAO,GAAE,OAAc,EACvB,QAAQ,GAAE,OAAc,EACxB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,eAAe,EAAE,CAAC,CA4D5B;AAED,wBAAsB,kCAAkC,CAAC,EACvD,SAAS,EACT,SAAS,EACT,QAAQ,EACR,YAAY,EACZ,OAAO,EACP,KAAK,GACN,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,YAAY,EAAE,MAAM,CAAC;CACtB,mBA4DA;AAED,wBAAsB,qBAAqB,CAAC,EAC1C,QAAQ,EACR,IAAI,EACJ,OAAO,EACP,KAAK,EACL,aAAoB,GACrB,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAyC7B"}
@@ -128,22 +128,24 @@ async function getUpdateTestCodeCompletion({ testCase, testFileContent, testFile
128
128
  scenarioFile: testFilePath,
129
129
  currentScenarioCodeBlock,
130
130
  });
131
- promptSpan?.end({ output: { instruction } });
132
131
  const llm = new llm_1.LLM({
133
132
  trace,
134
- provider: options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
135
- defaultModel: options?.model || constants_1.DEFAULT_MODEL,
136
- providerApiKey: constants_1.MODEL_API_KEYS[options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER],
133
+ provider: "anthropic",
134
+ defaultModel: "claude-3-5-sonnet-latest",
135
+ providerApiKey: constants_1.MODEL_API_KEYS["anthropic"],
137
136
  });
138
137
  const firstShotMessage = await llm.createChatCompletion({
139
138
  messages: instruction,
140
139
  modelParameters: {
141
140
  ...constants_1.DEFAULT_MODEL_PARAMETERS,
142
141
  ...options?.modelParameters,
142
+ temperature: 0,
143
143
  },
144
144
  });
145
145
  let response = firstShotMessage?.content || "";
146
- return response;
146
+ const fileChanges = (0, utils_1.extractTestUpdates)(response);
147
+ promptSpan?.end({ output: fileChanges });
148
+ return fileChanges;
147
149
  }
148
150
  exports.getUpdateTestCodeCompletion = getUpdateTestCodeCompletion;
149
151
  async function updateTest(testCase, file, options, logging = true, validate = true, trace) {
@@ -176,12 +178,11 @@ async function updateTest(testCase, file, options, logging = true, validate = tr
176
178
  name: "update-test",
177
179
  input: request,
178
180
  });
179
- const response = await getUpdateTestCodeCompletion({
181
+ const fileChanges = await getUpdateTestCodeCompletion({
180
182
  ...request,
181
183
  trace: updateTestSpan,
182
184
  });
183
185
  logger.success("Test generated successfully!");
184
- const fileChanges = (0, utils_1.extractTestUpdates)(response);
185
186
  await applyFileChanges({
186
187
  validateTypes: validate,
187
188
  trace: updateTestSpan,
@@ -199,7 +200,7 @@ async function updateTest(testCase, file, options, logging = true, validate = tr
199
200
  ...testCase,
200
201
  updatedFiles: fileChanges.map((f) => f.filePath),
201
202
  });
202
- updateTestSpan?.end({ output: { response } });
203
+ updateTestSpan?.end({ output: { fileChanges } });
203
204
  await (0, llm_1.flushAllTraces)();
204
205
  return generatedTestCases;
205
206
  }
@@ -16,7 +16,10 @@ export declare function getNextAction({ task, executedActions, failedActions, pa
16
16
  actions: PlaywrightActions;
17
17
  disableSkills: boolean;
18
18
  useHints: boolean;
19
- annotations?: string[];
19
+ annotations?: {
20
+ elementID: string;
21
+ text: string;
22
+ }[];
20
23
  }): Promise<import("openai/resources/index.mjs").ChatCompletionMessageToolCall | undefined>;
21
24
  export declare function createTestUsingMasterAgent({ task, page, testCase, options, scopeVars, }: {
22
25
  task: string;
@@ -1 +1 @@
1
- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,GAAG,EACH,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAG3B,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAclD,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAClD,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AAoBrB,wBAAsB,aAAa,CAAC,EAClC,IAAI,EACJ,eAAe,EACf,aAAa,EACb,OAAO,EACP,KAAK,EACL,GAAG,EACH,OAAO,EACP,cAAc,EACd,uBAAuB,EACvB,OAAO,EACP,aAAa,EACb,QAAgB,EAChB,WAAW,GACZ,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,aAAa,EAAE,GAAG,EAAE,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,cAAc,EAAE,MAAM,CAAC;IACvB,uBAAuB,CAAC,EAAE,MAAM,CAAC;IACjC,OAAO,EAAE,iBAAiB,CAAC;IAC3B,aAAa,EAAE,OAAO,CAAC;IACvB,QAAQ,EAAE,OAAO,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;CACxB,2FA2FA;AAGD,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,oBAAoB,CAAC;IAC9B,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GAsTA"}
1
+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,GAAG,EACH,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAG3B,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAclD,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAClD,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AAoBrB,wBAAsB,aAAa,CAAC,EAClC,IAAI,EACJ,eAAe,EACf,aAAa,EACb,OAAO,EACP,KAAK,EACL,GAAG,EACH,OAAO,EACP,cAAc,EACd,uBAAuB,EACvB,OAAO,EACP,aAAa,EACb,QAAgB,EAChB,WAAW,GACZ,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,aAAa,EAAE,GAAG,EAAE,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,cAAc,EAAE,MAAM,CAAC;IACvB,uBAAuB,CAAC,EAAE,MAAM,CAAC;IACjC,OAAO,EAAE,iBAAiB,CAAC;IAC3B,aAAa,EAAE,OAAO,CAAC;IACvB,QAAQ,EAAE,OAAO,CAAC;IAClB,WAAW,CAAC,EAAE;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;CACrD,2FA6FA;AAGD,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,oBAAoB,CAAC;IAC9B,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GAiUA"}
@@ -52,8 +52,10 @@ async function getNextAction({ task, executedActions, failedActions, pageUrl, tr
52
52
  failedActions: failedActions.map((a) => a).join("\n"),
53
53
  executedActions: executedActions.map((a) => a).join("\n"),
54
54
  pageUrl,
55
- annotations,
56
- }, 24);
55
+ annotations: annotations
56
+ ?.map((a) => `${a.elementID}:${a.text}`)
57
+ .join("\n"),
58
+ }, 27);
57
59
  // assuming there is only one user message in the prompt. if there is a change in langfuse prompt format, this will need to be updated
58
60
  const userMessage = promptMessages.filter((m) => m.role === "user")[0];
59
61
  const systemMessage = promptMessages.filter((m) => m.role === "system")[0];
@@ -175,7 +177,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
175
177
  const plannerResp = await (0, run_time_planner_1.runtimePlanner)({
176
178
  trace: masterAgentSpan,
177
179
  task,
178
- conversation: ["Successfully executed actions", ...masterAgentActions],
180
+ conversation: [...masterAgentActions],
179
181
  pages: getPageVariables(actions.getStateVariables()),
180
182
  currentPage: (0, utils_1.getPageVarName)(),
181
183
  });
@@ -210,8 +212,18 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
210
212
  // @ts-ignore
211
213
  // eslint-disable-next-line no-undef
212
214
  window.annotationInstance = annotateClickableElements();
215
+ const annotations = Object.entries(
213
216
  // @ts-ignore
214
- return Object.keys(window.annotationInstance.annotations);
217
+ window.annotationInstance.annotations).map(([key, value]) => ({
218
+ elementID: key, // Assign the key to elementID
219
+ text:
220
+ //@ts-ignore
221
+ value.node.text?.trim() ||
222
+ //@ts-ignore
223
+ value.node.textContent?.trim() ||
224
+ "<This is an icon or image. Check the screenshot>",
225
+ }));
226
+ return annotations;
215
227
  });
216
228
  await page.waitForTimeout(2000);
217
229
  const annonationBuffer = await page.screenshot({
@@ -13,7 +13,10 @@ export declare const triggerHintsFlow: ({ outputFromGetNextAction, generatedAnno
13
13
  action: string;
14
14
  elementAnnotation?: string;
15
15
  };
16
- generatedAnnotations: Record<string, any>;
16
+ generatedAnnotations: {
17
+ elementID: string;
18
+ text: string;
19
+ }[];
17
20
  page: TestGenPage;
18
21
  llm: LLM;
19
22
  trace?: TraceClient | undefined;
@@ -1 +1 @@
1
- {"version":3,"file":"with-hints.d.ts","sourceRoot":"","sources":["../../../src/agent/master/with-hints.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAErD,OAAO,MAAM,MAAM,QAAQ,CAAC;AAI5B,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AACzC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAEnD,eAAO,MAAM,0BAA0B;iBAMxB,OAAO,8BAA8B;;oBAElC,MAAM;6BACG,MAAM;MAC7B,MAAM,GAAG,OAAO,yBAAyB,EAiC5C,CAAC;AAEF,eAAO,MAAM,gBAAgB;6BAOF;QACvB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,CAAC,EAAE,MAAM,CAAC;KAC5B;0BACqB,OAAO,MAAM,EAAE,GAAG,CAAC;UACnC,WAAW;SACZ,GAAG;;MAEN,QAAQ;IACV,sBAAsB,EAAE,OAAO,CAAC;IAChC,wBAAwB,EAAE,OAAO,qBAAqB,GAAG,SAAS,CAAC;CACpE,CAuGA,CAAC"}
1
+ {"version":3,"file":"with-hints.d.ts","sourceRoot":"","sources":["../../../src/agent/master/with-hints.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAErD,OAAO,MAAM,MAAM,QAAQ,CAAC;AAI5B,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AACzC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAEnD,eAAO,MAAM,0BAA0B;iBAMxB,OAAO,8BAA8B;;oBAElC,MAAM;6BACG,MAAM;MAC7B,MAAM,GAAG,OAAO,yBAAyB,EAiC5C,CAAC;AAEF,eAAO,MAAM,gBAAgB;6BAOF;QACvB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,CAAC,EAAE,MAAM,CAAC;KAC5B;0BACqB;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,EAAE;UACrD,WAAW;SACZ,GAAG;;MAEN,QAAQ;IACV,sBAAsB,EAAE,OAAO,CAAC;IAChC,wBAAwB,EAAE,OAAO,qBAAqB,GAAG,SAAS,CAAC;CACpE,CAwGA,CAAC"}
@@ -37,7 +37,7 @@ const triggerHintsFlow = async ({ outputFromGetNextAction, generatedAnnotations,
37
37
  try {
38
38
  const hasElementAnnotation = outputFromGetNextAction?.elementAnnotation?.length &&
39
39
  outputFromGetNextAction?.elementAnnotation?.trim()?.length &&
40
- generatedAnnotations?.includes(outputFromGetNextAction?.elementAnnotation);
40
+ generatedAnnotations.some((annotation) => annotation.elementID === outputFromGetNextAction?.elementAnnotation);
41
41
  trace?.event({
42
42
  name: "has-element-annotation",
43
43
  output: {
@@ -1 +1 @@
1
- {"version":3,"file":"run-time-planner.d.ts","sourceRoot":"","sources":["../../../src/agent/planner/run-time-planner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAGrD;;;;;;GAMG;AACH,wBAAsB,cAAc,CAAC,EACnC,KAAK,EACL,IAAI,EACJ,YAAY,EACZ,KAAK,EACL,WAAW,GACZ,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC5B,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;;;;GA6GA"}
1
+ {"version":3,"file":"run-time-planner.d.ts","sourceRoot":"","sources":["../../../src/agent/planner/run-time-planner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAGrD;;;;;;GAMG;AACH,wBAAsB,cAAc,CAAC,EACnC,KAAK,EACL,IAAI,EACJ,YAAY,EACZ,KAAK,EACL,WAAW,GACZ,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC5B,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;;;;GAiHA"}
@@ -22,14 +22,14 @@ async function runtimePlanner({ trace, task, conversation, pages, currentPage, }
22
22
  {
23
23
  role: "system",
24
24
  content: `
25
- Given a conversation that lists only the actions that were successfully executed and a task comprising multiple actions, your goal is to analyse the conversation and determine if the entire task is completed.
26
- These conversations are between AI agents using Playwright to execute actions on browser. These agents already have access to browser tabs to execute steps. The successfully executed steps on browser post browser has opened, is provided to you as conversation.
25
+ Given a successfully executed actions that lists only the actions that were successfully executed and a task comprising multiple actions, your goal is to analyse the list and determine if the entire task is completed.
26
+ These actions are executed by AI agents using Playwright on a browser. These agents already have access to browser tabs to execute actions. The successfully executed actions on browser post browser has opened, is provided to you as successfully executed actions.
27
27
 
28
28
  If the task is not fully completed, identify which specific actions are missing and suggest next steps to complete the task. Assume that the conversation provided is entirely truthful and no additional actions were performed beyond those listed.
29
29
 
30
30
  To fulfil your goal, follow these steps:
31
31
  - Divide the task into individual actions.
32
- - Compare each task action against the actions listed in the conversation.
32
+ - Compare each task action against the actions listed in the successfully executed actions list.
33
33
  - Identify which actions have been executed and which have not.
34
34
  - If all actions are executed, respond with the task as done.
35
35
  - If any actions are missing, respond with the task as not done, listing all actions and specifying which are complete and which are missing.
@@ -41,9 +41,13 @@ To fulfil your goal, follow these steps:
41
41
  content: `
42
42
  Task: ${task}
43
43
 
44
- Conversation:
44
+ ----
45
+
46
+ Following are successfully executed actions:
45
47
  ${conversation.join("\n")}
46
48
 
49
+ ----
50
+
47
51
  Current page:
48
52
  ${currentPage}
49
53
  `,
@@ -1 +1 @@
1
- {"version":3,"file":"update-scenario-agent.evals.d.ts","sourceRoot":"","sources":["../../src/evals/update-scenario-agent.evals.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAUpC,QAAA,MAAM,+BAA+B,EAAE,UAiDtC,CAAC;AAEF,eAAe,+BAA+B,CAAC"}
1
+ {"version":3,"file":"update-scenario-agent.evals.d.ts","sourceRoot":"","sources":["../../src/evals/update-scenario-agent.evals.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAUpC,QAAA,MAAM,+BAA+B,EAAE,UAqDtC,CAAC;AAEF,eAAe,+BAA+B,CAAC"}
@@ -5,10 +5,9 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
6
  const js_levenshtein_1 = __importDefault(require("js-levenshtein"));
7
7
  const update_flow_1 = require("../agent/codegen/update-flow");
8
- const utils_1 = require("../agent/codegen/utils");
9
8
  const updateScenarioCodeAgentEvaluate = async ({ item, trace }) => {
10
9
  const { testCase, testFiles, pageFiles, testFilePath, testFileContent } = item.input;
11
- const response = await (0, update_flow_1.getUpdateTestCodeCompletion)({
10
+ const fileChanges = await (0, update_flow_1.getUpdateTestCodeCompletion)({
12
11
  testCase,
13
12
  testFiles,
14
13
  pageFiles,
@@ -16,8 +15,7 @@ const updateScenarioCodeAgentEvaluate = async ({ item, trace }) => {
16
15
  testFileContent,
17
16
  trace,
18
17
  });
19
- const fileChanges = (0, utils_1.extractTestUpdates)(response);
20
- const expectedFileChanges = (0, utils_1.extractTestUpdates)(item.expectedOutput);
18
+ const expectedFileChanges = item.expectedOutput;
21
19
  const fileChangeCount = fileChanges.length;
22
20
  const expectedFileChangeCount = expectedFileChanges.length;
23
21
  const correctFilePathChanges = expectedFileChanges.every((ef) => fileChanges.some((f) => f.filePath === ef.filePath));
@@ -43,7 +41,7 @@ const updateScenarioCodeAgentEvaluate = async ({ item, trace }) => {
43
41
  value: score,
44
42
  },
45
43
  ],
46
- output: response,
44
+ output: fileChanges,
47
45
  };
48
46
  };
49
47
  exports.default = updateScenarioCodeAgentEvaluate;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@empiricalrun/test-gen",
3
- "version": "0.38.13",
3
+ "version": "0.38.16",
4
4
  "publishConfig": {
5
5
  "registry": "https://registry.npmjs.org/",
6
6
  "access": "public"
@@ -58,9 +58,9 @@
58
58
  "ts-morph": "^24.0.0",
59
59
  "tsx": "^4.16.2",
60
60
  "typescript": "^5.3.3",
61
- "@empiricalrun/llm": "^0.9.25",
62
- "@empiricalrun/r2-uploader": "^0.3.6",
63
- "@empiricalrun/reporter": "^0.21.3"
61
+ "@empiricalrun/llm": "^0.9.26",
62
+ "@empiricalrun/reporter": "^0.21.3",
63
+ "@empiricalrun/r2-uploader": "^0.3.6"
64
64
  },
65
65
  "devDependencies": {
66
66
  "@types/detect-port": "^1.3.5",