@empiricalrun/test-gen 0.38.13 → 0.38.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,20 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.38.15
4
+
5
+ ### Patch Changes
6
+
7
+ - 91ded8f: fix: incorrect annotations
8
+
9
+ ## 0.38.14
10
+
11
+ ### Patch Changes
12
+
13
+ - bd5c945: fix: run update scenario prompts on claude
14
+ - 31f8805: fix: runtime planner calling out actions as done
15
+ - Updated dependencies [bd5c945]
16
+ - @empiricalrun/llm@0.9.26
17
+
3
18
  ## 0.38.13
4
19
 
5
20
  ### Patch Changes
@@ -11,7 +11,12 @@ export declare function getUpdateTestCodeCompletion({ testCase, testFileContent,
11
11
  testFileContent: string;
12
12
  trace?: TraceClient;
13
13
  options?: TestGenConfigOptions;
14
- }): Promise<string>;
14
+ }): Promise<{
15
+ filePath: string | undefined;
16
+ oldCode: string | undefined;
17
+ newCode: string | undefined;
18
+ reason: string | undefined;
19
+ }[]>;
15
20
  export declare function updateTest(testCase: TestCase, file: string, options: TestGenConfigOptions | undefined, logging?: boolean, validate?: boolean, trace?: TraceClient): Promise<UpdatedTestCase[]>;
16
21
  export declare function getAppendCreateTestBlockCompletion({ testFiles, pageFiles, testCase, testFilePath, options, trace, }: {
17
22
  trace?: TraceClient;
@@ -1 +1 @@
1
- {"version":3,"file":"update-flow.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/update-flow.ts"],"names":[],"mappings":"AAAA,OAAO,EAKL,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAsB3B,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,KAAK,eAAe,GAAG,QAAQ,GAAG;IAChC,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB,CAAC;AAqIF,wBAAsB,2BAA2B,CAAC,EAChD,QAAQ,EACR,eAAe,EACf,SAAS,EACT,SAAS,EACT,YAAY,EACZ,KAAK,EACL,OAAO,GACR,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,GAAG,OAAO,CAAC,MAAM,CAAC,CA6ClB;AAED,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAAG,SAAS,EACzC,OAAO,GAAE,OAAc,EACvB,QAAQ,GAAE,OAAc,EACxB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,eAAe,EAAE,CAAC,CA6D5B;AAED,wBAAsB,kCAAkC,CAAC,EACvD,SAAS,EACT,SAAS,EACT,QAAQ,EACR,YAAY,EACZ,OAAO,EACP,KAAK,GACN,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,YAAY,EAAE,MAAM,CAAC;CACtB,mBA4DA;AAED,wBAAsB,qBAAqB,CAAC,EAC1C,QAAQ,EACR,IAAI,EACJ,OAAO,EACP,KAAK,EACL,aAAoB,GACrB,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAyC7B"}
1
+ {"version":3,"file":"update-flow.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/update-flow.ts"],"names":[],"mappings":"AAAA,OAAO,EAKL,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAoB3B,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,KAAK,eAAe,GAAG,QAAQ,GAAG;IAChC,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB,CAAC;AAqIF,wBAAsB,2BAA2B,CAAC,EAChD,QAAQ,EACR,eAAe,EACf,SAAS,EACT,SAAS,EACT,YAAY,EACZ,KAAK,EACL,OAAO,GACR,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,GAAG,OAAO,CACT;IACE,QAAQ,EAAE,MAAM,GAAG,SAAS,CAAC;IAC7B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;CAC5B,EAAE,CACJ,CA+CA;AAED,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAAG,SAAS,EACzC,OAAO,GAAE,OAAc,EACvB,QAAQ,GAAE,OAAc,EACxB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,eAAe,EAAE,CAAC,CA4D5B;AAED,wBAAsB,kCAAkC,CAAC,EACvD,SAAS,EACT,SAAS,EACT,QAAQ,EACR,YAAY,EACZ,OAAO,EACP,KAAK,GACN,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,YAAY,EAAE,MAAM,CAAC;CACtB,mBA4DA;AAED,wBAAsB,qBAAqB,CAAC,EAC1C,QAAQ,EACR,IAAI,EACJ,OAAO,EACP,KAAK,EACL,aAAoB,GACrB,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAyC7B"}
@@ -128,22 +128,24 @@ async function getUpdateTestCodeCompletion({ testCase, testFileContent, testFile
128
128
  scenarioFile: testFilePath,
129
129
  currentScenarioCodeBlock,
130
130
  });
131
- promptSpan?.end({ output: { instruction } });
132
131
  const llm = new llm_1.LLM({
133
132
  trace,
134
- provider: options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
135
- defaultModel: options?.model || constants_1.DEFAULT_MODEL,
136
- providerApiKey: constants_1.MODEL_API_KEYS[options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER],
133
+ provider: "anthropic",
134
+ defaultModel: "claude-3-5-sonnet-latest",
135
+ providerApiKey: constants_1.MODEL_API_KEYS["anthropic"],
137
136
  });
138
137
  const firstShotMessage = await llm.createChatCompletion({
139
138
  messages: instruction,
140
139
  modelParameters: {
141
140
  ...constants_1.DEFAULT_MODEL_PARAMETERS,
142
141
  ...options?.modelParameters,
142
+ temperature: 0,
143
143
  },
144
144
  });
145
145
  let response = firstShotMessage?.content || "";
146
- return response;
146
+ const fileChanges = (0, utils_1.extractTestUpdates)(response);
147
+ promptSpan?.end({ output: fileChanges });
148
+ return fileChanges;
147
149
  }
148
150
  exports.getUpdateTestCodeCompletion = getUpdateTestCodeCompletion;
149
151
  async function updateTest(testCase, file, options, logging = true, validate = true, trace) {
@@ -176,12 +178,11 @@ async function updateTest(testCase, file, options, logging = true, validate = tr
176
178
  name: "update-test",
177
179
  input: request,
178
180
  });
179
- const response = await getUpdateTestCodeCompletion({
181
+ const fileChanges = await getUpdateTestCodeCompletion({
180
182
  ...request,
181
183
  trace: updateTestSpan,
182
184
  });
183
185
  logger.success("Test generated successfully!");
184
- const fileChanges = (0, utils_1.extractTestUpdates)(response);
185
186
  await applyFileChanges({
186
187
  validateTypes: validate,
187
188
  trace: updateTestSpan,
@@ -199,7 +200,7 @@ async function updateTest(testCase, file, options, logging = true, validate = tr
199
200
  ...testCase,
200
201
  updatedFiles: fileChanges.map((f) => f.filePath),
201
202
  });
202
- updateTestSpan?.end({ output: { response } });
203
+ updateTestSpan?.end({ output: { fileChanges } });
203
204
  await (0, llm_1.flushAllTraces)();
204
205
  return generatedTestCases;
205
206
  }
@@ -16,7 +16,10 @@ export declare function getNextAction({ task, executedActions, failedActions, pa
16
16
  actions: PlaywrightActions;
17
17
  disableSkills: boolean;
18
18
  useHints: boolean;
19
- annotations?: string[];
19
+ annotations?: {
20
+ elementID: string;
21
+ text: string;
22
+ }[];
20
23
  }): Promise<import("openai/resources/index.mjs").ChatCompletionMessageToolCall | undefined>;
21
24
  export declare function createTestUsingMasterAgent({ task, page, testCase, options, scopeVars, }: {
22
25
  task: string;
@@ -1 +1 @@
1
- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,GAAG,EACH,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAG3B,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAclD,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAClD,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AAoBrB,wBAAsB,aAAa,CAAC,EAClC,IAAI,EACJ,eAAe,EACf,aAAa,EACb,OAAO,EACP,KAAK,EACL,GAAG,EACH,OAAO,EACP,cAAc,EACd,uBAAuB,EACvB,OAAO,EACP,aAAa,EACb,QAAgB,EAChB,WAAW,GACZ,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,aAAa,EAAE,GAAG,EAAE,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,cAAc,EAAE,MAAM,CAAC;IACvB,uBAAuB,CAAC,EAAE,MAAM,CAAC;IACjC,OAAO,EAAE,iBAAiB,CAAC;IAC3B,aAAa,EAAE,OAAO,CAAC;IACvB,QAAQ,EAAE,OAAO,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;CACxB,2FA2FA;AAGD,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,oBAAoB,CAAC;IAC9B,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GAsTA"}
1
+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,GAAG,EACH,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAG3B,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAclD,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAClD,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AAoBrB,wBAAsB,aAAa,CAAC,EAClC,IAAI,EACJ,eAAe,EACf,aAAa,EACb,OAAO,EACP,KAAK,EACL,GAAG,EACH,OAAO,EACP,cAAc,EACd,uBAAuB,EACvB,OAAO,EACP,aAAa,EACb,QAAgB,EAChB,WAAW,GACZ,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,aAAa,EAAE,GAAG,EAAE,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,cAAc,EAAE,MAAM,CAAC;IACvB,uBAAuB,CAAC,EAAE,MAAM,CAAC;IACjC,OAAO,EAAE,iBAAiB,CAAC;IAC3B,aAAa,EAAE,OAAO,CAAC;IACvB,QAAQ,EAAE,OAAO,CAAC;IAClB,WAAW,CAAC,EAAE;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;CACrD,2FA6FA;AAGD,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,oBAAoB,CAAC;IAC9B,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GAiUA"}
@@ -52,8 +52,10 @@ async function getNextAction({ task, executedActions, failedActions, pageUrl, tr
52
52
  failedActions: failedActions.map((a) => a).join("\n"),
53
53
  executedActions: executedActions.map((a) => a).join("\n"),
54
54
  pageUrl,
55
- annotations,
56
- }, 24);
55
+ annotations: annotations
56
+ ?.map((a) => `${a.elementID}:${a.text}`)
57
+ .join("\n"),
58
+ }, 27);
57
59
  // assuming there is only one user message in the prompt. if there is a change in langfuse prompt format, this will need to be updated
58
60
  const userMessage = promptMessages.filter((m) => m.role === "user")[0];
59
61
  const systemMessage = promptMessages.filter((m) => m.role === "system")[0];
@@ -175,7 +177,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
175
177
  const plannerResp = await (0, run_time_planner_1.runtimePlanner)({
176
178
  trace: masterAgentSpan,
177
179
  task,
178
- conversation: ["Successfully executed actions", ...masterAgentActions],
180
+ conversation: [...masterAgentActions],
179
181
  pages: getPageVariables(actions.getStateVariables()),
180
182
  currentPage: (0, utils_1.getPageVarName)(),
181
183
  });
@@ -210,8 +212,18 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
210
212
  // @ts-ignore
211
213
  // eslint-disable-next-line no-undef
212
214
  window.annotationInstance = annotateClickableElements();
215
+ const annotations = Object.entries(
213
216
  // @ts-ignore
214
- return Object.keys(window.annotationInstance.annotations);
217
+ window.annotationInstance.annotations).map(([key, value]) => ({
218
+ elementID: key, // Assign the key to elementID
219
+ text:
220
+ //@ts-ignore
221
+ value.node.text?.trim() ||
222
+ //@ts-ignore
223
+ value.node.textContent?.trim() ||
224
+ "<This is an icon or image. Check the screenshot>",
225
+ }));
226
+ return annotations;
215
227
  });
216
228
  await page.waitForTimeout(2000);
217
229
  const annonationBuffer = await page.screenshot({
@@ -13,7 +13,10 @@ export declare const triggerHintsFlow: ({ outputFromGetNextAction, generatedAnno
13
13
  action: string;
14
14
  elementAnnotation?: string;
15
15
  };
16
- generatedAnnotations: Record<string, any>;
16
+ generatedAnnotations: {
17
+ elementID: string;
18
+ text: string;
19
+ }[];
17
20
  page: TestGenPage;
18
21
  llm: LLM;
19
22
  trace?: TraceClient | undefined;
@@ -1 +1 @@
1
- {"version":3,"file":"with-hints.d.ts","sourceRoot":"","sources":["../../../src/agent/master/with-hints.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAErD,OAAO,MAAM,MAAM,QAAQ,CAAC;AAI5B,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AACzC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAEnD,eAAO,MAAM,0BAA0B;iBAMxB,OAAO,8BAA8B;;oBAElC,MAAM;6BACG,MAAM;MAC7B,MAAM,GAAG,OAAO,yBAAyB,EAiC5C,CAAC;AAEF,eAAO,MAAM,gBAAgB;6BAOF;QACvB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,CAAC,EAAE,MAAM,CAAC;KAC5B;0BACqB,OAAO,MAAM,EAAE,GAAG,CAAC;UACnC,WAAW;SACZ,GAAG;;MAEN,QAAQ;IACV,sBAAsB,EAAE,OAAO,CAAC;IAChC,wBAAwB,EAAE,OAAO,qBAAqB,GAAG,SAAS,CAAC;CACpE,CAuGA,CAAC"}
1
+ {"version":3,"file":"with-hints.d.ts","sourceRoot":"","sources":["../../../src/agent/master/with-hints.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAErD,OAAO,MAAM,MAAM,QAAQ,CAAC;AAI5B,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AACzC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAEnD,eAAO,MAAM,0BAA0B;iBAMxB,OAAO,8BAA8B;;oBAElC,MAAM;6BACG,MAAM;MAC7B,MAAM,GAAG,OAAO,yBAAyB,EAiC5C,CAAC;AAEF,eAAO,MAAM,gBAAgB;6BAOF;QACvB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,CAAC,EAAE,MAAM,CAAC;KAC5B;0BACqB;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,EAAE;UACrD,WAAW;SACZ,GAAG;;MAEN,QAAQ;IACV,sBAAsB,EAAE,OAAO,CAAC;IAChC,wBAAwB,EAAE,OAAO,qBAAqB,GAAG,SAAS,CAAC;CACpE,CAwGA,CAAC"}
@@ -37,7 +37,7 @@ const triggerHintsFlow = async ({ outputFromGetNextAction, generatedAnnotations,
37
37
  try {
38
38
  const hasElementAnnotation = outputFromGetNextAction?.elementAnnotation?.length &&
39
39
  outputFromGetNextAction?.elementAnnotation?.trim()?.length &&
40
- generatedAnnotations?.includes(outputFromGetNextAction?.elementAnnotation);
40
+ generatedAnnotations.some((annotation) => annotation.elementID === outputFromGetNextAction?.elementAnnotation);
41
41
  trace?.event({
42
42
  name: "has-element-annotation",
43
43
  output: {
@@ -1 +1 @@
1
- {"version":3,"file":"run-time-planner.d.ts","sourceRoot":"","sources":["../../../src/agent/planner/run-time-planner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAGrD;;;;;;GAMG;AACH,wBAAsB,cAAc,CAAC,EACnC,KAAK,EACL,IAAI,EACJ,YAAY,EACZ,KAAK,EACL,WAAW,GACZ,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC5B,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;;;;GA6GA"}
1
+ {"version":3,"file":"run-time-planner.d.ts","sourceRoot":"","sources":["../../../src/agent/planner/run-time-planner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAGrD;;;;;;GAMG;AACH,wBAAsB,cAAc,CAAC,EACnC,KAAK,EACL,IAAI,EACJ,YAAY,EACZ,KAAK,EACL,WAAW,GACZ,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC5B,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;;;;GAiHA"}
@@ -22,14 +22,14 @@ async function runtimePlanner({ trace, task, conversation, pages, currentPage, }
22
22
  {
23
23
  role: "system",
24
24
  content: `
25
- Given a conversation that lists only the actions that were successfully executed and a task comprising multiple actions, your goal is to analyse the conversation and determine if the entire task is completed.
26
- These conversations are between AI agents using Playwright to execute actions on browser. These agents already have access to browser tabs to execute steps. The successfully executed steps on browser post browser has opened, is provided to you as conversation.
25
+ Given a successfully executed actions that lists only the actions that were successfully executed and a task comprising multiple actions, your goal is to analyse the list and determine if the entire task is completed.
26
+ These actions are executed by AI agents using Playwright on a browser. These agents already have access to browser tabs to execute actions. The successfully executed actions on browser post browser has opened, is provided to you as successfully executed actions.
27
27
 
28
28
  If the task is not fully completed, identify which specific actions are missing and suggest next steps to complete the task. Assume that the conversation provided is entirely truthful and no additional actions were performed beyond those listed.
29
29
 
30
30
  To fulfil your goal, follow these steps:
31
31
  - Divide the task into individual actions.
32
- - Compare each task action against the actions listed in the conversation.
32
+ - Compare each task action against the actions listed in the successfully executed actions list.
33
33
  - Identify which actions have been executed and which have not.
34
34
  - If all actions are executed, respond with the task as done.
35
35
  - If any actions are missing, respond with the task as not done, listing all actions and specifying which are complete and which are missing.
@@ -41,9 +41,13 @@ To fulfil your goal, follow these steps:
41
41
  content: `
42
42
  Task: ${task}
43
43
 
44
- Conversation:
44
+ ----
45
+
46
+ Following are successfully executed actions:
45
47
  ${conversation.join("\n")}
46
48
 
49
+ ----
50
+
47
51
  Current page:
48
52
  ${currentPage}
49
53
  `,
@@ -1 +1 @@
1
- {"version":3,"file":"update-scenario-agent.evals.d.ts","sourceRoot":"","sources":["../../src/evals/update-scenario-agent.evals.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAUpC,QAAA,MAAM,+BAA+B,EAAE,UAiDtC,CAAC;AAEF,eAAe,+BAA+B,CAAC"}
1
+ {"version":3,"file":"update-scenario-agent.evals.d.ts","sourceRoot":"","sources":["../../src/evals/update-scenario-agent.evals.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAUpC,QAAA,MAAM,+BAA+B,EAAE,UAqDtC,CAAC;AAEF,eAAe,+BAA+B,CAAC"}
@@ -5,10 +5,9 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
6
  const js_levenshtein_1 = __importDefault(require("js-levenshtein"));
7
7
  const update_flow_1 = require("../agent/codegen/update-flow");
8
- const utils_1 = require("../agent/codegen/utils");
9
8
  const updateScenarioCodeAgentEvaluate = async ({ item, trace }) => {
10
9
  const { testCase, testFiles, pageFiles, testFilePath, testFileContent } = item.input;
11
- const response = await (0, update_flow_1.getUpdateTestCodeCompletion)({
10
+ const fileChanges = await (0, update_flow_1.getUpdateTestCodeCompletion)({
12
11
  testCase,
13
12
  testFiles,
14
13
  pageFiles,
@@ -16,8 +15,7 @@ const updateScenarioCodeAgentEvaluate = async ({ item, trace }) => {
16
15
  testFileContent,
17
16
  trace,
18
17
  });
19
- const fileChanges = (0, utils_1.extractTestUpdates)(response);
20
- const expectedFileChanges = (0, utils_1.extractTestUpdates)(item.expectedOutput);
18
+ const expectedFileChanges = item.expectedOutput;
21
19
  const fileChangeCount = fileChanges.length;
22
20
  const expectedFileChangeCount = expectedFileChanges.length;
23
21
  const correctFilePathChanges = expectedFileChanges.every((ef) => fileChanges.some((f) => f.filePath === ef.filePath));
@@ -43,7 +41,7 @@ const updateScenarioCodeAgentEvaluate = async ({ item, trace }) => {
43
41
  value: score,
44
42
  },
45
43
  ],
46
- output: response,
44
+ output: fileChanges,
47
45
  };
48
46
  };
49
47
  exports.default = updateScenarioCodeAgentEvaluate;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@empiricalrun/test-gen",
3
- "version": "0.38.13",
3
+ "version": "0.38.15",
4
4
  "publishConfig": {
5
5
  "registry": "https://registry.npmjs.org/",
6
6
  "access": "public"
@@ -58,7 +58,7 @@
58
58
  "ts-morph": "^24.0.0",
59
59
  "tsx": "^4.16.2",
60
60
  "typescript": "^5.3.3",
61
- "@empiricalrun/llm": "^0.9.25",
61
+ "@empiricalrun/llm": "^0.9.26",
62
62
  "@empiricalrun/r2-uploader": "^0.3.6",
63
63
  "@empiricalrun/reporter": "^0.21.3"
64
64
  },