npm - @empiricalrun/test-gen - Versions diffs - 0.38.13 → 0.38.15 - Mend

@empiricalrun/test-gen 0.38.13 → 0.38.15

This diff shows the changes between the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (15)

package/CHANGELOG.md +15 -0
package/dist/agent/codegen/update-flow.d.ts +6 -1
package/dist/agent/codegen/update-flow.d.ts.map +1 -1
package/dist/agent/codegen/update-flow.js +9 -8
package/dist/agent/master/run.d.ts +4 -1
package/dist/agent/master/run.d.ts.map +1 -1
package/dist/agent/master/run.js +16 -4
package/dist/agent/master/with-hints.d.ts +4 -1
package/dist/agent/master/with-hints.d.ts.map +1 -1
package/dist/agent/master/with-hints.js +1 -1
package/dist/agent/planner/run-time-planner.d.ts.map +1 -1
package/dist/agent/planner/run-time-planner.js +8 -4
package/dist/evals/update-scenario-agent.evals.d.ts.map +1 -1
package/dist/evals/update-scenario-agent.evals.js +3 -5
package/package.json +2 -2

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,20 @@
 # @empiricalrun/test-gen
+## 0.38.15
+### Patch Changes
+- 91ded8f: fix: incorrect annotations
+## 0.38.14
+### Patch Changes
+- bd5c945: fix: run update scenario prompts on claude
+- 31f8805: fix: runtime planner calling out actions as done
+- Updated dependencies [bd5c945]
+  - @empiricalrun/llm@0.9.26
 ## 0.38.13
 ### Patch Changes

package/dist/agent/codegen/update-flow.d.ts CHANGED Viewed

@@ -11,7 +11,12 @@ export declare function getUpdateTestCodeCompletion({ testCase, testFileContent,
     testFileContent: string;
     trace?: TraceClient;
     options?: TestGenConfigOptions;
-}): Promise<string>;
+}): Promise<{
+    filePath: string | undefined;
+    oldCode: string | undefined;
+    newCode: string | undefined;
+    reason: string | undefined;
+}[]>;
 export declare function updateTest(testCase: TestCase, file: string, options: TestGenConfigOptions | undefined, logging?: boolean, validate?: boolean, trace?: TraceClient): Promise<UpdatedTestCase[]>;
 export declare function getAppendCreateTestBlockCompletion({ testFiles, pageFiles, testCase, testFilePath, options, trace, }: {
     trace?: TraceClient;

package/dist/agent/codegen/update-flow.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"update-flow.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/update-flow.ts"],"names":[],"mappings":"AAAA,OAAO,EAKL,WAAW,EACZ,MAAM,mBAAmB,CAAC;~~AAsB3B~~,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,KAAK,eAAe,GAAG,QAAQ,GAAG;IAChC,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB,CAAC;AAqIF,wBAAsB,2BAA2B,CAAC,EAChD,QAAQ,EACR,eAAe,EACf,SAAS,EACT,SAAS,EACT,YAAY,EACZ,KAAK,EACL,OAAO,GACR,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,GAAG,OAAO,CAAC,MAAM,CAAC,~~CA6ClB~~;AAED,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAAG,SAAS,EACzC,OAAO,GAAE,OAAc,EACvB,QAAQ,GAAE,OAAc,EACxB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,eAAe,EAAE,CAAC,~~CA6D5B~~;AAED,wBAAsB,kCAAkC,CAAC,EACvD,SAAS,EACT,SAAS,EACT,QAAQ,EACR,YAAY,EACZ,OAAO,EACP,KAAK,GACN,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,YAAY,EAAE,MAAM,CAAC;CACtB,mBA4DA;AAED,wBAAsB,qBAAqB,CAAC,EAC1C,QAAQ,EACR,IAAI,EACJ,OAAO,EACP,KAAK,EACL,aAAoB,GACrB,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAyC7B"}
1	+ {"version":3,"file":"update-flow.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/update-flow.ts"],"names":[],"mappings":"AAAA,OAAO,EAKL,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAoB3B,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,KAAK,eAAe,GAAG,QAAQ,GAAG;IAChC,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB,CAAC;AAqIF,wBAAsB,2BAA2B,CAAC,EAChD,QAAQ,EACR,eAAe,EACf,SAAS,EACT,SAAS,EACT,YAAY,EACZ,KAAK,EACL,OAAO,GACR,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,GAAG,OAAO,CACT;IACE,QAAQ,EAAE,MAAM,GAAG,SAAS,CAAC;IAC7B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;CAC5B,EAAE,CACJ,CA+CA;AAED,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAAG,SAAS,EACzC,OAAO,GAAE,OAAc,EACvB,QAAQ,GAAE,OAAc,EACxB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,eAAe,EAAE,CAAC,CA4D5B;AAED,wBAAsB,kCAAkC,CAAC,EACvD,SAAS,EACT,SAAS,EACT,QAAQ,EACR,YAAY,EACZ,OAAO,EACP,KAAK,GACN,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,YAAY,EAAE,MAAM,CAAC;CACtB,mBA4DA;AAED,wBAAsB,qBAAqB,CAAC,EAC1C,QAAQ,EACR,IAAI,EACJ,OAAO,EACP,KAAK,EACL,aAAoB,GACrB,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAyC7B"}

package/dist/agent/codegen/update-flow.js CHANGED Viewed

@@ -128,22 +128,24 @@ async function getUpdateTestCodeCompletion({ testCase, testFileContent, testFile
         scenarioFile: testFilePath,
         currentScenarioCodeBlock,
     });
-    promptSpan?.end({ output: { instruction } });
     const llm = new llm_1.LLM({
         trace,
-        provider: options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
-        defaultModel: options?.model || constants_1.DEFAULT_MODEL,
-        providerApiKey: constants_1.MODEL_API_KEYS[options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER],
+        provider: "anthropic",
+        defaultModel: "claude-3-5-sonnet-latest",
+        providerApiKey: constants_1.MODEL_API_KEYS["anthropic"],
     });
     const firstShotMessage = await llm.createChatCompletion({
         messages: instruction,
         modelParameters: {
             ...constants_1.DEFAULT_MODEL_PARAMETERS,
             ...options?.modelParameters,
+            temperature: 0,
         },
     });
     let response = firstShotMessage?.content || "";
-    return response;
+    const fileChanges = (0, utils_1.extractTestUpdates)(response);
+    promptSpan?.end({ output: fileChanges });
+    return fileChanges;
 }
 exports.getUpdateTestCodeCompletion = getUpdateTestCodeCompletion;
 async function updateTest(testCase, file, options, logging = true, validate = true, trace) {
@@ -176,12 +178,11 @@ async function updateTest(testCase, file, options, logging = true, validate = tr
         name: "update-test",
         input: request,
     });
-    const response = await getUpdateTestCodeCompletion({
+    const fileChanges = await getUpdateTestCodeCompletion({
         ...request,
         trace: updateTestSpan,
     });
     logger.success("Test generated successfully!");
-    const fileChanges = (0, utils_1.extractTestUpdates)(response);
     await applyFileChanges({
         validateTypes: validate,
         trace: updateTestSpan,
@@ -199,7 +200,7 @@ async function updateTest(testCase, file, options, logging = true, validate = tr
         ...testCase,
         updatedFiles: fileChanges.map((f) => f.filePath),
     });
-    updateTestSpan?.end({ output: { response } });
+    updateTestSpan?.end({ output: { fileChanges } });
     await (0, llm_1.flushAllTraces)();
     return generatedTestCases;
 }

package/dist/agent/master/run.d.ts CHANGED Viewed

@@ -16,7 +16,10 @@ export declare function getNextAction({ task, executedActions, failedActions, pa
     actions: PlaywrightActions;
     disableSkills: boolean;
     useHints: boolean;
-    annotations?: string[];
+    annotations?: {
+        elementID: string;
+        text: string;
+    }[];
 }): Promise<import("openai/resources/index.mjs").ChatCompletionMessageToolCall | undefined>;
 export declare function createTestUsingMasterAgent({ task, page, testCase, options, scopeVars, }: {
     task: string;

package/dist/agent/master/run.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,GAAG,EACH,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAG3B,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAclD,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAClD,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AAoBrB,wBAAsB,aAAa,CAAC,EAClC,IAAI,EACJ,eAAe,EACf,aAAa,EACb,OAAO,EACP,KAAK,EACL,GAAG,EACH,OAAO,EACP,cAAc,EACd,uBAAuB,EACvB,OAAO,EACP,aAAa,EACb,QAAgB,EAChB,WAAW,GACZ,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,aAAa,EAAE,GAAG,EAAE,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,cAAc,EAAE,MAAM,CAAC;IACvB,uBAAuB,CAAC,EAAE,MAAM,CAAC;IACjC,OAAO,EAAE,iBAAiB,CAAC;IAC3B,aAAa,EAAE,OAAO,CAAC;IACvB,QAAQ,EAAE,OAAO,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;~~CACxB~~,~~2FA2FA~~;AAGD,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,oBAAoB,CAAC;IAC9B,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;~~GAsTA~~"}
1	+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,GAAG,EACH,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAG3B,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAclD,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAClD,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AAoBrB,wBAAsB,aAAa,CAAC,EAClC,IAAI,EACJ,eAAe,EACf,aAAa,EACb,OAAO,EACP,KAAK,EACL,GAAG,EACH,OAAO,EACP,cAAc,EACd,uBAAuB,EACvB,OAAO,EACP,aAAa,EACb,QAAgB,EAChB,WAAW,GACZ,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,aAAa,EAAE,GAAG,EAAE,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,cAAc,EAAE,MAAM,CAAC;IACvB,uBAAuB,CAAC,EAAE,MAAM,CAAC;IACjC,OAAO,EAAE,iBAAiB,CAAC;IAC3B,aAAa,EAAE,OAAO,CAAC;IACvB,QAAQ,EAAE,OAAO,CAAC;IAClB,WAAW,CAAC,EAAE;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;CACrD,2FA6FA;AAGD,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,oBAAoB,CAAC;IAC9B,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GAiUA"}

package/dist/agent/master/run.js CHANGED Viewed

@@ -52,8 +52,10 @@ async function getNextAction({ task, executedActions, failedActions, pageUrl, tr
         failedActions: failedActions.map((a) => a).join("\n"),
         executedActions: executedActions.map((a) => a).join("\n"),
         pageUrl,
-        annotations,
-    }, 24);
+        annotations: annotations
+            ?.map((a) => `${a.elementID}:${a.text}`)
+            .join("\n"),
+    }, 27);
     // assuming there is only one user message in the prompt. if there is a change in langfuse prompt format, this will need to be updated
     const userMessage = promptMessages.filter((m) => m.role === "user")[0];
     const systemMessage = promptMessages.filter((m) => m.role === "system")[0];
@@ -175,7 +177,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
         const plannerResp = await (0, run_time_planner_1.runtimePlanner)({
             trace: masterAgentSpan,
             task,
-            conversation: ["Successfully executed actions", ...masterAgentActions],
+            conversation: [...masterAgentActions],
             pages: getPageVariables(actions.getStateVariables()),
             currentPage: (0, utils_1.getPageVarName)(),
         });
@@ -210,8 +212,18 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
                 // @ts-ignore
                 // eslint-disable-next-line no-undef
                 window.annotationInstance = annotateClickableElements();
+                const annotations = Object.entries(
                 // @ts-ignore
-                return Object.keys(window.annotationInstance.annotations);
+                window.annotationInstance.annotations).map(([key, value]) => ({
+                    elementID: key, // Assign the key to elementID
+                    text:
+                    //@ts-ignore
+                    value.node.text?.trim() ||
+                        //@ts-ignore
+                        value.node.textContent?.trim() ||
+                        "<This is an icon or image. Check the screenshot>",
+                }));
+                return annotations;
             });
             await page.waitForTimeout(2000);
             const annonationBuffer = await page.screenshot({

package/dist/agent/master/with-hints.d.ts CHANGED Viewed

@@ -13,7 +13,10 @@ export declare const triggerHintsFlow: ({ outputFromGetNextAction, generatedAnno
         action: string;
         elementAnnotation?: string;
     };
-    generatedAnnotations: Record<string, any>;
+    generatedAnnotations: {
+        elementID: string;
+        text: string;
+    }[];
     page: TestGenPage;
     llm: LLM;
     trace?: TraceClient | undefined;

package/dist/agent/master/with-hints.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"with-hints.d.ts","sourceRoot":"","sources":["../../../src/agent/master/with-hints.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAErD,OAAO,MAAM,MAAM,QAAQ,CAAC;AAI5B,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AACzC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAEnD,eAAO,MAAM,0BAA0B;iBAMxB,OAAO,8BAA8B;;oBAElC,MAAM;6BACG,MAAM;MAC7B,MAAM,GAAG,OAAO,yBAAyB,EAiC5C,CAAC;AAEF,eAAO,MAAM,gBAAgB;6BAOF;QACvB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,CAAC,EAAE,MAAM,CAAC;KAC5B;0BACqB,~~OAAO~~,MAAM,EAAE,~~GAAG~~,~~CAAC~~;~~UACnC~~,WAAW;SACZ,GAAG;;MAEN,QAAQ;IACV,sBAAsB,EAAE,OAAO,CAAC;IAChC,wBAAwB,EAAE,OAAO,qBAAqB,GAAG,SAAS,CAAC;CACpE,~~CAuGA~~,CAAC"}
1	+ {"version":3,"file":"with-hints.d.ts","sourceRoot":"","sources":["../../../src/agent/master/with-hints.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAErD,OAAO,MAAM,MAAM,QAAQ,CAAC;AAI5B,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AACzC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAEnD,eAAO,MAAM,0BAA0B;iBAMxB,OAAO,8BAA8B;;oBAElC,MAAM;6BACG,MAAM;MAC7B,MAAM,GAAG,OAAO,yBAAyB,EAiC5C,CAAC;AAEF,eAAO,MAAM,gBAAgB;6BAOF;QACvB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,CAAC,EAAE,MAAM,CAAC;KAC5B;0BACqB;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,EAAE;UACrD,WAAW;SACZ,GAAG;;MAEN,QAAQ;IACV,sBAAsB,EAAE,OAAO,CAAC;IAChC,wBAAwB,EAAE,OAAO,qBAAqB,GAAG,SAAS,CAAC;CACpE,CAwGA,CAAC"}

package/dist/agent/master/with-hints.js CHANGED Viewed

@@ -37,7 +37,7 @@ const triggerHintsFlow = async ({ outputFromGetNextAction, generatedAnnotations,
     try {
         const hasElementAnnotation = outputFromGetNextAction?.elementAnnotation?.length &&
             outputFromGetNextAction?.elementAnnotation?.trim()?.length &&
-            generatedAnnotations?.includes(outputFromGetNextAction?.elementAnnotation);
+            generatedAnnotations.some((annotation) => annotation.elementID === outputFromGetNextAction?.elementAnnotation);
         trace?.event({
             name: "has-element-annotation",
             output: {

package/dist/agent/planner/run-time-planner.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"run-time-planner.d.ts","sourceRoot":"","sources":["../../../src/agent/planner/run-time-planner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAGrD;;;;;;GAMG;AACH,wBAAsB,cAAc,CAAC,EACnC,KAAK,EACL,IAAI,EACJ,YAAY,EACZ,KAAK,EACL,WAAW,GACZ,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC5B,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;;;;~~GA6GA~~"}
1	+ {"version":3,"file":"run-time-planner.d.ts","sourceRoot":"","sources":["../../../src/agent/planner/run-time-planner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAGrD;;;;;;GAMG;AACH,wBAAsB,cAAc,CAAC,EACnC,KAAK,EACL,IAAI,EACJ,YAAY,EACZ,KAAK,EACL,WAAW,GACZ,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC5B,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;;;;GAiHA"}

package/dist/agent/planner/run-time-planner.js CHANGED Viewed

@@ -22,14 +22,14 @@ async function runtimePlanner({ trace, task, conversation, pages, currentPage, }
         {
             role: "system",
             content: `
-Given a conversation that lists only the actions that were successfully executed and a task comprising multiple actions, your goal is to analyse the conversation and determine if the entire task is completed.
-These conversations are between AI agents using Playwright to execute actions on browser. These agents already have access to browser tabs to execute steps. The successfully executed steps on browser post browser has opened, is provided to you as conversation.
+Given a successfully executed actions that lists only the actions that were successfully executed and a task comprising multiple actions, your goal is to analyse the list and determine if the entire task is completed.
+These actions are executed by AI agents using Playwright on a browser. These agents already have access to browser tabs to execute actions. The successfully executed actions on browser post browser has opened, is provided to you as successfully executed actions.
 If the task is not fully completed, identify which specific actions are missing and suggest next steps to complete the task. Assume that the conversation provided is entirely truthful and no additional actions were performed beyond those listed.
 To fulfil your goal, follow these steps:
 - Divide the task into individual actions.
-- Compare each task action against the actions listed in the conversation.
+- Compare each task action against the actions listed in the successfully executed actions list.
 - Identify which actions have been executed and which have not.
 - If all actions are executed, respond with the task as done.
 - If any actions are missing, respond with the task as not done, listing all actions and specifying which are complete and which are missing.
@@ -41,9 +41,13 @@ To fulfil your goal, follow these steps:
             content: `
 Task: ${task}
-Conversation:
+----
+Following are successfully executed actions:
 ${conversation.join("\n")}
+----
 Current page:
 ${currentPage}
         `,

package/dist/evals/update-scenario-agent.evals.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"update-scenario-agent.evals.d.ts","sourceRoot":"","sources":["../../src/evals/update-scenario-agent.evals.ts"],"names":[],"mappings":"~~AAKA~~,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAUpC,QAAA,MAAM,+BAA+B,EAAE,~~UAiDtC~~,CAAC;AAEF,eAAe,+BAA+B,CAAC"}
1	+ {"version":3,"file":"update-scenario-agent.evals.d.ts","sourceRoot":"","sources":["../../src/evals/update-scenario-agent.evals.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAUpC,QAAA,MAAM,+BAA+B,EAAE,UAqDtC,CAAC;AAEF,eAAe,+BAA+B,CAAC"}

package/dist/evals/update-scenario-agent.evals.js CHANGED Viewed

@@ -5,10 +5,9 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
 Object.defineProperty(exports, "__esModule", { value: true });
 const js_levenshtein_1 = __importDefault(require("js-levenshtein"));
 const update_flow_1 = require("../agent/codegen/update-flow");
-const utils_1 = require("../agent/codegen/utils");
 const updateScenarioCodeAgentEvaluate = async ({ item, trace }) => {
     const { testCase, testFiles, pageFiles, testFilePath, testFileContent } = item.input;
-    const response = await (0, update_flow_1.getUpdateTestCodeCompletion)({
+    const fileChanges = await (0, update_flow_1.getUpdateTestCodeCompletion)({
         testCase,
         testFiles,
         pageFiles,
@@ -16,8 +15,7 @@ const updateScenarioCodeAgentEvaluate = async ({ item, trace }) => {
         testFileContent,
         trace,
     });
-    const fileChanges = (0, utils_1.extractTestUpdates)(response);
-    const expectedFileChanges = (0, utils_1.extractTestUpdates)(item.expectedOutput);
+    const expectedFileChanges = item.expectedOutput;
     const fileChangeCount = fileChanges.length;
     const expectedFileChangeCount = expectedFileChanges.length;
     const correctFilePathChanges = expectedFileChanges.every((ef) => fileChanges.some((f) => f.filePath === ef.filePath));
@@ -43,7 +41,7 @@ const updateScenarioCodeAgentEvaluate = async ({ item, trace }) => {
                 value: score,
             },
         ],
-        output: response,
+        output: fileChanges,
     };
 };
 exports.default = updateScenarioCodeAgentEvaluate;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@empiricalrun/test-gen",
-  "version": "0.38.13",
+  "version": "0.38.15",
   "publishConfig": {
     "registry": "https://registry.npmjs.org/",
     "access": "public"
@@ -58,7 +58,7 @@
     "ts-morph": "^24.0.0",
     "tsx": "^4.16.2",
     "typescript": "^5.3.3",
-    "@empiricalrun/llm": "^0.9.25",
+    "@empiricalrun/llm": "^0.9.26",
     "@empiricalrun/r2-uploader": "^0.3.6",
     "@empiricalrun/reporter": "^0.21.3"
   },