npm - @empiricalrun/test-gen - Versions diffs - 0.35.4 → 0.35.6 - Mend

@empiricalrun/test-gen 0.35.4 → 0.35.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20)

package/CHANGELOG.md +13 -0
package/browser-injected-scripts/annotate-elements.spec.ts +258 -0
package/dist/agent/browsing/index.js +2 -2
package/dist/agent/codegen/run.d.ts +8 -0
package/dist/agent/codegen/run.d.ts.map +1 -1
package/dist/agent/codegen/run.js +38 -33
package/dist/agent/codegen/update-flow.d.ts +9 -0
package/dist/agent/codegen/update-flow.d.ts.map +1 -1
package/dist/agent/codegen/update-flow.js +44 -40
package/dist/agent/master/run.js +1 -1
package/dist/agent/verification/index.js +1 -1
package/dist/browser-injected-scripts/annotate-elements.spec.ts +258 -0
package/dist/evals/add-scenario-agent.evals.d.ts +4 -0
package/dist/evals/add-scenario-agent.evals.d.ts.map +1 -0
package/dist/evals/add-scenario-agent.evals.js +23 -0
package/dist/evals/update-scenario-agent.evals.d.ts +4 -0
package/dist/evals/update-scenario-agent.evals.d.ts.map +1 -0
package/dist/evals/update-scenario-agent.evals.js +49 -0
package/package.json +5 -2
package/playwright.config.ts +5 -0

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,18 @@
 # @empiricalrun/test-gen
+## 0.35.6
+### Patch Changes
+- 948f534: fix: update verification agent prompt
+## 0.35.5
+### Patch Changes
+- 8e18e5b: feat: add scenario code agent evals
+- d6f9de2: fix: add tests for annotation script
 ## 0.35.4
 ### Patch Changes

package/browser-injected-scripts/annotate-elements.spec.ts ADDED Viewed

@@ -0,0 +1,258 @@
+// @ts-nocheck
+import { test } from "@playwright/test";
+import path from "path";
+test("should annotate all links on empirical landing page", async ({
+  page,
+}) => {
+  await page.goto(
+    "https://assets-test.empirical.run/selector-hints-testing/dom-1.html",
+  );
+  await page.addScriptTag({
+    path: path.resolve(__dirname, "./annotate-elements.js"),
+  });
+  const annotations = await page.evaluate(() => {
+    const { annotations } = window.annotateClickableElements();
+    return Object.entries(annotations).map(([hint, config]) => ({
+      hint,
+      innerText: config.node.innerText,
+      tagName: config.node.tagName,
+      href: config.node.href,
+    }));
+  });
+  test.expect(annotations).toEqual([
+    {
+      hint: "A",
+      innerText: "Empirical",
+      tagName: "A",
+      href: "https://assets-test.empirical.run/",
+    },
+    {
+      hint: "B",
+      innerText: "Blog",
+      tagName: "A",
+      href: "https://assets-test.empirical.run/blog",
+    },
+    {
+      hint: "C",
+      innerText: "Contact us",
+      tagName: "A",
+      href: "https://assets-test.empirical.run/contact",
+    },
+    {
+      hint: "D",
+      href: "https://dash.empirical.run/",
+      innerText: "Login ↗\n(opens in a new tab)",
+      tagName: "A",
+    },
+    {
+      hint: "E",
+      innerText: "Get early access",
+      tagName: "A",
+      href: "https://assets-test.empirical.run/contact",
+    },
+    {
+      hint: "F",
+      innerText: "Playwright\n(opens in a new tab)",
+      tagName: "A",
+      href: "https://github.com/microsoft/playwright",
+    },
+    {
+      hint: "G",
+      innerText: "Meet with us",
+      tagName: "A",
+      href: "https://assets-test.empirical.run/contact",
+    },
+    {
+      hint: "H",
+      innerText: "Privacy Policy",
+      tagName: "A",
+      href: "https://assets-test.empirical.run/privacy.html",
+    },
+  ]);
+});
+test("should annotate all important items on quizizz page", async ({
+  page,
+}) => {
+  await page.goto(
+    "https://assets-test.empirical.run/selector-hints-testing/dom-2/index.html",
+  );
+  await page.addScriptTag({
+    path: path.resolve(__dirname, "./annotate-elements.js"),
+  });
+  const annotations = await page.evaluate(() => {
+    const { annotations } = window.annotateClickableElements();
+    return Object.entries(annotations).map(([hint, config]) => ({
+      hint,
+      innerText: config.node.innerText.toLowerCase().trim(),
+      tagName: config.node.tagName,
+      testId: config.node.getAttribute("data-testid"),
+      href: config.node.href,
+    }));
+  });
+  test
+    .expect(annotations.find((item) => item.innerText.includes("enter code")))
+    .toBeTruthy();
+  test
+    .expect(annotations.find((item) => item.innerText.includes("get help")))
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.innerText.includes("create") &&
+          item.testId === "create-content-button",
+      ),
+    )
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.innerText.includes("explore") &&
+          item.href === "https://quizizz.com/admin",
+      ),
+    )
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.innerText.includes("library") &&
+          item.href === "https://quizizz.com/admin/my-library/createdByMe",
+      ),
+    )
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.innerText.includes("reports") &&
+          item.href === "https://quizizz.com/admin/reports",
+      ),
+    )
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.innerText.includes("classes") &&
+          item.href === "https://quizizz.com/admin/classes",
+      ),
+    )
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.innerText.includes("accommodations") &&
+          item.href ===
+            "https://quizizz.com/admin/differentiation/accommodations",
+      ),
+    )
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.innerText.includes("quizizz ai") &&
+          item.href === "https://quizizz.com/admin/quizizz-ai",
+      ),
+    )
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.innerText.includes("start your free trial") &&
+          item.href === "https://quizizz.com/super-pricing",
+      ),
+    )
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.innerText.includes("upgrade") &&
+          item.href === "https://quizizz.com/super-pricing?backto=/admin",
+      ),
+    )
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.tagName === "INPUT" &&
+          item.testId === "emphasized-search-bar-input",
+      ),
+    )
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.tagName === "BUTTON" &&
+          item.innerText.includes("verify details") &&
+          item.testId === "verify-profile-cta",
+      ),
+    )
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.tagName === "BUTTON" && item.innerText.includes("for you"),
+      ),
+    )
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.tagName === "BUTTON" && item.innerText.includes("assessments"),
+      ),
+    )
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.tagName === "BUTTON" && item.innerText.includes("lessons"),
+      ),
+    )
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.tagName === "BUTTON" &&
+          item.innerText.includes("interactive videos"),
+      ),
+    )
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.tagName === "BUTTON" && item.innerText.includes("passages"),
+      ),
+    )
+    .toBeTruthy();
+});

package/dist/agent/browsing/index.js CHANGED Viewed

@@ -101,7 +101,7 @@ async function executeTaskUsingBrowsingAgent({ trace, action, logger, page, opti
                 await actions.executeAction(toolCall.function.name, JSON.parse(toolCall.function.arguments), toolCallsSpan);
                 executedActions.push({
                     isError: false,
-                    action: JSON.stringify(toolCall),
+                    action: JSON.parse(toolCall.function.arguments)?.reason,
                 });
                 lastActionExecTrace = "";
             }
@@ -109,7 +109,7 @@ async function executeTaskUsingBrowsingAgent({ trace, action, logger, page, opti
                 // TODO: implement feedback loop to llm
                 executedActions.push({
                     isError: true,
-                    action: JSON.stringify(toolCall.function.arguments)?.reason,
+                    action: JSON.parse(toolCall.function.arguments)?.reason,
                 });
                 lastActionExecTrace = e.message;
                 void testgenUpdatesReporter.sendMessage(e.message);

package/dist/agent/codegen/run.d.ts CHANGED Viewed

@@ -1,4 +1,12 @@
 import { TraceClient } from "@empiricalrun/llm";
 import { TestCase, TestGenConfigOptions } from "../../types";
+export declare function getAddScenarioCompletion({ testCase, testFiles, pageFiles, testFilePath, trace, options, }: {
+    testCase: TestCase;
+    testFiles: string;
+    pageFiles: string;
+    testFilePath: string;
+    trace?: TraceClient;
+    options?: TestGenConfigOptions;
+}): Promise<string>;
 export declare function generateTest(testCase: TestCase, file: string, options: TestGenConfigOptions, trace?: TraceClient): Promise<TestCase[]>;
 //# sourceMappingURL=run.d.ts.map

package/dist/agent/codegen/run.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkC,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAkBhF,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,wBAAsB,YAAY,CAChC,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,EAC7B,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,QAAQ,EAAE,CAAC,CA0GrB"}
1	+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkC,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAkBhF,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,wBAAsB,wBAAwB,CAAC,EAC7C,QAAQ,EACR,SAAS,EACT,SAAS,EACT,YAAY,EACZ,KAAK,EACL,OAAO,GACR,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,mBA4BA;AAED,wBAAsB,YAAY,CAChC,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,EAC7B,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,QAAQ,EAAE,CAAC,CAgFrB"}

package/dist/agent/codegen/run.js CHANGED Viewed

@@ -3,7 +3,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.generateTest = void 0;
+exports.generateTest = exports.getAddScenarioCompletion = void 0;
 const llm_1 = require("@empiricalrun/llm");
 const fs_extra_1 = __importDefault(require("fs-extra"));
 const logger_1 = require("../../bin/logger");
@@ -12,6 +12,35 @@ const web_1 = require("../../bin/utils/platform/web");
 const constants_1 = require("../../constants");
 const fix_ts_errors_1 = require("./fix-ts-errors");
 const update_flow_1 = require("./update-flow");
+async function getAddScenarioCompletion({ testCase, testFiles, pageFiles, testFilePath, trace, options, }) {
+    const promptSpan = trace?.span({
+        name: "add-scenario-prompt",
+    });
+    const instruction = await (0, llm_1.getPrompt)("add-scenario", {
+        testFiles: testFiles,
+        pageFiles: pageFiles,
+        scenarioName: testCase.name,
+        scenarioSteps: testCase.steps.join("\n"),
+        scenarioFile: testFilePath,
+    });
+    promptSpan?.end({ output: { instruction } });
+    const llm = new llm_1.LLM({
+        trace,
+        provider: options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
+        defaultModel: options?.model || constants_1.DEFAULT_MODEL,
+        providerApiKey: constants_1.MODEL_API_KEYS[options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER],
+    });
+    const firstShotMessage = await llm.createChatCompletion({
+        messages: instruction,
+        modelParameters: {
+            ...constants_1.DEFAULT_MODEL_PARAMETERS,
+            ...options?.modelParameters,
+        },
+    });
+    let response = firstShotMessage?.content || "";
+    return response;
+}
+exports.getAddScenarioCompletion = getAddScenarioCompletion;
 async function generateTest(testCase, file, options, trace) {
     const logger = new logger_1.CustomLogger();
     if (!fs_extra_1.default.existsSync(file)) {
@@ -35,43 +64,19 @@ async function generateTest(testCase, file, options, trace) {
         name: "create-test",
         input: {
             testCase,
-            file,
-            options,
+            testFiles: codePrompt,
+            pageFiles: pomPrompt,
+            testFilePath: file,
         },
     });
-    createTestSpan?.event({
-        name: "collate-files-as-text",
-        output: {
-            codePrompt,
-            pomPrompt,
-            testFileContent,
-        },
-    });
-    const promptSpan = createTestSpan?.span({
-        name: "add-scenario-prompt",
-    });
-    const instruction = await (0, llm_1.getPrompt)("add-scenario", {
+    const response = await getAddScenarioCompletion({
+        testCase,
         testFiles: codePrompt,
         pageFiles: pomPrompt,
-        scenarioName: testCase.name,
-        scenarioSteps: testCase.steps.join("\n"),
-        scenarioFile: file,
-    });
-    promptSpan?.end({ output: { instruction } });
-    const llm = new llm_1.LLM({
-        trace,
-        provider: options.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
-        defaultModel: options.model || constants_1.DEFAULT_MODEL,
-        providerApiKey: constants_1.MODEL_API_KEYS[options.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER],
-    });
-    const firstShotMessage = await llm.createChatCompletion({
-        messages: instruction,
-        modelParameters: {
-            ...constants_1.DEFAULT_MODEL_PARAMETERS,
-            ...options.modelParameters,
-        },
+        testFilePath: file,
+        trace: createTestSpan,
+        options,
     });
-    let response = firstShotMessage?.content || "";
     logger.success("Test generated successfully!");
     const readWriteFileSpan = trace?.span({ name: "write-to-file" });
     let contents = fs_extra_1.default.readFileSync(file, "utf-8");

package/dist/agent/codegen/update-flow.d.ts CHANGED Viewed

@@ -3,6 +3,15 @@ import { TestCase, TestGenConfigOptions } from "../../types";
 type UpdatedTestCase = TestCase & {
     updatedFiles: string[];
 };
+export declare function getUpdateTestCodeCompletion({ testCase, testFileContent, testFiles, pageFiles, testFilePath, trace, options, }: {
+    testCase: TestCase;
+    testFiles: string;
+    pageFiles: string;
+    testFilePath: string;
+    testFileContent: string;
+    trace?: TraceClient;
+    options?: TestGenConfigOptions;
+}): Promise<string>;
 export declare function updateTest(testCase: TestCase, file: string, options: TestGenConfigOptions | undefined, logging?: boolean, validate?: boolean, trace?: TraceClient): Promise<UpdatedTestCase[]>;
 export declare function appendCreateTestBlock({ testCase, file, options, trace, validateTypes, }: {
     testCase: TestCase;

package/dist/agent/codegen/update-flow.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"update-flow.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/update-flow.ts"],"names":[],"mappings":"AAAA,OAAO,EAKL,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAsB3B,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,KAAK,eAAe,GAAG,QAAQ,GAAG;IAChC,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB,CAAC;AAqIF,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAAG,SAAS,EACzC,OAAO,GAAE,OAAc,EACvB,QAAQ,GAAE,OAAc,EACxB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,eAAe,EAAE,CAAC,CAsG5B;AAED,wBAAsB,qBAAqB,CAAC,EAC1C,QAAQ,EACR,IAAI,EACJ,OAAO,EACP,KAAK,EACL,aAAoB,GACrB,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CA+E7B"}
1	+ {"version":3,"file":"update-flow.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/update-flow.ts"],"names":[],"mappings":"AAAA,OAAO,EAKL,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAsB3B,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,KAAK,eAAe,GAAG,QAAQ,GAAG;IAChC,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB,CAAC;AAqIF,wBAAsB,2BAA2B,CAAC,EAChD,QAAQ,EACR,eAAe,EACf,SAAS,EACT,SAAS,EACT,YAAY,EACZ,KAAK,EACL,OAAO,GACR,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,GAAG,OAAO,CAAC,MAAM,CAAC,CA6ClB;AAED,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAAG,SAAS,EACzC,OAAO,GAAE,OAAc,EACvB,QAAQ,GAAE,OAAc,EACxB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,eAAe,EAAE,CAAC,CA6D5B;AAED,wBAAsB,qBAAqB,CAAC,EAC1C,QAAQ,EACR,IAAI,EACJ,OAAO,EACP,KAAK,EACL,aAAoB,GACrB,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CA+E7B"}

package/dist/agent/codegen/update-flow.js CHANGED Viewed

@@ -3,7 +3,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.appendCreateTestBlock = exports.updateTest = void 0;
+exports.appendCreateTestBlock = exports.updateTest = exports.getUpdateTestCodeCompletion = void 0;
 const llm_1 = require("@empiricalrun/llm");
 const crypto_1 = __importDefault(require("crypto"));
 const fs_extra_1 = __importDefault(require("fs-extra"));
@@ -103,41 +103,8 @@ async function applyFileChanges({ validateTypes = true, trace, testCase, fileCha
         logger.success(`${fileChange.filePath} file formatted successfully!`);
     }));
 }
-async function updateTest(testCase, file, options, logging = true, validate = true, trace) {
-    const logger = new logger_1.CustomLogger({ useReporter: logging });
-    const context = await (0, context_1.contextForGeneration)(file);
-    const { codePrompt, pomPrompt, testFileContent } = context;
-    const generatedTestCases = [];
-    logger.logEmptyLine();
-    const session = (0, session_1.getSessionDetails)();
-    trace =
-        trace ||
-            llm_1.langfuseInstance?.trace({
-                name: "update-test",
-                id: crypto_1.default.randomUUID(),
-                release: session.version,
-                tags: [
-                    options?.metadata.projectName || "",
-                    options?.metadata.environment || "",
-                ].filter((s) => !!s),
-            });
-    const updateTestSpan = trace?.span({
-        name: "update-test",
-        input: {
-            testCase,
-            file,
-            options,
-        },
-    });
-    updateTestSpan?.event({
-        name: "collate-files-as-text",
-        output: {
-            codePrompt,
-            pomPrompt,
-            testFileContent,
-        },
-    });
-    const promptSpan = updateTestSpan?.span({
+async function getUpdateTestCodeCompletion({ testCase, testFileContent, testFiles, pageFiles, testFilePath, trace, options, }) {
+    const promptSpan = trace?.span({
         name: "update-scenario-prompt",
     });
     const promptName = "update-scenario";
@@ -154,16 +121,16 @@ async function updateTest(testCase, file, options, logging = true, validate = tr
         suites: testCase?.suites || [],
     });
     const instruction = await (0, llm_1.getPrompt)(promptName, {
-        testFiles: codePrompt,
-        pageFiles: pomPrompt,
+        testFiles: testFiles,
+        pageFiles: pageFiles,
         scenarioName,
         scenarioSteps: testCase.steps.join("\n"),
-        scenarioFile: file,
+        scenarioFile: testFilePath,
         currentScenarioCodeBlock,
     });
     promptSpan?.end({ output: { instruction } });
     const llm = new llm_1.LLM({
-        trace: updateTestSpan,
+        trace,
         provider: options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
         defaultModel: options?.model || constants_1.DEFAULT_MODEL,
         providerApiKey: constants_1.MODEL_API_KEYS[options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER],
@@ -176,6 +143,43 @@ async function updateTest(testCase, file, options, logging = true, validate = tr
         },
     });
     let response = firstShotMessage?.content || "";
+    return response;
+}
+exports.getUpdateTestCodeCompletion = getUpdateTestCodeCompletion;
+async function updateTest(testCase, file, options, logging = true, validate = true, trace) {
+    const logger = new logger_1.CustomLogger({ useReporter: logging });
+    const context = await (0, context_1.contextForGeneration)(file);
+    const { codePrompt, pomPrompt, testFileContent } = context;
+    const generatedTestCases = [];
+    logger.logEmptyLine();
+    const session = (0, session_1.getSessionDetails)();
+    trace =
+        trace ||
+            llm_1.langfuseInstance?.trace({
+                name: "update-test",
+                id: crypto_1.default.randomUUID(),
+                release: session.version,
+                tags: [
+                    options?.metadata.projectName || "",
+                    options?.metadata.environment || "",
+                ].filter((s) => !!s),
+            });
+    const request = {
+        testCase,
+        testFileContent,
+        testFiles: codePrompt,
+        pageFiles: pomPrompt,
+        testFilePath: file,
+        options,
+    };
+    const updateTestSpan = trace?.span({
+        name: "update-test",
+        input: request,
+    });
+    const response = await getUpdateTestCodeCompletion({
+        ...request,
+        trace: updateTestSpan,
+    });
     logger.success("Test generated successfully!");
     const fileChanges = (0, utils_1.extractTestUpdates)(response);
     await applyFileChanges({

package/dist/agent/master/run.js CHANGED Viewed

@@ -40,7 +40,7 @@ async function getNextAction({ task, executedActions, failedActions, pageUrl, tr
         failedActions: failedActions.map((a) => a).join("\n"),
         executedActions: executedActions.map((a) => a).join("\n"),
         pageUrl,
-    }, useHints ? 16 : 14);
+    }, 18);
     // assuming there is only one user message in the prompt. if there is a change in langfuse prompt format, this will need to be updated
     const userMessage = promptMessages.filter((m) => m.role === "user")[0];
     const systemMessage = promptMessages.filter((m) => m.role === "system")[0];

package/dist/agent/verification/index.js CHANGED Viewed

@@ -16,7 +16,7 @@ async function verificationAgent({ trace, task, conversation, }) {
     const messages = await (0, llm_1.getPrompt)("agent-steps-verification", {
         task,
         conversation: conversation.join("\n"),
-    }, 4);
+    }, 5);
     const llm = new llm_1.LLM({ provider: "openai" });
     const response = await llm.createChatCompletion({
         trace: verificationAgentSpan,

package/dist/browser-injected-scripts/annotate-elements.spec.ts ADDED Viewed

@@ -0,0 +1,258 @@
+// @ts-nocheck
+import { test } from "@playwright/test";
+import path from "path";
+test("should annotate all links on empirical landing page", async ({
+  page,
+}) => {
+  await page.goto(
+    "https://assets-test.empirical.run/selector-hints-testing/dom-1.html",
+  );
+  await page.addScriptTag({
+    path: path.resolve(__dirname, "./annotate-elements.js"),
+  });
+  const annotations = await page.evaluate(() => {
+    const { annotations } = window.annotateClickableElements();
+    return Object.entries(annotations).map(([hint, config]) => ({
+      hint,
+      innerText: config.node.innerText,
+      tagName: config.node.tagName,
+      href: config.node.href,
+    }));
+  });
+  test.expect(annotations).toEqual([
+    {
+      hint: "A",
+      innerText: "Empirical",
+      tagName: "A",
+      href: "https://assets-test.empirical.run/",
+    },
+    {
+      hint: "B",
+      innerText: "Blog",
+      tagName: "A",
+      href: "https://assets-test.empirical.run/blog",
+    },
+    {
+      hint: "C",
+      innerText: "Contact us",
+      tagName: "A",
+      href: "https://assets-test.empirical.run/contact",
+    },
+    {
+      hint: "D",
+      href: "https://dash.empirical.run/",
+      innerText: "Login ↗\n(opens in a new tab)",
+      tagName: "A",
+    },
+    {
+      hint: "E",
+      innerText: "Get early access",
+      tagName: "A",
+      href: "https://assets-test.empirical.run/contact",
+    },
+    {
+      hint: "F",
+      innerText: "Playwright\n(opens in a new tab)",
+      tagName: "A",
+      href: "https://github.com/microsoft/playwright",
+    },
+    {
+      hint: "G",
+      innerText: "Meet with us",
+      tagName: "A",
+      href: "https://assets-test.empirical.run/contact",
+    },
+    {
+      hint: "H",
+      innerText: "Privacy Policy",
+      tagName: "A",
+      href: "https://assets-test.empirical.run/privacy.html",
+    },
+  ]);
+});
+test("should annotate all important items on quizizz page", async ({
+  page,
+}) => {
+  await page.goto(
+    "https://assets-test.empirical.run/selector-hints-testing/dom-2/index.html",
+  );
+  await page.addScriptTag({
+    path: path.resolve(__dirname, "./annotate-elements.js"),
+  });
+  const annotations = await page.evaluate(() => {
+    const { annotations } = window.annotateClickableElements();
+    return Object.entries(annotations).map(([hint, config]) => ({
+      hint,
+      innerText: config.node.innerText.toLowerCase().trim(),
+      tagName: config.node.tagName,
+      testId: config.node.getAttribute("data-testid"),
+      href: config.node.href,
+    }));
+  });
+  test
+    .expect(annotations.find((item) => item.innerText.includes("enter code")))
+    .toBeTruthy();
+  test
+    .expect(annotations.find((item) => item.innerText.includes("get help")))
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.innerText.includes("create") &&
+          item.testId === "create-content-button",
+      ),
+    )
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.innerText.includes("explore") &&
+          item.href === "https://quizizz.com/admin",
+      ),
+    )
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.innerText.includes("library") &&
+          item.href === "https://quizizz.com/admin/my-library/createdByMe",
+      ),
+    )
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.innerText.includes("reports") &&
+          item.href === "https://quizizz.com/admin/reports",
+      ),
+    )
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.innerText.includes("classes") &&
+          item.href === "https://quizizz.com/admin/classes",
+      ),
+    )
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.innerText.includes("accommodations") &&
+          item.href ===
+            "https://quizizz.com/admin/differentiation/accommodations",
+      ),
+    )
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.innerText.includes("quizizz ai") &&
+          item.href === "https://quizizz.com/admin/quizizz-ai",
+      ),
+    )
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.innerText.includes("start your free trial") &&
+          item.href === "https://quizizz.com/super-pricing",
+      ),
+    )
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.innerText.includes("upgrade") &&
+          item.href === "https://quizizz.com/super-pricing?backto=/admin",
+      ),
+    )
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.tagName === "INPUT" &&
+          item.testId === "emphasized-search-bar-input",
+      ),
+    )
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.tagName === "BUTTON" &&
+          item.innerText.includes("verify details") &&
+          item.testId === "verify-profile-cta",
+      ),
+    )
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.tagName === "BUTTON" && item.innerText.includes("for you"),
+      ),
+    )
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.tagName === "BUTTON" && item.innerText.includes("assessments"),
+      ),
+    )
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.tagName === "BUTTON" && item.innerText.includes("lessons"),
+      ),
+    )
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.tagName === "BUTTON" &&
+          item.innerText.includes("interactive videos"),
+      ),
+    )
+    .toBeTruthy();
+  test
+    .expect(
+      annotations.find(
+        (item) =>
+          item.tagName === "BUTTON" && item.innerText.includes("passages"),
+      ),
+    )
+    .toBeTruthy();
+});

package/dist/evals/add-scenario-agent.evals.d.ts ADDED Viewed

@@ -0,0 +1,4 @@
+import { EvaluateFn } from "./type";
+declare const addScenarioCodeAgentEvaluate: EvaluateFn;
+export default addScenarioCodeAgentEvaluate;
+//# sourceMappingURL=add-scenario-agent.evals.d.ts.map

package/dist/evals/add-scenario-agent.evals.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"add-scenario-agent.evals.d.ts","sourceRoot":"","sources":["../../src/evals/add-scenario-agent.evals.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAEpC,QAAA,MAAM,4BAA4B,EAAE,UAkBnC,CAAC;AAEF,eAAe,4BAA4B,CAAC"}

package/dist/evals/add-scenario-agent.evals.js ADDED Viewed

@@ -0,0 +1,23 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+const run_1 = require("../agent/codegen/run");
+const addScenarioCodeAgentEvaluate = async ({ item, trace }) => {
+    const { testCase, testFiles, pageFiles, testFilePath } = item.input;
+    const response = await (0, run_1.getAddScenarioCompletion)({
+        testCase,
+        testFiles,
+        pageFiles,
+        testFilePath,
+        trace,
+    });
+    return {
+        scores: [
+            {
+                name: "equality",
+                value: item.expectedOutput === response ? 1 : 0,
+            },
+        ],
+        output: response,
+    };
+};
+exports.default = addScenarioCodeAgentEvaluate;

package/dist/evals/update-scenario-agent.evals.d.ts ADDED Viewed

@@ -0,0 +1,4 @@
+import { EvaluateFn } from "./type";
+declare const updateScenarioCodeAgentEvaluate: EvaluateFn;
+export default updateScenarioCodeAgentEvaluate;
+//# sourceMappingURL=update-scenario-agent.evals.d.ts.map

package/dist/evals/update-scenario-agent.evals.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"update-scenario-agent.evals.d.ts","sourceRoot":"","sources":["../../src/evals/update-scenario-agent.evals.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAUpC,QAAA,MAAM,+BAA+B,EAAE,UAiDtC,CAAC;AAEF,eAAe,+BAA+B,CAAC"}

package/dist/evals/update-scenario-agent.evals.js ADDED Viewed

@@ -0,0 +1,49 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+const js_levenshtein_1 = __importDefault(require("js-levenshtein"));
+const update_flow_1 = require("../agent/codegen/update-flow");
+const utils_1 = require("../agent/codegen/utils");
+const updateScenarioCodeAgentEvaluate = async ({ item, trace }) => {
+    const { testCase, testFiles, pageFiles, testFilePath, testFileContent } = item.input;
+    const response = await (0, update_flow_1.getUpdateTestCodeCompletion)({
+        testCase,
+        testFiles,
+        pageFiles,
+        testFilePath,
+        testFileContent,
+        trace,
+    });
+    const fileChanges = (0, utils_1.extractTestUpdates)(response);
+    const expectedFileChanges = (0, utils_1.extractTestUpdates)(item.expectedOutput);
+    const fileChangeCount = fileChanges.length;
+    const expectedFileChangeCount = expectedFileChanges.length;
+    const correctFilePathChanges = expectedFileChanges.every((ef) => fileChanges.some((f) => f.filePath === ef.filePath));
+    const distanceScores = [];
+    expectedFileChanges.forEach((ef) => fileChanges.forEach((f) => {
+        if (f.filePath === ef.filePath && f.newCode && ef.newCode) {
+            const maxLength = ef.newCode.length > f.newCode.length
+                ? ef.newCode.length
+                : f.newCode.length;
+            distanceScores.push(1 - (0, js_levenshtein_1.default)(f.newCode || "", ef.newCode || "") / maxLength);
+        }
+    }));
+    let score = 0;
+    if (fileChangeCount === expectedFileChangeCount && correctFilePathChanges) {
+        score = distanceScores.length
+            ? distanceScores.reduce((agg, s) => agg * s)
+            : 0;
+    }
+    return {
+        scores: [
+            {
+                name: "score",
+                value: score,
+            },
+        ],
+        output: response,
+    };
+};
+exports.default = updateScenarioCodeAgentEvaluate;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@empiricalrun/test-gen",
-  "version": "0.35.4",
+  "version": "0.35.6",
   "publishConfig": {
     "registry": "https://registry.npmjs.org/",
     "access": "public"
@@ -52,8 +52,10 @@
     "@types/detect-port": "^1.3.5",
     "@types/express": "^4.17.21",
     "@types/fs-extra": "^11.0.4",
+    "@types/js-levenshtein": "^1.1.3",
     "@types/lodash.isequal": "^4.5.8",
-    "@types/md5": "^2.3.5"
+    "@types/md5": "^2.3.5",
+    "js-levenshtein": "^1.1.6"
   },
   "scripts": {
     "dev": "tsc --build --watch",
@@ -61,6 +63,7 @@
     "clean": "tsc --build --clean",
     "lint": "eslint .",
     "test": "vitest run",
+    "e2e-test": "npx playwright test",
     "test:watch": "vitest",
     "test:watch-files": "vitest $0 --watch"
   }

package/playwright.config.ts ADDED Viewed

@@ -0,0 +1,5 @@
+import { defineConfig } from "@playwright/test";
+export default defineConfig({
+  testDir: "./browser-injected-scripts",
+});