@empiricalrun/test-gen 0.42.29 → 0.43.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,22 @@
  # @empiricalrun/test-gen

+ ## 0.43.1
+
+ ### Patch Changes
+
+ - 64f275a: feat: add `--token` arg name to cli interface
+
+ ## 0.43.0
+
+ ### Minor Changes
+
+ - 46c0dab: feat: autofix workflow v1
+
+ ### Patch Changes
+
+ - Updated dependencies [46c0dab]
+   - @empiricalrun/llm@0.9.35
+
  ## 0.42.29

  ### Patch Changes
package/README.md CHANGED
@@ -8,13 +8,13 @@ Our agents that generate Playwright tests. There are 2 agents
  ## Usage

  ```sh
- npx @empiricalrun/test-gen TEST_GEN_TOKEN
+ npx @empiricalrun/test-gen --token TEST_GEN_TOKEN
  ```

  ### Add new test

  ```sh
- npx @empiricalrun/test-gen TEST_GEN_TOKEN
+ npx @empiricalrun/test-gen --token TEST_GEN_TOKEN
  ```

  - This will trigger browsing agent to write a new test for this scenario
@@ -23,7 +23,7 @@ npx @empiricalrun/test-gen TEST_GEN_TOKEN
  ### Update existing test

  ```sh
- npx @empiricalrun/test-gen TEST_GEN_TOKEN
+ npx @empiricalrun/test-gen --token TEST_GEN_TOKEN
  ```

  - If the test case is already present in the file, the test gen agent will update the existing test as per the steps provided in the payload
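The `--token` value is not an opaque API key: `loadTestConfigs` in `dist/bin/utils/scenarios/index.js` further down in this diff decodes it with `decodeURIComponent(atob(token))` and parses the result as JSON. A minimal sketch of building such a token, assuming Node's global `btoa`; the `filePath` value is illustrative and not the full `TestGenConfig` shape:

```ts
// Sketch: reverse of the decoding done by loadTestConfigs
// (atob -> decodeURIComponent -> JSON.parse). Field values are illustrative;
// the real shape is TestGenConfig from @empiricalrun/shared-types.
const config = {
  filePath: "login.spec.ts", // loadTestConfigs falls back to "index.spec.ts" when omitted
};

const token = btoa(encodeURIComponent(JSON.stringify(config)));
console.log(`npx @empiricalrun/test-gen --token ${token}`);
```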
package/dist/agent/diagnosis-agent/index.d.ts ADDED
@@ -0,0 +1,18 @@
+ import { TraceClient } from "@empiricalrun/llm";
+ import { TestErrorDiagnosisDetails, TestGenConfigOptions } from "@empiricalrun/shared-types";
+ import { CustomLogger } from "../../bin/logger";
+ /**
+  *
+  * inputs
+  * - task
+  * - diagnosis
+  */
+ export declare function createTaskUsingFailureDiagnosis({ options, trace, diagnosis, logger, }: {
+     options?: TestGenConfigOptions;
+     trace?: TraceClient;
+     diagnosis: TestErrorDiagnosisDetails;
+     logger?: CustomLogger;
+ }): Promise<{
+     task: string;
+ }>;
+ //# sourceMappingURL=index.d.ts.map
package/dist/agent/diagnosis-agent/index.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/diagnosis-agent/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAyB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACvE,OAAO,EACL,yBAAyB,EACzB,oBAAoB,EACrB,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAuChD;;;;;GAKG;AACH,wBAAsB,+BAA+B,CAAC,EACpD,OAAO,EACP,KAAK,EACL,SAAS,EACT,MAAM,GACP,EAAE;IACD,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,SAAS,EAAE,yBAAyB,CAAC;IACrC,MAAM,CAAC,EAAE,YAAY,CAAC;CACvB,GAAG,OAAO,CAAC;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC,CA8E5B"}
package/dist/agent/diagnosis-agent/index.js ADDED
@@ -0,0 +1,105 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.createTaskUsingFailureDiagnosis = void 0;
+ const llm_1 = require("@empiricalrun/llm");
+ const session_1 = require("../../session");
+ const strict_mode_violation_1 = require("./strict-mode-violation");
+ const session = (0, session_1.getSessionDetails)();
+ const responseFormat = {
+     type: "json_schema",
+     json_schema: {
+         name: "test-case-auto-fix-summary",
+         strict: true,
+         schema: {
+             type: "object",
+             properties: {
+                 observation: {
+                     type: "array",
+                     items: {
+                         type: "string",
+                     },
+                     description: "Detailed observation of what changed between successful and failed test screenshots",
+                 },
+                 action: {
+                     type: "string",
+                     description: "Direct action to fix the test in natural language without code snippets or options",
+                 },
+             },
+             required: ["observation", "action"],
+             additionalProperties: false,
+         },
+     },
+ };
+ /**
+  *
+  * inputs
+  * - task
+  * - diagnosis
+  */
+ async function createTaskUsingFailureDiagnosis({ options, trace, diagnosis, logger, }) {
+     trace =
+         trace ||
+             llm_1.langfuseInstance?.trace({
+                 name: "infer-agent-task",
+                 id: crypto.randomUUID(),
+                 release: session.version,
+             });
+     const failureDiagnosisSpan = trace?.span({
+         name: "auto-fix",
+         input: {
+             diagnosisId: diagnosis.diagnosisId,
+             prjRepoName: options?.metadata.projectRepoName,
+         },
+     });
+     logger?.log("Trying to fix the test using failure diagnosis. Fetching key moments of the diagnosis");
+     const resp = await fetch(diagnosis.keyMomentsUrl);
+     // TODO: check for response to be not ok
+     if (resp.ok) {
+         logger?.success("Successfully fetched key moments of the diagnosis");
+     }
+     else {
+         logger?.warn("Failed to fetch key moments of the diagnosis");
+     }
+     const screenshotsData = await resp.json();
+     const llm = new llm_1.LLM({
+         provider: "openai",
+         defaultModel: "o1",
+         trace,
+     });
+     // TODO: make this dynamic in nature. the prompts should be made receipe
+     // which will help to get rid of if else logic
+     // receipe to have:
+     // 1. selection criteria
+     // 2. job to be done - in this case generate a prompt
+     let prompt;
+     if (diagnosis.failed_run_metadata.stack.includes("strict mode violation")) {
+         prompt = (0, strict_mode_violation_1.fixStrictModeViolationPrompt)({
+             screenshotsData,
+             diagnosis,
+         });
+     }
+     if (prompt) {
+         const llmResponse = await llm.createChatCompletion({
+             messages: prompt,
+             modelParameters: {
+                 max_completion_tokens: 40000,
+             },
+             responseFormat,
+         });
+         const { observation, action } = JSON.parse(llmResponse?.content);
+         failureDiagnosisSpan?.update({
+             output: {
+                 observation,
+                 action,
+             },
+         });
+         return {
+             task: action,
+         };
+     }
+     // TODO: handle default prompt
+     return {
+         task: "",
+     };
+ }
+ exports.createTaskUsingFailureDiagnosis = createTaskUsingFailureDiagnosis;
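A hedged usage sketch of the new diagnosis agent, based only on the declaration above and the fields this file reads from `diagnosis` (`diagnosisId`, `keyMomentsUrl`, `failingLine`, `failed_run_metadata.stack`); the import path and all field values are assumptions for illustration:

```ts
// Assumed deep import path; the package may not expose this entry point publicly.
import { createTaskUsingFailureDiagnosis } from "@empiricalrun/test-gen/dist/agent/diagnosis-agent";

// Illustrative values only; keyMomentsUrl must return { success: string[], failure: string[] }.
const diagnosis = {
  diagnosisId: "diag-123",
  keyMomentsUrl: "https://example.com/key-moments.json",
  failingLine: 'await page.getByText("Audience").click();',
  failed_run_metadata: {
    stack: 'Error: strict mode violation: getByText("Audience") resolved to 2 elements ...',
  },
};

// Cast for the sketch; the real parameter type is TestErrorDiagnosisDetails.
// Returns a natural-language fix ("task"), or "" when no prompt recipe matched.
const { task } = await createTaskUsingFailureDiagnosis({ diagnosis: diagnosis as any });
console.log(task);
```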
package/dist/agent/diagnosis-agent/strict-mode-violation.d.ts ADDED
@@ -0,0 +1,9 @@
+ import { TestErrorDiagnosisDetails } from "@empiricalrun/shared-types";
+ export declare function fixStrictModeViolationPrompt({ screenshotsData, diagnosis, }: {
+     screenshotsData: {
+         success: string[];
+         failure: string[];
+     };
+     diagnosis: TestErrorDiagnosisDetails;
+ }): import("openai/resources/index.mjs").ChatCompletionMessageParam[];
+ //# sourceMappingURL=strict-mode-violation.d.ts.map
package/dist/agent/diagnosis-agent/strict-mode-violation.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"strict-mode-violation.d.ts","sourceRoot":"","sources":["../../../src/agent/diagnosis-agent/strict-mode-violation.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,yBAAyB,EAAE,MAAM,4BAA4B,CAAC;AAoBvE,wBAAgB,4BAA4B,CAAC,EAC3C,eAAe,EACf,SAAS,GACV,EAAE;IACD,eAAe,EAAE;QAAE,OAAO,EAAE,MAAM,EAAE,CAAC;QAAC,OAAO,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC;IAC1D,SAAS,EAAE,yBAAyB,CAAC;CACtC,qEAiBA"}
package/dist/agent/diagnosis-agent/strict-mode-violation.js ADDED
@@ -0,0 +1,31 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.fixStrictModeViolationPrompt = void 0;
+ const llm_1 = require("@empiricalrun/llm");
+ const promptTemplate_0 = "{{#section \"system\"}}\nAs a software engineer, your task is to identify a fix for a failing Playwright test by analyzing screenshots of both the failed and successful test steps.\n\nKey issues for test failures include:\n- Duplicate elements for the same Playwright selector.\n\nInstructions:\n1. Examine the provided successful and failed test screenshots.\n2. Identify the correct element for action based on these observations on the successful test run screenshots.\n3. Sometimes the exact same locator is not available on failed test run screenshot, you need to identify the intent from successful test screenshots and apply that intent in failed test run screenshot to identify the right locator to interact with\n4. Evaluate the playwright selector options provided to you to execute the action. Pick the selector which best matches the intent of the test.\n5. Propose a precise action that addresses the issue.\n\nExample:\n- observation: \n - Current step failure: await page.getByText(\"Audience\").click()\n - Two similar buttons named \"Audience\" exist in the failed run screenshots\n - The successful test run clicked on \"Untracked Audience\"\n - The failed test run should click on \"Untracked Audience\"\n - Available locators: await page.getByText(\"Untracked Audience\").click() contain the selector for Untracked Audience\n- action: Replace failing line with await page.getByText(\"Untracked Audience\").click()\n\nYour action should:\n- Be directly actionable and free of ambiguity, as it will guide another LLM to generate code.\n- Be in natural language and not just code snippet.\n- Be verified as feasible on the failure screen before responding.\n- Choose from the provided possible actions that can be executed on the failure screen.\n- Action should adhere to the format mentioned in the example, i.e. it should start with \"Replace the failing line\" and the updated code with replaced selector following it.\n\nEnsure the action is executable based on the failure screen context before providing it.\n{{/section}}\n\n{{#section \"user\"}}\nSuccessful test screenshots\n\n{{images successScreenshots}}\n\nFailed test screenshots\n\n{{images failedScreenshots}}\n\nStep where test failed:\n{{failingLine}}\n\nOptions for Playwright selectors to perform actions on a failed test screen:\n{{selectorOptions}}\n\n{{/section}}\n\n";
+ function extractLocatorOptions(errorStack) {
+     // This regex matches a chain of locator API calls following the pattern:
+     // functionName(arguments) optionally chained with .functionName(arguments)
+     const regex = /aka\s+((?:[A-Za-z0-9_]+\([^)]*\)(?:\.[A-Za-z0-9_]+\([^)]*\))*))/g;
+     const options = [];
+     let match;
+     while ((match = regex.exec(errorStack)) !== null) {
+         if (match[1]) {
+             options.push(match[1]);
+         }
+     }
+     return options;
+ }
+ function fixStrictModeViolationPrompt({ screenshotsData, diagnosis, }) {
+     const compiledPrompt = (0, llm_1.compilePrompt)(promptTemplate_0, {
+         failingLine: diagnosis.failingLine,
+         successScreenshots: screenshotsData.success,
+         failedScreenshots: screenshotsData.failure,
+         selectorOptions: extractLocatorOptions(diagnosis.failed_run_metadata.stack).join("\n"),
+     }, {
+         imageDetail: "high",
+         modelProvider: "openai",
+     });
+     return compiledPrompt;
+ }
+ exports.fixStrictModeViolationPrompt = fixStrictModeViolationPrompt;
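For context on what `extractLocatorOptions` pulls out of a Playwright error, here is a self-contained sketch running the same regex on a made-up strict mode violation stack; the error text and locators are illustrative:

```ts
// Fabricated example of a Playwright "strict mode violation" error stack.
const stack = `Error: strict mode violation: getByText('Audience') resolved to 2 elements:
    1) <button>…</button> aka getByRole('button', { name: 'Audience' })
    2) <span>…</span> aka locator('#side-nav').getByText('Untracked Audience')`;

// Same pattern as extractLocatorOptions above: capture call chains that follow "aka".
const regex = /aka\s+((?:[A-Za-z0-9_]+\([^)]*\)(?:\.[A-Za-z0-9_]+\([^)]*\))*))/g;
const options: string[] = [];
let match: RegExpExecArray | null;
while ((match = regex.exec(stack)) !== null) {
  if (match[1]) options.push(match[1]);
}

console.log(options.join("\n"));
// getByRole('button', { name: 'Audience' })
// locator('#side-nav').getByText('Untracked Audience')
```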
package/dist/bin/index.js CHANGED
@@ -35,6 +35,7 @@ const run_1 = require("../agent/browsing/run");
  const utils_1 = require("../agent/browsing/utils");
  const repo_edit_1 = require("../agent/codegen/repo-edit");
  const run_2 = require("../agent/codegen/run");
+ const diagnosis_agent_1 = require("../agent/diagnosis-agent");
  const enrich_prompt_1 = require("../agent/enrich-prompt");
  const infer_agent_1 = require("../agent/infer-agent");
  const run_3 = require("../agent/planner/run");
@@ -112,6 +113,20 @@ async function runAgent(testGenConfig, span) {
  });
  return;
  }
+ // TODO: this needs to be moved to an orchestrator which decides what needs to be done first before executing the sub tasks
+ if (testGenConfig.testErrorDiagnosis &&
+ testGenConfig.testErrorDiagnosis.failingLine &&
+ // TODO: fix this hardcoding of user prompt - ideally its an auto fix intent
+ testCase.steps[0] == "Can you please fix the test") {
+ const { task: updatedTask } = await (0, diagnosis_agent_1.createTaskUsingFailureDiagnosis)({
+ options: testGenConfig.options,
+ trace,
+ diagnosis: testGenConfig.testErrorDiagnosis,
+ });
+ if (updatedTask) {
+ testCase.steps = [updatedTask];
+ }
+ }
  if (!agent || agent === "auto") {
  agent = await resolveAgentUsingTask({
  testCase,
@@ -162,7 +177,7 @@ async function runAgent(testGenConfig, span) {
  // this is where test gen starts executing on giving the command from ci
  const logger = new logger_1.CustomLogger({ useReporter: false });
  if (process.argv.length < 3) {
- logger.error("Please provide path to scenarios using command:", "npx @empiricalrun/test-gen <TEST_GEN_TOKEN>");
+ logger.error("Please provide path to scenarios using command:", "npx @empiricalrun/test-gen --token <TEST_GEN_TOKEN>");
  process.exit(1);
  }
  const { testGenConfig } = await (0, utils_2.parseCliArgs)();
@@ -172,9 +187,9 @@ async function runAgent(testGenConfig, span) {
  generationId: testGenConfig.options?.metadata.generationId,
  });
  (0, session_1.setSessionDetails)({
+ testCaseId: testGenConfig.testCase.id,
  sessionId: testGenConfig.options?.metadata.testSessionId,
  generationId: testGenConfig.options?.metadata.generationId,
- testCaseId: testGenConfig.testCase.id,
  projectRepoName: testGenConfig.options?.metadata.projectRepoName,
  });
  let testGenFailed = false;
package/dist/bin/utils/index.d.ts CHANGED
@@ -1,5 +1,5 @@
  import type { TestGenConfig } from "@empiricalrun/shared-types";
- export declare function parseCliArgs(testGenToken?: string): Promise<{
+ export declare function parseCliArgs(): Promise<{
  testGenConfig: TestGenConfig;
  }>;
  export declare function getTestConfigCliArg(): string;
package/dist/bin/utils/index.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAIhE,wBAAsB,YAAY,CAChC,YAAY,GAAE,MAA8B;;GAM7C;AAED,wBAAgB,mBAAmB,IAAI,MAAM,CAE5C;AAED,eAAO,MAAM,WAAW,oBAA2B,CAAC;AACpD,eAAO,MAAM,OAAO,oBAA6B,CAAC"}
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/bin/utils/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AAKhE,wBAAsB,YAAY;;GAMjC;AAED,wBAAgB,mBAAmB,IAAI,MAAM,CAc5C;AAED,eAAO,MAAM,WAAW,oBAA2B,CAAC;AACpD,eAAO,MAAM,OAAO,oBAA6B,CAAC"}
package/dist/bin/utils/index.js CHANGED
@@ -1,16 +1,29 @@
  "use strict";
  Object.defineProperty(exports, "__esModule", { value: true });
  exports.baggage = exports.sentryTrace = exports.getTestConfigCliArg = exports.parseCliArgs = void 0;
+ const logger_1 = require("../logger");
  const scenarios_1 = require("./scenarios");
- async function parseCliArgs(testGenToken = getTestConfigCliArg()) {
- const testGenConfig = await (0, scenarios_1.loadTestConfigs)(testGenToken);
+ async function parseCliArgs() {
+ let rawToken = getTestConfigCliArg();
+ const testGenConfig = (0, scenarios_1.loadTestConfigs)(rawToken);
  return {
  testGenConfig,
  };
  }
  exports.parseCliArgs = parseCliArgs;
  function getTestConfigCliArg() {
- return process.argv[2];
+ // Check for --token parameter
+ const tokenIndex = process.argv.indexOf("--token");
+ if (tokenIndex !== -1 && process.argv[tokenIndex + 1]) {
+ const token = process.argv[tokenIndex + 1];
+ if (token)
+ return token;
+ }
+ // Fallback to legacy behavior (token as first argument)
+ const legacyToken = process.argv[2];
+ const logger = new logger_1.CustomLogger({ useReporter: false });
+ logger.warn("Using legacy token format. Consider using --token parameter instead: npx @empiricalrun/test-gen --token <TEST_GEN_TOKEN>");
+ return legacyToken;
  }
  exports.getTestConfigCliArg = getTestConfigCliArg;
  exports.sentryTrace = process.env.SENTRY_TRACE;
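To make the fallback above concrete, here is a small standalone sketch showing how the two invocation styles resolve; `pickToken` is a hypothetical helper, not part of the package:

```ts
// Sketch only: mirrors the logic of getTestConfigCliArg above for both invocation styles.
function pickToken(argv: string[]): { token: string | undefined; legacy: boolean } {
  const tokenIndex = argv.indexOf("--token");
  if (tokenIndex !== -1 && argv[tokenIndex + 1]) {
    return { token: argv[tokenIndex + 1], legacy: false };
  }
  // Legacy form: the token is the first positional argument.
  return { token: argv[2], legacy: true };
}

// npx @empiricalrun/test-gen --token <TEST_GEN_TOKEN>
console.log(pickToken(["node", "test-gen", "--token", "abc123"])); // { token: 'abc123', legacy: false }
// npx @empiricalrun/test-gen <TEST_GEN_TOKEN> (logs a deprecation warning in the real CLI)
console.log(pickToken(["node", "test-gen", "abc123"]));            // { token: 'abc123', legacy: true }
```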
package/dist/bin/utils/scenarios/index.d.ts CHANGED
@@ -1,4 +1,3 @@
  import type { TestGenConfig } from "@empiricalrun/shared-types";
- declare function loadTestConfigs(testGenToken: string): Promise<TestGenConfig>;
- export { loadTestConfigs };
+ export declare function loadTestConfigs(testGenToken: string): TestGenConfig;
  //# sourceMappingURL=index.d.ts.map
package/dist/bin/utils/scenarios/index.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/bin/utils/scenarios/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAIV,aAAa,EAEd,MAAM,4BAA4B,CAAC;AAepC,iBAAe,eAAe,CAAC,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,CAAC,CAmB3E;AAED,OAAO,EAAE,eAAe,EAAE,CAAC"}
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/bin/utils/scenarios/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAIV,aAAa,EAEd,MAAM,4BAA4B,CAAC;AAepC,wBAAgB,eAAe,CAAC,YAAY,EAAE,MAAM,GAAG,aAAa,CAmBnE"}
package/dist/bin/utils/scenarios/index.js CHANGED
@@ -1,7 +1,7 @@
  "use strict";
  Object.defineProperty(exports, "__esModule", { value: true });
  exports.loadTestConfigs = void 0;
- async function loadTestConfigs(testGenToken) {
+ function loadTestConfigs(testGenToken) {
  const str = decodeURIComponent(atob(testGenToken));
  const config = JSON.parse(str);
  const specPath = `./tests/${config.filePath || "index.spec.ts"}`;
package/dist/index.js CHANGED
@@ -31,7 +31,7 @@ require("./initSentry");
  const llm_1 = require("@empiricalrun/llm");
  const Sentry = __importStar(require("@sentry/node"));
  const run_1 = require("./agent/master/run");
- const utils_1 = require("./bin/utils");
+ const scenarios_1 = require("./bin/utils/scenarios");
  const client_1 = __importDefault(require("./file/client"));
  const reporter_1 = require("./reporter");
  const session_1 = require("./session");
@@ -47,7 +47,7 @@ process.on("SIGTERM", async () => await flushEvents());
  async function createTest(task, page, scope) {
  const port = process.env.APP_PORT || 3030;
  const testConfigArg = process.env.TEST_GEN_TOKEN;
- const { testGenConfig } = await (0, utils_1.parseCliArgs)(testConfigArg);
+ const testGenConfig = (0, scenarios_1.loadTestConfigs)(testConfigArg);
  (0, reporter_1.setReporterConfig)({
  projectRepoName: testGenConfig.options?.metadata.projectRepoName,
  testSessionId: testGenConfig.options?.metadata.testSessionId,
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@empiricalrun/test-gen",
- "version": "0.42.29",
+ "version": "0.43.1",
  "publishConfig": {
  "registry": "https://registry.npmjs.org/",
  "access": "public"
@@ -72,9 +72,9 @@
  "ts-morph": "^23.0.0",
  "tsx": "^4.16.2",
  "typescript": "^5.3.3",
- "@empiricalrun/reporter": "^0.23.1",
+ "@empiricalrun/llm": "^0.9.35",
  "@empiricalrun/r2-uploader": "^0.3.8",
- "@empiricalrun/llm": "^0.9.34"
+ "@empiricalrun/reporter": "^0.23.1"
  },
  "devDependencies": {
  "@playwright/test": "1.47.1",
@@ -89,7 +89,7 @@
  "js-levenshtein": "^1.1.6",
  "playwright": "1.47.1",
  "ts-patch": "^3.3.0",
- "@empiricalrun/shared-types": "0.0.3"
+ "@empiricalrun/shared-types": "0.0.4"
  },
  "scripts": {
  "dev": "tspc --build --watch",