@empiricalrun/test-gen 0.42.28 → 0.43.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,22 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.43.0
4
+
5
+ ### Minor Changes
6
+
7
+ - 46c0dab: feat: autofix workflow v1
8
+
9
+ ### Patch Changes
10
+
11
+ - Updated dependencies [46c0dab]
12
+ - @empiricalrun/llm@0.9.35
13
+
14
+ ## 0.42.29
15
+
16
+ ### Patch Changes
17
+
18
+ - f39500b: fix: removed explicit delays from master agent
19
+
3
20
  ## 0.42.28
4
21
 
5
22
  ### Patch Changes
@@ -1 +1 @@
1
- {"version":3,"file":"click.d.ts","sourceRoot":"","sources":["../../src/actions/click.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,yBAAyB,EAAE,MAAM,UAAU,CAAC;AAIrD,eAAO,MAAM,4BAA4B,kBAAkB,CAAC;AAE5D,eAAO,MAAM,oBAAoB,EAAE,yBAuDlC,CAAC"}
1
+ {"version":3,"file":"click.d.ts","sourceRoot":"","sources":["../../src/actions/click.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,yBAAyB,EAAE,MAAM,UAAU,CAAC;AAIrD,eAAO,MAAM,4BAA4B,kBAAkB,CAAC;AAE5D,eAAO,MAAM,oBAAoB,EAAE,yBAsDlC,CAAC"}
@@ -11,7 +11,6 @@ const clickActionGenerator = (page) => {
11
11
  const locator = await (0, utils_1.getPlaywrightLocatorUsingCssSelector)(selector, args.xpath, page.pwPageInstance, args?.elementAnnotation);
12
12
  const exec = new Function("page", `return page.${locator}.click({ timeout: 3000 })`);
13
13
  await exec(page.pwPageInstance);
14
- await page.pwPageInstance.waitForTimeout(3000);
15
14
  return {
16
15
  locator,
17
16
  };
@@ -0,0 +1,18 @@
1
+ import { TraceClient } from "@empiricalrun/llm";
2
+ import { TestErrorDiagnosisDetails, TestGenConfigOptions } from "@empiricalrun/shared-types";
3
+ import { CustomLogger } from "../../bin/logger";
4
+ /**
5
+ *
6
+ * inputs
7
+ * - task
8
+ * - diagnosis
9
+ */
10
+ export declare function createTaskUsingFailureDiagnosis({ options, trace, diagnosis, logger, }: {
11
+ options?: TestGenConfigOptions;
12
+ trace?: TraceClient;
13
+ diagnosis: TestErrorDiagnosisDetails;
14
+ logger?: CustomLogger;
15
+ }): Promise<{
16
+ task: string;
17
+ }>;
18
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/diagnosis-agent/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAyB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACvE,OAAO,EACL,yBAAyB,EACzB,oBAAoB,EACrB,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAuChD;;;;;GAKG;AACH,wBAAsB,+BAA+B,CAAC,EACpD,OAAO,EACP,KAAK,EACL,SAAS,EACT,MAAM,GACP,EAAE;IACD,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,SAAS,EAAE,yBAAyB,CAAC;IACrC,MAAM,CAAC,EAAE,YAAY,CAAC;CACvB,GAAG,OAAO,CAAC;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC,CA8E5B"}
@@ -0,0 +1,105 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.createTaskUsingFailureDiagnosis = void 0;
4
+ const llm_1 = require("@empiricalrun/llm");
5
+ const session_1 = require("../../session");
6
+ const strict_mode_violation_1 = require("./strict-mode-violation");
7
+ const session = (0, session_1.getSessionDetails)();
8
// Structured-output contract handed to llm.createChatCompletion as `responseFormat`
// by createTaskUsingFailureDiagnosis. The schema is strict: the reply must be an
// object with exactly two required fields and no additional properties:
//   - observation: array of strings
//   - action: string (later returned to the caller as the fix task)
+ const responseFormat = {
9
+ type: "json_schema",
10
+ json_schema: {
11
+ name: "test-case-auto-fix-summary",
12
+ strict: true,
13
+ schema: {
14
+ type: "object",
15
+ properties: {
16
+ observation: {
17
+ type: "array",
18
+ items: {
19
+ type: "string",
20
+ },
21
+ description: "Detailed observation of what changed between successful and failed test screenshots",
22
+ },
23
+ action: {
24
+ type: "string",
25
+ description: "Direct action to fix the test in natural language without code snippets or options",
26
+ },
27
+ },
28
+ required: ["observation", "action"],
29
+ additionalProperties: false,
30
+ },
31
+ },
32
+ };
33
/**
 * Turns a test-failure diagnosis into a natural-language fix task.
 *
 * Flow:
 *  1. Reuses the supplied trace, or starts an "infer-agent-task" trace when
 *     langfuse is available, and opens an "auto-fix" span on it.
 *  2. Fetches the key-moments JSON from `diagnosis.keyMomentsUrl`.
 *  3. When the failed run's stack mentions "strict mode violation", compiles
 *     the strict-mode prompt and asks the LLM for `{ observation, action }`
 *     using the module-level `responseFormat` schema.
 *  4. Returns the LLM's `action` as the task.
 *
 * Every recoverable failure (non-OK key-moments response, no applicable
 * prompt, empty or malformed LLM reply) yields `{ task: "" }`, which signals
 * "no fix task derived" to the caller. Network errors raised by `fetch`
 * itself and errors raised by the LLM client still propagate.
 *
 * @param {object} params
 * @param params.options   Optional test-gen config; only `metadata.projectRepoName` is read (for tracing).
 * @param params.trace     Optional trace client to attach the span to.
 * @param params.diagnosis Failure diagnosis; reads `diagnosisId`, `keyMomentsUrl` and `failed_run_metadata.stack`.
 * @param params.logger    Optional logger used for progress and warning messages.
 * @returns {Promise<{ task: string }>} The fix task, or an empty string when none could be derived.
 */
async function createTaskUsingFailureDiagnosis({ options, trace, diagnosis, logger, }) {
    trace =
        trace ||
            llm_1.langfuseInstance?.trace({
                name: "infer-agent-task",
                id: crypto.randomUUID(),
                release: session.version,
            });
    const failureDiagnosisSpan = trace?.span({
        name: "auto-fix",
        input: {
            diagnosisId: diagnosis.diagnosisId,
            // `metadata` may be absent on partially populated options.
            prjRepoName: options?.metadata?.projectRepoName,
        },
    });
    // Single exit path for "could not derive a task": warn, record the reason
    // on the span (the logger is optional, so the span may be the only trail)
    // and hand back an empty task.
    const giveUp = (reason) => {
        logger?.warn(reason);
        failureDiagnosisSpan?.update({
            output: {
                error: reason,
            },
        });
        return {
            task: "",
        };
    };
    logger?.log("Trying to fix the test using failure diagnosis. Fetching key moments of the diagnosis");
    const resp = await fetch(diagnosis.keyMomentsUrl);
    if (!resp.ok) {
        // Do not try to parse an error body as screenshot data.
        return giveUp("Failed to fetch key moments of the diagnosis");
    }
    logger?.success("Successfully fetched key moments of the diagnosis");
    const screenshotsData = await resp.json();
    // TODO: make this dynamic in nature. The prompts should be turned into recipes,
    // which will help to get rid of the if/else logic.
    // A recipe should have:
    // 1. selection criteria
    // 2. job to be done - in this case generate a prompt
    let prompt;
    if (diagnosis.failed_run_metadata?.stack?.includes("strict mode violation")) {
        prompt = (0, strict_mode_violation_1.fixStrictModeViolationPrompt)({
            screenshotsData,
            diagnosis,
        });
    }
    if (!prompt) {
        // TODO: handle default prompt
        return {
            task: "",
        };
    }
    // Only build the LLM client once we know there is a prompt to send.
    const llm = new llm_1.LLM({
        provider: "openai",
        defaultModel: "o1",
        trace,
    });
    const llmResponse = await llm.createChatCompletion({
        messages: prompt,
        modelParameters: {
            max_completion_tokens: 40000,
        },
        responseFormat,
    });
    const rawContent = llmResponse?.content;
    if (!rawContent) {
        return giveUp("LLM returned an empty response for the failure diagnosis");
    }
    let parsed;
    try {
        parsed = JSON.parse(rawContent);
    }
    catch {
        // Truncated or refused completions are not guaranteed to be valid JSON.
        return giveUp("LLM response for the failure diagnosis was not valid JSON");
    }
    const { observation, action } = parsed ?? {};
    failureDiagnosisSpan?.update({
        output: {
            observation,
            action,
        },
    });
    return {
        // Keep the declared `task: string` contract even if `action` is missing.
        task: typeof action === "string" ? action : "",
    };
}
105
+ exports.createTaskUsingFailureDiagnosis = createTaskUsingFailureDiagnosis;
@@ -0,0 +1,9 @@
1
+ import { TestErrorDiagnosisDetails } from "@empiricalrun/shared-types";
2
+ export declare function fixStrictModeViolationPrompt({ screenshotsData, diagnosis, }: {
3
+ screenshotsData: {
4
+ success: string[];
5
+ failure: string[];
6
+ };
7
+ diagnosis: TestErrorDiagnosisDetails;
8
+ }): import("openai/resources/index.mjs").ChatCompletionMessageParam[];
9
+ //# sourceMappingURL=strict-mode-violation.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"strict-mode-violation.d.ts","sourceRoot":"","sources":["../../../src/agent/diagnosis-agent/strict-mode-violation.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,yBAAyB,EAAE,MAAM,4BAA4B,CAAC;AAoBvE,wBAAgB,4BAA4B,CAAC,EAC3C,eAAe,EACf,SAAS,GACV,EAAE;IACD,eAAe,EAAE;QAAE,OAAO,EAAE,MAAM,EAAE,CAAC;QAAC,OAAO,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC;IAC1D,SAAS,EAAE,yBAAyB,CAAC;CACtC,qEAiBA"}
@@ -0,0 +1,31 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.fixStrictModeViolationPrompt = void 0;
4
+ const llm_1 = require("@empiricalrun/llm");
5
+ const promptTemplate_0 = "{{#section \"system\"}}\nAs a software engineer, your task is to identify a fix for a failing Playwright test by analyzing screenshots of both the failed and successful test steps.\n\nKey issues for test failures include:\n- Duplicate elements for the same Playwright selector.\n\nInstructions:\n1. Examine the provided successful and failed test screenshots.\n2. Identify the correct element for action based on these observations on the successful test run screenshots.\n3. Sometimes the exact same locator is not available on failed test run screenshot, you need to identify the intent from successful test screenshots and apply that intent in failed test run screenshot to identify the right locator to interact with\n4. Evaluate the playwright selector options provided to you to execute the action. Pick the selector which best matches the intent of the test.\n5. Propose a precise action that addresses the issue.\n\nExample:\n- observation: \n - Current step failure: await page.getByText(\"Audience\").click()\n - Two similar buttons named \"Audience\" exist in the failed run screenshots\n - The successful test run clicked on \"Untracked Audience\"\n - The failed test run should click on \"Untracked Audience\"\n - Available locators: await page.getByText(\"Untracked Audience\").click() contain the selector for Untracked Audience\n- action: Replace failing line with await page.getByText(\"Untracked Audience\").click()\n\nYour action should:\n- Be directly actionable and free of ambiguity, as it will guide another LLM to generate code.\n- Be in natural language and not just code snippet.\n- Be verified as feasible on the failure screen before responding.\n- Choose from the provided possible actions that can be executed on the failure screen.\n- Action should adhere to the format mentioned in the example, i.e. 
it should start with \"Replace the failing line\" and the updated code with replaced selector following it.\n\nEnsure the action is executable based on the failure screen context before providing it.\n{{/section}}\n\n{{#section \"user\"}}\nSuccessful test screenshots\n\n{{images successScreenshots}}\n\nFailed test screenshots\n\n{{images failedScreenshots}}\n\nStep where test failed:\n{{failingLine}}\n\nOptions for Playwright selectors to perform actions on a failed test screen:\n{{selectorOptions}}\n\n{{/section}}\n\n";
6
/**
 * Extracts the alternative locators listed after "aka" in an error stack.
 *
 * Each `aka <locator>` fragment is expected to be a chain of calls such as
 * `getByRole('button', { name: 'Save' }).first()`. The chains are returned in
 * order of appearance, without de-duplication.
 *
 * Call arguments are matched with awareness of:
 *  - single- and double-quoted strings (so `getByText('Audience (2)')` is not
 *    cut at the parenthesis inside the string),
 *  - backslash escapes (so `/Total \(3\)/` is kept whole),
 *  - one level of nested call parentheses (so `filter({ has: getByText('x') })`
 *    is kept whole).
 * When the arguments cannot be matched that way (for example an unbalanced
 * quote), matching falls back to "everything up to the first `)`".
 *
 * @param errorStack Error text to scan.
 * @returns {string[]} Locator chains found after "aka"; empty when there are none.
 */
function extractLocatorOptions(errorStack) {
    // Quoted strings may not span lines, so a stray apostrophe cannot swallow
    // the following entries of the error listing.
    const quoted = String.raw `'(?:[^'\\\n]|\\.)*'|"(?:[^"\\\n]|\\.)*"`;
    const escaped = String.raw `\\.`;
    const plain = String.raw `[^()'"\\]`;
    const nested = String.raw `\((?:` + quoted + "|" + escaped + "|" + plain + String.raw `)*\)`;
    const structuredArgs = "(?:" + quoted + "|" + escaped + "|" + plain + "|" + nested + ")*";
    // Fallback used when the structured form cannot reach a closing parenthesis.
    const looseArgs = String.raw `[^)]*`;
    const call = String.raw `[A-Za-z0-9_]+\((?:` + structuredArgs + "|" + looseArgs + String.raw `)\)`;
    // functionName(arguments) optionally chained with .functionName(arguments)
    const regex = new RegExp(String.raw `aka\s+(` + call + String.raw `(?:\.` + call + ")*)", "g");
    const options = [];
    for (let match = regex.exec(errorStack); match !== null; match = regex.exec(errorStack)) {
        if (match[1]) {
            options.push(match[1]);
        }
    }
    return options;
}
19
/**
 * Compiles the strict-mode-violation prompt for a failing step.
 *
 * Fills the module's prompt template with the failing line, the success and
 * failure screenshots, and the locator alternatives extracted from the failed
 * run's stack (joined one per line), then compiles it with high image detail
 * for the "openai" model provider.
 *
 * @param params.screenshotsData Object whose `success` and `failure` entries feed the image sections.
 * @param params.diagnosis       Diagnosis providing `failingLine` and `failed_run_metadata.stack`.
 * @returns The value produced by `compilePrompt` for the filled template.
 */
function fixStrictModeViolationPrompt({ screenshotsData, diagnosis, }) {
    const failingLine = diagnosis.failingLine;
    const successScreenshots = screenshotsData.success;
    const failedScreenshots = screenshotsData.failure;
    const locatorCandidates = extractLocatorOptions(diagnosis.failed_run_metadata.stack);
    const templateValues = {
        failingLine,
        successScreenshots,
        failedScreenshots,
        selectorOptions: locatorCandidates.join("\n"),
    };
    const compileSettings = {
        imageDetail: "high",
        modelProvider: "openai",
    };
    return (0, llm_1.compilePrompt)(promptTemplate_0, templateValues, compileSettings);
}
31
+ exports.fixStrictModeViolationPrompt = fixStrictModeViolationPrompt;
@@ -1 +1 @@
1
- {"version":3,"file":"element-annotation.d.ts","sourceRoot":"","sources":["../../../src/agent/master/element-annotation.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACpE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAQlC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAEnD,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AA0DjD,wBAAsB,oBAAoB,CAAC,EACzC,kBAAkB,EAClB,WAAW,EACX,mBAAmB,EACnB,KAAK,EACL,GAAG,EACH,OAAO,EACP,UAAU,GACX,EAAE;IACD,kBAAkB,EAAE,MAAM,CAAC;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,UAAU,EAAE,oBAAoB,CAAC;CAClC,GAAG,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,CA8C9B;AAED,MAAM,MAAM,oBAAoB,GAAG;IACjC,UAAU,EACN,KAAK,GACL,UAAU,CAAC,IAAI,GACf,UAAU,CAAC,WAAW,GACtB,UAAU,CAAC,MAAM,CAAC;IACtB,aAAa,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;CACpC,CAAC;AAEF,wBAAsB,iBAAiB,CAAC,EACtC,IAAI,EACJ,UAAU,EACV,OAAO,GACR,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,UAAU,EAAE,oBAAoB,CAAC;IACjC,OAAO,EAAE,oBAAoB,CAAC;CAC/B,GAAG,OAAO,CAAC;IACV,cAAc,EAAE;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;IACtD,gBAAgB,EAAE,MAAM,CAAC;IACzB,uBAAuB,EAAE,MAAM,CAAC;CACjC,CAAC,CAqDD"}
1
+ {"version":3,"file":"element-annotation.d.ts","sourceRoot":"","sources":["../../../src/agent/master/element-annotation.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACpE,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAQlC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAEnD,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AA0DjD,wBAAsB,oBAAoB,CAAC,EACzC,kBAAkB,EAClB,WAAW,EACX,mBAAmB,EACnB,KAAK,EACL,GAAG,EACH,OAAO,EACP,UAAU,GACX,EAAE;IACD,kBAAkB,EAAE,MAAM,CAAC;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,UAAU,EAAE,oBAAoB,CAAC;CAClC,GAAG,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,CA8C9B;AAED,MAAM,MAAM,oBAAoB,GAAG;IACjC,UAAU,EACN,KAAK,GACL,UAAU,CAAC,IAAI,GACf,UAAU,CAAC,WAAW,GACtB,UAAU,CAAC,MAAM,CAAC;IACtB,aAAa,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;CACpC,CAAC;AAEF,wBAAsB,iBAAiB,CAAC,EACtC,IAAI,EACJ,UAAU,EACV,OAAO,GACR,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,UAAU,EAAE,oBAAoB,CAAC;IACjC,OAAO,EAAE,oBAAoB,CAAC;CAC/B,GAAG,OAAO,CAAC;IACV,cAAc,EAAE;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;IACtD,gBAAgB,EAAE,MAAM,CAAC;IACzB,uBAAuB,EAAE,MAAM,CAAC;CACjC,CAAC,CAoDD"}
@@ -120,7 +120,6 @@ async function getAnnotationKeys({ page, preference, options, }) {
120
120
  }));
121
121
  return annotations;
122
122
  }, { preference, options });
123
- await page.waitForTimeout(2000);
124
123
  const annotationBuffer = await page.screenshot({
125
124
  // path: `screenshots/screenshot-${screenshotIndex++}.png`,
126
125
  });
@@ -1 +1 @@
1
- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,4BAA4B,CAAC;AAC3D,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAclC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AA6BrB,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,oBAAoB,CAAC;IAC9B,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GA0WA"}
1
+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,4BAA4B,CAAC;AAC3D,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAclC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AA6BrB,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,oBAAoB,CAAC;IAC9B,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GAsWA"}
@@ -37,8 +37,6 @@ async function createTestUsingMasterAgent({ task, page, testCase, specPath, opti
37
37
  const testgenUpdatesReporter = new reporter_1.TestGenUpdatesReporter();
38
38
  const session = (0, session_1.getSessionDetails)();
39
39
  const testGenPage = new page_1.TestGenPage(page, (0, utils_1.getPageVarName)());
40
- // add timeout for the page to settle in
41
- await page.waitForTimeout(3000);
42
40
  const trace = llm_1.langfuseInstance?.trace({
43
41
  name: "test-generator",
44
42
  id: crypto.randomUUID(),
@@ -182,7 +180,6 @@ async function createTestUsingMasterAgent({ task, page, testCase, specPath, opti
182
180
  let shouldTriggerHintsFlow;
183
181
  let hintsExecutionCompletion;
184
182
  let elementAnnotation;
185
- await page.waitForTimeout(2000);
186
183
  const actionType = toolCall.actionType;
187
184
  let preference = {
188
185
  actionType: "all",
package/dist/bin/index.js CHANGED
@@ -35,6 +35,7 @@ const run_1 = require("../agent/browsing/run");
35
35
  const utils_1 = require("../agent/browsing/utils");
36
36
  const repo_edit_1 = require("../agent/codegen/repo-edit");
37
37
  const run_2 = require("../agent/codegen/run");
38
+ const diagnosis_agent_1 = require("../agent/diagnosis-agent");
38
39
  const enrich_prompt_1 = require("../agent/enrich-prompt");
39
40
  const infer_agent_1 = require("../agent/infer-agent");
40
41
  const run_3 = require("../agent/planner/run");
@@ -112,6 +113,20 @@ async function runAgent(testGenConfig, span) {
112
113
  });
113
114
  return;
114
115
  }
116
+ // TODO: this needs to be moved to an orchestrator which decides what needs to be done first before executing the sub tasks
117
+ if (testGenConfig.testErrorDiagnosis &&
118
+ testGenConfig.testErrorDiagnosis.failingLine &&
119
+ // TODO: fix this hardcoding of user prompt - ideally it's an auto-fix intent
120
+ testCase.steps[0] == "Can you please fix the test") {
121
+ const { task: updatedTask } = await (0, diagnosis_agent_1.createTaskUsingFailureDiagnosis)({
122
+ options: testGenConfig.options,
123
+ trace,
124
+ diagnosis: testGenConfig.testErrorDiagnosis,
125
+ });
126
+ if (updatedTask) {
127
+ testCase.steps = [updatedTask];
128
+ }
129
+ }
115
130
  if (!agent || agent === "auto") {
116
131
  agent = await resolveAgentUsingTask({
117
132
  testCase,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@empiricalrun/test-gen",
3
- "version": "0.42.28",
3
+ "version": "0.43.0",
4
4
  "publishConfig": {
5
5
  "registry": "https://registry.npmjs.org/",
6
6
  "access": "public"
@@ -72,7 +72,7 @@
72
72
  "ts-morph": "^23.0.0",
73
73
  "tsx": "^4.16.2",
74
74
  "typescript": "^5.3.3",
75
- "@empiricalrun/llm": "^0.9.34",
75
+ "@empiricalrun/llm": "^0.9.35",
76
76
  "@empiricalrun/r2-uploader": "^0.3.8",
77
77
  "@empiricalrun/reporter": "^0.23.1"
78
78
  },
@@ -89,7 +89,7 @@
89
89
  "js-levenshtein": "^1.1.6",
90
90
  "playwright": "1.47.1",
91
91
  "ts-patch": "^3.3.0",
92
- "@empiricalrun/shared-types": "0.0.3"
92
+ "@empiricalrun/shared-types": "0.0.4"
93
93
  },
94
94
  "scripts": {
95
95
  "dev": "tspc --build --watch",