@empiricalrun/test-gen 0.39.0 → 0.40.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,15 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.40.0
4
+
5
+ ### Minor Changes
6
+
7
+ - 451c840: feat: pro 1165 adding interactible live stream of test generation
8
+
9
+ ### Patch Changes
10
+
11
+ - 7989789: fix: replaced implementation for create test with repo edit
12
+
3
13
  ## 0.39.0
4
14
 
5
15
  ### Minor Changes
@@ -1,6 +1,30 @@
1
1
  import { TraceClient } from "@empiricalrun/llm";
2
+ import { ChatCompletionMessageParam } from "openai/resources/index.mjs";
3
+ export declare function generateCodeUsingRepoAgent({ task, trace, repoFiles, }: {
4
+ trace?: TraceClient;
5
+ task: string;
6
+ repoFiles?: string;
7
+ }): Promise<{
8
+ prompt: ChatCompletionMessageParam[];
9
+ agentResponse: string;
10
+ fileChanges: {
11
+ filePath: string | undefined;
12
+ oldCode: string | undefined;
13
+ newCode: string | undefined;
14
+ reason: string | undefined;
15
+ }[];
16
+ }>;
2
17
  export declare function repoEditAgent({ trace, task, }: {
3
18
  trace?: TraceClient;
4
19
  task: string;
5
- }): Promise<void>;
20
+ }): Promise<{
21
+ prompt: ChatCompletionMessageParam[];
22
+ agentResponse: string;
23
+ fileChanges: {
24
+ filePath: string | undefined;
25
+ oldCode: string | undefined;
26
+ newCode: string | undefined;
27
+ reason: string | undefined;
28
+ }[];
29
+ }>;
6
30
  //# sourceMappingURL=repo-edit.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"repo-edit.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/repo-edit.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAcrD,wBAAsB,aAAa,CAAC,EAClC,KAAK,EACL,IAAI,GACL,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,IAAI,EAAE,MAAM,CAAC;CACd,iBA+JA"}
1
+ {"version":3,"file":"repo-edit.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/repo-edit.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAGrD,OAAO,EAAE,0BAA0B,EAAE,MAAM,4BAA4B,CAAC;AAWxE,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,KAAK,EACL,SAAS,GACV,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB,GAAG,OAAO,CAAC;IACV,MAAM,EAAE,0BAA0B,EAAE,CAAC;IACrC,aAAa,EAAE,MAAM,CAAC;IACtB,WAAW,EAAE;QACX,QAAQ,EAAE,MAAM,GAAG,SAAS,CAAC;QAC7B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;QAC5B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;QAC5B,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;KAC5B,EAAE,CAAC;CACL,CAAC,CA6GD;AAED,wBAAsB,aAAa,CAAC,EAClC,KAAK,EACL,IAAI,GACL,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,IAAI,EAAE,MAAM,CAAC;CACd,GAAG,OAAO,CAAC;IACV,MAAM,EAAE,0BAA0B,EAAE,CAAC;IACrC,aAAa,EAAE,MAAM,CAAC;IACtB,WAAW,EAAE;QACX,QAAQ,EAAE,MAAM,GAAG,SAAS,CAAC;QAC7B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;QAC5B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;QAC5B,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;KAC5B,EAAE,CAAC;CACL,CAAC,CAgED"}
@@ -26,7 +26,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
26
26
  return (mod && mod.__esModule) ? mod : { "default": mod };
27
27
  };
28
28
  Object.defineProperty(exports, "__esModule", { value: true });
29
- exports.repoEditAgent = void 0;
29
+ exports.repoEditAgent = exports.generateCodeUsingRepoAgent = void 0;
30
30
  const llm_1 = require("@empiricalrun/llm");
31
31
  const fsSync = __importStar(require("fs"));
32
32
  const promises_1 = __importDefault(require("fs/promises"));
@@ -37,15 +37,12 @@ const constants_1 = require("../../constants");
37
37
  const reporter_1 = require("../../reporter");
38
38
  const test_update_feedback_1 = require("./test-update-feedback");
39
39
  const utils_1 = require("./utils");
40
- async function repoEditAgent({ trace, task, }) {
41
- const testgenUpdatesReporter = new reporter_1.TestGenUpdatesReporter();
42
- void testgenUpdatesReporter.sendMessage(`Updating test code as per the task. \n View [trace](${trace?.getTraceUrl()})`);
40
+ async function generateCodeUsingRepoAgent({ task, trace, repoFiles, }) {
43
41
  const repoEditSpan = trace?.span({
44
42
  name: "repo-edit",
45
43
  input: { task },
46
44
  });
47
45
  // TODO: add support for playwright config and other files
48
- const { prompt: repoPrompt } = await (0, context_1.generateTxtForRepository)();
49
46
  const prompt = [
50
47
  {
51
48
  role: "system",
@@ -95,7 +92,7 @@ Coding principles and guidelines:
95
92
  - Use the "function" keyword for pure functions to benefit from hoisting and clarity.
96
93
 
97
94
  Here is the list of files:
98
- ${repoPrompt}
95
+ ${repoFiles}
99
96
  `,
100
97
  },
101
98
  {
@@ -142,6 +139,23 @@ Task: ${task}
142
139
  task,
143
140
  },
144
141
  });
142
+ return {
143
+ prompt,
144
+ agentResponse: updatedUsageExampleMessage?.content,
145
+ fileChanges: updates,
146
+ };
147
+ }
148
+ exports.generateCodeUsingRepoAgent = generateCodeUsingRepoAgent;
149
+ async function repoEditAgent({ trace, task, }) {
150
+ const testgenUpdatesReporter = new reporter_1.TestGenUpdatesReporter();
151
+ void testgenUpdatesReporter.sendMessage(`Updating test code as per the task. \n View [trace](${trace?.getTraceUrl()})`);
152
+ const { prompt: repoFiles } = await (0, context_1.generateTxtForRepository)();
153
+ const repoAgentOutput = await generateCodeUsingRepoAgent({
154
+ task,
155
+ trace,
156
+ repoFiles,
157
+ });
158
+ const updates = repoAgentOutput.fileChanges;
145
159
  const fileUpdateResponses = await (0, utils_1.applyFileChanges)({
146
160
  validateTypes: false,
147
161
  trace,
@@ -152,8 +166,8 @@ Task: ${task}
152
166
  if (errorResponses.length > 0) {
153
167
  const updatedFileChanges = await (0, test_update_feedback_1.applyTestUpdateFeedbacks)({
154
168
  trace,
155
- oldPrompt: prompt,
156
- agentResponse: updatedUsageExampleMessage?.content,
169
+ oldPrompt: repoAgentOutput.prompt,
170
+ agentResponse: repoAgentOutput.agentResponse,
157
171
  feedbacks: errorResponses.map((e) => ({
158
172
  filePath: e?.filePath,
159
173
  errorMessage: e?.errorMessage,
@@ -182,5 +196,6 @@ Task: ${task}
182
196
  })();
183
197
  }));
184
198
  await testgenUpdatesReporter.sendMessage(`Successfully generated code for the given task. \n View [trace](${trace?.getTraceUrl()})`);
199
+ return repoAgentOutput;
185
200
  }
186
201
  exports.repoEditAgent = repoEditAgent;
@@ -1,12 +1,4 @@
1
1
  import { TraceClient } from "@empiricalrun/llm";
2
2
  import { TestCase, TestGenConfigOptions } from "../../types";
3
- export declare function getAddScenarioCompletion({ testCase, testFiles, pageFiles, testFilePath, trace, options, }: {
4
- testCase: TestCase;
5
- testFiles: string;
6
- pageFiles: string;
7
- testFilePath: string;
8
- trace?: TraceClient;
9
- options?: TestGenConfigOptions;
10
- }): Promise<string>;
11
- export declare function generateTest(testCase: TestCase, file: string, options: TestGenConfigOptions, trace?: TraceClient): Promise<TestCase[]>;
3
+ export declare function generateTest(testCase: TestCase, file: string, options: TestGenConfigOptions, trace?: TraceClient): Promise<TestCase[] | void>;
12
4
  //# sourceMappingURL=run.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkC,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAkBhF,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,wBAAsB,wBAAwB,CAAC,EAC7C,QAAQ,EACR,SAAS,EACT,SAAS,EACT,YAAY,EACZ,KAAK,EACL,OAAO,GACR,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,mBA4BA;AAED,wBAAsB,YAAY,CAChC,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,EAC7B,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,QAAQ,EAAE,CAAC,CAgFrB"}
1
+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAMhE,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,wBAAsB,YAAY,CAChC,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,EAC7B,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,QAAQ,EAAE,GAAG,IAAI,CAAC,CAsC5B"}
@@ -3,44 +3,14 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
3
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.generateTest = exports.getAddScenarioCompletion = void 0;
6
+ exports.generateTest = void 0;
7
7
  const llm_1 = require("@empiricalrun/llm");
8
8
  const fs_extra_1 = __importDefault(require("fs-extra"));
9
9
  const logger_1 = require("../../bin/logger");
10
10
  const context_1 = require("../../bin/utils/context");
11
11
  const web_1 = require("../../bin/utils/platform/web");
12
- const constants_1 = require("../../constants");
13
- const fix_ts_errors_1 = require("./fix-ts-errors");
12
+ const repo_edit_1 = require("./repo-edit");
14
13
  const update_flow_1 = require("./update-flow");
15
- async function getAddScenarioCompletion({ testCase, testFiles, pageFiles, testFilePath, trace, options, }) {
16
- const promptSpan = trace?.span({
17
- name: "add-scenario-prompt",
18
- });
19
- const instruction = await (0, llm_1.getPrompt)("add-scenario", {
20
- testFiles: testFiles,
21
- pageFiles: pageFiles,
22
- scenarioName: testCase.name,
23
- scenarioSteps: testCase.steps.join("\n"),
24
- scenarioFile: testFilePath,
25
- });
26
- promptSpan?.end({ output: { instruction } });
27
- const llm = new llm_1.LLM({
28
- trace,
29
- provider: options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
30
- defaultModel: options?.model || constants_1.DEFAULT_MODEL,
31
- providerApiKey: constants_1.MODEL_API_KEYS[options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER],
32
- });
33
- const firstShotMessage = await llm.createChatCompletion({
34
- messages: instruction,
35
- modelParameters: {
36
- ...constants_1.DEFAULT_MODEL_PARAMETERS,
37
- ...options?.modelParameters,
38
- },
39
- });
40
- let response = firstShotMessage?.content || "";
41
- return response;
42
- }
43
- exports.getAddScenarioCompletion = getAddScenarioCompletion;
44
14
  async function generateTest(testCase, file, options, trace) {
45
15
  const logger = new logger_1.CustomLogger();
46
16
  if (!fs_extra_1.default.existsSync(file)) {
@@ -48,7 +18,7 @@ async function generateTest(testCase, file, options, trace) {
48
18
  fs_extra_1.default.createFileSync(file);
49
19
  }
50
20
  const context = await (0, context_1.contextForGeneration)(file);
51
- const { codePrompt, pomPrompt, nonSpecFilePrompt, testFileContent } = context;
21
+ const { codePrompt, pomPrompt, testFileContent } = context;
52
22
  const { testBlock } = (0, web_1.getTypescriptTestBlock)({
53
23
  scenarioName: testCase?.name,
54
24
  content: testFileContent,
@@ -58,8 +28,6 @@ async function generateTest(testCase, file, options, trace) {
58
28
  if (isUpdate) {
59
29
  return await (0, update_flow_1.updateTest)(testCase, file, options, true, true, trace);
60
30
  }
61
- const generatedTestCases = [];
62
- logger.logEmptyLine();
63
31
  const createTestSpan = trace?.span({
64
32
  name: "create-test",
65
33
  input: {
@@ -69,47 +37,15 @@ async function generateTest(testCase, file, options, trace) {
69
37
  testFilePath: file,
70
38
  },
71
39
  });
72
- const response = await getAddScenarioCompletion({
73
- testCase,
74
- testFiles: codePrompt,
75
- pageFiles: pomPrompt,
76
- testFilePath: file,
77
- trace: createTestSpan,
78
- options,
79
- });
80
- logger.success("Test generated successfully!");
81
- const readWriteFileSpan = trace?.span({ name: "write-to-file" });
82
- let contents = fs_extra_1.default.readFileSync(file, "utf-8");
83
- const [prependContent, strippedContent] = await (0, web_1.stripAndPrependImports)(response, testCase?.name);
84
- let updatedContent = prependContent +
85
- (0, web_1.injectCodeSnippetBySuiteChain)({
86
- testFileContent: contents,
87
- suites: testCase?.suites,
88
- codeSnippet: `\n\n${strippedContent}`,
89
- });
90
- await fs_extra_1.default.writeFile(file, updatedContent, "utf-8");
91
- readWriteFileSpan?.end({ output: { updatedContent } });
92
- logger.log("Linting generated code...");
93
- createTestSpan?.event({ name: "lint-file" });
94
- await (0, web_1.lintErrors)(file);
95
- await (0, fix_ts_errors_1.validateAndFixTypescriptErrors)({
96
- trace,
97
- logger: new logger_1.CustomLogger({ useReporter: false }),
98
- file,
99
- pomCode: pomPrompt,
100
- nonSpecFileCode: nonSpecFilePrompt,
101
- testCase: testCase,
102
- options,
103
- });
104
- createTestSpan?.event({ name: "format-file" });
105
- await (0, web_1.formatCode)(file);
106
- logger.success("File formatted successfully!");
107
- if (trace) {
108
- logger.log(`Successfully generated code for the given task. \n View [trace](${trace.getTraceUrl()})`);
109
- }
110
- generatedTestCases.push(testCase);
111
- createTestSpan?.end({ output: { response } });
40
+ const task = `
41
+ Create a new test at the given test file path and perform the relevant changes required:\n
42
+ Scenario name: ${testCase.name}
43
+ Test file path: ${file}
44
+ Test suite: ${testCase.suites?.join("->") || "-"}
45
+ Scenario: ${testCase.steps.join("\n")}
46
+ `;
47
+ const repoAgentResponse = await (0, repo_edit_1.repoEditAgent)({ trace, task });
48
+ createTestSpan?.end({ output: repoAgentResponse.fileChanges });
112
49
  await (0, llm_1.flushAllTraces)();
113
- return generatedTestCases;
114
50
  }
115
51
  exports.generateTest = generateTest;
@@ -1 +1 @@
1
- {"version":3,"file":"add-scenario-agent.evals.d.ts","sourceRoot":"","sources":["../../src/evals/add-scenario-agent.evals.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAEpC,QAAA,MAAM,4BAA4B,EAAE,UAkBnC,CAAC;AAEF,eAAe,4BAA4B,CAAC"}
1
+ {"version":3,"file":"add-scenario-agent.evals.d.ts","sourceRoot":"","sources":["../../src/evals/add-scenario-agent.evals.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AA+BpC,QAAA,MAAM,4BAA4B,EAAE,UA6BnC,CAAC;AAEF,eAAe,4BAA4B,CAAC"}
@@ -1,23 +1,42 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- const run_1 = require("../agent/codegen/run");
3
+ const repo_edit_1 = require("../agent/codegen/repo-edit");
4
+ function evaluateEqualityScore({ currentOutput, expectedOutput, }) {
5
+ for (const expectedFile of expectedOutput) {
6
+ if (!currentOutput.find((output) => {
7
+ return output.filePath === expectedFile.filePath;
8
+ })) {
9
+ return 0;
10
+ }
11
+ }
12
+ return 1;
13
+ }
4
14
  const addScenarioCodeAgentEvaluate = async ({ item, trace }) => {
5
- const { testCase, testFiles, pageFiles, testFilePath } = item.input;
6
- const response = await (0, run_1.getAddScenarioCompletion)({
7
- testCase,
8
- testFiles,
9
- pageFiles,
10
- testFilePath,
15
+ const { testCase, testFilePath, pageFiles, testFiles } = item.input;
16
+ const task = `
17
+ Create a new test at the given test file path and perform the relevant changes required:\n
18
+ Scenario name: ${testCase.name}
19
+ Test file path: ${testFilePath}
20
+ Test suite: ${testCase.suites?.join("->") || "-"}
21
+ Scenario: ${testCase.steps.join("\n")}
22
+ `;
23
+ const repoFiles = pageFiles + testFiles;
24
+ const repoAgentOutput = await (0, repo_edit_1.generateCodeUsingRepoAgent)({
25
+ task,
11
26
  trace,
27
+ repoFiles,
12
28
  });
13
29
  return {
14
30
  scores: [
15
31
  {
16
32
  name: "equality",
17
- value: item.expectedOutput === response ? 1 : 0,
33
+ value: evaluateEqualityScore({
34
+ currentOutput: repoAgentOutput.fileChanges,
35
+ expectedOutput: item.expectedOutput,
36
+ }),
18
37
  },
19
38
  ],
20
- output: response,
39
+ output: repoAgentOutput.fileChanges,
21
40
  };
22
41
  };
23
42
  exports.default = addScenarioCodeAgentEvaluate;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@empiricalrun/test-gen",
3
- "version": "0.39.0",
3
+ "version": "0.40.0",
4
4
  "publishConfig": {
5
5
  "registry": "https://registry.npmjs.org/",
6
6
  "access": "public"
@@ -70,9 +70,9 @@
70
70
  "ts-morph": "^24.0.0",
71
71
  "tsx": "^4.16.2",
72
72
  "typescript": "^5.3.3",
73
+ "@empiricalrun/reporter": "^0.21.6",
73
74
  "@empiricalrun/llm": "^0.9.29",
74
- "@empiricalrun/r2-uploader": "^0.3.7",
75
- "@empiricalrun/reporter": "^0.21.6"
75
+ "@empiricalrun/r2-uploader": "^0.3.7"
76
76
  },
77
77
  "devDependencies": {
78
78
  "@playwright/test": "1.47.1",