@empiricalrun/test-gen 0.38.21 → 0.38.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.38.22
4
+
5
+ ### Patch Changes
6
+
7
+ - 03be230: evals: add scores for append-create-test-block
8
+ - d6c0198: fix: multiple append create test block
9
+ - d6c0198: fix: add retry for agent if there are syntax issues
10
+
3
11
  ## 0.38.21
4
12
 
5
13
  ### Patch Changes
@@ -1 +1 @@
1
- {"version":3,"file":"update-flow.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/update-flow.ts"],"names":[],"mappings":"AAAA,OAAO,EAKL,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAoB3B,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,KAAK,eAAe,GAAG,QAAQ,GAAG;IAChC,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB,CAAC;AAqIF,wBAAsB,2BAA2B,CAAC,EAChD,QAAQ,EACR,eAAe,EACf,SAAS,EACT,SAAS,EACT,YAAY,EACZ,KAAK,EACL,OAAO,GACR,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,GAAG,OAAO,CACT;IACE,QAAQ,EAAE,MAAM,GAAG,SAAS,CAAC;IAC7B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;CAC5B,EAAE,CACJ,CA+CA;AAED,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAAG,SAAS,EACzC,OAAO,GAAE,OAAc,EACvB,QAAQ,GAAE,OAAc,EACxB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,eAAe,EAAE,CAAC,CA4D5B;AAED,wBAAsB,kCAAkC,CAAC,EACvD,SAAS,EACT,SAAS,EACT,QAAQ,EACR,YAAY,EACZ,OAAO,EACP,KAAK,GACN,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,YAAY,EAAE,MAAM,CAAC;CACtB,mBA4DA;AAED,wBAAsB,qBAAqB,CAAC,EAC1C,QAAQ,EACR,IAAI,EACJ,OAAO,EACP,KAAK,EACL,aAAoB,GACrB,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAyC7B"}
1
+ {"version":3,"file":"update-flow.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/update-flow.ts"],"names":[],"mappings":"AAAA,OAAO,EAKL,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAiB3B,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,KAAK,eAAe,GAAG,QAAQ,GAAG;IAChC,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB,CAAC;AAqIF,wBAAsB,2BAA2B,CAAC,EAChD,QAAQ,EACR,eAAe,EACf,SAAS,EACT,SAAS,EACT,YAAY,EACZ,KAAK,EACL,OAAO,GACR,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,GAAG,OAAO,CACT;IACE,QAAQ,EAAE,MAAM,GAAG,SAAS,CAAC;IAC7B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;CAC5B,EAAE,CACJ,CA+CA;AAED,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAAG,SAAS,EACzC,OAAO,GAAE,OAAc,EACvB,QAAQ,GAAE,OAAc,EACxB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,eAAe,EAAE,CAAC,CA4D5B;AAED,wBAAsB,kCAAkC,CAAC,EACvD,SAAS,EACT,SAAS,EACT,QAAQ,EACR,YAAY,EACZ,OAAO,EACP,KAAK,GACN,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,YAAY,EAAE,MAAM,CAAC;CACtB,mBAuGA;AAED,wBAAsB,qBAAqB,CAAC,EAC1C,QAAQ,EACR,IAAI,EACJ,OAAO,EACP,KAAK,EACL,aAAoB,GACrB,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CA4C7B"}
@@ -237,7 +237,7 @@ async function getAppendCreateTestBlockCompletion({ testFiles, pageFiles, testCa
237
237
  scenarioName: testCase.name,
238
238
  scenarioSteps: testCase.steps.join("\n"),
239
239
  scenarioFile: testFilePath,
240
- }, 25);
240
+ }, 28);
241
241
  promptSpan?.end({ output: { instruction } });
242
242
  const [userInstruction] = instruction.filter((s) => s.role === "user");
243
243
  const [systemInstruction] = instruction.filter((s) => s.role === "system");
@@ -247,17 +247,52 @@ async function getAppendCreateTestBlockCompletion({ testFiles, pageFiles, testCa
247
247
  `;
248
248
  const llm = new llm_1.LLM({
249
249
  trace,
250
- provider: "openai",
251
- defaultModel: "o1-mini",
252
- providerApiKey: constants_1.MODEL_API_KEYS["openai"],
250
+ provider: "anthropic",
251
+ defaultModel: "claude-3-5-sonnet-latest",
252
+ providerApiKey: constants_1.MODEL_API_KEYS["anthropic"],
253
253
  });
254
- const firstShotMessage = await llm.createChatCompletion({
254
+ const chatCompletionParams = {
255
255
  messages: [userInstruction],
256
256
  modelParameters: {
257
257
  ...options?.modelParameters,
258
- ...constants_1.DEFAULT_O1_MODEL_PARAMETERS,
258
+ ...constants_1.DEFAULT_MODEL_PARAMETERS,
259
259
  },
260
- });
260
+ };
261
+ let firstShotMessage = await llm.createChatCompletion(chatCompletionParams);
262
+ let fileChanges = (0, utils_1.extractAppendTestUpdates)(firstShotMessage?.content);
263
+ // retry once if there is incorrect response
264
+ const feedbacks = [];
265
+ if (!fileChanges[0]?.newCode) {
266
+ feedbacks.push("No code found in the response");
267
+ }
268
+ if (fileChanges[0]?.newCode && !(0, web_1.isSyntaxValid)(fileChanges[0]?.newCode)) {
269
+ feedbacks.push("- Syntax is invalid. Please fix the JS syntax. Remove any imports if present.");
270
+ }
271
+ if (fileChanges[0]?.newCode &&
272
+ !fileChanges[0]?.newCode.includes("createTest(")) {
273
+ feedbacks.push("- Usage of createTest method is missing. Follow the guidelines provided to you.");
274
+ }
275
+ if (!fileChanges[0]?.newCode ||
276
+ !(0, web_1.isSyntaxValid)(fileChanges[0]?.newCode) ||
277
+ !fileChanges[0]?.newCode.includes("createTest(")) {
278
+ const messages = chatCompletionParams.messages;
279
+ messages.push({
280
+ role: "assistant",
281
+ content: firstShotMessage?.content,
282
+ });
283
+ messages.push({
284
+ role: "user",
285
+ content: `
286
+ There are a few feedbacks on the previous output:
287
+ ${feedbacks.join("\n")}
288
+
289
+ -----
290
+
291
+ Respond with the same xml format which was originally expected.
292
+ `,
293
+ });
294
+ firstShotMessage = await llm.createChatCompletion(chatCompletionParams);
295
+ }
261
296
  let response = firstShotMessage?.content || "";
262
297
  appendCreateTestSpan?.end({ output: response });
263
298
  return response;
@@ -276,14 +311,15 @@ async function appendCreateTestBlock({ testCase, file, options, trace, validateT
276
311
  ]);
277
312
  const { pomPrompt, nonSpecFilePrompt } = context;
278
313
  const generatedTestCases = [];
279
- const appendCreateTestResp = await getAppendCreateTestBlockCompletion({
314
+ const appendCreateTestParams = {
280
315
  testCase,
281
316
  testFilePath: file,
282
317
  options,
283
318
  trace,
284
319
  pageFiles: pomPrompt,
285
320
  testFiles: testCodePrompt,
286
- });
321
+ };
322
+ const appendCreateTestResp = await getAppendCreateTestBlockCompletion(appendCreateTestParams);
287
323
  const fileChanges = (0, utils_1.extractAppendTestUpdates)(appendCreateTestResp);
288
324
  await applyFileChanges({
289
325
  trace,
@@ -73,4 +73,5 @@ export declare function buildTestNamePrompt({ testName, suites, }: {
73
73
  suites: string[];
74
74
  }): string;
75
75
  export declare function getVariableDeclarationsFromCode(sourceCode: string): string[];
76
+ export declare function isSyntaxValid(code: string): boolean;
76
77
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/bin/utils/platform/web/index.ts"],"names":[],"mappings":"AAMA,OAAO,EAGL,IAAI,EAEJ,UAAU,EAEX,MAAM,UAAU,CAAC;AAGlB,OAAO,EAAE,QAAQ,EAAE,MAAM,mBAAmB,CAAC;AAE7C,eAAO,MAAM,gCAAgC,eAC/B,UAAU,KACrB,MAgBF,CAAC;AAEF;;;;;;GAMG;AACH,wBAAgB,sBAAsB,CAAC,EACrC,YAAY,EACZ,MAAM,EACN,OAAO,GACR,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;CACjB,GAAG;IACF,SAAS,EAAE,MAAM,GAAG,SAAS,CAAC;IAC9B,QAAQ,EAAE,IAAI,GAAG,SAAS,CAAC;IAC3B,SAAS,EAAE,MAAM,CAAC;CACnB,CA2CA;AAwBD,wBAAsB,0CAA0C,CAC9D,QAAQ,EAAE,MAAM,oBA+BjB;AAED;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,wBAAgB,4BAA4B,CAC1C,IAAI,EAAE,IAAI,GAAG,SAAS,GACrB,IAAI,GAAG,SAAS,CA4BlB;AAED,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,CAG5E;AAED,wBAAgB,kBAAkB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,EAAE,CA8C7D;AAED,wBAAsB,sBAAsB,CAC1C,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,mCAWjB;AAED,wBAAsB,UAAU,CAAC,QAAQ,EAAE,MAAM,iBAShD;AAED,wBAAsB,UAAU,CAAC,QAAQ,EAAE,MAAM,iBAQhD;AAED,wBAAgB,YAAY,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,MAAM,UAE5E;AAED,wBAAsB,cAAc,CAAC,QAAQ,EAAE,MAAM,iBAMpD;AAED,wBAAgB,oBAAoB,CAAC,QAAQ,EAAE,MAAM,UAcpD;AAED,wBAAsB,iCAAiC,CAAC,QAAQ,EAAE,MAAM,+BAoBvE;AAED,wBAAgB,4BAA4B,CAC1C,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,MAAM,EAChB,aAAa,EAAE,MAAM,UA0CtB;AAED,eAAO,MAAM,6BAA6B;qBAKvB,MAAM;iBACV,MAAM;YACX,MAAM,EAAE;YA2DjB,CAAC;AAEF,eAAO,MAAM,iCAAiC,cACjC,MAAM,EAAE,gBACL,MAAM,sBAyBrB,CAAC;AAEF,wBAAsB,qBAAqB,CAAC,EAC1C,YAAY,EACZ,QAAQ,EACR,MAAM,GACP,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,EAAE,CAAC;CAClB,iBA8CA;AAED,wBAAsB,uBAAuB,CAC3C,QAAQ,EAAE,MAAM,EAChB,cAAc,EAAE,MAAM,EAAE,iBA2BzB;AAED,wBAAgB,aAAa,CAAC,EAC5B,QAAQ,EACR,QAAQ,GACT,EAAE;IACD,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,QAAQ,CAAC;CACpB,WAYA;AAED,wBAAgB,mBAAmB,CAAC,EAClC,QAAQ,EACR,MAAM,GACP,EAAE;IACD,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,EAAE,CAAC;CAClB,UAOA;AAED,wBAAgB,+BAA+B,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM,EAAE,CA4B5E"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/bin/utils/platform/web/index.ts"],"names":[],"mappings":"AAOA,OAAO,EAGL,IAAI,EAEJ,UAAU,EAEX,MAAM,UAAU,CAAC;AAGlB,OAAO,EAAE,QAAQ,EAAE,MAAM,mBAAmB,CAAC;AAE7C,eAAO,MAAM,gCAAgC,eAC/B,UAAU,KACrB,MAgBF,CAAC;AAEF;;;;;;GAMG;AACH,wBAAgB,sBAAsB,CAAC,EACrC,YAAY,EACZ,MAAM,EACN,OAAO,GACR,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;CACjB,GAAG;IACF,SAAS,EAAE,MAAM,GAAG,SAAS,CAAC;IAC9B,QAAQ,EAAE,IAAI,GAAG,SAAS,CAAC;IAC3B,SAAS,EAAE,MAAM,CAAC;CACnB,CA2CA;AAwBD,wBAAsB,0CAA0C,CAC9D,QAAQ,EAAE,MAAM,oBA+BjB;AAED;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,wBAAgB,4BAA4B,CAC1C,IAAI,EAAE,IAAI,GAAG,SAAS,GACrB,IAAI,GAAG,SAAS,CA4BlB;AAED,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,CAG5E;AAED,wBAAgB,kBAAkB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,EAAE,CA8C7D;AAED,wBAAsB,sBAAsB,CAC1C,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,mCAWjB;AAED,wBAAsB,UAAU,CAAC,QAAQ,EAAE,MAAM,iBAShD;AAED,wBAAsB,UAAU,CAAC,QAAQ,EAAE,MAAM,iBAQhD;AAED,wBAAgB,YAAY,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,MAAM,UAE5E;AAED,wBAAsB,cAAc,CAAC,QAAQ,EAAE,MAAM,iBAMpD;AAED,wBAAgB,oBAAoB,CAAC,QAAQ,EAAE,MAAM,UAcpD;AAED,wBAAsB,iCAAiC,CAAC,QAAQ,EAAE,MAAM,+BAoBvE;AAED,wBAAgB,4BAA4B,CAC1C,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,MAAM,EAChB,aAAa,EAAE,MAAM,UA0CtB;AAED,eAAO,MAAM,6BAA6B;qBAKvB,MAAM;iBACV,MAAM;YACX,MAAM,EAAE;YA2DjB,CAAC;AAEF,eAAO,MAAM,iCAAiC,cACjC,MAAM,EAAE,gBACL,MAAM,sBAyBrB,CAAC;AAEF,wBAAsB,qBAAqB,CAAC,EAC1C,YAAY,EACZ,QAAQ,EACR,MAAM,GACP,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,EAAE,CAAC;CAClB,iBA8CA;AAED,wBAAsB,uBAAuB,CAC3C,QAAQ,EAAE,MAAM,EAChB,cAAc,EAAE,MAAM,EAAE,iBA2BzB;AAED,wBAAgB,aAAa,CAAC,EAC5B,QAAQ,EACR,QAAQ,GACT,EAAE;IACD,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,QAAQ,CAAC;CACpB,WAYA;AAED,wBAAgB,mBAAmB,CAAC,EAClC,QAAQ,EACR,MAAM,GACP,EAAE;IACD,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,EAAE,CAAC;CAClB,UAOA;AAED,wBAAgB,+BAA+B,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM,EAAE,CA4B5E;AAED,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAQnD"}
@@ -3,7 +3,8 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
3
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.getVariableDeclarationsFromCode = exports.buildTestNamePrompt = exports.isTestPresent = exports.appendScopeToCreateTest = exports.addUserContextFixture = exports.importAllExportsStmtFromFilePaths = exports.injectCodeSnippetBySuiteChain = exports.replaceCreateTestWithNewCode = exports.getPageVariableNameFromCreateTest = exports.getFixtureImportPath = exports.removeTestOnly = exports.addNewImport = exports.formatCode = exports.lintErrors = exports.stripAndPrependImports = exports.validateTypescript = exports.appendToTestBlock = exports.findFirstSerialDescribeBlock = exports.hasTopLevelDescribeConfigureWithSerialMode = exports.getTypescriptTestBlock = exports.getTestModuleAliasFromSourceFile = void 0;
6
+ exports.isSyntaxValid = exports.getVariableDeclarationsFromCode = exports.buildTestNamePrompt = exports.isTestPresent = exports.appendScopeToCreateTest = exports.addUserContextFixture = exports.importAllExportsStmtFromFilePaths = exports.injectCodeSnippetBySuiteChain = exports.replaceCreateTestWithNewCode = exports.getPageVariableNameFromCreateTest = exports.getFixtureImportPath = exports.removeTestOnly = exports.addNewImport = exports.formatCode = exports.lintErrors = exports.stripAndPrependImports = exports.validateTypescript = exports.appendToTestBlock = exports.findFirstSerialDescribeBlock = exports.hasTopLevelDescribeConfigureWithSerialMode = exports.getTypescriptTestBlock = exports.getTestModuleAliasFromSourceFile = void 0;
7
+ const parser_1 = require("@babel/parser");
7
8
  const eslint_1 = require("eslint");
8
9
  const fs_1 = require("fs");
9
10
  const fs_extra_1 = __importDefault(require("fs-extra"));
@@ -496,3 +497,14 @@ function getVariableDeclarationsFromCode(sourceCode) {
496
497
  return allVariables;
497
498
  }
498
499
  exports.getVariableDeclarationsFromCode = getVariableDeclarationsFromCode;
500
+ function isSyntaxValid(code) {
501
+ let isSyntaxValid = true;
502
+ try {
503
+ (0, parser_1.parse)(code, { sourceType: "module", plugins: ["typescript"] }); // Attempt to parse the code
504
+ }
505
+ catch (error) {
506
+ isSyntaxValid = false;
507
+ }
508
+ return isSyntaxValid;
509
+ }
510
+ exports.isSyntaxValid = isSyntaxValid;
@@ -1 +1 @@
1
- {"version":3,"file":"append-create-test-agent.evals.d.ts","sourceRoot":"","sources":["../../src/evals/append-create-test-agent.evals.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAEpC,QAAA,MAAM,6BAA6B,EAAE,UAapC,CAAC;AAEF,eAAe,6BAA6B,CAAC"}
1
+ {"version":3,"file":"append-create-test-agent.evals.d.ts","sourceRoot":"","sources":["../../src/evals/append-create-test-agent.evals.ts"],"names":[],"mappings":"AAOA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AA8GpC,QAAA,MAAM,6BAA6B,EAAE,UAsCpC,CAAC;AAEF,eAAe,6BAA6B,CAAC"}
@@ -1,6 +1,82 @@
1
1
  "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
2
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
+ const js_levenshtein_1 = __importDefault(require("js-levenshtein"));
7
+ const ts_morph_1 = require("ts-morph");
3
8
  const update_flow_1 = require("../agent/codegen/update-flow");
9
+ const utils_1 = require("../agent/codegen/utils");
10
+ const web_1 = require("../bin/utils/platform/web");
11
+ const hasSingleCreateTestBlock = (result) => {
12
+ // Returns 1 if there is exactly 1 `createTest` block
13
+ // 0 if there is no `createTest` or if there are >1
14
+ if (result.length && result[0].newCode) {
15
+ const createTestBlocks = result[0].newCode.match(/createTest/g);
16
+ if (createTestBlocks) {
17
+ return createTestBlocks.length === 1 ? 1 : 0;
18
+ }
19
+ }
20
+ return 0;
21
+ };
22
+ const newCodeBlockIsComplete = (testCase, result, expectedOutput) => {
23
+ // Returns 1 if the new code block is complete
24
+ // i.e., it includes test('...', async () => {
25
+ if (!expectedOutput.length || !expectedOutput[0]) {
26
+ return 0;
27
+ }
28
+ if (!result[0]?.newCode) {
29
+ return 0;
30
+ }
31
+ if (result.length &&
32
+ result[0].newCode &&
33
+ expectedOutput[0]?.newCode?.includes("test(")) {
34
+ const newCode = result[0].newCode;
35
+ return newCode.includes("test(") && newCode.includes(testCase.name) ? 1 : 0;
36
+ }
37
+ else {
38
+ const project = new ts_morph_1.Project();
39
+ const expectedSourceFile = project.createSourceFile("expected-updated-code.ts", expectedOutput[0].newCode);
40
+ const resultSourceFile = project.createSourceFile("updated-code.ts", result[0].newCode);
41
+ const expectedFunctions = expectedSourceFile.getFunctions();
42
+ const resultFunctions = resultSourceFile.getFunctions();
43
+ if (!expectedFunctions.length) {
44
+ // check for class method updates
45
+ const expectedCheckForMethod = project.createSourceFile("expected-check-method.ts", `class A {
46
+ ${expectedOutput[0].newCode}
47
+ }`);
48
+ const expectedMethods = expectedCheckForMethod.getDescendantsOfKind(ts_morph_1.SyntaxKind.MethodDeclaration);
49
+ const resultCheckForMethod = project.createSourceFile("check-method.ts", `class A {
50
+ ${result[0].newCode}
51
+ }`);
52
+ const resultMethods = resultCheckForMethod.getDescendantsOfKind(ts_morph_1.SyntaxKind.MethodDeclaration);
53
+ if (resultMethods.length === 1 && expectedMethods.length === 1) {
54
+ return expectedMethods[0]?.getName() === resultMethods[0]?.getName()
55
+ ? 1
56
+ : 0;
57
+ }
58
+ else {
59
+ return 0;
60
+ }
61
+ }
62
+ else {
63
+ // check for function updates
64
+ return expectedFunctions[0]?.getName() === resultFunctions[0]?.getName()
65
+ ? 1
66
+ : 0;
67
+ }
68
+ }
69
+ };
70
+ const oldCodeTestBlockIsComplete = (result, expected) => {
71
+ if (result.length && result[0].oldCode) {
72
+ const oldCodeFromLlm = result[0].oldCode;
73
+ const expectedOldCode = expected[0]?.oldCode || "";
74
+ return (1 -
75
+ (0, js_levenshtein_1.default)(expectedOldCode || "", oldCodeFromLlm || "") /
76
+ expectedOldCode.length);
77
+ }
78
+ return 0;
79
+ };
4
80
  const appendCreateTestAgentEvaluate = async ({ item, trace }) => {
5
81
  const { testCase, testFiles, pageFiles, testFilePath } = item.input;
6
82
  const response = await (0, update_flow_1.getAppendCreateTestBlockCompletion)({
@@ -10,8 +86,31 @@ const appendCreateTestAgentEvaluate = async ({ item, trace }) => {
10
86
  testFilePath,
11
87
  trace,
12
88
  });
89
+ const parsed = (0, utils_1.extractAppendTestUpdates)(response);
90
+ const expectedOutputParsed = (0, utils_1.extractAppendTestUpdates)(item.expectedOutput);
13
91
  return {
14
- scores: [],
92
+ scores: [
93
+ {
94
+ name: "returns_single_create_test",
95
+ value: hasSingleCreateTestBlock(parsed),
96
+ },
97
+ {
98
+ name: "returns_complete_code_block",
99
+ value: newCodeBlockIsComplete(testCase, parsed, expectedOutputParsed),
100
+ },
101
+ {
102
+ name: "copies_old_block_correctly",
103
+ value: oldCodeTestBlockIsComplete(parsed, expectedOutputParsed),
104
+ },
105
+ {
106
+ name: "syntax_check",
107
+ value: parsed[0]?.newCode
108
+ ? (0, web_1.isSyntaxValid)(parsed[0]?.newCode || "")
109
+ ? 1
110
+ : 0
111
+ : 0,
112
+ },
113
+ ],
15
114
  output: response,
16
115
  };
17
116
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@empiricalrun/test-gen",
3
- "version": "0.38.21",
3
+ "version": "0.38.22",
4
4
  "publishConfig": {
5
5
  "registry": "https://registry.npmjs.org/",
6
6
  "access": "public"
@@ -32,6 +32,7 @@
32
32
  "@actions/core": "^1.10.1",
33
33
  "@aws-sdk/client-s3": "^3.614.0",
34
34
  "@aws-sdk/s3-request-presigner": "^3.614.0",
35
+ "@babel/parser": "^7.26.3",
35
36
  "@playwright/test": "1.47.1",
36
37
  "@types/sanitize-html": "^2.11.0",
37
38
  "commander": "^12.1.0",
@@ -59,8 +60,8 @@
59
60
  "tsx": "^4.16.2",
60
61
  "typescript": "^5.3.3",
61
62
  "@empiricalrun/llm": "^0.9.26",
62
- "@empiricalrun/r2-uploader": "^0.3.7",
63
- "@empiricalrun/reporter": "^0.21.4"
63
+ "@empiricalrun/reporter": "^0.21.4",
64
+ "@empiricalrun/r2-uploader": "^0.3.7"
64
65
  },
65
66
  "devDependencies": {
66
67
  "@types/detect-port": "^1.3.5",