@empiricalrun/test-gen 0.38.21 → 0.38.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/dist/agent/codegen/update-flow.d.ts.map +1 -1
- package/dist/agent/codegen/update-flow.js +45 -9
- package/dist/bin/utils/platform/web/index.d.ts +1 -0
- package/dist/bin/utils/platform/web/index.d.ts.map +1 -1
- package/dist/bin/utils/platform/web/index.js +13 -1
- package/dist/evals/append-create-test-agent.evals.d.ts.map +1 -1
- package/dist/evals/append-create-test-agent.evals.js +100 -1
- package/package.json +4 -3
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
# @empiricalrun/test-gen
|
|
2
2
|
|
|
3
|
+
## 0.38.22
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- 03be230: evals: add scores for append-create-test-block
|
|
8
|
+
- d6c0198: fix: multiple append create test block
|
|
9
|
+
- d6c0198: fix: add retry for agent if there are syntax issues
|
|
10
|
+
|
|
3
11
|
## 0.38.21
|
|
4
12
|
|
|
5
13
|
### Patch Changes
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"update-flow.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/update-flow.ts"],"names":[],"mappings":"AAAA,OAAO,EAKL,WAAW,EACZ,MAAM,mBAAmB,CAAC;
|
|
1
|
+
{"version":3,"file":"update-flow.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/update-flow.ts"],"names":[],"mappings":"AAAA,OAAO,EAKL,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAiB3B,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,KAAK,eAAe,GAAG,QAAQ,GAAG;IAChC,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB,CAAC;AAqIF,wBAAsB,2BAA2B,CAAC,EAChD,QAAQ,EACR,eAAe,EACf,SAAS,EACT,SAAS,EACT,YAAY,EACZ,KAAK,EACL,OAAO,GACR,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,GAAG,OAAO,CACT;IACE,QAAQ,EAAE,MAAM,GAAG,SAAS,CAAC;IAC7B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;CAC5B,EAAE,CACJ,CA+CA;AAED,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAAG,SAAS,EACzC,OAAO,GAAE,OAAc,EACvB,QAAQ,GAAE,OAAc,EACxB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,eAAe,EAAE,CAAC,CA4D5B;AAED,wBAAsB,kCAAkC,CAAC,EACvD,SAAS,EACT,SAAS,EACT,QAAQ,EACR,YAAY,EACZ,OAAO,EACP,KAAK,GACN,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,YAAY,EAAE,MAAM,CAAC;CACtB,mBAuGA;AAED,wBAAsB,qBAAqB,CAAC,EAC1C,QAAQ,EACR,IAAI,EACJ,OAAO,EACP,KAAK,EACL,aAAoB,GACrB,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CA4C7B"}
|
|
@@ -237,7 +237,7 @@ async function getAppendCreateTestBlockCompletion({ testFiles, pageFiles, testCa
|
|
|
237
237
|
scenarioName: testCase.name,
|
|
238
238
|
scenarioSteps: testCase.steps.join("\n"),
|
|
239
239
|
scenarioFile: testFilePath,
|
|
240
|
-
},
|
|
240
|
+
}, 28);
|
|
241
241
|
promptSpan?.end({ output: { instruction } });
|
|
242
242
|
const [userInstruction] = instruction.filter((s) => s.role === "user");
|
|
243
243
|
const [systemInstruction] = instruction.filter((s) => s.role === "system");
|
|
@@ -247,17 +247,52 @@ async function getAppendCreateTestBlockCompletion({ testFiles, pageFiles, testCa
|
|
|
247
247
|
`;
|
|
248
248
|
const llm = new llm_1.LLM({
|
|
249
249
|
trace,
|
|
250
|
-
provider: "
|
|
251
|
-
defaultModel: "
|
|
252
|
-
providerApiKey: constants_1.MODEL_API_KEYS["
|
|
250
|
+
provider: "anthropic",
|
|
251
|
+
defaultModel: "claude-3-5-sonnet-latest",
|
|
252
|
+
providerApiKey: constants_1.MODEL_API_KEYS["anthropic"],
|
|
253
253
|
});
|
|
254
|
-
const
|
|
254
|
+
const chatCompletionParams = {
|
|
255
255
|
messages: [userInstruction],
|
|
256
256
|
modelParameters: {
|
|
257
257
|
...options?.modelParameters,
|
|
258
|
-
...constants_1.
|
|
258
|
+
...constants_1.DEFAULT_MODEL_PARAMETERS,
|
|
259
259
|
},
|
|
260
|
-
}
|
|
260
|
+
};
|
|
261
|
+
let firstShotMessage = await llm.createChatCompletion(chatCompletionParams);
|
|
262
|
+
let fileChanges = (0, utils_1.extractAppendTestUpdates)(firstShotMessage?.content);
|
|
263
|
+
// retry once if there is incorrect response
|
|
264
|
+
const feedbacks = [];
|
|
265
|
+
if (!fileChanges[0]?.newCode) {
|
|
266
|
+
feedbacks.push("No code found in the response");
|
|
267
|
+
}
|
|
268
|
+
if (fileChanges[0]?.newCode && !(0, web_1.isSyntaxValid)(fileChanges[0]?.newCode)) {
|
|
269
|
+
feedbacks.push("- Syntax is invalid. Please fix the JS syntax. Remove any imports if present.");
|
|
270
|
+
}
|
|
271
|
+
if (fileChanges[0]?.newCode &&
|
|
272
|
+
!fileChanges[0]?.newCode.includes("createTest(")) {
|
|
273
|
+
feedbacks.push("- Usage of createTest method is missing. Follow the guidelines provided to you.");
|
|
274
|
+
}
|
|
275
|
+
if (!fileChanges[0]?.newCode ||
|
|
276
|
+
!(0, web_1.isSyntaxValid)(fileChanges[0]?.newCode) ||
|
|
277
|
+
!fileChanges[0]?.newCode.includes("createTest(")) {
|
|
278
|
+
const messages = chatCompletionParams.messages;
|
|
279
|
+
messages.push({
|
|
280
|
+
role: "assistant",
|
|
281
|
+
content: firstShotMessage?.content,
|
|
282
|
+
});
|
|
283
|
+
messages.push({
|
|
284
|
+
role: "user",
|
|
285
|
+
content: `
|
|
286
|
+
There are a few feedbacks on the previous output:
|
|
287
|
+
${feedbacks.join("\n")}
|
|
288
|
+
|
|
289
|
+
-----
|
|
290
|
+
|
|
291
|
+
Respond with the same xml format which was originally expected.
|
|
292
|
+
`,
|
|
293
|
+
});
|
|
294
|
+
firstShotMessage = await llm.createChatCompletion(chatCompletionParams);
|
|
295
|
+
}
|
|
261
296
|
let response = firstShotMessage?.content || "";
|
|
262
297
|
appendCreateTestSpan?.end({ output: response });
|
|
263
298
|
return response;
|
|
@@ -276,14 +311,15 @@ async function appendCreateTestBlock({ testCase, file, options, trace, validateT
|
|
|
276
311
|
]);
|
|
277
312
|
const { pomPrompt, nonSpecFilePrompt } = context;
|
|
278
313
|
const generatedTestCases = [];
|
|
279
|
-
const
|
|
314
|
+
const appendCreateTestParams = {
|
|
280
315
|
testCase,
|
|
281
316
|
testFilePath: file,
|
|
282
317
|
options,
|
|
283
318
|
trace,
|
|
284
319
|
pageFiles: pomPrompt,
|
|
285
320
|
testFiles: testCodePrompt,
|
|
286
|
-
}
|
|
321
|
+
};
|
|
322
|
+
const appendCreateTestResp = await getAppendCreateTestBlockCompletion(appendCreateTestParams);
|
|
287
323
|
const fileChanges = (0, utils_1.extractAppendTestUpdates)(appendCreateTestResp);
|
|
288
324
|
await applyFileChanges({
|
|
289
325
|
trace,
|
|
@@ -73,4 +73,5 @@ export declare function buildTestNamePrompt({ testName, suites, }: {
|
|
|
73
73
|
suites: string[];
|
|
74
74
|
}): string;
|
|
75
75
|
export declare function getVariableDeclarationsFromCode(sourceCode: string): string[];
|
|
76
|
+
export declare function isSyntaxValid(code: string): boolean;
|
|
76
77
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/bin/utils/platform/web/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/bin/utils/platform/web/index.ts"],"names":[],"mappings":"AAOA,OAAO,EAGL,IAAI,EAEJ,UAAU,EAEX,MAAM,UAAU,CAAC;AAGlB,OAAO,EAAE,QAAQ,EAAE,MAAM,mBAAmB,CAAC;AAE7C,eAAO,MAAM,gCAAgC,eAC/B,UAAU,KACrB,MAgBF,CAAC;AAEF;;;;;;GAMG;AACH,wBAAgB,sBAAsB,CAAC,EACrC,YAAY,EACZ,MAAM,EACN,OAAO,GACR,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;CACjB,GAAG;IACF,SAAS,EAAE,MAAM,GAAG,SAAS,CAAC;IAC9B,QAAQ,EAAE,IAAI,GAAG,SAAS,CAAC;IAC3B,SAAS,EAAE,MAAM,CAAC;CACnB,CA2CA;AAwBD,wBAAsB,0CAA0C,CAC9D,QAAQ,EAAE,MAAM,oBA+BjB;AAED;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,wBAAgB,4BAA4B,CAC1C,IAAI,EAAE,IAAI,GAAG,SAAS,GACrB,IAAI,GAAG,SAAS,CA4BlB;AAED,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,CAG5E;AAED,wBAAgB,kBAAkB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,EAAE,CA8C7D;AAED,wBAAsB,sBAAsB,CAC1C,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,mCAWjB;AAED,wBAAsB,UAAU,CAAC,QAAQ,EAAE,MAAM,iBAShD;AAED,wBAAsB,UAAU,CAAC,QAAQ,EAAE,MAAM,iBAQhD;AAED,wBAAgB,YAAY,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,MAAM,UAE5E;AAED,wBAAsB,cAAc,CAAC,QAAQ,EAAE,MAAM,iBAMpD;AAED,wBAAgB,oBAAoB,CAAC,QAAQ,EAAE,MAAM,UAcpD;AAED,wBAAsB,iCAAiC,CAAC,QAAQ,EAAE,MAAM,+BAoBvE;AAED,wBAAgB,4BAA4B,CAC1C,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,MAAM,EAChB,aAAa,EAAE,MAAM,UA0CtB;AAED,eAAO,MAAM,6BAA6B;qBAKvB,MAAM;iBACV,MAAM;YACX,MAAM,EAAE;YA2DjB,CAAC;AAEF,eAAO,MAAM,iCAAiC,cACjC,MAAM,EAAE,gBACL,MAAM,sBAyBrB,CAAC;AAEF,wBAAsB,qBAAqB,CAAC,EAC1C,YAAY,EACZ,QAAQ,EACR,MAAM,GACP,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,EAAE,CAAC;CAClB,iBA8CA;AAED,wBAAsB,uBAAuB,CAC3C,QAAQ,EAAE,MAAM,EAChB,cAAc,EAAE,MAAM,EAAE,iBA2BzB;AAED,wBAAgB,aAAa,CAAC,EAC5B,QAAQ,EACR,QAAQ,GACT,EAAE;IACD,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,QAAQ,CAAC;CACpB,WAYA;AAED,wBAAgB,mBAAmB,CAAC,EAClC,QAAQ,EACR,MAAM,GACP,EAAE;IACD,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,EAAE,CAAC;CAClB,UAOA;AAED,wBAAgB,+BAA+B,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM,EAAE,CA4B5E;AAE
D,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAQnD"}
|
|
@@ -3,7 +3,8 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
3
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.getVariableDeclarationsFromCode = exports.buildTestNamePrompt = exports.isTestPresent = exports.appendScopeToCreateTest = exports.addUserContextFixture = exports.importAllExportsStmtFromFilePaths = exports.injectCodeSnippetBySuiteChain = exports.replaceCreateTestWithNewCode = exports.getPageVariableNameFromCreateTest = exports.getFixtureImportPath = exports.removeTestOnly = exports.addNewImport = exports.formatCode = exports.lintErrors = exports.stripAndPrependImports = exports.validateTypescript = exports.appendToTestBlock = exports.findFirstSerialDescribeBlock = exports.hasTopLevelDescribeConfigureWithSerialMode = exports.getTypescriptTestBlock = exports.getTestModuleAliasFromSourceFile = void 0;
|
|
6
|
+
exports.isSyntaxValid = exports.getVariableDeclarationsFromCode = exports.buildTestNamePrompt = exports.isTestPresent = exports.appendScopeToCreateTest = exports.addUserContextFixture = exports.importAllExportsStmtFromFilePaths = exports.injectCodeSnippetBySuiteChain = exports.replaceCreateTestWithNewCode = exports.getPageVariableNameFromCreateTest = exports.getFixtureImportPath = exports.removeTestOnly = exports.addNewImport = exports.formatCode = exports.lintErrors = exports.stripAndPrependImports = exports.validateTypescript = exports.appendToTestBlock = exports.findFirstSerialDescribeBlock = exports.hasTopLevelDescribeConfigureWithSerialMode = exports.getTypescriptTestBlock = exports.getTestModuleAliasFromSourceFile = void 0;
|
|
7
|
+
const parser_1 = require("@babel/parser");
|
|
7
8
|
const eslint_1 = require("eslint");
|
|
8
9
|
const fs_1 = require("fs");
|
|
9
10
|
const fs_extra_1 = __importDefault(require("fs-extra"));
|
|
@@ -496,3 +497,14 @@ function getVariableDeclarationsFromCode(sourceCode) {
|
|
|
496
497
|
return allVariables;
|
|
497
498
|
}
|
|
498
499
|
exports.getVariableDeclarationsFromCode = getVariableDeclarationsFromCode;
|
|
500
|
+
/**
 * Reports whether `code` parses as an ES module with TypeScript syntax enabled.
 *
 * @param {string} code - Source text to check.
 * @returns {boolean} `true` when the parser accepts the text; `false` when the
 *   parser throws or when `code` is not a string.
 */
function isSyntaxValid(code) {
    // Only strings can be source text; never hand anything else to the parser.
    if (typeof code !== "string") {
        return false;
    }
    try {
        // The parse result is discarded: only success or failure matters here.
        (0, parser_1.parse)(code, { sourceType: "module", plugins: ["typescript"] });
        return true;
    }
    catch (error) {
        // Deliberately not rethrown: a parser failure is the "invalid" answer.
        return false;
    }
}
|
|
510
|
+
exports.isSyntaxValid = isSyntaxValid;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"append-create-test-agent.evals.d.ts","sourceRoot":"","sources":["../../src/evals/append-create-test-agent.evals.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"append-create-test-agent.evals.d.ts","sourceRoot":"","sources":["../../src/evals/append-create-test-agent.evals.ts"],"names":[],"mappings":"AAOA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AA8GpC,QAAA,MAAM,6BAA6B,EAAE,UAsCpC,CAAC;AAEF,eAAe,6BAA6B,CAAC"}
|
|
@@ -1,6 +1,82 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
2
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
const js_levenshtein_1 = __importDefault(require("js-levenshtein"));
|
|
7
|
+
const ts_morph_1 = require("ts-morph");
|
|
3
8
|
const update_flow_1 = require("../agent/codegen/update-flow");
|
|
9
|
+
const utils_1 = require("../agent/codegen/utils");
|
|
10
|
+
const web_1 = require("../bin/utils/platform/web");
|
|
11
|
+
/**
 * Eval scorer: checks that the first parsed update contains exactly one
 * `createTest(...)` call.
 *
 * Only whole-identifier calls are counted, so look-alike names such as
 * `createTestUser(` or a bare mention of `createTest` in a comment do not
 * affect the score.
 *
 * @param {Array<{newCode?: string}>} result - Parsed file updates; only the
 *   first entry is inspected.
 * @returns {0|1} 1 when there is exactly one `createTest(` call, otherwise 0
 *   (including when there is no update or no new code).
 */
const hasSingleCreateTestBlock = (result) => {
    const newCode = result?.[0]?.newCode;
    if (!newCode) {
        return 0;
    }
    // `match` yields null when nothing matches; treat that as zero calls.
    const createTestCalls = newCode.match(/\bcreateTest\s*\(/g) ?? [];
    return createTestCalls.length === 1 ? 1 : 0;
};
|
|
22
|
+
/**
 * Eval scorer: checks that the generated code block is "complete" relative to
 * the expected one. Only the first entry of each list is inspected.
 *
 * - Expected code contains `test(`: the generated code must also contain
 *   `test(` and the test case name.
 * - Expected code declares a function: the first function declared in the
 *   generated code must have the same (defined) name.
 * - Otherwise both snippets are wrapped in a throwaway class and must each
 *   yield exactly one method declaration, with matching names.
 *
 * @param {{name: string}} testCase - Test case whose name must appear in a
 *   generated `test(` block.
 * @param {Array<{newCode?: string}>} result - Parsed updates from the model.
 * @param {Array<{newCode?: string}>} expectedOutput - Parsed expected updates.
 * @returns {0|1} 1 when the block is judged complete, otherwise 0.
 */
const newCodeBlockIsComplete = (testCase, result, expectedOutput) => {
    const expectedCode = expectedOutput?.[0]?.newCode;
    const generatedCode = result?.[0]?.newCode;
    // Without code on both sides there is nothing to compare; this also keeps
    // a missing value from being interpolated as the text "undefined" below.
    if (!expectedCode || !generatedCode) {
        return 0;
    }
    if (expectedCode.includes("test(")) {
        return generatedCode.includes("test(") && generatedCode.includes(testCase.name) ? 1 : 0;
    }
    const project = new ts_morph_1.Project();
    const expectedFunctions = project
        .createSourceFile("expected-updated-code.ts", expectedCode)
        .getFunctions();
    if (expectedFunctions.length) {
        // check for function updates
        const resultFunctions = project
            .createSourceFile("updated-code.ts", generatedCode)
            .getFunctions();
        const expectedName = expectedFunctions[0]?.getName();
        // Two missing names must not count as a match.
        return expectedName !== undefined && expectedName === resultFunctions[0]?.getName()
            ? 1
            : 0;
    }
    // check for class method updates: a bare method only parses as a method
    // declaration when it sits inside a class body.
    const methodsOf = (fileName, code) => project
        .createSourceFile(fileName, `class A {
${code}
}`)
        .getDescendantsOfKind(ts_morph_1.SyntaxKind.MethodDeclaration);
    const expectedMethods = methodsOf("expected-check-method.ts", expectedCode);
    const resultMethods = methodsOf("check-method.ts", generatedCode);
    if (resultMethods.length !== 1 || expectedMethods.length !== 1) {
        return 0;
    }
    return expectedMethods[0]?.getName() === resultMethods[0]?.getName() ? 1 : 0;
};
|
|
70
|
+
/**
 * Eval scorer: measures how faithfully the model copied the old code block,
 * as `1 - levenshtein(expected, actual) / expected.length`, floored at 0.
 * Only the first entry of each list is inspected.
 *
 * @param {Array<{oldCode?: string}>} result - Parsed updates from the model.
 * @param {Array<{oldCode?: string}>} expected - Parsed expected updates.
 * @returns {number} Similarity between 0 and 1; 0 when the model returned no
 *   old code or when there is no expected old code to compare against.
 */
const oldCodeTestBlockIsComplete = (result, expected) => {
    const oldCodeFromLlm = result?.[0]?.oldCode;
    if (!oldCodeFromLlm) {
        return 0;
    }
    const expectedOldCode = expected?.[0]?.oldCode || "";
    // An empty expectation would make the ratio below divide by zero.
    if (expectedOldCode.length === 0) {
        return 0;
    }
    const distance = (0, js_levenshtein_1.default)(expectedOldCode, oldCodeFromLlm);
    // The distance can exceed the expected length (much longer output), which
    // would push the score below zero; keep it a non-negative score.
    return Math.max(0, 1 - distance / expectedOldCode.length);
};
|
|
4
80
|
const appendCreateTestAgentEvaluate = async ({ item, trace }) => {
|
|
5
81
|
const { testCase, testFiles, pageFiles, testFilePath } = item.input;
|
|
6
82
|
const response = await (0, update_flow_1.getAppendCreateTestBlockCompletion)({
|
|
@@ -10,8 +86,31 @@ const appendCreateTestAgentEvaluate = async ({ item, trace }) => {
|
|
|
10
86
|
testFilePath,
|
|
11
87
|
trace,
|
|
12
88
|
});
|
|
89
|
+
const parsed = (0, utils_1.extractAppendTestUpdates)(response);
|
|
90
|
+
const expectedOutputParsed = (0, utils_1.extractAppendTestUpdates)(item.expectedOutput);
|
|
13
91
|
return {
|
|
14
|
-
scores: [
|
|
92
|
+
scores: [
|
|
93
|
+
{
|
|
94
|
+
name: "returns_single_create_test",
|
|
95
|
+
value: hasSingleCreateTestBlock(parsed),
|
|
96
|
+
},
|
|
97
|
+
{
|
|
98
|
+
name: "returns_complete_code_block",
|
|
99
|
+
value: newCodeBlockIsComplete(testCase, parsed, expectedOutputParsed),
|
|
100
|
+
},
|
|
101
|
+
{
|
|
102
|
+
name: "copies_old_block_correctly",
|
|
103
|
+
value: oldCodeTestBlockIsComplete(parsed, expectedOutputParsed),
|
|
104
|
+
},
|
|
105
|
+
{
|
|
106
|
+
name: "syntax_check",
|
|
107
|
+
value: parsed[0]?.newCode
|
|
108
|
+
? (0, web_1.isSyntaxValid)(parsed[0]?.newCode || "")
|
|
109
|
+
? 1
|
|
110
|
+
: 0
|
|
111
|
+
: 0,
|
|
112
|
+
},
|
|
113
|
+
],
|
|
15
114
|
output: response,
|
|
16
115
|
};
|
|
17
116
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@empiricalrun/test-gen",
|
|
3
|
-
"version": "0.38.
|
|
3
|
+
"version": "0.38.22",
|
|
4
4
|
"publishConfig": {
|
|
5
5
|
"registry": "https://registry.npmjs.org/",
|
|
6
6
|
"access": "public"
|
|
@@ -32,6 +32,7 @@
|
|
|
32
32
|
"@actions/core": "^1.10.1",
|
|
33
33
|
"@aws-sdk/client-s3": "^3.614.0",
|
|
34
34
|
"@aws-sdk/s3-request-presigner": "^3.614.0",
|
|
35
|
+
"@babel/parser": "^7.26.3",
|
|
35
36
|
"@playwright/test": "1.47.1",
|
|
36
37
|
"@types/sanitize-html": "^2.11.0",
|
|
37
38
|
"commander": "^12.1.0",
|
|
@@ -59,8 +60,8 @@
|
|
|
59
60
|
"tsx": "^4.16.2",
|
|
60
61
|
"typescript": "^5.3.3",
|
|
61
62
|
"@empiricalrun/llm": "^0.9.26",
|
|
62
|
-
"@empiricalrun/
|
|
63
|
-
"@empiricalrun/
|
|
63
|
+
"@empiricalrun/reporter": "^0.21.4",
|
|
64
|
+
"@empiricalrun/r2-uploader": "^0.3.7"
|
|
64
65
|
},
|
|
65
66
|
"devDependencies": {
|
|
66
67
|
"@types/detect-port": "^1.3.5",
|