@empiricalrun/test-gen 0.42.16 → 0.42.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/dist/agent/codegen/fix-ts-errors.d.ts.map +1 -1
- package/dist/agent/codegen/fix-ts-errors.js +4 -0
- package/dist/agent/codegen/generate-code-apply-changes.d.ts.map +1 -1
- package/dist/agent/codegen/generate-code-apply-changes.js +77 -117
- package/dist/agent/master/with-hints.d.ts +0 -7
- package/dist/agent/master/with-hints.d.ts.map +1 -1
- package/dist/agent/master/with-hints.js +1 -31
- package/package.json +4 -4
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,21 @@
|
|
|
1
1
|
# @empiricalrun/test-gen
|
|
2
2
|
|
|
3
|
+
## 0.42.18
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- 6f876ea: fix: remove code generation feedback loop from repo edit
|
|
8
|
+
- 658451e: fix: tracing for vitests
|
|
9
|
+
- Updated dependencies [658451e]
|
|
10
|
+
- @empiricalrun/llm@0.9.32
|
|
11
|
+
|
|
12
|
+
## 0.42.17
|
|
13
|
+
|
|
14
|
+
### Patch Changes
|
|
15
|
+
|
|
16
|
+
- 65e821a: chore: remove dead code around selector hints
|
|
17
|
+
- 286a1a5: fix: annotation test
|
|
18
|
+
|
|
3
19
|
## 0.42.16
|
|
4
20
|
|
|
5
21
|
### Patch Changes
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fix-ts-errors.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/fix-ts-errors.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAGhE,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAQhD,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAE7D,wBAAsB,8BAA8B,CAAC,EACnD,KAAK,EACL,MAA2B,EAC3B,IAAI,EACJ,OAAO,EACP,eAAe,EACf,QAAQ,EACR,OAAO,GACR,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,MAAM,CAAC,EAAE,YAAY,CAAC;IACtB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;IAChB,eAAe,EAAE,MAAM,CAAC;IACxB,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,
|
|
1
|
+
{"version":3,"file":"fix-ts-errors.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/fix-ts-errors.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAGhE,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAQhD,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAE7D,wBAAsB,8BAA8B,CAAC,EACnD,KAAK,EACL,MAA2B,EAC3B,IAAI,EACJ,OAAO,EACP,eAAe,EACf,QAAQ,EACR,OAAO,GACR,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,MAAM,CAAC,EAAE,YAAY,CAAC;IACtB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;IAChB,eAAe,EAAE,MAAM,CAAC;IACxB,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,iBA4EA"}
|
|
@@ -64,6 +64,10 @@ async function validateAndFixTypescriptErrors({ trace, logger = new logger_1.Cus
|
|
|
64
64
|
await (0, web_1.lintErrors)(file);
|
|
65
65
|
const validateTypesSpan = trace?.span({
|
|
66
66
|
name: "detect-type-errors-in-file",
|
|
67
|
+
input: {
|
|
68
|
+
filePath: file,
|
|
69
|
+
content: response,
|
|
70
|
+
},
|
|
67
71
|
});
|
|
68
72
|
errors = (0, web_1.validateTypescript)(file);
|
|
69
73
|
validateTypesSpan?.end({ output: { errors } });
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"generate-code-apply-changes.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/generate-code-apply-changes.ts"],"names":[],"mappings":"AAAA,OAAO,EAAuB,WAAW,EAAE,MAAM,mBAAmB,CAAC;
|
|
1
|
+
{"version":3,"file":"generate-code-apply-changes.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/generate-code-apply-changes.ts"],"names":[],"mappings":"AAAA,OAAO,EAAuB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAQrE,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAOhD,OAAO,EAAsB,UAAU,EAAE,MAAM,SAAS,CAAC;AAqLzD,wBAAgB,8BAA8B,CAAC,KAAK,EAAE,MAAM,UAkD3D;AA2DD,wBAAsB,2BAA2B,CAAC,EAChD,IAAI,EACJ,KAAK,EACL,MAAM,EACN,gBAAgB,GACjB,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,MAAM,CAAC,EAAE,YAAY,CAAC;IACtB,gBAAgB,EAAE,MAAM,OAAO,CAAC;QAAE,MAAM,EAAE,MAAM,GAAG,SAAS,CAAA;KAAE,CAAC,CAAC;CACjE,GAAG,OAAO,CAAC,UAAU,EAAE,CAAC,CAuIxB"}
|
|
@@ -264,12 +264,11 @@ In order to execute the task, FOLLOW BELOW STEPS:
|
|
|
264
264
|
}
|
|
265
265
|
function deDupUpdatedFiles(updatedFiles) {
|
|
266
266
|
return updatedFiles.filter((change, index, self) => index ===
|
|
267
|
-
self.findIndex((existing) => existing.filePath === change.filePath &&
|
|
268
|
-
existing.oldCode === change.oldCode &&
|
|
269
|
-
existing.newCode === change.newCode));
|
|
267
|
+
self.findIndex((existing) => existing.filePath === change.filePath));
|
|
270
268
|
}
|
|
271
269
|
async function generateCodeAndApplyChanges({ task, trace, logger, getRelevantFiles, }) {
|
|
272
|
-
let planRetries = 5;
|
|
270
|
+
// Reducing this from 5 to 3, if the required changes are getting missed will change it back.
|
|
271
|
+
let planRetries = 3;
|
|
273
272
|
let updatedFiles = [];
|
|
274
273
|
while (planRetries--) {
|
|
275
274
|
const generateCodeAndApplyChangesSpan = trace?.span({
|
|
@@ -300,120 +299,81 @@ async function generateCodeAndApplyChanges({ task, trace, logger, getRelevantFil
|
|
|
300
299
|
await (0, llm_1.flushAllTraces)();
|
|
301
300
|
return deDupUpdatedFiles(updatedFiles);
|
|
302
301
|
}
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
oldCode: "",
|
|
353
|
-
newCode: args.code,
|
|
354
|
-
reason: args.reason,
|
|
355
|
-
});
|
|
356
|
-
await fs_extra_1.default.mkdir((0, path_1.dirname)(args.filePath), { recursive: true });
|
|
357
|
-
await fs_extra_1.default.writeFile(args.filePath, args.code, "utf-8");
|
|
358
|
-
console.log(`Created file: ${args.filePath}`);
|
|
359
|
-
})();
|
|
360
|
-
}));
|
|
361
|
-
const strReplaceToolCalls = completion.tool_calls.filter((tc) => tc.function.name === types_1.CodeEditorToolCall.STR_REPLACE);
|
|
362
|
-
if (strReplaceToolCalls.length > 0) {
|
|
363
|
-
console.log(`str_replace tool calls: `, strReplaceToolCalls);
|
|
364
|
-
}
|
|
365
|
-
// Filter out the tool calls which are for replacing code in existing files
|
|
366
|
-
const fileChanges = strReplaceToolCalls
|
|
367
|
-
.map((toolCall) => (0, utils_1.parseJson)(toolCall.function.arguments))
|
|
368
|
-
.filter((f) => f.filePath && fs_extra_1.default.existsSync(f.filePath));
|
|
369
|
-
updatedFiles.push(...fileChanges);
|
|
370
|
-
let failedCodeUpdates;
|
|
371
|
-
// applyChangesResponse contains the errors occurred while applying the changes
|
|
372
|
-
failedCodeUpdates = await (0, utils_2.applyFileChangesUsingStrReplace)({
|
|
373
|
-
trace: codeEditorSpan,
|
|
374
|
-
fileChanges,
|
|
375
|
-
logger,
|
|
376
|
-
});
|
|
377
|
-
// Filter out the responses having errors
|
|
378
|
-
failedCodeUpdates = failedCodeUpdates.filter((f) => f?.error);
|
|
379
|
-
// Filter out the tool calls which have errors
|
|
380
|
-
const toolCallsWithErrors = codeEditorToolCalls.filter((toolCall) => {
|
|
381
|
-
const args = (0, utils_1.parseJson)(toolCall.function.arguments);
|
|
382
|
-
return failedCodeUpdates.find((response) => response.filePath === args.filePath);
|
|
383
|
-
});
|
|
384
|
-
if (failedCodeUpdates.length === 0) {
|
|
385
|
-
break;
|
|
386
|
-
}
|
|
387
|
-
logger?.log(`Failed to apply changes, retrying...`, failedCodeUpdates);
|
|
388
|
-
const feedback = failedCodeUpdates
|
|
389
|
-
.map((updates) => `For file ${updates.filePath}: ${updates.errorMessage}`)
|
|
390
|
-
.join("\n");
|
|
391
|
-
promptForStrReplace.push({
|
|
392
|
-
role: "assistant",
|
|
393
|
-
tool_calls: toolCallsWithErrors,
|
|
394
|
-
});
|
|
395
|
-
toolCallsWithErrors.forEach((toolCall) => {
|
|
396
|
-
promptForStrReplace.push({
|
|
397
|
-
role: "tool",
|
|
398
|
-
tool_call_id: toolCall.id,
|
|
399
|
-
content: `
|
|
400
|
-
Errors while executing the changes provided in above tool call:
|
|
401
|
-
${feedback}
|
|
402
|
-
|
|
403
|
-
Please fix the errors and return the updated code.
|
|
404
|
-
|
|
405
|
-
FOLLOW BELOW STEPS TO FIX THE ISSUES:
|
|
406
|
-
- First read the error message and understand the issue.
|
|
407
|
-
- Go through the new code block and current file code, to figure out the root cause of the issue.
|
|
408
|
-
- Compile the steps that you need to follow to fix the issue.
|
|
409
|
-
- Check the test names, to ensure that the changes are applied to the correct test.
|
|
410
|
-
- Use separate 'str_replace' tool to make the changes for each update.
|
|
411
|
-
- Return the updated code in the same format as provided in the tool call.
|
|
412
|
-
|
|
413
|
-
NOTE: ONLY MAKE THE CHANGES TO FIX THE ISSUES MENTIONED IN THE ERROR MESSAGE AND NOTHING ELSE. NO EXTRA CODE REFACTORING IS REQUIRED.
|
|
414
|
-
`,
|
|
302
|
+
const promptForStrReplace = [
|
|
303
|
+
{
|
|
304
|
+
role: "system",
|
|
305
|
+
content: systemPromptBuilderForRepoEdit(files),
|
|
306
|
+
},
|
|
307
|
+
{
|
|
308
|
+
role: "user",
|
|
309
|
+
content: userPromptBuilderForStrReplace(strReplacePlan),
|
|
310
|
+
},
|
|
311
|
+
];
|
|
312
|
+
const codeEditorSpan = generateCodeAndApplyChangesSpan?.span({
|
|
313
|
+
name: "code-editor-agent",
|
|
314
|
+
input: {
|
|
315
|
+
prompt: promptForStrReplace,
|
|
316
|
+
},
|
|
317
|
+
});
|
|
318
|
+
const llm = new llm_1.LLM({
|
|
319
|
+
trace: codeEditorSpan,
|
|
320
|
+
provider: "anthropic",
|
|
321
|
+
defaultModel: "claude-3-5-sonnet-20240620",
|
|
322
|
+
providerApiKey: constants_1.MODEL_API_KEYS["anthropic"],
|
|
323
|
+
});
|
|
324
|
+
const completion = await llm.createChatCompletion({
|
|
325
|
+
messages: promptForStrReplace,
|
|
326
|
+
modelParameters: {
|
|
327
|
+
...constants_1.DEFAULT_MODEL_PARAMETERS,
|
|
328
|
+
temperature: 0.1,
|
|
329
|
+
tool_choice: "required",
|
|
330
|
+
},
|
|
331
|
+
trace: codeEditorSpan,
|
|
332
|
+
tools: getCodeEditorToolCalls(),
|
|
333
|
+
});
|
|
334
|
+
codeEditorSpan?.end({ output: { completion } });
|
|
335
|
+
if (!completion?.tool_calls || completion?.tool_calls?.length === 0) {
|
|
336
|
+
continue;
|
|
337
|
+
}
|
|
338
|
+
// Filter out the tool calls which are for creating new files
|
|
339
|
+
const createFileToolCalls = completion.tool_calls.filter((tc) => tc.function.name === types_1.CodeEditorToolCall.CREATE_FILE);
|
|
340
|
+
if (createFileToolCalls.length > 0) {
|
|
341
|
+
console.log(`create_file tool calls: `, createFileToolCalls);
|
|
342
|
+
}
|
|
343
|
+
await Promise.all(createFileToolCalls.map((tc) => {
|
|
344
|
+
return (async () => {
|
|
345
|
+
const args = (0, utils_1.parseJson)(tc.function.arguments);
|
|
346
|
+
updatedFiles.push({
|
|
347
|
+
filePath: args.filePath,
|
|
348
|
+
oldCode: "",
|
|
349
|
+
newCode: args.code,
|
|
350
|
+
reason: args.reason,
|
|
415
351
|
});
|
|
416
|
-
|
|
352
|
+
await fs_extra_1.default.mkdir((0, path_1.dirname)(args.filePath), { recursive: true });
|
|
353
|
+
await fs_extra_1.default.writeFile(args.filePath, args.code, "utf-8");
|
|
354
|
+
console.log(`Created file: ${args.filePath}`);
|
|
355
|
+
})();
|
|
356
|
+
}));
|
|
357
|
+
const strReplaceToolCalls = completion.tool_calls.filter((tc) => tc.function.name === types_1.CodeEditorToolCall.STR_REPLACE);
|
|
358
|
+
if (strReplaceToolCalls.length > 0) {
|
|
359
|
+
console.log(`str_replace tool calls: `, strReplaceToolCalls);
|
|
360
|
+
}
|
|
361
|
+
// Filter out the tool calls which are for replacing code in existing files
|
|
362
|
+
const fileChanges = strReplaceToolCalls
|
|
363
|
+
.map((toolCall) => (0, utils_1.parseJson)(toolCall.function.arguments))
|
|
364
|
+
.filter((f) => f.filePath && fs_extra_1.default.existsSync(f.filePath));
|
|
365
|
+
// We add all the suggested changes to the updatedFiles array
|
|
366
|
+
// This is used to validate and format files later
|
|
367
|
+
updatedFiles.push(...fileChanges);
|
|
368
|
+
// applyChangesResponse contains the errors occurred while applying the changes
|
|
369
|
+
const updates = await (0, utils_2.applyFileChangesUsingStrReplace)({
|
|
370
|
+
trace: codeEditorSpan,
|
|
371
|
+
fileChanges,
|
|
372
|
+
logger,
|
|
373
|
+
});
|
|
374
|
+
const failedCodeUpdates = updates.filter((f) => f?.error);
|
|
375
|
+
if (failedCodeUpdates.length > 0) {
|
|
376
|
+
logger?.log(`Failed to apply changes, retrying...`, failedCodeUpdates);
|
|
417
377
|
}
|
|
418
378
|
}
|
|
419
379
|
return deDupUpdatedFiles(updatedFiles);
|
|
@@ -1,13 +1,6 @@
|
|
|
1
1
|
import { LLM, TraceClient } from "@empiricalrun/llm";
|
|
2
2
|
import OpenAI from "openai";
|
|
3
3
|
import { PlaywrightActions } from "../../actions";
|
|
4
|
-
import { BrowsingAgentOptions } from "../browsing";
|
|
5
|
-
export declare const getUserMessageWithForHints: ({ userMessage, options, pageScreenshot, annotatedPageScreenshot, }: {
|
|
6
|
-
userMessage: OpenAI.ChatCompletionUserMessageParam;
|
|
7
|
-
options?: BrowsingAgentOptions | undefined;
|
|
8
|
-
pageScreenshot: string;
|
|
9
|
-
annotatedPageScreenshot: string;
|
|
10
|
-
}) => string | OpenAI.ChatCompletionContentPart[];
|
|
11
4
|
export declare const triggerHintsFlow: ({ outputFromGetNextAction, generatedAnnotations, actions, llm, trace, }: {
|
|
12
5
|
outputFromGetNextAction: {
|
|
13
6
|
action: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"with-hints.d.ts","sourceRoot":"","sources":["../../../src/agent/master/with-hints.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;
|
|
1
|
+
{"version":3,"file":"with-hints.d.ts","sourceRoot":"","sources":["../../../src/agent/master/with-hints.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAOlD,eAAO,MAAM,gBAAgB;6BAOF;QACvB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,CAAC,EAAE,MAAM,CAAC;KAC5B;0BACqB,OAAO,MAAM,EAAE,GAAG,CAAC;aAChC,iBAAiB;SACrB,GAAG;;MAEN,QAAQ;IACV,sBAAsB,EAAE,OAAO,CAAC;IAChC,wBAAwB,EAAE,OAAO,qBAAqB,GAAG,SAAS,CAAC;CACpE,CAyGA,CAAC"}
|
|
@@ -1,41 +1,11 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.triggerHintsFlow = exports.getUserMessageWithForHints = void 0;
|
|
4
|
-
const vision_1 = require("@empiricalrun/llm/vision");
|
|
3
|
+
exports.triggerHintsFlow = void 0;
|
|
5
4
|
const assert_1 = require("../../actions/assert");
|
|
6
5
|
const click_1 = require("../../actions/click");
|
|
7
6
|
const fill_1 = require("../../actions/fill");
|
|
8
7
|
const hover_1 = require("../../actions/hover");
|
|
9
8
|
const constants_1 = require("../../constants");
|
|
10
|
-
const getUserMessageWithForHints = ({ userMessage, options, pageScreenshot, annotatedPageScreenshot, }) => {
|
|
11
|
-
return [
|
|
12
|
-
{
|
|
13
|
-
type: "text",
|
|
14
|
-
text: userMessage.content,
|
|
15
|
-
},
|
|
16
|
-
{
|
|
17
|
-
type: "text",
|
|
18
|
-
text: "Screenshot in normal mode 👇",
|
|
19
|
-
},
|
|
20
|
-
{
|
|
21
|
-
type: "image_url",
|
|
22
|
-
image_url: {
|
|
23
|
-
url: (0, vision_1.imageFormatForProvider)(options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER, pageScreenshot),
|
|
24
|
-
},
|
|
25
|
-
},
|
|
26
|
-
{
|
|
27
|
-
type: "text",
|
|
28
|
-
text: "Screenshot in annotated mode 👇",
|
|
29
|
-
},
|
|
30
|
-
{
|
|
31
|
-
type: "image_url",
|
|
32
|
-
image_url: {
|
|
33
|
-
url: (0, vision_1.imageFormatForProvider)(options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER, annotatedPageScreenshot),
|
|
34
|
-
},
|
|
35
|
-
},
|
|
36
|
-
];
|
|
37
|
-
};
|
|
38
|
-
exports.getUserMessageWithForHints = getUserMessageWithForHints;
|
|
39
9
|
const triggerHintsFlow = async ({ outputFromGetNextAction, generatedAnnotations, actions, llm, trace, }) => {
|
|
40
10
|
try {
|
|
41
11
|
const hasElementAnnotation = outputFromGetNextAction?.elementAnnotation?.length &&
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@empiricalrun/test-gen",
|
|
3
|
-
"version": "0.42.16",
|
|
3
|
+
"version": "0.42.18",
|
|
4
4
|
"publishConfig": {
|
|
5
5
|
"registry": "https://registry.npmjs.org/",
|
|
6
6
|
"access": "public"
|
|
@@ -72,9 +72,9 @@
|
|
|
72
72
|
"ts-morph": "^23.0.0",
|
|
73
73
|
"tsx": "^4.16.2",
|
|
74
74
|
"typescript": "^5.3.3",
|
|
75
|
-
"@empiricalrun/llm": "^0.9.
|
|
76
|
-
"@empiricalrun/
|
|
77
|
-
"@empiricalrun/
|
|
75
|
+
"@empiricalrun/llm": "^0.9.32",
|
|
76
|
+
"@empiricalrun/reporter": "^0.23.1",
|
|
77
|
+
"@empiricalrun/r2-uploader": "^0.3.8"
|
|
78
78
|
},
|
|
79
79
|
"devDependencies": {
|
|
80
80
|
"@playwright/test": "1.47.1",
|