@empiricalrun/test-gen 0.42.16 → 0.42.18

This diff shows the changes between two publicly released versions of a package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,21 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.42.18
4
+
5
+ ### Patch Changes
6
+
7
+ - 6f876ea: fix: remove code generation feedback loop from repo edit
8
+ - 658451e: fix: tracing for vitests
9
+ - Updated dependencies [658451e]
10
+ - @empiricalrun/llm@0.9.32
11
+
12
+ ## 0.42.17
13
+
14
+ ### Patch Changes
15
+
16
+ - 65e821a: chore: remove dead code around selector hints
17
+ - 286a1a5: fix: annotation test
18
+
3
19
  ## 0.42.16
4
20
 
5
21
  ### Patch Changes
@@ -1 +1 @@
1
- {"version":3,"file":"fix-ts-errors.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/fix-ts-errors.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAGhE,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAQhD,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAE7D,wBAAsB,8BAA8B,CAAC,EACnD,KAAK,EACL,MAA2B,EAC3B,IAAI,EACJ,OAAO,EACP,eAAe,EACf,QAAQ,EACR,OAAO,GACR,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,MAAM,CAAC,EAAE,YAAY,CAAC;IACtB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;IAChB,eAAe,EAAE,MAAM,CAAC;IACxB,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,iBAwEA"}
1
+ {"version":3,"file":"fix-ts-errors.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/fix-ts-errors.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAGhE,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAQhD,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAE7D,wBAAsB,8BAA8B,CAAC,EACnD,KAAK,EACL,MAA2B,EAC3B,IAAI,EACJ,OAAO,EACP,eAAe,EACf,QAAQ,EACR,OAAO,GACR,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,MAAM,CAAC,EAAE,YAAY,CAAC;IACtB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;IAChB,eAAe,EAAE,MAAM,CAAC;IACxB,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,iBA4EA"}
@@ -64,6 +64,10 @@ async function validateAndFixTypescriptErrors({ trace, logger = new logger_1.Cus
64
64
  await (0, web_1.lintErrors)(file);
65
65
  const validateTypesSpan = trace?.span({
66
66
  name: "detect-type-errors-in-file",
67
+ input: {
68
+ filePath: file,
69
+ content: response,
70
+ },
67
71
  });
68
72
  errors = (0, web_1.validateTypescript)(file);
69
73
  validateTypesSpan?.end({ output: { errors } });
@@ -1 +1 @@
1
- {"version":3,"file":"generate-code-apply-changes.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/generate-code-apply-changes.ts"],"names":[],"mappings":"AAAA,OAAO,EAAuB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AASrE,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAOhD,OAAO,EAAsB,UAAU,EAAsB,MAAM,SAAS,CAAC;AAqL7E,wBAAgB,8BAA8B,CAAC,KAAK,EAAE,MAAM,UAkD3D;AAgED,wBAAsB,2BAA2B,CAAC,EAChD,IAAI,EACJ,KAAK,EACL,MAAM,EACN,gBAAgB,GACjB,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,MAAM,CAAC,EAAE,YAAY,CAAC;IACtB,gBAAgB,EAAE,MAAM,OAAO,CAAC;QAAE,MAAM,EAAE,MAAM,GAAG,SAAS,CAAA;KAAE,CAAC,CAAC;CACjE,GAAG,OAAO,CAAC,UAAU,EAAE,CAAC,CAyLxB"}
1
+ {"version":3,"file":"generate-code-apply-changes.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/generate-code-apply-changes.ts"],"names":[],"mappings":"AAAA,OAAO,EAAuB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAQrE,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAOhD,OAAO,EAAsB,UAAU,EAAE,MAAM,SAAS,CAAC;AAqLzD,wBAAgB,8BAA8B,CAAC,KAAK,EAAE,MAAM,UAkD3D;AA2DD,wBAAsB,2BAA2B,CAAC,EAChD,IAAI,EACJ,KAAK,EACL,MAAM,EACN,gBAAgB,GACjB,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,MAAM,CAAC,EAAE,YAAY,CAAC;IACtB,gBAAgB,EAAE,MAAM,OAAO,CAAC;QAAE,MAAM,EAAE,MAAM,GAAG,SAAS,CAAA;KAAE,CAAC,CAAC;CACjE,GAAG,OAAO,CAAC,UAAU,EAAE,CAAC,CAuIxB"}
@@ -264,12 +264,11 @@ In order to execute the task, FOLLOW BELOW STEPS:
264
264
  }
265
265
  function deDupUpdatedFiles(updatedFiles) {
266
266
  return updatedFiles.filter((change, index, self) => index ===
267
- self.findIndex((existing) => existing.filePath === change.filePath &&
268
- existing.oldCode === change.oldCode &&
269
- existing.newCode === change.newCode));
267
+ self.findIndex((existing) => existing.filePath === change.filePath));
270
268
  }
271
269
  async function generateCodeAndApplyChanges({ task, trace, logger, getRelevantFiles, }) {
272
- let planRetries = 5;
270
+ // Reducing this from 5 to 3, if the required changes are getting missed will change it back.
271
+ let planRetries = 3;
273
272
  let updatedFiles = [];
274
273
  while (planRetries--) {
275
274
  const generateCodeAndApplyChangesSpan = trace?.span({
@@ -300,120 +299,81 @@ async function generateCodeAndApplyChanges({ task, trace, logger, getRelevantFil
300
299
  await (0, llm_1.flushAllTraces)();
301
300
  return deDupUpdatedFiles(updatedFiles);
302
301
  }
303
- let strReplaceRetries = 3;
304
- while (strReplaceRetries--) {
305
- const promptForStrReplace = [
306
- {
307
- role: "system",
308
- content: systemPromptBuilderForRepoEdit(files),
309
- },
310
- {
311
- role: "user",
312
- content: userPromptBuilderForStrReplace(strReplacePlan),
313
- },
314
- ];
315
- const codeEditorSpan = generateCodeAndApplyChangesSpan?.span({
316
- name: "code-editor-agent",
317
- input: {
318
- prompt: promptForStrReplace,
319
- },
320
- });
321
- const llm = new llm_1.LLM({
322
- trace: codeEditorSpan,
323
- provider: "anthropic",
324
- defaultModel: "claude-3-5-sonnet-20240620",
325
- providerApiKey: constants_1.MODEL_API_KEYS["anthropic"],
326
- });
327
- const completion = await llm.createChatCompletion({
328
- messages: promptForStrReplace,
329
- modelParameters: {
330
- ...constants_1.DEFAULT_MODEL_PARAMETERS,
331
- temperature: 0.1,
332
- tool_choice: "required",
333
- },
334
- trace: codeEditorSpan,
335
- tools: getCodeEditorToolCalls(),
336
- });
337
- codeEditorSpan?.end({ output: { completion } });
338
- if (!completion?.tool_calls || completion?.tool_calls?.length === 0) {
339
- break;
340
- }
341
- let codeEditorToolCalls = completion.tool_calls;
342
- // Filter out the tool calls which are for creating new files
343
- const createFileToolCalls = completion.tool_calls.filter((tc) => tc.function.name === types_1.CodeEditorToolCall.CREATE_FILE);
344
- if (createFileToolCalls.length > 0) {
345
- console.log(`create_file tool calls: `, createFileToolCalls);
346
- }
347
- await Promise.all(createFileToolCalls.map((tc) => {
348
- return (async () => {
349
- const args = (0, utils_1.parseJson)(tc.function.arguments);
350
- updatedFiles.push({
351
- filePath: args.filePath,
352
- oldCode: "",
353
- newCode: args.code,
354
- reason: args.reason,
355
- });
356
- await fs_extra_1.default.mkdir((0, path_1.dirname)(args.filePath), { recursive: true });
357
- await fs_extra_1.default.writeFile(args.filePath, args.code, "utf-8");
358
- console.log(`Created file: ${args.filePath}`);
359
- })();
360
- }));
361
- const strReplaceToolCalls = completion.tool_calls.filter((tc) => tc.function.name === types_1.CodeEditorToolCall.STR_REPLACE);
362
- if (strReplaceToolCalls.length > 0) {
363
- console.log(`str_replace tool calls: `, strReplaceToolCalls);
364
- }
365
- // Filter out the tool calls which are for replacing code in existing files
366
- const fileChanges = strReplaceToolCalls
367
- .map((toolCall) => (0, utils_1.parseJson)(toolCall.function.arguments))
368
- .filter((f) => f.filePath && fs_extra_1.default.existsSync(f.filePath));
369
- updatedFiles.push(...fileChanges);
370
- let failedCodeUpdates;
371
- // applyChangesResponse contains the errors occurred while applying the changes
372
- failedCodeUpdates = await (0, utils_2.applyFileChangesUsingStrReplace)({
373
- trace: codeEditorSpan,
374
- fileChanges,
375
- logger,
376
- });
377
- // Filter out the responses having errors
378
- failedCodeUpdates = failedCodeUpdates.filter((f) => f?.error);
379
- // Filter out the tool calls which have errors
380
- const toolCallsWithErrors = codeEditorToolCalls.filter((toolCall) => {
381
- const args = (0, utils_1.parseJson)(toolCall.function.arguments);
382
- return failedCodeUpdates.find((response) => response.filePath === args.filePath);
383
- });
384
- if (failedCodeUpdates.length === 0) {
385
- break;
386
- }
387
- logger?.log(`Failed to apply changes, retrying...`, failedCodeUpdates);
388
- const feedback = failedCodeUpdates
389
- .map((updates) => `For file ${updates.filePath}: ${updates.errorMessage}`)
390
- .join("\n");
391
- promptForStrReplace.push({
392
- role: "assistant",
393
- tool_calls: toolCallsWithErrors,
394
- });
395
- toolCallsWithErrors.forEach((toolCall) => {
396
- promptForStrReplace.push({
397
- role: "tool",
398
- tool_call_id: toolCall.id,
399
- content: `
400
- Errors while executing the changes provided in above tool call:
401
- ${feedback}
402
-
403
- Please fix the errors and return the updated code.
404
-
405
- FOLLOW BELOW STEPS TO FIX THE ISSUES:
406
- - First read the error message and understand the issue.
407
- - Go through the new code block and current file code, to figure out the root cause of the issue.
408
- - Compile the steps that you need to follow to fix the issue.
409
- - Check the test names, to ensure that the changes are applied to the correct test.
410
- - Use separate 'str_replace' tool to make the changes for each update.
411
- - Return the updated code in the same format as provided in the tool call.
412
-
413
- NOTE: ONLY MAKE THE CHANGES TO FIX THE ISSUES MENTIONED IN THE ERROR MESSAGE AND NOTHING ELSE. NO EXTRA CODE REFACTORING IS REQUIRED.
414
- `,
302
+ const promptForStrReplace = [
303
+ {
304
+ role: "system",
305
+ content: systemPromptBuilderForRepoEdit(files),
306
+ },
307
+ {
308
+ role: "user",
309
+ content: userPromptBuilderForStrReplace(strReplacePlan),
310
+ },
311
+ ];
312
+ const codeEditorSpan = generateCodeAndApplyChangesSpan?.span({
313
+ name: "code-editor-agent",
314
+ input: {
315
+ prompt: promptForStrReplace,
316
+ },
317
+ });
318
+ const llm = new llm_1.LLM({
319
+ trace: codeEditorSpan,
320
+ provider: "anthropic",
321
+ defaultModel: "claude-3-5-sonnet-20240620",
322
+ providerApiKey: constants_1.MODEL_API_KEYS["anthropic"],
323
+ });
324
+ const completion = await llm.createChatCompletion({
325
+ messages: promptForStrReplace,
326
+ modelParameters: {
327
+ ...constants_1.DEFAULT_MODEL_PARAMETERS,
328
+ temperature: 0.1,
329
+ tool_choice: "required",
330
+ },
331
+ trace: codeEditorSpan,
332
+ tools: getCodeEditorToolCalls(),
333
+ });
334
+ codeEditorSpan?.end({ output: { completion } });
335
+ if (!completion?.tool_calls || completion?.tool_calls?.length === 0) {
336
+ continue;
337
+ }
338
+ // Filter out the tool calls which are for creating new files
339
+ const createFileToolCalls = completion.tool_calls.filter((tc) => tc.function.name === types_1.CodeEditorToolCall.CREATE_FILE);
340
+ if (createFileToolCalls.length > 0) {
341
+ console.log(`create_file tool calls: `, createFileToolCalls);
342
+ }
343
+ await Promise.all(createFileToolCalls.map((tc) => {
344
+ return (async () => {
345
+ const args = (0, utils_1.parseJson)(tc.function.arguments);
346
+ updatedFiles.push({
347
+ filePath: args.filePath,
348
+ oldCode: "",
349
+ newCode: args.code,
350
+ reason: args.reason,
415
351
  });
416
- });
352
+ await fs_extra_1.default.mkdir((0, path_1.dirname)(args.filePath), { recursive: true });
353
+ await fs_extra_1.default.writeFile(args.filePath, args.code, "utf-8");
354
+ console.log(`Created file: ${args.filePath}`);
355
+ })();
356
+ }));
357
+ const strReplaceToolCalls = completion.tool_calls.filter((tc) => tc.function.name === types_1.CodeEditorToolCall.STR_REPLACE);
358
+ if (strReplaceToolCalls.length > 0) {
359
+ console.log(`str_replace tool calls: `, strReplaceToolCalls);
360
+ }
361
+ // Filter out the tool calls which are for replacing code in existing files
362
+ const fileChanges = strReplaceToolCalls
363
+ .map((toolCall) => (0, utils_1.parseJson)(toolCall.function.arguments))
364
+ .filter((f) => f.filePath && fs_extra_1.default.existsSync(f.filePath));
365
+ // We add all the suggested changes to the updatedFiles array
366
+ // This is used to validate and format files later
367
+ updatedFiles.push(...fileChanges);
368
+ // applyChangesResponse contains the errors occurred while applying the changes
369
+ const updates = await (0, utils_2.applyFileChangesUsingStrReplace)({
370
+ trace: codeEditorSpan,
371
+ fileChanges,
372
+ logger,
373
+ });
374
+ const failedCodeUpdates = updates.filter((f) => f?.error);
375
+ if (failedCodeUpdates.length > 0) {
376
+ logger?.log(`Failed to apply changes, retrying...`, failedCodeUpdates);
417
377
  }
418
378
  }
419
379
  return deDupUpdatedFiles(updatedFiles);
@@ -1,13 +1,6 @@
1
1
  import { LLM, TraceClient } from "@empiricalrun/llm";
2
2
  import OpenAI from "openai";
3
3
  import { PlaywrightActions } from "../../actions";
4
- import { BrowsingAgentOptions } from "../browsing";
5
- export declare const getUserMessageWithForHints: ({ userMessage, options, pageScreenshot, annotatedPageScreenshot, }: {
6
- userMessage: OpenAI.ChatCompletionUserMessageParam;
7
- options?: BrowsingAgentOptions | undefined;
8
- pageScreenshot: string;
9
- annotatedPageScreenshot: string;
10
- }) => string | OpenAI.ChatCompletionContentPart[];
11
4
  export declare const triggerHintsFlow: ({ outputFromGetNextAction, generatedAnnotations, actions, llm, trace, }: {
12
5
  outputFromGetNextAction: {
13
6
  action: string;
@@ -1 +1 @@
1
- {"version":3,"file":"with-hints.d.ts","sourceRoot":"","sources":["../../../src/agent/master/with-hints.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAErD,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AASlD,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAEnD,eAAO,MAAM,0BAA0B;iBAMxB,OAAO,8BAA8B;;oBAElC,MAAM;6BACG,MAAM;MAC7B,MAAM,GAAG,OAAO,yBAAyB,EAiC5C,CAAC;AAEF,eAAO,MAAM,gBAAgB;6BAOF;QACvB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,CAAC,EAAE,MAAM,CAAC;KAC5B;0BACqB,OAAO,MAAM,EAAE,GAAG,CAAC;aAChC,iBAAiB;SACrB,GAAG;;MAEN,QAAQ;IACV,sBAAsB,EAAE,OAAO,CAAC;IAChC,wBAAwB,EAAE,OAAO,qBAAqB,GAAG,SAAS,CAAC;CACpE,CAyGA,CAAC"}
1
+ {"version":3,"file":"with-hints.d.ts","sourceRoot":"","sources":["../../../src/agent/master/with-hints.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAOlD,eAAO,MAAM,gBAAgB;6BAOF;QACvB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,CAAC,EAAE,MAAM,CAAC;KAC5B;0BACqB,OAAO,MAAM,EAAE,GAAG,CAAC;aAChC,iBAAiB;SACrB,GAAG;;MAEN,QAAQ;IACV,sBAAsB,EAAE,OAAO,CAAC;IAChC,wBAAwB,EAAE,OAAO,qBAAqB,GAAG,SAAS,CAAC;CACpE,CAyGA,CAAC"}
@@ -1,41 +1,11 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.triggerHintsFlow = exports.getUserMessageWithForHints = void 0;
4
- const vision_1 = require("@empiricalrun/llm/vision");
3
+ exports.triggerHintsFlow = void 0;
5
4
  const assert_1 = require("../../actions/assert");
6
5
  const click_1 = require("../../actions/click");
7
6
  const fill_1 = require("../../actions/fill");
8
7
  const hover_1 = require("../../actions/hover");
9
8
  const constants_1 = require("../../constants");
10
- const getUserMessageWithForHints = ({ userMessage, options, pageScreenshot, annotatedPageScreenshot, }) => {
11
- return [
12
- {
13
- type: "text",
14
- text: userMessage.content,
15
- },
16
- {
17
- type: "text",
18
- text: "Screenshot in normal mode 👇",
19
- },
20
- {
21
- type: "image_url",
22
- image_url: {
23
- url: (0, vision_1.imageFormatForProvider)(options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER, pageScreenshot),
24
- },
25
- },
26
- {
27
- type: "text",
28
- text: "Screenshot in annotated mode 👇",
29
- },
30
- {
31
- type: "image_url",
32
- image_url: {
33
- url: (0, vision_1.imageFormatForProvider)(options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER, annotatedPageScreenshot),
34
- },
35
- },
36
- ];
37
- };
38
- exports.getUserMessageWithForHints = getUserMessageWithForHints;
39
9
  const triggerHintsFlow = async ({ outputFromGetNextAction, generatedAnnotations, actions, llm, trace, }) => {
40
10
  try {
41
11
  const hasElementAnnotation = outputFromGetNextAction?.elementAnnotation?.length &&
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@empiricalrun/test-gen",
3
- "version": "0.42.16",
3
+ "version": "0.42.18",
4
4
  "publishConfig": {
5
5
  "registry": "https://registry.npmjs.org/",
6
6
  "access": "public"
@@ -72,9 +72,9 @@
72
72
  "ts-morph": "^23.0.0",
73
73
  "tsx": "^4.16.2",
74
74
  "typescript": "^5.3.3",
75
- "@empiricalrun/llm": "^0.9.31",
76
- "@empiricalrun/r2-uploader": "^0.3.8",
77
- "@empiricalrun/reporter": "^0.23.1"
75
+ "@empiricalrun/llm": "^0.9.32",
76
+ "@empiricalrun/reporter": "^0.23.1",
77
+ "@empiricalrun/r2-uploader": "^0.3.8"
78
78
  },
79
79
  "devDependencies": {
80
80
  "@playwright/test": "1.47.1",