@empiricalrun/test-gen 0.56.3 → 0.57.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/CHANGELOG.md +21 -0
  2. package/dist/agent/browsing/run.d.ts +2 -1
  3. package/dist/agent/browsing/run.d.ts.map +1 -1
  4. package/dist/agent/browsing/run.js +2 -1
  5. package/dist/agent/chat/index.d.ts.map +1 -1
  6. package/dist/agent/chat/index.js +9 -2
  7. package/dist/agent/chat/state.d.ts +1 -1
  8. package/dist/agent/chat/state.d.ts.map +1 -1
  9. package/dist/agent/chat/state.js +1 -1
  10. package/dist/index.d.ts.map +1 -1
  11. package/dist/index.js +18 -0
  12. package/dist/tool-call-service/index.d.ts +3 -2
  13. package/dist/tool-call-service/index.d.ts.map +1 -1
  14. package/dist/tool-call-service/index.js +8 -3
  15. package/dist/tools/commit-and-create-pr.d.ts +1 -1
  16. package/dist/tools/commit-and-create-pr.d.ts.map +1 -1
  17. package/dist/tools/commit-and-create-pr.js +13 -36
  18. package/dist/tools/{grep.d.ts → grep/index.d.ts} +1 -1
  19. package/dist/tools/grep/index.d.ts.map +1 -0
  20. package/dist/tools/grep/index.js +129 -0
  21. package/dist/tools/grep/ripgrep/index.d.ts +5 -0
  22. package/dist/tools/grep/ripgrep/index.d.ts.map +1 -0
  23. package/dist/tools/grep/ripgrep/index.js +90 -0
  24. package/dist/tools/grep/ripgrep/types.d.ts +45 -0
  25. package/dist/tools/grep/ripgrep/types.d.ts.map +1 -0
  26. package/dist/tools/grep/ripgrep/types.js +15 -0
  27. package/dist/tools/str_replace_editor.d.ts.map +1 -1
  28. package/dist/tools/str_replace_editor.js +70 -2
  29. package/dist/tools/test-gen-browser.d.ts.map +1 -1
  30. package/dist/tools/test-gen-browser.js +2 -1
  31. package/dist/utils/checkpoint.d.ts +3 -0
  32. package/dist/utils/checkpoint.d.ts.map +1 -0
  33. package/dist/utils/checkpoint.js +19 -0
  34. package/dist/utils/exec.d.ts.map +1 -1
  35. package/dist/utils/exec.js +15 -7
  36. package/dist/utils/git.d.ts +4 -0
  37. package/dist/utils/git.d.ts.map +1 -1
  38. package/dist/utils/git.js +26 -1
  39. package/package.json +2 -2
  40. package/dist/tools/grep.d.ts.map +0 -1
  41. package/dist/tools/grep.js +0 -73
package/CHANGELOG.md CHANGED
@@ -1,5 +1,26 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.57.0
4
+
5
+ ### Minor Changes
6
+
7
+ - df30a88: feat: commit context to git on every tool file change
8
+
9
+ ### Patch Changes
10
+
11
+ - 8a847fd: feat: tracing for CUA when it runs in a tool
12
+ - 597072c: feat: suggest unique occurrences for old_str in text editor tools
13
+ - 813995a: fix: grep tool should handle whitespaces in search string
14
+ - Updated dependencies [629bbd2]
15
+ - @empiricalrun/llm@0.15.3
16
+
17
+ ## 0.56.4
18
+
19
+ ### Patch Changes
20
+
21
+ - 0d53865: fix: browser agent tool call has different exit code on linux
22
+ - 8e5dcd2: feat: move to ripgrep for platform independent grep tool calls
23
+
3
24
  ## 0.56.3
4
25
 
5
26
  ### Patch Changes
@@ -4,9 +4,10 @@ type GenerateTestsType = {
4
4
  pwProjectsFilter: string[];
5
5
  testGenToken: string;
6
6
  repoDir: string;
7
+ traceId?: string;
7
8
  editFileWithGeneratedCode: boolean;
8
9
  };
9
- export declare function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, pwProjectsFilter, testGenToken, repoDir, editFileWithGeneratedCode, }: GenerateTestsType): Promise<{
10
+ export declare function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, pwProjectsFilter, testGenToken, repoDir, traceId, editFileWithGeneratedCode, }: GenerateTestsType): Promise<{
10
11
  isError: boolean;
11
12
  error: string;
12
13
  actionsSummary?: string;
@@ -1 +1 @@
1
- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/run.ts"],"names":[],"mappings":"AAiBA,KAAK,iBAAiB,GAAG;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,yBAAyB,EAAE,OAAO,CAAC;CACpC,CAAC;AAEF,wBAAsB,6BAA6B,CAAC,EAClD,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,EAChB,YAAY,EACZ,OAAO,EACP,yBAAyB,GAC1B,EAAE,iBAAiB,GAAG,OAAO,CAAC;IAC7B,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB,CAAC,CAsFD"}
1
+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/run.ts"],"names":[],"mappings":"AAiBA,KAAK,iBAAiB,GAAG;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,yBAAyB,EAAE,OAAO,CAAC;CACpC,CAAC;AAEF,wBAAsB,6BAA6B,CAAC,EAClD,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,EAChB,YAAY,EACZ,OAAO,EACP,OAAO,EACP,yBAAyB,GAC1B,EAAE,iBAAiB,GAAG,OAAO,CAAC;IAC7B,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB,CAAC,CAuFD"}
@@ -10,7 +10,7 @@ const web_1 = require("../../bin/utils/platform/web");
10
10
  const server_1 = require("../../file/server");
11
11
  const exec_1 = require("../../utils/exec");
12
12
  const utils_1 = require("./utils");
13
- async function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, pwProjectsFilter, testGenToken, repoDir, editFileWithGeneratedCode, }) {
13
+ async function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, pwProjectsFilter, testGenToken, repoDir, traceId, editFileWithGeneratedCode, }) {
14
14
  if (!fs_1.default.existsSync(testFilePath)) {
15
15
  throw new Error(`File for master agent to run not found: ${testFilePath}`);
16
16
  }
@@ -53,6 +53,7 @@ async function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, p
53
53
  TEST_GEN_TOKEN: testGenToken,
54
54
  PAGE_VAR_NAME: pageVar || "page",
55
55
  DISPLAY: ":99",
56
+ LANGFUSE_TRACE_ID: traceId || "",
56
57
  },
57
58
  });
58
59
  }
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"AAaA,OAAO,EAAoB,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAiBhE,wBAAsB,kBAAkB,CAAC,EACvC,mBAAmB,EACnB,aAAa,EACb,oBAAoB,GACrB,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,mBAAmB,EAAE,OAAO,CAAC;IAC7B,oBAAoB,EAAE,MAAM,GAAG,SAAS,CAAC;CAC1C,iBAsFA;AAuBD,wBAAsB,wBAAwB,CAAC,EAC7C,aAAa,EACb,aAAa,GACd,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,aAAa,EAAE,MAAM,CAAC;CACvB,iBA2CA"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"AAaA,OAAO,EAAoB,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAiBhE,wBAAsB,kBAAkB,CAAC,EACvC,mBAAmB,EACnB,aAAa,EACb,oBAAoB,GACrB,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,mBAAmB,EAAE,OAAO,CAAC;IAC7B,oBAAoB,EAAE,MAAM,GAAG,SAAS,CAAC;CAC1C,iBA8FA;AA0BD,wBAAsB,wBAAwB,CAAC,EAC7C,aAAa,EACb,aAAa,GACd,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,aAAa,EAAE,MAAM,CAAC;CACvB,iBA+CA"}
@@ -23,6 +23,10 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
23
23
  if (useDiskForChatState) {
24
24
  chatState = (0, state_1.loadChatState)();
25
25
  }
26
+ // TODO: Store branch name in chat state so that we don't recreate it every time
27
+ const randomId = crypto.randomUUID().substring(0, 8);
28
+ const branchName = `branch-${randomId}`;
29
+ await (0, git_1.checkoutBranch)(branchName);
26
30
  let messagesLoadedFromDisk = chatState?.messages || [];
27
31
  let chatModel = (0, model_1.createChatModel)(messagesLoadedFromDisk, selectedModel);
28
32
  if (initialPromptContent && chatModel.messages.length === 0) {
@@ -84,7 +88,7 @@ async function runChatAgentForCLI({ useDiskForChatState, selectedModel, initialP
84
88
  }
85
89
  else {
86
90
  // TODO: Should we pass a loader function? That would allow us to show a spinner
87
- const toolCallService = new tool_call_service_1.ToolCallService(null, selectedModel);
91
+ const toolCallService = new tool_call_service_1.ToolCallService(null, selectedModel, branchName);
88
92
  await (0, agent_loop_1.chatAgentLoop)({
89
93
  chatModel,
90
94
  selectedModel,
@@ -112,6 +116,9 @@ async function getChatSessionFromDashboard(chatSessionId) {
112
116
  Authorization: `Bearer ${process.env.EMPIRICALRUN_API_KEY}`,
113
117
  },
114
118
  });
119
+ if (!response.ok) {
120
+ throw new Error(`Failed to get chat session: ${response.statusText}`);
121
+ }
115
122
  const data = await response.json();
116
123
  return data.data.chat_session;
117
124
  }
@@ -128,7 +135,7 @@ async function runChatAgentForDashboard({ chatSessionId, selectedModel, }) {
128
135
  chatSessionId,
129
136
  },
130
137
  });
131
- const toolCallService = new tool_call_service_1.ToolCallService(chatSessionId, selectedModel);
138
+ const toolCallService = new tool_call_service_1.ToolCallService(chatSessionId, selectedModel, branchName);
132
139
  await (0, git_1.checkoutBranch)(branchName);
133
140
  let chatModel = (0, model_1.createChatModel)(chatState.messages, selectedModel);
134
141
  let reporterFunc = async (chatState, latest) => {
@@ -7,7 +7,7 @@ export type ChatStateOnDisk<T> = {
7
7
  model: SupportedChatModels;
8
8
  messages: T[];
9
9
  };
10
- export declare function createChatState(userPrompt: string, existingState: ChatStateOnDisk<any>, selectedModel: SupportedChatModels): ChatStateOnDisk<unknown>;
10
+ export declare function createChatState(userPrompt: string, existingState: ChatStateOnDisk<any> | undefined, selectedModel: SupportedChatModels): ChatStateOnDisk<unknown>;
11
11
  export declare function createChatStateForMessages<T>(messages: any, selectedModel: SupportedChatModels): ChatStateOnDisk<T>;
12
12
  export declare function chatStateFromModel<T>(chatModel: IChatModel<T>, selectedModel: SupportedChatModels): ChatStateOnDisk<unknown>;
13
13
  export declare function loadChatState<T>(): ChatStateOnDisk<T> | undefined;
@@ -1 +1 @@
1
- {"version":3,"file":"state.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/state.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAKpD,OAAO,EAAE,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAE9C,eAAO,MAAM,0BAA0B,eAAe,CAAC;AAEvD,eAAO,MAAM,eAAe,QAI3B,CAAC;AAEF,MAAM,MAAM,eAAe,CAAC,CAAC,IAAI;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,mBAAmB,CAAC;IAC3B,QAAQ,EAAE,CAAC,EAAE,CAAC;CACf,CAAC;AAEF,wBAAgB,eAAe,CAC7B,UAAU,EAAE,MAAM,EAClB,aAAa,EAAE,eAAe,CAAC,GAAG,CAAC,EACnC,aAAa,EAAE,mBAAmB,4BAMnC;AAED,wBAAgB,0BAA0B,CAAC,CAAC,EAC1C,QAAQ,EAAE,GAAG,EACb,aAAa,EAAE,mBAAmB,GACjC,eAAe,CAAC,CAAC,CAAC,CAOpB;AAED,wBAAgB,kBAAkB,CAAC,CAAC,EAClC,SAAS,EAAE,UAAU,CAAC,CAAC,CAAC,EACxB,aAAa,EAAE,mBAAmB,4BAGnC;AAED,wBAAgB,aAAa,CAAC,CAAC,KAAK,eAAe,CAAC,CAAC,CAAC,GAAG,SAAS,CAajE;AAED,wBAAgB,UAAU,CAAC,CAAC,EAC1B,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,EAClB,aAAa,EAAE,mBAAmB,QAsBnC"}
1
+ {"version":3,"file":"state.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/state.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAKpD,OAAO,EAAE,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAE9C,eAAO,MAAM,0BAA0B,eAAe,CAAC;AAEvD,eAAO,MAAM,eAAe,QAI3B,CAAC;AAEF,MAAM,MAAM,eAAe,CAAC,CAAC,IAAI;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,mBAAmB,CAAC;IAC3B,QAAQ,EAAE,CAAC,EAAE,CAAC;CACf,CAAC;AAEF,wBAAgB,eAAe,CAC7B,UAAU,EAAE,MAAM,EAClB,aAAa,EAAE,eAAe,CAAC,GAAG,CAAC,GAAG,SAAS,EAC/C,aAAa,EAAE,mBAAmB,4BAMnC;AAED,wBAAgB,0BAA0B,CAAC,CAAC,EAC1C,QAAQ,EAAE,GAAG,EACb,aAAa,EAAE,mBAAmB,GACjC,eAAe,CAAC,CAAC,CAAC,CAOpB;AAED,wBAAgB,kBAAkB,CAAC,CAAC,EAClC,SAAS,EAAE,UAAU,CAAC,CAAC,CAAC,EACxB,aAAa,EAAE,mBAAmB,4BAGnC;AAED,wBAAgB,aAAa,CAAC,CAAC,KAAK,eAAe,CAAC,CAAC,CAAC,GAAG,SAAS,CAajE;AAED,wBAAgB,UAAU,CAAC,CAAC,EAC1B,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,EAClB,aAAa,EAAE,mBAAmB,QAsBnC"}
@@ -10,7 +10,7 @@ const model_1 = require("./model");
10
10
  exports.CURRENT_CHAT_STATE_VERSION = "20250327.1";
11
11
  exports.CHAT_STATE_PATH = path_1.default.join(process.cwd(), ".empiricalrun", "last-chat.json");
12
12
  function createChatState(userPrompt, existingState, selectedModel) {
13
- const messages = existingState.messages || [];
13
+ const messages = existingState?.messages || [];
14
14
  const chatModel = (0, model_1.createChatModel)(messages, selectedModel);
15
15
  chatModel.pushUserMessage(userPrompt);
16
16
  return createChatStateForMessages(chatModel.messages, selectedModel);
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,YAAY,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAQhD,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAoBpC,wBAAsB,UAAU,CAC9B,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,IAAI,GAAG,YAAY,EAC5B,KAAK,CAAC,EAAE,SAAS,iBAwDlB"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,YAAY,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAQhD,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAoBpC,wBAAsB,UAAU,CAC9B,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,IAAI,GAAG,YAAY,EAC5B,KAAK,CAAC,EAAE,SAAS,iBA0ElB"}
package/dist/index.js CHANGED
@@ -27,6 +27,18 @@ async function createTest(task, pageRef, scope) {
27
27
  const testConfigArg = process.env.TEST_GEN_TOKEN;
28
28
  const testGenConfig = (0, scenarios_1.loadTestConfigs)(testConfigArg);
29
29
  const useComputerUseAgent = testGenConfig.options?.useComputerUseAgent;
30
+ let trace, span;
31
+ if (process.env.LANGFUSE_TRACE_ID) {
32
+ trace = llm_1.langfuseInstance?.trace({
33
+ id: process.env.LANGFUSE_TRACE_ID,
34
+ });
35
+ span = trace?.span({
36
+ name: "computer-use-agent-span",
37
+ input: {
38
+ task,
39
+ },
40
+ });
41
+ }
30
42
  if (testGenConfig.options && testGenConfig.options.metadata) {
31
43
  (0, reporter_1.setReporterConfig)({
32
44
  projectRepoName: testGenConfig.options?.metadata.projectRepoName,
@@ -48,6 +60,7 @@ async function createTest(task, pageRef, scope) {
48
60
  agentResult = await (0, cua_1.createTestUsingComputerUseAgent)({
49
61
  task,
50
62
  page,
63
+ trace: span,
51
64
  });
52
65
  }
53
66
  else {
@@ -69,6 +82,11 @@ async function createTest(task, pageRef, scope) {
69
82
  importPaths,
70
83
  actionsSummary,
71
84
  });
85
+ span?.end({
86
+ output: {
87
+ actionsSummary,
88
+ },
89
+ });
72
90
  }
73
91
  finally {
74
92
  // Ensure listeners are removed even if an error occurs
@@ -3,14 +3,15 @@ import { PendingToolCall, Tool, ToolResult } from "@empiricalrun/llm/chat";
3
3
  import { SupportedChatModels } from "../agent/chat/types";
4
4
  export type { SupportedChatModels };
5
5
  type ToolExecutors = {
6
- [key: string]: (input: any) => Promise<ToolResult>;
6
+ [key: string]: (input: any, trace?: TraceClient) => Promise<ToolResult>;
7
7
  };
8
8
  export declare class ToolCallService {
9
9
  tools: Tool[];
10
10
  toolExecutors: ToolExecutors;
11
11
  chatSessionId: number | null;
12
12
  selectedModel: SupportedChatModels;
13
- constructor(chatSessionId: number | null, selectedModel: SupportedChatModels);
13
+ branchName: string;
14
+ constructor(chatSessionId: number | null, selectedModel: SupportedChatModels, branchName: string);
14
15
  getTools(): Promise<{
15
16
  tools: Tool[];
16
17
  }>;
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/tool-call-service/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAE,eAAe,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAE3E,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAa1D,YAAY,EAAE,mBAAmB,EAAE,CAAC;AAEpC,KAAK,aAAa,GAAG;IACnB,CAAC,GAAG,EAAE,MAAM,GAAG,CAAC,KAAK,EAAE,GAAG,KAAK,OAAO,CAAC,UAAU,CAAC,CAAC;CACpD,CAAC;AA4BF,qBAAa,eAAe;IAC1B,KAAK,EAAE,IAAI,EAAE,CAAM;IACnB,aAAa,EAAE,aAAa,CAAM;IAClC,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,aAAa,EAAE,mBAAmB,CAAC;gBAGjC,aAAa,EAAE,MAAM,GAAG,IAAI,EAC5B,aAAa,EAAE,mBAAmB;IAgB9B,QAAQ;;;IAaR,OAAO,CACX,SAAS,EAAE,eAAe,EAAE,EAC5B,QAAQ,EAAE,OAAO,EACjB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,UAAU,EAAE,CAAC;CAuDzB"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/tool-call-service/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAE,eAAe,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAE3E,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAe1D,YAAY,EAAE,mBAAmB,EAAE,CAAC;AAEpC,KAAK,aAAa,GAAG;IACnB,CAAC,GAAG,EAAE,MAAM,GAAG,CAAC,KAAK,EAAE,GAAG,EAAE,KAAK,CAAC,EAAE,WAAW,KAAK,OAAO,CAAC,UAAU,CAAC,CAAC;CACzE,CAAC;AA6BF,qBAAa,eAAe;IAC1B,KAAK,EAAE,IAAI,EAAE,CAAM;IACnB,aAAa,EAAE,aAAa,CAAM;IAClC,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,aAAa,EAAE,mBAAmB,CAAC;IACnC,UAAU,EAAE,MAAM,CAAC;gBAEjB,aAAa,EAAE,MAAM,GAAG,IAAI,EAC5B,aAAa,EAAE,mBAAmB,EAClC,UAAU,EAAE,MAAM;IAiBd,QAAQ;;;IAaR,OAAO,CACX,SAAS,EAAE,eAAe,EAAE,EAC5B,QAAQ,EAAE,OAAO,EACjB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,UAAU,EAAE,CAAC;CAyDzB"}
@@ -11,6 +11,7 @@ const str_replace_editor_1 = require("../tools/str_replace_editor");
11
11
  const test_gen_browser_1 = require("../tools/test-gen-browser");
12
12
  const test_run_1 = require("../tools/test-run");
13
13
  const test_run_fetcher_1 = require("../tools/test-run-fetcher");
14
+ const checkpoint_1 = require("../utils/checkpoint");
14
15
  async function sendToolRequestToRemoteQueue(payload) {
15
16
  const sqs = new client_sqs_1.SQSClient({
16
17
  region: process.env.AWS_REGION,
@@ -35,16 +36,18 @@ class ToolCallService {
35
36
  toolExecutors = {};
36
37
  chatSessionId;
37
38
  selectedModel;
38
- constructor(chatSessionId, selectedModel) {
39
+ branchName;
40
+ constructor(chatSessionId, selectedModel, branchName) {
39
41
  this.chatSessionId = chatSessionId;
40
42
  this.selectedModel = selectedModel;
43
+ this.branchName = branchName;
41
44
  this.tools = [
42
45
  grep_1.grepTool,
43
46
  test_run_1.runTestTool,
44
47
  test_run_fetcher_1.fetchTestRunReportTool,
45
48
  diagnosis_fetcher_1.fetchDiagnosisReportTool,
46
49
  test_gen_browser_1.generateTestWithBrowserAgent,
47
- commit_and_create_pr_1.commitAndPushChangesTool,
50
+ commit_and_create_pr_1.createPullRequestTool,
48
51
  environment_crud_1.getEnvironmentTool,
49
52
  download_build_1.downloadBuildTool,
50
53
  ];
@@ -68,6 +71,7 @@ class ToolCallService {
68
71
  requestId: crypto.randomUUID(),
69
72
  chatSessionId: this.chatSessionId,
70
73
  selectedModel: this.selectedModel,
74
+ branchName: this.branchName,
71
75
  });
72
76
  return toolCalls.map(() => ({
73
77
  isError: false,
@@ -96,7 +100,7 @@ class ToolCallService {
96
100
  continue;
97
101
  }
98
102
  try {
99
- const result = await toolExecutor(toolCall.input);
103
+ const result = await toolExecutor(toolCall.input, trace);
100
104
  toolResults.push(result);
101
105
  span?.end({ output: result });
102
106
  }
@@ -109,6 +113,7 @@ class ToolCallService {
109
113
  span?.end({ output: errorResult });
110
114
  }
111
115
  }
116
+ await (0, checkpoint_1.createCheckpoint)(toolCalls, this.branchName);
112
117
  executeSpan?.end({ output: { toolResults } });
113
118
  return toolResults;
114
119
  }
@@ -4,6 +4,6 @@ interface GitHubRepoInfo {
4
4
  repo: string;
5
5
  }
6
6
  export declare function parseGitHubUrl(url: string): GitHubRepoInfo;
7
- export declare const commitAndPushChangesTool: Tool;
7
+ export declare const createPullRequestTool: Tool;
8
8
  export {};
9
9
  //# sourceMappingURL=commit-and-create-pr.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"commit-and-create-pr.d.ts","sourceRoot":"","sources":["../../src/tools/commit-and-create-pr.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AAQnD,UAAU,cAAc;IACtB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;CACd;AAED,wBAAgB,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,cAAc,CAiB1D;AA2CD,eAAO,MAAM,wBAAwB,EAAE,IAwFtC,CAAC"}
1
+ {"version":3,"file":"commit-and-create-pr.d.ts","sourceRoot":"","sources":["../../src/tools/commit-and-create-pr.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AAOnD,UAAU,cAAc;IACtB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;CACd;AAED,wBAAgB,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,cAAc,CAiB1D;AA2CD,eAAO,MAAM,qBAAqB,EAAE,IAuEnC,CAAC"}
@@ -1,11 +1,7 @@
1
1
  "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
2
  Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.commitAndPushChangesTool = exports.parseGitHubUrl = void 0;
3
+ exports.createPullRequestTool = exports.parseGitHubUrl = void 0;
7
4
  const child_process_1 = require("child_process");
8
- const crypto_1 = __importDefault(require("crypto"));
9
5
  const zod_1 = require("zod");
10
6
  const git_1 = require("../utils/git");
11
7
  const utils_1 = require("./utils");
@@ -24,11 +20,11 @@ function parseGitHubUrl(url) {
24
20
  return { owner, repo };
25
21
  }
26
22
  exports.parseGitHubUrl = parseGitHubUrl;
27
- const CommitAndPushChangesSchema = zod_1.z.object({
28
- commitMessage: zod_1.z
23
+ const createPullRequestSchema = zod_1.z.object({
24
+ pullRequestTitle: zod_1.z
29
25
  .string()
30
26
  .describe("A short message to use for the commit. Should not be more than 8 words. Should follow conventional commit format."),
31
- description: zod_1.z.string().describe(`A longer description of the changes you made. This will be used as the description of a pull request on GitHub, and so you should follow markdown formatting.
27
+ pullRequestDescription: zod_1.z.string().describe(`A longer description of the changes you made. This will be used as the description of a pull request on GitHub, and so you should follow markdown formatting.
32
28
  Your code will be reviewed by a human, and you should include everything that will provide context and improve the reviewer's confidence in the changes.
33
29
 
34
30
  For example, if you used the test run tool, you should include the results (and the report URL if available). Report URL is especially important, because it contains
@@ -46,38 +42,19 @@ function formatDescriptionWithTimestamp(description, existingBody, type = "creat
46
42
  }
47
43
  return `${description}\n\n${timestampText}`;
48
44
  }
49
- exports.commitAndPushChangesTool = {
45
+ exports.createPullRequestTool = {
50
46
  schema: {
51
- name: "commitAndPushChanges",
52
- description: `Creates a commit with all modified files and pushes them to the current branch.
53
- If currently on main branch, creates a new branch with a random name.
47
+ name: "createPullRequest",
48
+ description: `Creates a new Pull Request on GitHub.
54
49
  If the current branch already has an open PR, commits and pushes changes to that PR.
55
50
  Uses the empiricalrun[bot] credentials for git operations.
56
51
  Returns the URL of the created or updated pull request.`,
57
- parameters: CommitAndPushChangesSchema,
52
+ parameters: createPullRequestSchema,
58
53
  },
59
54
  execute: async (input) => {
60
55
  try {
61
- const { commitMessage, description } = input;
62
- const currentBranch = (0, child_process_1.execSync)("git rev-parse --abbrev-ref HEAD")
63
- .toString()
64
- .trim();
65
- let branchName = currentBranch;
66
- if (currentBranch === "main") {
67
- // If on main, create a new branch
68
- const randomId = crypto_1.default.randomUUID().substring(0, 8);
69
- branchName = `branch-${randomId}`;
70
- (0, child_process_1.execSync)(`git checkout -b ${branchName}`);
71
- }
72
- const modifiedFiles = (0, child_process_1.execSync)("git status --porcelain")
73
- .toString()
74
- .split("\n")
75
- .filter((line) => line && !line.includes(".bak"))
76
- .map((line) => line.substring(3)); // Remove status prefix
77
- for (const file of modifiedFiles) {
78
- (0, child_process_1.execSync)(`git add "${file}"`);
79
- }
80
- await (0, git_1.commitAsBotUser)(commitMessage);
56
+ const { pullRequestTitle, pullRequestDescription } = input;
57
+ const branchName = await (0, git_1.getCurrentBranchName)();
81
58
  const repoUrl = (0, child_process_1.execSync)("git config --get remote.origin.url")
82
59
  .toString()
83
60
  .trim();
@@ -94,7 +71,7 @@ Returns the URL of the created or updated pull request.`,
94
71
  const existingPR = existingPRs?.find((pr) => pr.head.ref === branchName);
95
72
  if (existingPR) {
96
73
  // Append the new description to the existing PR description
97
- const updatedDescription = formatDescriptionWithTimestamp(description, existingPR.body, "update");
74
+ const updatedDescription = formatDescriptionWithTimestamp(pullRequestDescription, existingPR.body, "update");
98
75
  await (0, utils_1.callGitHubProxy)({
99
76
  method: "PATCH",
100
77
  url: `https://api.github.com/repos/${owner}/${repo}/pulls/${existingPR.number}`,
@@ -107,12 +84,12 @@ Returns the URL of the created or updated pull request.`,
107
84
  result: `Committed and pushed changes to existing PR: ${existingPR.html_url}`,
108
85
  };
109
86
  }
110
- const initialDescription = formatDescriptionWithTimestamp(description);
87
+ const initialDescription = formatDescriptionWithTimestamp(pullRequestDescription);
111
88
  const pr = (await (0, utils_1.callGitHubProxy)({
112
89
  method: "POST",
113
90
  url: `https://api.github.com/repos/${owner}/${repo}/pulls`,
114
91
  body: {
115
- title: commitMessage,
92
+ title: pullRequestTitle,
116
93
  head: branchName,
117
94
  base: "main",
118
95
  body: initialDescription,
@@ -1,3 +1,3 @@
1
1
  import type { Tool } from "@empiricalrun/llm/chat";
2
2
  export declare const grepTool: Tool;
3
- //# sourceMappingURL=grep.d.ts.map
3
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tools/grep/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAc,MAAM,wBAAwB,CAAC;AAqH/D,eAAO,MAAM,QAAQ,EAAE,IAkBtB,CAAC"}
@@ -0,0 +1,129 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.grepTool = void 0;
7
+ const child_process_1 = require("child_process");
8
+ const path_1 = __importDefault(require("path"));
9
+ const util_1 = require("util");
10
+ const zod_1 = require("zod");
11
+ const repo_tree_1 = require("../../utils/repo-tree");
12
+ const ripgrep_1 = require("./ripgrep");
13
+ const GrepInputSchema = zod_1.z.object({
14
+ pattern: zod_1.z.string().describe("The pattern to search for"),
15
+ directory: zod_1.z
16
+ .string()
17
+ .optional()
18
+ .describe("The directory to search in (defaults to current directory)"),
19
+ filePattern: zod_1.z
20
+ .string()
21
+ .optional()
22
+ .describe("File pattern to search in (e.g., '*.ts' for TypeScript files)"),
23
+ });
24
+ async function usingSystemGrep(input) {
25
+ try {
26
+ const dir = input.directory || process.cwd();
27
+ // Create exclude pattern for grep
28
+ const excludePatterns = repo_tree_1.DEFAULT_EXCLUDE.map((pattern) => typeof pattern === "string" ? pattern : pattern.source)
29
+ .map((pattern) => `--exclude-dir="${pattern}"`)
30
+ .join(" ");
31
+ // Using -n to show line numbers in output
32
+ let cmd = `grep -rin ${excludePatterns} "${input.pattern}" ${dir}`;
33
+ if (input.filePattern) {
34
+ // For file pattern searches, we'll use find with exclusions
35
+ const excludeFind = repo_tree_1.DEFAULT_EXCLUDE.map((pattern) => typeof pattern === "string" ? pattern : pattern.source)
36
+ .map((pattern) => `-not -path "*/${pattern}/*"`)
37
+ .join(" ");
38
+ // Modified command to ensure filepath is included in the output
39
+ // Using grep with -H to always show filename and the filepath as part of the output
40
+ cmd = `find ${dir} ${excludeFind} -name "${input.filePattern}" | xargs grep -Hn "${input.pattern}"`;
41
+ }
42
+ const execAsync = (0, util_1.promisify)(child_process_1.exec);
43
+ const { stdout, stderr } = await execAsync(cmd);
44
+ if (stdout) {
45
+ return {
46
+ isError: false,
47
+ result: stdout,
48
+ };
49
+ }
50
+ else if (!stdout && stderr) {
51
+ return {
52
+ isError: true,
53
+ result: stderr,
54
+ };
55
+ }
56
+ else {
57
+ // Both are empty, which means no results were found
58
+ return {
59
+ isError: false,
60
+ result: "No results found.",
61
+ };
62
+ }
63
+ }
64
+ catch (error) {
65
+ console.error("Error executing grep", error);
66
+ return {
67
+ isError: true,
68
+ result: error instanceof Error ? error.message : String(error),
69
+ };
70
+ }
71
+ }
72
+ async function usingRipgrep(input) {
73
+ try {
74
+ const dir = path_1.default.join(process.cwd(), input.directory || "");
75
+ const escapedPattern = input.pattern.replace(/\s+/g, "\\ ");
76
+ const results = await (0, ripgrep_1.ripgrep)(dir, {
77
+ string: escapedPattern,
78
+ globs: input.filePattern ? [input.filePattern] : undefined,
79
+ });
80
+ const resultsSummary = results
81
+ .map((result) => {
82
+ // Can add submatches and offset info to the summary if needed
83
+ return {
84
+ lines: result.lines.text,
85
+ path: path_1.default.relative(process.cwd(), result.path.text),
86
+ // line number is 1-indexed
87
+ line_number: result.line_number,
88
+ };
89
+ })
90
+ .map((result) => {
91
+ return `
92
+ ${result.path}:${result.line_number}
93
+ \`\`\`
94
+ ${result.lines}\`\`\`
95
+ `;
96
+ });
97
+ const relDir = path_1.default.relative(process.cwd(), dir);
98
+ const header = `Found ${resultsSummary.length} results for "${input.pattern}" in "${relDir}".
99
+ All paths are relative to the current working directory.`;
100
+ return {
101
+ isError: false,
102
+ result: `${header}\n${resultsSummary.join("\n")}`,
103
+ };
104
+ }
105
+ catch (error) {
106
+ console.error("Error executing ripgrep", error);
107
+ return {
108
+ isError: true,
109
+ result: error instanceof ripgrep_1.RipGrepError ? error.message : String(error),
110
+ };
111
+ }
112
+ }
113
+ exports.grepTool = {
114
+ schema: {
115
+ name: "grep",
116
+ description: "Search for a pattern in files using grep (case insensitive)",
117
+ parameters: GrepInputSchema,
118
+ },
119
+ execute: async (input) => {
120
+ const isAvailable = await (0, ripgrep_1.isRgAvailable)();
121
+ if (isAvailable) {
122
+ return usingRipgrep(input);
123
+ }
124
+ else {
125
+ console.warn("ripgrep is not available, falling back to system grep.");
126
+ return usingSystemGrep(input);
127
+ }
128
+ },
129
+ };
@@ -0,0 +1,5 @@
1
+ import { Match, Options } from "./types";
2
+ export * from "./types";
3
+ export declare function isRgAvailable(): Promise<unknown>;
4
+ export declare function ripgrep(cwd: string, options: Options): Promise<Array<Match>>;
5
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/tools/grep/ripgrep/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,KAAK,EAAE,OAAO,EAAgB,MAAM,SAAS,CAAC;AAEvD,cAAc,SAAS,CAAC;AAcxB,wBAAgB,aAAa,qBAY5B;AAED,wBAAgB,OAAO,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CA8C5E"}
@@ -0,0 +1,90 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
+ for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
+ };
16
+ Object.defineProperty(exports, "__esModule", { value: true });
17
+ exports.ripgrep = exports.isRgAvailable = void 0;
18
+ const child_process_1 = require("child_process");
19
+ const types_1 = require("./types");
20
+ __exportStar(require("./types"), exports);
21
/**
 * Converts ripgrep's `--json` output (one JSON document per line) into the
 * list of match payloads.
 *
 * Every line is parsed; only events whose `type` is "match" contribute their
 * `data` object to the result. Whitespace-only output yields an empty list.
 * A line that is not valid JSON makes `JSON.parse` throw.
 */
function formatResults(stdout) {
    const trimmedOutput = stdout.trim();
    if (trimmedOutput === "") {
        return [];
    }
    const matches = [];
    for (const rawLine of trimmedOutput.split("\n")) {
        const event = JSON.parse(rawLine);
        if (event.type === "match") {
            matches.push(event.data);
        }
    }
    return matches;
}
32
/**
 * Checks whether an `rg` executable can be located.
 *
 * Runs `where rg` on Windows and `which rg` elsewhere; the returned promise
 * resolves to `true` when the lookup command succeeds and `false` when it
 * reports an error. The promise never rejects.
 */
function isRgAvailable() {
    // Use 'where' on Windows and 'which' on Unix-like systems
    const lookupCommand = process.platform === "win32" ? "where rg" : "which rg";
    return new Promise((resolve) => {
        (0, child_process_1.exec)(lookupCommand, (lookupError) => {
            resolve(!lookupError);
        });
    });
}
46
+ exports.isRgAvailable = isRgAvailable;
47
+ function ripgrep(cwd, options) {
48
+ if (!cwd) {
49
+ return Promise.reject(new Error("No `cwd` provided"));
50
+ }
51
+ if (arguments.length === 1) {
52
+ return Promise.reject(new Error("No search term provided"));
53
+ }
54
+ let execString = "rg --json";
55
+ if ("regex" in options) {
56
+ execString = `${execString} -e ${options.regex}`;
57
+ }
58
+ else if ("string" in options) {
59
+ execString = `${execString} -F ${options.string}`;
60
+ }
61
+ if (options.fileType) {
62
+ if (!Array.isArray(options.fileType)) {
63
+ options.fileType = [options.fileType];
64
+ }
65
+ for (const fileType of options.fileType) {
66
+ execString = `${execString} -t ${fileType}`;
67
+ }
68
+ }
69
+ if (options.globs) {
70
+ execString = options.globs.reduce((command, glob) => {
71
+ return `${command} -g '${glob}'`;
72
+ }, execString);
73
+ }
74
+ if (options.multiline) {
75
+ execString = `${execString} --multiline`;
76
+ }
77
+ // https://github.com/alexlafroscia/ripgrep-js/pull/175/files
78
+ execString += ` -- ${cwd}`;
79
+ return new Promise(function (resolve, reject) {
80
+ (0, child_process_1.exec)(execString, { cwd }, (error, stdout, stderr) => {
81
+ if (!error || (error && stderr === "")) {
82
+ resolve(formatResults(stdout));
83
+ }
84
+ else {
85
+ reject(new types_1.RipGrepError(error, stderr));
86
+ }
87
+ });
88
+ });
89
+ }
90
+ exports.ripgrep = ripgrep;
@@ -0,0 +1,45 @@
1
+ /// <reference types=".pnpm/@types+node@20.14.11/node_modules/@types/node/child_process" />
2
+ /// <reference types=".pnpm/@types+node@20.16.10/node_modules/@types/node/child_process" />
3
+ import { ExecException } from "child_process";
4
+ type StringSearchOptions = {
5
+ string: string;
6
+ };
7
+ type RegexSearchOptions = {
8
+ regex: string;
9
+ };
10
+ type LocatorOptions = StringSearchOptions | RegexSearchOptions;
11
+ export type Options = LocatorOptions & {
12
+ globs?: Array<string>;
13
+ fileType?: string | Array<string>;
14
+ multiline?: boolean;
15
+ };
16
+ export type RipgrepJsonSubmatch = {
17
+ match: {
18
+ text: string;
19
+ };
20
+ start: number;
21
+ end: number;
22
+ };
23
+ export type RipGrepJsonMatch = {
24
+ type: "match";
25
+ data: {
26
+ path: {
27
+ text: string;
28
+ };
29
+ lines: {
30
+ text: string;
31
+ };
32
+ line_number: number;
33
+ absolute_offset: number;
34
+ submatches: Array<RipgrepJsonSubmatch>;
35
+ };
36
+ };
37
+ export type Match = RipGrepJsonMatch["data"];
38
+ export declare class RipGrepError {
39
+ private error;
40
+ stderr: string;
41
+ constructor(error: ExecException, stderr: string);
42
+ get message(): string;
43
+ }
44
+ export {};
45
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../../src/tools/grep/ripgrep/types.ts"],"names":[],"mappings":";;AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AAE9C,KAAK,mBAAmB,GAAG;IACzB,MAAM,EAAE,MAAM,CAAC;CAChB,CAAC;AAEF,KAAK,kBAAkB,GAAG;IACxB,KAAK,EAAE,MAAM,CAAC;CACf,CAAC;AAEF,KAAK,cAAc,GAAG,mBAAmB,GAAG,kBAAkB,CAAC;AAE/D,MAAM,MAAM,OAAO,GAAG,cAAc,GAAG;IACrC,KAAK,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;IAClC,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB,CAAC;AAEF,MAAM,MAAM,mBAAmB,GAAG;IAChC,KAAK,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;CACb,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG;IAC7B,IAAI,EAAE,OAAO,CAAC;IACd,IAAI,EAAE;QACJ,IAAI,EAAE;YACJ,IAAI,EAAE,MAAM,CAAC;SACd,CAAC;QACF,KAAK,EAAE;YACL,IAAI,EAAE,MAAM,CAAC;SACd,CAAC;QACF,WAAW,EAAE,MAAM,CAAC;QACpB,eAAe,EAAE,MAAM,CAAC;QACxB,UAAU,EAAE,KAAK,CAAC,mBAAmB,CAAC,CAAC;KACxC,CAAC;CACH,CAAC;AAEF,MAAM,MAAM,KAAK,GAAG,gBAAgB,CAAC,MAAM,CAAC,CAAC;AAE7C,qBAAa,YAAY;IACvB,OAAO,CAAC,KAAK,CAAgB;IAE7B,MAAM,EAAE,MAAM,CAAC;gBAEH,KAAK,EAAE,aAAa,EAAE,MAAM,EAAE,MAAM;IAKhD,IAAI,OAAO,IAAI,MAAM,CAEpB;CACF"}
@@ -0,0 +1,15 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.RipGrepError = void 0;
4
/**
 * Error raised when a ripgrep invocation fails.
 *
 * Extends `Error` so that instances carry a stack trace, satisfy
 * `instanceof Error`, and stringify to a readable message instead of
 * "[object Object]".
 *
 * - `message` is the message of the underlying exec error (falling back to
 *   the captured stderr text when that error has no message).
 * - `stderr` is the raw stderr output captured from the process.
 */
class RipGrepError extends Error {
    error;
    stderr;
    constructor(error, stderr) {
        super(error && error.message ? error.message : String(stderr ?? ""));
        this.name = "RipGrepError";
        this.error = error;
        this.stderr = stderr;
    }
}
15
+ exports.RipGrepError = RipGrepError;
@@ -1 +1 @@
1
- {"version":3,"file":"str_replace_editor.d.ts","sourceRoot":"","sources":["../../src/tools/str_replace_editor.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AA2B1D,UAAU,eAAe;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC9B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;;GAGG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAwC1D;AAMD;;;GAGG;AACH,wBAAsB,wBAAwB,CAC5C,KAAK,EAAE,eAAe,GACrB,OAAO,CAAC,UAAU,CAAC,CA6JrB;AA+FD,eAAO,MAAM,eAAe,EAAE,IAAI,EAKjC,CAAC"}
1
+ {"version":3,"file":"str_replace_editor.d.ts","sourceRoot":"","sources":["../../src/tools/str_replace_editor.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AA2B1D,UAAU,eAAe;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC9B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAqED;;;GAGG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAwC1D;AAMD;;;GAGG;AACH,wBAAsB,wBAAwB,CAC5C,KAAK,EAAE,eAAe,GACrB,OAAO,CAAC,UAAU,CAAC,CA0KrB;AA+FD,eAAO,MAAM,eAAe,EAAE,IAAI,EAKjC,CAAC"}
@@ -25,6 +25,64 @@ function restoreBackup(filePath) {
25
25
  fs_1.default.unlinkSync(backupPath);
26
26
  }
27
27
  }
28
+ /**
29
+ * While running str_replace command, we've seen LLM can struggle to send unique old_str.
30
+ * This function tries to find unique contexts for each occurrence of old_str, so that the error
31
+ * message to the LLM is more informative, enabling it to pick a unique old_str in the next attempt.
32
+ */
33
+ function getUniqueOccurences(contents, old_str) {
34
+ const lines = contents.split("\n");
35
+ const lineIndices = [];
36
+ lines.forEach((line, index) => {
37
+ if (line.includes(old_str)) {
38
+ lineIndices.push(index);
39
+ }
40
+ });
41
+ if (lineIndices.length === 0) {
42
+ return [];
43
+ }
44
+ if (lineIndices.length == 1) {
45
+ // line number should be 1-indexed, hence we add 1 to the line index
46
+ return [{ uniqueContext: old_str, lineNumber: lineIndices[0] + 1 }];
47
+ }
48
+ const uniqueContexts = [];
49
+ const maxContextSize = Math.min(10, lines.length);
50
+ for (const lineIndex of lineIndices) {
51
+ let contextSize = 0; // Start with just the line itself
52
+ let isUnique = false;
53
+ let context = "";
54
+ while (!isUnique && contextSize < maxContextSize) {
55
+ // Expand context to include more lines (both above and below)
56
+ const startLine = Math.max(0, lineIndex - contextSize);
57
+ const endLine = Math.min(lines.length - 1, lineIndex + contextSize);
58
+ // Extract the context as a group of lines
59
+ context = lines.slice(startLine, endLine + 1).join("\n");
60
+ // Check if this context is unique among all occurrences
61
+ isUnique = lineIndices.every((idx) => {
62
+ if (idx === lineIndex)
63
+ return true; // Skip self
64
+ const otherStartLine = Math.max(0, idx - contextSize);
65
+ const otherEndLine = Math.min(lines.length - 1, idx + contextSize);
66
+ const otherContext = lines
67
+ .slice(otherStartLine, otherEndLine + 1)
68
+ .join("\n");
69
+ return context !== otherContext;
70
+ });
71
+ // If not unique, expand context by one more line in each direction
72
+ if (!isUnique) {
73
+ contextSize++;
74
+ }
75
+ }
76
+ // Only add context if we actually found a unique one
77
+ if (isUnique) {
78
+ uniqueContexts.push({
79
+ uniqueContext: context,
80
+ lineNumber: lineIndex + 1,
81
+ });
82
+ }
83
+ }
84
+ return uniqueContexts;
85
+ }
28
86
  /**
29
87
  * Cleans up any backup files that were created during the editing process
30
88
  * @returns The number of backup files that were cleaned up
@@ -127,6 +185,7 @@ async function strReplaceEditorExecutor(input) {
127
185
  throw new Error("file_text is required for create command");
128
186
  }
129
187
  fs_1.default.writeFileSync(filePath, input.file_text);
188
+ // TODO: Add type checking
130
189
  return {
131
190
  result: `Successfully created file ${filePath}`,
132
191
  isError: false,
@@ -156,9 +215,18 @@ async function strReplaceEditorExecutor(input) {
156
215
  const escapedOldStr = escapeRegExp(normalizedOldStr);
157
216
  const occurences = normalizedContent.match(new RegExp(escapedOldStr, "g"));
158
217
  if (occurences && occurences.length > 1) {
159
- // TODO: Help find unique matches
218
+ const uniqueContexts = getUniqueOccurences(content, input.old_str);
219
+ if (uniqueContexts.length === 0) {
220
+ return {
221
+ result: `Error: old_str found ${occurences.length} times in file: ${filePath}, but no unique contexts could be identified. Try using a more specific string.`,
222
+ isError: true,
223
+ };
224
+ }
225
+ const uniqueContextsString = uniqueContexts
226
+ .map(({ uniqueContext, lineNumber }, idx) => `${idx + 1}. For occurence at line number ${lineNumber}, unique context is:\n\`\`\`\n${uniqueContext}\n\`\`\`\n`)
227
+ .join("\n");
160
228
  return {
161
- result: `Error: old_str found ${occurences.length} times in file: ${filePath}. Please provide more context to make a unique match.`,
229
+ result: `Error: old_str found ${occurences.length} times in file: ${filePath}. Please use one of these unique contexts instead:\n\n${uniqueContextsString}`,
162
230
  isError: true,
163
231
  };
164
232
  }
@@ -1 +1 @@
1
- {"version":3,"file":"test-gen-browser.d.ts","sourceRoot":"","sources":["../../src/tools/test-gen-browser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AAyFnD,eAAO,MAAM,4BAA4B,EAAE,IA0E1C,CAAC"}
1
+ {"version":3,"file":"test-gen-browser.d.ts","sourceRoot":"","sources":["../../src/tools/test-gen-browser.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AAyFnD,eAAO,MAAM,4BAA4B,EAAE,IA8E1C,CAAC"}
@@ -86,7 +86,7 @@ exports.generateTestWithBrowserAgent = {
86
86
  description: BROWSER_AGENT_DESCRIPTION,
87
87
  parameters: BrowserAgentSchema,
88
88
  },
89
- execute: async (input) => {
89
+ execute: async (input, trace) => {
90
90
  const { testName, testSuites, fileName, changeToMake, project } = input;
91
91
  const playwrightConfig = await (0, utils_1.readPlaywrightConfig)(process.cwd());
92
92
  const validProjectNames = await (0, utils_1.getValidProjectNames)(playwrightConfig);
@@ -120,6 +120,7 @@ exports.generateTestWithBrowserAgent = {
120
120
  testFilePath: fileName,
121
121
  filePathToUpdate: fileName,
122
122
  pwProjectsFilter: [project],
123
+ traceId: trace?.id,
123
124
  testGenToken: (0, scenarios_1.buildTokenFromOptions)({
124
125
  name: testName,
125
126
  file: fileName,
@@ -0,0 +1,3 @@
1
+ import { PendingToolCall } from "@empiricalrun/llm/chat";
2
+ export declare function createCheckpoint(toolCalls: PendingToolCall[], branchName: string): Promise<void>;
3
+ //# sourceMappingURL=checkpoint.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"checkpoint.d.ts","sourceRoot":"","sources":["../../src/utils/checkpoint.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAC;AAIzD,wBAAsB,gBAAgB,CACpC,SAAS,EAAE,eAAe,EAAE,EAC5B,UAAU,EAAE,MAAM,iBAcnB"}
@@ -0,0 +1,19 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.createCheckpoint = void 0;
4
+ const git_1 = require("./git");
5
/**
 * Commits and pushes the files touched by the given tool calls.
 *
 * A tool call is included when its `input.path` is set and that exact path
 * is among the files reported by `getFilesChanged()`. When at least one
 * call qualifies, those paths are committed to `branchName` with a message
 * listing each "<tool name> on <path>" pair; otherwise nothing happens.
 */
async function createCheckpoint(toolCalls, branchName) {
    const changedFiles = await (0, git_1.getFilesChanged)();
    const touched = [];
    for (const call of toolCalls) {
        const targetPath = call.input.path;
        if (targetPath && changedFiles.includes(targetPath)) {
            touched.push({ name: call.name, path: targetPath });
        }
    }
    if (touched.length === 0) {
        return;
    }
    const summary = touched
        .map((entry) => `${entry.name} on ${entry.path}`)
        .join(", ");
    const pathsToCommit = touched.map((entry) => entry.path);
    await (0, git_1.commitFilesAndPushBranch)(`Toolcall Checkpoint: ${summary}`, branchName, pathsToCommit);
}
19
+ exports.createCheckpoint = createCheckpoint;
@@ -1 +1 @@
1
- {"version":3,"file":"exec.d.ts","sourceRoot":"","sources":["../../src/utils/exec.ts"],"names":[],"mappings":"AAQA,qBAAa,cAAc;IACzB,OAAO,CAAC,YAAY,CAA6B;IAE3C,OAAO,CACX,OAAO,EAAE,MAAM,EAAE,EACjB,OAAO,EAAE;QAAE,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;KAAE,GACxC,OAAO,CAAC,MAAM,CAAC;IAkDlB,SAAS,IAAI,IAAI;IASjB,SAAS,IAAI,OAAO;CAGrB;AAED,wBAAsB,GAAG,CACvB,OAAO,EAAE,MAAM,EAAE,EACjB,OAAO,EAAE;IAAE,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CAAE,GACxC,OAAO,CAAC,MAAM,CAAC,CAGjB"}
1
+ {"version":3,"file":"exec.d.ts","sourceRoot":"","sources":["../../src/utils/exec.ts"],"names":[],"mappings":"AAmBA,qBAAa,cAAc;IACzB,OAAO,CAAC,YAAY,CAA6B;IAE3C,OAAO,CACX,OAAO,EAAE,MAAM,EAAE,EACjB,OAAO,EAAE;QAAE,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;KAAE,GACxC,OAAO,CAAC,MAAM,CAAC;IAkDlB,SAAS,IAAI,IAAI;IASjB,SAAS,IAAI,OAAO;CAGrB;AAED,wBAAsB,GAAG,CACvB,OAAO,EAAE,MAAM,EAAE,EACjB,OAAO,EAAE;IAAE,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CAAE,GACxC,OAAO,CAAC,MAAM,CAAC,CAGjB"}
@@ -6,10 +6,18 @@ Object.defineProperty(exports, "__esModule", { value: true });
6
6
  exports.cmd = exports.ProcessManager = void 0;
7
7
  const child_process_1 = require("child_process");
8
8
  const process_1 = __importDefault(require("process"));
9
- const acceptableExitCodes = [
10
- 0, // Implies successful execution (no errors)
11
- 130, // Implies user interrupted the process (for SIGINT)
12
- ];
9
/**
 * Decides whether a child process ended in a way that should not be treated
 * as a failure: a SIGINT termination, exit code 0, or exit code 130.
 * A missing exit code is treated as 1 (failure).
 */
function isAcceptableExit(exitCode, signal) {
    // On Linux, code is null and signal is SIGINT
    const interruptedBySignal = signal === "SIGINT";
    if (interruptedBySignal) {
        return true;
    }
    // On macOS, signal is null and exit code is 130
    const effectiveCode = exitCode ?? 1;
    switch (effectiveCode) {
        case 0: // Implies successful execution (no errors)
        case 130: // Implies user interrupted the process (for SIGINT)
            return true;
        default:
            return false;
    }
}
13
21
  class ProcessManager {
14
22
  childProcess = null;
15
23
  async execute(command, options) {
@@ -43,14 +51,14 @@ class ProcessManager {
43
51
  this.childProcess = null;
44
52
  rejectFunc(err);
45
53
  });
46
- p.on("exit", async (code) => {
54
+ p.on("exit", async (code, signal) => {
47
55
  this.childProcess = null;
48
- if (!acceptableExitCodes.includes(code ?? 1)) {
56
+ if (!isAcceptableExit(code, signal)) {
49
57
  const errorMessage = errorLogs.slice(-3).join("\n");
50
58
  rejectFunc(new Error(errorMessage));
51
59
  }
52
60
  else {
53
- resolveFunc(code ?? 1);
61
+ resolveFunc(0);
54
62
  }
55
63
  });
56
64
  });
@@ -2,4 +2,8 @@ export declare function getGitDiff(filepath: string): string;
2
2
  export declare function checkoutBranch(branchName: string): Promise<void>;
3
3
  export declare function commitAsBotUser(commitMessage: string): Promise<void>;
4
4
  export declare function commitLocalAndPushBranchToRemote(branchName: string): Promise<void>;
5
+ export declare function getCurrentBranchName(): Promise<string>;
6
+ export declare function pullBranch(branchName: string): Promise<void>;
7
+ export declare function commitFilesAndPushBranch(commitMessage: string, branchName: string, files: string[]): Promise<void>;
8
+ export declare function getFilesChanged(): Promise<string[]>;
5
9
  //# sourceMappingURL=git.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"git.d.ts","sourceRoot":"","sources":["../../src/utils/git.ts"],"names":[],"mappings":"AAKA,wBAAgB,UAAU,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAKnD;AAED,wBAAsB,cAAc,CAAC,UAAU,EAAE,MAAM,iBAQtD;AAED,wBAAsB,eAAe,CAAC,aAAa,EAAE,MAAM,iBAS1D;AAED,wBAAsB,gCAAgC,CAAC,UAAU,EAAE,MAAM,iBAIxE"}
1
+ {"version":3,"file":"git.d.ts","sourceRoot":"","sources":["../../src/utils/git.ts"],"names":[],"mappings":"AAKA,wBAAgB,UAAU,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAKnD;AAED,wBAAsB,cAAc,CAAC,UAAU,EAAE,MAAM,iBAQtD;AAED,wBAAsB,eAAe,CAAC,aAAa,EAAE,MAAM,iBAS1D;AAED,wBAAsB,gCAAgC,CAAC,UAAU,EAAE,MAAM,iBAIxE;AAED,wBAAsB,oBAAoB,oBAGzC;AAED,wBAAsB,UAAU,CAAC,UAAU,EAAE,MAAM,iBAElD;AAED,wBAAsB,wBAAwB,CAC5C,aAAa,EAAE,MAAM,EACrB,UAAU,EAAE,MAAM,EAClB,KAAK,EAAE,MAAM,EAAE,iBAOhB;AAED,wBAAsB,eAAe,sBAQpC"}
package/dist/utils/git.js CHANGED
@@ -1,6 +1,6 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.commitLocalAndPushBranchToRemote = exports.commitAsBotUser = exports.checkoutBranch = exports.getGitDiff = void 0;
3
+ exports.getFilesChanged = exports.commitFilesAndPushBranch = exports.pullBranch = exports.getCurrentBranchName = exports.commitLocalAndPushBranchToRemote = exports.commitAsBotUser = exports.checkoutBranch = exports.getGitDiff = void 0;
4
4
  const child_process_1 = require("child_process");
5
5
  const GIT_USER_NAME = "empiricalrun[bot]";
6
6
  const GIT_USER_EMAIL = "180257021+empiricalrun[bot]@users.noreply.github.com";
@@ -37,3 +37,28 @@ async function commitLocalAndPushBranchToRemote(branchName) {
37
37
  (0, child_process_1.execSync)(`git push origin ${branchName}`);
38
38
  }
39
39
  exports.commitLocalAndPushBranchToRemote = commitLocalAndPushBranchToRemote;
40
/**
 * Returns the output of `git branch --show-current`, trimmed of surrounding
 * whitespace. Throws if the git command fails.
 */
async function getCurrentBranchName() {
    const rawOutput = (0, child_process_1.execSync)("git branch --show-current");
    return rawOutput.toString().trim();
}
44
+ exports.getCurrentBranchName = getCurrentBranchName;
45
/**
 * Pulls `branchName` from the `origin` remote into the current checkout.
 *
 * The branch name is passed to git as a single argument (no shell), so a
 * name containing spaces or shell metacharacters cannot alter the command.
 * Throws if the git command fails.
 */
async function pullBranch(branchName) {
    (0, child_process_1.execFileSync)("git", ["pull", "origin", branchName]);
}
48
+ exports.pullBranch = pullBranch;
49
/**
 * Stages the given files, commits them as the bot user and pushes
 * `branchName` to `origin`.
 *
 * File paths and the branch name are handed to git as discrete arguments
 * (no shell), so paths containing spaces or shell metacharacters are staged
 * correctly; `--` stops a path that starts with "-" from being read as an
 * option.
 *
 * @param commitMessage Commit message; a default is used when it is empty.
 * @param branchName Branch to push to `origin`.
 * @param files Paths to stage.
 */
async function commitFilesAndPushBranch(commitMessage, branchName, files) {
    (0, child_process_1.execFileSync)("git", ["add", "--", ...files]);
    await commitAsBotUser(commitMessage || "Intermediate commit from @empiricalrun/test-gen");
    (0, child_process_1.execFileSync)("git", ["push", "origin", branchName]);
}
54
+ exports.commitFilesAndPushBranch = commitFilesAndPushBranch;
55
/**
 * Lists the paths reported by `git status --porcelain`.
 *
 * The NUL-separated (`-z`) form is used so that paths are returned verbatim:
 * git does not quote or escape them, and paths containing spaces are kept
 * whole. Each entry is "XY <path>"; for renames and copies the entry is
 * followed by a second field holding the original path, which is skipped so
 * only the new path is reported.
 *
 * @returns The changed paths, in the order git reports them.
 */
async function getFilesChanged() {
    const output = (0, child_process_1.execSync)("git status --porcelain -z").toString();
    const fields = output.split("\0");
    const filesChanged = [];
    for (let i = 0; i < fields.length; i++) {
        const field = fields[i];
        // "XY " prefix plus at least one path character.
        if (field.length <= 3) {
            continue;
        }
        const status = field.slice(0, 2);
        filesChanged.push(field.slice(3));
        if (status.includes("R") || status.includes("C")) {
            // The next field is the rename/copy source, not a separate entry.
            i++;
        }
    }
    return filesChanged;
}
64
+ exports.getFilesChanged = getFilesChanged;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@empiricalrun/test-gen",
3
- "version": "0.56.3",
3
+ "version": "0.57.0",
4
4
  "publishConfig": {
5
5
  "registry": "https://registry.npmjs.org/",
6
6
  "access": "public"
@@ -60,7 +60,7 @@
60
60
  "tsx": "^4.16.2",
61
61
  "typescript": "^5.3.3",
62
62
  "zod": "^3.23.8",
63
- "@empiricalrun/llm": "^0.15.2",
63
+ "@empiricalrun/llm": "^0.15.3",
64
64
  "@empiricalrun/r2-uploader": "^0.3.8",
65
65
  "@empiricalrun/test-run": "^0.8.4"
66
66
  },
@@ -1 +0,0 @@
1
- {"version":3,"file":"grep.d.ts","sourceRoot":"","sources":["../../src/tools/grep.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAc,MAAM,wBAAwB,CAAC;AAmB/D,eAAO,MAAM,QAAQ,EAAE,IA0DtB,CAAC"}
@@ -1,73 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.grepTool = void 0;
4
- const child_process_1 = require("child_process");
5
- const util_1 = require("util");
6
- const zod_1 = require("zod");
7
- const repo_tree_1 = require("../utils/repo-tree");
8
- const GrepInputSchema = zod_1.z.object({
9
- pattern: zod_1.z.string().describe("The pattern to search for"),
10
- directory: zod_1.z
11
- .string()
12
- .optional()
13
- .describe("The directory to search in (defaults to current directory)"),
14
- filePattern: zod_1.z
15
- .string()
16
- .optional()
17
- .describe("File pattern to search in (e.g., '*.ts' for TypeScript files)"),
18
- });
19
- exports.grepTool = {
20
- schema: {
21
- name: "grep",
22
- description: "Search for a pattern in files using grep (case insensitive)",
23
- parameters: GrepInputSchema,
24
- },
25
- execute: async (input) => {
26
- try {
27
- const dir = input.directory || process.cwd();
28
- // Create exclude pattern for grep
29
- const excludePatterns = repo_tree_1.DEFAULT_EXCLUDE.map((pattern) => typeof pattern === "string" ? pattern : pattern.source)
30
- .map((pattern) => `--exclude-dir="${pattern}"`)
31
- .join(" ");
32
- // Using -n to show line numbers in output
33
- let cmd = `grep -rin ${excludePatterns} "${input.pattern}" ${dir}`;
34
- if (input.filePattern) {
35
- // For file pattern searches, we'll use find with exclusions
36
- const excludeFind = repo_tree_1.DEFAULT_EXCLUDE.map((pattern) => typeof pattern === "string" ? pattern : pattern.source)
37
- .map((pattern) => `-not -path "*/${pattern}/*"`)
38
- .join(" ");
39
- // Modified command to ensure filepath is included in the output
40
- // Using grep with -H to always show filename and the filepath as part of the output
41
- cmd = `find ${dir} ${excludeFind} -name "${input.filePattern}" | xargs grep -Hn "${input.pattern}"`;
42
- }
43
- const execAsync = (0, util_1.promisify)(child_process_1.exec);
44
- const { stdout, stderr } = await execAsync(cmd);
45
- if (stdout) {
46
- return {
47
- isError: false,
48
- result: stdout,
49
- };
50
- }
51
- else if (!stdout && stderr) {
52
- return {
53
- isError: true,
54
- result: stderr,
55
- };
56
- }
57
- else {
58
- // Both are empty, which means no results were found
59
- return {
60
- isError: false,
61
- result: "No results found.",
62
- };
63
- }
64
- }
65
- catch (error) {
66
- console.error("Error executing grep", error);
67
- return {
68
- isError: true,
69
- result: error instanceof Error ? error.message : String(error),
70
- };
71
- }
72
- },
73
- };