@empiricalrun/test-gen 0.53.9 → 0.53.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,22 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.53.11
4
+
5
+ ### Patch Changes
6
+
7
+ - fb78386: feat: enabled LLM tracing for chatAgent dashboard
8
+ - Updated dependencies [a40b338]
9
+ - @empiricalrun/llm@0.14.6
10
+
11
+ ## 0.53.10
12
+
13
+ ### Patch Changes
14
+
15
+ - 088545c: feat: upload test reports from run-test tool calls
16
+ - d003ea8: feat: add description to PRs created by chat agent
17
+ - Updated dependencies [088545c]
18
+ - @empiricalrun/test-run@0.7.7
19
+
3
20
  ## 0.53.9
4
21
 
5
22
  ### Patch Changes
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"AAYA,OAAO,EAAoB,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAiBhE,wBAAsB,kBAAkB,CAAC,EACvC,mBAAmB,EACnB,aAAa,EACb,oBAAoB,GACrB,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,mBAAmB,EAAE,OAAO,CAAC;IAC7B,oBAAoB,EAAE,MAAM,GAAG,SAAS,CAAC;CAC1C,iBAoFA;AAqBD,wBAAsB,wBAAwB,CAAC,EAC7C,aAAa,EACb,aAAa,GACd,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,aAAa,EAAE,MAAM,CAAC;CACvB,iBA+BA"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/chat/index.ts"],"names":[],"mappings":"AAYA,OAAO,EAAoB,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAiBhE,wBAAsB,kBAAkB,CAAC,EACvC,mBAAmB,EACnB,aAAa,EACb,oBAAoB,GACrB,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,mBAAmB,EAAE,OAAO,CAAC;IAC7B,oBAAoB,EAAE,MAAM,GAAG,SAAS,CAAC;CAC1C,iBAoFA;AAuBD,wBAAsB,wBAAwB,CAAC,EAC7C,aAAa,EACb,aAAa,GACd,EAAE;IACD,aAAa,EAAE,mBAAmB,CAAC;IACnC,aAAa,EAAE,MAAM,CAAC;CACvB,iBAyCA"}
@@ -116,6 +116,15 @@ async function runChatAgentForDashboard({ chatSessionId, selectedModel, }) {
116
116
  const chatSession = await getChatSessionFromDashboard(chatSessionId);
117
117
  const chatState = chatSession.chat_state;
118
118
  const branchName = chatSession.branch_name;
119
+ const trace = llm_1.langfuseInstance?.trace({
120
+ id: chatSession.langfuse_trace_id,
121
+ name: "chat_agent",
122
+ input: chatState,
123
+ tags: [selectedModel, "chat_agent"],
124
+ metadata: {
125
+ chatSessionId,
126
+ },
127
+ });
119
128
  await (0, git_1.checkoutBranch)(branchName);
120
129
  let chatModel = (0, model_1.createChatModel)(chatState.messages, selectedModel);
121
130
  let reporterFunc = async (chatState, latest) => {
@@ -137,6 +146,7 @@ async function runChatAgentForDashboard({ chatSessionId, selectedModel, }) {
137
146
  chatModel,
138
147
  selectedModel,
139
148
  reporter: reporterFunc,
149
+ trace,
140
150
  });
141
151
  await (0, git_1.commitLocalAndPushBranchToRemote)(branchName);
142
152
  }
@@ -1 +1 @@
1
- {"version":3,"file":"commit-and-create-pr.d.ts","sourceRoot":"","sources":["../../src/tools/commit-and-create-pr.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AAQnD,UAAU,cAAc;IACtB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;CACd;AAED,wBAAgB,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,cAAc,CAiB1D;AAiBD,eAAO,MAAM,wBAAwB,EAAE,IAkFtC,CAAC"}
1
+ {"version":3,"file":"commit-and-create-pr.d.ts","sourceRoot":"","sources":["../../src/tools/commit-and-create-pr.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AAQnD,UAAU,cAAc;IACtB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;CACd;AAED,wBAAgB,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,cAAc,CAiB1D;AA2CD,eAAO,MAAM,wBAAwB,EAAE,IA8FtC,CAAC"}
@@ -28,7 +28,24 @@ const CommitAndPushChangesSchema = zod_1.z.object({
28
28
  commitMessage: zod_1.z
29
29
  .string()
30
30
  .describe("A short message to use for the commit. Should not be more than 8 words. Should follow conventional commit format."),
31
+ description: zod_1.z.string().describe(`A longer description of the changes you made. This will be used as the description of a pull request on GitHub, and so you should follow markdown formatting.
32
+ Your code will be reviewed by a human, and you should include everything that will provide context and improve the reviewer's confidence in the changes.
33
+
34
+ For example, if you used the test run tool, you should include the results (and the report URL if available). Report URL is especially important, because it contains
35
+ videos and other artifacts that help the reviewer gain more context and confidence in the changes. If tests pass, reviewer will see the video and merge the PR.
36
+ If tests fail, reviewer will see the video and the test artifacts, and will be able to help you debug the issue.`),
31
37
  });
38
+ function formatDescriptionWithTimestamp(description, existingBody, type = "create") {
39
+ const timestamp = new Date()
40
+ .toISOString()
41
+ .replace("T", " ")
42
+ .replace("Z", " UTC");
43
+ const timestampText = `<sup>${type === "create" ? "Created" : "Updated"} at ${timestamp}</sup>`;
44
+ if (existingBody) {
45
+ return `${existingBody}\n\n---\n${description}\n\n${timestampText}`;
46
+ }
47
+ return `${description}\n\n${timestampText}`;
48
+ }
32
49
  exports.commitAndPushChangesTool = {
33
50
  schema: {
34
51
  name: "commitAndPushChanges",
@@ -41,7 +58,7 @@ Returns the URL of the created or updated pull request.`,
41
58
  },
42
59
  execute: async (input) => {
43
60
  try {
44
- const { commitMessage } = input;
61
+ const { commitMessage, description } = input;
45
62
  const currentBranch = (0, child_process_1.execSync)("git rev-parse --abbrev-ref HEAD")
46
63
  .toString()
47
64
  .trim();
@@ -82,11 +99,21 @@ Returns the URL of the created or updated pull request.`,
82
99
  (0, child_process_1.execSync)(`git push origin ${branchName} --set-upstream`);
83
100
  const existingPR = existingPRs?.find((pr) => pr.head.ref === branchName);
84
101
  if (existingPR) {
102
+ // Append the new description to the existing PR description
103
+ const updatedDescription = formatDescriptionWithTimestamp(description, existingPR.body, "update");
104
+ await (0, utils_1.callGitHubProxy)({
105
+ method: "PATCH",
106
+ url: `https://api.github.com/repos/${owner}/${repo}/pulls/${existingPR.number}`,
107
+ body: {
108
+ body: updatedDescription,
109
+ },
110
+ });
85
111
  return {
86
112
  isError: false,
87
113
  result: `Committed and pushed changes to existing PR: ${existingPR.html_url}`,
88
114
  };
89
115
  }
116
+ const initialDescription = formatDescriptionWithTimestamp(description);
90
117
  const pr = (await (0, utils_1.callGitHubProxy)({
91
118
  method: "POST",
92
119
  url: `https://api.github.com/repos/${owner}/${repo}/pulls`,
@@ -94,7 +121,7 @@ Returns the URL of the created or updated pull request.`,
94
121
  title: commitMessage,
95
122
  head: branchName,
96
123
  base: "main",
97
- body: "Created via CommitAndPushChanges tool",
124
+ body: initialDescription,
98
125
  },
99
126
  }));
100
127
  return {
@@ -1 +1 @@
1
- {"version":3,"file":"test-run.d.ts","sourceRoot":"","sources":["../../src/tools/test-run.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AAuBnD,eAAO,MAAM,WAAW,EAAE,IAgCzB,CAAC"}
1
+ {"version":3,"file":"test-run.d.ts","sourceRoot":"","sources":["../../src/tools/test-run.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AAgDnD,eAAO,MAAM,WAAW,EAAE,IAuDzB,CAAC"}
@@ -12,11 +12,25 @@ const RunTestSchema = zod_1.z.object({
12
12
  .string()
13
13
  .describe("The name of the file where the test is located. File name must end with .spec.ts"),
14
14
  project: zod_1.z.string().describe("The project to run the test on"),
15
- headed: zod_1.z
16
- .boolean()
17
- .describe("Whether to run the test in headed mode (default is false, which is headless)")
18
- .optional(),
19
15
  });
16
+ function hasCloudflareCredentials() {
17
+ return (process.env.R2_ACCOUNT_ID &&
18
+ process.env.R2_ACCESS_KEY_ID &&
19
+ process.env.R2_SECRET_ACCESS_KEY);
20
+ }
21
+ function buildReportUrl(projectName, testRunId) {
22
+ return `https://reports.empirical.run/${projectName}/${testRunId}/index.html`;
23
+ }
24
+ function buildResult({ hasTestPassed, summaryJson, reportUrl, }) {
25
+ return `
26
+ Test run is complete. Result: ${hasTestPassed ? "Passed" : "Failed"}
27
+
28
+ ${reportUrl ? `Report URL: ${reportUrl}` : ""}
29
+
30
+ # Raw result (in Playwright JSON format)
31
+ ${JSON.stringify(summaryJson)}
32
+ `;
33
+ }
20
34
  exports.runTestTool = {
21
35
  schema: {
22
36
  name: "runTest",
@@ -24,7 +38,18 @@ exports.runTestTool = {
24
38
  parameters: RunTestSchema,
25
39
  },
26
40
  execute: async (input) => {
27
- const { testName, suites, fileName, project, headed } = input;
41
+ let reportUrl = undefined;
42
+ let projectName = undefined;
43
+ let testRunId = undefined;
44
+ if (hasCloudflareCredentials()) {
45
+ projectName = "test-gen-chat-agent";
46
+ testRunId = Date.now().toString();
47
+ reportUrl = buildReportUrl(projectName, testRunId);
48
+ }
49
+ else {
50
+ console.warn("R2 credentials not found: report artifacts will not be uploaded");
51
+ }
52
+ const { testName, suites, fileName, project } = input;
28
53
  try {
29
54
  // {"project":"chromium","suites":[],"fileName":"tests/quizizz-for-work/group.spec.ts","testName":"Create a group"}
30
55
  // This runs all tests - TODO: Debug this, should only run the testName
@@ -33,10 +58,20 @@ exports.runTestTool = {
33
58
  suites,
34
59
  fileName,
35
60
  projects: [project],
36
- headed,
61
+ // Adding these to enforce report artifacts are uploaded
62
+ envOverrides: projectName && testRunId
63
+ ? {
64
+ PROJECT_NAME: projectName,
65
+ TEST_RUN_GITHUB_ACTION_ID: testRunId,
66
+ }
67
+ : undefined,
37
68
  });
38
69
  return {
39
- result: JSON.stringify(result),
70
+ result: buildResult({
71
+ hasTestPassed: result.hasTestPassed,
72
+ summaryJson: result.summaryJson,
73
+ reportUrl: reportUrl,
74
+ }),
40
75
  isError: false,
41
76
  };
42
77
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@empiricalrun/test-gen",
3
- "version": "0.53.9",
3
+ "version": "0.53.11",
4
4
  "publishConfig": {
5
5
  "registry": "https://registry.npmjs.org/",
6
6
  "access": "public"
@@ -56,9 +56,9 @@
56
56
  "tsx": "^4.16.2",
57
57
  "typescript": "^5.3.3",
58
58
  "zod": "^3.23.8",
59
- "@empiricalrun/llm": "^0.14.5",
59
+ "@empiricalrun/llm": "^0.14.6",
60
60
  "@empiricalrun/r2-uploader": "^0.3.8",
61
- "@empiricalrun/test-run": "^0.7.6"
61
+ "@empiricalrun/test-run": "^0.7.7"
62
62
  },
63
63
  "devDependencies": {
64
64
  "@playwright/test": "1.47.1",