@empiricalrun/test-gen 0.51.3 → 0.51.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +22 -0
- package/dist/agent/chat.d.ts.map +1 -1
- package/dist/agent/chat.js +29 -9
- package/dist/agent/cua/index.d.ts +1 -1
- package/dist/agent/cua/index.d.ts.map +1 -1
- package/dist/agent/cua/index.js +4 -3
- package/dist/agent/master/browser-tests/index.spec.js +1 -1
- package/dist/agent/master/run.d.ts +1 -1
- package/dist/agent/master/run.d.ts.map +1 -1
- package/dist/agent/master/run.js +2 -2
- package/dist/bin/utils/scenarios/index.d.ts +3 -1
- package/dist/bin/utils/scenarios/index.d.ts.map +1 -1
- package/dist/bin/utils/scenarios/index.js +7 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +35 -22
- package/dist/tools/test-gen-browser.d.ts +3 -0
- package/dist/tools/test-gen-browser.d.ts.map +1 -0
- package/dist/tools/{browser-agent.js → test-gen-browser.js} +12 -10
- package/dist/tools/test-run-fetcher/index.d.ts.map +1 -1
- package/dist/tools/test-run-fetcher/index.js +40 -38
- package/package.json +3 -3
- package/dist/tools/browser-agent.d.ts +0 -3
- package/dist/tools/browser-agent.d.ts.map +0 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,27 @@
|
|
|
1
1
|
# @empiricalrun/test-gen
|
|
2
2
|
|
|
3
|
+
## 0.51.5
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- ae47eed: feat: add type checking to text editor tool call
|
|
8
|
+
- beb704e: chore: move claude chat stuff to make space for gemini
|
|
9
|
+
- 3d9c868: fix: system prompt for computer use agent
|
|
10
|
+
- Updated dependencies [beb704e]
|
|
11
|
+
- Updated dependencies [f283696]
|
|
12
|
+
- @empiricalrun/llm@0.11.4
|
|
13
|
+
|
|
14
|
+
## 0.51.4
|
|
15
|
+
|
|
16
|
+
### Patch Changes
|
|
17
|
+
|
|
18
|
+
- 7613f20: fix: update test-run-fetcher tool call response to include project
|
|
19
|
+
- f57ab88: fix: add playwright best practices to system prompt
|
|
20
|
+
- 83a4788: feat: use cua in chat agent tool call with feature flag
|
|
21
|
+
- a8c11eb: fix: tool call log should show error
|
|
22
|
+
- Updated dependencies [307a1cf]
|
|
23
|
+
- @empiricalrun/llm@0.11.3
|
|
24
|
+
|
|
3
25
|
## 0.51.3
|
|
4
26
|
|
|
5
27
|
### Patch Changes
|
package/dist/agent/chat.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"chat.d.ts","sourceRoot":"","sources":["../../src/agent/chat.ts"],"names":[],"mappings":"AA+
|
|
1
|
+
{"version":3,"file":"chat.d.ts","sourceRoot":"","sources":["../../src/agent/chat.ts"],"names":[],"mappings":"AA+FA,wBAAsB,SAAS,CAAC,EAC9B,SAAwC,EACxC,mBAAmB,GACpB,EAAE;IACD,SAAS,CAAC,EAAE,4BAA4B,GAAG,4BAA4B,CAAC;IACxE,mBAAmB,CAAC,EAAE,OAAO,CAAC;CAC/B,mBAkHA"}
|
package/dist/agent/chat.js
CHANGED
|
@@ -4,13 +4,14 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
6
|
exports.chatAgent = void 0;
|
|
7
|
-
const
|
|
7
|
+
const chat_1 = require("@empiricalrun/llm/chat");
|
|
8
8
|
const path_1 = __importDefault(require("path"));
|
|
9
9
|
const picocolors_1 = require("picocolors");
|
|
10
|
+
const web_1 = require("../bin/utils/platform/web");
|
|
10
11
|
const human_in_the_loop_1 = require("../human-in-the-loop");
|
|
11
|
-
const browser_agent_1 = require("../tools/browser-agent");
|
|
12
12
|
const diagnosis_fetcher_1 = require("../tools/diagnosis-fetcher");
|
|
13
13
|
const grep_1 = require("../tools/grep");
|
|
14
|
+
const test_gen_browser_1 = require("../tools/test-gen-browser");
|
|
14
15
|
const test_run_1 = require("../tools/test-run");
|
|
15
16
|
const test_run_fetcher_1 = require("../tools/test-run-fetcher");
|
|
16
17
|
const zod_schema_1 = require("../tools/zod-schema");
|
|
@@ -20,6 +21,8 @@ You are a helpful assistant that can answer questions and help with tasks.
|
|
|
20
21
|
You are given a set of tools to use to fulfill the user's request. Read their descriptions to
|
|
21
22
|
understand what each tool does.
|
|
22
23
|
|
|
24
|
+
# Tools
|
|
25
|
+
|
|
23
26
|
For example, if the user asks you to run a test, you could use the runTest tool.
|
|
24
27
|
Once the test is run, you will receive the results in the form of a JSON object.
|
|
25
28
|
Summarize the results in a few sentences.
|
|
@@ -46,6 +49,7 @@ the actual code to click on the login button. If you are fixing a failing test,
|
|
|
46
49
|
around the failing line of code, so that it can be replaced/modified.
|
|
47
50
|
|
|
48
51
|
# Repo context
|
|
52
|
+
|
|
49
53
|
You are running as a CLI tool inside the directory of the repo where this test file is located. Here is
|
|
50
54
|
the repo directory structure:
|
|
51
55
|
|
|
@@ -55,27 +59,38 @@ While specifying paths to files, use relative paths from the current working dir
|
|
|
55
59
|
- Correct path: "tests/lesson.spec.ts"
|
|
56
60
|
- Incorrect path: "/repo/tests/lesson.spec.ts" or "${path_1.default.basename(process.cwd())}/tests/lesson.spec.ts"
|
|
57
61
|
|
|
62
|
+
# Rules for fixing Playwright tests
|
|
63
|
+
|
|
64
|
+
You must follow these rules while adding new tests or modifying existing tests. There can be exceptions to these rules, but
|
|
65
|
+
ONLY when explicitly asked for by the user.
|
|
66
|
+
|
|
67
|
+
1. Do not add any conditional logic or try catch blocks in a test. A good test deterministically tests a user scenario
|
|
68
|
+
2. Trust Playwright's ability to auto-wait while taking actions on elements. For example, do not add checks on locator.isVisible() before clicking on it: Playwright already does this
|
|
69
|
+
3. Do not add waitForTimeout or waitForLoadState in a test. Playwright will automatically wait for the page to load.
|
|
70
|
+
4. You can't delete some steps from the test to make it pass. The test needs to accomplish its objective (which is to validate a particular user scenario)
|
|
71
|
+
|
|
58
72
|
# Proactiveness
|
|
73
|
+
|
|
59
74
|
You are allowed to be proactive, but only when the user asks you to do something. You should strive to
|
|
60
75
|
strike a balance between:
|
|
61
76
|
1. Doing the right thing when asked, including taking actions and follow-up actions
|
|
62
|
-
2. Not surprising the user with actions you take without asking
|
|
77
|
+
2. Not surprising the user with actions you take without asking. It is okay to ask the user for confirmation before taking actions.
|
|
63
78
|
`;
|
|
64
79
|
const tools = [
|
|
65
80
|
test_run_1.runTestTool,
|
|
66
|
-
|
|
81
|
+
test_gen_browser_1.generateTestWithBrowserAgent,
|
|
67
82
|
diagnosis_fetcher_1.diagnosisTool,
|
|
68
83
|
grep_1.grepTool,
|
|
69
84
|
test_run_fetcher_1.testRunTool,
|
|
70
85
|
];
|
|
71
86
|
const toolExecutors = {
|
|
72
87
|
...Object.fromEntries(tools.map((tool) => [tool.schema.name, tool.execute])),
|
|
73
|
-
str_replace_editor:
|
|
88
|
+
str_replace_editor: (input) => (0, chat_1.strReplaceEditorTool)(input, web_1.validateTypescript),
|
|
74
89
|
};
|
|
75
90
|
async function chatAgent({ chatModel = "claude-3-7-sonnet-20250219", useDiskForChatState, }) {
|
|
76
91
|
const ora = (await import("ora")).default;
|
|
77
92
|
let userPrompt = undefined;
|
|
78
|
-
let chatState = useDiskForChatState ?
|
|
93
|
+
let chatState = useDiskForChatState ? chat_1.ChatState.load() : new chat_1.ChatState(false);
|
|
79
94
|
const handleSigInt = () => {
|
|
80
95
|
console.log(`\n${(0, picocolors_1.gray)("Usage summary -> " + chatState.getUsageSummary())}`);
|
|
81
96
|
process.exit(0);
|
|
@@ -129,7 +144,12 @@ async function chatAgent({ chatModel = "claude-3-7-sonnet-20250219", useDiskForC
|
|
|
129
144
|
throw new Error(`Tool ${toolUse.name} not found`);
|
|
130
145
|
}
|
|
131
146
|
const toolResult = await toolExecutor(toolUse.input);
|
|
132
|
-
|
|
147
|
+
if (toolResult.isError) {
|
|
148
|
+
spinner.fail(`Tool ${toolUse.name} failed with error: ${toolResult.result}`);
|
|
149
|
+
}
|
|
150
|
+
else {
|
|
151
|
+
spinner.succeed(`Tool ${toolUse.name} completed`);
|
|
152
|
+
}
|
|
133
153
|
chatState.pushMessage({
|
|
134
154
|
role: "user",
|
|
135
155
|
content: [
|
|
@@ -144,10 +164,10 @@ async function chatAgent({ chatModel = "claude-3-7-sonnet-20250219", useDiskForC
|
|
|
144
164
|
continue;
|
|
145
165
|
}
|
|
146
166
|
const spinner = ora("Claude is working...").start();
|
|
147
|
-
const response = await (0,
|
|
167
|
+
const response = await (0, chat_1.createClaudeMessage)({
|
|
148
168
|
systemPrompt,
|
|
149
169
|
messages: chatState.getMessagesForCreateCompletion(),
|
|
150
|
-
tools: tools.map((tool) => (0,
|
|
170
|
+
tools: tools.map((tool) => (0, chat_1.convertOpenAISchemaToAnthropic)((0, zod_schema_1.zodToOpenAITool)(tool.schema))),
|
|
151
171
|
model: chatModel,
|
|
152
172
|
withStrReplaceEditor: true,
|
|
153
173
|
});
|
|
@@ -3,7 +3,7 @@ export declare function startPlaywrightCodegen(page: Page): Promise<void>;
|
|
|
3
3
|
/**
|
|
4
4
|
* Run the loop that executes computer actions until no 'computer_call' is found.
|
|
5
5
|
*/
|
|
6
|
-
export declare function
|
|
6
|
+
export declare function createTestUsingComputerUseAgent({ page, task, }: {
|
|
7
7
|
page: Page;
|
|
8
8
|
task: string;
|
|
9
9
|
}): Promise<{
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAWlC,wBAAsB,sBAAsB,CAAC,IAAI,EAAE,IAAI,iBAoBtD;AAED;;GAEG;AACH,wBAAsB,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/cua/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAWlC,wBAAsB,sBAAsB,CAAC,IAAI,EAAE,IAAI,iBAoBtD;AAED;;GAEG;AACH,wBAAsB,+BAA+B,CAAC,EACpD,IAAI,EACJ,IAAI,GACL,EAAE;IACD,IAAI,EAAE,IAAI,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;CACd,GAAG,OAAO,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,EAAE,CAAC;CACvB,CAAC,CAmGD"}
|
package/dist/agent/cua/index.js
CHANGED
|
@@ -3,7 +3,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
3
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.
|
|
6
|
+
exports.createTestUsingComputerUseAgent = exports.startPlaywrightCodegen = void 0;
|
|
7
7
|
const openai_1 = __importDefault(require("openai"));
|
|
8
8
|
const utils_1 = require("../browsing/utils");
|
|
9
9
|
const computer_1 = require("./computer");
|
|
@@ -37,7 +37,7 @@ exports.startPlaywrightCodegen = startPlaywrightCodegen;
|
|
|
37
37
|
/**
|
|
38
38
|
* Run the loop that executes computer actions until no 'computer_call' is found.
|
|
39
39
|
*/
|
|
40
|
-
async function
|
|
40
|
+
async function createTestUsingComputerUseAgent({ page, task, }) {
|
|
41
41
|
let generatedCode = "";
|
|
42
42
|
await (0, utils_1.injectPwLocatorGenerator)(page);
|
|
43
43
|
const screenshotBytes = await (0, computer_1.getScreenshot)(page);
|
|
@@ -110,6 +110,7 @@ async function executeUsingComputerUseAgent({ page, task, }) {
|
|
|
110
110
|
environment: "browser",
|
|
111
111
|
},
|
|
112
112
|
],
|
|
113
|
+
instructions: INSTRUCTIONS,
|
|
113
114
|
input: [
|
|
114
115
|
{
|
|
115
116
|
call_id: lastCallId,
|
|
@@ -129,4 +130,4 @@ async function executeUsingComputerUseAgent({ page, task, }) {
|
|
|
129
130
|
importPaths: [],
|
|
130
131
|
};
|
|
131
132
|
}
|
|
132
|
-
exports.
|
|
133
|
+
exports.createTestUsingComputerUseAgent = createTestUsingComputerUseAgent;
|
|
@@ -75,7 +75,7 @@ click on maverick inside ford dropdown`,
|
|
|
75
75
|
(0, fixtures_1.test)("cua agent can click icons accurately", async ({ page, server }) => {
|
|
76
76
|
await page.goto(`${server.baseURL}/icons-navbar.html`);
|
|
77
77
|
await (0, fixtures_1.expect)(page.getByText("select an icon")).toBeVisible();
|
|
78
|
-
const response = await (0, run_1.
|
|
78
|
+
const response = await (0, run_1.createTestUsingComputerUseAgent)({
|
|
79
79
|
task: `click on the gear icon`,
|
|
80
80
|
page,
|
|
81
81
|
});
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import type { TestCase, TestGenConfigOptions } from "@empiricalrun/shared-types";
|
|
2
2
|
import { Page } from "playwright";
|
|
3
3
|
import { ScopeVars } from "../../types";
|
|
4
|
-
export {
|
|
4
|
+
export { createTestUsingComputerUseAgent } from "../cua";
|
|
5
5
|
export declare const IS_ALLOWED_TO_USE_SKILLS = false;
|
|
6
6
|
export declare function createTestUsingMasterAgent({ task, page, testCase, specPath, options, scopeVars, }: {
|
|
7
7
|
task: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,QAAQ,EACR,oBAAoB,EACrB,MAAM,4BAA4B,CAAC;AACpC,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAelC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAUxC,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,QAAQ,EACR,oBAAoB,EACrB,MAAM,4BAA4B,CAAC;AACpC,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAelC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAUxC,OAAO,EAAE,+BAA+B,EAAE,MAAM,QAAQ,CAAC;AAKzD,eAAO,MAAM,wBAAwB,QAAQ,CAAC;AAqB9C,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,OAAO,CAAC,oBAAoB,CAAC,CAAC;IACvC,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GAwRA"}
|
package/dist/agent/master/run.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.createTestUsingMasterAgent = exports.IS_ALLOWED_TO_USE_SKILLS = exports.
|
|
3
|
+
exports.createTestUsingMasterAgent = exports.IS_ALLOWED_TO_USE_SKILLS = exports.createTestUsingComputerUseAgent = void 0;
|
|
4
4
|
const llm_1 = require("@empiricalrun/llm");
|
|
5
5
|
const actions_1 = require("../../actions");
|
|
6
6
|
const skill_1 = require("../../actions/skill");
|
|
@@ -20,7 +20,7 @@ const execute_browser_action_1 = require("./execute-browser-action");
|
|
|
20
20
|
const execute_skill_action_1 = require("./execute-skill-action");
|
|
21
21
|
const next_action_1 = require("./next-action");
|
|
22
22
|
var cua_1 = require("../cua");
|
|
23
|
-
Object.defineProperty(exports, "
|
|
23
|
+
Object.defineProperty(exports, "createTestUsingComputerUseAgent", { enumerable: true, get: function () { return cua_1.createTestUsingComputerUseAgent; } });
|
|
24
24
|
const MAX_ERROR_COUNT = 2;
|
|
25
25
|
// Disabling skills as we're seeing false usage with chat agent
|
|
26
26
|
exports.IS_ALLOWED_TO_USE_SKILLS = false;
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import type { TestGenConfig } from "@empiricalrun/shared-types";
|
|
2
2
|
import { CliOptions } from "../index";
|
|
3
|
-
export declare function buildTokenFromOptions(options: Omit<CliOptions, "token"
|
|
3
|
+
export declare function buildTokenFromOptions(options: Omit<CliOptions, "token">, featureFlags?: {
|
|
4
|
+
useComputerUseAgent?: boolean;
|
|
5
|
+
}): string;
|
|
4
6
|
export declare function buildTestConfigFromOptions(options: Omit<CliOptions, "token">): TestGenConfig;
|
|
5
7
|
export declare function loadTestConfigs(testGenToken: string): TestGenConfig;
|
|
6
8
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/bin/utils/scenarios/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAIV,aAAa,EAEd,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AAetC,wBAAgB,qBAAqB,CACnC,OAAO,EAAE,IAAI,CAAC,UAAU,EAAE,OAAO,CAAC,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/bin/utils/scenarios/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAIV,aAAa,EAEd,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AAetC,wBAAgB,qBAAqB,CACnC,OAAO,EAAE,IAAI,CAAC,UAAU,EAAE,OAAO,CAAC,EAClC,YAAY,CAAC,EAAE;IACb,mBAAmB,CAAC,EAAE,OAAO,CAAC;CAC/B,GACA,MAAM,CAqBR;AAED,wBAAgB,0BAA0B,CACxC,OAAO,EAAE,IAAI,CAAC,UAAU,EAAE,OAAO,CAAC,GACjC,aAAa,CAaf;AAED,wBAAgB,eAAe,CAAC,YAAY,EAAE,MAAM,GAAG,aAAa,CAmBnE"}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.loadTestConfigs = exports.buildTestConfigFromOptions = exports.buildTokenFromOptions = void 0;
|
|
4
|
-
function buildTokenFromOptions(options) {
|
|
4
|
+
function buildTokenFromOptions(options, featureFlags) {
|
|
5
5
|
const genConfig = buildTestConfigFromOptions(options);
|
|
6
6
|
const requestConfig = {
|
|
7
7
|
specPath: genConfig.specPath,
|
|
@@ -12,7 +12,12 @@ function buildTokenFromOptions(options) {
|
|
|
12
12
|
suites: genConfig.testCase.suites,
|
|
13
13
|
ai_gist: genConfig.testCase.ai_gist,
|
|
14
14
|
build: genConfig.build,
|
|
15
|
-
options: genConfig.options
|
|
15
|
+
options: genConfig.options
|
|
16
|
+
? {
|
|
17
|
+
...genConfig.options,
|
|
18
|
+
...featureFlags,
|
|
19
|
+
}
|
|
20
|
+
: featureFlags,
|
|
16
21
|
environment: genConfig.environment,
|
|
17
22
|
testErrorDiagnosis: genConfig.testErrorDiagnosis,
|
|
18
23
|
};
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAQlC,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAepC,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC,EAAE,SAAS,iBAsD3E"}
|
package/dist/index.js
CHANGED
|
@@ -5,6 +5,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
6
|
exports.createTest = void 0;
|
|
7
7
|
const llm_1 = require("@empiricalrun/llm");
|
|
8
|
+
const cua_1 = require("./agent/cua");
|
|
8
9
|
const run_1 = require("./agent/master/run");
|
|
9
10
|
const scenarios_1 = require("./bin/utils/scenarios");
|
|
10
11
|
const client_1 = __importDefault(require("./file/client"));
|
|
@@ -26,29 +27,41 @@ async function createTest(task, page, scope) {
|
|
|
26
27
|
try {
|
|
27
28
|
const testConfigArg = process.env.TEST_GEN_TOKEN;
|
|
28
29
|
const testGenConfig = (0, scenarios_1.loadTestConfigs)(testConfigArg);
|
|
29
|
-
(
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
30
|
+
if (testGenConfig.options && testGenConfig.options.metadata) {
|
|
31
|
+
(0, reporter_1.setReporterConfig)({
|
|
32
|
+
projectRepoName: testGenConfig.options?.metadata.projectRepoName,
|
|
33
|
+
testSessionId: testGenConfig.options?.metadata.testSessionId,
|
|
34
|
+
generationId: testGenConfig.options?.metadata.generationId,
|
|
35
|
+
});
|
|
36
|
+
(0, session_1.setSessionDetails)({
|
|
37
|
+
sessionId: testGenConfig.options?.metadata.testSessionId,
|
|
38
|
+
generationId: testGenConfig.options?.metadata.generationId,
|
|
39
|
+
testCaseId: testGenConfig.testCase.id,
|
|
40
|
+
projectRepoName: testGenConfig.options?.metadata.projectRepoName,
|
|
41
|
+
});
|
|
42
|
+
}
|
|
40
43
|
const fileService = new client_1.default();
|
|
41
|
-
const
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
44
|
+
const useComputerUseAgent = testGenConfig.options?.useComputerUseAgent;
|
|
45
|
+
let agentResult;
|
|
46
|
+
if (useComputerUseAgent) {
|
|
47
|
+
agentResult = await (0, cua_1.createTestUsingComputerUseAgent)({
|
|
48
|
+
task,
|
|
49
|
+
page,
|
|
50
|
+
});
|
|
51
|
+
}
|
|
52
|
+
else {
|
|
53
|
+
agentResult = await (0, run_1.createTestUsingMasterAgent)({
|
|
54
|
+
task,
|
|
55
|
+
page,
|
|
56
|
+
testCase: testGenConfig.testCase,
|
|
57
|
+
specPath: testGenConfig.specPath,
|
|
58
|
+
options: {
|
|
59
|
+
...testGenConfig.options,
|
|
60
|
+
},
|
|
61
|
+
scopeVars: scope,
|
|
62
|
+
});
|
|
63
|
+
}
|
|
64
|
+
const { code, importPaths } = agentResult;
|
|
52
65
|
await fileService.updateTest({
|
|
53
66
|
task,
|
|
54
67
|
generatedCode: code,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"test-gen-browser.d.ts","sourceRoot":"","sources":["../../src/tools/test-gen-browser.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAmDpC,eAAO,MAAM,4BAA4B,EAAE,IAiE1C,CAAC"}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.
|
|
3
|
+
exports.generateTestWithBrowserAgent = void 0;
|
|
4
4
|
const zod_1 = require("zod");
|
|
5
5
|
const run_1 = require("../agent/browsing/run");
|
|
6
6
|
const utils_1 = require("../agent/browsing/utils");
|
|
@@ -49,7 +49,7 @@ test("Example test code", async ({ page }) => {
|
|
|
49
49
|
});
|
|
50
50
|
\`\`\`
|
|
51
51
|
`;
|
|
52
|
-
exports.
|
|
52
|
+
exports.generateTestWithBrowserAgent = {
|
|
53
53
|
schema: {
|
|
54
54
|
name: "generateTestWithBrowserAgent",
|
|
55
55
|
description: BROWSER_AGENT_DESCRIPTION,
|
|
@@ -57,6 +57,14 @@ exports.browserAgentTool = {
|
|
|
57
57
|
},
|
|
58
58
|
execute: async (input) => {
|
|
59
59
|
const { testName, testSuites, fileName, changeToMake, project } = input;
|
|
60
|
+
const playwrightConfig = await (0, utils_1.readPlaywrightConfig)(process.cwd());
|
|
61
|
+
const validProjectNames = await (0, utils_1.getValidProjectNames)(playwrightConfig);
|
|
62
|
+
if (!validProjectNames.includes(project)) {
|
|
63
|
+
return {
|
|
64
|
+
isError: true,
|
|
65
|
+
result: `Invalid project name: ${project}. Valid project names are: ${validProjectNames.join(", ")}`,
|
|
66
|
+
};
|
|
67
|
+
}
|
|
60
68
|
try {
|
|
61
69
|
await (0, utils_1.replaceTodoWithCreateTest)({
|
|
62
70
|
testCaseName: testName,
|
|
@@ -70,14 +78,6 @@ exports.browserAgentTool = {
|
|
|
70
78
|
result: `Error running tool: ${error}`,
|
|
71
79
|
};
|
|
72
80
|
}
|
|
73
|
-
const playwrightConfig = await (0, utils_1.readPlaywrightConfig)(process.cwd());
|
|
74
|
-
const validProjectNames = await (0, utils_1.getValidProjectNames)(playwrightConfig);
|
|
75
|
-
if (!validProjectNames.includes(project)) {
|
|
76
|
-
return {
|
|
77
|
-
isError: true,
|
|
78
|
-
result: `Invalid project name: ${project}. Valid project names are: ${validProjectNames.join(", ")}`,
|
|
79
|
-
};
|
|
80
|
-
}
|
|
81
81
|
const { isError, error } = await (0, run_1.generateTestsUsingMasterAgent)({
|
|
82
82
|
testFilePath: fileName,
|
|
83
83
|
filePathToUpdate: fileName,
|
|
@@ -86,6 +86,8 @@ exports.browserAgentTool = {
|
|
|
86
86
|
name: testName,
|
|
87
87
|
file: fileName,
|
|
88
88
|
prompt: changeToMake,
|
|
89
|
+
}, {
|
|
90
|
+
useComputerUseAgent: true,
|
|
89
91
|
}),
|
|
90
92
|
repoDir: process.cwd(),
|
|
91
93
|
});
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tools/test-run-fetcher/index.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,UAAU,CAAC;AAWrC,wBAAgB,0BAA0B,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAOnE;AAED,eAAO,MAAM,WAAW,EAAE,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tools/test-run-fetcher/index.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,UAAU,CAAC;AAWrC,wBAAgB,0BAA0B,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAOnE;AAED,eAAO,MAAM,WAAW,EAAE,IA4HzB,CAAC"}
|
|
@@ -52,55 +52,57 @@ exports.testRunTool = {
|
|
|
52
52
|
// 1. Truncate stack trace to last 300 characters
|
|
53
53
|
// 2. Remove request/response headers from network metadata
|
|
54
54
|
// 3. Focus on failed tests only
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
55
|
+
const testRun = data.data?.test_run;
|
|
56
|
+
if (testRun?.flattenedSummaryDetails) {
|
|
57
|
+
testRun.flattenedSummaryDetails = testRun.flattenedSummaryDetails.map((detail) => {
|
|
58
|
+
// Truncate stack trace to last 300 characters
|
|
59
|
+
if (detail.failed_run_metadata?.stack) {
|
|
60
|
+
const stack = detail.failed_run_metadata.stack;
|
|
61
|
+
// TODO: change to last 10 lines
|
|
62
|
+
detail.failed_run_metadata.stack =
|
|
63
|
+
stack.length > 300 ? `...${stack.slice(-300)}` : stack;
|
|
64
|
+
}
|
|
65
|
+
// Remove headers from network metadata
|
|
66
|
+
if (detail.network_metadata?.failed_calls) {
|
|
67
|
+
detail.network_metadata.failed_calls =
|
|
68
|
+
detail.network_metadata.failed_calls.map((call) => ({
|
|
69
|
+
...call,
|
|
70
|
+
request: { ...call.request, headers: [] },
|
|
71
|
+
response: { ...call.response, headers: [] },
|
|
72
|
+
}));
|
|
73
|
+
}
|
|
74
|
+
if (detail.network_metadata?.failed_calls_within_time_range) {
|
|
75
|
+
detail.network_metadata.failed_calls_within_time_range =
|
|
76
|
+
detail.network_metadata.failed_calls_within_time_range.map((call) => ({
|
|
77
|
+
...call,
|
|
78
|
+
request: { ...call.request, headers: [] },
|
|
79
|
+
response: { ...call.response, headers: [] },
|
|
80
|
+
}));
|
|
81
|
+
}
|
|
82
|
+
return detail;
|
|
83
|
+
});
|
|
84
84
|
}
|
|
85
|
-
const failedTests =
|
|
85
|
+
const failedTests = testRun?.flattenedSummaryDetails.filter((detail) => detail.status === "failed");
|
|
86
86
|
const failedTestInMarkdown = failedTests
|
|
87
87
|
?.map((detail) => {
|
|
88
88
|
const { nesting } = detail.failed_run_metadata;
|
|
89
89
|
return {
|
|
90
90
|
testName: nesting.slice(1).join(" > "),
|
|
91
91
|
fileName: extractPathAfterSourceRepo(detail.failed_run_metadata.location.file),
|
|
92
|
+
project: detail.test_project,
|
|
92
93
|
detail,
|
|
93
94
|
};
|
|
94
95
|
})
|
|
95
|
-
.map((
|
|
96
|
+
.map((res) => {
|
|
96
97
|
return `
|
|
97
|
-
### Test name: ${
|
|
98
|
-
File: ${
|
|
98
|
+
### Test name: ${res.testName}
|
|
99
|
+
File: ${res.fileName}
|
|
100
|
+
Project: ${res.project}
|
|
99
101
|
|
|
100
102
|
Failure data below
|
|
101
103
|
|
|
102
104
|
\`\`\`json
|
|
103
|
-
${JSON.stringify(
|
|
105
|
+
${JSON.stringify(res.detail, null, 2)}
|
|
104
106
|
\`\`\`
|
|
105
107
|
`;
|
|
106
108
|
})
|
|
@@ -111,10 +113,10 @@ ${JSON.stringify(test.detail, null, 2)}
|
|
|
111
113
|
## Run info
|
|
112
114
|
- Run ID: ${runId}
|
|
113
115
|
- Repository: ${repoName}
|
|
114
|
-
- Started at: ${
|
|
115
|
-
- Ended at: ${
|
|
116
|
-
- Duration: ${
|
|
117
|
-
- Environment name: ${
|
|
116
|
+
- Started at: ${testRun?.testRun.run_started_at}
|
|
117
|
+
- Ended at: ${testRun?.testRun.run_ended_at}
|
|
118
|
+
- Duration: ${testRun?.testRun.duration} seconds
|
|
119
|
+
- Environment name: ${testRun?.testRun.environment_name}
|
|
118
120
|
|
|
119
121
|
## Failed tests
|
|
120
122
|
${failedTestInMarkdown}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@empiricalrun/test-gen",
|
|
3
|
-
"version": "0.51.
|
|
3
|
+
"version": "0.51.5",
|
|
4
4
|
"publishConfig": {
|
|
5
5
|
"registry": "https://registry.npmjs.org/",
|
|
6
6
|
"access": "public"
|
|
@@ -74,7 +74,7 @@
|
|
|
74
74
|
"tsx": "^4.16.2",
|
|
75
75
|
"typescript": "^5.3.3",
|
|
76
76
|
"zod": "^3.23.8",
|
|
77
|
-
"@empiricalrun/llm": "^0.11.
|
|
77
|
+
"@empiricalrun/llm": "^0.11.4",
|
|
78
78
|
"@empiricalrun/r2-uploader": "^0.3.8",
|
|
79
79
|
"@empiricalrun/reporter": "^0.23.2",
|
|
80
80
|
"@empiricalrun/test-run": "^0.7.6"
|
|
@@ -92,7 +92,7 @@
|
|
|
92
92
|
"js-levenshtein": "^1.1.6",
|
|
93
93
|
"playwright": "1.47.1",
|
|
94
94
|
"ts-patch": "^3.3.0",
|
|
95
|
-
"@empiricalrun/shared-types": "0.0.
|
|
95
|
+
"@empiricalrun/shared-types": "0.0.7"
|
|
96
96
|
},
|
|
97
97
|
"scripts": {
|
|
98
98
|
"dev": "tspc --build --watch",
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"browser-agent.d.ts","sourceRoot":"","sources":["../../src/tools/browser-agent.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAmDpC,eAAO,MAAM,gBAAgB,EAAE,IA4D9B,CAAC"}
|