@empiricalrun/test-gen 0.34.5 → 0.35.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/dist/agent/browsing/index.d.ts +1 -1
- package/dist/agent/browsing/index.d.ts.map +1 -1
- package/dist/agent/browsing/index.js +11 -11
- package/dist/agent/codegen/create-test-block.js +1 -1
- package/dist/agent/codegen/run.d.ts +1 -1
- package/dist/agent/codegen/run.d.ts.map +1 -1
- package/dist/agent/codegen/run.js +12 -10
- package/dist/agent/codegen/skills-retriever.d.ts +11 -0
- package/dist/agent/codegen/skills-retriever.d.ts.map +1 -1
- package/dist/agent/codegen/skills-retriever.js +27 -9
- package/dist/agent/codegen/update-flow.d.ts.map +1 -1
- package/dist/agent/codegen/update-flow.js +21 -17
- package/dist/agent/infer-agent/index.d.ts +0 -1
- package/dist/agent/infer-agent/index.d.ts.map +1 -1
- package/dist/agent/infer-agent/index.js +4 -5
- package/dist/agent/master/run.d.ts +4 -4
- package/dist/agent/master/run.d.ts.map +1 -1
- package/dist/agent/master/run.js +48 -20
- package/dist/agent/master/with-hints.d.ts +1 -1
- package/dist/agent/master/with-hints.d.ts.map +1 -1
- package/dist/agent/master/with-hints.js +2 -2
- package/dist/bin/index.js +8 -6
- package/dist/evals/fetch-pom-skills-agent.evals.d.ts +4 -0
- package/dist/evals/fetch-pom-skills-agent.evals.d.ts.map +1 -0
- package/dist/evals/fetch-pom-skills-agent.evals.js +36 -0
- package/dist/evals/master-agent.evals.d.ts +4 -0
- package/dist/evals/master-agent.evals.d.ts.map +1 -0
- package/dist/evals/master-agent.evals.js +36 -0
- package/package.json +2 -2
- package/dist/evals/infer-master-code.d.ts +0 -2
- package/dist/evals/infer-master-code.d.ts.map +0 -1
- package/dist/evals/infer-master-code.js +0 -18
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,19 @@
|
|
|
1
1
|
# @empiricalrun/test-gen
|
|
2
2
|
|
|
3
|
+
## 0.35.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- 069347f: feat: add support for master agent evals
|
|
8
|
+
- 11e4cbd: feat: add fetch skills agent evals
|
|
9
|
+
|
|
10
|
+
### Patch Changes
|
|
11
|
+
|
|
12
|
+
- 297508d: fix: langfuse key errors
|
|
13
|
+
- Updated dependencies [069347f]
|
|
14
|
+
- Updated dependencies [297508d]
|
|
15
|
+
- @empiricalrun/llm@0.9.21
|
|
16
|
+
|
|
3
17
|
## 0.34.5
|
|
4
18
|
|
|
5
19
|
### Patch Changes
|
|
@@ -10,7 +10,7 @@ export type BrowsingAgentOptions = Partial<TestGenConfigOptions> & {
|
|
|
10
10
|
};
|
|
11
11
|
export declare function executeTaskUsingBrowsingAgent({ trace, action, logger, page, options, llm, actions, }: {
|
|
12
12
|
action: string;
|
|
13
|
-
trace
|
|
13
|
+
trace?: TraceClient;
|
|
14
14
|
logger: CustomLogger;
|
|
15
15
|
page: Page;
|
|
16
16
|
options: BrowsingAgentOptions;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAClD,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAIhD,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAMnD,MAAM,MAAM,oBAAoB,GAAG,OAAO,CAAC,oBAAoB,CAAC,GAAG;IACjE,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACH,CAAC;AAEF,wBAAsB,6BAA6B,CAAC,EAClD,KAAK,EACL,MAAM,EACN,MAAM,EACN,IAAI,EACJ,OAAO,EACP,GAAG,EACH,OAAO,GACR,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,WAAW,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAClD,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAIhD,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAMnD,MAAM,MAAM,oBAAoB,GAAG,OAAO,CAAC,oBAAoB,CAAC,GAAG;IACjE,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACH,CAAC;AAEF,wBAAsB,6BAA6B,CAAC,EAClD,KAAK,EACL,MAAM,EACN,MAAM,EACN,IAAI,EACJ,OAAO,EACP,GAAG,EACH,OAAO,GACR,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,MAAM,EAAE,YAAY,CAAC;IACrB,IAAI,EAAE,IAAI,CAAC;IACX,OAAO,EAAE,oBAAoB,CAAC;IAC9B,GAAG,EAAE,GAAG,CAAC;IACT,OAAO,EAAE,iBAAiB,CAAC;CAC5B,iBAwIA"}
|
|
@@ -15,23 +15,23 @@ async function executeTaskUsingBrowsingAgent({ trace, action, logger, page, opti
|
|
|
15
15
|
const tools = actions.getBrowsingActionSchemas();
|
|
16
16
|
const testgenUpdatesReporter = new reporter_1.TestGenUpdatesReporter();
|
|
17
17
|
while (!isTaskDone) {
|
|
18
|
-
const browsingAgentSpan = trace
|
|
18
|
+
const browsingAgentSpan = trace?.span({
|
|
19
19
|
name: `browsing-agent`,
|
|
20
20
|
});
|
|
21
21
|
if (await (0, session_1.shouldStopSession)()) {
|
|
22
22
|
break;
|
|
23
23
|
}
|
|
24
|
-
const pageContentSpan = browsingAgentSpan
|
|
24
|
+
const pageContentSpan = browsingAgentSpan?.span({
|
|
25
25
|
name: "page-content",
|
|
26
26
|
});
|
|
27
27
|
const pageContent = await page.content();
|
|
28
|
-
pageContentSpan
|
|
29
|
-
const sanitizationSpan = browsingAgentSpan
|
|
28
|
+
pageContentSpan?.end({ output: { pageContent } });
|
|
29
|
+
const sanitizationSpan = browsingAgentSpan?.span({
|
|
30
30
|
name: "page-sanitization",
|
|
31
31
|
});
|
|
32
32
|
const pageSnapshot = (0, html_1.sanitizeHtml)(pageContent, options.htmlSanitize);
|
|
33
|
-
sanitizationSpan
|
|
34
|
-
const promptSpan = browsingAgentSpan
|
|
33
|
+
sanitizationSpan?.end({ output: { pageSnapshot } });
|
|
34
|
+
const promptSpan = browsingAgentSpan?.span({ name: "page-prompt" });
|
|
35
35
|
// extract all successful actions
|
|
36
36
|
const successfulActions = executedActions
|
|
37
37
|
.filter((a) => !a.isError)
|
|
@@ -46,8 +46,8 @@ async function executeTaskUsingBrowsingAgent({ trace, action, logger, page, opti
|
|
|
46
46
|
logger.log(`isTaskDone: ${isTaskDone}`);
|
|
47
47
|
logger.log(`reason: ${verificationAgentResp.reason}`);
|
|
48
48
|
if (isTaskDone) {
|
|
49
|
-
browsingAgentSpan
|
|
50
|
-
browsingAgentSpan
|
|
49
|
+
browsingAgentSpan?.event({ name: "task-done" });
|
|
50
|
+
browsingAgentSpan?.end({
|
|
51
51
|
output: { taskDone: true, reason: verificationAgentResp.reason },
|
|
52
52
|
});
|
|
53
53
|
break;
|
|
@@ -60,7 +60,7 @@ async function executeTaskUsingBrowsingAgent({ trace, action, logger, page, opti
|
|
|
60
60
|
lastActionErrors: lastActionExecTrace ? [lastActionExecTrace] : [],
|
|
61
61
|
promptType: "browsing-agent-as-tool",
|
|
62
62
|
});
|
|
63
|
-
promptSpan
|
|
63
|
+
promptSpan?.end({ output: { messages } });
|
|
64
64
|
let completion;
|
|
65
65
|
completion = await (0, o1_completion_1.getO1Completion)({
|
|
66
66
|
//@ts-ignore
|
|
@@ -91,7 +91,7 @@ async function executeTaskUsingBrowsingAgent({ trace, action, logger, page, opti
|
|
|
91
91
|
action: "",
|
|
92
92
|
});
|
|
93
93
|
}
|
|
94
|
-
const toolCallsSpan = browsingAgentSpan
|
|
94
|
+
const toolCallsSpan = browsingAgentSpan?.span({ name: "tool-calls" });
|
|
95
95
|
for (const i in toolCalls) {
|
|
96
96
|
const toolCall = toolCalls[i];
|
|
97
97
|
if (await (0, session_1.shouldStopSession)()) {
|
|
@@ -116,7 +116,7 @@ async function executeTaskUsingBrowsingAgent({ trace, action, logger, page, opti
|
|
|
116
116
|
logger.error(lastActionExecTrace, e);
|
|
117
117
|
}
|
|
118
118
|
}
|
|
119
|
-
toolCallsSpan
|
|
119
|
+
toolCallsSpan?.end({ output: { toolCalls } });
|
|
120
120
|
// mark task as done if llm is stuck in loop
|
|
121
121
|
if (executedActions.length >= 3) {
|
|
122
122
|
const lastThreeActions = executedActions.slice(-3);
|
|
@@ -15,7 +15,7 @@ async function createEmptyTestCaseBlock({ testCase, file, options, trace, }) {
|
|
|
15
15
|
const session = (0, session_1.getSessionDetails)();
|
|
16
16
|
trace =
|
|
17
17
|
trace ||
|
|
18
|
-
llm_1.langfuseInstance
|
|
18
|
+
llm_1.langfuseInstance?.trace({
|
|
19
19
|
name: "create-empty-test-block",
|
|
20
20
|
id: crypto.randomUUID(),
|
|
21
21
|
release: session.version,
|
|
@@ -1,4 +1,4 @@
|
|
|
1
1
|
import { TraceClient } from "@empiricalrun/llm";
|
|
2
2
|
import { TestCase, TestGenConfigOptions } from "../../types";
|
|
3
|
-
export declare function generateTest(testCase: TestCase, file: string, options: TestGenConfigOptions, trace
|
|
3
|
+
export declare function generateTest(testCase: TestCase, file: string, options: TestGenConfigOptions, trace?: TraceClient): Promise<TestCase[]>;
|
|
4
4
|
//# sourceMappingURL=run.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkC,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAkBhF,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,wBAAsB,YAAY,CAChC,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,EAC7B,KAAK,EAAE,WAAW,
|
|
1
|
+
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkC,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAkBhF,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,wBAAsB,YAAY,CAChC,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,EAC7B,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,QAAQ,EAAE,CAAC,CA0GrB"}
|
|
@@ -31,7 +31,7 @@ async function generateTest(testCase, file, options, trace) {
|
|
|
31
31
|
}
|
|
32
32
|
const generatedTestCases = [];
|
|
33
33
|
logger.logEmptyLine();
|
|
34
|
-
const createTestSpan = trace
|
|
34
|
+
const createTestSpan = trace?.span({
|
|
35
35
|
name: "create-test",
|
|
36
36
|
input: {
|
|
37
37
|
testCase,
|
|
@@ -39,7 +39,7 @@ async function generateTest(testCase, file, options, trace) {
|
|
|
39
39
|
options,
|
|
40
40
|
},
|
|
41
41
|
});
|
|
42
|
-
createTestSpan
|
|
42
|
+
createTestSpan?.event({
|
|
43
43
|
name: "collate-files-as-text",
|
|
44
44
|
output: {
|
|
45
45
|
codePrompt,
|
|
@@ -47,7 +47,7 @@ async function generateTest(testCase, file, options, trace) {
|
|
|
47
47
|
testFileContent,
|
|
48
48
|
},
|
|
49
49
|
});
|
|
50
|
-
const promptSpan = createTestSpan
|
|
50
|
+
const promptSpan = createTestSpan?.span({
|
|
51
51
|
name: "add-scenario-prompt",
|
|
52
52
|
});
|
|
53
53
|
const instruction = await (0, llm_1.getPrompt)("add-scenario", {
|
|
@@ -57,7 +57,7 @@ async function generateTest(testCase, file, options, trace) {
|
|
|
57
57
|
scenarioSteps: testCase.steps.join("\n"),
|
|
58
58
|
scenarioFile: file,
|
|
59
59
|
});
|
|
60
|
-
promptSpan
|
|
60
|
+
promptSpan?.end({ output: { instruction } });
|
|
61
61
|
const llm = new llm_1.LLM({
|
|
62
62
|
trace,
|
|
63
63
|
provider: options.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
|
|
@@ -73,7 +73,7 @@ async function generateTest(testCase, file, options, trace) {
|
|
|
73
73
|
});
|
|
74
74
|
let response = firstShotMessage?.content || "";
|
|
75
75
|
logger.success("Test generated successfully!");
|
|
76
|
-
const readWriteFileSpan = trace
|
|
76
|
+
const readWriteFileSpan = trace?.span({ name: "write-to-file" });
|
|
77
77
|
let contents = fs_extra_1.default.readFileSync(file, "utf-8");
|
|
78
78
|
const [prependContent, strippedContent] = await (0, web_1.stripAndPrependImports)(response, testCase?.name);
|
|
79
79
|
let updatedContent = prependContent +
|
|
@@ -83,9 +83,9 @@ async function generateTest(testCase, file, options, trace) {
|
|
|
83
83
|
codeSnippet: `\n\n${strippedContent}`,
|
|
84
84
|
});
|
|
85
85
|
await fs_extra_1.default.writeFile(file, updatedContent, "utf-8");
|
|
86
|
-
readWriteFileSpan
|
|
86
|
+
readWriteFileSpan?.end({ output: { updatedContent } });
|
|
87
87
|
logger.log("Linting generated code...");
|
|
88
|
-
createTestSpan
|
|
88
|
+
createTestSpan?.event({ name: "lint-file" });
|
|
89
89
|
await (0, web_1.lintErrors)(file);
|
|
90
90
|
await (0, fix_ts_errors_1.validateAndFixTypescriptErrors)({
|
|
91
91
|
trace,
|
|
@@ -96,12 +96,14 @@ async function generateTest(testCase, file, options, trace) {
|
|
|
96
96
|
testCase: testCase,
|
|
97
97
|
options,
|
|
98
98
|
});
|
|
99
|
-
createTestSpan
|
|
99
|
+
createTestSpan?.event({ name: "format-file" });
|
|
100
100
|
await (0, web_1.formatCode)(file);
|
|
101
101
|
logger.success("File formatted successfully!");
|
|
102
|
-
|
|
102
|
+
if (trace) {
|
|
103
|
+
logger.log(`Successfully generated code for the given task. \n View [trace](${trace.getTraceUrl()})`);
|
|
104
|
+
}
|
|
103
105
|
generatedTestCases.push(testCase);
|
|
104
|
-
createTestSpan
|
|
106
|
+
createTestSpan?.end({ output: { response } });
|
|
105
107
|
await (0, llm_1.flushAllTraces)();
|
|
106
108
|
return generatedTestCases;
|
|
107
109
|
}
|
|
@@ -1,5 +1,16 @@
|
|
|
1
1
|
import { TraceClient } from "@empiricalrun/llm";
|
|
2
2
|
import { TestCase, TestGenConfigOptions } from "../../types";
|
|
3
|
+
export declare const fetchPomSkills: ({ testCase, pomFiles, options, trace, }: {
|
|
4
|
+
testCase: TestCase;
|
|
5
|
+
pomFiles?: string | undefined;
|
|
6
|
+
trace?: TraceClient | undefined;
|
|
7
|
+
options?: TestGenConfigOptions | undefined;
|
|
8
|
+
}) => Promise<{
|
|
9
|
+
testStep: string;
|
|
10
|
+
filePath: string;
|
|
11
|
+
usageExample: string;
|
|
12
|
+
reason: string;
|
|
13
|
+
}[]>;
|
|
3
14
|
export declare function getAppropriateSkills({ testCase, options, trace, }: {
|
|
4
15
|
testCase: TestCase;
|
|
5
16
|
options?: TestGenConfigOptions;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"skills-retriever.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/skills-retriever.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAYhE,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAG7D,wBAAsB,oBAAoB,CAAC,EACzC,QAAQ,EACR,OAAO,EACP,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB;;;;;
|
|
1
|
+
{"version":3,"file":"skills-retriever.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/skills-retriever.ts"],"names":[],"mappings":"AAAA,OAAO,EAAkB,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAYhE,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAG7D,eAAO,MAAM,cAAc;cAMf,QAAQ;;;;;;;;;IA0CnB,CAAC;AAEF,wBAAsB,oBAAoB,CAAC,EACzC,QAAQ,EACR,OAAO,EACP,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB;;;;;KA6BA"}
|
|
@@ -3,7 +3,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
3
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.getAppropriateSkills = void 0;
|
|
6
|
+
exports.getAppropriateSkills = exports.fetchPomSkills = void 0;
|
|
7
7
|
const llm_1 = require("@empiricalrun/llm");
|
|
8
8
|
const fs_1 = __importDefault(require("fs"));
|
|
9
9
|
const logger_1 = require("../../bin/logger");
|
|
@@ -11,18 +11,15 @@ const context_1 = require("../../bin/utils/context");
|
|
|
11
11
|
const fs_2 = require("../../bin/utils/fs");
|
|
12
12
|
const constants_1 = require("../../constants");
|
|
13
13
|
const utils_1 = require("./utils");
|
|
14
|
-
async
|
|
15
|
-
const
|
|
16
|
-
logger.log("getting skill set for the repository");
|
|
17
|
-
const filter = await (0, context_1.createGitIgnoreFileFilter)();
|
|
18
|
-
const pomFiles = await (0, fs_2.generatePromptFromDirectory)("./pages", filter);
|
|
19
|
-
const fetchSkillsSpan = trace?.span({
|
|
14
|
+
const fetchPomSkills = async ({ testCase, pomFiles, options, trace, }) => {
|
|
15
|
+
const fetchSkillsUsingPOMFilesSpan = trace?.span({
|
|
20
16
|
name: "fetch-pom-skills",
|
|
21
17
|
input: {
|
|
18
|
+
pomFiles,
|
|
22
19
|
testCase,
|
|
23
20
|
},
|
|
24
21
|
});
|
|
25
|
-
const promptSpan =
|
|
22
|
+
const promptSpan = fetchSkillsUsingPOMFilesSpan?.span({
|
|
26
23
|
name: "fetch-pom-skills-prompt",
|
|
27
24
|
});
|
|
28
25
|
const prompt = await (0, llm_1.getPrompt)("fetch-skills-prompt", {
|
|
@@ -32,7 +29,7 @@ async function getAppropriateSkills({ testCase, options, trace, }) {
|
|
|
32
29
|
});
|
|
33
30
|
promptSpan?.end({ output: { prompt } });
|
|
34
31
|
const llm = new llm_1.LLM({
|
|
35
|
-
trace:
|
|
32
|
+
trace: fetchSkillsUsingPOMFilesSpan,
|
|
36
33
|
provider: options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
|
|
37
34
|
defaultModel: options?.model || constants_1.DEFAULT_MODEL,
|
|
38
35
|
providerApiKey: constants_1.MODEL_API_KEYS[options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER],
|
|
@@ -47,6 +44,27 @@ async function getAppropriateSkills({ testCase, options, trace, }) {
|
|
|
47
44
|
});
|
|
48
45
|
let response = firstShotMessage?.content || "";
|
|
49
46
|
const skills = (0, utils_1.extractTestStepsSuggestions)(response);
|
|
47
|
+
fetchSkillsUsingPOMFilesSpan?.end({ output: { skills } });
|
|
48
|
+
return skills;
|
|
49
|
+
};
|
|
50
|
+
exports.fetchPomSkills = fetchPomSkills;
|
|
51
|
+
async function getAppropriateSkills({ testCase, options, trace, }) {
|
|
52
|
+
const logger = new logger_1.CustomLogger({ useReporter: false });
|
|
53
|
+
logger.log("getting skill set for the repository");
|
|
54
|
+
const filter = await (0, context_1.createGitIgnoreFileFilter)();
|
|
55
|
+
const pomFiles = await (0, fs_2.generatePromptFromDirectory)("./pages", filter);
|
|
56
|
+
const fetchSkillsSpan = trace?.span({
|
|
57
|
+
name: "get-appropriate-skills",
|
|
58
|
+
input: {
|
|
59
|
+
testCase,
|
|
60
|
+
},
|
|
61
|
+
});
|
|
62
|
+
const skills = await (0, exports.fetchPomSkills)({
|
|
63
|
+
testCase,
|
|
64
|
+
pomFiles,
|
|
65
|
+
trace: fetchSkillsSpan,
|
|
66
|
+
options,
|
|
67
|
+
});
|
|
50
68
|
const validateSkillsSpan = fetchSkillsSpan?.span({
|
|
51
69
|
name: "validate-skills",
|
|
52
70
|
input: {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"update-flow.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/update-flow.ts"],"names":[],"mappings":"AAAA,OAAO,EAKL,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAsB3B,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,KAAK,eAAe,GAAG,QAAQ,GAAG;IAChC,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB,CAAC;AAqIF,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAAG,SAAS,EACzC,OAAO,GAAE,OAAc,EACvB,QAAQ,GAAE,OAAc,EACxB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,eAAe,EAAE,CAAC,
|
|
1
|
+
{"version":3,"file":"update-flow.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/update-flow.ts"],"names":[],"mappings":"AAAA,OAAO,EAKL,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAsB3B,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,KAAK,eAAe,GAAG,QAAQ,GAAG;IAChC,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB,CAAC;AAqIF,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAAG,SAAS,EACzC,OAAO,GAAE,OAAc,EACvB,QAAQ,GAAE,OAAc,EACxB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,eAAe,EAAE,CAAC,CAsG5B;AAED,wBAAsB,qBAAqB,CAAC,EAC1C,QAAQ,EACR,IAAI,EACJ,OAAO,EACP,KAAK,EACL,aAAoB,GACrB,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CA+E7B"}
|
|
@@ -29,7 +29,7 @@ async function applyFileChanges({ validateTypes = true, trace, testCase, fileCha
|
|
|
29
29
|
if (testBlockUpdate) {
|
|
30
30
|
// assuming the test case getting updated
|
|
31
31
|
// maintaining the previous accuracy of the test case update
|
|
32
|
-
const readWriteFileSpan = trace
|
|
32
|
+
const readWriteFileSpan = trace?.span({ name: "write-to-file" });
|
|
33
33
|
let contents = await fs_extra_1.default.readFile(fileChange.filePath, "utf-8");
|
|
34
34
|
const [prependContent, strippedContent] = await (0, web_1.stripAndPrependImports)(fileChange.newCode, testCase?.name);
|
|
35
35
|
let updatedContent = prependContent + contents + `\n\n${strippedContent}`;
|
|
@@ -41,10 +41,10 @@ async function applyFileChanges({ validateTypes = true, trace, testCase, fileCha
|
|
|
41
41
|
contents = contents.replace(testBlock, `\n\n${strippedContent}`);
|
|
42
42
|
updatedContent = prependContent + contents;
|
|
43
43
|
await fs_extra_1.default.writeFile(fileChange.filePath, updatedContent, "utf-8");
|
|
44
|
-
readWriteFileSpan
|
|
44
|
+
readWriteFileSpan?.end({ output: { updatedContent } });
|
|
45
45
|
}
|
|
46
46
|
else {
|
|
47
|
-
const readWriteFileSpan = trace
|
|
47
|
+
const readWriteFileSpan = trace?.span({ name: "write-to-file" });
|
|
48
48
|
let contents = await fs_extra_1.default.readFile(fileChange.filePath, "utf-8");
|
|
49
49
|
const project = new ts_morph_1.Project();
|
|
50
50
|
const sourceFile = project.createSourceFile("updated-code.ts", fileChange.newCode);
|
|
@@ -84,7 +84,7 @@ async function applyFileChanges({ validateTypes = true, trace, testCase, fileCha
|
|
|
84
84
|
contents = contents.replace(fileChange.oldCode, `\n\n${fileChange.newCode}`);
|
|
85
85
|
}
|
|
86
86
|
await fs_extra_1.default.writeFile(fileChange.filePath, contents, "utf-8");
|
|
87
|
-
readWriteFileSpan
|
|
87
|
+
readWriteFileSpan?.end({ output: { contents } });
|
|
88
88
|
}
|
|
89
89
|
// format and validate file change
|
|
90
90
|
if (validateTypes) {
|
|
@@ -98,7 +98,7 @@ async function applyFileChanges({ validateTypes = true, trace, testCase, fileCha
|
|
|
98
98
|
options: testGenOptions,
|
|
99
99
|
});
|
|
100
100
|
}
|
|
101
|
-
trace
|
|
101
|
+
trace?.event({ name: "format-file" });
|
|
102
102
|
await (0, web_1.formatCode)(fileChange.filePath);
|
|
103
103
|
logger.success(`${fileChange.filePath} file formatted successfully!`);
|
|
104
104
|
}));
|
|
@@ -112,7 +112,7 @@ async function updateTest(testCase, file, options, logging = true, validate = tr
|
|
|
112
112
|
const session = (0, session_1.getSessionDetails)();
|
|
113
113
|
trace =
|
|
114
114
|
trace ||
|
|
115
|
-
llm_1.langfuseInstance
|
|
115
|
+
llm_1.langfuseInstance?.trace({
|
|
116
116
|
name: "update-test",
|
|
117
117
|
id: crypto_1.default.randomUUID(),
|
|
118
118
|
release: session.version,
|
|
@@ -121,7 +121,7 @@ async function updateTest(testCase, file, options, logging = true, validate = tr
|
|
|
121
121
|
options?.metadata.environment || "",
|
|
122
122
|
].filter((s) => !!s),
|
|
123
123
|
});
|
|
124
|
-
const updateTestSpan = trace
|
|
124
|
+
const updateTestSpan = trace?.span({
|
|
125
125
|
name: "update-test",
|
|
126
126
|
input: {
|
|
127
127
|
testCase,
|
|
@@ -129,7 +129,7 @@ async function updateTest(testCase, file, options, logging = true, validate = tr
|
|
|
129
129
|
options,
|
|
130
130
|
},
|
|
131
131
|
});
|
|
132
|
-
updateTestSpan
|
|
132
|
+
updateTestSpan?.event({
|
|
133
133
|
name: "collate-files-as-text",
|
|
134
134
|
output: {
|
|
135
135
|
codePrompt,
|
|
@@ -137,7 +137,7 @@ async function updateTest(testCase, file, options, logging = true, validate = tr
|
|
|
137
137
|
testFileContent,
|
|
138
138
|
},
|
|
139
139
|
});
|
|
140
|
-
const promptSpan = updateTestSpan
|
|
140
|
+
const promptSpan = updateTestSpan?.span({
|
|
141
141
|
name: "update-scenario-prompt",
|
|
142
142
|
});
|
|
143
143
|
const promptName = "update-scenario";
|
|
@@ -161,7 +161,7 @@ async function updateTest(testCase, file, options, logging = true, validate = tr
|
|
|
161
161
|
scenarioFile: file,
|
|
162
162
|
currentScenarioCodeBlock,
|
|
163
163
|
});
|
|
164
|
-
promptSpan
|
|
164
|
+
promptSpan?.end({ output: { instruction } });
|
|
165
165
|
const llm = new llm_1.LLM({
|
|
166
166
|
trace: updateTestSpan,
|
|
167
167
|
provider: options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
|
|
@@ -188,12 +188,14 @@ async function updateTest(testCase, file, options, logging = true, validate = tr
|
|
|
188
188
|
pomPrompt: pomPrompt,
|
|
189
189
|
codePrompt: codePrompt,
|
|
190
190
|
});
|
|
191
|
-
|
|
191
|
+
if (trace) {
|
|
192
|
+
logger.log(`Trace: ${trace?.getTraceUrl()}`);
|
|
193
|
+
}
|
|
192
194
|
generatedTestCases.push({
|
|
193
195
|
...testCase,
|
|
194
196
|
updatedFiles: fileChanges.map((f) => f.filePath),
|
|
195
197
|
});
|
|
196
|
-
updateTestSpan
|
|
198
|
+
updateTestSpan?.end({ output: { response } });
|
|
197
199
|
await (0, llm_1.flushAllTraces)();
|
|
198
200
|
return generatedTestCases;
|
|
199
201
|
}
|
|
@@ -215,7 +217,7 @@ async function appendCreateTestBlock({ testCase, file, options, trace, validateT
|
|
|
215
217
|
const session = (0, session_1.getSessionDetails)();
|
|
216
218
|
trace =
|
|
217
219
|
trace ||
|
|
218
|
-
llm_1.langfuseInstance
|
|
220
|
+
llm_1.langfuseInstance?.trace({
|
|
219
221
|
name: "append-create-test-block",
|
|
220
222
|
id: crypto_1.default.randomUUID(),
|
|
221
223
|
release: session.version,
|
|
@@ -225,7 +227,7 @@ async function appendCreateTestBlock({ testCase, file, options, trace, validateT
|
|
|
225
227
|
].filter((s) => !!s),
|
|
226
228
|
});
|
|
227
229
|
const promptName = "append-create-test-block";
|
|
228
|
-
const promptSpan = trace
|
|
230
|
+
const promptSpan = trace?.span({
|
|
229
231
|
name: "append-create-test-block-prompt",
|
|
230
232
|
});
|
|
231
233
|
const instruction = await (0, llm_1.getPrompt)(promptName, {
|
|
@@ -235,7 +237,7 @@ async function appendCreateTestBlock({ testCase, file, options, trace, validateT
|
|
|
235
237
|
scenarioSteps: testCase.steps.join("\n"),
|
|
236
238
|
scenarioFile: file,
|
|
237
239
|
});
|
|
238
|
-
promptSpan
|
|
240
|
+
promptSpan?.end({ output: { instruction } });
|
|
239
241
|
const [userInstruction] = instruction.filter((s) => s.role === "user");
|
|
240
242
|
const [systemInstruction] = instruction.filter((s) => s.role === "system");
|
|
241
243
|
userInstruction.content = `${systemInstruction?.content}
|
|
@@ -267,12 +269,14 @@ async function appendCreateTestBlock({ testCase, file, options, trace, validateT
|
|
|
267
269
|
codePrompt: codePrompt,
|
|
268
270
|
validateTypes,
|
|
269
271
|
});
|
|
270
|
-
|
|
272
|
+
if (trace) {
|
|
273
|
+
logger.log(`Trace: ${trace.getTraceUrl()}`);
|
|
274
|
+
}
|
|
271
275
|
generatedTestCases.push({
|
|
272
276
|
...testCase,
|
|
273
277
|
updatedFiles: fileChanges.map((f) => f.filePath),
|
|
274
278
|
});
|
|
275
|
-
trace
|
|
279
|
+
trace?.update({ input: { testCase }, output: { response } });
|
|
276
280
|
await (0, llm_1.flushAllTraces)();
|
|
277
281
|
return generatedTestCases;
|
|
278
282
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/infer-agent/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAIL,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAS3B,OAAO,EAAE,KAAK,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAG1D,wBAAsB,mBAAmB,CAAC,EACxC,IAAI,EACJ,OAAO,EACP,KAAK,GACN,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC;IAAE,QAAQ,EAAE,KAAK,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/infer-agent/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAIL,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAS3B,OAAO,EAAE,KAAK,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAG1D,wBAAsB,mBAAmB,CAAC,EACxC,IAAI,EACJ,OAAO,EACP,KAAK,GACN,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC;IAAE,QAAQ,EAAE,KAAK,CAAA;CAAE,CAAC,CA4E/B"}
|
|
@@ -8,19 +8,19 @@ const session = (0, session_1.getSessionDetails)();
|
|
|
8
8
|
async function inferAgentBasedTask({ task, options, trace, }) {
|
|
9
9
|
trace =
|
|
10
10
|
trace ||
|
|
11
|
-
llm_1.langfuseInstance
|
|
11
|
+
llm_1.langfuseInstance?.trace({
|
|
12
12
|
name: "infer-agent-task",
|
|
13
13
|
id: crypto.randomUUID(),
|
|
14
14
|
release: session.version,
|
|
15
15
|
});
|
|
16
|
-
const inferAgentSpan = trace
|
|
16
|
+
const inferAgentSpan = trace?.span({
|
|
17
17
|
name: "infer-agent",
|
|
18
18
|
input: {
|
|
19
19
|
task,
|
|
20
20
|
options,
|
|
21
21
|
},
|
|
22
22
|
});
|
|
23
|
-
const promptSpan = inferAgentSpan
|
|
23
|
+
const promptSpan = inferAgentSpan?.span({
|
|
24
24
|
name: "infer-agent-prompt",
|
|
25
25
|
input: {
|
|
26
26
|
task,
|
|
@@ -65,7 +65,7 @@ async function inferAgentBasedTask({ task, options, trace, }) {
|
|
|
65
65
|
},
|
|
66
66
|
});
|
|
67
67
|
const output = JSON.parse(firstShotMessage?.content || "{}");
|
|
68
|
-
inferAgentSpan
|
|
68
|
+
inferAgentSpan?.end({
|
|
69
69
|
output: {
|
|
70
70
|
response: output.response,
|
|
71
71
|
reason: output.reason,
|
|
@@ -73,7 +73,6 @@ async function inferAgentBasedTask({ task, options, trace, }) {
|
|
|
73
73
|
});
|
|
74
74
|
return {
|
|
75
75
|
response: output.response,
|
|
76
|
-
trace: inferAgentSpan,
|
|
77
76
|
};
|
|
78
77
|
}
|
|
79
78
|
exports.inferAgentBasedTask = inferAgentBasedTask;
|
|
@@ -3,14 +3,14 @@ import { Page } from "playwright";
|
|
|
3
3
|
import { PlaywrightActions } from "../../actions";
|
|
4
4
|
import { TestCase } from "../../types";
|
|
5
5
|
import { BrowsingAgentOptions } from "../browsing";
|
|
6
|
-
export declare function getNextAction({ task, executedActions, failedActions,
|
|
6
|
+
export declare function getNextAction({ task, executedActions, failedActions, pageUrl, trace, llm, options, pageScreenshot, annotatedPageScreenshot, actions, disableSkills, useHints, }: {
|
|
7
7
|
task: string;
|
|
8
8
|
executedActions: string[];
|
|
9
9
|
failedActions: any[];
|
|
10
|
-
|
|
10
|
+
pageUrl: string;
|
|
11
11
|
trace?: TraceClient;
|
|
12
|
-
llm
|
|
13
|
-
options
|
|
12
|
+
llm?: LLM;
|
|
13
|
+
options?: BrowsingAgentOptions;
|
|
14
14
|
pageScreenshot: string;
|
|
15
15
|
annotatedPageScreenshot?: string;
|
|
16
16
|
actions: PlaywrightActions;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,GAAG,EACH,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAG3B,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAYlD,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AACvC,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AAQrB,wBAAsB,aAAa,CAAC,EAClC,IAAI,EACJ,eAAe,EACf,aAAa,EACb,
|
|
1
|
+
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,GAAG,EACH,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAG3B,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAYlD,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AACvC,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AAQrB,wBAAsB,aAAa,CAAC,EAClC,IAAI,EACJ,eAAe,EACf,aAAa,EACb,OAAO,EACP,KAAK,EACL,GAAG,EACH,OAAO,EACP,cAAc,EACd,uBAAuB,EACvB,OAAO,EACP,aAAa,EACb,QAAgB,GACjB,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,aAAa,EAAE,GAAG,EAAE,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,cAAc,EAAE,MAAM,CAAC;IACvB,uBAAuB,CAAC,EAAE,MAAM,CAAC;IACjC,OAAO,EAAE,iBAAiB,CAAC;IAC3B,aAAa,EAAE,OAAO,CAAC;IACvB,QAAQ,EAAE,OAAO,CAAC;CACnB,2FAwFA;AAGD,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,OAAO,GACR,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,oBAAoB,CAAC;CAC/B;;;GAiRA"}
|
package/dist/agent/master/run.js
CHANGED
|
@@ -16,13 +16,30 @@ const skills_retriever_1 = require("../codegen/skills-retriever");
|
|
|
16
16
|
const verification_1 = require("../verification");
|
|
17
17
|
const with_hints_1 = require("./with-hints");
|
|
18
18
|
const MAX_ERROR_COUNT = 2;
|
|
19
|
-
async function getNextAction({ task, executedActions, failedActions,
|
|
20
|
-
const
|
|
19
|
+
async function getNextAction({ task, executedActions, failedActions, pageUrl, trace, llm, options, pageScreenshot, annotatedPageScreenshot, actions, disableSkills, useHints = false, }) {
|
|
20
|
+
const nextActionSpan = trace?.span({
|
|
21
|
+
name: "master-agent-next-action",
|
|
22
|
+
input: {
|
|
23
|
+
task,
|
|
24
|
+
executedActions,
|
|
25
|
+
failedActions,
|
|
26
|
+
pageUrl,
|
|
27
|
+
options,
|
|
28
|
+
pageScreenshot,
|
|
29
|
+
annotatedPageScreenshot,
|
|
30
|
+
disableSkills,
|
|
31
|
+
useHints,
|
|
32
|
+
skills: skill_1.testCaseSkills.getAvailableSkills(),
|
|
33
|
+
},
|
|
34
|
+
});
|
|
35
|
+
const promptSpan = nextActionSpan?.span({
|
|
36
|
+
name: "master-agent-prompt",
|
|
37
|
+
});
|
|
21
38
|
const promptMessages = await (0, llm_1.getPrompt)("test-gen", {
|
|
22
39
|
task,
|
|
23
40
|
failedActions: failedActions.map((a) => a).join("\n"),
|
|
24
41
|
executedActions: executedActions.map((a) => a).join("\n"),
|
|
25
|
-
pageUrl
|
|
42
|
+
pageUrl,
|
|
26
43
|
}, useHints ? 16 : 14);
|
|
27
44
|
// assuming there is only one user message in the prompt. if there is a change in langfuse prompt format, this will need to be updated
|
|
28
45
|
const userMessage = promptMessages.filter((m) => m.role === "user")[0];
|
|
@@ -43,7 +60,7 @@ async function getNextAction({ task, executedActions, failedActions, page, trace
|
|
|
43
60
|
{
|
|
44
61
|
type: "image_url",
|
|
45
62
|
image_url: {
|
|
46
|
-
url: (0, vision_1.imageFormatForProvider)(options
|
|
63
|
+
url: (0, vision_1.imageFormatForProvider)(options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER, pageScreenshot),
|
|
47
64
|
},
|
|
48
65
|
},
|
|
49
66
|
];
|
|
@@ -56,20 +73,27 @@ async function getNextAction({ task, executedActions, failedActions, page, trace
|
|
|
56
73
|
: actions.getMasterActionSchemas();
|
|
57
74
|
const tools = [next_task_1.NextTaskAction.schema, ...actionSchemas];
|
|
58
75
|
promptSpan?.end({ output: { messages } });
|
|
76
|
+
llm =
|
|
77
|
+
llm ||
|
|
78
|
+
new llm_1.LLM({
|
|
79
|
+
provider: options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
|
|
80
|
+
defaultModel: options?.model || constants_1.DEFAULT_MODEL,
|
|
81
|
+
});
|
|
59
82
|
const completion = await llm.createChatCompletion({
|
|
60
83
|
messages,
|
|
61
84
|
modelParameters: {
|
|
62
85
|
...constants_1.DEFAULT_MODEL_PARAMETERS,
|
|
63
|
-
...options
|
|
86
|
+
...options?.modelParameters,
|
|
64
87
|
tool_choice: "required",
|
|
65
88
|
temperature: 1,
|
|
66
89
|
},
|
|
67
|
-
trace,
|
|
90
|
+
trace: nextActionSpan,
|
|
68
91
|
traceName: "master-agent-llm",
|
|
69
92
|
// @ts-ignore
|
|
70
93
|
tools,
|
|
71
94
|
});
|
|
72
95
|
const toolCall = completion?.tool_calls?.[0];
|
|
96
|
+
nextActionSpan?.end({ output: toolCall });
|
|
73
97
|
return toolCall;
|
|
74
98
|
}
|
|
75
99
|
exports.getNextAction = getNextAction;
|
|
@@ -80,7 +104,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, }) {
|
|
|
80
104
|
const testgenUpdatesReporter = new reporter_1.TestGenUpdatesReporter();
|
|
81
105
|
// add timeout for the page to settle in
|
|
82
106
|
await page.waitForTimeout(3000);
|
|
83
|
-
const trace = llm_1.langfuseInstance
|
|
107
|
+
const trace = llm_1.langfuseInstance?.trace({
|
|
84
108
|
name: "test-generator",
|
|
85
109
|
id: crypto.randomUUID(),
|
|
86
110
|
version: (0, session_1.getSessionDetails)().version,
|
|
@@ -93,9 +117,11 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, }) {
|
|
|
93
117
|
options.metadata?.environment,
|
|
94
118
|
].filter((s) => !!s),
|
|
95
119
|
});
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
120
|
+
if (trace) {
|
|
121
|
+
void testgenUpdatesReporter.sendMessage(`Starting master agent. [view trace](${trace?.getTraceUrl()})`);
|
|
122
|
+
logger.log(`Starting master agent: ${trace?.getTraceUrl()}`);
|
|
123
|
+
void testgenUpdatesReporter.sendAgentTraceUrl(trace.getTraceUrl());
|
|
124
|
+
}
|
|
99
125
|
const llm = new llm_1.LLM({
|
|
100
126
|
trace,
|
|
101
127
|
provider: options.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
|
|
@@ -113,7 +139,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, }) {
|
|
|
113
139
|
skill_1.testCaseSkills.updateSkills(skills);
|
|
114
140
|
const actions = new actions_1.PlaywrightActions(page);
|
|
115
141
|
await (0, utils_1.injectPwLocatorGenerator)(page);
|
|
116
|
-
trace
|
|
142
|
+
trace?.update({ input: { task } });
|
|
117
143
|
let isGivenTaskDone = false;
|
|
118
144
|
const masterAgentActions = [];
|
|
119
145
|
let failedActions = [];
|
|
@@ -122,7 +148,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, }) {
|
|
|
122
148
|
if (await (0, session_1.shouldStopSession)()) {
|
|
123
149
|
break;
|
|
124
150
|
}
|
|
125
|
-
const masterAgentSpan = trace
|
|
151
|
+
const masterAgentSpan = trace?.span({
|
|
126
152
|
name: "master-agent",
|
|
127
153
|
input: {
|
|
128
154
|
task,
|
|
@@ -189,7 +215,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, }) {
|
|
|
189
215
|
task,
|
|
190
216
|
executedActions: masterAgentActions,
|
|
191
217
|
failedActions,
|
|
192
|
-
page,
|
|
218
|
+
pageUrl: page.url(),
|
|
193
219
|
trace: masterAgentSpan,
|
|
194
220
|
llm,
|
|
195
221
|
options,
|
|
@@ -204,7 +230,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, }) {
|
|
|
204
230
|
}
|
|
205
231
|
if (toolCall) {
|
|
206
232
|
const args = JSON.parse(toolCall.function.arguments);
|
|
207
|
-
const masterAgentActionSpan = masterAgentSpan
|
|
233
|
+
const masterAgentActionSpan = masterAgentSpan?.span({
|
|
208
234
|
name: "master-agent-action",
|
|
209
235
|
});
|
|
210
236
|
try {
|
|
@@ -256,7 +282,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, }) {
|
|
|
256
282
|
// resetting error count as there is a successful action
|
|
257
283
|
failedActions = [];
|
|
258
284
|
masterAgentActions.push(output.action);
|
|
259
|
-
masterAgentActionSpan
|
|
285
|
+
masterAgentActionSpan?.end({
|
|
260
286
|
input: {
|
|
261
287
|
action: output.action,
|
|
262
288
|
reason: output.reason,
|
|
@@ -274,7 +300,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, }) {
|
|
|
274
300
|
logger.log("Disabling skill usage for next retry");
|
|
275
301
|
disableSkills = true;
|
|
276
302
|
}
|
|
277
|
-
masterAgentActionSpan
|
|
303
|
+
masterAgentActionSpan?.end({
|
|
278
304
|
input: {
|
|
279
305
|
action: output.action,
|
|
280
306
|
reason: output.reason,
|
|
@@ -295,19 +321,21 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, }) {
|
|
|
295
321
|
}
|
|
296
322
|
}
|
|
297
323
|
}
|
|
298
|
-
trace
|
|
324
|
+
trace?.update({ input: { task }, output: { output } });
|
|
299
325
|
await testGenSnapshotUpdatePromise;
|
|
300
326
|
if (testGenAnnotatedSnapshotUpdatePromise) {
|
|
301
327
|
await testGenAnnotatedSnapshotUpdatePromise;
|
|
302
328
|
}
|
|
303
|
-
masterAgentSpan
|
|
329
|
+
masterAgentSpan?.end({
|
|
304
330
|
output: { action: output.action, reason: output.reason },
|
|
305
331
|
});
|
|
306
332
|
}
|
|
307
333
|
const { code, importPaths } = actions.generateCode();
|
|
308
|
-
trace
|
|
334
|
+
trace?.update({ input: { task }, output: { code } });
|
|
309
335
|
logger.success("Successfully generated code for the given task");
|
|
310
|
-
|
|
336
|
+
if (trace) {
|
|
337
|
+
await testgenUpdatesReporter.sendMessage(`Successfully generated code for the given task. \n View [trace](${trace.getTraceUrl()})`);
|
|
338
|
+
}
|
|
311
339
|
return {
|
|
312
340
|
code,
|
|
313
341
|
importPaths,
|
|
@@ -4,7 +4,7 @@ import { Page } from "playwright";
|
|
|
4
4
|
import { BrowsingAgentOptions } from "../browsing";
|
|
5
5
|
export declare const getUserMessageWithForHints: ({ userMessage, options, pageScreenshot, annotatedPageScreenshot, }: {
|
|
6
6
|
userMessage: OpenAI.ChatCompletionUserMessageParam;
|
|
7
|
-
options
|
|
7
|
+
options?: BrowsingAgentOptions | undefined;
|
|
8
8
|
pageScreenshot: string;
|
|
9
9
|
annotatedPageScreenshot: string;
|
|
10
10
|
}) => string | OpenAI.ChatCompletionContentPart[];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"with-hints.d.ts","sourceRoot":"","sources":["../../../src/agent/master/with-hints.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,MAAM,mBAAmB,CAAC;AAExC,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAIlC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAEnD,eAAO,MAAM,0BAA0B;iBAMxB,OAAO,8BAA8B
|
|
1
|
+
{"version":3,"file":"with-hints.d.ts","sourceRoot":"","sources":["../../../src/agent/master/with-hints.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,MAAM,mBAAmB,CAAC;AAExC,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAIlC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAEnD,eAAO,MAAM,0BAA0B;iBAMxB,OAAO,8BAA8B;;oBAElC,MAAM;6BACG,MAAM;MAC7B,MAAM,GAAG,OAAO,yBAAyB,EAiC5C,CAAC;AAEF,eAAO,MAAM,gBAAgB;6BAMF;QACvB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,CAAC,EAAE,MAAM,CAAC;KAC5B;0BACqB,OAAO,MAAM,EAAE,GAAG,CAAC;UACnC,IAAI;SACL,GAAG;MACN,QAAQ;IACV,sBAAsB,EAAE,OAAO,CAAC;IAChC,wBAAwB,EAAE,OAAO,qBAAqB,GAAG,SAAS,CAAC;CACpE,CAgFA,CAAC"}
|
|
@@ -17,7 +17,7 @@ const getUserMessageWithForHints = ({ userMessage, options, pageScreenshot, anno
|
|
|
17
17
|
{
|
|
18
18
|
type: "image_url",
|
|
19
19
|
image_url: {
|
|
20
|
-
url: (0, vision_1.imageFormatForProvider)(options
|
|
20
|
+
url: (0, vision_1.imageFormatForProvider)(options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER, pageScreenshot),
|
|
21
21
|
},
|
|
22
22
|
},
|
|
23
23
|
{
|
|
@@ -27,7 +27,7 @@ const getUserMessageWithForHints = ({ userMessage, options, pageScreenshot, anno
|
|
|
27
27
|
{
|
|
28
28
|
type: "image_url",
|
|
29
29
|
image_url: {
|
|
30
|
-
url: (0, vision_1.imageFormatForProvider)(options
|
|
30
|
+
url: (0, vision_1.imageFormatForProvider)(options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER, annotatedPageScreenshot),
|
|
31
31
|
},
|
|
32
32
|
},
|
|
33
33
|
];
|
package/dist/bin/index.js
CHANGED
|
@@ -46,7 +46,7 @@ async function runAgent(testGenConfig) {
|
|
|
46
46
|
}
|
|
47
47
|
let agent = testGenConfig.options?.agent;
|
|
48
48
|
const session = (0, session_1.getSessionDetails)();
|
|
49
|
-
const trace = llm_1.langfuseInstance
|
|
49
|
+
const trace = llm_1.langfuseInstance?.trace({
|
|
50
50
|
name: "generate-test",
|
|
51
51
|
id: crypto.randomUUID(),
|
|
52
52
|
release: session.version,
|
|
@@ -55,11 +55,13 @@ async function runAgent(testGenConfig) {
|
|
|
55
55
|
testGenConfig.options?.metadata.environment || "",
|
|
56
56
|
].filter((s) => !!s),
|
|
57
57
|
});
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
58
|
+
if (trace) {
|
|
59
|
+
try {
|
|
60
|
+
await new reporter_1.TestGenUpdatesReporter().sendAgentTraceUrl(trace.getTraceUrl());
|
|
61
|
+
}
|
|
62
|
+
catch (e) {
|
|
63
|
+
console.warn("Failed to send trace url as test gen update", e);
|
|
64
|
+
}
|
|
63
65
|
}
|
|
64
66
|
if (!agent || agent === "auto") {
|
|
65
67
|
agent = await resolveAgentUsingTask({
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch-pom-skills-agent.evals.d.ts","sourceRoot":"","sources":["../../src/evals/fetch-pom-skills-agent.evals.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAEpC,QAAA,MAAM,yBAAyB,EAAE,UAiChC,CAAC;AAEF,eAAe,yBAAyB,CAAC"}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
const skills_retriever_1 = require("../agent/codegen/skills-retriever");
|
|
4
|
+
const fetchSkillsAgentEvaluator = async ({ item, trace }) => {
|
|
5
|
+
const { testCase, pomFiles } = item.input;
|
|
6
|
+
const output = await (0, skills_retriever_1.fetchPomSkills)({
|
|
7
|
+
testCase,
|
|
8
|
+
pomFiles,
|
|
9
|
+
trace,
|
|
10
|
+
});
|
|
11
|
+
if (item.expectedOutput.length === 0 && output.length === 0) {
|
|
12
|
+
return {
|
|
13
|
+
scores: [
|
|
14
|
+
{
|
|
15
|
+
name: "equality",
|
|
16
|
+
value: 1,
|
|
17
|
+
},
|
|
18
|
+
],
|
|
19
|
+
output,
|
|
20
|
+
};
|
|
21
|
+
}
|
|
22
|
+
return {
|
|
23
|
+
scores: [
|
|
24
|
+
{
|
|
25
|
+
name: "equality",
|
|
26
|
+
value: output.some((o) => {
|
|
27
|
+
return item.expectedOutput.some((e) => e.usageExample === o.usageExample);
|
|
28
|
+
})
|
|
29
|
+
? 1
|
|
30
|
+
: 0,
|
|
31
|
+
},
|
|
32
|
+
],
|
|
33
|
+
output,
|
|
34
|
+
};
|
|
35
|
+
};
|
|
36
|
+
exports.default = fetchSkillsAgentEvaluator;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"master-agent.evals.d.ts","sourceRoot":"","sources":["../../src/evals/master-agent.evals.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAEpC,eAAO,MAAM,4BAA4B,EAAE,UA0C1C,CAAC;AAEF,eAAe,4BAA4B,CAAC"}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.masterGetNextActionEvaluator = void 0;
|
|
4
|
+
const actions_1 = require("../actions");
|
|
5
|
+
const skill_1 = require("../actions/skill");
|
|
6
|
+
const run_1 = require("../agent/master/run");
|
|
7
|
+
const masterGetNextActionEvaluator = async ({ item, trace, }) => {
|
|
8
|
+
const { task, executedActions, failedActions, pageUrl, options, pageScreenshot, annotatedPageScreenshot, disableSkills, useHints, skills = [], } = item.input;
|
|
9
|
+
const page = {};
|
|
10
|
+
skill_1.testCaseSkills.updateSkills(skills);
|
|
11
|
+
const actions = new actions_1.PlaywrightActions(page);
|
|
12
|
+
const output = await (0, run_1.getNextAction)({
|
|
13
|
+
task,
|
|
14
|
+
executedActions,
|
|
15
|
+
failedActions,
|
|
16
|
+
pageUrl,
|
|
17
|
+
trace,
|
|
18
|
+
options,
|
|
19
|
+
pageScreenshot,
|
|
20
|
+
annotatedPageScreenshot,
|
|
21
|
+
actions,
|
|
22
|
+
disableSkills,
|
|
23
|
+
useHints,
|
|
24
|
+
});
|
|
25
|
+
return {
|
|
26
|
+
scores: [
|
|
27
|
+
{
|
|
28
|
+
name: "action_correctness",
|
|
29
|
+
value: item.expectedOutput.function.name === output?.function.name ? 1 : 0,
|
|
30
|
+
},
|
|
31
|
+
],
|
|
32
|
+
output,
|
|
33
|
+
};
|
|
34
|
+
};
|
|
35
|
+
exports.masterGetNextActionEvaluator = masterGetNextActionEvaluator;
|
|
36
|
+
exports.default = exports.masterGetNextActionEvaluator;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@empiricalrun/test-gen",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.35.0",
|
|
4
4
|
"publishConfig": {
|
|
5
5
|
"registry": "https://registry.npmjs.org/",
|
|
6
6
|
"access": "public"
|
|
@@ -44,7 +44,7 @@
|
|
|
44
44
|
"ts-morph": "^23.0.0",
|
|
45
45
|
"tsx": "^4.16.2",
|
|
46
46
|
"typescript": "^5.3.3",
|
|
47
|
-
"@empiricalrun/llm": "^0.9.
|
|
47
|
+
"@empiricalrun/llm": "^0.9.21",
|
|
48
48
|
"@empiricalrun/r2-uploader": "^0.3.6",
|
|
49
49
|
"@empiricalrun/reporter": "^0.21.2"
|
|
50
50
|
},
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"infer-master-code.d.ts","sourceRoot":"","sources":["../../src/evals/infer-master-code.ts"],"names":[],"mappings":""}
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
const llm_1 = require("@empiricalrun/llm");
|
|
4
|
-
const infer_agent_1 = require("../agent/infer-agent");
|
|
5
|
-
const datasetName = "infer-master-or-code-agent";
|
|
6
|
-
(async function main() {
|
|
7
|
-
const dataset = await llm_1.langfuseInstance.getDataset(datasetName);
|
|
8
|
-
const runName = `${datasetName}-${Date.now()}`;
|
|
9
|
-
for (const item of dataset.items) {
|
|
10
|
-
const { response, trace } = await (0, infer_agent_1.inferAgentBasedTask)(item.input);
|
|
11
|
-
await item.link(trace, runName, {});
|
|
12
|
-
trace?.score({
|
|
13
|
-
name: "equality",
|
|
14
|
-
value: item.expectedOutput === response ? 1 : 0, // score value
|
|
15
|
-
});
|
|
16
|
-
}
|
|
17
|
-
await llm_1.langfuseInstance.flushAsync();
|
|
18
|
-
})();
|