@empiricalrun/test-gen 0.24.1 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -0
- package/dist/agent/browsing/index.d.ts +0 -1
- package/dist/agent/browsing/index.d.ts.map +1 -1
- package/dist/agent/browsing/index.js +1 -100
- package/dist/agent/browsing/run.js +1 -1
- package/dist/agent/browsing/utils.js +1 -1
- package/dist/agent/codegen/run.d.ts.map +1 -1
- package/dist/agent/codegen/run.js +2 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -6
- package/dist/types/index.d.ts +1 -1
- package/dist/types/index.d.ts.map +1 -1
- package/package.json +2 -2
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,24 @@
|
|
|
1
1
|
# @empiricalrun/test-gen
|
|
2
2
|
|
|
3
|
+
## 0.25.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- a3fe47f: feat: deprecate browser agent and remove dependency of test object in create test
|
|
8
|
+
|
|
9
|
+
## 0.24.3
|
|
10
|
+
|
|
11
|
+
### Patch Changes
|
|
12
|
+
|
|
13
|
+
- Updated dependencies [be30850]
|
|
14
|
+
- @empiricalrun/llm@0.9.1
|
|
15
|
+
|
|
16
|
+
## 0.24.2
|
|
17
|
+
|
|
18
|
+
### Patch Changes
|
|
19
|
+
|
|
20
|
+
- ce74ef8: fix: local mock github flow
|
|
21
|
+
|
|
3
22
|
## 0.24.1
|
|
4
23
|
|
|
5
24
|
### Patch Changes
|
|
@@ -6,6 +6,5 @@ type BrowsingAgentOptions = Partial<TestGenConfigOptions> & {
|
|
|
6
6
|
};
|
|
7
7
|
};
|
|
8
8
|
export declare function browsingAgentUsingMasterAgent(task: string, page: Page, options: BrowsingAgentOptions): Promise<string>;
|
|
9
|
-
export declare function browsingAgent(task: string, page: Page, options: BrowsingAgentOptions): Promise<string>;
|
|
10
9
|
export {};
|
|
11
10
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAYlC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAMnD,KAAK,oBAAoB,GAAG,OAAO,CAAC,oBAAoB,CAAC,GAAG;IAC1D,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACH,CAAC;AAEF,wBAAsB,6BAA6B,CACjD,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,IAAI,EACV,OAAO,EAAE,oBAAoB,mBA+L9B
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAYlC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAMnD,KAAK,oBAAoB,GAAG,OAAO,CAAC,oBAAoB,CAAC,GAAG;IAC1D,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACH,CAAC;AAEF,wBAAsB,6BAA6B,CACjD,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,IAAI,EACV,OAAO,EAAE,oBAAoB,mBA+L9B"}
|
|
@@ -3,7 +3,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
3
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.browsingAgent = exports.browsingAgentUsingMasterAgent = void 0;
|
|
6
|
+
exports.browsingAgentUsingMasterAgent = void 0;
|
|
7
7
|
const llm_1 = require("@empiricalrun/llm");
|
|
8
8
|
const crypto_1 = __importDefault(require("crypto"));
|
|
9
9
|
const actions_1 = require("../../actions");
|
|
@@ -185,102 +185,3 @@ async function browsingAgentUsingMasterAgent(task, page, options) {
|
|
|
185
185
|
return code;
|
|
186
186
|
}
|
|
187
187
|
exports.browsingAgentUsingMasterAgent = browsingAgentUsingMasterAgent;
|
|
188
|
-
async function browsingAgent(task, page, options) {
|
|
189
|
-
const logger = new logger_1.CustomLogger();
|
|
190
|
-
const session = (0, session_1.getSessionDetails)();
|
|
191
|
-
const trace = llm_1.langfuseInstance.trace({
|
|
192
|
-
name: "browsing-agent",
|
|
193
|
-
id: crypto_1.default.randomUUID(),
|
|
194
|
-
version: (0, session_1.getSessionDetails)().version,
|
|
195
|
-
metadata: {
|
|
196
|
-
generationId: (0, session_1.getSessionDetails)().generationId,
|
|
197
|
-
sessionId: (0, session_1.getSessionDetails)().sessionId,
|
|
198
|
-
},
|
|
199
|
-
release: session.version,
|
|
200
|
-
tags: [
|
|
201
|
-
options.metadata?.projectName,
|
|
202
|
-
options.metadata?.environment,
|
|
203
|
-
].filter((s) => !!s),
|
|
204
|
-
});
|
|
205
|
-
const actions = new actions_1.PlaywrightActions(page);
|
|
206
|
-
const tools = actions.getActionSchemas();
|
|
207
|
-
let isTaskDone = false;
|
|
208
|
-
const executedActions = [];
|
|
209
|
-
await (0, utils_1.injectPwLocatorGenerator)(page);
|
|
210
|
-
trace.update({ input: { task } });
|
|
211
|
-
let lastActionExecTrace = "";
|
|
212
|
-
const llm = new llm_1.LLM({
|
|
213
|
-
trace,
|
|
214
|
-
provider: options.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER,
|
|
215
|
-
defaultModel: options.model || constants_1.DEFAULT_MODEL,
|
|
216
|
-
providerApiKey: constants_1.MODEL_API_KEYS[options.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER],
|
|
217
|
-
});
|
|
218
|
-
while (!isTaskDone) {
|
|
219
|
-
const pageContentSpan = trace.span({ name: "page-content" });
|
|
220
|
-
const pageContent = await page.content();
|
|
221
|
-
pageContentSpan.end({ output: { pageContent } });
|
|
222
|
-
const sanitizationSpan = trace.span({ name: "page-sanitization" });
|
|
223
|
-
const pageSnapshot = (0, html_1.sanitizeHtml)(pageContent, options.htmlSanitize);
|
|
224
|
-
sanitizationSpan.end({ output: { pageSnapshot } });
|
|
225
|
-
const promptSpan = trace.span({ name: "page-prompt" });
|
|
226
|
-
// extract all successful actions
|
|
227
|
-
const successfulActions = executedActions
|
|
228
|
-
.filter((a) => !a.isError)
|
|
229
|
-
.map((a) => a.action);
|
|
230
|
-
const messages = await (0, utils_1.getPromptForNextAction)({
|
|
231
|
-
pageSnapshot,
|
|
232
|
-
previousActions: successfulActions,
|
|
233
|
-
task,
|
|
234
|
-
lastActionErrors: lastActionExecTrace ? [lastActionExecTrace] : [],
|
|
235
|
-
});
|
|
236
|
-
promptSpan.end({ output: { messages } });
|
|
237
|
-
const completion = await llm.createChatCompletion({
|
|
238
|
-
messages,
|
|
239
|
-
tools,
|
|
240
|
-
modelParameters: {
|
|
241
|
-
...constants_1.DEFAULT_MODEL_PARAMETERS,
|
|
242
|
-
...options.modelParameters,
|
|
243
|
-
tool_choice: "required",
|
|
244
|
-
},
|
|
245
|
-
});
|
|
246
|
-
const toolCalls = completion?.tool_calls || [];
|
|
247
|
-
for (const i in toolCalls) {
|
|
248
|
-
const toolCall = toolCalls[i];
|
|
249
|
-
try {
|
|
250
|
-
await actions.executeAction(toolCall.function.name, JSON.parse(toolCall.function.arguments));
|
|
251
|
-
executedActions.push({
|
|
252
|
-
isError: false,
|
|
253
|
-
action: JSON.stringify(toolCall),
|
|
254
|
-
});
|
|
255
|
-
lastActionExecTrace = "";
|
|
256
|
-
}
|
|
257
|
-
catch (e) {
|
|
258
|
-
// TODO: implement feedback loop to llm
|
|
259
|
-
executedActions.push({
|
|
260
|
-
isError: true,
|
|
261
|
-
action: JSON.stringify(toolCall),
|
|
262
|
-
});
|
|
263
|
-
lastActionExecTrace = e.message;
|
|
264
|
-
logger.error(lastActionExecTrace, e);
|
|
265
|
-
}
|
|
266
|
-
}
|
|
267
|
-
isTaskDone = actions.isComplete();
|
|
268
|
-
// mark task as done if llm is stuck in loop
|
|
269
|
-
if (executedActions.length > 3) {
|
|
270
|
-
const lastThreeActions = executedActions.slice(-3);
|
|
271
|
-
const lastThreeActionsFailed = lastThreeActions.every((a) => a.isError);
|
|
272
|
-
if (lastThreeActionsFailed) {
|
|
273
|
-
// TODO: this should be sent to dashboard
|
|
274
|
-
logger.error("Agent is not able to figure out next action, marking task as done");
|
|
275
|
-
isTaskDone = true;
|
|
276
|
-
}
|
|
277
|
-
}
|
|
278
|
-
}
|
|
279
|
-
await page.close();
|
|
280
|
-
const code = actions.generateCode();
|
|
281
|
-
trace.update({ input: { task }, output: { code } });
|
|
282
|
-
logger.success("Successfully generated code for the given task");
|
|
283
|
-
logger.log(`Trace: ${trace.getTraceUrl()}`);
|
|
284
|
-
return code;
|
|
285
|
-
}
|
|
286
|
-
exports.browsingAgent = browsingAgent;
|
|
@@ -24,7 +24,7 @@ async function generateTestsUsingBrowsingAgent(testFilePath) {
|
|
|
24
24
|
const project = await (0, utils_2.detectProjectName)(testFilePath, playwrightConfig);
|
|
25
25
|
logger.log(`Detected playwright project name: ${project}`);
|
|
26
26
|
//TODO: change this to per test
|
|
27
|
-
let command = `npx playwright test ${testFilePath} --retries 0 --project ${project}`;
|
|
27
|
+
let command = `npx playwright test ${testFilePath} --retries 0 --project ${project} --timeout 0`;
|
|
28
28
|
if (!process.env.CI) {
|
|
29
29
|
command = command.concat(` --headed`);
|
|
30
30
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/run.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/run.ts"],"names":[],"mappings":"AAoBA,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAG7D,wBAAsB,YAAY,CAChC,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAC5B,OAAO,CAAC,QAAQ,EAAE,CAAC,CAmJrB"}
|
|
@@ -21,7 +21,8 @@ async function generateTest(testCase, file, options) {
|
|
|
21
21
|
}
|
|
22
22
|
const context = await (0, context_1.contextForGeneration)(file);
|
|
23
23
|
const { codePrompt, pomPrompt, testFileContent } = context;
|
|
24
|
-
const
|
|
24
|
+
const { testBlock } = (0, web_1.getTypescriptTestBlock)(testCase?.name, testFileContent);
|
|
25
|
+
const isUpdate = !!testBlock;
|
|
25
26
|
if (isUpdate) {
|
|
26
27
|
return await (0, update_flow_1.updateTest)(testCase, file, options);
|
|
27
28
|
}
|
package/dist/index.d.ts
CHANGED
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAalC,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAalC,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,iBA0BxD"}
|
package/dist/index.js
CHANGED
|
@@ -14,7 +14,7 @@ process.on("beforeExit", async () => await (0, llm_1.flushAllTraces)());
|
|
|
14
14
|
process.on("exit", async () => await (0, llm_1.flushAllTraces)());
|
|
15
15
|
process.on("SIGINT", async () => await (0, llm_1.flushAllTraces)());
|
|
16
16
|
process.on("SIGTERM", async () => await (0, llm_1.flushAllTraces)());
|
|
17
|
-
async function createTest(task, page, test) {
|
|
17
|
+
async function createTest(task, page) {
|
|
18
18
|
const port = process.env.APP_PORT || 3030;
|
|
19
19
|
const testConfigArg = process.env.TEST_GEN_TOKEN;
|
|
20
20
|
const { testGenConfig } = await (0, utils_1.parseCliArgs)(testConfigArg);
|
|
@@ -30,11 +30,7 @@ async function createTest(task, page, test) {
|
|
|
30
30
|
generationId: testGenConfig.options?.metadata.generationId,
|
|
31
31
|
});
|
|
32
32
|
const fileService = new client_1.default(Number(port));
|
|
33
|
-
|
|
34
|
-
const agent = testGenConfig.options?.agent === "browser"
|
|
35
|
-
? browsing_1.browsingAgent
|
|
36
|
-
: browsing_1.browsingAgentUsingMasterAgent;
|
|
37
|
-
const code = await agent(task, page, {
|
|
33
|
+
const code = await (0, browsing_1.browsingAgentUsingMasterAgent)(task, page, {
|
|
38
34
|
htmlSanitize: {
|
|
39
35
|
disallowedStrings: ["v-data-table__td v-data-table-column--align-start"],
|
|
40
36
|
},
|
package/dist/types/index.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAC3E,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,MAAM,MAAM,WAAW,GAAG;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;CACjB,CAAC;AAEF,MAAM,MAAM,oBAAoB,GAAG;IACjC,KAAK,EAAE,MAAM,GAAG,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAC3E,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,MAAM,MAAM,WAAW,GAAG;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;CACjB,CAAC;AAEF,MAAM,MAAM,oBAAoB,GAAG;IACjC,KAAK,EAAE,MAAM,GAAG,QAAQ,CAAC;IACzB,KAAK,EAAE,QAAQ,CAAC;IAChB,aAAa,EAAE,WAAW,CAAC;IAC3B,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC,QAAQ,EAAE;QACR,aAAa,EAAE,MAAM,CAAC;QACtB,YAAY,EAAE,MAAM,CAAC;QACrB,YAAY,EAAE,MAAM,CAAC;QACrB,eAAe,EAAE,MAAM,CAAC;QACxB,WAAW,EAAE,MAAM,CAAC;QACpB,WAAW,EAAE,aAAa,GAAG,YAAY,CAAC;KAC3C,CAAC;CACH,CAAC;AAEF,MAAM,MAAM,aAAa,GAAG;IAC1B,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,CAAC;AAEF,MAAM,MAAM,QAAQ,GAAG;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;CACf,CAAC;AAEF,MAAM,MAAM,yBAAyB,GAAG,CAAC,IAAI,EAAE,IAAI,KAAK,MAAM,CAAC;AAE/D,MAAM,MAAM,YAAY,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,kBAAkB,CAAC;AAEtE,MAAM,MAAM,MAAM,GAAG;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,YAAY,CAAC;IACrB,OAAO,EAAE,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,KAAK,OAAO,CAAC;QAAE,OAAO,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC,CAAC;IAC5E,QAAQ,EAAE,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,OAAO,EAAE;QAAE,OAAO,EAAE,MAAM,CAAA;KAAE,KAAK,MAAM,CAAC;CAC/E,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@empiricalrun/test-gen",
|
|
3
|
-
"version": "0.24.1",
|
|
3
|
+
"version": "0.25.0",
|
|
4
4
|
"publishConfig": {
|
|
5
5
|
"registry": "https://registry.npmjs.org/",
|
|
6
6
|
"access": "public"
|
|
@@ -41,7 +41,7 @@
|
|
|
41
41
|
"ts-morph": "^23.0.0",
|
|
42
42
|
"tsx": "^4.16.2",
|
|
43
43
|
"typescript": "^5.3.3",
|
|
44
|
-
"@empiricalrun/llm": "^0.9.0",
|
|
44
|
+
"@empiricalrun/llm": "^0.9.1",
|
|
45
45
|
"@empiricalrun/r2-uploader": "^0.1.1",
|
|
46
46
|
"@empiricalrun/reporter": "^0.17.10"
|
|
47
47
|
},
|