@empiricalrun/test-gen 0.38.13 → 0.38.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +21 -0
- package/dist/agent/browsing/utils.d.ts.map +1 -1
- package/dist/agent/browsing/utils.js +20 -10
- package/dist/agent/codegen/update-flow.d.ts +6 -1
- package/dist/agent/codegen/update-flow.d.ts.map +1 -1
- package/dist/agent/codegen/update-flow.js +9 -8
- package/dist/agent/master/run.d.ts +4 -1
- package/dist/agent/master/run.d.ts.map +1 -1
- package/dist/agent/master/run.js +16 -4
- package/dist/agent/master/with-hints.d.ts +4 -1
- package/dist/agent/master/with-hints.d.ts.map +1 -1
- package/dist/agent/master/with-hints.js +1 -1
- package/dist/agent/planner/run-time-planner.d.ts.map +1 -1
- package/dist/agent/planner/run-time-planner.js +8 -4
- package/dist/evals/update-scenario-agent.evals.d.ts.map +1 -1
- package/dist/evals/update-scenario-agent.evals.js +3 -5
- package/package.json +4 -4
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,26 @@
|
|
|
1
1
|
# @empiricalrun/test-gen
|
|
2
2
|
|
|
3
|
+
## 0.38.16
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- 6163918: fix: security policy injection for locators
|
|
8
|
+
|
|
9
|
+
## 0.38.15
|
|
10
|
+
|
|
11
|
+
### Patch Changes
|
|
12
|
+
|
|
13
|
+
- 91ded8f: fix: incorrect annotations
|
|
14
|
+
|
|
15
|
+
## 0.38.14
|
|
16
|
+
|
|
17
|
+
### Patch Changes
|
|
18
|
+
|
|
19
|
+
- bd5c945: fix: run update scenario prompts on claude
|
|
20
|
+
- 31f8805: fix: runtime planner calling out actions as done
|
|
21
|
+
- Updated dependencies [bd5c945]
|
|
22
|
+
- @empiricalrun/llm@0.9.26
|
|
23
|
+
|
|
3
24
|
## 0.38.13
|
|
4
25
|
|
|
5
26
|
### Patch Changes
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAa,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAK3D,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAiBvD,OAAO,EAAe,aAAa,EAAE,MAAM,aAAa,CAAC;AAMzD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;AA6FD;;;;GAIG;AACH,wBAAsB,yBAAyB,CAC7C,SAAS,EAAE,aAAa,EACxB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,MAAM,CAAC,CA0DjB;AAyBD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,
|
|
1
|
+
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAa,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAK3D,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAiBvD,OAAO,EAAe,aAAa,EAAE,MAAM,aAAa,CAAC;AAMzD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,GAAG,GAAG,GAAG,IAAI,MAAM,CAKhD;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,MAAM,EAAE,UAIvD;AA6FD;;;;GAIG;AACH,wBAAsB,yBAAyB,CAC7C,SAAS,EAAE,aAAa,EACxB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,MAAM,CAAC,CA0DjB;AAyBD,wBAAsB,wBAAwB,CAAC,IAAI,EAAE,IAAI,iBAuHxD;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,QA+BjD;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,IAAI,OAAO,CAAC,oBAAoB,CAAC,CAM1E;AAWD;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,YAAY,EAAE,MAAM,EACpB,gBAAgB,EAAE,oBAAoB,EACtC,gBAAgB,GAAE,MAAM,EAAU,GACjC,OAAO,CAAC,MAAM,CAAC,CA+CjB;AAED,wBAAsB,sBAAsB,CAAC,EAC3C,YAAiB,EACjB,IAAS,EACT,eAAoB,EACpB,gBAAqB,EACrB,UAAyC,GAC1C,EAAE;IACD,YAAY,EAAE,MAAM,CAAC;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB,8EASA;AAED,qBAAa,eAAe;IACd,OAAO,CAAC,SAAS;gBAAT,SAAS,EAAE,MAAM;IACrC,OAAO,CAAC,aAAa,CAAqB;YAE5B,mBAAmB;YAUnB,gBAAgB;IAsBjB,OAAO;IAuBb,SAAS;CAKjB"}
|
|
@@ -182,6 +182,11 @@ async function injectPwLocatorGenerator(page) {
|
|
|
182
182
|
try {
|
|
183
183
|
await Promise.all(scripts.map((s) => page.addScriptTag({ content: s })));
|
|
184
184
|
await page.evaluate(async () => {
|
|
185
|
+
//@ts-ignore
|
|
186
|
+
//https://developer.mozilla.org/en-US/docs/Web/API/TrustedScriptURL
|
|
187
|
+
const trustedPolicy = window.trustedTypes?.createPolicy(crypto.randomUUID(), {
|
|
188
|
+
createScriptURL: (url) => url,
|
|
189
|
+
});
|
|
185
190
|
//@ts-ignore
|
|
186
191
|
const injectScriptInIframe = (iframeDoc) => {
|
|
187
192
|
try {
|
|
@@ -189,10 +194,9 @@ async function injectPwLocatorGenerator(page) {
|
|
|
189
194
|
"https://assets-test.empirical.run/pw-selector.js",
|
|
190
195
|
"https://code.jquery.com/jquery-3.7.1.min.js",
|
|
191
196
|
].forEach((url) => {
|
|
192
|
-
const
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
iframeDoc.head.appendChild(scr);
|
|
197
|
+
const script = iframeDoc.createElement("script");
|
|
198
|
+
script.src = trustedPolicy.createScriptURL(url);
|
|
199
|
+
iframeDoc.head.appendChild(script);
|
|
196
200
|
});
|
|
197
201
|
}
|
|
198
202
|
catch (e) {
|
|
@@ -207,7 +211,8 @@ async function injectPwLocatorGenerator(page) {
|
|
|
207
211
|
if (isVisible) {
|
|
208
212
|
//@ts-ignore
|
|
209
213
|
const iframeContent = iframe.contentDocument || iframe.contentWindow?.document;
|
|
210
|
-
|
|
214
|
+
const isScriptInjected = !!iframe.contentWindow?.playwright;
|
|
215
|
+
if (iframeContent && !isScriptInjected) {
|
|
211
216
|
injectScriptInIframe(iframeContent);
|
|
212
217
|
}
|
|
213
218
|
}
|
|
@@ -224,18 +229,22 @@ async function injectPwLocatorGenerator(page) {
|
|
|
224
229
|
//@ts-ignore
|
|
225
230
|
const injectScriptInIframe = (iframeDoc) => {
|
|
226
231
|
try {
|
|
232
|
+
//@ts-ignore
|
|
233
|
+
//https://developer.mozilla.org/en-US/docs/Web/API/TrustedScriptURL
|
|
234
|
+
const trustedPolicy = window.trustedTypes.createPolicy(crypto.randomUUID(), {
|
|
235
|
+
createScriptURL: (url) => url,
|
|
236
|
+
});
|
|
227
237
|
[
|
|
228
238
|
"https://assets-test.empirical.run/pw-selector.js",
|
|
229
239
|
"https://code.jquery.com/jquery-3.7.1.min.js",
|
|
230
240
|
].forEach((url) => {
|
|
231
241
|
const scr = iframeDoc.createElement("script");
|
|
232
|
-
scr.src = url;
|
|
233
|
-
console.log("Injecting script in iframe", scr);
|
|
242
|
+
scr.src = trustedPolicy.createScriptURL(url);
|
|
234
243
|
iframeDoc.head.appendChild(scr);
|
|
235
244
|
});
|
|
236
245
|
}
|
|
237
246
|
catch (e) {
|
|
238
|
-
console.warn("Error injecting script in iframe
|
|
247
|
+
console.warn("Error injecting script in iframe.");
|
|
239
248
|
}
|
|
240
249
|
};
|
|
241
250
|
const iframes = document.getElementsByTagName("iframe");
|
|
@@ -246,7 +255,8 @@ async function injectPwLocatorGenerator(page) {
|
|
|
246
255
|
if (isVisible) {
|
|
247
256
|
//@ts-ignore
|
|
248
257
|
const iframeContent = iframe.contentDocument || iframe.contentWindow?.document;
|
|
249
|
-
|
|
258
|
+
const isScriptInjected = !!iframe.contentWindow?.playwright;
|
|
259
|
+
if (iframeContent && !isScriptInjected) {
|
|
250
260
|
injectScriptInIframe(iframeContent);
|
|
251
261
|
}
|
|
252
262
|
}
|
|
@@ -254,7 +264,7 @@ async function injectPwLocatorGenerator(page) {
|
|
|
254
264
|
});
|
|
255
265
|
}
|
|
256
266
|
catch (e) {
|
|
257
|
-
console.warn("Error injecting script in iframe
|
|
267
|
+
console.warn("Error injecting script in iframe.");
|
|
258
268
|
}
|
|
259
269
|
}
|
|
260
270
|
exports.injectPwLocatorGenerator = injectPwLocatorGenerator;
|
|
@@ -11,7 +11,12 @@ export declare function getUpdateTestCodeCompletion({ testCase, testFileContent,
|
|
|
11
11
|
testFileContent: string;
|
|
12
12
|
trace?: TraceClient;
|
|
13
13
|
options?: TestGenConfigOptions;
|
|
14
|
-
}): Promise<
|
|
14
|
+
}): Promise<{
|
|
15
|
+
filePath: string | undefined;
|
|
16
|
+
oldCode: string | undefined;
|
|
17
|
+
newCode: string | undefined;
|
|
18
|
+
reason: string | undefined;
|
|
19
|
+
}[]>;
|
|
15
20
|
export declare function updateTest(testCase: TestCase, file: string, options: TestGenConfigOptions | undefined, logging?: boolean, validate?: boolean, trace?: TraceClient): Promise<UpdatedTestCase[]>;
|
|
16
21
|
export declare function getAppendCreateTestBlockCompletion({ testFiles, pageFiles, testCase, testFilePath, options, trace, }: {
|
|
17
22
|
trace?: TraceClient;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"update-flow.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/update-flow.ts"],"names":[],"mappings":"AAAA,OAAO,EAKL,WAAW,EACZ,MAAM,mBAAmB,CAAC;
|
|
1
|
+
{"version":3,"file":"update-flow.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/update-flow.ts"],"names":[],"mappings":"AAAA,OAAO,EAKL,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAoB3B,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,KAAK,eAAe,GAAG,QAAQ,GAAG;IAChC,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB,CAAC;AAqIF,wBAAsB,2BAA2B,CAAC,EAChD,QAAQ,EACR,eAAe,EACf,SAAS,EACT,SAAS,EACT,YAAY,EACZ,KAAK,EACL,OAAO,GACR,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,OAAO,CAAC,EAAE,oBAAoB,CAAC;CAChC,GAAG,OAAO,CACT;IACE,QAAQ,EAAE,MAAM,GAAG,SAAS,CAAC;IAC7B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;CAC5B,EAAE,CACJ,CA+CA;AAED,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAAG,SAAS,EACzC,OAAO,GAAE,OAAc,EACvB,QAAQ,GAAE,OAAc,EACxB,KAAK,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,eAAe,EAAE,CAAC,CA4D5B;AAED,wBAAsB,kCAAkC,CAAC,EACvD,SAAS,EACT,SAAS,EACT,QAAQ,EACR,YAAY,EACZ,OAAO,EACP,KAAK,GACN,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,YAAY,EAAE,MAAM,CAAC;CACtB,mBA4DA;AAED,wBAAsB,qBAAqB,CAAC,EAC1C,QAAQ,EACR,IAAI,EACJ,OAAO,EACP,KAAK,EACL,aAAoB,GACrB,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAyC7B"}
|
|
@@ -128,22 +128,24 @@ async function getUpdateTestCodeCompletion({ testCase, testFileContent, testFile
|
|
|
128
128
|
scenarioFile: testFilePath,
|
|
129
129
|
currentScenarioCodeBlock,
|
|
130
130
|
});
|
|
131
|
-
promptSpan?.end({ output: { instruction } });
|
|
132
131
|
const llm = new llm_1.LLM({
|
|
133
132
|
trace,
|
|
134
|
-
provider:
|
|
135
|
-
defaultModel:
|
|
136
|
-
providerApiKey: constants_1.MODEL_API_KEYS[
|
|
133
|
+
provider: "anthropic",
|
|
134
|
+
defaultModel: "claude-3-5-sonnet-latest",
|
|
135
|
+
providerApiKey: constants_1.MODEL_API_KEYS["anthropic"],
|
|
137
136
|
});
|
|
138
137
|
const firstShotMessage = await llm.createChatCompletion({
|
|
139
138
|
messages: instruction,
|
|
140
139
|
modelParameters: {
|
|
141
140
|
...constants_1.DEFAULT_MODEL_PARAMETERS,
|
|
142
141
|
...options?.modelParameters,
|
|
142
|
+
temperature: 0,
|
|
143
143
|
},
|
|
144
144
|
});
|
|
145
145
|
let response = firstShotMessage?.content || "";
|
|
146
|
-
|
|
146
|
+
const fileChanges = (0, utils_1.extractTestUpdates)(response);
|
|
147
|
+
promptSpan?.end({ output: fileChanges });
|
|
148
|
+
return fileChanges;
|
|
147
149
|
}
|
|
148
150
|
exports.getUpdateTestCodeCompletion = getUpdateTestCodeCompletion;
|
|
149
151
|
async function updateTest(testCase, file, options, logging = true, validate = true, trace) {
|
|
@@ -176,12 +178,11 @@ async function updateTest(testCase, file, options, logging = true, validate = tr
|
|
|
176
178
|
name: "update-test",
|
|
177
179
|
input: request,
|
|
178
180
|
});
|
|
179
|
-
const
|
|
181
|
+
const fileChanges = await getUpdateTestCodeCompletion({
|
|
180
182
|
...request,
|
|
181
183
|
trace: updateTestSpan,
|
|
182
184
|
});
|
|
183
185
|
logger.success("Test generated successfully!");
|
|
184
|
-
const fileChanges = (0, utils_1.extractTestUpdates)(response);
|
|
185
186
|
await applyFileChanges({
|
|
186
187
|
validateTypes: validate,
|
|
187
188
|
trace: updateTestSpan,
|
|
@@ -199,7 +200,7 @@ async function updateTest(testCase, file, options, logging = true, validate = tr
|
|
|
199
200
|
...testCase,
|
|
200
201
|
updatedFiles: fileChanges.map((f) => f.filePath),
|
|
201
202
|
});
|
|
202
|
-
updateTestSpan?.end({ output: {
|
|
203
|
+
updateTestSpan?.end({ output: { fileChanges } });
|
|
203
204
|
await (0, llm_1.flushAllTraces)();
|
|
204
205
|
return generatedTestCases;
|
|
205
206
|
}
|
|
@@ -16,7 +16,10 @@ export declare function getNextAction({ task, executedActions, failedActions, pa
|
|
|
16
16
|
actions: PlaywrightActions;
|
|
17
17
|
disableSkills: boolean;
|
|
18
18
|
useHints: boolean;
|
|
19
|
-
annotations?:
|
|
19
|
+
annotations?: {
|
|
20
|
+
elementID: string;
|
|
21
|
+
text: string;
|
|
22
|
+
}[];
|
|
20
23
|
}): Promise<import("openai/resources/index.mjs").ChatCompletionMessageToolCall | undefined>;
|
|
21
24
|
export declare function createTestUsingMasterAgent({ task, page, testCase, options, scopeVars, }: {
|
|
22
25
|
task: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,GAAG,EACH,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAG3B,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAclD,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAClD,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AAoBrB,wBAAsB,aAAa,CAAC,EAClC,IAAI,EACJ,eAAe,EACf,aAAa,EACb,OAAO,EACP,KAAK,EACL,GAAG,EACH,OAAO,EACP,cAAc,EACd,uBAAuB,EACvB,OAAO,EACP,aAAa,EACb,QAAgB,EAChB,WAAW,GACZ,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,aAAa,EAAE,GAAG,EAAE,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,cAAc,EAAE,MAAM,CAAC;IACvB,uBAAuB,CAAC,EAAE,MAAM,CAAC;IACjC,OAAO,EAAE,iBAAiB,CAAC;IAC3B,aAAa,EAAE,OAAO,CAAC;IACvB,QAAQ,EAAE,OAAO,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;
|
|
1
|
+
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,GAAG,EACH,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAG3B,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAclD,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAClD,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AAoBrB,wBAAsB,aAAa,CAAC,EAClC,IAAI,EACJ,eAAe,EACf,aAAa,EACb,OAAO,EACP,KAAK,EACL,GAAG,EACH,OAAO,EACP,cAAc,EACd,uBAAuB,EACvB,OAAO,EACP,aAAa,EACb,QAAgB,EAChB,WAAW,GACZ,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,aAAa,EAAE,GAAG,EAAE,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,cAAc,EAAE,MAAM,CAAC;IACvB,uBAAuB,CAAC,EAAE,MAAM,CAAC;IACjC,OAAO,EAAE,iBAAiB,CAAC;IAC3B,aAAa,EAAE,OAAO,CAAC;IACvB,QAAQ,EAAE,OAAO,CAAC;IAClB,WAAW,CAAC,EAAE;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;CACrD,2FA6FA;AAGD,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,oBAAoB,CAAC;IAC9B,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GAiUA"}
|
package/dist/agent/master/run.js
CHANGED
|
@@ -52,8 +52,10 @@ async function getNextAction({ task, executedActions, failedActions, pageUrl, tr
|
|
|
52
52
|
failedActions: failedActions.map((a) => a).join("\n"),
|
|
53
53
|
executedActions: executedActions.map((a) => a).join("\n"),
|
|
54
54
|
pageUrl,
|
|
55
|
-
annotations
|
|
56
|
-
|
|
55
|
+
annotations: annotations
|
|
56
|
+
?.map((a) => `${a.elementID}:${a.text}`)
|
|
57
|
+
.join("\n"),
|
|
58
|
+
}, 27);
|
|
57
59
|
// assuming there is only one user message in the prompt. if there is a change in langfuse prompt format, this will need to be updated
|
|
58
60
|
const userMessage = promptMessages.filter((m) => m.role === "user")[0];
|
|
59
61
|
const systemMessage = promptMessages.filter((m) => m.role === "system")[0];
|
|
@@ -175,7 +177,7 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
|
|
|
175
177
|
const plannerResp = await (0, run_time_planner_1.runtimePlanner)({
|
|
176
178
|
trace: masterAgentSpan,
|
|
177
179
|
task,
|
|
178
|
-
conversation: [
|
|
180
|
+
conversation: [...masterAgentActions],
|
|
179
181
|
pages: getPageVariables(actions.getStateVariables()),
|
|
180
182
|
currentPage: (0, utils_1.getPageVarName)(),
|
|
181
183
|
});
|
|
@@ -210,8 +212,18 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
|
|
|
210
212
|
// @ts-ignore
|
|
211
213
|
// eslint-disable-next-line no-undef
|
|
212
214
|
window.annotationInstance = annotateClickableElements();
|
|
215
|
+
const annotations = Object.entries(
|
|
213
216
|
// @ts-ignore
|
|
214
|
-
|
|
217
|
+
window.annotationInstance.annotations).map(([key, value]) => ({
|
|
218
|
+
elementID: key, // Assign the key to elementID
|
|
219
|
+
text:
|
|
220
|
+
//@ts-ignore
|
|
221
|
+
value.node.text?.trim() ||
|
|
222
|
+
//@ts-ignore
|
|
223
|
+
value.node.textContent?.trim() ||
|
|
224
|
+
"<This is an icon or image. Check the screenshot>",
|
|
225
|
+
}));
|
|
226
|
+
return annotations;
|
|
215
227
|
});
|
|
216
228
|
await page.waitForTimeout(2000);
|
|
217
229
|
const annonationBuffer = await page.screenshot({
|
|
@@ -13,7 +13,10 @@ export declare const triggerHintsFlow: ({ outputFromGetNextAction, generatedAnno
|
|
|
13
13
|
action: string;
|
|
14
14
|
elementAnnotation?: string;
|
|
15
15
|
};
|
|
16
|
-
generatedAnnotations:
|
|
16
|
+
generatedAnnotations: {
|
|
17
|
+
elementID: string;
|
|
18
|
+
text: string;
|
|
19
|
+
}[];
|
|
17
20
|
page: TestGenPage;
|
|
18
21
|
llm: LLM;
|
|
19
22
|
trace?: TraceClient | undefined;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"with-hints.d.ts","sourceRoot":"","sources":["../../../src/agent/master/with-hints.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAErD,OAAO,MAAM,MAAM,QAAQ,CAAC;AAI5B,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AACzC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAEnD,eAAO,MAAM,0BAA0B;iBAMxB,OAAO,8BAA8B;;oBAElC,MAAM;6BACG,MAAM;MAC7B,MAAM,GAAG,OAAO,yBAAyB,EAiC5C,CAAC;AAEF,eAAO,MAAM,gBAAgB;6BAOF;QACvB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,CAAC,EAAE,MAAM,CAAC;KAC5B;0BACqB,
|
|
1
|
+
{"version":3,"file":"with-hints.d.ts","sourceRoot":"","sources":["../../../src/agent/master/with-hints.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAErD,OAAO,MAAM,MAAM,QAAQ,CAAC;AAI5B,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AACzC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAEnD,eAAO,MAAM,0BAA0B;iBAMxB,OAAO,8BAA8B;;oBAElC,MAAM;6BACG,MAAM;MAC7B,MAAM,GAAG,OAAO,yBAAyB,EAiC5C,CAAC;AAEF,eAAO,MAAM,gBAAgB;6BAOF;QACvB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,CAAC,EAAE,MAAM,CAAC;KAC5B;0BACqB;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,EAAE;UACrD,WAAW;SACZ,GAAG;;MAEN,QAAQ;IACV,sBAAsB,EAAE,OAAO,CAAC;IAChC,wBAAwB,EAAE,OAAO,qBAAqB,GAAG,SAAS,CAAC;CACpE,CAwGA,CAAC"}
|
|
@@ -37,7 +37,7 @@ const triggerHintsFlow = async ({ outputFromGetNextAction, generatedAnnotations,
|
|
|
37
37
|
try {
|
|
38
38
|
const hasElementAnnotation = outputFromGetNextAction?.elementAnnotation?.length &&
|
|
39
39
|
outputFromGetNextAction?.elementAnnotation?.trim()?.length &&
|
|
40
|
-
generatedAnnotations
|
|
40
|
+
generatedAnnotations.some((annotation) => annotation.elementID === outputFromGetNextAction?.elementAnnotation);
|
|
41
41
|
trace?.event({
|
|
42
42
|
name: "has-element-annotation",
|
|
43
43
|
output: {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run-time-planner.d.ts","sourceRoot":"","sources":["../../../src/agent/planner/run-time-planner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAGrD;;;;;;GAMG;AACH,wBAAsB,cAAc,CAAC,EACnC,KAAK,EACL,IAAI,EACJ,YAAY,EACZ,KAAK,EACL,WAAW,GACZ,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC5B,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;;;;
|
|
1
|
+
{"version":3,"file":"run-time-planner.d.ts","sourceRoot":"","sources":["../../../src/agent/planner/run-time-planner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAGrD;;;;;;GAMG;AACH,wBAAsB,cAAc,CAAC,EACnC,KAAK,EACL,IAAI,EACJ,YAAY,EACZ,KAAK,EACL,WAAW,GACZ,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC5B,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;;;;GAiHA"}
|
|
@@ -22,14 +22,14 @@ async function runtimePlanner({ trace, task, conversation, pages, currentPage, }
|
|
|
22
22
|
{
|
|
23
23
|
role: "system",
|
|
24
24
|
content: `
|
|
25
|
-
Given a
|
|
26
|
-
These
|
|
25
|
+
Given a successfully executed actions that lists only the actions that were successfully executed and a task comprising multiple actions, your goal is to analyse the list and determine if the entire task is completed.
|
|
26
|
+
These actions are executed by AI agents using Playwright on a browser. These agents already have access to browser tabs to execute actions. The successfully executed actions on browser post browser has opened, is provided to you as successfully executed actions.
|
|
27
27
|
|
|
28
28
|
If the task is not fully completed, identify which specific actions are missing and suggest next steps to complete the task. Assume that the conversation provided is entirely truthful and no additional actions were performed beyond those listed.
|
|
29
29
|
|
|
30
30
|
To fulfil your goal, follow these steps:
|
|
31
31
|
- Divide the task into individual actions.
|
|
32
|
-
- Compare each task action against the actions listed in the
|
|
32
|
+
- Compare each task action against the actions listed in the successfully executed actions list.
|
|
33
33
|
- Identify which actions have been executed and which have not.
|
|
34
34
|
- If all actions are executed, respond with the task as done.
|
|
35
35
|
- If any actions are missing, respond with the task as not done, listing all actions and specifying which are complete and which are missing.
|
|
@@ -41,9 +41,13 @@ To fulfil your goal, follow these steps:
|
|
|
41
41
|
content: `
|
|
42
42
|
Task: ${task}
|
|
43
43
|
|
|
44
|
-
|
|
44
|
+
----
|
|
45
|
+
|
|
46
|
+
Following are successfully executed actions:
|
|
45
47
|
${conversation.join("\n")}
|
|
46
48
|
|
|
49
|
+
----
|
|
50
|
+
|
|
47
51
|
Current page:
|
|
48
52
|
${currentPage}
|
|
49
53
|
`,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"update-scenario-agent.evals.d.ts","sourceRoot":"","sources":["../../src/evals/update-scenario-agent.evals.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"update-scenario-agent.evals.d.ts","sourceRoot":"","sources":["../../src/evals/update-scenario-agent.evals.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAUpC,QAAA,MAAM,+BAA+B,EAAE,UAqDtC,CAAC;AAEF,eAAe,+BAA+B,CAAC"}
|
|
@@ -5,10 +5,9 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
6
|
const js_levenshtein_1 = __importDefault(require("js-levenshtein"));
|
|
7
7
|
const update_flow_1 = require("../agent/codegen/update-flow");
|
|
8
|
-
const utils_1 = require("../agent/codegen/utils");
|
|
9
8
|
const updateScenarioCodeAgentEvaluate = async ({ item, trace }) => {
|
|
10
9
|
const { testCase, testFiles, pageFiles, testFilePath, testFileContent } = item.input;
|
|
11
|
-
const
|
|
10
|
+
const fileChanges = await (0, update_flow_1.getUpdateTestCodeCompletion)({
|
|
12
11
|
testCase,
|
|
13
12
|
testFiles,
|
|
14
13
|
pageFiles,
|
|
@@ -16,8 +15,7 @@ const updateScenarioCodeAgentEvaluate = async ({ item, trace }) => {
|
|
|
16
15
|
testFileContent,
|
|
17
16
|
trace,
|
|
18
17
|
});
|
|
19
|
-
const
|
|
20
|
-
const expectedFileChanges = (0, utils_1.extractTestUpdates)(item.expectedOutput);
|
|
18
|
+
const expectedFileChanges = item.expectedOutput;
|
|
21
19
|
const fileChangeCount = fileChanges.length;
|
|
22
20
|
const expectedFileChangeCount = expectedFileChanges.length;
|
|
23
21
|
const correctFilePathChanges = expectedFileChanges.every((ef) => fileChanges.some((f) => f.filePath === ef.filePath));
|
|
@@ -43,7 +41,7 @@ const updateScenarioCodeAgentEvaluate = async ({ item, trace }) => {
|
|
|
43
41
|
value: score,
|
|
44
42
|
},
|
|
45
43
|
],
|
|
46
|
-
output:
|
|
44
|
+
output: fileChanges,
|
|
47
45
|
};
|
|
48
46
|
};
|
|
49
47
|
exports.default = updateScenarioCodeAgentEvaluate;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@empiricalrun/test-gen",
|
|
3
|
-
"version": "0.38.
|
|
3
|
+
"version": "0.38.16",
|
|
4
4
|
"publishConfig": {
|
|
5
5
|
"registry": "https://registry.npmjs.org/",
|
|
6
6
|
"access": "public"
|
|
@@ -58,9 +58,9 @@
|
|
|
58
58
|
"ts-morph": "^24.0.0",
|
|
59
59
|
"tsx": "^4.16.2",
|
|
60
60
|
"typescript": "^5.3.3",
|
|
61
|
-
"@empiricalrun/llm": "^0.9.
|
|
62
|
-
"@empiricalrun/
|
|
63
|
-
"@empiricalrun/
|
|
61
|
+
"@empiricalrun/llm": "^0.9.26",
|
|
62
|
+
"@empiricalrun/reporter": "^0.21.3",
|
|
63
|
+
"@empiricalrun/r2-uploader": "^0.3.6"
|
|
64
64
|
},
|
|
65
65
|
"devDependencies": {
|
|
66
66
|
"@types/detect-port": "^1.3.5",
|