@empiricalrun/test-gen 0.36.1 → 0.36.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/dist/agent/master/run.d.ts.map +1 -1
- package/dist/agent/master/run.js +11 -0
- package/dist/agent/master/with-hints.d.ts +3 -2
- package/dist/agent/master/with-hints.d.ts.map +1 -1
- package/dist/agent/master/with-hints.js +21 -1
- package/dist/bin/index.js +0 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +0 -1
- package/dist/reporter/index.d.ts +0 -1
- package/dist/reporter/index.d.ts.map +1 -1
- package/package.json +2 -2
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,19 @@
|
|
|
1
1
|
# @empiricalrun/test-gen
|
|
2
2
|
|
|
3
|
+
## 0.36.3
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- 7e635e3: fix: remove testGroup usage inside test-gen and reporter
|
|
8
|
+
- Updated dependencies [7e635e3]
|
|
9
|
+
- @empiricalrun/reporter@0.21.3
|
|
10
|
+
|
|
11
|
+
## 0.36.2
|
|
12
|
+
|
|
13
|
+
### Patch Changes
|
|
14
|
+
|
|
15
|
+
- 8c238de: fix: add trace for triggerHintsFlow
|
|
16
|
+
|
|
3
17
|
## 0.36.1
|
|
4
18
|
|
|
5
19
|
### Patch Changes
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,GAAG,EACH,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAG3B,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAYlD,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAClD,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AAQrB,wBAAsB,aAAa,CAAC,EAClC,IAAI,EACJ,eAAe,EACf,aAAa,EACb,OAAO,EACP,KAAK,EACL,GAAG,EACH,OAAO,EACP,cAAc,EACd,uBAAuB,EACvB,OAAO,EACP,aAAa,EACb,QAAgB,GACjB,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,aAAa,EAAE,GAAG,EAAE,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,cAAc,EAAE,MAAM,CAAC;IACvB,uBAAuB,CAAC,EAAE,MAAM,CAAC;IACjC,OAAO,EAAE,iBAAiB,CAAC;IAC3B,aAAa,EAAE,OAAO,CAAC;IACvB,QAAQ,EAAE,OAAO,CAAC;CACnB,2FAwFA;AAGD,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,oBAAoB,CAAC;IAC9B,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;
|
|
1
|
+
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,GAAG,EACH,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAG3B,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAYlD,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAClD,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AAQrB,wBAAsB,aAAa,CAAC,EAClC,IAAI,EACJ,eAAe,EACf,aAAa,EACb,OAAO,EACP,KAAK,EACL,GAAG,EACH,OAAO,EACP,cAAc,EACd,uBAAuB,EACvB,OAAO,EACP,aAAa,EACb,QAAgB,GACjB,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,aAAa,EAAE,GAAG,EAAE,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,cAAc,EAAE,MAAM,CAAC;IACvB,uBAAuB,CAAC,EAAE,MAAM,CAAC;IACjC,OAAO,EAAE,iBAAiB,CAAC;IAC3B,aAAa,EAAE,OAAO,CAAC;IACvB,QAAQ,EAAE,OAAO,CAAC;CACnB,2FAwFA;AAGD,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,oBAAoB,CAAC;IAC9B,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GA8SA"}
|
package/dist/agent/master/run.js
CHANGED
|
@@ -246,14 +246,25 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
|
|
|
246
246
|
let shouldTriggerHintsFlow;
|
|
247
247
|
let hintsExecutionCompletion;
|
|
248
248
|
if (useHints) {
|
|
249
|
+
const triggerHintsFlowSpan = trace?.span({
|
|
250
|
+
name: "trigger-hints-flow",
|
|
251
|
+
input: {
|
|
252
|
+
outputFromGetNextAction: output,
|
|
253
|
+
generatedAnnotations: annotations,
|
|
254
|
+
},
|
|
255
|
+
});
|
|
249
256
|
const result = await (0, with_hints_1.triggerHintsFlow)({
|
|
250
257
|
outputFromGetNextAction: output,
|
|
251
258
|
generatedAnnotations: annotations,
|
|
252
259
|
page,
|
|
253
260
|
llm,
|
|
261
|
+
trace: triggerHintsFlowSpan,
|
|
254
262
|
});
|
|
255
263
|
shouldTriggerHintsFlow = result.shouldTriggerHintsFlow;
|
|
256
264
|
hintsExecutionCompletion = result.hintsExecutionCompletion;
|
|
265
|
+
triggerHintsFlowSpan?.end({
|
|
266
|
+
output: result,
|
|
267
|
+
});
|
|
257
268
|
}
|
|
258
269
|
void testGenReporter.sendMessage(output.reason);
|
|
259
270
|
logger.log(`Next Action: ${output.action}`);
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { LLM } from "@empiricalrun/llm";
|
|
1
|
+
import { LLM, TraceClient } from "@empiricalrun/llm";
|
|
2
2
|
import OpenAI from "openai";
|
|
3
3
|
import { Page } from "playwright";
|
|
4
4
|
import { BrowsingAgentOptions } from "../browsing";
|
|
@@ -8,7 +8,7 @@ export declare const getUserMessageWithForHints: ({ userMessage, options, pageSc
|
|
|
8
8
|
pageScreenshot: string;
|
|
9
9
|
annotatedPageScreenshot: string;
|
|
10
10
|
}) => string | OpenAI.ChatCompletionContentPart[];
|
|
11
|
-
export declare const triggerHintsFlow: ({ outputFromGetNextAction, generatedAnnotations, page, llm, }: {
|
|
11
|
+
export declare const triggerHintsFlow: ({ outputFromGetNextAction, generatedAnnotations, page, llm, trace, }: {
|
|
12
12
|
outputFromGetNextAction: {
|
|
13
13
|
action: string;
|
|
14
14
|
elementAnnotation?: string;
|
|
@@ -16,6 +16,7 @@ export declare const triggerHintsFlow: ({ outputFromGetNextAction, generatedAnno
|
|
|
16
16
|
generatedAnnotations: Record<string, any>;
|
|
17
17
|
page: Page;
|
|
18
18
|
llm: LLM;
|
|
19
|
+
trace?: TraceClient | undefined;
|
|
19
20
|
}) => Promise<{
|
|
20
21
|
shouldTriggerHintsFlow: boolean;
|
|
21
22
|
hintsExecutionCompletion: OpenAI.ChatCompletionMessage | undefined;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"with-hints.d.ts","sourceRoot":"","sources":["../../../src/agent/master/with-hints.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,MAAM,mBAAmB,CAAC;
|
|
1
|
+
{"version":3,"file":"with-hints.d.ts","sourceRoot":"","sources":["../../../src/agent/master/with-hints.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAErD,OAAO,MAAM,MAAM,QAAQ,CAAC;AAC5B,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAIlC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAEnD,eAAO,MAAM,0BAA0B;iBAMxB,OAAO,8BAA8B;;oBAElC,MAAM;6BACG,MAAM;MAC7B,MAAM,GAAG,OAAO,yBAAyB,EAiC5C,CAAC;AAEF,eAAO,MAAM,gBAAgB;6BAOF;QACvB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,CAAC,EAAE,MAAM,CAAC;KAC5B;0BACqB,OAAO,MAAM,EAAE,GAAG,CAAC;UACnC,IAAI;SACL,GAAG;;MAEN,QAAQ;IACV,sBAAsB,EAAE,OAAO,CAAC;IAChC,wBAAwB,EAAE,OAAO,qBAAqB,GAAG,SAAS,CAAC;CACpE,CAqGA,CAAC"}
|
|
@@ -33,12 +33,18 @@ const getUserMessageWithForHints = ({ userMessage, options, pageScreenshot, anno
|
|
|
33
33
|
];
|
|
34
34
|
};
|
|
35
35
|
exports.getUserMessageWithForHints = getUserMessageWithForHints;
|
|
36
|
-
const triggerHintsFlow = async ({ outputFromGetNextAction, generatedAnnotations, page, llm, }) => {
|
|
36
|
+
const triggerHintsFlow = async ({ outputFromGetNextAction, generatedAnnotations, page, llm, trace, }) => {
|
|
37
37
|
try {
|
|
38
38
|
const hasElementAnnotation = outputFromGetNextAction?.elementAnnotation?.length &&
|
|
39
39
|
outputFromGetNextAction?.elementAnnotation?.trim()?.length &&
|
|
40
40
|
outputFromGetNextAction?.elementAnnotation in
|
|
41
41
|
(generatedAnnotations || {});
|
|
42
|
+
trace?.event({
|
|
43
|
+
name: "has-element-annotation",
|
|
44
|
+
output: {
|
|
45
|
+
hasElementAnnotation,
|
|
46
|
+
},
|
|
47
|
+
});
|
|
42
48
|
if (!hasElementAnnotation) {
|
|
43
49
|
return {
|
|
44
50
|
shouldTriggerHintsFlow: false,
|
|
@@ -79,9 +85,16 @@ const triggerHintsFlow = async ({ outputFromGetNextAction, generatedAnnotations,
|
|
|
79
85
|
max_completion_tokens: 4000,
|
|
80
86
|
tool_choice: "required",
|
|
81
87
|
},
|
|
88
|
+
trace,
|
|
82
89
|
});
|
|
83
90
|
const canTriggerHintsFlow = completion?.tool_calls?.some((currentToolCall) => currentToolCall.function.name === "fill_input_element" ||
|
|
84
91
|
currentToolCall.function.name === "click_element");
|
|
92
|
+
trace?.event({
|
|
93
|
+
name: "can-trigger-hints-flow",
|
|
94
|
+
output: {
|
|
95
|
+
canTriggerHintsFlow,
|
|
96
|
+
},
|
|
97
|
+
});
|
|
85
98
|
if (!canTriggerHintsFlow) {
|
|
86
99
|
return {
|
|
87
100
|
shouldTriggerHintsFlow: false,
|
|
@@ -96,6 +109,13 @@ const triggerHintsFlow = async ({ outputFromGetNextAction, generatedAnnotations,
|
|
|
96
109
|
}
|
|
97
110
|
catch (e) {
|
|
98
111
|
console.error("Error in triggerHintsFlow", e);
|
|
112
|
+
trace?.event({
|
|
113
|
+
name: "trigger-hints-flow-error",
|
|
114
|
+
output: {
|
|
115
|
+
errorMessage: e?.message,
|
|
116
|
+
errorStack: e?.stack,
|
|
117
|
+
},
|
|
118
|
+
});
|
|
99
119
|
return {
|
|
100
120
|
shouldTriggerHintsFlow: false,
|
|
101
121
|
hintsExecutionCompletion: undefined,
|
package/dist/bin/index.js
CHANGED
|
@@ -103,7 +103,6 @@ async function runAgent(testGenConfig) {
|
|
|
103
103
|
(0, reporter_1.setReporterConfig)({
|
|
104
104
|
testCaseId: testGenConfig.testCase.id,
|
|
105
105
|
testCaseName: testGenConfig.testCase.name,
|
|
106
|
-
testGroup: testGenConfig.testCase.group,
|
|
107
106
|
projectRepoName: testGenConfig.options?.metadata.projectRepoName,
|
|
108
107
|
testSessionId: testGenConfig.options?.metadata.testSessionId,
|
|
109
108
|
generationId: testGenConfig.options?.metadata.generationId,
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAQlC,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAOpC,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC,EAAE,SAAS,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAQlC,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAOpC,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC,EAAE,SAAS,iBAsC3E"}
|
package/dist/index.js
CHANGED
|
@@ -22,7 +22,6 @@ async function createTest(task, page, scope) {
|
|
|
22
22
|
(0, reporter_1.setReporterConfig)({
|
|
23
23
|
testCaseId: testGenConfig.testCase.id,
|
|
24
24
|
testCaseName: testGenConfig.testCase.name,
|
|
25
|
-
testGroup: testGenConfig.testCase.group,
|
|
26
25
|
projectRepoName: testGenConfig.options?.metadata.projectRepoName,
|
|
27
26
|
testSessionId: testGenConfig.options?.metadata.testSessionId,
|
|
28
27
|
generationId: testGenConfig.options?.metadata.generationId,
|
package/dist/reporter/index.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/reporter/index.ts"],"names":[],"mappings":"AACA,OAAO,EAA4B,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AAa5E,KAAK,kBAAkB,GAAG;IACxB,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,YAAY,EAAE,MAAM,CAAC;IACrB,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/reporter/index.ts"],"names":[],"mappings":"AACA,OAAO,EAA4B,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AAa5E,KAAK,kBAAkB,GAAG;IACxB,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC;AAKF,wBAAgB,WAAW,IAAI,QAAQ,GAAG,SAAS,CAUlD;AAED;;;;;;;;;GASG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,kBAAkB,GAAG,IAAI,CAGlE;AAED,qBAAa,sBAAsB;;IAE3B,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAK1C,eAAe,CAAC,EACpB,eAAe,EACf,QAAQ,GACT,EAAE;QACD,eAAe,EAAE,MAAM,CAAC;QACxB,QAAQ,EAAE,MAAM,CAAC;KAClB;IAgDK,eAAe,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAmC9C,WAAW,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAY3C,UAAU,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAY1C,iBAAiB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;CAWxD"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@empiricalrun/test-gen",
|
|
3
|
-
"version": "0.36.1",
|
|
3
|
+
"version": "0.36.3",
|
|
4
4
|
"publishConfig": {
|
|
5
5
|
"registry": "https://registry.npmjs.org/",
|
|
6
6
|
"access": "public"
|
|
@@ -46,7 +46,7 @@
|
|
|
46
46
|
"typescript": "^5.3.3",
|
|
47
47
|
"@empiricalrun/llm": "^0.9.23",
|
|
48
48
|
"@empiricalrun/r2-uploader": "^0.3.6",
|
|
49
|
-
"@empiricalrun/reporter": "^0.21.
|
|
49
|
+
"@empiricalrun/reporter": "^0.21.3"
|
|
50
50
|
},
|
|
51
51
|
"devDependencies": {
|
|
52
52
|
"@types/detect-port": "^1.3.5",
|