@empiricalrun/test-gen 0.38.29 → 0.38.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.38.30
4
+
5
+ ### Patch Changes
6
+
7
+ - dbe713b: fix: auto dismiss popup at test runtime
8
+
3
9
  ## 0.38.29
4
10
 
5
11
  ### Patch Changes
@@ -1,6 +1,6 @@
1
1
  import { PlaywrightActionGenerator } from "../types";
2
2
  export declare const SKILL_USAGE = "skill_usage";
3
- type Skill = {
3
+ export type Skill = {
4
4
  testStep: string;
5
5
  filePath: string;
6
6
  usageExample: string;
@@ -1 +1 @@
1
- {"version":3,"file":"skill.d.ts","sourceRoot":"","sources":["../../src/actions/skill.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,yBAAyB,EAAE,MAAM,UAAU,CAAC;AAGrD,eAAO,MAAM,WAAW,gBAAgB,CAAC;AAEzC,KAAK,KAAK,GAAG;IACX,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,MAAM,CAAC;CAChB,CAAC;AAEF,cAAM,cAAc;IACN,OAAO,CAAC,MAAM;gBAAN,MAAM,EAAE,KAAK,EAAE;IAEnC,kBAAkB;IAIlB,YAAY,CAAC,MAAM,EAAE,KAAK,EAAE;CAG7B;AAED,eAAO,MAAM,cAAc,gBAAyB,CAAC;AAErD,eAAO,MAAM,oBAAoB,EAAE,yBAmHlC,CAAC"}
1
+ {"version":3,"file":"skill.d.ts","sourceRoot":"","sources":["../../src/actions/skill.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,yBAAyB,EAAE,MAAM,UAAU,CAAC;AAGrD,eAAO,MAAM,WAAW,gBAAgB,CAAC;AAEzC,MAAM,MAAM,KAAK,GAAG;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,MAAM,CAAC;CAChB,CAAC;AAEF,cAAM,cAAc;IACN,OAAO,CAAC,MAAM;gBAAN,MAAM,EAAE,KAAK,EAAE;IAEnC,kBAAkB;IAIlB,YAAY,CAAC,MAAM,EAAE,KAAK,EAAE;CAG7B;AAED,eAAO,MAAM,cAAc,gBAAyB,CAAC;AAErD,eAAO,MAAM,oBAAoB,EAAE,yBAmHlC,CAAC"}
@@ -0,0 +1,15 @@
1
+ import { TraceClient } from "@empiricalrun/llm";
2
+ import { Page } from "playwright/test";
3
+ export declare function runtimePlannerWithScreenshot({ trace, task, conversation, pages, page, currentPage, }: {
4
+ trace?: TraceClient;
5
+ conversation: string[];
6
+ task: string;
7
+ pages?: Record<string, any>;
8
+ page: Page;
9
+ currentPage?: string;
10
+ }): Promise<{
11
+ pageName: string;
12
+ isDone: boolean;
13
+ reason: string;
14
+ }>;
15
+ //# sourceMappingURL=planner.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"planner.d.ts","sourceRoot":"","sources":["../../../src/agent/master/planner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAGrD,OAAO,EAAE,IAAI,EAAE,MAAM,iBAAiB,CAAC;AAIvC,wBAAsB,4BAA4B,CAAC,EACjD,KAAK,EACL,IAAI,EACJ,YAAY,EACZ,KAAK,EACL,IAAI,EACJ,WAAW,GACZ,EAAE;IACD,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC5B,IAAI,EAAE,IAAI,CAAC;IACX,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;;;;GAwIA"}
@@ -0,0 +1,143 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.runtimePlannerWithScreenshot = void 0;
4
+ const llm_1 = require("@empiricalrun/llm");
5
+ const vision_1 = require("@empiricalrun/llm/vision");
6
+ const constants_1 = require("../../constants");
7
+ async function runtimePlannerWithScreenshot({ trace, task, conversation, pages, page, currentPage, }) {
8
+ const buffer = await page.screenshot({
9
+ //This is done to improve element annotation accuracy, anyways it doesn't annotate elements which are out of viewport
10
+ // fullPage: true,
11
+ // path: `screenshots/screenshot-${screenshotIndex++}.png`, // enable this and screenshotIndex var for local debugging
12
+ });
13
+ const pageBuffer = buffer.toString("base64");
14
+ const runTimePlannerSpan = trace?.span({
15
+ name: "popup-verifier",
16
+ input: {
17
+ task,
18
+ conversation,
19
+ },
20
+ });
21
+ const llm = new llm_1.LLM({ provider: "openai" });
22
+ const prompt = [
23
+ {
24
+ role: "system",
25
+ content: `
26
+ Given a successfully executed actions that lists only the actions that were successfully executed and a task comprising multiple actions, your goal is to analyse the list and determine if the entire task is completed.
27
+ These actions are executed by AI agents using Playwright on a browser. These agents already have access to browser tabs to execute actions. The successfully executed actions on browser post browser has opened, is provided to you as successfully executed actions.
28
+
29
+ You are also given a screenshot of the current screen which you can also use to determine whether the entire task is completed or not.
30
+
31
+ If the task is not fully completed, identify which specific actions are missing and suggest next steps to complete the task. Assume that the conversation provided is entirely truthful and no additional actions were performed beyond those listed.
32
+
33
+ To fulfil your goal, follow these steps:
34
+ - Divide the task into individual actions.
35
+ - Compare each task action against the actions listed in the successfully executed actions list.
36
+ - Identify which actions have been executed and which have not.
37
+ - If all actions are executed, respond with the task as done.
38
+ - If any actions are missing, respond with the task as not done, listing all actions and specifying which are complete and which are missing.
39
+ - If provided with list of pages, based on the next pending action and previously executed action, identify the page on which next action needs to be taken
40
+ `,
41
+ },
42
+ {
43
+ role: "user",
44
+ content: [
45
+ {
46
+ type: "text",
47
+ text: `
48
+ Task: ${task}
49
+
50
+ ----
51
+
52
+ Following are successfully executed actions:
53
+ ${conversation.join("\n")}
54
+
55
+ ----
56
+
57
+ Current page:
58
+ ${currentPage}
59
+ `,
60
+ },
61
+ {
62
+ type: "text",
63
+ text: "Page Screenshot",
64
+ },
65
+ {
66
+ type: "image_url",
67
+ image_url: {
68
+ url: (0, vision_1.imageFormatForProvider)(constants_1.DEFAULT_MODEL_PROVIDER, pageBuffer),
69
+ },
70
+ },
71
+ ],
72
+ },
73
+ ];
74
+ const response = await llm.createChatCompletion({
75
+ trace: runTimePlannerSpan,
76
+ traceName: "runtime-planner-llm",
77
+ model: "gpt-4o",
78
+ messages: prompt,
79
+ tools: [
80
+ {
81
+ type: "function",
82
+ function: {
83
+ name: "task_done",
84
+ description: "end the task by calling this method",
85
+ parameters: {
86
+ type: "object",
87
+ properties: {
88
+ actions: {
89
+ type: "string",
90
+ description: "actions extracted from task",
91
+ },
92
+ successful_actions: {
93
+ type: "string",
94
+ description: "successful actions mentioned in the conversation",
95
+ },
96
+ reason: {
97
+ type: "string",
98
+ description: "reasoning for identification of task status",
99
+ },
100
+ isDone: {
101
+ type: "boolean",
102
+ description: "whether the task is done",
103
+ },
104
+ pageName: {
105
+ type: "string",
106
+ enum: pages ? Object.keys(pages) : [],
107
+ description: "page name for the next action.",
108
+ },
109
+ },
110
+ required: ["isDone", "reason", "pageName"],
111
+ },
112
+ },
113
+ },
114
+ ],
115
+ modelParameters: {
116
+ tool_choice: "required",
117
+ temperature: 0.5,
118
+ },
119
+ });
120
+ const toolCallResp = (response?.tool_calls || [])[0];
121
+ if (toolCallResp) {
122
+ const toolCall = JSON.parse(toolCallResp.function.arguments);
123
+ const output = {
124
+ pageName: toolCall.pageName,
125
+ isDone: toolCall.isDone,
126
+ reason: toolCall.reason,
127
+ };
128
+ runTimePlannerSpan?.end({
129
+ output,
130
+ });
131
+ return output;
132
+ }
133
+ const output = {
134
+ pageName: "",
135
+ isDone: false,
136
+ reason: "LLM failed to generate a valid response",
137
+ };
138
+ runTimePlannerSpan?.end({
139
+ output,
140
+ });
141
+ return output;
142
+ }
143
+ exports.runtimePlannerWithScreenshot = runtimePlannerWithScreenshot;
@@ -4,7 +4,7 @@ import { BrowsingAgentOptions } from "../browsing";
4
4
  export declare function createTestUsingMasterAgent({ task, page, testCase, options, scopeVars, }: {
5
5
  task: string;
6
6
  page: Page;
7
- testCase: TestCase;
7
+ testCase?: TestCase;
8
8
  options: BrowsingAgentOptions;
9
9
  scopeVars?: ScopeVars;
10
10
  }): Promise<{
@@ -1 +1 @@
1
- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAclC,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAClD,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AAsBrB,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,oBAAoB,CAAC;IAC9B,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GA6VA"}
1
+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/master/run.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAclC,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAClD,OAAO,EACL,oBAAoB,EAErB,MAAM,aAAa,CAAC;AAsBrB,wBAAsB,0BAA0B,CAAC,EAC/C,IAAI,EACJ,IAAI,EACJ,QAAQ,EACR,OAAO,EACP,SAAS,GACV,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,OAAO,EAAE,oBAAoB,CAAC;IAC9B,SAAS,CAAC,EAAE,SAAS,CAAC;CACvB;;;GAkWA"}
@@ -62,13 +62,16 @@ async function createTestUsingMasterAgent({ task, page, testCase, options, scope
62
62
  // we will be using google model for larger context window, in such cases 1 million tokens is not enough
63
63
  maxTokens: options.modelProvider === "google" ? 3000000 : 1000000,
64
64
  });
65
- //Fetching available skills
66
- const skills = await (0, skills_retriever_1.getAppropriateSkills)({
67
- testCase,
68
- trace,
69
- //@ts-ignore
70
- options,
71
- });
65
+ let skills = [];
66
+ if (testCase) {
67
+ //Fetching available skills
68
+ skills = await (0, skills_retriever_1.getAppropriateSkills)({
69
+ testCase,
70
+ trace,
71
+ //@ts-ignore
72
+ options,
73
+ });
74
+ }
72
75
  skill_1.testCaseSkills.updateSkills(skills);
73
76
  const actions = new actions_1.PlaywrightActions(testGenPage, scopeVars);
74
77
  await (0, utils_2.injectPwLocatorGenerator)(page);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@empiricalrun/test-gen",
3
- "version": "0.38.29",
3
+ "version": "0.38.30",
4
4
  "publishConfig": {
5
5
  "registry": "https://registry.npmjs.org/",
6
6
  "access": "public"
@@ -18,6 +18,14 @@
18
18
  "types": "./dist/types/index.d.ts",
19
19
  "default": "./dist/types/index.js"
20
20
  },
21
+ "./agent/master/run": {
22
+ "types": "./dist/agent/master/run.d.ts",
23
+ "default": "./dist/agent/master/run.js"
24
+ },
25
+ "./agent/master/planner": {
26
+ "types": "./dist/agent/master/planner.d.ts",
27
+ "default": "./dist/agent/master/planner.js"
28
+ },
21
29
  ".": {
22
30
  "types": "./dist/index.d.ts",
23
31
  "default": "./dist/index.js"