@empiricalrun/test-gen 0.31.3 → 0.31.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,23 @@
1
1
  # @empiricalrun/test-gen
2
2
 
3
+ ## 0.31.6
4
+
5
+ ### Patch Changes
6
+
7
+ - 52cd5ee: feat: add o1 support for browsing agent
8
+
9
+ ## 0.31.5
10
+
11
+ ### Patch Changes
12
+
13
+ - 7cafaf6: fix: add logline for test gen failure
14
+
15
+ ## 0.31.4
16
+
17
+ ### Patch Changes
18
+
19
+ - 0d8e839: fix: no response issues in o1 models
20
+
3
21
  ## 0.31.3
4
22
 
5
23
  ### Patch Changes
@@ -1 +1 @@
1
- {"version":3,"file":"assert.d.ts","sourceRoot":"","sources":["../../src/actions/assert.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,yBAAyB,EAAE,MAAM,UAAU,CAAC;AAIrD,eAAO,MAAM,6CAA6C,2BAChC,CAAC;AAE3B,eAAO,MAAM,mCAAmC,EAAE,yBA8CjD,CAAC"}
1
+ {"version":3,"file":"assert.d.ts","sourceRoot":"","sources":["../../src/actions/assert.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,yBAAyB,EAAE,MAAM,UAAU,CAAC;AAIrD,eAAO,MAAM,6CAA6C,2BAChC,CAAC;AAE3B,eAAO,MAAM,mCAAmC,EAAE,yBAoDjD,CAAC"}
@@ -7,7 +7,7 @@ exports.PLAYWRIGHT_ASSERT_TEXT_VISIBILITY_ACTION_NAME = "assert_text_visibility"
7
7
  const assertTextVisibilityActionGenerator = (page) => {
8
8
  return {
9
9
  execute: async (args) => {
10
- const locator = await (0, utils_1.getPlaywrightLocatorUsingCssSelector)(args.css_selector, page);
10
+ const locator = await (0, utils_1.getPlaywrightLocatorUsingCssSelector)(args.css_selector, args.xpath, page);
11
11
  const exec = new Function("page", `return page.${locator}.isVisible({ timeout: 3000 })`);
12
12
  await exec(page);
13
13
  return {
@@ -31,12 +31,16 @@ const assertTextVisibilityActionGenerator = (page) => {
31
31
  type: "string",
32
32
  description: "CSS selector to identify the element uniquely and click it. When creating CSS selectors, ensure they are unique and specific enough to select only one element, even if there are multiple elements of the same type (like multiple h1 elements)",
33
33
  },
34
+ xpath: {
35
+ type: "string",
36
+ description: "XPath selector to identify the element uniquely and click it. When creating XPATH selector, ensure they are unique and specific enough to select only one element, even if there are multiple elements of the same type (like multiple h1 elements)",
37
+ },
34
38
  reason: {
35
39
  type: "string",
36
40
  description: constants_1.DEFAULT_ACTION_REASON_PROMPT,
37
41
  },
38
42
  },
39
- required: ["css_selector", "reason"],
43
+ required: ["css_selector", "xpath", "reason"],
40
44
  },
41
45
  },
42
46
  },
@@ -1 +1 @@
1
- {"version":3,"file":"click.d.ts","sourceRoot":"","sources":["../../src/actions/click.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,yBAAyB,EAAE,MAAM,UAAU,CAAC;AAIrD,eAAO,MAAM,4BAA4B,kBAAkB,CAAC;AAE5D,eAAO,MAAM,oBAAoB,EAAE,yBA8ClC,CAAC"}
1
+ {"version":3,"file":"click.d.ts","sourceRoot":"","sources":["../../src/actions/click.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,yBAAyB,EAAE,MAAM,UAAU,CAAC;AAIrD,eAAO,MAAM,4BAA4B,kBAAkB,CAAC;AAE5D,eAAO,MAAM,oBAAoB,EAAE,yBAoDlC,CAAC"}
@@ -8,7 +8,7 @@ const clickActionGenerator = (page) => {
8
8
  return {
9
9
  execute: async (args) => {
10
10
  const selector = args.css_selector;
11
- const locator = await (0, utils_1.getPlaywrightLocatorUsingCssSelector)(selector, page);
11
+ const locator = await (0, utils_1.getPlaywrightLocatorUsingCssSelector)(selector, args.xpath, page);
12
12
  const exec = new Function("page", `return page.${locator}.click({ timeout: 3000 })`);
13
13
  await exec(page);
14
14
  await page.waitForTimeout(3000);
@@ -33,12 +33,16 @@ const clickActionGenerator = (page) => {
33
33
  type: "string",
34
34
  description: "CSS selector to identify the element uniquely and click it. When creating CSS selectors, ensure they are unique and specific enough to select only one element, even if there are multiple elements of the same type (like multiple h1 elements)",
35
35
  },
36
+ xpath: {
37
+ type: "string",
38
+ description: "XPath selector to identify the element uniquely and click it. When creating XPATH selector, ensure they are unique and specific enough to select only one element, even if there are multiple elements of the same type (like multiple h1 elements)",
39
+ },
36
40
  reason: {
37
41
  type: "string",
38
42
  description: constants_1.DEFAULT_ACTION_REASON_PROMPT,
39
43
  },
40
44
  },
41
- required: ["css_selector", "reason"],
45
+ required: ["css_selector", "xpath", "reason"],
42
46
  },
43
47
  },
44
48
  },
@@ -1 +1 @@
1
- {"version":3,"file":"fill.d.ts","sourceRoot":"","sources":["../../src/actions/fill.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,yBAAyB,EAAE,MAAM,UAAU,CAAC;AAIrD,eAAO,MAAM,2BAA2B,uBAAuB,CAAC;AAGhE,eAAO,MAAM,mBAAmB,EAAE,yBAgEjC,CAAC"}
1
+ {"version":3,"file":"fill.d.ts","sourceRoot":"","sources":["../../src/actions/fill.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,yBAAyB,EAAE,MAAM,UAAU,CAAC;AAIrD,eAAO,MAAM,2BAA2B,uBAAuB,CAAC;AAGhE,eAAO,MAAM,mBAAmB,EAAE,yBA+EjC,CAAC"}
@@ -11,7 +11,7 @@ const fillActionGenerator = (page, options) => {
11
11
  return {
12
12
  execute: async (args) => {
13
13
  const css = args.css_selector;
14
- const locator = await (0, utils_1.getPlaywrightLocatorUsingCssSelector)(css, page);
14
+ const locator = await (0, utils_1.getPlaywrightLocatorUsingCssSelector)(css, args.xpath, page);
15
15
  const textToFill = options?.stateVariables[args.variable_name] || args.text;
16
16
  const exec = new Function("page", `return page.${locator}.fill("${textToFill}", { timeout: 3000 })`);
17
17
  await exec(page);
@@ -51,12 +51,22 @@ const fillActionGenerator = (page, options) => {
51
51
  type: "string",
52
52
  description: "CSS selector to identify the element uniquely.When creating CSS selectors, ensure they are unique to the page and specific enough to select only one element.",
53
53
  },
54
+ xpath: {
55
+ type: "string",
56
+ description: "XPath selector to identify the element uniquely and click it. When creating XPATH selector, ensure they are unique and specific enough to select only one element, even if there are multiple elements of the same type (like multiple h1 elements)",
57
+ },
54
58
  reason: {
55
59
  type: "string",
56
60
  description: constants_1.DEFAULT_ACTION_REASON_PROMPT,
57
61
  },
58
62
  },
59
- required: ["variable_name", "text", "css_selector", "reason"],
63
+ required: [
64
+ "variable_name",
65
+ "text",
66
+ "css_selector",
67
+ "xpath",
68
+ "reason",
69
+ ],
60
70
  },
61
71
  },
62
72
  },
@@ -1 +1 @@
1
- {"version":3,"file":"text-content.d.ts","sourceRoot":"","sources":["../../src/actions/text-content.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,yBAAyB,EAAE,MAAM,UAAU,CAAC;AAIrD,eAAO,MAAM,2BAA2B,iBAAiB,CAAC;AAE1D,eAAO,MAAM,0BAA0B,EAAE,yBAkDxC,CAAC"}
1
+ {"version":3,"file":"text-content.d.ts","sourceRoot":"","sources":["../../src/actions/text-content.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,yBAAyB,EAAE,MAAM,UAAU,CAAC;AAIrD,eAAO,MAAM,2BAA2B,iBAAiB,CAAC;AAE1D,eAAO,MAAM,0BAA0B,EAAE,yBA2DxC,CAAC"}
@@ -8,7 +8,7 @@ const textContentActionGenerator = (page, options) => {
8
8
  return {
9
9
  execute: async (args) => {
10
10
  const css = args.css_selector;
11
- const locator = await (0, utils_1.getPlaywrightLocatorUsingCssSelector)(css, page);
11
+ const locator = await (0, utils_1.getPlaywrightLocatorUsingCssSelector)(css, args.xpath, page);
12
12
  const exec = new Function("page", `return page.${locator}.textContent()`);
13
13
  const value = (await exec(page));
14
14
  if (options) {
@@ -39,12 +39,16 @@ const textContentActionGenerator = (page, options) => {
39
39
  type: "string",
40
40
  description: "CSS selector to identify the element uniquely.When creating CSS selectors, ensure they are unique to the page and specific enough to select only one element.",
41
41
  },
42
+ xpath: {
43
+ type: "string",
44
+ description: "XPath selector to identify the element uniquely and click it. When creating XPATH selector, ensure they are unique and specific enough to select only one element, even if there are multiple elements of the same type (like multiple h1 elements)",
45
+ },
42
46
  reason: {
43
47
  type: "string",
44
48
  description: constants_1.DEFAULT_ACTION_REASON_PROMPT,
45
49
  },
46
50
  },
47
- required: ["variable_name", "css_selector", "reason"],
51
+ required: ["variable_name", "css_selector", "xpath", "reason"],
48
52
  },
49
53
  },
50
54
  },
@@ -5,6 +5,6 @@ declare global {
5
5
  jQuery: any;
6
6
  }
7
7
  }
8
- export declare function getPlaywrightLocatorUsingCssSelector(cssSelector: string, page: Page): Promise<any>;
8
+ export declare function getPlaywrightLocatorUsingCssSelector(cssSelector: string, xpath: string, page: Page): Promise<any>;
9
9
  export declare function getPageVarName(): string;
10
10
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/actions/utils/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,CAAC,MAAM,CAAC;IACb,UAAU,MAAM;QACd,UAAU,EAAE,GAAG,CAAC;QAChB,MAAM,EAAE,GAAG,CAAC;KACb;CACF;AAED,wBAAsB,oCAAoC,CACxD,WAAW,EAAE,MAAM,EACnB,IAAI,EAAE,IAAI,gBAwBX;AAED,wBAAgB,cAAc,WAG7B"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/actions/utils/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,OAAO,CAAC,MAAM,CAAC;IACb,UAAU,MAAM;QACd,UAAU,EAAE,GAAG,CAAC;QAChB,MAAM,EAAE,GAAG,CAAC;KACb;CACF;AAED,wBAAsB,oCAAoC,CACxD,WAAW,EAAE,MAAM,EACnB,KAAK,EAAE,MAAM,EACb,IAAI,EAAE,IAAI,gBA6CX;AAED,wBAAgB,cAAc,WAG7B"}
@@ -1,26 +1,43 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.getPageVarName = exports.getPlaywrightLocatorUsingCssSelector = void 0;
4
- async function getPlaywrightLocatorUsingCssSelector(cssSelector, page) {
4
+ async function getPlaywrightLocatorUsingCssSelector(cssSelector, xpath, page) {
5
5
  // TODO: analyse other solutions than just css. Also find other solutions to support :has-text -> prompting ?
6
6
  // jquery doesnt support :has-text. neither css. Only playwright locator supports this selector though.
7
7
  const sanitizedCssSelectorForJQuery = cssSelector
8
8
  .replaceAll(":has-text", ":contains")
9
9
  .replaceAll(":text", ":contains");
10
10
  return await page.evaluate((locator) => {
11
- const elements = window.jQuery(locator.cssForJq);
12
- let elIdx = 0;
13
- Array.from(elements).forEach((el, i) => {
14
- if (window.jQuery(el).is(":visible")) {
15
- elIdx = i;
16
- }
17
- });
18
- let selectedElem = elements[elIdx];
11
+ // @ts-ignore
12
+ function getElementByXpath(path) {
13
+ return document.evaluate(path, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
14
+ }
15
+ let selectedElem;
16
+ try {
17
+ selectedElem = getElementByXpath(locator.xpath);
18
+ }
19
+ catch (e) {
20
+ // ignore error
21
+ }
22
+ if (!selectedElem) {
23
+ const elements = window.jQuery(locator.cssForJq);
24
+ let elIdx = 0;
25
+ Array.from(elements).forEach((el, i) => {
26
+ if (window.jQuery(el).is(":visible")) {
27
+ elIdx = i;
28
+ }
29
+ });
30
+ //@ts-ignore
31
+ selectedElem = elements[elIdx];
32
+ }
33
+ else {
34
+ console.log("using xpath for locator");
35
+ }
19
36
  if (!selectedElem) {
20
37
  throw Error(`Unable to find element, css: ${locator.css}`);
21
38
  }
22
39
  return window.playwright.generateLocator(selectedElem);
23
- }, { css: cssSelector, cssForJq: sanitizedCssSelectorForJQuery });
40
+ }, { css: cssSelector, cssForJq: sanitizedCssSelectorForJQuery, xpath: xpath });
24
41
  }
25
42
  exports.getPlaywrightLocatorUsingCssSelector = getPlaywrightLocatorUsingCssSelector;
26
43
  function getPageVarName() {
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAYlC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAMnD,KAAK,oBAAoB,GAAG,OAAO,CAAC,oBAAoB,CAAC,GAAG;IAC1D,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACH,CAAC;AAEF,wBAAsB,6BAA6B,CACjD,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,IAAI,EACV,OAAO,EAAE,oBAAoB,mBAuO9B"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAYlC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAOnD,KAAK,oBAAoB,GAAG,OAAO,CAAC,oBAAoB,CAAC,GAAG;IAC1D,YAAY,CAAC,EAAE;QACb,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC9B,CAAC;CACH,CAAC;AAEF,wBAAsB,6BAA6B,CACjD,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,IAAI,EACV,OAAO,EAAE,oBAAoB,mBAmP9B"}
@@ -14,6 +14,7 @@ const session_1 = require("../../session");
14
14
  const html_1 = require("../../utils/html");
15
15
  const run_1 = require("../master/run");
16
16
  const verification_1 = require("../verification");
17
+ const o1_completion_1 = require("./o1-completion");
17
18
  const utils_1 = require("./utils");
18
19
  async function browsingAgentUsingMasterAgent(task, page, options) {
19
20
  const logger = new logger_1.CustomLogger({ useReporter: false });
@@ -132,17 +133,27 @@ async function browsingAgentUsingMasterAgent(task, page, options) {
132
133
  promptType: "browsing-agent-as-tool",
133
134
  });
134
135
  promptSpan.end({ output: { messages } });
135
- const completion = await llm.createChatCompletion({
136
+ let completion;
137
+ completion = await (0, o1_completion_1.getO1Completion)({
138
+ //@ts-ignore
136
139
  messages,
137
140
  tools,
138
141
  trace: browsingAgentSpan,
139
- model: options.model || constants_1.DEFAULT_MODEL,
140
- modelParameters: {
141
- ...constants_1.DEFAULT_MODEL_PARAMETERS,
142
- ...options.modelParameters,
143
- tool_choice: "required",
144
- },
145
142
  });
143
+ // If O1 completion fails due to any reason, resort to old flow
144
+ if (!completion) {
145
+ completion = await llm.createChatCompletion({
146
+ messages,
147
+ tools,
148
+ trace: browsingAgentSpan,
149
+ model: options.model || constants_1.DEFAULT_MODEL,
150
+ modelParameters: {
151
+ ...constants_1.DEFAULT_MODEL_PARAMETERS,
152
+ ...options.modelParameters,
153
+ tool_choice: "required",
154
+ },
155
+ });
156
+ }
146
157
  const toolCalls = completion?.tool_calls || [];
147
158
  // LLM might respond with empty tool_calls and we can go into endless loop
148
159
  // if we donot record this action and mark it as error
@@ -0,0 +1,8 @@
1
+ import { TraceClient } from "@empiricalrun/llm";
2
+ import { ChatCompletionMessage, ChatCompletionMessageParam, ChatCompletionTool } from "openai/resources/index.mjs";
3
+ export declare function getO1Completion({ messages, tools, trace, }: {
4
+ messages: ChatCompletionMessageParam[];
5
+ tools: ChatCompletionTool[];
6
+ trace?: TraceClient;
7
+ }): Promise<ChatCompletionMessage | undefined>;
8
+ //# sourceMappingURL=o1-completion.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"o1-completion.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/o1-completion.ts"],"names":[],"mappings":"AAAA,OAAO,EAAO,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EACL,qBAAqB,EACrB,0BAA0B,EAC1B,kBAAkB,EACnB,MAAM,4BAA4B,CAAC;AAKpC,wBAAsB,eAAe,CAAC,EACpC,QAAQ,EACR,KAAK,EACL,KAAK,GACN,EAAE;IACD,QAAQ,EAAE,0BAA0B,EAAE,CAAC;IACvC,KAAK,EAAE,kBAAkB,EAAE,CAAC;IAC5B,KAAK,CAAC,EAAE,WAAW,CAAC;CACrB,GAAG,OAAO,CAAC,qBAAqB,GAAG,SAAS,CAAC,CAiD7C"}
@@ -0,0 +1,60 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.getO1Completion = void 0;
7
+ const llm_1 = require("@empiricalrun/llm");
8
+ const remove_markdown_1 = __importDefault(require("remove-markdown"));
9
+ const constants_1 = require("../../constants");
10
+ async function getO1Completion({ messages, tools, trace, }) {
11
+ let completion;
12
+ try {
13
+ const o1Span = trace?.span({ name: "o1-response-span" });
14
+ const llm = new llm_1.LLM({
15
+ trace: o1Span,
16
+ provider: "openai",
17
+ defaultModel: "o1-mini",
18
+ providerApiKey: constants_1.MODEL_API_KEYS["openai"],
19
+ });
20
+ const [userInstruction] = messages.filter((s) => s.role === "user");
21
+ const [systemInstruction] = messages.filter((s) => s.role === "system");
22
+ userInstruction.content = `${systemInstruction?.content}
23
+
24
+ ${userInstruction?.content}
25
+
26
+ You need to respond with one of the following tool call with provided schema:
27
+ ${tools.map((tool) => JSON.stringify(tool)).join("\n --- \n")}
28
+
29
+ Do not respond with markdown, respond only with the JSON object.
30
+ Do not respond with any backticks.
31
+ `;
32
+ const response = (await llm.createChatCompletion({
33
+ messages: [userInstruction],
34
+ modelParameters: {
35
+ ...constants_1.DEFAULT_O1_MODEL_PARAMETERS,
36
+ },
37
+ }));
38
+ o1Span?.end({ output: response });
39
+ const toolResponseStr = (0, remove_markdown_1.default)(response.content);
40
+ const toolRespJSON = JSON.parse(toolResponseStr);
41
+ const tool = {
42
+ type: "function",
43
+ function: {
44
+ name: toolRespJSON.function.name,
45
+ arguments: JSON.stringify(toolRespJSON.function.parameters, null, 2),
46
+ },
47
+ };
48
+ completion = {
49
+ role: "assistant",
50
+ content: response.content,
51
+ tool_calls: [tool],
52
+ };
53
+ }
54
+ catch (e) {
55
+ console.error("O1 response error", e);
56
+ return undefined;
57
+ }
58
+ return completion;
59
+ }
60
+ exports.getO1Completion = getO1Completion;
@@ -1 +1 @@
1
- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/run.ts"],"names":[],"mappings":"AAkBA,KAAK,iBAAiB,GAAG;IACvB;;;;OAIG;IACH,YAAY,EAAE,MAAM,CAAC;IACrB;;;;OAIG;IACH,gBAAgB,EAAE,MAAM,CAAC;CAC1B,CAAC;AAEF;;;;;;;;GAQG;AACH,wBAAsB,6BAA6B,CAAC,EAClD,YAAY,EACZ,gBAAgB,GACjB,EAAE,iBAAiB,iBAuDnB"}
1
+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/agent/browsing/run.ts"],"names":[],"mappings":"AAkBA,KAAK,iBAAiB,GAAG;IACvB;;;;OAIG;IACH,YAAY,EAAE,MAAM,CAAC;IACrB;;;;OAIG;IACH,gBAAgB,EAAE,MAAM,CAAC;CAC1B,CAAC;AAEF;;;;;;;;GAQG;AACH,wBAAsB,6BAA6B,CAAC,EAClD,YAAY,EACZ,gBAAgB,GACjB,EAAE,iBAAiB,iBA+DnB"}
@@ -42,7 +42,7 @@ async function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, }
42
42
  if (!process.env.CI) {
43
43
  command = command.concat(` --headed`);
44
44
  }
45
- let isError = false;
45
+ let isError = false, error = "";
46
46
  try {
47
47
  await (0, exec_1.cmd)(command.split(" "), {
48
48
  env: {
@@ -55,9 +55,11 @@ async function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, }
55
55
  });
56
56
  }
57
57
  catch (e) {
58
- console.error(e);
58
+ error = `Failed to generate test using master agent ${e}`;
59
+ console.error(error);
59
60
  isError = true;
60
61
  }
62
+ // clean up the file if there is any error
61
63
  if (isError) {
62
64
  try {
63
65
  const fileContent = await fs_extra_1.default.readFile(filePathToUpdate, "utf-8");
@@ -69,6 +71,11 @@ async function generateTestsUsingMasterAgent({ testFilePath, filePathToUpdate, }
69
71
  console.error("Failed to remove extra scripts from files post test gen error", e);
70
72
  }
71
73
  }
74
+ // remove the test only from the file
72
75
  await (0, web_1.removeTestOnly)(testFilePath);
76
+ if (isError) {
77
+ // throw the error because of which test gen failed
78
+ throw Error(error);
79
+ }
73
80
  }
74
81
  exports.generateTestsUsingMasterAgent = generateTestsUsingMasterAgent;
@@ -1 +1 @@
1
- {"version":3,"file":"update-flow.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/update-flow.ts"],"names":[],"mappings":"AAAA,OAAO,EAKL,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAoB3B,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,KAAK,eAAe,GAAG,QAAQ,GAAG;IAChC,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB,CAAC;AAqIF,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAAG,SAAS,EACzC,OAAO,GAAE,OAAc,EACvB,QAAQ,GAAE,OAAc,GACvB,OAAO,CAAC,eAAe,EAAE,CAAC,CAuF5B;AAED,wBAAsB,qBAAqB,CAAC,EAC1C,QAAQ,EACR,IAAI,EACJ,OAAO,EACP,KAAK,EACL,aAAoB,GACrB,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAmF7B"}
1
+ {"version":3,"file":"update-flow.d.ts","sourceRoot":"","sources":["../../../src/agent/codegen/update-flow.ts"],"names":[],"mappings":"AAAA,OAAO,EAKL,WAAW,EACZ,MAAM,mBAAmB,CAAC;AAqB3B,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAI7D,KAAK,eAAe,GAAG,QAAQ,GAAG;IAChC,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB,CAAC;AAqIF,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,QAAQ,EAClB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,oBAAoB,GAAG,SAAS,EACzC,OAAO,GAAE,OAAc,EACvB,QAAQ,GAAE,OAAc,GACvB,OAAO,CAAC,eAAe,EAAE,CAAC,CAuF5B;AAED,wBAAsB,qBAAqB,CAAC,EAC1C,QAAQ,EACR,IAAI,EACJ,OAAO,EACP,KAAK,EACL,aAAoB,GACrB,EAAE;IACD,QAAQ,EAAE,QAAQ,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,oBAAoB,CAAC;IAC/B,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAiF7B"}
@@ -232,15 +232,14 @@ async function appendCreateTestBlock({ testCase, file, options, trace, validateT
232
232
  const llm = new llm_1.LLM({
233
233
  trace,
234
234
  provider: "openai",
235
- defaultModel: "o1-preview",
236
- providerApiKey: constants_1.MODEL_API_KEYS[options?.modelProvider || constants_1.DEFAULT_MODEL_PROVIDER],
235
+ defaultModel: "o1-mini",
236
+ providerApiKey: constants_1.MODEL_API_KEYS["openai"],
237
237
  });
238
238
  const firstShotMessage = await llm.createChatCompletion({
239
239
  messages: [userInstruction],
240
240
  modelParameters: {
241
- ...constants_1.DEFAULT_MODEL_PARAMETERS,
242
241
  ...options?.modelParameters,
243
- temperature: 1,
242
+ ...constants_1.DEFAULT_O1_MODEL_PARAMETERS,
244
243
  },
245
244
  });
246
245
  let response = firstShotMessage?.content || "";
package/dist/bin/index.js CHANGED
@@ -71,7 +71,7 @@ async function runAgent(testGenConfig) {
71
71
  }
72
72
  catch (e) {
73
73
  testGenFailed = true;
74
- logger.error("Failed to run agent for the scenario", e);
74
+ new logger_1.CustomLogger().error(`Failed to run agent for the scenario`, e, process.env.LOG_URL ? `[view logs](${process.env.LOG_URL})` : "");
75
75
  }
76
76
  // TODO: move these reporters to a better lifecycle
77
77
  await (0, ci_1.reportOnCI)(testGenConfig.testCase);
@@ -33,7 +33,7 @@ class CustomLogger {
33
33
  this.logToReporter(message);
34
34
  }
35
35
  error(message, ...optionalParams) {
36
- console.log("🚨", (0, picocolors_1.red)(message), ...optionalParams);
36
+ console.error("🚨", (0, picocolors_1.red)(message), ...optionalParams);
37
37
  this.logToReporter(message);
38
38
  }
39
39
  logEmptyLine() {
@@ -3,4 +3,5 @@ export declare const MODEL_API_KEYS: Record<LLMProvider, string>;
3
3
  export declare const DEFAULT_MODEL_PROVIDER: LLMProvider;
4
4
  export declare const DEFAULT_MODEL: LLMModel;
5
5
  export declare const DEFAULT_MODEL_PARAMETERS: ModelParameters;
6
+ export declare const DEFAULT_O1_MODEL_PARAMETERS: ModelParameters;
6
7
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/constants/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAE3E,eAAO,MAAM,cAAc,EAAE,MAAM,CAAC,WAAW,EAAE,MAAM,CAItD,CAAC;AAEF,eAAO,MAAM,sBAAsB,EAAE,WAAsB,CAAC;AAE5D,eAAO,MAAM,aAAa,EAAE,QAA8B,CAAC;AAE3D,eAAO,MAAM,wBAAwB,EAAE,eAGtC,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/constants/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAE3E,eAAO,MAAM,cAAc,EAAE,MAAM,CAAC,WAAW,EAAE,MAAM,CAItD,CAAC;AAEF,eAAO,MAAM,sBAAsB,EAAE,WAAsB,CAAC;AAE5D,eAAO,MAAM,aAAa,EAAE,QAA8B,CAAC;AAE3D,eAAO,MAAM,wBAAwB,EAAE,eAGtC,CAAC;AAEF,eAAO,MAAM,2BAA2B,EAAE,eAGzC,CAAC"}
@@ -1,6 +1,6 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.DEFAULT_MODEL_PARAMETERS = exports.DEFAULT_MODEL = exports.DEFAULT_MODEL_PROVIDER = exports.MODEL_API_KEYS = void 0;
3
+ exports.DEFAULT_O1_MODEL_PARAMETERS = exports.DEFAULT_MODEL_PARAMETERS = exports.DEFAULT_MODEL = exports.DEFAULT_MODEL_PROVIDER = exports.MODEL_API_KEYS = void 0;
4
4
  exports.MODEL_API_KEYS = {
5
5
  google: process.env.GOOGLE_API_KEY || "",
6
6
  anthropic: process.env.ANTHROPIC_API_KEY || "",
@@ -12,3 +12,7 @@ exports.DEFAULT_MODEL_PARAMETERS = {
12
12
  temperature: 0.5,
13
13
  max_completion_tokens: 4000,
14
14
  };
15
+ exports.DEFAULT_O1_MODEL_PARAMETERS = {
16
+ temperature: 1,
17
+ max_completion_tokens: 35000,
18
+ };
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/reporter/index.ts"],"names":[],"mappings":"AACA,OAAO,EAA4B,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AAa5E,KAAK,kBAAkB,GAAG;IACxB,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC;AAKF,wBAAgB,WAAW,IAAI,QAAQ,GAAG,SAAS,CAUlD;AAED;;;;;;;;;GASG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,kBAAkB,GAAG,IAAI,CAGlE;AAED,qBAAa,sBAAsB;;IAE3B,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAK1C,eAAe,CAAC,EACpB,eAAe,EACf,QAAQ,GACT,EAAE;QACD,eAAe,EAAE,MAAM,CAAC;QACxB,QAAQ,EAAE,MAAM,CAAC;KAClB;IA6CK,eAAe,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IA6B9C,WAAW,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;CAUlD"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/reporter/index.ts"],"names":[],"mappings":"AACA,OAAO,EAA4B,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AAa5E,KAAK,kBAAkB,GAAG;IACxB,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC;AAKF,wBAAgB,WAAW,IAAI,QAAQ,GAAG,SAAS,CAUlD;AAED;;;;;;;;;GASG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,kBAAkB,GAAG,IAAI,CAGlE;AAED,qBAAa,sBAAsB;;IAE3B,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAK1C,eAAe,CAAC,EACpB,eAAe,EACf,QAAQ,GACT,EAAE;QACD,eAAe,EAAE,MAAM,CAAC;QACxB,QAAQ,EAAE,MAAM,CAAC;KAClB;IA8CK,eAAe,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAkC9C,WAAW,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;CAUlD"}
@@ -50,6 +50,7 @@ class TestGenUpdatesReporter {
50
50
  try {
51
51
  if (!(0, uploader_1.checkIfResultsUploadAllowed)()) {
52
52
  logger.log("Skipped uploading generated test video");
53
+ return;
53
54
  }
54
55
  console.log("Uploading assets for the test genearation");
55
56
  const { videoUrls, traceFiles } = await (0, uploader_1.uploadTestResultsUsingPrjRepo)({
@@ -87,6 +88,11 @@ class TestGenUpdatesReporter {
87
88
  }
88
89
  }
89
90
  async sendCurrentView(buffer) {
91
+ const logger = new logger_1.CustomLogger();
92
+ if (!(0, uploader_1.checkIfResultsUploadAllowed)()) {
93
+ logger.log("Skipped uploading current view screenshot");
94
+ return;
95
+ }
90
96
  // upload current screenshot to r2 and report it to reporter
91
97
  if (!fs_extra_1.default.existsSync(path_1.default.join(process.cwd(), "gen-assets"))) {
92
98
  await fs_extra_1.default.mkdir((process.cwd(), "gen-assets"));
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@empiricalrun/test-gen",
3
- "version": "0.31.3",
3
+ "version": "0.31.6",
4
4
  "publishConfig": {
5
5
  "registry": "https://registry.npmjs.org/",
6
6
  "access": "public"