@midscene/core 0.18.1-beta-20250611081529.0 → 0.18.1-beta-20250612055514.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/dist/es/ai-model.d.ts +53 -5
  2. package/dist/es/ai-model.js +9 -1
  3. package/dist/es/{chunk-CLWM3F4J.js → chunk-EYIL4AHP.js} +340 -4
  4. package/dist/es/chunk-EYIL4AHP.js.map +1 -0
  5. package/dist/es/{chunk-TO33YH6H.js → chunk-GVJFQT7E.js} +3 -3
  6. package/dist/es/index.d.ts +4 -4
  7. package/dist/es/index.js +16 -19
  8. package/dist/es/index.js.map +1 -1
  9. package/dist/es/{llm-planning-45954424.d.ts → llm-planning-a951deb9.d.ts} +2 -2
  10. package/dist/es/{types-e7be1eb0.d.ts → types-dce56c26.d.ts} +1 -2
  11. package/dist/es/utils.d.ts +1 -1
  12. package/dist/es/utils.js +1 -1
  13. package/dist/lib/ai-model.d.ts +53 -5
  14. package/dist/lib/ai-model.js +10 -2
  15. package/dist/lib/{chunk-CLWM3F4J.js → chunk-EYIL4AHP.js} +361 -25
  16. package/dist/lib/chunk-EYIL4AHP.js.map +1 -0
  17. package/dist/lib/{chunk-TO33YH6H.js → chunk-GVJFQT7E.js} +3 -3
  18. package/dist/lib/index.d.ts +4 -4
  19. package/dist/lib/index.js +28 -31
  20. package/dist/lib/index.js.map +1 -1
  21. package/dist/lib/{llm-planning-45954424.d.ts → llm-planning-a951deb9.d.ts} +2 -2
  22. package/dist/{types/types-e7be1eb0.d.ts → lib/types-dce56c26.d.ts} +1 -2
  23. package/dist/lib/utils.d.ts +1 -1
  24. package/dist/lib/utils.js +2 -2
  25. package/dist/types/ai-model.d.ts +53 -5
  26. package/dist/types/index.d.ts +4 -4
  27. package/dist/types/{llm-planning-45954424.d.ts → llm-planning-a951deb9.d.ts} +2 -2
  28. package/dist/{lib/types-e7be1eb0.d.ts → types/types-dce56c26.d.ts} +1 -2
  29. package/dist/types/utils.d.ts +1 -1
  30. package/package.json +4 -3
  31. package/dist/es/chunk-CLWM3F4J.js.map +0 -1
  32. package/dist/lib/chunk-CLWM3F4J.js.map +0 -1
  33. /package/dist/es/{chunk-TO33YH6H.js.map → chunk-GVJFQT7E.js.map} +0 -0
  34. /package/dist/lib/{chunk-TO33YH6H.js.map → chunk-GVJFQT7E.js.map} +0 -0
@@ -1,13 +1,19 @@
1
- import { l as AIUsageInfo, T as PlanningAction, j as MidsceneYamlFlowItem } from './types-e7be1eb0.js';
1
+ import { l as AIUsageInfo, T as PlanningAction, j as MidsceneYamlFlowItem } from './types-dce56c26.js';
2
+ import OpenAI from 'openai';
2
3
  import { ChatCompletionMessageParam } from 'openai/resources';
3
4
  export { ChatCompletionMessageParam } from 'openai/resources';
4
- import { b as AIActionType } from './llm-planning-45954424.js';
5
- export { a as AiAssert, f as AiExtractElementInfo, A as AiLocateElement, g as AiLocateSection, h as adaptBboxToRect, c as callAiFn, d as describeUserPage, e as elementByPositionWithElementInfo, p as plan } from './llm-planning-45954424.js';
5
+ import { b as AIActionType } from './llm-planning-a951deb9.js';
6
+ export { a as AiAssert, f as AiExtractElementInfo, A as AiLocateElement, g as AiLocateSection, h as adaptBboxToRect, c as callAiFn, d as describeUserPage, e as elementByPositionWithElementInfo, p as plan } from './llm-planning-a951deb9.js';
6
7
  import { vlLocateMode } from '@midscene/shared/env';
8
+ import { ChromeRecordedEvent as ChromeRecordedEvent$1 } from '@midscene/recorder';
7
9
  import { actionParser } from '@ui-tars/action-parser';
8
10
  import { Size } from '@midscene/shared/types';
9
11
  import '@midscene/shared/constants';
10
12
 
13
+ declare function call(messages: ChatCompletionMessageParam[], AIActionTypeValue: AIActionType, responseFormat?: OpenAI.ChatCompletionCreateParams['response_format'] | OpenAI.ResponseFormatJSONObject): Promise<{
14
+ content: string;
15
+ usage?: AIUsageInfo;
16
+ }>;
11
17
  declare function callToGetJSONObject<T>(messages: ChatCompletionMessageParam[], AIActionTypeValue: AIActionType): Promise<{
12
18
  content: T;
13
19
  usage?: AIUsageInfo;
@@ -15,6 +21,48 @@ declare function callToGetJSONObject<T>(messages: ChatCompletionMessageParam[],
15
21
 
16
22
  declare function systemPromptToLocateElement(vlMode: ReturnType<typeof vlLocateMode>): string;
17
23
 
24
+ interface PlaywrightGenerationOptions {
25
+ testName?: string;
26
+ includeScreenshots?: boolean;
27
+ includeTimestamps?: boolean;
28
+ maxScreenshots?: number;
29
+ description?: string;
30
+ viewportSize?: {
31
+ width: number;
32
+ height: number;
33
+ };
34
+ waitForNetworkIdle?: boolean;
35
+ waitForNetworkIdleTimeout?: number;
36
+ }
37
+ interface ChromeRecordedEvent {
38
+ type: string;
39
+ timestamp: number;
40
+ url?: string;
41
+ title?: string;
42
+ elementDescription?: string;
43
+ value?: string;
44
+ pageInfo?: any;
45
+ elementRect?: any;
46
+ screenshotBefore?: string;
47
+ screenshotAfter?: string;
48
+ screenshotWithBox?: string;
49
+ }
50
+ /**
51
+ * Generates Playwright test code from recorded events
52
+ */
53
+ declare const generatePlaywrightTest: (events: ChromeRecordedEvent[], options?: PlaywrightGenerationOptions) => Promise<string>;
54
+
55
+ interface YamlGenerationOptions {
56
+ testName?: string;
57
+ includeTimestamps?: boolean;
58
+ maxScreenshots?: number;
59
+ description?: string;
60
+ }
61
+ /**
62
+ * Generates YAML test configuration from recorded events using AI
63
+ */
64
+ declare const generateYamlTest: (events: ChromeRecordedEvent$1[], options?: YamlGenerationOptions) => Promise<string>;
65
+
18
66
  declare function vlmPlanning(options: {
19
67
  userInstruction: string;
20
68
  conversationHistory: ChatCompletionMessageParam[];
@@ -26,8 +74,8 @@ declare function vlmPlanning(options: {
26
74
  actions: PlanningAction<any>[];
27
75
  actionsFromModel: ReturnType<typeof actionParser>['parsed'];
28
76
  action_summary: string;
29
- yamlFlow: MidsceneYamlFlowItem[];
77
+ yamlFlow?: MidsceneYamlFlowItem[];
30
78
  }>;
31
79
  declare function resizeImageForUiTars(imageBase64: string, size: Size): Promise<string>;
32
80
 
33
- export { callToGetJSONObject, resizeImageForUiTars, systemPromptToLocateElement, vlmPlanning };
81
+ export { AIActionType, call as callAi, callToGetJSONObject, generatePlaywrightTest, generateYamlTest, resizeImageForUiTars, systemPromptToLocateElement, vlmPlanning };
@@ -1,28 +1,36 @@
1
1
  import {
2
+ AIActionType,
2
3
  AiAssert,
3
4
  AiExtractElementInfo,
4
5
  AiLocateElement,
5
6
  AiLocateSection,
6
7
  adaptBboxToRect,
8
+ call,
7
9
  callAiFn,
8
10
  callToGetJSONObject,
9
11
  describeUserPage,
10
12
  elementByPositionWithElementInfo,
13
+ generatePlaywrightTest,
14
+ generateYamlTest,
11
15
  plan,
12
16
  resizeImageForUiTars,
13
17
  systemPromptToLocateElement,
14
18
  vlmPlanning
15
- } from "./chunk-CLWM3F4J.js";
19
+ } from "./chunk-EYIL4AHP.js";
16
20
  export {
21
+ AIActionType,
17
22
  AiAssert,
18
23
  AiExtractElementInfo,
19
24
  AiLocateElement,
20
25
  AiLocateSection,
21
26
  adaptBboxToRect,
27
+ call as callAi,
22
28
  callAiFn,
23
29
  callToGetJSONObject,
24
30
  describeUserPage,
25
31
  elementByPositionWithElementInfo,
32
+ generatePlaywrightTest,
33
+ generateYamlTest,
26
34
  plan,
27
35
  resizeImageForUiTars,
28
36
  systemPromptToLocateElement,
@@ -47,6 +47,14 @@ import { vlLocateMode } from "@midscene/shared/env";
47
47
  import { treeToList } from "@midscene/shared/extractor";
48
48
  import { compositeElementInfoImg } from "@midscene/shared/img";
49
49
  import { getDebug } from "@midscene/shared/logger";
50
+ var AIActionType = /* @__PURE__ */ ((AIActionType2) => {
51
+ AIActionType2[AIActionType2["ASSERT"] = 0] = "ASSERT";
52
+ AIActionType2[AIActionType2["INSPECT_ELEMENT"] = 1] = "INSPECT_ELEMENT";
53
+ AIActionType2[AIActionType2["EXTRACT_DATA"] = 2] = "EXTRACT_DATA";
54
+ AIActionType2[AIActionType2["PLAN"] = 3] = "PLAN";
55
+ AIActionType2[AIActionType2["DESCRIBE_ELEMENT"] = 4] = "DESCRIBE_ELEMENT";
56
+ return AIActionType2;
57
+ })(AIActionType || {});
50
58
  async function callAiFn(msgs, AIActionTypeValue) {
51
59
  assert(
52
60
  checkAIConfig(),
@@ -694,6 +702,11 @@ async function describeUserPage(context, opt) {
694
702
  const treeRoot = context.tree;
695
703
  const idElementMap = {};
696
704
  const flatElements = treeToList2(treeRoot);
705
+ if (opt?.domIncluded === true && flatElements.length >= 5e3) {
706
+ console.warn(
707
+ 'The number of elements is too large, it may cause the prompt to be too long, please use domIncluded: "visible-only" to reduce the number of elements'
708
+ );
709
+ }
697
710
  flatElements.forEach((element) => {
698
711
  idElementMap[element.id] = element;
699
712
  if (typeof element.indexId !== "undefined") {
@@ -701,12 +714,13 @@ async function describeUserPage(context, opt) {
701
714
  }
702
715
  });
703
716
  let pageDescription = "";
717
+ const visibleOnly = opt?.visibleOnly ?? opt?.domIncluded === "visible-only";
704
718
  if (opt?.domIncluded) {
705
719
  const contentTree = await descriptionOfTree(
706
720
  treeRoot,
707
721
  opt?.truncateTextLength,
708
722
  opt?.filterNonTextContent,
709
- opt?.visibleOnly
723
+ visibleOnly
710
724
  );
711
725
  const sizeDescription = describeSize({ width, height });
712
726
  pageDescription = `The size of the page: ${sizeDescription}
@@ -1448,6 +1462,325 @@ function safeParseJson(input) {
1448
1462
  throw Error(`failed to parse json response: ${input}`);
1449
1463
  }
1450
1464
 
1465
+ // src/ai-model/prompt/playwright-generator.ts
1466
+ import { PLAYWRIGHT_EXAMPLE_CODE } from "@midscene/shared/constants";
1467
+ var getScreenshotsForLLM = (events, maxScreenshots = 1) => {
1468
+ const eventsWithScreenshots = events.filter(
1469
+ (event) => event.screenshotBefore || event.screenshotAfter || event.screenshotWithBox
1470
+ );
1471
+ const sortedEvents = [...eventsWithScreenshots].sort((a, b) => {
1472
+ if (a.type === "navigation" && b.type !== "navigation")
1473
+ return -1;
1474
+ if (a.type !== "navigation" && b.type === "navigation")
1475
+ return 1;
1476
+ if (a.type === "click" && b.type !== "click")
1477
+ return -1;
1478
+ if (a.type !== "click" && b.type === "click")
1479
+ return 1;
1480
+ return 0;
1481
+ });
1482
+ const screenshots = [];
1483
+ for (const event of sortedEvents) {
1484
+ const screenshot = event.screenshotWithBox || event.screenshotAfter || event.screenshotBefore;
1485
+ if (screenshot && !screenshots.includes(screenshot)) {
1486
+ screenshots.push(screenshot);
1487
+ if (screenshots.length >= maxScreenshots)
1488
+ break;
1489
+ }
1490
+ }
1491
+ return screenshots;
1492
+ };
1493
+ var filterEventsByType = (events) => {
1494
+ return {
1495
+ navigationEvents: events.filter((event) => event.type === "navigation"),
1496
+ clickEvents: events.filter((event) => event.type === "click"),
1497
+ inputEvents: events.filter((event) => event.type === "input"),
1498
+ scrollEvents: events.filter((event) => event.type === "scroll")
1499
+ };
1500
+ };
1501
+ var createEventCounts = (filteredEvents, totalEvents) => {
1502
+ return {
1503
+ navigation: filteredEvents.navigationEvents.length,
1504
+ click: filteredEvents.clickEvents.length,
1505
+ input: filteredEvents.inputEvents.length,
1506
+ scroll: filteredEvents.scrollEvents.length,
1507
+ total: totalEvents
1508
+ };
1509
+ };
1510
+ var extractInputDescriptions = (inputEvents) => {
1511
+ return inputEvents.map((event) => ({
1512
+ description: event.elementDescription || "",
1513
+ value: event.value || ""
1514
+ })).filter((item) => item.description && item.value);
1515
+ };
1516
+ var processEventsForLLM = (events) => {
1517
+ return events.map((event) => ({
1518
+ type: event.type,
1519
+ timestamp: event.timestamp,
1520
+ url: event.url,
1521
+ title: event.title,
1522
+ elementDescription: event.elementDescription,
1523
+ value: event.value,
1524
+ pageInfo: event.pageInfo,
1525
+ elementRect: event.elementRect
1526
+ }));
1527
+ };
1528
+ var prepareEventSummary = (events, options = {}) => {
1529
+ const filteredEvents = filterEventsByType(events);
1530
+ const eventCounts = createEventCounts(filteredEvents, events.length);
1531
+ const startUrl = filteredEvents.navigationEvents.length > 0 ? filteredEvents.navigationEvents[0].url || "" : "";
1532
+ const pageTitles = filteredEvents.navigationEvents.map((event) => event.title).filter((title) => Boolean(title)).slice(0, 5);
1533
+ const clickDescriptions = filteredEvents.clickEvents.map((event) => event.elementDescription).filter((desc) => Boolean(desc)).slice(0, 10);
1534
+ const inputDescriptions = extractInputDescriptions(
1535
+ filteredEvents.inputEvents
1536
+ ).slice(0, 10);
1537
+ const urls = filteredEvents.navigationEvents.map((e) => e.url).filter((url) => Boolean(url)).slice(0, 5);
1538
+ const processedEvents = processEventsForLLM(events);
1539
+ return {
1540
+ testName: options.testName || "Automated test from recorded events",
1541
+ startUrl,
1542
+ eventCounts,
1543
+ pageTitles,
1544
+ urls,
1545
+ clickDescriptions,
1546
+ inputDescriptions,
1547
+ events: processedEvents
1548
+ };
1549
+ };
1550
+ var createMessageContent = (promptText, screenshots = [], includeScreenshots = true) => {
1551
+ const messageContent = [
1552
+ {
1553
+ type: "text",
1554
+ text: promptText
1555
+ }
1556
+ ];
1557
+ if (includeScreenshots && screenshots.length > 0) {
1558
+ messageContent.unshift({
1559
+ type: "text",
1560
+ text: "Here are screenshots from the recording session to help you understand the context:"
1561
+ });
1562
+ screenshots.forEach((screenshot) => {
1563
+ messageContent.push({
1564
+ type: "image_url",
1565
+ image_url: {
1566
+ url: screenshot
1567
+ }
1568
+ });
1569
+ });
1570
+ }
1571
+ return messageContent;
1572
+ };
1573
+ var validateEvents = (events) => {
1574
+ if (!events.length) {
1575
+ throw new Error("No events provided for test generation");
1576
+ }
1577
+ };
1578
+ var generatePlaywrightTest = async (events, options = {}) => {
1579
+ validateEvents(events);
1580
+ const summary = prepareEventSummary(events, {
1581
+ testName: options.testName,
1582
+ maxScreenshots: options.maxScreenshots || 3
1583
+ });
1584
+ const playwrightSummary = {
1585
+ ...summary,
1586
+ waitForNetworkIdle: options.waitForNetworkIdle !== false,
1587
+ waitForNetworkIdleTimeout: options.waitForNetworkIdleTimeout || 2e3,
1588
+ viewportSize: options.viewportSize || { width: 1280, height: 800 }
1589
+ };
1590
+ const screenshots = getScreenshotsForLLM(events, options.maxScreenshots || 3);
1591
+ const promptText = `Generate a Playwright test using @midscene/web/playwright that reproduces this recorded browser session. The test should be based on the following events and follow the structure of the example provided. Make the test descriptive with appropriate assertions and validations.
1592
+
1593
+ Event Summary:
1594
+ ${JSON.stringify(playwrightSummary, null, 2)}
1595
+
1596
+ Generated code should:
1597
+ 1. Import required dependencies
1598
+ 2. Set up the test with proper configuration
1599
+ 3. Include a beforeEach hook to navigate to the starting URL
1600
+ 4. Implement a test that uses Midscene AI methods (aiTap, aiInput, aiAssert, etc.)
1601
+ 5. Include appropriate assertions and validations
1602
+ 6. Follow best practices for Playwright tests
1603
+ 7. Be ready to execute without further modification
1604
+
1605
+ Respond ONLY with the complete Playwright test code, no explanations.`;
1606
+ const messageContent = createMessageContent(
1607
+ promptText,
1608
+ screenshots,
1609
+ options.includeScreenshots !== false
1610
+ );
1611
+ const systemPrompt = `You are an expert test automation engineer specializing in Playwright and Midscene.
1612
+ Your task is to generate a complete, executable Playwright test using @midscene/web/playwright that reproduces a recorded browser session.
1613
+
1614
+ ${PLAYWRIGHT_EXAMPLE_CODE}`;
1615
+ const prompt = [
1616
+ {
1617
+ role: "system",
1618
+ content: systemPrompt
1619
+ },
1620
+ {
1621
+ role: "user",
1622
+ content: messageContent
1623
+ }
1624
+ ];
1625
+ const response = await call(prompt, 2 /* EXTRACT_DATA */);
1626
+ if (response?.content && typeof response.content === "string") {
1627
+ return response.content;
1628
+ }
1629
+ throw new Error("Failed to generate Playwright test code");
1630
+ };
1631
+
1632
+ // src/ai-model/prompt/yaml-generator.ts
1633
+ import { YAML_EXAMPLE_CODE } from "@midscene/shared/constants";
1634
+ var getScreenshotsForLLM2 = (events, maxScreenshots = 1) => {
1635
+ const eventsWithScreenshots = events.filter(
1636
+ (event) => event.screenshotBefore || event.screenshotAfter || event.screenshotWithBox
1637
+ );
1638
+ const sortedEvents = [...eventsWithScreenshots].sort((a, b) => {
1639
+ if (a.type === "navigation" && b.type !== "navigation")
1640
+ return -1;
1641
+ if (a.type !== "navigation" && b.type === "navigation")
1642
+ return 1;
1643
+ if (a.type === "click" && b.type !== "click")
1644
+ return -1;
1645
+ if (a.type !== "click" && b.type === "click")
1646
+ return 1;
1647
+ return 0;
1648
+ });
1649
+ const screenshots = [];
1650
+ for (const event of sortedEvents) {
1651
+ const screenshot = event.screenshotWithBox || event.screenshotAfter || event.screenshotBefore;
1652
+ if (screenshot && !screenshots.includes(screenshot)) {
1653
+ screenshots.push(screenshot);
1654
+ if (screenshots.length >= maxScreenshots)
1655
+ break;
1656
+ }
1657
+ }
1658
+ return screenshots;
1659
+ };
1660
+ var filterEventsByType2 = (events) => {
1661
+ return {
1662
+ navigationEvents: events.filter((event) => event.type === "navigation"),
1663
+ clickEvents: events.filter((event) => event.type === "click"),
1664
+ inputEvents: events.filter((event) => event.type === "input"),
1665
+ scrollEvents: events.filter((event) => event.type === "scroll")
1666
+ };
1667
+ };
1668
+ var createEventCounts2 = (filteredEvents, totalEvents) => {
1669
+ return {
1670
+ navigation: filteredEvents.navigationEvents.length,
1671
+ click: filteredEvents.clickEvents.length,
1672
+ input: filteredEvents.inputEvents.length,
1673
+ scroll: filteredEvents.scrollEvents.length,
1674
+ total: totalEvents
1675
+ };
1676
+ };
1677
+ var extractInputDescriptions2 = (inputEvents) => {
1678
+ return inputEvents.map((event) => ({
1679
+ description: event.elementDescription || "",
1680
+ value: event.value || ""
1681
+ })).filter((item) => item.description && item.value);
1682
+ };
1683
+ var processEventsForLLM2 = (events) => {
1684
+ return events.map((event) => ({
1685
+ type: event.type,
1686
+ timestamp: event.timestamp,
1687
+ url: event.url,
1688
+ title: event.title,
1689
+ elementDescription: event.elementDescription,
1690
+ value: event.value,
1691
+ pageInfo: event.pageInfo,
1692
+ elementRect: event.elementRect
1693
+ }));
1694
+ };
1695
+ var prepareEventSummary2 = (events, options = {}) => {
1696
+ const filteredEvents = filterEventsByType2(events);
1697
+ const eventCounts = createEventCounts2(filteredEvents, events.length);
1698
+ const startUrl = filteredEvents.navigationEvents.length > 0 ? filteredEvents.navigationEvents[0].url || "" : "";
1699
+ const pageTitles = filteredEvents.navigationEvents.map((event) => event.title).filter((title) => Boolean(title)).slice(0, 5);
1700
+ const clickDescriptions = filteredEvents.clickEvents.map((event) => event.elementDescription).filter((desc) => Boolean(desc)).slice(0, 10);
1701
+ const inputDescriptions = extractInputDescriptions2(
1702
+ filteredEvents.inputEvents
1703
+ ).slice(0, 10);
1704
+ const urls = filteredEvents.navigationEvents.map((e) => e.url).filter((url) => Boolean(url)).slice(0, 5);
1705
+ const processedEvents = processEventsForLLM2(events);
1706
+ return {
1707
+ testName: options.testName || "Automated test from recorded events",
1708
+ startUrl,
1709
+ eventCounts,
1710
+ pageTitles,
1711
+ urls,
1712
+ clickDescriptions,
1713
+ inputDescriptions,
1714
+ events: processedEvents
1715
+ };
1716
+ };
1717
+ var validateEvents2 = (events) => {
1718
+ if (!events.length) {
1719
+ throw new Error("No events provided for test generation");
1720
+ }
1721
+ };
1722
+ var generateYamlTest = async (events, options = {}) => {
1723
+ try {
1724
+ validateEvents2(events);
1725
+ const summary = prepareEventSummary2(events, {
1726
+ testName: options.testName,
1727
+ maxScreenshots: options.maxScreenshots || 3
1728
+ });
1729
+ const yamlSummary = {
1730
+ ...summary,
1731
+ includeTimestamps: options.includeTimestamps || false
1732
+ };
1733
+ const screenshots = getScreenshotsForLLM2(
1734
+ events,
1735
+ options.maxScreenshots || 3
1736
+ );
1737
+ const prompt = [
1738
+ {
1739
+ role: "system",
1740
+ content: `You are an expert in Midscene.js YAML test generation. Generate clean, accurate YAML following these rules: ${YAML_EXAMPLE_CODE}`
1741
+ },
1742
+ {
1743
+ role: "user",
1744
+ content: `Generate YAML test for Midscene.js automation from recorded browser events.
1745
+
1746
+ Event Summary:
1747
+ ${JSON.stringify(yamlSummary, null, 2)}
1748
+
1749
+ Convert events:
1750
+ - navigation → target.url
1751
+ - click → aiTap with element description
1752
+ - input → aiInput with value and locate
1753
+ - scroll → aiScroll with appropriate direction
1754
+ - Add aiAssert for important state changes
1755
+
1756
+ Respond with YAML only, no explanations.`
1757
+ }
1758
+ ];
1759
+ if (screenshots.length > 0) {
1760
+ prompt.push({
1761
+ role: "user",
1762
+ content: "Here are screenshots from the recording session to help you understand the context:"
1763
+ });
1764
+ prompt.push({
1765
+ role: "user",
1766
+ content: screenshots.map((screenshot) => ({
1767
+ type: "image_url",
1768
+ image_url: {
1769
+ url: screenshot
1770
+ }
1771
+ }))
1772
+ });
1773
+ }
1774
+ const response = await call(prompt, 2 /* EXTRACT_DATA */);
1775
+ if (response?.content && typeof response.content === "string") {
1776
+ return response.content;
1777
+ }
1778
+ throw new Error("Failed to generate YAML test configuration");
1779
+ } catch (error) {
1780
+ throw new Error(`Failed to generate YAML test: ${error}`);
1781
+ }
1782
+ };
1783
+
1451
1784
  // src/ai-model/inspect.ts
1452
1785
  import {
1453
1786
  MIDSCENE_USE_QWEN_VL as MIDSCENE_USE_QWEN_VL2,
@@ -2179,8 +2512,7 @@ async function vlmPlanning(options) {
2179
2512
  return {
2180
2513
  actions: transformActions,
2181
2514
  actionsFromModel: parsed,
2182
- action_summary: getSummary(res.content),
2183
- yamlFlow: buildYamlFlowFromPlans(transformActions)
2515
+ action_summary: getSummary(res.content)
2184
2516
  };
2185
2517
  }
2186
2518
  function convertBboxToCoordinates(text) {
@@ -2229,10 +2561,14 @@ export {
2229
2561
  systemPromptToLocateElement,
2230
2562
  elementByPositionWithElementInfo,
2231
2563
  describeUserPage,
2564
+ call,
2232
2565
  callToGetJSONObject,
2566
+ AIActionType,
2233
2567
  callAiFn,
2234
2568
  adaptBboxToRect,
2235
2569
  expandSearchArea,
2570
+ generatePlaywrightTest,
2571
+ generateYamlTest,
2236
2572
  AiLocateElement,
2237
2573
  AiLocateSection,
2238
2574
  AiExtractElementInfo,
@@ -2242,4 +2578,4 @@ export {
2242
2578
  resizeImageForUiTars
2243
2579
  };
2244
2580
 
2245
- //# sourceMappingURL=chunk-CLWM3F4J.js.map
2581
+ //# sourceMappingURL=chunk-EYIL4AHP.js.map