npm - @midscene/core - Versions diffs - 0.18.1-beta-20250611081529.0 → 0.18.1-beta-20250612055514.0 - Mend

@midscene/core 0.18.1-beta-20250611081529.0 → 0.18.1-beta-20250612055514.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

package/dist/es/ai-model.d.ts +53 -5
package/dist/es/ai-model.js +9 -1
package/dist/es/{chunk-CLWM3F4J.js → chunk-EYIL4AHP.js} +340 -4
package/dist/es/chunk-EYIL4AHP.js.map +1 -0
package/dist/es/{chunk-TO33YH6H.js → chunk-GVJFQT7E.js} +3 -3
package/dist/es/index.d.ts +4 -4
package/dist/es/index.js +16 -19
package/dist/es/index.js.map +1 -1
package/dist/es/{llm-planning-45954424.d.ts → llm-planning-a951deb9.d.ts} +2 -2
package/dist/es/{types-e7be1eb0.d.ts → types-dce56c26.d.ts} +1 -2
package/dist/es/utils.d.ts +1 -1
package/dist/es/utils.js +1 -1
package/dist/lib/ai-model.d.ts +53 -5
package/dist/lib/ai-model.js +10 -2
package/dist/lib/{chunk-CLWM3F4J.js → chunk-EYIL4AHP.js} +361 -25
package/dist/lib/chunk-EYIL4AHP.js.map +1 -0
package/dist/lib/{chunk-TO33YH6H.js → chunk-GVJFQT7E.js} +3 -3
package/dist/lib/index.d.ts +4 -4
package/dist/lib/index.js +28 -31
package/dist/lib/index.js.map +1 -1
package/dist/lib/{llm-planning-45954424.d.ts → llm-planning-a951deb9.d.ts} +2 -2
package/dist/{types/types-e7be1eb0.d.ts → lib/types-dce56c26.d.ts} +1 -2
package/dist/lib/utils.d.ts +1 -1
package/dist/lib/utils.js +2 -2
package/dist/types/ai-model.d.ts +53 -5
package/dist/types/index.d.ts +4 -4
package/dist/types/{llm-planning-45954424.d.ts → llm-planning-a951deb9.d.ts} +2 -2
package/dist/{lib/types-e7be1eb0.d.ts → types/types-dce56c26.d.ts} +1 -2
package/dist/types/utils.d.ts +1 -1
package/package.json +4 -3
package/dist/es/chunk-CLWM3F4J.js.map +0 -1
package/dist/lib/chunk-CLWM3F4J.js.map +0 -1
/package/dist/es/{chunk-TO33YH6H.js.map → chunk-GVJFQT7E.js.map} +0 -0
/package/dist/lib/{chunk-TO33YH6H.js.map → chunk-GVJFQT7E.js.map} +0 -0

package/dist/es/ai-model.d.ts CHANGED Viewed

@@ -1,13 +1,19 @@
-import { l as AIUsageInfo, T as PlanningAction, j as MidsceneYamlFlowItem } from './types-e7be1eb0.js';
+import { l as AIUsageInfo, T as PlanningAction, j as MidsceneYamlFlowItem } from './types-dce56c26.js';
+import OpenAI from 'openai';
 import { ChatCompletionMessageParam } from 'openai/resources';
 export { ChatCompletionMessageParam } from 'openai/resources';
-import { b as AIActionType } from './llm-planning-45954424.js';
-export { a as AiAssert, f as AiExtractElementInfo, A as AiLocateElement, g as AiLocateSection, h as adaptBboxToRect, c as callAiFn, d as describeUserPage, e as elementByPositionWithElementInfo, p as plan } from './llm-planning-45954424.js';
+import { b as AIActionType } from './llm-planning-a951deb9.js';
+export { a as AiAssert, f as AiExtractElementInfo, A as AiLocateElement, g as AiLocateSection, h as adaptBboxToRect, c as callAiFn, d as describeUserPage, e as elementByPositionWithElementInfo, p as plan } from './llm-planning-a951deb9.js';
 import { vlLocateMode } from '@midscene/shared/env';
+import { ChromeRecordedEvent as ChromeRecordedEvent$1 } from '@midscene/recorder';
 import { actionParser } from '@ui-tars/action-parser';
 import { Size } from '@midscene/shared/types';
 import '@midscene/shared/constants';
+declare function call(messages: ChatCompletionMessageParam[], AIActionTypeValue: AIActionType, responseFormat?: OpenAI.ChatCompletionCreateParams['response_format'] | OpenAI.ResponseFormatJSONObject): Promise<{
+    content: string;
+    usage?: AIUsageInfo;
+}>;
 declare function callToGetJSONObject<T>(messages: ChatCompletionMessageParam[], AIActionTypeValue: AIActionType): Promise<{
     content: T;
     usage?: AIUsageInfo;
@@ -15,6 +21,48 @@ declare function callToGetJSONObject<T>(messages: ChatCompletionMessageParam[],
 declare function systemPromptToLocateElement(vlMode: ReturnType<typeof vlLocateMode>): string;
+interface PlaywrightGenerationOptions {
+    testName?: string;
+    includeScreenshots?: boolean;
+    includeTimestamps?: boolean;
+    maxScreenshots?: number;
+    description?: string;
+    viewportSize?: {
+        width: number;
+        height: number;
+    };
+    waitForNetworkIdle?: boolean;
+    waitForNetworkIdleTimeout?: number;
+}
+interface ChromeRecordedEvent {
+    type: string;
+    timestamp: number;
+    url?: string;
+    title?: string;
+    elementDescription?: string;
+    value?: string;
+    pageInfo?: any;
+    elementRect?: any;
+    screenshotBefore?: string;
+    screenshotAfter?: string;
+    screenshotWithBox?: string;
+}
+/**
+ * Generates Playwright test code from recorded events
+ */
+declare const generatePlaywrightTest: (events: ChromeRecordedEvent[], options?: PlaywrightGenerationOptions) => Promise<string>;
+interface YamlGenerationOptions {
+    testName?: string;
+    includeTimestamps?: boolean;
+    maxScreenshots?: number;
+    description?: string;
+}
+/**
+ * Generates YAML test configuration from recorded events using AI
+ */
+declare const generateYamlTest: (events: ChromeRecordedEvent$1[], options?: YamlGenerationOptions) => Promise<string>;
 declare function vlmPlanning(options: {
     userInstruction: string;
     conversationHistory: ChatCompletionMessageParam[];
@@ -26,8 +74,8 @@ declare function vlmPlanning(options: {
     actions: PlanningAction<any>[];
     actionsFromModel: ReturnType<typeof actionParser>['parsed'];
     action_summary: string;
-    yamlFlow: MidsceneYamlFlowItem[];
+    yamlFlow?: MidsceneYamlFlowItem[];
 }>;
 declare function resizeImageForUiTars(imageBase64: string, size: Size): Promise<string>;
-export { callToGetJSONObject, resizeImageForUiTars, systemPromptToLocateElement, vlmPlanning };
+export { AIActionType, call as callAi, callToGetJSONObject, generatePlaywrightTest, generateYamlTest, resizeImageForUiTars, systemPromptToLocateElement, vlmPlanning };

package/dist/es/ai-model.js CHANGED Viewed

@@ -1,28 +1,36 @@
 import {
+  AIActionType,
   AiAssert,
   AiExtractElementInfo,
   AiLocateElement,
   AiLocateSection,
   adaptBboxToRect,
+  call,
   callAiFn,
   callToGetJSONObject,
   describeUserPage,
   elementByPositionWithElementInfo,
+  generatePlaywrightTest,
+  generateYamlTest,
   plan,
   resizeImageForUiTars,
   systemPromptToLocateElement,
   vlmPlanning
-} from "./chunk-CLWM3F4J.js";
+} from "./chunk-EYIL4AHP.js";
 export {
+  AIActionType,
   AiAssert,
   AiExtractElementInfo,
   AiLocateElement,
   AiLocateSection,
   adaptBboxToRect,
+  call as callAi,
   callAiFn,
   callToGetJSONObject,
   describeUserPage,
   elementByPositionWithElementInfo,
+  generatePlaywrightTest,
+  generateYamlTest,
   plan,
   resizeImageForUiTars,
   systemPromptToLocateElement,

package/dist/es/{chunk-CLWM3F4J.js → chunk-EYIL4AHP.js} RENAMED Viewed

@@ -47,6 +47,14 @@ import { vlLocateMode } from "@midscene/shared/env";
 import { treeToList } from "@midscene/shared/extractor";
 import { compositeElementInfoImg } from "@midscene/shared/img";
 import { getDebug } from "@midscene/shared/logger";
+var AIActionType = /* @__PURE__ */ ((AIActionType2) => {
+  AIActionType2[AIActionType2["ASSERT"] = 0] = "ASSERT";
+  AIActionType2[AIActionType2["INSPECT_ELEMENT"] = 1] = "INSPECT_ELEMENT";
+  AIActionType2[AIActionType2["EXTRACT_DATA"] = 2] = "EXTRACT_DATA";
+  AIActionType2[AIActionType2["PLAN"] = 3] = "PLAN";
+  AIActionType2[AIActionType2["DESCRIBE_ELEMENT"] = 4] = "DESCRIBE_ELEMENT";
+  return AIActionType2;
+})(AIActionType || {});
 async function callAiFn(msgs, AIActionTypeValue) {
   assert(
     checkAIConfig(),
@@ -694,6 +702,11 @@ async function describeUserPage(context, opt) {
   const treeRoot = context.tree;
   const idElementMap = {};
   const flatElements = treeToList2(treeRoot);
+  if (opt?.domIncluded === true && flatElements.length >= 5e3) {
+    console.warn(
+      'The number of elements is too large, it may cause the prompt to be too long, please use domIncluded: "visible-only" to reduce the number of elements'
+    );
+  }
   flatElements.forEach((element) => {
     idElementMap[element.id] = element;
     if (typeof element.indexId !== "undefined") {
@@ -701,12 +714,13 @@ async function describeUserPage(context, opt) {
     }
   });
   let pageDescription = "";
+  const visibleOnly = opt?.visibleOnly ?? opt?.domIncluded === "visible-only";
   if (opt?.domIncluded) {
     const contentTree = await descriptionOfTree(
       treeRoot,
       opt?.truncateTextLength,
       opt?.filterNonTextContent,
-      opt?.visibleOnly
+      visibleOnly
     );
     const sizeDescription = describeSize({ width, height });
     pageDescription = `The size of the page: ${sizeDescription}
@@ -1448,6 +1462,325 @@ function safeParseJson(input) {
   throw Error(`failed to parse json response: ${input}`);
 }
+// src/ai-model/prompt/playwright-generator.ts
+import { PLAYWRIGHT_EXAMPLE_CODE } from "@midscene/shared/constants";
+var getScreenshotsForLLM = (events, maxScreenshots = 1) => {
+  const eventsWithScreenshots = events.filter(
+    (event) => event.screenshotBefore || event.screenshotAfter || event.screenshotWithBox
+  );
+  const sortedEvents = [...eventsWithScreenshots].sort((a, b) => {
+    if (a.type === "navigation" && b.type !== "navigation")
+      return -1;
+    if (a.type !== "navigation" && b.type === "navigation")
+      return 1;
+    if (a.type === "click" && b.type !== "click")
+      return -1;
+    if (a.type !== "click" && b.type === "click")
+      return 1;
+    return 0;
+  });
+  const screenshots = [];
+  for (const event of sortedEvents) {
+    const screenshot = event.screenshotWithBox || event.screenshotAfter || event.screenshotBefore;
+    if (screenshot && !screenshots.includes(screenshot)) {
+      screenshots.push(screenshot);
+      if (screenshots.length >= maxScreenshots)
+        break;
+    }
+  }
+  return screenshots;
+};
+var filterEventsByType = (events) => {
+  return {
+    navigationEvents: events.filter((event) => event.type === "navigation"),
+    clickEvents: events.filter((event) => event.type === "click"),
+    inputEvents: events.filter((event) => event.type === "input"),
+    scrollEvents: events.filter((event) => event.type === "scroll")
+  };
+};
+var createEventCounts = (filteredEvents, totalEvents) => {
+  return {
+    navigation: filteredEvents.navigationEvents.length,
+    click: filteredEvents.clickEvents.length,
+    input: filteredEvents.inputEvents.length,
+    scroll: filteredEvents.scrollEvents.length,
+    total: totalEvents
+  };
+};
+var extractInputDescriptions = (inputEvents) => {
+  return inputEvents.map((event) => ({
+    description: event.elementDescription || "",
+    value: event.value || ""
+  })).filter((item) => item.description && item.value);
+};
+var processEventsForLLM = (events) => {
+  return events.map((event) => ({
+    type: event.type,
+    timestamp: event.timestamp,
+    url: event.url,
+    title: event.title,
+    elementDescription: event.elementDescription,
+    value: event.value,
+    pageInfo: event.pageInfo,
+    elementRect: event.elementRect
+  }));
+};
+var prepareEventSummary = (events, options = {}) => {
+  const filteredEvents = filterEventsByType(events);
+  const eventCounts = createEventCounts(filteredEvents, events.length);
+  const startUrl = filteredEvents.navigationEvents.length > 0 ? filteredEvents.navigationEvents[0].url || "" : "";
+  const pageTitles = filteredEvents.navigationEvents.map((event) => event.title).filter((title) => Boolean(title)).slice(0, 5);
+  const clickDescriptions = filteredEvents.clickEvents.map((event) => event.elementDescription).filter((desc) => Boolean(desc)).slice(0, 10);
+  const inputDescriptions = extractInputDescriptions(
+    filteredEvents.inputEvents
+  ).slice(0, 10);
+  const urls = filteredEvents.navigationEvents.map((e) => e.url).filter((url) => Boolean(url)).slice(0, 5);
+  const processedEvents = processEventsForLLM(events);
+  return {
+    testName: options.testName || "Automated test from recorded events",
+    startUrl,
+    eventCounts,
+    pageTitles,
+    urls,
+    clickDescriptions,
+    inputDescriptions,
+    events: processedEvents
+  };
+};
+var createMessageContent = (promptText, screenshots = [], includeScreenshots = true) => {
+  const messageContent = [
+    {
+      type: "text",
+      text: promptText
+    }
+  ];
+  if (includeScreenshots && screenshots.length > 0) {
+    messageContent.unshift({
+      type: "text",
+      text: "Here are screenshots from the recording session to help you understand the context:"
+    });
+    screenshots.forEach((screenshot) => {
+      messageContent.push({
+        type: "image_url",
+        image_url: {
+          url: screenshot
+        }
+      });
+    });
+  }
+  return messageContent;
+};
+var validateEvents = (events) => {
+  if (!events.length) {
+    throw new Error("No events provided for test generation");
+  }
+};
+var generatePlaywrightTest = async (events, options = {}) => {
+  validateEvents(events);
+  const summary = prepareEventSummary(events, {
+    testName: options.testName,
+    maxScreenshots: options.maxScreenshots || 3
+  });
+  const playwrightSummary = {
+    ...summary,
+    waitForNetworkIdle: options.waitForNetworkIdle !== false,
+    waitForNetworkIdleTimeout: options.waitForNetworkIdleTimeout || 2e3,
+    viewportSize: options.viewportSize || { width: 1280, height: 800 }
+  };
+  const screenshots = getScreenshotsForLLM(events, options.maxScreenshots || 3);
+  const promptText = `Generate a Playwright test using @midscene/web/playwright that reproduces this recorded browser session. The test should be based on the following events and follow the structure of the example provided. Make the test descriptive with appropriate assertions and validations.
+Event Summary:
+${JSON.stringify(playwrightSummary, null, 2)}
+Generated code should:
+1. Import required dependencies
+2. Set up the test with proper configuration
+3. Include a beforeEach hook to navigate to the starting URL
+4. Implement a test that uses Midscene AI methods (aiTap, aiInput, aiAssert, etc.)
+5. Include appropriate assertions and validations
+6. Follow best practices for Playwright tests
+7. Be ready to execute without further modification
+Respond ONLY with the complete Playwright test code, no explanations.`;
+  const messageContent = createMessageContent(
+    promptText,
+    screenshots,
+    options.includeScreenshots !== false
+  );
+  const systemPrompt = `You are an expert test automation engineer specializing in Playwright and Midscene.
+Your task is to generate a complete, executable Playwright test using @midscene/web/playwright that reproduces a recorded browser session.
+${PLAYWRIGHT_EXAMPLE_CODE}`;
+  const prompt = [
+    {
+      role: "system",
+      content: systemPrompt
+    },
+    {
+      role: "user",
+      content: messageContent
+    }
+  ];
+  const response = await call(prompt, 2 /* EXTRACT_DATA */);
+  if (response?.content && typeof response.content === "string") {
+    return response.content;
+  }
+  throw new Error("Failed to generate Playwright test code");
+};
+// src/ai-model/prompt/yaml-generator.ts
+import { YAML_EXAMPLE_CODE } from "@midscene/shared/constants";
+var getScreenshotsForLLM2 = (events, maxScreenshots = 1) => {
+  const eventsWithScreenshots = events.filter(
+    (event) => event.screenshotBefore || event.screenshotAfter || event.screenshotWithBox
+  );
+  const sortedEvents = [...eventsWithScreenshots].sort((a, b) => {
+    if (a.type === "navigation" && b.type !== "navigation")
+      return -1;
+    if (a.type !== "navigation" && b.type === "navigation")
+      return 1;
+    if (a.type === "click" && b.type !== "click")
+      return -1;
+    if (a.type !== "click" && b.type === "click")
+      return 1;
+    return 0;
+  });
+  const screenshots = [];
+  for (const event of sortedEvents) {
+    const screenshot = event.screenshotWithBox || event.screenshotAfter || event.screenshotBefore;
+    if (screenshot && !screenshots.includes(screenshot)) {
+      screenshots.push(screenshot);
+      if (screenshots.length >= maxScreenshots)
+        break;
+    }
+  }
+  return screenshots;
+};
+var filterEventsByType2 = (events) => {
+  return {
+    navigationEvents: events.filter((event) => event.type === "navigation"),
+    clickEvents: events.filter((event) => event.type === "click"),
+    inputEvents: events.filter((event) => event.type === "input"),
+    scrollEvents: events.filter((event) => event.type === "scroll")
+  };
+};
+var createEventCounts2 = (filteredEvents, totalEvents) => {
+  return {
+    navigation: filteredEvents.navigationEvents.length,
+    click: filteredEvents.clickEvents.length,
+    input: filteredEvents.inputEvents.length,
+    scroll: filteredEvents.scrollEvents.length,
+    total: totalEvents
+  };
+};
+var extractInputDescriptions2 = (inputEvents) => {
+  return inputEvents.map((event) => ({
+    description: event.elementDescription || "",
+    value: event.value || ""
+  })).filter((item) => item.description && item.value);
+};
+var processEventsForLLM2 = (events) => {
+  return events.map((event) => ({
+    type: event.type,
+    timestamp: event.timestamp,
+    url: event.url,
+    title: event.title,
+    elementDescription: event.elementDescription,
+    value: event.value,
+    pageInfo: event.pageInfo,
+    elementRect: event.elementRect
+  }));
+};
+var prepareEventSummary2 = (events, options = {}) => {
+  const filteredEvents = filterEventsByType2(events);
+  const eventCounts = createEventCounts2(filteredEvents, events.length);
+  const startUrl = filteredEvents.navigationEvents.length > 0 ? filteredEvents.navigationEvents[0].url || "" : "";
+  const pageTitles = filteredEvents.navigationEvents.map((event) => event.title).filter((title) => Boolean(title)).slice(0, 5);
+  const clickDescriptions = filteredEvents.clickEvents.map((event) => event.elementDescription).filter((desc) => Boolean(desc)).slice(0, 10);
+  const inputDescriptions = extractInputDescriptions2(
+    filteredEvents.inputEvents
+  ).slice(0, 10);
+  const urls = filteredEvents.navigationEvents.map((e) => e.url).filter((url) => Boolean(url)).slice(0, 5);
+  const processedEvents = processEventsForLLM2(events);
+  return {
+    testName: options.testName || "Automated test from recorded events",
+    startUrl,
+    eventCounts,
+    pageTitles,
+    urls,
+    clickDescriptions,
+    inputDescriptions,
+    events: processedEvents
+  };
+};
+var validateEvents2 = (events) => {
+  if (!events.length) {
+    throw new Error("No events provided for test generation");
+  }
+};
+var generateYamlTest = async (events, options = {}) => {
+  try {
+    validateEvents2(events);
+    const summary = prepareEventSummary2(events, {
+      testName: options.testName,
+      maxScreenshots: options.maxScreenshots || 3
+    });
+    const yamlSummary = {
+      ...summary,
+      includeTimestamps: options.includeTimestamps || false
+    };
+    const screenshots = getScreenshotsForLLM2(
+      events,
+      options.maxScreenshots || 3
+    );
+    const prompt = [
+      {
+        role: "system",
+        content: `You are an expert in Midscene.js YAML test generation. Generate clean, accurate YAML following these rules: ${YAML_EXAMPLE_CODE}`
+      },
+      {
+        role: "user",
+        content: `Generate YAML test for Midscene.js automation from recorded browser events.
+Event Summary:
+${JSON.stringify(yamlSummary, null, 2)}
+Convert events:
+- navigation → target.url
+- click → aiTap with element description
+- input → aiInput with value and locate
+- scroll → aiScroll with appropriate direction
+- Add aiAssert for important state changes
+Respond with YAML only, no explanations.`
+      }
+    ];
+    if (screenshots.length > 0) {
+      prompt.push({
+        role: "user",
+        content: "Here are screenshots from the recording session to help you understand the context:"
+      });
+      prompt.push({
+        role: "user",
+        content: screenshots.map((screenshot) => ({
+          type: "image_url",
+          image_url: {
+            url: screenshot
+          }
+        }))
+      });
+    }
+    const response = await call(prompt, 2 /* EXTRACT_DATA */);
+    if (response?.content && typeof response.content === "string") {
+      return response.content;
+    }
+    throw new Error("Failed to generate YAML test configuration");
+  } catch (error) {
+    throw new Error(`Failed to generate YAML test: ${error}`);
+  }
+};
 // src/ai-model/inspect.ts
 import {
   MIDSCENE_USE_QWEN_VL as MIDSCENE_USE_QWEN_VL2,
@@ -2179,8 +2512,7 @@ async function vlmPlanning(options) {
   return {
     actions: transformActions,
     actionsFromModel: parsed,
-    action_summary: getSummary(res.content),
-    yamlFlow: buildYamlFlowFromPlans(transformActions)
+    action_summary: getSummary(res.content)
   };
 }
 function convertBboxToCoordinates(text) {
@@ -2229,10 +2561,14 @@ export {
   systemPromptToLocateElement,
   elementByPositionWithElementInfo,
   describeUserPage,
+  call,
   callToGetJSONObject,
+  AIActionType,
   callAiFn,
   adaptBboxToRect,
   expandSearchArea,
+  generatePlaywrightTest,
+  generateYamlTest,
   AiLocateElement,
   AiLocateSection,
   AiExtractElementInfo,
@@ -2242,4 +2578,4 @@ export {
   resizeImageForUiTars
 };
-//# sourceMappingURL=chunk-CLWM3F4J.js.map
+//# sourceMappingURL=chunk-EYIL4AHP.js.map