npm - @midscene/core - Versions diffs - 0.23.5-beta-20250728070606.0 → 0.24.1-beta-20250728094050.0 - Mend

@midscene/core 0.23.5-beta-20250728070606.0 → 0.24.1-beta-20250728094050.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34)

package/dist/es/ai-model.d.ts +3 -3
package/dist/es/ai-model.js +1 -1
package/dist/es/{chunk-QSWMWTO4.js → chunk-AKL2GGPA.js} +3 -3
package/dist/es/{chunk-QFXN2AP7.js → chunk-YCHAOUOW.js} +37 -73
package/dist/es/chunk-YCHAOUOW.js.map +1 -0
package/dist/es/index.d.ts +8 -6
package/dist/es/index.js +13 -6
package/dist/es/index.js.map +1 -1
package/dist/es/{llm-planning-4ac580dc.d.ts → llm-planning-3f26578e.d.ts} +3 -3
package/dist/es/{types-27dc17c9.d.ts → types-c519555a.d.ts} +4 -6
package/dist/es/utils.d.ts +1 -1
package/dist/es/utils.js +1 -1
package/dist/lib/ai-model.d.ts +3 -3
package/dist/lib/ai-model.js +2 -2
package/dist/lib/{chunk-QSWMWTO4.js → chunk-AKL2GGPA.js} +3 -3
package/dist/lib/{chunk-QFXN2AP7.js → chunk-YCHAOUOW.js} +36 -72
package/dist/lib/chunk-YCHAOUOW.js.map +1 -0
package/dist/lib/index.d.ts +8 -6
package/dist/lib/index.js +22 -15
package/dist/lib/index.js.map +1 -1
package/dist/lib/{llm-planning-4ac580dc.d.ts → llm-planning-3f26578e.d.ts} +3 -3
package/dist/{types/types-27dc17c9.d.ts → lib/types-c519555a.d.ts} +4 -6
package/dist/lib/utils.d.ts +1 -1
package/dist/lib/utils.js +2 -2
package/dist/types/ai-model.d.ts +3 -3
package/dist/types/index.d.ts +8 -6
package/dist/types/{llm-planning-4ac580dc.d.ts → llm-planning-3f26578e.d.ts} +3 -3
package/dist/{lib/types-27dc17c9.d.ts → types/types-c519555a.d.ts} +4 -6
package/dist/types/utils.d.ts +1 -1
package/package.json +4 -4
package/dist/es/chunk-QFXN2AP7.js.map +0 -1
package/dist/lib/chunk-QFXN2AP7.js.map +0 -1
package/dist/es/{chunk-QSWMWTO4.js.map → chunk-AKL2GGPA.js.map} +0 -0
package/dist/lib/{chunk-QSWMWTO4.js.map → chunk-AKL2GGPA.js.map} +0 -0

package/dist/es/{chunk-QFXN2AP7.js → chunk-YCHAOUOW.js} RENAMED

@@ -386,31 +386,47 @@ You are an expert in software testing.
 ## Objective:
 - Identify elements in screenshots and text that match the user's description.
 - Give the coordinates of the element that matches the user's description best in the screenshot.
+- Determine whether the user's description is order-sensitive (e.g., contains phrases like 'the third item in the list', 'the last button', etc.).
 ## Output Format:
 \`\`\`json
 {
   "bbox": [number, number, number, number],  // ${bboxComment}
-  "errors"?: string[]
+  "errors"?: string[],
+  "isOrderSensitive": boolean // Whether the targetElementDescription is order-sensitive (true/false)
 }
 \`\`\`
 Fields:
 * \`bbox\` is the bounding box of the element that matches the user's description best in the screenshot
+* \`isOrderSensitive\` is a boolean indicating whether the user's description is order-sensitive (true/false)
 * \`errors\` is an optional array of error messages (if any)
-For example, when an element is found:
+Order-sensitive means the description contains phrases like:
+- "the third item in the list"
+- "the last button"
+- "the first input box"
+- "the second row"
+Not order-sensitive means the description is like:
+- "confirm button"
+- "search box"
+- "password input"
+For example, when an element is found and the description is order-sensitive:
 \`\`\`json
 {
   "bbox": [100, 100, 200, 200],
+  "isOrderSensitive": true,
   "errors": []
 }
 \`\`\`
-When no element is found:
+When no element is found and the description is not order-sensitive:
 \`\`\`json
 {
   "bbox": [],
+  "isOrderSensitive": false,
   "errors": ["I can see ..., but {some element} is not found"]
 }
 \`\`\`
@@ -423,6 +439,7 @@ You are an expert in software page image (2D) and page element text analysis.
 ## Objective:
 - Identify elements in screenshots and text that match the user's description.
 - Return JSON data containing the selection reason and element ID.
+- Determine whether the user's description is order-sensitive (e.g., contains phrases like 'the third item in the list', 'the last button', etc.).
 ## Skills:
 - Image analysis and recognition
@@ -434,6 +451,7 @@ You are an expert in software page image (2D) and page element text analysis.
 2. Based on the user's description, locate the target element ID in the list of element descriptions and the screenshot.
 3. Found the required number of elements
 4. Return JSON data containing the selection reason and element ID.
+5. Judge whether the user's description is order-sensitive (see below for definition and examples).
 ## Constraints:
 - Strictly adhere to the specified location when describing the required element; do not select elements from other locations.
@@ -443,6 +461,10 @@ You are an expert in software page image (2D) and page element text analysis.
 - The returned data must conform to the specified JSON format.
 - The returned value id information must use the id from element info (important: **use id not indexId, id is hash content**)
+## Order-Sensitive Definition:
+- If the description contains phrases like "the third item in the list", "the last button", "the first input box", "the second row", etc., it is order-sensitive (isOrderSensitive = true).
+- If the description is like "confirm button", "search box", "password input", etc., it is not order-sensitive (isOrderSensitive = false).
 ## Output Format:
 Please return the result in JSON format as follows:
@@ -458,6 +480,7 @@ Please return the result in JSON format as follows:
     }
     // More elements...
   ],
+  "isOrderSensitive": true, // or false, depending on the user's description
   "errors": [] // Array of strings containing any error messages
 }
 \`\`\`
@@ -546,6 +569,7 @@ Output Example:
       "id": "1231"
     }
   ],
+  "isOrderSensitive": true,
   "errors": []
 }
 \`\`\`
@@ -583,6 +607,10 @@ var locatorSchema = {
           },
           description: "List of found elements"
         },
+        isOrderSensitive: {
+          type: "boolean",
+          description: "Whether the targetElementDescription is order-sensitive (true/false)"
+        },
         errors: {
           type: "array",
           items: {
@@ -591,7 +619,7 @@ var locatorSchema = {
           description: "List of error messages, if any"
         }
       },
-      required: ["elements", "errors"],
+      required: ["elements", "isOrderSensitive", "errors"],
       additionalProperties: false
     }
   }
@@ -1970,11 +1998,7 @@ import {
   getAIConfigInBoolean as getAIConfigInBoolean2,
   vlLocateMode as vlLocateMode4
 } from "@midscene/shared/env";
-import {
-  cropByRect,
-  paddingToMatchBlockByBase64,
-  transformImgPathToBase64Str
-} from "@midscene/shared/img";
+import { cropByRect, paddingToMatchBlockByBase64 } from "@midscene/shared/img";
 import { getDebug as getDebug3 } from "@midscene/shared/logger";
 import { assert as assert4 } from "@midscene/shared/utils";
@@ -1988,8 +2012,6 @@ The user will give you a screenshot, the contents of it (optional), and some dat
 If a key specifies a JSON data type (such as Number, String, Boolean, Object, Array), ensure the returned value strictly matches that data type.
-If the user provides multiple reference images, please carefully compare the reference images with the screenshot and provide the correct answer for <DATA_DEMAND>.
 Return in the following JSON format:
 {
   data: any, // the extracted data. Make sure both the value and scheme meet the DATA_DEMAND. If you want to write some description in this field, use the same language as the DATA_DEMAND.
@@ -2236,7 +2258,8 @@ async function AiLocateElement(options) {
     },
     rawResponse,
     elementById,
-    usage: res.usage
+    usage: res.usage,
+    isOrderSensitive: typeof res.content === "object" && res.content !== null && "isOrderSensitive" in res.content ? res.content.isOrderSensitive : void 0
   };
 }
 async function AiLocateSection(options) {
@@ -2305,28 +2328,8 @@ async function AiLocateSection(options) {
     usage: result.usage
   };
 }
-var imageUrl2Base64 = async (url) => {
-  if (url.startsWith("data:")) {
-    return url;
-  } else if (url.startsWith("http://") || url.startsWith("https://")) {
-    const response = await fetch(url);
-    if (!response.ok) {
-      throw new Error(`Failed to fetch image: ${url}`);
-    }
-    const contentType = response.headers.get("content-type");
-    if (!contentType) {
-      throw new Error(`Failed to fetch image: ${url}`);
-    }
-    const ext = contentType.split("/")[1];
-    assert4(ext, "get mime-type extension from response headers failed");
-    const buffer = Buffer.from(await response.arrayBuffer());
-    return `data:image/${ext};base64,${buffer.toString("base64")}`;
-  } else {
-    return await transformImgPathToBase64Str(url);
-  }
-};
 async function AiExtractElementInfo(options) {
-  const { dataQuery, context, extractOption, promptImages } = options;
+  const { dataQuery, context, extractOption } = options;
   const systemPrompt = systemPromptToExtract();
   const { screenshotBase64 } = context;
   const { description, elementById } = await describeUserPage(context, {
@@ -2358,47 +2361,8 @@ async function AiExtractElementInfo(options) {
     {
       role: "user",
       content: userContent
-    },
-    {
-      role: "user",
-      content: [
-        {
-          type: "text",
-          text: ""
-        }
-      ]
     }
   ];
-  const multiMsg = false;
-  if (promptImages) {
-    for (const [key, url] of Object.entries(promptImages)) {
-      const base64 = await imageUrl2Base64(url);
-      const text = {
-        type: "text",
-        text: `reference image ${key}:`
-      };
-      const img = {
-        type: "image_url",
-        image_url: {
-          url: base64,
-          detail: "high"
-        }
-      };
-      if (multiMsg) {
-        msgs.push({
-          role: "user",
-          content: [text]
-        });
-        msgs.push({
-          role: "user",
-          content: [img]
-        });
-      } else {
-        userContent.push(text);
-        userContent.push(img);
-      }
-    }
-  }
   const result = await callAiFn(
     msgs,
     2 /* EXTRACT_DATA */
@@ -2829,4 +2793,4 @@ export {
   resizeImageForUiTars
 };
-//# sourceMappingURL=chunk-QFXN2AP7.js.map
+//# sourceMappingURL=chunk-YCHAOUOW.js.map