@midscene/core 0.23.5-beta-20250728070606.0 → 0.24.1-beta-20250728094050.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/ai-model.d.ts +3 -3
- package/dist/es/ai-model.js +1 -1
- package/dist/es/{chunk-QSWMWTO4.js → chunk-AKL2GGPA.js} +3 -3
- package/dist/es/{chunk-QFXN2AP7.js → chunk-YCHAOUOW.js} +37 -73
- package/dist/es/chunk-YCHAOUOW.js.map +1 -0
- package/dist/es/index.d.ts +8 -6
- package/dist/es/index.js +13 -6
- package/dist/es/index.js.map +1 -1
- package/dist/es/{llm-planning-4ac580dc.d.ts → llm-planning-3f26578e.d.ts} +3 -3
- package/dist/es/{types-27dc17c9.d.ts → types-c519555a.d.ts} +4 -6
- package/dist/es/utils.d.ts +1 -1
- package/dist/es/utils.js +1 -1
- package/dist/lib/ai-model.d.ts +3 -3
- package/dist/lib/ai-model.js +2 -2
- package/dist/lib/{chunk-QSWMWTO4.js → chunk-AKL2GGPA.js} +3 -3
- package/dist/lib/{chunk-QFXN2AP7.js → chunk-YCHAOUOW.js} +36 -72
- package/dist/lib/chunk-YCHAOUOW.js.map +1 -0
- package/dist/lib/index.d.ts +8 -6
- package/dist/lib/index.js +22 -15
- package/dist/lib/index.js.map +1 -1
- package/dist/lib/{llm-planning-4ac580dc.d.ts → llm-planning-3f26578e.d.ts} +3 -3
- package/dist/{types/types-27dc17c9.d.ts → lib/types-c519555a.d.ts} +4 -6
- package/dist/lib/utils.d.ts +1 -1
- package/dist/lib/utils.js +2 -2
- package/dist/types/ai-model.d.ts +3 -3
- package/dist/types/index.d.ts +8 -6
- package/dist/types/{llm-planning-4ac580dc.d.ts → llm-planning-3f26578e.d.ts} +3 -3
- package/dist/{lib/types-27dc17c9.d.ts → types/types-c519555a.d.ts} +4 -6
- package/dist/types/utils.d.ts +1 -1
- package/package.json +4 -4
- package/dist/es/chunk-QFXN2AP7.js.map +0 -1
- package/dist/lib/chunk-QFXN2AP7.js.map +0 -1
- /package/dist/es/{chunk-QSWMWTO4.js.map → chunk-AKL2GGPA.js.map} +0 -0
- /package/dist/lib/{chunk-QSWMWTO4.js.map → chunk-AKL2GGPA.js.map} +0 -0
|
@@ -386,31 +386,47 @@ You are an expert in software testing.
|
|
|
386
386
|
## Objective:
|
|
387
387
|
- Identify elements in screenshots and text that match the user's description.
|
|
388
388
|
- Give the coordinates of the element that matches the user's description best in the screenshot.
|
|
389
|
+
- Determine whether the user's description is order-sensitive (e.g., contains phrases like 'the third item in the list', 'the last button', etc.).
|
|
389
390
|
|
|
390
391
|
## Output Format:
|
|
391
392
|
\`\`\`json
|
|
392
393
|
{
|
|
393
394
|
"bbox": [number, number, number, number], // ${bboxComment}
|
|
394
|
-
"errors"?: string[]
|
|
395
|
+
"errors"?: string[],
|
|
396
|
+
"isOrderSensitive": boolean // Whether the targetElementDescription is order-sensitive (true/false)
|
|
395
397
|
}
|
|
396
398
|
\`\`\`
|
|
397
399
|
|
|
398
400
|
Fields:
|
|
399
401
|
* \`bbox\` is the bounding box of the element that matches the user's description best in the screenshot
|
|
402
|
+
* \`isOrderSensitive\` is a boolean indicating whether the user's description is order-sensitive (true/false)
|
|
400
403
|
* \`errors\` is an optional array of error messages (if any)
|
|
401
404
|
|
|
402
|
-
|
|
405
|
+
Order-sensitive means the description contains phrases like:
|
|
406
|
+
- "the third item in the list"
|
|
407
|
+
- "the last button"
|
|
408
|
+
- "the first input box"
|
|
409
|
+
- "the second row"
|
|
410
|
+
|
|
411
|
+
Not order-sensitive means the description is like:
|
|
412
|
+
- "confirm button"
|
|
413
|
+
- "search box"
|
|
414
|
+
- "password input"
|
|
415
|
+
|
|
416
|
+
For example, when an element is found and the description is order-sensitive:
|
|
403
417
|
\`\`\`json
|
|
404
418
|
{
|
|
405
419
|
"bbox": [100, 100, 200, 200],
|
|
420
|
+
"isOrderSensitive": true,
|
|
406
421
|
"errors": []
|
|
407
422
|
}
|
|
408
423
|
\`\`\`
|
|
409
424
|
|
|
410
|
-
When no element is found:
|
|
425
|
+
When no element is found and the description is not order-sensitive:
|
|
411
426
|
\`\`\`json
|
|
412
427
|
{
|
|
413
428
|
"bbox": [],
|
|
429
|
+
"isOrderSensitive": false,
|
|
414
430
|
"errors": ["I can see ..., but {some element} is not found"]
|
|
415
431
|
}
|
|
416
432
|
\`\`\`
|
|
@@ -423,6 +439,7 @@ You are an expert in software page image (2D) and page element text analysis.
|
|
|
423
439
|
## Objective:
|
|
424
440
|
- Identify elements in screenshots and text that match the user's description.
|
|
425
441
|
- Return JSON data containing the selection reason and element ID.
|
|
442
|
+
- Determine whether the user's description is order-sensitive (e.g., contains phrases like 'the third item in the list', 'the last button', etc.).
|
|
426
443
|
|
|
427
444
|
## Skills:
|
|
428
445
|
- Image analysis and recognition
|
|
@@ -434,6 +451,7 @@ You are an expert in software page image (2D) and page element text analysis.
|
|
|
434
451
|
2. Based on the user's description, locate the target element ID in the list of element descriptions and the screenshot.
|
|
435
452
|
3. Found the required number of elements
|
|
436
453
|
4. Return JSON data containing the selection reason and element ID.
|
|
454
|
+
5. Judge whether the user's description is order-sensitive (see below for definition and examples).
|
|
437
455
|
|
|
438
456
|
## Constraints:
|
|
439
457
|
- Strictly adhere to the specified location when describing the required element; do not select elements from other locations.
|
|
@@ -443,6 +461,10 @@ You are an expert in software page image (2D) and page element text analysis.
|
|
|
443
461
|
- The returned data must conform to the specified JSON format.
|
|
444
462
|
- The returned value id information must use the id from element info (important: **use id not indexId, id is hash content**)
|
|
445
463
|
|
|
464
|
+
## Order-Sensitive Definition:
|
|
465
|
+
- If the description contains phrases like "the third item in the list", "the last button", "the first input box", "the second row", etc., it is order-sensitive (isOrderSensitive = true).
|
|
466
|
+
- If the description is like "confirm button", "search box", "password input", etc., it is not order-sensitive (isOrderSensitive = false).
|
|
467
|
+
|
|
446
468
|
## Output Format:
|
|
447
469
|
|
|
448
470
|
Please return the result in JSON format as follows:
|
|
@@ -458,6 +480,7 @@ Please return the result in JSON format as follows:
|
|
|
458
480
|
}
|
|
459
481
|
// More elements...
|
|
460
482
|
],
|
|
483
|
+
"isOrderSensitive": true, // or false, depending on the user's description
|
|
461
484
|
"errors": [] // Array of strings containing any error messages
|
|
462
485
|
}
|
|
463
486
|
\`\`\`
|
|
@@ -546,6 +569,7 @@ Output Example:
|
|
|
546
569
|
"id": "1231"
|
|
547
570
|
}
|
|
548
571
|
],
|
|
572
|
+
"isOrderSensitive": true,
|
|
549
573
|
"errors": []
|
|
550
574
|
}
|
|
551
575
|
\`\`\`
|
|
@@ -583,6 +607,10 @@ var locatorSchema = {
|
|
|
583
607
|
},
|
|
584
608
|
description: "List of found elements"
|
|
585
609
|
},
|
|
610
|
+
isOrderSensitive: {
|
|
611
|
+
type: "boolean",
|
|
612
|
+
description: "Whether the targetElementDescription is order-sensitive (true/false)"
|
|
613
|
+
},
|
|
586
614
|
errors: {
|
|
587
615
|
type: "array",
|
|
588
616
|
items: {
|
|
@@ -591,7 +619,7 @@ var locatorSchema = {
|
|
|
591
619
|
description: "List of error messages, if any"
|
|
592
620
|
}
|
|
593
621
|
},
|
|
594
|
-
required: ["elements", "errors"],
|
|
622
|
+
required: ["elements", "isOrderSensitive", "errors"],
|
|
595
623
|
additionalProperties: false
|
|
596
624
|
}
|
|
597
625
|
}
|
|
@@ -1970,11 +1998,7 @@ import {
|
|
|
1970
1998
|
getAIConfigInBoolean as getAIConfigInBoolean2,
|
|
1971
1999
|
vlLocateMode as vlLocateMode4
|
|
1972
2000
|
} from "@midscene/shared/env";
|
|
1973
|
-
import {
|
|
1974
|
-
cropByRect,
|
|
1975
|
-
paddingToMatchBlockByBase64,
|
|
1976
|
-
transformImgPathToBase64Str
|
|
1977
|
-
} from "@midscene/shared/img";
|
|
2001
|
+
import { cropByRect, paddingToMatchBlockByBase64 } from "@midscene/shared/img";
|
|
1978
2002
|
import { getDebug as getDebug3 } from "@midscene/shared/logger";
|
|
1979
2003
|
import { assert as assert4 } from "@midscene/shared/utils";
|
|
1980
2004
|
|
|
@@ -1988,8 +2012,6 @@ The user will give you a screenshot, the contents of it (optional), and some dat
|
|
|
1988
2012
|
|
|
1989
2013
|
If a key specifies a JSON data type (such as Number, String, Boolean, Object, Array), ensure the returned value strictly matches that data type.
|
|
1990
2014
|
|
|
1991
|
-
If the user provides multiple reference images, please carefully compare the reference images with the screenshot and provide the correct answer for <DATA_DEMAND>.
|
|
1992
|
-
|
|
1993
2015
|
Return in the following JSON format:
|
|
1994
2016
|
{
|
|
1995
2017
|
data: any, // the extracted data. Make sure both the value and scheme meet the DATA_DEMAND. If you want to write some description in this field, use the same language as the DATA_DEMAND.
|
|
@@ -2236,7 +2258,8 @@ async function AiLocateElement(options) {
|
|
|
2236
2258
|
},
|
|
2237
2259
|
rawResponse,
|
|
2238
2260
|
elementById,
|
|
2239
|
-
usage: res.usage
|
|
2261
|
+
usage: res.usage,
|
|
2262
|
+
isOrderSensitive: typeof res.content === "object" && res.content !== null && "isOrderSensitive" in res.content ? res.content.isOrderSensitive : void 0
|
|
2240
2263
|
};
|
|
2241
2264
|
}
|
|
2242
2265
|
async function AiLocateSection(options) {
|
|
@@ -2305,28 +2328,8 @@ async function AiLocateSection(options) {
|
|
|
2305
2328
|
usage: result.usage
|
|
2306
2329
|
};
|
|
2307
2330
|
}
|
|
2308
|
-
var imageUrl2Base64 = async (url) => {
|
|
2309
|
-
if (url.startsWith("data:")) {
|
|
2310
|
-
return url;
|
|
2311
|
-
} else if (url.startsWith("http://") || url.startsWith("https://")) {
|
|
2312
|
-
const response = await fetch(url);
|
|
2313
|
-
if (!response.ok) {
|
|
2314
|
-
throw new Error(`Failed to fetch image: ${url}`);
|
|
2315
|
-
}
|
|
2316
|
-
const contentType = response.headers.get("content-type");
|
|
2317
|
-
if (!contentType) {
|
|
2318
|
-
throw new Error(`Failed to fetch image: ${url}`);
|
|
2319
|
-
}
|
|
2320
|
-
const ext = contentType.split("/")[1];
|
|
2321
|
-
assert4(ext, "get mime-type extension from response headers failed");
|
|
2322
|
-
const buffer = Buffer.from(await response.arrayBuffer());
|
|
2323
|
-
return `data:image/${ext};base64,${buffer.toString("base64")}`;
|
|
2324
|
-
} else {
|
|
2325
|
-
return await transformImgPathToBase64Str(url);
|
|
2326
|
-
}
|
|
2327
|
-
};
|
|
2328
2331
|
async function AiExtractElementInfo(options) {
|
|
2329
|
-
const { dataQuery, context, extractOption, promptImages } = options;
|
|
2332
|
+
const { dataQuery, context, extractOption } = options;
|
|
2330
2333
|
const systemPrompt = systemPromptToExtract();
|
|
2331
2334
|
const { screenshotBase64 } = context;
|
|
2332
2335
|
const { description, elementById } = await describeUserPage(context, {
|
|
@@ -2358,47 +2361,8 @@ async function AiExtractElementInfo(options) {
|
|
|
2358
2361
|
{
|
|
2359
2362
|
role: "user",
|
|
2360
2363
|
content: userContent
|
|
2361
|
-
},
|
|
2362
|
-
{
|
|
2363
|
-
role: "user",
|
|
2364
|
-
content: [
|
|
2365
|
-
{
|
|
2366
|
-
type: "text",
|
|
2367
|
-
text: ""
|
|
2368
|
-
}
|
|
2369
|
-
]
|
|
2370
2364
|
}
|
|
2371
2365
|
];
|
|
2372
|
-
const multiMsg = false;
|
|
2373
|
-
if (promptImages) {
|
|
2374
|
-
for (const [key, url] of Object.entries(promptImages)) {
|
|
2375
|
-
const base64 = await imageUrl2Base64(url);
|
|
2376
|
-
const text = {
|
|
2377
|
-
type: "text",
|
|
2378
|
-
text: `reference image ${key}:`
|
|
2379
|
-
};
|
|
2380
|
-
const img = {
|
|
2381
|
-
type: "image_url",
|
|
2382
|
-
image_url: {
|
|
2383
|
-
url: base64,
|
|
2384
|
-
detail: "high"
|
|
2385
|
-
}
|
|
2386
|
-
};
|
|
2387
|
-
if (multiMsg) {
|
|
2388
|
-
msgs.push({
|
|
2389
|
-
role: "user",
|
|
2390
|
-
content: [text]
|
|
2391
|
-
});
|
|
2392
|
-
msgs.push({
|
|
2393
|
-
role: "user",
|
|
2394
|
-
content: [img]
|
|
2395
|
-
});
|
|
2396
|
-
} else {
|
|
2397
|
-
userContent.push(text);
|
|
2398
|
-
userContent.push(img);
|
|
2399
|
-
}
|
|
2400
|
-
}
|
|
2401
|
-
}
|
|
2402
2366
|
const result = await callAiFn(
|
|
2403
2367
|
msgs,
|
|
2404
2368
|
2 /* EXTRACT_DATA */
|
|
@@ -2829,4 +2793,4 @@ export {
|
|
|
2829
2793
|
resizeImageForUiTars
|
|
2830
2794
|
};
|
|
2831
2795
|
|
|
2832
|
-
//# sourceMappingURL=chunk-QFXN2AP7.js.map
|
|
2796
|
+
//# sourceMappingURL=chunk-YCHAOUOW.js.map
|