@midscene/core 0.24.2-beta-20250801024655.0 → 0.24.2-beta-20250801111909.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/ai-model.d.ts +3 -3
- package/dist/es/ai-model.js +1 -1
- package/dist/es/{chunk-FMBJ3CM2.js → chunk-2RCMQS5O.js} +3 -3
- package/dist/es/{chunk-5HH6E7M4.js → chunk-KFA65L55.js} +93 -9
- package/dist/es/chunk-KFA65L55.js.map +1 -0
- package/dist/es/index.d.ts +7 -9
- package/dist/es/index.js +4 -8
- package/dist/es/index.js.map +1 -1
- package/dist/es/{llm-planning-d7096b0d.d.ts → llm-planning-4c782a8d.d.ts} +6 -5
- package/dist/es/{types-d836fa73.d.ts → types-7b64b80b.d.ts} +33 -13
- package/dist/es/utils.d.ts +1 -1
- package/dist/es/utils.js +1 -1
- package/dist/lib/ai-model.d.ts +3 -3
- package/dist/lib/ai-model.js +2 -2
- package/dist/lib/{chunk-FMBJ3CM2.js → chunk-2RCMQS5O.js} +3 -3
- package/dist/lib/{chunk-5HH6E7M4.js → chunk-KFA65L55.js} +99 -15
- package/dist/lib/chunk-KFA65L55.js.map +1 -0
- package/dist/lib/index.d.ts +7 -9
- package/dist/lib/index.js +14 -18
- package/dist/lib/index.js.map +1 -1
- package/dist/lib/{llm-planning-d7096b0d.d.ts → llm-planning-4c782a8d.d.ts} +6 -5
- package/dist/{types/types-d836fa73.d.ts → lib/types-7b64b80b.d.ts} +33 -13
- package/dist/lib/utils.d.ts +1 -1
- package/dist/lib/utils.js +2 -2
- package/dist/types/ai-model.d.ts +3 -3
- package/dist/types/index.d.ts +7 -9
- package/dist/types/{llm-planning-d7096b0d.d.ts → llm-planning-4c782a8d.d.ts} +6 -5
- package/dist/{lib/types-d836fa73.d.ts → types/types-7b64b80b.d.ts} +33 -13
- package/dist/types/utils.d.ts +1 -1
- package/package.json +3 -3
- package/dist/es/chunk-5HH6E7M4.js.map +0 -1
- package/dist/lib/chunk-5HH6E7M4.js.map +0 -1
- /package/dist/es/{chunk-FMBJ3CM2.js.map → chunk-2RCMQS5O.js.map} +0 -0
- /package/dist/lib/{chunk-FMBJ3CM2.js.map → chunk-2RCMQS5O.js.map} +0 -0
|
@@ -643,9 +643,9 @@ import { PromptTemplate as PromptTemplate2 } from "@langchain/core/prompts";
|
|
|
643
643
|
import {
|
|
644
644
|
imageInfo,
|
|
645
645
|
imageInfoOfBase64,
|
|
646
|
-
|
|
646
|
+
localImg2Base64,
|
|
647
|
+
httpImg2Base64,
|
|
647
648
|
resizeImg,
|
|
648
|
-
transformImgPathToBase64,
|
|
649
649
|
saveBase64Image,
|
|
650
650
|
zoomForGPT4o
|
|
651
651
|
} from "@midscene/shared/img";
|
|
@@ -2004,7 +2004,11 @@ import {
|
|
|
2004
2004
|
getAIConfigInBoolean as getAIConfigInBoolean2,
|
|
2005
2005
|
vlLocateMode as vlLocateMode4
|
|
2006
2006
|
} from "@midscene/shared/env";
|
|
2007
|
-
import {
|
|
2007
|
+
import {
|
|
2008
|
+
cropByRect,
|
|
2009
|
+
paddingToMatchBlockByBase64,
|
|
2010
|
+
preProcessImageUrl
|
|
2011
|
+
} from "@midscene/shared/img";
|
|
2008
2012
|
import { getDebug as getDebug3 } from "@midscene/shared/logger";
|
|
2009
2013
|
import { assert as assert4 } from "@midscene/shared/utils";
|
|
2010
2014
|
|
|
@@ -2018,6 +2022,8 @@ The user will give you a screenshot, the contents of it (optional), and some dat
|
|
|
2018
2022
|
|
|
2019
2023
|
If a key specifies a JSON data type (such as Number, String, Boolean, Object, Array), ensure the returned value strictly matches that data type.
|
|
2020
2024
|
|
|
2025
|
+
If the user provides multiple reference images, please carefully review the reference images with the screenshot and provide the correct answer for <DATA_DEMAND>.
|
|
2026
|
+
|
|
2021
2027
|
Return in the following JSON format:
|
|
2022
2028
|
{
|
|
2023
2029
|
data: any, // the extracted data. Make sure both the value and scheme meet the DATA_DEMAND. If you want to write some description in this field, use the same language as the DATA_DEMAND.
|
|
@@ -2158,6 +2164,55 @@ var sectionLocatorInstruction = new PromptTemplate4({
|
|
|
2158
2164
|
// src/ai-model/inspect.ts
|
|
2159
2165
|
var debugInspect = getDebug3("ai:inspect");
|
|
2160
2166
|
var debugSection = getDebug3("ai:section");
|
|
2167
|
+
var extraTextFromUserPrompt = (prompt) => {
|
|
2168
|
+
if (typeof prompt === "string") {
|
|
2169
|
+
return prompt;
|
|
2170
|
+
} else {
|
|
2171
|
+
return prompt.prompt;
|
|
2172
|
+
}
|
|
2173
|
+
};
|
|
2174
|
+
var promptsToChatParam = async (multimodalPrompt) => {
|
|
2175
|
+
const msgs = [];
|
|
2176
|
+
if (multimodalPrompt?.images?.length) {
|
|
2177
|
+
msgs.push({
|
|
2178
|
+
role: "user",
|
|
2179
|
+
content: [
|
|
2180
|
+
{
|
|
2181
|
+
type: "text",
|
|
2182
|
+
text: "Next, I will provide all the reference images."
|
|
2183
|
+
}
|
|
2184
|
+
]
|
|
2185
|
+
});
|
|
2186
|
+
for (const item of multimodalPrompt.images) {
|
|
2187
|
+
const base64 = await preProcessImageUrl(
|
|
2188
|
+
item.url,
|
|
2189
|
+
!!multimodalPrompt.convertHttpImage2Base64
|
|
2190
|
+
);
|
|
2191
|
+
msgs.push({
|
|
2192
|
+
role: "user",
|
|
2193
|
+
content: [
|
|
2194
|
+
{
|
|
2195
|
+
type: "text",
|
|
2196
|
+
text: `reference image ${item.name}:`
|
|
2197
|
+
}
|
|
2198
|
+
]
|
|
2199
|
+
});
|
|
2200
|
+
msgs.push({
|
|
2201
|
+
role: "user",
|
|
2202
|
+
content: [
|
|
2203
|
+
{
|
|
2204
|
+
type: "image_url",
|
|
2205
|
+
image_url: {
|
|
2206
|
+
url: base64,
|
|
2207
|
+
detail: "high"
|
|
2208
|
+
}
|
|
2209
|
+
}
|
|
2210
|
+
]
|
|
2211
|
+
});
|
|
2212
|
+
}
|
|
2213
|
+
}
|
|
2214
|
+
return msgs;
|
|
2215
|
+
};
|
|
2161
2216
|
async function AiLocateElement(options) {
|
|
2162
2217
|
const { context, targetElementDescription, callAI } = options;
|
|
2163
2218
|
const { screenshotBase64 } = context;
|
|
@@ -2168,7 +2223,7 @@ async function AiLocateElement(options) {
|
|
|
2168
2223
|
);
|
|
2169
2224
|
const userInstructionPrompt = await findElementPrompt.format({
|
|
2170
2225
|
pageDescription: description,
|
|
2171
|
-
targetElementDescription
|
|
2226
|
+
targetElementDescription: extraTextFromUserPrompt(targetElementDescription)
|
|
2172
2227
|
});
|
|
2173
2228
|
const systemPrompt = systemPromptToLocateElement(vlLocateMode4());
|
|
2174
2229
|
let imagePayload = screenshotBase64;
|
|
@@ -2210,6 +2265,13 @@ async function AiLocateElement(options) {
|
|
|
2210
2265
|
]
|
|
2211
2266
|
}
|
|
2212
2267
|
];
|
|
2268
|
+
if (typeof targetElementDescription !== "string") {
|
|
2269
|
+
const addOns = await promptsToChatParam({
|
|
2270
|
+
images: targetElementDescription.images,
|
|
2271
|
+
convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64
|
|
2272
|
+
});
|
|
2273
|
+
msgs.push(...addOns);
|
|
2274
|
+
}
|
|
2213
2275
|
const callAIFn = callAI || callToGetJSONObject;
|
|
2214
2276
|
const res = await callAIFn(msgs, 1 /* INSPECT_ELEMENT */);
|
|
2215
2277
|
const rawResponse = JSON.stringify(res.content);
|
|
@@ -2265,7 +2327,7 @@ async function AiLocateSection(options) {
|
|
|
2265
2327
|
const { screenshotBase64 } = context;
|
|
2266
2328
|
const systemPrompt = systemPromptToLocateSection(vlLocateMode4());
|
|
2267
2329
|
const sectionLocatorInstructionText = await sectionLocatorInstruction.format({
|
|
2268
|
-
sectionDescription
|
|
2330
|
+
sectionDescription: extraTextFromUserPrompt(sectionDescription)
|
|
2269
2331
|
});
|
|
2270
2332
|
const msgs = [
|
|
2271
2333
|
{ role: "system", content: systemPrompt },
|
|
@@ -2286,6 +2348,13 @@ async function AiLocateSection(options) {
|
|
|
2286
2348
|
]
|
|
2287
2349
|
}
|
|
2288
2350
|
];
|
|
2351
|
+
if (typeof sectionDescription !== "string") {
|
|
2352
|
+
const addOns = await promptsToChatParam({
|
|
2353
|
+
images: sectionDescription.images,
|
|
2354
|
+
convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64
|
|
2355
|
+
});
|
|
2356
|
+
msgs.push(...addOns);
|
|
2357
|
+
}
|
|
2289
2358
|
const result = await callAiFn(
|
|
2290
2359
|
msgs,
|
|
2291
2360
|
2 /* EXTRACT_DATA */
|
|
@@ -2327,7 +2396,7 @@ async function AiLocateSection(options) {
|
|
|
2327
2396
|
};
|
|
2328
2397
|
}
|
|
2329
2398
|
async function AiExtractElementInfo(options) {
|
|
2330
|
-
const { dataQuery, context, extractOption } = options;
|
|
2399
|
+
const { dataQuery, context, extractOption, multimodalPrompt } = options;
|
|
2331
2400
|
const systemPrompt = systemPromptToExtract();
|
|
2332
2401
|
const { screenshotBase64 } = context;
|
|
2333
2402
|
const { description, elementById } = await describeUserPage(context, {
|
|
@@ -2361,6 +2430,13 @@ async function AiExtractElementInfo(options) {
|
|
|
2361
2430
|
content: userContent
|
|
2362
2431
|
}
|
|
2363
2432
|
];
|
|
2433
|
+
if (multimodalPrompt) {
|
|
2434
|
+
const addOns = await promptsToChatParam({
|
|
2435
|
+
images: multimodalPrompt.images,
|
|
2436
|
+
convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64
|
|
2437
|
+
});
|
|
2438
|
+
msgs.push(...addOns);
|
|
2439
|
+
}
|
|
2364
2440
|
const result = await callAiFn(
|
|
2365
2441
|
msgs,
|
|
2366
2442
|
2 /* EXTRACT_DATA */
|
|
@@ -2373,11 +2449,12 @@ async function AiExtractElementInfo(options) {
|
|
|
2373
2449
|
}
|
|
2374
2450
|
async function AiAssert(options) {
|
|
2375
2451
|
const { assertion, context } = options;
|
|
2376
|
-
assert4(assertion, "assertion should be a string");
|
|
2452
|
+
assert4(assertion, "assertion should not be empty");
|
|
2377
2453
|
const { screenshotBase64 } = context;
|
|
2378
2454
|
const systemPrompt = systemPromptToAssert({
|
|
2379
2455
|
isUITars: getAIConfigInBoolean2(MIDSCENE_USE_VLM_UI_TARS)
|
|
2380
2456
|
});
|
|
2457
|
+
const assertionText = extraTextFromUserPrompt(assertion);
|
|
2381
2458
|
const msgs = [
|
|
2382
2459
|
{ role: "system", content: systemPrompt },
|
|
2383
2460
|
{
|
|
@@ -2395,13 +2472,20 @@ async function AiAssert(options) {
|
|
|
2395
2472
|
text: `
|
|
2396
2473
|
Here is the assertion. Please tell whether it is truthy according to the screenshot.
|
|
2397
2474
|
=====================================
|
|
2398
|
-
${assertion}
|
|
2475
|
+
${assertionText}
|
|
2399
2476
|
=====================================
|
|
2400
2477
|
`
|
|
2401
2478
|
}
|
|
2402
2479
|
]
|
|
2403
2480
|
}
|
|
2404
2481
|
];
|
|
2482
|
+
if (typeof assertion !== "string") {
|
|
2483
|
+
const addOns = await promptsToChatParam({
|
|
2484
|
+
images: assertion.images,
|
|
2485
|
+
convertHttpImage2Base64: assertion.convertHttpImage2Base64
|
|
2486
|
+
});
|
|
2487
|
+
msgs.push(...addOns);
|
|
2488
|
+
}
|
|
2405
2489
|
const { content: assertResult, usage } = await callAiFn(
|
|
2406
2490
|
msgs,
|
|
2407
2491
|
0 /* ASSERT */
|
|
@@ -2824,4 +2908,4 @@ export {
|
|
|
2824
2908
|
resizeImageForUiTars
|
|
2825
2909
|
};
|
|
2826
2910
|
|
|
2827
|
-
//# sourceMappingURL=chunk-5HH6E7M4.js.map
|
|
2911
|
+
//# sourceMappingURL=chunk-KFA65L55.js.map
|