@midscene/core 1.4.5 → 1.4.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/agent/agent.mjs +4 -56
- package/dist/es/agent/agent.mjs.map +1 -1
- package/dist/es/agent/task-builder.mjs +4 -1
- package/dist/es/agent/task-builder.mjs.map +1 -1
- package/dist/es/agent/utils.mjs +91 -18
- package/dist/es/agent/utils.mjs.map +1 -1
- package/dist/es/ai-model/auto-glm/planning.mjs +1 -1
- package/dist/es/ai-model/auto-glm/planning.mjs.map +1 -1
- package/dist/es/ai-model/inspect.mjs +5 -5
- package/dist/es/ai-model/inspect.mjs.map +1 -1
- package/dist/es/ai-model/llm-planning.mjs +3 -3
- package/dist/es/ai-model/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/service-caller/index.mjs +73 -47
- package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
- package/dist/es/ai-model/ui-tars-planning.mjs +14 -14
- package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
- package/dist/es/common.mjs +2 -15
- package/dist/es/common.mjs.map +1 -1
- package/dist/es/device/index.mjs.map +1 -1
- package/dist/es/dump/html-utils.mjs +2 -1
- package/dist/es/dump/html-utils.mjs.map +1 -1
- package/dist/es/index.mjs.map +1 -1
- package/dist/es/report-generator.mjs +2 -2
- package/dist/es/report-generator.mjs.map +1 -1
- package/dist/es/report.mjs +39 -7
- package/dist/es/report.mjs.map +1 -1
- package/dist/es/service/index.mjs +6 -6
- package/dist/es/service/index.mjs.map +1 -1
- package/dist/es/types.mjs.map +1 -1
- package/dist/es/utils.mjs +2 -2
- package/dist/lib/agent/agent.js +4 -56
- package/dist/lib/agent/agent.js.map +1 -1
- package/dist/lib/agent/task-builder.js +4 -1
- package/dist/lib/agent/task-builder.js.map +1 -1
- package/dist/lib/agent/utils.js +96 -14
- package/dist/lib/agent/utils.js.map +1 -1
- package/dist/lib/ai-model/auto-glm/planning.js +1 -1
- package/dist/lib/ai-model/auto-glm/planning.js.map +1 -1
- package/dist/lib/ai-model/inspect.js +5 -5
- package/dist/lib/ai-model/inspect.js.map +1 -1
- package/dist/lib/ai-model/llm-planning.js +3 -3
- package/dist/lib/ai-model/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/service-caller/index.js +75 -49
- package/dist/lib/ai-model/service-caller/index.js.map +1 -1
- package/dist/lib/ai-model/ui-tars-planning.js +14 -14
- package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
- package/dist/lib/common.js +2 -15
- package/dist/lib/common.js.map +1 -1
- package/dist/lib/device/index.js.map +1 -1
- package/dist/lib/dump/html-utils.js +4 -0
- package/dist/lib/dump/html-utils.js.map +1 -1
- package/dist/lib/index.js.map +1 -1
- package/dist/lib/report-generator.js +1 -1
- package/dist/lib/report-generator.js.map +1 -1
- package/dist/lib/report.js +36 -4
- package/dist/lib/report.js.map +1 -1
- package/dist/lib/service/index.js +6 -6
- package/dist/lib/service/index.js.map +1 -1
- package/dist/lib/types.js.map +1 -1
- package/dist/lib/utils.js +2 -2
- package/dist/types/agent/agent.d.ts +1 -15
- package/dist/types/agent/utils.d.ts +13 -1
- package/dist/types/ai-model/index.d.ts +1 -1
- package/dist/types/ai-model/service-caller/index.d.ts +4 -2
- package/dist/types/common.d.ts +0 -310
- package/dist/types/device/index.d.ts +1 -2
- package/dist/types/dump/html-utils.d.ts +11 -0
- package/dist/types/index.d.ts +1 -1
- package/dist/types/report.d.ts +5 -0
- package/dist/types/types.d.ts +2 -2
- package/package.json +2 -2
package/dist/es/agent/utils.mjs
CHANGED
|
@@ -2,31 +2,60 @@ import { ScreenshotItem } from "../screenshot-item.mjs";
|
|
|
2
2
|
import { uploadTestInfoToServer } from "../utils.mjs";
|
|
3
3
|
import { MIDSCENE_REPORT_QUIET, MIDSCENE_REPORT_TAG_NAME, globalConfigManager } from "@midscene/shared/env";
|
|
4
4
|
import { generateElementByRect } from "@midscene/shared/extractor";
|
|
5
|
+
import { imageInfoOfBase64, resizeImgBase64 } from "@midscene/shared/img";
|
|
5
6
|
import { getDebug } from "@midscene/shared/logger";
|
|
6
7
|
import { assert, logMsg, uuid } from "@midscene/shared/utils";
|
|
7
8
|
import dayjs from "dayjs";
|
|
8
|
-
import { debug } from "./task-cache.mjs";
|
|
9
|
-
const debugProfile = getDebug('web:tool:profile');
|
|
9
|
+
import { debug as external_task_cache_mjs_debug } from "./task-cache.mjs";
|
|
10
10
|
async function commonContextParser(interfaceInstance, _opt) {
|
|
11
|
+
const debug = getDebug('commonContextParser');
|
|
11
12
|
assert(interfaceInstance, 'interfaceInstance is required');
|
|
12
|
-
|
|
13
|
+
debug("Getting interface description");
|
|
13
14
|
const description = interfaceInstance.describe?.() || '';
|
|
14
|
-
|
|
15
|
-
|
|
15
|
+
debug("Interface description end");
|
|
16
|
+
debug('Uploading test info to server');
|
|
16
17
|
uploadTestInfoToServer({
|
|
17
18
|
testUrl: description,
|
|
18
19
|
serverUrl: _opt.uploadServerUrl
|
|
19
20
|
});
|
|
20
|
-
|
|
21
|
+
debug('UploadTestInfoToServer end');
|
|
22
|
+
debug('will get size');
|
|
23
|
+
const interfaceSize = await interfaceInstance.size();
|
|
24
|
+
const { width: logicalWidth, height: logicalHeight } = interfaceSize;
|
|
25
|
+
if (interfaceSize.dpr) console.warn('Warning: return value of interface.size() include a dpr property, which is not expected and ignored. ');
|
|
26
|
+
if (!Number.isFinite(logicalWidth) || !Number.isFinite(logicalHeight)) throw new Error(`Invalid interface size: width and height must be finite numbers. Received width: ${logicalWidth}, height: ${logicalHeight}`);
|
|
27
|
+
debug(`size: ${logicalWidth}x${logicalHeight}`);
|
|
21
28
|
const screenshotBase64 = await interfaceInstance.screenshotBase64();
|
|
22
29
|
assert(screenshotBase64, 'screenshotBase64 is required');
|
|
23
|
-
|
|
24
|
-
const
|
|
25
|
-
|
|
26
|
-
const
|
|
30
|
+
debug('will get screenshot dimensions');
|
|
31
|
+
const { width: imgWidth, height: imgHeight } = await imageInfoOfBase64(screenshotBase64);
|
|
32
|
+
debug('screenshot dimensions', imgWidth, 'x', imgHeight);
|
|
33
|
+
const shrinkFactor = imgWidth / logicalWidth;
|
|
34
|
+
debug('calculated shrink factor:', shrinkFactor);
|
|
35
|
+
if (1 !== shrinkFactor) {
|
|
36
|
+
const targetWidth = Math.round(imgWidth / shrinkFactor);
|
|
37
|
+
const targetHeight = Math.round(imgHeight / shrinkFactor);
|
|
38
|
+
debug(`Applying screenshot shrink factor: ${shrinkFactor} (physical: ${imgWidth}x${imgHeight} -> target: ${targetWidth}x${targetHeight})`);
|
|
39
|
+
const resizedBase64 = await resizeImgBase64(screenshotBase64, {
|
|
40
|
+
width: targetWidth,
|
|
41
|
+
height: targetHeight
|
|
42
|
+
});
|
|
43
|
+
return {
|
|
44
|
+
shotSize: {
|
|
45
|
+
width: targetWidth,
|
|
46
|
+
height: targetHeight
|
|
47
|
+
},
|
|
48
|
+
deprecatedDpr: shrinkFactor,
|
|
49
|
+
screenshot: ScreenshotItem.create(resizedBase64)
|
|
50
|
+
};
|
|
51
|
+
}
|
|
27
52
|
return {
|
|
28
|
-
|
|
29
|
-
|
|
53
|
+
shotSize: {
|
|
54
|
+
width: imgWidth,
|
|
55
|
+
height: imgHeight
|
|
56
|
+
},
|
|
57
|
+
deprecatedDpr: 1,
|
|
58
|
+
screenshot: ScreenshotItem.create(screenshotBase64)
|
|
30
59
|
};
|
|
31
60
|
}
|
|
32
61
|
function getReportFileName(tag = 'web') {
|
|
@@ -86,9 +115,9 @@ function matchElementFromPlan(planLocateParam) {
|
|
|
86
115
|
}
|
|
87
116
|
async function matchElementFromCache(context, cacheEntry, cachePrompt, cacheable) {
|
|
88
117
|
if (!cacheEntry) return;
|
|
89
|
-
if (false === cacheable) return void
|
|
118
|
+
if (false === cacheable) return void external_task_cache_mjs_debug('cache disabled for prompt: %s', cachePrompt);
|
|
90
119
|
if (!context.taskCache?.isCacheResultUsed) return;
|
|
91
|
-
if (!context.interfaceInstance.rectMatchesCacheFeature) return void
|
|
120
|
+
if (!context.interfaceInstance.rectMatchesCacheFeature) return void external_task_cache_mjs_debug('interface does not implement rectMatchesCacheFeature, skip cache');
|
|
92
121
|
try {
|
|
93
122
|
const rect = await context.interfaceInstance.rectMatchesCacheFeature(cacheEntry);
|
|
94
123
|
const element = {
|
|
@@ -99,14 +128,14 @@ async function matchElementFromCache(context, cacheEntry, cachePrompt, cacheable
|
|
|
99
128
|
rect,
|
|
100
129
|
description: 'string' == typeof cachePrompt ? cachePrompt : cachePrompt.prompt || ''
|
|
101
130
|
};
|
|
102
|
-
|
|
131
|
+
external_task_cache_mjs_debug('cache hit, prompt: %s', cachePrompt);
|
|
103
132
|
return element;
|
|
104
133
|
} catch (error) {
|
|
105
|
-
|
|
134
|
+
external_task_cache_mjs_debug('rectMatchesCacheFeature error: %s', error);
|
|
106
135
|
return;
|
|
107
136
|
}
|
|
108
137
|
}
|
|
109
|
-
const getMidsceneVersion = ()=>"1.4.
|
|
138
|
+
const getMidsceneVersion = ()=>"1.4.7";
|
|
110
139
|
const parsePrompt = (prompt)=>{
|
|
111
140
|
if ('string' == typeof prompt) return {
|
|
112
141
|
textPrompt: prompt,
|
|
@@ -120,6 +149,50 @@ const parsePrompt = (prompt)=>{
|
|
|
120
149
|
} : void 0
|
|
121
150
|
};
|
|
122
151
|
};
|
|
123
|
-
|
|
152
|
+
const transformScreenshotElementToLogical = (element, shrunkShotToLogicalRatio)=>{
|
|
153
|
+
if (1 === shrunkShotToLogicalRatio) return element;
|
|
154
|
+
return {
|
|
155
|
+
...element,
|
|
156
|
+
center: [
|
|
157
|
+
Math.round(element.center[0] / shrunkShotToLogicalRatio),
|
|
158
|
+
Math.round(element.center[1] / shrunkShotToLogicalRatio)
|
|
159
|
+
],
|
|
160
|
+
rect: {
|
|
161
|
+
...element.rect,
|
|
162
|
+
left: Math.round(element.rect.left / shrunkShotToLogicalRatio),
|
|
163
|
+
top: Math.round(element.rect.top / shrunkShotToLogicalRatio),
|
|
164
|
+
width: Math.round(element.rect.width / shrunkShotToLogicalRatio),
|
|
165
|
+
height: Math.round(element.rect.height / shrunkShotToLogicalRatio)
|
|
166
|
+
}
|
|
167
|
+
};
|
|
168
|
+
};
|
|
169
|
+
const transformLogicalElementToScreenshot = (element, shrunkShotToLogicalRatio)=>{
|
|
170
|
+
if (1 === shrunkShotToLogicalRatio) return element;
|
|
171
|
+
return {
|
|
172
|
+
...element,
|
|
173
|
+
center: [
|
|
174
|
+
Math.round(element.center[0] * shrunkShotToLogicalRatio),
|
|
175
|
+
Math.round(element.center[1] * shrunkShotToLogicalRatio)
|
|
176
|
+
],
|
|
177
|
+
rect: {
|
|
178
|
+
...element.rect,
|
|
179
|
+
left: Math.round(element.rect.left * shrunkShotToLogicalRatio),
|
|
180
|
+
top: Math.round(element.rect.top * shrunkShotToLogicalRatio),
|
|
181
|
+
width: Math.round(element.rect.width * shrunkShotToLogicalRatio),
|
|
182
|
+
height: Math.round(element.rect.height * shrunkShotToLogicalRatio)
|
|
183
|
+
}
|
|
184
|
+
};
|
|
185
|
+
};
|
|
186
|
+
const transformLogicalRectToScreenshotRect = (rect, shrunkShotToLogicalRatio)=>{
|
|
187
|
+
if (1 === shrunkShotToLogicalRatio) return rect;
|
|
188
|
+
return {
|
|
189
|
+
...rect,
|
|
190
|
+
left: Math.round(rect.left * shrunkShotToLogicalRatio),
|
|
191
|
+
top: Math.round(rect.top * shrunkShotToLogicalRatio),
|
|
192
|
+
width: Math.round(rect.width * shrunkShotToLogicalRatio),
|
|
193
|
+
height: Math.round(rect.height * shrunkShotToLogicalRatio)
|
|
194
|
+
};
|
|
195
|
+
};
|
|
196
|
+
export { commonContextParser, generateCacheId, getCurrentExecutionFile, getMidsceneVersion, getReportFileName, ifPlanLocateParamIsBbox, matchElementFromCache, matchElementFromPlan, parsePrompt, printReportMsg, transformLogicalElementToScreenshot, transformLogicalRectToScreenshotRect, transformScreenshotElementToLogical };
|
|
124
197
|
|
|
125
198
|
//# sourceMappingURL=utils.mjs.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"agent/utils.mjs","sources":["../../../src/agent/utils.ts"],"sourcesContent":["import type { TMultimodalPrompt, TUserPrompt } from '@/common';\nimport type { AbstractInterface } from '@/device';\nimport { ScreenshotItem } from '@/screenshot-item';\nimport type {\n ElementCacheFeature,\n LocateResultElement,\n PlanningLocateParam,\n UIContext,\n} from '@/types';\nimport { uploadTestInfoToServer } from '@/utils';\nimport {\n MIDSCENE_REPORT_QUIET,\n MIDSCENE_REPORT_TAG_NAME,\n globalConfigManager,\n} from '@midscene/shared/env';\nimport { generateElementByRect } from '@midscene/shared/extractor';\nimport { getDebug } from '@midscene/shared/logger';\nimport { _keyDefinitions } from '@midscene/shared/us-keyboard-layout';\nimport { assert, logMsg, uuid } from '@midscene/shared/utils';\nimport dayjs from 'dayjs';\nimport type { TaskCache } from './task-cache';\nimport { debug as cacheDebug } from './task-cache';\n\nconst debugProfile = getDebug('web:tool:profile');\n\nexport async function commonContextParser(\n interfaceInstance: AbstractInterface,\n _opt: { uploadServerUrl?: string },\n): Promise<UIContext> {\n assert(interfaceInstance, 'interfaceInstance is required');\n\n debugProfile('Getting interface description');\n const description = interfaceInstance.describe?.() || '';\n debugProfile('Interface description end');\n\n debugProfile('Uploading test info to server');\n uploadTestInfoToServer({\n testUrl: description,\n serverUrl: _opt.uploadServerUrl,\n });\n debugProfile('UploadTestInfoToServer end');\n\n const screenshotBase64 = await interfaceInstance.screenshotBase64();\n assert(screenshotBase64!, 'screenshotBase64 is required');\n\n debugProfile('will get size');\n const size = await interfaceInstance.size();\n debugProfile(`size: ${size.width}x${size.height} dpr: ${size.dpr}`);\n\n const screenshot = ScreenshotItem.create(screenshotBase64!);\n\n return {\n size,\n screenshot,\n };\n}\n\nexport function getReportFileName(tag = 'web') {\n const reportTagName = globalConfigManager.getEnvConfigValue(\n MIDSCENE_REPORT_TAG_NAME,\n );\n const dateTimeInFileName = dayjs().format('YYYY-MM-DD_HH-mm-ss');\n // ensure uniqueness at the same time\n const uniqueId = uuid().substring(0, 8);\n return `${reportTagName || tag}-${dateTimeInFileName}-${uniqueId}`;\n}\n\nexport function printReportMsg(filepath: string) {\n if (globalConfigManager.getEnvConfigInBoolean(MIDSCENE_REPORT_QUIET)) {\n return;\n }\n logMsg(`Midscene - report file updated: ${filepath}`);\n}\n\n/**\n * Get the current execution file name\n * @returns The name of the current execution file\n */\nexport function getCurrentExecutionFile(trace?: string): string | false {\n const error = new Error();\n const stackTrace = trace || error.stack;\n const pkgDir = process.cwd() || '';\n if (stackTrace) {\n const stackLines = stackTrace.split('\\n');\n for (const line of stackLines) {\n if (\n line.includes('.spec.') ||\n line.includes('.test.') ||\n line.includes('.ts') ||\n line.includes('.js')\n ) {\n const match = line.match(/(?:at\\s+)?(.*?\\.(?:spec|test)\\.[jt]s)/);\n if (match?.[1]) {\n const targetFileName = match[1]\n .replace(pkgDir, '')\n .trim()\n .replace('at ', '');\n return targetFileName;\n }\n }\n }\n }\n return false;\n}\n\nconst testFileIndex = new Map<string, number>();\n\nexport function generateCacheId(fileName?: string): string {\n let taskFile = fileName || getCurrentExecutionFile();\n if (!taskFile) {\n taskFile = uuid();\n console.warn(\n 'Midscene - using random UUID for cache id. Cache may be invalid.',\n );\n }\n\n if (testFileIndex.has(taskFile)) {\n const currentIndex = testFileIndex.get(taskFile);\n if (currentIndex !== undefined) {\n testFileIndex.set(taskFile, currentIndex + 1);\n }\n } else {\n testFileIndex.set(taskFile, 1);\n }\n return `${taskFile}-${testFileIndex.get(taskFile)}`;\n}\n\nexport function ifPlanLocateParamIsBbox(\n planLocateParam: PlanningLocateParam,\n): boolean {\n return !!(\n planLocateParam.bbox &&\n Array.isArray(planLocateParam.bbox) &&\n planLocateParam.bbox.length === 4\n );\n}\n\nexport function matchElementFromPlan(\n planLocateParam: PlanningLocateParam,\n): LocateResultElement | undefined {\n if (!planLocateParam) {\n return undefined;\n }\n\n if (planLocateParam.bbox) {\n // Convert bbox [x1, y1, x2, y2] to rect {left, top, width, height}\n const rect = {\n left: planLocateParam.bbox[0],\n top: planLocateParam.bbox[1],\n width: planLocateParam.bbox[2] - planLocateParam.bbox[0] + 1,\n height: planLocateParam.bbox[3] - planLocateParam.bbox[1] + 1,\n };\n\n const element = generateElementByRect(\n rect,\n typeof planLocateParam.prompt === 'string'\n ? planLocateParam.prompt\n : planLocateParam.prompt?.prompt || '',\n );\n return element;\n }\n\n return undefined;\n}\n\nexport async function matchElementFromCache(\n context: {\n taskCache?: TaskCache;\n interfaceInstance: AbstractInterface;\n },\n cacheEntry: ElementCacheFeature | undefined,\n cachePrompt: TUserPrompt,\n cacheable: boolean | undefined,\n): Promise<LocateResultElement | undefined> {\n if (!cacheEntry) {\n return undefined;\n }\n\n if (cacheable === false) {\n cacheDebug('cache disabled for prompt: %s', cachePrompt);\n return undefined;\n }\n\n if (!context.taskCache?.isCacheResultUsed) {\n return undefined;\n }\n\n if (!context.interfaceInstance.rectMatchesCacheFeature) {\n cacheDebug(\n 'interface does not implement rectMatchesCacheFeature, skip cache',\n );\n return undefined;\n }\n\n try {\n const rect =\n await context.interfaceInstance.rectMatchesCacheFeature(cacheEntry);\n const element: LocateResultElement = {\n center: [\n Math.round(rect.left + rect.width / 2),\n Math.round(rect.top + rect.height / 2),\n ],\n rect,\n description:\n typeof cachePrompt === 'string'\n ? cachePrompt\n : cachePrompt.prompt || '',\n };\n\n cacheDebug('cache hit, prompt: %s', cachePrompt);\n return element;\n } catch (error) {\n cacheDebug('rectMatchesCacheFeature error: %s', error);\n return undefined;\n }\n}\n\ndeclare const __VERSION__: string | undefined;\n\nexport const getMidsceneVersion = (): string => {\n if (typeof __VERSION__ !== 'undefined') {\n return __VERSION__;\n } else if (\n process.env.__VERSION__ &&\n process.env.__VERSION__ !== 'undefined'\n ) {\n return process.env.__VERSION__;\n }\n throw new Error('__VERSION__ inject failed during build');\n};\n\nexport const parsePrompt = (\n prompt: TUserPrompt,\n): {\n textPrompt: string;\n multimodalPrompt?: TMultimodalPrompt;\n} => {\n if (typeof prompt === 'string') {\n return {\n textPrompt: prompt,\n multimodalPrompt: undefined,\n };\n }\n return {\n textPrompt: prompt.prompt,\n multimodalPrompt: prompt.images\n ? {\n images: prompt.images,\n convertHttpImage2Base64: !!prompt.convertHttpImage2Base64,\n }\n : undefined,\n };\n};\n"],"names":["debugProfile","getDebug","commonContextParser","interfaceInstance","_opt","assert","description","uploadTestInfoToServer","screenshotBase64","size","screenshot","ScreenshotItem","getReportFileName","tag","reportTagName","globalConfigManager","MIDSCENE_REPORT_TAG_NAME","dateTimeInFileName","dayjs","uniqueId","uuid","printReportMsg","filepath","MIDSCENE_REPORT_QUIET","logMsg","getCurrentExecutionFile","trace","error","Error","stackTrace","pkgDir","process","stackLines","line","match","targetFileName","testFileIndex","Map","generateCacheId","fileName","taskFile","console","currentIndex","undefined","ifPlanLocateParamIsBbox","planLocateParam","Array","matchElementFromPlan","rect","element","generateElementByRect","matchElementFromCache","context","cacheEntry","cachePrompt","cacheable","cacheDebug","Math","getMidsceneVersion","__VERSION__","parsePrompt","prompt"],"mappings":";;;;;;;;AAuBA,MAAMA,eAAeC,SAAS;AAEvB,eAAeC,oBACpBC,iBAAoC,EACpCC,IAAkC;IAElCC,OAAOF,mBAAmB;IAE1BH,aAAa;IACb,MAAMM,cAAcH,kBAAkB,QAAQ,QAAQ;IACtDH,aAAa;IAEbA,aAAa;IACbO,uBAAuB;QACrB,SAASD;QACT,WAAWF,KAAK,eAAe;IACjC;IACAJ,aAAa;IAEb,MAAMQ,mBAAmB,MAAML,kBAAkB,gBAAgB;IACjEE,OAAOG,kBAAmB;IAE1BR,aAAa;IACb,MAAMS,OAAO,MAAMN,kBAAkB,IAAI;IACzCH,aAAa,CAAC,MAAM,EAAES,KAAK,KAAK,CAAC,CAAC,EAAEA,KAAK,MAAM,CAAC,MAAM,EAAEA,KAAK,GAAG,EAAE;IAElE,MAAMC,aAAaC,eAAe,MAAM,CAACH;IAEzC,OAAO;QACLC;QACAC;IACF;AACF;AAEO,SAASE,kBAAkBC,MAAM,KAAK;IAC3C,MAAMC,gBAAgBC,oBAAoB,iBAAiB,CACzDC;IAEF,MAAMC,qBAAqBC,QAAQ,MAAM,CAAC;IAE1C,MAAMC,WAAWC,OAAO,SAAS,CAAC,GAAG;IACrC,OAAO,GAAGN,iBAAiBD,IAAI,CAAC,EAAEI,mBAAmB,CAAC,EAAEE,UAAU;AACpE;AAEO,SAASE,eAAeC,QAAgB;IAC7C,IAAIP,oBAAoB,qBAAqB,CAACQ,wBAC5C;IAEFC,OAAO,CAAC,gCAAgC,EAAEF,UAAU;AACtD;AAMO,SAASG,wBAAwBC,KAAc;IACpD,MAAMC,QAAQ,IAAIC;IAClB,MAAMC,aAAaH,SAASC,MAAM,KAAK;IACvC,MAAMG,SAASC,QAAQ,GAAG,MAAM;IAChC,IAAIF,YAAY;QACd,MAAMG,aAAaH,WAAW,KAAK,CAAC;QACpC,KAAK,MAAMI,QAAQD,WACjB,IACEC,KAAK,QAAQ,CAAC,aACdA,KAAK,QAAQ,CAAC,aACdA,KAAK,QAAQ,CAAC,UACdA,KAAK,QAAQ,CAAC,QACd;YACA,MAAMC,QAAQD,KAAK,KAAK,CAAC;YACzB,IAAIC,OAAO,CAAC,EAAE,EAAE;gBACd,MAAMC,iBAAiBD,KAAK,CAAC,EAAE,CAC5B,OAAO,CAACJ,QAAQ,IAChB,IAAI,GACJ,OAAO,CAAC,OAAO;gBAClB,OAAOK;YACT;QACF;IAEJ;IACA,OAAO;AACT;AAEA,MAAMC,gBAAgB,IAAIC;AAEnB,SAASC,gBAAgBC,QAAiB;IAC/C,IAAIC,WAAWD,YAAYd;IAC3B,IAAI,CAACe,UAAU;QACbA,WAAWpB;QACXqB,QAAQ,IAAI,CACV;IAEJ;IAEA,IAAIL,cAAc,GAAG,CAACI,WAAW;QAC/B,MAAME,eAAeN,cAAc,GAAG,CAACI;QACvC,IAAIE,AAAiBC,WAAjBD,cACFN,cAAc,GAAG,CAACI,UAAUE,eAAe;IAE/C,OACEN,cAAc,GAAG,CAACI,UAAU;IAE9B,OAAO,GAAGA,SAAS,CAAC,EAAEJ,cAAc,GAAG,CAACI,WAAW;AACrD;AAEO,SAASI,wBACdC,eAAoC;IAEpC,OAAO,CAAC,CACNA,CAAAA,gBAAgB,IAAI,IACpBC,MAAM,OAAO,CAACD,gBAAgB,IAAI,KAClCA,AAAgC,MAAhCA,gBAAgB,IAAI,CAAC,MAAM,AAAK;AAEpC;AAEO,SAASE,qBACdF,eAAoC;IAEpC,IAAI,CAACA,iBACH;IAGF,IAAIA,gBAAgB,IAAI,EAAE;QAExB,MAAMG,OAAO;YACX,MAAMH,gBAAgB,IAAI,CAAC,EAAE;YAC7B,KAAKA,gBAAgB,IAAI,CAAC,EAAE;YAC5B,OAAOA,gBAAgB,IAAI,CAAC,EAAE,GAAGA,gBAAgB,IAAI,CAAC,EAAE,GAAG;YAC3D,QAAQA,gBAAgB,IAAI,CAAC,EAAE,GAAGA,gBAAgB,IAAI,CAAC,EAAE,GAAG;QAC9D;QAEA,MAAMI,UAAUC,sBACdF,MACA,AAAkC,YAAlC,OAAOH,gBAAgB,MAAM,GACzBA,gBAAgB,MAAM,GACtBA,gBAAgB,MAAM,EAAE,UAAU;QAExC,OAAOI;IACT;AAGF;AAEO,eAAeE,sBACpBC,OAGC,EACDC,UAA2C,EAC3CC,WAAwB,EACxBC,SAA8B;IAE9B,IAAI,CAACF,YACH;IAGF,IAAIE,AAAc,UAAdA,WAAqB,YACvBC,MAAW,iCAAiCF;IAI9C,IAAI,CAACF,QAAQ,SAAS,EAAE,mBACtB;IAGF,IAAI,CAACA,QAAQ,iBAAiB,CAAC,uBAAuB,EAAE,YACtDI,MACE;IAKJ,IAAI;QACF,MAAMR,OACJ,MAAMI,QAAQ,iBAAiB,CAAC,uBAAuB,CAACC;QAC1D,MAAMJ,UAA+B;YACnC,QAAQ;gBACNQ,KAAK,KAAK,CAACT,KAAK,IAAI,GAAGA,KAAK,KAAK,GAAG;gBACpCS,KAAK,KAAK,CAACT,KAAK,GAAG,GAAGA,KAAK,MAAM,GAAG;aACrC;YACDA;YACA,aACE,AAAuB,YAAvB,OAAOM,cACHA,cACAA,YAAY,MAAM,IAAI;QAC9B;QAEAE,MAAW,yBAAyBF;QACpC,OAAOL;IACT,EAAE,OAAOtB,OAAO;QACd6B,MAAW,qCAAqC7B;QAChD;IACF;AACF;AAIO,MAAM+B,qBAAqB,IAEvBC;AAUJ,MAAMC,cAAc,CACzBC;IAKA,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAO;QACL,YAAYA;QACZ,kBAAkBlB;IACpB;IAEF,OAAO;QACL,YAAYkB,OAAO,MAAM;QACzB,kBAAkBA,OAAO,MAAM,GAC3B;YACE,QAAQA,OAAO,MAAM;YACrB,yBAAyB,CAAC,CAACA,OAAO,uBAAuB;QAC3D,IACAlB;IACN;AACF"}
|
|
1
|
+
{"version":3,"file":"agent/utils.mjs","sources":["../../../src/agent/utils.ts"],"sourcesContent":["import type { TMultimodalPrompt, TUserPrompt } from '@/common';\nimport type { AbstractInterface } from '@/device';\nimport { ScreenshotItem } from '@/screenshot-item';\nimport type {\n ElementCacheFeature,\n LocateResultElement,\n PlanningLocateParam,\n Rect,\n UIContext,\n} from '@/types';\nimport { uploadTestInfoToServer } from '@/utils';\nimport {\n MIDSCENE_REPORT_QUIET,\n MIDSCENE_REPORT_TAG_NAME,\n globalConfigManager,\n} from '@midscene/shared/env';\nimport { generateElementByRect } from '@midscene/shared/extractor';\nimport { imageInfoOfBase64, resizeImgBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { _keyDefinitions } from '@midscene/shared/us-keyboard-layout';\nimport { assert, logMsg, uuid } from '@midscene/shared/utils';\nimport dayjs from 'dayjs';\nimport type { TaskCache } from './task-cache';\nimport { debug as cacheDebug } from './task-cache';\n\nexport async function commonContextParser(\n interfaceInstance: AbstractInterface,\n _opt: { uploadServerUrl?: string },\n): Promise<UIContext> {\n const debug = getDebug('commonContextParser');\n\n assert(interfaceInstance, 'interfaceInstance is required');\n\n debug('Getting interface description');\n const description = interfaceInstance.describe?.() || '';\n debug('Interface description end');\n\n debug('Uploading test info to server');\n uploadTestInfoToServer({\n testUrl: description,\n serverUrl: _opt.uploadServerUrl,\n });\n debug('UploadTestInfoToServer end');\n\n debug('will get size');\n const interfaceSize = await interfaceInstance.size();\n const { width: logicalWidth, height: logicalHeight } = interfaceSize;\n\n if ((interfaceSize as unknown as { dpr: number }).dpr) {\n console.warn(\n 'Warning: return value of interface.size() include a dpr property, which is not expected and ignored. ',\n );\n }\n\n if (!Number.isFinite(logicalWidth) || !Number.isFinite(logicalHeight)) {\n throw new Error(\n `Invalid interface size: width and height must be finite numbers. Received width: ${logicalWidth}, height: ${logicalHeight}`,\n );\n }\n debug(`size: ${logicalWidth}x${logicalHeight}`);\n\n const screenshotBase64 = await interfaceInstance.screenshotBase64();\n assert(screenshotBase64!, 'screenshotBase64 is required');\n\n // Get physical screenshot dimensions\n debug('will get screenshot dimensions');\n const { width: imgWidth, height: imgHeight } =\n await imageInfoOfBase64(screenshotBase64);\n debug('screenshot dimensions', imgWidth, 'x', imgHeight);\n\n const shrinkFactor = imgWidth / logicalWidth;\n\n debug('calculated shrink factor:', shrinkFactor);\n\n if (shrinkFactor !== 1) {\n const targetWidth = Math.round(imgWidth / shrinkFactor);\n const targetHeight = Math.round(imgHeight / shrinkFactor);\n\n debug(\n `Applying screenshot shrink factor: ${shrinkFactor} (physical: ${imgWidth}x${imgHeight} -> target: ${targetWidth}x${targetHeight})`,\n );\n\n const resizedBase64 = await resizeImgBase64(screenshotBase64, {\n width: targetWidth,\n height: targetHeight,\n });\n return {\n shotSize: {\n width: targetWidth,\n height: targetHeight,\n },\n deprecatedDpr: shrinkFactor,\n screenshot: ScreenshotItem.create(resizedBase64),\n };\n }\n return {\n shotSize: {\n width: imgWidth,\n height: imgHeight,\n },\n deprecatedDpr: 1,\n screenshot: ScreenshotItem.create(screenshotBase64),\n };\n}\n\nexport function getReportFileName(tag = 'web') {\n const reportTagName = globalConfigManager.getEnvConfigValue(\n MIDSCENE_REPORT_TAG_NAME,\n );\n const dateTimeInFileName = dayjs().format('YYYY-MM-DD_HH-mm-ss');\n // ensure uniqueness at the same time\n const uniqueId = uuid().substring(0, 8);\n return `${reportTagName || tag}-${dateTimeInFileName}-${uniqueId}`;\n}\n\nexport function printReportMsg(filepath: string) {\n if (globalConfigManager.getEnvConfigInBoolean(MIDSCENE_REPORT_QUIET)) {\n return;\n }\n logMsg(`Midscene - report file updated: ${filepath}`);\n}\n\n/**\n * Get the current execution file name\n * @returns The name of the current execution file\n */\nexport function getCurrentExecutionFile(trace?: string): string | false {\n const error = new Error();\n const stackTrace = trace || error.stack;\n const pkgDir = process.cwd() || '';\n if (stackTrace) {\n const stackLines = stackTrace.split('\\n');\n for (const line of stackLines) {\n if (\n line.includes('.spec.') ||\n line.includes('.test.') ||\n line.includes('.ts') ||\n line.includes('.js')\n ) {\n const match = line.match(/(?:at\\s+)?(.*?\\.(?:spec|test)\\.[jt]s)/);\n if (match?.[1]) {\n const targetFileName = match[1]\n .replace(pkgDir, '')\n .trim()\n .replace('at ', '');\n return targetFileName;\n }\n }\n }\n }\n return false;\n}\n\nconst testFileIndex = new Map<string, number>();\n\nexport function generateCacheId(fileName?: string): string {\n let taskFile = fileName || getCurrentExecutionFile();\n if (!taskFile) {\n taskFile = uuid();\n console.warn(\n 'Midscene - using random UUID for cache id. Cache may be invalid.',\n );\n }\n\n if (testFileIndex.has(taskFile)) {\n const currentIndex = testFileIndex.get(taskFile);\n if (currentIndex !== undefined) {\n testFileIndex.set(taskFile, currentIndex + 1);\n }\n } else {\n testFileIndex.set(taskFile, 1);\n }\n return `${taskFile}-${testFileIndex.get(taskFile)}`;\n}\n\nexport function ifPlanLocateParamIsBbox(\n planLocateParam: PlanningLocateParam,\n): boolean {\n return !!(\n planLocateParam.bbox &&\n Array.isArray(planLocateParam.bbox) &&\n planLocateParam.bbox.length === 4\n );\n}\n\nexport function matchElementFromPlan(\n planLocateParam: PlanningLocateParam,\n): LocateResultElement | undefined {\n if (!planLocateParam) {\n return undefined;\n }\n\n if (planLocateParam.bbox) {\n // Convert bbox [x1, y1, x2, y2] to rect {left, top, width, height}\n const rect = {\n left: planLocateParam.bbox[0],\n top: planLocateParam.bbox[1],\n width: planLocateParam.bbox[2] - planLocateParam.bbox[0] + 1,\n height: planLocateParam.bbox[3] - planLocateParam.bbox[1] + 1,\n };\n\n const element = generateElementByRect(\n rect,\n typeof planLocateParam.prompt === 'string'\n ? planLocateParam.prompt\n : planLocateParam.prompt?.prompt || '',\n );\n return element;\n }\n\n return undefined;\n}\n\nexport async function matchElementFromCache(\n context: {\n taskCache?: TaskCache;\n interfaceInstance: AbstractInterface;\n },\n cacheEntry: ElementCacheFeature | undefined,\n cachePrompt: TUserPrompt,\n cacheable: boolean | undefined,\n): Promise<LocateResultElement | undefined> {\n if (!cacheEntry) {\n return undefined;\n }\n\n if (cacheable === false) {\n cacheDebug('cache disabled for prompt: %s', cachePrompt);\n return undefined;\n }\n\n if (!context.taskCache?.isCacheResultUsed) {\n return undefined;\n }\n\n if (!context.interfaceInstance.rectMatchesCacheFeature) {\n cacheDebug(\n 'interface does not implement rectMatchesCacheFeature, skip cache',\n );\n return undefined;\n }\n\n try {\n const rect =\n await context.interfaceInstance.rectMatchesCacheFeature(cacheEntry);\n const element: LocateResultElement = {\n center: [\n Math.round(rect.left + rect.width / 2),\n Math.round(rect.top + rect.height / 2),\n ],\n rect,\n description:\n typeof cachePrompt === 'string'\n ? cachePrompt\n : cachePrompt.prompt || '',\n };\n\n cacheDebug('cache hit, prompt: %s', cachePrompt);\n return element;\n } catch (error) {\n cacheDebug('rectMatchesCacheFeature error: %s', error);\n return undefined;\n }\n}\n\ndeclare const __VERSION__: string | undefined;\n\nexport const getMidsceneVersion = (): string => {\n if (typeof __VERSION__ !== 'undefined') {\n return __VERSION__;\n } else if (\n process.env.__VERSION__ &&\n process.env.__VERSION__ !== 'undefined'\n ) {\n return process.env.__VERSION__;\n }\n throw new Error('__VERSION__ inject failed during build');\n};\n\nexport const parsePrompt = (\n prompt: TUserPrompt,\n): {\n textPrompt: string;\n multimodalPrompt?: TMultimodalPrompt;\n} => {\n if (typeof prompt === 'string') {\n return {\n textPrompt: prompt,\n multimodalPrompt: undefined,\n };\n }\n return {\n textPrompt: prompt.prompt,\n multimodalPrompt: prompt.images\n ? {\n images: prompt.images,\n convertHttpImage2Base64: !!prompt.convertHttpImage2Base64,\n }\n : undefined,\n };\n};\n\n/**\n * Transform coordinates from screenshot coordinate system to logical coordinate system.\n * When shrunkShotToLogicalRatio > 1, the screenshot is larger than logical size,\n * so we need to divide coordinates by shrunkShotToLogicalRatio.\n *\n * @param element - The locate result element with coordinates in screenshot space\n * @param shrunkShotToLogicalRatio - The ratio of screenshot size to logical size\n * @returns A new element with coordinates transformed to logical space\n */\nexport const transformScreenshotElementToLogical = (\n element: LocateResultElement,\n shrunkShotToLogicalRatio: number,\n): LocateResultElement => {\n if (shrunkShotToLogicalRatio === 1) {\n return element;\n }\n\n return {\n ...element,\n center: [\n Math.round(element.center[0] / shrunkShotToLogicalRatio),\n Math.round(element.center[1] / shrunkShotToLogicalRatio),\n ],\n rect: {\n ...element.rect,\n left: Math.round(element.rect.left / shrunkShotToLogicalRatio),\n top: Math.round(element.rect.top / shrunkShotToLogicalRatio),\n width: Math.round(element.rect.width / shrunkShotToLogicalRatio),\n height: Math.round(element.rect.height / shrunkShotToLogicalRatio),\n },\n };\n};\n\nexport const transformLogicalElementToScreenshot = (\n element: LocateResultElement,\n shrunkShotToLogicalRatio: number,\n): LocateResultElement => {\n if (shrunkShotToLogicalRatio === 1) {\n return element;\n }\n\n return {\n ...element,\n center: [\n Math.round(element.center[0] * shrunkShotToLogicalRatio),\n Math.round(element.center[1] * shrunkShotToLogicalRatio),\n ],\n rect: {\n ...element.rect,\n left: Math.round(element.rect.left * shrunkShotToLogicalRatio),\n top: Math.round(element.rect.top * shrunkShotToLogicalRatio),\n width: Math.round(element.rect.width * shrunkShotToLogicalRatio),\n height: Math.round(element.rect.height * shrunkShotToLogicalRatio),\n },\n };\n};\n\nexport const transformLogicalRectToScreenshotRect = (\n rect: Rect,\n shrunkShotToLogicalRatio: number,\n): Rect => {\n if (shrunkShotToLogicalRatio === 1) {\n return rect;\n }\n\n return {\n ...rect,\n left: Math.round(rect.left * shrunkShotToLogicalRatio),\n top: Math.round(rect.top * shrunkShotToLogicalRatio),\n width: Math.round(rect.width * shrunkShotToLogicalRatio),\n height: Math.round(rect.height * shrunkShotToLogicalRatio),\n };\n};\n"],"names":["commonContextParser","interfaceInstance","_opt","debug","getDebug","assert","description","uploadTestInfoToServer","interfaceSize","logicalWidth","logicalHeight","console","Number","Error","screenshotBase64","imgWidth","imgHeight","imageInfoOfBase64","shrinkFactor","targetWidth","Math","targetHeight","resizedBase64","resizeImgBase64","ScreenshotItem","getReportFileName","tag","reportTagName","globalConfigManager","MIDSCENE_REPORT_TAG_NAME","dateTimeInFileName","dayjs","uniqueId","uuid","printReportMsg","filepath","MIDSCENE_REPORT_QUIET","logMsg","getCurrentExecutionFile","trace","error","stackTrace","pkgDir","process","stackLines","line","match","targetFileName","testFileIndex","Map","generateCacheId","fileName","taskFile","currentIndex","undefined","ifPlanLocateParamIsBbox","planLocateParam","Array","matchElementFromPlan","rect","element","generateElementByRect","matchElementFromCache","context","cacheEntry","cachePrompt","cacheable","cacheDebug","getMidsceneVersion","__VERSION__","parsePrompt","prompt","transformScreenshotElementToLogical","shrunkShotToLogicalRatio","transformLogicalElementToScreenshot","transformLogicalRectToScreenshotRect"],"mappings":";;;;;;;;;AAyBO,eAAeA,oBACpBC,iBAAoC,EACpCC,IAAkC;IAElC,MAAMC,QAAQC,SAAS;IAEvBC,OAAOJ,mBAAmB;IAE1BE,MAAM;IACN,MAAMG,cAAcL,kBAAkB,QAAQ,QAAQ;IACtDE,MAAM;IAENA,MAAM;IACNI,uBAAuB;QACrB,SAASD;QACT,WAAWJ,KAAK,eAAe;IACjC;IACAC,MAAM;IAENA,MAAM;IACN,MAAMK,gBAAgB,MAAMP,kBAAkB,IAAI;IAClD,MAAM,EAAE,OAAOQ,YAAY,EAAE,QAAQC,aAAa,EAAE,GAAGF;IAEvD,IAAKA,cAA6C,GAAG,EACnDG,QAAQ,IAAI,CACV;IAIJ,IAAI,CAACC,OAAO,QAAQ,CAACH,iBAAiB,CAACG,OAAO,QAAQ,CAACF,gBACrD,MAAM,IAAIG,MACR,CAAC,iFAAiF,EAAEJ,aAAa,UAAU,EAAEC,eAAe;IAGhIP,MAAM,CAAC,MAAM,EAAEM,aAAa,CAAC,EAAEC,eAAe;IAE9C,MAAMI,mBAAmB,MAAMb,kBAAkB,gBAAgB;IACjEI,OAAOS,kBAAmB;IAG1BX,MAAM;IACN,MAAM,EAAE,OAAOY,QAAQ,EAAE,QAAQC,SAAS,EAAE,GAC1C,MAAMC,kBAAkBH;IAC1BX,MAAM,yBAAyBY,UAAU,KAAKC;IAE9C,MAAME,eAAeH,WAAWN;IAEhCN,MAAM,6BAA6Be;IAEnC,IAAIA,AAAiB,MAAjBA,cAAoB;QACtB,MAAMC,cAAcC,KAAK,KAAK,CAACL,WAAWG;QAC1C,MAAMG,eAAeD,KAAK,KAAK,CAACJ,YAAYE;QAE5Cf,MACE,CAAC,mCAAmC,EAAEe,aAAa,YAAY,EAAEH,SAAS,CAAC,EAAEC,UAAU,YAAY,EAAEG,YAAY,CAAC,EAAEE,aAAa,CAAC,CAAC;QAGrI,MAAMC,gBAAgB,MAAMC,gBAAgBT,kBAAkB;YAC5D,OAAOK;YACP,QAAQE;QACV;QACA,OAAO;YACL,UAAU;gBACR,OAAOF;gBACP,QAAQE;YACV;YACA,eAAeH;YACf,YAAYM,eAAe,MAAM,CAACF;QACpC;IACF;IACA,OAAO;QACL,UAAU;YACR,OAAOP;YACP,QAAQC;QACV;QACA,eAAe;QACf,YAAYQ,eAAe,MAAM,CAACV;IACpC;AACF;AAEO,SAASW,kBAAkBC,MAAM,KAAK;IAC3C,MAAMC,gBAAgBC,oBAAoB,iBAAiB,CACzDC;IAEF,MAAMC,qBAAqBC,QAAQ,MAAM,CAAC;IAE1C,MAAMC,WAAWC,OAAO,SAAS,CAAC,GAAG;IACrC,OAAO,GAAGN,iBAAiBD,IAAI,CAAC,EAAEI,mBAAmB,CAAC,EAAEE,UAAU;AACpE;AAEO,SAASE,eAAeC,QAAgB;IAC7C,IAAIP,oBAAoB,qBAAqB,CAACQ,wBAC5C;IAEFC,OAAO,CAAC,gCAAgC,EAAEF,UAAU;AACtD;AAMO,SAASG,wBAAwBC,KAAc;IACpD,MAAMC,QAAQ,IAAI3B;IAClB,MAAM4B,aAAaF,SAASC,MAAM,KAAK;IACvC,MAAME,SAASC,QAAQ,GAAG,MAAM;IAChC,IAAIF,YAAY;QACd,MAAMG,aAAaH,WAAW,KAAK,CAAC;QACpC,KAAK,MAAMI,QAAQD,WACjB,IACEC,KAAK,QAAQ,CAAC,aACdA,KAAK,QAAQ,CAAC,aACdA,KAAK,QAAQ,CAAC,UACdA,KAAK,QAAQ,CAAC,QACd;YACA,MAAMC,QAAQD,KAAK,KAAK,CAAC;YACzB,IAAIC,OAAO,CAAC,EAAE,EAAE;gBACd,MAAMC,iBAAiBD,KAAK,CAAC,EAAE,CAC5B,OAAO,CAACJ,QAAQ,IAChB,IAAI,GACJ,OAAO,CAAC,OAAO;gBAClB,OAAOK;YACT;QACF;IAEJ;IACA,OAAO;AACT;AAEA,MAAMC,gBAAgB,IAAIC;AAEnB,SAASC,gBAAgBC,QAAiB;IAC/C,IAAIC,WAAWD,YAAYb;IAC3B,IAAI,CAACc,UAAU;QACbA,WAAWnB;QACXtB,QAAQ,IAAI,CACV;IAEJ;IAEA,IAAIqC,cAAc,GAAG,CAACI,WAAW;QAC/B,MAAMC,eAAeL,cAAc,GAAG,CAACI;QACvC,IAAIC,AAAiBC,WAAjBD,cACFL,cAAc,GAAG,CAACI,UAAUC,eAAe;IAE/C,OACEL,cAAc,GAAG,CAACI,UAAU;IAE9B,OAAO,GAAGA,SAAS,CAAC,EAAEJ,cAAc,GAAG,CAACI,WAAW;AACrD;AAEO,SAASG,wBACdC,eAAoC;IAEpC,OAAO,CAAC,CACNA,CAAAA,gBAAgB,IAAI,IACpBC,MAAM,OAAO,CAACD,gBAAgB,IAAI,KAClCA,AAAgC,MAAhCA,gBAAgB,IAAI,CAAC,MAAM,AAAK;AAEpC;AAEO,SAASE,qBACdF,eAAoC;IAEpC,IAAI,CAACA,iBACH;IAGF,IAAIA,gBAAgB,IAAI,EAAE;QAExB,MAAMG,OAAO;YACX,MAAMH,gBAAgB,IAAI,CAAC,EAAE;YAC7B,KAAKA,gBAAgB,IAAI,CAAC,EAAE;YAC5B,OAAOA,gBAAgB,IAAI,CAAC,EAAE,GAAGA,gBAAgB,IAAI,CAAC,EAAE,GAAG;YAC3D,QAAQA,gBAAgB,IAAI,CAAC,EAAE,GAAGA,gBAAgB,IAAI,CAAC,EAAE,GAAG;QAC9D;QAEA,MAAMI,UAAUC,sBACdF,MACA,AAAkC,YAAlC,OAAOH,gBAAgB,MAAM,GACzBA,gBAAgB,MAAM,GACtBA,gBAAgB,MAAM,EAAE,UAAU;QAExC,OAAOI;IACT;AAGF;AAEO,eAAeE,sBACpBC,OAGC,EACDC,UAA2C,EAC3CC,WAAwB,EACxBC,SAA8B;IAE9B,IAAI,CAACF,YACH;IAGF,IAAIE,AAAc,UAAdA,WAAqB,YACvBC,8BAAW,iCAAiCF;IAI9C,IAAI,CAACF,QAAQ,SAAS,EAAE,mBACtB;IAGF,IAAI,CAACA,QAAQ,iBAAiB,CAAC,uBAAuB,EAAE,YACtDI,8BACE;IAKJ,IAAI;QACF,MAAMR,OACJ,MAAMI,QAAQ,iBAAiB,CAAC,uBAAuB,CAACC;QAC1D,MAAMJ,UAA+B;YACnC,QAAQ;gBACNxC,KAAK,KAAK,CAACuC,KAAK,IAAI,GAAGA,KAAK,KAAK,GAAG;gBACpCvC,KAAK,KAAK,CAACuC,KAAK,GAAG,GAAGA,KAAK,MAAM,GAAG;aACrC;YACDA;YACA,aACE,AAAuB,YAAvB,OAAOM,cACHA,cACAA,YAAY,MAAM,IAAI;QAC9B;QAEAE,8BAAW,yBAAyBF;QACpC,OAAOL;IACT,EAAE,OAAOpB,OAAO;QACd2B,8BAAW,qCAAqC3B;QAChD;IACF;AACF;AAIO,MAAM4B,qBAAqB,IAEvBC;AAUJ,MAAMC,cAAc,CACzBC;IAKA,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAO;QACL,YAAYA;QACZ,kBAAkBjB;IACpB;IAEF,OAAO;QACL,YAAYiB,OAAO,MAAM;QACzB,kBAAkBA,OAAO,MAAM,GAC3B;YACE,QAAQA,OAAO,MAAM;YACrB,yBAAyB,CAAC,CAACA,OAAO,uBAAuB;QAC3D,IACAjB;IACN;AACF;AAWO,MAAMkB,sCAAsC,CACjDZ,SACAa;IAEA,IAAIA,AAA6B,MAA7BA,0BACF,OAAOb;IAGT,OAAO;QACL,GAAGA,OAAO;QACV,QAAQ;YACNxC,KAAK,KAAK,CAACwC,QAAQ,MAAM,CAAC,EAAE,GAAGa;YAC/BrD,KAAK,KAAK,CAACwC,QAAQ,MAAM,CAAC,EAAE,GAAGa;SAChC;QACD,MAAM;YACJ,GAAGb,QAAQ,IAAI;YACf,MAAMxC,KAAK,KAAK,CAACwC,QAAQ,IAAI,CAAC,IAAI,GAAGa;YACrC,KAAKrD,KAAK,KAAK,CAACwC,QAAQ,IAAI,CAAC,GAAG,GAAGa;YACnC,OAAOrD,KAAK,KAAK,CAACwC,QAAQ,IAAI,CAAC,KAAK,GAAGa;YACvC,QAAQrD,KAAK,KAAK,CAACwC,QAAQ,IAAI,CAAC,MAAM,GAAGa;QAC3C;IACF;AACF;AAEO,MAAMC,sCAAsC,CACjDd,SACAa;IAEA,IAAIA,AAA6B,MAA7BA,0BACF,OAAOb;IAGT,OAAO;QACL,GAAGA,OAAO;QACV,QAAQ;YACNxC,KAAK,KAAK,CAACwC,QAAQ,MAAM,CAAC,EAAE,GAAGa;YAC/BrD,KAAK,KAAK,CAACwC,QAAQ,MAAM,CAAC,EAAE,GAAGa;SAChC;QACD,MAAM;YACJ,GAAGb,QAAQ,IAAI;YACf,MAAMxC,KAAK,KAAK,CAACwC,QAAQ,IAAI,CAAC,IAAI,GAAGa;YACrC,KAAKrD,KAAK,KAAK,CAACwC,QAAQ,IAAI,CAAC,GAAG,GAAGa;YACnC,OAAOrD,KAAK,KAAK,CAACwC,QAAQ,IAAI,CAAC,KAAK,GAAGa;YACvC,QAAQrD,KAAK,KAAK,CAACwC,QAAQ,IAAI,CAAC,MAAM,GAAGa;QAC3C;IACF;AACF;AAEO,MAAME,uCAAuC,CAClDhB,MACAc;IAEA,IAAIA,AAA6B,MAA7BA,0BACF,OAAOd;IAGT,OAAO;QACL,GAAGA,IAAI;QACP,MAAMvC,KAAK,KAAK,CAACuC,KAAK,IAAI,GAAGc;QAC7B,KAAKrD,KAAK,KAAK,CAACuC,KAAK,GAAG,GAAGc;QAC3B,OAAOrD,KAAK,KAAK,CAACuC,KAAK,KAAK,GAAGc;QAC/B,QAAQrD,KAAK,KAAK,CAACuC,KAAK,MAAM,GAAGc;IACnC;AACF"}
|
|
@@ -45,7 +45,7 @@ async function autoGLMPlanning(userInstruction, options) {
|
|
|
45
45
|
debug('action in response:', parsedResponse.content);
|
|
46
46
|
const parsedAction = parseAction(parsedResponse);
|
|
47
47
|
debug('Parsed action object:', parsedAction);
|
|
48
|
-
transformedActions = transformAutoGLMAction(parsedAction, context.
|
|
48
|
+
transformedActions = transformAutoGLMAction(parsedAction, context.shotSize);
|
|
49
49
|
debug('Transformed actions:', transformedActions);
|
|
50
50
|
} catch (parseError) {
|
|
51
51
|
const errorMessage = parseError instanceof Error ? parseError.message : String(parseError);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/auto-glm/planning.mjs","sources":["../../../../src/ai-model/auto-glm/planning.ts"],"sourcesContent":["import type { PlanningAIResponse, UIContext } from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { ConversationHistory } from '../conversation-history';\nimport {\n AIResponseParseError,\n callAIWithStringResponse,\n} from '../service-caller/index';\nimport { transformAutoGLMAction } from './actions';\nimport { parseAction, parseAutoGLMResponse } from './parser';\nimport { getAutoGLMPlanPrompt } from './prompt';\n\nconst debug = getDebug('auto-glm-planning');\n\nexport async function autoGLMPlanning(\n userInstruction: string,\n options: {\n conversationHistory: ConversationHistory;\n context: UIContext;\n modelConfig: IModelConfig;\n actionContext?: string;\n },\n): Promise<PlanningAIResponse> {\n const { conversationHistory, context, modelConfig, actionContext } = options;\n\n const systemPrompt =\n getAutoGLMPlanPrompt(modelConfig.modelFamily) +\n (actionContext\n ? `<high_priority_knowledge>${actionContext}</high_priority_knowledge>`\n : '');\n\n const imagePayloadBase64 = context.screenshot.base64;\n\n conversationHistory.append({\n role: 'user',\n content: [{ type: 'text', text: userInstruction }],\n });\n conversationHistory.append({\n role: 'user',\n content: [{ type: 'image_url', image_url: { url: imagePayloadBase64 } }],\n });\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...conversationHistory.snapshot(1),\n ];\n\n const { content: rawResponse, usage } = await callAIWithStringResponse(\n msgs,\n modelConfig,\n );\n\n debug('autoGLMPlanning rawResponse:', rawResponse);\n\n let parsedResponse: ReturnType<typeof parseAutoGLMResponse>;\n let transformedActions: ReturnType<typeof transformAutoGLMAction>;\n\n try {\n parsedResponse = parseAutoGLMResponse(rawResponse);\n debug('thinking in response:', parsedResponse.think);\n debug('action in response:', parsedResponse.content);\n\n const parsedAction = parseAction(parsedResponse);\n debug('Parsed action object:', parsedAction);\n transformedActions = transformAutoGLMAction(parsedAction, context.
|
|
1
|
+
{"version":3,"file":"ai-model/auto-glm/planning.mjs","sources":["../../../../src/ai-model/auto-glm/planning.ts"],"sourcesContent":["import type { PlanningAIResponse, UIContext } from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { ConversationHistory } from '../conversation-history';\nimport {\n AIResponseParseError,\n callAIWithStringResponse,\n} from '../service-caller/index';\nimport { transformAutoGLMAction } from './actions';\nimport { parseAction, parseAutoGLMResponse } from './parser';\nimport { getAutoGLMPlanPrompt } from './prompt';\n\nconst debug = getDebug('auto-glm-planning');\n\nexport async function autoGLMPlanning(\n userInstruction: string,\n options: {\n conversationHistory: ConversationHistory;\n context: UIContext;\n modelConfig: IModelConfig;\n actionContext?: string;\n },\n): Promise<PlanningAIResponse> {\n const { conversationHistory, context, modelConfig, actionContext } = options;\n\n const systemPrompt =\n getAutoGLMPlanPrompt(modelConfig.modelFamily) +\n (actionContext\n ? `<high_priority_knowledge>${actionContext}</high_priority_knowledge>`\n : '');\n\n const imagePayloadBase64 = context.screenshot.base64;\n\n conversationHistory.append({\n role: 'user',\n content: [{ type: 'text', text: userInstruction }],\n });\n conversationHistory.append({\n role: 'user',\n content: [{ type: 'image_url', image_url: { url: imagePayloadBase64 } }],\n });\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...conversationHistory.snapshot(1),\n ];\n\n const { content: rawResponse, usage } = await callAIWithStringResponse(\n msgs,\n modelConfig,\n );\n\n debug('autoGLMPlanning rawResponse:', rawResponse);\n\n let parsedResponse: ReturnType<typeof parseAutoGLMResponse>;\n let transformedActions: ReturnType<typeof transformAutoGLMAction>;\n\n try {\n parsedResponse = parseAutoGLMResponse(rawResponse);\n debug('thinking in response:', parsedResponse.think);\n debug('action in response:', parsedResponse.content);\n\n const parsedAction = parseAction(parsedResponse);\n debug('Parsed action object:', parsedAction);\n transformedActions = transformAutoGLMAction(parsedAction, context.shotSize);\n debug('Transformed actions:', transformedActions);\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `Parse error: ${errorMessage}`,\n JSON.stringify(rawResponse, undefined, 2),\n usage,\n );\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: `<think>${parsedResponse.think}</think><answer>${parsedResponse.content}</answer>`,\n });\n\n const shouldContinuePlanning = !parsedResponse.content.startsWith('finish(');\n\n return {\n actions: transformedActions,\n log: rawResponse,\n usage,\n shouldContinuePlanning,\n rawResponse: JSON.stringify(rawResponse, undefined, 2),\n };\n}\n"],"names":["debug","getDebug","autoGLMPlanning","userInstruction","options","conversationHistory","context","modelConfig","actionContext","systemPrompt","getAutoGLMPlanPrompt","imagePayloadBase64","msgs","rawResponse","usage","callAIWithStringResponse","parsedResponse","transformedActions","parseAutoGLMResponse","parsedAction","parseAction","transformAutoGLMAction","parseError","errorMessage","Error","String","AIResponseParseError","JSON","undefined","shouldContinuePlanning"],"mappings":";;;;;AAaA,MAAMA,QAAQC,SAAS;AAEhB,eAAeC,gBACpBC,eAAuB,EACvBC,OAKC;IAED,MAAM,EAAEC,mBAAmB,EAAEC,OAAO,EAAEC,WAAW,EAAEC,aAAa,EAAE,GAAGJ;IAErE,MAAMK,eACJC,qBAAqBH,YAAY,WAAW,IAC3CC,CAAAA,gBACG,CAAC,yBAAyB,EAAEA,cAAc,0BAA0B,CAAC,GACrE,EAAC;IAEP,MAAMG,qBAAqBL,QAAQ,UAAU,CAAC,MAAM;IAEpDD,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YAAC;gBAAE,MAAM;gBAAQ,MAAMF;YAAgB;SAAE;IACpD;IACAE,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YAAC;gBAAE,MAAM;gBAAa,WAAW;oBAAE,KAAKM;gBAAmB;YAAE;SAAE;IAC1E;IAEA,MAAMC,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASH;QAAa;WACrCJ,oBAAoB,QAAQ,CAAC;KACjC;IAED,MAAM,EAAE,SAASQ,WAAW,EAAEC,KAAK,EAAE,GAAG,MAAMC,yBAC5CH,MACAL;IAGFP,MAAM,gCAAgCa;IAEtC,IAAIG;IACJ,IAAIC;IAEJ,IAAI;QACFD,iBAAiBE,qBAAqBL;QACtCb,MAAM,yBAAyBgB,eAAe,KAAK;QACnDhB,MAAM,uBAAuBgB,eAAe,OAAO;QAEnD,MAAMG,eAAeC,YAAYJ;QACjChB,MAAM,yBAAyBmB;QAC/BF,qBAAqBI,uBAAuBF,cAAcb,QAAQ,QAAQ;QAC1EN,MAAM,wBAAwBiB;IAChC,EAAE,OAAOK,YAAY;QAEnB,MAAMC,eACJD,sBAAsBE,QAAQF,WAAW,OAAO,GAAGG,OAAOH;QAC5D,MAAM,IAAII,qBACR,CAAC,aAAa,EAAEH,cAAc,EAC9BI,KAAK,SAAS,CAACd,aAAae,QAAW,IACvCd;IAEJ;IAEAT,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS,CAAC,OAAO,EAAEW,eAAe,KAAK,CAAC,gBAAgB,EAAEA,eAAe,OAAO,CAAC,SAAS,CAAC;IAC7F;IAEA,MAAMa,yBAAyB,CAACb,eAAe,OAAO,CAAC,UAAU,CAAC;IAElE,OAAO;QACL,SAASC;QACT,KAAKJ;QACLC;QACAe;QACA,aAAaF,KAAK,SAAS,CAACd,aAAae,QAAW;IACtD;AACF"}
|
|
@@ -65,8 +65,8 @@ async function AiLocateElement(options) {
|
|
|
65
65
|
const userInstructionPrompt = findElementPrompt(targetElementDescriptionText);
|
|
66
66
|
const systemPrompt = isAutoGLM(modelFamily) ? getAutoGLMLocatePrompt(modelFamily) : systemPromptToLocateElement(modelFamily);
|
|
67
67
|
let imagePayload = screenshotBase64;
|
|
68
|
-
let imageWidth = context.
|
|
69
|
-
let imageHeight = context.
|
|
68
|
+
let imageWidth = context.shotSize.width;
|
|
69
|
+
let imageHeight = context.shotSize.height;
|
|
70
70
|
let originalImageWidth = imageWidth;
|
|
71
71
|
let originalImageHeight = imageHeight;
|
|
72
72
|
if (options.searchConfig) {
|
|
@@ -270,18 +270,18 @@ async function AiLocateSection(options) {
|
|
|
270
270
|
let sectionRect;
|
|
271
271
|
const sectionBbox = result.content.bbox;
|
|
272
272
|
if (sectionBbox) {
|
|
273
|
-
const targetRect = adaptBboxToRect(sectionBbox, context.
|
|
273
|
+
const targetRect = adaptBboxToRect(sectionBbox, context.shotSize.width, context.shotSize.height, 0, 0, context.shotSize.width, context.shotSize.height, modelFamily);
|
|
274
274
|
debugSection('original targetRect %j', targetRect);
|
|
275
275
|
const referenceBboxList = result.content.references_bbox || [];
|
|
276
276
|
debugSection('referenceBboxList %j', referenceBboxList);
|
|
277
|
-
const referenceRects = referenceBboxList.filter((bbox)=>Array.isArray(bbox)).map((bbox)=>adaptBboxToRect(bbox, context.
|
|
277
|
+
const referenceRects = referenceBboxList.filter((bbox)=>Array.isArray(bbox)).map((bbox)=>adaptBboxToRect(bbox, context.shotSize.width, context.shotSize.height, 0, 0, context.shotSize.width, context.shotSize.height, modelFamily));
|
|
278
278
|
debugSection('referenceRects %j', referenceRects);
|
|
279
279
|
const mergedRect = mergeRects([
|
|
280
280
|
targetRect,
|
|
281
281
|
...referenceRects
|
|
282
282
|
]);
|
|
283
283
|
debugSection('mergedRect %j', mergedRect);
|
|
284
|
-
sectionRect = expandSearchArea(mergedRect, context.
|
|
284
|
+
sectionRect = expandSearchArea(mergedRect, context.shotSize);
|
|
285
285
|
debugSection('expanded sectionRect %j', sectionRect);
|
|
286
286
|
}
|
|
287
287
|
let imageBase64 = screenshotBase64;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/inspect.mjs","sources":["../../../src/ai-model/inspect.ts"],"sourcesContent":["import type {\n AIDataExtractionResponse,\n AIElementResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n Rect,\n ServiceExtractOption,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport {\n generateElementByPoint,\n generateElementByRect,\n} from '@midscene/shared/extractor/dom-util';\nimport {\n cropByRect,\n paddingToMatchBlockByBase64,\n preProcessImageUrl,\n scaleImage,\n} from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { LocateResultElement } from '@midscene/shared/types';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport type { TMultimodalPrompt, TUserPrompt } from '../common';\nimport { adaptBboxToRect, expandSearchArea, mergeRects } from '../common';\nimport { parseAutoGLMLocateResponse } from './auto-glm/parser';\nimport { getAutoGLMLocatePrompt } from './auto-glm/prompt';\nimport { isAutoGLM } from './auto-glm/util';\nimport {\n extractDataQueryPrompt,\n parseXMLExtractionResponse,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n orderSensitiveJudgePrompt,\n systemPromptToJudgeOrderSensitive,\n} from './prompt/order-sensitive-judge';\nimport {\n AIResponseParseError,\n callAI,\n callAIWithObjectResponse,\n callAIWithStringResponse,\n} from './service-caller/index';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nconst extraTextFromUserPrompt = (prompt: TUserPrompt): string => {\n if (typeof prompt === 'string') {\n return prompt;\n } else {\n return prompt.prompt;\n }\n};\n\nconst promptsToChatParam = async (\n multimodalPrompt: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n const msgs: ChatCompletionUserMessageParam[] = [];\n if (multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const base64 = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `this is the reference image named '${item.name}':`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: base64,\n detail: 'high',\n },\n },\n ],\n });\n }\n }\n return msgs;\n};\n\nexport async function AiLocateElement(options: {\n context: UIContext;\n targetElementDescription: TUserPrompt;\n searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;\n modelConfig: IModelConfig;\n}): Promise<{\n parseResult: {\n elements: LocateResultElement[];\n errors?: string[];\n };\n rect?: Rect;\n rawResponse: string;\n usage?: AIUsageInfo;\n reasoning_content?: string;\n}> {\n const { context, targetElementDescription, modelConfig } = options;\n const { modelFamily } = modelConfig;\n const screenshotBase64 = context.screenshot.base64;\n\n assert(\n targetElementDescription,\n 'cannot find the target element description',\n );\n const targetElementDescriptionText = extraTextFromUserPrompt(\n targetElementDescription,\n );\n const userInstructionPrompt = findElementPrompt(targetElementDescriptionText);\n const systemPrompt = isAutoGLM(modelFamily)\n ? getAutoGLMLocatePrompt(modelFamily)\n : systemPromptToLocateElement(modelFamily);\n\n let imagePayload = screenshotBase64;\n let imageWidth = context.size.width;\n let imageHeight = context.size.height;\n let originalImageWidth = imageWidth;\n let originalImageHeight = imageHeight;\n\n if (options.searchConfig) {\n assert(\n options.searchConfig.rect,\n 'searchArea is provided but its rect cannot be found. Failed to locate element',\n );\n assert(\n options.searchConfig.imageBase64,\n 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element',\n );\n\n imagePayload = options.searchConfig.imageBase64;\n imageWidth = options.searchConfig.rect?.width;\n imageHeight = options.searchConfig.rect?.height;\n originalImageWidth = imageWidth;\n originalImageHeight = imageHeight;\n } else if (modelFamily === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: isAutoGLM(modelFamily)\n ? `Tap: ${userInstructionPrompt}`\n : userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof targetElementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: targetElementDescription.images,\n convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n if (isAutoGLM(modelFamily)) {\n const { content: rawResponseContent, usage } =\n await callAIWithStringResponse(msgs, modelConfig);\n\n debugInspect('auto-glm rawResponse:', rawResponseContent);\n\n const parsed = parseAutoGLMLocateResponse(rawResponseContent);\n\n debugInspect('auto-glm thinking:', parsed.think);\n debugInspect('auto-glm coordinates:', parsed.coordinates);\n\n let resRect: Rect | undefined;\n let matchedElements: LocateResultElement[] = [];\n let errors: string[] = [];\n\n if (parsed.error || !parsed.coordinates) {\n errors = [parsed.error || 'Failed to parse auto-glm response'];\n debugInspect('auto-glm parse error:', errors[0]);\n } else {\n const { x, y } = parsed.coordinates;\n\n debugInspect('auto-glm coordinates [0-999]:', { x, y });\n\n // Convert auto-glm coordinates [0,999] to pixel bbox\n // Map from [0,999] to pixel coordinates\n const pixelX = Math.round((x * imageWidth) / 1000);\n const pixelY = Math.round((y * imageHeight) / 1000);\n\n debugInspect('auto-glm pixel coordinates:', { pixelX, pixelY });\n\n // Apply offset if searching in a cropped area\n let finalX = pixelX;\n let finalY = pixelY;\n if (options.searchConfig?.rect) {\n finalX += options.searchConfig.rect.left;\n finalY += options.searchConfig.rect.top;\n }\n\n const element: LocateResultElement = generateElementByPoint(\n [finalX, finalY],\n targetElementDescriptionText as string,\n );\n\n resRect = element.rect;\n debugInspect('auto-glm resRect:', resRect);\n\n if (element) {\n matchedElements = [element];\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements,\n errors,\n },\n rawResponse: rawResponseContent,\n usage,\n reasoning_content: parsed.think,\n };\n }\n\n let res: Awaited<\n ReturnType<\n typeof callAIWithObjectResponse<AIElementResponse | [number, number]>\n >\n >;\n try {\n res = await callAIWithObjectResponse<AIElementResponse | [number, number]>(\n msgs,\n modelConfig,\n );\n } catch (callError) {\n // Return error with usage and rawResponse if available\n const errorMessage =\n callError instanceof Error ? callError.message : String(callError);\n const rawResponse =\n callError instanceof AIResponseParseError\n ? callError.rawResponse\n : errorMessage;\n const usage =\n callError instanceof AIResponseParseError ? callError.usage : undefined;\n return {\n rect: undefined,\n parseResult: {\n elements: [],\n errors: [`AI call error: ${errorMessage}`],\n },\n rawResponse,\n usage,\n reasoning_content: undefined,\n };\n }\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElements: LocateResultElement[] = [];\n let errors: string[] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n try {\n if (\n 'bbox' in res.content &&\n Array.isArray(res.content.bbox) &&\n res.content.bbox.length >= 1\n ) {\n resRect = adaptBboxToRect(\n res.content.bbox,\n imageWidth,\n imageHeight,\n options.searchConfig?.rect?.left,\n options.searchConfig?.rect?.top,\n originalImageWidth,\n originalImageHeight,\n modelFamily,\n options.searchConfig?.scale,\n );\n\n debugInspect('resRect', resRect);\n\n const element: LocateResultElement = generateElementByRect(\n resRect,\n targetElementDescriptionText as string,\n );\n errors = [];\n\n if (element) {\n matchedElements = [element];\n }\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse bbox: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements as LocateResultElement[],\n errors: errors as string[],\n },\n rawResponse,\n usage: res.usage,\n reasoning_content: res.reasoning_content,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext;\n sectionDescription: TUserPrompt;\n modelConfig: IModelConfig;\n}): Promise<{\n rect?: Rect;\n imageBase64?: string;\n scale?: number;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription, modelConfig } = options;\n const { modelFamily } = modelConfig;\n const screenshotBase64 = context.screenshot.base64;\n\n const systemPrompt = systemPromptToLocateSection(modelFamily);\n const sectionLocatorInstructionText = sectionLocatorInstruction(\n extraTextFromUserPrompt(sectionDescription),\n );\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: sectionDescription.images,\n convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n let result: Awaited<\n ReturnType<typeof callAIWithObjectResponse<AISectionLocatorResponse>>\n >;\n try {\n result = await callAIWithObjectResponse<AISectionLocatorResponse>(\n msgs,\n modelConfig,\n );\n } catch (callError) {\n // Return error with usage and rawResponse if available\n const errorMessage =\n callError instanceof Error ? callError.message : String(callError);\n const rawResponse =\n callError instanceof AIResponseParseError\n ? callError.rawResponse\n : errorMessage;\n const usage =\n callError instanceof AIResponseParseError ? callError.usage : undefined;\n return {\n rect: undefined,\n imageBase64: undefined,\n error: `AI call error: ${errorMessage}`,\n rawResponse,\n usage,\n };\n }\n\n let sectionRect: Rect | undefined;\n const sectionBbox = result.content.bbox;\n if (sectionBbox) {\n const targetRect = adaptBboxToRect(\n sectionBbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n modelFamily,\n );\n debugSection('original targetRect %j', targetRect);\n\n const referenceBboxList = result.content.references_bbox || [];\n debugSection('referenceBboxList %j', referenceBboxList);\n\n const referenceRects = referenceBboxList\n .filter((bbox) => Array.isArray(bbox))\n .map((bbox) => {\n return adaptBboxToRect(\n bbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n modelFamily,\n );\n });\n debugSection('referenceRects %j', referenceRects);\n\n // merge the sectionRect and referenceRects\n const mergedRect = mergeRects([targetRect, ...referenceRects]);\n debugSection('mergedRect %j', mergedRect);\n\n sectionRect = expandSearchArea(mergedRect, context.size);\n debugSection('expanded sectionRect %j', sectionRect);\n }\n\n let imageBase64 = screenshotBase64;\n let scale: number | undefined;\n\n if (sectionRect) {\n const originalWidth = sectionRect.width;\n const originalHeight = sectionRect.height;\n\n const croppedResult = await cropByRect(\n screenshotBase64,\n sectionRect,\n modelFamily === 'qwen2.5-vl',\n );\n\n const scaleRatio = 2;\n const scaledResult = await scaleImage(\n croppedResult.imageBase64,\n scaleRatio,\n );\n\n imageBase64 = scaledResult.imageBase64;\n scale = scaleRatio;\n sectionRect.width = scaledResult.width;\n sectionRect.height = scaledResult.height;\n\n debugSection(\n 'scaled sectionRect from %dx%d to %dx%d (scale=%d)',\n originalWidth,\n originalHeight,\n sectionRect.width,\n sectionRect.height,\n scale,\n );\n }\n\n return {\n rect: sectionRect,\n imageBase64,\n scale,\n error: result.content.error,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<T>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext;\n pageDescription?: string;\n extractOption?: ServiceExtractOption;\n modelConfig: IModelConfig;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt, modelConfig } =\n options;\n const systemPrompt = systemPromptToExtract();\n const screenshotBase64 = context.screenshot.base64;\n\n const extractDataPromptText = extractDataQueryPrompt(\n options.pageDescription || '',\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (multimodalPrompt) {\n const addOns = await promptsToChatParam({\n images: multimodalPrompt.images,\n convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const {\n content: rawResponse,\n usage,\n reasoning_content,\n } = await callAI(msgs, modelConfig);\n\n // Parse XML response to JSON object\n let parseResult: AIDataExtractionResponse<T>;\n try {\n parseResult = parseXMLExtractionResponse<T>(rawResponse);\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `XML parse error: ${errorMessage}`,\n rawResponse,\n usage,\n );\n }\n\n return {\n parseResult,\n rawResponse,\n usage,\n reasoning_content,\n };\n}\n\nexport async function AiJudgeOrderSensitive(\n description: string,\n callAIFn: typeof callAIWithObjectResponse<{ isOrderSensitive: boolean }>,\n modelConfig: IModelConfig,\n): Promise<{\n isOrderSensitive: boolean;\n usage?: AIUsageInfo;\n}> {\n const systemPrompt = systemPromptToJudgeOrderSensitive();\n const userPrompt = orderSensitiveJudgePrompt(description);\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userPrompt,\n },\n ];\n\n const result = await callAIFn(msgs, modelConfig);\n\n return {\n isOrderSensitive: result.content.isOrderSensitive ?? false,\n usage: result.usage,\n };\n}\n"],"names":["debugInspect","getDebug","debugSection","extraTextFromUserPrompt","prompt","promptsToChatParam","multimodalPrompt","msgs","item","base64","preProcessImageUrl","AiLocateElement","options","context","targetElementDescription","modelConfig","modelFamily","screenshotBase64","assert","targetElementDescriptionText","userInstructionPrompt","findElementPrompt","systemPrompt","isAutoGLM","getAutoGLMLocatePrompt","systemPromptToLocateElement","imagePayload","imageWidth","imageHeight","originalImageWidth","originalImageHeight","paddedResult","paddingToMatchBlockByBase64","addOns","rawResponseContent","usage","callAIWithStringResponse","parsed","parseAutoGLMLocateResponse","resRect","matchedElements","errors","x","y","pixelX","Math","pixelY","finalX","finalY","element","generateElementByPoint","res","callAIWithObjectResponse","callError","errorMessage","Error","String","rawResponse","AIResponseParseError","undefined","JSON","Array","adaptBboxToRect","generateElementByRect","e","msg","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","sectionRect","sectionBbox","targetRect","referenceBboxList","referenceRects","bbox","mergedRect","mergeRects","expandSearchArea","imageBase64","scale","originalWidth","originalHeight","croppedResult","cropByRect","scaleRatio","scaledResult","scaleImage","AiExtractElementInfo","dataQuery","extractOption","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent","reasoning_content","callAI","parseResult","parseXMLExtractionResponse","parseError","AiJudgeOrderSensitive","description","callAIFn","systemPromptToJudgeOrderSensitive","userPrompt","orderSensitiveJudgePrompt"],"mappings":";;;;;;;;;;;;;AA6DA,MAAMA,eAAeC,SAAS;AAC9B,MAAMC,eAAeD,SAAS;AAE9B,MAAME,0BAA0B,CAACC;IAC/B,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAOA;IAEP,OAAOA,OAAO,MAAM;AAExB;AAEA,MAAMC,qBAAqB,OACzBC;IAEA,MAAMC,OAAyC,EAAE;IACjD,IAAID,kBAAkB,QAAQ,QAAQ;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQF,iBAAiB,MAAM,CAAE;YAC1C,MAAMG,SAAS,MAAMC,mBACnBF,KAAK,GAAG,EACR,CAAC,CAACF,iBAAiB,uBAAuB;YAG5CC,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,mCAAmC,EAAEC,KAAK,IAAI,CAAC,EAAE,CAAC;oBAC3D;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKE;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;QACF;IACF;IACA,OAAOF;AACT;AAEO,eAAeI,gBAAgBC,OAKrC;IAUC,MAAM,EAAEC,OAAO,EAAEC,wBAAwB,EAAEC,WAAW,EAAE,GAAGH;IAC3D,MAAM,EAAEI,WAAW,EAAE,GAAGD;IACxB,MAAME,mBAAmBJ,QAAQ,UAAU,CAAC,MAAM;IAElDK,OACEJ,0BACA;IAEF,MAAMK,+BAA+BhB,wBACnCW;IAEF,MAAMM,wBAAwBC,kBAAkBF;IAChD,MAAMG,eAAeC,UAAUP,eAC3BQ,uBAAuBR,eACvBS,4BAA4BT;IAEhC,IAAIU,eAAeT;IACnB,IAAIU,aAAad,QAAQ,IAAI,CAAC,KAAK;IACnC,IAAIe,cAAcf,QAAQ,IAAI,CAAC,MAAM;IACrC,IAAIgB,qBAAqBF;IACzB,IAAIG,sBAAsBF;IAE1B,IAAIhB,QAAQ,YAAY,EAAE;QACxBM,OACEN,QAAQ,YAAY,CAAC,IAAI,EACzB;QAEFM,OACEN,QAAQ,YAAY,CAAC,WAAW,EAChC;QAGFc,eAAed,QAAQ,YAAY,CAAC,WAAW;QAC/Ce,aAAaf,QAAQ,YAAY,CAAC,IAAI,EAAE;QACxCgB,cAAchB,QAAQ,YAAY,CAAC,IAAI,EAAE;QACzCiB,qBAAqBF;QACrBG,sBAAsBF;IACxB,OAAO,IAAIZ,AAAgB,iBAAhBA,aAA8B;QACvC,MAAMe,eAAe,MAAMC,4BAA4BN;QACvDC,aAAaI,aAAa,KAAK;QAC/BH,cAAcG,aAAa,MAAM;QACjCL,eAAeK,aAAa,WAAW;IACzC;IAEA,MAAMxB,OAAe;QACnB;YAAE,MAAM;YAAU,SAASe;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKI;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMH,UAAUP,eACZ,CAAC,KAAK,EAAEI,uBAAuB,GAC/BA;gBACN;aACD;QACH;KACD;IAED,IAAI,AAAoC,YAApC,OAAON,0BAAuC;QAChD,MAAMmB,SAAS,MAAM5B,mBAAmB;YACtC,QAAQS,yBAAyB,MAAM;YACvC,yBAAyBA,yBAAyB,uBAAuB;QAC3E;QACAP,KAAK,IAAI,IAAI0B;IACf;IAEA,IAAIV,UAAUP,cAAc;QAC1B,MAAM,EAAE,SAASkB,kBAAkB,EAAEC,KAAK,EAAE,GAC1C,MAAMC,yBAAyB7B,MAAMQ;QAEvCf,aAAa,yBAAyBkC;QAEtC,MAAMG,SAASC,2BAA2BJ;QAE1ClC,aAAa,sBAAsBqC,OAAO,KAAK;QAC/CrC,aAAa,yBAAyBqC,OAAO,WAAW;QAExD,IAAIE;QACJ,IAAIC,kBAAyC,EAAE;QAC/C,IAAIC,SAAmB,EAAE;QAEzB,IAAIJ,OAAO,KAAK,IAAI,CAACA,OAAO,WAAW,EAAE;YACvCI,SAAS;gBAACJ,OAAO,KAAK,IAAI;aAAoC;YAC9DrC,aAAa,yBAAyByC,MAAM,CAAC,EAAE;QACjD,OAAO;YACL,MAAM,EAAEC,CAAC,EAAEC,CAAC,EAAE,GAAGN,OAAO,WAAW;YAEnCrC,aAAa,iCAAiC;gBAAE0C;gBAAGC;YAAE;YAIrD,MAAMC,SAASC,KAAK,KAAK,CAAEH,IAAIf,aAAc;YAC7C,MAAMmB,SAASD,KAAK,KAAK,CAAEF,IAAIf,cAAe;YAE9C5B,aAAa,+BAA+B;gBAAE4C;gBAAQE;YAAO;YAG7D,IAAIC,SAASH;YACb,IAAII,SAASF;YACb,IAAIlC,QAAQ,YAAY,EAAE,MAAM;gBAC9BmC,UAAUnC,QAAQ,YAAY,CAAC,IAAI,CAAC,IAAI;gBACxCoC,UAAUpC,QAAQ,YAAY,CAAC,IAAI,CAAC,GAAG;YACzC;YAEA,MAAMqC,UAA+BC,uBACnC;gBAACH;gBAAQC;aAAO,EAChB7B;YAGFoB,UAAUU,QAAQ,IAAI;YACtBjD,aAAa,qBAAqBuC;YAElC,IAAIU,SACFT,kBAAkB;gBAACS;aAAQ;QAE/B;QAEA,OAAO;YACL,MAAMV;YACN,aAAa;gBACX,UAAUC;gBACVC;YACF;YACA,aAAaP;YACbC;YACA,mBAAmBE,OAAO,KAAK;QACjC;IACF;IAEA,IAAIc;IAKJ,IAAI;QACFA,MAAM,MAAMC,yBACV7C,MACAQ;IAEJ,EAAE,OAAOsC,WAAW;QAElB,MAAMC,eACJD,qBAAqBE,QAAQF,UAAU,OAAO,GAAGG,OAAOH;QAC1D,MAAMI,cACJJ,qBAAqBK,uBACjBL,UAAU,WAAW,GACrBC;QACN,MAAMnB,QACJkB,qBAAqBK,uBAAuBL,UAAU,KAAK,GAAGM;QAChE,OAAO;YACL,MAAMA;YACN,aAAa;gBACX,UAAU,EAAE;gBACZ,QAAQ;oBAAC,CAAC,eAAe,EAAEL,cAAc;iBAAC;YAC5C;YACAG;YACAtB;YACA,mBAAmBwB;QACrB;IACF;IAEA,MAAMF,cAAcG,KAAK,SAAS,CAACT,IAAI,OAAO;IAE9C,IAAIZ;IACJ,IAAIC,kBAAyC,EAAE;IAC/C,IAAIC,SACF,YAAYU,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,IAAI;QACF,IACE,UAAUA,IAAI,OAAO,IACrBU,MAAM,OAAO,CAACV,IAAI,OAAO,CAAC,IAAI,KAC9BA,IAAI,OAAO,CAAC,IAAI,CAAC,MAAM,IAAI,GAC3B;YACAZ,UAAUuB,gBACRX,IAAI,OAAO,CAAC,IAAI,EAChBxB,YACAC,aACAhB,QAAQ,YAAY,EAAE,MAAM,MAC5BA,QAAQ,YAAY,EAAE,MAAM,KAC5BiB,oBACAC,qBACAd,aACAJ,QAAQ,YAAY,EAAE;YAGxBZ,aAAa,WAAWuC;YAExB,MAAMU,UAA+Bc,sBACnCxB,SACApB;YAEFsB,SAAS,EAAE;YAEX,IAAIQ,SACFT,kBAAkB;gBAACS;aAAQ;QAE/B;IACF,EAAE,OAAOe,GAAG;QACV,MAAMC,MACJD,aAAaT,QACT,CAAC,sBAAsB,EAAES,EAAE,OAAO,EAAE,GACpC;QACN,IAAI,AAACvB,UAAUA,QAAQ,WAAW,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAEwB,IAAI,CAAC,CAAC;aAFtBxB,SAAS;YAACwB;SAAI;IAIlB;IAEA,OAAO;QACL,MAAM1B;QACN,aAAa;YACX,UAAUC;YACV,QAAQC;QACV;QACAgB;QACA,OAAON,IAAI,KAAK;QAChB,mBAAmBA,IAAI,iBAAiB;IAC1C;AACF;AAEO,eAAee,gBAAgBtD,OAIrC;IAQC,MAAM,EAAEC,OAAO,EAAEsD,kBAAkB,EAAEpD,WAAW,EAAE,GAAGH;IACrD,MAAM,EAAEI,WAAW,EAAE,GAAGD;IACxB,MAAME,mBAAmBJ,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAMS,eAAe8C,4BAA4BpD;IACjD,MAAMqD,gCAAgCC,0BACpCnE,wBAAwBgE;IAE1B,MAAM5D,OAAe;QACnB;YAAE,MAAM;YAAU,SAASe;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKL;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMoD;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAMlC,SAAS,MAAM5B,mBAAmB;YACtC,QAAQ8D,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACA5D,KAAK,IAAI,IAAI0B;IACf;IAEA,IAAIsC;IAGJ,IAAI;QACFA,SAAS,MAAMnB,yBACb7C,MACAQ;IAEJ,EAAE,OAAOsC,WAAW;QAElB,MAAMC,eACJD,qBAAqBE,QAAQF,UAAU,OAAO,GAAGG,OAAOH;QAC1D,MAAMI,cACJJ,qBAAqBK,uBACjBL,UAAU,WAAW,GACrBC;QACN,MAAMnB,QACJkB,qBAAqBK,uBAAuBL,UAAU,KAAK,GAAGM;QAChE,OAAO;YACL,MAAMA;YACN,aAAaA;YACb,OAAO,CAAC,eAAe,EAAEL,cAAc;YACvCG;YACAtB;QACF;IACF;IAEA,IAAIqC;IACJ,MAAMC,cAAcF,OAAO,OAAO,CAAC,IAAI;IACvC,IAAIE,aAAa;QACf,MAAMC,aAAaZ,gBACjBW,aACA5D,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBG;QAEFd,aAAa,0BAA0BwE;QAEvC,MAAMC,oBAAoBJ,OAAO,OAAO,CAAC,eAAe,IAAI,EAAE;QAC9DrE,aAAa,wBAAwByE;QAErC,MAAMC,iBAAiBD,kBACpB,MAAM,CAAC,CAACE,OAAShB,MAAM,OAAO,CAACgB,OAC/B,GAAG,CAAC,CAACA,OACGf,gBACLe,MACAhE,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBG;QAGNd,aAAa,qBAAqB0E;QAGlC,MAAME,aAAaC,WAAW;YAACL;eAAeE;SAAe;QAC7D1E,aAAa,iBAAiB4E;QAE9BN,cAAcQ,iBAAiBF,YAAYjE,QAAQ,IAAI;QACvDX,aAAa,2BAA2BsE;IAC1C;IAEA,IAAIS,cAAchE;IAClB,IAAIiE;IAEJ,IAAIV,aAAa;QACf,MAAMW,gBAAgBX,YAAY,KAAK;QACvC,MAAMY,iBAAiBZ,YAAY,MAAM;QAEzC,MAAMa,gBAAgB,MAAMC,WAC1BrE,kBACAuD,aACAxD,AAAgB,iBAAhBA;QAGF,MAAMuE,aAAa;QACnB,MAAMC,eAAe,MAAMC,WACzBJ,cAAc,WAAW,EACzBE;QAGFN,cAAcO,aAAa,WAAW;QACtCN,QAAQK;QACRf,YAAY,KAAK,GAAGgB,aAAa,KAAK;QACtChB,YAAY,MAAM,GAAGgB,aAAa,MAAM;QAExCtF,aACE,qDACAiF,eACAC,gBACAZ,YAAY,KAAK,EACjBA,YAAY,MAAM,EAClBU;IAEJ;IAEA,OAAO;QACL,MAAMV;QACNS;QACAC;QACA,OAAOX,OAAO,OAAO,CAAC,KAAK;QAC3B,aAAaX,KAAK,SAAS,CAACW,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAemB,qBAAwB9E,OAO7C;IACC,MAAM,EAAE+E,SAAS,EAAE9E,OAAO,EAAE+E,aAAa,EAAEtF,gBAAgB,EAAES,WAAW,EAAE,GACxEH;IACF,MAAMU,eAAeuE;IACrB,MAAM5E,mBAAmBJ,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAMiF,wBAAwBC,uBAC5BnF,QAAQ,eAAe,IAAI,IAC3B+E;IAGF,MAAMK,cAAyD,EAAE;IAEjE,IAAIJ,eAAe,uBAAuB,OACxCI,YAAY,IAAI,CAAC;QACf,MAAM;QACN,WAAW;YACT,KAAK/E;YACL,QAAQ;QACV;IACF;IAGF+E,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAMvF,OAAe;QACnB;YAAE,MAAM;YAAU,SAASe;QAAa;QACxC;YACE,MAAM;YACN,SAAS0E;QACX;KACD;IAED,IAAI1F,kBAAkB;QACpB,MAAM2B,SAAS,MAAM5B,mBAAmB;YACtC,QAAQC,iBAAiB,MAAM;YAC/B,yBAAyBA,iBAAiB,uBAAuB;QACnE;QACAC,KAAK,IAAI,IAAI0B;IACf;IAEA,MAAM,EACJ,SAASwB,WAAW,EACpBtB,KAAK,EACL8D,iBAAiB,EAClB,GAAG,MAAMC,OAAO3F,MAAMQ;IAGvB,IAAIoF;IACJ,IAAI;QACFA,cAAcC,2BAA8B3C;IAC9C,EAAE,OAAO4C,YAAY;QAEnB,MAAM/C,eACJ+C,sBAAsB9C,QAAQ8C,WAAW,OAAO,GAAG7C,OAAO6C;QAC5D,MAAM,IAAI3C,qBACR,CAAC,iBAAiB,EAAEJ,cAAc,EAClCG,aACAtB;IAEJ;IAEA,OAAO;QACLgE;QACA1C;QACAtB;QACA8D;IACF;AACF;AAEO,eAAeK,sBACpBC,WAAmB,EACnBC,QAAwE,EACxEzF,WAAyB;IAKzB,MAAMO,eAAemF;IACrB,MAAMC,aAAaC,0BAA0BJ;IAE7C,MAAMhG,OAAe;QACnB;YAAE,MAAM;YAAU,SAASe;QAAa;QACxC;YACE,MAAM;YACN,SAASoF;QACX;KACD;IAED,MAAMnC,SAAS,MAAMiC,SAASjG,MAAMQ;IAEpC,OAAO;QACL,kBAAkBwD,OAAO,OAAO,CAAC,gBAAgB,IAAI;QACrD,OAAOA,OAAO,KAAK;IACrB;AACF"}
|
|
1
|
+
{"version":3,"file":"ai-model/inspect.mjs","sources":["../../../src/ai-model/inspect.ts"],"sourcesContent":["import type {\n AIDataExtractionResponse,\n AIElementResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n Rect,\n ServiceExtractOption,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport {\n generateElementByPoint,\n generateElementByRect,\n} from '@midscene/shared/extractor/dom-util';\nimport {\n cropByRect,\n paddingToMatchBlockByBase64,\n preProcessImageUrl,\n scaleImage,\n} from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { LocateResultElement } from '@midscene/shared/types';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport type { TMultimodalPrompt, TUserPrompt } from '../common';\nimport { adaptBboxToRect, expandSearchArea, mergeRects } from '../common';\nimport { parseAutoGLMLocateResponse } from './auto-glm/parser';\nimport { getAutoGLMLocatePrompt } from './auto-glm/prompt';\nimport { isAutoGLM } from './auto-glm/util';\nimport {\n extractDataQueryPrompt,\n parseXMLExtractionResponse,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n orderSensitiveJudgePrompt,\n systemPromptToJudgeOrderSensitive,\n} from './prompt/order-sensitive-judge';\nimport {\n AIResponseParseError,\n callAI,\n callAIWithObjectResponse,\n callAIWithStringResponse,\n} from './service-caller/index';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nconst extraTextFromUserPrompt = (prompt: TUserPrompt): string => {\n if (typeof prompt === 'string') {\n return prompt;\n } else {\n return prompt.prompt;\n }\n};\n\nconst promptsToChatParam = async (\n multimodalPrompt: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n const msgs: ChatCompletionUserMessageParam[] = [];\n if (multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const base64 = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `this is the reference image named '${item.name}':`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: base64,\n detail: 'high',\n },\n },\n ],\n });\n }\n }\n return msgs;\n};\n\nexport async function AiLocateElement(options: {\n context: UIContext;\n targetElementDescription: TUserPrompt;\n searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;\n modelConfig: IModelConfig;\n}): Promise<{\n parseResult: {\n elements: LocateResultElement[];\n errors?: string[];\n };\n rect?: Rect;\n rawResponse: string;\n usage?: AIUsageInfo;\n reasoning_content?: string;\n}> {\n const { context, targetElementDescription, modelConfig } = options;\n const { modelFamily } = modelConfig;\n const screenshotBase64 = context.screenshot.base64;\n\n assert(\n targetElementDescription,\n 'cannot find the target element description',\n );\n const targetElementDescriptionText = extraTextFromUserPrompt(\n targetElementDescription,\n );\n const userInstructionPrompt = findElementPrompt(targetElementDescriptionText);\n const systemPrompt = isAutoGLM(modelFamily)\n ? getAutoGLMLocatePrompt(modelFamily)\n : systemPromptToLocateElement(modelFamily);\n\n let imagePayload = screenshotBase64;\n let imageWidth = context.shotSize.width;\n let imageHeight = context.shotSize.height;\n let originalImageWidth = imageWidth;\n let originalImageHeight = imageHeight;\n\n if (options.searchConfig) {\n assert(\n options.searchConfig.rect,\n 'searchArea is provided but its rect cannot be found. Failed to locate element',\n );\n assert(\n options.searchConfig.imageBase64,\n 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element',\n );\n\n imagePayload = options.searchConfig.imageBase64;\n imageWidth = options.searchConfig.rect?.width;\n imageHeight = options.searchConfig.rect?.height;\n originalImageWidth = imageWidth;\n originalImageHeight = imageHeight;\n } else if (modelFamily === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: isAutoGLM(modelFamily)\n ? `Tap: ${userInstructionPrompt}`\n : userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof targetElementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: targetElementDescription.images,\n convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n if (isAutoGLM(modelFamily)) {\n const { content: rawResponseContent, usage } =\n await callAIWithStringResponse(msgs, modelConfig);\n\n debugInspect('auto-glm rawResponse:', rawResponseContent);\n\n const parsed = parseAutoGLMLocateResponse(rawResponseContent);\n\n debugInspect('auto-glm thinking:', parsed.think);\n debugInspect('auto-glm coordinates:', parsed.coordinates);\n\n let resRect: Rect | undefined;\n let matchedElements: LocateResultElement[] = [];\n let errors: string[] = [];\n\n if (parsed.error || !parsed.coordinates) {\n errors = [parsed.error || 'Failed to parse auto-glm response'];\n debugInspect('auto-glm parse error:', errors[0]);\n } else {\n const { x, y } = parsed.coordinates;\n\n debugInspect('auto-glm coordinates [0-999]:', { x, y });\n\n // Convert auto-glm coordinates [0,999] to pixel bbox\n // Map from [0,999] to pixel coordinates\n const pixelX = Math.round((x * imageWidth) / 1000);\n const pixelY = Math.round((y * imageHeight) / 1000);\n\n debugInspect('auto-glm pixel coordinates:', { pixelX, pixelY });\n\n // Apply offset if searching in a cropped area\n let finalX = pixelX;\n let finalY = pixelY;\n if (options.searchConfig?.rect) {\n finalX += options.searchConfig.rect.left;\n finalY += options.searchConfig.rect.top;\n }\n\n const element: LocateResultElement = generateElementByPoint(\n [finalX, finalY],\n targetElementDescriptionText as string,\n );\n\n resRect = element.rect;\n debugInspect('auto-glm resRect:', resRect);\n\n if (element) {\n matchedElements = [element];\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements,\n errors,\n },\n rawResponse: rawResponseContent,\n usage,\n reasoning_content: parsed.think,\n };\n }\n\n let res: Awaited<\n ReturnType<\n typeof callAIWithObjectResponse<AIElementResponse | [number, number]>\n >\n >;\n try {\n res = await callAIWithObjectResponse<AIElementResponse | [number, number]>(\n msgs,\n modelConfig,\n );\n } catch (callError) {\n // Return error with usage and rawResponse if available\n const errorMessage =\n callError instanceof Error ? callError.message : String(callError);\n const rawResponse =\n callError instanceof AIResponseParseError\n ? callError.rawResponse\n : errorMessage;\n const usage =\n callError instanceof AIResponseParseError ? callError.usage : undefined;\n return {\n rect: undefined,\n parseResult: {\n elements: [],\n errors: [`AI call error: ${errorMessage}`],\n },\n rawResponse,\n usage,\n reasoning_content: undefined,\n };\n }\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElements: LocateResultElement[] = [];\n let errors: string[] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n try {\n if (\n 'bbox' in res.content &&\n Array.isArray(res.content.bbox) &&\n res.content.bbox.length >= 1\n ) {\n resRect = adaptBboxToRect(\n res.content.bbox,\n imageWidth,\n imageHeight,\n options.searchConfig?.rect?.left,\n options.searchConfig?.rect?.top,\n originalImageWidth,\n originalImageHeight,\n modelFamily,\n options.searchConfig?.scale,\n );\n\n debugInspect('resRect', resRect);\n\n const element: LocateResultElement = generateElementByRect(\n resRect,\n targetElementDescriptionText as string,\n );\n errors = [];\n\n if (element) {\n matchedElements = [element];\n }\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse bbox: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements as LocateResultElement[],\n errors: errors as string[],\n },\n rawResponse,\n usage: res.usage,\n reasoning_content: res.reasoning_content,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext;\n sectionDescription: TUserPrompt;\n modelConfig: IModelConfig;\n}): Promise<{\n rect?: Rect;\n imageBase64?: string;\n scale?: number;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription, modelConfig } = options;\n const { modelFamily } = modelConfig;\n const screenshotBase64 = context.screenshot.base64;\n\n const systemPrompt = systemPromptToLocateSection(modelFamily);\n const sectionLocatorInstructionText = sectionLocatorInstruction(\n extraTextFromUserPrompt(sectionDescription),\n );\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: sectionDescription.images,\n convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n let result: Awaited<\n ReturnType<typeof callAIWithObjectResponse<AISectionLocatorResponse>>\n >;\n try {\n result = await callAIWithObjectResponse<AISectionLocatorResponse>(\n msgs,\n modelConfig,\n );\n } catch (callError) {\n // Return error with usage and rawResponse if available\n const errorMessage =\n callError instanceof Error ? callError.message : String(callError);\n const rawResponse =\n callError instanceof AIResponseParseError\n ? callError.rawResponse\n : errorMessage;\n const usage =\n callError instanceof AIResponseParseError ? callError.usage : undefined;\n return {\n rect: undefined,\n imageBase64: undefined,\n error: `AI call error: ${errorMessage}`,\n rawResponse,\n usage,\n };\n }\n\n let sectionRect: Rect | undefined;\n const sectionBbox = result.content.bbox;\n if (sectionBbox) {\n const targetRect = adaptBboxToRect(\n sectionBbox,\n context.shotSize.width,\n context.shotSize.height,\n 0,\n 0,\n context.shotSize.width,\n context.shotSize.height,\n modelFamily,\n );\n debugSection('original targetRect %j', targetRect);\n\n const referenceBboxList = result.content.references_bbox || [];\n debugSection('referenceBboxList %j', referenceBboxList);\n\n const referenceRects = referenceBboxList\n .filter((bbox) => Array.isArray(bbox))\n .map((bbox) => {\n return adaptBboxToRect(\n bbox,\n context.shotSize.width,\n context.shotSize.height,\n 0,\n 0,\n context.shotSize.width,\n context.shotSize.height,\n modelFamily,\n );\n });\n debugSection('referenceRects %j', referenceRects);\n\n // merge the sectionRect and referenceRects\n const mergedRect = mergeRects([targetRect, ...referenceRects]);\n debugSection('mergedRect %j', mergedRect);\n\n sectionRect = expandSearchArea(mergedRect, context.shotSize);\n debugSection('expanded sectionRect %j', sectionRect);\n }\n\n let imageBase64 = screenshotBase64;\n let scale: number | undefined;\n\n if (sectionRect) {\n const originalWidth = sectionRect.width;\n const originalHeight = sectionRect.height;\n\n const croppedResult = await cropByRect(\n screenshotBase64,\n sectionRect,\n modelFamily === 'qwen2.5-vl',\n );\n\n const scaleRatio = 2;\n const scaledResult = await scaleImage(\n croppedResult.imageBase64,\n scaleRatio,\n );\n\n imageBase64 = scaledResult.imageBase64;\n scale = scaleRatio;\n sectionRect.width = scaledResult.width;\n sectionRect.height = scaledResult.height;\n\n debugSection(\n 'scaled sectionRect from %dx%d to %dx%d (scale=%d)',\n originalWidth,\n originalHeight,\n sectionRect.width,\n sectionRect.height,\n scale,\n );\n }\n\n return {\n rect: sectionRect,\n imageBase64,\n scale,\n error: result.content.error,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<T>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext;\n pageDescription?: string;\n extractOption?: ServiceExtractOption;\n modelConfig: IModelConfig;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt, modelConfig } =\n options;\n const systemPrompt = systemPromptToExtract();\n const screenshotBase64 = context.screenshot.base64;\n\n const extractDataPromptText = extractDataQueryPrompt(\n options.pageDescription || '',\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (multimodalPrompt) {\n const addOns = await promptsToChatParam({\n images: multimodalPrompt.images,\n convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const {\n content: rawResponse,\n usage,\n reasoning_content,\n } = await callAI(msgs, modelConfig);\n\n // Parse XML response to JSON object\n let parseResult: AIDataExtractionResponse<T>;\n try {\n parseResult = parseXMLExtractionResponse<T>(rawResponse);\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `XML parse error: ${errorMessage}`,\n rawResponse,\n usage,\n );\n }\n\n return {\n parseResult,\n rawResponse,\n usage,\n reasoning_content,\n };\n}\n\nexport async function AiJudgeOrderSensitive(\n description: string,\n callAIFn: typeof callAIWithObjectResponse<{ isOrderSensitive: boolean }>,\n modelConfig: IModelConfig,\n): Promise<{\n isOrderSensitive: boolean;\n usage?: AIUsageInfo;\n}> {\n const systemPrompt = systemPromptToJudgeOrderSensitive();\n const userPrompt = orderSensitiveJudgePrompt(description);\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userPrompt,\n },\n ];\n\n const result = await callAIFn(msgs, modelConfig);\n\n return {\n isOrderSensitive: result.content.isOrderSensitive ?? false,\n usage: result.usage,\n };\n}\n"],"names":["debugInspect","getDebug","debugSection","extraTextFromUserPrompt","prompt","promptsToChatParam","multimodalPrompt","msgs","item","base64","preProcessImageUrl","AiLocateElement","options","context","targetElementDescription","modelConfig","modelFamily","screenshotBase64","assert","targetElementDescriptionText","userInstructionPrompt","findElementPrompt","systemPrompt","isAutoGLM","getAutoGLMLocatePrompt","systemPromptToLocateElement","imagePayload","imageWidth","imageHeight","originalImageWidth","originalImageHeight","paddedResult","paddingToMatchBlockByBase64","addOns","rawResponseContent","usage","callAIWithStringResponse","parsed","parseAutoGLMLocateResponse","resRect","matchedElements","errors","x","y","pixelX","Math","pixelY","finalX","finalY","element","generateElementByPoint","res","callAIWithObjectResponse","callError","errorMessage","Error","String","rawResponse","AIResponseParseError","undefined","JSON","Array","adaptBboxToRect","generateElementByRect","e","msg","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","sectionRect","sectionBbox","targetRect","referenceBboxList","referenceRects","bbox","mergedRect","mergeRects","expandSearchArea","imageBase64","scale","originalWidth","originalHeight","croppedResult","cropByRect","scaleRatio","scaledResult","scaleImage","AiExtractElementInfo","dataQuery","extractOption","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent","reasoning_content","callAI","parseResult","parseXMLExtractionResponse","parseError","AiJudgeOrderSensitive","description","callAIFn","systemPromptToJudgeOrderSensitive","userPrompt","orderSensitiveJudgePrompt"],"mappings":";;;;;;;;;;;;;AA6DA,MAAMA,eAAeC,SAAS;AAC9B,MAAMC,eAAeD,SAAS;AAE9B,MAAME,0BAA0B,CAACC;IAC/B,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAOA;IAEP,OAAOA,OAAO,MAAM;AAExB;AAEA,MAAMC,qBAAqB,OACzBC;IAEA,MAAMC,OAAyC,EAAE;IACjD,IAAID,kBAAkB,QAAQ,QAAQ;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQF,iBAAiB,MAAM,CAAE;YAC1C,MAAMG,SAAS,MAAMC,mBACnBF,KAAK,GAAG,EACR,CAAC,CAACF,iBAAiB,uBAAuB;YAG5CC,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,mCAAmC,EAAEC,KAAK,IAAI,CAAC,EAAE,CAAC;oBAC3D;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKE;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;QACF;IACF;IACA,OAAOF;AACT;AAEO,eAAeI,gBAAgBC,OAKrC;IAUC,MAAM,EAAEC,OAAO,EAAEC,wBAAwB,EAAEC,WAAW,EAAE,GAAGH;IAC3D,MAAM,EAAEI,WAAW,EAAE,GAAGD;IACxB,MAAME,mBAAmBJ,QAAQ,UAAU,CAAC,MAAM;IAElDK,OACEJ,0BACA;IAEF,MAAMK,+BAA+BhB,wBACnCW;IAEF,MAAMM,wBAAwBC,kBAAkBF;IAChD,MAAMG,eAAeC,UAAUP,eAC3BQ,uBAAuBR,eACvBS,4BAA4BT;IAEhC,IAAIU,eAAeT;IACnB,IAAIU,aAAad,QAAQ,QAAQ,CAAC,KAAK;IACvC,IAAIe,cAAcf,QAAQ,QAAQ,CAAC,MAAM;IACzC,IAAIgB,qBAAqBF;IACzB,IAAIG,sBAAsBF;IAE1B,IAAIhB,QAAQ,YAAY,EAAE;QACxBM,OACEN,QAAQ,YAAY,CAAC,IAAI,EACzB;QAEFM,OACEN,QAAQ,YAAY,CAAC,WAAW,EAChC;QAGFc,eAAed,QAAQ,YAAY,CAAC,WAAW;QAC/Ce,aAAaf,QAAQ,YAAY,CAAC,IAAI,EAAE;QACxCgB,cAAchB,QAAQ,YAAY,CAAC,IAAI,EAAE;QACzCiB,qBAAqBF;QACrBG,sBAAsBF;IACxB,OAAO,IAAIZ,AAAgB,iBAAhBA,aAA8B;QACvC,MAAMe,eAAe,MAAMC,4BAA4BN;QACvDC,aAAaI,aAAa,KAAK;QAC/BH,cAAcG,aAAa,MAAM;QACjCL,eAAeK,aAAa,WAAW;IACzC;IAEA,MAAMxB,OAAe;QACnB;YAAE,MAAM;YAAU,SAASe;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKI;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMH,UAAUP,eACZ,CAAC,KAAK,EAAEI,uBAAuB,GAC/BA;gBACN;aACD;QACH;KACD;IAED,IAAI,AAAoC,YAApC,OAAON,0BAAuC;QAChD,MAAMmB,SAAS,MAAM5B,mBAAmB;YACtC,QAAQS,yBAAyB,MAAM;YACvC,yBAAyBA,yBAAyB,uBAAuB;QAC3E;QACAP,KAAK,IAAI,IAAI0B;IACf;IAEA,IAAIV,UAAUP,cAAc;QAC1B,MAAM,EAAE,SAASkB,kBAAkB,EAAEC,KAAK,EAAE,GAC1C,MAAMC,yBAAyB7B,MAAMQ;QAEvCf,aAAa,yBAAyBkC;QAEtC,MAAMG,SAASC,2BAA2BJ;QAE1ClC,aAAa,sBAAsBqC,OAAO,KAAK;QAC/CrC,aAAa,yBAAyBqC,OAAO,WAAW;QAExD,IAAIE;QACJ,IAAIC,kBAAyC,EAAE;QAC/C,IAAIC,SAAmB,EAAE;QAEzB,IAAIJ,OAAO,KAAK,IAAI,CAACA,OAAO,WAAW,EAAE;YACvCI,SAAS;gBAACJ,OAAO,KAAK,IAAI;aAAoC;YAC9DrC,aAAa,yBAAyByC,MAAM,CAAC,EAAE;QACjD,OAAO;YACL,MAAM,EAAEC,CAAC,EAAEC,CAAC,EAAE,GAAGN,OAAO,WAAW;YAEnCrC,aAAa,iCAAiC;gBAAE0C;gBAAGC;YAAE;YAIrD,MAAMC,SAASC,KAAK,KAAK,CAAEH,IAAIf,aAAc;YAC7C,MAAMmB,SAASD,KAAK,KAAK,CAAEF,IAAIf,cAAe;YAE9C5B,aAAa,+BAA+B;gBAAE4C;gBAAQE;YAAO;YAG7D,IAAIC,SAASH;YACb,IAAII,SAASF;YACb,IAAIlC,QAAQ,YAAY,EAAE,MAAM;gBAC9BmC,UAAUnC,QAAQ,YAAY,CAAC,IAAI,CAAC,IAAI;gBACxCoC,UAAUpC,QAAQ,YAAY,CAAC,IAAI,CAAC,GAAG;YACzC;YAEA,MAAMqC,UAA+BC,uBACnC;gBAACH;gBAAQC;aAAO,EAChB7B;YAGFoB,UAAUU,QAAQ,IAAI;YACtBjD,aAAa,qBAAqBuC;YAElC,IAAIU,SACFT,kBAAkB;gBAACS;aAAQ;QAE/B;QAEA,OAAO;YACL,MAAMV;YACN,aAAa;gBACX,UAAUC;gBACVC;YACF;YACA,aAAaP;YACbC;YACA,mBAAmBE,OAAO,KAAK;QACjC;IACF;IAEA,IAAIc;IAKJ,IAAI;QACFA,MAAM,MAAMC,yBACV7C,MACAQ;IAEJ,EAAE,OAAOsC,WAAW;QAElB,MAAMC,eACJD,qBAAqBE,QAAQF,UAAU,OAAO,GAAGG,OAAOH;QAC1D,MAAMI,cACJJ,qBAAqBK,uBACjBL,UAAU,WAAW,GACrBC;QACN,MAAMnB,QACJkB,qBAAqBK,uBAAuBL,UAAU,KAAK,GAAGM;QAChE,OAAO;YACL,MAAMA;YACN,aAAa;gBACX,UAAU,EAAE;gBACZ,QAAQ;oBAAC,CAAC,eAAe,EAAEL,cAAc;iBAAC;YAC5C;YACAG;YACAtB;YACA,mBAAmBwB;QACrB;IACF;IAEA,MAAMF,cAAcG,KAAK,SAAS,CAACT,IAAI,OAAO;IAE9C,IAAIZ;IACJ,IAAIC,kBAAyC,EAAE;IAC/C,IAAIC,SACF,YAAYU,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,IAAI;QACF,IACE,UAAUA,IAAI,OAAO,IACrBU,MAAM,OAAO,CAACV,IAAI,OAAO,CAAC,IAAI,KAC9BA,IAAI,OAAO,CAAC,IAAI,CAAC,MAAM,IAAI,GAC3B;YACAZ,UAAUuB,gBACRX,IAAI,OAAO,CAAC,IAAI,EAChBxB,YACAC,aACAhB,QAAQ,YAAY,EAAE,MAAM,MAC5BA,QAAQ,YAAY,EAAE,MAAM,KAC5BiB,oBACAC,qBACAd,aACAJ,QAAQ,YAAY,EAAE;YAGxBZ,aAAa,WAAWuC;YAExB,MAAMU,UAA+Bc,sBACnCxB,SACApB;YAEFsB,SAAS,EAAE;YAEX,IAAIQ,SACFT,kBAAkB;gBAACS;aAAQ;QAE/B;IACF,EAAE,OAAOe,GAAG;QACV,MAAMC,MACJD,aAAaT,QACT,CAAC,sBAAsB,EAAES,EAAE,OAAO,EAAE,GACpC;QACN,IAAI,AAACvB,UAAUA,QAAQ,WAAW,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAEwB,IAAI,CAAC,CAAC;aAFtBxB,SAAS;YAACwB;SAAI;IAIlB;IAEA,OAAO;QACL,MAAM1B;QACN,aAAa;YACX,UAAUC;YACV,QAAQC;QACV;QACAgB;QACA,OAAON,IAAI,KAAK;QAChB,mBAAmBA,IAAI,iBAAiB;IAC1C;AACF;AAEO,eAAee,gBAAgBtD,OAIrC;IAQC,MAAM,EAAEC,OAAO,EAAEsD,kBAAkB,EAAEpD,WAAW,EAAE,GAAGH;IACrD,MAAM,EAAEI,WAAW,EAAE,GAAGD;IACxB,MAAME,mBAAmBJ,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAMS,eAAe8C,4BAA4BpD;IACjD,MAAMqD,gCAAgCC,0BACpCnE,wBAAwBgE;IAE1B,MAAM5D,OAAe;QACnB;YAAE,MAAM;YAAU,SAASe;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKL;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMoD;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAMlC,SAAS,MAAM5B,mBAAmB;YACtC,QAAQ8D,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACA5D,KAAK,IAAI,IAAI0B;IACf;IAEA,IAAIsC;IAGJ,IAAI;QACFA,SAAS,MAAMnB,yBACb7C,MACAQ;IAEJ,EAAE,OAAOsC,WAAW;QAElB,MAAMC,eACJD,qBAAqBE,QAAQF,UAAU,OAAO,GAAGG,OAAOH;QAC1D,MAAMI,cACJJ,qBAAqBK,uBACjBL,UAAU,WAAW,GACrBC;QACN,MAAMnB,QACJkB,qBAAqBK,uBAAuBL,UAAU,KAAK,GAAGM;QAChE,OAAO;YACL,MAAMA;YACN,aAAaA;YACb,OAAO,CAAC,eAAe,EAAEL,cAAc;YACvCG;YACAtB;QACF;IACF;IAEA,IAAIqC;IACJ,MAAMC,cAAcF,OAAO,OAAO,CAAC,IAAI;IACvC,IAAIE,aAAa;QACf,MAAMC,aAAaZ,gBACjBW,aACA5D,QAAQ,QAAQ,CAAC,KAAK,EACtBA,QAAQ,QAAQ,CAAC,MAAM,EACvB,GACA,GACAA,QAAQ,QAAQ,CAAC,KAAK,EACtBA,QAAQ,QAAQ,CAAC,MAAM,EACvBG;QAEFd,aAAa,0BAA0BwE;QAEvC,MAAMC,oBAAoBJ,OAAO,OAAO,CAAC,eAAe,IAAI,EAAE;QAC9DrE,aAAa,wBAAwByE;QAErC,MAAMC,iBAAiBD,kBACpB,MAAM,CAAC,CAACE,OAAShB,MAAM,OAAO,CAACgB,OAC/B,GAAG,CAAC,CAACA,OACGf,gBACLe,MACAhE,QAAQ,QAAQ,CAAC,KAAK,EACtBA,QAAQ,QAAQ,CAAC,MAAM,EACvB,GACA,GACAA,QAAQ,QAAQ,CAAC,KAAK,EACtBA,QAAQ,QAAQ,CAAC,MAAM,EACvBG;QAGNd,aAAa,qBAAqB0E;QAGlC,MAAME,aAAaC,WAAW;YAACL;eAAeE;SAAe;QAC7D1E,aAAa,iBAAiB4E;QAE9BN,cAAcQ,iBAAiBF,YAAYjE,QAAQ,QAAQ;QAC3DX,aAAa,2BAA2BsE;IAC1C;IAEA,IAAIS,cAAchE;IAClB,IAAIiE;IAEJ,IAAIV,aAAa;QACf,MAAMW,gBAAgBX,YAAY,KAAK;QACvC,MAAMY,iBAAiBZ,YAAY,MAAM;QAEzC,MAAMa,gBAAgB,MAAMC,WAC1BrE,kBACAuD,aACAxD,AAAgB,iBAAhBA;QAGF,MAAMuE,aAAa;QACnB,MAAMC,eAAe,MAAMC,WACzBJ,cAAc,WAAW,EACzBE;QAGFN,cAAcO,aAAa,WAAW;QACtCN,QAAQK;QACRf,YAAY,KAAK,GAAGgB,aAAa,KAAK;QACtChB,YAAY,MAAM,GAAGgB,aAAa,MAAM;QAExCtF,aACE,qDACAiF,eACAC,gBACAZ,YAAY,KAAK,EACjBA,YAAY,MAAM,EAClBU;IAEJ;IAEA,OAAO;QACL,MAAMV;QACNS;QACAC;QACA,OAAOX,OAAO,OAAO,CAAC,KAAK;QAC3B,aAAaX,KAAK,SAAS,CAACW,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAemB,qBAAwB9E,OAO7C;IACC,MAAM,EAAE+E,SAAS,EAAE9E,OAAO,EAAE+E,aAAa,EAAEtF,gBAAgB,EAAES,WAAW,EAAE,GACxEH;IACF,MAAMU,eAAeuE;IACrB,MAAM5E,mBAAmBJ,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAMiF,wBAAwBC,uBAC5BnF,QAAQ,eAAe,IAAI,IAC3B+E;IAGF,MAAMK,cAAyD,EAAE;IAEjE,IAAIJ,eAAe,uBAAuB,OACxCI,YAAY,IAAI,CAAC;QACf,MAAM;QACN,WAAW;YACT,KAAK/E;YACL,QAAQ;QACV;IACF;IAGF+E,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAMvF,OAAe;QACnB;YAAE,MAAM;YAAU,SAASe;QAAa;QACxC;YACE,MAAM;YACN,SAAS0E;QACX;KACD;IAED,IAAI1F,kBAAkB;QACpB,MAAM2B,SAAS,MAAM5B,mBAAmB;YACtC,QAAQC,iBAAiB,MAAM;YAC/B,yBAAyBA,iBAAiB,uBAAuB;QACnE;QACAC,KAAK,IAAI,IAAI0B;IACf;IAEA,MAAM,EACJ,SAASwB,WAAW,EACpBtB,KAAK,EACL8D,iBAAiB,EAClB,GAAG,MAAMC,OAAO3F,MAAMQ;IAGvB,IAAIoF;IACJ,IAAI;QACFA,cAAcC,2BAA8B3C;IAC9C,EAAE,OAAO4C,YAAY;QAEnB,MAAM/C,eACJ+C,sBAAsB9C,QAAQ8C,WAAW,OAAO,GAAG7C,OAAO6C;QAC5D,MAAM,IAAI3C,qBACR,CAAC,iBAAiB,EAAEJ,cAAc,EAClCG,aACAtB;IAEJ;IAEA,OAAO;QACLgE;QACA1C;QACAtB;QACA8D;IACF;AACF;AAEO,eAAeK,sBACpBC,WAAmB,EACnBC,QAAwE,EACxEzF,WAAyB;IAKzB,MAAMO,eAAemF;IACrB,MAAMC,aAAaC,0BAA0BJ;IAE7C,MAAMhG,OAAe;QACnB;YAAE,MAAM;YAAU,SAASe;QAAa;QACxC;YACE,MAAM;YACN,SAASoF;QACX;KACD;IAED,MAAMnC,SAAS,MAAMiC,SAASjG,MAAMQ;IAEpC,OAAO;QACL,kBAAkBwD,OAAO,OAAO,CAAC,gBAAgB,IAAI;QACrD,OAAOA,OAAO,KAAK;IACrB;AACF"}
|
|
@@ -62,7 +62,7 @@ function parseXMLPlanningResponse(xmlString, modelFamily) {
|
|
|
62
62
|
}
|
|
63
63
|
async function plan(userInstruction, opts) {
|
|
64
64
|
const { context, modelConfig, conversationHistory } = opts;
|
|
65
|
-
const {
|
|
65
|
+
const { shotSize } = context;
|
|
66
66
|
const screenshotBase64 = context.screenshot.base64;
|
|
67
67
|
const { modelFamily } = modelConfig;
|
|
68
68
|
const systemPrompt = await systemPromptToTaskPlanning({
|
|
@@ -73,8 +73,8 @@ async function plan(userInstruction, opts) {
|
|
|
73
73
|
deepThink: true === opts.deepThink
|
|
74
74
|
});
|
|
75
75
|
let imagePayload = screenshotBase64;
|
|
76
|
-
let imageWidth =
|
|
77
|
-
let imageHeight =
|
|
76
|
+
let imageWidth = shotSize.width;
|
|
77
|
+
let imageHeight = shotSize.height;
|
|
78
78
|
if ('qwen2.5-vl' === modelFamily) {
|
|
79
79
|
const paddedResult = await paddingToMatchBlockByBase64(imagePayload);
|
|
80
80
|
imageWidth = paddedResult.width;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/llm-planning.mjs","sources":["../../../src/ai-model/llm-planning.ts"],"sourcesContent":["import type {\n DeepThinkOption,\n DeviceAction,\n InterfaceType,\n PlanningAIResponse,\n RawResponsePlanningAIResponse,\n UIContext,\n} from '@/types';\nimport type { IModelConfig, TModelFamily } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport {\n buildYamlFlowFromPlans,\n fillBboxParam,\n findAllMidsceneLocatorField,\n} from '../common';\nimport type { ConversationHistory } from './conversation-history';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport { extractXMLTag } from './prompt/util';\nimport {\n AIResponseParseError,\n callAI,\n safeParseJson,\n} from './service-caller/index';\n\nconst debug = getDebug('planning');\nconst warnLog = getDebug('planning', { console: true });\n\n/**\n * Parse XML response from LLM and convert to RawResponsePlanningAIResponse\n */\nexport function parseXMLPlanningResponse(\n xmlString: string,\n modelFamily: TModelFamily | undefined,\n): RawResponsePlanningAIResponse {\n const thought = extractXMLTag(xmlString, 'thought');\n const memory = extractXMLTag(xmlString, 'memory');\n const log = extractXMLTag(xmlString, 'log') || '';\n const error = extractXMLTag(xmlString, 'error');\n const actionType = extractXMLTag(xmlString, 'action-type');\n const actionParamStr = extractXMLTag(xmlString, 'action-param-json');\n\n // Parse <complete> tag with success attribute\n const completeGoalRegex =\n /<complete\\s+success=\"(true|false)\">([\\s\\S]*?)<\\/complete>/i;\n const completeGoalMatch = xmlString.match(completeGoalRegex);\n let finalizeMessage: string | undefined;\n let finalizeSuccess: boolean | undefined;\n\n if (completeGoalMatch) {\n finalizeSuccess = completeGoalMatch[1] === 'true';\n finalizeMessage = completeGoalMatch[2]?.trim() || undefined;\n }\n\n // Parse action\n let action: any = null;\n if (actionType && actionType.toLowerCase() !== 'null') {\n const type = actionType.trim();\n let param: any = undefined;\n\n if (actionParamStr) {\n try {\n // Parse the JSON string in action-param-json\n param = safeParseJson(actionParamStr, modelFamily);\n } catch (e) {\n throw new Error(`Failed to parse action-param-json: ${e}`);\n }\n }\n\n action = {\n type,\n ...(param !== undefined ? { param } : {}),\n };\n }\n\n return {\n ...(thought ? { thought } : {}),\n ...(memory ? { memory } : {}),\n log,\n ...(error ? { error } : {}),\n action,\n ...(finalizeMessage !== undefined ? { finalizeMessage } : {}),\n ...(finalizeSuccess !== undefined ? { finalizeSuccess } : {}),\n };\n}\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n interfaceType: InterfaceType;\n actionSpace: DeviceAction<any>[];\n actionContext?: string;\n modelConfig: IModelConfig;\n conversationHistory: ConversationHistory;\n includeBbox: boolean;\n imagesIncludeCount?: number;\n deepThink?: DeepThinkOption;\n },\n): Promise<PlanningAIResponse> {\n const { context, modelConfig, conversationHistory } = opts;\n const { size } = context;\n const screenshotBase64 = context.screenshot.base64;\n\n const { modelFamily } = modelConfig;\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n modelFamily,\n includeBbox: opts.includeBbox,\n includeThought: true, // always include thought\n deepThink: opts.deepThink === true,\n });\n\n let imagePayload = screenshotBase64;\n let imageWidth = size.width;\n let imageHeight = size.height;\n const rightLimit = imageWidth;\n const bottomLimit = imageHeight;\n\n // Process image based on VL mode requirements\n if (modelFamily === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const actionContext = opts.actionContext\n ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\\n`\n : '';\n\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${actionContext}<user_instruction>${userInstruction}</user_instruction>`,\n },\n ],\n },\n ];\n\n let latestFeedbackMessage: ChatCompletionMessageParam;\n\n // Build historical execution logs text to include in the message\n const historicalLogsText = conversationHistory.historicalLogsToText();\n const historicalLogsSection = historicalLogsText\n ? `\\n\\n${historicalLogsText}`\n : '';\n\n // Build memories text to include in the message\n const memoriesText = conversationHistory.memoriesToText();\n const memoriesSection = memoriesText ? `\\n\\n${memoriesText}` : '';\n\n if (conversationHistory.pendingFeedbackMessage) {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${conversationHistory.pendingFeedbackMessage}. The previous action has been executed, here is the latest screenshot. Please continue according to the instruction.${memoriesSection}${historicalLogsSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n\n conversationHistory.resetPendingFeedbackMessageIfExists();\n } else {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `this is the latest screenshot${memoriesSection}${historicalLogsSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n }\n conversationHistory.append(latestFeedbackMessage);\n\n // Compress history if it exceeds the threshold to avoid context overflow\n conversationHistory.compressHistory(50, 20);\n\n const historyLog = conversationHistory.snapshot(opts.imagesIncludeCount);\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...instruction,\n ...historyLog,\n ];\n\n const {\n content: rawResponse,\n usage,\n reasoning_content,\n } = await callAI(msgs, modelConfig, {\n deepThink: opts.deepThink === 'unset' ? undefined : opts.deepThink,\n });\n\n // Parse XML response to JSON object, capture parsing errors\n let planFromAI: RawResponsePlanningAIResponse;\n try {\n planFromAI = parseXMLPlanningResponse(rawResponse, modelFamily);\n\n if (planFromAI.action && planFromAI.finalizeSuccess !== undefined) {\n warnLog(\n 'Planning response included both an action and <complete>; ignoring <complete> output.',\n );\n planFromAI.finalizeMessage = undefined;\n planFromAI.finalizeSuccess = undefined;\n }\n\n const actions = planFromAI.action ? [planFromAI.action] : [];\n let shouldContinuePlanning = true;\n\n // Check if task is completed via <complete> tag\n if (planFromAI.finalizeSuccess !== undefined) {\n debug('task completed via <complete> tag, stop planning');\n shouldContinuePlanning = false;\n }\n\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n reasoning_content,\n yamlFlow: buildYamlFlowFromPlans(actions, opts.actionSpace),\n shouldContinuePlanning,\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult && modelFamily !== undefined) {\n // Always use model family to fill bbox parameters\n action.param[field] = fillBboxParam(\n locateResult,\n imageWidth,\n imageHeight,\n modelFamily,\n );\n }\n });\n });\n\n // Accumulate logs as historical execution steps\n if (planFromAI.log) {\n conversationHistory.appendHistoricalLog(planFromAI.log);\n }\n\n // Append memory to conversation history if present\n if (planFromAI.memory) {\n conversationHistory.appendMemory(planFromAI.memory);\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n\n return returnValue;\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `XML parse error: ${errorMessage}`,\n rawResponse,\n usage,\n );\n }\n}\n"],"names":["debug","getDebug","warnLog","parseXMLPlanningResponse","xmlString","modelFamily","thought","extractXMLTag","memory","log","error","actionType","actionParamStr","completeGoalRegex","completeGoalMatch","finalizeMessage","finalizeSuccess","undefined","action","type","param","safeParseJson","e","Error","plan","userInstruction","opts","context","modelConfig","conversationHistory","size","screenshotBase64","systemPrompt","systemPromptToTaskPlanning","imagePayload","imageWidth","imageHeight","paddedResult","paddingToMatchBlockByBase64","actionContext","instruction","latestFeedbackMessage","historicalLogsText","historicalLogsSection","memoriesText","memoriesSection","historyLog","msgs","rawResponse","usage","reasoning_content","callAI","planFromAI","actions","shouldContinuePlanning","returnValue","buildYamlFlowFromPlans","assert","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","fillBboxParam","parseError","errorMessage","String","AIResponseParseError"],"mappings":";;;;;;;AA2BA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,UAAUD,SAAS,YAAY;IAAE,SAAS;AAAK;AAK9C,SAASE,yBACdC,SAAiB,EACjBC,WAAqC;IAErC,MAAMC,UAAUC,cAAcH,WAAW;IACzC,MAAMI,SAASD,cAAcH,WAAW;IACxC,MAAMK,MAAMF,cAAcH,WAAW,UAAU;IAC/C,MAAMM,QAAQH,cAAcH,WAAW;IACvC,MAAMO,aAAaJ,cAAcH,WAAW;IAC5C,MAAMQ,iBAAiBL,cAAcH,WAAW;IAGhD,MAAMS,oBACJ;IACF,MAAMC,oBAAoBV,UAAU,KAAK,CAACS;IAC1C,IAAIE;IACJ,IAAIC;IAEJ,IAAIF,mBAAmB;QACrBE,kBAAkBF,AAAyB,WAAzBA,iBAAiB,CAAC,EAAE;QACtCC,kBAAkBD,iBAAiB,CAAC,EAAE,EAAE,UAAUG;IACpD;IAGA,IAAIC,SAAc;IAClB,IAAIP,cAAcA,AAA6B,WAA7BA,WAAW,WAAW,IAAe;QACrD,MAAMQ,OAAOR,WAAW,IAAI;QAC5B,IAAIS;QAEJ,IAAIR,gBACF,IAAI;YAEFQ,QAAQC,cAAcT,gBAAgBP;QACxC,EAAE,OAAOiB,GAAG;YACV,MAAM,IAAIC,MAAM,CAAC,mCAAmC,EAAED,GAAG;QAC3D;QAGFJ,SAAS;YACPC;YACA,GAAIC,AAAUH,WAAVG,QAAsB;gBAAEA;YAAM,IAAI,CAAC,CAAC;QAC1C;IACF;IAEA,OAAO;QACL,GAAId,UAAU;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAC9B,GAAIE,SAAS;YAAEA;QAAO,IAAI,CAAC,CAAC;QAC5BC;QACA,GAAIC,QAAQ;YAAEA;QAAM,IAAI,CAAC,CAAC;QAC1BQ;QACA,GAAIH,AAAoBE,WAApBF,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAIC,AAAoBC,WAApBD,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;IAC9D;AACF;AAEO,eAAeQ,KACpBC,eAAuB,EACvBC,IAUC;IAED,MAAM,EAAEC,OAAO,EAAEC,WAAW,EAAEC,mBAAmB,EAAE,GAAGH;IACtD,MAAM,EAAEI,IAAI,EAAE,GAAGH;IACjB,MAAMI,mBAAmBJ,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAM,EAAEtB,WAAW,EAAE,GAAGuB;IAExB,MAAMI,eAAe,MAAMC,2BAA2B;QACpD,aAAaP,KAAK,WAAW;QAC7BrB;QACA,aAAaqB,KAAK,WAAW;QAC7B,gBAAgB;QAChB,WAAWA,AAAmB,SAAnBA,KAAK,SAAS;IAC3B;IAEA,IAAIQ,eAAeH;IACnB,IAAII,aAAaL,KAAK,KAAK;IAC3B,IAAIM,cAAcN,KAAK,MAAM;IAK7B,IAAIzB,AAAgB,iBAAhBA,aAA8B;QAChC,MAAMgC,eAAe,MAAMC,4BAA4BJ;QACvDC,aAAaE,aAAa,KAAK;QAC/BD,cAAcC,aAAa,MAAM;QACjCH,eAAeG,aAAa,WAAW;IACzC;IAEA,MAAME,gBAAgBb,KAAK,aAAa,GACpC,CAAC,yBAAyB,EAAEA,KAAK,aAAa,CAAC,4BAA4B,CAAC,GAC5E;IAEJ,MAAMc,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGD,cAAc,kBAAkB,EAAEd,gBAAgB,mBAAmB,CAAC;gBACjF;aACD;QACH;KACD;IAED,IAAIgB;IAGJ,MAAMC,qBAAqBb,oBAAoB,oBAAoB;IACnE,MAAMc,wBAAwBD,qBAC1B,CAAC,IAAI,EAAEA,oBAAoB,GAC3B;IAGJ,MAAME,eAAef,oBAAoB,cAAc;IACvD,MAAMgB,kBAAkBD,eAAe,CAAC,IAAI,EAAEA,cAAc,GAAG;IAE/D,IAAIf,oBAAoB,sBAAsB,EAAE;QAC9CY,wBAAwB;YACtB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGZ,oBAAoB,sBAAsB,CAAC,qHAAqH,EAAEgB,kBAAkBF,uBAAuB;gBACtN;gBACA;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKT;wBACL,QAAQ;oBACV;gBACF;aACD;QACH;QAEAL,oBAAoB,mCAAmC;IACzD,OACEY,wBAAwB;QACtB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM,CAAC,6BAA6B,EAAEI,kBAAkBF,uBAAuB;YACjF;YACA;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKT;oBACL,QAAQ;gBACV;YACF;SACD;IACH;IAEFL,oBAAoB,MAAM,CAACY;IAG3BZ,oBAAoB,eAAe,CAAC,IAAI;IAExC,MAAMiB,aAAajB,oBAAoB,QAAQ,CAACH,KAAK,kBAAkB;IAEvE,MAAMqB,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASf;QAAa;WACrCQ;WACAM;KACJ;IAED,MAAM,EACJ,SAASE,WAAW,EACpBC,KAAK,EACLC,iBAAiB,EAClB,GAAG,MAAMC,OAAOJ,MAAMnB,aAAa;QAClC,WAAWF,AAAmB,YAAnBA,KAAK,SAAS,GAAeT,SAAYS,KAAK,SAAS;IACpE;IAGA,IAAI0B;IACJ,IAAI;QACFA,aAAajD,yBAAyB6C,aAAa3C;QAEnD,IAAI+C,WAAW,MAAM,IAAIA,AAA+BnC,WAA/BmC,WAAW,eAAe,EAAgB;YACjElD,QACE;YAEFkD,WAAW,eAAe,GAAGnC;YAC7BmC,WAAW,eAAe,GAAGnC;QAC/B;QAEA,MAAMoC,UAAUD,WAAW,MAAM,GAAG;YAACA,WAAW,MAAM;SAAC,GAAG,EAAE;QAC5D,IAAIE,yBAAyB;QAG7B,IAAIF,AAA+BnC,WAA/BmC,WAAW,eAAe,EAAgB;YAC5CpD,MAAM;YACNsD,yBAAyB;QAC3B;QAEA,MAAMC,cAAkC;YACtC,GAAGH,UAAU;YACbC;YACAL;YACAC;YACAC;YACA,UAAUM,uBAAuBH,SAAS3B,KAAK,WAAW;YAC1D4B;QACF;QAEAG,OAAOL,YAAY;QAEnBC,QAAQ,OAAO,CAAC,CAACnC;YACf,MAAMC,OAAOD,OAAO,IAAI;YACxB,MAAMwC,sBAAsBhC,KAAK,WAAW,CAAC,IAAI,CAC/C,CAACR,SAAWA,OAAO,IAAI,KAAKC;YAG9BnB,MAAM,+BAA+B0D;YACrC,MAAMC,eAAeD,sBACjBE,4BAA4BF,oBAAoB,WAAW,IAC3D,EAAE;YAEN1D,MAAM,gBAAgB2D;YAEtBA,aAAa,OAAO,CAAC,CAACE;gBACpB,MAAMC,eAAe5C,OAAO,KAAK,CAAC2C,MAAM;gBACxC,IAAIC,gBAAgBzD,AAAgBY,WAAhBZ,aAElBa,OAAO,KAAK,CAAC2C,MAAM,GAAGE,cACpBD,cACA3B,YACAC,aACA/B;YAGN;QACF;QAGA,IAAI+C,WAAW,GAAG,EAChBvB,oBAAoB,mBAAmB,CAACuB,WAAW,GAAG;QAIxD,IAAIA,WAAW,MAAM,EACnBvB,oBAAoB,YAAY,CAACuB,WAAW,MAAM;QAGpDvB,oBAAoB,MAAM,CAAC;YACzB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAMmB;gBACR;aACD;QACH;QAEA,OAAOO;IACT,EAAE,OAAOS,YAAY;QAEnB,MAAMC,eACJD,sBAAsBzC,QAAQyC,WAAW,OAAO,GAAGE,OAAOF;QAC5D,MAAM,IAAIG,qBACR,CAAC,iBAAiB,EAAEF,cAAc,EAClCjB,aACAC;IAEJ;AACF"}
|
|
1
|
+
{"version":3,"file":"ai-model/llm-planning.mjs","sources":["../../../src/ai-model/llm-planning.ts"],"sourcesContent":["import type {\n DeepThinkOption,\n DeviceAction,\n InterfaceType,\n PlanningAIResponse,\n RawResponsePlanningAIResponse,\n UIContext,\n} from '@/types';\nimport type { IModelConfig, TModelFamily } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport {\n buildYamlFlowFromPlans,\n fillBboxParam,\n findAllMidsceneLocatorField,\n} from '../common';\nimport type { ConversationHistory } from './conversation-history';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport { extractXMLTag } from './prompt/util';\nimport {\n AIResponseParseError,\n callAI,\n safeParseJson,\n} from './service-caller/index';\n\nconst debug = getDebug('planning');\nconst warnLog = getDebug('planning', { console: true });\n\n/**\n * Parse XML response from LLM and convert to RawResponsePlanningAIResponse\n */\nexport function parseXMLPlanningResponse(\n xmlString: string,\n modelFamily: TModelFamily | undefined,\n): RawResponsePlanningAIResponse {\n const thought = extractXMLTag(xmlString, 'thought');\n const memory = extractXMLTag(xmlString, 'memory');\n const log = extractXMLTag(xmlString, 'log') || '';\n const error = extractXMLTag(xmlString, 'error');\n const actionType = extractXMLTag(xmlString, 'action-type');\n const actionParamStr = extractXMLTag(xmlString, 'action-param-json');\n\n // Parse <complete> tag with success attribute\n const completeGoalRegex =\n /<complete\\s+success=\"(true|false)\">([\\s\\S]*?)<\\/complete>/i;\n const completeGoalMatch = xmlString.match(completeGoalRegex);\n let finalizeMessage: string | undefined;\n let finalizeSuccess: boolean | undefined;\n\n if (completeGoalMatch) {\n finalizeSuccess = completeGoalMatch[1] === 'true';\n finalizeMessage = completeGoalMatch[2]?.trim() || undefined;\n }\n\n // Parse action\n let action: any = null;\n if (actionType && actionType.toLowerCase() !== 'null') {\n const type = actionType.trim();\n let param: any = undefined;\n\n if (actionParamStr) {\n try {\n // Parse the JSON string in action-param-json\n param = safeParseJson(actionParamStr, modelFamily);\n } catch (e) {\n throw new Error(`Failed to parse action-param-json: ${e}`);\n }\n }\n\n action = {\n type,\n ...(param !== undefined ? { param } : {}),\n };\n }\n\n return {\n ...(thought ? { thought } : {}),\n ...(memory ? { memory } : {}),\n log,\n ...(error ? { error } : {}),\n action,\n ...(finalizeMessage !== undefined ? { finalizeMessage } : {}),\n ...(finalizeSuccess !== undefined ? { finalizeSuccess } : {}),\n };\n}\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n interfaceType: InterfaceType;\n actionSpace: DeviceAction<any>[];\n actionContext?: string;\n modelConfig: IModelConfig;\n conversationHistory: ConversationHistory;\n includeBbox: boolean;\n imagesIncludeCount?: number;\n deepThink?: DeepThinkOption;\n },\n): Promise<PlanningAIResponse> {\n const { context, modelConfig, conversationHistory } = opts;\n const { shotSize } = context;\n const screenshotBase64 = context.screenshot.base64;\n\n const { modelFamily } = modelConfig;\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n modelFamily,\n includeBbox: opts.includeBbox,\n includeThought: true, // always include thought\n deepThink: opts.deepThink === true,\n });\n\n let imagePayload = screenshotBase64;\n let imageWidth = shotSize.width;\n let imageHeight = shotSize.height;\n const rightLimit = imageWidth;\n const bottomLimit = imageHeight;\n\n // Process image based on VL mode requirements\n if (modelFamily === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const actionContext = opts.actionContext\n ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\\n`\n : '';\n\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${actionContext}<user_instruction>${userInstruction}</user_instruction>`,\n },\n ],\n },\n ];\n\n let latestFeedbackMessage: ChatCompletionMessageParam;\n\n // Build historical execution logs text to include in the message\n const historicalLogsText = conversationHistory.historicalLogsToText();\n const historicalLogsSection = historicalLogsText\n ? `\\n\\n${historicalLogsText}`\n : '';\n\n // Build memories text to include in the message\n const memoriesText = conversationHistory.memoriesToText();\n const memoriesSection = memoriesText ? `\\n\\n${memoriesText}` : '';\n\n if (conversationHistory.pendingFeedbackMessage) {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${conversationHistory.pendingFeedbackMessage}. The previous action has been executed, here is the latest screenshot. Please continue according to the instruction.${memoriesSection}${historicalLogsSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n\n conversationHistory.resetPendingFeedbackMessageIfExists();\n } else {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `this is the latest screenshot${memoriesSection}${historicalLogsSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n }\n conversationHistory.append(latestFeedbackMessage);\n\n // Compress history if it exceeds the threshold to avoid context overflow\n conversationHistory.compressHistory(50, 20);\n\n const historyLog = conversationHistory.snapshot(opts.imagesIncludeCount);\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...instruction,\n ...historyLog,\n ];\n\n const {\n content: rawResponse,\n usage,\n reasoning_content,\n } = await callAI(msgs, modelConfig, {\n deepThink: opts.deepThink === 'unset' ? undefined : opts.deepThink,\n });\n\n // Parse XML response to JSON object, capture parsing errors\n let planFromAI: RawResponsePlanningAIResponse;\n try {\n planFromAI = parseXMLPlanningResponse(rawResponse, modelFamily);\n\n if (planFromAI.action && planFromAI.finalizeSuccess !== undefined) {\n warnLog(\n 'Planning response included both an action and <complete>; ignoring <complete> output.',\n );\n planFromAI.finalizeMessage = undefined;\n planFromAI.finalizeSuccess = undefined;\n }\n\n const actions = planFromAI.action ? [planFromAI.action] : [];\n let shouldContinuePlanning = true;\n\n // Check if task is completed via <complete> tag\n if (planFromAI.finalizeSuccess !== undefined) {\n debug('task completed via <complete> tag, stop planning');\n shouldContinuePlanning = false;\n }\n\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n reasoning_content,\n yamlFlow: buildYamlFlowFromPlans(actions, opts.actionSpace),\n shouldContinuePlanning,\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult && modelFamily !== undefined) {\n // Always use model family to fill bbox parameters\n action.param[field] = fillBboxParam(\n locateResult,\n imageWidth,\n imageHeight,\n modelFamily,\n );\n }\n });\n });\n\n // Accumulate logs as historical execution steps\n if (planFromAI.log) {\n conversationHistory.appendHistoricalLog(planFromAI.log);\n }\n\n // Append memory to conversation history if present\n if (planFromAI.memory) {\n conversationHistory.appendMemory(planFromAI.memory);\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n\n return returnValue;\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `XML parse error: ${errorMessage}`,\n rawResponse,\n usage,\n );\n }\n}\n"],"names":["debug","getDebug","warnLog","parseXMLPlanningResponse","xmlString","modelFamily","thought","extractXMLTag","memory","log","error","actionType","actionParamStr","completeGoalRegex","completeGoalMatch","finalizeMessage","finalizeSuccess","undefined","action","type","param","safeParseJson","e","Error","plan","userInstruction","opts","context","modelConfig","conversationHistory","shotSize","screenshotBase64","systemPrompt","systemPromptToTaskPlanning","imagePayload","imageWidth","imageHeight","paddedResult","paddingToMatchBlockByBase64","actionContext","instruction","latestFeedbackMessage","historicalLogsText","historicalLogsSection","memoriesText","memoriesSection","historyLog","msgs","rawResponse","usage","reasoning_content","callAI","planFromAI","actions","shouldContinuePlanning","returnValue","buildYamlFlowFromPlans","assert","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","fillBboxParam","parseError","errorMessage","String","AIResponseParseError"],"mappings":";;;;;;;AA2BA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,UAAUD,SAAS,YAAY;IAAE,SAAS;AAAK;AAK9C,SAASE,yBACdC,SAAiB,EACjBC,WAAqC;IAErC,MAAMC,UAAUC,cAAcH,WAAW;IACzC,MAAMI,SAASD,cAAcH,WAAW;IACxC,MAAMK,MAAMF,cAAcH,WAAW,UAAU;IAC/C,MAAMM,QAAQH,cAAcH,WAAW;IACvC,MAAMO,aAAaJ,cAAcH,WAAW;IAC5C,MAAMQ,iBAAiBL,cAAcH,WAAW;IAGhD,MAAMS,oBACJ;IACF,MAAMC,oBAAoBV,UAAU,KAAK,CAACS;IAC1C,IAAIE;IACJ,IAAIC;IAEJ,IAAIF,mBAAmB;QACrBE,kBAAkBF,AAAyB,WAAzBA,iBAAiB,CAAC,EAAE;QACtCC,kBAAkBD,iBAAiB,CAAC,EAAE,EAAE,UAAUG;IACpD;IAGA,IAAIC,SAAc;IAClB,IAAIP,cAAcA,AAA6B,WAA7BA,WAAW,WAAW,IAAe;QACrD,MAAMQ,OAAOR,WAAW,IAAI;QAC5B,IAAIS;QAEJ,IAAIR,gBACF,IAAI;YAEFQ,QAAQC,cAAcT,gBAAgBP;QACxC,EAAE,OAAOiB,GAAG;YACV,MAAM,IAAIC,MAAM,CAAC,mCAAmC,EAAED,GAAG;QAC3D;QAGFJ,SAAS;YACPC;YACA,GAAIC,AAAUH,WAAVG,QAAsB;gBAAEA;YAAM,IAAI,CAAC,CAAC;QAC1C;IACF;IAEA,OAAO;QACL,GAAId,UAAU;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAC9B,GAAIE,SAAS;YAAEA;QAAO,IAAI,CAAC,CAAC;QAC5BC;QACA,GAAIC,QAAQ;YAAEA;QAAM,IAAI,CAAC,CAAC;QAC1BQ;QACA,GAAIH,AAAoBE,WAApBF,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAIC,AAAoBC,WAApBD,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;IAC9D;AACF;AAEO,eAAeQ,KACpBC,eAAuB,EACvBC,IAUC;IAED,MAAM,EAAEC,OAAO,EAAEC,WAAW,EAAEC,mBAAmB,EAAE,GAAGH;IACtD,MAAM,EAAEI,QAAQ,EAAE,GAAGH;IACrB,MAAMI,mBAAmBJ,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAM,EAAEtB,WAAW,EAAE,GAAGuB;IAExB,MAAMI,eAAe,MAAMC,2BAA2B;QACpD,aAAaP,KAAK,WAAW;QAC7BrB;QACA,aAAaqB,KAAK,WAAW;QAC7B,gBAAgB;QAChB,WAAWA,AAAmB,SAAnBA,KAAK,SAAS;IAC3B;IAEA,IAAIQ,eAAeH;IACnB,IAAII,aAAaL,SAAS,KAAK;IAC/B,IAAIM,cAAcN,SAAS,MAAM;IAKjC,IAAIzB,AAAgB,iBAAhBA,aAA8B;QAChC,MAAMgC,eAAe,MAAMC,4BAA4BJ;QACvDC,aAAaE,aAAa,KAAK;QAC/BD,cAAcC,aAAa,MAAM;QACjCH,eAAeG,aAAa,WAAW;IACzC;IAEA,MAAME,gBAAgBb,KAAK,aAAa,GACpC,CAAC,yBAAyB,EAAEA,KAAK,aAAa,CAAC,4BAA4B,CAAC,GAC5E;IAEJ,MAAMc,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGD,cAAc,kBAAkB,EAAEd,gBAAgB,mBAAmB,CAAC;gBACjF;aACD;QACH;KACD;IAED,IAAIgB;IAGJ,MAAMC,qBAAqBb,oBAAoB,oBAAoB;IACnE,MAAMc,wBAAwBD,qBAC1B,CAAC,IAAI,EAAEA,oBAAoB,GAC3B;IAGJ,MAAME,eAAef,oBAAoB,cAAc;IACvD,MAAMgB,kBAAkBD,eAAe,CAAC,IAAI,EAAEA,cAAc,GAAG;IAE/D,IAAIf,oBAAoB,sBAAsB,EAAE;QAC9CY,wBAAwB;YACtB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGZ,oBAAoB,sBAAsB,CAAC,qHAAqH,EAAEgB,kBAAkBF,uBAAuB;gBACtN;gBACA;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKT;wBACL,QAAQ;oBACV;gBACF;aACD;QACH;QAEAL,oBAAoB,mCAAmC;IACzD,OACEY,wBAAwB;QACtB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM,CAAC,6BAA6B,EAAEI,kBAAkBF,uBAAuB;YACjF;YACA;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKT;oBACL,QAAQ;gBACV;YACF;SACD;IACH;IAEFL,oBAAoB,MAAM,CAACY;IAG3BZ,oBAAoB,eAAe,CAAC,IAAI;IAExC,MAAMiB,aAAajB,oBAAoB,QAAQ,CAACH,KAAK,kBAAkB;IAEvE,MAAMqB,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASf;QAAa;WACrCQ;WACAM;KACJ;IAED,MAAM,EACJ,SAASE,WAAW,EACpBC,KAAK,EACLC,iBAAiB,EAClB,GAAG,MAAMC,OAAOJ,MAAMnB,aAAa;QAClC,WAAWF,AAAmB,YAAnBA,KAAK,SAAS,GAAeT,SAAYS,KAAK,SAAS;IACpE;IAGA,IAAI0B;IACJ,IAAI;QACFA,aAAajD,yBAAyB6C,aAAa3C;QAEnD,IAAI+C,WAAW,MAAM,IAAIA,AAA+BnC,WAA/BmC,WAAW,eAAe,EAAgB;YACjElD,QACE;YAEFkD,WAAW,eAAe,GAAGnC;YAC7BmC,WAAW,eAAe,GAAGnC;QAC/B;QAEA,MAAMoC,UAAUD,WAAW,MAAM,GAAG;YAACA,WAAW,MAAM;SAAC,GAAG,EAAE;QAC5D,IAAIE,yBAAyB;QAG7B,IAAIF,AAA+BnC,WAA/BmC,WAAW,eAAe,EAAgB;YAC5CpD,MAAM;YACNsD,yBAAyB;QAC3B;QAEA,MAAMC,cAAkC;YACtC,GAAGH,UAAU;YACbC;YACAL;YACAC;YACAC;YACA,UAAUM,uBAAuBH,SAAS3B,KAAK,WAAW;YAC1D4B;QACF;QAEAG,OAAOL,YAAY;QAEnBC,QAAQ,OAAO,CAAC,CAACnC;YACf,MAAMC,OAAOD,OAAO,IAAI;YACxB,MAAMwC,sBAAsBhC,KAAK,WAAW,CAAC,IAAI,CAC/C,CAACR,SAAWA,OAAO,IAAI,KAAKC;YAG9BnB,MAAM,+BAA+B0D;YACrC,MAAMC,eAAeD,sBACjBE,4BAA4BF,oBAAoB,WAAW,IAC3D,EAAE;YAEN1D,MAAM,gBAAgB2D;YAEtBA,aAAa,OAAO,CAAC,CAACE;gBACpB,MAAMC,eAAe5C,OAAO,KAAK,CAAC2C,MAAM;gBACxC,IAAIC,gBAAgBzD,AAAgBY,WAAhBZ,aAElBa,OAAO,KAAK,CAAC2C,MAAM,GAAGE,cACpBD,cACA3B,YACAC,aACA/B;YAGN;QACF;QAGA,IAAI+C,WAAW,GAAG,EAChBvB,oBAAoB,mBAAmB,CAACuB,WAAW,GAAG;QAIxD,IAAIA,WAAW,MAAM,EACnBvB,oBAAoB,YAAY,CAACuB,WAAW,MAAM;QAGpDvB,oBAAoB,MAAM,CAAC;YACzB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAMmB;gBACR;aACD;QACH;QAEA,OAAOO;IACT,EAAE,OAAOS,YAAY;QAEnB,MAAMC,eACJD,sBAAsBzC,QAAQyC,WAAW,OAAO,GAAGE,OAAOF;QAC5D,MAAM,IAAIG,qBACR,CAAC,iBAAiB,EAAEF,cAAc,EAClCjB,aACAC;IAEJ;AACF"}
|